1/* $NetBSD: mm.c,v 1.22 2016/10/13 08:56:31 ryo Exp $ */
2
3/*-
4 * Copyright (c) 2002, 2008, 2010 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christos Zoulas, Joerg Sonnenberger and Mindaugas Rasiukevicius.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
/*
 * Special /dev/{mem,kmem,zero,null,full} memory devices.
 */
35
36#include <sys/cdefs.h>
37__KERNEL_RCSID(0, "$NetBSD: mm.c,v 1.22 2016/10/13 08:56:31 ryo Exp $");
38
39#include "opt_compat_netbsd.h"
40
41#include <sys/param.h>
42#include <sys/conf.h>
43#include <sys/ioctl.h>
44#include <sys/mman.h>
45#include <sys/uio.h>
46#include <sys/termios.h>
47
48#include <dev/mm.h>
49
50#include <uvm/uvm_extern.h>
51
52static void * dev_zero_page __read_mostly;
53static kmutex_t dev_mem_lock __cacheline_aligned;
54static vaddr_t dev_mem_addr __read_mostly;
55
56static dev_type_read(mm_readwrite);
57static dev_type_ioctl(mm_ioctl);
58static dev_type_mmap(mm_mmap);
59static dev_type_ioctl(mm_ioctl);
60
61const struct cdevsw mem_cdevsw = {
62#ifdef __HAVE_MM_MD_OPEN
63 .d_open = mm_md_open,
64#else
65 .d_open = nullopen,
66#endif
67 .d_close = nullclose,
68 .d_read = mm_readwrite,
69 .d_write = mm_readwrite,
70 .d_ioctl = mm_ioctl,
71 .d_stop = nostop,
72 .d_tty = notty,
73 .d_poll = nopoll,
74 .d_mmap = mm_mmap,
75 .d_kqfilter = nokqfilter,
76 .d_discard = nodiscard,
77 .d_flag = D_MPSAFE
78};
79
#ifdef pmax /* XXX */
/*
 * Second device switch, built for pmax only.  Identical to mem_cdevsw
 * except that the open routine is always nullopen.
 */
const struct cdevsw mem_ultrix_cdevsw = {
	/* Entry points implemented for the memory devices. */
	.d_open = nullopen,
	.d_read = mm_readwrite,
	.d_write = mm_readwrite,
	.d_ioctl = mm_ioctl,
	.d_mmap = mm_mmap,

	/* Remaining operations use the generic null*/no* handlers. */
	.d_close = nullclose,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,

	.d_flag = D_MPSAFE,
};
#endif
96
97/*
98 * mm_init: initialize memory device driver.
99 */
100void
101mm_init(void)
102{
103 vaddr_t pg;
104
105 mutex_init(&dev_mem_lock, MUTEX_DEFAULT, IPL_NONE);
106
107 /* Read-only zero-page. */
108 pg = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
109 KASSERT(pg != 0);
110 pmap_protect(pmap_kernel(), pg, pg + PAGE_SIZE, VM_PROT_READ);
111 pmap_update(pmap_kernel());
112 dev_zero_page = (void *)pg;
113
114#ifndef __HAVE_MM_MD_CACHE_ALIASING
115 /* KVA for mappings during I/O. */
116 dev_mem_addr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
117 UVM_KMF_VAONLY|UVM_KMF_WAITVA);
118 KASSERT(dev_mem_addr != 0);
119#else
120 dev_mem_addr = 0;
121#endif
122}
123
124
125/*
126 * dev_mem_getva: get a special virtual address. If architecture requires,
127 * allocate VA according to PA, which avoids cache-aliasing issues. Use a
128 * constant, general mapping address otherwise.
129 */
130static inline vaddr_t
131dev_mem_getva(paddr_t pa, int color)
132{
133#ifdef __HAVE_MM_MD_CACHE_ALIASING
134 return uvm_km_alloc(kernel_map, PAGE_SIZE,
135 color & uvmexp.colormask,
136 UVM_KMF_VAONLY | UVM_KMF_WAITVA | UVM_KMF_COLORMATCH);
137#else
138 return dev_mem_addr;
139#endif
140}
141
142static inline void
143dev_mem_relva(paddr_t pa, vaddr_t va)
144{
145#ifdef __HAVE_MM_MD_CACHE_ALIASING
146 uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY);
147#else
148 KASSERT(dev_mem_addr == va);
149#endif
150}
151
152/*
153 * dev_kmem_readwrite: helper for DEV_MEM (/dev/mem) case of R/W.
154 */
155static int
156dev_mem_readwrite(struct uio *uio, struct iovec *iov)
157{
158 paddr_t paddr;
159 vaddr_t vaddr;
160 vm_prot_t prot;
161 size_t len, offset;
162 bool have_direct;
163 int error;
164 int color = 0;
165
166 /* Check for wrap around. */
167 if ((uintptr_t)uio->uio_offset != uio->uio_offset) {
168 return EFAULT;
169 }
170 paddr = uio->uio_offset & ~PAGE_MASK;
171 prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
172 error = mm_md_physacc(paddr, prot);
173 if (error) {
174 return error;
175 }
176 offset = uio->uio_offset & PAGE_MASK;
177 len = MIN(uio->uio_resid, PAGE_SIZE - offset);
178
179#ifdef __HAVE_MM_MD_CACHE_ALIASING
180 have_direct = mm_md_page_color(paddr, &color);
181#else
182 have_direct = true;
183 color = 0;
184#endif
185
186#ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
187 /* Is physical address directly mapped? Return VA. */
188 if (have_direct)
189 have_direct = mm_md_direct_mapped_phys(paddr, &vaddr);
190#else
191 vaddr = 0;
192 have_direct = false;
193#endif
194 if (!have_direct) {
195 /* Get a special virtual address. */
196 const vaddr_t va = dev_mem_getva(paddr, color);
197
198 /* Map selected KVA to physical address. */
199 mutex_enter(&dev_mem_lock);
200 pmap_kenter_pa(va, paddr, prot, 0);
201 pmap_update(pmap_kernel());
202
203 /* Perform I/O. */
204 vaddr = va + offset;
205 error = uiomove((void *)vaddr, len, uio);
206
207 /* Unmap, flush before unlock. */
208 pmap_kremove(va, PAGE_SIZE);
209 pmap_update(pmap_kernel());
210 mutex_exit(&dev_mem_lock);
211
212 /* "Release" the virtual address. */
213 dev_mem_relva(paddr, va);
214 } else {
215 /* Direct map, just perform I/O. */
216 vaddr += offset;
217 error = uiomove((void *)vaddr, len, uio);
218 }
219 return error;
220}
221
222/*
223 * dev_kmem_readwrite: helper for DEV_KMEM (/dev/kmem) case of R/W.
224 */
225static int
226dev_kmem_readwrite(struct uio *uio, struct iovec *iov)
227{
228 void *addr;
229 size_t len, offset;
230 vm_prot_t prot;
231 int error;
232 bool md_kva;
233
234 /* Check for wrap around. */
235 addr = (void *)(intptr_t)uio->uio_offset;
236 if ((uintptr_t)addr != uio->uio_offset) {
237 return EFAULT;
238 }
239 /*
240 * Handle non-page aligned offset.
241 * Otherwise, we operate in page-by-page basis.
242 */
243 offset = uio->uio_offset & PAGE_MASK;
244 len = MIN(uio->uio_resid, PAGE_SIZE - offset);
245 prot = (uio->uio_rw == UIO_WRITE) ? VM_PROT_WRITE : VM_PROT_READ;
246
247 md_kva = false;
248
249#ifdef __HAVE_MM_MD_DIRECT_MAPPED_IO
250 paddr_t paddr;
251 /* MD case: is this is a directly mapped address? */
252 if (mm_md_direct_mapped_io(addr, &paddr)) {
253 /* If so, validate physical address. */
254 error = mm_md_physacc(paddr, prot);
255 if (error) {
256 return error;
257 }
258 md_kva = true;
259 }
260#endif
261 if (!md_kva) {
262 bool checked = false;
263
264#ifdef __HAVE_MM_MD_KERNACC
265 /* MD check for the address. */
266 error = mm_md_kernacc(addr, prot, &checked);
267 if (error) {
268 return error;
269 }
270#endif
271 /* UVM check for the address (unless MD indicated to not). */
272 if (!checked && !uvm_kernacc(addr, len, prot)) {
273 return EFAULT;
274 }
275 }
276 error = uiomove(addr, len, uio);
277 return error;
278}
279
280/*
281 * dev_zero_readwrite: helper for DEV_ZERO (/dev/null) case of R/W.
282 */
283static inline int
284dev_zero_readwrite(struct uio *uio, struct iovec *iov)
285{
286 size_t len;
287
288 /* Nothing to do for the write case. */
289 if (uio->uio_rw == UIO_WRITE) {
290 uio->uio_resid = 0;
291 return 0;
292 }
293 /*
294 * Read in page-by-page basis, caller will continue.
295 * Cut appropriately for a single/last-iteration cases.
296 */
297 len = MIN(iov->iov_len, PAGE_SIZE);
298 return uiomove(dev_zero_page, len, uio);
299}
300
301/*
302 * mm_readwrite: general memory R/W function.
303 */
304static int
305mm_readwrite(dev_t dev, struct uio *uio, int flags)
306{
307 struct iovec *iov;
308 int error;
309
310#ifdef __HAVE_MM_MD_READWRITE
311 /* If defined - there are extra MD cases. */
312 switch (minor(dev)) {
313 case DEV_MEM:
314 case DEV_KMEM:
315 case DEV_NULL:
316 case DEV_ZERO:
317#if defined(COMPAT_16) && defined(__arm)
318 case _DEV_ZERO_oARM:
319#endif
320 break;
321 default:
322 return mm_md_readwrite(dev, uio);
323 }
324#endif
325 error = 0;
326 while (uio->uio_resid > 0 && error == 0) {
327 iov = uio->uio_iov;
328 if (iov->iov_len == 0) {
329 /* Processed; next I/O vector. */
330 uio->uio_iov++;
331 uio->uio_iovcnt--;
332 KASSERT(uio->uio_iovcnt >= 0);
333 continue;
334 }
335 /* Helper functions will process in page-by-page basis. */
336 switch (minor(dev)) {
337 case DEV_MEM:
338 error = dev_mem_readwrite(uio, iov);
339 break;
340 case DEV_KMEM:
341 error = dev_kmem_readwrite(uio, iov);
342 break;
343 case DEV_NULL:
344 if (uio->uio_rw == UIO_WRITE) {
345 uio->uio_resid = 0;
346 }
347 /* Break directly out of the loop. */
348 return 0;
349 case DEV_FULL:
350 if (uio->uio_rw == UIO_WRITE) {
351 return ENOSPC;
352 }
353 /*FALLTHROUGH*/
354#if defined(COMPAT_16) && defined(__arm)
355 case _DEV_ZERO_oARM:
356#endif
357 case DEV_ZERO:
358 error = dev_zero_readwrite(uio, iov);
359 break;
360 default:
361 error = ENXIO;
362 break;
363 }
364 }
365 return error;
366}
367
368/*
369 * mm_mmap: general mmap() handler.
370 */
371static paddr_t
372mm_mmap(dev_t dev, off_t off, int acc)
373{
374 vm_prot_t prot;
375
376#ifdef __HAVE_MM_MD_MMAP
377 /* If defined - there are extra mmap() MD cases. */
378 switch (minor(dev)) {
379 case DEV_MEM:
380 case DEV_KMEM:
381 case DEV_NULL:
382#if defined(COMPAT_16) && defined(__arm)
383 case _DEV_ZERO_oARM:
384#endif
385 case DEV_ZERO:
386 break;
387 default:
388 return mm_md_mmap(dev, off, acc);
389 }
390#endif
391 /*
392 * /dev/null does not make sense, /dev/kmem is volatile and
393 * /dev/zero is handled in mmap already.
394 */
395 if (minor(dev) != DEV_MEM) {
396 return -1;
397 }
398
399 prot = 0;
400 if (acc & PROT_EXEC)
401 prot |= VM_PROT_EXECUTE;
402 if (acc & PROT_READ)
403 prot |= VM_PROT_READ;
404 if (acc & PROT_WRITE)
405 prot |= VM_PROT_WRITE;
406
407 /* Validate the physical address. */
408 if (mm_md_physacc(off, prot) != 0) {
409 return -1;
410 }
411 return off >> PGSHIFT;
412}
413
414static int
415mm_ioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
416{
417
418 switch (cmd) {
419 case FIONBIO:
420 /* We never block anyway. */
421 return 0;
422
423 case FIOSETOWN:
424 case FIOGETOWN:
425 case TIOCGPGRP:
426 case TIOCSPGRP:
427 case TIOCGETA:
428 return ENOTTY;
429
430 case FIOASYNC:
431 if ((*(int *)data) == 0) {
432 return 0;
433 }
434 /* FALLTHROUGH */
435 default:
436 return EOPNOTSUPP;
437 }
438}
439