1 | /* $NetBSD: vnd.c,v 1.258 2016/08/05 08:21:24 pgoyette Exp $ */ |
2 | |
3 | /*- |
4 | * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. |
5 | * All rights reserved. |
6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Jason R. Thorpe. |
9 | * |
10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions |
12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ |
31 | |
32 | /* |
33 | * Copyright (c) 1988 University of Utah. |
34 | * Copyright (c) 1990, 1993 |
35 | * The Regents of the University of California. All rights reserved. |
36 | * |
37 | * This code is derived from software contributed to Berkeley by |
38 | * the Systems Programming Group of the University of Utah Computer |
39 | * Science Department. |
40 | * |
41 | * Redistribution and use in source and binary forms, with or without |
42 | * modification, are permitted provided that the following conditions |
43 | * are met: |
44 | * 1. Redistributions of source code must retain the above copyright |
45 | * notice, this list of conditions and the following disclaimer. |
46 | * 2. Redistributions in binary form must reproduce the above copyright |
47 | * notice, this list of conditions and the following disclaimer in the |
48 | * documentation and/or other materials provided with the distribution. |
49 | * 3. Neither the name of the University nor the names of its contributors |
50 | * may be used to endorse or promote products derived from this software |
51 | * without specific prior written permission. |
52 | * |
53 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
54 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
55 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
56 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
57 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
58 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
59 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
60 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
61 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
62 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
63 | * SUCH DAMAGE. |
64 | * |
65 | * from: Utah $Hdr: vn.c 1.13 94/04/02$ |
66 | * |
67 | * @(#)vn.c 8.9 (Berkeley) 5/14/95 |
68 | */ |
69 | |
70 | /* |
71 | * Vnode disk driver. |
72 | * |
73 | * Block/character interface to a vnode. Allows one to treat a file |
74 | * as a disk (e.g. build a filesystem in it, mount it, etc.). |
75 | * |
76 | * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations, |
77 | * this uses them to avoid distorting the local buffer cache. If those |
78 | * block-level operations are not available, this falls back to the regular |
 * read and write calls. Using these may distort the cache in some cases,
 * but it is better to have the driver working than to prevent it from
 * working on file systems where the block-level operations are not
 * implemented for whatever reason.
83 | * |
84 | * NOTE 2: There is a security issue involved with this driver. |
85 | * Once mounted all access to the contents of the "mapped" file via |
86 | * the special file is controlled by the permissions on the special |
87 | * file, the protection of the mapped file is ignored (effectively, |
88 | * by using root credentials in all transactions). |
89 | * |
90 | * NOTE 3: Doesn't interact with leases, should it? |
91 | */ |
92 | |
93 | #include <sys/cdefs.h> |
94 | __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.258 2016/08/05 08:21:24 pgoyette Exp $" ); |
95 | |
96 | #if defined(_KERNEL_OPT) |
97 | #include "opt_vnd.h" |
98 | #include "opt_compat_netbsd.h" |
99 | #endif |
100 | |
101 | #include <sys/param.h> |
102 | #include <sys/systm.h> |
103 | #include <sys/namei.h> |
104 | #include <sys/proc.h> |
105 | #include <sys/kthread.h> |
106 | #include <sys/errno.h> |
107 | #include <sys/buf.h> |
108 | #include <sys/bufq.h> |
109 | #include <sys/malloc.h> |
110 | #include <sys/ioctl.h> |
111 | #include <sys/disklabel.h> |
112 | #include <sys/device.h> |
113 | #include <sys/disk.h> |
114 | #include <sys/stat.h> |
115 | #include <sys/mount.h> |
116 | #include <sys/vnode.h> |
117 | #include <sys/file.h> |
118 | #include <sys/uio.h> |
119 | #include <sys/conf.h> |
120 | #include <sys/kauth.h> |
121 | #include <sys/module.h> |
122 | |
123 | #include <net/zlib.h> |
124 | |
125 | #include <miscfs/genfs/genfs.h> |
126 | #include <miscfs/specfs/specdev.h> |
127 | |
128 | #include <dev/dkvar.h> |
129 | #include <dev/vndvar.h> |
130 | |
131 | #include "ioconf.h" |
132 | |
133 | #if defined(VNDDEBUG) && !defined(DEBUG) |
134 | #define DEBUG |
135 | #endif |
136 | |
137 | #ifdef DEBUG |
138 | int dovndcluster = 1; |
139 | #define VDB_FOLLOW 0x01 |
140 | #define VDB_INIT 0x02 |
141 | #define VDB_IO 0x04 |
142 | #define VDB_LABEL 0x08 |
143 | int vnddebug = 0; |
144 | #endif |
145 | |
146 | #define vndunit(x) DISKUNIT(x) |
147 | |
148 | struct vndxfer { |
149 | struct buf vx_buf; |
150 | struct vnd_softc *vx_vnd; |
151 | }; |
152 | #define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) |
153 | |
154 | #define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) |
155 | #define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) |
156 | |
157 | #define VNDLABELDEV(dev) \ |
158 | (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) |
159 | |
160 | #define VND_MAXPENDING(vnd) ((vnd)->sc_maxactive * 4) |
161 | |
162 | |
163 | static void vndclear(struct vnd_softc *, int); |
164 | static int vnddoclear(struct vnd_softc *, int, int, bool); |
165 | static int vndsetcred(struct vnd_softc *, kauth_cred_t); |
166 | static void vndthrottle(struct vnd_softc *, struct vnode *); |
167 | static void vndiodone(struct buf *); |
168 | #if 0 |
169 | static void vndshutdown(void); |
170 | #endif |
171 | |
172 | static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); |
173 | static void vndgetdisklabel(dev_t, struct vnd_softc *); |
174 | |
175 | static int vndlock(struct vnd_softc *); |
176 | static void vndunlock(struct vnd_softc *); |
177 | #ifdef VND_COMPRESSION |
178 | static void compstrategy(struct buf *, off_t); |
179 | static void *vnd_alloc(void *, u_int, u_int); |
180 | static void vnd_free(void *, void *); |
181 | #endif /* VND_COMPRESSION */ |
182 | |
183 | static void vndthread(void *); |
184 | static bool vnode_has_op(const struct vnode *, int); |
185 | static void handle_with_rdwr(struct vnd_softc *, const struct buf *, |
186 | struct buf *); |
187 | static void handle_with_strategy(struct vnd_softc *, const struct buf *, |
188 | struct buf *); |
189 | static void vnd_set_geometry(struct vnd_softc *); |
190 | |
191 | static dev_type_open(vndopen); |
192 | static dev_type_close(vndclose); |
193 | static dev_type_read(vndread); |
194 | static dev_type_write(vndwrite); |
195 | static dev_type_ioctl(vndioctl); |
196 | static dev_type_strategy(vndstrategy); |
197 | static dev_type_dump(vnddump); |
198 | static dev_type_size(vndsize); |
199 | |
200 | const struct bdevsw vnd_bdevsw = { |
201 | .d_open = vndopen, |
202 | .d_close = vndclose, |
203 | .d_strategy = vndstrategy, |
204 | .d_ioctl = vndioctl, |
205 | .d_dump = vnddump, |
206 | .d_psize = vndsize, |
207 | .d_discard = nodiscard, |
208 | .d_flag = D_DISK |
209 | }; |
210 | |
211 | const struct cdevsw vnd_cdevsw = { |
212 | .d_open = vndopen, |
213 | .d_close = vndclose, |
214 | .d_read = vndread, |
215 | .d_write = vndwrite, |
216 | .d_ioctl = vndioctl, |
217 | .d_stop = nostop, |
218 | .d_tty = notty, |
219 | .d_poll = nopoll, |
220 | .d_mmap = nommap, |
221 | .d_kqfilter = nokqfilter, |
222 | .d_discard = nodiscard, |
223 | .d_flag = D_DISK |
224 | }; |
225 | |
226 | static int vnd_match(device_t, cfdata_t, void *); |
227 | static void vnd_attach(device_t, device_t, void *); |
228 | static int vnd_detach(device_t, int); |
229 | |
230 | CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc), |
231 | vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN); |
232 | extern struct cfdriver vnd_cd; |
233 | |
234 | static struct vnd_softc *vnd_spawn(int); |
235 | int vnd_destroy(device_t); |
236 | |
237 | static struct dkdriver vnddkdriver = { |
238 | .d_strategy = vndstrategy, |
239 | .d_minphys = minphys |
240 | }; |
241 | |
242 | void |
243 | vndattach(int num) |
244 | { |
245 | int error; |
246 | |
247 | error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); |
248 | if (error) |
249 | aprint_error("%s: unable to register cfattach, error = %d\n" , |
250 | vnd_cd.cd_name, error); |
251 | } |
252 | |
253 | static int |
254 | vnd_match(device_t self, cfdata_t cfdata, void *aux) |
255 | { |
256 | |
257 | return 1; |
258 | } |
259 | |
260 | static void |
261 | vnd_attach(device_t parent, device_t self, void *aux) |
262 | { |
263 | struct vnd_softc *sc = device_private(self); |
264 | |
265 | sc->sc_dev = self; |
266 | sc->sc_comp_offsets = NULL; |
267 | sc->sc_comp_buff = NULL; |
268 | sc->sc_comp_decombuf = NULL; |
269 | bufq_alloc(&sc->sc_tab, "disksort" , BUFQ_SORT_RAWBLOCK); |
270 | disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver); |
271 | if (!pmf_device_register(self, NULL, NULL)) |
272 | aprint_error_dev(self, "couldn't establish power handler\n" ); |
273 | } |
274 | |
275 | static int |
276 | vnd_detach(device_t self, int flags) |
277 | { |
278 | int error; |
279 | struct vnd_softc *sc = device_private(self); |
280 | |
281 | if (sc->sc_flags & VNF_INITED) { |
282 | error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0); |
283 | if (error != 0) |
284 | return error; |
285 | } |
286 | |
287 | pmf_device_deregister(self); |
288 | bufq_free(sc->sc_tab); |
289 | disk_destroy(&sc->sc_dkdev); |
290 | |
291 | return 0; |
292 | } |
293 | |
294 | static struct vnd_softc * |
295 | vnd_spawn(int unit) |
296 | { |
297 | cfdata_t cf; |
298 | |
299 | cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); |
300 | cf->cf_name = vnd_cd.cd_name; |
301 | cf->cf_atname = vnd_cd.cd_name; |
302 | cf->cf_unit = unit; |
303 | cf->cf_fstate = FSTATE_STAR; |
304 | |
305 | return device_private(config_attach_pseudo(cf)); |
306 | } |
307 | |
308 | int |
309 | vnd_destroy(device_t dev) |
310 | { |
311 | int error; |
312 | cfdata_t cf; |
313 | |
314 | cf = device_cfdata(dev); |
315 | error = config_detach(dev, DETACH_QUIET); |
316 | if (error) |
317 | return error; |
318 | free(cf, M_DEVBUF); |
319 | return 0; |
320 | } |
321 | |
322 | static int |
323 | vndopen(dev_t dev, int flags, int mode, struct lwp *l) |
324 | { |
325 | int unit = vndunit(dev); |
326 | struct vnd_softc *sc; |
327 | int error = 0, part, pmask; |
328 | struct disklabel *lp; |
329 | |
330 | #ifdef DEBUG |
331 | if (vnddebug & VDB_FOLLOW) |
332 | printf("vndopen(0x%" PRIx64", 0x%x, 0x%x, %p)\n" , dev, flags, mode, l); |
333 | #endif |
334 | sc = device_lookup_private(&vnd_cd, unit); |
335 | if (sc == NULL) { |
336 | sc = vnd_spawn(unit); |
337 | if (sc == NULL) |
338 | return ENOMEM; |
339 | |
340 | /* compatibility, keep disklabel after close */ |
341 | sc->sc_flags = VNF_KLABEL; |
342 | } |
343 | |
344 | if ((error = vndlock(sc)) != 0) |
345 | return error; |
346 | |
347 | mutex_enter(&sc->sc_dkdev.dk_openlock); |
348 | |
349 | if ((sc->sc_flags & VNF_CLEARING) != 0) { |
350 | error = ENXIO; |
351 | goto done; |
352 | } |
353 | |
354 | lp = sc->sc_dkdev.dk_label; |
355 | |
356 | part = DISKPART(dev); |
357 | pmask = (1 << part); |
358 | |
359 | if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) { |
360 | error = EBUSY; |
361 | goto done; |
362 | } |
363 | |
364 | if (sc->sc_flags & VNF_INITED) { |
365 | if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) { |
366 | /* |
367 | * If any non-raw partition is open, but the disk |
368 | * has been invalidated, disallow further opens. |
369 | */ |
370 | if ((sc->sc_flags & VNF_VLABEL) == 0) { |
371 | error = EIO; |
372 | goto done; |
373 | } |
374 | } else { |
375 | /* |
376 | * Load the partition info if not already loaded. |
377 | */ |
378 | if ((sc->sc_flags & VNF_VLABEL) == 0) { |
379 | sc->sc_flags |= VNF_VLABEL; |
380 | vndgetdisklabel(dev, sc); |
381 | } |
382 | } |
383 | } |
384 | |
385 | /* Check that the partitions exists. */ |
386 | if (part != RAW_PART) { |
387 | if (((sc->sc_flags & VNF_INITED) == 0) || |
388 | ((part >= lp->d_npartitions) || |
389 | (lp->d_partitions[part].p_fstype == FS_UNUSED))) { |
390 | error = ENXIO; |
391 | goto done; |
392 | } |
393 | } |
394 | |
395 | /* Prevent our unit from being unconfigured while open. */ |
396 | switch (mode) { |
397 | case S_IFCHR: |
398 | sc->sc_dkdev.dk_copenmask |= pmask; |
399 | break; |
400 | |
401 | case S_IFBLK: |
402 | sc->sc_dkdev.dk_bopenmask |= pmask; |
403 | break; |
404 | } |
405 | sc->sc_dkdev.dk_openmask = |
406 | sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; |
407 | |
408 | done: |
409 | mutex_exit(&sc->sc_dkdev.dk_openlock); |
410 | vndunlock(sc); |
411 | return error; |
412 | } |
413 | |
414 | static int |
415 | vndclose(dev_t dev, int flags, int mode, struct lwp *l) |
416 | { |
417 | int unit = vndunit(dev); |
418 | struct vnd_softc *sc; |
419 | int error = 0, part; |
420 | |
421 | #ifdef DEBUG |
422 | if (vnddebug & VDB_FOLLOW) |
423 | printf("vndclose(0x%" PRIx64", 0x%x, 0x%x, %p)\n" , dev, flags, mode, l); |
424 | #endif |
425 | sc = device_lookup_private(&vnd_cd, unit); |
426 | if (sc == NULL) |
427 | return ENXIO; |
428 | |
429 | if ((error = vndlock(sc)) != 0) |
430 | return error; |
431 | |
432 | mutex_enter(&sc->sc_dkdev.dk_openlock); |
433 | |
434 | part = DISKPART(dev); |
435 | |
436 | /* ...that much closer to allowing unconfiguration... */ |
437 | switch (mode) { |
438 | case S_IFCHR: |
439 | sc->sc_dkdev.dk_copenmask &= ~(1 << part); |
440 | break; |
441 | |
442 | case S_IFBLK: |
443 | sc->sc_dkdev.dk_bopenmask &= ~(1 << part); |
444 | break; |
445 | } |
446 | sc->sc_dkdev.dk_openmask = |
447 | sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; |
448 | |
449 | /* are we last opener ? */ |
450 | if (sc->sc_dkdev.dk_openmask == 0) { |
451 | if ((sc->sc_flags & VNF_KLABEL) == 0) |
452 | sc->sc_flags &= ~VNF_VLABEL; |
453 | } |
454 | |
455 | mutex_exit(&sc->sc_dkdev.dk_openlock); |
456 | |
457 | vndunlock(sc); |
458 | |
459 | if ((sc->sc_flags & VNF_INITED) == 0) { |
460 | if ((error = vnd_destroy(sc->sc_dev)) != 0) { |
461 | aprint_error_dev(sc->sc_dev, |
462 | "unable to detach instance\n" ); |
463 | return error; |
464 | } |
465 | } |
466 | |
467 | return 0; |
468 | } |
469 | |
470 | /* |
471 | * Queue the request, and wakeup the kernel thread to handle it. |
472 | */ |
473 | static void |
474 | vndstrategy(struct buf *bp) |
475 | { |
476 | int unit = vndunit(bp->b_dev); |
477 | struct vnd_softc *vnd = |
478 | device_lookup_private(&vnd_cd, unit); |
479 | struct disklabel *lp; |
480 | daddr_t blkno; |
481 | int s = splbio(); |
482 | |
483 | if (vnd == NULL) { |
484 | bp->b_error = ENXIO; |
485 | goto done; |
486 | } |
487 | lp = vnd->sc_dkdev.dk_label; |
488 | |
489 | if ((vnd->sc_flags & VNF_INITED) == 0) { |
490 | bp->b_error = ENXIO; |
491 | goto done; |
492 | } |
493 | |
494 | /* |
495 | * The transfer must be a whole number of blocks. |
496 | */ |
497 | if ((bp->b_bcount % lp->d_secsize) != 0) { |
498 | bp->b_error = EINVAL; |
499 | goto done; |
500 | } |
501 | |
502 | /* |
503 | * check if we're read-only. |
504 | */ |
505 | if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { |
506 | bp->b_error = EACCES; |
507 | goto done; |
508 | } |
509 | |
510 | /* If it's a nil transfer, wake up the top half now. */ |
511 | if (bp->b_bcount == 0) { |
512 | goto done; |
513 | } |
514 | |
515 | /* |
516 | * Do bounds checking and adjust transfer. If there's an error, |
517 | * the bounds check will flag that for us. |
518 | */ |
519 | if (DISKPART(bp->b_dev) == RAW_PART) { |
520 | if (bounds_check_with_mediasize(bp, DEV_BSIZE, |
521 | vnd->sc_size) <= 0) |
522 | goto done; |
523 | } else { |
524 | if (bounds_check_with_label(&vnd->sc_dkdev, |
525 | bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) |
526 | goto done; |
527 | } |
528 | |
529 | /* |
530 | * Put the block number in terms of the logical blocksize |
531 | * of the "device". |
532 | */ |
533 | |
534 | blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); |
535 | |
536 | /* |
537 | * Translate the partition-relative block number to an absolute. |
538 | */ |
539 | if (DISKPART(bp->b_dev) != RAW_PART) { |
540 | struct partition *pp; |
541 | |
542 | pp = &vnd->sc_dkdev.dk_label->d_partitions[ |
543 | DISKPART(bp->b_dev)]; |
544 | blkno += pp->p_offset; |
545 | } |
546 | bp->b_rawblkno = blkno; |
547 | |
548 | #ifdef DEBUG |
549 | if (vnddebug & VDB_FOLLOW) |
550 | printf("vndstrategy(%p): unit %d\n" , bp, unit); |
551 | #endif |
552 | if ((vnd->sc_flags & VNF_USE_VN_RDWR)) { |
553 | KASSERT(vnd->sc_pending >= 0 && |
554 | vnd->sc_pending <= VND_MAXPENDING(vnd)); |
555 | while (vnd->sc_pending == VND_MAXPENDING(vnd)) |
556 | tsleep(&vnd->sc_pending, PRIBIO, "vndpc" , 0); |
557 | vnd->sc_pending++; |
558 | } |
559 | bufq_put(vnd->sc_tab, bp); |
560 | wakeup(&vnd->sc_tab); |
561 | splx(s); |
562 | return; |
563 | |
564 | done: |
565 | bp->b_resid = bp->b_bcount; |
566 | biodone(bp); |
567 | splx(s); |
568 | } |
569 | |
570 | static bool |
571 | vnode_has_strategy(struct vnd_softc *vnd) |
572 | { |
573 | return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) && |
574 | vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy)); |
575 | } |
576 | |
577 | static bool |
578 | vnode_has_large_blocks(struct vnd_softc *vnd) |
579 | { |
580 | u_int32_t vnd_secsize, mnt_secsize; |
581 | uint64_t numsec; |
582 | unsigned secsize; |
583 | |
584 | if (getdisksize(vnd->sc_vp, &numsec, &secsize)) |
585 | return true; |
586 | |
587 | vnd_secsize = vnd->sc_geom.vng_secsize; |
588 | mnt_secsize = secsize; |
589 | |
590 | return vnd_secsize % mnt_secsize != 0; |
591 | } |
592 | |
593 | /* XXX this function needs a reliable check to detect |
594 | * sparse files. Otherwise, bmap/strategy may be used |
595 | * and fail on non-allocated blocks. VOP_READ/VOP_WRITE |
596 | * works on sparse files. |
597 | */ |
598 | #if notyet |
599 | static bool |
600 | vnode_strategy_probe(struct vnd_softc *vnd) |
601 | { |
602 | int error; |
603 | daddr_t nbn; |
604 | |
605 | if (!vnode_has_strategy(vnd)) |
606 | return false; |
607 | |
608 | if (vnode_has_large_blocks(vnd)) |
609 | return false; |
610 | |
611 | /* Convert the first logical block number to its |
612 | * physical block number. |
613 | */ |
614 | error = 0; |
615 | vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
616 | error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL); |
617 | VOP_UNLOCK(vnd->sc_vp); |
618 | |
619 | /* Test if that worked. */ |
620 | if (error == 0 && (long)nbn == -1) |
621 | return false; |
622 | |
623 | return true; |
624 | } |
625 | #endif |
626 | |
627 | static void |
628 | vndthread(void *arg) |
629 | { |
630 | struct vnd_softc *vnd = arg; |
631 | int s; |
632 | |
633 | /* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to |
634 | * directly access the backing vnode. If we can, use these two |
635 | * operations to avoid messing with the local buffer cache. |
636 | * Otherwise fall back to regular VOP_READ/VOP_WRITE operations |
637 | * which are guaranteed to work with any file system. */ |
638 | if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && |
639 | ! vnode_has_strategy(vnd)) |
640 | vnd->sc_flags |= VNF_USE_VN_RDWR; |
641 | |
642 | /* VOP_STRATEGY can only be used if the backing vnode allows |
643 | * to access blocks as small as defined by the vnd geometry. |
644 | */ |
645 | if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && |
646 | vnode_has_large_blocks(vnd)) |
647 | vnd->sc_flags |= VNF_USE_VN_RDWR; |
648 | |
649 | #ifdef DEBUG |
650 | if (vnddebug & VDB_INIT) |
651 | printf("vndthread: vp %p, %s\n" , vnd->sc_vp, |
652 | (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ? |
653 | "using bmap/strategy operations" : |
654 | "using read/write operations" ); |
655 | #endif |
656 | |
657 | s = splbio(); |
658 | vnd->sc_flags |= VNF_KTHREAD; |
659 | wakeup(&vnd->sc_kthread); |
660 | |
661 | /* |
662 | * Dequeue requests and serve them depending on the available |
663 | * vnode operations. |
664 | */ |
665 | while ((vnd->sc_flags & VNF_VUNCONF) == 0) { |
666 | struct vndxfer *vnx; |
667 | struct buf *obp; |
668 | struct buf *bp; |
669 | |
670 | obp = bufq_get(vnd->sc_tab); |
671 | if (obp == NULL) { |
672 | tsleep(&vnd->sc_tab, PRIBIO, "vndbp" , 0); |
673 | continue; |
674 | }; |
675 | if ((vnd->sc_flags & VNF_USE_VN_RDWR)) { |
676 | KASSERT(vnd->sc_pending > 0 && |
677 | vnd->sc_pending <= VND_MAXPENDING(vnd)); |
678 | if (vnd->sc_pending-- == VND_MAXPENDING(vnd)) |
679 | wakeup(&vnd->sc_pending); |
680 | } |
681 | splx(s); |
682 | #ifdef DEBUG |
683 | if (vnddebug & VDB_FOLLOW) |
684 | printf("vndthread(%p)\n" , obp); |
685 | #endif |
686 | |
687 | if (vnd->sc_vp->v_mount == NULL) { |
688 | obp->b_error = ENXIO; |
689 | goto done; |
690 | } |
691 | #ifdef VND_COMPRESSION |
692 | /* handle a compressed read */ |
693 | if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { |
694 | off_t bn; |
695 | |
696 | /* Convert to a byte offset within the file. */ |
697 | bn = obp->b_rawblkno * |
698 | vnd->sc_dkdev.dk_label->d_secsize; |
699 | |
700 | compstrategy(obp, bn); |
701 | goto done; |
702 | } |
703 | #endif /* VND_COMPRESSION */ |
704 | |
705 | /* |
706 | * Allocate a header for this transfer and link it to the |
707 | * buffer |
708 | */ |
709 | s = splbio(); |
710 | vnx = VND_GETXFER(vnd); |
711 | splx(s); |
712 | vnx->vx_vnd = vnd; |
713 | |
714 | s = splbio(); |
715 | while (vnd->sc_active >= vnd->sc_maxactive) { |
716 | tsleep(&vnd->sc_tab, PRIBIO, "vndac" , 0); |
717 | } |
718 | vnd->sc_active++; |
719 | splx(s); |
720 | |
721 | /* Instrumentation. */ |
722 | disk_busy(&vnd->sc_dkdev); |
723 | |
724 | bp = &vnx->vx_buf; |
725 | buf_init(bp); |
726 | bp->b_flags = (obp->b_flags & B_READ); |
727 | bp->b_oflags = obp->b_oflags; |
728 | bp->b_cflags = obp->b_cflags; |
729 | bp->b_iodone = vndiodone; |
730 | bp->b_private = obp; |
731 | bp->b_vp = vnd->sc_vp; |
732 | bp->b_objlock = bp->b_vp->v_interlock; |
733 | bp->b_data = obp->b_data; |
734 | bp->b_bcount = obp->b_bcount; |
735 | BIO_COPYPRIO(bp, obp); |
736 | |
737 | /* Handle the request using the appropriate operations. */ |
738 | if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0) |
739 | handle_with_strategy(vnd, obp, bp); |
740 | else |
741 | handle_with_rdwr(vnd, obp, bp); |
742 | |
743 | s = splbio(); |
744 | continue; |
745 | |
746 | done: |
747 | biodone(obp); |
748 | s = splbio(); |
749 | } |
750 | |
751 | vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); |
752 | wakeup(&vnd->sc_kthread); |
753 | splx(s); |
754 | kthread_exit(0); |
755 | } |
756 | |
757 | /* |
758 | * Checks if the given vnode supports the requested operation. |
759 | * The operation is specified the offset returned by VOFFSET. |
760 | * |
761 | * XXX The test below used to determine this is quite fragile |
762 | * because it relies on the file system to use genfs to specify |
763 | * unimplemented operations. There might be another way to do |
764 | * it more cleanly. |
765 | */ |
766 | static bool |
767 | vnode_has_op(const struct vnode *vp, int opoffset) |
768 | { |
769 | int (*defaultp)(void *); |
770 | int (*opp)(void *); |
771 | |
772 | defaultp = vp->v_op[VOFFSET(vop_default)]; |
773 | opp = vp->v_op[opoffset]; |
774 | |
775 | return opp != defaultp && opp != genfs_eopnotsupp && |
776 | opp != genfs_badop && opp != genfs_nullop; |
777 | } |
778 | |
779 | /* |
780 | * Handles the read/write request given in 'bp' using the vnode's VOP_READ |
781 | * and VOP_WRITE operations. |
782 | * |
783 | * 'obp' is a pointer to the original request fed to the vnd device. |
784 | */ |
785 | static void |
786 | handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) |
787 | { |
788 | bool doread; |
789 | off_t offset; |
790 | size_t len, resid; |
791 | struct vnode *vp; |
792 | |
793 | doread = bp->b_flags & B_READ; |
794 | offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; |
795 | len = bp->b_bcount; |
796 | vp = vnd->sc_vp; |
797 | |
798 | #if defined(DEBUG) |
799 | if (vnddebug & VDB_IO) |
800 | printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64 |
801 | ", secsize %d, offset %" PRIu64 |
802 | ", bcount %d\n" , |
803 | vp, doread ? "read" : "write" , obp->b_rawblkno, |
804 | vnd->sc_dkdev.dk_label->d_secsize, offset, |
805 | bp->b_bcount); |
806 | #endif |
807 | |
808 | /* Issue the read or write operation. */ |
809 | bp->b_error = |
810 | vn_rdwr(doread ? UIO_READ : UIO_WRITE, |
811 | vp, bp->b_data, len, offset, UIO_SYSSPACE, |
812 | IO_ADV_ENCODE(POSIX_FADV_NOREUSE), vnd->sc_cred, &resid, NULL); |
813 | bp->b_resid = resid; |
814 | |
815 | mutex_enter(vp->v_interlock); |
816 | (void) VOP_PUTPAGES(vp, 0, 0, |
817 | PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO); |
818 | |
819 | /* We need to increase the number of outputs on the vnode if |
820 | * there was any write to it. */ |
821 | if (!doread) { |
822 | mutex_enter(vp->v_interlock); |
823 | vp->v_numoutput++; |
824 | mutex_exit(vp->v_interlock); |
825 | } |
826 | |
827 | biodone(bp); |
828 | } |
829 | |
830 | /* |
831 | * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP |
832 | * and VOP_STRATEGY operations. |
833 | * |
834 | * 'obp' is a pointer to the original request fed to the vnd device. |
835 | */ |
836 | static void |
837 | handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, |
838 | struct buf *bp) |
839 | { |
840 | int bsize, error, flags, skipped; |
841 | size_t resid, sz; |
842 | off_t bn, offset; |
843 | struct vnode *vp; |
844 | struct buf *nbp = NULL; |
845 | |
846 | flags = obp->b_flags; |
847 | |
848 | |
849 | /* convert to a byte offset within the file. */ |
850 | bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; |
851 | |
852 | bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; |
853 | skipped = 0; |
854 | |
855 | /* |
856 | * Break the request into bsize pieces and feed them |
857 | * sequentially using VOP_BMAP/VOP_STRATEGY. |
858 | * We do it this way to keep from flooding NFS servers if we |
859 | * are connected to an NFS file. This places the burden on |
860 | * the client rather than the server. |
861 | */ |
862 | error = 0; |
863 | bp->b_resid = bp->b_bcount; |
864 | for (offset = 0, resid = bp->b_resid; /* true */; |
865 | resid -= sz, offset += sz) { |
866 | daddr_t nbn; |
867 | int off, nra; |
868 | |
869 | nra = 0; |
870 | vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
871 | error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); |
872 | VOP_UNLOCK(vnd->sc_vp); |
873 | |
874 | if (error == 0 && (long)nbn == -1) |
875 | error = EIO; |
876 | |
877 | /* |
878 | * If there was an error or a hole in the file...punt. |
879 | * Note that we may have to wait for any operations |
880 | * that we have already fired off before releasing |
881 | * the buffer. |
882 | * |
883 | * XXX we could deal with holes here but it would be |
884 | * a hassle (in the write case). |
885 | */ |
886 | if (error) { |
887 | skipped += resid; |
888 | break; |
889 | } |
890 | |
891 | #ifdef DEBUG |
892 | if (!dovndcluster) |
893 | nra = 0; |
894 | #endif |
895 | |
896 | off = bn % bsize; |
897 | sz = MIN(((off_t)1 + nra) * bsize - off, resid); |
898 | #ifdef DEBUG |
899 | if (vnddebug & VDB_IO) |
900 | printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 |
901 | " sz 0x%zx\n" , vnd->sc_vp, vp, (long long)bn, |
902 | nbn, sz); |
903 | #endif |
904 | |
905 | nbp = getiobuf(vp, true); |
906 | nestiobuf_setup(bp, nbp, offset, sz); |
907 | nbp->b_blkno = nbn + btodb(off); |
908 | |
909 | #if 0 /* XXX #ifdef DEBUG */ |
910 | if (vnddebug & VDB_IO) |
911 | printf("vndstart(%ld): bp %p vp %p blkno " |
912 | "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n" , |
913 | (long) (vnd-vnd_softc), &nbp->vb_buf, |
914 | nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, |
915 | nbp->vb_buf.b_flags, nbp->vb_buf.b_data, |
916 | nbp->vb_buf.b_bcount); |
917 | #endif |
918 | if (resid == sz) { |
919 | break; |
920 | } |
921 | VOP_STRATEGY(vp, nbp); |
922 | bn += sz; |
923 | } |
924 | if (!(flags & B_READ)) { |
925 | struct vnode *w_vp; |
926 | /* |
927 | * this is the last nested buf, account for |
928 | * the parent buf write too. |
929 | * This has to be done last, so that |
930 | * fsync won't wait for this write which |
931 | * has no chance to complete before all nested bufs |
932 | * have been queued. But it has to be done |
933 | * before the last VOP_STRATEGY() |
934 | * or the call to nestiobuf_done(). |
935 | */ |
936 | w_vp = bp->b_vp; |
937 | mutex_enter(w_vp->v_interlock); |
938 | w_vp->v_numoutput++; |
939 | mutex_exit(w_vp->v_interlock); |
940 | } |
941 | KASSERT(skipped != 0 || nbp != NULL); |
942 | if (skipped) |
943 | nestiobuf_done(bp, skipped, error); |
944 | else |
945 | VOP_STRATEGY(vp, nbp); |
946 | } |
947 | |
948 | static void |
949 | vndiodone(struct buf *bp) |
950 | { |
951 | struct vndxfer *vnx = VND_BUFTOXFER(bp); |
952 | struct vnd_softc *vnd = vnx->vx_vnd; |
953 | struct buf *obp = bp->b_private; |
954 | int s = splbio(); |
955 | |
956 | KASSERT(&vnx->vx_buf == bp); |
957 | KASSERT(vnd->sc_active > 0); |
958 | #ifdef DEBUG |
959 | if (vnddebug & VDB_IO) { |
960 | printf("vndiodone1: bp %p iodone: error %d\n" , |
961 | bp, bp->b_error); |
962 | } |
963 | #endif |
964 | disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, |
965 | (bp->b_flags & B_READ)); |
966 | vnd->sc_active--; |
967 | if (vnd->sc_active == 0) { |
968 | wakeup(&vnd->sc_tab); |
969 | } |
970 | splx(s); |
971 | obp->b_error = bp->b_error; |
972 | obp->b_resid = bp->b_resid; |
973 | buf_destroy(bp); |
974 | VND_PUTXFER(vnd, vnx); |
975 | biodone(obp); |
976 | } |
977 | |
978 | /* ARGSUSED */ |
979 | static int |
980 | vndread(dev_t dev, struct uio *uio, int flags) |
981 | { |
982 | int unit = vndunit(dev); |
983 | struct vnd_softc *sc; |
984 | |
985 | #ifdef DEBUG |
986 | if (vnddebug & VDB_FOLLOW) |
987 | printf("vndread(0x%" PRIx64", %p)\n" , dev, uio); |
988 | #endif |
989 | |
990 | sc = device_lookup_private(&vnd_cd, unit); |
991 | if (sc == NULL) |
992 | return ENXIO; |
993 | |
994 | if ((sc->sc_flags & VNF_INITED) == 0) |
995 | return ENXIO; |
996 | |
997 | return physio(vndstrategy, NULL, dev, B_READ, minphys, uio); |
998 | } |
999 | |
1000 | /* ARGSUSED */ |
1001 | static int |
1002 | vndwrite(dev_t dev, struct uio *uio, int flags) |
1003 | { |
1004 | int unit = vndunit(dev); |
1005 | struct vnd_softc *sc; |
1006 | |
1007 | #ifdef DEBUG |
1008 | if (vnddebug & VDB_FOLLOW) |
1009 | printf("vndwrite(0x%" PRIx64", %p)\n" , dev, uio); |
1010 | #endif |
1011 | |
1012 | sc = device_lookup_private(&vnd_cd, unit); |
1013 | if (sc == NULL) |
1014 | return ENXIO; |
1015 | |
1016 | if ((sc->sc_flags & VNF_INITED) == 0) |
1017 | return ENXIO; |
1018 | |
1019 | return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio); |
1020 | } |
1021 | |
1022 | static int |
1023 | vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) |
1024 | { |
1025 | int error; |
1026 | struct vnd_softc *vnd; |
1027 | |
1028 | if (*un == -1) |
1029 | *un = unit; |
1030 | if (*un < 0) |
1031 | return EINVAL; |
1032 | |
1033 | vnd = device_lookup_private(&vnd_cd, *un); |
1034 | if (vnd == NULL) |
1035 | return -1; |
1036 | |
1037 | if ((vnd->sc_flags & VNF_INITED) == 0) |
1038 | return -1; |
1039 | |
1040 | vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY); |
1041 | error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred); |
1042 | VOP_UNLOCK(vnd->sc_vp); |
1043 | return error; |
1044 | } |
1045 | |
/*
 * Unconfigure a vnd unit.
 *
 * vnd	 - unit to unconfigure
 * pmask - bit mask of the partition the request came in on (passed to
 *	   DK_BUSY())
 * minor - minor number of the requesting device, handed on to vndclear()
 * force - when true, unconfigure even if DK_BUSY() reports the unit busy
 *
 * Returns 0 on success, the error from vndlock() if the lock could not
 * be taken, or EBUSY if the unit is busy and `force' is false.
 */
static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	/* vndclear() clears VNF_CLEARING again when it is done. */
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. */
	disk_detach(&vnd->sc_dkdev);

	return 0;
}
1090 | |
1091 | static int |
1092 | vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va) |
1093 | { |
1094 | int error; |
1095 | |
1096 | KASSERT(l); |
1097 | |
1098 | /* the first member is always int vnd_unit in all the versions */ |
1099 | if (*(int *)data >= vnd_cd.cd_ndevs) |
1100 | return ENXIO; |
1101 | |
1102 | switch (error = vnd_cget(l, unit, (int *)data, va)) { |
1103 | case -1: |
1104 | /* unused is not an error */ |
1105 | memset(va, 0, sizeof(*va)); |
1106 | /*FALLTHROUGH*/ |
1107 | case 0: |
1108 | return 0; |
1109 | default: |
1110 | return error; |
1111 | } |
1112 | } |
1113 | |
1114 | /* ARGSUSED */ |
1115 | static int |
1116 | vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) |
1117 | { |
1118 | bool force; |
1119 | int unit = vndunit(dev); |
1120 | struct vnd_softc *vnd; |
1121 | struct vnd_ioctl *vio; |
1122 | struct vattr vattr; |
1123 | struct pathbuf *pb; |
1124 | struct nameidata nd; |
1125 | int error, part, pmask; |
1126 | uint64_t geomsize; |
1127 | int fflags; |
1128 | #ifdef __HAVE_OLD_DISKLABEL |
1129 | struct disklabel newlabel; |
1130 | #endif |
1131 | |
1132 | #ifdef DEBUG |
1133 | if (vnddebug & VDB_FOLLOW) |
1134 | printf("vndioctl(0x%" PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n" , |
1135 | dev, cmd, data, flag, l->l_proc, unit); |
1136 | #endif |
1137 | /* Do the get's first; they don't need initialization or verification */ |
1138 | switch (cmd) { |
1139 | #ifdef COMPAT_30 |
1140 | case VNDIOCGET30: { |
1141 | if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) |
1142 | return error; |
1143 | |
1144 | struct vnd_user30 *vnu = data; |
1145 | vnu->vnu_dev = vattr.va_fsid; |
1146 | vnu->vnu_ino = vattr.va_fileid; |
1147 | return 0; |
1148 | } |
1149 | #endif |
1150 | #ifdef COMPAT_50 |
1151 | case VNDIOCGET50: { |
1152 | if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) |
1153 | return error; |
1154 | |
1155 | struct vnd_user50 *vnu = data; |
1156 | vnu->vnu_dev = vattr.va_fsid; |
1157 | vnu->vnu_ino = vattr.va_fileid; |
1158 | return 0; |
1159 | } |
1160 | #endif |
1161 | |
1162 | case VNDIOCGET: { |
1163 | if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) |
1164 | return error; |
1165 | |
1166 | struct vnd_user *vnu = data; |
1167 | vnu->vnu_dev = vattr.va_fsid; |
1168 | vnu->vnu_ino = vattr.va_fileid; |
1169 | return 0; |
1170 | } |
1171 | default: |
1172 | break; |
1173 | } |
1174 | |
1175 | vnd = device_lookup_private(&vnd_cd, unit); |
1176 | if (vnd == NULL) |
1177 | return ENXIO; |
1178 | vio = (struct vnd_ioctl *)data; |
1179 | |
1180 | /* Must be open for writes for these commands... */ |
1181 | switch (cmd) { |
1182 | case VNDIOCSET: |
1183 | case VNDIOCCLR: |
1184 | #ifdef COMPAT_50 |
1185 | case VNDIOCSET50: |
1186 | case VNDIOCCLR50: |
1187 | #endif |
1188 | case DIOCSDINFO: |
1189 | case DIOCWDINFO: |
1190 | #ifdef __HAVE_OLD_DISKLABEL |
1191 | case ODIOCSDINFO: |
1192 | case ODIOCWDINFO: |
1193 | #endif |
1194 | case DIOCKLABEL: |
1195 | case DIOCWLABEL: |
1196 | if ((flag & FWRITE) == 0) |
1197 | return EBADF; |
1198 | } |
1199 | |
1200 | /* Must be initialized for these... */ |
1201 | switch (cmd) { |
1202 | case VNDIOCCLR: |
1203 | #ifdef VNDIOCCLR50 |
1204 | case VNDIOCCLR50: |
1205 | #endif |
1206 | case DIOCGDINFO: |
1207 | case DIOCSDINFO: |
1208 | case DIOCWDINFO: |
1209 | case DIOCGPARTINFO: |
1210 | case DIOCKLABEL: |
1211 | case DIOCWLABEL: |
1212 | case DIOCGDEFLABEL: |
1213 | case DIOCCACHESYNC: |
1214 | #ifdef __HAVE_OLD_DISKLABEL |
1215 | case ODIOCGDINFO: |
1216 | case ODIOCSDINFO: |
1217 | case ODIOCWDINFO: |
1218 | case ODIOCGDEFLABEL: |
1219 | #endif |
1220 | if ((vnd->sc_flags & VNF_INITED) == 0) |
1221 | return ENXIO; |
1222 | } |
1223 | |
1224 | error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l); |
1225 | if (error != EPASSTHROUGH) |
1226 | return error; |
1227 | |
1228 | |
1229 | switch (cmd) { |
1230 | #ifdef VNDIOCSET50 |
1231 | case VNDIOCSET50: |
1232 | #endif |
1233 | case VNDIOCSET: |
1234 | if (vnd->sc_flags & VNF_INITED) |
1235 | return EBUSY; |
1236 | |
1237 | if ((error = vndlock(vnd)) != 0) |
1238 | return error; |
1239 | |
1240 | fflags = FREAD; |
1241 | if ((vio->vnd_flags & VNDIOF_READONLY) == 0) |
1242 | fflags |= FWRITE; |
1243 | error = pathbuf_copyin(vio->vnd_file, &pb); |
1244 | if (error) { |
1245 | goto unlock_and_exit; |
1246 | } |
1247 | NDINIT(&nd, LOOKUP, FOLLOW, pb); |
1248 | if ((error = vn_open(&nd, fflags, 0)) != 0) { |
1249 | pathbuf_destroy(pb); |
1250 | goto unlock_and_exit; |
1251 | } |
1252 | KASSERT(l); |
1253 | error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred); |
1254 | if (!error && nd.ni_vp->v_type != VREG) |
1255 | error = EOPNOTSUPP; |
1256 | if (!error && vattr.va_bytes < vattr.va_size) |
1257 | /* File is definitely sparse, use vn_rdwr() */ |
1258 | vnd->sc_flags |= VNF_USE_VN_RDWR; |
1259 | if (error) { |
1260 | VOP_UNLOCK(nd.ni_vp); |
1261 | goto close_and_exit; |
1262 | } |
1263 | |
1264 | /* If using a compressed file, initialize its info */ |
1265 | /* (or abort with an error if kernel has no compression) */ |
1266 | if (vio->vnd_flags & VNF_COMP) { |
1267 | #ifdef VND_COMPRESSION |
1268 | struct vnd_comp_header *ch; |
1269 | int i; |
1270 | u_int32_t comp_size; |
1271 | u_int32_t comp_maxsize; |
1272 | |
1273 | /* allocate space for compresed file header */ |
1274 | ch = malloc(sizeof(struct vnd_comp_header), |
1275 | M_TEMP, M_WAITOK); |
1276 | |
1277 | /* read compressed file header */ |
1278 | error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch, |
1279 | sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, |
1280 | IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); |
1281 | if (error) { |
1282 | free(ch, M_TEMP); |
1283 | VOP_UNLOCK(nd.ni_vp); |
1284 | goto close_and_exit; |
1285 | } |
1286 | |
1287 | /* save some header info */ |
1288 | vnd->sc_comp_blksz = ntohl(ch->block_size); |
1289 | /* note last offset is the file byte size */ |
1290 | vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; |
1291 | free(ch, M_TEMP); |
1292 | if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) { |
1293 | VOP_UNLOCK(nd.ni_vp); |
1294 | error = EINVAL; |
1295 | goto close_and_exit; |
1296 | } |
1297 | if (sizeof(struct vnd_comp_header) + |
1298 | sizeof(u_int64_t) * vnd->sc_comp_numoffs > |
1299 | vattr.va_size) { |
1300 | VOP_UNLOCK(nd.ni_vp); |
1301 | error = EINVAL; |
1302 | goto close_and_exit; |
1303 | } |
1304 | |
1305 | /* set decompressed file size */ |
1306 | vattr.va_size = |
1307 | ((u_quad_t)vnd->sc_comp_numoffs - 1) * |
1308 | (u_quad_t)vnd->sc_comp_blksz; |
1309 | |
1310 | /* allocate space for all the compressed offsets */ |
1311 | vnd->sc_comp_offsets = |
1312 | malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, |
1313 | M_DEVBUF, M_WAITOK); |
1314 | |
1315 | /* read in the offsets */ |
1316 | error = vn_rdwr(UIO_READ, nd.ni_vp, |
1317 | (void *)vnd->sc_comp_offsets, |
1318 | sizeof(u_int64_t) * vnd->sc_comp_numoffs, |
1319 | sizeof(struct vnd_comp_header), UIO_SYSSPACE, |
1320 | IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); |
1321 | if (error) { |
1322 | VOP_UNLOCK(nd.ni_vp); |
1323 | goto close_and_exit; |
1324 | } |
1325 | /* |
1326 | * find largest block size (used for allocation limit). |
1327 | * Also convert offset to native byte order. |
1328 | */ |
1329 | comp_maxsize = 0; |
1330 | for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { |
1331 | vnd->sc_comp_offsets[i] = |
1332 | be64toh(vnd->sc_comp_offsets[i]); |
1333 | comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) |
1334 | - vnd->sc_comp_offsets[i]; |
1335 | if (comp_size > comp_maxsize) |
1336 | comp_maxsize = comp_size; |
1337 | } |
1338 | vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = |
1339 | be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); |
1340 | |
1341 | /* create compressed data buffer */ |
1342 | vnd->sc_comp_buff = malloc(comp_maxsize, |
1343 | M_DEVBUF, M_WAITOK); |
1344 | |
1345 | /* create decompressed buffer */ |
1346 | vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, |
1347 | M_DEVBUF, M_WAITOK); |
1348 | vnd->sc_comp_buffblk = -1; |
1349 | |
1350 | /* Initialize decompress stream */ |
1351 | memset(&vnd->sc_comp_stream, 0, sizeof(z_stream)); |
1352 | vnd->sc_comp_stream.zalloc = vnd_alloc; |
1353 | vnd->sc_comp_stream.zfree = vnd_free; |
1354 | error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); |
1355 | if (error) { |
1356 | if (vnd->sc_comp_stream.msg) |
1357 | printf("vnd%d: compressed file, %s\n" , |
1358 | unit, vnd->sc_comp_stream.msg); |
1359 | VOP_UNLOCK(nd.ni_vp); |
1360 | error = EINVAL; |
1361 | goto close_and_exit; |
1362 | } |
1363 | |
1364 | vnd->sc_flags |= VNF_COMP | VNF_READONLY; |
1365 | #else /* !VND_COMPRESSION */ |
1366 | VOP_UNLOCK(nd.ni_vp); |
1367 | error = EOPNOTSUPP; |
1368 | goto close_and_exit; |
1369 | #endif /* VND_COMPRESSION */ |
1370 | } |
1371 | |
1372 | VOP_UNLOCK(nd.ni_vp); |
1373 | vnd->sc_vp = nd.ni_vp; |
1374 | vnd->sc_size = btodb(vattr.va_size); /* note truncation */ |
1375 | |
1376 | /* |
1377 | * Use pseudo-geometry specified. If none was provided, |
1378 | * use "standard" Adaptec fictitious geometry. |
1379 | */ |
1380 | if (vio->vnd_flags & VNDIOF_HASGEOM) { |
1381 | |
1382 | memcpy(&vnd->sc_geom, &vio->vnd_geom, |
1383 | sizeof(vio->vnd_geom)); |
1384 | |
1385 | /* |
1386 | * Sanity-check the sector size. |
1387 | */ |
1388 | if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) || |
1389 | vnd->sc_geom.vng_ncylinders == 0 || |
1390 | vnd->sc_geom.vng_ntracks == 0 || |
1391 | vnd->sc_geom.vng_nsectors == 0) { |
1392 | error = EINVAL; |
1393 | goto close_and_exit; |
1394 | } |
1395 | |
1396 | /* |
1397 | * Compute the size (in DEV_BSIZE blocks) specified |
1398 | * by the geometry. |
1399 | */ |
1400 | geomsize = (int64_t)vnd->sc_geom.vng_nsectors * |
1401 | vnd->sc_geom.vng_ntracks * |
1402 | vnd->sc_geom.vng_ncylinders * |
1403 | (vnd->sc_geom.vng_secsize / DEV_BSIZE); |
1404 | |
1405 | /* |
1406 | * Sanity-check the size against the specified |
1407 | * geometry. |
1408 | */ |
1409 | if (vnd->sc_size < geomsize) { |
1410 | error = EINVAL; |
1411 | goto close_and_exit; |
1412 | } |
1413 | } else if (vnd->sc_size >= (32 * 64)) { |
1414 | /* |
1415 | * Size must be at least 2048 DEV_BSIZE blocks |
1416 | * (1M) in order to use this geometry. |
1417 | */ |
1418 | vnd->sc_geom.vng_secsize = DEV_BSIZE; |
1419 | vnd->sc_geom.vng_nsectors = 32; |
1420 | vnd->sc_geom.vng_ntracks = 64; |
1421 | vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); |
1422 | } else { |
1423 | vnd->sc_geom.vng_secsize = DEV_BSIZE; |
1424 | vnd->sc_geom.vng_nsectors = 1; |
1425 | vnd->sc_geom.vng_ntracks = 1; |
1426 | vnd->sc_geom.vng_ncylinders = vnd->sc_size; |
1427 | } |
1428 | |
1429 | vnd_set_geometry(vnd); |
1430 | |
1431 | if (vio->vnd_flags & VNDIOF_READONLY) { |
1432 | vnd->sc_flags |= VNF_READONLY; |
1433 | } |
1434 | |
1435 | if ((error = vndsetcred(vnd, l->l_cred)) != 0) |
1436 | goto close_and_exit; |
1437 | |
1438 | vndthrottle(vnd, vnd->sc_vp); |
1439 | vio->vnd_osize = dbtob(vnd->sc_size); |
1440 | #ifdef VNDIOCSET50 |
1441 | if (cmd != VNDIOCSET50) |
1442 | #endif |
1443 | vio->vnd_size = dbtob(vnd->sc_size); |
1444 | vnd->sc_flags |= VNF_INITED; |
1445 | |
1446 | /* create the kernel thread, wait for it to be up */ |
1447 | error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd, |
1448 | &vnd->sc_kthread, "%s" , device_xname(vnd->sc_dev)); |
1449 | if (error) |
1450 | goto close_and_exit; |
1451 | while ((vnd->sc_flags & VNF_KTHREAD) == 0) { |
1452 | tsleep(&vnd->sc_kthread, PRIBIO, "vndthr" , 0); |
1453 | } |
1454 | #ifdef DEBUG |
1455 | if (vnddebug & VDB_INIT) |
1456 | printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n" , |
1457 | vnd->sc_vp, (unsigned long) vnd->sc_size, |
1458 | vnd->sc_geom.vng_secsize, |
1459 | vnd->sc_geom.vng_nsectors, |
1460 | vnd->sc_geom.vng_ntracks, |
1461 | vnd->sc_geom.vng_ncylinders); |
1462 | #endif |
1463 | |
1464 | /* Attach the disk. */ |
1465 | disk_attach(&vnd->sc_dkdev); |
1466 | |
1467 | /* Initialize the xfer and buffer pools. */ |
1468 | pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, |
1469 | 0, 0, "vndxpl" , NULL, IPL_BIO); |
1470 | |
1471 | vndunlock(vnd); |
1472 | |
1473 | pathbuf_destroy(pb); |
1474 | |
1475 | /* Discover wedges on this disk */ |
1476 | dkwedge_discover(&vnd->sc_dkdev); |
1477 | |
1478 | break; |
1479 | |
1480 | close_and_exit: |
1481 | (void) vn_close(nd.ni_vp, fflags, l->l_cred); |
1482 | pathbuf_destroy(pb); |
1483 | unlock_and_exit: |
1484 | #ifdef VND_COMPRESSION |
1485 | /* free any allocated memory (for compressed file) */ |
1486 | if (vnd->sc_comp_offsets) { |
1487 | free(vnd->sc_comp_offsets, M_DEVBUF); |
1488 | vnd->sc_comp_offsets = NULL; |
1489 | } |
1490 | if (vnd->sc_comp_buff) { |
1491 | free(vnd->sc_comp_buff, M_DEVBUF); |
1492 | vnd->sc_comp_buff = NULL; |
1493 | } |
1494 | if (vnd->sc_comp_decombuf) { |
1495 | free(vnd->sc_comp_decombuf, M_DEVBUF); |
1496 | vnd->sc_comp_decombuf = NULL; |
1497 | } |
1498 | #endif /* VND_COMPRESSION */ |
1499 | vndunlock(vnd); |
1500 | return error; |
1501 | |
1502 | #ifdef VNDIOCCLR50 |
1503 | case VNDIOCCLR50: |
1504 | #endif |
1505 | case VNDIOCCLR: |
1506 | part = DISKPART(dev); |
1507 | pmask = (1 << part); |
1508 | force = (vio->vnd_flags & VNDIOF_FORCE) != 0; |
1509 | |
1510 | if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0) |
1511 | return error; |
1512 | |
1513 | break; |
1514 | |
1515 | |
1516 | case DIOCWDINFO: |
1517 | case DIOCSDINFO: |
1518 | #ifdef __HAVE_OLD_DISKLABEL |
1519 | case ODIOCWDINFO: |
1520 | case ODIOCSDINFO: |
1521 | #endif |
1522 | { |
1523 | struct disklabel *lp; |
1524 | |
1525 | if ((error = vndlock(vnd)) != 0) |
1526 | return error; |
1527 | |
1528 | vnd->sc_flags |= VNF_LABELLING; |
1529 | |
1530 | #ifdef __HAVE_OLD_DISKLABEL |
1531 | if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { |
1532 | memset(&newlabel, 0, sizeof newlabel); |
1533 | memcpy(&newlabel, data, sizeof (struct olddisklabel)); |
1534 | lp = &newlabel; |
1535 | } else |
1536 | #endif |
1537 | lp = (struct disklabel *)data; |
1538 | |
1539 | error = setdisklabel(vnd->sc_dkdev.dk_label, |
1540 | lp, 0, vnd->sc_dkdev.dk_cpulabel); |
1541 | if (error == 0) { |
1542 | if (cmd == DIOCWDINFO |
1543 | #ifdef __HAVE_OLD_DISKLABEL |
1544 | || cmd == ODIOCWDINFO |
1545 | #endif |
1546 | ) |
1547 | error = writedisklabel(VNDLABELDEV(dev), |
1548 | vndstrategy, vnd->sc_dkdev.dk_label, |
1549 | vnd->sc_dkdev.dk_cpulabel); |
1550 | } |
1551 | |
1552 | vnd->sc_flags &= ~VNF_LABELLING; |
1553 | |
1554 | vndunlock(vnd); |
1555 | |
1556 | if (error) |
1557 | return error; |
1558 | break; |
1559 | } |
1560 | |
1561 | case DIOCKLABEL: |
1562 | if (*(int *)data != 0) |
1563 | vnd->sc_flags |= VNF_KLABEL; |
1564 | else |
1565 | vnd->sc_flags &= ~VNF_KLABEL; |
1566 | break; |
1567 | |
1568 | case DIOCWLABEL: |
1569 | if (*(int *)data != 0) |
1570 | vnd->sc_flags |= VNF_WLABEL; |
1571 | else |
1572 | vnd->sc_flags &= ~VNF_WLABEL; |
1573 | break; |
1574 | |
1575 | case DIOCGDEFLABEL: |
1576 | vndgetdefaultlabel(vnd, (struct disklabel *)data); |
1577 | break; |
1578 | |
1579 | #ifdef __HAVE_OLD_DISKLABEL |
1580 | case ODIOCGDEFLABEL: |
1581 | vndgetdefaultlabel(vnd, &newlabel); |
1582 | if (newlabel.d_npartitions > OLDMAXPARTITIONS) |
1583 | return ENOTTY; |
1584 | memcpy(data, &newlabel, sizeof (struct olddisklabel)); |
1585 | break; |
1586 | #endif |
1587 | |
1588 | case DIOCCACHESYNC: |
1589 | vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
1590 | error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred, |
1591 | FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0); |
1592 | VOP_UNLOCK(vnd->sc_vp); |
1593 | return error; |
1594 | |
1595 | default: |
1596 | return ENOTTY; |
1597 | } |
1598 | |
1599 | return 0; |
1600 | } |
1601 | |
1602 | /* |
1603 | * Duplicate the current processes' credentials. Since we are called only |
1604 | * as the result of a SET ioctl and only root can do that, any future access |
1605 | * to this "disk" is essentially as root. Note that credentials may change |
1606 | * if some other uid can write directly to the mapped file (NFS). |
1607 | */ |
1608 | static int |
1609 | vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred) |
1610 | { |
1611 | struct uio auio; |
1612 | struct iovec aiov; |
1613 | char *tmpbuf; |
1614 | int error; |
1615 | |
1616 | vnd->sc_cred = kauth_cred_dup(cred); |
1617 | tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); |
1618 | |
1619 | /* XXX: Horrible kludge to establish credentials for NFS */ |
1620 | aiov.iov_base = tmpbuf; |
1621 | aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); |
1622 | auio.uio_iov = &aiov; |
1623 | auio.uio_iovcnt = 1; |
1624 | auio.uio_offset = 0; |
1625 | auio.uio_rw = UIO_READ; |
1626 | auio.uio_resid = aiov.iov_len; |
1627 | UIO_SETUP_SYSSPACE(&auio); |
1628 | vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
1629 | error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); |
1630 | if (error == 0) { |
1631 | /* |
1632 | * Because vnd does all IO directly through the vnode |
1633 | * we need to flush (at least) the buffer from the above |
1634 | * VOP_READ from the buffer cache to prevent cache |
1635 | * incoherencies. Also, be careful to write dirty |
1636 | * buffers back to stable storage. |
1637 | */ |
1638 | error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, |
1639 | curlwp, 0, 0); |
1640 | } |
1641 | VOP_UNLOCK(vnd->sc_vp); |
1642 | |
1643 | free(tmpbuf, M_TEMP); |
1644 | return error; |
1645 | } |
1646 | |
1647 | /* |
1648 | * Set maxactive based on FS type |
1649 | */ |
1650 | static void |
1651 | vndthrottle(struct vnd_softc *vnd, struct vnode *vp) |
1652 | { |
1653 | |
1654 | if (vp->v_tag == VT_NFS) |
1655 | vnd->sc_maxactive = 2; |
1656 | else |
1657 | vnd->sc_maxactive = 8; |
1658 | |
1659 | if (vnd->sc_maxactive < 1) |
1660 | vnd->sc_maxactive = 1; |
1661 | } |
1662 | |
#if 0
/*
 * Compiled out.  Would unconfigure every initialized unit by walking a
 * vnd_softc[] array of numvnd entries.
 *
 * NOTE(review): stale as written -- vndclear() takes a second argument
 * (the caller's minor number) that is not passed here.
 */
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif
1674 | |
/*
 * Tear down a configured unit.
 *
 * Revokes the device vnodes of all partitions (except the character
 * device node identified by `myminor', which the caller is using), drains
 * the request queue, asks the worker thread to exit and waits for it,
 * frees the decompression buffers, clears the configuration flags and
 * finally closes the backing vnode and releases the credential.
 *
 * Panics if the unit has no backing vnode.
 */
static void
vndclear(struct vnd_softc *vnd, int myminor)
{
	struct vnode *vp = vnd->sc_vp;
	int fflags = FREAD;
	int bmaj, cmaj, i, mn;
	int s;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndclear(%p): vp %p\n", vnd, vp);
#endif
	/* locate the major number */
	bmaj = bdevsw_lookup_major(&vnd_bdevsw);
	cmaj = cdevsw_lookup_major(&vnd_cdevsw);

	/* Nuke the vnodes for any open instances */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = DISKMINOR(device_unit(vnd->sc_dev), i);
		vdevgone(bmaj, mn, mn, VBLK);
		if (mn != myminor) /* XXX avoid to kill own vnode */
			vdevgone(cmaj, mn, mn, VCHR);
	}

	/* The close flags are derived from the unit's read-only flag. */
	if ((vnd->sc_flags & VNF_READONLY) == 0)
		fflags |= FWRITE;

	s = splbio();
	bufq_drain(vnd->sc_tab);
	splx(s);

	/*
	 * Request the worker thread to exit and sleep until it has
	 * cleared VNF_KTHREAD.
	 */
	vnd->sc_flags |= VNF_VUNCONF;
	wakeup(&vnd->sc_tab);
	while (vnd->sc_flags & VNF_KTHREAD)
		tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);

#ifdef VND_COMPRESSION
	/* free the compressed file buffers */
	if (vnd->sc_flags & VNF_COMP) {
		if (vnd->sc_comp_offsets) {
			free(vnd->sc_comp_offsets, M_DEVBUF);
			vnd->sc_comp_offsets = NULL;
		}
		if (vnd->sc_comp_buff) {
			free(vnd->sc_comp_buff, M_DEVBUF);
			vnd->sc_comp_buff = NULL;
		}
		if (vnd->sc_comp_decombuf) {
			free(vnd->sc_comp_decombuf, M_DEVBUF);
			vnd->sc_comp_decombuf = NULL;
		}
	}
#endif /* VND_COMPRESSION */
	/* From here on the unit counts as unconfigured. */
	vnd->sc_flags &=
	    ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
	      | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
	if (vp == NULL)
		panic("vndclear: null vp");
	(void) vn_close(vp, fflags, vnd->sc_cred);
	kauth_cred_free(vnd->sc_cred);
	vnd->sc_vp = NULL;
	vnd->sc_cred = NULL;
	vnd->sc_size = 0;
}
1739 | |
1740 | static int |
1741 | vndsize(dev_t dev) |
1742 | { |
1743 | struct vnd_softc *sc; |
1744 | struct disklabel *lp; |
1745 | int part, unit, omask; |
1746 | int size; |
1747 | |
1748 | unit = vndunit(dev); |
1749 | sc = device_lookup_private(&vnd_cd, unit); |
1750 | if (sc == NULL) |
1751 | return -1; |
1752 | |
1753 | if ((sc->sc_flags & VNF_INITED) == 0) |
1754 | return -1; |
1755 | |
1756 | part = DISKPART(dev); |
1757 | omask = sc->sc_dkdev.dk_openmask & (1 << part); |
1758 | lp = sc->sc_dkdev.dk_label; |
1759 | |
1760 | if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ |
1761 | return -1; |
1762 | |
1763 | if (lp->d_partitions[part].p_fstype != FS_SWAP) |
1764 | size = -1; |
1765 | else |
1766 | size = lp->d_partitions[part].p_size * |
1767 | (lp->d_secsize / DEV_BSIZE); |
1768 | |
1769 | if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ |
1770 | return -1; |
1771 | |
1772 | return size; |
1773 | } |
1774 | |
1775 | static int |
1776 | vnddump(dev_t dev, daddr_t blkno, void *va, |
1777 | size_t size) |
1778 | { |
1779 | |
1780 | /* Not implemented. */ |
1781 | return ENXIO; |
1782 | } |
1783 | |
1784 | static void |
1785 | vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) |
1786 | { |
1787 | struct vndgeom *vng = &sc->sc_geom; |
1788 | struct partition *pp; |
1789 | unsigned spb; |
1790 | |
1791 | memset(lp, 0, sizeof(*lp)); |
1792 | |
1793 | spb = vng->vng_secsize / DEV_BSIZE; |
1794 | if (sc->sc_size / spb > UINT32_MAX) |
1795 | lp->d_secperunit = UINT32_MAX; |
1796 | else |
1797 | lp->d_secperunit = sc->sc_size / spb; |
1798 | lp->d_secsize = vng->vng_secsize; |
1799 | lp->d_nsectors = vng->vng_nsectors; |
1800 | lp->d_ntracks = vng->vng_ntracks; |
1801 | lp->d_ncylinders = vng->vng_ncylinders; |
1802 | lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; |
1803 | |
1804 | strncpy(lp->d_typename, "vnd" , sizeof(lp->d_typename)); |
1805 | lp->d_type = DKTYPE_VND; |
1806 | strncpy(lp->d_packname, "fictitious" , sizeof(lp->d_packname)); |
1807 | lp->d_rpm = 3600; |
1808 | lp->d_interleave = 1; |
1809 | lp->d_flags = 0; |
1810 | |
1811 | pp = &lp->d_partitions[RAW_PART]; |
1812 | pp->p_offset = 0; |
1813 | pp->p_size = lp->d_secperunit; |
1814 | pp->p_fstype = FS_UNUSED; |
1815 | lp->d_npartitions = RAW_PART + 1; |
1816 | |
1817 | lp->d_magic = DISKMAGIC; |
1818 | lp->d_magic2 = DISKMAGIC; |
1819 | lp->d_checksum = dkcksum(lp); |
1820 | } |
1821 | |
1822 | /* |
1823 | * Read the disklabel from a vnd. If one is not present, create a fake one. |
1824 | */ |
1825 | static void |
1826 | vndgetdisklabel(dev_t dev, struct vnd_softc *sc) |
1827 | { |
1828 | const char *errstring; |
1829 | struct disklabel *lp = sc->sc_dkdev.dk_label; |
1830 | struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; |
1831 | int i; |
1832 | |
1833 | memset(clp, 0, sizeof(*clp)); |
1834 | |
1835 | vndgetdefaultlabel(sc, lp); |
1836 | |
1837 | /* |
1838 | * Call the generic disklabel extraction routine. |
1839 | */ |
1840 | errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); |
1841 | if (errstring) { |
1842 | /* |
1843 | * Lack of disklabel is common, but we print the warning |
1844 | * anyway, since it might contain other useful information. |
1845 | */ |
1846 | aprint_normal_dev(sc->sc_dev, "%s\n" , errstring); |
1847 | |
1848 | /* |
1849 | * For historical reasons, if there's no disklabel |
1850 | * present, all partitions must be FS_BSDFFS and |
1851 | * occupy the entire disk. |
1852 | */ |
1853 | for (i = 0; i < MAXPARTITIONS; i++) { |
1854 | /* |
1855 | * Don't wipe out port specific hack (such as |
1856 | * dos partition hack of i386 port). |
1857 | */ |
1858 | if (lp->d_partitions[i].p_size != 0) |
1859 | continue; |
1860 | |
1861 | lp->d_partitions[i].p_size = lp->d_secperunit; |
1862 | lp->d_partitions[i].p_offset = 0; |
1863 | lp->d_partitions[i].p_fstype = FS_BSDFFS; |
1864 | } |
1865 | |
1866 | strncpy(lp->d_packname, "default label" , |
1867 | sizeof(lp->d_packname)); |
1868 | |
1869 | lp->d_npartitions = MAXPARTITIONS; |
1870 | lp->d_checksum = dkcksum(lp); |
1871 | } |
1872 | } |
1873 | |
1874 | /* |
1875 | * Wait interruptibly for an exclusive lock. |
1876 | * |
1877 | * XXX |
1878 | * Several drivers do this; it should be abstracted and made MP-safe. |
1879 | */ |
1880 | static int |
1881 | vndlock(struct vnd_softc *sc) |
1882 | { |
1883 | int error; |
1884 | |
1885 | while ((sc->sc_flags & VNF_LOCKED) != 0) { |
1886 | sc->sc_flags |= VNF_WANTED; |
1887 | if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck" , 0)) != 0) |
1888 | return error; |
1889 | } |
1890 | sc->sc_flags |= VNF_LOCKED; |
1891 | return 0; |
1892 | } |
1893 | |
1894 | /* |
1895 | * Unlock and wake up any waiters. |
1896 | */ |
1897 | static void |
1898 | vndunlock(struct vnd_softc *sc) |
1899 | { |
1900 | |
1901 | sc->sc_flags &= ~VNF_LOCKED; |
1902 | if ((sc->sc_flags & VNF_WANTED) != 0) { |
1903 | sc->sc_flags &= ~VNF_WANTED; |
1904 | wakeup(sc); |
1905 | } |
1906 | } |
1907 | |
1908 | #ifdef VND_COMPRESSION |
1909 | /* compressed file read */ |
1910 | static void |
1911 | compstrategy(struct buf *bp, off_t bn) |
1912 | { |
1913 | int error; |
1914 | int unit = vndunit(bp->b_dev); |
1915 | struct vnd_softc *vnd = |
1916 | device_lookup_private(&vnd_cd, unit); |
1917 | u_int32_t comp_block; |
1918 | struct uio auio; |
1919 | char *addr; |
1920 | int s; |
1921 | |
1922 | /* set up constants for data move */ |
1923 | auio.uio_rw = UIO_READ; |
1924 | UIO_SETUP_SYSSPACE(&auio); |
1925 | |
1926 | /* read, and transfer the data */ |
1927 | addr = bp->b_data; |
1928 | bp->b_resid = bp->b_bcount; |
1929 | s = splbio(); |
1930 | while (bp->b_resid > 0) { |
1931 | unsigned length; |
1932 | size_t length_in_buffer; |
1933 | u_int32_t offset_in_buffer; |
1934 | struct iovec aiov; |
1935 | |
1936 | /* calculate the compressed block number */ |
1937 | comp_block = bn / (off_t)vnd->sc_comp_blksz; |
1938 | |
1939 | /* check for good block number */ |
1940 | if (comp_block >= vnd->sc_comp_numoffs) { |
1941 | bp->b_error = EINVAL; |
1942 | splx(s); |
1943 | return; |
1944 | } |
1945 | |
1946 | /* read in the compressed block, if not in buffer */ |
1947 | if (comp_block != vnd->sc_comp_buffblk) { |
1948 | length = vnd->sc_comp_offsets[comp_block + 1] - |
1949 | vnd->sc_comp_offsets[comp_block]; |
1950 | vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); |
1951 | error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, |
1952 | length, vnd->sc_comp_offsets[comp_block], |
1953 | UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred, |
1954 | NULL, NULL); |
1955 | if (error) { |
1956 | bp->b_error = error; |
1957 | VOP_UNLOCK(vnd->sc_vp); |
1958 | splx(s); |
1959 | return; |
1960 | } |
1961 | /* uncompress the buffer */ |
1962 | vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; |
1963 | vnd->sc_comp_stream.avail_in = length; |
1964 | vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; |
1965 | vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; |
1966 | inflateReset(&vnd->sc_comp_stream); |
1967 | error = inflate(&vnd->sc_comp_stream, Z_FINISH); |
1968 | if (error != Z_STREAM_END) { |
1969 | if (vnd->sc_comp_stream.msg) |
1970 | aprint_normal_dev(vnd->sc_dev, |
1971 | "compressed file, %s\n" , |
1972 | vnd->sc_comp_stream.msg); |
1973 | bp->b_error = EBADMSG; |
1974 | VOP_UNLOCK(vnd->sc_vp); |
1975 | splx(s); |
1976 | return; |
1977 | } |
1978 | vnd->sc_comp_buffblk = comp_block; |
1979 | VOP_UNLOCK(vnd->sc_vp); |
1980 | } |
1981 | |
1982 | /* transfer the usable uncompressed data */ |
1983 | offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; |
1984 | length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; |
1985 | if (length_in_buffer > bp->b_resid) |
1986 | length_in_buffer = bp->b_resid; |
1987 | auio.uio_iov = &aiov; |
1988 | auio.uio_iovcnt = 1; |
1989 | aiov.iov_base = addr; |
1990 | aiov.iov_len = length_in_buffer; |
1991 | auio.uio_resid = aiov.iov_len; |
1992 | auio.uio_offset = 0; |
1993 | error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, |
1994 | length_in_buffer, &auio); |
1995 | if (error) { |
1996 | bp->b_error = error; |
1997 | splx(s); |
1998 | return; |
1999 | } |
2000 | |
2001 | bn += length_in_buffer; |
2002 | addr += length_in_buffer; |
2003 | bp->b_resid -= length_in_buffer; |
2004 | } |
2005 | splx(s); |
2006 | } |
2007 | |
2008 | /* compression memory allocation routines */ |
2009 | static void * |
2010 | vnd_alloc(void *aux, u_int items, u_int siz) |
2011 | { |
2012 | return malloc(items * siz, M_TEMP, M_NOWAIT); |
2013 | } |
2014 | |
2015 | static void |
2016 | vnd_free(void *aux, void *ptr) |
2017 | { |
2018 | free(ptr, M_TEMP); |
2019 | } |
2020 | #endif /* VND_COMPRESSION */ |
2021 | |
2022 | static void |
2023 | vnd_set_geometry(struct vnd_softc *vnd) |
2024 | { |
2025 | struct disk_geom *dg = &vnd->sc_dkdev.dk_geom; |
2026 | |
2027 | memset(dg, 0, sizeof(*dg)); |
2028 | |
2029 | dg->dg_secperunit = (int64_t)vnd->sc_geom.vng_nsectors * |
2030 | vnd->sc_geom.vng_ntracks * vnd->sc_geom.vng_ncylinders; |
2031 | dg->dg_secsize = vnd->sc_geom.vng_secsize; |
2032 | dg->dg_nsectors = vnd->sc_geom.vng_nsectors; |
2033 | dg->dg_ntracks = vnd->sc_geom.vng_ntracks; |
2034 | dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders; |
2035 | |
2036 | #ifdef DEBUG |
2037 | if (vnddebug & VDB_LABEL) { |
2038 | printf("dg->dg_secperunit: %" PRId64 "\n" , dg->dg_secperunit); |
2039 | printf("dg->dg_ncylinders: %u\n" , dg->dg_ncylinders); |
2040 | } |
2041 | #endif |
2042 | disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL); |
2043 | } |
2044 | |
2045 | #ifdef VND_COMPRESSION |
2046 | #define VND_DEPENDS "zlib" |
2047 | #else |
2048 | #define VND_DEPENDS NULL |
2049 | #endif |
2050 | |
2051 | MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS); |
2052 | |
2053 | #ifdef _MODULE |
2054 | int vnd_bmajor = -1, vnd_cmajor = -1; |
2055 | |
2056 | CFDRIVER_DECL(vnd, DV_DISK, NULL); |
2057 | #endif |
2058 | |
2059 | static int |
2060 | vnd_modcmd(modcmd_t cmd, void *arg) |
2061 | { |
2062 | int error = 0; |
2063 | |
2064 | switch (cmd) { |
2065 | case MODULE_CMD_INIT: |
2066 | #ifdef _MODULE |
2067 | error = config_cfdriver_attach(&vnd_cd); |
2068 | if (error) |
2069 | break; |
2070 | |
2071 | error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); |
2072 | if (error) { |
2073 | config_cfdriver_detach(&vnd_cd); |
2074 | aprint_error("%s: unable to register cfattach for \n" |
2075 | "%s, error %d" , __func__, vnd_cd.cd_name, error); |
2076 | break; |
2077 | } |
2078 | |
2079 | /* |
2080 | * Attach the {b,c}devsw's |
2081 | */ |
2082 | error = devsw_attach("vnd" , &vnd_bdevsw, &vnd_bmajor, |
2083 | &vnd_cdevsw, &vnd_cmajor); |
2084 | /* |
2085 | * If devsw_attach fails, remove from autoconf database |
2086 | */ |
2087 | if (error) { |
2088 | config_cfattach_detach(vnd_cd.cd_name, &vnd_ca); |
2089 | config_cfdriver_detach(&vnd_cd); |
2090 | aprint_error("%s: unable to attach %s devsw, " |
2091 | "error %d" , __func__, vnd_cd.cd_name, error); |
2092 | break; |
2093 | } |
2094 | #endif |
2095 | break; |
2096 | |
2097 | case MODULE_CMD_FINI: |
2098 | #ifdef _MODULE |
2099 | /* |
2100 | * Remove {b,c}devsw's |
2101 | */ |
2102 | devsw_detach(&vnd_bdevsw, &vnd_cdevsw); |
2103 | |
2104 | /* |
2105 | * Now remove device from autoconf database |
2106 | */ |
2107 | error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca); |
2108 | if (error) { |
2109 | (void)devsw_attach("vnd" , &vnd_bdevsw, &vnd_bmajor, |
2110 | &vnd_cdevsw, &vnd_cmajor); |
2111 | aprint_error("%s: failed to detach %s cfattach, " |
2112 | "error %d\n" , __func__, vnd_cd.cd_name, error); |
2113 | break; |
2114 | } |
2115 | error = config_cfdriver_detach(&vnd_cd); |
2116 | if (error) { |
2117 | (void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); |
2118 | (void)devsw_attach("vnd" , &vnd_bdevsw, &vnd_bmajor, |
2119 | &vnd_cdevsw, &vnd_cmajor); |
2120 | aprint_error("%s: failed to detach %s cfdriver, " |
2121 | "error %d\n" , __func__, vnd_cd.cd_name, error); |
2122 | break; |
2123 | } |
2124 | #endif |
2125 | break; |
2126 | |
2127 | case MODULE_CMD_STAT: |
2128 | return ENOTTY; |
2129 | |
2130 | default: |
2131 | return ENOTTY; |
2132 | } |
2133 | |
2134 | return error; |
2135 | } |
2136 | |