Index: src/distrib/sets/lists/base/shl.mi =================================================================== RCS file: /home/chs/netbsd/cvs/src/distrib/sets/lists/base/shl.mi,v retrieving revision 1.823 diff -u -p -r1.823 shl.mi --- src/distrib/sets/lists/base/shl.mi 10 Oct 2017 19:31:56 -0000 1.823 +++ src/distrib/sets/lists/base/shl.mi 10 Oct 2017 20:14:55 -0000 @@ -854,6 +854,9 @@ ./usr/lib/libzfs.so base-zfs-shlib compatfile,zfs ./usr/lib/libzfs.so.0 base-zfs-shlib compatfile,zfs ./usr/lib/libzfs.so.0.0 base-zfs-shlib compatfile,zfs +./usr/lib/libzfs_core.so base-zfs-shlib compatfile,zfs +./usr/lib/libzfs_core.so.0 base-zfs-shlib compatfile,zfs +./usr/lib/libzfs_core.so.0.0 base-zfs-shlib compatfile,zfs ./usr/lib/libzpool.so base-zfs-shlib compatfile,zfs ./usr/lib/libzpool.so.0 base-zfs-shlib compatfile,zfs ./usr/lib/libzpool.so.0.0 base-zfs-shlib compatfile,zfs Index: src/distrib/sets/lists/comp/mi =================================================================== RCS file: /home/chs/netbsd/cvs/src/distrib/sets/lists/comp/mi,v retrieving revision 1.2151 diff -u -p -r1.2151 mi --- src/distrib/sets/lists/comp/mi 10 Oct 2017 19:31:56 -0000 1.2151 +++ src/distrib/sets/lists/comp/mi 10 Oct 2017 20:14:55 -0000 @@ -3674,6 +3674,8 @@ ./usr/lib/libz_p.a comp-c-proflib compatfile,profile ./usr/lib/libzfs.a comp-zfs-lib compatfile,zfs ./usr/lib/libzfs_p.a comp-zfs-proflib compatfile,zfs,profile +./usr/lib/libzfs_core.a comp-zfs-lib compatfile,zfs +./usr/lib/libzfs_core_p.a comp-zfs-proflib compatfile,zfs,profile ./usr/lib/libzpool.a comp-zfs-lib compatfile,zfs ./usr/lib/libzpool_p.a comp-zfs-proflib compatfile,zfs,profile ./usr/lib/pkgconfig comp-c-lib Index: src/distrib/sets/lists/comp/shl.mi =================================================================== RCS file: /home/chs/netbsd/cvs/src/distrib/sets/lists/comp/shl.mi,v retrieving revision 1.308 diff -u -p -r1.308 shl.mi --- src/distrib/sets/lists/comp/shl.mi 10 Oct 2017 19:31:56 -0000 1.308 +++ 
src/distrib/sets/lists/comp/shl.mi 10 Oct 2017 20:14:55 -0000 @@ -274,7 +274,8 @@ ./usr/lib/libwrap_pic.a comp-c-piclib compatfile,picinstall ./usr/lib/libz_pic.a comp-c-piclib compatfile,picinstall ./usr/lib/libzfs_pic.a comp-zfs-piclib compatfile,picinstall,zfs -./usr/lib/libzpool_pic.a comp-zfs-piclib compatfile,zfs,picinstall +./usr/lib/libzfs_core_pic.a comp-zfs-piclib compatfile,picinstall,zfs +./usr/lib/libzpool_pic.a comp-zfs-piclib compatfile,picinstall,zfs ./usr/libexec/liblto_plugin.so comp-c-bin gcc ./usr/libexec/liblto_plugin.so.0 comp-c-bin gcc ./usr/libexec/liblto_plugin.so.0.0 comp-c-bin gcc Index: src/distrib/sets/lists/debug/mi =================================================================== RCS file: /home/chs/netbsd/cvs/src/distrib/sets/lists/debug/mi,v retrieving revision 1.227 diff -u -p -r1.227 mi --- src/distrib/sets/lists/debug/mi 10 Oct 2017 19:31:56 -0000 1.227 +++ src/distrib/sets/lists/debug/mi 10 Oct 2017 20:14:55 -0000 @@ -264,6 +264,7 @@ ./usr/lib/liby_g.a comp-c-debuglib debuglib,compatfile ./usr/lib/libz_g.a comp-c-debuglib debuglib,compatfile ./usr/lib/libzfs_g.a comp-c-debuglib debuglib,compatfile,zfs +./usr/lib/libzfs_core_g.a comp-c-debuglib debuglib,compatfile,zfs ./usr/lib/libzpool_g.a comp-c-debuglib debuglib,compatfile,zfs ./usr/libdata/debug/bin/cat.debug comp-util-debug debug ./usr/libdata/debug/bin/chio.debug comp-util-debug debug Index: src/distrib/sets/lists/debug/shl.mi =================================================================== RCS file: /home/chs/netbsd/cvs/src/distrib/sets/lists/debug/shl.mi,v retrieving revision 1.185 diff -u -p -r1.185 shl.mi --- src/distrib/sets/lists/debug/shl.mi 10 Oct 2017 19:31:57 -0000 1.185 +++ src/distrib/sets/lists/debug/shl.mi 10 Oct 2017 20:14:55 -0000 @@ -289,6 +289,7 @@ ./usr/libdata/debug/usr/lib/libwrap.so.1.0.debug comp-net-debug debug,compatfile ./usr/libdata/debug/usr/lib/libz.so.1.0.debug comp-sys-debug debug,compatfile ./usr/libdata/debug/usr/lib/libzfs.so.0.0.debug 
comp-zfs-debug debug,compatfile,zfs +./usr/libdata/debug/usr/lib/libzfs_core.so.0.0.debug comp-zfs-debug debug,compatfile,zfs ./usr/libdata/debug/usr/lib/libzpool.so.0.0.debug comp-zfs-debug debug,compatfile,zfs ./usr/libdata/debug/usr/lib/npf/ext_log.so.0.0.debug comp-obsolete debug,compatfile,npf,obsolete ./usr/libdata/debug/usr/lib/npf/ext_normalise.so.0.0.debug comp-obsolete debug,compatfile,npf,obsolete Index: src/external/bsd/libproc/dist/libproc.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/bsd/libproc/dist/libproc.h,v retrieving revision 1.3 diff -u -p -r1.3 libproc.h --- src/external/bsd/libproc/dist/libproc.h 9 Jun 2017 01:17:25 -0000 1.3 +++ src/external/bsd/libproc/dist/libproc.h 10 Jun 2017 00:35:09 -0000 @@ -51,6 +51,11 @@ typedef void (*proc_child_func)(void *); #define PS_DEAD 5 #define PS_LOST 6 +/* Flags for proc_attach(). */ +#define PATTACH_FORCE 0x01 +#define PATTACH_RDONLY 0x02 +#define PATTACH_NOSTOP 0x04 + /* Reason values for proc_detach(). 
*/ #define PRELEASE_HANG 1 #define PRELEASE_KILL 2 Index: src/external/cddl/osnet/Makefile.inc =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/Makefile.inc,v retrieving revision 1.3 diff -u -p -r1.3 Makefile.inc --- src/external/cddl/osnet/Makefile.inc 23 Jan 2016 21:22:45 -0000 1.3 +++ src/external/cddl/osnet/Makefile.inc 10 Jun 2017 06:04:19 -0000 @@ -1,8 +1,10 @@ -# $FreeBSD: src/cddl/Makefile.inc,v 1.6.2.1 2009/08/03 08:13:06 kensmith Exp $ +# $FreeBSD: head/cddl/Makefile.inc 270358 2014-08-22 20:04:51Z delphij $ WARNS?=5 + OSNETDIR= ${NETBSDSRCDIR}/external/cddl/osnet OPENSOLARIS_USR_DISTDIR=${OSNETDIR}/dist OPENSOLARIS_SYS_DISTDIR=${OSNETDIR}/dist + CPPFLAGS+=-Wno-unknown-pragmas -Wno-sign-compare -D_KERNTYPES Index: src/external/cddl/osnet/Makefile.zfs =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/Makefile.zfs,v retrieving revision 1.4 diff -u -p -r1.4 Makefile.zfs --- src/external/cddl/osnet/Makefile.zfs 5 Sep 2012 23:08:42 -0000 1.4 +++ src/external/cddl/osnet/Makefile.zfs 10 Jun 2017 05:39:13 -0000 @@ -6,15 +6,13 @@ NOGCCERROR= yes # Basic compilation stuff. -CPPFLAGS+= "-D__va_list=va_list" -CPPFLAGS+= "-Doffsetof(s, m)=((size_t)(&(((s *)0)->m)))" CPPFLAGS+= -std=c99 # Pick a VTOC format - ick. 
CPPFLAGS+= -D_SUNOS_VTOC_16 CPPFLAGS+= -D_PROPLIB_ZFS_CONFLICT -CFLAGS+= -O0 -fno-inline +#CFLAGS+= -O0 -fno-inline #DBG= -g @@ -36,14 +34,24 @@ CPPFLAGS+= -I${ZFSDIR}/dist/lib/libshare CPPFLAGS+= -I${ZFSDIR}/dist/lib/libumem CPPFLAGS+= -I${ZFSDIR}/dist/lib/libuutil/common CPPFLAGS+= -I${ZFSDIR}/dist/lib/libzfs/common +CPPFLAGS+= -I${ZFSDIR}/dist/lib/libzfs_core/common CPPFLAGS+= -I${ZFSDIR}/dist/lib/libzpool/common CPPFLAGS+= -I${ZFSDIR}/dist/common +CWARNFLAGS+= -Wno-missing-field-initializers +CWARNFLAGS+= -Wno-strict-prototypes +CWARNFLAGS+= -Wno-cast-qual +CWARNFLAGS+= -Wno-discarded-qualifiers +CWARNFLAGS+= -Wno-switch +CWARNFLAGS+= -Wno-missing-prototypes +CWARNFLAGS+= -Wno-unused-variable +CWARNFLAGS+= -Wno-shadow + LIBAVL_SRCDIR= ${ZFSDIR}/lib/libavl LIBNVPAIR_SRCDIR= ${ZFSDIR}/lib/libnvpair LIBUMEM_SRCDIR= ${ZFSDIR}/lib/libumem LIBUUTIL_SRCDIR= ${ZFSDIR}/lib/libuutil LIBZFS_SRCDIR= ${ZFSDIR}/lib/libzfs +LIBZFS_CORE_SRCDIR= ${ZFSDIR}/lib/libzfs_core LIBZPOOL_SRCDIR= ${ZFSDIR}/lib/libzpool - Index: src/external/cddl/osnet/dev/cyclic/cyclic.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/cyclic/cyclic.c,v retrieving revision 1.7 diff -u -p -r1.7 cyclic.c --- src/external/cddl/osnet/dev/cyclic/cyclic.c 1 Feb 2017 21:59:09 -0000 1.7 +++ src/external/cddl/osnet/dev/cyclic/cyclic.c 11 Jun 2017 12:07:53 -0000 @@ -23,7 +23,7 @@ * * Portions Copyright 2008 John Birrell * - * $FreeBSD$ + * $FreeBSD: head/sys/cddl/dev/cyclic/cyclic.c 227293 2011-11-07 06:44:47Z ed $ * * This is a simplified version of the cyclic timer subsystem from * OpenSolaris. In the FreeBSD version, we don't use interrupt levels. 
@@ -352,7 +352,6 @@ #define mtx_unlock_spin(x) mutex_spin_exit(x) #define mtx_destroy(x) mutex_destroy(x) -#define ASSERT(x) KASSERT(x) #define SYSINIT(a1, a2, a3, a4, a5) #define SYSUNINIT(a1, a2, a3, a4, a5) #define CPU_FOREACH(var) \ Index: src/external/cddl/osnet/dev/cyclic/cyclic_test.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/cyclic/cyclic_test.c,v retrieving revision 1.2 diff -u -p -r1.2 cyclic_test.c --- src/external/cddl/osnet/dev/cyclic/cyclic_test.c 21 Feb 2010 01:46:33 -0000 1.2 +++ src/external/cddl/osnet/dev/cyclic/cyclic_test.c 11 Jun 2017 12:07:24 -0000 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/cddl/dev/cyclic/cyclic_test.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/cyclic/cyclic_test.c 179260 2008-05-23 22:21:58Z jb $ * */ Index: src/external/cddl/osnet/dev/cyclic/arm/cyclic_machdep.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/cyclic/arm/cyclic_machdep.c,v retrieving revision 1.1 diff -u -p -r1.1 cyclic_machdep.c --- src/external/cddl/osnet/dev/cyclic/arm/cyclic_machdep.c 5 Mar 2014 06:35:44 -0000 1.1 +++ src/external/cddl/osnet/dev/cyclic/arm/cyclic_machdep.c 11 Jun 2017 12:13:45 -0000 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $FreeBSD$ + * $FreeBSD: head/sys/cddl/dev/cyclic/i386/cyclic_machdep.c 222813 2011-06-07 08:46:13Z attilio $ * */ Index: src/external/cddl/osnet/dev/cyclic/i386/cyclic_machdep.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/cyclic/i386/cyclic_machdep.c,v retrieving revision 1.4 diff -u -p -r1.4 cyclic_machdep.c --- src/external/cddl/osnet/dev/cyclic/i386/cyclic_machdep.c 2 Dec 2012 01:05:16 -0000 1.4 +++ src/external/cddl/osnet/dev/cyclic/i386/cyclic_machdep.c 11 Jun 2017 12:09:34 -0000 @@ -24,7 +24,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD$ + * $FreeBSD: head/sys/cddl/dev/cyclic/i386/cyclic_machdep.c 222813 2011-06-07 08:46:13Z attilio $ * */ Index: src/external/cddl/osnet/dev/dtmalloc/dtmalloc.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtmalloc/dtmalloc.c,v retrieving revision 1.2 diff -u -p -r1.2 dtmalloc.c --- src/external/cddl/osnet/dev/dtmalloc/dtmalloc.c 21 Feb 2010 01:46:33 -0000 1.2 +++ src/external/cddl/osnet/dev/dtmalloc/dtmalloc.c 10 Jun 2017 16:13:27 -0000 @@ -22,7 +22,7 @@ * * Portions Copyright 2006-2008 John Birrell jb@freebsd.org * - * $FreeBSD: src/sys/cddl/dev/dtmalloc/dtmalloc.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtmalloc/dtmalloc.c 252325 2013-06-28 03:14:40Z markj $ * */ @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -113,8 +114,17 @@ dtmalloc_type_cb(struct malloc_type *mtp { char name[DTRACE_FUNCNAMELEN]; struct malloc_type_internal *mtip = mtp->ks_handle; + int i; + /* + * malloc_type descriptions are allowed to contain whitespace, but + * DTrace probe identifiers are not, so replace the whitespace with + * underscores. 
+ */ strlcpy(name, mtp->ks_shortdesc, sizeof(name)); + for (i = 0; name[i] != 0; i++) + if (isspace(name[i])) + name[i] = '_'; if (dtrace_probe_lookup(dtmalloc_id, NULL, name, "malloc") != 0) return; Index: src/external/cddl/osnet/dev/dtrace/dtrace_anon.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_anon.c,v retrieving revision 1.2 diff -u -p -r1.2 dtrace_anon.c --- src/external/cddl/osnet/dev/dtrace/dtrace_anon.c 21 Feb 2010 01:46:33 -0000 1.2 +++ src/external/cddl/osnet/dev/dtrace/dtrace_anon.c 12 Apr 2017 15:45:10 -0000 @@ -20,7 +20,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_anon.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_anon.c 179237 2008-05-23 05:59:42Z jb $ */ /* Index: src/external/cddl/osnet/dev/dtrace/dtrace_cddl.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_cddl.h,v retrieving revision 1.2 diff -u -p -r1.2 dtrace_cddl.h --- src/external/cddl/osnet/dev/dtrace/dtrace_cddl.h 21 Feb 2010 01:46:33 -0000 1.2 +++ src/external/cddl/osnet/dev/dtrace/dtrace_cddl.h 16 Jun 2017 17:07:29 -0000 @@ -20,7 +20,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_cddl.h,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_cddl.h 292388 2015-12-17 00:00:27Z markj $ * */ @@ -28,6 +28,11 @@ #define _DTRACE_CDDL_H_ #include +#include + +#define SYSCTL_NODE(...) +#define SYSCTL_DECL(...) +#define SYSCTL_INT(...) #define LOCK_LEVEL 10 @@ -38,6 +43,7 @@ typedef struct kdtrace_proc { int p_dtrace_probes; /* Are there probes for this proc? 
*/ u_int64_t p_dtrace_count; /* Number of DTrace tracepoints */ void *p_dtrace_helpers; /* DTrace helpers, if any */ + int p_dtrace_model; } kdtrace_proc_t; @@ -61,6 +67,9 @@ typedef struct kdtrace_thread { /* Handling a return probe. */ u_int8_t _td_dtrace_ast; /* Saved ast flag. */ +#ifdef __amd64__ + u_int8_t _td_dtrace_reg; +#endif } _tds; u_long _td_dtrace_ft; /* Bitwise or of these flags. */ } _tdu; @@ -69,6 +78,7 @@ typedef struct kdtrace_thread { #define td_dtrace_step _tdu._tds._td_dtrace_step #define td_dtrace_ret _tdu._tds._td_dtrace_ret #define td_dtrace_ast _tdu._tds._td_dtrace_ast +#define td_dtrace_reg _tdu._tds._td_dtrace_reg uintptr_t td_dtrace_pc; /* DTrace saved pc from fasttrap. */ uintptr_t td_dtrace_npc; /* DTrace next pc from fasttrap. */ @@ -76,8 +86,12 @@ typedef struct kdtrace_thread { /* DTrace per-thread scratch location. */ uintptr_t td_dtrace_astpc; /* DTrace return sequence location. */ +#ifdef __amd64__ + uintptr_t td_dtrace_regv; +#endif u_int64_t td_hrtime; /* Last time on cpu. */ - int td_errno; /* Syscall return value. */ + void *td_dtrace_sscr; /* Saved scratch space location. */ + void *td_systrace_args; /* syscall probe arguments. */ } kdtrace_thread_t; /* @@ -86,21 +100,47 @@ typedef struct kdtrace_thread { * that the separation on FreeBSD is a licensing constraint designed to * keep the GENERIC kernel BSD licensed. 
*/ -#define t_dtrace_vtime l_dtrace->td_dtrace_vtime -#define t_dtrace_start l_dtrace->td_dtrace_start -#define t_dtrace_stop l_dtrace->td_dtrace_stop -#define t_dtrace_sig l_dtrace->td_dtrace_sig -#define t_predcache l_dtrace->td_predcache -#define p_dtrace_helpers p_dtrace->p_dtrace_helpers +#define td_dtrace l_dtrace +#define t_dtrace_vtime td_dtrace->td_dtrace_vtime +#define t_dtrace_start td_dtrace->td_dtrace_start +#define t_dtrace_stop td_dtrace->td_dtrace_stop +#define t_dtrace_sig td_dtrace->td_dtrace_sig +#define t_predcache td_dtrace->td_predcache +#define t_dtrace_ft td_dtrace->td_dtrace_ft +#define t_dtrace_on td_dtrace->td_dtrace_on +#define t_dtrace_step td_dtrace->td_dtrace_step +#define t_dtrace_ret td_dtrace->td_dtrace_ret +#define t_dtrace_ast td_dtrace->td_dtrace_ast +#define t_dtrace_reg td_dtrace->td_dtrace_reg +#define t_dtrace_pc td_dtrace->td_dtrace_pc +#define t_dtrace_npc td_dtrace->td_dtrace_npc +#define t_dtrace_scrpc td_dtrace->td_dtrace_scrpc +#define t_dtrace_astpc td_dtrace->td_dtrace_astpc +#define t_dtrace_regv td_dtrace->td_dtrace_regv +#define t_dtrace_sscr td_dtrace->td_dtrace_sscr +#define t_dtrace_systrace_args td_dtrace->td_systrace_args +#define p_dtrace_helpers p_dtrace->p_dtrace_helpers +#define p_dtrace_count p_dtrace->p_dtrace_count +#define p_dtrace_probes p_dtrace->p_dtrace_probes +#define p_model p_dtrace->p_dtrace_model + +#define DATAMODEL_NATIVE 0 +#ifdef __amd64__ +#define DATAMODEL_LP64 0 +#define DATAMODEL_ILP32 1 +#else +#define DATAMODEL_LP64 1 +#define DATAMODEL_ILP32 0 +#endif /* - * Definitions for fields in struct proc which are named differntly in FreeBSD. + * Definitions for fields in struct proc which are named differently in FreeBSD. */ //#define p_cred p_ucred #define p_parent p_pptr /* - * Definitions for fields in struct thread which are named differntly in NetBSD. + * Definitions for fields in struct thread which are named differently in NetBSD. 
*/ #define t_procp l_proc #define t_tid l_lid Index: src/external/cddl/osnet/dev/dtrace/dtrace_clone.c =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/dtrace_clone.c diff -N src/external/cddl/osnet/dev/dtrace/dtrace_clone.c --- src/external/cddl/osnet/dev/dtrace/dtrace_clone.c 21 Feb 2010 01:46:33 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,63 +0,0 @@ -/* $NetBSD: dtrace_clone.c,v 1.2 2010/02/21 01:46:33 darran Exp $ */ - -/*- - * Copyright (C) 2006 John Birrell . - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice(s), this list of conditions and the following disclaimer as - * the first lines of this file unmodified other than the possible - * addition of one or more copyright notices. - * 2. Redistributions in binary form must reproduce the above copyright - * notice(s), this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. 
- * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_clone.c,v 1.2.2.1 2009/08/03 08:13:06 kensmith Exp $ - * - */ - -static void -dtrace_clone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) -{ - int u = -1; - size_t len; - - if (*dev != NULL) - return; - - len = strlen(name); - - if (len != 6 && len != 13) - return; - - if (bcmp(name,"dtrace",6) != 0) - return; - - if (len == 13 && bcmp(name,"dtrace/dtrace",13) != 0) - return; - - /* Clone the device to the new minor number. */ - if (clone_create(&dtrace_clones, &dtrace_cdevsw, &u, dev, 0) != 0) - /* Create the /dev/dtrace/dtraceNN entry. */ - *dev = make_dev_cred(&dtrace_cdevsw, u, cred, - UID_ROOT, GID_WHEEL, 0600, "dtrace/dtrace%d", u); - if (*dev != NULL) { - dev_ref(*dev); - (*dev)->si_flags |= SI_CHEAPCLONE; - } -} Index: src/external/cddl/osnet/dev/dtrace/dtrace_debug.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_debug.c,v retrieving revision 1.8 diff -u -p -r1.8 dtrace_debug.c --- src/external/cddl/osnet/dev/dtrace/dtrace_debug.c 5 Mar 2014 06:06:42 -0000 1.8 +++ src/external/cddl/osnet/dev/dtrace/dtrace_debug.c 10 May 2017 11:09:52 -0000 @@ -27,33 +27,35 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. 
* - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_debug.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_debug.c 315208 2017-03-13 18:43:00Z markj $ * */ static char const hex2ascii_data[] = "0123456789abcdefghijklmnopqrstuvwxyz"; #define hex2ascii(hex) (hex2ascii_data[hex]) +#define MAXCPU MAXCPUS #ifdef DEBUG #define DTRACE_DEBUG_BUFR_SIZE (32 * 1024) struct dtrace_debug_data { + u_long lock __aligned(CACHE_LINE_SIZE); char bufr[DTRACE_DEBUG_BUFR_SIZE]; char *first; char *last; char *next; -} dtrace_debug_data[MAXCPUS]; +} dtrace_debug_data[MAXCPU]; static char dtrace_debug_bufr[DTRACE_DEBUG_BUFR_SIZE]; -static volatile u_long dtrace_debug_flag[MAXCPUS]; - static void dtrace_debug_lock(int cpu) { - /* FIXME: use atomic_cmpset_ulong once we have it */ - while (atomic_cas_ulong(&dtrace_debug_flag[cpu], 0, 1) == 0) + void *tid; + + tid = curlwp; + while (atomic_cas_ptr(&dtrace_debug_data[cpu].lock, NULL, tid) != NULL) /* atomic_cas_ptr() returns the old value: NULL means we took the lock */ /* Loop until the lock is obtained. 
*/ ; } @@ -61,7 +63,9 @@ dtrace_debug_lock(int cpu) static void dtrace_debug_unlock(int cpu) { - dtrace_debug_flag[cpu] = 0; + + membar_producer(); + dtrace_debug_data[cpu].lock = 0; } static void @@ -83,25 +87,26 @@ dtrace_debug_init(void *dummy) } } -//SYSINIT(dtrace_debug_init, SI_SUB_KDTRACE, SI_ORDER_ANY, dtrace_debug_init, NULL); -//SYSINIT(dtrace_debug_smpinit, SI_SUB_SMP, SI_ORDER_ANY, dtrace_debug_init, NULL); +#ifdef __FreeBSD__ +SYSINIT(dtrace_debug_init, SI_SUB_KDTRACE, SI_ORDER_ANY, dtrace_debug_init, NULL); +SYSINIT(dtrace_debug_smpinit, SI_SUB_SMP, SI_ORDER_ANY, dtrace_debug_init, NULL); +#endif static void dtrace_debug_output(void) { char *p; + int i; struct dtrace_debug_data *d; uintptr_t count; CPU_INFO_ITERATOR cpuind; struct cpu_info *cinfo; - cpuid_t cpuid; for (CPU_INFO_FOREACH(cpuind, cinfo)) { - cpuid = cpu_index(cinfo); + i = cpu_index(cinfo); + dtrace_debug_lock(i); - dtrace_debug_lock(cpuid); - - d = &dtrace_debug_data[cpuid]; + d = &dtrace_debug_data[i]; count = 0; @@ -129,7 +134,7 @@ dtrace_debug_output(void) d->first = d->bufr; d->next = d->bufr; - dtrace_debug_unlock(cpuid); + dtrace_debug_unlock(i); if (count > 0) { char *last = dtrace_debug_bufr + count; @@ -158,10 +163,11 @@ dtrace_debug_output(void) */ static __inline void -dtrace_debug__putc(char c) +dtrace_debug__putc(int cpu, char c) { - struct dtrace_debug_data *d = &dtrace_debug_data[cpu_number()]; + struct dtrace_debug_data *d; + d = &dtrace_debug_data[cpu]; *d->next++ = c; if (d->next == d->last) @@ -179,24 +185,30 @@ dtrace_debug__putc(char c) static void __used dtrace_debug_putc(char c) { - dtrace_debug_lock(cpu_number()); + int cpu; + + cpu = cpu_number(); + dtrace_debug_lock(cpu); - dtrace_debug__putc(c); + dtrace_debug__putc(cpu, c); - dtrace_debug_unlock(cpu_number()); + dtrace_debug_unlock(cpu); } static void __used dtrace_debug_puts(const char *s) { - dtrace_debug_lock(cpu_number()); + int cpu; + + cpu = cpu_number(); + dtrace_debug_lock(cpu); while (*s != '\0') - 
dtrace_debug__putc(*s++); + dtrace_debug__putc(cpu, *s++); - dtrace_debug__putc('\0'); + dtrace_debug__putc(cpu, '\0'); - dtrace_debug_unlock(cpu_number()); + dtrace_debug_unlock(cpu); } /* @@ -205,30 +217,30 @@ dtrace_debug_puts(const char *s) * Put a NUL-terminated ASCII number (base <= 36) in a buffer in reverse * order; return an optional length and a pointer to the last character * written in the buffer (i.e., the first character of the string). - * The buffer pointed to by `xbuf' must have length >= MAXNBUF. + * The buffer pointed to by `nbuf' must have length >= MAXNBUF. */ static char * -dtrace_debug_ksprintn(char *xbuf, uintmax_t num, int base, int *lenp, int upper) +dtrace_debug_ksprintn(char *nbuf, uintmax_t num, int base, int *lenp, int upper) { char *p, c; - p = xbuf; + p = nbuf; *p = '\0'; do { c = hex2ascii(num % base); *++p = upper ? toupper(c) : c; } while (num /= base); if (lenp) - *lenp = p - xbuf; + *lenp = p - nbuf; return (p); } #define MAXNBUF (sizeof(intmax_t) * NBBY + 1) static void -dtrace_debug_vprintf(const char *fmt, va_list ap) +dtrace_debug_vprintf(int cpu, const char *fmt, va_list ap) { - char xbuf[MAXNBUF]; + char nbuf[MAXNBUF]; const char *p, *percent, *q; u_char *up; int ch, n; @@ -250,10 +262,10 @@ dtrace_debug_vprintf(const char *fmt, va width = 0; while ((ch = (u_char)*fmt++) != '%' || stop) { if (ch == '\0') { - dtrace_debug__putc('\0'); + dtrace_debug__putc(cpu, '\0'); return; } - dtrace_debug__putc(ch); + dtrace_debug__putc(cpu, ch); } percent = fmt - 1; qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0; @@ -273,7 +285,7 @@ reswitch: switch (ch = (u_char)*fmt++) { ladjust = 1; goto reswitch; case '%': - dtrace_debug__putc(ch); + dtrace_debug__putc(cpu, ch); break; case '*': if (!dot) { @@ -307,8 +319,8 @@ reswitch: switch (ch = (u_char)*fmt++) { case 'b': num = (u_int)va_arg(ap, int); p = va_arg(ap, char *); - for (q = dtrace_debug_ksprintn(xbuf, num, *p++, NULL, 0); *q;) - dtrace_debug__putc(*q--); + for (q = 
dtrace_debug_ksprintn(nbuf, num, *p++, NULL, 0); *q;) + dtrace_debug__putc(cpu, *q--); if (num == 0) break; @@ -316,19 +328,19 @@ reswitch: switch (ch = (u_char)*fmt++) { for (tmp = 0; *p;) { n = *p++; if (num & (1 << (n - 1))) { - dtrace_debug__putc(tmp ? ',' : '<'); + dtrace_debug__putc(cpu, tmp ? ',' : '<'); for (; (n = *p) > ' '; ++p) - dtrace_debug__putc(n); + dtrace_debug__putc(cpu, n); tmp = 1; } else for (; *p > ' '; ++p) continue; } if (tmp) - dtrace_debug__putc('>'); + dtrace_debug__putc(cpu, '>'); break; case 'c': - dtrace_debug__putc(va_arg(ap, int)); + dtrace_debug__putc(cpu, va_arg(ap, int)); break; case 'D': up = va_arg(ap, u_char *); @@ -336,12 +348,12 @@ reswitch: switch (ch = (u_char)*fmt++) { if (!width) width = 16; while(width--) { - dtrace_debug__putc(hex2ascii(*up >> 4)); - dtrace_debug__putc(hex2ascii(*up & 0x0f)); + dtrace_debug__putc(cpu, hex2ascii(*up >> 4)); + dtrace_debug__putc(cpu, hex2ascii(*up & 0x0f)); up++; if (width) for (q=p;*q;q++) - dtrace_debug__putc(*q); + dtrace_debug__putc(cpu, *q); } break; case 'd': @@ -413,12 +425,12 @@ reswitch: switch (ch = (u_char)*fmt++) { if (!ladjust && width > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); while (n--) - dtrace_debug__putc(*p++); + dtrace_debug__putc(cpu, *p++); if (ladjust && width > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); break; case 't': tflag = 1; @@ -479,7 +491,7 @@ number: neg = 1; num = -(intmax_t)num; } - p = dtrace_debug_ksprintn(xbuf, num, base, &tmp, upper); + p = dtrace_debug_ksprintn(nbuf, num, base, &tmp, upper); if (sharpflag && num != 0) { if (base == 8) tmp++; @@ -492,32 +504,32 @@ number: if (!ladjust && padc != '0' && width && (width -= tmp) > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); if (neg) - dtrace_debug__putc('-'); + dtrace_debug__putc(cpu, '-'); if (sharpflag && num != 0) { if (base == 8) { - dtrace_debug__putc('0'); + dtrace_debug__putc(cpu, '0'); 
} else if (base == 16) { - dtrace_debug__putc('0'); - dtrace_debug__putc('x'); + dtrace_debug__putc(cpu, '0'); + dtrace_debug__putc(cpu, 'x'); } } if (!ladjust && width && (width -= tmp) > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); while (*p) - dtrace_debug__putc(*p--); + dtrace_debug__putc(cpu, *p--); if (ladjust && width && (width -= tmp) > 0) while (width--) - dtrace_debug__putc(padc); + dtrace_debug__putc(cpu, padc); break; default: while (percent < fmt) - dtrace_debug__putc(*percent++); + dtrace_debug__putc(cpu, *percent++); /* * Since we ignore an formatting argument it is no * longer safe to obey the remaining formatting @@ -529,23 +541,25 @@ number: } } - dtrace_debug__putc('\0'); + dtrace_debug__putc(cpu, '\0'); } void dtrace_debug_printf(const char *fmt, ...) { va_list ap; + int cpu; - dtrace_debug_lock(cpu_number()); + cpu = cpu_number(); + dtrace_debug_lock(cpu); va_start(ap, fmt); - dtrace_debug_vprintf(fmt, ap); + dtrace_debug_vprintf(cpu, fmt, ap); va_end(ap); - dtrace_debug_unlock(cpu_number()); + dtrace_debug_unlock(cpu); } #else @@ -554,4 +568,9 @@ dtrace_debug_printf(const char *fmt, ... #define dtrace_debug_puts(_s) #define dtrace_debug_printf(fmt, ...) 
+static void +dtrace_debug_init(void *dummy) +{ +} + #endif Index: src/external/cddl/osnet/dev/dtrace/dtrace_hacks.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_hacks.c,v retrieving revision 1.5 diff -u -p -r1.5 dtrace_hacks.c --- src/external/cddl/osnet/dev/dtrace/dtrace_hacks.c 23 Jun 2016 06:44:52 -0000 1.5 +++ src/external/cddl/osnet/dev/dtrace/dtrace_hacks.c 5 May 2017 11:52:00 -0000 @@ -1,12 +1,13 @@ /* $NetBSD: dtrace_hacks.c,v 1.5 2016/06/23 06:44:52 pgoyette Exp $ */ -/* $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_hacks.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ */ +/* $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_hacks.c 281916 2015-04-24 03:19:30Z markj $ */ /* XXX Hacks.... */ dtrace_cacheid_t dtrace_predcache_id; boolean_t -priv_policy_only(const cred_t *a, int b, boolean_t c) +priv_policy_only(const cred_t *cr, int b, boolean_t c) { - return 1; + + return kauth_authorize_generic(cr, KAUTH_GENERIC_ISSUSER, NULL) == 0; } Index: src/external/cddl/osnet/dev/dtrace/dtrace_ioctl.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_ioctl.c,v retrieving revision 1.6 diff -u -p -r1.6 dtrace_ioctl.c --- src/external/cddl/osnet/dev/dtrace/dtrace_ioctl.c 30 Sep 2015 20:59:13 -0000 1.6 +++ src/external/cddl/osnet/dev/dtrace/dtrace_ioctl.c 11 Jun 2017 11:47:33 -0000 @@ -20,18 +20,106 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_ioctl.c,v 1.2.2.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_ioctl.c 313262 2017-02-05 02:39:12Z markj $ * */ -static int dtrace_verbose_ioctl=0; -//SYSCTL_INT(_debug_dtrace, OID_AUTO, verbose_ioctl, CTLFLAG_RW, &dtrace_verbose_ioctl, 0, ""); +static int dtrace_verbose_ioctl; +SYSCTL_INT(_debug_dtrace, OID_AUTO, verbose_ioctl, CTLFLAG_RW, + &dtrace_verbose_ioctl, 0, "log DTrace ioctls"); + +#define 
pfind(pid) proc_find((pid)) #define DTRACE_IOCTL_PRINTF(fmt, ...) if (dtrace_verbose_ioctl) printf(fmt, ## __VA_ARGS__ ) +#ifdef __FreeBSD__ +static int +dtrace_ioctl_helper(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td) +#endif +#ifdef __NetBSD__ +static int +dtrace_ioctl_helper(dev_t dev, u_long cmd, caddr_t addr, int flags) +#endif +{ + struct proc *p; + dof_helper_t *dhp; + dof_hdr_t *dof; + int rval; + + dhp = NULL; + dof = NULL; + rval = 0; + switch (cmd) { + case DTRACEHIOC_ADDDOF: + dhp = (dof_helper_t *)addr; + addr = (caddr_t)(uintptr_t)dhp->dofhp_dof; + p = curproc; + if (p->p_pid == dhp->dofhp_pid) { + dof = dtrace_dof_copyin((uintptr_t)addr, &rval); + } else { +#ifdef __FreeBSD__ + p = pfind(dhp->dofhp_pid); + if (p == NULL) + return (EINVAL); + if (!P_SHOULDSTOP(p) || + (p->p_flag & (P_TRACED | P_WEXIT)) != P_TRACED || + p->p_pptr != curproc) { + PROC_UNLOCK(p); + return (EINVAL); + } + _PHOLD(p); + PROC_UNLOCK(p); + dof = dtrace_dof_copyin_proc(p, (uintptr_t)addr, &rval); +#endif +#ifdef __NetBSD__ + dof = dtrace_dof_copyin_pid(dhp->dofhp_pid, addr, &rval); +#endif + } + + if (dof == NULL) { +#ifdef __FreeBSD__ + if (p != curproc) + PRELE(p); +#endif + break; + } + + mutex_enter(&dtrace_lock); + if ((rval = dtrace_helper_slurp(dof, dhp, p)) != -1) { + dhp->dofhp_gen = rval; + rval = 0; + } else { + rval = EINVAL; + } + mutex_exit(&dtrace_lock); +#ifdef __FreeBSD__ + if (p != curproc) + PRELE(p); +#endif + break; + case DTRACEHIOC_REMOVE: + mutex_enter(&dtrace_lock); + rval = dtrace_helper_destroygen(NULL, *(int *)(uintptr_t)addr); + mutex_exit(&dtrace_lock); + break; + default: + rval = ENOTTY; + break; + } + return (rval); +} + /* ARGSUSED */ +#ifdef __FreeBSD__ +static int +dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, + int flags __unused, struct thread *td) +#endif +#ifdef __NetBSD__ static int dtrace_ioctl(struct file *fp, u_long cmd, void *addr) +#endif { dtrace_state_t *state = (dtrace_state_t 
*)fp->f_data; int error = 0; @@ -224,6 +312,7 @@ dtrace_ioctl(struct file *fp, u_long cmd desc.dtbd_drops = buf->dtb_drops; desc.dtbd_errors = buf->dtb_errors; desc.dtbd_oldest = buf->dtb_xamot_offset; + desc.dtbd_timestamp = dtrace_gethrtime(); mutex_exit(&dtrace_lock); @@ -278,6 +367,7 @@ dtrace_ioctl(struct file *fp, u_long cmd desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; + desc.dtbd_timestamp = buf->dtb_switched; mutex_exit(&dtrace_lock); @@ -361,7 +451,8 @@ dtrace_ioctl(struct file *fp, u_long cmd return (EBUSY); } - if (dtrace_dof_slurp(dof, vstate, curlwp->l_cred, &enab, 0, B_TRUE) != 0) { + if (dtrace_dof_slurp(dof, vstate, CRED(), &enab, 0, 0, + B_TRUE) != 0) { mutex_exit(&dtrace_lock); mutex_exit(&cpu_lock); dtrace_dof_destroy(dof); @@ -528,19 +619,25 @@ dtrace_ioctl(struct file *fp, u_long cmd return (EINVAL); mutex_enter(&dtrace_provider_lock); +#ifdef illumos mutex_enter(&mod_lock); +#endif mutex_enter(&dtrace_lock); if (desc->dtargd_id > dtrace_nprobes) { mutex_exit(&dtrace_lock); +#ifdef illumos mutex_exit(&mod_lock); +#endif mutex_exit(&dtrace_provider_lock); return (EINVAL); } if ((probe = dtrace_probes[desc->dtargd_id - 1]) == NULL) { mutex_exit(&dtrace_lock); +#ifdef illumos mutex_exit(&mod_lock); +#endif mutex_exit(&dtrace_provider_lock); return (EINVAL); } @@ -564,7 +661,9 @@ dtrace_ioctl(struct file *fp, u_long cmd probe->dtpr_id, probe->dtpr_arg, desc); } +#ifdef illumos mutex_exit(&mod_lock); +#endif mutex_exit(&dtrace_provider_lock); return (0); @@ -710,7 +809,7 @@ again: case DTRACEIOC_STATUS: { dtrace_status_t *stat = (dtrace_status_t *) addr; dtrace_dstate_t *dstate; - int j; + int i, j; uint64_t nerrs; CPU_INFO_ITERATOR cpuind; struct cpu_info *cinfo; @@ -742,24 +841,25 @@ again: dstate = &state->dts_vstate.dtvs_dynvars; for (CPU_INFO_FOREACH(cpuind, cinfo)) { - int ci = cpu_index(cinfo); - dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[ci]; + i = cpu_index(cinfo); + + 
dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; stat->dtst_dyndrops += dcpu->dtdsc_drops; stat->dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops; stat->dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops; - if (state->dts_buffer[ci].dtb_flags & DTRACEBUF_FULL) + if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL) stat->dtst_filled++; - nerrs += state->dts_buffer[ci].dtb_errors; + nerrs += state->dts_buffer[i].dtb_errors; for (j = 0; j < state->dts_nspeculations; j++) { dtrace_speculation_t *spec; dtrace_buffer_t *buf; spec = &state->dts_speculations[j]; - buf = &spec->dtsp_buffer[ci]; + buf = &spec->dtsp_buffer[i]; stat->dtst_specdrops += buf->dtb_xamot_drops; } } @@ -777,15 +877,16 @@ again: return (0); } case DTRACEIOC_STOP: { + int rval; processorid_t *cpuid = (processorid_t *) addr; DTRACE_IOCTL_PRINTF("%s(%d): DTRACEIOC_STOP\n",__func__,__LINE__); mutex_enter(&dtrace_lock); - error = dtrace_state_stop(state, cpuid); + rval = dtrace_state_stop(state, cpuid); mutex_exit(&dtrace_lock); - return (error); + return (rval); } default: error = ENOTTY; Index: src/external/cddl/osnet/dev/dtrace/dtrace_load.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_load.c,v retrieving revision 1.3 diff -u -p -r1.3 dtrace_load.c --- src/external/cddl/osnet/dev/dtrace/dtrace_load.c 31 Aug 2011 21:57:16 -0000 1.3 +++ src/external/cddl/osnet/dev/dtrace/dtrace_load.c 15 May 2017 23:58:54 -0000 @@ -20,7 +20,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_load.c,v 1.2.2.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_load.c 309069 2016-11-23 22:50:20Z gnn $ * */ @@ -30,8 +30,32 @@ void dtrace_gethrtime_init(void *); int dtrace_helptrace_size=0; -#ifndef mutex_init -#define mutex_init(a, b, c, d) mutex_init(a, c, IPL_NONE) +#ifdef __FreeBSD__ +#ifndef EARLY_AP_STARTUP +static void +dtrace_ap_start(void *dummy) +{ + int i; + + 
mutex_enter(&cpu_lock); + + /* Setup the rest of the CPUs. */ + CPU_FOREACH(i) { + if (i == 0) + continue; + + (void) dtrace_cpu_setup(CPU_CONFIG, i); + } + + mutex_exit(&cpu_lock); +} + +SYSINIT(dtrace_ap_start, SI_SUB_SMP, SI_ORDER_ANY, dtrace_ap_start, NULL); +#endif +#endif + +#ifdef __NetBSD__ +void *dtrace_modcb; #endif static void @@ -44,6 +68,17 @@ dtrace_load(void *dummy) dtrace_debug_init(NULL); dtrace_gethrtime_init(NULL); +#ifdef __FreeBSD__ + /* + * DTrace uses negative logic for the destructive mode switch, so it + * is required to translate from the sysctl which uses positive logic. + */ + if (dtrace_allow_destructive) + dtrace_destructive_disallow = 0; + else + dtrace_destructive_disallow = 1; +#endif + /* Hook into the trap handler. */ dtrace_trap_func = dtrace_trap; @@ -53,11 +88,23 @@ dtrace_load(void *dummy) /* Hang our hook for exceptions. */ dtrace_invop_init(); - /* - * XXX This is a short term hack to avoid having to comment - * out lots and lots of lock/unlock calls. - */ - mutex_init(&mod_lock,"XXX mod_lock hack", MUTEX_DEFAULT, NULL); +#ifdef __FreeBSD__ + dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, 0, 0, 0); + + dtrace_arena = new_unrhdr(1, INT_MAX, &dtrace_unr_mtx); + + /* Register callbacks for linker file load and unload events. 
*/ + dtrace_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, + dtrace_kld_load, NULL, EVENTHANDLER_PRI_ANY); + dtrace_kld_unload_try_tag = EVENTHANDLER_REGISTER(kld_unload_try, + dtrace_kld_unload_try, NULL, EVENTHANDLER_PRI_ANY); +#endif + +#ifdef __NetBSD__ + dtrace_arena = vmem_create("dtrace", 1, INT_MAX, 1, + NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE); + +#endif /* * Initialise the mutexes without 'witness' because the dtrace @@ -70,7 +117,9 @@ dtrace_load(void *dummy) mutex_init(&dtrace_lock,"dtrace probe state", MUTEX_DEFAULT, NULL); mutex_init(&dtrace_provider_lock,"dtrace provider state", MUTEX_DEFAULT, NULL); mutex_init(&dtrace_meta_lock,"dtrace meta-provider state", MUTEX_DEFAULT, NULL); +#ifdef DEBUG mutex_init(&dtrace_errlock,"dtrace error lock", MUTEX_DEFAULT, NULL); +#endif mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); @@ -78,9 +127,6 @@ dtrace_load(void *dummy) ASSERT(MUTEX_HELD(&cpu_lock)); - dtrace_arena = vmem_create("dtrace", 1, INT_MAX, 1, - NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE); - dtrace_state_cache = kmem_cache_create(__UNCONST("dtrace_state_cache"), sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); @@ -130,19 +176,6 @@ dtrace_load(void *dummy) dtrace_provider, NULL, NULL, "ERROR", 1, NULL); mutex_exit(&cpu_lock); - - /* - * If DTrace helper tracing is enabled, we need to allocate the - * trace buffer and initialize the values. 
- */ - if (dtrace_helptrace_enabled) { - ASSERT(dtrace_helptrace_buffer == NULL); - dtrace_helptrace_buffer = - kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP); - dtrace_helptrace_next = 0; - dtrace_helptrace_size = dtrace_helptrace_bufsize; - } - mutex_exit(&dtrace_lock); mutex_exit(&dtrace_provider_lock); @@ -155,9 +188,17 @@ dtrace_load(void *dummy) mutex_exit(&cpu_lock); +#ifdef __NetBSD__ dtrace_anon_init(NULL); -#if 0 - dtrace_dev = make_dev(&dtrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/dtrace"); + + dtrace_modcb = module_register_callbacks(dtrace_module_loaded, + dtrace_module_unloaded); +#endif +#ifdef __FreeBSD__ + dtrace_dev = make_dev(&dtrace_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/dtrace"); + helper_dev = make_dev(&helper_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660, + "dtrace/helper"); #endif return; Index: src/external/cddl/osnet/dev/dtrace/dtrace_modevent.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_modevent.c,v retrieving revision 1.5 diff -u -p -r1.5 dtrace_modevent.c --- src/external/cddl/osnet/dev/dtrace/dtrace_modevent.c 28 Nov 2015 22:41:36 -0000 1.5 +++ src/external/cddl/osnet/dev/dtrace/dtrace_modevent.c 12 Apr 2017 15:44:25 -0000 @@ -20,7 +20,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_modevent.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_load.c 309069 2016-11-23 22:50:20Z gnn $ * */ Index: src/external/cddl/osnet/dev/dtrace/dtrace_sysctl.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_sysctl.c,v retrieving revision 1.3 diff -u -p -r1.3 dtrace_sysctl.c --- src/external/cddl/osnet/dev/dtrace/dtrace_sysctl.c 23 Apr 2010 11:39:52 -0000 1.3 +++ src/external/cddl/osnet/dev/dtrace/dtrace_sysctl.c 12 Apr 2017 15:49:42 -0000 @@ -20,16 +20,10 @@ * * CDDL HEADER END * - * 
$FreeBSD: src/sys/cddl/dev/dtrace/dtrace_sysctl.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_sysctl.c 309069 2016-11-23 22:50:20Z gnn $ * */ -int dtrace_debug = 0; -#if 0 -TUNABLE_INT("debug.dtrace.debug", &dtrace_debug); -SYSCTL_INT(_debug_dtrace, OID_AUTO, debug, CTLFLAG_RW, &dtrace_debug, 0, ""); -#endif - #if 0 /* XXX TBD sysctl */ /* Report registered DTrace providers. */ static int @@ -82,6 +76,27 @@ sysctl_dtrace_providers(SYSCTL_HANDLER_A return (error); } +SYSCTL_NODE(_debug, OID_AUTO, dtrace, CTLFLAG_RD, 0, "DTrace debug parameters"); + SYSCTL_PROC(_debug_dtrace, OID_AUTO, providers, CTLTYPE_STRING | CTLFLAG_RD, - 0, 0, sysctl_dtrace_providers, "A", ""); + 0, 0, sysctl_dtrace_providers, "A", "available DTrace providers"); + +SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RD, 0, "DTrace parameters"); + +SYSCTL_INT(_kern_dtrace, OID_AUTO, err_verbose, CTLFLAG_RW, + &dtrace_err_verbose, 0, + "print DIF and DOF validation errors to the message buffer"); + +SYSCTL_INT(_kern_dtrace, OID_AUTO, memstr_max, CTLFLAG_RW, &dtrace_memstr_max, + 0, "largest allowed argument to memstr(), 0 indicates no limit"); + +SYSCTL_QUAD(_kern_dtrace, OID_AUTO, dof_maxsize, CTLFLAG_RW, + &dtrace_dof_maxsize, 0, "largest allowed DOF table"); + +SYSCTL_QUAD(_kern_dtrace, OID_AUTO, helper_actions_max, CTLFLAG_RW, + &dtrace_helper_actions_max, 0, "maximum number of allowed helper actions"); + +SYSCTL_INT(_security_bsd, OID_AUTO, allow_destructive_dtrace, CTLFLAG_RDTUN, + &dtrace_allow_destructive, 1, "Allow destructive mode DTrace scripts"); + #endif Index: src/external/cddl/osnet/dev/dtrace/dtrace_test.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_test.c,v retrieving revision 1.2 diff -u -p -r1.2 dtrace_test.c --- src/external/cddl/osnet/dev/dtrace/dtrace_test.c 21 Feb 2010 01:46:33 -0000 1.2 +++ src/external/cddl/osnet/dev/dtrace/dtrace_test.c 
12 Apr 2017 15:53:25 -0000 @@ -24,18 +24,26 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_test.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_test.c 258622 2013-11-26 08:46:27Z avg $ * */ - #include #include #include +#include + #include #include #include +#include +#include #include +SDT_PROVIDER_DEFINE(test); + +SDT_PROBE_DEFINE7(test, , , sdttest, "int", "int", "int", "int", "int", + "int", "int"); + /* * These are variables that the DTrace test suite references in the * Solaris kernel. We define them here so that the tests function @@ -47,6 +55,33 @@ typedef struct vnode vnode_t; vnode_t dummy; vnode_t *rootvp = &dummy; +/* + * Test SDT probes with more than 5 arguments. On amd64, such probes require + * special handling since only the first 5 arguments will be passed to + * dtrace_probe() in registers; the rest must be fetched off the stack. + */ +static int +dtrace_test_sdttest(SYSCTL_HANDLER_ARGS) +{ + int val, error; + + val = 0; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || req->newptr == NULL) + return (error); + else if (val == 0) + return (0); + + SDT_PROBE7(test, , , sdttest, 1, 2, 3, 4, 5, 6, 7); + + return (error); +} + +static SYSCTL_NODE(_debug, OID_AUTO, dtracetest, CTLFLAG_RD, 0, ""); + +SYSCTL_PROC(_debug_dtracetest, OID_AUTO, sdttest, CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, dtrace_test_sdttest, "I", "Trigger the SDT test probe"); + static int dtrace_test_modevent(module_t mod, int type, void *data) { Index: src/external/cddl/osnet/dev/dtrace/dtrace_unload.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_unload.c,v retrieving revision 1.6 diff -u -p -r1.6 dtrace_unload.c --- src/external/cddl/osnet/dev/dtrace/dtrace_unload.c 26 Feb 2015 09:10:52 -0000 1.6 +++ 
src/external/cddl/osnet/dev/dtrace/dtrace_unload.c 15 May 2017 23:59:03 -0000 @@ -20,19 +20,25 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_unload.c,v 1.2.2.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_unload.c 278166 2015-02-03 19:39:53Z pfg $ * */ -extern int dtrace_probes_size; -extern int dtrace_helptrace_size; - static int dtrace_unload() { dtrace_state_t *state; int error = 0; +#ifdef __FreeBSD__ + destroy_dev(dtrace_dev); + destroy_dev(helper_dev); +#endif + +#ifdef __NetBSD__ + module_unregister_callbacks(dtrace_modcb); +#endif + mutex_enter(&dtrace_provider_lock); mutex_enter(&dtrace_lock); mutex_enter(&cpu_lock); @@ -52,6 +58,10 @@ dtrace_unload() } dtrace_provider = NULL; +#ifdef __FreeBSD__ + EVENTHANDLER_DEREGISTER(kld_load, dtrace_kld_load_tag); + EVENTHANDLER_DEREGISTER(kld_unload_try, dtrace_kld_unload_try_tag); +#endif if ((state = dtrace_anon_grab()) != NULL) { /* @@ -67,13 +77,8 @@ dtrace_unload() mutex_exit(&cpu_lock); - if (dtrace_helptrace_enabled) { - kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_size); - dtrace_helptrace_buffer = NULL; - } - if (dtrace_probes != NULL) { - kmem_free(dtrace_probes, dtrace_probes_size); + kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *)); dtrace_probes = NULL; dtrace_nprobes = 0; } @@ -87,7 +92,12 @@ dtrace_unload() kmem_cache_destroy(dtrace_state_cache); +#ifdef __FreeBSD__ + delete_unrhdr(dtrace_arena); +#endif +#ifdef __NetBSD__ vmem_destroy(dtrace_arena); +#endif if (dtrace_toxrange != NULL) { kmem_free(dtrace_toxrange, @@ -107,10 +117,13 @@ dtrace_unload() mutex_destroy(&dtrace_meta_lock); mutex_destroy(&dtrace_provider_lock); mutex_destroy(&dtrace_lock); +#ifdef DEBUG mutex_destroy(&dtrace_errlock); +#endif - /* XXX Hack */ - mutex_destroy(&mod_lock); +#ifdef __FreeBSD__ + taskq_destroy(dtrace_taskq); +#endif /* Reset our hook for exceptions. 
*/ dtrace_invop_uninit(); Index: src/external/cddl/osnet/dev/dtrace/dtrace_vtime.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/dtrace_vtime.c,v retrieving revision 1.2 diff -u -p -r1.2 dtrace_vtime.c --- src/external/cddl/osnet/dev/dtrace/dtrace_vtime.c 21 Feb 2010 01:46:33 -0000 1.2 +++ src/external/cddl/osnet/dev/dtrace/dtrace_vtime.c 12 Apr 2017 16:01:35 -0000 @@ -20,7 +20,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/dtrace_vtime.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/dtrace_vtime.c 179237 2008-05-23 05:59:42Z jb $ */ /* Index: src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.c =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.c diff -N src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.c --- src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.c 18 Jul 2011 00:42:40 -0000 1.3 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,3195 +0,0 @@ -/* $NetBSD: dis_tables.c,v 1.3 2011/07/18 00:42:40 christos Exp $ */ - -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - * - * $FreeBSD: src/sys/cddl/dev/dtrace/amd64/dis_tables.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ - - -#if defined(sun) -#pragma ident "@(#)dis_tables.c 1.11 06/03/02 SMI" -#endif - -#include "dis_tables.h" - -/* BEGIN CSTYLED */ - -/* - * Disassembly begins in dis_distable, which is equivalent to the One-byte - * Opcode Map in the Intel IA32 ISA Reference (page A-6 in my copy). The - * decoding loops then traverse out through the other tables as necessary to - * decode a given instruction. - * - * The behavior of this file can be controlled by one of the following flags: - * - * DIS_TEXT Include text for disassembly - * DIS_MEM Include memory-size calculations - * - * Either or both of these can be defined. - * - * This file is not, and will never be, cstyled. If anything, the tables should - * be taken out another tab stop or two so nothing overlaps. - */ - -/* - * These functions must be provided for the consumer to do disassembly. - */ -#ifdef DIS_TEXT -extern char *strncpy(char *, const char *, size_t); -extern size_t strlen(const char *); -extern int strcmp(const char *, const char *); -extern int strncmp(const char *, const char *, size_t); -extern size_t strlcat(char *, const char *, size_t); -#endif - - -#define TERM NULL /* used to indicate that the 'indirect' */ - /* field terminates - no pointer. */ - -/* Used to decode instructions. 
*/ -typedef struct instable { - const struct instable *it_indirect; /* for decode op codes */ - uchar_t it_adrmode; -#ifdef DIS_TEXT - char it_name[NCPS]; - uint_t it_suffix:1; /* mneu + "w", "l", or "d" */ -#endif -#ifdef DIS_MEM - uint_t it_size:16; -#endif - uint_t it_invalid64:1; /* opcode invalid in amd64 */ - uint_t it_always64:1; /* 64 bit when in 64 bit mode */ - uint_t it_invalid32:1; /* invalid in IA32 */ - uint_t it_stackop:1; /* push/pop stack operation */ -} instable_t; - -/* - * Instruction formats. - */ -enum { - UNKNOWN, - MRw, - IMlw, - IMw, - IR, - OA, - AO, - MS, - SM, - Mv, - Mw, - M, /* register or memory */ - Mb, /* register or memory, always byte sized */ - MO, /* memory only (no registers) */ - PREF, - SWAPGS, - R, - RA, - SEG, - MR, - RM, - IA, - MA, - SD, - AD, - SA, - D, - INM, - SO, - BD, - I, - P, - V, - DSHIFT, /* for double shift that has an 8-bit immediate */ - U, - OVERRIDE, - NORM, /* instructions w/o ModR/M byte, no memory access */ - IMPLMEM, /* instructions w/o ModR/M byte, implicit mem access */ - O, /* for call */ - JTAB, /* jump table */ - IMUL, /* for 186 iimul instr */ - CBW, /* so data16 can be evaluated for cbw and variants */ - MvI, /* for 186 logicals */ - ENTER, /* for 186 enter instr */ - RMw, /* for 286 arpl instr */ - Ib, /* for push immediate byte */ - F, /* for 287 instructions */ - FF, /* for 287 instructions */ - FFC, /* for 287 instructions */ - DM, /* 16-bit data */ - AM, /* 16-bit addr */ - LSEG, /* for 3-bit seg reg encoding */ - MIb, /* for 386 logicals */ - SREG, /* for 386 special registers */ - PREFIX, /* a REP instruction prefix */ - LOCK, /* a LOCK instruction prefix */ - INT3, /* The int 3 instruction, which has a fake operand */ - INTx, /* The normal int instruction, with explicit int num */ - DSHIFTcl, /* for double shift that implicitly uses %cl */ - CWD, /* so data16 can be evaluated for cwd and variants */ - RET, /* single immediate 16-bit operand */ - MOVZ, /* for movs and movz, with different 
size operands */ - XADDB, /* for xaddb */ - MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ - -/* - * MMX/SIMD addressing modes. - */ - - MMO, /* Prefixable MMX/SIMD-Int mm/mem -> mm */ - MMOIMPL, /* Prefixable MMX/SIMD-Int mm -> mm (mem) */ - MMO3P, /* Prefixable MMX/SIMD-Int mm -> r32,imm8 */ - MMOM3, /* Prefixable MMX/SIMD-Int mm -> r32 */ - MMOS, /* Prefixable MMX/SIMD-Int mm -> mm/mem */ - MMOMS, /* Prefixable MMX/SIMD-Int mm -> mem */ - MMOPM, /* MMX/SIMD-Int mm/mem -> mm,imm8 */ - MMOPRM, /* Prefixable MMX/SIMD-Int r32/mem -> mm,imm8 */ - MMOSH, /* Prefixable MMX mm,imm8 */ - MM, /* MMX/SIMD-Int mm/mem -> mm */ - MMS, /* MMX/SIMD-Int mm -> mm/mem */ - MMSH, /* MMX mm,imm8 */ - XMMO, /* Prefixable SIMD xmm/mem -> xmm */ - XMMOS, /* Prefixable SIMD xmm -> xmm/mem */ - XMMOPM, /* Prefixable SIMD xmm/mem w/to xmm,imm8 */ - XMMOMX, /* Prefixable SIMD mm/mem -> xmm */ - XMMOX3, /* Prefixable SIMD xmm -> r32 */ - XMMOXMM, /* Prefixable SIMD xmm/mem -> mm */ - XMMOM, /* Prefixable SIMD xmm -> mem */ - XMMOMS, /* Prefixable SIMD mem -> xmm */ - XMM, /* SIMD xmm/mem -> xmm */ - XMMXIMPL, /* SIMD xmm -> xmm (mem) */ - XMM3P, /* SIMD xmm -> r32,imm8 */ - XMMP, /* SIMD xmm/mem w/to xmm,imm8 */ - XMMPRM, /* SIMD r32/mem -> xmm,imm8 */ - XMMS, /* SIMD xmm -> xmm/mem */ - XMMM, /* SIMD mem -> xmm */ - XMMMS, /* SIMD xmm -> mem */ - XMM3MX, /* SIMD r32/mem -> xmm */ - XMM3MXS, /* SIMD xmm -> r32/mem */ - XMMSH, /* SIMD xmm,imm8 */ - XMMXM3, /* SIMD xmm/mem -> r32 */ - XMMX3, /* SIMD xmm -> r32 */ - XMMXMM, /* SIMD xmm/mem -> mm */ - XMMMX, /* SIMD mm -> xmm */ - XMMXM, /* SIMD xmm -> mm */ - XMMFENCE, /* SIMD lfence or mfence */ - XMMSFNC /* SIMD sfence (none or mem) */ -}; - -#define FILL 0x90 /* Fill byte used for alignment (nop) */ - -/* -** Register numbers for the i386 -*/ -#define EAX_REGNO 0 -#define ECX_REGNO 1 -#define EDX_REGNO 2 -#define EBX_REGNO 3 -#define ESP_REGNO 4 -#define EBP_REGNO 5 -#define ESI_REGNO 6 -#define EDI_REGNO 7 - -/* - * modes for 
immediate values - */ -#define MODE_NONE 0 -#define MODE_IPREL 1 /* signed IP relative value */ -#define MODE_SIGNED 2 /* sign extended immediate */ -#define MODE_IMPLIED 3 /* constant value implied from opcode */ -#define MODE_OFFSET 4 /* offset part of an address */ - -/* - * The letters used in these macros are: - * IND - indirect to another to another table - * "T" - means to Terminate indirections (this is the final opcode) - * "S" - means "operand length suffix required" - * "NS" - means "no suffix" which is the operand length suffix of the opcode - * "Z" - means instruction size arg required - * "u" - means the opcode is invalid in IA32 but valid in amd64 - * "x" - means the opcode is invalid in amd64, but not IA32 - * "y" - means the operand size is always 64 bits in 64 bit mode - * "p" - means push/pop stack operation - */ - -#if defined(DIS_TEXT) && defined(DIS_MEM) -#define IND(table) {table, 0, "", 0, 0, 0, 0, 0, 0} -#define INDx(table) {table, 0, "", 0, 0, 1, 0, 0, 0} -#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 0, 1, 0} -#define TNSx(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0, 0} -#define TNSy(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 1} -#define TNSZ(name, amode, sz) {TERM, amode, name, 0, sz, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, name, 0, sz, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, name, 1, 0, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, name, 1, sz, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, name, 1, sz, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, name, 1, sz, 0, 1, 0, 0} -#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} -#elif 
defined(DIS_TEXT) -#define IND(table) {table, 0, "", 0, 0, 0, 0, 0} -#define INDx(table) {table, 0, "", 0, 1, 0, 0, 0} -#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0} -#define TNSx(name, amode) {TERM, amode, name, 0, 1, 0, 0, 0} -#define TNSy(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 1, 0, 1} -#define TNSZ(name, amode, sz) {TERM, amode, name, 0, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, name, 0, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, name, 1, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, name, 1, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, name, 1, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, name, 1, 0, 1, 0, 0} -#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} -#elif defined(DIS_MEM) -#define IND(table) {table, 0, 0, 0, 0, 0, 0} -#define INDx(table) {table, 0, 0, 1, 0, 0, 0} -#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, 0, 0, 0, 1, 0} -#define TNSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, 0, 0, 1, 0, 1} -#define TNSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} -#define TNSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, 0, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, sz, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} -#define INVALID {TERM, 
UNKNOWN, 0, 0, 0, 0, 0} -#else -#define IND(table) {table[0], 0, 0, 0, 0, 0} -#define INDx(table) {table[0], 0, 1, 0, 0, 0} -#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, 0, 0, 1, 0} -#define TNSy(name, amode) {TERM, amode, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, 0, 1, 0, 1} -#define TNSx(name, amode) {TERM, amode, 1, 0, 0, 0} -#define TNSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} -#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0} -#endif - -#ifdef DIS_TEXT -/* - * this decodes the r_m field for mode's 0, 1, 2 in 16 bit mode - */ -const char *const dis_addr16[3][8] = { -"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "", - "(%bx)", -"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di", "(%bp)", - "(%bx)", -"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "(%bp)", - "(%bx)", -}; - - -/* - * This decodes 32 bit addressing mode r_m field for modes 0, 1, 2 - */ -const char *const dis_addr32_mode0[16] = { - "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "", "(%esi)", "(%edi)", - "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "", "(%r14d)", "(%r15d)" -}; - -const char *const dis_addr32_mode12[16] = { - "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "(%ebp)", "(%esi)", "(%edi)", - "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "(%r13d)", "(%r14d)", "(%r15d)" -}; - -/* - * This decodes 64 bit addressing mode r_m field for modes 0, 1, 2 - */ -const char *const dis_addr64_mode0[16] = { - "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rip)", 
"(%rsi)", "(%rdi)", - "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%rip)", "(%r14)", "(%r15)" -}; -const char *const dis_addr64_mode12[16] = { - "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rbp)", "(%rsi)", "(%rdi)", - "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%r13)", "(%r14)", "(%r15)" -}; - -/* - * decode for scale from SIB byte - */ -const char *const dis_scale_factor[4] = { ")", ",2)", ",4)", ",8)" }; - -/* - * register decoding for normal references to registers (ie. not addressing) - */ -const char *const dis_REG8[16] = { - "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh", - "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" -}; - -const char *const dis_REG8_REX[16] = { - "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", - "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" -}; - -const char *const dis_REG16[16] = { - "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", - "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" -}; - -const char *const dis_REG32[16] = { - "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", - "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" -}; - -const char *const dis_REG64[16] = { - "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", - "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" -}; - -const char *const dis_DEBUGREG[16] = { - "%db0", "%db1", "%db2", "%db3", "%db4", "%db5", "%db6", "%db7", - "%db8", "%db9", "%db10", "%db11", "%db12", "%db13", "%db14", "%db15" -}; - -const char *const dis_CONTROLREG[16] = { - "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5?", "%cr6?", "%cr7?", - "%cr8", "%cr9?", "%cr10?", "%cr11?", "%cr12?", "%cr13?", "%cr14?", "%cr15?" 
-}; - -const char *const dis_TESTREG[16] = { - "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7", - "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7" -}; - -const char *const dis_MMREG[16] = { - "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7", - "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" -}; - -const char *const dis_XMMREG[16] = { - "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", - "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" -}; - -const char *const dis_SEGREG[16] = { - "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "", - "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "" -}; - -/* - * SIMD predicate suffixes - */ -const char *const dis_PREDSUFFIX[8] = { - "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord" -}; - - - -#endif /* DIS_TEXT */ - - - - -/* - * "decode table" for 64 bit mode MOVSXD instruction (opcode 0x63) - */ -const instable_t dis_opMOVSLD = TNS("movslq",MOVSXZ); - -/* - * "decode table" for pause and clflush instructions - */ -const instable_t dis_opPause = TNS("pause", NORM); - -/* - * Decode table for 0x0F00 opcodes - */ -const instable_t dis_op0F00[8] = { - -/* [0] */ TNS("sldt",M), TNS("str",M), TNSy("lldt",M), TNSy("ltr",M), -/* [4] */ TNSZ("verr",M,2), TNSZ("verw",M,2), INVALID, INVALID, -}; - - -/* - * Decode table for 0x0F01 opcodes - */ -const instable_t dis_op0F01[8] = { - -/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MO,6), TNSZ("lgdt",MO,6), TNSZ("lidt",MO,6), -/* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS), -}; - -/* - * Decode table for 0x0F18 opcodes -- SIMD prefetch - */ -const instable_t dis_op0F18[8] = { - -/* [0] */ TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF), -/* [4] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Decode table for 0x0FAE opcodes -- SIMD state save/restore - */ -const instable_t dis_op0FAE[8] = { -/* [0] */ 
TNSZ("fxsave",M,512), TNSZ("fxrstor",M,512), TNS("ldmxcsr",M), TNS("stmxcsr",M), -/* [4] */ INVALID, TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC), -}; - -/* - * Decode table for 0x0FBA opcodes - */ - -const instable_t dis_op0FBA[8] = { - -/* [0] */ INVALID, INVALID, INVALID, INVALID, -/* [4] */ TS("bt",MIb), TS("bts",MIb), TS("btr",MIb), TS("btc",MIb), -}; - -/* - * Decode table for 0x0FC7 opcode - */ - -const instable_t dis_op0FC7[8] = { - -/* [0] */ INVALID, TNS("cmpxchg8b",M), INVALID, INVALID, -/* [4] */ INVALID, INVALID, INVALID, INVALID, -}; - - -/* - * Decode table for 0x0FC8 opcode -- 486 bswap instruction - * - *bit pattern: 0000 1111 1100 1reg - */ -const instable_t dis_op0FC8[4] = { -/* [0] */ TNS("bswap",R), INVALID, INVALID, INVALID, -}; - -/* - * Decode table for 0x0F71, 0x0F72, and 0x0F73 opcodes -- MMX instructions - */ -const instable_t dis_op0F7123[4][8] = { -{ -/* [70].0 */ INVALID, INVALID, INVALID, INVALID, -/* .4 */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [71].0 */ INVALID, INVALID, TNS("psrlw",MMOSH), INVALID, -/* .4 */ TNS("psraw",MMOSH), INVALID, TNS("psllw",MMOSH), INVALID, -}, { -/* [72].0 */ INVALID, INVALID, TNS("psrld",MMOSH), INVALID, -/* .4 */ TNS("psrad",MMOSH), INVALID, TNS("pslld",MMOSH), INVALID, -}, { -/* [73].0 */ INVALID, INVALID, TNS("psrlq",MMOSH), TNS("INVALID",MMOSH), -/* .4 */ INVALID, INVALID, TNS("psllq",MMOSH), TNS("INVALID",MMOSH), -} }; - -/* - * Decode table for SIMD extensions to above 0x0F71-0x0F73 opcodes. 
- */ -const instable_t dis_opSIMD7123[32] = { -/* [70].0 */ INVALID, INVALID, INVALID, INVALID, -/* .4 */ INVALID, INVALID, INVALID, INVALID, - -/* [71].0 */ INVALID, INVALID, TNS("psrlw",XMMSH), INVALID, -/* .4 */ TNS("psraw",XMMSH), INVALID, TNS("psllw",XMMSH), INVALID, - -/* [72].0 */ INVALID, INVALID, TNS("psrld",XMMSH), INVALID, -/* .4 */ TNS("psrad",XMMSH), INVALID, TNS("pslld",XMMSH), INVALID, - -/* [73].0 */ INVALID, INVALID, TNS("psrlq",XMMSH), TNS("psrldq",XMMSH), -/* .4 */ INVALID, INVALID, TNS("psllq",XMMSH), TNS("pslldq",XMMSH), -}; - -/* - * SIMD instructions have been wedged into the existing IA32 instruction - * set through the use of prefixes. That is, while 0xf0 0x58 may be - * addps, 0xf3 0xf0 0x58 (literally, repz addps) is a completely different - * instruction - addss. At present, three prefixes have been coopted in - * this manner - address size (0x66), repnz (0xf2) and repz (0xf3). The - * following tables are used to provide the prefixed instruction names. - * The arrays are sparse, but they're fast. - */ - -/* - * Decode table for SIMD instructions with the address size (0x66) prefix. 
- */ -const instable_t dis_opSIMDdata16[256] = { -/* [00] */ INVALID, INVALID, INVALID, INVALID, -/* [04] */ INVALID, INVALID, INVALID, INVALID, -/* [08] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [10] */ TNSZ("movupd",XMM,16), TNSZ("movupd",XMMS,16), TNSZ("movlpd",XMMM,8), TNSZ("movlpd",XMMMS,8), -/* [14] */ TNSZ("unpcklpd",XMM,16),TNSZ("unpckhpd",XMM,16),TNSZ("movhpd",XMMM,8), TNSZ("movhpd",XMMMS,8), -/* [18] */ INVALID, INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, - -/* [20] */ INVALID, INVALID, INVALID, INVALID, -/* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ TNSZ("movapd",XMM,16), TNSZ("movapd",XMMS,16), TNSZ("cvtpi2pd",XMMOMX,8),TNSZ("movntpd",XMMOMS,16), -/* [2C] */ TNSZ("cvttpd2pi",XMMXMM,16),TNSZ("cvtpd2pi",XMMXMM,16),TNSZ("ucomisd",XMM,8),TNSZ("comisd",XMM,8), - -/* [30] */ INVALID, INVALID, INVALID, INVALID, -/* [34] */ INVALID, INVALID, INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, - -/* [40] */ INVALID, INVALID, INVALID, INVALID, -/* [44] */ INVALID, INVALID, INVALID, INVALID, -/* [48] */ INVALID, INVALID, INVALID, INVALID, -/* [4C] */ INVALID, INVALID, INVALID, INVALID, - -/* [50] */ TNS("movmskpd",XMMOX3), TNSZ("sqrtpd",XMM,16), INVALID, INVALID, -/* [54] */ TNSZ("andpd",XMM,16), TNSZ("andnpd",XMM,16), TNSZ("orpd",XMM,16), TNSZ("xorpd",XMM,16), -/* [58] */ TNSZ("addpd",XMM,16), TNSZ("mulpd",XMM,16), TNSZ("cvtpd2ps",XMM,16),TNSZ("cvtps2dq",XMM,16), -/* [5C] */ TNSZ("subpd",XMM,16), TNSZ("minpd",XMM,16), TNSZ("divpd",XMM,16), TNSZ("maxpd",XMM,16), - -/* [60] */ TNSZ("punpcklbw",XMM,16),TNSZ("punpcklwd",XMM,16),TNSZ("punpckldq",XMM,16),TNSZ("packsswb",XMM,16), -/* [64] */ TNSZ("pcmpgtb",XMM,16), TNSZ("pcmpgtw",XMM,16), TNSZ("pcmpgtd",XMM,16), TNSZ("packuswb",XMM,16), -/* [68] */ TNSZ("punpckhbw",XMM,16),TNSZ("punpckhwd",XMM,16),TNSZ("punpckhdq",XMM,16),TNSZ("packssdw",XMM,16), -/* [6C] */ 
TNSZ("punpcklqdq",XMM,16),TNSZ("punpckhqdq",XMM,16),TNSZ("movd",XMM3MX,4),TNSZ("movdqa",XMM,16), - -/* [70] */ TNSZ("pshufd",XMMP,16), INVALID, INVALID, INVALID, -/* [74] */ TNSZ("pcmpeqb",XMM,16), TNSZ("pcmpeqw",XMM,16), TNSZ("pcmpeqd",XMM,16), INVALID, -/* [78] */ INVALID, INVALID, INVALID, INVALID, -/* [7C] */ INVALID, INVALID, TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), - -/* [80] */ INVALID, INVALID, INVALID, INVALID, -/* [84] */ INVALID, INVALID, INVALID, INVALID, -/* [88] */ INVALID, INVALID, INVALID, INVALID, -/* [8C] */ INVALID, INVALID, INVALID, INVALID, - -/* [90] */ INVALID, INVALID, INVALID, INVALID, -/* [94] */ INVALID, INVALID, INVALID, INVALID, -/* [98] */ INVALID, INVALID, INVALID, INVALID, -/* [9C] */ INVALID, INVALID, INVALID, INVALID, - -/* [A0] */ INVALID, INVALID, INVALID, INVALID, -/* [A4] */ INVALID, INVALID, INVALID, INVALID, -/* [A8] */ INVALID, INVALID, INVALID, INVALID, -/* [AC] */ INVALID, INVALID, INVALID, INVALID, - -/* [B0] */ INVALID, INVALID, INVALID, INVALID, -/* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, - -/* [C0] */ INVALID, INVALID, TNSZ("cmppd",XMMP,16), INVALID, -/* [C4] */ TNSZ("pinsrw",XMMPRM,2),TNS("pextrw",XMM3P), TNSZ("shufpd",XMMP,16), INVALID, -/* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, - -/* [D0] */ INVALID, TNSZ("psrlw",XMM,16), TNSZ("psrld",XMM,16), TNSZ("psrlq",XMM,16), -/* [D4] */ TNSZ("paddq",XMM,16), TNSZ("pmullw",XMM,16), TNSZ("movq",XMMS,8), TNS("pmovmskb",XMMX3), -/* [D8] */ TNSZ("psubusb",XMM,16), TNSZ("psubusw",XMM,16), TNSZ("pminub",XMM,16), TNSZ("pand",XMM,16), -/* [DC] */ TNSZ("paddusb",XMM,16), TNSZ("paddusw",XMM,16), TNSZ("pmaxub",XMM,16), TNSZ("pandn",XMM,16), - -/* [E0] */ TNSZ("pavgb",XMM,16), TNSZ("psraw",XMM,16), TNSZ("psrad",XMM,16), TNSZ("pavgw",XMM,16), -/* [E4] */ TNSZ("pmulhuw",XMM,16), TNSZ("pmulhw",XMM,16), 
TNSZ("cvttpd2dq",XMM,16),TNSZ("movntdq",XMMS,16), -/* [E8] */ TNSZ("psubsb",XMM,16), TNSZ("psubsw",XMM,16), TNSZ("pminsw",XMM,16), TNSZ("por",XMM,16), -/* [EC] */ TNSZ("paddsb",XMM,16), TNSZ("paddsw",XMM,16), TNSZ("pmaxsw",XMM,16), TNSZ("pxor",XMM,16), - -/* [F0] */ INVALID, TNSZ("psllw",XMM,16), TNSZ("pslld",XMM,16), TNSZ("psllq",XMM,16), -/* [F4] */ TNSZ("pmuludq",XMM,16), TNSZ("pmaddwd",XMM,16), TNSZ("psadbw",XMM,16), TNSZ("maskmovdqu", XMMXIMPL,16), -/* [F8] */ TNSZ("psubb",XMM,16), TNSZ("psubw",XMM,16), TNSZ("psubd",XMM,16), TNSZ("psubq",XMM,16), -/* [FC] */ TNSZ("paddb",XMM,16), TNSZ("paddw",XMM,16), TNSZ("paddd",XMM,16), INVALID, -}; - -/* - * Decode table for SIMD instructions with the repnz (0xf2) prefix. - */ -const instable_t dis_opSIMDrepnz[256] = { -/* [00] */ INVALID, INVALID, INVALID, INVALID, -/* [04] */ INVALID, INVALID, INVALID, INVALID, -/* [08] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [10] */ TNSZ("movsd",XMM,8), TNSZ("movsd",XMMS,8), INVALID, INVALID, -/* [14] */ INVALID, INVALID, INVALID, INVALID, -/* [18] */ INVALID, INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, - -/* [20] */ INVALID, INVALID, INVALID, INVALID, -/* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),INVALID, -/* [2C] */ TNSZ("cvttsd2si",XMMXM3,8),TNSZ("cvtsd2si",XMMXM3,8),INVALID, INVALID, - -/* [30] */ INVALID, INVALID, INVALID, INVALID, -/* [34] */ INVALID, INVALID, INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, - -/* [40] */ INVALID, INVALID, INVALID, INVALID, -/* [44] */ INVALID, INVALID, INVALID, INVALID, -/* [48] */ INVALID, INVALID, INVALID, INVALID, -/* [4C] */ INVALID, INVALID, INVALID, INVALID, - -/* [50] */ INVALID, TNSZ("sqrtsd",XMM,8), INVALID, INVALID, -/* [54] */ INVALID, INVALID, INVALID, INVALID, -/* [58] */ TNSZ("addsd",XMM,8), TNSZ("mulsd",XMM,8), 
TNSZ("cvtsd2ss",XMM,8), INVALID, -/* [5C] */ TNSZ("subsd",XMM,8), TNSZ("minsd",XMM,8), TNSZ("divsd",XMM,8), TNSZ("maxsd",XMM,8), - -/* [60] */ INVALID, INVALID, INVALID, INVALID, -/* [64] */ INVALID, INVALID, INVALID, INVALID, -/* [68] */ INVALID, INVALID, INVALID, INVALID, -/* [6C] */ INVALID, INVALID, INVALID, INVALID, - -/* [70] */ TNSZ("pshuflw",XMMP,16),INVALID, INVALID, INVALID, -/* [74] */ INVALID, INVALID, INVALID, INVALID, -/* [78] */ INVALID, INVALID, INVALID, INVALID, -/* [7C] */ INVALID, INVALID, INVALID, INVALID, - -/* [80] */ INVALID, INVALID, INVALID, INVALID, -/* [84] */ INVALID, INVALID, INVALID, INVALID, -/* [88] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [90] */ INVALID, INVALID, INVALID, INVALID, -/* [94] */ INVALID, INVALID, INVALID, INVALID, -/* [98] */ INVALID, INVALID, INVALID, INVALID, -/* [9C] */ INVALID, INVALID, INVALID, INVALID, - -/* [A0] */ INVALID, INVALID, INVALID, INVALID, -/* [A4] */ INVALID, INVALID, INVALID, INVALID, -/* [A8] */ INVALID, INVALID, INVALID, INVALID, -/* [AC] */ INVALID, INVALID, INVALID, INVALID, - -/* [B0] */ INVALID, INVALID, INVALID, INVALID, -/* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, - -/* [C0] */ INVALID, INVALID, TNSZ("cmpsd",XMMP,8), INVALID, -/* [C4] */ INVALID, INVALID, INVALID, INVALID, -/* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, - -/* [D0] */ INVALID, INVALID, INVALID, INVALID, -/* [D4] */ INVALID, INVALID, TNS("movdq2q",XMMXM), INVALID, -/* [D8] */ INVALID, INVALID, INVALID, INVALID, -/* [DC] */ INVALID, INVALID, INVALID, INVALID, - -/* [E0] */ INVALID, INVALID, INVALID, INVALID, -/* [E4] */ INVALID, INVALID, TNSZ("cvtpd2dq",XMM,16),INVALID, -/* [E8] */ INVALID, INVALID, INVALID, INVALID, -/* [EC] */ INVALID, INVALID, INVALID, INVALID, - -/* [F0] */ INVALID, INVALID, INVALID, INVALID, -/* [F4] */ 
INVALID, INVALID, INVALID, INVALID, -/* [F8] */ INVALID, INVALID, INVALID, INVALID, -/* [FC] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Decode table for SIMD instructions with the repz (0xf3) prefix. - */ -const instable_t dis_opSIMDrepz[256] = { -/* [00] */ INVALID, INVALID, INVALID, INVALID, -/* [04] */ INVALID, INVALID, INVALID, INVALID, -/* [08] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [10] */ TNSZ("movss",XMM,4), TNSZ("movss",XMMS,4), INVALID, INVALID, -/* [14] */ INVALID, INVALID, INVALID, INVALID, -/* [18] */ INVALID, INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, - -/* [20] */ INVALID, INVALID, INVALID, INVALID, -/* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),INVALID, -/* [2C] */ TNSZ("cvttss2si",XMMXM3,4),TNSZ("cvtss2si",XMMXM3,4),INVALID, INVALID, - -/* [30] */ INVALID, INVALID, INVALID, INVALID, -/* [34] */ INVALID, INVALID, INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, - -/* [40] */ INVALID, INVALID, INVALID, INVALID, -/* [44] */ INVALID, INVALID, INVALID, INVALID, -/* [48] */ INVALID, INVALID, INVALID, INVALID, -/* [4C] */ INVALID, INVALID, INVALID, INVALID, - -/* [50] */ INVALID, TNSZ("sqrtss",XMM,4), TNSZ("rsqrtss",XMM,4), TNSZ("rcpss",XMM,4), -/* [54] */ INVALID, INVALID, INVALID, INVALID, -/* [58] */ TNSZ("addss",XMM,4), TNSZ("mulss",XMM,4), TNSZ("cvtss2sd",XMM,4), TNSZ("cvttps2dq",XMM,16), -/* [5C] */ TNSZ("subss",XMM,4), TNSZ("minss",XMM,4), TNSZ("divss",XMM,4), TNSZ("maxss",XMM,4), - -/* [60] */ INVALID, INVALID, INVALID, INVALID, -/* [64] */ INVALID, INVALID, INVALID, INVALID, -/* [68] */ INVALID, INVALID, INVALID, INVALID, -/* [6C] */ INVALID, INVALID, INVALID, TNSZ("movdqu",XMM,16), - -/* [70] */ TNSZ("pshufhw",XMMP,16),INVALID, INVALID, INVALID, -/* [74] */ INVALID, INVALID, INVALID, INVALID, -/* [78] */ INVALID, INVALID, INVALID, 
INVALID, -/* [7C] */ INVALID, INVALID, TNSZ("movq",XMM,8), TNSZ("movdqu",XMMS,16), - -/* [80] */ INVALID, INVALID, INVALID, INVALID, -/* [84] */ INVALID, INVALID, INVALID, INVALID, -/* [88] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [90] */ INVALID, INVALID, INVALID, INVALID, -/* [94] */ INVALID, INVALID, INVALID, INVALID, -/* [98] */ INVALID, INVALID, INVALID, INVALID, -/* [9C] */ INVALID, INVALID, INVALID, INVALID, - -/* [A0] */ INVALID, INVALID, INVALID, INVALID, -/* [A4] */ INVALID, INVALID, INVALID, INVALID, -/* [A8] */ INVALID, INVALID, INVALID, INVALID, -/* [AC] */ INVALID, INVALID, INVALID, INVALID, - -/* [B0] */ INVALID, INVALID, INVALID, INVALID, -/* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, - -/* [C0] */ INVALID, INVALID, TNSZ("cmpss",XMMP,4), INVALID, -/* [C4] */ INVALID, INVALID, INVALID, INVALID, -/* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, - -/* [D0] */ INVALID, INVALID, INVALID, INVALID, -/* [D4] */ INVALID, INVALID, TNS("movq2dq",XMMMX), INVALID, -/* [D8] */ INVALID, INVALID, INVALID, INVALID, -/* [DC] */ INVALID, INVALID, INVALID, INVALID, - -/* [E0] */ INVALID, INVALID, INVALID, INVALID, -/* [E4] */ INVALID, INVALID, TNSZ("cvtdq2pd",XMM,8), INVALID, -/* [E8] */ INVALID, INVALID, INVALID, INVALID, -/* [EC] */ INVALID, INVALID, INVALID, INVALID, - -/* [F0] */ INVALID, INVALID, INVALID, INVALID, -/* [F4] */ INVALID, INVALID, INVALID, INVALID, -/* [F8] */ INVALID, INVALID, INVALID, INVALID, -/* [FC] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Decode table for 0x0F opcodes - */ - -const instable_t dis_op0F[16][16] = { -{ -/* [00] */ IND(dis_op0F00), IND(dis_op0F01), TNS("lar",MR), TNS("lsl",MR), -/* [04] */ INVALID, TNS("syscall",NORM), TNS("clts",NORM), TNS("sysret",NORM), -/* [08] */ TNS("invd",NORM), TNS("wbinvd",NORM), INVALID, 
TNS("ud2",NORM), -/* [0C] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), -/* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), -/* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), -/* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, -/* [28] */ TNSZ("movaps",XMMO,16), TNSZ("movaps",XMMOS,16),TNSZ("cvtpi2ps",XMMOMX,8),TNSZ("movntps",XMMOS,16), -/* [2C] */ TNSZ("cvttps2pi",XMMOXMM,8),TNSZ("cvtps2pi",XMMOXMM,8),TNSZ("ucomiss",XMMO,4),TNSZ("comiss",XMMO,4), -}, { -/* [30] */ TNS("wrmsr",NORM), TNS("rdtsc",NORM), TNS("rdmsr",NORM), TNS("rdpmc",NORM), -/* [34] */ TNSx("sysenter",NORM), TNSx("sysexit",NORM), INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [40] */ TS("cmovx.o",MR), TS("cmovx.no",MR), TS("cmovx.b",MR), TS("cmovx.ae",MR), -/* [44] */ TS("cmovx.e",MR), TS("cmovx.ne",MR), TS("cmovx.be",MR), TS("cmovx.a",MR), -/* [48] */ TS("cmovx.s",MR), TS("cmovx.ns",MR), TS("cmovx.pe",MR), TS("cmovx.po",MR), -/* [4C] */ TS("cmovx.l",MR), TS("cmovx.ge",MR), TS("cmovx.le",MR), TS("cmovx.g",MR), -}, { -/* [50] */ TNS("movmskps",XMMOX3), TNSZ("sqrtps",XMMO,16), TNSZ("rsqrtps",XMMO,16),TNSZ("rcpps",XMMO,16), -/* [54] */ TNSZ("andps",XMMO,16), TNSZ("andnps",XMMO,16), TNSZ("orps",XMMO,16), TNSZ("xorps",XMMO,16), -/* [58] */ TNSZ("addps",XMMO,16), TNSZ("mulps",XMMO,16), TNSZ("cvtps2pd",XMMO,8),TNSZ("cvtdq2ps",XMMO,16), -/* [5C] */ TNSZ("subps",XMMO,16), TNSZ("minps",XMMO,16), TNSZ("divps",XMMO,16), TNSZ("maxps",XMMO,16), -}, { -/* [60] */ TNSZ("punpcklbw",MMO,4),TNSZ("punpcklwd",MMO,4),TNSZ("punpckldq",MMO,4),TNSZ("packsswb",MMO,8), -/* [64] */ TNSZ("pcmpgtb",MMO,8), TNSZ("pcmpgtw",MMO,8), 
TNSZ("pcmpgtd",MMO,8), TNSZ("packuswb",MMO,8), -/* [68] */ TNSZ("punpckhbw",MMO,8),TNSZ("punpckhwd",MMO,8),TNSZ("punpckhdq",MMO,8),TNSZ("packssdw",MMO,8), -/* [6C] */ TNSZ("INVALID",MMO,0), TNSZ("INVALID",MMO,0), TNSZ("movd",MMO,4), TNSZ("movq",MMO,8), -}, { -/* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), -/* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), -/* [78] */ INVALID, INVALID, INVALID, INVALID, -/* [7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), -}, { -/* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), -/* [84] */ TNS("je",D), TNS("jne",D), TNS("jbe",D), TNS("ja",D), -/* [88] */ TNS("js",D), TNS("jns",D), TNS("jp",D), TNS("jnp",D), -/* [8C] */ TNS("jl",D), TNS("jge",D), TNS("jle",D), TNS("jg",D), -}, { -/* [90] */ TNS("seto",Mb), TNS("setno",Mb), TNS("setb",Mb), TNS("setae",Mb), -/* [94] */ TNS("sete",Mb), TNS("setne",Mb), TNS("setbe",Mb), TNS("seta",Mb), -/* [98] */ TNS("sets",Mb), TNS("setns",Mb), TNS("setp",Mb), TNS("setnp",Mb), -/* [9C] */ TNS("setl",Mb), TNS("setge",Mb), TNS("setle",Mb), TNS("setg",Mb), -}, { -/* [A0] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("cpuid",NORM), TS("bt",RMw), -/* [A4] */ TS("shld",DSHIFT), TS("shld",DSHIFTcl), INVALID, INVALID, -/* [A8] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("rsm",NORM), TS("bts",RMw), -/* [AC] */ TS("shrd",DSHIFT), TS("shrd",DSHIFTcl), IND(dis_op0FAE), TS("imul",MRw), -}, { -/* [B0] */ TNS("cmpxchgb",RMw), TS("cmpxchg",RMw), TS("lss",MR), TS("btr",RMw), -/* [B4] */ TS("lfs",MR), TS("lgs",MR), TS("movzb",MOVZ), TNS("movzwl",MOVZ), -/* [B8] */ INVALID, INVALID, IND(dis_op0FBA), TS("btc",RMw), -/* [BC] */ TS("bsf",MRw), TS("bsr",MRw), TS("movsb",MOVZ), TNS("movswl",MOVZ), -}, { -/* [C0] */ TNS("xaddb",XADDB), TS("xadd",RMw), TNSZ("cmpps",XMMOPM,16),TNS("movnti",RM), -/* [C4] */ TNSZ("pinsrw",MMOPRM,2),TNS("pextrw",MMO3P), TNSZ("shufps",XMMOPM,16),IND(dis_op0FC7), -/* [C8] */ INVALID, 
INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [D0] */ INVALID, TNSZ("psrlw",MMO,8), TNSZ("psrld",MMO,8), TNSZ("psrlq",MMO,8), -/* [D4] */ TNSZ("paddq",MMO,8), TNSZ("pmullw",MMO,8), TNSZ("INVALID",MMO,0), TNS("pmovmskb",MMOM3), -/* [D8] */ TNSZ("psubusb",MMO,8), TNSZ("psubusw",MMO,8), TNSZ("pminub",MMO,8), TNSZ("pand",MMO,8), -/* [DC] */ TNSZ("paddusb",MMO,8), TNSZ("paddusw",MMO,8), TNSZ("pmaxub",MMO,8), TNSZ("pandn",MMO,8), -}, { -/* [E0] */ TNSZ("pavgb",MMO,8), TNSZ("psraw",MMO,8), TNSZ("psrad",MMO,8), TNSZ("pavgw",MMO,8), -/* [E4] */ TNSZ("pmulhuw",MMO,8), TNSZ("pmulhw",MMO,8), TNS("INVALID",XMMO), TNSZ("movntq",MMOMS,8), -/* [E8] */ TNSZ("psubsb",MMO,8), TNSZ("psubsw",MMO,8), TNSZ("pminsw",MMO,8), TNSZ("por",MMO,8), -/* [EC] */ TNSZ("paddsb",MMO,8), TNSZ("paddsw",MMO,8), TNSZ("pmaxsw",MMO,8), TNSZ("pxor",MMO,8), -}, { -/* [F0] */ INVALID, TNSZ("psllw",MMO,8), TNSZ("pslld",MMO,8), TNSZ("psllq",MMO,8), -/* [F4] */ TNSZ("pmuludq",MMO,8), TNSZ("pmaddwd",MMO,8), TNSZ("psadbw",MMO,8), TNSZ("maskmovq",MMOIMPL,8), -/* [F8] */ TNSZ("psubb",MMO,8), TNSZ("psubw",MMO,8), TNSZ("psubd",MMO,8), TNSZ("psubq",MMO,8), -/* [FC] */ TNSZ("paddb",MMO,8), TNSZ("paddw",MMO,8), TNSZ("paddd",MMO,8), INVALID, -} }; - - -/* - * Decode table for 0x80 opcodes - */ - -const instable_t dis_op80[8] = { - -/* [0] */ TNS("addb",IMlw), TNS("orb",IMw), TNS("adcb",IMlw), TNS("sbbb",IMlw), -/* [4] */ TNS("andb",IMw), TNS("subb",IMlw), TNS("xorb",IMw), TNS("cmpb",IMlw), -}; - - -/* - * Decode table for 0x81 opcodes. - */ - -const instable_t dis_op81[8] = { - -/* [0] */ TS("add",IMlw), TS("or",IMw), TS("adc",IMlw), TS("sbb",IMlw), -/* [4] */ TS("and",IMw), TS("sub",IMlw), TS("xor",IMw), TS("cmp",IMlw), -}; - - -/* - * Decode table for 0x82 opcodes. 
- */ - -const instable_t dis_op82[8] = { - -/* [0] */ TNSx("addb",IMlw), TNSx("orb",IMlw), TNSx("adcb",IMlw), TNSx("sbbb",IMlw), -/* [4] */ TNSx("andb",IMlw), TNSx("subb",IMlw), TNSx("xorb",IMlw), TNSx("cmpb",IMlw), -}; -/* - * Decode table for 0x83 opcodes. - */ - -const instable_t dis_op83[8] = { - -/* [0] */ TS("add",IMlw), TS("or",IMlw), TS("adc",IMlw), TS("sbb",IMlw), -/* [4] */ TS("and",IMlw), TS("sub",IMlw), TS("xor",IMlw), TS("cmp",IMlw), -}; - -/* - * Decode table for 0xC0 opcodes. - */ - -const instable_t dis_opC0[8] = { - -/* [0] */ TNS("rolb",MvI), TNS("rorb",MvI), TNS("rclb",MvI), TNS("rcrb",MvI), -/* [4] */ TNS("shlb",MvI), TNS("shrb",MvI), INVALID, TNS("sarb",MvI), -}; - -/* - * Decode table for 0xD0 opcodes. - */ - -const instable_t dis_opD0[8] = { - -/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), -/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), -}; - -/* - * Decode table for 0xC1 opcodes. - * 186 instruction set - */ - -const instable_t dis_opC1[8] = { - -/* [0] */ TS("rol",MvI), TS("ror",MvI), TS("rcl",MvI), TS("rcr",MvI), -/* [4] */ TS("shl",MvI), TS("shr",MvI), TS("sal",MvI), TS("sar",MvI), -}; - -/* - * Decode table for 0xD1 opcodes. - */ - -const instable_t dis_opD1[8] = { - -/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), -/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("sal",Mv), TS("sar",Mv), -}; - - -/* - * Decode table for 0xD2 opcodes. - */ - -const instable_t dis_opD2[8] = { - -/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), -/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), -}; -/* - * Decode table for 0xD3 opcodes. - */ - -const instable_t dis_opD3[8] = { - -/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), -/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("salb",Mv), TS("sar",Mv), -}; - - -/* - * Decode table for 0xF6 opcodes. 
- */ - -const instable_t dis_opF6[8] = { - -/* [0] */ TNS("testb",IMw), TNS("testb",IMw), TNS("notb",Mw), TNS("negb",Mw), -/* [4] */ TNS("mulb",MA), TNS("imulb",MA), TNS("divb",MA), TNS("idivb",MA), -}; - - -/* - * Decode table for 0xF7 opcodes. - */ - -const instable_t dis_opF7[8] = { - -/* [0] */ TS("test",IMw), TS("test",IMw), TS("not",Mw), TS("neg",Mw), -/* [4] */ TS("mul",MA), TS("imul",MA), TS("div",MA), TS("idiv",MA), -}; - - -/* - * Decode table for 0xFE opcodes. - */ - -const instable_t dis_opFE[8] = { - -/* [0] */ TNS("incb",Mw), TNS("decb",Mw), INVALID, INVALID, -/* [4] */ INVALID, INVALID, INVALID, INVALID, -}; -/* - * Decode table for 0xFF opcodes. - */ - -const instable_t dis_opFF[8] = { - -/* [0] */ TS("inc",Mw), TS("dec",Mw), TNSyp("call",INM), TNS("lcall",INM), -/* [4] */ TNSy("jmp",INM), TNS("ljmp",INM), TSp("push",M), INVALID, -}; - -/* for 287 instructions, which are a mess to decode */ - -const instable_t dis_opFP1n2[8][8] = { -{ -/* bit pattern: 1101 1xxx MODxx xR/M */ -/* [0,0] */ TNS("fadds",M), TNS("fmuls",M), TNS("fcoms",M), TNS("fcomps",M), -/* [0,4] */ TNS("fsubs",M), TNS("fsubrs",M), TNS("fdivs",M), TNS("fdivrs",M), -}, { -/* [1,0] */ TNS("flds",M), INVALID, TNS("fsts",M), TNS("fstps",M), -/* [1,4] */ TNSZ("fldenv",M,28), TNSZ("fldcw",M,2), TNSZ("fnstenv",M,28), TNSZ("fnstcw",M,2), -}, { -/* [2,0] */ TNS("fiaddl",M), TNS("fimull",M), TNS("ficoml",M), TNS("ficompl",M), -/* [2,4] */ TNS("fisubl",M), TNS("fisubrl",M), TNS("fidivl",M), TNS("fidivrl",M), -}, { -/* [3,0] */ TNS("fildl",M), INVALID, TNS("fistl",M), TNS("fistpl",M), -/* [3,4] */ INVALID, TNSZ("fldt",M,10), INVALID, TNSZ("fstpt",M,10), -}, { -/* [4,0] */ TNSZ("faddl",M,8), TNSZ("fmull",M,8), TNSZ("fcoml",M,8), TNSZ("fcompl",M,8), -/* [4,1] */ TNSZ("fsubl",M,8), TNSZ("fsubrl",M,8), TNSZ("fdivl",M,8), TNSZ("fdivrl",M,8), -}, { -/* [5,0] */ TNSZ("fldl",M,8), INVALID, TNSZ("fstl",M,8), TNSZ("fstpl",M,8), -/* [5,4] */ TNSZ("frstor",M,108), INVALID, TNSZ("fnsave",M,108), 
TNSZ("fnstsw",M,2), -}, { -/* [6,0] */ TNSZ("fiadd",M,2), TNSZ("fimul",M,2), TNSZ("ficom",M,2), TNSZ("ficomp",M,2), -/* [6,4] */ TNSZ("fisub",M,2), TNSZ("fisubr",M,2), TNSZ("fidiv",M,2), TNSZ("fidivr",M,2), -}, { -/* [7,0] */ TNSZ("fild",M,2), INVALID, TNSZ("fist",M,2), TNSZ("fistp",M,2), -/* [7,4] */ TNSZ("fbld",M,10), TNSZ("fildll",M,8), TNSZ("fbstp",M,10), TNSZ("fistpll",M,8), -} }; - -const instable_t dis_opFP3[8][8] = { -{ -/* bit pattern: 1101 1xxx 11xx xREG */ -/* [0,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), -/* [0,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), -}, { -/* [1,0] */ TNS("fld",F), TNS("fxch",F), TNS("fnop",NORM), TNS("fstp",F), -/* [1,4] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [2,0] */ INVALID, INVALID, INVALID, INVALID, -/* [2,4] */ INVALID, TNS("fucompp",NORM), INVALID, INVALID, -}, { -/* [3,0] */ INVALID, INVALID, INVALID, INVALID, -/* [3,4] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [4,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), -/* [4,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), -}, { -/* [5,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fst",F), TNS("fstp",F), -/* [5,4] */ TNS("fucom",F), TNS("fucomp",F), INVALID, INVALID, -}, { -/* [6,0] */ TNS("faddp",FF), TNS("fmulp",FF), TNS("fcomp",F), TNS("fcompp",NORM), -/* [6,4] */ TNS("fsubp",FF), TNS("fsubrp",FF), TNS("fdivp",FF), TNS("fdivrp",FF), -}, { -/* [7,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fstp",F), TNS("fstp",F), -/* [7,4] */ TNS("fnstsw",M), TNS("fucomip",FFC), TNS("fcomip",FFC), INVALID, -} }; - -const instable_t dis_opFP4[4][8] = { -{ -/* bit pattern: 1101 1001 111x xxxx */ -/* [0,0] */ TNS("fchs",NORM), TNS("fabs",NORM), INVALID, INVALID, -/* [0,4] */ TNS("ftst",NORM), TNS("fxam",NORM), TNS("ftstp",NORM), INVALID, -}, { -/* [1,0] */ TNS("fld1",NORM), TNS("fldl2t",NORM), TNS("fldl2e",NORM), TNS("fldpi",NORM), -/* [1,4] */ TNS("fldlg2",NORM), TNS("fldln2",NORM), 
TNS("fldz",NORM), INVALID, -}, { -/* [2,0] */ TNS("f2xm1",NORM), TNS("fyl2x",NORM), TNS("fptan",NORM), TNS("fpatan",NORM), -/* [2,4] */ TNS("fxtract",NORM), TNS("fprem1",NORM), TNS("fdecstp",NORM), TNS("fincstp",NORM), -}, { -/* [3,0] */ TNS("fprem",NORM), TNS("fyl2xp1",NORM), TNS("fsqrt",NORM), TNS("fsincos",NORM), -/* [3,4] */ TNS("frndint",NORM), TNS("fscale",NORM), TNS("fsin",NORM), TNS("fcos",NORM), -} }; - -const instable_t dis_opFP5[8] = { -/* bit pattern: 1101 1011 111x xxxx */ -/* [0] */ TNS("feni",NORM), TNS("fdisi",NORM), TNS("fnclex",NORM), TNS("fninit",NORM), -/* [4] */ TNS("fsetpm",NORM), TNS("frstpm",NORM), INVALID, INVALID, -}; - -const instable_t dis_opFP6[8] = { -/* bit pattern: 1101 1011 11yy yxxx */ -/* [00] */ TNS("fcmov.nb",FF), TNS("fcmov.ne",FF), TNS("fcmov.nbe",FF), TNS("fcmov.nu",FF), -/* [04] */ INVALID, TNS("fucomi",F), TNS("fcomi",F), INVALID, -}; - -const instable_t dis_opFP7[8] = { -/* bit pattern: 1101 1010 11yy yxxx */ -/* [00] */ TNS("fcmov.b",FF), TNS("fcmov.e",FF), TNS("fcmov.be",FF), TNS("fcmov.u",FF), -/* [04] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Main decode table for the op codes. The first two nibbles - * will be used as an index into the table. If there is a - * a need to further decode an instruction, the array to be - * referenced is indicated with the other two entries being - * empty. 
- */ - -const instable_t dis_distable[16][16] = { -{ -/* [0,0] */ TNS("addb",RMw), TS("add",RMw), TNS("addb",MRw), TS("add",MRw), -/* [0,4] */ TNS("addb",IA), TS("add",IA), TSx("push",SEG), TSx("pop",SEG), -/* [0,8] */ TNS("orb",RMw), TS("or",RMw), TNS("orb",MRw), TS("or",MRw), -/* [0,C] */ TNS("orb",IA), TS("or",IA), TSx("push",SEG), IND(&dis_op0F[0][0]), -}, { -/* [1,0] */ TNS("adcb",RMw), TS("adc",RMw), TNS("adcb",MRw), TS("adc",MRw), -/* [1,4] */ TNS("adcb",IA), TS("adc",IA), TSx("push",SEG), TSx("pop",SEG), -/* [1,8] */ TNS("sbbb",RMw), TS("sbb",RMw), TNS("sbbb",MRw), TS("sbb",MRw), -/* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), -}, { -/* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), -/* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), -/* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), -/* [2,C] */ TNS("subb",IA), TS("sub",IA), TNSx("%cs:",OVERRIDE), TNSx("das",NORM), -}, { -/* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), -/* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), -/* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), -/* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), -}, { -/* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), -/* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), -/* [4,8] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), -/* [4,C] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), -}, { -/* [5,0] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), -/* [5,4] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), -/* [5,8] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), -/* [5,C] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), -}, { -/* [6,0] */ TSZx("pusha",IMPLMEM,28),TSZx("popa",IMPLMEM,28), TSx("bound",MR), 
TNS("arpl",RMw), -/* [6,4] */ TNS("%fs:",OVERRIDE), TNS("%gs:",OVERRIDE), TNS("data16",DM), TNS("addr16",AM), -/* [6,8] */ TSp("push",I), TS("imul",IMUL), TSp("push",Ib), TS("imul",IMUL), -/* [6,C] */ TNSZ("insb",IMPLMEM,1), TSZ("ins",IMPLMEM,4), TNSZ("outsb",IMPLMEM,1),TSZ("outs",IMPLMEM,4), -}, { -/* [7,0] */ TNSy("jo",BD), TNSy("jno",BD), TNSy("jb",BD), TNSy("jae",BD), -/* [7,4] */ TNSy("je",BD), TNSy("jne",BD), TNSy("jbe",BD), TNSy("ja",BD), -/* [7,8] */ TNSy("js",BD), TNSy("jns",BD), TNSy("jp",BD), TNSy("jnp",BD), -/* [7,C] */ TNSy("jl",BD), TNSy("jge",BD), TNSy("jle",BD), TNSy("jg",BD), -}, { -/* [8,0] */ IND(dis_op80), IND(dis_op81), INDx(dis_op82), IND(dis_op83), -/* [8,4] */ TNS("testb",RMw), TS("test",RMw), TNS("xchgb",RMw), TS("xchg",RMw), -/* [8,8] */ TNS("movb",RMw), TS("mov",RMw), TNS("movb",MRw), TS("mov",MRw), -/* [8,C] */ TNS("movw",SM), TS("lea",MR), TNS("movw",MS), TSp("pop",M), -}, { -/* [9,0] */ TNS("nop",NORM), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), -/* [9,4] */ TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), -/* [9,8] */ TNS("cXtX",CBW), TNS("cXtX",CWD), TNSx("lcall",SO), TNS("fwait",NORM), -/* [9,C] */ TSZy("pushf",IMPLMEM,4),TSZy("popf",IMPLMEM,4), TNSx("sahf",NORM), TNSx("lahf",NORM), -}, { -/* [A,0] */ TNS("movb",OA), TS("mov",OA), TNS("movb",AO), TS("mov",AO), -/* [A,4] */ TNSZ("movsb",SD,1), TS("movs",SD), TNSZ("cmpsb",SD,1), TS("cmps",SD), -/* [A,8] */ TNS("testb",IA), TS("test",IA), TNS("stosb",AD), TS("stos",AD), -/* [A,C] */ TNS("lodsb",SA), TS("lods",SA), TNS("scasb",AD), TS("scas",AD), -}, { -/* [B,0] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), -/* [B,4] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), -/* [B,8] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), -/* [B,C] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), -}, { -/* [C,0] */ IND(dis_opC0), IND(dis_opC1), TNSyp("ret",RET), TNSyp("ret",NORM), -/* [C,4] */ TNSx("les",MR), TNSx("lds",MR), 
TNS("movb",IMw), TS("mov",IMw), -/* [C,8] */ TNSyp("enter",ENTER), TNSyp("leave",NORM), TNS("lret",RET), TNS("lret",NORM), -/* [C,C] */ TNS("int",INT3), TNS("int",INTx), TNSx("into",NORM), TNS("iret",NORM), -}, { -/* [D,0] */ IND(dis_opD0), IND(dis_opD1), IND(dis_opD2), IND(dis_opD3), -/* [D,4] */ TNSx("aam",U), TNSx("aad",U), TNSx("falc",NORM), TNSZ("xlat",IMPLMEM,1), - -/* 287 instructions. Note that although the indirect field */ -/* indicates opFP1n2 for further decoding, this is not necessarily */ -/* the case since the opFP arrays are not partitioned according to key1 */ -/* and key2. opFP1n2 is given only to indicate that we haven't */ -/* finished decoding the instruction. */ -/* [D,8] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), -/* [D,C] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), -}, { -/* [E,0] */ TNSy("loopnz",BD), TNSy("loopz",BD), TNSy("loop",BD), TNSy("jcxz",BD), -/* [E,4] */ TNS("inb",P), TS("in",P), TNS("outb",P), TS("out",P), -/* [E,8] */ TNSyp("call",D), TNSy("jmp",D), TNSx("ljmp",SO), TNSy("jmp",BD), -/* [E,C] */ TNS("inb",V), TS("in",V), TNS("outb",V), TS("out",V), -}, { -/* [F,0] */ TNS("lock",LOCK), TNS("icebp", NORM), TNS("repnz",PREFIX), TNS("repz",PREFIX), -/* [F,4] */ TNS("hlt",NORM), TNS("cmc",NORM), IND(dis_opF6), IND(dis_opF7), -/* [F,8] */ TNS("clc",NORM), TNS("stc",NORM), TNS("cli",NORM), TNS("sti",NORM), -/* [F,C] */ TNS("cld",NORM), TNS("std",NORM), IND(dis_opFE), IND(dis_opFF), -} }; - -/* END CSTYLED */ - -/* - * common functions to decode and disassemble an x86 or amd64 instruction - */ - -/* - * These are the individual fields of a REX prefix. 
Note that a REX - * prefix with none of these set is still needed to: - * - use the MOVSXD (sign extend 32 to 64 bits) instruction - * - access the %sil, %dil, %bpl, %spl registers - */ -#define REX_W 0x08 /* 64 bit operand size when set */ -#define REX_R 0x04 /* high order bit extension of ModRM reg field */ -#define REX_X 0x02 /* high order bit extension of SIB index field */ -#define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ - -static uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ -static uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ - -/* - * Even in 64 bit mode, usually only 4 byte immediate operands are supported. - */ -static int isize[] = {1, 2, 4, 4}; -static int isize64[] = {1, 2, 4, 8}; - -/* - * Just a bunch of useful macros. - */ -#define WBIT(x) (x & 0x1) /* to get w bit */ -#define REGNO(x) (x & 0x7) /* to get 3 bit register */ -#define VBIT(x) ((x)>>1 & 0x1) /* to get 'v' bit */ -#define OPSIZE(osize, wbit) ((wbit) ? isize[osize] : 1) -#define OPSIZE64(osize, wbit) ((wbit) ? isize64[osize] : 1) - -#define REG_ONLY 3 /* mode to indicate a register operand (not memory) */ - -#define BYTE_OPND 0 /* w-bit value indicating byte register */ -#define LONG_OPND 1 /* w-bit value indicating opnd_size register */ -#define MM_OPND 2 /* "value" used to indicate a mmx reg */ -#define XMM_OPND 3 /* "value" used to indicate a xmm reg */ -#define SEG_OPND 4 /* "value" used to indicate a segment reg */ -#define CONTROL_OPND 5 /* "value" used to indicate a control reg */ -#define DEBUG_OPND 6 /* "value" used to indicate a debug reg */ -#define TEST_OPND 7 /* "value" used to indicate a test reg */ -#define WORD_OPND 8 /* w-bit value indicating word size reg */ - -/* - * Get the next byte and separate the op code into the high and low nibbles. - */ -static int -dtrace_get_opcode(dis86_t *x, uint_t *high, uint_t *low) -{ - int byte; - - /* - * x86 instructions have a maximum length of 15 bytes. Bail out if - * we try to read more. 
- */ - if (x->d86_len >= 15) - return (x->d86_error = 1); - - if (x->d86_error) - return (1); - byte = x->d86_get_byte(x->d86_data); - if (byte < 0) - return (x->d86_error = 1); - x->d86_bytes[x->d86_len++] = byte; - *low = byte & 0xf; /* ----xxxx low 4 bits */ - *high = byte >> 4 & 0xf; /* xxxx---- bits 7 to 4 */ - return (0); -} - -/* - * Get and decode an SIB (scaled index base) byte - */ -static void -dtrace_get_SIB(dis86_t *x, uint_t *ss, uint_t *index, uint_t *base) -{ - int byte; - - if (x->d86_error) - return; - - byte = x->d86_get_byte(x->d86_data); - if (byte < 0) { - x->d86_error = 1; - return; - } - x->d86_bytes[x->d86_len++] = byte; - - *base = byte & 0x7; - *index = (byte >> 3) & 0x7; - *ss = (byte >> 6) & 0x3; -} - -/* - * Get the byte following the op code and separate it into the - * mode, register, and r/m fields. - */ -static void -dtrace_get_modrm(dis86_t *x, uint_t *mode, uint_t *reg, uint_t *r_m) -{ - if (x->d86_got_modrm == 0) { - if (x->d86_rmindex == -1) - x->d86_rmindex = x->d86_len; - dtrace_get_SIB(x, mode, reg, r_m); - x->d86_got_modrm = 1; - } -} - -/* - * Adjust register selection based on any REX prefix bits present. - */ -/*ARGSUSED*/ -static void -dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) -{ - if (reg != NULL && r_m == NULL) { - if (rex_prefix & REX_B) - *reg += 8; - } else { - if (reg != NULL && (REX_R & rex_prefix) != 0) - *reg += 8; - if (r_m != NULL && (REX_B & rex_prefix) != 0) - *r_m += 8; - } -} - -/* - * Get an immediate operand of the given size, with sign extension. 
- */ -static void -dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) -{ - int i; - int byte; - int valsize = 0; - - if (x->d86_numopnds < opindex + 1) - x->d86_numopnds = opindex + 1; - - switch (wbit) { - case BYTE_OPND: - valsize = 1; - break; - case LONG_OPND: - if (x->d86_opnd_size == SIZE16) - valsize = 2; - else if (x->d86_opnd_size == SIZE32) - valsize = 4; - else - valsize = 8; - break; - case MM_OPND: - case XMM_OPND: - case SEG_OPND: - case CONTROL_OPND: - case DEBUG_OPND: - case TEST_OPND: - valsize = size; - break; - case WORD_OPND: - valsize = 2; - break; - } - if (valsize < size) - valsize = size; - - if (x->d86_error) - return; - x->d86_opnd[opindex].d86_value = 0; - for (i = 0; i < size; ++i) { - byte = x->d86_get_byte(x->d86_data); - if (byte < 0) { - x->d86_error = 1; - return; - } - x->d86_bytes[x->d86_len++] = byte; - x->d86_opnd[opindex].d86_value |= (uint64_t)byte << (i * 8); - } - /* Do sign extension */ - if (x->d86_bytes[x->d86_len - 1] & 0x80) { - for (; i < valsize; i++) - x->d86_opnd[opindex].d86_value |= - (uint64_t)0xff << (i* 8); - } -#ifdef DIS_TEXT - x->d86_opnd[opindex].d86_mode = MODE_SIGNED; - x->d86_opnd[opindex].d86_value_size = valsize; - x->d86_imm_bytes += size; -#endif -} - -/* - * Get an ip relative operand of the given size, with sign extension. - */ -static void -dtrace_disp_opnd(dis86_t *x, int wbit, int size, int opindex) -{ - dtrace_imm_opnd(x, wbit, size, opindex); -#ifdef DIS_TEXT - x->d86_opnd[opindex].d86_mode = MODE_IPREL; -#endif -} - -/* - * Check to see if there is a segment override prefix pending. - * If so, print it in the current 'operand' location and set - * the override flag back to false. 
- */ -/*ARGSUSED*/ -static void -dtrace_check_override(dis86_t *x, int opindex) -{ -#ifdef DIS_TEXT - if (x->d86_seg_prefix) { - (void) strlcat(x->d86_opnd[opindex].d86_prefix, - x->d86_seg_prefix, PFIXLEN); - } -#endif - x->d86_seg_prefix = NULL; -} - - -/* - * Process a single instruction Register or Memory operand. - * - * mode = addressing mode from ModRM byte - * r_m = r_m (or reg if mode == 3) field from ModRM byte - * wbit = indicates which register (8bit, 16bit, ... MMX, etc.) set to use. - * o = index of operand that we are processing (0, 1 or 2) - * - * the value of reg or r_m must have already been adjusted for any REX prefix. - */ -/*ARGSUSED*/ -static void -dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) -{ - int have_SIB = 0; /* flag presence of scale-index-byte */ - uint_t ss; /* scale-factor from opcode */ - uint_t index; /* index register number */ - uint_t base; /* base register number */ - int dispsize; /* size of displacement in bytes */ -#ifdef DIS_TEXT - char *opnd = x->d86_opnd[opindex].d86_opnd; -#endif - - if (x->d86_numopnds < opindex + 1) - x->d86_numopnds = opindex + 1; - - if (x->d86_error) - return; - - /* - * first handle a simple register - */ - if (mode == REG_ONLY) { -#ifdef DIS_TEXT - switch (wbit) { - case MM_OPND: - (void) strlcat(opnd, dis_MMREG[r_m], OPLEN); - break; - case XMM_OPND: - (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN); - break; - case SEG_OPND: - (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN); - break; - case CONTROL_OPND: - (void) strlcat(opnd, dis_CONTROLREG[r_m], OPLEN); - break; - case DEBUG_OPND: - (void) strlcat(opnd, dis_DEBUGREG[r_m], OPLEN); - break; - case TEST_OPND: - (void) strlcat(opnd, dis_TESTREG[r_m], OPLEN); - break; - case BYTE_OPND: - if (x->d86_rex_prefix == 0) - (void) strlcat(opnd, dis_REG8[r_m], OPLEN); - else - (void) strlcat(opnd, dis_REG8_REX[r_m], OPLEN); - break; - case WORD_OPND: - (void) strlcat(opnd, dis_REG16[r_m], OPLEN); - break; - case LONG_OPND: - if 
(x->d86_opnd_size == SIZE16) - (void) strlcat(opnd, dis_REG16[r_m], OPLEN); - else if (x->d86_opnd_size == SIZE32) - (void) strlcat(opnd, dis_REG32[r_m], OPLEN); - else - (void) strlcat(opnd, dis_REG64[r_m], OPLEN); - break; - } -#endif /* DIS_TEXT */ - return; - } - - /* - * if symbolic representation, skip override prefix, if any - */ - dtrace_check_override(x, opindex); - - /* - * Handle 16 bit memory references first, since they decode - * the mode values more simply. - * mode 1 is r_m + 8 bit displacement - * mode 2 is r_m + 16 bit displacement - * mode 0 is just r_m, unless r_m is 6 which is 16 bit disp - */ - if (x->d86_addr_size == SIZE16) { - if ((mode == 0 && r_m == 6) || mode == 2) - dtrace_imm_opnd(x, WORD_OPND, 2, opindex); - else if (mode == 1) - dtrace_imm_opnd(x, BYTE_OPND, 1, opindex); -#ifdef DIS_TEXT - if (mode == 0 && r_m == 6) - x->d86_opnd[opindex].d86_mode = MODE_SIGNED; - else if (mode == 0) - x->d86_opnd[opindex].d86_mode = MODE_NONE; - else - x->d86_opnd[opindex].d86_mode = MODE_OFFSET; - (void) strlcat(opnd, dis_addr16[mode][r_m], OPLEN); -#endif - return; - } - - /* - * 32 and 64 bit addressing modes are more complex since they - * can involve an SIB (scaled index and base) byte to decode. - */ - if (r_m == ESP_REGNO || r_m == ESP_REGNO + 8) { - have_SIB = 1; - dtrace_get_SIB(x, &ss, &index, &base); - if (x->d86_error) - return; - if (base != 5 || mode != 0) - if (x->d86_rex_prefix & REX_B) - base += 8; - if (x->d86_rex_prefix & REX_X) - index += 8; - } else { - base = r_m; - } - - /* - * Compute the displacement size and get its bytes - */ - dispsize = 0; - - if (mode == 1) - dispsize = 1; - else if (mode == 2) - dispsize = 4; - else if ((r_m & 7) == EBP_REGNO || - (have_SIB && (base & 7) == EBP_REGNO)) - dispsize = 4; - - if (dispsize > 0) { - dtrace_imm_opnd(x, dispsize == 4 ? 
LONG_OPND : BYTE_OPND, - dispsize, opindex); - if (x->d86_error) - return; - } - -#ifdef DIS_TEXT - if (dispsize > 0) - x->d86_opnd[opindex].d86_mode = MODE_OFFSET; - - if (have_SIB == 0) { - if (x->d86_mode == SIZE32) { - if (mode == 0) - (void) strlcat(opnd, dis_addr32_mode0[r_m], - OPLEN); - else - (void) strlcat(opnd, dis_addr32_mode12[r_m], - OPLEN); - } else { - if (mode == 0) - (void) strlcat(opnd, dis_addr64_mode0[r_m], - OPLEN); - else - (void) strlcat(opnd, dis_addr64_mode12[r_m], - OPLEN); - } - } else { - uint_t need_paren = 0; - char **regs; - if (x->d86_mode == SIZE32) /* NOTE this is not addr_size! */ - regs = (char **)dis_REG32; - else - regs = (char **)dis_REG64; - - /* - * print the base (if any) - */ - if (base == EBP_REGNO && mode == 0) { - if (index != ESP_REGNO) { - (void) strlcat(opnd, "(", OPLEN); - need_paren = 1; - } - } else { - (void) strlcat(opnd, "(", OPLEN); - (void) strlcat(opnd, regs[base], OPLEN); - need_paren = 1; - } - - /* - * print the index (if any) - */ - if (index != ESP_REGNO) { - (void) strlcat(opnd, ",", OPLEN); - (void) strlcat(opnd, regs[index], OPLEN); - (void) strlcat(opnd, dis_scale_factor[ss], OPLEN); - } else - if (need_paren) - (void) strlcat(opnd, ")", OPLEN); - } -#endif -} - -/* - * Operand sequence for standard instruction involving one register - * and one register/memory operand. - * wbit indicates a byte(0) or opnd_size(1) operation - * vbit indicates direction (0 for "opcode r,r_m") or (1 for "opcode r_m, r") - */ -#define STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, vbit) { \ - dtrace_get_modrm(x, &mode, ®, &r_m); \ - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ - dtrace_get_operand(x, mode, r_m, wbit, vbit); \ - dtrace_get_operand(x, REG_ONLY, reg, wbit, 1 - vbit); \ -} - -/* - * Similar to above, but allows for the two operands to be of different - * classes (ie. wbit). 
- * wbit is for the r_m operand - * w2 is for the reg operand - */ -#define MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, w2, vbit) { \ - dtrace_get_modrm(x, &mode, ®, &r_m); \ - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ - dtrace_get_operand(x, mode, r_m, wbit, vbit); \ - dtrace_get_operand(x, REG_ONLY, reg, w2, 1 - vbit); \ -} - -/* - * Similar, but for 2 operands plus an immediate. - */ -#define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ - dtrace_get_modrm(x, &mode, ®, &r_m); \ - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ - dtrace_get_operand(x, mode, r_m, wbit, 1); \ - dtrace_get_operand(x, REG_ONLY, reg, w2, 2); \ - dtrace_imm_opnd(x, wbit, immsize, 0); \ -} - -/* - * Dissassemble a single x86 or amd64 instruction. - * - * Mode determines the default operating mode (SIZE16, SIZE32 or SIZE64) - * for interpreting instructions. - * - * returns non-zero for bad opcode - */ -int -dtrace_disx86(dis86_t *x, uint_t cpu_mode) -{ - const instable_t *dp = NULL; /* decode table being used */ -#ifdef DIS_TEXT - uint_t i; -#endif -#ifdef DIS_MEM - uint_t nomem = 0; -#define NOMEM (nomem = 1) -#else -#define NOMEM /* nothing */ -#endif - uint_t wbit = 0; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ - uint_t w2; /* wbit value for second operand */ - uint_t vbit; - uint_t mode = 0; /* mode value from ModRM byte */ - uint_t reg; /* reg value from ModRM byte */ - uint_t r_m; /* r_m value from ModRM byte */ - - uint_t opcode1; /* high nibble of 1st byte */ - uint_t opcode2; /* low nibble of 1st byte */ - uint_t opcode3; /* extra opcode bits usually from ModRM byte */ - uint_t opcode4; /* high nibble of 2nd byte */ - uint_t opcode5 = 0; /* low nibble of 2ne byte */ /* XXX: gcc */ - uint_t opcode6; /* high nibble of 3rd byte */ - uint_t opcode7 = 0; /* low nibble of 3rd byte */ /* XXX: gcc */ - uint_t opcode_bytes = 1; - - /* - * legacy prefixes come in 5 flavors, you should have only one of each - */ - uint_t opnd_size_prefix = 0; - uint_t 
addr_size_prefix = 0; - uint_t segment_prefix = 0; - uint_t lock_prefix = 0; - uint_t rep_prefix = 0; - uint_t rex_prefix = 0; /* amd64 register extension prefix */ - size_t off; - - x->d86_len = 0; - x->d86_rmindex = -1; - x->d86_error = 0; -#ifdef DIS_TEXT - x->d86_numopnds = 0; - x->d86_seg_prefix = NULL; - x->d86_mneu[0] = 0; - for (i = 0; i < 3; ++i) { - x->d86_opnd[i].d86_opnd[0] = 0; - x->d86_opnd[i].d86_prefix[0] = 0; - x->d86_opnd[i].d86_value_size = 0; - x->d86_opnd[i].d86_value = 0; - x->d86_opnd[i].d86_mode = MODE_NONE; - } -#endif - x->d86_error = 0; - x->d86_memsize = 0; - - if (cpu_mode == SIZE16) { - opnd_size = SIZE16; - addr_size = SIZE16; - } else if (cpu_mode == SIZE32) { - opnd_size = SIZE32; - addr_size = SIZE32; - } else { - opnd_size = SIZE32; - addr_size = SIZE64; - } - - /* - * Get one opcode byte and check for zero padding that follows - * jump tables. - */ - if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) - goto error; - - if (opcode1 == 0 && opcode2 == 0 && - x->d86_check_func != NULL && x->d86_check_func(x->d86_data)) { -#ifdef DIS_TEXT - (void) strncpy(x->d86_mneu, ".byte\t0", OPLEN); -#endif - goto done; - } - - /* - * Gather up legacy x86 prefix bytes. - */ - for (;;) { - uint_t *which_prefix = NULL; - - dp = &dis_distable[opcode1][opcode2]; - - switch (dp->it_adrmode) { - case PREFIX: - which_prefix = &rep_prefix; - break; - case LOCK: - which_prefix = &lock_prefix; - break; - case OVERRIDE: - which_prefix = &segment_prefix; -#ifdef DIS_TEXT - x->d86_seg_prefix = (char *)dp->it_name; -#endif - if (dp->it_invalid64 && cpu_mode == SIZE64) - goto error; - break; - case AM: - which_prefix = &addr_size_prefix; - break; - case DM: - which_prefix = &opnd_size_prefix; - break; - } - if (which_prefix == NULL) - break; - *which_prefix = (opcode1 << 4) | opcode2; - if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) - goto error; - } - - /* - * Handle amd64 mode PREFIX values. - * Some of the segment prefixes are no-ops. 
(only FS/GS actually work) - * We might have a REX prefix (opcodes 0x40-0x4f) - */ - if (cpu_mode == SIZE64) { - if (segment_prefix != 0x64 && segment_prefix != 0x65) - segment_prefix = 0; - - if (opcode1 == 0x4) { - rex_prefix = (opcode1 << 4) | opcode2; - if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) - goto error; - dp = &dis_distable[opcode1][opcode2]; - } - } - - /* - * Deal with selection of operand and address size now. - * Note that the REX.W bit being set causes opnd_size_prefix to be - * ignored. - */ - if (cpu_mode == SIZE64) { - if (rex_prefix & 0x08) - opnd_size = SIZE64; - else if (opnd_size_prefix) - opnd_size = SIZE16; - - if (addr_size_prefix) - addr_size = SIZE32; - } else if (cpu_mode == SIZE32) { - if (opnd_size_prefix) - opnd_size = SIZE16; - if (addr_size_prefix) - addr_size = SIZE16; - } else { - if (opnd_size_prefix) - opnd_size = SIZE32; - if (addr_size_prefix) - addr_size = SIZE32; - } - - /* - * The pause instruction - a repz'd nop. This doesn't fit - * with any of the other prefix goop added for SSE, so we'll - * special-case it here. - */ - if (rep_prefix == 0xf3 && opcode1 == 0x9 && opcode2 == 0x0) { - rep_prefix = 0; - dp = &dis_opPause; - } - - /* - * Some 386 instructions have 2 bytes of opcode before the mod_r/m - * byte so we may need to perform a table indirection. - */ - if (dp->it_indirect == dis_op0F[0]) { - if (dtrace_get_opcode(x, &opcode4, &opcode5) != 0) - goto error; - opcode_bytes = 2; - if (opcode4 == 0x7 && opcode5 >= 0x1 && opcode5 <= 0x3) { - uint_t subcode; - - if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) - goto error; - opcode_bytes = 3; - subcode = ((opcode6 & 0x3) << 1) | - ((opcode7 & 0x8) >> 3); - dp = &dis_op0F7123[opcode5][subcode]; - } else if ((opcode4 == 0xc) && (opcode5 >= 0x8)) { - dp = &dis_op0FC8[0]; - } else { - dp = &dis_op0F[opcode4][opcode5]; - } - } - - /* - * If still not at a TERM decode entry, then a ModRM byte - * exists and its fields further decode the instruction. 
- */ - x->d86_got_modrm = 0; - if (dp->it_indirect != TERM) { - dtrace_get_modrm(x, &mode, &opcode3, &r_m); - if (x->d86_error) - goto error; - reg = opcode3; - - /* - * decode 287 instructions (D8-DF) from opcodeN - */ - if (opcode1 == 0xD && opcode2 >= 0x8) { - if (opcode2 == 0xB && mode == 0x3 && opcode3 == 4) - dp = &dis_opFP5[r_m]; - else if (opcode2 == 0xA && mode == 0x3 && opcode3 < 4) - dp = &dis_opFP7[opcode3]; - else if (opcode2 == 0xB && mode == 0x3) - dp = &dis_opFP6[opcode3]; - else if (opcode2 == 0x9 && mode == 0x3 && opcode3 >= 4) - dp = &dis_opFP4[opcode3 - 4][r_m]; - else if (mode == 0x3) - dp = &dis_opFP3[opcode2 - 8][opcode3]; - else - dp = &dis_opFP1n2[opcode2 - 8][opcode3]; - } else { - dp = dp->it_indirect + opcode3; - } - } - - /* - * In amd64 bit mode, ARPL opcode is changed to MOVSXD - * (sign extend 32bit to 64 bit) - */ - if (cpu_mode == SIZE64 && opcode1 == 0x6 && opcode2 == 0x3) - dp = &dis_opMOVSLD; - - /* - * at this point we should have a correct (or invalid) opcode - */ - if ((cpu_mode == SIZE64 && dp->it_invalid64) || - (cpu_mode != SIZE64 && dp->it_invalid32)) - goto error; - if (dp->it_indirect != TERM) - goto error; - - /* - * deal with MMX/SSE opcodes which are changed by prefixes - */ - switch (dp->it_adrmode) { - case MMO: - case MMOIMPL: - case MMO3P: - case MMOM3: - case MMOMS: - case MMOPM: - case MMOPRM: - case MMOS: - case XMMO: - case XMMOM: - case XMMOMS: - case XMMOPM: - case XMMOS: - case XMMOMX: - case XMMOX3: - case XMMOXMM: - /* - * This is horrible. Some SIMD instructions take the - * form 0x0F 0x?? ..., which is easily decoded using the - * existing tables. Other SIMD instructions use various - * prefix bytes to overload existing instructions. For - * Example, addps is F0, 58, whereas addss is F3 (repz), - * F0, 58. Presumably someone got a raise for this. 
- * - * If we see one of the instructions which can be - * modified in this way (if we've got one of the SIMDO* - * address modes), we'll check to see if the last prefix - * was a repz. If it was, we strip the prefix from the - * mnemonic, and we indirect using the dis_opSIMDrepz - * table. - */ - - /* - * Calculate our offset in dis_op0F - */ - if ((uintptr_t)dp - (uintptr_t)dis_op0F > sizeof (dis_op0F)) - goto error; - - off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / - sizeof (instable_t); - - /* - * Rewrite if this instruction used one of the magic prefixes. - */ - if (rep_prefix) { - if (rep_prefix == 0xf2) - dp = &dis_opSIMDrepnz[off]; - else - dp = &dis_opSIMDrepz[off]; - rep_prefix = 0; - } else if (opnd_size_prefix) { - dp = &dis_opSIMDdata16[off]; - opnd_size_prefix = 0; - if (opnd_size == SIZE16) - opnd_size = SIZE32; - } - break; - - case MMOSH: - /* - * As with the "normal" SIMD instructions, the MMX - * shuffle instructions are overloaded. These - * instructions, however, are special in that they use - * an extra byte, and thus an extra table. As of this - * writing, they only use the opnd_size prefix. - */ - - /* - * Calculate our offset in dis_op0F7123 - */ - if ((uintptr_t)dp - (uintptr_t)dis_op0F7123 > - sizeof (dis_op0F7123)) - goto error; - - if (opnd_size_prefix) { - off = ((uintptr_t)dp - (uintptr_t)dis_op0F7123) / - sizeof (instable_t); - dp = &dis_opSIMD7123[off]; - opnd_size_prefix = 0; - if (opnd_size == SIZE16) - opnd_size = SIZE32; - } - break; - } - - /* - * In 64 bit mode, some opcodes automatically use opnd_size == SIZE64. - */ - if (cpu_mode == SIZE64) - if (dp->it_always64 || (opnd_size == SIZE32 && dp->it_stackop)) - opnd_size = SIZE64; - -#ifdef DIS_TEXT - /* - * At this point most instructions can format the opcode mnemonic - * including the prefixes. 
- */ - if (lock_prefix) - (void) strlcat(x->d86_mneu, "lock ", OPLEN); - - if (rep_prefix == 0xf2) - (void) strlcat(x->d86_mneu, "repnz ", OPLEN); - else if (rep_prefix == 0xf3) - (void) strlcat(x->d86_mneu, "repz ", OPLEN); - - if (cpu_mode == SIZE64 && addr_size_prefix) - (void) strlcat(x->d86_mneu, "addr32 ", OPLEN); - - if (dp->it_adrmode != CBW && - dp->it_adrmode != CWD && - dp->it_adrmode != XMMSFNC) { - if (strcmp(dp->it_name, "INVALID") == 0) - goto error; - (void) strlcat(x->d86_mneu, dp->it_name, OPLEN); - if (dp->it_suffix) { - char *types[] = {"", "w", "l", "q"}; - if (opcode_bytes == 2 && opcode4 == 4) { - /* It's a cmovx.yy. Replace the suffix x */ - for (i = 5; i < OPLEN; i++) { - if (x->d86_mneu[i] == '.') - break; - } - x->d86_mneu[i - 1] = *types[opnd_size]; - } else { - (void) strlcat(x->d86_mneu, types[opnd_size], - OPLEN); - } - } - } -#endif - - /* - * Process operands based on the addressing modes. - */ - x->d86_mode = cpu_mode; - x->d86_rex_prefix = rex_prefix; - x->d86_opnd_size = opnd_size; - x->d86_addr_size = addr_size; - vbit = 0; /* initialize for mem/reg -> reg */ - switch (dp->it_adrmode) { - /* - * amd64 instruction to sign extend 32 bit reg/mem operands - * into 64 bit register values - */ - case MOVSXZ: -#ifdef DIS_TEXT - if (rex_prefix == 0) - (void) strncpy(x->d86_mneu, "movzld", OPLEN); -#endif - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - x->d86_opnd_size = SIZE64; - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - x->d86_opnd_size = opnd_size = SIZE32; - wbit = LONG_OPND; - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* - * movsbl movsbw movsbq (0x0FBE) or movswl movswq (0x0FBF) - * movzbl movzbw movzbq (0x0FB6) or mobzwl movzwq (0x0FB7) - * wbit lives in 2nd byte, note that operands - * are different sized - */ - case MOVZ: - if (rex_prefix & REX_W) { - /* target register size = 64 bit */ - x->d86_mneu[5] = 'q'; - } - dtrace_get_modrm(x, &mode, ®, &r_m); - 
dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - x->d86_opnd_size = opnd_size = SIZE16; - wbit = WBIT(opcode5); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* - * imul instruction, with either 8-bit or longer immediate - * opcode 0x6B for byte, sign-extended displacement, 0x69 for word(s) - */ - case IMUL: - wbit = LONG_OPND; - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, - OPSIZE(opnd_size, opcode2 == 0x9)); - break; - - /* memory or register operand to register, with 'w' bit */ - case MRw: - wbit = WBIT(opcode2); - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); - break; - - /* register to memory or register operand, with 'w' bit */ - /* arpl happens to fit here also because it is odd */ - case RMw: - if (opcode_bytes == 2) - wbit = WBIT(opcode5); - else - wbit = WBIT(opcode2); - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); - break; - - /* xaddb instruction */ - case XADDB: - wbit = 0; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); - break; - - /* MMX register to memory or register operand */ - case MMS: - case MMOS: -#ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; -#else - wbit = LONG_OPND; -#endif - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); - break; - - /* MMX register to memory */ - case MMOMS: - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode == REG_ONLY) - goto error; - wbit = MM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); - break; - - /* Double shift. Has immediate operand specifying the shift. */ - case DSHIFT: - wbit = LONG_OPND; - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 2); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - dtrace_imm_opnd(x, wbit, 1, 0); - break; - - /* - * Double shift. With no immediate operand, specifies using %cl. 
- */ - case DSHIFTcl: - wbit = LONG_OPND; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); - break; - - /* immediate to memory or register operand */ - case IMlw: - wbit = WBIT(opcode2); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); - /* - * Have long immediate for opcode 0x81, but not 0x80 nor 0x83 - */ - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, opcode2 == 1), 0); - break; - - /* immediate to memory or register operand with the */ - /* 'w' bit present */ - case IMw: - wbit = WBIT(opcode2); - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); - break; - - /* immediate to register with register in low 3 bits */ - /* of op code */ - case IR: - /* w-bit here (with regs) is bit 3 */ - wbit = opcode2 >>3 & 0x1; - reg = REGNO(opcode2); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - mode = REG_ONLY; - r_m = reg; - dtrace_get_operand(x, mode, r_m, wbit, 1); - dtrace_imm_opnd(x, wbit, OPSIZE64(opnd_size, wbit), 0); - break; - - /* MMX immediate shift of register */ - case MMSH: - case MMOSH: - wbit = MM_OPND; - goto mm_shift; /* in next case */ - - /* SIMD immediate shift of register */ - case XMMSH: - wbit = XMM_OPND; -mm_shift: - reg = REGNO(opcode7); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); - dtrace_imm_opnd(x, wbit, 1, 0); - NOMEM; - break; - - /* accumulator to memory operand */ - case AO: - vbit = 1; - /*FALLTHROUGH*/ - - /* memory operand to accumulator */ - case OA: - wbit = WBIT(opcode2); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1 - vbit); - dtrace_imm_opnd(x, wbit, OPSIZE64(addr_size, LONG_OPND), vbit); -#ifdef DIS_TEXT - x->d86_opnd[vbit].d86_mode = MODE_OFFSET; -#endif - break; - - - /* segment register to memory or register operand */ - case SM: - vbit = 1; - /*FALLTHROUGH*/ - - /* memory or 
register operand to segment register */ - case MS: - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, LONG_OPND, vbit); - dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 1 - vbit); - break; - - /* - * rotate or shift instructions, which may shift by 1 or - * consult the cl register, depending on the 'v' bit - */ - case Mv: - vbit = VBIT(opcode2); - wbit = WBIT(opcode2); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); -#ifdef DIS_TEXT - if (vbit) { - (void) strlcat(x->d86_opnd[0].d86_opnd, "%cl", OPLEN); - } else { - x->d86_opnd[0].d86_mode = MODE_SIGNED; - x->d86_opnd[0].d86_value_size = 1; - x->d86_opnd[0].d86_value = 1; - } -#endif - break; - /* - * immediate rotate or shift instructions - */ - case MvI: - wbit = WBIT(opcode2); -normal_imm_mem: - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); - dtrace_imm_opnd(x, wbit, 1, 0); - break; - - /* bit test instructions */ - case MIb: - wbit = LONG_OPND; - goto normal_imm_mem; - - /* single memory or register operand with 'w' bit present */ - case Mw: - wbit = WBIT(opcode2); -just_mem: - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - case SWAPGS: - if (cpu_mode == SIZE64 && mode == 3 && r_m == 0) { -#ifdef DIS_TEXT - (void) strncpy(x->d86_mneu, "swapgs", OPLEN); -#endif - NOMEM; - break; - } - /*FALLTHROUGH*/ - - /* prefetch instruction - memory operand, but no memory acess */ - case PREF: - NOMEM; - /*FALLTHROUGH*/ - - /* single memory or register operand */ - case M: - wbit = LONG_OPND; - goto just_mem; - - /* single memory or register byte operand */ - case Mb: - wbit = BYTE_OPND; - goto just_mem; - - case MO: - /* Similar to M, but only memory (no direct registers) */ - wbit = LONG_OPND; - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode == 3) - goto 
error; - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* move special register to register or reverse if vbit */ - case SREG: - switch (opcode5) { - - case 2: - vbit = 1; - /*FALLTHROUGH*/ - case 0: - wbit = CONTROL_OPND; - break; - - case 3: - vbit = 1; - /*FALLTHROUGH*/ - case 1: - wbit = DEBUG_OPND; - break; - - case 6: - vbit = 1; - /*FALLTHROUGH*/ - case 4: - wbit = TEST_OPND; - break; - - } - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit); - dtrace_get_operand(x, REG_ONLY, r_m, LONG_OPND, 1 - vbit); - NOMEM; - break; - - /* - * single register operand with register in the low 3 - * bits of op code - */ - case R: - if (opcode_bytes == 2) - reg = REGNO(opcode5); - else - reg = REGNO(opcode2); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); - NOMEM; - break; - - /* - * register to accumulator with register in the low 3 - * bits of op code, xchg instructions - */ - case RA: - NOMEM; - reg = REGNO(opcode2); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, LONG_OPND, 1); - break; - - /* - * single segment register operand, with register in - * bits 3-4 of op code byte - */ - case SEG: - NOMEM; - reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x3; - dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); - break; - - /* - * single segment register operand, with register in - * bits 3-5 of op code - */ - case LSEG: - NOMEM; - /* long seg reg from opcode */ - reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x7; - dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); - break; - - /* memory or register operand to register */ - case MR: - wbit = LONG_OPND; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); - break; - - case RM: - wbit = LONG_OPND; - STANDARD_MODRM(x, mode, reg, 
r_m, rex_prefix, wbit, 1); - break; - - /* MMX/SIMD-Int memory or mm reg to mm reg */ - case MM: - case MMO: -#ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; -#else - wbit = LONG_OPND; -#endif - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); - break; - - case MMOIMPL: -#ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; -#else - wbit = LONG_OPND; -#endif - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode != REG_ONLY) - goto error; - - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - dtrace_get_operand(x, REG_ONLY, reg, MM_OPND, 1); - mode = 0; /* change for memory access size... */ - break; - - /* MMX/SIMD-Int and SIMD-FP predicated mm reg to r32 */ - case MMO3P: - wbit = MM_OPND; - goto xmm3p; - case XMM3P: - wbit = XMM_OPND; -xmm3p: - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode != REG_ONLY) - goto error; - - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1); - NOMEM; - break; - - /* MMX/SIMD-Int predicated r32/mem to mm reg */ - case MMOPRM: - wbit = LONG_OPND; - w2 = MM_OPND; - goto xmmprm; - case XMMPRM: - wbit = LONG_OPND; - w2 = XMM_OPND; -xmmprm: - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1); - break; - - /* MMX/SIMD-Int predicated mm/mem to mm reg */ - case MMOPM: - wbit = w2 = MM_OPND; - goto xmmprm; - - /* MMX/SIMD-Int mm reg to r32 */ - case MMOM3: - NOMEM; - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode != REG_ONLY) - goto error; - wbit = MM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); - break; - - /* SIMD memory or xmm reg operand to xmm reg */ - case XMM: - case XMMO: - case XMMXIMPL: - wbit = XMM_OPND; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); - - if (dp->it_adrmode == XMMXIMPL && mode != REG_ONLY) - goto error; - -#ifdef DIS_TEXT - /* - * movlps and movhlps share opcodes. They differ in the - * addressing modes allowed for their operands. 
- * movhps and movlhps behave similarly. - */ - if (mode == REG_ONLY) { - if (strcmp(dp->it_name, "movlps") == 0) - (void) strncpy(x->d86_mneu, "movhlps", OPLEN); - else if (strcmp(dp->it_name, "movhps") == 0) - (void) strncpy(x->d86_mneu, "movlhps", OPLEN); - } -#endif - if (dp->it_adrmode == XMMXIMPL) - mode = 0; /* change for memory access size... */ - break; - - /* SIMD xmm reg to memory or xmm reg */ - case XMMS: - case XMMOS: - case XMMMS: - case XMMOMS: - dtrace_get_modrm(x, &mode, ®, &r_m); -#ifdef DIS_TEXT - if ((strcmp(dp->it_name, "movlps") == 0 || - strcmp(dp->it_name, "movhps") == 0 || - strcmp(dp->it_name, "movntps") == 0) && - mode == REG_ONLY) - goto error; -#endif - wbit = XMM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); - break; - - /* SIMD memory to xmm reg */ - case XMMM: - case XMMOM: - wbit = XMM_OPND; - dtrace_get_modrm(x, &mode, ®, &r_m); -#ifdef DIS_TEXT - if (mode == REG_ONLY) { - if (strcmp(dp->it_name, "movhps") == 0) - (void) strncpy(x->d86_mneu, "movlhps", OPLEN); - else - goto error; - } -#endif - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); - break; - - /* SIMD memory or r32 to xmm reg */ - case XMM3MX: - wbit = LONG_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); - break; - - case XMM3MXS: - wbit = LONG_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); - break; - - /* SIMD memory or mm reg to xmm reg */ - case XMMOMX: - /* SIMD mm to xmm */ - case XMMMX: - wbit = MM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); - break; - - /* SIMD memory or xmm reg to mm reg */ - case XMMXMM: - case XMMOXMM: - case XMMXM: - wbit = XMM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); - break; - - - /* SIMD memory or xmm reg to r32 */ - case XMMXM3: - wbit = XMM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); - break; - - /* SIMD xmm to r32 */ - case XMMX3: - case XMMOX3: - dtrace_get_modrm(x, &mode, ®, &r_m); - if 
(mode != REG_ONLY) - goto error; - dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); - dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - NOMEM; - break; - - /* SIMD predicated memory or xmm reg with/to xmm reg */ - case XMMP: - case XMMOPM: - wbit = XMM_OPND; - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); - -#ifdef DIS_TEXT - /* - * cmpps and cmpss vary their instruction name based - * on the value of imm8. Other XMMP instructions, - * such as shufps, require explicit specification of - * the predicate. - */ - if (dp->it_name[0] == 'c' && - dp->it_name[1] == 'm' && - dp->it_name[2] == 'p' && - strlen(dp->it_name) == 5) { - uchar_t pred = x->d86_opnd[0].d86_value & 0xff; - - if (pred >= (sizeof (dis_PREDSUFFIX) / sizeof (char *))) - goto error; - - (void) strncpy(x->d86_mneu, "cmp", OPLEN); - (void) strlcat(x->d86_mneu, dis_PREDSUFFIX[pred], - OPLEN); - (void) strlcat(x->d86_mneu, - dp->it_name + strlen(dp->it_name) - 2, - OPLEN); - x->d86_opnd[0] = x->d86_opnd[1]; - x->d86_opnd[1] = x->d86_opnd[2]; - x->d86_numopnds = 2; - } -#endif - break; - - /* immediate operand to accumulator */ - case IA: - wbit = WBIT(opcode2); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); - NOMEM; - break; - - /* memory or register operand to accumulator */ - case MA: - wbit = WBIT(opcode2); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* si register to di register used to reference memory */ - case SD: -#ifdef DIS_TEXT - dtrace_check_override(x, 0); - x->d86_numopnds = 2; - if (addr_size == SIZE64) { - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", - OPLEN); - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", - OPLEN); - } else if (addr_size == SIZE32) { - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", - OPLEN); - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", - OPLEN); - } else { -
(void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", - OPLEN); - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", - OPLEN); - } -#endif - wbit = LONG_OPND; - break; - - /* accumulator to di register */ - case AD: - wbit = WBIT(opcode2); -#ifdef DIS_TEXT - dtrace_check_override(x, 1); - x->d86_numopnds = 2; - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 0); - if (addr_size == SIZE64) - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", - OPLEN); - else if (addr_size == SIZE32) - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", - OPLEN); - else - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", - OPLEN); -#endif - break; - - /* si register to accumulator */ - case SA: - wbit = WBIT(opcode2); -#ifdef DIS_TEXT - dtrace_check_override(x, 0); - x->d86_numopnds = 2; - if (addr_size == SIZE64) - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", - OPLEN); - else if (addr_size == SIZE32) - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", - OPLEN); - else - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", - OPLEN); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); -#endif - break; - - /* - * single operand, a 16/32 bit displacement - */ - case D: - wbit = LONG_OPND; - dtrace_disp_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); - NOMEM; - break; - - /* jmp/call indirect to memory or register operand */ - case INM: -#ifdef DIS_TEXT - (void) strlcat(x->d86_opnd[0].d86_prefix, "*", OPLEN); -#endif - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); - wbit = LONG_OPND; - break; - - /* - * for long jumps and long calls -- a new code segment - * register and an offset in IP -- stored in object - * code in reverse order. 
Note - not valid in amd64 - */ - case SO: - dtrace_check_override(x, 1); - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 1); -#ifdef DIS_TEXT - x->d86_opnd[1].d86_mode = MODE_SIGNED; -#endif - /* will now get segment operand */ - dtrace_imm_opnd(x, wbit, 2, 0); - break; - - /* - * jmp/call. single operand, 8 bit displacement. - * added to current EIP in 'compofff' - */ - case BD: - dtrace_disp_opnd(x, BYTE_OPND, 1, 0); - NOMEM; - break; - - /* single 32/16 bit immediate operand */ - case I: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); - break; - - /* single 8 bit immediate operand */ - case Ib: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, 1, 0); - break; - - case ENTER: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, 2, 0); - dtrace_imm_opnd(x, wbit, 1, 1); - switch (opnd_size) { - case SIZE64: - x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 8; - break; - case SIZE32: - x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 4; - break; - case SIZE16: - x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 2; - break; - } - - break; - - /* 16-bit immediate operand */ - case RET: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, 2, 0); - break; - - /* single 8 bit port operand */ - case P: - dtrace_check_override(x, 0); - dtrace_imm_opnd(x, BYTE_OPND, 1, 0); - NOMEM; - break; - - /* single operand, dx register (variable port instruction) */ - case V: - x->d86_numopnds = 1; - dtrace_check_override(x, 0); -#ifdef DIS_TEXT - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%dx)", OPLEN); -#endif - NOMEM; - break; - - /* - * The int instruction, which has two forms: - * int 3 (breakpoint) or - * int n, where n is indicated in the subsequent - * byte (format Ib). The int 3 instruction (opcode 0xCC), - * where, although the 3 looks like an operand, - * it is implied by the opcode. It must be converted - * to the correct base and output. 
- */ - case INT3: -#ifdef DIS_TEXT - x->d86_numopnds = 1; - x->d86_opnd[0].d86_mode = MODE_SIGNED; - x->d86_opnd[0].d86_value_size = 1; - x->d86_opnd[0].d86_value = 3; -#endif - NOMEM; - break; - - /* single 8 bit immediate operand */ - case INTx: - dtrace_imm_opnd(x, BYTE_OPND, 1, 0); - NOMEM; - break; - - /* an unused byte must be discarded */ - case U: - if (x->d86_get_byte(x->d86_data) < 0) - goto error; - x->d86_len++; - NOMEM; - break; - - case CBW: -#ifdef DIS_TEXT - if (opnd_size == SIZE16) - (void) strlcat(x->d86_mneu, "cbtw", OPLEN); - else if (opnd_size == SIZE32) - (void) strlcat(x->d86_mneu, "cwtl", OPLEN); - else - (void) strlcat(x->d86_mneu, "cltq", OPLEN); -#endif - wbit = LONG_OPND; - NOMEM; - break; - - case CWD: -#ifdef DIS_TEXT - if (opnd_size == SIZE16) - (void) strlcat(x->d86_mneu, "cwtd", OPLEN); - else if (opnd_size == SIZE32) - (void) strlcat(x->d86_mneu, "cltd", OPLEN); - else - (void) strlcat(x->d86_mneu, "cqtd", OPLEN); -#endif - wbit = LONG_OPND; - NOMEM; - break; - - case XMMSFNC: - /* - * sfence is sfence if mode is REG_ONLY. If mode isn't - * REG_ONLY, mnemonic should be 'clflush'. 
- */ - dtrace_get_modrm(x, &mode, &reg, &r_m); - - /* sfence doesn't take operands */ -#ifdef DIS_TEXT - if (mode == REG_ONLY) { - (void) strlcat(x->d86_mneu, "sfence", OPLEN); - } else { - (void) strlcat(x->d86_mneu, "clflush", OPLEN); - dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); - dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); - NOMEM; - } -#else - if (mode != REG_ONLY) { - dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); - dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); - NOMEM; - } -#endif - break; - - /* - * no disassembly, the mnemonic was all there was so go on - */ - case NORM: - if (dp->it_invalid32 && cpu_mode != SIZE64) - goto error; - NOMEM; - /*FALLTHROUGH*/ - case IMPLMEM: - break; - - case XMMFENCE: - /* - * Only the following exact byte sequences are allowed: - * - * 0f ae e8 lfence - * 0f ae f0 mfence - */ - if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 && - (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0) - goto error; - - break; - - - /* float reg */ - case F: -#ifdef DIS_TEXT - x->d86_numopnds = 1; - (void) strlcat(x->d86_opnd[0].d86_opnd, "%st(X)", OPLEN); - x->d86_opnd[0].d86_opnd[4] = r_m + '0'; -#endif - NOMEM; - break; - - /* float reg to float reg, with ret bit present */ - case FF: - vbit = opcode2 >> 2 & 0x1; /* vbit = 1: st -> st(i) */ - /*FALLTHROUGH*/ - case FFC: /* case for vbit always = 0 */ -#ifdef DIS_TEXT - x->d86_numopnds = 2; - (void) strlcat(x->d86_opnd[1 - vbit].d86_opnd, "%st", OPLEN); - (void) strlcat(x->d86_opnd[vbit].d86_opnd, "%st(X)", OPLEN); - x->d86_opnd[vbit].d86_opnd[4] = r_m + '0'; -#endif - NOMEM; - break; - - /* an invalid op code */ - case AM: - case DM: - case OVERRIDE: - case PREFIX: - case UNKNOWN: - NOMEM; - default: - goto error; - } /* end switch */ - if (x->d86_error) - goto error; - -done: -#ifdef DIS_MEM - /* - * compute the size of any memory accessed by the instruction - */ - if (x->d86_memsize != 0) { - return (0); - } else if (dp->it_stackop) { - switch (opnd_size) { - case SIZE16: - x->d86_memsize
= 2; - break; - case SIZE32: - x->d86_memsize = 4; - break; - case SIZE64: - x->d86_memsize = 8; - break; - } - } else if (nomem || mode == REG_ONLY) { - x->d86_memsize = 0; - - } else if (dp->it_size != 0) { - /* - * In 64 bit mode descriptor table entries - * go up to 10 bytes and popf/pushf are always 8 bytes - */ - if (x->d86_mode == SIZE64 && dp->it_size == 6) - x->d86_memsize = 10; - else if (x->d86_mode == SIZE64 && opcode1 == 0x9 && - (opcode2 == 0xc || opcode2 == 0xd)) - x->d86_memsize = 8; - else - x->d86_memsize = dp->it_size; - - } else if (wbit == 0) { - x->d86_memsize = 1; - - } else if (wbit == LONG_OPND) { - if (opnd_size == SIZE64) - x->d86_memsize = 8; - else if (opnd_size == SIZE32) - x->d86_memsize = 4; - else - x->d86_memsize = 2; - - } else if (wbit == SEG_OPND) { - x->d86_memsize = 4; - - } else { - x->d86_memsize = 8; - } -#endif - return (0); - -error: -#ifdef DIS_TEXT - (void) strlcat(x->d86_mneu, "undef", OPLEN); -#endif - return (1); -} - -#ifdef DIS_TEXT - -/* - * Some instructions should have immediate operands printed - * as unsigned integers. We compare against this table. - */ -static char *unsigned_ops[] = { - "or", "and", "xor", "test", "in", "out", "lcall", "ljmp", - "rcr", "rcl", "ror", "rol", "shl", "shr", "sal", "psr", "psl", - 0 -}; - -static int -isunsigned_op(char *opcode) -{ - char *where; - int i; - int is_unsigned = 0; - - /* - * Work back to start of last mnemonic, since we may have - * prefixes on some opcodes. 
- */ - where = opcode + strlen(opcode) - 1; - while (where > opcode && *where != ' ') - --where; - if (*where == ' ') - ++where; - - for (i = 0; unsigned_ops[i]; ++i) { - if (strncmp(where, unsigned_ops[i], - strlen(unsigned_ops[i]))) - continue; - is_unsigned = 1; - break; - } - return (is_unsigned); -} - -/* ARGSUSED */ -void -dtrace_disx86_str(dis86_t *dis, uint_t mode, uintptr_t pc, char *buf, - size_t buflen) -{ - int i; - - dis->d86_sprintf_func(buf, buflen, "%-6s ", dis->d86_mneu); - - /* - * For PC-relative jumps, the pc is really the next pc after executing - * this instruction, so increment it appropriately. - */ - pc += dis->d86_len; - - for (i = 0; i < dis->d86_numopnds; i++) { - d86opnd_t *op = &dis->d86_opnd[i]; - int64_t sv; - uint64_t mask; - - if (i != 0) - (void) strlcat(buf, ",", buflen); - - (void) strlcat(buf, op->d86_prefix, buflen); - - sv = op->d86_value; - - switch (op->d86_mode) { - - case MODE_NONE: - - (void) strlcat(buf, op->d86_opnd, buflen); - break; - - case MODE_SIGNED: - case MODE_IMPLIED: - case MODE_OFFSET: - - if (dis->d86_seg_prefix) - (void) strlcat(buf, dis->d86_seg_prefix, - buflen); - - switch (op->d86_value_size) { - case 1: - sv = (int8_t)sv; - mask = 0xff; - break; - case 2: - sv = (int16_t)sv; - mask = 0xffff; - break; - case 4: - sv = (int32_t)sv; - mask = 0xffffffff; - break; - case 8: - mask = 0xffffffffffffffffULL; - break; - } - - if (op->d86_mode == MODE_SIGNED || - op->d86_mode == MODE_IMPLIED) - (void) strlcat(buf, "$", buflen); - - if (sv < 0 && sv > -0xffff && - !isunsigned_op(dis->d86_mneu)) { - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "-0%llo" : "-0x%llx", -sv & mask); - } else { - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? 
- "0%llo" : "0x%llx", sv & mask); - } - (void) strlcat(buf, op->d86_opnd, buflen); - break; - - case MODE_IPREL: - - switch (op->d86_value_size) { - case 1: - sv = (int8_t)sv; - break; - case 2: - sv = (int16_t)sv; - break; - case 4: - sv = (int32_t)sv; - break; - } - - if (sv < 0) - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "-0%llo" : "-0x%llx", -sv - dis->d86_len); - else - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "+0%llo" : "+0x%llx", sv + dis->d86_len); - - (void) strlcat(buf, "\t<", buflen); - - if (dis->d86_sym_lookup == NULL || - dis->d86_sym_lookup(dis->d86_data, pc + sv, - buf + strlen(buf), buflen - strlen(buf)) != 0) - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "0%llo" : "0x%llx", pc + sv); - - (void) strlcat(buf, ">", buflen); - - break; - } - } -} - -#endif /* DIS_TEXT */ Index: src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.h =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.h diff -N src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.h --- src/external/cddl/osnet/dev/dtrace/amd64/dis_tables.h 21 Feb 2010 01:46:33 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,114 +0,0 @@ -/* $NetBSD: dis_tables.h,v 1.2 2010/02/21 01:46:33 darran Exp $ */ - -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - * - * $FreeBSD: src/sys/cddl/dev/dtrace/amd64/dis_tables.h,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ - - -#ifndef _DIS_TABLES_H -#define _DIS_TABLES_H - -#if defined(sun) -#pragma ident "@(#)dis_tables.h 1.7 06/03/02 SMI" -#endif - -/* - * Constants and prototypes for the IA32 disassembler backend. See dis_tables.c - * for usage information and documentation. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -/* - * values for cpu mode - */ -#define SIZE16 1 -#define SIZE32 2 -#define SIZE64 3 - -#define OPLEN 256 -#define PFIXLEN 8 -#define NCPS 12 /* number of chars per symbol */ - -/* - * data structures that must be provided to dtrace_dis86() - */ -typedef struct d86opnd { - char d86_opnd[OPLEN]; /* symbolic rep of operand */ - char d86_prefix[PFIXLEN]; /* any prefix string or "" */ - uint_t d86_mode; /* mode for immediate */ - uint_t d86_value_size; /* size in bytes of d86_value */ - uint64_t d86_value; /* immediate value of opnd */ -} d86opnd_t; - -typedef struct dis86 { - uint_t d86_mode; - uint_t d86_error; - uint_t d86_len; /* instruction length */ - int d86_rmindex; /* index of modrm byte or -1 */ - uint_t d86_memsize; /* size of memory referenced */ - char d86_bytes[16]; /* bytes of instruction */ - char d86_mneu[OPLEN]; - uint_t d86_numopnds; - uint_t d86_rex_prefix; /* value of REX prefix if !0 */ - char *d86_seg_prefix; /* segment prefix, if any */ - uint_t d86_opnd_size; - uint_t 
d86_addr_size; - uint_t d86_got_modrm; - struct d86opnd d86_opnd[3]; /* up to 3 operands */ - int (*d86_check_func)(void *); - int (*d86_get_byte)(void *); -#ifdef DIS_TEXT - int (*d86_sym_lookup)(void *, uint64_t, char *, size_t); - int (*d86_sprintf_func)(char *, size_t, const char *, ...); - int d86_flags; - uint_t d86_imm_bytes; -#endif - void *d86_data; -} dis86_t; - -extern int dtrace_disx86(dis86_t *x, uint_t cpu_mode); - -#define DIS_OP_OCTAL 0x1 /* Print all numbers in octal */ - -#ifdef DIS_TEXT -extern void dtrace_disx86_str(dis86_t *x, uint_t cpu_mode, uintptr_t pc, - char *buf, size_t len); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _DIS_TABLES_H */ Index: src/external/cddl/osnet/dev/dtrace/amd64/dtrace_asm.S =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/amd64/dtrace_asm.S,v retrieving revision 1.7 diff -u -p -r1.7 dtrace_asm.S --- src/external/cddl/osnet/dev/dtrace/amd64/dtrace_asm.S 27 Feb 2017 06:46:59 -0000 1.7 +++ src/external/cddl/osnet/dev/dtrace/amd64/dtrace_asm.S 8 May 2017 22:42:29 -0000 @@ -22,7 +22,7 @@ * * Portions Copyright 2008 John Birrell * - * $FreeBSD: src/sys/cddl/dev/dtrace/amd64/dtrace_asm.S,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/amd64/dtrace_asm.S 298171 2016-04-17 23:08:47Z markj $ * */ /* Index: src/external/cddl/osnet/dev/dtrace/amd64/dtrace_isa.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/amd64/dtrace_isa.c,v retrieving revision 1.6 diff -u -p -r1.6 dtrace_isa.c --- src/external/cddl/osnet/dev/dtrace/amd64/dtrace_isa.c 27 Feb 2017 06:46:59 -0000 1.6 +++ src/external/cddl/osnet/dev/dtrace/amd64/dtrace_isa.c 20 Apr 2017 11:07:10 -0000 @@ -21,7 +21,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/amd64/dtrace_isa.c,v 1.2.2.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: 
head/sys/cddl/dev/dtrace/amd64/dtrace_isa.c 298171 2016-04-17 23:08:47Z markj $ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. @@ -38,6 +38,8 @@ #include +#include "regset.h" + uint8_t dtrace_fuword8_nocheck(void *); uint16_t dtrace_fuword16_nocheck(void *); uint32_t dtrace_fuword32_nocheck(void *); @@ -54,6 +56,8 @@ struct amd64_frame { typedef unsigned long vm_offset_t; +int dtrace_ustackdepth_max = 2048; + void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) @@ -107,14 +111,25 @@ static int dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc, uintptr_t sp) { + uintptr_t oldsp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[cpu_number()].cpuc_dtrace_flags; int ret = 0; ASSERT(pcstack == NULL || pcstack_limit > 0); + ASSERT(dtrace_ustackdepth_max > 0); while (pc != 0) { - ret++; + /* + * We limit the number of times we can go around this + * loop to account for a circular stack. + */ + if (ret++ >= dtrace_ustackdepth_max) { + *flags |= CPU_DTRACE_BADSTACK; + cpu_core[cpu_number()].cpuc_dtrace_illval = sp; + break; + } + if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; pcstack_limit--; @@ -125,10 +140,18 @@ dtrace_getustack_common(uint64_t *pcstac if (sp == 0) break; + oldsp = sp; + pc = dtrace_fuword64((void *)(sp + offsetof(struct amd64_frame, f_retaddr))); sp = dtrace_fuword64((void *)sp); + if (sp == oldsp) { + *flags |= CPU_DTRACE_BADSTACK; + cpu_core[cpu_number()].cpuc_dtrace_illval = sp; + break; + } + /* * This is totally bogus: if we faulted, we're going to clear * the fault and break. 
This is to deal with the apparently @@ -467,11 +490,10 @@ dtrace_getstackdepth(int aframes) return depth - aframes; } -#ifdef notyet ulong_t -dtrace_getreg(struct regs *rp, uint_t reg) +dtrace_getreg(struct trapframe *rp, uint_t reg) { -#if defined(__amd64) + /* CHUQ skipped */ int regmap[] = { REG_GS, /* GS */ REG_FS, /* FS */ @@ -507,72 +529,62 @@ dtrace_getreg(struct regs *rp, uint_t re switch (reg) { case REG_RDI: - return (rp->r_rdi); + return (rp->tf_rdi); case REG_RSI: - return (rp->r_rsi); + return (rp->tf_rsi); case REG_RDX: - return (rp->r_rdx); + return (rp->tf_rdx); case REG_RCX: - return (rp->r_rcx); + return (rp->tf_rcx); case REG_R8: - return (rp->r_r8); + return (rp->tf_r8); case REG_R9: - return (rp->r_r9); + return (rp->tf_r9); case REG_RAX: - return (rp->r_rax); + return (rp->tf_rax); case REG_RBX: - return (rp->r_rbx); + return (rp->tf_rbx); case REG_RBP: - return (rp->r_rbp); + return (rp->tf_rbp); case REG_R10: - return (rp->r_r10); + return (rp->tf_r10); case REG_R11: - return (rp->r_r11); + return (rp->tf_r11); case REG_R12: - return (rp->r_r12); + return (rp->tf_r12); case REG_R13: - return (rp->r_r13); + return (rp->tf_r13); case REG_R14: - return (rp->r_r14); + return (rp->tf_r14); case REG_R15: - return (rp->r_r15); + return (rp->tf_r15); case REG_DS: - return (rp->r_ds); + return (rp->tf_ds); case REG_ES: - return (rp->r_es); + return (rp->tf_es); case REG_FS: - return (rp->r_fs); + return (rp->tf_fs); case REG_GS: - return (rp->r_gs); + return (rp->tf_gs); case REG_TRAPNO: - return (rp->r_trapno); + return (rp->tf_trapno); case REG_ERR: - return (rp->r_err); + return (rp->tf_err); case REG_RIP: - return (rp->r_rip); + return (rp->tf_rip); case REG_CS: - return (rp->r_cs); + return (rp->tf_cs); case REG_SS: - return (rp->r_ss); + return (rp->tf_ss); case REG_RFL: - return (rp->r_rfl); + return (rp->tf_rflags); case REG_RSP: - return (rp->r_rsp); + return (rp->tf_rsp); default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } - -#else 
- if (reg > SS) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); - } - - return ((&rp->r_gs)[reg]); -#endif } -#endif static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) Index: src/external/cddl/osnet/dev/dtrace/amd64/dtrace_subr.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/amd64/dtrace_subr.c,v retrieving revision 1.8 diff -u -p -r1.8 dtrace_subr.c --- src/external/cddl/osnet/dev/dtrace/amd64/dtrace_subr.c 27 Feb 2017 06:46:59 -0000 1.8 +++ src/external/cddl/osnet/dev/dtrace/amd64/dtrace_subr.c 20 Apr 2017 11:58:41 -0000 @@ -21,7 +21,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/amd64/dtrace_subr.c,v 1.3.2.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/amd64/dtrace_subr.c 313850 2017-02-17 03:27:20Z markj $ * */ /* @@ -29,6 +29,11 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
+ */ + +#include #include #include #include @@ -38,18 +43,13 @@ #include #include #include -//#include #include #include #include #include -#include -#include -#include +#include extern uintptr_t kernelbase; -extern uintptr_t dtrace_in_probe_addr; -extern int dtrace_in_probe; int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t); @@ -61,6 +61,7 @@ typedef struct dtrace_invop_hdlr { dtrace_invop_hdlr_t *dtrace_invop_hdlr; void dtrace_gethrtime_init(void *); +void dtrace_getnanotime(struct timespec *); int dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax) @@ -160,122 +161,6 @@ dtrace_sync(void) } #ifdef notyet -int (*dtrace_fasttrap_probe_ptr)(struct regs *); -int (*dtrace_pid_probe_ptr)(struct regs *); -int (*dtrace_return_probe_ptr)(struct regs *); - -void -dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid) -{ - krwlock_t *rwp; - proc_t *p = curproc; - extern void trap(struct regs *, caddr_t, processorid_t); - - if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) { - if (curthread->t_cred != p->p_cred) { - cred_t *oldcred = curthread->t_cred; - /* - * DTrace accesses t_cred in probe context. t_cred - * must always be either NULL, or point to a valid, - * allocated cred structure. - */ - curthread->t_cred = crgetcred(); - crfree(oldcred); - } - } - - if (rp->r_trapno == T_DTRACE_RET) { - uint8_t step = curthread->t_dtrace_step; - uint8_t ret = curthread->t_dtrace_ret; - uintptr_t npc = curthread->t_dtrace_npc; - - if (curthread->t_dtrace_ast) { - aston(curthread); - curthread->t_sig_check = 1; - } - - /* - * Clear all user tracing flags. - */ - curthread->t_dtrace_ft = 0; - - /* - * If we weren't expecting to take a return probe trap, kill - * the process as though it had just executed an unassigned - * trap instruction. 
- */ - if (step == 0) { - tsignal(curthread, SIGILL); - return; - } - - /* - * If we hit this trap unrelated to a return probe, we're - * just here to reset the AST flag since we deferred a signal - * until after we logically single-stepped the instruction we - * copied out. - */ - if (ret == 0) { - rp->r_pc = npc; - return; - } - - /* - * We need to wait until after we've called the - * dtrace_return_probe_ptr function pointer to set %pc. - */ - rwp = &CPU->cpu_ft_lock; - rw_enter(rwp, RW_READER); - if (dtrace_return_probe_ptr != NULL) - (void) (*dtrace_return_probe_ptr)(rp); - rw_exit(rwp); - rp->r_pc = npc; - - } else if (rp->r_trapno == T_DTRACE_PROBE) { - rwp = &CPU->cpu_ft_lock; - rw_enter(rwp, RW_READER); - if (dtrace_fasttrap_probe_ptr != NULL) - (void) (*dtrace_fasttrap_probe_ptr)(rp); - rw_exit(rwp); - - } else if (rp->r_trapno == T_BPTFLT) { - uint8_t instr; - rwp = &CPU->cpu_ft_lock; - - /* - * The DTrace fasttrap provider uses the breakpoint trap - * (int 3). We let DTrace take the first crack at handling - * this trap; if it's not a probe that DTrace knowns about, - * we call into the trap() routine to handle it like a - * breakpoint placed by a conventional debugger. - */ - rw_enter(rwp, RW_READER); - if (dtrace_pid_probe_ptr != NULL && - (*dtrace_pid_probe_ptr)(rp) == 0) { - rw_exit(rwp); - return; - } - rw_exit(rwp); - - /* - * If the instruction that caused the breakpoint trap doesn't - * look like an int 3 anymore, it may be that this tracepoint - * was removed just after the user thread executed it. In - * that case, return to user land to retry the instuction. 
- */ - if (fuword8((void *)(rp->r_pc - 1), &instr) == 0 && - instr != FASTTRAP_INSTR) { - rp->r_pc--; - return; - } - - trap(rp, addr, cpuid); - - } else { - trap(rp, addr, cpuid); - } -} - void dtrace_safe_synchronous_signal(void) { @@ -321,14 +206,15 @@ dtrace_safe_defer_signal(void) } /* - * If we've executed the original instruction, but haven't performed - * the jmp back to t->t_dtrace_npc or the clean up of any registers - * used to emulate %rip-relative instructions in 64-bit mode, do that - * here and take the signal right away. We detect this condition by - * seeing if the program counter is the range [scrpc + isz, astpc). + * If we have executed the original instruction, but we have performed + * neither the jmp back to t->t_dtrace_npc nor the clean up of any + * registers used to emulate %rip-relative instructions in 64-bit mode, + * we'll save ourselves some effort by doing that here and taking the + * signal right away. We detect this condition by seeing if the program + * counter is the range [scrpc + isz, astpc). */ - if (t->t_dtrace_astpc - rp->r_pc < - t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) { + if (rp->r_pc >= t->t_dtrace_scrpc + isz && + rp->r_pc < t->t_dtrace_astpc) { #ifdef __amd64 /* * If there is a scratch register and we're on the @@ -451,7 +337,6 @@ dtrace_gethrtime_init(void *arg) * another 32-bit integer without overflowing 64-bit. * Thus minimum supported TSC frequency is 62.5MHz. 
*/ - //KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low")); KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT))); /* @@ -472,27 +357,6 @@ dtrace_gethrtime_init(void *arg) /* use skew relative to cpu 0 */ tsc_skew[cpu_index(cinfo)] = cinfo->ci_data.cpu_cc_skew; } - - /* Already handled in x86/tsc.c for ci_data.cpu_cc_skew */ -#if 0 - for (i = 0; i <= mp_maxid; i++) { - if (i == curcpu) - continue; - - if (pcpu_find(i) == NULL) - continue; - - map = 0; - map |= (1 << curcpu); - map |= (1 << i); - - smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, - dtrace_gethrtime_init_cpu, - smp_no_rendevous_barrier, (void *)(uintptr_t) i); - - tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; - } -#endif } /* @@ -525,14 +389,20 @@ dtrace_gethrtime() uint64_t dtrace_gethrestime(void) { - printf("%s(%d): XXX\n",__func__,__LINE__); - return (0); + struct timespec current_time; + + dtrace_getnanotime(&current_time); + + return (current_time.tv_sec * 1000000000ULL + current_time.tv_nsec); } /* Function to handle DTrace traps during probes. See amd64/amd64/trap.c */ int dtrace_trap(struct trapframe *frame, u_int type) { + bool nofault; + cpuid_t cpuid = cpu_number(); /* current cpu id */ + /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets @@ -543,19 +413,19 @@ dtrace_trap(struct trapframe *frame, u_i * Check if DTrace has enabled 'no-fault' mode: * */ - if ((cpu_core[cpu_number()].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { + nofault = (cpu_core[cpuid].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0; + if (nofault) { + KASSERTMSG((read_rflags() & PSL_I) == 0, "interrupts enabled"); + /* * There are only a couple of trap types that are expected. * All the rest will be handled in the usual way. */ switch (type) { - /* Privilieged instruction fault. */ - case T_PRIVINFLT: - break; /* General protection fault. */ case T_PROTFLT: /* Flag an illegal operation.
*/ - cpu_core[cpu_number()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; + cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; /* * Offset the instruction pointer to the instruction @@ -566,8 +436,8 @@ dtrace_trap(struct trapframe *frame, u_i /* Page fault. */ case T_PAGEFLT: /* Flag a bad address. */ - cpu_core[cpu_number()].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; - cpu_core[cpu_number()].cpuc_dtrace_illval = rcr2(); + cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; + cpu_core[cpuid].cpuc_dtrace_illval = rcr2(); /* * Offset the instruction pointer to the instruction Index: src/external/cddl/osnet/dev/dtrace/amd64/instr_size.c =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/amd64/instr_size.c diff -N src/external/cddl/osnet/dev/dtrace/amd64/instr_size.c --- src/external/cddl/osnet/dev/dtrace/amd64/instr_size.c 21 Feb 2010 01:46:33 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,134 +0,0 @@ -/* $NetBSD: instr_size.c,v 1.2 2010/02/21 01:46:33 darran Exp $ */ - -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - * - * $FreeBSD: src/sys/cddl/dev/dtrace/amd64/instr_size.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ - - -#if defined(sun) -#pragma ident "@(#)instr_size.c 1.14 05/07/08 SMI" -#endif - -#include -#include -#include -#if defined(sun) -#include -#include -#include -#include -#include -#else -typedef u_int model_t; -#define DATAMODEL_NATIVE 0 -int dtrace_instr_size(uchar_t *); -#endif - -#include - -/* - * This subsystem (with the minor exception of the instr_size() function) is - * is called from DTrace probe context. This imposes several requirements on - * the implementation: - * - * 1. External subsystems and functions may not be referenced. The one current - * exception is for cmn_err, but only to signal the detection of table - * errors. Assuming the tables are correct, no combination of input is to - * trigger a cmn_err call. - * - * 2. These functions can't be allowed to be traced. To prevent this, - * all functions in the probe path (everything except instr_size()) must - * have names that begin with "dtrace_". - */ - -typedef enum dis_isize { - DIS_ISIZE_INSTR, - DIS_ISIZE_OPERAND -} dis_isize_t; - - -/* - * get a byte from instruction stream - */ -static int -dtrace_dis_get_byte(void *p) -{ - int ret; - uchar_t **instr = p; - - ret = **instr; - *instr += 1; - - return (ret); -} - -/* - * Returns either the size of a given instruction, in bytes, or the size of that - * instruction's memory access (if any), depending on the value of `which'. - * If a programming error in the tables is detected, the system will panic to - * ease diagnosis. 
Invalid instructions will not be flagged. They will appear - * to have an instruction size between 1 and the actual size, and will be - * reported as having no memory impact. - */ -/* ARGSUSED2 */ -static int -dtrace_dis_isize(uchar_t *instr, dis_isize_t which, model_t model, int *rmindex) -{ - int sz; - dis86_t x; - uint_t mode = SIZE64; - -#if defined(sun) - mode = (model == DATAMODEL_LP64) ? SIZE64 : SIZE32; -#endif - - x.d86_data = (void **)&instr; - x.d86_get_byte = dtrace_dis_get_byte; - x.d86_check_func = NULL; - - if (dtrace_disx86(&x, mode) != 0) - return (-1); - - if (which == DIS_ISIZE_INSTR) - sz = x.d86_len; /* length of the instruction */ - else - sz = x.d86_memsize; /* length of memory operand */ - - if (rmindex != NULL) - *rmindex = x.d86_rmindex; - return (sz); -} - -int -dtrace_instr_size(uchar_t *instr) -{ - return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, DATAMODEL_NATIVE, - NULL)); -} Index: src/external/cddl/osnet/dev/dtrace/arm/dtrace_asm.S =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/arm/dtrace_asm.S,v retrieving revision 1.6 diff -u -p -r1.6 dtrace_asm.S --- src/external/cddl/osnet/dev/dtrace/arm/dtrace_asm.S 23 Jun 2016 04:35:35 -0000 1.6 +++ src/external/cddl/osnet/dev/dtrace/arm/dtrace_asm.S 25 Apr 2017 02:37:35 -0000 @@ -19,7 +19,7 @@ * * CDDL HEADER END * - * $FreeBSD$ + * $FreeBSD: head/sys/cddl/dev/dtrace/arm/dtrace_asm.S 308427 2016-11-07 20:02:18Z gonzo $ */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 
@@ -28,7 +28,6 @@ #define _ASM #define _LOCORE -#define LOCORE #include #include @@ -36,6 +35,16 @@ #include #include +#define PSR_I I32_bit +#define PSR_F F32_bit + +#ifdef __ARM_BIG_ENDIAN +#define __BIG_ENDIAN 1 +#endif + +#define EENTRY(x) ENTRY_NP(x) +#define EEND(x) /* nothing */ + /* void dtrace_membar_producer(void) */ @@ -56,7 +65,7 @@ dtrace_icookie_t dtrace_interrupt_disabl ENTRY(dtrace_interrupt_disable) mrs r0, cpsr mov r1, r0 - orr r1, r1, #(I32_bit|F32_bit) + orr r1, r1, #(PSR_I | PSR_F) msr cpsr_c, r1 RET END(dtrace_interrupt_disable) @@ -65,44 +74,15 @@ END(dtrace_interrupt_disable) void dtrace_interrupt_enable(dtrace_icookie_t cookie) */ ENTRY(dtrace_interrupt_enable) - and r0, r0, #(I32_bit|F32_bit) + and r0, r0, #(PSR_I | PSR_F) mrs r1, cpsr - bic r1, r1, #(I32_bit|F32_bit) + bic r1, r1, #(PSR_I | PSR_F) orr r1, r1, r0 msr cpsr_c, r1 RET END(dtrace_interrupt_enable) /* -uint32_t dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) -XXX: just disable interrupts for now, add proper implementation for -ARMv6/ARMv7 later -*/ -ENTRY_NP(dtrace_casptr) -ENTRY(dtrace_cas32) - stmfd sp!, {r4, r5} - - mrs r3, cpsr - mov r4, r3 - orr r4, r4, #(I32_bit|F32_bit) - msr cpsr_c, r4 - - ldr r5, [r0] - cmp r5, r1 - movne r0, r5 - bne 2f - - str r2, [r0] - mov r0, r5 - -2: - msr cpsr_c, r3 - ldmfd sp!, {r4, r5} - RET -END(dtrace_cas32) -END(dtrace_casptr) - -/* uint8_t dtrace_fuword8_nocheck(void *addr) */ @@ -135,20 +115,21 @@ END(dtrace_fuword32_nocheck) /* uint64_t dtrace_fuword64_nocheck(void *addr) -XXX: add byteorder check */ ENTRY(dtrace_fuword64_nocheck) ldm r0, {r2, r3} mov r0, r2 mov r1, r3 -#if 0 -/* little endian */ - mov r0, r2 - mov r1, r3 +#if defined(__BIG_ENDIAN__) /* big endian */ mov r0, r3 mov r1, r2 +#else +/* little endian */ + mov r0, r2 + mov r1, r3 + #endif RET END(dtrace_fuword64_nocheck) @@ -159,21 +140,20 @@ dtrace_copy(uintptr_t uaddr, uintptr_t k */ ENTRY(dtrace_copy) stmfd sp!, {r4-r5} /* stack is 8 byte aligned */ - teq r2, 
#0x00000000 - mov r5, #0x00000000 - beq 2f - -1: ldrb r4, [r0], #0x0001 - add r5, r5, #0x00000001 - strb r4, [r1], #0x0001 - teqne r5, r2 - bne 1b + teq r2, #0x00000000 + mov r5, #0x00000000 + beq 2f + +1: ldrb r4, [r0], #0x0001 + add r5, r5, #0x00000001 + strb r4, [r1], #0x0001 + teqne r5, r2 + bne 1b -2: ldmfd sp!, {r4-r5} /* stack is 8 byte aligned */ +2: ldmfd sp!, {r4-r5} /* stack is 8 byte aligned */ RET END(dtrace_copy) - /* void dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size, @@ -181,48 +161,22 @@ dtrace_copystr(uintptr_t uaddr, uintptr_ XXX: Check for flags? */ ENTRY(dtrace_copystr) - stmfd sp!, {r4-r5} /* stack is 8 byte aligned */ - teq r2, #0x00000000 - mov r5, #0x00000000 - beq 2f - -1: ldrb r4, [r0], #0x0001 - add r5, r5, #0x00000001 - teq r4, #0x00000000 - strb r4, [r1], #0x0001 - teqne r5, r2 - bne 1b + stmfd sp!, {r4-r5} /* stack is 8 byte aligned */ + teq r2, #0x00000000 + mov r5, #0x00000000 + beq 2f + +1: ldrb r4, [r0], #0x0001 + add r5, r5, #0x00000001 + teq r4, #0x00000000 + strb r4, [r1], #0x0001 + teqne r5, r2 + bne 1b -2: ldmfd sp!, {r4-r5} /* stack is 8 byte aligned */ +2: ldmfd sp!, {r4-r5} /* stack is 8 byte aligned */ RET END(dtrace_copystr) - -/* -void dtrace_invop_init(void) -*/ -ENTRY(dtrace_invop_init) - ldr r1, .Ldtrace_invop - ldr r2, .Ldtrace_invop_jump_addr - str r1, [r2] - RET - .align 0 -.Ldtrace_invop: - .word dtrace_invop -.Ldtrace_invop_jump_addr: - .word dtrace_invop_jump_addr -END(dtrace_invop_init) - -/* -void dtrace_invop_uninit(void) -*/ -ENTRY(dtrace_invop_uninit) - mov r0, #0 - ldr r1, .Ldtrace_invop_jump_addr - str r0, [r1] - RET -END(dtrace_invop_uninit) - /* uintptr_t dtrace_caller(int aframes) @@ -231,3 +185,53 @@ ENTRY(dtrace_caller) mov r0, #-1 RET END(dtrace_caller) + +/* +uint32_t +dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) + +void * +dtrace_casptr(volatile void *target, volatile void *cmp, volatile void *new) +*/ +EENTRY(dtrace_casptr) +ENTRY(dtrace_cas32) +#if __ARM_ARCH >= 6 + 
+1: ldrex r3, [r0] /* Load target */ + cmp r3, r1 /* Check if *target == cmp */ + bne 2f /* No, return */ + strex ip, r2, [r0] /* Store new to target */ + cmp ip, #0 /* Did the store succeed? */ + bne 1b /* No, try again */ +2: mov r0, r3 /* Return the value loaded from target */ + RET + +#else + + /* + * We don't support MP on CPUs older than v6, so just disable interrupts + * and use non-atomic instructions. + */ + + stmfd sp!, {r4, r5} + + mrs r3, cpsr + mov r4, r3 + orr r4, r4, #(PSR_I | PSR_F) + msr cpsr_c, r4 + + ldr r5, [r0] + cmp r5, r1 + movne r0, r5 + bne 2f + + str r2, [r0] + mov r0, r5 + +2: + msr cpsr_c, r3 + ldmfd sp!, {r4, r5} + RET +#endif +END(dtrace_cas32) +EEND(dtrace_casptr) Index: src/external/cddl/osnet/dev/dtrace/arm/dtrace_isa.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/arm/dtrace_isa.c,v retrieving revision 1.5 diff -u -p -r1.5 dtrace_isa.c --- src/external/cddl/osnet/dev/dtrace/arm/dtrace_isa.c 2 Oct 2015 22:15:18 -0000 1.5 +++ src/external/cddl/osnet/dev/dtrace/arm/dtrace_isa.c 5 Jul 2017 20:28:13 -0000 @@ -19,7 +19,7 @@ * * CDDL HEADER END * - * $FreeBSD$ + * $FreeBSD: head/sys/cddl/dev/dtrace/arm/dtrace_isa.c 295882 2016-02-22 09:08:04Z skra $ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. @@ -100,8 +100,6 @@ dtrace_getpcstack(pc_t *pcstack, int pcs * that generated the stack frame. We hope for the best. 
*/ scp = frame[FR_SCP]; - printf("--> %08x\n", (uint32_t)scp); - if (aframes > 0) { aframes--; if ((aframes == 0) && (caller != 0)) { @@ -109,7 +107,6 @@ dtrace_getpcstack(pc_t *pcstack, int pcs } } else { - printf("++ --> %08x\n", (uint32_t)scp); pcstack[depth++] = scp; } @@ -154,13 +151,13 @@ dtrace_getpcstack(pc_t *pcstack, int pcs void dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) { - printf("unimplemented\n"); + printf("IMPLEMENT ME: %s\n", __func__); } int dtrace_getustackdepth(void) { - printf("unimplemented\n"); + printf("IMPLEMENT ME: %s\n", __func__); return (0); } @@ -174,8 +171,9 @@ dtrace_getufpstack(uint64_t *pcstack, ui uint64_t dtrace_getarg(int arg, int aframes) { - printf("unimplemented\n"); +/* struct arm_frame *fp = (struct arm_frame *)dtrace_getfp();*/ + printf("IMPLEMENT ME: %s\n", __func__); return (0); } @@ -227,6 +225,7 @@ dtrace_getstackdepth(int aframes) ulong_t dtrace_getreg(struct trapframe *rp, uint_t reg) { + printf("IMPLEMENT ME: %s\n", __func__); return (0); } Index: src/external/cddl/osnet/dev/dtrace/arm/dtrace_subr.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/arm/dtrace_subr.c,v retrieving revision 1.3 diff -u -p -r1.3 dtrace_subr.c --- src/external/cddl/osnet/dev/dtrace/arm/dtrace_subr.c 27 Feb 2017 06:47:00 -0000 1.3 +++ src/external/cddl/osnet/dev/dtrace/arm/dtrace_subr.c 10 May 2017 10:10:04 -0000 @@ -21,7 +21,7 @@ * * CDDL HEADER END * - * $FreeBSD$ + * $FreeBSD: head/sys/cddl/dev/dtrace/arm/dtrace_subr.c 308457 2016-11-08 23:59:41Z bdrewery $ * */ /* @@ -47,12 +47,26 @@ #include #include +#define FAULT_ALIGN FAULT_ALIGN_0 extern uintptr_t kernelbase; extern uintptr_t dtrace_in_probe_addr; extern int dtrace_in_probe; + +void dtrace_gethrtime_init(void *arg); + +#define DELAYBRANCH(x) ((int)(x) < 0) + +#define BIT_PC 15 +#define BIT_LR 14 +#define BIT_SP 13 + extern dtrace_id_t dtrace_probeid_error; +extern int 
(*dtrace_invop_jump_addr)(struct trapframe *); +extern void dtrace_getnanotime(struct timespec *tsp); int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t); +void dtrace_invop_init(void); +void dtrace_invop_uninit(void); typedef struct dtrace_invop_hdlr { int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t); @@ -61,8 +75,6 @@ typedef struct dtrace_invop_hdlr { dtrace_invop_hdlr_t *dtrace_invop_hdlr; -void dtrace_gethrtime_init(void *arg); - int dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax) { @@ -76,6 +88,7 @@ dtrace_invop(uintptr_t addr, struct trap return (0); } + void dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t)) { @@ -114,6 +127,7 @@ dtrace_invop_remove(int (*func)(uintptr_ kmem_free(hdlr, sizeof (dtrace_invop_hdlr_t)); } +/*ARGSUSED*/ void dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) { @@ -156,7 +170,7 @@ dtrace_sync_func(void) void dtrace_sync(void) { - dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); + dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); } /* @@ -167,36 +181,35 @@ dtrace_sync(void) * Returns nanoseconds since boot. */ uint64_t -dtrace_gethrtime() +dtrace_gethrtime(void) { - struct timespec curtime; + struct timespec curtime; nanouptime(&curtime); return (curtime.tv_sec * 1000000000UL + curtime.tv_nsec); - } uint64_t dtrace_gethrestime(void) { - struct timespec curtime; + struct timespec current_time; - getnanotime(&curtime); + dtrace_getnanotime(&current_time); - return (curtime.tv_sec * 1000000000UL + curtime.tv_nsec); + return (current_time.tv_sec * 1000000000UL + current_time.tv_nsec); } /* Function to handle DTrace traps during probes. Not used on ARM yet */ int dtrace_trap(struct trapframe *frame, u_int type) { - cpuid_t cpuid = cpu_number(); /* current cpu id */ + cpuid_t curcpu_id = cpu_number(); /* current cpu id */ /* * A trap can occur while DTrace executes a probe.
Before * executing the probe, DTrace blocks re-scheduling and sets - * a flag in it's per-cpu flags to indicate that it doesn't + * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * @@ -204,24 +217,23 @@ dtrace_trap(struct trapframe *frame, u_i * */ - if ((cpu_core[cpuid].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { + if ((cpu_core[curcpu_id].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { /* * There are only a couple of trap types that are expected. * All the rest will be handled in the usual way. */ switch (type) { /* Page fault. */ - case 0: + case FAULT_ALIGN: /* Flag a bad address. */ - cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; - cpu_core[cpuid].cpuc_dtrace_illval = 0; + cpu_core[curcpu_id].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; + cpu_core[curcpu_id].cpuc_dtrace_illval = 0; /* * Offset the instruction pointer to the instruction * following the one causing the fault. */ - panic("%s", __func__); - // frame->pc += sizeof(int); + frame->tf_pc += sizeof(int); return (1); default: /* Handle all other traps in the usual way. */ @@ -248,3 +260,355 @@ dtrace_gethrtime_init(void *arg) { /* FIXME */ } + +static uint32_t +dtrace_expand_imm(uint32_t imm12) +{ + uint32_t unrot = imm12 & 0xff; + int amount = 2 * (imm12 >> 8); + + if (amount) + return (unrot >> amount) | (unrot << (32 - amount)); + else + return unrot; +} + +/* + * AddWithCarry() as defined by the ARM ARM pseudocode: returns the low + * 32 bits of the sum of x, y and carry_in. Both sums are formed in 64 bits + * so that *carry_out (unsigned carry out of bit 31) and *overflow (signed + * overflow) can be derived by comparing them with the truncated result. + */ +static uint32_t +dtrace_add_with_carry(uint32_t x, uint32_t y, int carry_in, + int *carry_out, int *overflow) +{ + uint32_t result; + uint64_t unsigned_sum = (uint64_t)x + y + (uint32_t)carry_in; + int64_t signed_sum = (int64_t)(int32_t)x + (int32_t)y + (int32_t)carry_in; + KASSERT(carry_in == 1); + + result = (uint32_t)(unsigned_sum & 0xffffffff); + *carry_out = ((uint64_t)result == unsigned_sum) ? 0 : 1; + *overflow = ((int64_t)(int32_t)result == signed_sum) ?
0 : 1; + + return result; +} + +static void +dtrace_invop_emulate(int invop, struct trapframe *frame) +{ + uint32_t op = invop; +#if 1 + /* nbsd encoding */ + uint32_t code = op >> 28; + uint32_t data = op; +#else + /* fbsd encoding */ + uint32_t code = op & DTRACE_INVOP_MASK; + uint32_t data = DTRACE_INVOP_DATA(invop); +#endif + + switch (code) { + case DTRACE_INVOP_MOV_IP_SP: + /* mov ip, sp */ + frame->tf_ip = frame->tf_svc_sp; + frame->tf_pc += 4; + break; + case DTRACE_INVOP_BX_LR: + /* bx lr */ + frame->tf_pc = frame->tf_svc_lr; + break; + case DTRACE_INVOP_MOV_PC_LR: + /* mov pc, lr */ + frame->tf_pc = frame->tf_svc_lr; + break; + case DTRACE_INVOP_LDM: + /* ldm sp, {..., pc} */ + /* FALLTHRU */ + case DTRACE_INVOP_POPM: { + /* ldmib sp, {..., pc} */ + uint32_t register_list = (op & 0xffff); + uint32_t *sp = (uint32_t *)(intptr_t)frame->tf_svc_sp; + uint32_t *regs = &frame->tf_r0; + int i; + + /* POPM */ + if (code == DTRACE_INVOP_POPM) + sp++; + + for (i = 0; i <= 12; i++) { + if (register_list & (1 << i)) + regs[i] = *sp++; + } + if (register_list & (1 << 13)) + frame->tf_svc_sp = *sp++; + if (register_list & (1 << 14)) + frame->tf_svc_lr = *sp++; + frame->tf_pc = *sp; + break; + } + case DTRACE_INVOP_LDR_IMM: { + /* ldr r?, [{pc,r?}, #?] */ + uint32_t rt = (op >> 12) & 0xf; + uint32_t rn = (op >> 16) & 0xf; + uint32_t imm = op & 0xfff; + uint32_t *regs = &frame->tf_r0; + KDASSERT(rt <= 12); + KDASSERT(rn == 15 || rn <= 12); + if (rn == 15) + regs[rt] = *((uint32_t *)(intptr_t)(frame->tf_pc + 8 + imm)); + else + regs[rt] = *((uint32_t *)(intptr_t)(regs[rn] + imm)); + frame->tf_pc += 4; + break; + } + case DTRACE_INVOP_MOVW: { + /* movw r?, #? */ + uint32_t rd = (op >> 12) & 0xf; + uint32_t imm = (op & 0xfff) | ((op & 0xf0000) >> 4); + uint32_t *regs = &frame->tf_r0; + KDASSERT(rd <= 12); + regs[rd] = imm; + frame->tf_pc += 4; + break; + } + case DTRACE_INVOP_MOV_IMM: { + /* mov r?, #?
*/ + uint32_t rd = (op >> 12) & 0xf; + uint32_t imm = dtrace_expand_imm(op & 0xfff); + uint32_t *regs = &frame->tf_r0; + KDASSERT(rd <= 12); + regs[rd] = imm; + frame->tf_pc += 4; + break; + } + case DTRACE_INVOP_CMP_IMM: { + /* cmp r?, #? */ + uint32_t rn = (op >> 16) & 0xf; + uint32_t *regs = &frame->tf_r0; + uint32_t imm = dtrace_expand_imm(op & 0xfff); + uint32_t spsr = frame->tf_spsr; + uint32_t result; + int carry; + int overflow; + /* + * (result, carry, overflow) = AddWithCarry(R[n], NOT(imm32), ’1’); + * APSR.N = result<31>; + * APSR.Z = IsZeroBit(result); + * APSR.C = carry; + * APSR.V = overflow; + */ + KDASSERT(rn <= 12); + result = dtrace_add_with_carry(regs[rn], ~imm, 1, &carry, &overflow); + if (result & 0x80000000) + spsr |= PSR_N_bit; + else + spsr &= ~PSR_N_bit; + if (result == 0) + spsr |= PSR_Z_bit; + else + spsr &= ~PSR_Z_bit; + if (carry) + spsr |= PSR_C_bit; + else + spsr &= ~PSR_C_bit; + if (overflow) + spsr |= PSR_V_bit; + else + spsr &= ~PSR_V_bit; + +#if 0 + aprint_normal("pc=%x Rn=%x imm=%x %c%c%c%c\n", frame->tf_pc, regs[rn], imm, + (spsr & PSR_N_bit) ? 'N' : 'n', + (spsr & PSR_Z_bit) ? 'Z' : 'z', + (spsr & PSR_C_bit) ? 'C' : 'c', + (spsr & PSR_V_bit) ? 'V' : 'v'); +#endif + frame->tf_spsr = spsr; + frame->tf_pc += 4; + break; + } + case DTRACE_INVOP_B: { + /* b ??? */ + uint32_t imm = (op & 0x00ffffff) << 2; + int32_t diff; + /* SignExtend(imm26, 32) */ + if (imm & 0x02000000) + imm |= 0xfc000000; + diff = (int32_t)imm; + frame->tf_pc += 8 + diff; + break; + } + case DTRACE_INVOP_PUSHM: { + /* push {...} */ + uint32_t register_list = (op & 0xffff); + uint32_t *sp = (uint32_t *)(intptr_t)frame->tf_svc_sp; + uint32_t *regs = &frame->tf_r0; + int i; + int count = 0; + +#if 0 + if ((op & 0x0fff0fff) == 0x052d0004) { + /* A2: str r4, [sp, #-4]! 
*/ + *(sp - 1) = regs[4]; + frame->tf_pc += 4; + break; + } +#endif + + for (i = 0; i < 16; i++) { + if (register_list & (1 << i)) + count++; + } + sp -= count; + + for (i = 0; i <= 12; i++) { + if (register_list & (1 << i)) + *sp++ = regs[i]; + } + if (register_list & (1 << 13)) + *sp++ = frame->tf_svc_sp; + if (register_list & (1 << 14)) + *sp++ = frame->tf_svc_lr; + if (register_list & (1 << 15)) + *sp = frame->tf_pc + 8; + + /* make sure the caches and memory are in sync */ + cpu_dcache_wbinv_range(frame->tf_svc_sp, count * 4); + + /* In case the current page tables have been modified ... */ + cpu_tlb_flushID(); + cpu_cpwait(); + + frame->tf_svc_sp -= count * 4; + frame->tf_pc += 4; + + break; + } + default: + KDASSERTMSG(0, "invop 0x%08x code %u tf %p", invop, code, frame); + } +} + +static int +dtrace_invop_start(struct trapframe *frame) +{ +#if 0 + register_t *r0, *sp; + int data, invop, reg, update_sp; +#endif + int invop; + + invop = dtrace_invop(frame->tf_pc, frame, frame->tf_r0); + + dtrace_invop_emulate(invop, frame); + +#if 0 + switch (invop & DTRACE_INVOP_MASK) { + case DTRACE_INVOP_PUSHM: + sp = (register_t *)frame->tf_svc_sp; + r0 = &frame->tf_r0; + data = DTRACE_INVOP_DATA(invop); + + /* + * Store the pc, lr, and sp. These have their own + * entries in the struct. 
+ */ + if (data & (1 << BIT_PC)) { + sp--; + *sp = frame->tf_pc; + } + if (data & (1 << BIT_LR)) { + sp--; + *sp = frame->tf_svc_lr; + } + if (data & (1 << BIT_SP)) { + sp--; + *sp = frame->tf_svc_sp; + } + + /* Store the general registers */ + for (reg = 12; reg >= 0; reg--) { + if (data & (1 << reg)) { + sp--; + *sp = r0[reg]; + } + } + + /* Update the stack pointer and program counter to continue */ + frame->tf_svc_sp = (register_t)sp; + frame->tf_pc += 4; + break; + case DTRACE_INVOP_POPM: + sp = (register_t *)frame->tf_svc_sp; + r0 = &frame->tf_r0; + data = DTRACE_INVOP_DATA(invop); + + /* Read the general registers */ + for (reg = 0; reg <= 12; reg++) { + if (data & (1 << reg)) { + r0[reg] = *sp; + sp++; + } + } + + /* + * Set the stack pointer. If we don't update it here we will + * need to update it at the end as the instruction would do + */ + update_sp = 1; + if (data & (1 << BIT_SP)) { + frame->tf_svc_sp = *sp; + *sp++; + update_sp = 0; + } + + /* Update the link register, we need to use the correct copy */ + if (data & (1 << BIT_LR)) { + frame->tf_svc_lr = *sp; + *sp++; + } + /* + * And the program counter. If it's not in the list skip over + * it when we return so to not hit this again. 
+ */ + if (data & (1 << BIT_PC)) { + frame->tf_pc = *sp; + *sp++; + } else + frame->tf_pc += 4; + + /* Update the stack pointer if we haven't already done so */ + if (update_sp) + frame->tf_svc_sp = (register_t)sp; + break; + case DTRACE_INVOP_B: + data = DTRACE_INVOP_DATA(invop) & 0x00ffffff; + /* Sign extend the data */ + if ((data & (1 << 23)) != 0) + data |= 0xff000000; + /* The data is the number of 4-byte words to change the pc */ + data *= 4; + data += 8; + frame->tf_pc += data; + break; + + default: + return (-1); + break; + } +#endif + + return (0); +} + +void dtrace_invop_init(void) +{ + dtrace_invop_jump_addr = dtrace_invop_start; +} + +void dtrace_invop_uninit(void) +{ + dtrace_invop_jump_addr = 0; +} Index: src/external/cddl/osnet/dev/dtrace/arm/regset.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/arm/regset.h,v retrieving revision 1.1 diff -u -p -r1.1 regset.h --- src/external/cddl/osnet/dev/dtrace/arm/regset.h 21 Jun 2013 19:16:00 -0000 1.1 +++ src/external/cddl/osnet/dev/dtrace/arm/regset.h 12 Apr 2017 09:46:41 -0000 @@ -19,7 +19,7 @@ * * CDDL HEADER END * - * $FreeBSD$ + * $FreeBSD: head/sys/cddl/dev/dtrace/arm/regset.h 278529 2015-02-10 19:41:30Z gnn $ */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 
@@ -42,18 +42,13 @@ extern "C" { #endif -/* - * XXX: define registers properly - */ - #if 0 -#define REG_PC PC -#define REG_FP EBP -#define REG_SP SP -#define REG_PS EFL -#define REG_R0 EAX -#define REG_R1 EDX -#endif +#define REG_LINK R14 +#define REG_SP R12 +#define REG_PS R0 +#define REG_R0 R0 +#define REG_R1 R1 +#endif #ifdef __cplusplus } Index: src/external/cddl/osnet/dev/dtrace/i386/dis_tables.c =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/i386/dis_tables.c diff -N src/external/cddl/osnet/dev/dtrace/i386/dis_tables.c --- src/external/cddl/osnet/dev/dtrace/i386/dis_tables.c 20 Jul 2011 19:51:57 -0000 1.3 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,3195 +0,0 @@ -/* $NetBSD: dis_tables.c,v 1.3 2011/07/20 19:51:57 tron Exp $ */ - -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - * - * $FreeBSD: src/sys/cddl/dev/dtrace/i386/dis_tables.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -/* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ - - -#if defined(sun) -#pragma ident "@(#)dis_tables.c 1.11 06/03/02 SMI" -#endif - -#include "dis_tables.h" - -/* BEGIN CSTYLED */ - -/* - * Disassembly begins in dis_distable, which is equivalent to the One-byte - * Opcode Map in the Intel IA32 ISA Reference (page A-6 in my copy). The - * decoding loops then traverse out through the other tables as necessary to - * decode a given instruction. - * - * The behavior of this file can be controlled by one of the following flags: - * - * DIS_TEXT Include text for disassembly - * DIS_MEM Include memory-size calculations - * - * Either or both of these can be defined. - * - * This file is not, and will never be, cstyled. If anything, the tables should - * be taken out another tab stop or two so nothing overlaps. - */ - -/* - * These functions must be provided for the consumer to do disassembly. - */ -#ifdef DIS_TEXT -extern char *strncpy(char *, const char *, size_t); -extern size_t strlen(const char *); -extern int strcmp(const char *, const char *); -extern int strncmp(const char *, const char *, size_t); -extern size_t strlcat(char *, const char *, size_t); -#endif - - -#define TERM NULL /* used to indicate that the 'indirect' */ - /* field terminates - no pointer. */ - -/* Used to decode instructions. */ -typedef struct instable { - const struct instable *it_indirect; /* for decode op codes */ - uchar_t it_adrmode; -#ifdef DIS_TEXT - char it_name[NCPS]; - uint_t it_suffix:1; /* mneu + "w", "l", or "d" */ -#endif -#ifdef DIS_MEM - uint_t it_size:16; -#endif - uint_t it_invalid64:1; /* opcode invalid in amd64 */ - uint_t it_always64:1; /* 64 bit when in 64 bit mode */ - uint_t it_invalid32:1; /* invalid in IA32 */ - uint_t it_stackop:1; /* push/pop stack operation */ -} instable_t; - -/* - * Instruction formats. 
- */ -enum { - UNKNOWN, - MRw, - IMlw, - IMw, - IR, - OA, - AO, - MS, - SM, - Mv, - Mw, - M, /* register or memory */ - Mb, /* register or memory, always byte sized */ - MO, /* memory only (no registers) */ - PREF, - SWAPGS, - R, - RA, - SEG, - MR, - RM, - IA, - MA, - SD, - AD, - SA, - D, - INM, - SO, - BD, - I, - P, - V, - DSHIFT, /* for double shift that has an 8-bit immediate */ - U, - OVERRIDE, - NORM, /* instructions w/o ModR/M byte, no memory access */ - IMPLMEM, /* instructions w/o ModR/M byte, implicit mem access */ - O, /* for call */ - JTAB, /* jump table */ - IMUL, /* for 186 iimul instr */ - CBW, /* so data16 can be evaluated for cbw and variants */ - MvI, /* for 186 logicals */ - ENTER, /* for 186 enter instr */ - RMw, /* for 286 arpl instr */ - Ib, /* for push immediate byte */ - F, /* for 287 instructions */ - FF, /* for 287 instructions */ - FFC, /* for 287 instructions */ - DM, /* 16-bit data */ - AM, /* 16-bit addr */ - LSEG, /* for 3-bit seg reg encoding */ - MIb, /* for 386 logicals */ - SREG, /* for 386 special registers */ - PREFIX, /* a REP instruction prefix */ - LOCK, /* a LOCK instruction prefix */ - INT3, /* The int 3 instruction, which has a fake operand */ - INTx, /* The normal int instruction, with explicit int num */ - DSHIFTcl, /* for double shift that implicitly uses %cl */ - CWD, /* so data16 can be evaluated for cwd and variants */ - RET, /* single immediate 16-bit operand */ - MOVZ, /* for movs and movz, with different size operands */ - XADDB, /* for xaddb */ - MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ - -/* - * MMX/SIMD addressing modes. 
- */ - - MMO, /* Prefixable MMX/SIMD-Int mm/mem -> mm */ - MMOIMPL, /* Prefixable MMX/SIMD-Int mm -> mm (mem) */ - MMO3P, /* Prefixable MMX/SIMD-Int mm -> r32,imm8 */ - MMOM3, /* Prefixable MMX/SIMD-Int mm -> r32 */ - MMOS, /* Prefixable MMX/SIMD-Int mm -> mm/mem */ - MMOMS, /* Prefixable MMX/SIMD-Int mm -> mem */ - MMOPM, /* MMX/SIMD-Int mm/mem -> mm,imm8 */ - MMOPRM, /* Prefixable MMX/SIMD-Int r32/mem -> mm,imm8 */ - MMOSH, /* Prefixable MMX mm,imm8 */ - MM, /* MMX/SIMD-Int mm/mem -> mm */ - MMS, /* MMX/SIMD-Int mm -> mm/mem */ - MMSH, /* MMX mm,imm8 */ - XMMO, /* Prefixable SIMD xmm/mem -> xmm */ - XMMOS, /* Prefixable SIMD xmm -> xmm/mem */ - XMMOPM, /* Prefixable SIMD xmm/mem w/to xmm,imm8 */ - XMMOMX, /* Prefixable SIMD mm/mem -> xmm */ - XMMOX3, /* Prefixable SIMD xmm -> r32 */ - XMMOXMM, /* Prefixable SIMD xmm/mem -> mm */ - XMMOM, /* Prefixable SIMD xmm -> mem */ - XMMOMS, /* Prefixable SIMD mem -> xmm */ - XMM, /* SIMD xmm/mem -> xmm */ - XMMXIMPL, /* SIMD xmm -> xmm (mem) */ - XMM3P, /* SIMD xmm -> r32,imm8 */ - XMMP, /* SIMD xmm/mem w/to xmm,imm8 */ - XMMPRM, /* SIMD r32/mem -> xmm,imm8 */ - XMMS, /* SIMD xmm -> xmm/mem */ - XMMM, /* SIMD mem -> xmm */ - XMMMS, /* SIMD xmm -> mem */ - XMM3MX, /* SIMD r32/mem -> xmm */ - XMM3MXS, /* SIMD xmm -> r32/mem */ - XMMSH, /* SIMD xmm,imm8 */ - XMMXM3, /* SIMD xmm/mem -> r32 */ - XMMX3, /* SIMD xmm -> r32 */ - XMMXMM, /* SIMD xmm/mem -> mm */ - XMMMX, /* SIMD mm -> xmm */ - XMMXM, /* SIMD xmm -> mm */ - XMMFENCE, /* SIMD lfence or mfence */ - XMMSFNC /* SIMD sfence (none or mem) */ -}; - -#define FILL 0x90 /* Fill byte used for alignment (nop) */ - -/* -** Register numbers for the i386 -*/ -#define EAX_REGNO 0 -#define ECX_REGNO 1 -#define EDX_REGNO 2 -#define EBX_REGNO 3 -#define ESP_REGNO 4 -#define EBP_REGNO 5 -#define ESI_REGNO 6 -#define EDI_REGNO 7 - -/* - * modes for immediate values - */ -#define MODE_NONE 0 -#define MODE_IPREL 1 /* signed IP relative value */ -#define MODE_SIGNED 2 /* sign extended 
immediate */ -#define MODE_IMPLIED 3 /* constant value implied from opcode */ -#define MODE_OFFSET 4 /* offset part of an address */ - -/* - * The letters used in these macros are: - * IND - indirect to another to another table - * "T" - means to Terminate indirections (this is the final opcode) - * "S" - means "operand length suffix required" - * "NS" - means "no suffix" which is the operand length suffix of the opcode - * "Z" - means instruction size arg required - * "u" - means the opcode is invalid in IA32 but valid in amd64 - * "x" - means the opcode is invalid in amd64, but not IA32 - * "y" - means the operand size is always 64 bits in 64 bit mode - * "p" - means push/pop stack operation - */ - -#if defined(DIS_TEXT) && defined(DIS_MEM) -#define IND(table) {table, 0, "", 0, 0, 0, 0, 0, 0} -#define INDx(table) {table, 0, "", 0, 0, 1, 0, 0, 0} -#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 0, 1, 0} -#define TNSx(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0, 0} -#define TNSy(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 1} -#define TNSZ(name, amode, sz) {TERM, amode, name, 0, sz, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, name, 0, sz, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, name, 1, 0, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, name, 1, sz, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, name, 1, sz, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, name, 1, sz, 0, 1, 0, 0} -#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} -#elif defined(DIS_TEXT) -#define IND(table) {table, 0, "", 0, 0, 0, 0, 0} -#define INDx(table) {table, 0, "", 0, 1, 0, 0, 0} -#define TNS(name, amode) 
{TERM, amode, name, 0, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0} -#define TNSx(name, amode) {TERM, amode, name, 0, 1, 0, 0, 0} -#define TNSy(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 1, 0, 1} -#define TNSZ(name, amode, sz) {TERM, amode, name, 0, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, name, 0, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, name, 1, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, name, 1, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, name, 1, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, name, 1, 0, 1, 0, 0} -#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} -#elif defined(DIS_MEM) -#define IND(table) {table, 0, 0, 0, 0, 0, 0} -#define INDx(table) {table, 0, 0, 1, 0, 0, 0} -#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, 0, 0, 0, 1, 0} -#define TNSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, 0, 0, 1, 0, 1} -#define TNSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} -#define TNSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, 0, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, sz, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} -#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0, 0} -#else -#define IND(table) {table[0], 0, 0, 0, 0, 0} -#define INDx(table) {table[0], 0, 1, 0, 0, 0} -#define TNS(name, 
amode) {TERM, amode, 0, 0, 0, 0} -#define TNSu(name, amode) {TERM, amode, 0, 0, 1, 0} -#define TNSy(name, amode) {TERM, amode, 0, 1, 0, 0} -#define TNSyp(name, amode) {TERM, amode, 0, 1, 0, 1} -#define TNSx(name, amode) {TERM, amode, 1, 0, 0, 0} -#define TNSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} -#define TNSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} -#define TS(name, amode) {TERM, amode, 0, 0, 0, 0} -#define TSx(name, amode) {TERM, amode, 1, 0, 0, 0} -#define TSy(name, amode) {TERM, amode, 0, 1, 0, 0} -#define TSp(name, amode) {TERM, amode, 0, 0, 0, 1} -#define TSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} -#define TSZx(name, amode, sz) {TERM, amode, 1, 0, 0, 0} -#define TSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} -#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0} -#endif - -#ifdef DIS_TEXT -/* - * this decodes the r_m field for mode's 0, 1, 2 in 16 bit mode - */ -const char *const dis_addr16[3][8] = { -"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "", - "(%bx)", -"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di", "(%bp)", - "(%bx)", -"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "(%bp)", - "(%bx)", -}; - - -/* - * This decodes 32 bit addressing mode r_m field for modes 0, 1, 2 - */ -const char *const dis_addr32_mode0[16] = { - "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "", "(%esi)", "(%edi)", - "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "", "(%r14d)", "(%r15d)" -}; - -const char *const dis_addr32_mode12[16] = { - "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "(%ebp)", "(%esi)", "(%edi)", - "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "(%r13d)", "(%r14d)", "(%r15d)" -}; - -/* - * This decodes 64 bit addressing mode r_m field for modes 0, 1, 2 - */ -const char *const dis_addr64_mode0[16] = { - "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rip)", "(%rsi)", "(%rdi)", - "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%rip)", "(%r14)", "(%r15)" -}; -const char *const dis_addr64_mode12[16] = 
{ - "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rbp)", "(%rsi)", "(%rdi)", - "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%r13)", "(%r14)", "(%r15)" -}; - -/* - * decode for scale from SIB byte - */ -const char *const dis_scale_factor[4] = { ")", ",2)", ",4)", ",8)" }; - -/* - * register decoding for normal references to registers (ie. not addressing) - */ -const char *const dis_REG8[16] = { - "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh", - "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" -}; - -const char *const dis_REG8_REX[16] = { - "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", - "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" -}; - -const char *const dis_REG16[16] = { - "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", - "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" -}; - -const char *const dis_REG32[16] = { - "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", - "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" -}; - -const char *const dis_REG64[16] = { - "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", - "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" -}; - -const char *const dis_DEBUGREG[16] = { - "%db0", "%db1", "%db2", "%db3", "%db4", "%db5", "%db6", "%db7", - "%db8", "%db9", "%db10", "%db11", "%db12", "%db13", "%db14", "%db15" -}; - -const char *const dis_CONTROLREG[16] = { - "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5?", "%cr6?", "%cr7?", - "%cr8", "%cr9?", "%cr10?", "%cr11?", "%cr12?", "%cr13?", "%cr14?", "%cr15?" 
-}; - -const char *const dis_TESTREG[16] = { - "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7", - "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7" -}; - -const char *const dis_MMREG[16] = { - "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7", - "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" -}; - -const char *const dis_XMMREG[16] = { - "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", - "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" -}; - -const char *const dis_SEGREG[16] = { - "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "", - "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "" -}; - -/* - * SIMD predicate suffixes - */ -const char *const dis_PREDSUFFIX[8] = { - "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord" -}; - - - -#endif /* DIS_TEXT */ - - - - -/* - * "decode table" for 64 bit mode MOVSXD instruction (opcode 0x63) - */ -const instable_t dis_opMOVSLD = TNS("movslq",MOVSXZ); - -/* - * "decode table" for pause and clflush instructions - */ -const instable_t dis_opPause = TNS("pause", NORM); - -/* - * Decode table for 0x0F00 opcodes - */ -const instable_t dis_op0F00[8] = { - -/* [0] */ TNS("sldt",M), TNS("str",M), TNSy("lldt",M), TNSy("ltr",M), -/* [4] */ TNSZ("verr",M,2), TNSZ("verw",M,2), INVALID, INVALID, -}; - - -/* - * Decode table for 0x0F01 opcodes - */ -const instable_t dis_op0F01[8] = { - -/* [0] */ TNSZ("sgdt",MO,6), TNSZ("sidt",MO,6), TNSZ("lgdt",MO,6), TNSZ("lidt",MO,6), -/* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS), -}; - -/* - * Decode table for 0x0F18 opcodes -- SIMD prefetch - */ -const instable_t dis_op0F18[8] = { - -/* [0] */ TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF), -/* [4] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Decode table for 0x0FAE opcodes -- SIMD state save/restore - */ -const instable_t dis_op0FAE[8] = { -/* [0] */ 
TNSZ("fxsave",M,512), TNSZ("fxrstor",M,512), TNS("ldmxcsr",M), TNS("stmxcsr",M), -/* [4] */ INVALID, TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC), -}; - -/* - * Decode table for 0x0FBA opcodes - */ - -const instable_t dis_op0FBA[8] = { - -/* [0] */ INVALID, INVALID, INVALID, INVALID, -/* [4] */ TS("bt",MIb), TS("bts",MIb), TS("btr",MIb), TS("btc",MIb), -}; - -/* - * Decode table for 0x0FC7 opcode - */ - -const instable_t dis_op0FC7[8] = { - -/* [0] */ INVALID, TNS("cmpxchg8b",M), INVALID, INVALID, -/* [4] */ INVALID, INVALID, INVALID, INVALID, -}; - - -/* - * Decode table for 0x0FC8 opcode -- 486 bswap instruction - * - *bit pattern: 0000 1111 1100 1reg - */ -const instable_t dis_op0FC8[4] = { -/* [0] */ TNS("bswap",R), INVALID, INVALID, INVALID, -}; - -/* - * Decode table for 0x0F71, 0x0F72, and 0x0F73 opcodes -- MMX instructions - */ -const instable_t dis_op0F7123[4][8] = { -{ -/* [70].0 */ INVALID, INVALID, INVALID, INVALID, -/* .4 */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [71].0 */ INVALID, INVALID, TNS("psrlw",MMOSH), INVALID, -/* .4 */ TNS("psraw",MMOSH), INVALID, TNS("psllw",MMOSH), INVALID, -}, { -/* [72].0 */ INVALID, INVALID, TNS("psrld",MMOSH), INVALID, -/* .4 */ TNS("psrad",MMOSH), INVALID, TNS("pslld",MMOSH), INVALID, -}, { -/* [73].0 */ INVALID, INVALID, TNS("psrlq",MMOSH), TNS("INVALID",MMOSH), -/* .4 */ INVALID, INVALID, TNS("psllq",MMOSH), TNS("INVALID",MMOSH), -} }; - -/* - * Decode table for SIMD extensions to above 0x0F71-0x0F73 opcodes. 
- */ -const instable_t dis_opSIMD7123[32] = { -/* [70].0 */ INVALID, INVALID, INVALID, INVALID, -/* .4 */ INVALID, INVALID, INVALID, INVALID, - -/* [71].0 */ INVALID, INVALID, TNS("psrlw",XMMSH), INVALID, -/* .4 */ TNS("psraw",XMMSH), INVALID, TNS("psllw",XMMSH), INVALID, - -/* [72].0 */ INVALID, INVALID, TNS("psrld",XMMSH), INVALID, -/* .4 */ TNS("psrad",XMMSH), INVALID, TNS("pslld",XMMSH), INVALID, - -/* [73].0 */ INVALID, INVALID, TNS("psrlq",XMMSH), TNS("psrldq",XMMSH), -/* .4 */ INVALID, INVALID, TNS("psllq",XMMSH), TNS("pslldq",XMMSH), -}; - -/* - * SIMD instructions have been wedged into the existing IA32 instruction - * set through the use of prefixes. That is, while 0xf0 0x58 may be - * addps, 0xf3 0xf0 0x58 (literally, repz addps) is a completely different - * instruction - addss. At present, three prefixes have been coopted in - * this manner - address size (0x66), repnz (0xf2) and repz (0xf3). The - * following tables are used to provide the prefixed instruction names. - * The arrays are sparse, but they're fast. - */ - -/* - * Decode table for SIMD instructions with the address size (0x66) prefix. 
- */ -const instable_t dis_opSIMDdata16[256] = { -/* [00] */ INVALID, INVALID, INVALID, INVALID, -/* [04] */ INVALID, INVALID, INVALID, INVALID, -/* [08] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [10] */ TNSZ("movupd",XMM,16), TNSZ("movupd",XMMS,16), TNSZ("movlpd",XMMM,8), TNSZ("movlpd",XMMMS,8), -/* [14] */ TNSZ("unpcklpd",XMM,16),TNSZ("unpckhpd",XMM,16),TNSZ("movhpd",XMMM,8), TNSZ("movhpd",XMMMS,8), -/* [18] */ INVALID, INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, - -/* [20] */ INVALID, INVALID, INVALID, INVALID, -/* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ TNSZ("movapd",XMM,16), TNSZ("movapd",XMMS,16), TNSZ("cvtpi2pd",XMMOMX,8),TNSZ("movntpd",XMMOMS,16), -/* [2C] */ TNSZ("cvttpd2pi",XMMXMM,16),TNSZ("cvtpd2pi",XMMXMM,16),TNSZ("ucomisd",XMM,8),TNSZ("comisd",XMM,8), - -/* [30] */ INVALID, INVALID, INVALID, INVALID, -/* [34] */ INVALID, INVALID, INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, - -/* [40] */ INVALID, INVALID, INVALID, INVALID, -/* [44] */ INVALID, INVALID, INVALID, INVALID, -/* [48] */ INVALID, INVALID, INVALID, INVALID, -/* [4C] */ INVALID, INVALID, INVALID, INVALID, - -/* [50] */ TNS("movmskpd",XMMOX3), TNSZ("sqrtpd",XMM,16), INVALID, INVALID, -/* [54] */ TNSZ("andpd",XMM,16), TNSZ("andnpd",XMM,16), TNSZ("orpd",XMM,16), TNSZ("xorpd",XMM,16), -/* [58] */ TNSZ("addpd",XMM,16), TNSZ("mulpd",XMM,16), TNSZ("cvtpd2ps",XMM,16),TNSZ("cvtps2dq",XMM,16), -/* [5C] */ TNSZ("subpd",XMM,16), TNSZ("minpd",XMM,16), TNSZ("divpd",XMM,16), TNSZ("maxpd",XMM,16), - -/* [60] */ TNSZ("punpcklbw",XMM,16),TNSZ("punpcklwd",XMM,16),TNSZ("punpckldq",XMM,16),TNSZ("packsswb",XMM,16), -/* [64] */ TNSZ("pcmpgtb",XMM,16), TNSZ("pcmpgtw",XMM,16), TNSZ("pcmpgtd",XMM,16), TNSZ("packuswb",XMM,16), -/* [68] */ TNSZ("punpckhbw",XMM,16),TNSZ("punpckhwd",XMM,16),TNSZ("punpckhdq",XMM,16),TNSZ("packssdw",XMM,16), -/* [6C] */ 
TNSZ("punpcklqdq",XMM,16),TNSZ("punpckhqdq",XMM,16),TNSZ("movd",XMM3MX,4),TNSZ("movdqa",XMM,16), - -/* [70] */ TNSZ("pshufd",XMMP,16), INVALID, INVALID, INVALID, -/* [74] */ TNSZ("pcmpeqb",XMM,16), TNSZ("pcmpeqw",XMM,16), TNSZ("pcmpeqd",XMM,16), INVALID, -/* [78] */ INVALID, INVALID, INVALID, INVALID, -/* [7C] */ INVALID, INVALID, TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), - -/* [80] */ INVALID, INVALID, INVALID, INVALID, -/* [84] */ INVALID, INVALID, INVALID, INVALID, -/* [88] */ INVALID, INVALID, INVALID, INVALID, -/* [8C] */ INVALID, INVALID, INVALID, INVALID, - -/* [90] */ INVALID, INVALID, INVALID, INVALID, -/* [94] */ INVALID, INVALID, INVALID, INVALID, -/* [98] */ INVALID, INVALID, INVALID, INVALID, -/* [9C] */ INVALID, INVALID, INVALID, INVALID, - -/* [A0] */ INVALID, INVALID, INVALID, INVALID, -/* [A4] */ INVALID, INVALID, INVALID, INVALID, -/* [A8] */ INVALID, INVALID, INVALID, INVALID, -/* [AC] */ INVALID, INVALID, INVALID, INVALID, - -/* [B0] */ INVALID, INVALID, INVALID, INVALID, -/* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, - -/* [C0] */ INVALID, INVALID, TNSZ("cmppd",XMMP,16), INVALID, -/* [C4] */ TNSZ("pinsrw",XMMPRM,2),TNS("pextrw",XMM3P), TNSZ("shufpd",XMMP,16), INVALID, -/* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, - -/* [D0] */ INVALID, TNSZ("psrlw",XMM,16), TNSZ("psrld",XMM,16), TNSZ("psrlq",XMM,16), -/* [D4] */ TNSZ("paddq",XMM,16), TNSZ("pmullw",XMM,16), TNSZ("movq",XMMS,8), TNS("pmovmskb",XMMX3), -/* [D8] */ TNSZ("psubusb",XMM,16), TNSZ("psubusw",XMM,16), TNSZ("pminub",XMM,16), TNSZ("pand",XMM,16), -/* [DC] */ TNSZ("paddusb",XMM,16), TNSZ("paddusw",XMM,16), TNSZ("pmaxub",XMM,16), TNSZ("pandn",XMM,16), - -/* [E0] */ TNSZ("pavgb",XMM,16), TNSZ("psraw",XMM,16), TNSZ("psrad",XMM,16), TNSZ("pavgw",XMM,16), -/* [E4] */ TNSZ("pmulhuw",XMM,16), TNSZ("pmulhw",XMM,16), 
TNSZ("cvttpd2dq",XMM,16),TNSZ("movntdq",XMMS,16), -/* [E8] */ TNSZ("psubsb",XMM,16), TNSZ("psubsw",XMM,16), TNSZ("pminsw",XMM,16), TNSZ("por",XMM,16), -/* [EC] */ TNSZ("paddsb",XMM,16), TNSZ("paddsw",XMM,16), TNSZ("pmaxsw",XMM,16), TNSZ("pxor",XMM,16), - -/* [F0] */ INVALID, TNSZ("psllw",XMM,16), TNSZ("pslld",XMM,16), TNSZ("psllq",XMM,16), -/* [F4] */ TNSZ("pmuludq",XMM,16), TNSZ("pmaddwd",XMM,16), TNSZ("psadbw",XMM,16), TNSZ("maskmovdqu", XMMXIMPL,16), -/* [F8] */ TNSZ("psubb",XMM,16), TNSZ("psubw",XMM,16), TNSZ("psubd",XMM,16), TNSZ("psubq",XMM,16), -/* [FC] */ TNSZ("paddb",XMM,16), TNSZ("paddw",XMM,16), TNSZ("paddd",XMM,16), INVALID, -}; - -/* - * Decode table for SIMD instructions with the repnz (0xf2) prefix. - */ -const instable_t dis_opSIMDrepnz[256] = { -/* [00] */ INVALID, INVALID, INVALID, INVALID, -/* [04] */ INVALID, INVALID, INVALID, INVALID, -/* [08] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [10] */ TNSZ("movsd",XMM,8), TNSZ("movsd",XMMS,8), INVALID, INVALID, -/* [14] */ INVALID, INVALID, INVALID, INVALID, -/* [18] */ INVALID, INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, - -/* [20] */ INVALID, INVALID, INVALID, INVALID, -/* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),INVALID, -/* [2C] */ TNSZ("cvttsd2si",XMMXM3,8),TNSZ("cvtsd2si",XMMXM3,8),INVALID, INVALID, - -/* [30] */ INVALID, INVALID, INVALID, INVALID, -/* [34] */ INVALID, INVALID, INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, - -/* [40] */ INVALID, INVALID, INVALID, INVALID, -/* [44] */ INVALID, INVALID, INVALID, INVALID, -/* [48] */ INVALID, INVALID, INVALID, INVALID, -/* [4C] */ INVALID, INVALID, INVALID, INVALID, - -/* [50] */ INVALID, TNSZ("sqrtsd",XMM,8), INVALID, INVALID, -/* [54] */ INVALID, INVALID, INVALID, INVALID, -/* [58] */ TNSZ("addsd",XMM,8), TNSZ("mulsd",XMM,8), 
TNSZ("cvtsd2ss",XMM,8), INVALID, -/* [5C] */ TNSZ("subsd",XMM,8), TNSZ("minsd",XMM,8), TNSZ("divsd",XMM,8), TNSZ("maxsd",XMM,8), - -/* [60] */ INVALID, INVALID, INVALID, INVALID, -/* [64] */ INVALID, INVALID, INVALID, INVALID, -/* [68] */ INVALID, INVALID, INVALID, INVALID, -/* [6C] */ INVALID, INVALID, INVALID, INVALID, - -/* [70] */ TNSZ("pshuflw",XMMP,16),INVALID, INVALID, INVALID, -/* [74] */ INVALID, INVALID, INVALID, INVALID, -/* [78] */ INVALID, INVALID, INVALID, INVALID, -/* [7C] */ INVALID, INVALID, INVALID, INVALID, - -/* [80] */ INVALID, INVALID, INVALID, INVALID, -/* [84] */ INVALID, INVALID, INVALID, INVALID, -/* [88] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [90] */ INVALID, INVALID, INVALID, INVALID, -/* [94] */ INVALID, INVALID, INVALID, INVALID, -/* [98] */ INVALID, INVALID, INVALID, INVALID, -/* [9C] */ INVALID, INVALID, INVALID, INVALID, - -/* [A0] */ INVALID, INVALID, INVALID, INVALID, -/* [A4] */ INVALID, INVALID, INVALID, INVALID, -/* [A8] */ INVALID, INVALID, INVALID, INVALID, -/* [AC] */ INVALID, INVALID, INVALID, INVALID, - -/* [B0] */ INVALID, INVALID, INVALID, INVALID, -/* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, - -/* [C0] */ INVALID, INVALID, TNSZ("cmpsd",XMMP,8), INVALID, -/* [C4] */ INVALID, INVALID, INVALID, INVALID, -/* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, - -/* [D0] */ INVALID, INVALID, INVALID, INVALID, -/* [D4] */ INVALID, INVALID, TNS("movdq2q",XMMXM), INVALID, -/* [D8] */ INVALID, INVALID, INVALID, INVALID, -/* [DC] */ INVALID, INVALID, INVALID, INVALID, - -/* [E0] */ INVALID, INVALID, INVALID, INVALID, -/* [E4] */ INVALID, INVALID, TNSZ("cvtpd2dq",XMM,16),INVALID, -/* [E8] */ INVALID, INVALID, INVALID, INVALID, -/* [EC] */ INVALID, INVALID, INVALID, INVALID, - -/* [F0] */ INVALID, INVALID, INVALID, INVALID, -/* [F4] */ 
INVALID, INVALID, INVALID, INVALID, -/* [F8] */ INVALID, INVALID, INVALID, INVALID, -/* [FC] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Decode table for SIMD instructions with the repz (0xf3) prefix. - */ -const instable_t dis_opSIMDrepz[256] = { -/* [00] */ INVALID, INVALID, INVALID, INVALID, -/* [04] */ INVALID, INVALID, INVALID, INVALID, -/* [08] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [10] */ TNSZ("movss",XMM,4), TNSZ("movss",XMMS,4), INVALID, INVALID, -/* [14] */ INVALID, INVALID, INVALID, INVALID, -/* [18] */ INVALID, INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, - -/* [20] */ INVALID, INVALID, INVALID, INVALID, -/* [24] */ INVALID, INVALID, INVALID, INVALID, -/* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),INVALID, -/* [2C] */ TNSZ("cvttss2si",XMMXM3,4),TNSZ("cvtss2si",XMMXM3,4),INVALID, INVALID, - -/* [30] */ INVALID, INVALID, INVALID, INVALID, -/* [34] */ INVALID, INVALID, INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, - -/* [40] */ INVALID, INVALID, INVALID, INVALID, -/* [44] */ INVALID, INVALID, INVALID, INVALID, -/* [48] */ INVALID, INVALID, INVALID, INVALID, -/* [4C] */ INVALID, INVALID, INVALID, INVALID, - -/* [50] */ INVALID, TNSZ("sqrtss",XMM,4), TNSZ("rsqrtss",XMM,4), TNSZ("rcpss",XMM,4), -/* [54] */ INVALID, INVALID, INVALID, INVALID, -/* [58] */ TNSZ("addss",XMM,4), TNSZ("mulss",XMM,4), TNSZ("cvtss2sd",XMM,4), TNSZ("cvttps2dq",XMM,16), -/* [5C] */ TNSZ("subss",XMM,4), TNSZ("minss",XMM,4), TNSZ("divss",XMM,4), TNSZ("maxss",XMM,4), - -/* [60] */ INVALID, INVALID, INVALID, INVALID, -/* [64] */ INVALID, INVALID, INVALID, INVALID, -/* [68] */ INVALID, INVALID, INVALID, INVALID, -/* [6C] */ INVALID, INVALID, INVALID, TNSZ("movdqu",XMM,16), - -/* [70] */ TNSZ("pshufhw",XMMP,16),INVALID, INVALID, INVALID, -/* [74] */ INVALID, INVALID, INVALID, INVALID, -/* [78] */ INVALID, INVALID, INVALID, 
INVALID, -/* [7C] */ INVALID, INVALID, TNSZ("movq",XMM,8), TNSZ("movdqu",XMMS,16), - -/* [80] */ INVALID, INVALID, INVALID, INVALID, -/* [84] */ INVALID, INVALID, INVALID, INVALID, -/* [88] */ INVALID, INVALID, INVALID, INVALID, -/* [0C] */ INVALID, INVALID, INVALID, INVALID, - -/* [90] */ INVALID, INVALID, INVALID, INVALID, -/* [94] */ INVALID, INVALID, INVALID, INVALID, -/* [98] */ INVALID, INVALID, INVALID, INVALID, -/* [9C] */ INVALID, INVALID, INVALID, INVALID, - -/* [A0] */ INVALID, INVALID, INVALID, INVALID, -/* [A4] */ INVALID, INVALID, INVALID, INVALID, -/* [A8] */ INVALID, INVALID, INVALID, INVALID, -/* [AC] */ INVALID, INVALID, INVALID, INVALID, - -/* [B0] */ INVALID, INVALID, INVALID, INVALID, -/* [B4] */ INVALID, INVALID, INVALID, INVALID, -/* [B8] */ INVALID, INVALID, INVALID, INVALID, -/* [BC] */ INVALID, INVALID, INVALID, INVALID, - -/* [C0] */ INVALID, INVALID, TNSZ("cmpss",XMMP,4), INVALID, -/* [C4] */ INVALID, INVALID, INVALID, INVALID, -/* [C8] */ INVALID, INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, - -/* [D0] */ INVALID, INVALID, INVALID, INVALID, -/* [D4] */ INVALID, INVALID, TNS("movq2dq",XMMMX), INVALID, -/* [D8] */ INVALID, INVALID, INVALID, INVALID, -/* [DC] */ INVALID, INVALID, INVALID, INVALID, - -/* [E0] */ INVALID, INVALID, INVALID, INVALID, -/* [E4] */ INVALID, INVALID, TNSZ("cvtdq2pd",XMM,8), INVALID, -/* [E8] */ INVALID, INVALID, INVALID, INVALID, -/* [EC] */ INVALID, INVALID, INVALID, INVALID, - -/* [F0] */ INVALID, INVALID, INVALID, INVALID, -/* [F4] */ INVALID, INVALID, INVALID, INVALID, -/* [F8] */ INVALID, INVALID, INVALID, INVALID, -/* [FC] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Decode table for 0x0F opcodes - */ - -const instable_t dis_op0F[16][16] = { -{ -/* [00] */ IND(dis_op0F00), IND(dis_op0F01), TNS("lar",MR), TNS("lsl",MR), -/* [04] */ INVALID, TNS("syscall",NORM), TNS("clts",NORM), TNS("sysret",NORM), -/* [08] */ TNS("invd",NORM), TNS("wbinvd",NORM), INVALID, 
TNS("ud2",NORM), -/* [0C] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), -/* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), -/* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, -/* [1C] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), -/* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, -/* [28] */ TNSZ("movaps",XMMO,16), TNSZ("movaps",XMMOS,16),TNSZ("cvtpi2ps",XMMOMX,8),TNSZ("movntps",XMMOS,16), -/* [2C] */ TNSZ("cvttps2pi",XMMOXMM,8),TNSZ("cvtps2pi",XMMOXMM,8),TNSZ("ucomiss",XMMO,4),TNSZ("comiss",XMMO,4), -}, { -/* [30] */ TNS("wrmsr",NORM), TNS("rdtsc",NORM), TNS("rdmsr",NORM), TNS("rdpmc",NORM), -/* [34] */ TNSx("sysenter",NORM), TNSx("sysexit",NORM), INVALID, INVALID, -/* [38] */ INVALID, INVALID, INVALID, INVALID, -/* [3C] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [40] */ TS("cmovx.o",MR), TS("cmovx.no",MR), TS("cmovx.b",MR), TS("cmovx.ae",MR), -/* [44] */ TS("cmovx.e",MR), TS("cmovx.ne",MR), TS("cmovx.be",MR), TS("cmovx.a",MR), -/* [48] */ TS("cmovx.s",MR), TS("cmovx.ns",MR), TS("cmovx.pe",MR), TS("cmovx.po",MR), -/* [4C] */ TS("cmovx.l",MR), TS("cmovx.ge",MR), TS("cmovx.le",MR), TS("cmovx.g",MR), -}, { -/* [50] */ TNS("movmskps",XMMOX3), TNSZ("sqrtps",XMMO,16), TNSZ("rsqrtps",XMMO,16),TNSZ("rcpps",XMMO,16), -/* [54] */ TNSZ("andps",XMMO,16), TNSZ("andnps",XMMO,16), TNSZ("orps",XMMO,16), TNSZ("xorps",XMMO,16), -/* [58] */ TNSZ("addps",XMMO,16), TNSZ("mulps",XMMO,16), TNSZ("cvtps2pd",XMMO,8),TNSZ("cvtdq2ps",XMMO,16), -/* [5C] */ TNSZ("subps",XMMO,16), TNSZ("minps",XMMO,16), TNSZ("divps",XMMO,16), TNSZ("maxps",XMMO,16), -}, { -/* [60] */ TNSZ("punpcklbw",MMO,4),TNSZ("punpcklwd",MMO,4),TNSZ("punpckldq",MMO,4),TNSZ("packsswb",MMO,8), -/* [64] */ TNSZ("pcmpgtb",MMO,8), TNSZ("pcmpgtw",MMO,8), 
TNSZ("pcmpgtd",MMO,8), TNSZ("packuswb",MMO,8), -/* [68] */ TNSZ("punpckhbw",MMO,8),TNSZ("punpckhwd",MMO,8),TNSZ("punpckhdq",MMO,8),TNSZ("packssdw",MMO,8), -/* [6C] */ TNSZ("INVALID",MMO,0), TNSZ("INVALID",MMO,0), TNSZ("movd",MMO,4), TNSZ("movq",MMO,8), -}, { -/* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), -/* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), -/* [78] */ INVALID, INVALID, INVALID, INVALID, -/* [7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), -}, { -/* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), -/* [84] */ TNS("je",D), TNS("jne",D), TNS("jbe",D), TNS("ja",D), -/* [88] */ TNS("js",D), TNS("jns",D), TNS("jp",D), TNS("jnp",D), -/* [8C] */ TNS("jl",D), TNS("jge",D), TNS("jle",D), TNS("jg",D), -}, { -/* [90] */ TNS("seto",Mb), TNS("setno",Mb), TNS("setb",Mb), TNS("setae",Mb), -/* [94] */ TNS("sete",Mb), TNS("setne",Mb), TNS("setbe",Mb), TNS("seta",Mb), -/* [98] */ TNS("sets",Mb), TNS("setns",Mb), TNS("setp",Mb), TNS("setnp",Mb), -/* [9C] */ TNS("setl",Mb), TNS("setge",Mb), TNS("setle",Mb), TNS("setg",Mb), -}, { -/* [A0] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("cpuid",NORM), TS("bt",RMw), -/* [A4] */ TS("shld",DSHIFT), TS("shld",DSHIFTcl), INVALID, INVALID, -/* [A8] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("rsm",NORM), TS("bts",RMw), -/* [AC] */ TS("shrd",DSHIFT), TS("shrd",DSHIFTcl), IND(dis_op0FAE), TS("imul",MRw), -}, { -/* [B0] */ TNS("cmpxchgb",RMw), TS("cmpxchg",RMw), TS("lss",MR), TS("btr",RMw), -/* [B4] */ TS("lfs",MR), TS("lgs",MR), TS("movzb",MOVZ), TNS("movzwl",MOVZ), -/* [B8] */ INVALID, INVALID, IND(dis_op0FBA), TS("btc",RMw), -/* [BC] */ TS("bsf",MRw), TS("bsr",MRw), TS("movsb",MOVZ), TNS("movswl",MOVZ), -}, { -/* [C0] */ TNS("xaddb",XADDB), TS("xadd",RMw), TNSZ("cmpps",XMMOPM,16),TNS("movnti",RM), -/* [C4] */ TNSZ("pinsrw",MMOPRM,2),TNS("pextrw",MMO3P), TNSZ("shufps",XMMOPM,16),IND(dis_op0FC7), -/* [C8] */ INVALID, 
INVALID, INVALID, INVALID, -/* [CC] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [D0] */ INVALID, TNSZ("psrlw",MMO,8), TNSZ("psrld",MMO,8), TNSZ("psrlq",MMO,8), -/* [D4] */ TNSZ("paddq",MMO,8), TNSZ("pmullw",MMO,8), TNSZ("INVALID",MMO,0), TNS("pmovmskb",MMOM3), -/* [D8] */ TNSZ("psubusb",MMO,8), TNSZ("psubusw",MMO,8), TNSZ("pminub",MMO,8), TNSZ("pand",MMO,8), -/* [DC] */ TNSZ("paddusb",MMO,8), TNSZ("paddusw",MMO,8), TNSZ("pmaxub",MMO,8), TNSZ("pandn",MMO,8), -}, { -/* [E0] */ TNSZ("pavgb",MMO,8), TNSZ("psraw",MMO,8), TNSZ("psrad",MMO,8), TNSZ("pavgw",MMO,8), -/* [E4] */ TNSZ("pmulhuw",MMO,8), TNSZ("pmulhw",MMO,8), TNS("INVALID",XMMO), TNSZ("movntq",MMOMS,8), -/* [E8] */ TNSZ("psubsb",MMO,8), TNSZ("psubsw",MMO,8), TNSZ("pminsw",MMO,8), TNSZ("por",MMO,8), -/* [EC] */ TNSZ("paddsb",MMO,8), TNSZ("paddsw",MMO,8), TNSZ("pmaxsw",MMO,8), TNSZ("pxor",MMO,8), -}, { -/* [F0] */ INVALID, TNSZ("psllw",MMO,8), TNSZ("pslld",MMO,8), TNSZ("psllq",MMO,8), -/* [F4] */ TNSZ("pmuludq",MMO,8), TNSZ("pmaddwd",MMO,8), TNSZ("psadbw",MMO,8), TNSZ("maskmovq",MMOIMPL,8), -/* [F8] */ TNSZ("psubb",MMO,8), TNSZ("psubw",MMO,8), TNSZ("psubd",MMO,8), TNSZ("psubq",MMO,8), -/* [FC] */ TNSZ("paddb",MMO,8), TNSZ("paddw",MMO,8), TNSZ("paddd",MMO,8), INVALID, -} }; - - -/* - * Decode table for 0x80 opcodes - */ - -const instable_t dis_op80[8] = { - -/* [0] */ TNS("addb",IMlw), TNS("orb",IMw), TNS("adcb",IMlw), TNS("sbbb",IMlw), -/* [4] */ TNS("andb",IMw), TNS("subb",IMlw), TNS("xorb",IMw), TNS("cmpb",IMlw), -}; - - -/* - * Decode table for 0x81 opcodes. - */ - -const instable_t dis_op81[8] = { - -/* [0] */ TS("add",IMlw), TS("or",IMw), TS("adc",IMlw), TS("sbb",IMlw), -/* [4] */ TS("and",IMw), TS("sub",IMlw), TS("xor",IMw), TS("cmp",IMlw), -}; - - -/* - * Decode table for 0x82 opcodes. 
- */ - -const instable_t dis_op82[8] = { - -/* [0] */ TNSx("addb",IMlw), TNSx("orb",IMlw), TNSx("adcb",IMlw), TNSx("sbbb",IMlw), -/* [4] */ TNSx("andb",IMlw), TNSx("subb",IMlw), TNSx("xorb",IMlw), TNSx("cmpb",IMlw), -}; -/* - * Decode table for 0x83 opcodes. - */ - -const instable_t dis_op83[8] = { - -/* [0] */ TS("add",IMlw), TS("or",IMlw), TS("adc",IMlw), TS("sbb",IMlw), -/* [4] */ TS("and",IMlw), TS("sub",IMlw), TS("xor",IMlw), TS("cmp",IMlw), -}; - -/* - * Decode table for 0xC0 opcodes. - */ - -const instable_t dis_opC0[8] = { - -/* [0] */ TNS("rolb",MvI), TNS("rorb",MvI), TNS("rclb",MvI), TNS("rcrb",MvI), -/* [4] */ TNS("shlb",MvI), TNS("shrb",MvI), INVALID, TNS("sarb",MvI), -}; - -/* - * Decode table for 0xD0 opcodes. - */ - -const instable_t dis_opD0[8] = { - -/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), -/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), -}; - -/* - * Decode table for 0xC1 opcodes. - * 186 instruction set - */ - -const instable_t dis_opC1[8] = { - -/* [0] */ TS("rol",MvI), TS("ror",MvI), TS("rcl",MvI), TS("rcr",MvI), -/* [4] */ TS("shl",MvI), TS("shr",MvI), TS("sal",MvI), TS("sar",MvI), -}; - -/* - * Decode table for 0xD1 opcodes. - */ - -const instable_t dis_opD1[8] = { - -/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), -/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("sal",Mv), TS("sar",Mv), -}; - - -/* - * Decode table for 0xD2 opcodes. - */ - -const instable_t dis_opD2[8] = { - -/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), -/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), -}; -/* - * Decode table for 0xD3 opcodes. - */ - -const instable_t dis_opD3[8] = { - -/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), -/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("salb",Mv), TS("sar",Mv), -}; - - -/* - * Decode table for 0xF6 opcodes. 
- */ - -const instable_t dis_opF6[8] = { - -/* [0] */ TNS("testb",IMw), TNS("testb",IMw), TNS("notb",Mw), TNS("negb",Mw), -/* [4] */ TNS("mulb",MA), TNS("imulb",MA), TNS("divb",MA), TNS("idivb",MA), -}; - - -/* - * Decode table for 0xF7 opcodes. - */ - -const instable_t dis_opF7[8] = { - -/* [0] */ TS("test",IMw), TS("test",IMw), TS("not",Mw), TS("neg",Mw), -/* [4] */ TS("mul",MA), TS("imul",MA), TS("div",MA), TS("idiv",MA), -}; - - -/* - * Decode table for 0xFE opcodes. - */ - -const instable_t dis_opFE[8] = { - -/* [0] */ TNS("incb",Mw), TNS("decb",Mw), INVALID, INVALID, -/* [4] */ INVALID, INVALID, INVALID, INVALID, -}; -/* - * Decode table for 0xFF opcodes. - */ - -const instable_t dis_opFF[8] = { - -/* [0] */ TS("inc",Mw), TS("dec",Mw), TNSyp("call",INM), TNS("lcall",INM), -/* [4] */ TNSy("jmp",INM), TNS("ljmp",INM), TSp("push",M), INVALID, -}; - -/* for 287 instructions, which are a mess to decode */ - -const instable_t dis_opFP1n2[8][8] = { -{ -/* bit pattern: 1101 1xxx MODxx xR/M */ -/* [0,0] */ TNS("fadds",M), TNS("fmuls",M), TNS("fcoms",M), TNS("fcomps",M), -/* [0,4] */ TNS("fsubs",M), TNS("fsubrs",M), TNS("fdivs",M), TNS("fdivrs",M), -}, { -/* [1,0] */ TNS("flds",M), INVALID, TNS("fsts",M), TNS("fstps",M), -/* [1,4] */ TNSZ("fldenv",M,28), TNSZ("fldcw",M,2), TNSZ("fnstenv",M,28), TNSZ("fnstcw",M,2), -}, { -/* [2,0] */ TNS("fiaddl",M), TNS("fimull",M), TNS("ficoml",M), TNS("ficompl",M), -/* [2,4] */ TNS("fisubl",M), TNS("fisubrl",M), TNS("fidivl",M), TNS("fidivrl",M), -}, { -/* [3,0] */ TNS("fildl",M), INVALID, TNS("fistl",M), TNS("fistpl",M), -/* [3,4] */ INVALID, TNSZ("fldt",M,10), INVALID, TNSZ("fstpt",M,10), -}, { -/* [4,0] */ TNSZ("faddl",M,8), TNSZ("fmull",M,8), TNSZ("fcoml",M,8), TNSZ("fcompl",M,8), -/* [4,1] */ TNSZ("fsubl",M,8), TNSZ("fsubrl",M,8), TNSZ("fdivl",M,8), TNSZ("fdivrl",M,8), -}, { -/* [5,0] */ TNSZ("fldl",M,8), INVALID, TNSZ("fstl",M,8), TNSZ("fstpl",M,8), -/* [5,4] */ TNSZ("frstor",M,108), INVALID, TNSZ("fnsave",M,108), 
TNSZ("fnstsw",M,2), -}, { -/* [6,0] */ TNSZ("fiadd",M,2), TNSZ("fimul",M,2), TNSZ("ficom",M,2), TNSZ("ficomp",M,2), -/* [6,4] */ TNSZ("fisub",M,2), TNSZ("fisubr",M,2), TNSZ("fidiv",M,2), TNSZ("fidivr",M,2), -}, { -/* [7,0] */ TNSZ("fild",M,2), INVALID, TNSZ("fist",M,2), TNSZ("fistp",M,2), -/* [7,4] */ TNSZ("fbld",M,10), TNSZ("fildll",M,8), TNSZ("fbstp",M,10), TNSZ("fistpll",M,8), -} }; - -const instable_t dis_opFP3[8][8] = { -{ -/* bit pattern: 1101 1xxx 11xx xREG */ -/* [0,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), -/* [0,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), -}, { -/* [1,0] */ TNS("fld",F), TNS("fxch",F), TNS("fnop",NORM), TNS("fstp",F), -/* [1,4] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [2,0] */ INVALID, INVALID, INVALID, INVALID, -/* [2,4] */ INVALID, TNS("fucompp",NORM), INVALID, INVALID, -}, { -/* [3,0] */ INVALID, INVALID, INVALID, INVALID, -/* [3,4] */ INVALID, INVALID, INVALID, INVALID, -}, { -/* [4,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), -/* [4,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), -}, { -/* [5,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fst",F), TNS("fstp",F), -/* [5,4] */ TNS("fucom",F), TNS("fucomp",F), INVALID, INVALID, -}, { -/* [6,0] */ TNS("faddp",FF), TNS("fmulp",FF), TNS("fcomp",F), TNS("fcompp",NORM), -/* [6,4] */ TNS("fsubp",FF), TNS("fsubrp",FF), TNS("fdivp",FF), TNS("fdivrp",FF), -}, { -/* [7,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fstp",F), TNS("fstp",F), -/* [7,4] */ TNS("fnstsw",M), TNS("fucomip",FFC), TNS("fcomip",FFC), INVALID, -} }; - -const instable_t dis_opFP4[4][8] = { -{ -/* bit pattern: 1101 1001 111x xxxx */ -/* [0,0] */ TNS("fchs",NORM), TNS("fabs",NORM), INVALID, INVALID, -/* [0,4] */ TNS("ftst",NORM), TNS("fxam",NORM), TNS("ftstp",NORM), INVALID, -}, { -/* [1,0] */ TNS("fld1",NORM), TNS("fldl2t",NORM), TNS("fldl2e",NORM), TNS("fldpi",NORM), -/* [1,4] */ TNS("fldlg2",NORM), TNS("fldln2",NORM), 
TNS("fldz",NORM), INVALID, -}, { -/* [2,0] */ TNS("f2xm1",NORM), TNS("fyl2x",NORM), TNS("fptan",NORM), TNS("fpatan",NORM), -/* [2,4] */ TNS("fxtract",NORM), TNS("fprem1",NORM), TNS("fdecstp",NORM), TNS("fincstp",NORM), -}, { -/* [3,0] */ TNS("fprem",NORM), TNS("fyl2xp1",NORM), TNS("fsqrt",NORM), TNS("fsincos",NORM), -/* [3,4] */ TNS("frndint",NORM), TNS("fscale",NORM), TNS("fsin",NORM), TNS("fcos",NORM), -} }; - -const instable_t dis_opFP5[8] = { -/* bit pattern: 1101 1011 111x xxxx */ -/* [0] */ TNS("feni",NORM), TNS("fdisi",NORM), TNS("fnclex",NORM), TNS("fninit",NORM), -/* [4] */ TNS("fsetpm",NORM), TNS("frstpm",NORM), INVALID, INVALID, -}; - -const instable_t dis_opFP6[8] = { -/* bit pattern: 1101 1011 11yy yxxx */ -/* [00] */ TNS("fcmov.nb",FF), TNS("fcmov.ne",FF), TNS("fcmov.nbe",FF), TNS("fcmov.nu",FF), -/* [04] */ INVALID, TNS("fucomi",F), TNS("fcomi",F), INVALID, -}; - -const instable_t dis_opFP7[8] = { -/* bit pattern: 1101 1010 11yy yxxx */ -/* [00] */ TNS("fcmov.b",FF), TNS("fcmov.e",FF), TNS("fcmov.be",FF), TNS("fcmov.u",FF), -/* [04] */ INVALID, INVALID, INVALID, INVALID, -}; - -/* - * Main decode table for the op codes. The first two nibbles - * will be used as an index into the table. If there is a - * a need to further decode an instruction, the array to be - * referenced is indicated with the other two entries being - * empty. 
- */ - -const instable_t dis_distable[16][16] = { -{ -/* [0,0] */ TNS("addb",RMw), TS("add",RMw), TNS("addb",MRw), TS("add",MRw), -/* [0,4] */ TNS("addb",IA), TS("add",IA), TSx("push",SEG), TSx("pop",SEG), -/* [0,8] */ TNS("orb",RMw), TS("or",RMw), TNS("orb",MRw), TS("or",MRw), -/* [0,C] */ TNS("orb",IA), TS("or",IA), TSx("push",SEG), IND(&dis_op0F[0][0]), -}, { -/* [1,0] */ TNS("adcb",RMw), TS("adc",RMw), TNS("adcb",MRw), TS("adc",MRw), -/* [1,4] */ TNS("adcb",IA), TS("adc",IA), TSx("push",SEG), TSx("pop",SEG), -/* [1,8] */ TNS("sbbb",RMw), TS("sbb",RMw), TNS("sbbb",MRw), TS("sbb",MRw), -/* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), -}, { -/* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), -/* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), -/* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), -/* [2,C] */ TNS("subb",IA), TS("sub",IA), TNSx("%cs:",OVERRIDE), TNSx("das",NORM), -}, { -/* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), -/* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), -/* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), -/* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), -}, { -/* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), -/* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), -/* [4,8] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), -/* [4,C] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), -}, { -/* [5,0] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), -/* [5,4] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), -/* [5,8] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), -/* [5,C] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), -}, { -/* [6,0] */ TSZx("pusha",IMPLMEM,28),TSZx("popa",IMPLMEM,28), TSx("bound",MR), 
TNS("arpl",RMw), -/* [6,4] */ TNS("%fs:",OVERRIDE), TNS("%gs:",OVERRIDE), TNS("data16",DM), TNS("addr16",AM), -/* [6,8] */ TSp("push",I), TS("imul",IMUL), TSp("push",Ib), TS("imul",IMUL), -/* [6,C] */ TNSZ("insb",IMPLMEM,1), TSZ("ins",IMPLMEM,4), TNSZ("outsb",IMPLMEM,1),TSZ("outs",IMPLMEM,4), -}, { -/* [7,0] */ TNSy("jo",BD), TNSy("jno",BD), TNSy("jb",BD), TNSy("jae",BD), -/* [7,4] */ TNSy("je",BD), TNSy("jne",BD), TNSy("jbe",BD), TNSy("ja",BD), -/* [7,8] */ TNSy("js",BD), TNSy("jns",BD), TNSy("jp",BD), TNSy("jnp",BD), -/* [7,C] */ TNSy("jl",BD), TNSy("jge",BD), TNSy("jle",BD), TNSy("jg",BD), -}, { -/* [8,0] */ IND(dis_op80), IND(dis_op81), INDx(dis_op82), IND(dis_op83), -/* [8,4] */ TNS("testb",RMw), TS("test",RMw), TNS("xchgb",RMw), TS("xchg",RMw), -/* [8,8] */ TNS("movb",RMw), TS("mov",RMw), TNS("movb",MRw), TS("mov",MRw), -/* [8,C] */ TNS("movw",SM), TS("lea",MR), TNS("movw",MS), TSp("pop",M), -}, { -/* [9,0] */ TNS("nop",NORM), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), -/* [9,4] */ TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), -/* [9,8] */ TNS("cXtX",CBW), TNS("cXtX",CWD), TNSx("lcall",SO), TNS("fwait",NORM), -/* [9,C] */ TSZy("pushf",IMPLMEM,4),TSZy("popf",IMPLMEM,4), TNSx("sahf",NORM), TNSx("lahf",NORM), -}, { -/* [A,0] */ TNS("movb",OA), TS("mov",OA), TNS("movb",AO), TS("mov",AO), -/* [A,4] */ TNSZ("movsb",SD,1), TS("movs",SD), TNSZ("cmpsb",SD,1), TS("cmps",SD), -/* [A,8] */ TNS("testb",IA), TS("test",IA), TNS("stosb",AD), TS("stos",AD), -/* [A,C] */ TNS("lodsb",SA), TS("lods",SA), TNS("scasb",AD), TS("scas",AD), -}, { -/* [B,0] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), -/* [B,4] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), -/* [B,8] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), -/* [B,C] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), -}, { -/* [C,0] */ IND(dis_opC0), IND(dis_opC1), TNSyp("ret",RET), TNSyp("ret",NORM), -/* [C,4] */ TNSx("les",MR), TNSx("lds",MR), 
TNS("movb",IMw), TS("mov",IMw), -/* [C,8] */ TNSyp("enter",ENTER), TNSyp("leave",NORM), TNS("lret",RET), TNS("lret",NORM), -/* [C,C] */ TNS("int",INT3), TNS("int",INTx), TNSx("into",NORM), TNS("iret",NORM), -}, { -/* [D,0] */ IND(dis_opD0), IND(dis_opD1), IND(dis_opD2), IND(dis_opD3), -/* [D,4] */ TNSx("aam",U), TNSx("aad",U), TNSx("falc",NORM), TNSZ("xlat",IMPLMEM,1), - -/* 287 instructions. Note that although the indirect field */ -/* indicates opFP1n2 for further decoding, this is not necessarily */ -/* the case since the opFP arrays are not partitioned according to key1 */ -/* and key2. opFP1n2 is given only to indicate that we haven't */ -/* finished decoding the instruction. */ -/* [D,8] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), -/* [D,C] */ IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), IND(&dis_opFP1n2[0][0]), -}, { -/* [E,0] */ TNSy("loopnz",BD), TNSy("loopz",BD), TNSy("loop",BD), TNSy("jcxz",BD), -/* [E,4] */ TNS("inb",P), TS("in",P), TNS("outb",P), TS("out",P), -/* [E,8] */ TNSyp("call",D), TNSy("jmp",D), TNSx("ljmp",SO), TNSy("jmp",BD), -/* [E,C] */ TNS("inb",V), TS("in",V), TNS("outb",V), TS("out",V), -}, { -/* [F,0] */ TNS("lock",LOCK), TNS("icebp", NORM), TNS("repnz",PREFIX), TNS("repz",PREFIX), -/* [F,4] */ TNS("hlt",NORM), TNS("cmc",NORM), IND(dis_opF6), IND(dis_opF7), -/* [F,8] */ TNS("clc",NORM), TNS("stc",NORM), TNS("cli",NORM), TNS("sti",NORM), -/* [F,C] */ TNS("cld",NORM), TNS("std",NORM), IND(dis_opFE), IND(dis_opFF), -} }; - -/* END CSTYLED */ - -/* - * common functions to decode and disassemble an x86 or amd64 instruction - */ - -/* - * These are the individual fields of a REX prefix. 
Note that a REX - * prefix with none of these set is still needed to: - * - use the MOVSXD (sign extend 32 to 64 bits) instruction - * - access the %sil, %dil, %bpl, %spl registers - */ -#define REX_W 0x08 /* 64 bit operand size when set */ -#define REX_R 0x04 /* high order bit extension of ModRM reg field */ -#define REX_X 0x02 /* high order bit extension of SIB index field */ -#define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ - -static uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ -static uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ - -/* - * Even in 64 bit mode, usually only 4 byte immediate operands are supported. - */ -static int isize[] = {1, 2, 4, 4}; -static int isize64[] = {1, 2, 4, 8}; - -/* - * Just a bunch of useful macros. - */ -#define WBIT(x) (x & 0x1) /* to get w bit */ -#define REGNO(x) (x & 0x7) /* to get 3 bit register */ -#define VBIT(x) ((x)>>1 & 0x1) /* to get 'v' bit */ -#define OPSIZE(osize, wbit) ((wbit) ? isize[osize] : 1) -#define OPSIZE64(osize, wbit) ((wbit) ? isize64[osize] : 1) - -#define REG_ONLY 3 /* mode to indicate a register operand (not memory) */ - -#define BYTE_OPND 0 /* w-bit value indicating byte register */ -#define LONG_OPND 1 /* w-bit value indicating opnd_size register */ -#define MM_OPND 2 /* "value" used to indicate a mmx reg */ -#define XMM_OPND 3 /* "value" used to indicate a xmm reg */ -#define SEG_OPND 4 /* "value" used to indicate a segment reg */ -#define CONTROL_OPND 5 /* "value" used to indicate a control reg */ -#define DEBUG_OPND 6 /* "value" used to indicate a debug reg */ -#define TEST_OPND 7 /* "value" used to indicate a test reg */ -#define WORD_OPND 8 /* w-bit value indicating word size reg */ - -/* - * Get the next byte and separate the op code into the high and low nibbles. - */ -static int -dtrace_get_opcode(dis86_t *x, uint_t *high, uint_t *low) -{ - int byte; - - /* - * x86 instructions have a maximum length of 15 bytes. Bail out if - * we try to read more. 
- */ - if (x->d86_len >= 15) - return (x->d86_error = 1); - - if (x->d86_error) - return (1); - byte = x->d86_get_byte(x->d86_data); - if (byte < 0) - return (x->d86_error = 1); - x->d86_bytes[x->d86_len++] = byte; - *low = byte & 0xf; /* ----xxxx low 4 bits */ - *high = byte >> 4 & 0xf; /* xxxx---- bits 7 to 4 */ - return (0); -} - -/* - * Get and decode an SIB (scaled index base) byte - */ -static void -dtrace_get_SIB(dis86_t *x, uint_t *ss, uint_t *index, uint_t *base) -{ - int byte; - - if (x->d86_error) - return; - - byte = x->d86_get_byte(x->d86_data); - if (byte < 0) { - x->d86_error = 1; - return; - } - x->d86_bytes[x->d86_len++] = byte; - - *base = byte & 0x7; - *index = (byte >> 3) & 0x7; - *ss = (byte >> 6) & 0x3; -} - -/* - * Get the byte following the op code and separate it into the - * mode, register, and r/m fields. - */ -static void -dtrace_get_modrm(dis86_t *x, uint_t *mode, uint_t *reg, uint_t *r_m) -{ - if (x->d86_got_modrm == 0) { - if (x->d86_rmindex == -1) - x->d86_rmindex = x->d86_len; - dtrace_get_SIB(x, mode, reg, r_m); - x->d86_got_modrm = 1; - } -} - -/* - * Adjust register selection based on any REX prefix bits present. - */ -/*ARGSUSED*/ -static void -dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) -{ - if (reg != NULL && r_m == NULL) { - if (rex_prefix & REX_B) - *reg += 8; - } else { - if (reg != NULL && (REX_R & rex_prefix) != 0) - *reg += 8; - if (r_m != NULL && (REX_B & rex_prefix) != 0) - *r_m += 8; - } -} - -/* - * Get an immediate operand of the given size, with sign extension. 
- */ -static void -dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) -{ - int i; - int byte; - int valsize = 0; - - if (x->d86_numopnds < opindex + 1) - x->d86_numopnds = opindex + 1; - - switch (wbit) { - case BYTE_OPND: - valsize = 1; - break; - case LONG_OPND: - if (x->d86_opnd_size == SIZE16) - valsize = 2; - else if (x->d86_opnd_size == SIZE32) - valsize = 4; - else - valsize = 8; - break; - case MM_OPND: - case XMM_OPND: - case SEG_OPND: - case CONTROL_OPND: - case DEBUG_OPND: - case TEST_OPND: - valsize = size; - break; - case WORD_OPND: - valsize = 2; - break; - } - if (valsize < size) - valsize = size; - - if (x->d86_error) - return; - x->d86_opnd[opindex].d86_value = 0; - for (i = 0; i < size; ++i) { - byte = x->d86_get_byte(x->d86_data); - if (byte < 0) { - x->d86_error = 1; - return; - } - x->d86_bytes[x->d86_len++] = byte; - x->d86_opnd[opindex].d86_value |= (uint64_t)byte << (i * 8); - } - /* Do sign extension */ - if (x->d86_bytes[x->d86_len - 1] & 0x80) { - for (; i < valsize; i++) - x->d86_opnd[opindex].d86_value |= - (uint64_t)0xff << (i* 8); - } -#ifdef DIS_TEXT - x->d86_opnd[opindex].d86_mode = MODE_SIGNED; - x->d86_opnd[opindex].d86_value_size = valsize; - x->d86_imm_bytes += size; -#endif -} - -/* - * Get an ip relative operand of the given size, with sign extension. - */ -static void -dtrace_disp_opnd(dis86_t *x, int wbit, int size, int opindex) -{ - dtrace_imm_opnd(x, wbit, size, opindex); -#ifdef DIS_TEXT - x->d86_opnd[opindex].d86_mode = MODE_IPREL; -#endif -} - -/* - * Check to see if there is a segment override prefix pending. - * If so, print it in the current 'operand' location and set - * the override flag back to false. 
- */ -/*ARGSUSED*/ -static void -dtrace_check_override(dis86_t *x, int opindex) -{ -#ifdef DIS_TEXT - if (x->d86_seg_prefix) { - (void) strlcat(x->d86_opnd[opindex].d86_prefix, - x->d86_seg_prefix, PFIXLEN); - } -#endif - x->d86_seg_prefix = NULL; -} - - -/* - * Process a single instruction Register or Memory operand. - * - * mode = addressing mode from ModRM byte - * r_m = r_m (or reg if mode == 3) field from ModRM byte - * wbit = indicates which register (8bit, 16bit, ... MMX, etc.) set to use. - * o = index of operand that we are processing (0, 1 or 2) - * - * the value of reg or r_m must have already been adjusted for any REX prefix. - */ -/*ARGSUSED*/ -static void -dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) -{ - int have_SIB = 0; /* flag presence of scale-index-byte */ - uint_t ss; /* scale-factor from opcode */ - uint_t index; /* index register number */ - uint_t base; /* base register number */ - int dispsize; /* size of displacement in bytes */ -#ifdef DIS_TEXT - char *opnd = x->d86_opnd[opindex].d86_opnd; -#endif - - if (x->d86_numopnds < opindex + 1) - x->d86_numopnds = opindex + 1; - - if (x->d86_error) - return; - - /* - * first handle a simple register - */ - if (mode == REG_ONLY) { -#ifdef DIS_TEXT - switch (wbit) { - case MM_OPND: - (void) strlcat(opnd, dis_MMREG[r_m], OPLEN); - break; - case XMM_OPND: - (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN); - break; - case SEG_OPND: - (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN); - break; - case CONTROL_OPND: - (void) strlcat(opnd, dis_CONTROLREG[r_m], OPLEN); - break; - case DEBUG_OPND: - (void) strlcat(opnd, dis_DEBUGREG[r_m], OPLEN); - break; - case TEST_OPND: - (void) strlcat(opnd, dis_TESTREG[r_m], OPLEN); - break; - case BYTE_OPND: - if (x->d86_rex_prefix == 0) - (void) strlcat(opnd, dis_REG8[r_m], OPLEN); - else - (void) strlcat(opnd, dis_REG8_REX[r_m], OPLEN); - break; - case WORD_OPND: - (void) strlcat(opnd, dis_REG16[r_m], OPLEN); - break; - case LONG_OPND: - if 
(x->d86_opnd_size == SIZE16) - (void) strlcat(opnd, dis_REG16[r_m], OPLEN); - else if (x->d86_opnd_size == SIZE32) - (void) strlcat(opnd, dis_REG32[r_m], OPLEN); - else - (void) strlcat(opnd, dis_REG64[r_m], OPLEN); - break; - } -#endif /* DIS_TEXT */ - return; - } - - /* - * if symbolic representation, skip override prefix, if any - */ - dtrace_check_override(x, opindex); - - /* - * Handle 16 bit memory references first, since they decode - * the mode values more simply. - * mode 1 is r_m + 8 bit displacement - * mode 2 is r_m + 16 bit displacement - * mode 0 is just r_m, unless r_m is 6 which is 16 bit disp - */ - if (x->d86_addr_size == SIZE16) { - if ((mode == 0 && r_m == 6) || mode == 2) - dtrace_imm_opnd(x, WORD_OPND, 2, opindex); - else if (mode == 1) - dtrace_imm_opnd(x, BYTE_OPND, 1, opindex); -#ifdef DIS_TEXT - if (mode == 0 && r_m == 6) - x->d86_opnd[opindex].d86_mode = MODE_SIGNED; - else if (mode == 0) - x->d86_opnd[opindex].d86_mode = MODE_NONE; - else - x->d86_opnd[opindex].d86_mode = MODE_OFFSET; - (void) strlcat(opnd, dis_addr16[mode][r_m], OPLEN); -#endif - return; - } - - /* - * 32 and 64 bit addressing modes are more complex since they - * can involve an SIB (scaled index and base) byte to decode. - */ - if (r_m == ESP_REGNO || r_m == ESP_REGNO + 8) { - have_SIB = 1; - dtrace_get_SIB(x, &ss, &index, &base); - if (x->d86_error) - return; - if (base != 5 || mode != 0) - if (x->d86_rex_prefix & REX_B) - base += 8; - if (x->d86_rex_prefix & REX_X) - index += 8; - } else { - base = r_m; - } - - /* - * Compute the displacement size and get its bytes - */ - dispsize = 0; - - if (mode == 1) - dispsize = 1; - else if (mode == 2) - dispsize = 4; - else if ((r_m & 7) == EBP_REGNO || - (have_SIB && (base & 7) == EBP_REGNO)) - dispsize = 4; - - if (dispsize > 0) { - dtrace_imm_opnd(x, dispsize == 4 ? 
LONG_OPND : BYTE_OPND, - dispsize, opindex); - if (x->d86_error) - return; - } - -#ifdef DIS_TEXT - if (dispsize > 0) - x->d86_opnd[opindex].d86_mode = MODE_OFFSET; - - if (have_SIB == 0) { - if (x->d86_mode == SIZE32) { - if (mode == 0) - (void) strlcat(opnd, dis_addr32_mode0[r_m], - OPLEN); - else - (void) strlcat(opnd, dis_addr32_mode12[r_m], - OPLEN); - } else { - if (mode == 0) - (void) strlcat(opnd, dis_addr64_mode0[r_m], - OPLEN); - else - (void) strlcat(opnd, dis_addr64_mode12[r_m], - OPLEN); - } - } else { - uint_t need_paren = 0; - char **regs; - if (x->d86_mode == SIZE32) /* NOTE this is not addr_size! */ - regs = (char **)dis_REG32; - else - regs = (char **)dis_REG64; - - /* - * print the base (if any) - */ - if (base == EBP_REGNO && mode == 0) { - if (index != ESP_REGNO) { - (void) strlcat(opnd, "(", OPLEN); - need_paren = 1; - } - } else { - (void) strlcat(opnd, "(", OPLEN); - (void) strlcat(opnd, regs[base], OPLEN); - need_paren = 1; - } - - /* - * print the index (if any) - */ - if (index != ESP_REGNO) { - (void) strlcat(opnd, ",", OPLEN); - (void) strlcat(opnd, regs[index], OPLEN); - (void) strlcat(opnd, dis_scale_factor[ss], OPLEN); - } else - if (need_paren) - (void) strlcat(opnd, ")", OPLEN); - } -#endif -} - -/* - * Operand sequence for standard instruction involving one register - * and one register/memory operand. - * wbit indicates a byte(0) or opnd_size(1) operation - * vbit indicates direction (0 for "opcode r,r_m") or (1 for "opcode r_m, r") - */ -#define STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, vbit) { \ - dtrace_get_modrm(x, &mode, ®, &r_m); \ - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ - dtrace_get_operand(x, mode, r_m, wbit, vbit); \ - dtrace_get_operand(x, REG_ONLY, reg, wbit, 1 - vbit); \ -} - -/* - * Similar to above, but allows for the two operands to be of different - * classes (ie. wbit). 
- * wbit is for the r_m operand - * w2 is for the reg operand - */ -#define MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, w2, vbit) { \ - dtrace_get_modrm(x, &mode, ®, &r_m); \ - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ - dtrace_get_operand(x, mode, r_m, wbit, vbit); \ - dtrace_get_operand(x, REG_ONLY, reg, w2, 1 - vbit); \ -} - -/* - * Similar, but for 2 operands plus an immediate. - */ -#define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ - dtrace_get_modrm(x, &mode, ®, &r_m); \ - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); \ - dtrace_get_operand(x, mode, r_m, wbit, 1); \ - dtrace_get_operand(x, REG_ONLY, reg, w2, 2); \ - dtrace_imm_opnd(x, wbit, immsize, 0); \ -} - -/* - * Dissassemble a single x86 or amd64 instruction. - * - * Mode determines the default operating mode (SIZE16, SIZE32 or SIZE64) - * for interpreting instructions. - * - * returns non-zero for bad opcode - */ -int -dtrace_disx86(dis86_t *x, uint_t cpu_mode) -{ - const instable_t *dp = NULL; /* decode table being used */ -#ifdef DIS_TEXT - uint_t i; -#endif -#ifdef DIS_MEM - uint_t nomem = 0; -#define NOMEM (nomem = 1) -#else -#define NOMEM /* nothing */ -#endif - uint_t wbit = 0; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ - uint_t w2; /* wbit value for second operand */ - uint_t vbit; - uint_t mode = 0; /* mode value from ModRM byte */ - uint_t reg; /* reg value from ModRM byte */ - uint_t r_m; /* r_m value from ModRM byte */ - - uint_t opcode1; /* high nibble of 1st byte */ - uint_t opcode2; /* low nibble of 1st byte */ - uint_t opcode3; /* extra opcode bits usually from ModRM byte */ - uint_t opcode4; /* high nibble of 2nd byte */ - uint_t opcode5 = 0xff; /* low nibble of 2nd byte */ - uint_t opcode6; /* high nibble of 3rd byte */ - uint_t opcode7 = 0xff; /* low nibble of 3rd byte */ - uint_t opcode_bytes = 1; - - /* - * legacy prefixes come in 5 flavors, you should have only one of each - */ - uint_t opnd_size_prefix = 0; - uint_t addr_size_prefix = 0; - 
uint_t segment_prefix = 0; - uint_t lock_prefix = 0; - uint_t rep_prefix = 0; - uint_t rex_prefix = 0; /* amd64 register extension prefix */ - size_t off; - - x->d86_len = 0; - x->d86_rmindex = -1; - x->d86_error = 0; -#ifdef DIS_TEXT - x->d86_numopnds = 0; - x->d86_seg_prefix = NULL; - x->d86_mneu[0] = 0; - for (i = 0; i < 3; ++i) { - x->d86_opnd[i].d86_opnd[0] = 0; - x->d86_opnd[i].d86_prefix[0] = 0; - x->d86_opnd[i].d86_value_size = 0; - x->d86_opnd[i].d86_value = 0; - x->d86_opnd[i].d86_mode = MODE_NONE; - } -#endif - x->d86_error = 0; - x->d86_memsize = 0; - - if (cpu_mode == SIZE16) { - opnd_size = SIZE16; - addr_size = SIZE16; - } else if (cpu_mode == SIZE32) { - opnd_size = SIZE32; - addr_size = SIZE32; - } else { - opnd_size = SIZE32; - addr_size = SIZE64; - } - - /* - * Get one opcode byte and check for zero padding that follows - * jump tables. - */ - if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) - goto error; - - if (opcode1 == 0 && opcode2 == 0 && - x->d86_check_func != NULL && x->d86_check_func(x->d86_data)) { -#ifdef DIS_TEXT - (void) strncpy(x->d86_mneu, ".byte\t0", OPLEN); -#endif - goto done; - } - - /* - * Gather up legacy x86 prefix bytes. - */ - for (;;) { - uint_t *which_prefix = NULL; - - dp = &dis_distable[opcode1][opcode2]; - - switch (dp->it_adrmode) { - case PREFIX: - which_prefix = &rep_prefix; - break; - case LOCK: - which_prefix = &lock_prefix; - break; - case OVERRIDE: - which_prefix = &segment_prefix; -#ifdef DIS_TEXT - x->d86_seg_prefix = (char *)dp->it_name; -#endif - if (dp->it_invalid64 && cpu_mode == SIZE64) - goto error; - break; - case AM: - which_prefix = &addr_size_prefix; - break; - case DM: - which_prefix = &opnd_size_prefix; - break; - } - if (which_prefix == NULL) - break; - *which_prefix = (opcode1 << 4) | opcode2; - if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) - goto error; - } - - /* - * Handle amd64 mode PREFIX values. - * Some of the segment prefixes are no-ops. 
(only FS/GS actually work) - * We might have a REX prefix (opcodes 0x40-0x4f) - */ - if (cpu_mode == SIZE64) { - if (segment_prefix != 0x64 && segment_prefix != 0x65) - segment_prefix = 0; - - if (opcode1 == 0x4) { - rex_prefix = (opcode1 << 4) | opcode2; - if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) - goto error; - dp = &dis_distable[opcode1][opcode2]; - } - } - - /* - * Deal with selection of operand and address size now. - * Note that the REX.W bit being set causes opnd_size_prefix to be - * ignored. - */ - if (cpu_mode == SIZE64) { - if (rex_prefix & 0x08) - opnd_size = SIZE64; - else if (opnd_size_prefix) - opnd_size = SIZE16; - - if (addr_size_prefix) - addr_size = SIZE32; - } else if (cpu_mode == SIZE32) { - if (opnd_size_prefix) - opnd_size = SIZE16; - if (addr_size_prefix) - addr_size = SIZE16; - } else { - if (opnd_size_prefix) - opnd_size = SIZE32; - if (addr_size_prefix) - addr_size = SIZE32; - } - - /* - * The pause instruction - a repz'd nop. This doesn't fit - * with any of the other prefix goop added for SSE, so we'll - * special-case it here. - */ - if (rep_prefix == 0xf3 && opcode1 == 0x9 && opcode2 == 0x0) { - rep_prefix = 0; - dp = &dis_opPause; - } - - /* - * Some 386 instructions have 2 bytes of opcode before the mod_r/m - * byte so we may need to perform a table indirection. - */ - if (dp->it_indirect == dis_op0F[0]) { - if (dtrace_get_opcode(x, &opcode4, &opcode5) != 0) - goto error; - opcode_bytes = 2; - if (opcode4 == 0x7 && opcode5 >= 0x1 && opcode5 <= 0x3) { - uint_t subcode; - - if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) - goto error; - opcode_bytes = 3; - subcode = ((opcode6 & 0x3) << 1) | - ((opcode7 & 0x8) >> 3); - dp = &dis_op0F7123[opcode5][subcode]; - } else if ((opcode4 == 0xc) && (opcode5 >= 0x8)) { - dp = &dis_op0FC8[0]; - } else { - dp = &dis_op0F[opcode4][opcode5]; - } - } - - /* - * If still not at a TERM decode entry, then a ModRM byte - * exists and its fields further decode the instruction. 
- */ - x->d86_got_modrm = 0; - if (dp->it_indirect != TERM) { - dtrace_get_modrm(x, &mode, &opcode3, &r_m); - if (x->d86_error) - goto error; - reg = opcode3; - - /* - * decode 287 instructions (D8-DF) from opcodeN - */ - if (opcode1 == 0xD && opcode2 >= 0x8) { - if (opcode2 == 0xB && mode == 0x3 && opcode3 == 4) - dp = &dis_opFP5[r_m]; - else if (opcode2 == 0xA && mode == 0x3 && opcode3 < 4) - dp = &dis_opFP7[opcode3]; - else if (opcode2 == 0xB && mode == 0x3) - dp = &dis_opFP6[opcode3]; - else if (opcode2 == 0x9 && mode == 0x3 && opcode3 >= 4) - dp = &dis_opFP4[opcode3 - 4][r_m]; - else if (mode == 0x3) - dp = &dis_opFP3[opcode2 - 8][opcode3]; - else - dp = &dis_opFP1n2[opcode2 - 8][opcode3]; - } else { - dp = dp->it_indirect + opcode3; - } - } - - /* - * In amd64 bit mode, ARPL opcode is changed to MOVSXD - * (sign extend 32bit to 64 bit) - */ - if (cpu_mode == SIZE64 && opcode1 == 0x6 && opcode2 == 0x3) - dp = &dis_opMOVSLD; - - /* - * at this point we should have a correct (or invalid) opcode - */ - if ((cpu_mode == SIZE64 && dp->it_invalid64) || - (cpu_mode != SIZE64 && dp->it_invalid32)) - goto error; - if (dp->it_indirect != TERM) - goto error; - - /* - * deal with MMX/SSE opcodes which are changed by prefixes - */ - switch (dp->it_adrmode) { - case MMO: - case MMOIMPL: - case MMO3P: - case MMOM3: - case MMOMS: - case MMOPM: - case MMOPRM: - case MMOS: - case XMMO: - case XMMOM: - case XMMOMS: - case XMMOPM: - case XMMOS: - case XMMOMX: - case XMMOX3: - case XMMOXMM: - /* - * This is horrible. Some SIMD instructions take the - * form 0x0F 0x?? ..., which is easily decoded using the - * existing tables. Other SIMD instructions use various - * prefix bytes to overload existing instructions. For - * Example, addps is F0, 58, whereas addss is F3 (repz), - * F0, 58. Presumably someone got a raise for this. 
- * - * If we see one of the instructions which can be - * modified in this way (if we've got one of the SIMDO* - * address modes), we'll check to see if the last prefix - * was a repz. If it was, we strip the prefix from the - * mnemonic, and we indirect using the dis_opSIMDrepz - * table. - */ - - /* - * Calculate our offset in dis_op0F - */ - if ((uintptr_t)dp - (uintptr_t)dis_op0F > sizeof (dis_op0F)) - goto error; - - off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / - sizeof (instable_t); - - /* - * Rewrite if this instruction used one of the magic prefixes. - */ - if (rep_prefix) { - if (rep_prefix == 0xf2) - dp = &dis_opSIMDrepnz[off]; - else - dp = &dis_opSIMDrepz[off]; - rep_prefix = 0; - } else if (opnd_size_prefix) { - dp = &dis_opSIMDdata16[off]; - opnd_size_prefix = 0; - if (opnd_size == SIZE16) - opnd_size = SIZE32; - } - break; - - case MMOSH: - /* - * As with the "normal" SIMD instructions, the MMX - * shuffle instructions are overloaded. These - * instructions, however, are special in that they use - * an extra byte, and thus an extra table. As of this - * writing, they only use the opnd_size prefix. - */ - - /* - * Calculate our offset in dis_op0F7123 - */ - if ((uintptr_t)dp - (uintptr_t)dis_op0F7123 > - sizeof (dis_op0F7123)) - goto error; - - if (opnd_size_prefix) { - off = ((uintptr_t)dp - (uintptr_t)dis_op0F7123) / - sizeof (instable_t); - dp = &dis_opSIMD7123[off]; - opnd_size_prefix = 0; - if (opnd_size == SIZE16) - opnd_size = SIZE32; - } - break; - } - - /* - * In 64 bit mode, some opcodes automatically use opnd_size == SIZE64. - */ - if (cpu_mode == SIZE64) - if (dp->it_always64 || (opnd_size == SIZE32 && dp->it_stackop)) - opnd_size = SIZE64; - -#ifdef DIS_TEXT - /* - * At this point most instructions can format the opcode mnemonic - * including the prefixes. 
- */ - if (lock_prefix) - (void) strlcat(x->d86_mneu, "lock ", OPLEN); - - if (rep_prefix == 0xf2) - (void) strlcat(x->d86_mneu, "repnz ", OPLEN); - else if (rep_prefix == 0xf3) - (void) strlcat(x->d86_mneu, "repz ", OPLEN); - - if (cpu_mode == SIZE64 && addr_size_prefix) - (void) strlcat(x->d86_mneu, "addr32 ", OPLEN); - - if (dp->it_adrmode != CBW && - dp->it_adrmode != CWD && - dp->it_adrmode != XMMSFNC) { - if (strcmp(dp->it_name, "INVALID") == 0) - goto error; - (void) strlcat(x->d86_mneu, dp->it_name, OPLEN); - if (dp->it_suffix) { - char *types[] = {"", "w", "l", "q"}; - if (opcode_bytes == 2 && opcode4 == 4) { - /* It's a cmovx.yy. Replace the suffix x */ - for (i = 5; i < OPLEN; i++) { - if (x->d86_mneu[i] == '.') - break; - } - x->d86_mneu[i - 1] = *types[opnd_size]; - } else { - (void) strlcat(x->d86_mneu, types[opnd_size], - OPLEN); - } - } - } -#endif - - /* - * Process operands based on the addressing modes. - */ - x->d86_mode = cpu_mode; - x->d86_rex_prefix = rex_prefix; - x->d86_opnd_size = opnd_size; - x->d86_addr_size = addr_size; - vbit = 0; /* initialize for mem/reg -> reg */ - switch (dp->it_adrmode) { - /* - * amd64 instruction to sign extend 32 bit reg/mem operands - * into 64 bit register values - */ - case MOVSXZ: -#ifdef DIS_TEXT - if (rex_prefix == 0) - (void) strncpy(x->d86_mneu, "movzld", OPLEN); -#endif - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - x->d86_opnd_size = SIZE64; - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - x->d86_opnd_size = opnd_size = SIZE32; - wbit = LONG_OPND; - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* - * movsbl movsbw movsbq (0x0FBE) or movswl movswq (0x0FBF) - * movzbl movzbw movzbq (0x0FB6) or mobzwl movzwq (0x0FB7) - * wbit lives in 2nd byte, note that operands - * are different sized - */ - case MOVZ: - if (rex_prefix & REX_W) { - /* target register size = 64 bit */ - x->d86_mneu[5] = 'q'; - } - dtrace_get_modrm(x, &mode, ®, &r_m); - 
dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - x->d86_opnd_size = opnd_size = SIZE16; - wbit = WBIT(opcode5); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* - * imul instruction, with either 8-bit or longer immediate - * opcode 0x6B for byte, sign-extended displacement, 0x69 for word(s) - */ - case IMUL: - wbit = LONG_OPND; - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, - OPSIZE(opnd_size, opcode2 == 0x9)); - break; - - /* memory or register operand to register, with 'w' bit */ - case MRw: - wbit = WBIT(opcode2); - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); - break; - - /* register to memory or register operand, with 'w' bit */ - /* arpl happens to fit here also because it is odd */ - case RMw: - if (opcode_bytes == 2) - wbit = WBIT(opcode5); - else - wbit = WBIT(opcode2); - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); - break; - - /* xaddb instruction */ - case XADDB: - wbit = 0; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); - break; - - /* MMX register to memory or register operand */ - case MMS: - case MMOS: -#ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; -#else - wbit = LONG_OPND; -#endif - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); - break; - - /* MMX register to memory */ - case MMOMS: - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode == REG_ONLY) - goto error; - wbit = MM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); - break; - - /* Double shift. Has immediate operand specifying the shift. */ - case DSHIFT: - wbit = LONG_OPND; - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 2); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - dtrace_imm_opnd(x, wbit, 1, 0); - break; - - /* - * Double shift. With no immediate operand, specifies using %cl. 
- */ - case DSHIFTcl: - wbit = LONG_OPND; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); - break; - - /* immediate to memory or register operand */ - case IMlw: - wbit = WBIT(opcode2); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); - /* - * Have long immediate for opcode 0x81, but not 0x80 nor 0x83 - */ - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, opcode2 == 1), 0); - break; - - /* immediate to memory or register operand with the */ - /* 'w' bit present */ - case IMw: - wbit = WBIT(opcode2); - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); - break; - - /* immediate to register with register in low 3 bits */ - /* of op code */ - case IR: - /* w-bit here (with regs) is bit 3 */ - wbit = opcode2 >>3 & 0x1; - reg = REGNO(opcode2); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - mode = REG_ONLY; - r_m = reg; - dtrace_get_operand(x, mode, r_m, wbit, 1); - dtrace_imm_opnd(x, wbit, OPSIZE64(opnd_size, wbit), 0); - break; - - /* MMX immediate shift of register */ - case MMSH: - case MMOSH: - wbit = MM_OPND; - goto mm_shift; /* in next case */ - - /* SIMD immediate shift of register */ - case XMMSH: - wbit = XMM_OPND; -mm_shift: - reg = REGNO(opcode7); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); - dtrace_imm_opnd(x, wbit, 1, 0); - NOMEM; - break; - - /* accumulator to memory operand */ - case AO: - vbit = 1; - /*FALLTHROUGH*/ - - /* memory operand to accumulator */ - case OA: - wbit = WBIT(opcode2); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1 - vbit); - dtrace_imm_opnd(x, wbit, OPSIZE64(addr_size, LONG_OPND), vbit); -#ifdef DIS_TEXT - x->d86_opnd[vbit].d86_mode = MODE_OFFSET; -#endif - break; - - - /* segment register to memory or register operand */ - case SM: - vbit = 1; - /*FALLTHROUGH*/ - - /* memory or 
register operand to segment register */ - case MS: - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, LONG_OPND, vbit); - dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 1 - vbit); - break; - - /* - * rotate or shift instructions, which may shift by 1 or - * consult the cl register, depending on the 'v' bit - */ - case Mv: - vbit = VBIT(opcode2); - wbit = WBIT(opcode2); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); -#ifdef DIS_TEXT - if (vbit) { - (void) strlcat(x->d86_opnd[0].d86_opnd, "%cl", OPLEN); - } else { - x->d86_opnd[0].d86_mode = MODE_SIGNED; - x->d86_opnd[0].d86_value_size = 1; - x->d86_opnd[0].d86_value = 1; - } -#endif - break; - /* - * immediate rotate or shift instructions - */ - case MvI: - wbit = WBIT(opcode2); -normal_imm_mem: - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 1); - dtrace_imm_opnd(x, wbit, 1, 0); - break; - - /* bit test instructions */ - case MIb: - wbit = LONG_OPND; - goto normal_imm_mem; - - /* single memory or register operand with 'w' bit present */ - case Mw: - wbit = WBIT(opcode2); -just_mem: - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - case SWAPGS: - if (cpu_mode == SIZE64 && mode == 3 && r_m == 0) { -#ifdef DIS_TEXT - (void) strncpy(x->d86_mneu, "swapgs", OPLEN); -#endif - NOMEM; - break; - } - /*FALLTHROUGH*/ - - /* prefetch instruction - memory operand, but no memory acess */ - case PREF: - NOMEM; - /*FALLTHROUGH*/ - - /* single memory or register operand */ - case M: - wbit = LONG_OPND; - goto just_mem; - - /* single memory or register byte operand */ - case Mb: - wbit = BYTE_OPND; - goto just_mem; - - case MO: - /* Similar to M, but only memory (no direct registers) */ - wbit = LONG_OPND; - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode == 3) - goto 
error; - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* move special register to register or reverse if vbit */ - case SREG: - switch (opcode5) { - - case 2: - vbit = 1; - /*FALLTHROUGH*/ - case 0: - wbit = CONTROL_OPND; - break; - - case 3: - vbit = 1; - /*FALLTHROUGH*/ - case 1: - wbit = DEBUG_OPND; - break; - - case 6: - vbit = 1; - /*FALLTHROUGH*/ - case 4: - wbit = TEST_OPND; - break; - - } - dtrace_get_modrm(x, &mode, ®, &r_m); - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit); - dtrace_get_operand(x, REG_ONLY, r_m, LONG_OPND, 1 - vbit); - NOMEM; - break; - - /* - * single register operand with register in the low 3 - * bits of op code - */ - case R: - if (opcode_bytes == 2) - reg = REGNO(opcode5); - else - reg = REGNO(opcode2); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); - NOMEM; - break; - - /* - * register to accumulator with register in the low 3 - * bits of op code, xchg instructions - */ - case RA: - NOMEM; - reg = REGNO(opcode2); - dtrace_rex_adjust(rex_prefix, mode, ®, NULL); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, LONG_OPND, 1); - break; - - /* - * single segment register operand, with register in - * bits 3-4 of op code byte - */ - case SEG: - NOMEM; - reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x3; - dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); - break; - - /* - * single segment register operand, with register in - * bits 3-5 of op code - */ - case LSEG: - NOMEM; - /* long seg reg from opcode */ - reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x7; - dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); - break; - - /* memory or register operand to register */ - case MR: - wbit = LONG_OPND; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); - break; - - case RM: - wbit = LONG_OPND; - STANDARD_MODRM(x, mode, reg, 
r_m, rex_prefix, wbit, 1); - break; - - /* MMX/SIMD-Int memory or mm reg to mm reg */ - case MM: - case MMO: -#ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; -#else - wbit = LONG_OPND; -#endif - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); - break; - - case MMOIMPL: -#ifdef DIS_TEXT - wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; -#else - wbit = LONG_OPND; -#endif - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode != REG_ONLY) - goto error; - - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - dtrace_get_operand(x, REG_ONLY, reg, MM_OPND, 1); - mode = 0; /* change for memory access size... */ - break; - - /* MMX/SIMD-Int and SIMD-FP predicated mm reg to r32 */ - case MMO3P: - wbit = MM_OPND; - goto xmm3p; - case XMM3P: - wbit = XMM_OPND; -xmm3p: - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode != REG_ONLY) - goto error; - - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1); - NOMEM; - break; - - /* MMX/SIMD-Int predicated r32/mem to mm reg */ - case MMOPRM: - wbit = LONG_OPND; - w2 = MM_OPND; - goto xmmprm; - case XMMPRM: - wbit = LONG_OPND; - w2 = XMM_OPND; -xmmprm: - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1); - break; - - /* MMX/SIMD-Int predicated mm/mem to mm reg */ - case MMOPM: - wbit = w2 = MM_OPND; - goto xmmprm; - - /* MMX/SIMD-Int mm reg to r32 */ - case MMOM3: - NOMEM; - dtrace_get_modrm(x, &mode, ®, &r_m); - if (mode != REG_ONLY) - goto error; - wbit = MM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); - break; - - /* SIMD memory or xmm reg operand to xmm reg */ - case XMM: - case XMMO: - case XMMXIMPL: - wbit = XMM_OPND; - STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); - - if (dp->it_adrmode == XMMXIMPL && mode != REG_ONLY) - goto error; - -#ifdef DIS_TEXT - /* - * movlps and movhlps share opcodes. They differ in the - * addressing modes allowed for their operands. 
- * movhps and movlhps behave similarly. - */ - if (mode == REG_ONLY) { - if (strcmp(dp->it_name, "movlps") == 0) - (void) strncpy(x->d86_mneu, "movhlps", OPLEN); - else if (strcmp(dp->it_name, "movhps") == 0) - (void) strncpy(x->d86_mneu, "movlhps", OPLEN); - } -#endif - if (dp->it_adrmode == XMMXIMPL) - mode = 0; /* change for memory access size... */ - break; - - /* SIMD xmm reg to memory or xmm reg */ - case XMMS: - case XMMOS: - case XMMMS: - case XMMOMS: - dtrace_get_modrm(x, &mode, ®, &r_m); -#ifdef DIS_TEXT - if ((strcmp(dp->it_name, "movlps") == 0 || - strcmp(dp->it_name, "movhps") == 0 || - strcmp(dp->it_name, "movntps") == 0) && - mode == REG_ONLY) - goto error; -#endif - wbit = XMM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); - break; - - /* SIMD memory to xmm reg */ - case XMMM: - case XMMOM: - wbit = XMM_OPND; - dtrace_get_modrm(x, &mode, ®, &r_m); -#ifdef DIS_TEXT - if (mode == REG_ONLY) { - if (strcmp(dp->it_name, "movhps") == 0) - (void) strncpy(x->d86_mneu, "movlhps", OPLEN); - else - goto error; - } -#endif - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); - break; - - /* SIMD memory or r32 to xmm reg */ - case XMM3MX: - wbit = LONG_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); - break; - - case XMM3MXS: - wbit = LONG_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); - break; - - /* SIMD memory or mm reg to xmm reg */ - case XMMOMX: - /* SIMD mm to xmm */ - case XMMMX: - wbit = MM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); - break; - - /* SIMD memory or xmm reg to mm reg */ - case XMMXMM: - case XMMOXMM: - case XMMXM: - wbit = XMM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); - break; - - - /* SIMD memory or xmm reg to r32 */ - case XMMXM3: - wbit = XMM_OPND; - MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); - break; - - /* SIMD xmm to r32 */ - case XMMX3: - case XMMOX3: - dtrace_get_modrm(x, &mode, ®, &r_m); - if 
(mode != REG_ONLY) - goto error; - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); - dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); - NOMEM; - break; - - /* SIMD predicated memory or xmm reg with/to xmm reg */ - case XMMP: - case XMMOPM: - wbit = XMM_OPND; - THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); - -#ifdef DIS_TEXT - /* - * cmpps and cmpss vary their instruction name based - * on the value of imm8. Other XMMP instructions, - * such as shufps, require explicit specification of - * the predicate. - */ - if (dp->it_name[0] == 'c' && - dp->it_name[1] == 'm' && - dp->it_name[2] == 'p' && - strlen(dp->it_name) == 5) { - uchar_t pred = x->d86_opnd[0].d86_value & 0xff; - - if (pred >= (sizeof (dis_PREDSUFFIX) / sizeof (char *))) - goto error; - - (void) strncpy(x->d86_mneu, "cmp", OPLEN); - (void) strlcat(x->d86_mneu, dis_PREDSUFFIX[pred], - OPLEN); - (void) strlcat(x->d86_mneu, - dp->it_name + strlen(dp->it_name) - 2, - OPLEN); - x->d86_opnd[0] = x->d86_opnd[1]; - x->d86_opnd[1] = x->d86_opnd[2]; - x->d86_numopnds = 2; - } -#endif - break; - - /* immediate operand to accumulator */ - case IA: - wbit = WBIT(opcode2); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); - NOMEM; - break; - - /* memory or register operand to accumulator */ - case MA: - wbit = WBIT(opcode2); - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, wbit, 0); - break; - - /* si register to di register used to reference memory */ - case SD: -#ifdef DIS_TEXT - dtrace_check_override(x, 0); - x->d86_numopnds = 2; - if (addr_size == SIZE64) { - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", - OPLEN); - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", - OPLEN); - } else if (addr_size == SIZE32) { - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", - OPLEN); - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", - OPLEN); - } else { - 
(void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", - OPLEN); - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", - OPLEN); - } -#endif - wbit = LONG_OPND; - break; - - /* accumulator to di register */ - case AD: - wbit = WBIT(opcode2); -#ifdef DIS_TEXT - dtrace_check_override(x, 1); - x->d86_numopnds = 2; - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 0); - if (addr_size == SIZE64) - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", - OPLEN); - else if (addr_size == SIZE32) - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", - OPLEN); - else - (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", - OPLEN); -#endif - break; - - /* si register to accumulator */ - case SA: - wbit = WBIT(opcode2); -#ifdef DIS_TEXT - dtrace_check_override(x, 0); - x->d86_numopnds = 2; - if (addr_size == SIZE64) - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", - OPLEN); - else if (addr_size == SIZE32) - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", - OPLEN); - else - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", - OPLEN); - dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); -#endif - break; - - /* - * single operand, a 16/32 bit displacement - */ - case D: - wbit = LONG_OPND; - dtrace_disp_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); - NOMEM; - break; - - /* jmp/call indirect to memory or register operand */ - case INM: -#ifdef DIS_TEXT - (void) strlcat(x->d86_opnd[0].d86_prefix, "*", OPLEN); -#endif - dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); - dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); - wbit = LONG_OPND; - break; - - /* - * for long jumps and long calls -- a new code segment - * register and an offset in IP -- stored in object - * code in reverse order. 
Note - not valid in amd64 - */ - case SO: - dtrace_check_override(x, 1); - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 1); -#ifdef DIS_TEXT - x->d86_opnd[1].d86_mode = MODE_SIGNED; -#endif - /* will now get segment operand */ - dtrace_imm_opnd(x, wbit, 2, 0); - break; - - /* - * jmp/call. single operand, 8 bit displacement. - * added to current EIP in 'compofff' - */ - case BD: - dtrace_disp_opnd(x, BYTE_OPND, 1, 0); - NOMEM; - break; - - /* single 32/16 bit immediate operand */ - case I: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); - break; - - /* single 8 bit immediate operand */ - case Ib: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, 1, 0); - break; - - case ENTER: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, 2, 0); - dtrace_imm_opnd(x, wbit, 1, 1); - switch (opnd_size) { - case SIZE64: - x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 8; - break; - case SIZE32: - x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 4; - break; - case SIZE16: - x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 2; - break; - } - - break; - - /* 16-bit immediate operand */ - case RET: - wbit = LONG_OPND; - dtrace_imm_opnd(x, wbit, 2, 0); - break; - - /* single 8 bit port operand */ - case P: - dtrace_check_override(x, 0); - dtrace_imm_opnd(x, BYTE_OPND, 1, 0); - NOMEM; - break; - - /* single operand, dx register (variable port instruction) */ - case V: - x->d86_numopnds = 1; - dtrace_check_override(x, 0); -#ifdef DIS_TEXT - (void) strlcat(x->d86_opnd[0].d86_opnd, "(%dx)", OPLEN); -#endif - NOMEM; - break; - - /* - * The int instruction, which has two forms: - * int 3 (breakpoint) or - * int n, where n is indicated in the subsequent - * byte (format Ib). The int 3 instruction (opcode 0xCC), - * where, although the 3 looks like an operand, - * it is implied by the opcode. It must be converted - * to the correct base and output. 
- */ - case INT3: -#ifdef DIS_TEXT - x->d86_numopnds = 1; - x->d86_opnd[0].d86_mode = MODE_SIGNED; - x->d86_opnd[0].d86_value_size = 1; - x->d86_opnd[0].d86_value = 3; -#endif - NOMEM; - break; - - /* single 8 bit immediate operand */ - case INTx: - dtrace_imm_opnd(x, BYTE_OPND, 1, 0); - NOMEM; - break; - - /* an unused byte must be discarded */ - case U: - if (x->d86_get_byte(x->d86_data) < 0) - goto error; - x->d86_len++; - NOMEM; - break; - - case CBW: -#ifdef DIS_TEXT - if (opnd_size == SIZE16) - (void) strlcat(x->d86_mneu, "cbtw", OPLEN); - else if (opnd_size == SIZE32) - (void) strlcat(x->d86_mneu, "cwtl", OPLEN); - else - (void) strlcat(x->d86_mneu, "cltq", OPLEN); -#endif - wbit = LONG_OPND; - NOMEM; - break; - - case CWD: -#ifdef DIS_TEXT - if (opnd_size == SIZE16) - (void) strlcat(x->d86_mneu, "cwtd", OPLEN); - else if (opnd_size == SIZE32) - (void) strlcat(x->d86_mneu, "cltd", OPLEN); - else - (void) strlcat(x->d86_mneu, "cqtd", OPLEN); -#endif - wbit = LONG_OPND; - NOMEM; - break; - - case XMMSFNC: - /* - * sfence is sfence if mode is REG_ONLY. If mode isn't - * REG_ONLY, mnemonic should be 'clflush'. 
- */ - dtrace_get_modrm(x, &mode, ®, &r_m); - - /* sfence doesn't take operands */ -#ifdef DIS_TEXT - if (mode == REG_ONLY) { - (void) strlcat(x->d86_mneu, "sfence", OPLEN); - } else { - (void) strlcat(x->d86_mneu, "clflush", OPLEN); - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); - NOMEM; - } -#else - if (mode != REG_ONLY) { - dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); - dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); - NOMEM; - } -#endif - break; - - /* - * no disassembly, the mnemonic was all there was so go on - */ - case NORM: - if (dp->it_invalid32 && cpu_mode != SIZE64) - goto error; - NOMEM; - /*FALLTHROUGH*/ - case IMPLMEM: - break; - - case XMMFENCE: - /* - * Only the following exact byte sequences are allowed: - * - * 0f ae e8 lfence - * 0f ae f0 mfence - */ - if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 && - (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0) - goto error; - - break; - - - /* float reg */ - case F: -#ifdef DIS_TEXT - x->d86_numopnds = 1; - (void) strlcat(x->d86_opnd[0].d86_opnd, "%st(X)", OPLEN); - x->d86_opnd[0].d86_opnd[4] = r_m + '0'; -#endif - NOMEM; - break; - - /* float reg to float reg, with ret bit present */ - case FF: - vbit = opcode2 >> 2 & 0x1; /* vbit = 1: st -> st(i) */ - /*FALLTHROUGH*/ - case FFC: /* case for vbit always = 0 */ -#ifdef DIS_TEXT - x->d86_numopnds = 2; - (void) strlcat(x->d86_opnd[1 - vbit].d86_opnd, "%st", OPLEN); - (void) strlcat(x->d86_opnd[vbit].d86_opnd, "%st(X)", OPLEN); - x->d86_opnd[vbit].d86_opnd[4] = r_m + '0'; -#endif - NOMEM; - break; - - /* an invalid op code */ - case AM: - case DM: - case OVERRIDE: - case PREFIX: - case UNKNOWN: - NOMEM; - default: - goto error; - } /* end switch */ - if (x->d86_error) - goto error; - -done: -#ifdef DIS_MEM - /* - * compute the size of any memory accessed by the instruction - */ - if (x->d86_memsize != 0) { - return (0); - } else if (dp->it_stackop) { - switch (opnd_size) { - case SIZE16: - x->d86_memsize 
= 2; - break; - case SIZE32: - x->d86_memsize = 4; - break; - case SIZE64: - x->d86_memsize = 8; - break; - } - } else if (nomem || mode == REG_ONLY) { - x->d86_memsize = 0; - - } else if (dp->it_size != 0) { - /* - * In 64 bit mode descriptor table entries - * go up to 10 bytes and popf/pushf are always 8 bytes - */ - if (x->d86_mode == SIZE64 && dp->it_size == 6) - x->d86_memsize = 10; - else if (x->d86_mode == SIZE64 && opcode1 == 0x9 && - (opcode2 == 0xc || opcode2 == 0xd)) - x->d86_memsize = 8; - else - x->d86_memsize = dp->it_size; - - } else if (wbit == 0) { - x->d86_memsize = 1; - - } else if (wbit == LONG_OPND) { - if (opnd_size == SIZE64) - x->d86_memsize = 8; - else if (opnd_size == SIZE32) - x->d86_memsize = 4; - else - x->d86_memsize = 2; - - } else if (wbit == SEG_OPND) { - x->d86_memsize = 4; - - } else { - x->d86_memsize = 8; - } -#endif - return (0); - -error: -#ifdef DIS_TEXT - (void) strlcat(x->d86_mneu, "undef", OPLEN); -#endif - return (1); -} - -#ifdef DIS_TEXT - -/* - * Some instructions should have immediate operands printed - * as unsigned integers. We compare against this table. - */ -static char *unsigned_ops[] = { - "or", "and", "xor", "test", "in", "out", "lcall", "ljmp", - "rcr", "rcl", "ror", "rol", "shl", "shr", "sal", "psr", "psl", - 0 -}; - -static int -isunsigned_op(char *opcode) -{ - char *where; - int i; - int is_unsigned = 0; - - /* - * Work back to start of last mnemonic, since we may have - * prefixes on some opcodes. 
- */ - where = opcode + strlen(opcode) - 1; - while (where > opcode && *where != ' ') - --where; - if (*where == ' ') - ++where; - - for (i = 0; unsigned_ops[i]; ++i) { - if (strncmp(where, unsigned_ops[i], - strlen(unsigned_ops[i]))) - continue; - is_unsigned = 1; - break; - } - return (is_unsigned); -} - -/* ARGSUSED */ -void -dtrace_disx86_str(dis86_t *dis, uint_t mode, uintptr_t pc, char *buf, - size_t buflen) -{ - int i; - - dis->d86_sprintf_func(buf, buflen, "%-6s ", dis->d86_mneu); - - /* - * For PC-relative jumps, the pc is really the next pc after executing - * this instruction, so increment it appropriately. - */ - pc += dis->d86_len; - - for (i = 0; i < dis->d86_numopnds; i++) { - d86opnd_t *op = &dis->d86_opnd[i]; - int64_t sv; - uint64_t mask; - - if (i != 0) - (void) strlcat(buf, ",", buflen); - - (void) strlcat(buf, op->d86_prefix, buflen); - - sv = op->d86_value; - - switch (op->d86_mode) { - - case MODE_NONE: - - (void) strlcat(buf, op->d86_opnd, buflen); - break; - - case MODE_SIGNED: - case MODE_IMPLIED: - case MODE_OFFSET: - - if (dis->d86_seg_prefix) - (void) strlcat(buf, dis->d86_seg_prefix, - buflen); - - switch (op->d86_value_size) { - case 1: - sv = (int8_t)sv; - mask = 0xff; - break; - case 2: - sv = (int16_t)sv; - mask = 0xffff; - break; - case 4: - sv = (int32_t)sv; - mask = 0xffffffff; - break; - case 8: - mask = 0xffffffffffffffffULL; - break; - } - - if (op->d86_mode == MODE_SIGNED || - op->d86_mode == MODE_IMPLIED) - (void) strlcat(buf, "$", buflen); - - if (sv < 0 && sv > -0xffff && - !isunsigned_op(dis->d86_mneu)) { - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "-0%llo" : "-0x%llx", -sv & mask); - } else { - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? 
- "0%llo" : "0x%llx", sv & mask); - } - (void) strlcat(buf, op->d86_opnd, buflen); - break; - - case MODE_IPREL: - - switch (op->d86_value_size) { - case 1: - sv = (int8_t)sv; - break; - case 2: - sv = (int16_t)sv; - break; - case 4: - sv = (int32_t)sv; - break; - } - - if (sv < 0) - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "-0%llo" : "-0x%llx", -sv - dis->d86_len); - else - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "+0%llo" : "+0x%llx", sv + dis->d86_len); - - (void) strlcat(buf, "\t<", buflen); - - if (dis->d86_sym_lookup == NULL || - dis->d86_sym_lookup(dis->d86_data, pc + sv, - buf + strlen(buf), buflen - strlen(buf)) != 0) - dis->d86_sprintf_func(buf + strlen(buf), - buflen - strlen(buf), - (dis->d86_flags & DIS_OP_OCTAL) ? - "0%llo" : "0x%llx", pc + sv); - - (void) strlcat(buf, ">", buflen); - - break; - } - } -} - -#endif /* DIS_TEXT */ Index: src/external/cddl/osnet/dev/dtrace/i386/dis_tables.h =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/i386/dis_tables.h diff -N src/external/cddl/osnet/dev/dtrace/i386/dis_tables.h --- src/external/cddl/osnet/dev/dtrace/i386/dis_tables.h 21 Feb 2010 01:46:33 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,114 +0,0 @@ -/* $NetBSD: dis_tables.h,v 1.2 2010/02/21 01:46:33 darran Exp $ */ - -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - * - * $FreeBSD: src/sys/cddl/dev/dtrace/i386/dis_tables.h,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ - - -#ifndef _DIS_TABLES_H -#define _DIS_TABLES_H - -#if defined(sun) -#pragma ident "@(#)dis_tables.h 1.7 06/03/02 SMI" -#endif - -/* - * Constants and prototypes for the IA32 disassembler backend. See dis_tables.c - * for usage information and documentation. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -/* - * values for cpu mode - */ -#define SIZE16 1 -#define SIZE32 2 -#define SIZE64 3 - -#define OPLEN 256 -#define PFIXLEN 8 -#define NCPS 12 /* number of chars per symbol */ - -/* - * data structures that must be provided to dtrace_dis86() - */ -typedef struct d86opnd { - char d86_opnd[OPLEN]; /* symbolic rep of operand */ - char d86_prefix[PFIXLEN]; /* any prefix string or "" */ - uint_t d86_mode; /* mode for immediate */ - uint_t d86_value_size; /* size in bytes of d86_value */ - uint64_t d86_value; /* immediate value of opnd */ -} d86opnd_t; - -typedef struct dis86 { - uint_t d86_mode; - uint_t d86_error; - uint_t d86_len; /* instruction length */ - int d86_rmindex; /* index of modrm byte or -1 */ - uint_t d86_memsize; /* size of memory referenced */ - char d86_bytes[16]; /* bytes of instruction */ - char d86_mneu[OPLEN]; - uint_t d86_numopnds; - uint_t d86_rex_prefix; /* value of REX prefix if !0 */ - char *d86_seg_prefix; /* segment prefix, if any */ - uint_t d86_opnd_size; - uint_t 
d86_addr_size; - uint_t d86_got_modrm; - struct d86opnd d86_opnd[3]; /* up to 3 operands */ - int (*d86_check_func)(void *); - int (*d86_get_byte)(void *); -#ifdef DIS_TEXT - int (*d86_sym_lookup)(void *, uint64_t, char *, size_t); - int (*d86_sprintf_func)(char *, size_t, const char *, ...); - int d86_flags; - uint_t d86_imm_bytes; -#endif - void *d86_data; -} dis86_t; - -extern int dtrace_disx86(dis86_t *x, uint_t cpu_mode); - -#define DIS_OP_OCTAL 0x1 /* Print all numbers in octal */ - -#ifdef DIS_TEXT -extern void dtrace_disx86_str(dis86_t *x, uint_t cpu_mode, uintptr_t pc, - char *buf, size_t len); -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* _DIS_TABLES_H */ Index: src/external/cddl/osnet/dev/dtrace/i386/dtrace_asm.S =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/i386/dtrace_asm.S,v retrieving revision 1.6 diff -u -p -r1.6 dtrace_asm.S --- src/external/cddl/osnet/dev/dtrace/i386/dtrace_asm.S 27 Feb 2017 06:47:00 -0000 1.6 +++ src/external/cddl/osnet/dev/dtrace/i386/dtrace_asm.S 12 Apr 2017 16:19:26 -0000 @@ -21,7 +21,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/i386/dtrace_asm.S,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/i386/dtrace_asm.S 298171 2016-04-17 23:08:47Z markj $ */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 
@@ -146,11 +146,11 @@ invop_leave: movl 8(%esp), %eax /* load calling EIP */ incl %eax /* increment over LOCK prefix */ movl %eax, -8(%ebx) /* store calling EIP */ - movl %ebx, -4(%esp) /* temporarily store new %esp */ + subl $8, %ebx /* adjust for three pushes, one pop */ + movl %ebx, 8(%esp) /* temporarily store new %esp */ popl %ebx /* pop off temp */ popl %eax /* pop off temp */ - movl -12(%esp), %esp /* set stack pointer */ - subl $8, %esp /* adjust for three pushes, one pop */ + movl (%esp), %esp /* set stack pointer */ iret /* return from interrupt */ invop_nop: /* @@ -193,19 +193,7 @@ uint32_t dtrace_cas32(uint32_t *target, */ ENTRY(dtrace_cas32) - movl 4(%esp), %edx - movl 8(%esp), %eax - movl 12(%esp), %ecx - lock - cmpxchgl %ecx, (%edx) - ret - END(dtrace_cas32) - -/* -uint32_t dtrace_casptr(uint32_t *target, uint32_t cmp, uint32_t new) -*/ - - ENTRY(dtrace_casptr) + ALTENTRY(dtrace_casptr) movl 4(%esp), %edx movl 8(%esp), %eax movl 12(%esp), %ecx @@ -213,7 +201,7 @@ uint32_t dtrace_casptr(uint32_t *target, cmpxchgl %ecx, (%edx) ret END(dtrace_casptr) - + END(dtrace_cas32) /* uintptr_t dtrace_caller(int aframes) Index: src/external/cddl/osnet/dev/dtrace/i386/dtrace_isa.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/i386/dtrace_isa.c,v retrieving revision 1.5 diff -u -p -r1.5 dtrace_isa.c --- src/external/cddl/osnet/dev/dtrace/i386/dtrace_isa.c 27 Feb 2017 06:47:00 -0000 1.5 +++ src/external/cddl/osnet/dev/dtrace/i386/dtrace_isa.c 8 May 2017 08:27:51 -0000 @@ -21,7 +21,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/i386/dtrace_isa.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/i386/dtrace_isa.c 298171 2016-04-17 23:08:47Z markj $ */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
@@ -36,6 +36,8 @@ #include #include +#include "regset.h" + uintptr_t kernelbase = (uintptr_t)KERNBASE; #define INKERNEL(va) \ @@ -54,6 +56,8 @@ uint16_t dtrace_fuword16_nocheck(void *) uint32_t dtrace_fuword32_nocheck(void *); uint64_t dtrace_fuword64_nocheck(void *); +int dtrace_ustackdepth_max = 2048; + void dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *intrpc) @@ -112,11 +116,13 @@ dtrace_getustack_common(uint64_t *pcstac uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack. */ size_t s1, s2; #endif + uintptr_t oldsp; volatile uint16_t *flags = (volatile uint16_t *)&cpu_core[cpu_number()].cpuc_dtrace_flags; int ret = 0; ASSERT(pcstack == NULL || pcstack_limit > 0); + ASSERT(dtrace_ustackdepth_max > 0); #ifdef notyet /* XXX signal stack. */ if (p->p_model == DATAMODEL_NATIVE) { @@ -129,7 +135,16 @@ dtrace_getustack_common(uint64_t *pcstac #endif while (pc != 0) { - ret++; + /* + * We limit the number of times we can go around this + * loop to account for a circular stack. + */ + if (ret++ >= dtrace_ustackdepth_max) { + *flags |= CPU_DTRACE_BADSTACK; + cpu_core[cpu_number()].cpuc_dtrace_illval = sp; + break; + } + if (pcstack != NULL) { *pcstack++ = (uint64_t)pc; pcstack_limit--; @@ -140,6 +155,8 @@ dtrace_getustack_common(uint64_t *pcstac if (sp == 0) break; + oldsp = sp; + #ifdef notyet /* XXX signal stack. */ if (oldcontext == sp + s1 || oldcontext == sp + s2) { if (p->p_model == DATAMODEL_NATIVE) { @@ -178,6 +195,12 @@ dtrace_getustack_common(uint64_t *pcstac sp = dtrace_fuword32((void *)sp); #endif /* ! notyet */ + if (sp == oldsp) { + *flags |= CPU_DTRACE_BADSTACK; + cpu_core[cpu_number()].cpuc_dtrace_illval = sp; + break; + } + /* * This is totally bogus: if we faulted, we're going to clear * the fault and break. 
This is to deal with the apparently @@ -242,7 +265,7 @@ dtrace_getupcstack(uint64_t *pcstack, in pc = dtrace_fuword32((void *) sp); } - n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp); + n = dtrace_getustack_common(pcstack, pcstack_limit, pc, sp); ASSERT(n >= 0); ASSERT(n <= pcstack_limit); @@ -429,6 +452,7 @@ dtrace_getarg(int arg, int aframes) stack = (uintptr_t *)&frame->tf_esp + 1; goto load; } + } /* @@ -480,112 +504,102 @@ dtrace_getstackdepth(int aframes) return depth - aframes; } -#ifdef notyet ulong_t -dtrace_getreg(struct regs *rp, uint_t reg) +dtrace_getreg(struct trapframe *rp, uint_t reg) { -#if defined(__amd64) - int regmap[] = { - REG_GS, /* GS */ - REG_FS, /* FS */ - REG_ES, /* ES */ - REG_DS, /* DS */ - REG_RDI, /* EDI */ - REG_RSI, /* ESI */ - REG_RBP, /* EBP */ - REG_RSP, /* ESP */ - REG_RBX, /* EBX */ - REG_RDX, /* EDX */ - REG_RCX, /* ECX */ - REG_RAX, /* EAX */ - REG_TRAPNO, /* TRAPNO */ - REG_ERR, /* ERR */ - REG_RIP, /* EIP */ - REG_CS, /* CS */ - REG_RFL, /* EFL */ - REG_RSP, /* UESP */ - REG_SS /* SS */ + struct pcb *pcb; + int regmap[] = { /* Order is dependent on reg.d */ + REG_GS, /* 0 GS */ + REG_FS, /* 1 FS */ + REG_ES, /* 2 ES */ + REG_DS, /* 3 DS */ + REG_RDI, /* 4 EDI */ + REG_RSI, /* 5 ESI */ + REG_RBP, /* 6 EBP, REG_FP */ + REG_RSP, /* 7 ESP */ + REG_RBX, /* 8 EBX */ + REG_RDX, /* 9 EDX, REG_R1 */ + REG_RCX, /* 10 ECX */ + REG_RAX, /* 11 EAX, REG_R0 */ + REG_TRAPNO, /* 12 TRAPNO */ + REG_ERR, /* 13 ERR */ + REG_RIP, /* 14 EIP, REG_PC */ + REG_CS, /* 15 CS */ + REG_RFL, /* 16 EFL, REG_PS */ + REG_RSP, /* 17 UESP, REG_SP */ + REG_SS /* 18 SS */ }; - if (reg <= SS) { - if (reg >= sizeof (regmap) / sizeof (int)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); - } + if (reg > SS) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } - reg = regmap[reg]; - } else { - reg -= SS + 1; + if (reg >= sizeof (regmap) / sizeof (int)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); } - switch (reg) { + reg = 
regmap[reg]; + + switch(reg) { + case REG_GS: +#ifdef __FreeBSD__ + if ((pcb = curthread->td_pcb) == NULL) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } + return (pcb->pcb_gs); +#endif +#ifdef __NetBSD__ + return (rp->tf_gs); +#endif + case REG_FS: + return (rp->tf_fs); + case REG_ES: + return (rp->tf_es); + case REG_DS: + return (rp->tf_ds); case REG_RDI: - return (rp->r_rdi); + return (rp->tf_edi); case REG_RSI: - return (rp->r_rsi); - case REG_RDX: - return (rp->r_rdx); + return (rp->tf_esi); + case REG_RBP: + return (rp->tf_ebp); + case REG_RSP: +#ifdef __FreeBSD__ + return (rp->tf_isp); +#endif +#ifdef __NetBSD__ + return (rp->tf_esp); +#endif + case REG_RBX: + return (rp->tf_ebx); case REG_RCX: - return (rp->r_rcx); - case REG_R8: - return (rp->r_r8); - case REG_R9: - return (rp->r_r9); + return (rp->tf_ecx); case REG_RAX: - return (rp->r_rax); - case REG_RBX: - return (rp->r_rbx); - case REG_RBP: - return (rp->r_rbp); - case REG_R10: - return (rp->r_r10); - case REG_R11: - return (rp->r_r11); - case REG_R12: - return (rp->r_r12); - case REG_R13: - return (rp->r_r13); - case REG_R14: - return (rp->r_r14); - case REG_R15: - return (rp->r_r15); - case REG_DS: - return (rp->r_ds); - case REG_ES: - return (rp->r_es); - case REG_FS: - return (rp->r_fs); - case REG_GS: - return (rp->r_gs); + return (rp->tf_eax); case REG_TRAPNO: - return (rp->r_trapno); + return (rp->tf_trapno); case REG_ERR: - return (rp->r_err); + return (rp->tf_err); case REG_RIP: - return (rp->r_rip); + return (rp->tf_eip); case REG_CS: - return (rp->r_cs); - case REG_SS: - return (rp->r_ss); + return (rp->tf_cs); case REG_RFL: - return (rp->r_rfl); + return (rp->tf_eflags); +#if 0 case REG_RSP: - return (rp->r_rsp); + return (rp->tf_esp); +#endif + case REG_SS: + return (rp->tf_ss); default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } - -#else - if (reg > SS) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); - } - - return ((&rp->r_gs)[reg]); -#endif } -#endif 
static int dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size) Index: src/external/cddl/osnet/dev/dtrace/i386/dtrace_subr.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/dtrace/i386/dtrace_subr.c,v retrieving revision 1.8 diff -u -p -r1.8 dtrace_subr.c --- src/external/cddl/osnet/dev/dtrace/i386/dtrace_subr.c 27 Feb 2017 06:47:00 -0000 1.8 +++ src/external/cddl/osnet/dev/dtrace/i386/dtrace_subr.c 19 Apr 2017 17:15:40 -0000 @@ -21,7 +21,7 @@ * * CDDL HEADER END * - * $FreeBSD: src/sys/cddl/dev/dtrace/i386/dtrace_subr.c,v 1.3.2.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/dtrace/i386/dtrace_subr.c 313850 2017-02-17 03:27:20Z markj $ * */ /* @@ -29,6 +29,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2011, Joyent, Inc. All rights reserved. + */ + #include #include #include @@ -38,7 +42,6 @@ #include #include #include -//#include #include #include #include @@ -51,8 +54,8 @@ #include extern uintptr_t kernelbase; -extern uintptr_t dtrace_in_probe_addr; -extern int dtrace_in_probe; + +extern void dtrace_getnanotime(struct timespec *tsp); int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t); @@ -162,122 +165,6 @@ dtrace_sync(void) } #ifdef notyet -int (*dtrace_fasttrap_probe_ptr)(struct regs *); -int (*dtrace_pid_probe_ptr)(struct regs *); -int (*dtrace_return_probe_ptr)(struct regs *); - -void -dtrace_user_probe(struct regs *rp, caddr_t addr, processorid_t cpuid) -{ - krwlock_t *rwp; - proc_t *p = curproc; - extern void trap(struct regs *, caddr_t, processorid_t); - - if (USERMODE(rp->r_cs) || (rp->r_ps & PS_VM)) { - if (curthread->t_cred != p->p_cred) { - cred_t *oldcred = curthread->t_cred; - /* - * DTrace accesses t_cred in probe context. t_cred - * must always be either NULL, or point to a valid, - * allocated cred structure. 
- */ - curthread->t_cred = crgetcred(); - crfree(oldcred); - } - } - - if (rp->r_trapno == T_DTRACE_RET) { - uint8_t step = curthread->t_dtrace_step; - uint8_t ret = curthread->t_dtrace_ret; - uintptr_t npc = curthread->t_dtrace_npc; - - if (curthread->t_dtrace_ast) { - aston(curthread); - curthread->t_sig_check = 1; - } - - /* - * Clear all user tracing flags. - */ - curthread->t_dtrace_ft = 0; - - /* - * If we weren't expecting to take a return probe trap, kill - * the process as though it had just executed an unassigned - * trap instruction. - */ - if (step == 0) { - tsignal(curthread, SIGILL); - return; - } - - /* - * If we hit this trap unrelated to a return probe, we're - * just here to reset the AST flag since we deferred a signal - * until after we logically single-stepped the instruction we - * copied out. - */ - if (ret == 0) { - rp->r_pc = npc; - return; - } - - /* - * We need to wait until after we've called the - * dtrace_return_probe_ptr function pointer to set %pc. - */ - rwp = &CPU->cpu_ft_lock; - rw_enter(rwp, RW_READER); - if (dtrace_return_probe_ptr != NULL) - (void) (*dtrace_return_probe_ptr)(rp); - rw_exit(rwp); - rp->r_pc = npc; - - } else if (rp->r_trapno == T_DTRACE_PROBE) { - rwp = &CPU->cpu_ft_lock; - rw_enter(rwp, RW_READER); - if (dtrace_fasttrap_probe_ptr != NULL) - (void) (*dtrace_fasttrap_probe_ptr)(rp); - rw_exit(rwp); - - } else if (rp->r_trapno == T_BPTFLT) { - uint8_t instr; - rwp = &CPU->cpu_ft_lock; - - /* - * The DTrace fasttrap provider uses the breakpoint trap - * (int 3). We let DTrace take the first crack at handling - * this trap; if it's not a probe that DTrace knowns about, - * we call into the trap() routine to handle it like a - * breakpoint placed by a conventional debugger. 
- */ - rw_enter(rwp, RW_READER); - if (dtrace_pid_probe_ptr != NULL && - (*dtrace_pid_probe_ptr)(rp) == 0) { - rw_exit(rwp); - return; - } - rw_exit(rwp); - - /* - * If the instruction that caused the breakpoint trap doesn't - * look like an int 3 anymore, it may be that this tracepoint - * was removed just after the user thread executed it. In - * that case, return to user land to retry the instuction. - */ - if (fuword8((void *)(rp->r_pc - 1), &instr) == 0 && - instr != FASTTRAP_INSTR) { - rp->r_pc--; - return; - } - - trap(rp, addr, cpuid); - - } else { - trap(rp, addr, cpuid); - } -} - void dtrace_safe_synchronous_signal(void) { @@ -323,14 +210,15 @@ dtrace_safe_defer_signal(void) } /* - * If we've executed the original instruction, but haven't performed - * the jmp back to t->t_dtrace_npc or the clean up of any registers - * used to emulate %rip-relative instructions in 64-bit mode, do that - * here and take the signal right away. We detect this condition by - * seeing if the program counter is the range [scrpc + isz, astpc). + * If we have executed the original instruction, but we have performed + * neither the jmp back to t->t_dtrace_npc nor the clean up of any + * registers used to emulate %rip-relative instructions in 64-bit mode, + * we'll save ourselves some effort by doing that here and taking the + * signal right away. We detect this condition by seeing if the program + * counter is the range [scrpc + isz, astpc). 
*/ - if (t->t_dtrace_astpc - rp->r_pc < - t->t_dtrace_astpc - t->t_dtrace_scrpc - isz) { + if (rp->r_pc >= t->t_dtrace_scrpc + isz && + rp->r_pc < t->t_dtrace_astpc) { #ifdef __amd64 /* * If there is a scratch register and we're on the @@ -376,10 +264,8 @@ dtrace_safe_defer_signal(void) } #endif -#if 0 static int64_t tgt_cpu_tsc; static int64_t hst_cpu_tsc; -#endif static int64_t tsc_skew[MAXCPUS]; static uint64_t nsec_scale; @@ -395,29 +281,6 @@ dtrace_rdtsc(void) return (rv); } -#if 0 -static void -dtrace_gethrtime_init_sync(void *arg) -{ -#ifdef CHECK_SYNC - /* - * Delay this function from returning on one - * of the CPUs to check that the synchronisation - * works. - */ - uintptr_t cpu = (uintptr_t) arg; - - if (cpu == curcpu) { - int i; - for (i = 0; i < 1000000000; i++) - tgt_cpu_tsc = dtrace_rdtsc(); - tgt_cpu_tsc = 0; - } -#endif -} -#endif - -#if 0 static void dtrace_gethrtime_init_cpu(void *arg) { @@ -428,7 +291,6 @@ dtrace_gethrtime_init_cpu(void *arg) else hst_cpu_tsc = dtrace_rdtsc(); } -#endif void dtrace_gethrtime_init(void *arg) @@ -452,8 +314,8 @@ dtrace_gethrtime_init(void *arg) * another 32-bit integer without overflowing 64-bit. * Thus minimum supported TSC frequency is 62.5MHz. */ - //KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low")); - KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT))); + KASSERTMSG(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), + "TSC frequency is too low"); /* * We scale up NANOSEC/tsc_f ratio to preserve as much precision @@ -476,26 +338,37 @@ dtrace_gethrtime_init(void *arg) /* Already handled in x86/tsc.c for ci_data.cpu_cc_skew */ #if 0 - for (i = 0; i <= mp_maxid; i++) { + /* The current CPU is the reference one. 
*/ + sched_pin(); + tsc_skew[curcpu] = 0; + CPU_FOREACH(i) { if (i == curcpu) continue; - if (pcpu_find(i) == NULL) - continue; - - map = 0; - map |= (1 << curcpu); - map |= (1 << i); + pc = pcpu_find(i); + CPU_SETOF(PCPU_GET(cpuid), &map); + CPU_SET(pc->pc_cpuid, &map); - smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, + smp_rendezvous_cpus(map, NULL, dtrace_gethrtime_init_cpu, smp_no_rendevous_barrier, (void *)(uintptr_t) i); tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; } + sched_unpin(); #endif } +#ifdef __FreeBSD__ +#ifdef EARLY_AP_STARTUP +SYSINIT(dtrace_gethrtime_init, SI_SUB_DTRACE, SI_ORDER_ANY, + dtrace_gethrtime_init, NULL); +#else +SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, + NULL); +#endif +#endif + /* * DTrace needs a high resolution time function which can * be called from a probe context and guaranteed not to have @@ -526,27 +399,33 @@ dtrace_gethrtime() uint64_t dtrace_gethrestime(void) { - printf("%s(%d): XXX\n",__func__,__LINE__); - return (0); + struct timespec current_time; + + dtrace_getnanotime(&current_time); + + return (current_time.tv_sec * 1000000000ULL + current_time.tv_nsec); } /* Function to handle DTrace traps during probes. See i386/i386/trap.c */ int dtrace_trap(struct trapframe *frame, u_int type) { + bool nofault; cpuid_t cpuid = cpu_number(); /* current cpu id */ /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets - * a flag in it's per-cpu flags to indicate that it doesn't - * want to fault. On returning from the the probe, the no-fault + * a flag in its per-cpu flags to indicate that it doesn't + * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled.
* * Check if DTrace has enabled 'no-fault' mode: - * */ - if ((cpu_core[cpuid].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { + nofault = (cpu_core[cpuid].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0; + if (nofault) { + KASSERTMSG((read_eflags() & PSL_I) == 0, "interrupts enabled"); + /* * There are only a couple of trap types that are expected. * All the rest will be handled in the usual way. Index: src/external/cddl/osnet/dev/dtrace/i386/instr_size.c =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/i386/instr_size.c diff -N src/external/cddl/osnet/dev/dtrace/i386/instr_size.c --- src/external/cddl/osnet/dev/dtrace/i386/instr_size.c 21 Feb 2010 01:46:33 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,134 +0,0 @@ -/* $NetBSD: instr_size.c,v 1.2 2010/02/21 01:46:33 darran Exp $ */ - -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - * - * $FreeBSD: src/sys/cddl/dev/dtrace/i386/instr_size.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -/* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ - - -#if defined(sun) -#pragma ident "@(#)instr_size.c 1.14 05/07/08 SMI" -#endif - -#include -#include -#include -#if defined(sun) -#include -#include -#include -#include -#include -#else -typedef u_int model_t; -#define DATAMODEL_NATIVE 0 -int dtrace_instr_size(uchar_t *); -#endif - -#include - -/* - * This subsystem (with the minor exception of the instr_size() function) is - * is called from DTrace probe context. This imposes several requirements on - * the implementation: - * - * 1. External subsystems and functions may not be referenced. The one current - * exception is for cmn_err, but only to signal the detection of table - * errors. Assuming the tables are correct, no combination of input is to - * trigger a cmn_err call. - * - * 2. These functions can't be allowed to be traced. To prevent this, - * all functions in the probe path (everything except instr_size()) must - * have names that begin with "dtrace_". - */ - -typedef enum dis_isize { - DIS_ISIZE_INSTR, - DIS_ISIZE_OPERAND -} dis_isize_t; - - -/* - * get a byte from instruction stream - */ -static int -dtrace_dis_get_byte(void *p) -{ - int ret; - uchar_t **instr = p; - - ret = **instr; - *instr += 1; - - return (ret); -} - -/* - * Returns either the size of a given instruction, in bytes, or the size of that - * instruction's memory access (if any), depending on the value of `which'. - * If a programming error in the tables is detected, the system will panic to - * ease diagnosis. Invalid instructions will not be flagged. They will appear - * to have an instruction size between 1 and the actual size, and will be - * reported as having no memory impact. - */ -/* ARGSUSED2 */ -static int -dtrace_dis_isize(uchar_t *instr, dis_isize_t which, model_t model, int *rmindex) -{ - int sz; - dis86_t x; - uint_t mode = SIZE32; - -#if defined(sun) - mode = (model == DATAMODEL_LP64) ? 
SIZE64 : SIZE32; -#endif - - x.d86_data = (void **)&instr; - x.d86_get_byte = dtrace_dis_get_byte; - x.d86_check_func = NULL; - - if (dtrace_disx86(&x, mode) != 0) - return (-1); - - if (which == DIS_ISIZE_INSTR) - sz = x.d86_len; /* length of the instruction */ - else - sz = x.d86_memsize; /* length of memory operand */ - - if (rmindex != NULL) - *rmindex = x.d86_rmindex; - return (sz); -} - -int -dtrace_instr_size(uchar_t *instr) -{ - return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, DATAMODEL_NATIVE, - NULL)); -} Index: src/external/cddl/osnet/dev/dtrace/x86/dis_tables.c =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/x86/dis_tables.c diff -N src/external/cddl/osnet/dev/dtrace/x86/dis_tables.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/dtrace/x86/dis_tables.c 2 Mar 2017 10:54:26 -0000 @@ -0,0 +1,5597 @@ +/* + * + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2016 Joyent, Inc. + */ + +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* + * $FreeBSD: head/sys/cddl/dev/dtrace/x86/dis_tables.c 313133 2017-02-03 03:22:47Z markj $ + */ + +#include "dis_tables.h" + +/* BEGIN CSTYLED */ + +/* + * Disassembly begins in dis_distable, which is equivalent to the One-byte + * Opcode Map in the Intel IA32 ISA Reference (page A-6 in my copy). The + * decoding loops then traverse out through the other tables as necessary to + * decode a given instruction. + * + * The behavior of this file can be controlled by one of the following flags: + * + * DIS_TEXT Include text for disassembly + * DIS_MEM Include memory-size calculations + * + * Either or both of these can be defined. + * + * This file is not, and will never be, cstyled. If anything, the tables should + * be taken out another tab stop or two so nothing overlaps. + */ + +/* + * These functions must be provided for the consumer to do disassembly. + */ +#ifdef DIS_TEXT +extern char *strncpy(char *, const char *, size_t); +extern size_t strlen(const char *); +extern int strcmp(const char *, const char *); +extern int strncmp(const char *, const char *, size_t); +extern size_t strlcat(char *, const char *, size_t); +#endif + + +#define TERM 0 /* used to indicate that the 'indirect' */ + /* field terminates - no pointer. */ + +/* Used to decode instructions. */ +typedef struct instable { + struct instable *it_indirect; /* for decode op codes */ + uchar_t it_adrmode; +#ifdef DIS_TEXT + char it_name[NCPS]; + uint_t it_suffix:1; /* mnem + "w", "l", or "d" */ +#endif +#ifdef DIS_MEM + uint_t it_size:16; +#endif + uint_t it_invalid64:1; /* opcode invalid in amd64 */ + uint_t it_always64:1; /* 64 bit when in 64 bit mode */ + uint_t it_invalid32:1; /* invalid in IA32 */ + uint_t it_stackop:1; /* push/pop stack operation */ + uint_t it_vexwoxmm:1; /* VEX instructions that don't use XMM/YMM */ + uint_t it_avxsuf:1; /* AVX suffix required */ +} instable_t; + +/* + * Instruction formats. 
+ */ +enum { + UNKNOWN, + MRw, + IMlw, + IMw, + IR, + OA, + AO, + MS, + SM, + Mv, + Mw, + M, /* register or memory */ + MG9, /* register or memory in group 9 (prefix optional) */ + Mb, /* register or memory, always byte sized */ + MO, /* memory only (no registers) */ + PREF, + SWAPGS_RDTSCP, + MONITOR_MWAIT, + R, + RA, + SEG, + MR, + RM, + RM_66r, /* RM, but with a required 0x66 prefix */ + IA, + MA, + SD, + AD, + SA, + D, + INM, + SO, + BD, + I, + P, + V, + DSHIFT, /* for double shift that has an 8-bit immediate */ + U, + OVERRIDE, + NORM, /* instructions w/o ModR/M byte, no memory access */ + IMPLMEM, /* instructions w/o ModR/M byte, implicit mem access */ + O, /* for call */ + JTAB, /* jump table */ + IMUL, /* for 186 iimul instr */ + CBW, /* so data16 can be evaluated for cbw and variants */ + MvI, /* for 186 logicals */ + ENTER, /* for 186 enter instr */ + RMw, /* for 286 arpl instr */ + Ib, /* for push immediate byte */ + F, /* for 287 instructions */ + FF, /* for 287 instructions */ + FFC, /* for 287 instructions */ + DM, /* 16-bit data */ + AM, /* 16-bit addr */ + LSEG, /* for 3-bit seg reg encoding */ + MIb, /* for 386 logicals */ + SREG, /* for 386 special registers */ + PREFIX, /* a REP instruction prefix */ + LOCK, /* a LOCK instruction prefix */ + INT3, /* The int 3 instruction, which has a fake operand */ + INTx, /* The normal int instruction, with explicit int num */ + DSHIFTcl, /* for double shift that implicitly uses %cl */ + CWD, /* so data16 can be evaluated for cwd and variants */ + RET, /* single immediate 16-bit operand */ + MOVZ, /* for movs and movz, with different size operands */ + CRC32, /* for crc32, with different size operands */ + XADDB, /* for xaddb */ + MOVSXZ, /* AMD64 mov sign extend 32 to 64 bit instruction */ + MOVBE, /* movbe instruction */ + +/* + * MMX/SIMD addressing modes. 
+ */ + + MMO, /* Prefixable MMX/SIMD-Int mm/mem -> mm */ + MMOIMPL, /* Prefixable MMX/SIMD-Int mm -> mm (mem) */ + MMO3P, /* Prefixable MMX/SIMD-Int mm -> r32,imm8 */ + MMOM3, /* Prefixable MMX/SIMD-Int mm -> r32 */ + MMOS, /* Prefixable MMX/SIMD-Int mm -> mm/mem */ + MMOMS, /* Prefixable MMX/SIMD-Int mm -> mem */ + MMOPM, /* MMX/SIMD-Int mm/mem -> mm,imm8 */ + MMOPM_66o, /* MMX/SIMD-Int 0x66 optional mm/mem -> mm,imm8 */ + MMOPRM, /* Prefixable MMX/SIMD-Int r32/mem -> mm,imm8 */ + MMOSH, /* Prefixable MMX mm,imm8 */ + MM, /* MMX/SIMD-Int mm/mem -> mm */ + MMS, /* MMX/SIMD-Int mm -> mm/mem */ + MMSH, /* MMX mm,imm8 */ + XMMO, /* Prefixable SIMD xmm/mem -> xmm */ + XMMOS, /* Prefixable SIMD xmm -> xmm/mem */ + XMMOPM, /* Prefixable SIMD xmm/mem w/to xmm,imm8 */ + XMMOMX, /* Prefixable SIMD mm/mem -> xmm */ + XMMOX3, /* Prefixable SIMD xmm -> r32 */ + XMMOXMM, /* Prefixable SIMD xmm/mem -> mm */ + XMMOM, /* Prefixable SIMD xmm -> mem */ + XMMOMS, /* Prefixable SIMD mem -> xmm */ + XMM, /* SIMD xmm/mem -> xmm */ + XMM_66r, /* SIMD 0x66 prefix required xmm/mem -> xmm */ + XMM_66o, /* SIMD 0x66 prefix optional xmm/mem -> xmm */ + XMMXIMPL, /* SIMD xmm -> xmm (mem) */ + XMM3P, /* SIMD xmm -> r32,imm8 */ + XMM3PM_66r, /* SIMD 0x66 prefix required xmm -> r32/mem,imm8 */ + XMMP, /* SIMD xmm/mem w/to xmm,imm8 */ + XMMP_66o, /* SIMD 0x66 prefix optional xmm/mem w/to xmm,imm8 */ + XMMP_66r, /* SIMD 0x66 prefix required xmm/mem w/to xmm,imm8 */ + XMMPRM, /* SIMD r32/mem -> xmm,imm8 */ + XMMPRM_66r, /* SIMD 0x66 prefix required r32/mem -> xmm,imm8 */ + XMMS, /* SIMD xmm -> xmm/mem */ + XMMM, /* SIMD mem -> xmm */ + XMMM_66r, /* SIMD 0x66 prefix required mem -> xmm */ + XMMMS, /* SIMD xmm -> mem */ + XMM3MX, /* SIMD r32/mem -> xmm */ + XMM3MXS, /* SIMD xmm -> r32/mem */ + XMMSH, /* SIMD xmm,imm8 */ + XMMXM3, /* SIMD xmm/mem -> r32 */ + XMMX3, /* SIMD xmm -> r32 */ + XMMXMM, /* SIMD xmm/mem -> mm */ + XMMMX, /* SIMD mm -> xmm */ + XMMXM, /* SIMD xmm -> mm */ + XMMX2I, /* SIMD xmm 
-> xmm, imm, imm */ + XMM2I, /* SIMD xmm, imm, imm */ + XMMFENCE, /* SIMD lfence or mfence */ + XMMSFNC, /* SIMD sfence (none or mem) */ + XGETBV_XSETBV, + VEX_NONE, /* VEX no operand */ + VEX_MO, /* VEX mod_rm -> implicit reg */ + VEX_RMrX, /* VEX VEX.vvvv, mod_rm -> mod_reg */ + VEX_VRMrX, /* VEX mod_rm, VEX.vvvv -> mod_rm */ + VEX_RRX, /* VEX VEX.vvvv, mod_reg -> mod_rm */ + VEX_RMRX, /* VEX VEX.vvvv, mod_rm, imm8[7:4] -> mod_reg */ + VEX_MX, /* VEX mod_rm -> mod_reg */ + VEX_MXI, /* VEX mod_rm, imm8 -> mod_reg */ + VEX_XXI, /* VEX mod_rm, imm8 -> VEX.vvvv */ + VEX_MR, /* VEX mod_rm -> mod_reg */ + VEX_RRI, /* VEX mod_reg, mod_rm -> implicit(eflags/r32) */ + VEX_RX, /* VEX mod_reg -> mod_rm */ + VEX_RR, /* VEX mod_rm -> mod_reg */ + VEX_RRi, /* VEX mod_rm, imm8 -> mod_reg */ + VEX_RM, /* VEX mod_reg -> mod_rm */ + VEX_RIM, /* VEX mod_reg, imm8 -> mod_rm */ + VEX_RRM, /* VEX VEX.vvvv, mod_reg -> mod_rm */ + VEX_RMX, /* VEX VEX.vvvv, mod_rm -> mod_reg */ + VEX_SbVM, /* VEX SIB, VEX.vvvv -> mod_rm */ + VMx, /* vmcall/vmlaunch/vmresume/vmxoff */ + VMxo, /* VMx instruction with optional prefix */ + SVM, /* AMD SVM instructions */ + BLS, /* BLSR, BLSMSK, BLSI */ + FMA, /* FMA instructions, all VEX_RMrX */ + ADX /* ADX instructions, support REX.w, mod_rm->mod_reg */ +}; + +/* + * VEX prefixes + */ +#define VEX_2bytes 0xC5 /* the first byte of two-byte form */ +#define VEX_3bytes 0xC4 /* the first byte of three-byte form */ + +#define FILL 0x90 /* Fill byte used for alignment (nop) */ + +/* +** Register numbers for the i386 +*/ +#define EAX_REGNO 0 +#define ECX_REGNO 1 +#define EDX_REGNO 2 +#define EBX_REGNO 3 +#define ESP_REGNO 4 +#define EBP_REGNO 5 +#define ESI_REGNO 6 +#define EDI_REGNO 7 + +/* + * modes for immediate values + */ +#define MODE_NONE 0 +#define MODE_IPREL 1 /* signed IP relative value */ +#define MODE_SIGNED 2 /* sign extended immediate */ +#define MODE_IMPLIED 3 /* constant value implied from opcode */ +#define MODE_OFFSET 4 /* offset part of an 
address */ +#define MODE_RIPREL 5 /* like IPREL, but from %rip (amd64) */ + +/* + * The letters used in these macros are: + * IND - indirect to another table + * "T" - means to Terminate indirections (this is the final opcode) + * "S" - means "operand length suffix required" + * "Sa" - means AVX2 suffix (d/q) required + * "NS" - means "no suffix" which is the operand length suffix of the opcode + * "Z" - means instruction size arg required + * "u" - means the opcode is invalid in IA32 but valid in amd64 + * "x" - means the opcode is invalid in amd64, but not IA32 + * "y" - means the operand size is always 64 bits in 64 bit mode + * "p" - means push/pop stack operation + * "vr" - means VEX instruction that operates on normal registers, not fpu + */ + +#if defined(DIS_TEXT) && defined(DIS_MEM) +#define IND(table) {(instable_t *)table, 0, "", 0, 0, 0, 0, 0, 0} +#define INDx(table) {(instable_t *)table, 0, "", 0, 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 0, 1, 0} +#define TNSx(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0, 0} +#define TNSy(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0, 1} +#define TNSZ(name, amode, sz) {TERM, amode, name, 0, sz, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, name, 0, sz, 0, 1, 0, 0} +#define TNSZvr(name, amode, sz) {TERM, amode, name, 0, sz, 0, 0, 0, 0, 1} +#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, name, 1, 0, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, name, 1, sz, 0, 0, 0, 0} +#define TSaZ(name, amode, sz) {TERM, amode, name, 1, sz, 0, 0, 0, 0, 0, 1} +#define TSZx(name, amode, sz) {TERM, amode, name, 1, sz, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, 
amode, name, 1, sz, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} +#elif defined(DIS_TEXT) +#define IND(table) {(instable_t *)table, 0, "", 0, 0, 0, 0, 0} +#define INDx(table) {(instable_t *)table, 0, "", 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, name, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, name, 0, 0, 0, 1, 0} +#define TNSx(name, amode) {TERM, amode, name, 0, 1, 0, 0, 0} +#define TNSy(name, amode) {TERM, amode, name, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, name, 0, 0, 1, 0, 1} +#define TNSZ(name, amode, sz) {TERM, amode, name, 0, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, name, 0, 0, 1, 0, 0} +#define TNSZvr(name, amode, sz) {TERM, amode, name, 0, 0, 0, 0, 0, 1} +#define TS(name, amode) {TERM, amode, name, 1, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, name, 1, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, name, 1, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, name, 1, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, name, 1, 0, 0, 0, 0} +#define TSaZ(name, amode, sz) {TERM, amode, name, 1, 0, 0, 0, 0, 0, 1} +#define TSZx(name, amode, sz) {TERM, amode, name, 1, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, name, 1, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, "", 0, 0, 0, 0, 0} +#elif defined(DIS_MEM) +#define IND(table) {(instable_t *)table, 0, 0, 0, 0, 0, 0} +#define INDx(table) {(instable_t *)table, 0, 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, 0, 0, 0, 1, 0} +#define TNSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, 0, 0, 1, 0, 1} +#define TNSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} +#define TNSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} +#define TNSZvr(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0, 1} +#define TS(name, amode) {TERM, amode, 0, 0, 0, 0, 0} +#define 
TSx(name, amode) {TERM, amode, 0, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, 0, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, 0, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0} +#define TSaZ(name, amode, sz) {TERM, amode, sz, 0, 0, 0, 0, 0, 1} +#define TSZx(name, amode, sz) {TERM, amode, sz, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, sz, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0, 0} +#else +#define IND(table) {(instable_t *)table, 0, 0, 0, 0, 0} +#define INDx(table) {(instable_t *)table, 0, 1, 0, 0, 0} +#define TNS(name, amode) {TERM, amode, 0, 0, 0, 0} +#define TNSu(name, amode) {TERM, amode, 0, 0, 1, 0} +#define TNSy(name, amode) {TERM, amode, 0, 1, 0, 0} +#define TNSyp(name, amode) {TERM, amode, 0, 1, 0, 1} +#define TNSx(name, amode) {TERM, amode, 1, 0, 0, 0} +#define TNSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} +#define TNSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} +#define TNSZvr(name, amode, sz) {TERM, amode, 0, 0, 0, 0, 1} +#define TS(name, amode) {TERM, amode, 0, 0, 0, 0} +#define TSx(name, amode) {TERM, amode, 1, 0, 0, 0} +#define TSy(name, amode) {TERM, amode, 0, 1, 0, 0} +#define TSp(name, amode) {TERM, amode, 0, 0, 0, 1} +#define TSZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0} +#define TSaZ(name, amode, sz) {TERM, amode, 0, 0, 0, 0, 0, 1} +#define TSZx(name, amode, sz) {TERM, amode, 1, 0, 0, 0} +#define TSZy(name, amode, sz) {TERM, amode, 0, 1, 0, 0} +#define INVALID {TERM, UNKNOWN, 0, 0, 0, 0} +#endif + +#ifdef DIS_TEXT +/* + * this decodes the r_m field for mode's 0, 1, 2 in 16 bit mode + */ +const char *const dis_addr16[3][8] = { +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "", + "(%bx)", +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di", "(%bp)", + "(%bx)", +"(%bx,%si)", "(%bx,%di)", "(%bp,%si)", "(%bp,%di)", "(%si)", "(%di)", "(%bp)", + "(%bx)", +}; + + +/* + * This decodes 32 bit addressing mode r_m field for modes 0, 1, 2 + */ 
+const char *const dis_addr32_mode0[16] = { + "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "", "(%esi)", "(%edi)", + "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "", "(%r14d)", "(%r15d)" +}; + +const char *const dis_addr32_mode12[16] = { + "(%eax)", "(%ecx)", "(%edx)", "(%ebx)", "", "(%ebp)", "(%esi)", "(%edi)", + "(%r8d)", "(%r9d)", "(%r10d)", "(%r11d)", "", "(%r13d)", "(%r14d)", "(%r15d)" +}; + +/* + * This decodes 64 bit addressing mode r_m field for modes 0, 1, 2 + */ +const char *const dis_addr64_mode0[16] = { + "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rip)", "(%rsi)", "(%rdi)", + "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%rip)", "(%r14)", "(%r15)" +}; +const char *const dis_addr64_mode12[16] = { + "(%rax)", "(%rcx)", "(%rdx)", "(%rbx)", "", "(%rbp)", "(%rsi)", "(%rdi)", + "(%r8)", "(%r9)", "(%r10)", "(%r11)", "(%r12)", "(%r13)", "(%r14)", "(%r15)" +}; + +/* + * decode for scale from SIB byte + */ +const char *const dis_scale_factor[4] = { ")", ",2)", ",4)", ",8)" }; + +/* + * decode for scale from VSIB byte, note that we always include the scale factor + * to match gas. + */ +const char *const dis_vscale_factor[4] = { ",1)", ",2)", ",4)", ",8)" }; + +/* + * register decoding for normal references to registers (ie. 
not addressing) + */ +const char *const dis_REG8[16] = { + "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" +}; + +const char *const dis_REG8_REX[16] = { + "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil", + "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b" +}; + +const char *const dis_REG16[16] = { + "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di", + "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w" +}; + +const char *const dis_REG32[16] = { + "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi", + "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" +}; + +const char *const dis_REG64[16] = { + "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" +}; + +const char *const dis_DEBUGREG[16] = { + "%db0", "%db1", "%db2", "%db3", "%db4", "%db5", "%db6", "%db7", + "%db8", "%db9", "%db10", "%db11", "%db12", "%db13", "%db14", "%db15" +}; + +const char *const dis_CONTROLREG[16] = { + "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5?", "%cr6?", "%cr7?", + "%cr8", "%cr9?", "%cr10?", "%cr11?", "%cr12?", "%cr13?", "%cr14?", "%cr15?" 
+}; + +const char *const dis_TESTREG[16] = { + "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7", + "%tr0?", "%tr1?", "%tr2?", "%tr3", "%tr4", "%tr5", "%tr6", "%tr7" +}; + +const char *const dis_MMREG[16] = { + "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7", + "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" +}; + +const char *const dis_XMMREG[16] = { + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" +}; + +const char *const dis_YMMREG[16] = { + "%ymm0", "%ymm1", "%ymm2", "%ymm3", "%ymm4", "%ymm5", "%ymm6", "%ymm7", + "%ymm8", "%ymm9", "%ymm10", "%ymm11", "%ymm12", "%ymm13", "%ymm14", "%ymm15" +}; + +const char *const dis_SEGREG[16] = { + "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "", + "%es", "%cs", "%ss", "%ds", "%fs", "%gs", "", "" +}; + +/* + * SIMD predicate suffixes + */ +const char *const dis_PREDSUFFIX[8] = { + "eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord" +}; + +const char *const dis_AVXvgrp7[3][8] = { + /*0 1 2 3 4 5 6 7*/ +/*71*/ {"", "", "vpsrlw", "", "vpsraw", "", "vpsllw", ""}, +/*72*/ {"", "", "vpsrld", "", "vpsrad", "", "vpslld", ""}, +/*73*/ {"", "", "vpsrlq", "vpsrldq", "", "", "vpsllq", "vpslldq"} +}; + +#endif /* DIS_TEXT */ + +/* + * "decode table" for 64 bit mode MOVSXD instruction (opcode 0x63) + */ +const instable_t dis_opMOVSLD = TNS("movslq",MOVSXZ); + +/* + * "decode table" for pause and clflush instructions + */ +const instable_t dis_opPause = TNS("pause", NORM); + +/* + * Decode table for 0x0F00 opcodes + */ +const instable_t dis_op0F00[8] = { + +/* [0] */ TNS("sldt",M), TNS("str",M), TNSy("lldt",M), TNSy("ltr",M), +/* [4] */ TNSZ("verr",M,2), TNSZ("verw",M,2), INVALID, INVALID, +}; + + +/* + * Decode table for 0x0F01 opcodes + */ +const instable_t dis_op0F01[8] = { + +/* [0] */ TNSZ("sgdt",VMx,6), TNSZ("sidt",MONITOR_MWAIT,6), TNSZ("lgdt",XGETBV_XSETBV,6), TNSZ("lidt",SVM,6), 
+/* [4] */ TNSZ("smsw",M,2), INVALID, TNSZ("lmsw",M,2), TNS("invlpg",SWAPGS_RDTSCP), +}; + +/* + * Decode table for 0x0F18 opcodes -- SIMD prefetch + */ +const instable_t dis_op0F18[8] = { + +/* [0] */ TNS("prefetchnta",PREF),TNS("prefetcht0",PREF), TNS("prefetcht1",PREF), TNS("prefetcht2",PREF), +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0FAE opcodes -- SIMD state save/restore + */ +const instable_t dis_op0FAE[8] = { +/* [0] */ TNSZ("fxsave",M,512), TNSZ("fxrstor",M,512), TNS("ldmxcsr",M), TNS("stmxcsr",M), +/* [4] */ TNSZ("xsave",M,512), TNS("lfence",XMMFENCE), TNS("mfence",XMMFENCE), TNS("sfence",XMMSFNC), +}; + +/* + * Decode table for 0x0FBA opcodes + */ + +const instable_t dis_op0FBA[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ TS("bt",MIb), TS("bts",MIb), TS("btr",MIb), TS("btc",MIb), +}; + +/* + * Decode table for 0x0FC7 opcode (group 9) + */ + +const instable_t dis_op0FC7[8] = { + +/* [0] */ INVALID, TNS("cmpxchg8b",M), INVALID, INVALID, +/* [4] */ INVALID, INVALID, TNS("vmptrld",MG9), TNS("vmptrst",MG9), +}; + +/* + * Decode table for 0x0FC7 opcode (group 9) mode 3 + */ + +const instable_t dis_op0FC7m3[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ INVALID, INVALID, TNS("rdrand",MG9), TNS("rdseed", MG9), +}; + +/* + * Decode table for 0x0FC7 opcode with 0x66 prefix + */ + +const instable_t dis_op660FC7[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ INVALID, INVALID, TNS("vmclear",M), INVALID, +}; + +/* + * Decode table for 0x0FC7 opcode with 0xF3 prefix + */ + +const instable_t dis_opF30FC7[8] = { + +/* [0] */ INVALID, INVALID, INVALID, INVALID, +/* [4] */ INVALID, INVALID, TNS("vmxon",M), INVALID, +}; + +/* + * Decode table for 0x0FC8 opcode -- 486 bswap instruction + * + *bit pattern: 0000 1111 1100 1reg + */ +const instable_t dis_op0FC8[4] = { +/* [0] */ TNS("bswap",R), INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0F71, 0x0F72, and 0x0F73 
opcodes -- MMX instructions + */ +const instable_t dis_op0F7123[4][8] = { +{ +/* [70].0 */ INVALID, INVALID, INVALID, INVALID, +/* .4 */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [71].0 */ INVALID, INVALID, TNS("psrlw",MMOSH), INVALID, +/* .4 */ TNS("psraw",MMOSH), INVALID, TNS("psllw",MMOSH), INVALID, +}, { +/* [72].0 */ INVALID, INVALID, TNS("psrld",MMOSH), INVALID, +/* .4 */ TNS("psrad",MMOSH), INVALID, TNS("pslld",MMOSH), INVALID, +}, { +/* [73].0 */ INVALID, INVALID, TNS("psrlq",MMOSH), TNS("INVALID",MMOSH), +/* .4 */ INVALID, INVALID, TNS("psllq",MMOSH), TNS("INVALID",MMOSH), +} }; + +/* + * Decode table for SIMD extensions to above 0x0F71-0x0F73 opcodes. + */ +const instable_t dis_opSIMD7123[32] = { +/* [70].0 */ INVALID, INVALID, INVALID, INVALID, +/* .4 */ INVALID, INVALID, INVALID, INVALID, + +/* [71].0 */ INVALID, INVALID, TNS("psrlw",XMMSH), INVALID, +/* .4 */ TNS("psraw",XMMSH), INVALID, TNS("psllw",XMMSH), INVALID, + +/* [72].0 */ INVALID, INVALID, TNS("psrld",XMMSH), INVALID, +/* .4 */ TNS("psrad",XMMSH), INVALID, TNS("pslld",XMMSH), INVALID, + +/* [73].0 */ INVALID, INVALID, TNS("psrlq",XMMSH), TNS("psrldq",XMMSH), +/* .4 */ INVALID, INVALID, TNS("psllq",XMMSH), TNS("pslldq",XMMSH), +}; + +/* + * SIMD instructions have been wedged into the existing IA32 instruction + * set through the use of prefixes. That is, while 0x0f 0x58 may be + * addps, 0xf3 0x0f 0x58 (literally, repz addps) is a completely different + * instruction - addss. At present, three prefixes have been coopted in + * this manner - operand size (0x66), repnz (0xf2) and repz (0xf3). The + * following tables are used to provide the prefixed instruction names. + * The arrays are sparse, but they're fast. + */ + +/* + * Decode table for SIMD instructions with the operand size (0x66) prefix. 
+ */ +const instable_t dis_opSIMDdata16[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movupd",XMM,16), TNSZ("movupd",XMMS,16), TNSZ("movlpd",XMMM,8), TNSZ("movlpd",XMMMS,8), +/* [14] */ TNSZ("unpcklpd",XMM,16),TNSZ("unpckhpd",XMM,16),TNSZ("movhpd",XMMM,8), TNSZ("movhpd",XMMMS,8), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ TNSZ("movapd",XMM,16), TNSZ("movapd",XMMS,16), TNSZ("cvtpi2pd",XMMOMX,8),TNSZ("movntpd",XMMOMS,16), +/* [2C] */ TNSZ("cvttpd2pi",XMMXMM,16),TNSZ("cvtpd2pi",XMMXMM,16),TNSZ("ucomisd",XMM,8),TNSZ("comisd",XMM,8), + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ TNS("movmskpd",XMMOX3), TNSZ("sqrtpd",XMM,16), INVALID, INVALID, +/* [54] */ TNSZ("andpd",XMM,16), TNSZ("andnpd",XMM,16), TNSZ("orpd",XMM,16), TNSZ("xorpd",XMM,16), +/* [58] */ TNSZ("addpd",XMM,16), TNSZ("mulpd",XMM,16), TNSZ("cvtpd2ps",XMM,16),TNSZ("cvtps2dq",XMM,16), +/* [5C] */ TNSZ("subpd",XMM,16), TNSZ("minpd",XMM,16), TNSZ("divpd",XMM,16), TNSZ("maxpd",XMM,16), + +/* [60] */ TNSZ("punpcklbw",XMM,16),TNSZ("punpcklwd",XMM,16),TNSZ("punpckldq",XMM,16),TNSZ("packsswb",XMM,16), +/* [64] */ TNSZ("pcmpgtb",XMM,16), TNSZ("pcmpgtw",XMM,16), TNSZ("pcmpgtd",XMM,16), TNSZ("packuswb",XMM,16), +/* [68] */ TNSZ("punpckhbw",XMM,16),TNSZ("punpckhwd",XMM,16),TNSZ("punpckhdq",XMM,16),TNSZ("packssdw",XMM,16), +/* [6C] */ 
TNSZ("punpcklqdq",XMM,16),TNSZ("punpckhqdq",XMM,16),TNSZ("movd",XMM3MX,4),TNSZ("movdqa",XMM,16), + +/* [70] */ TNSZ("pshufd",XMMP,16), INVALID, INVALID, INVALID, +/* [74] */ TNSZ("pcmpeqb",XMM,16), TNSZ("pcmpeqw",XMM,16), TNSZ("pcmpeqd",XMM,16), INVALID, +/* [78] */ TNSZ("extrq",XMM2I,16), TNSZ("extrq",XMM,16), INVALID, INVALID, +/* [7C] */ TNSZ("haddpd",XMM,16), TNSZ("hsubpd",XMM,16), TNSZ("movd",XMM3MXS,4), TNSZ("movdqa",XMMS,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmppd",XMMP,16), INVALID, +/* [C4] */ TNSZ("pinsrw",XMMPRM,2),TNS("pextrw",XMM3P), TNSZ("shufpd",XMMP,16), INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ TNSZ("addsubpd",XMM,16),TNSZ("psrlw",XMM,16), TNSZ("psrld",XMM,16), TNSZ("psrlq",XMM,16), +/* [D4] */ TNSZ("paddq",XMM,16), TNSZ("pmullw",XMM,16), TNSZ("movq",XMMS,8), TNS("pmovmskb",XMMX3), +/* [D8] */ TNSZ("psubusb",XMM,16), TNSZ("psubusw",XMM,16), TNSZ("pminub",XMM,16), TNSZ("pand",XMM,16), +/* [DC] */ TNSZ("paddusb",XMM,16), TNSZ("paddusw",XMM,16), TNSZ("pmaxub",XMM,16), TNSZ("pandn",XMM,16), + +/* [E0] */ TNSZ("pavgb",XMM,16), TNSZ("psraw",XMM,16), TNSZ("psrad",XMM,16), TNSZ("pavgw",XMM,16), +/* [E4] */ 
TNSZ("pmulhuw",XMM,16), TNSZ("pmulhw",XMM,16), TNSZ("cvttpd2dq",XMM,16),TNSZ("movntdq",XMMS,16), +/* [E8] */ TNSZ("psubsb",XMM,16), TNSZ("psubsw",XMM,16), TNSZ("pminsw",XMM,16), TNSZ("por",XMM,16), +/* [EC] */ TNSZ("paddsb",XMM,16), TNSZ("paddsw",XMM,16), TNSZ("pmaxsw",XMM,16), TNSZ("pxor",XMM,16), + +/* [F0] */ INVALID, TNSZ("psllw",XMM,16), TNSZ("pslld",XMM,16), TNSZ("psllq",XMM,16), +/* [F4] */ TNSZ("pmuludq",XMM,16), TNSZ("pmaddwd",XMM,16), TNSZ("psadbw",XMM,16), TNSZ("maskmovdqu", XMMXIMPL,16), +/* [F8] */ TNSZ("psubb",XMM,16), TNSZ("psubw",XMM,16), TNSZ("psubd",XMM,16), TNSZ("psubq",XMM,16), +/* [FC] */ TNSZ("paddb",XMM,16), TNSZ("paddw",XMM,16), TNSZ("paddd",XMM,16), INVALID, +}; + +const instable_t dis_opAVX660F[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("vmovupd",VEX_MX,16), TNSZ("vmovupd",VEX_RX,16), TNSZ("vmovlpd",VEX_RMrX,8), TNSZ("vmovlpd",VEX_RM,8), +/* [14] */ TNSZ("vunpcklpd",VEX_RMrX,16),TNSZ("vunpckhpd",VEX_RMrX,16),TNSZ("vmovhpd",VEX_RMrX,8), TNSZ("vmovhpd",VEX_RM,8), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ TNSZ("vmovapd",VEX_MX,16), TNSZ("vmovapd",VEX_RX,16), INVALID, TNSZ("vmovntpd",VEX_RM,16), +/* [2C] */ INVALID, INVALID, TNSZ("vucomisd",VEX_MX,8),TNSZ("vcomisd",VEX_MX,8), + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ TNS("vmovmskpd",VEX_MR), 
TNSZ("vsqrtpd",VEX_MX,16), INVALID, INVALID, +/* [54] */ TNSZ("vandpd",VEX_RMrX,16), TNSZ("vandnpd",VEX_RMrX,16), TNSZ("vorpd",VEX_RMrX,16), TNSZ("vxorpd",VEX_RMrX,16), +/* [58] */ TNSZ("vaddpd",VEX_RMrX,16), TNSZ("vmulpd",VEX_RMrX,16), TNSZ("vcvtpd2ps",VEX_MX,16),TNSZ("vcvtps2dq",VEX_MX,16), +/* [5C] */ TNSZ("vsubpd",VEX_RMrX,16), TNSZ("vminpd",VEX_RMrX,16), TNSZ("vdivpd",VEX_RMrX,16), TNSZ("vmaxpd",VEX_RMrX,16), + +/* [60] */ TNSZ("vpunpcklbw",VEX_RMrX,16),TNSZ("vpunpcklwd",VEX_RMrX,16),TNSZ("vpunpckldq",VEX_RMrX,16),TNSZ("vpacksswb",VEX_RMrX,16), +/* [64] */ TNSZ("vpcmpgtb",VEX_RMrX,16), TNSZ("vpcmpgtw",VEX_RMrX,16), TNSZ("vpcmpgtd",VEX_RMrX,16), TNSZ("vpackuswb",VEX_RMrX,16), +/* [68] */ TNSZ("vpunpckhbw",VEX_RMrX,16),TNSZ("vpunpckhwd",VEX_RMrX,16),TNSZ("vpunpckhdq",VEX_RMrX,16),TNSZ("vpackssdw",VEX_RMrX,16), +/* [6C] */ TNSZ("vpunpcklqdq",VEX_RMrX,16),TNSZ("vpunpckhqdq",VEX_RMrX,16),TNSZ("vmovd",VEX_MX,4),TNSZ("vmovdqa",VEX_MX,16), + +/* [70] */ TNSZ("vpshufd",VEX_MXI,16), TNSZ("vgrp71",VEX_XXI,16), TNSZ("vgrp72",VEX_XXI,16), TNSZ("vgrp73",VEX_XXI,16), +/* [74] */ TNSZ("vpcmpeqb",VEX_RMrX,16), TNSZ("vpcmpeqw",VEX_RMrX,16), TNSZ("vpcmpeqd",VEX_RMrX,16), INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ TNSZ("vhaddpd",VEX_RMrX,16), TNSZ("vhsubpd",VEX_RMrX,16), TNSZ("vmovd",VEX_RR,4), TNSZ("vmovdqa",VEX_RX,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ 
INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("vcmppd",VEX_RMRX,16), INVALID, +/* [C4] */ TNSZ("vpinsrw",VEX_RMRX,2),TNS("vpextrw",VEX_MR), TNSZ("vshufpd",VEX_RMRX,16), INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ TNSZ("vaddsubpd",VEX_RMrX,16),TNSZ("vpsrlw",VEX_RMrX,16), TNSZ("vpsrld",VEX_RMrX,16), TNSZ("vpsrlq",VEX_RMrX,16), +/* [D4] */ TNSZ("vpaddq",VEX_RMrX,16), TNSZ("vpmullw",VEX_RMrX,16), TNSZ("vmovq",VEX_RX,8), TNS("vpmovmskb",VEX_MR), +/* [D8] */ TNSZ("vpsubusb",VEX_RMrX,16), TNSZ("vpsubusw",VEX_RMrX,16), TNSZ("vpminub",VEX_RMrX,16), TNSZ("vpand",VEX_RMrX,16), +/* [DC] */ TNSZ("vpaddusb",VEX_RMrX,16), TNSZ("vpaddusw",VEX_RMrX,16), TNSZ("vpmaxub",VEX_RMrX,16), TNSZ("vpandn",VEX_RMrX,16), + +/* [E0] */ TNSZ("vpavgb",VEX_RMrX,16), TNSZ("vpsraw",VEX_RMrX,16), TNSZ("vpsrad",VEX_RMrX,16), TNSZ("vpavgw",VEX_RMrX,16), +/* [E4] */ TNSZ("vpmulhuw",VEX_RMrX,16), TNSZ("vpmulhw",VEX_RMrX,16), TNSZ("vcvttpd2dq",VEX_MX,16),TNSZ("vmovntdq",VEX_RM,16), +/* [E8] */ TNSZ("vpsubsb",VEX_RMrX,16), TNSZ("vpsubsw",VEX_RMrX,16), TNSZ("vpminsw",VEX_RMrX,16), TNSZ("vpor",VEX_RMrX,16), +/* [EC] */ TNSZ("vpaddsb",VEX_RMrX,16), TNSZ("vpaddsw",VEX_RMrX,16), TNSZ("vpmaxsw",VEX_RMrX,16), TNSZ("vpxor",VEX_RMrX,16), + +/* [F0] */ INVALID, TNSZ("vpsllw",VEX_RMrX,16), TNSZ("vpslld",VEX_RMrX,16), TNSZ("vpsllq",VEX_RMrX,16), +/* [F4] */ TNSZ("vpmuludq",VEX_RMrX,16), TNSZ("vpmaddwd",VEX_RMrX,16), TNSZ("vpsadbw",VEX_RMrX,16), TNS("vmaskmovdqu",VEX_MX), +/* [F8] */ TNSZ("vpsubb",VEX_RMrX,16), TNSZ("vpsubw",VEX_RMrX,16), TNSZ("vpsubd",VEX_RMrX,16), TNSZ("vpsubq",VEX_RMrX,16), +/* [FC] */ TNSZ("vpaddb",VEX_RMrX,16), TNSZ("vpaddw",VEX_RMrX,16), TNSZ("vpaddd",VEX_RMrX,16), INVALID, +}; + +/* + * Decode table for SIMD instructions with the repnz (0xf2) prefix. 
+ */ +const instable_t dis_opSIMDrepnz[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movsd",XMM,8), TNSZ("movsd",XMMS,8), TNSZ("movddup",XMM,8), INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2sd",XMM3MX,4),TNSZ("movntsd",XMMMS,8), +/* [2C] */ TNSZ("cvttsd2si",XMMXM3,8),TNSZ("cvtsd2si",XMMXM3,8),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("sqrtsd",XMM,8), INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("addsd",XMM,8), TNSZ("mulsd",XMM,8), TNSZ("cvtsd2ss",XMM,8), INVALID, +/* [5C] */ TNSZ("subsd",XMM,8), TNSZ("minsd",XMM,8), TNSZ("divsd",XMM,8), TNSZ("maxsd",XMM,8), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ TNSZ("pshuflw",XMMP,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ TNSZ("insertq",XMMX2I,16),TNSZ("insertq",XMM,8),INVALID, INVALID, +/* [7C] */ TNSZ("haddps",XMM,16), TNSZ("hsubps",XMM,16), INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, 
INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmpsd",XMMP,8), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ TNSZ("addsubps",XMM,16),INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, TNS("movdq2q",XMMXM), INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("cvtpd2dq",XMM,16),INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ TNS("lddqu",XMMM), INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVXF20F[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("vmovsd",VEX_RMrX,8), TNSZ("vmovsd",VEX_RRX,8), TNSZ("vmovddup",VEX_MX,8), INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ 
INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("vcvtsi2sd",VEX_RMrX,4),INVALID, +/* [2C] */ TNSZ("vcvttsd2si",VEX_MR,8),TNSZ("vcvtsd2si",VEX_MR,8),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("vsqrtsd",VEX_RMrX,8), INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("vaddsd",VEX_RMrX,8), TNSZ("vmulsd",VEX_RMrX,8), TNSZ("vcvtsd2ss",VEX_RMrX,8), INVALID, +/* [5C] */ TNSZ("vsubsd",VEX_RMrX,8), TNSZ("vminsd",VEX_RMrX,8), TNSZ("vdivsd",VEX_RMrX,8), TNSZ("vmaxsd",VEX_RMrX,8), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ TNSZ("vpshuflw",VEX_MXI,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ TNSZ("vhaddps",VEX_RMrX,8), TNSZ("vhsubps",VEX_RMrX,8), INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* 
[B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("vcmpsd",VEX_RMRX,8), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ TNSZ("vaddsubps",VEX_RMrX,8), INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("vcvtpd2dq",VEX_MX,16),INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ TNSZ("vlddqu",VEX_MX,16), INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVXF20F3A[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, 
INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + 
+/* [F0] */ TNSZvr("rorx",VEX_MXI,6),INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVXF20F38[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, 
INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, TNSZvr("pdep",VEX_RMrX,5),TNSZvr("mulx",VEX_RMrX,5),TNSZvr("shrx",VEX_VRMrX,5), +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVXF30F38[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ INVALID, INVALID, INVALID, INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, 
INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + 
+/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, TNSZvr("pext",VEX_RMrX,5),INVALID, TNSZvr("sarx",VEX_VRMrX,5), +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; +/* + * Decode table for SIMD instructions with the repz (0xf3) prefix. + */ +const instable_t dis_opSIMDrepz[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("movss",XMM,4), TNSZ("movss",XMMS,4), TNSZ("movsldup",XMM,16),INVALID, +/* [14] */ INVALID, INVALID, TNSZ("movshdup",XMM,16),INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("cvtsi2ss",XMM3MX,4),TNSZ("movntss",XMMMS,4), +/* [2C] */ TNSZ("cvttss2si",XMMXM3,4),TNSZ("cvtss2si",XMMXM3,4),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("sqrtss",XMM,4), TNSZ("rsqrtss",XMM,4), TNSZ("rcpss",XMM,4), +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("addss",XMM,4), 
TNSZ("mulss",XMM,4), TNSZ("cvtss2sd",XMM,4), TNSZ("cvttps2dq",XMM,16), +/* [5C] */ TNSZ("subss",XMM,4), TNSZ("minss",XMM,4), TNSZ("divss",XMM,4), TNSZ("maxss",XMM,4), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, TNSZ("movdqu",XMM,16), + +/* [70] */ TNSZ("pshufhw",XMMP,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movq",XMM,8), TNSZ("movdqu",XMMS,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ TS("popcnt",MRw), INVALID, INVALID, INVALID, +/* [BC] */ TNSZ("tzcnt",MRw,5), TS("lzcnt",MRw), INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("cmpss",XMMP,4), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, TNS("movq2dq",XMMMX), INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("cvtdq2pd",XMM,8), INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] 
*/ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVXF30F[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("vmovss",VEX_RMrX,4), TNSZ("vmovss",VEX_RRX,4), TNSZ("vmovsldup",VEX_MX,4), INVALID, +/* [14] */ INVALID, INVALID, TNSZ("vmovshdup",VEX_MX,4), INVALID, +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, TNSZ("vcvtsi2ss",VEX_RMrX,4),INVALID, +/* [2C] */ TNSZ("vcvttss2si",VEX_MR,4),TNSZ("vcvtss2si",VEX_MR,4),INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, TNSZ("vsqrtss",VEX_RMrX,4), TNSZ("vrsqrtss",VEX_RMrX,4), TNSZ("vrcpss",VEX_RMrX,4), +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("vaddss",VEX_RMrX,4), TNSZ("vmulss",VEX_RMrX,4), TNSZ("vcvtss2sd",VEX_RMrX,4), TNSZ("vcvttps2dq",VEX_MX,16), +/* [5C] */ TNSZ("vsubss",VEX_RMrX,4), TNSZ("vminss",VEX_RMrX,4), TNSZ("vdivss",VEX_RMrX,4), TNSZ("vmaxss",VEX_RMrX,4), + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, TNSZ("vmovdqu",VEX_MX,16), + +/* 
[70] */ TNSZ("vpshufhw",VEX_MXI,16),INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("vmovq",VEX_MX,8), TNSZ("vmovdqu",VEX_RX,16), + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, TNSZ("vcmpss",VEX_RMRX,4), INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, TNSZ("vcvtdq2pd",VEX_MX,8), INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; +/* + * The following two tables are used to encode crc32 and movbe + * since they share the same opcodes.
+ */ +const instable_t dis_op0F38F0[2] = { +/* [00] */ TNS("crc32b",CRC32), + TS("movbe",MOVBE), +}; + +const instable_t dis_op0F38F1[2] = { +/* [00] */ TS("crc32",CRC32), + TS("movbe",MOVBE), +}; + +/* + * The following table is used to distinguish between adox and adcx which share + * the same opcodes. + */ +const instable_t dis_op0F38F6[2] = { +/* [00] */ TNS("adcx",ADX), + TNS("adox",ADX), +}; + +const instable_t dis_op0F38[256] = { +/* [00] */ TNSZ("pshufb",XMM_66o,16),TNSZ("phaddw",XMM_66o,16),TNSZ("phaddd",XMM_66o,16),TNSZ("phaddsw",XMM_66o,16), +/* [04] */ TNSZ("pmaddubsw",XMM_66o,16),TNSZ("phsubw",XMM_66o,16), TNSZ("phsubd",XMM_66o,16),TNSZ("phsubsw",XMM_66o,16), +/* [08] */ TNSZ("psignb",XMM_66o,16),TNSZ("psignw",XMM_66o,16),TNSZ("psignd",XMM_66o,16),TNSZ("pmulhrsw",XMM_66o,16), +/* [0C] */ INVALID, INVALID, INVALID, INVALID, + +/* [10] */ TNSZ("pblendvb",XMM_66r,16),INVALID, INVALID, INVALID, +/* [14] */ TNSZ("blendvps",XMM_66r,16),TNSZ("blendvpd",XMM_66r,16),INVALID, TNSZ("ptest",XMM_66r,16), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ TNSZ("pabsb",XMM_66o,16),TNSZ("pabsw",XMM_66o,16),TNSZ("pabsd",XMM_66o,16),INVALID, + +/* [20] */ TNSZ("pmovsxbw",XMM_66r,16),TNSZ("pmovsxbd",XMM_66r,16),TNSZ("pmovsxbq",XMM_66r,16),TNSZ("pmovsxwd",XMM_66r,16), +/* [24] */ TNSZ("pmovsxwq",XMM_66r,16),TNSZ("pmovsxdq",XMM_66r,16),INVALID, INVALID, +/* [28] */ TNSZ("pmuldq",XMM_66r,16),TNSZ("pcmpeqq",XMM_66r,16),TNSZ("movntdqa",XMMM_66r,16),TNSZ("packusdw",XMM_66r,16), +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ TNSZ("pmovzxbw",XMM_66r,16),TNSZ("pmovzxbd",XMM_66r,16),TNSZ("pmovzxbq",XMM_66r,16),TNSZ("pmovzxwd",XMM_66r,16), +/* [34] */ TNSZ("pmovzxwq",XMM_66r,16),TNSZ("pmovzxdq",XMM_66r,16),INVALID, TNSZ("pcmpgtq",XMM_66r,16), +/* [38] */ TNSZ("pminsb",XMM_66r,16),TNSZ("pminsd",XMM_66r,16),TNSZ("pminuw",XMM_66r,16),TNSZ("pminud",XMM_66r,16), +/* [3C] */ 
TNSZ("pmaxsb",XMM_66r,16),TNSZ("pmaxsd",XMM_66r,16),TNSZ("pmaxuw",XMM_66r,16),TNSZ("pmaxud",XMM_66r,16), + +/* [40] */ TNSZ("pmulld",XMM_66r,16),TNSZ("phminposuw",XMM_66r,16),INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ TNSy("invept", RM_66r), TNSy("invvpid", RM_66r),TNSy("invpcid", RM_66r),INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ TNSZ("sha1nexte",XMM,16),TNSZ("sha1msg1",XMM,16),TNSZ("sha1msg2",XMM,16),TNSZ("sha256rnds2",XMM,16), +/* [CC] */ 
TNSZ("sha256msg1",XMM,16),TNSZ("sha256msg2",XMM,16),INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, TNSZ("aesimc",XMM_66r,16), +/* [DC] */ TNSZ("aesenc",XMM_66r,16),TNSZ("aesenclast",XMM_66r,16),TNSZ("aesdec",XMM_66r,16),TNSZ("aesdeclast",XMM_66r,16), + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, +/* [F0] */ IND(dis_op0F38F0), IND(dis_op0F38F1), INVALID, INVALID, +/* [F4] */ INVALID, INVALID, IND(dis_op0F38F6), INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVX660F38[256] = { +/* [00] */ TNSZ("vpshufb",VEX_RMrX,16),TNSZ("vphaddw",VEX_RMrX,16),TNSZ("vphaddd",VEX_RMrX,16),TNSZ("vphaddsw",VEX_RMrX,16), +/* [04] */ TNSZ("vpmaddubsw",VEX_RMrX,16),TNSZ("vphsubw",VEX_RMrX,16), TNSZ("vphsubd",VEX_RMrX,16),TNSZ("vphsubsw",VEX_RMrX,16), +/* [08] */ TNSZ("vpsignb",VEX_RMrX,16),TNSZ("vpsignw",VEX_RMrX,16),TNSZ("vpsignd",VEX_RMrX,16),TNSZ("vpmulhrsw",VEX_RMrX,16), +/* [0C] */ TNSZ("vpermilps",VEX_RMrX,8),TNSZ("vpermilpd",VEX_RMrX,16),TNSZ("vtestps",VEX_RRI,8), TNSZ("vtestpd",VEX_RRI,16), + +/* [10] */ INVALID, INVALID, INVALID, TNSZ("vcvtph2ps",VEX_MX,16), +/* [14] */ INVALID, INVALID, TNSZ("vpermps",VEX_RMrX,16),TNSZ("vptest",VEX_RRI,16), +/* [18] */ TNSZ("vbroadcastss",VEX_MX,4),TNSZ("vbroadcastsd",VEX_MX,8),TNSZ("vbroadcastf128",VEX_MX,16),INVALID, +/* [1C] */ TNSZ("vpabsb",VEX_MX,16),TNSZ("vpabsw",VEX_MX,16),TNSZ("vpabsd",VEX_MX,16),INVALID, + +/* [20] */ TNSZ("vpmovsxbw",VEX_MX,16),TNSZ("vpmovsxbd",VEX_MX,16),TNSZ("vpmovsxbq",VEX_MX,16),TNSZ("vpmovsxwd",VEX_MX,16), +/* [24] */ TNSZ("vpmovsxwq",VEX_MX,16),TNSZ("vpmovsxdq",VEX_MX,16),INVALID, INVALID, +/* [28] */ 
TNSZ("vpmuldq",VEX_RMrX,16),TNSZ("vpcmpeqq",VEX_RMrX,16),TNSZ("vmovntdqa",VEX_MX,16),TNSZ("vpackusdw",VEX_RMrX,16), +/* [2C] */ TNSZ("vmaskmovps",VEX_RMrX,8),TNSZ("vmaskmovpd",VEX_RMrX,16),TNSZ("vmaskmovps",VEX_RRM,8),TNSZ("vmaskmovpd",VEX_RRM,16), + +/* [30] */ TNSZ("vpmovzxbw",VEX_MX,16),TNSZ("vpmovzxbd",VEX_MX,16),TNSZ("vpmovzxbq",VEX_MX,16),TNSZ("vpmovzxwd",VEX_MX,16), +/* [34] */ TNSZ("vpmovzxwq",VEX_MX,16),TNSZ("vpmovzxdq",VEX_MX,16),TNSZ("vpermd",VEX_RMrX,16),TNSZ("vpcmpgtq",VEX_RMrX,16), +/* [38] */ TNSZ("vpminsb",VEX_RMrX,16),TNSZ("vpminsd",VEX_RMrX,16),TNSZ("vpminuw",VEX_RMrX,16),TNSZ("vpminud",VEX_RMrX,16), +/* [3C] */ TNSZ("vpmaxsb",VEX_RMrX,16),TNSZ("vpmaxsd",VEX_RMrX,16),TNSZ("vpmaxuw",VEX_RMrX,16),TNSZ("vpmaxud",VEX_RMrX,16), + +/* [40] */ TNSZ("vpmulld",VEX_RMrX,16),TNSZ("vphminposuw",VEX_MX,16),INVALID, INVALID, +/* [44] */ INVALID, TSaZ("vpsrlv",VEX_RMrX,16),TNSZ("vpsravd",VEX_RMrX,16),TSaZ("vpsllv",VEX_RMrX,16), +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ TNSZ("vpbroadcastd",VEX_MX,16),TNSZ("vpbroadcastq",VEX_MX,16),TNSZ("vbroadcasti128",VEX_MX,16),INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ TNSZ("vpbroadcastb",VEX_MX,16),TNSZ("vpbroadcastw",VEX_MX,16),INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ TSaZ("vpmaskmov",VEX_RMrX,16),INVALID, TSaZ("vpmaskmov",VEX_RRM,16),INVALID, + +/* [90] */ 
TNSZ("vpgatherd",VEX_SbVM,16),TNSZ("vpgatherq",VEX_SbVM,16),TNSZ("vgatherdp",VEX_SbVM,16),TNSZ("vgatherqp",VEX_SbVM,16), +/* [94] */ INVALID, INVALID, TNSZ("vfmaddsub132p",FMA,16),TNSZ("vfmsubadd132p",FMA,16), +/* [98] */ TNSZ("vfmadd132p",FMA,16),TNSZ("vfmadd132s",FMA,16),TNSZ("vfmsub132p",FMA,16),TNSZ("vfmsub132s",FMA,16), +/* [9C] */ TNSZ("vfnmadd132p",FMA,16),TNSZ("vfnmadd132s",FMA,16),TNSZ("vfnmsub132p",FMA,16),TNSZ("vfnmsub132s",FMA,16), + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, TNSZ("vfmaddsub213p",FMA,16),TNSZ("vfmsubadd213p",FMA,16), +/* [A8] */ TNSZ("vfmadd213p",FMA,16),TNSZ("vfmadd213s",FMA,16),TNSZ("vfmsub213p",FMA,16),TNSZ("vfmsub213s",FMA,16), +/* [AC] */ TNSZ("vfnmadd213p",FMA,16),TNSZ("vfnmadd213s",FMA,16),TNSZ("vfnmsub213p",FMA,16),TNSZ("vfnmsub213s",FMA,16), + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, TNSZ("vfmaddsub231p",FMA,16),TNSZ("vfmsubadd231p",FMA,16), +/* [B8] */ TNSZ("vfmadd231p",FMA,16),TNSZ("vfmadd231s",FMA,16),TNSZ("vfmsub231p",FMA,16),TNSZ("vfmsub231s",FMA,16), +/* [BC] */ TNSZ("vfnmadd231p",FMA,16),TNSZ("vfnmadd231s",FMA,16),TNSZ("vfnmsub231p",FMA,16),TNSZ("vfnmsub231s",FMA,16), + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, TNSZ("vaesimc",VEX_MX,16), +/* [DC] */ TNSZ("vaesenc",VEX_RMrX,16),TNSZ("vaesenclast",VEX_RMrX,16),TNSZ("vaesdec",VEX_RMrX,16),TNSZ("vaesdeclast",VEX_RMrX,16), + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, +/* [F0] */ IND(dis_op0F38F0), IND(dis_op0F38F1), INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, 
TNSZvr("shlx",VEX_VRMrX,5), +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_op0F3A[256] = { +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ TNSZ("roundps",XMMP_66r,16),TNSZ("roundpd",XMMP_66r,16),TNSZ("roundss",XMMP_66r,16),TNSZ("roundsd",XMMP_66r,16), +/* [0C] */ TNSZ("blendps",XMMP_66r,16),TNSZ("blendpd",XMMP_66r,16),TNSZ("pblendw",XMMP_66r,16),TNSZ("palignr",XMMP_66o,16), + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ TNSZ("pextrb",XMM3PM_66r,8),TNSZ("pextrw",XMM3PM_66r,16),TSZ("pextr",XMM3PM_66r,16),TNSZ("extractps",XMM3PM_66r,16), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, + +/* [20] */ TNSZ("pinsrb",XMMPRM_66r,8),TNSZ("insertps",XMMP_66r,16),TSZ("pinsr",XMMPRM_66r,16),INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ TNSZ("dpps",XMMP_66r,16),TNSZ("dppd",XMMP_66r,16),TNSZ("mpsadbw",XMMP_66r,16),INVALID, +/* [44] */ TNSZ("pclmulqdq",XMMP_66r,16),INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ TNSZ("pcmpestrm",XMMP_66r,16),TNSZ("pcmpestri",XMMP_66r,16),TNSZ("pcmpistrm",XMMP_66r,16),TNSZ("pcmpistri",XMMP_66r,16), +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, + +/* [70] */ INVALID, 
INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ TNSZ("sha1rnds4",XMMP,16),INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, TNSZ("aeskeygenassist",XMMP_66r,16), + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +const instable_t dis_opAVX660F3A[256] = { +/* [00] */ TNSZ("vpermq",VEX_MXI,16),TNSZ("vpermpd",VEX_MXI,16),TNSZ("vpblendd",VEX_RMRX,16),INVALID, +/* [04] */ 
TNSZ("vpermilps",VEX_MXI,8),TNSZ("vpermilpd",VEX_MXI,16),TNSZ("vperm2f128",VEX_RMRX,16),INVALID, +/* [08] */ TNSZ("vroundps",VEX_MXI,16),TNSZ("vroundpd",VEX_MXI,16),TNSZ("vroundss",VEX_RMRX,16),TNSZ("vroundsd",VEX_RMRX,16), +/* [0C] */ TNSZ("vblendps",VEX_RMRX,16),TNSZ("vblendpd",VEX_RMRX,16),TNSZ("vpblendw",VEX_RMRX,16),TNSZ("vpalignr",VEX_RMRX,16), + +/* [10] */ INVALID, INVALID, INVALID, INVALID, +/* [14] */ TNSZ("vpextrb",VEX_RRi,8),TNSZ("vpextrw",VEX_RRi,16),TNSZ("vpextrd",VEX_RRi,16),TNSZ("vextractps",VEX_RM,16), +/* [18] */ TNSZ("vinsertf128",VEX_RMRX,16),TNSZ("vextractf128",VEX_RX,16),INVALID, INVALID, +/* [1C] */ INVALID, TNSZ("vcvtps2ph",VEX_RX,16), INVALID, INVALID, + +/* [20] */ TNSZ("vpinsrb",VEX_RMRX,8),TNSZ("vinsertps",VEX_RMRX,16),TNSZ("vpinsrd",VEX_RMRX,16),INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ INVALID, INVALID, INVALID, INVALID, +/* [2C] */ INVALID, INVALID, INVALID, INVALID, + +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ TNSZ("vinserti128",VEX_RMRX,16),TNSZ("vextracti128",VEX_RIM,16),INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, + +/* [40] */ TNSZ("vdpps",VEX_RMRX,16),TNSZ("vdppd",VEX_RMRX,16),TNSZ("vmpsadbw",VEX_RMRX,16),INVALID, +/* [44] */ TNSZ("vpclmulqdq",VEX_RMRX,16),INVALID, TNSZ("vperm2i128",VEX_RMRX,16),INVALID, +/* [48] */ INVALID, INVALID, TNSZ("vblendvps",VEX_RMRX,8), TNSZ("vblendvpd",VEX_RMRX,16), +/* [4C] */ TNSZ("vpblendvb",VEX_RMRX,16),INVALID, INVALID, INVALID, + +/* [50] */ INVALID, INVALID, INVALID, INVALID, +/* [54] */ INVALID, INVALID, INVALID, INVALID, +/* [58] */ INVALID, INVALID, INVALID, INVALID, +/* [5C] */ INVALID, INVALID, INVALID, INVALID, + +/* [60] */ TNSZ("vpcmpestrm",VEX_MXI,16),TNSZ("vpcmpestri",VEX_MXI,16),TNSZ("vpcmpistrm",VEX_MXI,16),TNSZ("vpcmpistri",VEX_MXI,16), +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, 
INVALID, + +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, INVALID, +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, + +/* [80] */ INVALID, INVALID, INVALID, INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, + +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, + +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, INVALID, INVALID, + +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, + +/* [C0] */ INVALID, INVALID, INVALID, INVALID, +/* [C4] */ INVALID, INVALID, INVALID, INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, + +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, TNSZ("vaeskeygenassist",VEX_MXI,16), + +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, + +/* [F0] */ INVALID, INVALID, INVALID, INVALID, +/* [F4] */ INVALID, INVALID, INVALID, INVALID, +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0F0D which uses the first byte of the mod_rm to + * indicate a sub-code. 
+ */ +const instable_t dis_op0F0D[8] = { +/* [00] */ INVALID, TNS("prefetchw",PREF), TNS("prefetchwt1",PREF),INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Decode table for 0x0F opcodes + */ + +const instable_t dis_op0F[16][16] = { +{ +/* [00] */ IND(dis_op0F00), IND(dis_op0F01), TNS("lar",MR), TNS("lsl",MR), +/* [04] */ INVALID, TNS("syscall",NORM), TNS("clts",NORM), TNS("sysret",NORM), +/* [08] */ TNS("invd",NORM), TNS("wbinvd",NORM), INVALID, TNS("ud2",NORM), +/* [0C] */ INVALID, IND(dis_op0F0D), INVALID, INVALID, +}, { +/* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8), +/* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8), +/* [18] */ IND(dis_op0F18), INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, TS("nop",Mw), +}, { +/* [20] */ TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), TSy("mov",SREG), +/* [24] */ TSx("mov",SREG), INVALID, TSx("mov",SREG), INVALID, +/* [28] */ TNSZ("movaps",XMMO,16), TNSZ("movaps",XMMOS,16),TNSZ("cvtpi2ps",XMMOMX,8),TNSZ("movntps",XMMOS,16), +/* [2C] */ TNSZ("cvttps2pi",XMMOXMM,8),TNSZ("cvtps2pi",XMMOXMM,8),TNSZ("ucomiss",XMMO,4),TNSZ("comiss",XMMO,4), +}, { +/* [30] */ TNS("wrmsr",NORM), TNS("rdtsc",NORM), TNS("rdmsr",NORM), TNS("rdpmc",NORM), +/* [34] */ TNSx("sysenter",NORM), TNSx("sysexit",NORM), INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [40] */ TS("cmovx.o",MR), TS("cmovx.no",MR), TS("cmovx.b",MR), TS("cmovx.ae",MR), +/* [44] */ TS("cmovx.e",MR), TS("cmovx.ne",MR), TS("cmovx.be",MR), TS("cmovx.a",MR), +/* [48] */ TS("cmovx.s",MR), TS("cmovx.ns",MR), TS("cmovx.pe",MR), TS("cmovx.po",MR), +/* [4C] */ TS("cmovx.l",MR), TS("cmovx.ge",MR), TS("cmovx.le",MR), TS("cmovx.g",MR), +}, { +/* [50] */ TNS("movmskps",XMMOX3), TNSZ("sqrtps",XMMO,16), TNSZ("rsqrtps",XMMO,16),TNSZ("rcpps",XMMO,16), +/* [54] */ 
TNSZ("andps",XMMO,16), TNSZ("andnps",XMMO,16), TNSZ("orps",XMMO,16), TNSZ("xorps",XMMO,16), +/* [58] */ TNSZ("addps",XMMO,16), TNSZ("mulps",XMMO,16), TNSZ("cvtps2pd",XMMO,8),TNSZ("cvtdq2ps",XMMO,16), +/* [5C] */ TNSZ("subps",XMMO,16), TNSZ("minps",XMMO,16), TNSZ("divps",XMMO,16), TNSZ("maxps",XMMO,16), +}, { +/* [60] */ TNSZ("punpcklbw",MMO,4),TNSZ("punpcklwd",MMO,4),TNSZ("punpckldq",MMO,4),TNSZ("packsswb",MMO,8), +/* [64] */ TNSZ("pcmpgtb",MMO,8), TNSZ("pcmpgtw",MMO,8), TNSZ("pcmpgtd",MMO,8), TNSZ("packuswb",MMO,8), +/* [68] */ TNSZ("punpckhbw",MMO,8),TNSZ("punpckhwd",MMO,8),TNSZ("punpckhdq",MMO,8),TNSZ("packssdw",MMO,8), +/* [6C] */ TNSZ("INVALID",MMO,0), TNSZ("INVALID",MMO,0), TNSZ("movd",MMO,4), TNSZ("movq",MMO,8), +}, { +/* [70] */ TNSZ("pshufw",MMOPM,8), TNS("psrXXX",MR), TNS("psrXXX",MR), TNS("psrXXX",MR), +/* [74] */ TNSZ("pcmpeqb",MMO,8), TNSZ("pcmpeqw",MMO,8), TNSZ("pcmpeqd",MMO,8), TNS("emms",NORM), +/* [78] */ TNSy("vmread",RM), TNSy("vmwrite",MR), INVALID, INVALID, +/* [7C] */ INVALID, INVALID, TNSZ("movd",MMOS,4), TNSZ("movq",MMOS,8), +}, { +/* [80] */ TNS("jo",D), TNS("jno",D), TNS("jb",D), TNS("jae",D), +/* [84] */ TNS("je",D), TNS("jne",D), TNS("jbe",D), TNS("ja",D), +/* [88] */ TNS("js",D), TNS("jns",D), TNS("jp",D), TNS("jnp",D), +/* [8C] */ TNS("jl",D), TNS("jge",D), TNS("jle",D), TNS("jg",D), +}, { +/* [90] */ TNS("seto",Mb), TNS("setno",Mb), TNS("setb",Mb), TNS("setae",Mb), +/* [94] */ TNS("sete",Mb), TNS("setne",Mb), TNS("setbe",Mb), TNS("seta",Mb), +/* [98] */ TNS("sets",Mb), TNS("setns",Mb), TNS("setp",Mb), TNS("setnp",Mb), +/* [9C] */ TNS("setl",Mb), TNS("setge",Mb), TNS("setle",Mb), TNS("setg",Mb), +}, { +/* [A0] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("cpuid",NORM), TS("bt",RMw), +/* [A4] */ TS("shld",DSHIFT), TS("shld",DSHIFTcl), INVALID, INVALID, +/* [A8] */ TSp("push",LSEG), TSp("pop",LSEG), TNS("rsm",NORM), TS("bts",RMw), +/* [AC] */ TS("shrd",DSHIFT), TS("shrd",DSHIFTcl), IND(dis_op0FAE), TS("imul",MRw), +}, { +/* [B0] */ 
TNS("cmpxchgb",RMw), TS("cmpxchg",RMw), TS("lss",MR), TS("btr",RMw), +/* [B4] */ TS("lfs",MR), TS("lgs",MR), TS("movzb",MOVZ), TNS("movzwl",MOVZ), +/* [B8] */ TNS("INVALID",MRw), INVALID, IND(dis_op0FBA), TS("btc",RMw), +/* [BC] */ TS("bsf",MRw), TS("bsr",MRw), TS("movsb",MOVZ), TNS("movswl",MOVZ), +}, { +/* [C0] */ TNS("xaddb",XADDB), TS("xadd",RMw), TNSZ("cmpps",XMMOPM,16),TNS("movnti",RM), +/* [C4] */ TNSZ("pinsrw",MMOPRM,2),TNS("pextrw",MMO3P), TNSZ("shufps",XMMOPM,16),IND(dis_op0FC7), +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [D0] */ INVALID, TNSZ("psrlw",MMO,8), TNSZ("psrld",MMO,8), TNSZ("psrlq",MMO,8), +/* [D4] */ TNSZ("paddq",MMO,8), TNSZ("pmullw",MMO,8), TNSZ("INVALID",MMO,0), TNS("pmovmskb",MMOM3), +/* [D8] */ TNSZ("psubusb",MMO,8), TNSZ("psubusw",MMO,8), TNSZ("pminub",MMO,8), TNSZ("pand",MMO,8), +/* [DC] */ TNSZ("paddusb",MMO,8), TNSZ("paddusw",MMO,8), TNSZ("pmaxub",MMO,8), TNSZ("pandn",MMO,8), +}, { +/* [E0] */ TNSZ("pavgb",MMO,8), TNSZ("psraw",MMO,8), TNSZ("psrad",MMO,8), TNSZ("pavgw",MMO,8), +/* [E4] */ TNSZ("pmulhuw",MMO,8), TNSZ("pmulhw",MMO,8), TNS("INVALID",XMMO), TNSZ("movntq",MMOMS,8), +/* [E8] */ TNSZ("psubsb",MMO,8), TNSZ("psubsw",MMO,8), TNSZ("pminsw",MMO,8), TNSZ("por",MMO,8), +/* [EC] */ TNSZ("paddsb",MMO,8), TNSZ("paddsw",MMO,8), TNSZ("pmaxsw",MMO,8), TNSZ("pxor",MMO,8), +}, { +/* [F0] */ INVALID, TNSZ("psllw",MMO,8), TNSZ("pslld",MMO,8), TNSZ("psllq",MMO,8), +/* [F4] */ TNSZ("pmuludq",MMO,8), TNSZ("pmaddwd",MMO,8), TNSZ("psadbw",MMO,8), TNSZ("maskmovq",MMOIMPL,8), +/* [F8] */ TNSZ("psubb",MMO,8), TNSZ("psubw",MMO,8), TNSZ("psubd",MMO,8), TNSZ("psubq",MMO,8), +/* [FC] */ TNSZ("paddb",MMO,8), TNSZ("paddw",MMO,8), TNSZ("paddd",MMO,8), INVALID, +} }; + +const instable_t dis_opAVX0F[16][16] = { +{ +/* [00] */ INVALID, INVALID, INVALID, INVALID, +/* [04] */ INVALID, INVALID, INVALID, INVALID, +/* [08] */ INVALID, INVALID, INVALID, INVALID, +/* [0C] */ INVALID, INVALID, INVALID, 
INVALID, +}, { +/* [10] */ TNSZ("vmovups",VEX_MX,16), TNSZ("vmovups",VEX_RM,16),TNSZ("vmovlps",VEX_RMrX,8), TNSZ("vmovlps",VEX_RM,8), +/* [14] */ TNSZ("vunpcklps",VEX_RMrX,16),TNSZ("vunpckhps",VEX_RMrX,16),TNSZ("vmovhps",VEX_RMrX,8),TNSZ("vmovhps",VEX_RM,8), +/* [18] */ INVALID, INVALID, INVALID, INVALID, +/* [1C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [20] */ INVALID, INVALID, INVALID, INVALID, +/* [24] */ INVALID, INVALID, INVALID, INVALID, +/* [28] */ TNSZ("vmovaps",VEX_MX,16), TNSZ("vmovaps",VEX_RX,16),INVALID, TNSZ("vmovntps",VEX_RM,16), +/* [2C] */ INVALID, INVALID, TNSZ("vucomiss",VEX_MX,4),TNSZ("vcomiss",VEX_MX,4), +}, { +/* [30] */ INVALID, INVALID, INVALID, INVALID, +/* [34] */ INVALID, INVALID, INVALID, INVALID, +/* [38] */ INVALID, INVALID, INVALID, INVALID, +/* [3C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [40] */ INVALID, INVALID, INVALID, INVALID, +/* [44] */ INVALID, INVALID, INVALID, INVALID, +/* [48] */ INVALID, INVALID, INVALID, INVALID, +/* [4C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [50] */ TNS("vmovmskps",VEX_MR), TNSZ("vsqrtps",VEX_MX,16), TNSZ("vrsqrtps",VEX_MX,16),TNSZ("vrcpps",VEX_MX,16), +/* [54] */ TNSZ("vandps",VEX_RMrX,16), TNSZ("vandnps",VEX_RMrX,16), TNSZ("vorps",VEX_RMrX,16), TNSZ("vxorps",VEX_RMrX,16), +/* [58] */ TNSZ("vaddps",VEX_RMrX,16), TNSZ("vmulps",VEX_RMrX,16), TNSZ("vcvtps2pd",VEX_MX,8),TNSZ("vcvtdq2ps",VEX_MX,16), +/* [5C] */ TNSZ("vsubps",VEX_RMrX,16), TNSZ("vminps",VEX_RMrX,16), TNSZ("vdivps",VEX_RMrX,16), TNSZ("vmaxps",VEX_RMrX,16), +}, { +/* [60] */ INVALID, INVALID, INVALID, INVALID, +/* [64] */ INVALID, INVALID, INVALID, INVALID, +/* [68] */ INVALID, INVALID, INVALID, INVALID, +/* [6C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [70] */ INVALID, INVALID, INVALID, INVALID, +/* [74] */ INVALID, INVALID, INVALID, TNS("vzeroupper", VEX_NONE), +/* [78] */ INVALID, INVALID, INVALID, INVALID, +/* [7C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [80] */ INVALID, INVALID, INVALID, 
INVALID, +/* [84] */ INVALID, INVALID, INVALID, INVALID, +/* [88] */ INVALID, INVALID, INVALID, INVALID, +/* [8C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [90] */ INVALID, INVALID, INVALID, INVALID, +/* [94] */ INVALID, INVALID, INVALID, INVALID, +/* [98] */ INVALID, INVALID, INVALID, INVALID, +/* [9C] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [A0] */ INVALID, INVALID, INVALID, INVALID, +/* [A4] */ INVALID, INVALID, INVALID, INVALID, +/* [A8] */ INVALID, INVALID, INVALID, INVALID, +/* [AC] */ INVALID, INVALID, TNSZ("vldmxcsr",VEX_MO,2), INVALID, +}, { +/* [B0] */ INVALID, INVALID, INVALID, INVALID, +/* [B4] */ INVALID, INVALID, INVALID, INVALID, +/* [B8] */ INVALID, INVALID, INVALID, INVALID, +/* [BC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [C0] */ INVALID, INVALID, TNSZ("vcmpps",VEX_RMRX,16),INVALID, +/* [C4] */ INVALID, INVALID, TNSZ("vshufps",VEX_RMRX,16),INVALID, +/* [C8] */ INVALID, INVALID, INVALID, INVALID, +/* [CC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [D0] */ INVALID, INVALID, INVALID, INVALID, +/* [D4] */ INVALID, INVALID, INVALID, INVALID, +/* [D8] */ INVALID, INVALID, INVALID, INVALID, +/* [DC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [E0] */ INVALID, INVALID, INVALID, INVALID, +/* [E4] */ INVALID, INVALID, INVALID, INVALID, +/* [E8] */ INVALID, INVALID, INVALID, INVALID, +/* [EC] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [F0] */ INVALID, INVALID, TNSZvr("andn",VEX_RMrX,5),TNSZvr("bls",BLS,5), +/* [F4] */ INVALID, TNSZvr("bzhi",VEX_VRMrX,5),INVALID, TNSZvr("bextr",VEX_VRMrX,5), +/* [F8] */ INVALID, INVALID, INVALID, INVALID, +/* [FC] */ INVALID, INVALID, INVALID, INVALID, +} }; + +/* + * Decode table for 0x80 opcodes + */ + +const instable_t dis_op80[8] = { + +/* [0] */ TNS("addb",IMlw), TNS("orb",IMw), TNS("adcb",IMlw), TNS("sbbb",IMlw), +/* [4] */ TNS("andb",IMw), TNS("subb",IMlw), TNS("xorb",IMw), TNS("cmpb",IMlw), +}; + + +/* + * Decode table for 0x81 opcodes. 
+ */ + +const instable_t dis_op81[8] = { + +/* [0] */ TS("add",IMlw), TS("or",IMw), TS("adc",IMlw), TS("sbb",IMlw), +/* [4] */ TS("and",IMw), TS("sub",IMlw), TS("xor",IMw), TS("cmp",IMlw), +}; + + +/* + * Decode table for 0x82 opcodes. + */ + +const instable_t dis_op82[8] = { + +/* [0] */ TNSx("addb",IMlw), TNSx("orb",IMlw), TNSx("adcb",IMlw), TNSx("sbbb",IMlw), +/* [4] */ TNSx("andb",IMlw), TNSx("subb",IMlw), TNSx("xorb",IMlw), TNSx("cmpb",IMlw), +}; +/* + * Decode table for 0x83 opcodes. + */ + +const instable_t dis_op83[8] = { + +/* [0] */ TS("add",IMlw), TS("or",IMlw), TS("adc",IMlw), TS("sbb",IMlw), +/* [4] */ TS("and",IMlw), TS("sub",IMlw), TS("xor",IMlw), TS("cmp",IMlw), +}; + +/* + * Decode table for 0xC0 opcodes. + */ + +const instable_t dis_opC0[8] = { + +/* [0] */ TNS("rolb",MvI), TNS("rorb",MvI), TNS("rclb",MvI), TNS("rcrb",MvI), +/* [4] */ TNS("shlb",MvI), TNS("shrb",MvI), INVALID, TNS("sarb",MvI), +}; + +/* + * Decode table for 0xD0 opcodes. + */ + +const instable_t dis_opD0[8] = { + +/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), +/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), +}; + +/* + * Decode table for 0xC1 opcodes. + * 186 instruction set + */ + +const instable_t dis_opC1[8] = { + +/* [0] */ TS("rol",MvI), TS("ror",MvI), TS("rcl",MvI), TS("rcr",MvI), +/* [4] */ TS("shl",MvI), TS("shr",MvI), TS("sal",MvI), TS("sar",MvI), +}; + +/* + * Decode table for 0xD1 opcodes. + */ + +const instable_t dis_opD1[8] = { + +/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), +/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("sal",Mv), TS("sar",Mv), +}; + + +/* + * Decode table for 0xD2 opcodes. + */ + +const instable_t dis_opD2[8] = { + +/* [0] */ TNS("rolb",Mv), TNS("rorb",Mv), TNS("rclb",Mv), TNS("rcrb",Mv), +/* [4] */ TNS("shlb",Mv), TNS("shrb",Mv), TNS("salb",Mv), TNS("sarb",Mv), +}; +/* + * Decode table for 0xD3 opcodes. 
+ */ + +const instable_t dis_opD3[8] = { + +/* [0] */ TS("rol",Mv), TS("ror",Mv), TS("rcl",Mv), TS("rcr",Mv), +/* [4] */ TS("shl",Mv), TS("shr",Mv), TS("sal",Mv), TS("sar",Mv), +}; + + +/* + * Decode table for 0xF6 opcodes. + */ + +const instable_t dis_opF6[8] = { + +/* [0] */ TNS("testb",IMw), TNS("testb",IMw), TNS("notb",Mw), TNS("negb",Mw), +/* [4] */ TNS("mulb",MA), TNS("imulb",MA), TNS("divb",MA), TNS("idivb",MA), +}; + + +/* + * Decode table for 0xF7 opcodes. + */ + +const instable_t dis_opF7[8] = { + +/* [0] */ TS("test",IMw), TS("test",IMw), TS("not",Mw), TS("neg",Mw), +/* [4] */ TS("mul",MA), TS("imul",MA), TS("div",MA), TS("idiv",MA), +}; + + +/* + * Decode table for 0xFE opcodes. + */ + +const instable_t dis_opFE[8] = { + +/* [0] */ TNS("incb",Mw), TNS("decb",Mw), INVALID, INVALID, +/* [4] */ INVALID, INVALID, INVALID, INVALID, +}; +/* + * Decode table for 0xFF opcodes. + */ + +const instable_t dis_opFF[8] = { + +/* [0] */ TS("inc",Mw), TS("dec",Mw), TNSyp("call",INM), TNS("lcall",INM), +/* [4] */ TNSy("jmp",INM), TNS("ljmp",INM), TSp("push",M), INVALID, +}; + +/* for 287 instructions, which are a mess to decode */ + +const instable_t dis_opFP1n2[8][8] = { +{ +/* bit pattern: 1101 1xxx MODxx xR/M */ +/* [0,0] */ TNS("fadds",M), TNS("fmuls",M), TNS("fcoms",M), TNS("fcomps",M), +/* [0,4] */ TNS("fsubs",M), TNS("fsubrs",M), TNS("fdivs",M), TNS("fdivrs",M), +}, { +/* [1,0] */ TNS("flds",M), INVALID, TNS("fsts",M), TNS("fstps",M), +/* [1,4] */ TNSZ("fldenv",M,28), TNSZ("fldcw",M,2), TNSZ("fnstenv",M,28), TNSZ("fnstcw",M,2), +}, { +/* [2,0] */ TNS("fiaddl",M), TNS("fimull",M), TNS("ficoml",M), TNS("ficompl",M), +/* [2,4] */ TNS("fisubl",M), TNS("fisubrl",M), TNS("fidivl",M), TNS("fidivrl",M), +}, { +/* [3,0] */ TNS("fildl",M), TNSZ("fisttpl",M,4), TNS("fistl",M), TNS("fistpl",M), +/* [3,4] */ INVALID, TNSZ("fldt",M,10), INVALID, TNSZ("fstpt",M,10), +}, { +/* [4,0] */ TNSZ("faddl",M,8), TNSZ("fmull",M,8), TNSZ("fcoml",M,8), TNSZ("fcompl",M,8), +/* [4,4] */
TNSZ("fsubl",M,8), TNSZ("fsubrl",M,8), TNSZ("fdivl",M,8), TNSZ("fdivrl",M,8), +}, { +/* [5,0] */ TNSZ("fldl",M,8), TNSZ("fisttpll",M,8), TNSZ("fstl",M,8), TNSZ("fstpl",M,8), +/* [5,4] */ TNSZ("frstor",M,108), INVALID, TNSZ("fnsave",M,108), TNSZ("fnstsw",M,2), +}, { +/* [6,0] */ TNSZ("fiadd",M,2), TNSZ("fimul",M,2), TNSZ("ficom",M,2), TNSZ("ficomp",M,2), +/* [6,4] */ TNSZ("fisub",M,2), TNSZ("fisubr",M,2), TNSZ("fidiv",M,2), TNSZ("fidivr",M,2), +}, { +/* [7,0] */ TNSZ("fild",M,2), TNSZ("fisttp",M,2), TNSZ("fist",M,2), TNSZ("fistp",M,2), +/* [7,4] */ TNSZ("fbld",M,10), TNSZ("fildll",M,8), TNSZ("fbstp",M,10), TNSZ("fistpll",M,8), +} }; + +const instable_t dis_opFP3[8][8] = { +{ +/* bit pattern: 1101 1xxx 11xx xREG */ +/* [0,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), +/* [0,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), +}, { +/* [1,0] */ TNS("fld",F), TNS("fxch",F), TNS("fnop",NORM), TNS("fstp",F), +/* [1,4] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [2,0] */ INVALID, INVALID, INVALID, INVALID, +/* [2,4] */ INVALID, TNS("fucompp",NORM), INVALID, INVALID, +}, { +/* [3,0] */ INVALID, INVALID, INVALID, INVALID, +/* [3,4] */ INVALID, INVALID, INVALID, INVALID, +}, { +/* [4,0] */ TNS("fadd",FF), TNS("fmul",FF), TNS("fcom",F), TNS("fcomp",F), +/* [4,4] */ TNS("fsub",FF), TNS("fsubr",FF), TNS("fdiv",FF), TNS("fdivr",FF), +}, { +/* [5,0] */ TNS("ffree",F), TNS("fxch",F), TNS("fst",F), TNS("fstp",F), +/* [5,4] */ TNS("fucom",F), TNS("fucomp",F), INVALID, INVALID, +}, { +/* [6,0] */ TNS("faddp",FF), TNS("fmulp",FF), TNS("fcomp",F), TNS("fcompp",NORM), +/* [6,4] */ TNS("fsubp",FF), TNS("fsubrp",FF), TNS("fdivp",FF), TNS("fdivrp",FF), +}, { +/* [7,0] */ TNS("ffreep",F), TNS("fxch",F), TNS("fstp",F), TNS("fstp",F), +/* [7,4] */ TNS("fnstsw",M), TNS("fucomip",FFC), TNS("fcomip",FFC), INVALID, +} }; + +const instable_t dis_opFP4[4][8] = { +{ +/* bit pattern: 1101 1001 111x xxxx */ +/* [0,0] */ TNS("fchs",NORM), 
TNS("fabs",NORM), INVALID, INVALID, +/* [0,4] */ TNS("ftst",NORM), TNS("fxam",NORM), TNS("ftstp",NORM), INVALID, +}, { +/* [1,0] */ TNS("fld1",NORM), TNS("fldl2t",NORM), TNS("fldl2e",NORM), TNS("fldpi",NORM), +/* [1,4] */ TNS("fldlg2",NORM), TNS("fldln2",NORM), TNS("fldz",NORM), INVALID, +}, { +/* [2,0] */ TNS("f2xm1",NORM), TNS("fyl2x",NORM), TNS("fptan",NORM), TNS("fpatan",NORM), +/* [2,4] */ TNS("fxtract",NORM), TNS("fprem1",NORM), TNS("fdecstp",NORM), TNS("fincstp",NORM), +}, { +/* [3,0] */ TNS("fprem",NORM), TNS("fyl2xp1",NORM), TNS("fsqrt",NORM), TNS("fsincos",NORM), +/* [3,4] */ TNS("frndint",NORM), TNS("fscale",NORM), TNS("fsin",NORM), TNS("fcos",NORM), +} }; + +const instable_t dis_opFP5[8] = { +/* bit pattern: 1101 1011 111x xxxx */ +/* [0] */ TNS("feni",NORM), TNS("fdisi",NORM), TNS("fnclex",NORM), TNS("fninit",NORM), +/* [4] */ TNS("fsetpm",NORM), TNS("frstpm",NORM), INVALID, INVALID, +}; + +const instable_t dis_opFP6[8] = { +/* bit pattern: 1101 1011 11yy yxxx */ +/* [00] */ TNS("fcmov.nb",FF), TNS("fcmov.ne",FF), TNS("fcmov.nbe",FF), TNS("fcmov.nu",FF), +/* [04] */ INVALID, TNS("fucomi",F), TNS("fcomi",F), INVALID, +}; + +const instable_t dis_opFP7[8] = { +/* bit pattern: 1101 1010 11yy yxxx */ +/* [00] */ TNS("fcmov.b",FF), TNS("fcmov.e",FF), TNS("fcmov.be",FF), TNS("fcmov.u",FF), +/* [04] */ INVALID, INVALID, INVALID, INVALID, +}; + +/* + * Main decode table for the op codes. The first two nibbles + * will be used as an index into the table. If there is a + * a need to further decode an instruction, the array to be + * referenced is indicated with the other two entries being + * empty. 
+ */ + +const instable_t dis_distable[16][16] = { +{ +/* [0,0] */ TNS("addb",RMw), TS("add",RMw), TNS("addb",MRw), TS("add",MRw), +/* [0,4] */ TNS("addb",IA), TS("add",IA), TSx("push",SEG), TSx("pop",SEG), +/* [0,8] */ TNS("orb",RMw), TS("or",RMw), TNS("orb",MRw), TS("or",MRw), +/* [0,C] */ TNS("orb",IA), TS("or",IA), TSx("push",SEG), IND(dis_op0F), +}, { +/* [1,0] */ TNS("adcb",RMw), TS("adc",RMw), TNS("adcb",MRw), TS("adc",MRw), +/* [1,4] */ TNS("adcb",IA), TS("adc",IA), TSx("push",SEG), TSx("pop",SEG), +/* [1,8] */ TNS("sbbb",RMw), TS("sbb",RMw), TNS("sbbb",MRw), TS("sbb",MRw), +/* [1,C] */ TNS("sbbb",IA), TS("sbb",IA), TSx("push",SEG), TSx("pop",SEG), +}, { +/* [2,0] */ TNS("andb",RMw), TS("and",RMw), TNS("andb",MRw), TS("and",MRw), +/* [2,4] */ TNS("andb",IA), TS("and",IA), TNSx("%es:",OVERRIDE), TNSx("daa",NORM), +/* [2,8] */ TNS("subb",RMw), TS("sub",RMw), TNS("subb",MRw), TS("sub",MRw), +/* [2,C] */ TNS("subb",IA), TS("sub",IA), TNS("%cs:",OVERRIDE), TNSx("das",NORM), +}, { +/* [3,0] */ TNS("xorb",RMw), TS("xor",RMw), TNS("xorb",MRw), TS("xor",MRw), +/* [3,4] */ TNS("xorb",IA), TS("xor",IA), TNSx("%ss:",OVERRIDE), TNSx("aaa",NORM), +/* [3,8] */ TNS("cmpb",RMw), TS("cmp",RMw), TNS("cmpb",MRw), TS("cmp",MRw), +/* [3,C] */ TNS("cmpb",IA), TS("cmp",IA), TNSx("%ds:",OVERRIDE), TNSx("aas",NORM), +}, { +/* [4,0] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), +/* [4,4] */ TSx("inc",R), TSx("inc",R), TSx("inc",R), TSx("inc",R), +/* [4,8] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), +/* [4,C] */ TSx("dec",R), TSx("dec",R), TSx("dec",R), TSx("dec",R), +}, { +/* [5,0] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), +/* [5,4] */ TSp("push",R), TSp("push",R), TSp("push",R), TSp("push",R), +/* [5,8] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), +/* [5,C] */ TSp("pop",R), TSp("pop",R), TSp("pop",R), TSp("pop",R), +}, { +/* [6,0] */ TSZx("pusha",IMPLMEM,28),TSZx("popa",IMPLMEM,28), TSx("bound",MR), TNS("arpl",RMw), +/* 
[6,4] */ TNS("%fs:",OVERRIDE), TNS("%gs:",OVERRIDE), TNS("data16",DM), TNS("addr16",AM), +/* [6,8] */ TSp("push",I), TS("imul",IMUL), TSp("push",Ib), TS("imul",IMUL), +/* [6,C] */ TNSZ("insb",IMPLMEM,1), TSZ("ins",IMPLMEM,4), TNSZ("outsb",IMPLMEM,1),TSZ("outs",IMPLMEM,4), +}, { +/* [7,0] */ TNSy("jo",BD), TNSy("jno",BD), TNSy("jb",BD), TNSy("jae",BD), +/* [7,4] */ TNSy("je",BD), TNSy("jne",BD), TNSy("jbe",BD), TNSy("ja",BD), +/* [7,8] */ TNSy("js",BD), TNSy("jns",BD), TNSy("jp",BD), TNSy("jnp",BD), +/* [7,C] */ TNSy("jl",BD), TNSy("jge",BD), TNSy("jle",BD), TNSy("jg",BD), +}, { +/* [8,0] */ IND(dis_op80), IND(dis_op81), INDx(dis_op82), IND(dis_op83), +/* [8,4] */ TNS("testb",RMw), TS("test",RMw), TNS("xchgb",RMw), TS("xchg",RMw), +/* [8,8] */ TNS("movb",RMw), TS("mov",RMw), TNS("movb",MRw), TS("mov",MRw), +/* [8,C] */ TNS("movw",SM), TS("lea",MR), TNS("movw",MS), TSp("pop",M), +}, { +/* [9,0] */ TNS("nop",NORM), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), +/* [9,4] */ TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), TS("xchg",RA), +/* [9,8] */ TNS("cXtX",CBW), TNS("cXtX",CWD), TNSx("lcall",SO), TNS("fwait",NORM), +/* [9,C] */ TSZy("pushf",IMPLMEM,4),TSZy("popf",IMPLMEM,4), TNS("sahf",NORM), TNS("lahf",NORM), +}, { +/* [A,0] */ TNS("movb",OA), TS("mov",OA), TNS("movb",AO), TS("mov",AO), +/* [A,4] */ TNSZ("movsb",SD,1), TS("movs",SD), TNSZ("cmpsb",SD,1), TS("cmps",SD), +/* [A,8] */ TNS("testb",IA), TS("test",IA), TNS("stosb",AD), TS("stos",AD), +/* [A,C] */ TNS("lodsb",SA), TS("lods",SA), TNS("scasb",AD), TS("scas",AD), +}, { +/* [B,0] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), +/* [B,4] */ TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), TNS("movb",IR), +/* [B,8] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), +/* [B,C] */ TS("mov",IR), TS("mov",IR), TS("mov",IR), TS("mov",IR), +}, { +/* [C,0] */ IND(dis_opC0), IND(dis_opC1), TNSyp("ret",RET), TNSyp("ret",NORM), +/* [C,4] */ TNSx("les",MR), TNSx("lds",MR), TNS("movb",IMw), 
TS("mov",IMw), +/* [C,8] */ TNSyp("enter",ENTER), TNSyp("leave",NORM), TNS("lret",RET), TNS("lret",NORM), +/* [C,C] */ TNS("int",INT3), TNS("int",INTx), TNSx("into",NORM), TNS("iret",NORM), +}, { +/* [D,0] */ IND(dis_opD0), IND(dis_opD1), IND(dis_opD2), IND(dis_opD3), +/* [D,4] */ TNSx("aam",U), TNSx("aad",U), TNSx("falc",NORM), TNSZ("xlat",IMPLMEM,1), + +/* 287 instructions. Note that although the indirect field */ +/* indicates opFP1n2 for further decoding, this is not necessarily */ +/* the case since the opFP arrays are not partitioned according to key1 */ +/* and key2. opFP1n2 is given only to indicate that we haven't */ +/* finished decoding the instruction. */ +/* [D,8] */ IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), +/* [D,C] */ IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), IND(dis_opFP1n2), +}, { +/* [E,0] */ TNSy("loopnz",BD), TNSy("loopz",BD), TNSy("loop",BD), TNSy("jcxz",BD), +/* [E,4] */ TNS("inb",P), TS("in",P), TNS("outb",P), TS("out",P), +/* [E,8] */ TNSyp("call",D), TNSy("jmp",D), TNSx("ljmp",SO), TNSy("jmp",BD), +/* [E,C] */ TNS("inb",V), TS("in",V), TNS("outb",V), TS("out",V), +}, { +/* [F,0] */ TNS("lock",LOCK), TNS("icebp", NORM), TNS("repnz",PREFIX), TNS("repz",PREFIX), +/* [F,4] */ TNS("hlt",NORM), TNS("cmc",NORM), IND(dis_opF6), IND(dis_opF7), +/* [F,8] */ TNS("clc",NORM), TNS("stc",NORM), TNS("cli",NORM), TNS("sti",NORM), +/* [F,C] */ TNS("cld",NORM), TNS("std",NORM), IND(dis_opFE), IND(dis_opFF), +} }; + +/* END CSTYLED */ + +/* + * common functions to decode and disassemble an x86 or amd64 instruction + */ + +/* + * These are the individual fields of a REX prefix. 
Note that a REX + * prefix with none of these set is still needed to: + * - use the MOVSXD (sign extend 32 to 64 bits) instruction + * - access the %sil, %dil, %bpl, %spl registers + */ +#define REX_W 0x08 /* 64 bit operand size when set */ +#define REX_R 0x04 /* high order bit extension of ModRM reg field */ +#define REX_X 0x02 /* high order bit extension of SIB index field */ +#define REX_B 0x01 /* extends ModRM r_m, SIB base, or opcode reg */ + +/* + * These are the individual fields of a VEX prefix. + */ +#define VEX_R 0x08 /* REX.R in 1's complement form */ +#define VEX_X 0x04 /* REX.X in 1's complement form */ +#define VEX_B 0x02 /* REX.B in 1's complement form */ +/* Vector Length, 0: scalar or 128-bit vector, 1: 256-bit vector */ +#define VEX_L 0x04 +#define VEX_W 0x08 /* opcode specific, use like REX.W */ +#define VEX_m 0x1F /* VEX m-mmmm field */ +#define VEX_v 0x78 /* VEX register specifier */ +#define VEX_p 0x03 /* VEX pp field, opcode extension */ + +/* VEX m-mmmm field, only used by three bytes prefix */ +#define VEX_m_0F 0x01 /* implied 0F leading opcode byte */ +#define VEX_m_0F38 0x02 /* implied 0F 38 leading opcode byte */ +#define VEX_m_0F3A 0x03 /* implied 0F 3A leading opcode byte */ + +/* VEX pp field, providing equivalent functionality of a SIMD prefix */ +#define VEX_p_66 0x01 +#define VEX_p_F3 0x02 +#define VEX_p_F2 0x03 + +/* + * Even in 64 bit mode, usually only 4 byte immediate operands are supported. + */ +static int isize[] = {1, 2, 4, 4}; +static int isize64[] = {1, 2, 4, 8}; + +/* + * Just a bunch of useful macros. Arguments are parenthesized so that an + * expression argument is masked as a whole. + */ +#define WBIT(x) ((x) & 0x1) /* to get w bit */ +#define REGNO(x) ((x) & 0x7) /* to get 3 bit register */ +#define VBIT(x) ((x)>>1 & 0x1) /* to get 'v' bit */ +#define OPSIZE(osize, wbit) ((wbit) ? isize[osize] : 1) +#define OPSIZE64(osize, wbit) ((wbit) ?
isize64[osize] : 1) + +#define REG_ONLY 3 /* mode to indicate a register operand (not memory) */ + +#define BYTE_OPND 0 /* w-bit value indicating byte register */ +#define LONG_OPND 1 /* w-bit value indicating opnd_size register */ +#define MM_OPND 2 /* "value" used to indicate a mmx reg */ +#define XMM_OPND 3 /* "value" used to indicate a xmm reg */ +#define SEG_OPND 4 /* "value" used to indicate a segment reg */ +#define CONTROL_OPND 5 /* "value" used to indicate a control reg */ +#define DEBUG_OPND 6 /* "value" used to indicate a debug reg */ +#define TEST_OPND 7 /* "value" used to indicate a test reg */ +#define WORD_OPND 8 /* w-bit value indicating word size reg */ +#define YMM_OPND 9 /* "value" used to indicate a ymm reg */ + +/* + * The AVX2 gather instructions are a bit of a mess. While there's a pattern, + * there's not really a consistent scheme that we can use to know what the mode + * is supposed to be for a given type. Various instructions, like VPGATHERDD, + * always match the value of VEX_L. Other instructions like VPGATHERDQ, have + * some registers match VEX_L, but the VSIB is always XMM. + * + * The simplest way to deal with this is to just define a table based on the + * instruction opcodes, which are 0x90-0x93, so we subtract 0x90 to index into + * them. + * + * We further have to subdivide this based on the value of VEX_W and the value + * of VEX_L. The array is constructed to be indexed as: + * [opcode - 0x90][VEX_W][VEX_L]. 
+ */ +/* w = 0, 0x90 */ +typedef struct dis_gather_regs { + uint_t dgr_arg0; /* src reg */ + uint_t dgr_arg1; /* vsib reg */ + uint_t dgr_arg2; /* dst reg */ + char *dgr_suffix; /* suffix to append */ +} dis_gather_regs_t; + +static dis_gather_regs_t dis_vgather[4][2][2] = { + { + /* op 0x90, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "d" } + }, + /* op 0x90, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "q" }, + { YMM_OPND, XMM_OPND, YMM_OPND, "q" } + } + }, + { + /* op 0x91, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { XMM_OPND, YMM_OPND, XMM_OPND, "d" }, + }, + /* op 0x91, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "q" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "q" }, + } + }, + { + /* op 0x92, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "s" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "s" } + }, + /* op 0x92, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { YMM_OPND, XMM_OPND, YMM_OPND, "d" } + } + }, + { + /* op 0x93, W.0 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "s" }, + { XMM_OPND, YMM_OPND, XMM_OPND, "s" } + }, + /* op 0x93, W.1 */ + { + { XMM_OPND, XMM_OPND, XMM_OPND, "d" }, + { YMM_OPND, YMM_OPND, YMM_OPND, "d" } + } + } +}; + +/* + * Get the next byte and separate the op code into the high and low nibbles. + */ +static int +dtrace_get_opcode(dis86_t *x, uint_t *high, uint_t *low) +{ + int byte; + + /* + * x86 instructions have a maximum length of 15 bytes. Bail out if + * we try to read more. 
+ */ + if (x->d86_len >= 15) + return (x->d86_error = 1); + + if (x->d86_error) + return (1); + byte = x->d86_get_byte(x->d86_data); + if (byte < 0) + return (x->d86_error = 1); + x->d86_bytes[x->d86_len++] = byte; + *low = byte & 0xf; /* ----xxxx low 4 bits */ + *high = byte >> 4 & 0xf; /* xxxx---- bits 7 to 4 */ + return (0); +} + +/* + * Get and decode an SIB (scaled index base) byte + * + * Splits the next instruction byte into its 2 bit (ss), 3 bit (index) and + * 3 bit (base) fields; dtrace_get_modrm() reuses this to split a ModRM + * byte the same way. On failure x->d86_error is set and the outputs are + * left untouched. + */ +static void +dtrace_get_SIB(dis86_t *x, uint_t *ss, uint_t *index, uint_t *base) +{ + int byte; + + if (x->d86_error) + return; + + byte = x->d86_get_byte(x->d86_data); + /* same 15 byte instruction limit as dtrace_get_opcode() */ + if (byte < 0 || x->d86_len >= 15) { + x->d86_error = 1; + return; + } + x->d86_bytes[x->d86_len++] = byte; + + *base = byte & 0x7; + *index = (byte >> 3) & 0x7; + *ss = (byte >> 6) & 0x3; +} + +/* + * Get the byte following the op code and separate it into the + * mode, register, and r/m fields. + */ +static void +dtrace_get_modrm(dis86_t *x, uint_t *mode, uint_t *reg, uint_t *r_m) +{ + if (x->d86_got_modrm == 0) { + if (x->d86_rmindex == -1) + x->d86_rmindex = x->d86_len; + dtrace_get_SIB(x, mode, reg, r_m); + x->d86_got_modrm = 1; + } +} + +/* + * Adjust register selection based on any REX prefix bits present. + */ +/*ARGSUSED*/ +static void +dtrace_rex_adjust(uint_t rex_prefix, uint_t mode, uint_t *reg, uint_t *r_m) +{ + if (reg != NULL && r_m == NULL) { + if (rex_prefix & REX_B) + *reg += 8; + } else { + if (reg != NULL && (REX_R & rex_prefix) != 0) + *reg += 8; + if (r_m != NULL && (REX_B & rex_prefix) != 0) + *r_m += 8; + } +} + +/* + * Adjust register selection based on any VEX prefix bits present.
+ * Notes: VEX.R, VEX.X and VEX.B use the inverted form compared with REX prefix + */ +/*ARGSUSED*/ +static void +dtrace_vex_adjust(uint_t vex_byte1, uint_t mode, uint_t *reg, uint_t *r_m) +{ + if (reg != NULL && r_m == NULL) { + if (!(vex_byte1 & VEX_B)) + *reg += 8; + } else { + if (reg != NULL && ((VEX_R & vex_byte1) == 0)) + *reg += 8; + if (r_m != NULL && ((VEX_B & vex_byte1) == 0)) + *r_m += 8; + } +} + +/* + * Get an immediate operand of the given size, with sign extension. + * + * Reads 'size' bytes, least significant first, through x->d86_get_byte() + * into x->d86_opnd[opindex].d86_value and appends them to x->d86_bytes. + * 'wbit' is the operand class; it only affects the value size recorded for + * DIS_TEXT output. x->d86_error is set if a byte cannot be fetched or if + * the instruction would grow past the 15 byte limit that + * dtrace_get_opcode() enforces. + */ +static void +dtrace_imm_opnd(dis86_t *x, int wbit, int size, int opindex) +{ + int i; + int byte; + int valsize; + + if (x->d86_numopnds < opindex + 1) + x->d86_numopnds = opindex + 1; + + switch (wbit) { + case BYTE_OPND: + valsize = 1; + break; + case LONG_OPND: + if (x->d86_opnd_size == SIZE16) + valsize = 2; + else if (x->d86_opnd_size == SIZE32) + valsize = 4; + else + valsize = 8; + break; + case MM_OPND: + case XMM_OPND: + case YMM_OPND: + case SEG_OPND: + case CONTROL_OPND: + case DEBUG_OPND: + case TEST_OPND: + default: /* unknown class: never leave valsize unset */ + valsize = size; + break; + case WORD_OPND: + valsize = 2; + break; + } + if (valsize < size) + valsize = size; + + if (x->d86_error) + return; + x->d86_opnd[opindex].d86_value = 0; + for (i = 0; i < size; ++i) { + byte = x->d86_get_byte(x->d86_data); + /* same 15 byte instruction limit as dtrace_get_opcode() */ + if (byte < 0 || x->d86_len >= 15) { + x->d86_error = 1; + return; + } + x->d86_bytes[x->d86_len++] = byte; + x->d86_opnd[opindex].d86_value |= (uint64_t)byte << (i * 8); + } + /* Do sign extension */ + if (x->d86_bytes[x->d86_len - 1] & 0x80) { + for (; i < sizeof (uint64_t); i++) + x->d86_opnd[opindex].d86_value |= + (uint64_t)0xff << (i * 8); + } +#ifdef DIS_TEXT + x->d86_opnd[opindex].d86_mode = MODE_SIGNED; + x->d86_opnd[opindex].d86_value_size = valsize; + x->d86_imm_bytes += size; +#endif +} + +/* + * Get an ip relative operand of the given size, with sign extension.
+ */ +static void +dtrace_disp_opnd(dis86_t *x, int wbit, int size, int opindex) +{ + dtrace_imm_opnd(x, wbit, size, opindex); +#ifdef DIS_TEXT + x->d86_opnd[opindex].d86_mode = MODE_IPREL; +#endif +} + +/* + * Check to see if there is a segment override prefix pending. + * If so, print it in the current 'operand' location and set + * the override flag back to false. + */ +/*ARGSUSED*/ +static void +dtrace_check_override(dis86_t *x, int opindex) +{ +#ifdef DIS_TEXT + if (x->d86_seg_prefix) { + (void) strlcat(x->d86_opnd[opindex].d86_prefix, + x->d86_seg_prefix, PFIXLEN); + } +#endif + x->d86_seg_prefix = NULL; +} + + +/* + * Process a single instruction Register or Memory operand. + * + * mode = addressing mode from ModRM byte + * r_m = r_m (or reg if mode == 3) field from ModRM byte + * wbit = indicates which register (8bit, 16bit, ... MMX, etc.) set to use. + * o = index of operand that we are processing (0, 1 or 2) + * + * the value of reg or r_m must have already been adjusted for any REX prefix. 
+ */ +/*ARGSUSED*/ +static void +dtrace_get_operand(dis86_t *x, uint_t mode, uint_t r_m, int wbit, int opindex) +{ + int have_SIB = 0; /* flag presence of scale-index-byte */ + uint_t ss; /* scale-factor from opcode */ + uint_t index; /* index register number */ + uint_t base; /* base register number */ + int dispsize; /* size of displacement in bytes */ +#ifdef DIS_TEXT + char *opnd = x->d86_opnd[opindex].d86_opnd; +#endif + + if (x->d86_numopnds < opindex + 1) + x->d86_numopnds = opindex + 1; + + if (x->d86_error) + return; + + /* + * first handle a simple register + */ + if (mode == REG_ONLY) { +#ifdef DIS_TEXT + switch (wbit) { + case MM_OPND: + (void) strlcat(opnd, dis_MMREG[r_m], OPLEN); + break; + case XMM_OPND: + (void) strlcat(opnd, dis_XMMREG[r_m], OPLEN); + break; + case YMM_OPND: + (void) strlcat(opnd, dis_YMMREG[r_m], OPLEN); + break; + case SEG_OPND: + (void) strlcat(opnd, dis_SEGREG[r_m], OPLEN); + break; + case CONTROL_OPND: + (void) strlcat(opnd, dis_CONTROLREG[r_m], OPLEN); + break; + case DEBUG_OPND: + (void) strlcat(opnd, dis_DEBUGREG[r_m], OPLEN); + break; + case TEST_OPND: + (void) strlcat(opnd, dis_TESTREG[r_m], OPLEN); + break; + case BYTE_OPND: + if (x->d86_rex_prefix == 0) + (void) strlcat(opnd, dis_REG8[r_m], OPLEN); + else + (void) strlcat(opnd, dis_REG8_REX[r_m], OPLEN); + break; + case WORD_OPND: + (void) strlcat(opnd, dis_REG16[r_m], OPLEN); + break; + case LONG_OPND: + if (x->d86_opnd_size == SIZE16) + (void) strlcat(opnd, dis_REG16[r_m], OPLEN); + else if (x->d86_opnd_size == SIZE32) + (void) strlcat(opnd, dis_REG32[r_m], OPLEN); + else + (void) strlcat(opnd, dis_REG64[r_m], OPLEN); + break; + } +#endif /* DIS_TEXT */ + return; + } + + /* + * if symbolic representation, skip override prefix, if any + */ + dtrace_check_override(x, opindex); + + /* + * Handle 16 bit memory references first, since they decode + * the mode values more simply. 
+ * mode 1 is r_m + 8 bit displacement + * mode 2 is r_m + 16 bit displacement + * mode 0 is just r_m, unless r_m is 6 which is 16 bit disp + */ + if (x->d86_addr_size == SIZE16) { + if ((mode == 0 && r_m == 6) || mode == 2) + dtrace_imm_opnd(x, WORD_OPND, 2, opindex); + else if (mode == 1) + dtrace_imm_opnd(x, BYTE_OPND, 1, opindex); +#ifdef DIS_TEXT + if (mode == 0 && r_m == 6) + x->d86_opnd[opindex].d86_mode = MODE_SIGNED; + else if (mode == 0) + x->d86_opnd[opindex].d86_mode = MODE_NONE; + else + x->d86_opnd[opindex].d86_mode = MODE_OFFSET; + (void) strlcat(opnd, dis_addr16[mode][r_m], OPLEN); +#endif + return; + } + + /* + * 32 and 64 bit addressing modes are more complex since they + * can involve an SIB (scaled index and base) byte to decode. + */ + if (r_m == ESP_REGNO || r_m == ESP_REGNO + 8) { + have_SIB = 1; + dtrace_get_SIB(x, &ss, &index, &base); + if (x->d86_error) + return; + if (base != 5 || mode != 0) + if (x->d86_rex_prefix & REX_B) + base += 8; + if (x->d86_rex_prefix & REX_X) + index += 8; + } else { + base = r_m; + } + + /* + * Compute the displacement size and get its bytes + */ + dispsize = 0; + + if (mode == 1) + dispsize = 1; + else if (mode == 2) + dispsize = 4; + else if ((r_m & 7) == EBP_REGNO || + (have_SIB && (base & 7) == EBP_REGNO)) + dispsize = 4; + + if (dispsize > 0) { + dtrace_imm_opnd(x, dispsize == 4 ? 
LONG_OPND : BYTE_OPND, + dispsize, opindex); + if (x->d86_error) + return; + } + +#ifdef DIS_TEXT + if (dispsize > 0) + x->d86_opnd[opindex].d86_mode = MODE_OFFSET; + + if (have_SIB == 0) { + if (x->d86_mode == SIZE32) { + if (mode == 0) + (void) strlcat(opnd, dis_addr32_mode0[r_m], + OPLEN); + else + (void) strlcat(opnd, dis_addr32_mode12[r_m], + OPLEN); + } else { + if (mode == 0) { + (void) strlcat(opnd, dis_addr64_mode0[r_m], + OPLEN); + if (r_m == 5) { + x->d86_opnd[opindex].d86_mode = + MODE_RIPREL; + } + } else { + (void) strlcat(opnd, dis_addr64_mode12[r_m], + OPLEN); + } + } + } else { + uint_t need_paren = 0; + char **regs; + char **bregs; + const char *const *sf; + if (x->d86_mode == SIZE32) /* NOTE this is not addr_size! */ + regs = (char **)dis_REG32; + else + regs = (char **)dis_REG64; + + if (x->d86_vsib != 0) { + if (wbit == YMM_OPND) /* NOTE this is not addr_size! */ + bregs = (char **)dis_YMMREG; + else + bregs = (char **)dis_XMMREG; + sf = dis_vscale_factor; + } else { + bregs = regs; + sf = dis_scale_factor; + } + + /* + * print the base (if any) + */ + if (base == EBP_REGNO && mode == 0) { + if (index != ESP_REGNO || x->d86_vsib != 0) { + (void) strlcat(opnd, "(", OPLEN); + need_paren = 1; + } + } else { + (void) strlcat(opnd, "(", OPLEN); + (void) strlcat(opnd, regs[base], OPLEN); + need_paren = 1; + } + + /* + * print the index (if any) + */ + if (index != ESP_REGNO || x->d86_vsib) { + (void) strlcat(opnd, ",", OPLEN); + (void) strlcat(opnd, bregs[index], OPLEN); + (void) strlcat(opnd, sf[ss], OPLEN); + } else + if (need_paren) + (void) strlcat(opnd, ")", OPLEN); + } +#endif +} + +/* + * Operand sequence for standard instruction involving one register + * and one register/memory operand. 
+ * wbit indicates a byte(0) or opnd_size(1) operation + * vbit indicates direction (0 for "opcode r,r_m") or (1 for "opcode r_m, r") + * mode, reg and r_m must be lvalues: the macro takes their addresses so + * dtrace_get_modrm() and dtrace_rex_adjust() can update them in place. + */ +#define STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, vbit) { \ + dtrace_get_modrm(x, &mode, &reg, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1 - vbit); \ +} + +/* + * Similar to above, but allows for the two operands to be of different + * classes (ie. wbit). + * wbit is for the r_m operand + * w2 is for the reg operand + */ +#define MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, w2, vbit) { \ + dtrace_get_modrm(x, &mode, &reg, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 1 - vbit); \ +} + +/* + * Similar, but for 2 operands plus an immediate. + * vbit indicates direction + * 0 for "opcode imm, r, r_m" or + * 1 for "opcode imm, r_m, r" + */ +#define THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize, vbit) { \ + dtrace_get_modrm(x, &mode, &reg, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, 2-vbit); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 1+vbit); \ + dtrace_imm_opnd(x, wbit, immsize, 0); \ +} + +/* + * Similar, but for 2 operands plus two immediates. + */ +#define FOUROPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, immsize) { \ + dtrace_get_modrm(x, &mode, &reg, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, 2); \ + dtrace_get_operand(x, REG_ONLY, reg, w2, 3); \ + dtrace_imm_opnd(x, wbit, immsize, 1); \ + dtrace_imm_opnd(x, wbit, immsize, 0); \ +} + +/* + * One operand plus two immediates.
+ */ +#define ONEOPERAND_TWOIMM(x, mode, reg, r_m, rex_prefix, wbit, immsize) { \ + dtrace_get_modrm(x, &mode, &reg, &r_m); \ + dtrace_rex_adjust(rex_prefix, mode, &reg, &r_m); \ + dtrace_get_operand(x, mode, r_m, wbit, 2); \ + dtrace_imm_opnd(x, wbit, immsize, 1); \ + dtrace_imm_opnd(x, wbit, immsize, 0); \ +} + +/* + * Dissassemble a single x86 or amd64 instruction. + * + * Mode determines the default operating mode (SIZE16, SIZE32 or SIZE64) + * for interpreting instructions. + * + * returns non-zero for bad opcode + */ +int +dtrace_disx86(dis86_t *x, uint_t cpu_mode) +{ + instable_t *dp; /* decode table being used */ +#ifdef DIS_TEXT + uint_t i; +#endif +#ifdef DIS_MEM + uint_t nomem = 0; +#define NOMEM (nomem = 1) +#else +#define NOMEM /* nothing */ +#endif + uint_t opnd_size; /* SIZE16, SIZE32 or SIZE64 */ + uint_t addr_size; /* SIZE16, SIZE32 or SIZE64 */ + uint_t wbit; /* opcode wbit, 0 is 8 bit, !0 for opnd_size */ + uint_t w2; /* wbit value for second operand */ + uint_t vbit; + uint_t mode = 0; /* mode value from ModRM byte */ + uint_t reg; /* reg value from ModRM byte */ + uint_t r_m; /* r_m value from ModRM byte */ + + uint_t opcode1; /* high nibble of 1st byte */ + uint_t opcode2; /* low nibble of 1st byte */ + uint_t opcode3; /* extra opcode bits usually from ModRM byte */ + uint_t opcode4; /* high nibble of 2nd byte */ + uint_t opcode5; /* low nibble of 2nd byte */ + uint_t opcode6; /* high nibble of 3rd byte */ + uint_t opcode7; /* low nibble of 3rd byte */ + uint_t opcode_bytes = 1; + + /* + * legacy prefixes come in 5 flavors, you should have only one of each + */ + uint_t opnd_size_prefix = 0; + uint_t addr_size_prefix = 0; + uint_t segment_prefix = 0; + uint_t lock_prefix = 0; + uint_t rep_prefix = 0; + uint_t rex_prefix = 0; /* amd64 register extension prefix */ + + /* + * Intel VEX instruction encoding prefix and fields + */ + + /* 0xC4 means 3 bytes prefix, 0xC5 means 2 bytes prefix */ + uint_t vex_prefix = 0; + + /* + * VEX prefix byte 1,
includes vex.r, vex.x and vex.b + * (for 3 bytes prefix) + */ + uint_t vex_byte1 = 0; + + /* + * For 32-bit mode, it should prefetch the next byte to + * distinguish between AVX and les/lds + */ + uint_t vex_prefetch = 0; + + uint_t vex_m = 0; + uint_t vex_v = 0; + uint_t vex_p = 0; + uint_t vex_R = 1; + uint_t vex_X = 1; + uint_t vex_B = 1; + uint_t vex_W = 0; + uint_t vex_L; + dis_gather_regs_t *vreg; + +#ifdef DIS_TEXT + /* Instruction name for BLS* family of instructions */ + char *blsinstr; +#endif + + size_t off; + + instable_t dp_mmx; + + x->d86_len = 0; + x->d86_rmindex = -1; + x->d86_error = 0; +#ifdef DIS_TEXT + x->d86_numopnds = 0; + x->d86_seg_prefix = NULL; + x->d86_mnem[0] = 0; + for (i = 0; i < 4; ++i) { + x->d86_opnd[i].d86_opnd[0] = 0; + x->d86_opnd[i].d86_prefix[0] = 0; + x->d86_opnd[i].d86_value_size = 0; + x->d86_opnd[i].d86_value = 0; + x->d86_opnd[i].d86_mode = MODE_NONE; + } +#endif + x->d86_rex_prefix = 0; + x->d86_got_modrm = 0; + x->d86_memsize = 0; + x->d86_vsib = 0; + + if (cpu_mode == SIZE16) { + opnd_size = SIZE16; + addr_size = SIZE16; + } else if (cpu_mode == SIZE32) { + opnd_size = SIZE32; + addr_size = SIZE32; + } else { + opnd_size = SIZE32; + addr_size = SIZE64; + } + + /* + * Get one opcode byte and check for zero padding that follows + * jump tables. + */ + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + + if (opcode1 == 0 && opcode2 == 0 && + x->d86_check_func != NULL && x->d86_check_func(x->d86_data)) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, ".byte\t0", OPLEN); +#endif + goto done; + } + + /* + * Gather up legacy x86 prefix bytes. 
+ */ + for (;;) { + uint_t *which_prefix = NULL; + + dp = (instable_t *)&dis_distable[opcode1][opcode2]; + + switch (dp->it_adrmode) { + case PREFIX: + which_prefix = &rep_prefix; + break; + case LOCK: + which_prefix = &lock_prefix; + break; + case OVERRIDE: + which_prefix = &segment_prefix; +#ifdef DIS_TEXT + x->d86_seg_prefix = (char *)dp->it_name; +#endif + if (dp->it_invalid64 && cpu_mode == SIZE64) + goto error; + break; + case AM: + which_prefix = &addr_size_prefix; + break; + case DM: + which_prefix = &opnd_size_prefix; + break; + } + if (which_prefix == NULL) + break; + *which_prefix = (opcode1 << 4) | opcode2; + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + } + + /* + * Handle amd64 mode PREFIX values. + * Some of the segment prefixes are no-ops. (only FS/GS actually work) + * We might have a REX prefix (opcodes 0x40-0x4f) + */ + if (cpu_mode == SIZE64) { + if (segment_prefix != 0x64 && segment_prefix != 0x65) + segment_prefix = 0; + + if (opcode1 == 0x4) { + rex_prefix = (opcode1 << 4) | opcode2; + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + dp = (instable_t *)&dis_distable[opcode1][opcode2]; + } else if (opcode1 == 0xC && + (opcode2 == 0x4 || opcode2 == 0x5)) { + /* AVX instructions */ + vex_prefix = (opcode1 << 4) | opcode2; + x->d86_rex_prefix = 0x40; + } + } else if (opcode1 == 0xC && (opcode2 == 0x4 || opcode2 == 0x5)) { + /* LDS, LES or AVX */ + dtrace_get_modrm(x, &mode, ®, &r_m); + vex_prefetch = 1; + + if (mode == REG_ONLY) { + /* AVX */ + vex_prefix = (opcode1 << 4) | opcode2; + x->d86_rex_prefix = 0x40; + opcode3 = (((mode << 3) | reg)>>1) & 0x0F; + opcode4 = ((reg << 3) | r_m) & 0x0F; + } + } + + if (vex_prefix == VEX_2bytes) { + if (!vex_prefetch) { + if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0) + goto error; + } + vex_R = ((opcode3 & VEX_R) & 0x0F) >> 3; + vex_L = ((opcode4 & VEX_L) & 0x0F) >> 2; + vex_v = (((opcode3 << 4) | opcode4) & VEX_v) >> 3; + vex_p = opcode4 & VEX_p; + /* + * The 
vex.x and vex.b bits are not defined in two bytes + * mode vex prefix, their default values are 1 + */ + vex_byte1 = (opcode3 & VEX_R) | VEX_X | VEX_B; + + if (vex_R == 0) + x->d86_rex_prefix |= REX_R; + + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + + switch (vex_p) { + case VEX_p_66: + dp = (instable_t *) + &dis_opAVX660F[(opcode1 << 4) | opcode2]; + break; + case VEX_p_F3: + dp = (instable_t *) + &dis_opAVXF30F[(opcode1 << 4) | opcode2]; + break; + case VEX_p_F2: + dp = (instable_t *) + &dis_opAVXF20F [(opcode1 << 4) | opcode2]; + break; + default: + dp = (instable_t *) + &dis_opAVX0F[opcode1][opcode2]; + + } + + } else if (vex_prefix == VEX_3bytes) { + if (!vex_prefetch) { + if (dtrace_get_opcode(x, &opcode3, &opcode4) != 0) + goto error; + } + vex_R = (opcode3 & VEX_R) >> 3; + vex_X = (opcode3 & VEX_X) >> 2; + vex_B = (opcode3 & VEX_B) >> 1; + vex_m = (((opcode3 << 4) | opcode4) & VEX_m); + vex_byte1 = opcode3 & (VEX_R | VEX_X | VEX_B); + + if (vex_R == 0) + x->d86_rex_prefix |= REX_R; + if (vex_X == 0) + x->d86_rex_prefix |= REX_X; + if (vex_B == 0) + x->d86_rex_prefix |= REX_B; + + if (dtrace_get_opcode(x, &opcode5, &opcode6) != 0) + goto error; + vex_W = (opcode5 & VEX_W) >> 3; + vex_L = (opcode6 & VEX_L) >> 2; + vex_v = (((opcode5 << 4) | opcode6) & VEX_v) >> 3; + vex_p = opcode6 & VEX_p; + + if (vex_W) + x->d86_rex_prefix |= REX_W; + + /* Only these three vex_m values valid; others are reserved */ + if ((vex_m != VEX_m_0F) && (vex_m != VEX_m_0F38) && + (vex_m != VEX_m_0F3A)) + goto error; + + if (dtrace_get_opcode(x, &opcode1, &opcode2) != 0) + goto error; + + switch (vex_p) { + case VEX_p_66: + if (vex_m == VEX_m_0F) { + dp = (instable_t *) + &dis_opAVX660F + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F38) { + dp = (instable_t *) + &dis_opAVX660F38 + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F3A) { + dp = (instable_t *) + &dis_opAVX660F3A + [(opcode1 << 4) | opcode2]; + } else { + goto error; + } + 
break; + case VEX_p_F3: + if (vex_m == VEX_m_0F) { + dp = (instable_t *) + &dis_opAVXF30F + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F38) { + dp = (instable_t *) + &dis_opAVXF30F38 + [(opcode1 << 4) | opcode2]; + } else { + goto error; + } + break; + case VEX_p_F2: + if (vex_m == VEX_m_0F) { + dp = (instable_t *) + &dis_opAVXF20F + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F3A) { + dp = (instable_t *) + &dis_opAVXF20F3A + [(opcode1 << 4) | opcode2]; + } else if (vex_m == VEX_m_0F38) { + dp = (instable_t *) + &dis_opAVXF20F38 + [(opcode1 << 4) | opcode2]; + } else { + goto error; + } + break; + default: + dp = (instable_t *) + &dis_opAVX0F[opcode1][opcode2]; + + } + } + if (vex_prefix) { + if (dp->it_vexwoxmm) { + wbit = LONG_OPND; + } else { + if (vex_L) + wbit = YMM_OPND; + else + wbit = XMM_OPND; + } + } + + /* + * Deal with selection of operand and address size now. + * Note that the REX.W bit being set causes opnd_size_prefix to be + * ignored. + */ + if (cpu_mode == SIZE64) { + if ((rex_prefix & REX_W) || vex_W) + opnd_size = SIZE64; + else if (opnd_size_prefix) + opnd_size = SIZE16; + + if (addr_size_prefix) + addr_size = SIZE32; + } else if (cpu_mode == SIZE32) { + if (opnd_size_prefix) + opnd_size = SIZE16; + if (addr_size_prefix) + addr_size = SIZE16; + } else { + if (opnd_size_prefix) + opnd_size = SIZE32; + if (addr_size_prefix) + addr_size = SIZE32; + } + /* + * The pause instruction - a repz'd nop. This doesn't fit + * with any of the other prefix goop added for SSE, so we'll + * special-case it here. + */ + if (rep_prefix == 0xf3 && opcode1 == 0x9 && opcode2 == 0x0) { + rep_prefix = 0; + dp = (instable_t *)&dis_opPause; + } + + /* + * Some 386 instructions have 2 bytes of opcode before the mod_r/m + * byte so we may need to perform a table indirection. 
+ */ + if (dp->it_indirect == (instable_t *)dis_op0F) { + if (dtrace_get_opcode(x, &opcode4, &opcode5) != 0) + goto error; + opcode_bytes = 2; + if (opcode4 == 0x7 && opcode5 >= 0x1 && opcode5 <= 0x3) { + uint_t subcode; + + if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) + goto error; + opcode_bytes = 3; + subcode = ((opcode6 & 0x3) << 1) | + ((opcode7 & 0x8) >> 3); + dp = (instable_t *)&dis_op0F7123[opcode5][subcode]; + } else if ((opcode4 == 0xc) && (opcode5 >= 0x8)) { + dp = (instable_t *)&dis_op0FC8[0]; + } else if ((opcode4 == 0x3) && (opcode5 == 0xA)) { + opcode_bytes = 3; + if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) + goto error; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + + dp = (instable_t *)&dis_op0F3A[(opcode6<<4)|opcode7]; +#ifdef DIS_TEXT + if (strcmp(dp->it_name, "INVALID") == 0) + goto error; +#endif + switch (dp->it_adrmode) { + case XMMP: + break; + case XMMP_66r: + case XMMPRM_66r: + case XMM3PM_66r: + if (opnd_size_prefix == 0) { + goto error; + } + break; + case XMMP_66o: + if (opnd_size_prefix == 0) { + /* SSSE3 MMX instructions */ + dp_mmx = *dp; + dp = &dp_mmx; + dp->it_adrmode = MMOPM_66o; +#ifdef DIS_MEM + dp->it_size = 8; +#endif + } + break; + default: + goto error; + } + } else if ((opcode4 == 0x3) && (opcode5 == 0x8)) { + opcode_bytes = 3; + if (dtrace_get_opcode(x, &opcode6, &opcode7) != 0) + goto error; + dp = (instable_t *)&dis_op0F38[(opcode6<<4)|opcode7]; + + /* + * Both crc32 and movbe have the same 3rd opcode + * byte of either 0xF0 or 0xF1, so we use another + * indirection to distinguish between the two. + */ + if (dp->it_indirect == (instable_t *)dis_op0F38F0 || + dp->it_indirect == (instable_t *)dis_op0F38F1) { + + dp = dp->it_indirect; + if (rep_prefix != 0xF2) { + /* It is movbe */ + dp++; + } + } + + /* + * The adx family of instructions (adcx and adox) + * continue the classic Intel tradition of abusing + * arbitrary prefixes without actually meaning the + * prefix bit. 
Therefore, if we find either the + * opnd_size_prefix or rep_prefix we end up zeroing it + * out after making our determination so as to ensure + * that we don't get confused and accidentally print + * repz prefixes and the like on these instructions. + * + * In addition, these instructions are actually much + * closer to AVX instructions in semantics. Importantly, + * they always default to having 32-bit operands. + * However, if the CPU is in 64-bit mode, then and only + * then, does it use REX.w promotes things to 64-bits + * and REX.r allows 64-bit mode to use register r8-r15. + */ + if (dp->it_indirect == (instable_t *)dis_op0F38F6) { + dp = dp->it_indirect; + if (opnd_size_prefix == 0 && + rep_prefix == 0xf3) { + /* It is adox */ + dp++; + } else if (opnd_size_prefix != 0x66 && + rep_prefix != 0) { + /* It isn't adcx */ + goto error; + } + opnd_size_prefix = 0; + rep_prefix = 0; + opnd_size = SIZE32; + if (rex_prefix & REX_W) + opnd_size = SIZE64; + } + +#ifdef DIS_TEXT + if (strcmp(dp->it_name, "INVALID") == 0) + goto error; +#endif + switch (dp->it_adrmode) { + case ADX: + case XMM: + break; + case RM_66r: + case XMM_66r: + case XMMM_66r: + if (opnd_size_prefix == 0) { + goto error; + } + break; + case XMM_66o: + if (opnd_size_prefix == 0) { + /* SSSE3 MMX instructions */ + dp_mmx = *dp; + dp = &dp_mmx; + dp->it_adrmode = MM; +#ifdef DIS_MEM + dp->it_size = 8; +#endif + } + break; + case CRC32: + if (rep_prefix != 0xF2) { + goto error; + } + rep_prefix = 0; + break; + case MOVBE: + if (rep_prefix != 0x0) { + goto error; + } + break; + default: + goto error; + } + } else { + dp = (instable_t *)&dis_op0F[opcode4][opcode5]; + } + } + + /* + * If still not at a TERM decode entry, then a ModRM byte + * exists and its fields further decode the instruction. 
+ */ + x->d86_got_modrm = 0; + if (dp->it_indirect != TERM) { + dtrace_get_modrm(x, &mode, &opcode3, &r_m); + if (x->d86_error) + goto error; + reg = opcode3; + + /* + * decode 287 instructions (D8-DF) from opcodeN + */ + if (opcode1 == 0xD && opcode2 >= 0x8) { + if (opcode2 == 0xB && mode == 0x3 && opcode3 == 4) + dp = (instable_t *)&dis_opFP5[r_m]; + else if (opcode2 == 0xA && mode == 0x3 && opcode3 < 4) + dp = (instable_t *)&dis_opFP7[opcode3]; + else if (opcode2 == 0xB && mode == 0x3) + dp = (instable_t *)&dis_opFP6[opcode3]; + else if (opcode2 == 0x9 && mode == 0x3 && opcode3 >= 4) + dp = (instable_t *)&dis_opFP4[opcode3 - 4][r_m]; + else if (mode == 0x3) + dp = (instable_t *) + &dis_opFP3[opcode2 - 8][opcode3]; + else + dp = (instable_t *) + &dis_opFP1n2[opcode2 - 8][opcode3]; + } else { + dp = (instable_t *)dp->it_indirect + opcode3; + } + } + + /* + * In amd64 bit mode, ARPL opcode is changed to MOVSXD + * (sign extend 32bit to 64 bit) + */ + if ((vex_prefix == 0) && cpu_mode == SIZE64 && + opcode1 == 0x6 && opcode2 == 0x3) + dp = (instable_t *)&dis_opMOVSLD; + + /* + * at this point we should have a correct (or invalid) opcode + */ + if (cpu_mode == SIZE64 && dp->it_invalid64 || + cpu_mode != SIZE64 && dp->it_invalid32) + goto error; + if (dp->it_indirect != TERM) + goto error; + + /* + * Deal with MMX/SSE opcodes which are changed by prefixes. Note, we do + * need to include UNKNOWN below, as we may have instructions that + * actually have a prefix, but don't exist in any other form. + */ + switch (dp->it_adrmode) { + case UNKNOWN: + case MMO: + case MMOIMPL: + case MMO3P: + case MMOM3: + case MMOMS: + case MMOPM: + case MMOPRM: + case MMOS: + case XMMO: + case XMMOM: + case XMMOMS: + case XMMOPM: + case XMMOS: + case XMMOMX: + case XMMOX3: + case XMMOXMM: + /* + * This is horrible. Some SIMD instructions take the + * form 0x0F 0x?? ..., which is easily decoded using the + * existing tables. 
Other SIMD instructions use various + * prefix bytes to overload existing instructions. For + * Example, addps is F0, 58, whereas addss is F3 (repz), + * F0, 58. Presumably someone got a raise for this. + * + * If we see one of the instructions which can be + * modified in this way (if we've got one of the SIMDO* + * address modes), we'll check to see if the last prefix + * was a repz. If it was, we strip the prefix from the + * mnemonic, and we indirect using the dis_opSIMDrepz + * table. + */ + + /* + * Calculate our offset in dis_op0F + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F > sizeof (dis_op0F)) + goto error; + + off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / + sizeof (instable_t); + + /* + * Rewrite if this instruction used one of the magic prefixes. + */ + if (rep_prefix) { + if (rep_prefix == 0xf2) + dp = (instable_t *)&dis_opSIMDrepnz[off]; + else + dp = (instable_t *)&dis_opSIMDrepz[off]; + rep_prefix = 0; + } else if (opnd_size_prefix) { + dp = (instable_t *)&dis_opSIMDdata16[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + + case MG9: + /* + * More horribleness: the group 9 (0xF0 0xC7) instructions are + * allowed an optional prefix of 0x66 or 0xF3. This is similar + * to the SIMD business described above, but with a different + * addressing mode (and an indirect table), so we deal with it + * separately (if similarly). + * + * Intel further complicated this with the release of Ivy Bridge + * where they overloaded these instructions based on the ModR/M + * bytes. The VMX instructions have a mode of 0 since they are + * memory instructions but rdrand instructions have a mode of + * 0b11 (REG_ONLY) because they only operate on registers. While + * there are different prefix formats, for now it is sufficient + * to use a single different table. 
+ */ + + /* + * Calculate our offset in dis_op0FC7 (the group 9 table) + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0FC7 > sizeof (dis_op0FC7)) + goto error; + + off = ((uintptr_t)dp - (uintptr_t)dis_op0FC7) / + sizeof (instable_t); + + /* + * If we have a mode of 0b11 then we have to rewrite this. + */ + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == REG_ONLY) { + dp = (instable_t *)&dis_op0FC7m3[off]; + break; + } + + /* + * Rewrite if this instruction used one of the magic prefixes. + */ + if (rep_prefix) { + if (rep_prefix == 0xf3) + dp = (instable_t *)&dis_opF30FC7[off]; + else + goto error; + rep_prefix = 0; + } else if (opnd_size_prefix) { + dp = (instable_t *)&dis_op660FC7[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + + + case MMOSH: + /* + * As with the "normal" SIMD instructions, the MMX + * shuffle instructions are overloaded. These + * instructions, however, are special in that they use + * an extra byte, and thus an extra table. As of this + * writing, they only use the opnd_size prefix. + */ + + /* + * Calculate our offset in dis_op0F7123 + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F7123 > + sizeof (dis_op0F7123)) + goto error; + + if (opnd_size_prefix) { + off = ((uintptr_t)dp - (uintptr_t)dis_op0F7123) / + sizeof (instable_t); + dp = (instable_t *)&dis_opSIMD7123[off]; + opnd_size_prefix = 0; + if (opnd_size == SIZE16) + opnd_size = SIZE32; + } + break; + case MRw: + if (rep_prefix) { + if (rep_prefix == 0xf3) { + + /* + * Calculate our offset in dis_op0F + */ + if ((uintptr_t)dp - (uintptr_t)dis_op0F + > sizeof (dis_op0F)) + goto error; + + off = ((uintptr_t)dp - (uintptr_t)dis_op0F) / + sizeof (instable_t); + + dp = (instable_t *)&dis_opSIMDrepz[off]; + rep_prefix = 0; + } else { + goto error; + } + } + break; + } + + /* + * In 64 bit mode, some opcodes automatically use opnd_size == SIZE64. 
+ */ + if (cpu_mode == SIZE64) + if (dp->it_always64 || (opnd_size == SIZE32 && dp->it_stackop)) + opnd_size = SIZE64; + +#ifdef DIS_TEXT + /* + * At this point most instructions can format the opcode mnemonic + * including the prefixes. + */ + if (lock_prefix) + (void) strlcat(x->d86_mnem, "lock ", OPLEN); + + if (rep_prefix == 0xf2) + (void) strlcat(x->d86_mnem, "repnz ", OPLEN); + else if (rep_prefix == 0xf3) + (void) strlcat(x->d86_mnem, "repz ", OPLEN); + + if (cpu_mode == SIZE64 && addr_size_prefix) + (void) strlcat(x->d86_mnem, "addr32 ", OPLEN); + + if (dp->it_adrmode != CBW && + dp->it_adrmode != CWD && + dp->it_adrmode != XMMSFNC) { + if (strcmp(dp->it_name, "INVALID") == 0) + goto error; + (void) strlcat(x->d86_mnem, dp->it_name, OPLEN); + if (dp->it_avxsuf && dp->it_suffix) { + (void) strlcat(x->d86_mnem, vex_W != 0 ? "q" : "d", + OPLEN); + } else if (dp->it_suffix) { + char *types[] = {"", "w", "l", "q"}; + if (opcode_bytes == 2 && opcode4 == 4) { + /* It's a cmovx.yy. Replace the suffix x */ + for (i = 5; i < OPLEN; i++) { + if (x->d86_mnem[i] == '.') + break; + } + x->d86_mnem[i - 1] = *types[opnd_size]; + } else if ((opnd_size == 2) && (opcode_bytes == 3) && + ((opcode6 == 1 && opcode7 == 6) || + (opcode6 == 2 && opcode7 == 2))) { + /* + * To handle PINSRD and PEXTRD + */ + (void) strlcat(x->d86_mnem, "d", OPLEN); + } else { + (void) strlcat(x->d86_mnem, types[opnd_size], + OPLEN); + } + } + } +#endif + + /* + * Process operands based on the addressing modes. 
+ */ + x->d86_mode = cpu_mode; + /* + * In vex mode the rex_prefix has no meaning + */ + if (!vex_prefix) + x->d86_rex_prefix = rex_prefix; + x->d86_opnd_size = opnd_size; + x->d86_addr_size = addr_size; + vbit = 0; /* initialize for mem/reg -> reg */ + switch (dp->it_adrmode) { + /* + * amd64 instruction to sign extend 32 bit reg/mem operands + * into 64 bit register values + */ + case MOVSXZ: +#ifdef DIS_TEXT + if (rex_prefix == 0) + (void) strncpy(x->d86_mnem, "movzld", OPLEN); +#endif + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + x->d86_opnd_size = SIZE64; + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + x->d86_opnd_size = opnd_size = SIZE32; + wbit = LONG_OPND; + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* + * movsbl movsbw movsbq (0x0FBE) or movswl movswq (0x0FBF) + * movzbl movzbw movzbq (0x0FB6) or movzwl movzwq (0x0FB7) + * wbit lives in 2nd byte, note that operands + * are different sized + */ + case MOVZ: + if (rex_prefix & REX_W) { + /* target register size = 64 bit */ + x->d86_mnem[5] = 'q'; + } + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + x->d86_opnd_size = opnd_size = SIZE16; + wbit = WBIT(opcode5); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + case CRC32: + opnd_size = SIZE32; + if (rex_prefix & REX_W) + opnd_size = SIZE64; + x->d86_opnd_size = opnd_size; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + wbit = WBIT(opcode7); + if (opnd_size_prefix) + x->d86_opnd_size = opnd_size = SIZE16; + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + case MOVBE: + opnd_size = SIZE32; + if (rex_prefix & REX_W) + opnd_size = SIZE64; + x->d86_opnd_size = opnd_size; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + wbit = WBIT(opcode7); + if 
(opnd_size_prefix) + x->d86_opnd_size = opnd_size = SIZE16; + if (wbit) { + /* reg -> mem */ + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); + dtrace_get_operand(x, mode, r_m, wbit, 1); + } else { + /* mem -> reg */ + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + } + break; + + /* + * imul instruction, with either 8-bit or longer immediate + * opcode 0x6B for byte, sign-extended displacement, 0x69 for word(s) + */ + case IMUL: + wbit = LONG_OPND; + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, + OPSIZE(opnd_size, opcode2 == 0x9), 1); + break; + + /* memory or register operand to register, with 'w' bit */ + case MRw: + case ADX: + wbit = WBIT(opcode2); + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + break; + + /* register to memory or register operand, with 'w' bit */ + /* arpl happens to fit here also because it is odd */ + case RMw: + if (opcode_bytes == 2) + wbit = WBIT(opcode5); + else + wbit = WBIT(opcode2); + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* xaddb instruction */ + case XADDB: + wbit = 0; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* MMX register to memory or register operand */ + case MMS: + case MMOS: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); + break; + + /* MMX register to memory */ + case MMOMS: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == REG_ONLY) + goto error; + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 1); + break; + + /* Double shift. Has immediate operand specifying the shift. 
*/ + case DSHIFT: + wbit = LONG_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 2); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + /* + * Double shift. With no immediate operand, specifies using %cl. + */ + case DSHIFTcl: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* immediate to memory or register operand */ + case IMlw: + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + /* + * Have long immediate for opcode 0x81, but not 0x80 nor 0x83 + */ + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, opcode2 == 1), 0); + break; + + /* immediate to memory or register operand with the */ + /* 'w' bit present */ + case IMw: + wbit = WBIT(opcode2); + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); + break; + + /* immediate to register with register in low 3 bits */ + /* of op code */ + case IR: + /* w-bit here (with regs) is bit 3 */ + wbit = opcode2 >>3 & 0x1; + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + mode = REG_ONLY; + r_m = reg; + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE64(opnd_size, wbit), 0); + break; + + /* MMX immediate shift of register */ + case MMSH: + case MMOSH: + wbit = MM_OPND; + goto mm_shift; /* in next case */ + + /* SIMD immediate shift of register */ + case XMMSH: + wbit = XMM_OPND; +mm_shift: + reg = REGNO(opcode7); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + NOMEM; + break; + + /* accumulator to memory operand */ + case AO: + vbit = 1; + /*FALLTHROUGH*/ + + /* memory operand to accumulator */ + case OA: + wbit = 
WBIT(opcode2); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1 - vbit); + dtrace_imm_opnd(x, wbit, OPSIZE64(addr_size, LONG_OPND), vbit); +#ifdef DIS_TEXT + x->d86_opnd[vbit].d86_mode = MODE_OFFSET; +#endif + break; + + + /* segment register to memory or register operand */ + case SM: + vbit = 1; + /*FALLTHROUGH*/ + + /* memory or register operand to segment register */ + case MS: + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, LONG_OPND, vbit); + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 1 - vbit); + break; + + /* + * rotate or shift instructions, which may shift by 1 or + * consult the cl register, depending on the 'v' bit + */ + case Mv: + vbit = VBIT(opcode2); + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); +#ifdef DIS_TEXT + if (vbit) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "%cl", OPLEN); + } else { + x->d86_opnd[0].d86_mode = MODE_SIGNED; + x->d86_opnd[0].d86_value_size = 1; + x->d86_opnd[0].d86_value = 1; + } +#endif + break; + /* + * immediate rotate or shift instructions + */ + case MvI: + wbit = WBIT(opcode2); +normal_imm_mem: + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + /* bit test instructions */ + case MIb: + wbit = LONG_OPND; + goto normal_imm_mem; + + /* single memory or register operand with 'w' bit present */ + case Mw: + wbit = WBIT(opcode2); +just_mem: + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + case SWAPGS_RDTSCP: + if (cpu_mode == SIZE64 && mode == 3 && r_m == 0) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "swapgs", OPLEN); +#endif + NOMEM; + break; + } else if (mode == 3 && r_m == 1) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "rdtscp", OPLEN); +#endif + NOMEM; + break; + } + 
+ /*FALLTHROUGH*/ + + /* prefetch instruction - memory operand, but no memory acess */ + case PREF: + NOMEM; + /*FALLTHROUGH*/ + + /* single memory or register operand */ + case M: + case MG9: + wbit = LONG_OPND; + goto just_mem; + + /* single memory or register byte operand */ + case Mb: + wbit = BYTE_OPND; + goto just_mem; + + case VMx: + if (mode == 3) { +#ifdef DIS_TEXT + char *vminstr; + + switch (r_m) { + case 1: + vminstr = "vmcall"; + break; + case 2: + vminstr = "vmlaunch"; + break; + case 3: + vminstr = "vmresume"; + break; + case 4: + vminstr = "vmxoff"; + break; + default: + goto error; + } + + (void) strncpy(x->d86_mnem, vminstr, OPLEN); +#else + if (r_m < 1 || r_m > 4) + goto error; +#endif + + NOMEM; + break; + } + /*FALLTHROUGH*/ + case SVM: + if (mode == 3) { +#ifdef DIS_TEXT + char *vinstr; + + switch (r_m) { + case 0: + vinstr = "vmrun"; + break; + case 1: + vinstr = "vmmcall"; + break; + case 2: + vinstr = "vmload"; + break; + case 3: + vinstr = "vmsave"; + break; + case 4: + vinstr = "stgi"; + break; + case 5: + vinstr = "clgi"; + break; + case 6: + vinstr = "skinit"; + break; + case 7: + vinstr = "invlpga"; + break; + } + + (void) strncpy(x->d86_mnem, vinstr, OPLEN); +#endif + NOMEM; + break; + } + /*FALLTHROUGH*/ + case MONITOR_MWAIT: + if (mode == 3) { + if (r_m == 0) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "monitor", OPLEN); +#endif + NOMEM; + break; + } else if (r_m == 1) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "mwait", OPLEN); +#endif + NOMEM; + break; + } else if (r_m == 2) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "clac", OPLEN); +#endif + NOMEM; + break; + } else if (r_m == 3) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "stac", OPLEN); +#endif + NOMEM; + break; + } else { + goto error; + } + } + /*FALLTHROUGH*/ + case XGETBV_XSETBV: + if (mode == 3) { + if (r_m == 0) { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "xgetbv", OPLEN); +#endif + NOMEM; + break; + } else if (r_m == 1) { +#ifdef DIS_TEXT + 
(void) strncpy(x->d86_mnem, "xsetbv", OPLEN); +#endif + NOMEM; + break; + } else { + goto error; + } + + } + /*FALLTHROUGH*/ + case MO: + /* Similar to M, but only memory (no direct registers) */ + wbit = LONG_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode == 3) + goto error; + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* move special register to register or reverse if vbit */ + case SREG: + switch (opcode5) { + + case 2: + vbit = 1; + /*FALLTHROUGH*/ + case 0: + wbit = CONTROL_OPND; + break; + + case 3: + vbit = 1; + /*FALLTHROUGH*/ + case 1: + wbit = DEBUG_OPND; + break; + + case 6: + vbit = 1; + /*FALLTHROUGH*/ + case 4: + wbit = TEST_OPND; + break; + + } + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, vbit); + dtrace_get_operand(x, REG_ONLY, r_m, LONG_OPND, 1 - vbit); + NOMEM; + break; + + /* + * single register operand with register in the low 3 + * bits of op code + */ + case R: + if (opcode_bytes == 2) + reg = REGNO(opcode5); + else + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); + NOMEM; + break; + + /* + * register to accumulator with register in the low 3 + * bits of op code, xchg instructions + */ + case RA: + NOMEM; + reg = REGNO(opcode2); + dtrace_rex_adjust(rex_prefix, mode, ®, NULL); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 0); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, LONG_OPND, 1); + break; + + /* + * single segment register operand, with register in + * bits 3-4 of op code byte + */ + case SEG: + NOMEM; + reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 0x3; + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); + break; + + /* + * single segment register operand, with register in + * bits 3-5 of op code + */ + case LSEG: + NOMEM; + /* long seg reg from opcode */ + reg = (x->d86_bytes[x->d86_len - 1] >> 3) & 
0x7; + dtrace_get_operand(x, REG_ONLY, reg, SEG_OPND, 0); + break; + + /* memory or register operand to register */ + case MR: + if (vex_prefetch) + x->d86_got_modrm = 1; + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + break; + + case RM: + case RM_66r: + wbit = LONG_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 1); + break; + + /* MMX/SIMD-Int memory or mm reg to mm reg */ + case MM: + case MMO: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); + break; + + case MMOIMPL: +#ifdef DIS_TEXT + wbit = strcmp(dp->it_name, "movd") ? MM_OPND : LONG_OPND; +#else + wbit = LONG_OPND; +#endif + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + dtrace_get_operand(x, REG_ONLY, reg, MM_OPND, 1); + mode = 0; /* change for memory access size... 
*/ + break; + + /* MMX/SIMD-Int and SIMD-FP predicated mm reg to r32 */ + case MMO3P: + wbit = MM_OPND; + goto xmm3p; + case XMM3P: + wbit = XMM_OPND; +xmm3p: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 1, + 1); + NOMEM; + break; + + case XMM3PM_66r: + THREEOPERAND(x, mode, reg, r_m, rex_prefix, LONG_OPND, XMM_OPND, + 1, 0); + break; + + /* MMX/SIMD-Int predicated r32/mem to mm reg */ + case MMOPRM: + wbit = LONG_OPND; + w2 = MM_OPND; + goto xmmprm; + case XMMPRM: + case XMMPRM_66r: + wbit = LONG_OPND; + w2 = XMM_OPND; +xmmprm: + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, w2, 1, 1); + break; + + /* MMX/SIMD-Int predicated mm/mem to mm reg */ + case MMOPM: + case MMOPM_66o: + wbit = w2 = MM_OPND; + goto xmmprm; + + /* MMX/SIMD-Int mm reg to r32 */ + case MMOM3: + NOMEM; + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); + break; + + /* SIMD memory or xmm reg operand to xmm reg */ + case XMM: + case XMM_66o: + case XMM_66r: + case XMMO: + case XMMXIMPL: + wbit = XMM_OPND; + STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0); + + if (dp->it_adrmode == XMMXIMPL && mode != REG_ONLY) + goto error; + +#ifdef DIS_TEXT + /* + * movlps and movhlps share opcodes. They differ in the + * addressing modes allowed for their operands. + * movhps and movlhps behave similarly. + */ + if (mode == REG_ONLY) { + if (strcmp(dp->it_name, "movlps") == 0) + (void) strncpy(x->d86_mnem, "movhlps", OPLEN); + else if (strcmp(dp->it_name, "movhps") == 0) + (void) strncpy(x->d86_mnem, "movlhps", OPLEN); + } +#endif + if (dp->it_adrmode == XMMXIMPL) + mode = 0; /* change for memory access size... 
*/ + break; + + /* SIMD xmm reg to memory or xmm reg */ + case XMMS: + case XMMOS: + case XMMMS: + case XMMOMS: + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if ((strcmp(dp->it_name, "movlps") == 0 || + strcmp(dp->it_name, "movhps") == 0 || + strcmp(dp->it_name, "movntps") == 0) && + mode == REG_ONLY) + goto error; +#endif + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + break; + + /* SIMD memory to xmm reg */ + case XMMM: + case XMMM_66r: + case XMMOM: + wbit = XMM_OPND; + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if (mode == REG_ONLY) { + if (strcmp(dp->it_name, "movhps") == 0) + (void) strncpy(x->d86_mnem, "movlhps", OPLEN); + else + goto error; + } +#endif + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + /* SIMD memory or r32 to xmm reg */ + case XMM3MX: + wbit = LONG_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + case XMM3MXS: + wbit = LONG_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1); + break; + + /* SIMD memory or mm reg to xmm reg */ + case XMMOMX: + /* SIMD mm to xmm */ + case XMMMX: + wbit = MM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 0); + break; + + /* SIMD memory or xmm reg to mm reg */ + case XMMXMM: + case XMMOXMM: + case XMMXM: + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, MM_OPND, 0); + break; + + + /* SIMD memory or xmm reg to r32 */ + case XMMXM3: + wbit = XMM_OPND; + MIXED_MM(x, mode, reg, r_m, rex_prefix, wbit, LONG_OPND, 0); + break; + + /* SIMD xmm to r32 */ + case XMMX3: + case XMMOX3: + dtrace_get_modrm(x, &mode, ®, &r_m); + if (mode != REG_ONLY) + goto error; + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, 1); + NOMEM; + break; + + /* SIMD predicated memory or xmm reg with/to xmm reg */ + case XMMP: + case XMMP_66r: + case XMMP_66o: + case XMMOPM: + wbit = 
XMM_OPND; + THREEOPERAND(x, mode, reg, r_m, rex_prefix, wbit, XMM_OPND, 1, + 1); + +#ifdef DIS_TEXT + /* + * cmpps and cmpss vary their instruction name based + * on the value of imm8. Other XMMP instructions, + * such as shufps, require explicit specification of + * the predicate. + */ + if (dp->it_name[0] == 'c' && + dp->it_name[1] == 'm' && + dp->it_name[2] == 'p' && + strlen(dp->it_name) == 5) { + uchar_t pred = x->d86_opnd[0].d86_value & 0xff; + + if (pred >= (sizeof (dis_PREDSUFFIX) / sizeof (char *))) + goto error; + + (void) strncpy(x->d86_mnem, "cmp", OPLEN); + (void) strlcat(x->d86_mnem, dis_PREDSUFFIX[pred], + OPLEN); + (void) strlcat(x->d86_mnem, + dp->it_name + strlen(dp->it_name) - 2, + OPLEN); + x->d86_opnd[0] = x->d86_opnd[1]; + x->d86_opnd[1] = x->d86_opnd[2]; + x->d86_numopnds = 2; + } +#endif + break; + + case XMMX2I: + FOUROPERAND(x, mode, reg, r_m, rex_prefix, XMM_OPND, XMM_OPND, + 1); + NOMEM; + break; + + case XMM2I: + ONEOPERAND_TWOIMM(x, mode, reg, r_m, rex_prefix, XMM_OPND, 1); + NOMEM; + break; + + /* immediate operand to accumulator */ + case IA: + wbit = WBIT(opcode2); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, wbit), 0); + NOMEM; + break; + + /* memory or register operand to accumulator */ + case MA: + wbit = WBIT(opcode2); + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + /* si register to di register used to reference memory */ + case SD: +#ifdef DIS_TEXT + dtrace_check_override(x, 0); + x->d86_numopnds = 2; + if (addr_size == SIZE64) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", + OPLEN); + } else if (addr_size == SIZE32) { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", + OPLEN); + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", + OPLEN); + } else { + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", + OPLEN); + (void) 
strlcat(x->d86_opnd[1].d86_opnd, "(%di)", + OPLEN); + } +#endif + wbit = LONG_OPND; + break; + + /* accumulator to di register */ + case AD: + wbit = WBIT(opcode2); +#ifdef DIS_TEXT + dtrace_check_override(x, 1); + x->d86_numopnds = 2; + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 0); + if (addr_size == SIZE64) + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%rdi)", + OPLEN); + else if (addr_size == SIZE32) + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%edi)", + OPLEN); + else + (void) strlcat(x->d86_opnd[1].d86_opnd, "(%di)", + OPLEN); +#endif + break; + + /* si register to accumulator */ + case SA: + wbit = WBIT(opcode2); +#ifdef DIS_TEXT + dtrace_check_override(x, 0); + x->d86_numopnds = 2; + if (addr_size == SIZE64) + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%rsi)", + OPLEN); + else if (addr_size == SIZE32) + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%esi)", + OPLEN); + else + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%si)", + OPLEN); + dtrace_get_operand(x, REG_ONLY, EAX_REGNO, wbit, 1); +#endif + break; + + /* + * single operand, a 16/32 bit displacement + */ + case D: + wbit = LONG_OPND; + dtrace_disp_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); + NOMEM; + break; + + /* jmp/call indirect to memory or register operand */ + case INM: +#ifdef DIS_TEXT + (void) strlcat(x->d86_opnd[0].d86_prefix, "*", OPLEN); +#endif + dtrace_rex_adjust(rex_prefix, mode, NULL, &r_m); + dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); + wbit = LONG_OPND; + break; + + /* + * for long jumps and long calls -- a new code segment + * register and an offset in IP -- stored in object + * code in reverse order. Note - not valid in amd64 + */ + case SO: + dtrace_check_override(x, 1); + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 1); +#ifdef DIS_TEXT + x->d86_opnd[1].d86_mode = MODE_SIGNED; +#endif + /* will now get segment operand */ + dtrace_imm_opnd(x, wbit, 2, 0); + break; + + /* + * jmp/call. single operand, 8 bit displacement. 
+ * added to current EIP in 'compofff' + */ + case BD: + dtrace_disp_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* single 32/16 bit immediate operand */ + case I: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, OPSIZE(opnd_size, LONG_OPND), 0); + break; + + /* single 8 bit immediate operand */ + case Ib: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case ENTER: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 2, 0); + dtrace_imm_opnd(x, wbit, 1, 1); + switch (opnd_size) { + case SIZE64: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 8; + break; + case SIZE32: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 4; + break; + case SIZE16: + x->d86_memsize = (x->d86_opnd[1].d86_value + 1) * 2; + break; + } + + break; + + /* 16-bit immediate operand */ + case RET: + wbit = LONG_OPND; + dtrace_imm_opnd(x, wbit, 2, 0); + break; + + /* single 8 bit port operand */ + case P: + dtrace_check_override(x, 0); + dtrace_imm_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* single operand, dx register (variable port instruction) */ + case V: + x->d86_numopnds = 1; + dtrace_check_override(x, 0); +#ifdef DIS_TEXT + (void) strlcat(x->d86_opnd[0].d86_opnd, "(%dx)", OPLEN); +#endif + NOMEM; + break; + + /* + * The int instruction, which has two forms: + * int 3 (breakpoint) or + * int n, where n is indicated in the subsequent + * byte (format Ib). The int 3 instruction (opcode 0xCC), + * where, although the 3 looks like an operand, + * it is implied by the opcode. It must be converted + * to the correct base and output. 
+ */ + case INT3: +#ifdef DIS_TEXT + x->d86_numopnds = 1; + x->d86_opnd[0].d86_mode = MODE_SIGNED; + x->d86_opnd[0].d86_value_size = 1; + x->d86_opnd[0].d86_value = 3; +#endif + NOMEM; + break; + + /* single 8 bit immediate operand */ + case INTx: + dtrace_imm_opnd(x, BYTE_OPND, 1, 0); + NOMEM; + break; + + /* an unused byte must be discarded */ + case U: + if (x->d86_get_byte(x->d86_data) < 0) + goto error; + x->d86_len++; + NOMEM; + break; + + case CBW: +#ifdef DIS_TEXT + if (opnd_size == SIZE16) + (void) strlcat(x->d86_mnem, "cbtw", OPLEN); + else if (opnd_size == SIZE32) + (void) strlcat(x->d86_mnem, "cwtl", OPLEN); + else + (void) strlcat(x->d86_mnem, "cltq", OPLEN); +#endif + wbit = LONG_OPND; + NOMEM; + break; + + case CWD: +#ifdef DIS_TEXT + if (opnd_size == SIZE16) + (void) strlcat(x->d86_mnem, "cwtd", OPLEN); + else if (opnd_size == SIZE32) + (void) strlcat(x->d86_mnem, "cltd", OPLEN); + else + (void) strlcat(x->d86_mnem, "cqtd", OPLEN); +#endif + wbit = LONG_OPND; + NOMEM; + break; + + case XMMSFNC: + /* + * sfence is sfence if mode is REG_ONLY. If mode isn't + * REG_ONLY, mnemonic should be 'clflush'. 
+ */ + dtrace_get_modrm(x, &mode, ®, &r_m); + + /* sfence doesn't take operands */ +#ifdef DIS_TEXT + if (mode == REG_ONLY) { + (void) strlcat(x->d86_mnem, "sfence", OPLEN); + } else { + (void) strlcat(x->d86_mnem, "clflush", OPLEN); + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + NOMEM; + } +#else + if (mode != REG_ONLY) { + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); + NOMEM; + } +#endif + break; + + /* + * no disassembly, the mnemonic was all there was so go on + */ + case NORM: + if (dp->it_invalid32 && cpu_mode != SIZE64) + goto error; + NOMEM; + /*FALLTHROUGH*/ + case IMPLMEM: + break; + + case XMMFENCE: + /* + * XRSTOR and LFENCE share the same opcode but differ in mode + */ + dtrace_get_modrm(x, &mode, ®, &r_m); + + if (mode == REG_ONLY) { + /* + * Only the following exact byte sequences are allowed: + * + * 0f ae e8 lfence + * 0f ae f0 mfence + */ + if ((uint8_t)x->d86_bytes[x->d86_len - 1] != 0xe8 && + (uint8_t)x->d86_bytes[x->d86_len - 1] != 0xf0) + goto error; + } else { +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, "xrstor", OPLEN); +#endif + dtrace_rex_adjust(rex_prefix, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, BYTE_OPND, 0); + } + break; + + /* float reg */ + case F: +#ifdef DIS_TEXT + x->d86_numopnds = 1; + (void) strlcat(x->d86_opnd[0].d86_opnd, "%st(X)", OPLEN); + x->d86_opnd[0].d86_opnd[4] = r_m + '0'; +#endif + NOMEM; + break; + + /* float reg to float reg, with ret bit present */ + case FF: + vbit = opcode2 >> 2 & 0x1; /* vbit = 1: st -> st(i) */ + /*FALLTHROUGH*/ + case FFC: /* case for vbit always = 0 */ +#ifdef DIS_TEXT + x->d86_numopnds = 2; + (void) strlcat(x->d86_opnd[1 - vbit].d86_opnd, "%st", OPLEN); + (void) strlcat(x->d86_opnd[vbit].d86_opnd, "%st(X)", OPLEN); + x->d86_opnd[vbit].d86_opnd[4] = r_m + '0'; +#endif + NOMEM; + break; + + /* AVX instructions */ + case VEX_MO: + /* op(ModR/M.r/m) */ + x->d86_numopnds = 1; + 
dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + if ((dp == &dis_opAVX0F[0xA][0xE]) && (reg == 3)) + (void) strncpy(x->d86_mnem, "vstmxcsr", OPLEN); +#endif + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + case VEX_RMrX: + case FMA: + /* ModR/M.reg := op(VEX.vvvv, ModR/M.r/m) */ + x->d86_numopnds = 3; + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + /* + * In classic Intel fashion, the opcodes for all of the FMA + * instructions all have two possible mnemonics which vary by + * one letter, which is selected based on the value of the wbit. + * When wbit is one, they have the 'd' suffix and when 'wbit' is + * 0, they have the 's' suffix. Otherwise, the FMA instructions + * are all a standard VEX_RMrX. + */ +#ifdef DIS_TEXT + if (dp->it_adrmode == FMA) { + size_t len = strlen(dp->it_name); + (void) strncpy(x->d86_mnem, dp->it_name, OPLEN); + if (len + 1 < OPLEN) { + (void) strncpy(x->d86_mnem + len, + vex_W != 0 ? 
"d" : "s", OPLEN - len); + } + } +#endif + + if (mode != REG_ONLY) { + if ((dp == &dis_opAVXF20F[0x10]) || + (dp == &dis_opAVXF30F[0x10])) { + /* vmovsd , */ + /* or vmovss , */ + x->d86_numopnds = 2; + goto L_VEX_MX; + } + } + + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + /* + * VEX prefix uses the 1's complement form to encode the + * XMM/YMM regs + */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + + if ((dp == &dis_opAVXF20F[0x2A]) || + (dp == &dis_opAVXF30F[0x2A])) { + /* + * vcvtsi2si , , or vcvtsi2ss , + * , + */ + wbit = LONG_OPND; + } +#ifdef DIS_TEXT + else if ((mode == REG_ONLY) && + (dp == &dis_opAVX0F[0x1][0x6])) { /* vmovlhps */ + (void) strncpy(x->d86_mnem, "vmovlhps", OPLEN); + } else if ((mode == REG_ONLY) && + (dp == &dis_opAVX0F[0x1][0x2])) { /* vmovhlps */ + (void) strncpy(x->d86_mnem, "vmovhlps", OPLEN); + } +#endif + dtrace_get_operand(x, mode, r_m, wbit, 0); + + break; + + case VEX_VRMrX: + /* ModR/M.reg := op(MODR/M.r/m, VEX.vvvv) */ + x->d86_numopnds = 3; + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + /* + * VEX prefix uses the 1's complement form to encode the + * XMM/YMM regs + */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 0); + + dtrace_get_operand(x, mode, r_m, wbit, 1); + break; + + case VEX_SbVM: + /* ModR/M.reg := op(MODR/M.r/m, VSIB, VEX.vvvv) */ + x->d86_numopnds = 3; + x->d86_vsib = 1; + + /* + * All instructions that use VSIB are currently a mess. See the + * comment around the dis_gather_regs_t structure definition. 
+ */ + + vreg = &dis_vgather[opcode2][vex_W][vex_L]; + +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, dp->it_name, OPLEN); + (void) strlcat(x->d86_mnem + strlen(dp->it_name), + vreg->dgr_suffix, OPLEN - strlen(dp->it_name)); +#endif + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, reg, vreg->dgr_arg2, 2); + /* + * VEX prefix uses the 1's complement form to encode the + * XMM/YMM regs + */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), vreg->dgr_arg0, + 0); + dtrace_get_operand(x, mode, r_m, vreg->dgr_arg1, 1); + break; + + case VEX_RRX: + /* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + if (mode != REG_ONLY) { + if ((dp == &dis_opAVXF20F[0x11]) || + (dp == &dis_opAVXF30F[0x11])) { + /* vmovsd , */ + /* or vmovss , */ + x->d86_numopnds = 2; + goto L_VEX_RM; + } + } + + dtrace_get_operand(x, mode, r_m, wbit, 2); + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RMRX: + /* ModR/M.reg := op(VEX.vvvv, ModR/M.r_m, imm8[7:4]) */ + x->d86_numopnds = 4; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 3); + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2); + if (dp == &dis_opAVX660F3A[0x18]) { + /* vinsertf128 , , , */ + dtrace_get_operand(x, mode, r_m, XMM_OPND, 1); + } else if ((dp == &dis_opAVX660F3A[0x20]) || + (dp == & dis_opAVX660F[0xC4])) { + /* vpinsrb , , , */ + /* or vpinsrw , , , */ + dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); + } else if (dp == &dis_opAVX660F3A[0x22]) { + /* vpinsrd/q , , , */ +#ifdef DIS_TEXT + if (vex_W) + x->d86_mnem[6] = 'q'; +#endif + dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); + } else { + dtrace_get_operand(x, mode, r_m, wbit, 1); + } + + /* one byte immediate number */ + 
dtrace_imm_opnd(x, wbit, 1, 0); + + /* vblendvpd, vblendvps, vblendvb use the imm encode the regs */ + if ((dp == &dis_opAVX660F3A[0x4A]) || + (dp == &dis_opAVX660F3A[0x4B]) || + (dp == &dis_opAVX660F3A[0x4C])) { +#ifdef DIS_TEXT + int regnum = (x->d86_opnd[0].d86_value & 0xF0) >> 4; +#endif + x->d86_opnd[0].d86_mode = MODE_NONE; +#ifdef DIS_TEXT + if (vex_L) + (void) strncpy(x->d86_opnd[0].d86_opnd, + dis_YMMREG[regnum], OPLEN); + else + (void) strncpy(x->d86_opnd[0].d86_opnd, + dis_XMMREG[regnum], OPLEN); +#endif + } + break; + + case VEX_MX: + /* ModR/M.reg := op(ModR/M.rm) */ + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); +L_VEX_MX: + + if ((dp == &dis_opAVXF20F[0xE6]) || + (dp == &dis_opAVX660F[0x5A]) || + (dp == &dis_opAVX660F[0xE6])) { + /* vcvtpd2dq , */ + /* or vcvtpd2ps , */ + /* or vcvttpd2dq , */ + dtrace_get_operand(x, REG_ONLY, reg, XMM_OPND, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + } else if ((dp == &dis_opAVXF30F[0xE6]) || + (dp == &dis_opAVX0F[0x5][0xA]) || + (dp == &dis_opAVX660F38[0x13]) || + (dp == &dis_opAVX660F38[0x18]) || + (dp == &dis_opAVX660F38[0x19]) || + (dp == &dis_opAVX660F38[0x58]) || + (dp == &dis_opAVX660F38[0x78]) || + (dp == &dis_opAVX660F38[0x79]) || + (dp == &dis_opAVX660F38[0x59])) { + /* vcvtdq2pd , */ + /* or vcvtps2pd , */ + /* or vcvtph2ps , */ + /* or vbroadcasts* , */ + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, XMM_OPND, 0); + } else if (dp == &dis_opAVX660F[0x6E]) { + /* vmovd/q , */ +#ifdef DIS_TEXT + if (vex_W) + x->d86_mnem[4] = 'q'; +#endif + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, LONG_OPND, 0); + } else { + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + } + + break; + + case VEX_MXI: + /* ModR/M.reg := op(ModR/M.rm, imm8) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + 
dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + dtrace_get_operand(x, mode, r_m, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case VEX_XXI: + /* VEX.vvvv := op(ModR/M.rm, imm8) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, dis_AVXvgrp7[opcode2 - 1][reg], + OPLEN); +#endif + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 2); + dtrace_get_operand(x, REG_ONLY, r_m, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case VEX_MR: + /* ModR/M.reg (reg32/64) := op(ModR/M.rm) */ + if (dp == &dis_opAVX660F[0xC5]) { + /* vpextrw , , */ + x->d86_numopnds = 2; + vbit = 2; + } else { + x->d86_numopnds = 2; + vbit = 1; + } + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, LONG_OPND, vbit); + dtrace_get_operand(x, mode, r_m, wbit, vbit - 1); + + if (vbit == 2) + dtrace_imm_opnd(x, wbit, 1, 0); + + break; + + case VEX_RRI: + /* implicit(eflags/r32) := op(ModR/M.reg, ModR/M.rm) */ + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + + case VEX_RX: + /* ModR/M.rm := op(ModR/M.reg) */ + /* vextractf128 || vcvtps2ph */ + if (dp == &dis_opAVX660F3A[0x19] || + dp == &dis_opAVX660F3A[0x1d]) { + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, mode, r_m, XMM_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + } + + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + 
dtrace_get_operand(x, mode, r_m, wbit, 1); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RR: + /* ModR/M.rm := op(ModR/M.reg) */ + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + if (dp == &dis_opAVX660F[0x7E]) { + /* vmovd/q , */ +#ifdef DIS_TEXT + if (vex_W) + x->d86_mnem[4] = 'q'; +#endif + dtrace_get_operand(x, mode, r_m, LONG_OPND, 1); + } else + dtrace_get_operand(x, mode, r_m, wbit, 1); + + dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RRi: + /* ModR/M.rm := op(ModR/M.reg, imm) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + +#ifdef DIS_TEXT + if (dp == &dis_opAVX660F3A[0x16]) { + /* vpextrd/q , , */ + if (vex_W) + x->d86_mnem[6] = 'q'; + } +#endif + dtrace_get_operand(x, mode, r_m, LONG_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + case VEX_RIM: + /* ModR/M.rm := op(ModR/M.reg, imm) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, mode, r_m, XMM_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + + case VEX_RM: + /* ModR/M.rm := op(ModR/M.reg) */ + if (dp == &dis_opAVX660F3A[0x17]) { /* vextractps */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + dtrace_get_operand(x, mode, r_m, LONG_OPND, 2); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 1); + /* one byte immediate number */ + dtrace_imm_opnd(x, wbit, 1, 0); + break; + } + x->d86_numopnds = 2; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); +L_VEX_RM: + vbit = 1; + dtrace_get_operand(x, mode, r_m, wbit, vbit); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 
vbit - 1); + + break; + + case VEX_RRM: + /* ModR/M.rm := op(VEX.vvvv, ModR/M.reg) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, mode, r_m, wbit, 2); + /* VEX use the 1's complement form encode the XMM/YMM regs */ + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 0); + break; + + case VEX_RMX: + /* ModR/M.reg := op(VEX.vvvv, ModR/M.rm) */ + x->d86_numopnds = 3; + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + dtrace_get_operand(x, REG_ONLY, reg, wbit, 2); + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, REG_ONLY, r_m, wbit, 0); + break; + + case VEX_NONE: +#ifdef DIS_TEXT + if (vex_L) + (void) strncpy(x->d86_mnem, "vzeroall", OPLEN); +#endif + break; + case BLS: { + + /* + * The BLS instructions are VEX instructions that are based on + * VEX.0F38.F3; however, they are considered special group 17 + * and like everything else, they use the bits in 3-5 of the + * MOD R/M to determine the sub instruction. Unlike many others + * like the VMX instructions, these are valid both for memory + * and register forms. 
+ */ + + dtrace_get_modrm(x, &mode, ®, &r_m); + dtrace_vex_adjust(vex_byte1, mode, ®, &r_m); + + switch (reg) { + case 1: +#ifdef DIS_TEXT + blsinstr = "blsr"; +#endif + break; + case 2: +#ifdef DIS_TEXT + blsinstr = "blsmsk"; +#endif + break; + case 3: +#ifdef DIS_TEXT + blsinstr = "blsi"; +#endif + break; + default: + goto error; + } + + x->d86_numopnds = 2; +#ifdef DIS_TEXT + (void) strncpy(x->d86_mnem, blsinstr, OPLEN); +#endif + dtrace_get_operand(x, REG_ONLY, (0xF - vex_v), wbit, 1); + dtrace_get_operand(x, mode, r_m, wbit, 0); + break; + } + /* an invalid op code */ + case AM: + case DM: + case OVERRIDE: + case PREFIX: + case UNKNOWN: + NOMEM; + default: + goto error; + } /* end switch */ + if (x->d86_error) + goto error; + +done: +#ifdef DIS_MEM + /* + * compute the size of any memory accessed by the instruction + */ + if (x->d86_memsize != 0) { + return (0); + } else if (dp->it_stackop) { + switch (opnd_size) { + case SIZE16: + x->d86_memsize = 2; + break; + case SIZE32: + x->d86_memsize = 4; + break; + case SIZE64: + x->d86_memsize = 8; + break; + } + } else if (nomem || mode == REG_ONLY) { + x->d86_memsize = 0; + + } else if (dp->it_size != 0) { + /* + * In 64 bit mode descriptor table entries + * go up to 10 bytes and popf/pushf are always 8 bytes + */ + if (x->d86_mode == SIZE64 && dp->it_size == 6) + x->d86_memsize = 10; + else if (x->d86_mode == SIZE64 && opcode1 == 0x9 && + (opcode2 == 0xc || opcode2 == 0xd)) + x->d86_memsize = 8; + else + x->d86_memsize = dp->it_size; + + } else if (wbit == 0) { + x->d86_memsize = 1; + + } else if (wbit == LONG_OPND) { + if (opnd_size == SIZE64) + x->d86_memsize = 8; + else if (opnd_size == SIZE32) + x->d86_memsize = 4; + else + x->d86_memsize = 2; + + } else if (wbit == SEG_OPND) { + x->d86_memsize = 4; + + } else { + x->d86_memsize = 8; + } +#endif + return (0); + +error: +#ifdef DIS_TEXT + (void) strlcat(x->d86_mnem, "undef", OPLEN); +#endif + return (1); +} + +#ifdef DIS_TEXT + +/* + * Some instructions should 
have immediate operands printed
+ * as unsigned integers. We compare against this table.
+ */
+static char *unsigned_ops[] = {
+	"or", "and", "xor", "test", "in", "out", "lcall", "ljmp",
+	"rcr", "rcl", "ror", "rol", "shl", "shr", "sal", "psr", "psl",
+	0	/* terminator: the scan in isunsigned_op() stops here */
+};
+
+
+static int
+isunsigned_op(char *opcode)	/* 1 if the last word matches the table, else 0 */
+{
+	char *where;	/* ends up at the start of the last word in opcode */
+	int i;
+	int is_unsigned = 0;
+
+	/*
+	 * Work back to start of last mnemonic, since we may have
+	 * prefixes on some opcodes.
+	 */
+	where = opcode + strlen(opcode) - 1;	/* NOTE(review): before opcode if "" */
+	while (where > opcode && *where != ' ')
+		--where;
+	if (*where == ' ')
+		++where;
+
+	for (i = 0; unsigned_ops[i]; ++i) {	/* prefix match per table entry */
+		if (strncmp(where, unsigned_ops[i],
+		    strlen(unsigned_ops[i])))
+			continue;
+		is_unsigned = 1;
+		break;
+	}
+	return (is_unsigned);
+}
+
+/*
+ * Print a numeric immediate into end of buf, maximum length buflen.
+ * The immediate may be an address or a displacement (disp is IMM or DISP).
+ * The value is ANDed with mask before printing. If the immediate is a
+ * "small negative", or a negative displacement of any magnitude, and try_neg
+ * is TRY_NEG, print it as -<absval>. Respect the "octal" flag (DIS_F_OCTAL).
+ * "Small negative" is defined as "in the interval [NEG_LIMIT, 0)".
+ * Displacements that are not printed negative get an explicit '+' prefix.
+ *
+ * Also, "isunsigned_op()" instructions never print negatives.
+ * Return 1 if a negative value was printed, 0 otherwise.
+ */
+
+#define NEG_LIMIT -255
+enum {IMM, DISP};	/* values for print_imm()'s disp argument */
+enum {POS, TRY_NEG};	/* values for print_imm()'s try_neg argument */
+
+static int
+print_imm(dis86_t *dis, uint64_t usv, uint64_t mask, char *buf,
+    size_t buflen, int disp, int try_neg)
+{
+	int curlen;	/* NOTE(review): holds the size_t returned by strlen() */
+	int64_t sv = (int64_t)usv;
+	int octal = dis->d86_flags & DIS_F_OCTAL;
+
+	curlen = strlen(buf);	/* output is appended after existing text */
+
+	if (try_neg == TRY_NEG && sv < 0 &&
+	    (disp || sv >= NEG_LIMIT) &&
+	    !isunsigned_op(dis->d86_mnem)) {
+		dis->d86_sprintf_func(buf + curlen, buflen - curlen,
+		    octal ? "-0%llo" : "-0x%llx", (-sv) & mask);
+		return (1);
+	} else {
+		if (disp == DISP)
+			dis->d86_sprintf_func(buf + curlen, buflen - curlen,
+			    octal ? "+0%llo" : "+0x%llx", usv & mask);
+		else
+			dis->d86_sprintf_func(buf + curlen, buflen - curlen,
+			    octal ? "0%llo" : "0x%llx", usv & mask);
+		return (0);
+
+	}
+}
+
+
+static int
+log2(int size)	/* byte count 1/2/4/8 -> 0/1/2/3; any other value -> 0 */
+{
+	switch (size) {
+	case 1: return (0);
+	case 2: return (1);
+	case 4: return (2);
+	case 8: return (3);
+	}
+	return (0);
+}
+
+/* ARGSUSED */
+void
+dtrace_disx86_str(dis86_t *dis, uint_t mode, uint64_t pc, char *buf,
+    size_t buflen)	/* render dis as text into buf (at most buflen) */
+{
+	uint64_t reltgt = 0;	/* target of an IP-relative operand, if any */
+	uint64_t tgt = 0;	/* last immediate/offset value seen */
+	int curlen;
+	int (*lookup)(void *, uint64_t, char *, size_t);
+	int i;
+	int64_t sv;
+	uint64_t usv, mask, save_mask, save_usv;
+	static uint64_t masks[] =	/* indexed by SIZEnn or log2(bytes) */
+	    {0xffU, 0xffffU, 0xffffffffU, 0xffffffffffffffffULL};
+	save_usv = 0;	/* set, with save_mask, once a negative imm is printed */
+
+	dis->d86_sprintf_func(buf, buflen, "%-6s ", dis->d86_mnem);
+
+	/*
+	 * For PC-relative jumps, the pc is really the next pc after executing
+	 * this instruction, so increment it appropriately.
+	 */
+	pc += dis->d86_len;
+
+	for (i = 0; i < dis->d86_numopnds; i++) {
+		d86opnd_t *op = &dis->d86_opnd[i];
+
+		if (i != 0)
+			(void) strlcat(buf, ",", buflen);
+
+		(void) strlcat(buf, op->d86_prefix, buflen);
+
+		/*
+		 * sv is for the signed, possibly-truncated immediate or
+		 * displacement; usv retains the original size and
+		 * unsignedness for symbol lookup.
+		 */
+
+		sv = usv = op->d86_value;
+
+		/*
+		 * About masks: for immediates that represent
+		 * addresses, the appropriate display size is
+		 * the effective address size of the instruction.
+		 * This includes MODE_OFFSET, MODE_IPREL, and
+		 * MODE_RIPREL. Immediates that are simply
+		 * immediate values should display in the operand's
+		 * size, however, since they don't represent addresses.
+		 */
+
+		/* d86_addr_size is SIZEnn, which is log2(real size) */
+		mask = masks[dis->d86_addr_size];
+
+		/* d86_value_size and d86_imm_bytes are in bytes */
+		if (op->d86_mode == MODE_SIGNED ||
+		    op->d86_mode == MODE_IMPLIED)
+			mask = masks[log2(op->d86_value_size)];
+
+		switch (op->d86_mode) {
+
+		case MODE_NONE:
+
+			(void) strlcat(buf, op->d86_opnd, buflen);
+			break;
+
+		case MODE_SIGNED:
+		case MODE_IMPLIED:
+		case MODE_OFFSET:
+
+			tgt = usv;	/* candidate for the symbol lookup below */
+
+			if (dis->d86_seg_prefix)
+				(void) strlcat(buf, dis->d86_seg_prefix,
+				    buflen);
+
+			if (op->d86_mode == MODE_SIGNED ||
+			    op->d86_mode == MODE_IMPLIED) {
+				(void) strlcat(buf, "$", buflen);
+			}
+
+			if (print_imm(dis, usv, mask, buf, buflen,
+			    IMM, TRY_NEG) &&
+			    (op->d86_mode == MODE_SIGNED ||
+			    op->d86_mode == MODE_IMPLIED)) {
+
+				/*
+				 * We printed a negative value for an
+				 * immediate that wasn't a
+				 * displacement. Note that fact so we can
+				 * print the positive value as an
+				 * annotation.
+				 */
+
+				save_usv = usv;
+				save_mask = mask;
+			}
+			(void) strlcat(buf, op->d86_opnd, buflen);
+
+			break;
+
+		case MODE_IPREL:
+		case MODE_RIPREL:
+
+			reltgt = pc + sv;
+
+			switch (mode) {	/* wrap the target to the mode's width */
+			case SIZE16:
+				reltgt = (uint16_t)reltgt;
+				break;
+			case SIZE32:
+				reltgt = (uint32_t)reltgt;
+				break;
+			}
+
+			(void) print_imm(dis, usv, mask, buf, buflen,
+			    DISP, TRY_NEG);
+
+			if (op->d86_mode == MODE_RIPREL)
+				(void) strlcat(buf, "(%rip)", buflen);
+			break;
+		}
+	}
+
+	/*
+	 * The symbol lookups may result in false positives,
+	 * particularly on object files, where small numbers may match
+	 * the 0-relative non-relocated addresses of symbols.
+	 */
+
+	lookup = dis->d86_sym_lookup;	/* NOTE(review): called without NULL check */
+	if (tgt != 0) {
+		if ((dis->d86_flags & DIS_F_NOIMMSYM) == 0 &&
+		    lookup(dis->d86_data, tgt, NULL, 0) == 0) {
+			(void) strlcat(buf, "\t<", buflen);
+			curlen = strlen(buf);
+			lookup(dis->d86_data, tgt, buf + curlen,
+			    buflen - curlen);
+			(void) strlcat(buf, ">", buflen);
+		}
+
+		/*
+		 * If we printed a negative immediate above, print the
+		 * positive in case our heuristic was unhelpful
+		 */
+		if (save_usv) {
+			(void) strlcat(buf, "\t<", buflen);
+			(void) print_imm(dis, save_usv, save_mask, buf, buflen,
+			    IMM, POS);
+			(void) strlcat(buf, ">", buflen);
+		}
+	}
+
+	if (reltgt != 0) {
+		/* Print symbol or effective address for reltgt */
+
+		(void) strlcat(buf, "\t<", buflen);
+		curlen = strlen(buf);
+		lookup(dis->d86_data, reltgt, buf + curlen,
+		    buflen - curlen);
+		(void) strlcat(buf, ">", buflen);
+	}
+}
+
+#endif /* DIS_TEXT */
Index: src/external/cddl/osnet/dev/dtrace/x86/dis_tables.h =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/x86/dis_tables.h diff -N src/external/cddl/osnet/dev/dtrace/x86/dis_tables.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/dtrace/x86/dis_tables.h 20 Apr 2017 11:49:47 -0000 @@ -0,0 +1,112 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1988 AT&T */
+/* All Rights Reserved */
+
+/*
+ * $FreeBSD: head/sys/cddl/dev/dtrace/x86/dis_tables.h 313133 2017-02-03 03:22:47Z markj $
+ */
+
+#ifndef _DIS_TABLES_H
+#define _DIS_TABLES_H
+
+/*
+ * Constants and prototypes for the IA32 disassembler backend. See dis_tables.c
+ * for usage information and documentation.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include	/* NOTE(review): header name missing on all three #include lines */
+#include
+#include
+
+/*
+ * values for cpu mode; operand and address sizes use the same SIZEnn values
+ */
+#define	SIZE16	1
+#define	SIZE32	2
+#define	SIZE64	3
+
+#define	OPLEN	256	/* size of the d86_opnd[] and d86_mnem[] text buffers */
+#define	PFIXLEN	8	/* size of the d86_prefix[] buffer */
+#define	NCPS	20 /* number of chars per symbol */
+
+/*
+ * data structures that must be provided to dtrace_disx86()
+ */
+typedef struct d86opnd {
+	char		d86_opnd[OPLEN]; /* symbolic rep of operand */
+	char		d86_prefix[PFIXLEN]; /* any prefix string or "" */
+	uint_t		d86_mode; /* mode for immediate */
+	uint_t		d86_value_size; /* size in bytes of d86_value */
+	uint64_t	d86_value; /* immediate value of opnd */
+} d86opnd_t;
+
+typedef struct dis86 {
+	uint_t		d86_mode;	/* SIZEnn; presumably the cpu mode */
+	uint_t		d86_error;	/* nonzero makes dtrace_disx86() fail */
+	uint_t		d86_len; /* instruction length */
+	int		d86_rmindex; /* index of modrm byte or -1 */
+	uint_t		d86_memsize; /* size of memory referenced */
+	char		d86_bytes[16]; /* bytes of instruction */
+	char		d86_mnem[OPLEN];	/* mnemonic text */
+	uint_t		d86_numopnds;	/* entries of d86_opnd[] in use */
+	uint_t		d86_rex_prefix; /* value of REX prefix if !0 */
+	char		*d86_seg_prefix; /* segment prefix, if any */
+	uint_t		d86_opnd_size;	/* presumably SIZEnn -- TODO confirm */
+	uint_t		d86_addr_size;	/* SIZEnn; indexes the display masks */
+	uint_t		d86_got_modrm;
+	uint_t		d86_vsib; /* Has a VSIB */
+	struct d86opnd	d86_opnd[4]; /* up to 4 operands */
+	int		(*d86_check_func)(void *);	/* instr_size.c passes NULL */
+	int		(*d86_get_byte)(void *);	/* next byte; < 0 is an error */
+#ifdef DIS_TEXT
+	int		(*d86_sym_lookup)(void *, uint64_t, char *, size_t);
+	int		(*d86_sprintf_func)(char *, size_t, const char *, ...);
+	int		d86_flags;	/* DIS_F_* bits */
+	uint_t		d86_imm_bytes;	/* in bytes */
+#endif
+	void		*d86_data;	/* cookie handed to the callbacks above */
+} dis86_t;
+
+extern int dtrace_disx86(dis86_t *x, uint_t cpu_mode);	/* 0 = ok, 1 = error */
+
+#define	DIS_F_OCTAL	0x1 /* Print all numbers in octal */
+#define	DIS_F_NOIMMSYM	0x2 /* Don't print symbols for immediates (.o) */
+
+#ifdef DIS_TEXT
+extern void dtrace_disx86_str(dis86_t *x, uint_t cpu_mode, uint64_t pc,
+    char *buf, size_t len);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DIS_TABLES_H */
Index: src/external/cddl/osnet/dev/dtrace/x86/instr_size.c =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/x86/instr_size.c diff -N src/external/cddl/osnet/dev/dtrace/x86/instr_size.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/dtrace/x86/instr_size.c 20 Apr 2017 11:58:22 -0000 @@ -0,0 +1,149 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD: head/sys/cddl/dev/dtrace/x86/instr_size.c 303050 2016-07-20 00:02:10Z markj $ + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1988 AT&T */
+/* All Rights Reserved */
+
+
+#ifdef illumos
+#pragma ident "@(#)instr_size.c 1.14 05/07/08 SMI"
+#endif
+
+#include	/* NOTE(review): header names are missing on every #include here */
+#include
+#include
+#ifdef illumos
+#include
+#include
+#include
+#include
+#include
+#endif
+#ifdef __FreeBSD__
+#include
+#include
+
+typedef u_int model_t;
+#define	DATAMODEL_NATIVE	0
+int dtrace_instr_size(uchar_t *);
+int dtrace_instr_size_isa(uchar_t *, model_t, int *);
+#endif
+#ifdef __NetBSD__
+#include
+
+typedef u_int model_t;
+#define	DATAMODEL_NATIVE	0
+int dtrace_instr_size(uchar_t *);
+int dtrace_instr_size_isa(uchar_t *, model_t, int *);
+#endif
+
+#include
+
+/*
+ * This subsystem (with the minor exception of the instr_size() function) is
+ * called from DTrace probe context. This imposes several requirements on
+ * the implementation:
+ *
+ * 1. External subsystems and functions may not be referenced. The one current
+ *    exception is for cmn_err, but only to signal the detection of table
+ *    errors. Assuming the tables are correct, no combination of input is to
+ *    trigger a cmn_err call.
+ *
+ * 2. These functions can't be allowed to be traced. To prevent this,
+ *    all functions in the probe path (everything except instr_size()) must
+ *    have names that begin with "dtrace_".
+ */
+
+typedef enum dis_isize {
+	DIS_ISIZE_INSTR,	/* dtrace_dis_isize() reports x.d86_len */
+	DIS_ISIZE_OPERAND	/* dtrace_dis_isize() reports x.d86_memsize */
+} dis_isize_t;
+
+
+/*
+ * get a byte from the instruction stream; p is a uchar_t ** that is advanced
+ */
+static int
+dtrace_dis_get_byte(void *p)
+{
+	int ret;
+	uchar_t **instr = p;
+
+	ret = **instr;
+	*instr += 1;
+
+	return (ret);
+}
+
+/*
+ * Returns either the size of a given instruction, in bytes, or the size of that
+ * instruction's memory access (if any), depending on the value of `which'.
+ * If a programming error in the tables is detected, the system will panic to
+ * ease diagnosis. Returns -1 when dtrace_disx86() rejects the instruction.
+ * When rmindex is non-NULL it receives x.d86_rmindex. NOTE(review): confirm
+ * DATAMODEL_NATIVE (0 in this file) selects the intended mode below.
+ */
+/* ARGSUSED2 */
+static int
+dtrace_dis_isize(uchar_t *instr, dis_isize_t which, model_t model, int *rmindex)
+{
+	int sz;
+	dis86_t x;	/* only the three fields below are set before decoding */
+	uint_t mode = SIZE32;	/* immediately overwritten below */
+
+	mode = (model == DATAMODEL_LP64) ? SIZE64 : SIZE32;
+
+	x.d86_data = (void **)&instr;	/* cursor read by dtrace_dis_get_byte() */
+	x.d86_get_byte = dtrace_dis_get_byte;
+	x.d86_check_func = NULL;
+
+	if (dtrace_disx86(&x, mode) != 0)
+		return (-1);
+
+	if (which == DIS_ISIZE_INSTR)
+		sz = x.d86_len; /* length of the instruction */
+	else
+		sz = x.d86_memsize; /* length of memory operand */
+
+	if (rmindex != NULL)
+		*rmindex = x.d86_rmindex;
+	return (sz);
+}
+
+int
+dtrace_instr_size_isa(uchar_t *instr, model_t model, int *rmindex)
+{	/* instruction length in bytes for `model', or -1 on decode failure */
+	return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, model, rmindex));
+}
+
+int
+dtrace_instr_size(uchar_t *instr)
+{	/* as dtrace_instr_size_isa() with DATAMODEL_NATIVE and no rmindex */
+	return (dtrace_dis_isize(instr, DIS_ISIZE_INSTR, DATAMODEL_NATIVE,
+	    NULL));
+}
Index: src/external/cddl/osnet/dev/dtrace/x86/regset.h =================================================================== RCS file: src/external/cddl/osnet/dev/dtrace/x86/regset.h diff -N src/external/cddl/osnet/dev/dtrace/x86/regset.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/dtrace/x86/regset.h 8 May 2017 08:17:51 -0000 @@ -0,0 +1,178 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD: head/sys/cddl/dev/dtrace/x86/regset.h 277300 2015-01-17 14:44:59Z smh $ + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +#ifndef _REGSET_H +#define _REGSET_H + +/* + * #pragma ident "@(#)regset.h 1.11 05/06/08 SMI" + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The names and offsets defined here should be specified by the + * AMD64 ABI suppl. + * + * We make fsbase and gsbase part of the lwp context (since they're + * the only way to access the full 64-bit address range via the segment + * registers) and thus belong here too. However we treat them as + * read-only; if %fs or %gs are updated, the results of the descriptor + * table lookup that those updates implicitly cause will be reflected + * in the corresponding fsbase and/or gsbase values the next time the + * context can be inspected. However it is NOT possible to override + * the fsbase/gsbase settings via this interface. + * + * Direct modification of the base registers (thus overriding the + * descriptor table base address) can be achieved with _lwp_setprivate. 
+ */ + +#define REG_GSBASE 27 +#define REG_FSBASE 26 +#ifdef illumos +#define REG_DS 25 +#define REG_ES 24 + +#define REG_GS 23 +#define REG_FS 22 +#define REG_SS 21 +#define REG_RSP 20 +#define REG_RFL 19 +#define REG_CS 18 +#define REG_RIP 17 +#define REG_ERR 16 +#define REG_TRAPNO 15 +#define REG_RAX 14 +#define REG_RCX 13 +#define REG_RDX 12 +#define REG_RBX 11 +#define REG_RBP 10 +#define REG_RSI 9 +#define REG_RDI 8 +#define REG_R8 7 +#define REG_R9 6 +#define REG_R10 5 +#define REG_R11 4 +#define REG_R12 3 +#define REG_R13 2 +#define REG_R14 1 +#define REG_R15 0 +#else /* !illumos */ +#define REG_SS 25 +#define REG_RSP 24 +#define REG_RFL 23 +#define REG_CS 22 +#define REG_RIP 21 +#define REG_DS 20 +#define REG_ES 19 +#define REG_ERR 18 +#define REG_GS 17 +#define REG_FS 16 +#define REG_TRAPNO 15 +#define REG_RAX 14 +#define REG_RCX 13 +#define REG_RDX 12 +#define REG_RBX 11 +#define REG_RBP 10 +#define REG_RSI 9 +#define REG_RDI 8 +#define REG_R8 7 +#define REG_R9 6 +#define REG_R10 5 +#define REG_R11 4 +#define REG_R12 3 +#define REG_R13 2 +#define REG_R14 1 +#define REG_R15 0 +#endif /* illumos */ + +/* + * The names and offsets defined here are specified by i386 ABI suppl. 
+ */ + +#ifdef illumos +#define SS 18 /* only stored on a privilege transition */ +#define UESP 17 /* only stored on a privilege transition */ +#define EFL 16 +#define CS 15 +#define EIP 14 +#define ERR 13 +#define TRAPNO 12 +#define EAX 11 +#define ECX 10 +#define EDX 9 +#define EBX 8 +#define ESP 7 +#define EBP 6 +#define ESI 5 +#define EDI 4 +#define DS 3 +#define ES 2 +#define FS 1 +#define GS 0 +#else /* !illumos */ +#define GS 18 +#define SS 17 /* only stored on a privilege transition */ +#define UESP 16 /* only stored on a privilege transition */ +#define EFL 15 +#define CS 14 +#define EIP 13 +#define ERR 12 +#define TRAPNO 11 +#define EAX 10 +#define ECX 9 +#define EDX 8 +#define EBX 7 +#define ESP 6 +#define EBP 5 +#define ESI 4 +#define EDI 3 +#define DS 2 +#define ES 1 +#define FS 0 +#endif /* illumos */ + +#define REG_PC EIP +#define REG_FP EBP +#define REG_SP UESP +#define REG_PS EFL +#define REG_R0 EAX +#define REG_R1 EDX + +#ifdef __cplusplus +} +#endif + +#endif /* _REGSET_H */ Index: src/external/cddl/osnet/dev/fbt/fbt.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/fbt/fbt.c,v retrieving revision 1.22 diff -u -p -r1.22 fbt.c --- src/external/cddl/osnet/dev/fbt/fbt.c 27 Feb 2017 06:47:00 -0000 1.22 +++ src/external/cddl/osnet/dev/fbt/fbt.c 5 Jul 2017 00:25:12 -0000 @@ -23,7 +23,7 @@ * Portions Copyright 2006-2008 John Birrell jb@freebsd.org * Portions Copyright 2010 Darran Hunt darran@NetBSD.org * - * $FreeBSD: src/sys/cddl/dev/fbt/fbt.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/fbt/fbt.c 309786 2016-12-10 03:13:11Z markj $ * */ @@ -33,6 +33,7 @@ */ #include +#include #include #include #include @@ -57,21 +58,6 @@ #include #include -#include -#if defined(__i386__) || defined(__amd64__) -#include -#include -#if 0 -#include -#endif -#include -#elif __arm__ -#include -#include -#include -#include -#endif - #define ELFSIZE ARCH_ELFSIZE 
#include @@ -80,65 +66,19 @@ #include #include -mod_ctf_t *modptr; - -MALLOC_DEFINE(M_FBT, "fbt", "Function Boundary Tracing"); +#include "fbt.h" -#if defined(__i386__) || defined(__amd64__) -#define FBT_PUSHL_EBP 0x55 -#define FBT_MOVL_ESP_EBP0_V0 0x8b -#define FBT_MOVL_ESP_EBP1_V0 0xec -#define FBT_MOVL_ESP_EBP0_V1 0x89 -#define FBT_MOVL_ESP_EBP1_V1 0xe5 -#define FBT_REX_RSP_RBP 0x48 - -#define FBT_POPL_EBP 0x5d -#define FBT_RET 0xc3 -#define FBT_RET_IMM16 0xc2 -#define FBT_LEAVE 0xc9 -#endif - -#ifdef __amd64__ -#define FBT_PATCHVAL 0xcc -#elif defined(__i386__) -#define FBT_PATCHVAL 0xf0 - -#elif defined(__arm__) -#define FBT_PATCHVAL DTRACE_BREAKPOINT - -/* entry and return */ -#define FBT_BX_LR_P(insn) (((insn) & ~INSN_COND_MASK) == 0x012fff1e) -#define FBT_B_LABEL_P(insn) (((insn) & 0xff000000) == 0xea000000) -/* entry */ -#define FBT_MOV_IP_SP_P(insn) ((insn) == 0xe1a0c00d) -/* index=1, add=1, wback=0 */ -#define FBT_LDR_IMM_P(insn) (((insn) & 0xfff00000) == 0xe5900000) -#define FBT_MOVW_P(insn) (((insn) & 0xfff00000) == 0xe3000000) -#define FBT_MOV_IMM_P(insn) (((insn) & 0xffff0000) == 0xe3a00000) -#define FBT_CMP_IMM_P(insn) (((insn) & 0xfff00000) == 0xe3500000) -#define FBT_PUSH_P(insn) (((insn) & 0xffff0000) == 0xe92d0000) -/* return */ -/* cond=always, writeback=no, rn=sp and register_list includes pc */ -#define FBT_LDM_P(insn) (((insn) & 0x0fff8000) == 0x089d8000) -#define FBT_LDMIB_P(insn) (((insn) & 0x0fff8000) == 0x099d8000) -#define FBT_MOV_PC_LR_P(insn) (((insn) & ~INSN_COND_MASK) == 0x01a0f00e) -/* cond=always, writeback=no, rn=sp and register_list includes lr, but not pc */ -#define FBT_LDM_LR_P(insn) (((insn) & 0xffffc000) == 0xe89d4000) -#define FBT_LDMIB_LR_P(insn) (((insn) & 0xffffc000) == 0xe99d4000) - -/* rval = insn | invop_id (overwriting cond with invop ID) */ -#define BUILD_RVAL(insn, id) (((insn) & ~INSN_COND_MASK) | __SHIFTIN((id), INSN_COND_MASK)) -/* encode cond in the first byte */ -#define PATCHVAL_ENCODE_COND(insn) 
(FBT_PATCHVAL | __SHIFTOUT((insn), INSN_COND_MASK)) +mod_ctf_t *modptr; -#else -#error "architecture not supported" -#endif +dtrace_provider_id_t fbt_id; +fbt_probe_t **fbt_probetab; +int fbt_probetab_mask; +static int fbt_probetab_size; static dev_type_open(fbt_open); static int fbt_unload(void); static void fbt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); -static void fbt_provide_module(void *, dtrace_modctl_t *); +static void fbt_provide_module(void *, modctl_t *); static void fbt_destroy(void *, dtrace_id_t, void *); static int fbt_enable(void *, dtrace_id_t, void *); static void fbt_disable(void *, dtrace_id_t, void *); @@ -146,11 +86,6 @@ static void fbt_load(void); static void fbt_suspend(void *, dtrace_id_t, void *); static void fbt_resume(void *, dtrace_id_t, void *); -#define FBT_ENTRY "entry" -#define FBT_RETURN "return" -#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) -#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */ - static const struct cdevsw fbt_cdevsw = { .d_open = fbt_open, .d_close = noclose, @@ -187,259 +122,71 @@ static dtrace_pops_t fbt_pops = { fbt_destroy }; -typedef struct fbt_probe { - struct fbt_probe *fbtp_hashnext; -#if defined(__i386__) || defined(__amd64__) - uint8_t *fbtp_patchpoint; - int8_t fbtp_rval; - uint8_t fbtp_patchval; - uint8_t fbtp_savedval; -#elif __arm__ - uint32_t *fbtp_patchpoint; - int32_t fbtp_rval; - uint32_t fbtp_patchval; - uint32_t fbtp_savedval; -#endif - uintptr_t fbtp_roffset; - dtrace_id_t fbtp_id; - const char *fbtp_name; - dtrace_modctl_t *fbtp_ctl; - int fbtp_loadcnt; - int fbtp_primary; - int fbtp_invop_cnt; - int fbtp_symindx; - struct fbt_probe *fbtp_next; -} fbt_probe_t; - -#ifdef notyet -static struct cdev *fbt_cdev; +#ifdef __FreeBSD__ static int fbt_verbose = 0; -#endif -static dtrace_provider_id_t fbt_id; -static fbt_probe_t **fbt_probetab; -static int fbt_probetab_size; -static int fbt_probetab_mask; - -#ifdef __arm__ -extern void (* 
dtrace_emulation_jump_addr)(int, struct trapframe *); -static uint32_t -expand_imm(uint32_t imm12) -{ - uint32_t unrot = imm12 & 0xff; - int amount = 2 * (imm12 >> 8); +static struct cdev *fbt_cdev; +#endif /* __FreeBSD__ */ - if (amount) - return (unrot >> amount) | (unrot << (32 - amount)); - else - return unrot; -} +#ifdef __NetBSD__ +specificdata_key_t fbt_module_key; -static uint32_t -add_with_carry(uint32_t x, uint32_t y, int carry_in, - int *carry_out, int *overflow) -{ - uint32_t result; - uint64_t unsigned_sum = x + y + (uint32_t)carry_in; - int64_t signed_sum = (int32_t)x + (int32_t)y + (int32_t)carry_in; - KASSERT(carry_in == 1); - - result = (uint32_t)(unsigned_sum & 0xffffffff); - *carry_out = ((uint64_t)result == unsigned_sum) ? 1 : 0; - *overflow = ((int64_t)result == signed_sum) ? 0 : 1; - - return result; -} +#define version xversion +#endif /* __NetBSD__ */ -static void -fbt_emulate(int _op, struct trapframe *frame) +int +fbt_excluded(const char *name) { - uint32_t op = _op; - switch (op >> 28) { - case DTRACE_INVOP_MOV_IP_SP: - /* mov ip, sp */ - frame->tf_ip = frame->tf_svc_sp; - frame->tf_pc += 4; - break; - case DTRACE_INVOP_BX_LR: - /* bx lr */ - frame->tf_pc = frame->tf_svc_lr; - break; - case DTRACE_INVOP_MOV_PC_LR: - /* mov pc, lr */ - frame->tf_pc = frame->tf_svc_lr; - break; - case DTRACE_INVOP_LDM: - /* ldm sp, {..., pc} */ - /* FALLTHRU */ - case DTRACE_INVOP_LDMIB: { - /* ldmib sp, {..., pc} */ - uint32_t register_list = (op & 0xffff); - uint32_t *sp = (uint32_t *)(intptr_t)frame->tf_svc_sp; - uint32_t *regs = &frame->tf_r0; - int i; - - /* IDMIB */ - if ((op >> 28) == 5) - sp++; - - for (i=0; i <= 12; i++) { - if (register_list & (1 << i)) - regs[i] = *sp++; - } - if (register_list & (1 << 13)) - frame->tf_svc_sp = *sp++; - if (register_list & (1 << 14)) - frame->tf_svc_lr = *sp++; - frame->tf_pc = *sp; - break; - } - case DTRACE_INVOP_LDR_IMM: { - /* ldr r?, [{pc,r?}, #?] 
*/ - uint32_t rt = (op >> 12) & 0xf; - uint32_t rn = (op >> 16) & 0xf; - uint32_t imm = op & 0xfff; - uint32_t *regs = &frame->tf_r0; - KDASSERT(rt <= 12); - KDASSERT(rn == 15 || rn =< 12); - if (rn == 15) - regs[rt] = *((uint32_t *)(intptr_t)(frame->tf_pc + 8 + imm)); - else - regs[rt] = *((uint32_t *)(intptr_t)(regs[rn] + imm)); - frame->tf_pc += 4; - break; - } - case DTRACE_INVOP_MOVW: { - /* movw r?, #? */ - uint32_t rd = (op >> 12) & 0xf; - uint32_t imm = (op & 0xfff) | ((op & 0xf0000) >> 4); - uint32_t *regs = &frame->tf_r0; - KDASSERT(rd <= 12); - regs[rd] = imm; - frame->tf_pc += 4; - break; - } - case DTRACE_INVOP_MOV_IMM: { - /* mov r?, #? */ - uint32_t rd = (op >> 12) & 0xf; - uint32_t imm = expand_imm(op & 0xfff); - uint32_t *regs = &frame->tf_r0; - KDASSERT(rd <= 12); - regs[rd] = imm; - frame->tf_pc += 4; - break; - } - case DTRACE_INVOP_CMP_IMM: { - /* cmp r?, #? */ - uint32_t rn = (op >> 16) & 0xf; - uint32_t *regs = &frame->tf_r0; - uint32_t imm = expand_imm(op & 0xfff); - uint32_t spsr = frame->tf_spsr; - uint32_t result; - int carry; - int overflow; + if (strncmp(name, "dtrace_", 7) == 0 && + strncmp(name, "dtrace_safe_", 12) != 0) { /* - * (result, carry, overflow) = AddWithCarry(R[n], NOT(imm32), ’1’); - * APSR.N = result<31>; - * APSR.Z = IsZeroBit(result); - * APSR.C = carry; - * APSR.V = overflow; + * Anything beginning with "dtrace_" may be called + * from probe context unless it explicitly indicates + * that it won't be called from probe context by + * using the prefix "dtrace_safe_". 
*/ - KDASSERT(rn <= 12); - result = add_with_carry(regs[rn], ~imm, 1, &carry, &overflow); - if (result & 0x80000000) - spsr |= PSR_N_bit; - else - spsr &= ~PSR_N_bit; - if (result == 0) - spsr |= PSR_Z_bit; - else - spsr &= ~PSR_Z_bit; - if (carry) - spsr |= PSR_C_bit; - else - spsr &= ~PSR_C_bit; - if (overflow) - spsr |= PSR_V_bit; - else - spsr &= ~PSR_V_bit; - -#if 0 - aprint_normal("pc=%x Rn=%x imm=%x %c%c%c%c\n", frame->tf_pc, regs[rn], imm, - (spsr & PSR_N_bit) ? 'N' : 'n', - (spsr & PSR_Z_bit) ? 'Z' : 'z', - (spsr & PSR_C_bit) ? 'C' : 'c', - (spsr & PSR_V_bit) ? 'V' : 'v'); -#endif - frame->tf_spsr = spsr; - frame->tf_pc += 4; - break; - } - case DTRACE_INVOP_B_LABEL: { - /* b ??? */ - uint32_t imm = (op & 0x00ffffff) << 2; - int32_t diff; - /* SignExtend(imm26, 32) */ - if (imm & 0x02000000) - imm |= 0xfc000000; - diff = (int32_t)imm; - frame->tf_pc += 8 + diff; - break; + return (1); } - /* FIXME: push will overwrite trapframe... */ - case DTRACE_INVOP_PUSH: { - /* push {...} */ - uint32_t register_list = (op & 0xffff); - uint32_t *sp = (uint32_t *)(intptr_t)frame->tf_svc_sp; - uint32_t *regs = &frame->tf_r0; - int i; - int count = 0; - -#if 0 - if ((op & 0x0fff0fff) == 0x052d0004) { - /* A2: str r4, [sp, #-4]! */ - *(sp - 1) = regs[4]; - frame->tf_pc += 4; - break; - } -#endif - for (i=0; i < 16; i++) { - if (register_list & (1 << i)) - count++; - } - sp -= count; +#ifdef __FreeBSD__ + /* + * Lock owner methods may be called from probe context. 
+ */ + if (strcmp(name, "owner_mtx") == 0 || + strcmp(name, "owner_rm") == 0 || + strcmp(name, "owner_rw") == 0 || + strcmp(name, "owner_sx") == 0) + return (1); - for (i=0; i <= 12; i++) { - if (register_list & (1 << i)) - *sp++ = regs[i]; - } - if (register_list & (1 << 13)) - *sp++ = frame->tf_svc_sp; - if (register_list & (1 << 14)) - *sp++ = frame->tf_svc_lr; - if (register_list & (1 << 15)) - *sp = frame->tf_pc + 8; - - /* make sure the caches and memory are in sync */ - cpu_dcache_wbinv_range(frame->tf_svc_sp, count * 4); - - /* In case the current page tables have been modified ... */ - cpu_tlb_flushID(); - cpu_cpwait(); + /* + * When DTrace is built into the kernel we need to exclude + * the FBT functions from instrumentation. + */ +#ifndef _KLD_MODULE + if (strncmp(name, "fbt_", 4) == 0) + return (1); +#endif +#endif - frame->tf_svc_sp -= count * 4; - frame->tf_pc += 4; +#ifdef __NetBSD__ + if (name[0] == '_' && name[1] == '_') + return (1); - break; - } - default: - KDASSERTMSG(0, "op=%u\n", op >> 28); + if (strcmp(name, "cpu_index") == 0 || + strncmp(name, "db_", 3) == 0 || + strncmp(name, "ddb_", 4) == 0 || + strncmp(name, "kdb_", 4) == 0 || + strncmp(name, "lockdebug_", 10) == 0 || + strncmp(name, "kauth_", 5) == 0 || + strncmp(name, "ktext_write", 11) == 0) { + return (1); } -} #endif + return (0); +} + static void fbt_doubletrap(void) { @@ -450,489 +197,70 @@ fbt_doubletrap(void) fbt = fbt_probetab[i]; for (; fbt != NULL; fbt = fbt->fbtp_next) - *fbt->fbtp_patchpoint = fbt->fbtp_savedval; - } -} - - -static int -fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) -{ - solaris_cpu_t *cpu; - uintptr_t *stack; - uintptr_t arg0, arg1, arg2, arg3, arg4; - fbt_probe_t *fbt; - -#ifdef __amd64__ - stack = (uintptr_t *)frame->tf_rsp; -#endif -#ifdef __i386__ - /* Skip hardware-saved registers. 
*/ - stack = (uintptr_t *)&frame->tf_esp; -#endif -#ifdef __arm__ - stack = (uintptr_t *)frame->tf_svc_sp; -#endif - - cpu = &solaris_cpu[cpu_number()]; - fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; - for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { - if ((uintptr_t)fbt->fbtp_patchpoint == addr) { - fbt->fbtp_invop_cnt++; - if (fbt->fbtp_roffset == 0) { -#ifdef __amd64__ - /* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */ - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - cpu->cpu_dtrace_caller = stack[0]; - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | - CPU_DTRACE_BADADDR); - - arg0 = frame->tf_rdi; - arg1 = frame->tf_rsi; - arg2 = frame->tf_rdx; - arg3 = frame->tf_rcx; - arg4 = frame->tf_r8; -#else - int i = 0; - - /* - * When accessing the arguments on the stack, - * we must protect against accessing beyond - * the stack. We can safely set NOFAULT here - * -- we know that interrupts are already - * disabled. - */ - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - cpu->cpu_dtrace_caller = stack[i++]; - arg0 = stack[i++]; - arg1 = stack[i++]; - arg2 = stack[i++]; - arg3 = stack[i++]; - arg4 = stack[i++]; - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | - CPU_DTRACE_BADADDR); -#endif - - dtrace_probe(fbt->fbtp_id, arg0, arg1, - arg2, arg3, arg4); - - cpu->cpu_dtrace_caller = 0; - } else { -#ifdef __amd64__ - /* - * On amd64, we instrument the ret, not the - * leave. We therefore need to set the caller - * to ensure that the top frame of a stack() - * action is correct. 
- */ - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - cpu->cpu_dtrace_caller = stack[0]; - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | - CPU_DTRACE_BADADDR); -#endif - - dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, - rval, 0, 0, 0); - cpu->cpu_dtrace_caller = 0; - } - - return (fbt->fbtp_rval); - } + fbt_patch_tracepoint(fbt, fbt->fbtp_savedval); } - - return (0); } -#if defined(__i386__) || defined(__amd64__) -static int -fbt_provide_module_cb(const char *name, int symindx, void *value, - uint32_t symsize, int type, void *opaque) +#ifdef __FreeBSD__ +static void +fbt_provide_module(void *arg, modctl_t *lf) { - fbt_probe_t *fbt, *retfbt; - u_int8_t *instr, *limit; - dtrace_modctl_t *mod = opaque; - const char *modname = mod->mod_info->mi_name; - int j; - int size; - - /* got a function? */ - if (ELF_ST_TYPE(type) != STT_FUNC) { - return 0; - } - - if (strncmp(name, "dtrace_", 7) == 0 && - strncmp(name, "dtrace_safe_", 12) != 0) { - /* - * Anything beginning with "dtrace_" may be called - * from probe context unless it explicitly indicates - * that it won't be called from probe context by - * using the prefix "dtrace_safe_". - */ - return (0); - } + char modname[MAXPATHLEN]; + int i; + size_t len; - if (name[0] == '_' && name[1] == '_') - return (0); + strlcpy(modname, lf->filename, sizeof(modname)); + len = strlen(modname); + if (len > 3 && strcmp(modname + len - 3, ".ko") == 0) + modname[len - 3] = '\0'; /* - * Exclude some more symbols which can be called from probe context. + * Employees of dtrace and their families are ineligible. Void + * where prohibited. 
*/ - if (strcmp(name, "x86_curcpu") == 0 /* CPU */ - || strcmp(name, "x86_curlwp") == 0 /* curproc, curlwp, curthread */ - || strcmp(name, "cpu_index") == 0 /* cpu_number, curcpu_id */ - || strncmp(name, "db_", 3) == 0 /* debugger */ - || strncmp(name, "ddb_", 4) == 0 /* debugger */ - || strncmp(name, "kdb_", 4) == 0 /* debugger */ - || strncmp(name, "lockdebug_", 10) == 0 /* lockdebug XXX for now */ - || strncmp(name, "kauth_", 5) == 0 /* CRED XXX for now */ - ) { - return 0; - } - - instr = (u_int8_t *) value; - limit = (u_int8_t *) value + symsize; - -#ifdef __amd64__ - while (instr < limit) { - if (*instr == FBT_PUSHL_EBP) - break; - - if ((size = dtrace_instr_size(instr)) <= 0) - break; - - instr += size; - } - - if (instr >= limit || *instr != FBT_PUSHL_EBP) { - /* - * We either don't save the frame pointer in this - * function, or we ran into some disassembly - * screw-up. Either way, we bail. - */ - return (0); - } -#else - if (instr[0] != FBT_PUSHL_EBP) { - return (0); - } - - if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 && - instr[2] == FBT_MOVL_ESP_EBP1_V0) && - !(instr[1] == FBT_MOVL_ESP_EBP0_V1 && - instr[2] == FBT_MOVL_ESP_EBP1_V1)) { - return (0); - } -#endif - fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO); - fbt->fbtp_name = name; - fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, - name, FBT_ENTRY, 3, fbt); - fbt->fbtp_patchpoint = instr; - fbt->fbtp_ctl = mod; - /* fbt->fbtp_loadcnt = lf->loadcnt; */ - fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP; - fbt->fbtp_savedval = *instr; - fbt->fbtp_patchval = FBT_PATCHVAL; - fbt->fbtp_symindx = symindx; - - fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; - fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; - mod->mod_fbtentries++; - - retfbt = NULL; - - while (instr < limit) { - if (instr >= limit) - return (0); - - /* - * If this disassembly fails, then we've likely walked off into - * a jump table or some other unsuitable area. Bail out of the - * disassembly now. 
- */ - if ((size = dtrace_instr_size(instr)) <= 0) - return (0); - -#ifdef __amd64__ - /* - * We only instrument "ret" on amd64 -- we don't yet instrument - * ret imm16, largely because the compiler doesn't seem to - * (yet) emit them in the kernel... - */ - if (*instr != FBT_RET) { - instr += size; - continue; - } -#else - if (!(size == 1 && - (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) && - (*(instr + 1) == FBT_RET || - *(instr + 1) == FBT_RET_IMM16))) { - instr += size; - continue; - } -#endif - - /* - * We (desperately) want to avoid erroneously instrumenting a - * jump table, especially given that our markers are pretty - * short: two bytes on x86, and just one byte on amd64. To - * determine if we're looking at a true instruction sequence - * or an inline jump table that happens to contain the same - * byte sequences, we resort to some heuristic sleeze: we - * treat this instruction as being contained within a pointer, - * and see if that pointer points to within the body of the - * function. If it does, we refuse to instrument it. - */ - for (j = 0; j < sizeof (uintptr_t); j++) { - caddr_t check = (caddr_t) instr - j; - uint8_t *ptr; - - if (check < (caddr_t)value) - break; - - if (check + sizeof (caddr_t) > (caddr_t)limit) - continue; - - ptr = *(uint8_t **)check; - - if (ptr >= (uint8_t *) value && ptr < limit) { - instr += size; - continue; - } - } - - /* - * We have a winner! 
- */ - fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO); - fbt->fbtp_name = name; - - if (retfbt == NULL) { - fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, - name, FBT_RETURN, 3, fbt); - } else { - retfbt->fbtp_next = fbt; - fbt->fbtp_id = retfbt->fbtp_id; - } - - retfbt = fbt; - fbt->fbtp_patchpoint = instr; - fbt->fbtp_ctl = mod; - /* fbt->fbtp_loadcnt = lf->loadcnt; */ - fbt->fbtp_symindx = symindx; - -#ifndef __amd64__ - if (*instr == FBT_POPL_EBP) { - fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; - } else { - ASSERT(*instr == FBT_LEAVE); - fbt->fbtp_rval = DTRACE_INVOP_LEAVE; - } - fbt->fbtp_roffset = - (uintptr_t)(instr - (uint8_t *) value) + 1; - -#else - ASSERT(*instr == FBT_RET); - fbt->fbtp_rval = DTRACE_INVOP_RET; - fbt->fbtp_roffset = - (uintptr_t)(instr - (uint8_t *) value); -#endif - - fbt->fbtp_savedval = *instr; - fbt->fbtp_patchval = FBT_PATCHVAL; - fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; - fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; - - mod->mod_fbtentries++; - - instr += size; - } - - return 0; -} - -#elif defined(__arm__) - -static int -fbt_provide_module_cb(const char *name, int symindx, void *value, - uint32_t symsize, int type, void *opaque) -{ - fbt_probe_t *fbt, *retfbt; - uint32_t *instr, *limit; - bool was_ldm_lr = false; - dtrace_modctl_t *mod = opaque; - const char *modname = mod->mod_info->mi_name; - int size; - - /* got a function? */ - if (ELF_ST_TYPE(type) != STT_FUNC) { - return 0; - } - - if (strncmp(name, "dtrace_", 7) == 0 && - strncmp(name, "dtrace_safe_", 12) != 0) { - /* - * Anything beginning with "dtrace_" may be called - * from probe context unless it explicitly indicates - * that it won't be called from probe context by - * using the prefix "dtrace_safe_". - */ - return (0); - } - - if (name[0] == '_' && name[1] == '_') - return (0); + if (strcmp(modname, "dtrace") == 0) + return; /* - * Exclude some more symbols which can be called from probe context. 
+ * To register with DTrace, a module must list 'dtrace' as a + * dependency in order for the kernel linker to resolve + * symbols like dtrace_register(). All modules with such a + * dependency are ineligible for FBT tracing. */ - if (strncmp(name, "db_", 3) == 0 /* debugger */ - || strncmp(name, "ddb_", 4) == 0 /* debugger */ - || strncmp(name, "kdb_", 4) == 0 /* debugger */ - || strncmp(name, "lockdebug_", 10) == 0 /* lockdebug XXX for now */ - || strncmp(name, "kauth_", 5) == 0 /* CRED XXX for now */ - /* Sensitive functions on ARM */ - || strncmp(name, "_spl", 4) == 0 - || strcmp(name, "binuptime") == 0 - || strcmp(name, "dosoftints") == 0 - || strcmp(name, "fbt_emulate") == 0 - || strcmp(name, "nanouptime") == 0 - || strcmp(name, "undefinedinstruction") == 0 - || strncmp(name, "dmt_", 4) == 0 /* omap */ - || strncmp(name, "mvsoctmr_", 9) == 0 /* marvell */ - ) { - return 0; - } - - instr = (uint32_t *) value; - limit = (uint32_t *)((uintptr_t)value + symsize); - - if (!FBT_MOV_IP_SP_P(*instr) - && !FBT_BX_LR_P(*instr) - && !FBT_MOVW_P(*instr) - && !FBT_MOV_IMM_P(*instr) - && !FBT_B_LABEL_P(*instr) - && !FBT_LDR_IMM_P(*instr) - && !FBT_CMP_IMM_P(*instr) - /* && !FBT_PUSH_P(*instr) */ - ) { - return 0; - } - - fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO); - fbt->fbtp_name = name; - fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, - name, FBT_ENTRY, 3, fbt); - fbt->fbtp_patchpoint = instr; - fbt->fbtp_ctl = mod; - /* fbt->fbtp_loadcnt = lf->loadcnt; */ - if (FBT_MOV_IP_SP_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOV_IP_SP); - else if (FBT_LDR_IMM_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_LDR_IMM); - else if (FBT_MOVW_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOVW); - else if (FBT_MOV_IMM_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOV_IMM); - else if (FBT_CMP_IMM_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_CMP_IMM); - else if (FBT_BX_LR_P(*instr)) - 
fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_BX_LR); - else if (FBT_PUSH_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_PUSH); - else if (FBT_B_LABEL_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_B_LABEL); - - fbt->fbtp_patchval = PATCHVAL_ENCODE_COND(*instr); - fbt->fbtp_savedval = *instr; - fbt->fbtp_symindx = symindx; - - fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; - fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; - mod->mod_fbtentries++; - - retfbt = NULL; - - while (instr < limit) { - if (instr >= limit) - return (0); - - size = 1; - - if (!FBT_BX_LR_P(*instr) - && !FBT_MOV_PC_LR_P(*instr) - && !FBT_LDM_P(*instr) - && !FBT_LDMIB_P(*instr) - && !(was_ldm_lr && FBT_B_LABEL_P(*instr)) - ) { - if (FBT_LDM_LR_P(*instr) || FBT_LDMIB_LR_P(*instr)) - was_ldm_lr = true; - else - was_ldm_lr = false; - instr += size; - continue; - } + for (i = 0; i < lf->ndeps; i++) + if (strncmp(lf->deps[i]->filename, "dtrace", 6) == 0) + return; + if (lf->fbt_nentries) { /* - * We have a winner! + * This module has some FBT entries allocated; we're afraid + * to screw with it. 
*/ - fbt = malloc(sizeof (fbt_probe_t), M_FBT, M_WAITOK | M_ZERO); - fbt->fbtp_name = name; - - if (retfbt == NULL) { - fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, - name, FBT_RETURN, 3, fbt); - } else { - retfbt->fbtp_next = fbt; - fbt->fbtp_id = retfbt->fbtp_id; - } - - retfbt = fbt; - fbt->fbtp_patchpoint = instr; - fbt->fbtp_ctl = mod; - /* fbt->fbtp_loadcnt = lf->loadcnt; */ - fbt->fbtp_symindx = symindx; - - if (FBT_BX_LR_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_BX_LR); - else if (FBT_MOV_PC_LR_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOV_PC_LR); - else if (FBT_LDM_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_LDM); - else if (FBT_LDMIB_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_LDMIB); - else if (FBT_B_LABEL_P(*instr)) - fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_B_LABEL); - - fbt->fbtp_roffset = (uintptr_t)(instr - (uint32_t *) value); - fbt->fbtp_patchval = PATCHVAL_ENCODE_COND(*instr); - - fbt->fbtp_savedval = *instr; - fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; - fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; - - mod->mod_fbtentries++; - - instr += size; - was_ldm_lr = false; + return; } - return 0; + /* + * List the functions in the module and the symbol values. 
+ */ + (void) linker_file_function_listall(lf, fbt_provide_module_function, modname); } -#else -#error "architecture not supported" #endif - - +#ifdef __NetBSD__ static void -fbt_provide_module(void *arg, dtrace_modctl_t *mod) +fbt_provide_module(void *arg, modctl_t *mod) { + struct fbt_ksyms_arg fka; + struct mod_ctf *mc; char modname[MAXPATHLEN]; int i; size_t len; - strlcpy(modname, mod->mod_info->mi_name, sizeof(modname)); + if (mod_ctf_get(mod, &mc)) { + printf("fbt: no CTF data for module %s\n", module_name(mod)); + return; + } + + strlcpy(modname, module_name(mod), sizeof(modname)); len = strlen(modname); if (len > 5 && strcmp(modname + len - 3, ".kmod") == 0) modname[len - 4] = '\0'; @@ -958,36 +286,52 @@ fbt_provide_module(void *arg, dtrace_mod * dependency are ineligible for FBT tracing. */ for (i = 0; i < mod->mod_nrequired; i++) { - if (strncmp(mod->mod_required[i]->mod_info->mi_name, + if (strncmp(module_name(mod->mod_required[i]), "dtrace", 6) == 0) return; } - - if (mod->mod_fbtentries) { - /* - * This module has some FBT entries allocated; we're afraid - * to screw with it. - */ + if (mc->fbt_provided) { return; } /* * List the functions in the module and the symbol values. 
*/ - ksyms_mod_foreach(modname, fbt_provide_module_cb, mod); + memset(&fka, 0, sizeof(fka)); + fka.fka_mod = mod; + fka.fka_mc = mc; + ksyms_mod_foreach(modname, fbt_provide_module_cb, &fka); + mc->fbt_provided = true; +} + +static void +fbt_module_dtor(void *arg) +{ + mod_ctf_t *mc = arg; + + if (mc->ctfalloc) + free(mc->ctftab, M_TEMP); + kmem_free(mc, sizeof(*mc)); } +#endif static void fbt_destroy(void *arg, dtrace_id_t id, void *parg) { fbt_probe_t *fbt = parg, *next, *hash, *last; - dtrace_modctl_t *ctl; + modctl_t *ctl; int ndx; do { ctl = fbt->fbtp_ctl; +#ifdef __FreeBSD__ ctl->mod_fbtentries--; +#endif +#ifdef __NetBSD__ + mod_ctf_t *mc = module_getspecific(ctl, fbt_module_key); + mc->fbt_provided = false; +#endif /* * Now we need to remove this probe from the fbt_probetab. @@ -1009,26 +353,21 @@ fbt_destroy(void *arg, dtrace_id_t id, v } next = fbt->fbtp_next; - free(fbt, M_FBT); + kmem_free(fbt, sizeof(*fbt)); fbt = next; } while (fbt != NULL); } -#if defined(__i386__) || defined(__amd64__) - static int fbt_enable(void *arg, dtrace_id_t id, void *parg) { fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - u_long psl; - u_long cr0; + modctl_t *ctl = fbt->fbtp_ctl; - -#if 0 /* XXX TBD */ +#ifdef __NetBSD__ + module_hold(ctl); +#else ctl->nenabled++; /* @@ -1040,33 +379,15 @@ fbt_enable(void *arg, dtrace_id_t id, vo if (fbt_verbose) { printf("fbt is failing for probe %s " "(module %s reloaded)", - fbt->fbtp_name, ctl->filename); + fbt->fbtp_name, module_name(ctl)); } - return; + return 0; } #endif - /* Disable interrupts. */ - psl = x86_read_psl(); - x86_disable_intr(); - - /* Disable write protection in supervisor mode. */ - cr0 = rcr0(); - lcr0(cr0 & ~CR0_WP); - - for (; fbt != NULL; fbt = fbt->fbtp_next) { - *fbt->fbtp_patchpoint = fbt->fbtp_patchval; - } - - /* Write back and invalidate cache, flush pipelines. */ - wbinvd(); - x86_flush(); - x86_write_psl(psl); - - /* Re-enable write protection. 
*/ - lcr0(cr0); - + for (; fbt != NULL; fbt = fbt->fbtp_next) + fbt_patch_tracepoint(fbt, fbt->fbtp_patchval); return 0; } @@ -1074,240 +395,60 @@ static void fbt_disable(void *arg, dtrace_id_t id, void *parg) { fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - u_long psl; - u_long cr0; + modctl_t *ctl = fbt->fbtp_ctl; -#if 0 /* XXX TBD */ +#ifndef __NetBSD__ ASSERT(ctl->nenabled > 0); ctl->nenabled--; if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; #endif - /* Disable interrupts. */ - psl = x86_read_psl(); - x86_disable_intr(); - - /* Disable write protection in supervisor mode. */ - cr0 = rcr0(); - lcr0(cr0 & ~CR0_WP); for (; fbt != NULL; fbt = fbt->fbtp_next) - *fbt->fbtp_patchpoint = fbt->fbtp_savedval; + fbt_patch_tracepoint(fbt, fbt->fbtp_savedval); - /* Write back and invalidate cache, flush pipelines. */ - wbinvd(); - x86_flush(); - x86_write_psl(psl); - - /* Re-enable write protection. */ - lcr0(cr0); +#ifdef __NetBSD__ + module_rele(ctl); +#endif } static void fbt_suspend(void *arg, dtrace_id_t id, void *parg) { fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - u_long psl; - u_long cr0; +#ifndef __NetBSD__ + modctl_t *ctl = fbt->fbtp_ctl; -#if 0 /* XXX TBD */ ASSERT(ctl->nenabled > 0); if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; #endif - /* Disable interrupts. */ - psl = x86_read_psl(); - x86_disable_intr(); - - /* Disable write protection in supervisor mode. */ - cr0 = rcr0(); - lcr0(cr0 & ~CR0_WP); - for (; fbt != NULL; fbt = fbt->fbtp_next) - *fbt->fbtp_patchpoint = fbt->fbtp_savedval; - - /* Write back and invalidate cache, flush pipelines. */ - wbinvd(); - x86_flush(); - x86_write_psl(psl); - - /* Re-enable write protection. 
*/ - lcr0(cr0); + fbt_patch_tracepoint(fbt, fbt->fbtp_savedval); } static void fbt_resume(void *arg, dtrace_id_t id, void *parg) { fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - u_long psl; - u_long cr0; +#ifndef __NetBSD__ + modctl_t *ctl = fbt->fbtp_ctl; -#if 0 /* XXX TBD */ ASSERT(ctl->nenabled > 0); if ((ctl->loadcnt != fbt->fbtp_loadcnt)) return; #endif - /* Disable interrupts. */ - psl = x86_read_psl(); - x86_disable_intr(); - - /* Disable write protection in supervisor mode. */ - cr0 = rcr0(); - lcr0(cr0 & ~CR0_WP); for (; fbt != NULL; fbt = fbt->fbtp_next) - *fbt->fbtp_patchpoint = fbt->fbtp_patchval; - - /* Write back and invalidate cache, flush pipelines. */ - wbinvd(); - x86_flush(); - x86_write_psl(psl); - - /* Re-enable write protection. */ - lcr0(cr0); + fbt_patch_tracepoint(fbt, fbt->fbtp_patchval); } -#elif defined(__arm__) - static int -fbt_enable(void *arg, dtrace_id_t id, void *parg) -{ - fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - dtrace_icookie_t c; - - -#if 0 /* XXX TBD */ - ctl->nenabled++; - - /* - * Now check that our modctl has the expected load count. If it - * doesn't, this module must have been unloaded and reloaded -- and - * we're not going to touch it. 
- */ - if (ctl->loadcnt != fbt->fbtp_loadcnt) { - if (fbt_verbose) { - printf("fbt is failing for probe %s " - "(module %s reloaded)", - fbt->fbtp_name, ctl->filename); - } - - return; - } -#endif - - c = dtrace_interrupt_disable(); - - for (fbt = parg; fbt != NULL; fbt = fbt->fbtp_next) { - *fbt->fbtp_patchpoint = fbt->fbtp_patchval; - cpu_idcache_wbinv_range((vaddr_t)fbt->fbtp_patchpoint, 4); - } - - dtrace_interrupt_enable(c); - - return 0; -} - -static void -fbt_disable(void *arg, dtrace_id_t id, void *parg) -{ - fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - dtrace_icookie_t c; - -#if 0 /* XXX TBD */ - ASSERT(ctl->nenabled > 0); - ctl->nenabled--; - - if ((ctl->loadcnt != fbt->fbtp_loadcnt)) - return; -#endif - - c = dtrace_interrupt_disable(); - - for (; fbt != NULL; fbt = fbt->fbtp_next) { - *fbt->fbtp_patchpoint = fbt->fbtp_savedval; - cpu_idcache_wbinv_range((vaddr_t)fbt->fbtp_patchpoint, 4); - } - - dtrace_interrupt_enable(c); -} - -static void -fbt_suspend(void *arg, dtrace_id_t id, void *parg) -{ - fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - dtrace_icookie_t c; - -#if 0 /* XXX TBD */ - ASSERT(ctl->nenabled > 0); - - if ((ctl->loadcnt != fbt->fbtp_loadcnt)) - return; -#endif - - c = dtrace_interrupt_disable(); - - for (; fbt != NULL; fbt = fbt->fbtp_next) { - *fbt->fbtp_patchpoint = fbt->fbtp_savedval; - cpu_idcache_wbinv_range((vaddr_t)fbt->fbtp_patchpoint, 4); - } - - dtrace_interrupt_enable(c); -} - -static void -fbt_resume(void *arg, dtrace_id_t id, void *parg) -{ - fbt_probe_t *fbt = parg; -#if 0 - dtrace_modctl_t *ctl = fbt->fbtp_ctl; -#endif - dtrace_icookie_t c; - -#if 0 /* XXX TBD */ - ASSERT(ctl->nenabled > 0); - - if ((ctl->loadcnt != fbt->fbtp_loadcnt)) - return; -#endif - - c = dtrace_interrupt_disable(); - - for (; fbt != NULL; fbt = fbt->fbtp_next) { - *fbt->fbtp_patchpoint = fbt->fbtp_patchval; - cpu_idcache_wbinv_range((vaddr_t)fbt->fbtp_patchpoint, 4); - } - - 
dtrace_interrupt_enable(c); -} - -#else -#error "architecture not supported" -#endif - -static int -fbt_ctfoff_init(dtrace_modctl_t *mod, mod_ctf_t *mc) +fbt_ctfoff_init(modctl_t *mod, mod_ctf_t *mc) { const Elf_Sym *symp = mc->symtab; const ctf_header_t *hp = (const ctf_header_t *) mc->ctftab; @@ -1323,19 +464,16 @@ fbt_ctfoff_init(dtrace_modctl_t *mod, mo /* Sanity check. */ if (hp->cth_magic != CTF_MAGIC) { printf("Bad magic value in CTF data of '%s'\n", - mod->mod_info->mi_name); + module_name(mod)); return (EINVAL); } if (mc->symtab == NULL) { - printf("No symbol table in '%s'\n", - mod->mod_info->mi_name); + printf("No symbol table in '%s'\n", module_name(mod)); return (EINVAL); } - if ((ctfoff = malloc(sizeof(uint32_t) * nsyms, M_FBT, M_WAITOK)) == NULL) - return (ENOMEM); - + ctfoff = malloc(sizeof(uint32_t) * nsyms, M_FBT, M_WAITOK); mc->ctfoffp = ctfoff; for (i = 0; i < nsyms; i++, ctfoff++, symp++) { @@ -1346,9 +484,10 @@ fbt_ctfoff_init(dtrace_modctl_t *mod, mo continue; } - /* CTF expects the pre-sorted symbol ordering, + /* + * CTF expects the unsorted symbol ordering, * so map it from that to the current sorted - * and trimmed symbol table. + * symbol table. * ctfoff[new-ind] = oldind symbol info. */ @@ -1359,6 +498,12 @@ fbt_ctfoff_init(dtrace_modctl_t *mod, mo ctfoff = &mc->ctfoffp[mc->nmap[i]-1]; } + /* + * Note that due to how kern_ksyms.c adjusts st_name + * to be the offset into a virtual combined strtab, + * st_name will never be 0 for loaded modules. 
+ */ + if (symp->st_name == 0 || symp->st_shndx == SHN_UNDEF) { *ctfoff = 0xffffffff; continue; @@ -1408,12 +553,12 @@ fbt_ctfoff_init(dtrace_modctl_t *mod, mo } static ssize_t -fbt_get_ctt_size(uint8_t xversion, const ctf_type_t *tp, ssize_t *sizep, +fbt_get_ctt_size(uint8_t version, const ctf_type_t *tp, ssize_t *sizep, ssize_t *incrementp) { ssize_t size, increment; - if (xversion > CTF_VERSION_1 && + if (version > CTF_VERSION_1 && tp->ctt_size == CTF_LSIZE_SENT) { size = CTF_TYPE_LSIZE(tp); increment = sizeof (ctf_type_t); @@ -1442,6 +587,7 @@ fbt_typoff_init(mod_ctf_t *mc) uint32_t *xp; ulong_t pop[CTF_K_MAX + 1] = { 0 }; + /* Sanity check. */ if (hp->cth_magic != CTF_MAGIC) return (EINVAL); @@ -1522,17 +668,19 @@ fbt_typoff_init(mod_ctf_t *mc) vbytes = 0; break; default: - printf("%s(%d): detected invalid CTF kind -- %u\n", __func__, __LINE__, kind); + printf("%s(%d): detected invalid CTF kind -- %u\n", + __func__, __LINE__, kind); return (EIO); } tp = (ctf_type_t *)((uintptr_t)tp + increment + vbytes); pop[kind]++; } + /* account for a sentinel value below */ + ctf_typemax++; mc->typlen = ctf_typemax; - if ((xp = malloc(sizeof(uint32_t) * ctf_typemax, M_FBT, M_ZERO | M_WAITOK)) == NULL) - return (ENOMEM); + xp = malloc(sizeof(uint32_t) * ctf_typemax, M_FBT, M_ZERO | M_WAITOK); mc->typoffp = xp; @@ -1854,11 +1002,7 @@ ctf_decl_push(ctf_decl_t *cd, mod_ctf_t prec = CTF_PREC_BASE; } - if ((cdp = malloc(sizeof (ctf_decl_node_t), M_FBT, M_WAITOK)) == NULL) { - cd->cd_err = EAGAIN; - return; - } - + cdp = malloc(sizeof(*cdp), M_FBT, M_WAITOK); cdp->cd_type = type; cdp->cd_kind = kind; cdp->cd_n = n; @@ -2002,8 +1146,8 @@ fbt_getargdesc(void *arg __unused, dtrac { const ushort_t *dp; fbt_probe_t *fbt = parg; - mod_ctf_t mc; - dtrace_modctl_t *ctl = fbt->fbtp_ctl; + mod_ctf_t *mc; + modctl_t *ctl = fbt->fbtp_ctl; int ndx = desc->dtargd_ndx; int symindx = fbt->fbtp_symindx; uint32_t *ctfoff; @@ -2011,41 +1155,44 @@ fbt_getargdesc(void *arg __unused, dtrac ushort_t 
info, kind, n; int nsyms; + if (fbt->fbtp_roffset != 0 && desc->dtargd_ndx == 0) { + (void) strcpy(desc->dtargd_native, "int"); + return; + } + desc->dtargd_ndx = DTRACE_ARGNONE; - /* Get a pointer to the CTF data and it's length. */ + /* Get a pointer to the CTF data and its length. */ if (mod_ctf_get(ctl, &mc) != 0) { - static int report=0; + static int report = 0; if (report < 1) { - report++; - printf("FBT: Error no CTF section found in module \"%s\"\n", - ctl->mod_info->mi_name); + report++; + printf("FBT: Error no CTF section found in module \"%s\"\n", + module_name(ctl)); } /* No CTF data? Something wrong? *shrug* */ return; } - nsyms = (mc.nmap != NULL) ? mc.nmapsize : mc.nsym; + nsyms = (mc->nmap != NULL) ? mc->nmapsize : mc->nsym; /* Check if this module hasn't been initialised yet. */ - if (mc.ctfoffp == NULL) { + if (mc->ctfoffp == NULL) { /* * Initialise the CTF object and function symindx to * byte offset array. */ - if (fbt_ctfoff_init(ctl, &mc) != 0) { + if (fbt_ctfoff_init(ctl, mc) != 0) return; - } /* Initialise the CTF type to byte offset array. */ - if (fbt_typoff_init(&mc) != 0) { + if (fbt_typoff_init(mc) != 0) return; - } } - ctfoff = mc.ctfoffp; + ctfoff = mc->ctfoffp; - if (ctfoff == NULL || mc.typoffp == NULL) { + if (ctfoff == NULL || mc->typoffp == NULL) { return; } @@ -2057,7 +1204,7 @@ fbt_getargdesc(void *arg __unused, dtrac if ((offset = ctfoff[symindx]) == 0xffffffff) return; - dp = (const ushort_t *)(mc.ctftab + offset + sizeof(ctf_header_t)); + dp = (const ushort_t *)(mc->ctftab + offset + sizeof(ctf_header_t)); info = *dp++; kind = CTF_INFO_KIND(info); @@ -2075,23 +1222,50 @@ fbt_getargdesc(void *arg __unused, dtrac return; } - /* Check if the requested argument doesn't exist. */ - if (ndx >= n) - return; + if (fbt->fbtp_roffset != 0) { + /* Only return type is available for args[1] in return probe. */ + if (ndx > 1) + return; + ASSERT(ndx == 1); + } else { + /* Check if the requested argument doesn't exist. 
*/ + if (ndx >= n) + return; - /* Skip the return type and arguments up to the one requested. */ - dp += ndx + 1; + /* Skip the return type and arguments up to the one requested. */ + dp += ndx + 1; + } - if (fbt_type_name(&mc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0) { + if (fbt_type_name(mc, *dp, desc->dtargd_native, sizeof(desc->dtargd_native)) > 0) desc->dtargd_ndx = ndx; - } return; } +#ifdef __FreeBSD__ +static int +fbt_linker_file_cb(linker_file_t lf, void *arg) +{ + + fbt_provide_module(arg, lf); + + return (0); +} +#endif + static void fbt_load(void) { + +#ifdef __FreeBSD__ + /* Create the /dev/dtrace/fbt entry. */ + fbt_cdev = make_dev(&fbt_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, + "dtrace/fbt"); +#endif +#ifdef __NetBSD__ + (void) module_specific_key_create(&fbt_module_key, fbt_module_dtor); +#endif + /* Default the probe table size if not specified. */ if (fbt_probetab_size == 0) fbt_probetab_size = FBT_PROBETAB_SIZE; @@ -2105,9 +1279,6 @@ fbt_load(void) dtrace_doubletrap_func = fbt_doubletrap; dtrace_invop_add(fbt_invop); -#ifdef __arm__ - dtrace_emulation_jump_addr = fbt_emulate; -#endif if (dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_USER, NULL, &fbt_pops, NULL, &fbt_id) != 0) @@ -2120,9 +1291,6 @@ fbt_unload(void) { int error = 0; -#ifdef __arm__ - dtrace_emulation_jump_addr = NULL; -#endif /* De-register the invalid opcode handler. 
*/ dtrace_invop_remove(fbt_invop); @@ -2137,6 +1305,12 @@ fbt_unload(void) fbt_probetab = NULL; fbt_probetab_mask = 0; +#ifdef __FreeBSD__ + destroy_dev(fbt_cdev); +#endif +#ifdef __NetBSD__ + (void) module_specific_key_delete(fbt_module_key); +#endif return (error); } @@ -2170,4 +1344,15 @@ fbt_open(dev_t dev, int flags, int mode, return (0); } +#ifdef __FreeBSD__ +SYSINIT(fbt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_load, NULL); +SYSUNINIT(fbt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fbt_unload, NULL); + +DEV_MODULE(fbt, fbt_modevent, NULL); +MODULE_VERSION(fbt, 1); +MODULE_DEPEND(fbt, dtrace, 1, 1, 1); +MODULE_DEPEND(fbt, opensolaris, 1, 1, 1); +#endif +#ifdef __NetBSD__ MODULE(MODULE_CLASS_MISC, dtrace_fbt, "dtrace,zlib"); +#endif Index: src/external/cddl/osnet/dev/fbt/fbt.h =================================================================== RCS file: src/external/cddl/osnet/dev/fbt/fbt.h diff -N src/external/cddl/osnet/dev/fbt/fbt.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/fbt/fbt.h 20 Jun 2017 13:43:33 -0000 @@ -0,0 +1,82 @@ +/* $NetBSD$ */ + +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * + * $FreeBSD: head/sys/cddl/dev/fbt/fbt.h 298171 2016-04-17 23:08:47Z markj $ + * + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _FBT_H_ +#define _FBT_H_ + +#include "fbt_isa.h" + +typedef struct fbt_probe { + struct fbt_probe *fbtp_hashnext; + fbt_patchval_t *fbtp_patchpoint; + fbt_patchval_t fbtp_rval; + fbt_patchval_t fbtp_patchval; + fbt_patchval_t fbtp_savedval; + uintptr_t fbtp_roffset; + dtrace_id_t fbtp_id; + const char *fbtp_name; + modctl_t *fbtp_ctl; + int fbtp_loadcnt; + int fbtp_symindx; + struct fbt_probe *fbtp_next; +} fbt_probe_t; + +struct fbt_ksyms_arg { + modctl_t *fka_mod; + void *fka_mc; +}; + +struct linker_file; +struct linker_symval; +struct trapframe; + +int fbt_invop(uintptr_t, struct trapframe *, uintptr_t); +void fbt_patch_tracepoint(fbt_probe_t *, fbt_patchval_t); +int fbt_provide_module_function(struct linker_file *, int, + struct linker_symval *, void *); +int fbt_provide_module_cb(const char *, int, void *, + uint32_t, int, void *); +int fbt_excluded(const char *); + +extern dtrace_provider_id_t fbt_id; +extern fbt_probe_t **fbt_probetab; +extern int fbt_probetab_mask; + +#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) +#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */ + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_FBT); +#endif + +#endif Index: src/external/cddl/osnet/dev/fbt/arm/fbt_isa.c =================================================================== RCS file: src/external/cddl/osnet/dev/fbt/arm/fbt_isa.c diff -N src/external/cddl/osnet/dev/fbt/arm/fbt_isa.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ 
src/external/cddl/osnet/dev/fbt/arm/fbt_isa.c 20 Jun 2017 19:14:24 -0000 @@ -0,0 +1,403 @@ +/* $NetBSD$ */ + +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * Portions Copyright 2013 Justin Hibbits jhibbits@freebsd.org + * Portions Copyright 2013 Howard Su howardsu@freebsd.org + * + * $FreeBSD: head/sys/cddl/dev/fbt/arm/fbt_isa.c 312378 2017-01-18 13:27:24Z andrew $ + * + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include "fbt.h" + +#define FBT_PUSHM 0xe92d0000 +#define FBT_POPM 0xe8bd0000 +#define FBT_JUMP 0xea000000 +#define FBT_SUBSP 0xe24dd000 + +#define FBT_ENTRY "entry" +#define FBT_RETURN "return" + +int +fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) +{ + solaris_cpu_t *cpu = &solaris_cpu[cpu_number()]; + fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; + register_t fifthparam; + + for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { + if ((uintptr_t)fbt->fbtp_patchpoint == addr) { + if (fbt->fbtp_roffset == 0) { + /* Get 5th parameter from stack */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + fifthparam = *(register_t *)frame->tf_svc_sp; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); + + cpu->cpu_dtrace_caller = frame->tf_svc_lr; + dtrace_probe(fbt->fbtp_id, frame->tf_r0, + frame->tf_r1, frame->tf_r2, + frame->tf_r3, fifthparam); + } else { + /* XXX set caller */ + cpu->cpu_dtrace_caller = 0; + dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, + rval, 0, 0, 0); + } + + cpu->cpu_dtrace_caller = 0; + return (fbt->fbtp_rval); + } + } + + return (0); +} + + +void +fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val) +{ + dtrace_icookie_t c; + + c = dtrace_interrupt_disable(); + + ktext_write(fbt->fbtp_patchpoint, &val, sizeof (val)); + + dtrace_interrupt_enable(c); +} + +#ifdef __FreeBSD__ + +int +fbt_provide_module_function(linker_file_t lf, int symindx, + linker_symval_t *symval, void *opaque) +{ + char *modname = opaque; + const char *name = symval->name; + fbt_probe_t *fbt, *retfbt; + uint32_t *instr, *limit; + int popm; + + if (fbt_excluded(name)) + return (0); + + instr = (uint32_t *)symval->value; + limit = (uint32_t *)(symval->value + symval->size); + + /* + * va_arg functions has first instruction of + * sub sp, sp, #? 
+ */ + if ((*instr & 0xfffff000) == FBT_SUBSP) + instr++; + + /* + * check if insn is a pushm with LR + */ + if ((*instr & 0xffff0000) != FBT_PUSHM || + (*instr & (1 << LR)) == 0) + return (0); + + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + fbt->fbtp_name = name; + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_ENTRY, 5, fbt); + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = lf; + fbt->fbtp_loadcnt = lf->loadcnt; + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_BREAKPOINT; + fbt->fbtp_rval = DTRACE_INVOP_PUSHM; + fbt->fbtp_symindx = symindx; + + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + lf->fbt_nentries++; + + popm = FBT_POPM | ((*instr) & 0x3FFF) | 0x8000; + + retfbt = NULL; +again: + for (; instr < limit; instr++) { + if (*instr == popm) + break; + else if ((*instr & 0xff000000) == FBT_JUMP) { + uint32_t *target, *start; + int offset; + + offset = (*instr & 0xffffff); + offset <<= 8; + offset /= 64; + target = instr + (2 + offset); + start = (uint32_t *)symval->value; + if (target >= limit || target < start) + break; + } + } + + if (instr >= limit) + return (0); + + /* + * We have a winner! 
+ */ + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + fbt->fbtp_name = name; + if (retfbt == NULL) { + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_RETURN, 5, fbt); + } else { + retfbt->fbtp_next = fbt; + fbt->fbtp_id = retfbt->fbtp_id; + } + retfbt = fbt; + + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = lf; + fbt->fbtp_loadcnt = lf->loadcnt; + fbt->fbtp_symindx = symindx; + if ((*instr & 0xff000000) == FBT_JUMP) + fbt->fbtp_rval = DTRACE_INVOP_B; + else + fbt->fbtp_rval = DTRACE_INVOP_POPM; + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_BREAKPOINT; + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + lf->fbt_nentries++; + + instr++; + goto again; +} + +#endif /* __FreeBSD_ */ + +#ifdef __NetBSD__ + +#define FBT_PATCHVAL DTRACE_BREAKPOINT + +/* entry and return */ +#define FBT_BX_LR_P(insn) (((insn) & ~INSN_COND_MASK) == 0x012fff1e) +#define FBT_B_LABEL_P(insn) (((insn) & 0xff000000) == 0xea000000) +/* entry */ +#define FBT_MOV_IP_SP_P(insn) ((insn) == 0xe1a0c00d) +/* index=1, add=1, wback=0 */ +#define FBT_LDR_IMM_P(insn) (((insn) & 0xfff00000) == 0xe5900000) +#define FBT_MOVW_P(insn) (((insn) & 0xfff00000) == 0xe3000000) +#define FBT_MOV_IMM_P(insn) (((insn) & 0xffff0000) == 0xe3a00000) +#define FBT_CMP_IMM_P(insn) (((insn) & 0xfff00000) == 0xe3500000) +#define FBT_PUSH_P(insn) (((insn) & 0xffff0000) == 0xe92d0000) +/* return */ +/* cond=always, writeback=no, rn=sp and register_list includes pc */ +#define FBT_LDM_P(insn) (((insn) & 0x0fff8000) == 0x089d8000) +#define FBT_LDMIB_P(insn) (((insn) & 0x0fff8000) == 0x099d8000) +#define FBT_MOV_PC_LR_P(insn) (((insn) & ~INSN_COND_MASK) == 0x01a0f00e) +/* cond=always, writeback=no, rn=sp and register_list includes lr, but not pc */ +#define FBT_LDM_LR_P(insn) (((insn) & 0xffffc000) == 0xe89d4000) +#define FBT_LDMIB_LR_P(insn) (((insn) & 0xffffc000) == 0xe99d4000) + +/* rval = insn | invop_id (overwriting cond with invop ID) */ 
+#define BUILD_RVAL(insn, id) (((insn) & ~INSN_COND_MASK) | __SHIFTIN((id), INSN_COND_MASK)) +/* encode cond in the first byte */ +#define PATCHVAL_ENCODE_COND(insn) (FBT_PATCHVAL | __SHIFTOUT((insn), INSN_COND_MASK)) + +int +fbt_provide_module_cb(const char *name, int symindx, void *value, + uint32_t symsize, int type, void *opaque) +{ + fbt_probe_t *fbt, *retfbt; + uint32_t *instr, *limit; + bool was_ldm_lr = false; + int size; + + struct fbt_ksyms_arg *fka = opaque; + modctl_t *mod = fka->fka_mod; + const char *modname = module_name(mod); + + + /* got a function? */ + if (ELF_ST_TYPE(type) != STT_FUNC) + return 0; + + if (fbt_excluded(name)) + return (0); + + /* + * Exclude some more symbols which can be called from probe context. + */ + if (strncmp(name, "_spl", 4) == 0 || + strcmp(name, "binuptime") == 0 || + strcmp(name, "nanouptime") == 0 || + strcmp(name, "dosoftints") == 0 || + strcmp(name, "fbt_emulate") == 0 || + strcmp(name, "undefinedinstruction") == 0 || + strncmp(name, "dmt_", 4) == 0 /* omap */ || + strncmp(name, "mvsoctmr_", 9) == 0 /* marvell */ ) { + return 0; + } + + instr = (uint32_t *) value; + limit = (uint32_t *)((uintptr_t)value + symsize); + + if (!FBT_MOV_IP_SP_P(*instr) + && !FBT_BX_LR_P(*instr) + && !FBT_MOVW_P(*instr) + && !FBT_MOV_IMM_P(*instr) + && !FBT_B_LABEL_P(*instr) + && !FBT_LDR_IMM_P(*instr) + && !FBT_CMP_IMM_P(*instr) + && !FBT_PUSH_P(*instr) + ) { + return 0; + } + + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + fbt->fbtp_name = name; + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_ENTRY, 5, fbt); + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = mod; + /* fbt->fbtp_loadcnt = lf->loadcnt; */ + if (FBT_MOV_IP_SP_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOV_IP_SP); + else if (FBT_LDR_IMM_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_LDR_IMM); + else if (FBT_MOVW_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOVW); + else if (FBT_MOV_IMM_P(*instr)) + 
fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOV_IMM); + else if (FBT_CMP_IMM_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_CMP_IMM); + else if (FBT_BX_LR_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_BX_LR); + else if (FBT_PUSH_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_PUSHM); + else if (FBT_B_LABEL_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_B); + else + KASSERT(0); + + KASSERTMSG((fbt->fbtp_rval >> 28) != 0, + "fbt %p insn 0x%x name %s rval 0x%08x", + fbt, *instr, name, fbt->fbtp_rval); + + fbt->fbtp_patchval = PATCHVAL_ENCODE_COND(*instr); + fbt->fbtp_savedval = *instr; + fbt->fbtp_symindx = symindx; + + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + retfbt = NULL; + + while (instr < limit) { + if (instr >= limit) + return (0); + + size = 1; + + if (!FBT_BX_LR_P(*instr) + && !FBT_MOV_PC_LR_P(*instr) + && !FBT_LDM_P(*instr) + && !FBT_LDMIB_P(*instr) + && !(was_ldm_lr && FBT_B_LABEL_P(*instr)) + ) { + if (FBT_LDM_LR_P(*instr) || FBT_LDMIB_LR_P(*instr)) + was_ldm_lr = true; + else + was_ldm_lr = false; + instr += size; + continue; + } + + /* + * We have a winner! 
+ */ + fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); + fbt->fbtp_name = name; + + if (retfbt == NULL) { + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_RETURN, 5, fbt); + } else { + retfbt->fbtp_next = fbt; + fbt->fbtp_id = retfbt->fbtp_id; + } + + retfbt = fbt; + fbt->fbtp_patchpoint = instr; + fbt->fbtp_ctl = mod; + /* fbt->fbtp_loadcnt = lf->loadcnt; */ + fbt->fbtp_symindx = symindx; + + if (FBT_BX_LR_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_BX_LR); + else if (FBT_MOV_PC_LR_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_MOV_PC_LR); + else if (FBT_LDM_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_LDM); + else if (FBT_LDMIB_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_POPM); + else if (FBT_B_LABEL_P(*instr)) + fbt->fbtp_rval = BUILD_RVAL(*instr, DTRACE_INVOP_B); + else + KASSERT(0); + + KASSERTMSG((fbt->fbtp_rval >> 28) != 0, "fbt %p name %s rval 0x%08x", + fbt, name, fbt->fbtp_rval); + + fbt->fbtp_roffset = (uintptr_t)(instr - (uint32_t *) value); + fbt->fbtp_patchval = PATCHVAL_ENCODE_COND(*instr); + + fbt->fbtp_savedval = *instr; + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + + instr += size; + was_ldm_lr = false; + } + + return 0; +} + +#endif /* __NetBSD__ */ Index: src/external/cddl/osnet/dev/fbt/arm/fbt_isa.h =================================================================== RCS file: src/external/cddl/osnet/dev/fbt/arm/fbt_isa.h diff -N src/external/cddl/osnet/dev/fbt/arm/fbt_isa.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/fbt/arm/fbt_isa.h 12 Apr 2017 18:56:59 -0000 @@ -0,0 +1,32 @@ +/* $NetBSD$ */ + +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD: head/sys/cddl/dev/fbt/arm/fbt_isa.h 278529 2015-02-10 19:41:30Z gnn $ + * + */ + +#ifndef _FBT_ISA_H_ +#define _FBT_ISA_H_ + +typedef uint32_t fbt_patchval_t; + +#endif Index: src/external/cddl/osnet/dev/fbt/x86/fbt_isa.c =================================================================== RCS file: src/external/cddl/osnet/dev/fbt/x86/fbt_isa.c diff -N src/external/cddl/osnet/dev/fbt/x86/fbt_isa.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/fbt/x86/fbt_isa.c 15 Jun 2017 19:39:26 -0000 @@ -0,0 +1,425 @@ +/* $NetBSD$ */ + +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Portions Copyright 2006-2008 John Birrell jb@freebsd.org + * + * $FreeBSD: head/sys/cddl/dev/fbt/x86/fbt_isa.c 309785 2016-12-10 03:11:05Z markj $ + * + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#if 1 +#include +#include +#if 0 +#include +#endif +#include +#endif + +#include "fbt.h" + +#define FBT_PUSHL_EBP 0x55 +#define FBT_MOVL_ESP_EBP0_V0 0x8b +#define FBT_MOVL_ESP_EBP1_V0 0xec +#define FBT_MOVL_ESP_EBP0_V1 0x89 +#define FBT_MOVL_ESP_EBP1_V1 0xe5 +#define FBT_REX_RSP_RBP 0x48 + +#define FBT_POPL_EBP 0x5d +#define FBT_RET 0xc3 +#define FBT_RET_IMM16 0xc2 +#define FBT_LEAVE 0xc9 + +#ifdef __amd64__ +#define FBT_PATCHVAL 0xcc +#else +#define FBT_PATCHVAL 0xf0 +#endif + +#define FBT_ENTRY "entry" +#define FBT_RETURN "return" + +int +fbt_invop(uintptr_t addr, struct trapframe *frame, uintptr_t rval) +{ + solaris_cpu_t *cpu; + uintptr_t *stack; + uintptr_t arg0, arg1, arg2, arg3, arg4; + fbt_probe_t *fbt; + +#ifdef __amd64__ + stack = (uintptr_t *)frame->tf_rsp; +#else + /* Skip hardware-saved registers. 
*/ +#ifdef __NetBSD__ + stack = (uintptr_t *)&frame->tf_esp; +#else + stack = (uintptr_t *)frame->tf_isp + 3; +#endif +#endif + + cpu = &solaris_cpu[cpu_number()]; + fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; + for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { + if ((uintptr_t)fbt->fbtp_patchpoint == addr) { + if (fbt->fbtp_roffset == 0) { +#ifdef __amd64__ + /* fbt->fbtp_rval == DTRACE_INVOP_PUSHQ_RBP */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[0]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); + + arg0 = frame->tf_rdi; + arg1 = frame->tf_rsi; + arg2 = frame->tf_rdx; + arg3 = frame->tf_rcx; + arg4 = frame->tf_r8; +#else + int i = 0; + + /* + * When accessing the arguments on the stack, + * we must protect against accessing beyond + * the stack. We can safely set NOFAULT here + * -- we know that interrupts are already + * disabled. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[i++]; + arg0 = stack[i++]; + arg1 = stack[i++]; + arg2 = stack[i++]; + arg3 = stack[i++]; + arg4 = stack[i++]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); +#endif + + dtrace_probe(fbt->fbtp_id, arg0, arg1, + arg2, arg3, arg4); + + cpu->cpu_dtrace_caller = 0; + } else { +#ifdef __amd64__ + /* + * On amd64, we instrument the ret, not the + * leave. We therefore need to set the caller + * to ensure that the top frame of a stack() + * action is correct. 
+ */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + cpu->cpu_dtrace_caller = stack[0]; + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | + CPU_DTRACE_BADADDR); +#endif + + dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, + rval, 0, 0, 0); + cpu->cpu_dtrace_caller = 0; + } + + return (fbt->fbtp_rval); + } + } + + return (0); +} + + + + +#ifdef __FreeBSD__ +void +fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val) +{ + + *fbt->fbtp_patchpoint = val; +} +#endif + +#ifdef __NetBSD__ +void +fbt_patch_tracepoint(fbt_probe_t *fbt, fbt_patchval_t val) +{ + u_long psl; + u_long cr0; + + /* Disable interrupts. */ + psl = x86_read_psl(); + x86_disable_intr(); + + /* Disable write protection in supervisor mode. */ + cr0 = rcr0(); + lcr0(cr0 & ~CR0_WP); + + for (; fbt != NULL; fbt = fbt->fbtp_next) { + *fbt->fbtp_patchpoint = val; + } + + /* Write back and invalidate cache, flush pipelines. */ + wbinvd(); + x86_flush(); + x86_write_psl(psl); + + /* Re-enable write protection. */ + lcr0(cr0); +} +#endif + + +#ifdef __FreeBSD__ +int +fbt_provide_module_function(linker_file_t lf, int symindx, + linker_symval_t *symval, void *opaque) +#endif +#ifdef __NetBSD__ +int +fbt_provide_module_cb(const char *name, int symindx, void *value, + uint32_t symsize, int type, void *opaque) +#endif +{ + fbt_probe_t *fbt, *retfbt; + u_int8_t *instr, *limit; + int j; + int size; + +#ifdef __FreeBSD_ + char *modname = opaque; + const char *name = symval->name; + size_t symsize = symval->size; + void *value = symval->value; + + /* + * trap_check() is a wrapper for DTrace's fault handler, so we don't + * want to be able to instrument it. + */ + if (strcmp(name, "trap_check") == 0) + return (0); +#endif +#ifdef __NetBSD__ + struct fbt_ksyms_arg *fka = opaque; + modctl_t *mod = fka->fka_mod; + const char *modname = module_name(mod); + + /* got a function? */ + if (ELF_ST_TYPE(type) != STT_FUNC) + return 0; + + /* + * Exclude some more symbols which can be called from probe context. 
+ */ + if (strcmp(name, "x86_curcpu") == 0 || + strcmp(name, "x86_curlwp") == 0) { + return 0; + } +#endif + + if (fbt_excluded(name)) + return (0); + + instr = (u_int8_t *) value; + limit = (u_int8_t *) value + symsize; + +#ifdef __amd64__ + while (instr < limit) { + if (*instr == FBT_PUSHL_EBP) + break; + + if ((size = dtrace_instr_size(instr)) <= 0) + break; + + instr += size; + } + + if (instr >= limit || *instr != FBT_PUSHL_EBP) { + /* + * We either don't save the frame pointer in this + * function, or we ran into some disassembly + * screw-up. Either way, we bail. + */ + return (0); + } +#else + if (instr[0] != FBT_PUSHL_EBP) + return (0); + + if (!(instr[1] == FBT_MOVL_ESP_EBP0_V0 && + instr[2] == FBT_MOVL_ESP_EBP1_V0) && + !(instr[1] == FBT_MOVL_ESP_EBP0_V1 && + instr[2] == FBT_MOVL_ESP_EBP1_V1)) + return (0); +#endif + + fbt = kmem_zalloc(sizeof (*fbt), KM_SLEEP); + fbt->fbtp_name = name; + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_ENTRY, 3, fbt); + fbt->fbtp_patchpoint = instr; +#ifdef __FreeBSD__ + fbt->fbtp_ctl = lf; + fbt->fbtp_loadcnt = lf->loadcnt; +#endif +#ifdef __NetBSD__ + fbt->fbtp_ctl = mod; +#endif + fbt->fbtp_rval = DTRACE_INVOP_PUSHL_EBP; + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_PATCHVAL; + fbt->fbtp_symindx = symindx; + + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; +#ifdef __FreeBSD__ + lf->fbt_nentries++; +#endif + + retfbt = NULL; +again: + if (instr >= limit) + return (0); + + /* + * If this disassembly fails, then we've likely walked off into + * a jump table or some other unsuitable area. Bail out of the + * disassembly now. + */ + if ((size = dtrace_instr_size(instr)) <= 0) + return (0); + +#ifdef __amd64__ + /* + * We only instrument "ret" on amd64 -- we don't yet instrument + * ret imm16, largely because the compiler doesn't seem to + * (yet) emit them in the kernel... 
+ */ + if (*instr != FBT_RET) { + instr += size; + goto again; + } +#else + if (!(size == 1 && + (*instr == FBT_POPL_EBP || *instr == FBT_LEAVE) && + (*(instr + 1) == FBT_RET || + *(instr + 1) == FBT_RET_IMM16))) { + instr += size; + goto again; + } +#endif + + /* + * We (desperately) want to avoid erroneously instrumenting a + * jump table, especially given that our markers are pretty + * short: two bytes on x86, and just one byte on amd64. To + * determine if we're looking at a true instruction sequence + * or an inline jump table that happens to contain the same + * byte sequences, we resort to some heuristic sleeze: we + * treat this instruction as being contained within a pointer, + * and see if that pointer points to within the body of the + * function. If it does, we refuse to instrument it. + */ + for (j = 0; j < sizeof (uintptr_t); j++) { + caddr_t check = (caddr_t) instr - j; + uint8_t *ptr; + + if (check < (caddr_t)value) + break; + + if (check + sizeof (caddr_t) > (caddr_t)limit) + continue; + + ptr = *(uint8_t **)check; + + if (ptr >= (uint8_t *) value && ptr < limit) { + instr += size; + goto again; + } + } + + /* + * We have a winner! 
+ */ + fbt = kmem_zalloc(sizeof (*fbt), KM_SLEEP); + fbt->fbtp_name = name; + + if (retfbt == NULL) { + fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, + name, FBT_RETURN, 3, fbt); + } else { + retfbt->fbtp_next = fbt; + fbt->fbtp_id = retfbt->fbtp_id; + } + + retfbt = fbt; + fbt->fbtp_patchpoint = instr; +#ifdef __FreeBSD__ + fbt->fbtp_ctl = lf; + fbt->fbtp_loadcnt = lf->loadcnt; +#endif +#ifdef __NetBSD__ + fbt->fbtp_ctl = mod; +#endif + fbt->fbtp_symindx = symindx; + +#ifndef __amd64__ + if (*instr == FBT_POPL_EBP) { + fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; + } else { + ASSERT(*instr == FBT_LEAVE); + fbt->fbtp_rval = DTRACE_INVOP_LEAVE; + } + fbt->fbtp_roffset = + (uintptr_t)(instr - (uint8_t *) value) + 1; + +#else + ASSERT(*instr == FBT_RET); + fbt->fbtp_rval = DTRACE_INVOP_RET; + fbt->fbtp_roffset = + (uintptr_t)(instr - (uint8_t *) value); +#endif + + fbt->fbtp_savedval = *instr; + fbt->fbtp_patchval = FBT_PATCHVAL; + fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; + fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; + +#ifdef __FreeBSD__ + lf->fbt_nentries++; +#endif + + instr += size; + goto again; +} Index: src/external/cddl/osnet/dev/fbt/x86/fbt_isa.h =================================================================== RCS file: src/external/cddl/osnet/dev/fbt/x86/fbt_isa.h diff -N src/external/cddl/osnet/dev/fbt/x86/fbt_isa.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ src/external/cddl/osnet/dev/fbt/x86/fbt_isa.h 19 Apr 2017 17:15:48 -0000 @@ -0,0 +1,32 @@ +/* $NetBSD$ */ + +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * $FreeBSD: head/sys/cddl/dev/fbt/x86/fbt_isa.h 270067 2014-08-16 21:42:55Z markj $ + * + */ + +#ifndef _FBT_ISA_H_ +#define _FBT_ISA_H_ + +typedef uint8_t fbt_patchval_t; + +#endif Index: src/external/cddl/osnet/dev/lockstat/lockstat.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/lockstat/lockstat.c,v retrieving revision 1.8 diff -u -p -r1.8 lockstat.c --- src/external/cddl/osnet/dev/lockstat/lockstat.c 9 Mar 2015 01:42:26 -0000 1.8 +++ src/external/cddl/osnet/dev/lockstat/lockstat.c 20 Apr 2017 14:17:11 -0000 @@ -29,6 +29,7 @@ __KERNEL_RCSID(0, "$NetBSD: lockstat.c,v 1.8 2015/03/09 01:42:26 christos Exp $"); #include +#include #include #include #include @@ -41,8 +42,6 @@ __KERNEL_RCSID(0, "$NetBSD: lockstat.c,v #define NLOCKSTAT 1 #include -#define ASSERT KASSERT - typedef struct lockstat_probe { const char *lsp_func; const char *lsp_name; @@ -100,7 +99,7 @@ lockstat_disable(void *arg, dtrace_id_t /*ARGSUSED*/ static void -lockstat_provide(void *arg, const dtrace_probedesc_t *desc) +lockstat_provide(void *arg, dtrace_probedesc_t *desc) { int i = 0; Index: src/external/cddl/osnet/dev/profile/profile.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/profile/profile.c,v retrieving revision 1.7 diff -u -p -r1.7 profile.c --- src/external/cddl/osnet/dev/profile/profile.c 7 Jan 2017 21:39:52 -0000 1.7 +++ src/external/cddl/osnet/dev/profile/profile.c 6 May 2017 23:59:31 -0000 @@ -22,7 +22,7 @@ * * Portions Copyright 2006-2008 John 
Birrell jb@freebsd.org * - * $FreeBSD: src/sys/cddl/dev/profile/profile.c,v 1.1.4.1 2009/08/03 08:13:06 kensmith Exp $ + * $FreeBSD: head/sys/cddl/dev/profile/profile.c 300618 2016-05-24 16:41:37Z br $ * */ @@ -44,7 +44,9 @@ #include #include #include -#include +#ifdef __FreeBSD__ +#include +#endif #include #include #include @@ -55,17 +57,22 @@ #include #ifdef __FreeBSD__ #include +#include #endif #include #include +#ifdef __FreeBSD__ +#include +#include +#endif #ifdef __NetBSD__ +#include #include #include -#define ASSERT(x) KASSERT(x) +#include #endif -#include #include #include @@ -109,7 +116,7 @@ */ #ifdef __FreeBSD__ #ifdef __amd64 -#define PROF_ARTIFICIAL_FRAMES 7 +#define PROF_ARTIFICIAL_FRAMES 10 #else #ifdef __i386 #define PROF_ARTIFICIAL_FRAMES 6 @@ -123,8 +130,44 @@ #endif #endif #endif + +#ifdef __mips +/* + * This value is bogus just to make module compilable on mips + */ +#define PROF_ARTIFICIAL_FRAMES 3 +#endif + +#ifdef __powerpc__ +/* + * This value is bogus just to make module compilable on powerpc + */ +#define PROF_ARTIFICIAL_FRAMES 3 +#endif + +struct profile_probe_percpu; + +#ifdef __mips +/* bogus */ +#define PROF_ARTIFICIAL_FRAMES 3 +#endif + +#ifdef __arm__ +#define PROF_ARTIFICIAL_FRAMES 3 +#endif + +#ifdef __aarch64__ +/* TODO: verify */ +#define PROF_ARTIFICIAL_FRAMES 10 +#endif + +#ifdef __riscv__ +/* TODO: verify */ +#define PROF_ARTIFICIAL_FRAMES 10 #endif +#endif /* __FreeBSD__ */ + #ifdef __NetBSD__ #define PROF_ARTIFICIAL_FRAMES 3 #endif @@ -133,14 +176,25 @@ typedef struct profile_probe { char prof_name[PROF_NAMELEN]; dtrace_id_t prof_id; int prof_kind; +#if defined(illumos) || defined(__NetBSD__) hrtime_t prof_interval; cyclic_id_t prof_cyclic; +#endif +#ifdef __FreeBSD__ + sbintime_t prof_interval; + struct callout prof_cyclic; + sbintime_t prof_expected; + struct profile_probe_percpu **prof_pcpus; +#endif } profile_probe_t; typedef struct profile_probe_percpu { hrtime_t profc_expected; hrtime_t profc_interval; profile_probe_t 
*profc_probe; +#ifdef __FreeBSD__ + struct callout profc_cyclic; +#endif } profile_probe_percpu_t; #ifdef __FreeBSD__ @@ -152,7 +206,7 @@ static void profile_destroy(void *, dtra static int profile_enable(void *, dtrace_id_t, void *); static void profile_disable(void *, dtrace_id_t, void *); static void profile_load(void *); -static void profile_provide(void *, const dtrace_probedesc_t *); +static void profile_provide(void *, dtrace_probedesc_t *); static int profile_rates[] = { 97, 199, 499, 997, 1999, @@ -213,8 +267,105 @@ static struct cdev *profile_cdev; #endif static dtrace_provider_id_t profile_id; static hrtime_t profile_interval_min = NANOSEC / 5000; /* 5000 hz */ -static int profile_aframes = 0; /* override */ +static int profile_aframes = PROF_ARTIFICIAL_FRAMES; + +#ifdef __FreeBSD__ +SYSCTL_DECL(_kern_dtrace); +SYSCTL_NODE(_kern_dtrace, OID_AUTO, profile, CTLFLAG_RD, 0, "DTrace profile parameters"); +SYSCTL_INT(_kern_dtrace_profile, OID_AUTO, aframes, CTLFLAG_RW, &profile_aframes, + 0, "Skipped frames for profile provider"); + +static sbintime_t +nsec_to_sbt(hrtime_t nsec) +{ + time_t sec; + + /* + * We need to calculate nsec * 2^32 / 10^9 + * Seconds and nanoseconds are split to avoid overflow. + */ + sec = nsec / NANOSEC; + nsec = nsec % NANOSEC; + return (((sbintime_t)sec << 32) | ((sbintime_t)nsec << 32) / NANOSEC); +} + +static hrtime_t +sbt_to_nsec(sbintime_t sbt) +{ + + return ((sbt >> 32) * NANOSEC + + (((uint32_t)sbt * (hrtime_t)NANOSEC) >> 32)); +} + +static void +profile_fire(void *arg) +{ + profile_probe_percpu_t *pcpu = arg; + profile_probe_t *prof = pcpu->profc_probe; + hrtime_t late; + struct trapframe *frame; + uintfptr_t pc, upc; + +#ifdef illumos + late = gethrtime() - pcpu->profc_expected; +#else + late = sbt_to_nsec(sbinuptime() - pcpu->profc_expected); +#endif + + pc = 0; + upc = 0; + + /* + * td_intr_frame can be unset if this is a catch up event + * after waking up from idle sleep. + * This can only happen on a CPU idle thread. 
+ */ + frame = curthread->td_intr_frame; + if (frame != NULL) { + if (TRAPF_USERMODE(frame)) + upc = TRAPF_PC(frame); + else + pc = TRAPF_PC(frame); + } + dtrace_probe(prof->prof_id, pc, upc, late, 0, 0); + + pcpu->profc_expected += pcpu->profc_interval; + callout_schedule_sbt_curcpu(&pcpu->profc_cyclic, + pcpu->profc_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE); +} + +static void +profile_tick(void *arg) +{ + profile_probe_t *prof = arg; + struct trapframe *frame; + uintfptr_t pc, upc; + + pc = 0; + upc = 0; + + /* + * td_intr_frame can be unset if this is a catch up event + * after waking up from idle sleep. + * This can only happen on a CPU idle thread. + */ + frame = curthread->td_intr_frame; + if (frame != NULL) { + if (TRAPF_USERMODE(frame)) + upc = TRAPF_PC(frame); + else + pc = TRAPF_PC(frame); + } + dtrace_probe(prof->prof_id, pc, upc, 0, 0, 0); + + prof->prof_expected += prof->prof_interval; + callout_schedule_sbt(&prof->prof_cyclic, + prof->prof_expected, 0, C_DIRECT_EXEC | C_ABSOLUTE); +} + +#endif +#ifdef __NetBSD__ static void profile_fire(void *arg) { @@ -240,6 +391,8 @@ profile_tick(void *arg) c->cpu_profile_upc, 0, 0, 0); } +#endif + static void profile_create(hrtime_t interval, char *name, int kind) { @@ -259,24 +412,29 @@ profile_create(hrtime_t interval, char * prof = kmem_zalloc(sizeof (profile_probe_t), KM_SLEEP); (void) strcpy(prof->prof_name, name); +#ifdef __FreeBSD__ + prof->prof_interval = nsec_to_sbt(interval); + callout_init(&prof->prof_cyclic, 1); +#else prof->prof_interval = interval; prof->prof_cyclic = CYCLIC_NONE; +#endif prof->prof_kind = kind; prof->prof_id = dtrace_probe_create(profile_id, NULL, NULL, name, - profile_aframes ? 
profile_aframes : PROF_ARTIFICIAL_FRAMES, prof); + profile_aframes, prof); } /*ARGSUSED*/ static void -profile_provide(void *arg, const dtrace_probedesc_t *desc) +profile_provide(void *arg, dtrace_probedesc_t *desc) { int i, j, rate, kind; hrtime_t val = 0, mult = 1, len = 0; char *name, *suffix = NULL; const struct { - const char *prefix; + char *prefix; int kind; } types[] = { { PROF_PREFIX_PROFILE, PROF_PROFILE }, @@ -285,7 +443,7 @@ profile_provide(void *arg, const dtrace_ }; const struct { - const char *name; + char *name; hrtime_t mult; } suffixes[] = { { "ns", NANOSEC / NANOSEC }, @@ -333,7 +491,7 @@ profile_provide(void *arg, const dtrace_ return; } - name = (char *)desc->dtpd_name; + name = desc->dtpd_name; for (i = 0; types[i].prefix != NULL; i++) { len = strlen(types[i].prefix); @@ -405,13 +563,19 @@ profile_destroy(void *arg, dtrace_id_t i { profile_probe_t *prof = parg; +#ifdef __FreeBSD__ + ASSERT(!callout_active(&prof->prof_cyclic) && prof->prof_pcpus == NULL); +#else ASSERT(prof->prof_cyclic == CYCLIC_NONE); +#endif kmem_free(prof, sizeof (profile_probe_t)); ASSERT(profile_total >= 1); atomic_add_32(&profile_total, -1); } +#ifndef __FreeBSD__ + /*ARGSUSED*/ static void profile_online(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when) @@ -488,6 +652,81 @@ profile_disable(void *arg, dtrace_id_t i prof->prof_cyclic = CYCLIC_NONE; } +#else + +static void +profile_enable_omni(profile_probe_t *prof) +{ + profile_probe_percpu_t *pcpu; + int cpu; + + prof->prof_pcpus = kmem_zalloc((mp_maxid + 1) * sizeof(pcpu), KM_SLEEP); + CPU_FOREACH(cpu) { + pcpu = kmem_zalloc(sizeof(profile_probe_percpu_t), KM_SLEEP); + prof->prof_pcpus[cpu] = pcpu; + pcpu->profc_probe = prof; + pcpu->profc_expected = sbinuptime() + prof->prof_interval; + pcpu->profc_interval = prof->prof_interval; + callout_init(&pcpu->profc_cyclic, 1); + callout_reset_sbt_on(&pcpu->profc_cyclic, + pcpu->profc_expected, 0, profile_fire, pcpu, + cpu, C_DIRECT_EXEC | C_ABSOLUTE); + } +} + 
+static void +profile_disable_omni(profile_probe_t *prof) +{ + profile_probe_percpu_t *pcpu; + int cpu; + + ASSERT(prof->prof_pcpus != NULL); + CPU_FOREACH(cpu) { + pcpu = prof->prof_pcpus[cpu]; + ASSERT(pcpu->profc_probe == prof); + ASSERT(callout_active(&pcpu->profc_cyclic)); + callout_stop(&pcpu->profc_cyclic); + callout_drain(&pcpu->profc_cyclic); + kmem_free(pcpu, sizeof(profile_probe_percpu_t)); + } + kmem_free(prof->prof_pcpus, (mp_maxid + 1) * sizeof(pcpu)); + prof->prof_pcpus = NULL; +} + +/* ARGSUSED */ +static void +profile_enable(void *arg, dtrace_id_t id, void *parg) +{ + profile_probe_t *prof = parg; + + if (prof->prof_kind == PROF_TICK) { + prof->prof_expected = sbinuptime() + prof->prof_interval; + callout_reset_sbt(&prof->prof_cyclic, + prof->prof_expected, 0, profile_tick, prof, + C_DIRECT_EXEC | C_ABSOLUTE); + } else { + ASSERT(prof->prof_kind == PROF_PROFILE); + profile_enable_omni(prof); + } +} + +/* ARGSUSED */ +static void +profile_disable(void *arg, dtrace_id_t id, void *parg) +{ + profile_probe_t *prof = parg; + + if (prof->prof_kind == PROF_TICK) { + ASSERT(callout_active(&prof->prof_cyclic)); + callout_stop(&prof->prof_cyclic); + callout_drain(&prof->prof_cyclic); + } else { + ASSERT(prof->prof_kind == PROF_PROFILE); + profile_disable_omni(prof); + } +} +#endif + static void profile_load(void *dummy) { Index: src/external/cddl/osnet/dev/sdt/sdt.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/sdt/sdt.c,v retrieving revision 1.18 diff -u -p -r1.18 sdt.c --- src/external/cddl/osnet/dev/sdt/sdt.c 7 Jan 2017 21:39:52 -0000 1.18 +++ src/external/cddl/osnet/dev/sdt/sdt.c 20 Apr 2017 13:42:05 -0000 @@ -20,7 +20,7 @@ * * Portions Copyright 2006-2008 John Birrell jb@freebsd.org * - * $FreeBSD: head/sys/cddl/dev/sdt/sdt.c 285703 2015-07-19 22:14:09Z markj $ + * $FreeBSD: head/sys/cddl/dev/sdt/sdt.c 297771 2016-04-10 01:24:27Z markj $ * */ @@ -42,6 +42,7 @@ 
__KERNEL_RCSID(0, "$NetBSD: sdt.c,v 1.18 2017/01/07 21:39:52 christos Exp $"); #include +#include #include #include @@ -72,7 +73,7 @@ __KERNEL_RCSID(0, "$NetBSD: sdt.c,v 1.18 /* DTrace methods. */ static void sdt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); -static void sdt_provide_probes(void *, const dtrace_probedesc_t *); +static void sdt_provide_probes(void *, dtrace_probedesc_t *); static void sdt_destroy(void *, dtrace_id_t, void *); static int sdt_enable(void *, dtrace_id_t, void *); static void sdt_disable(void *, dtrace_id_t, void *); @@ -115,7 +116,7 @@ static dtrace_pops_t sdt_pops = { static int sdt_open(dev_t dev, int flags, int mode, struct lwp *l) { - return (0); + return 0; } static const struct cdevsw sdt_cdevsw = { @@ -137,8 +138,8 @@ static const struct cdevsw sdt_cdevsw = static TAILQ_HEAD(, sdt_provider) sdt_prov_list; #ifdef __FreeBSD__ -eventhandler_tag sdt_kld_load_tag; -eventhandler_tag sdt_kld_unload_try_tag; +static eventhandler_tag sdt_kld_load_tag; +static eventhandler_tag sdt_kld_unload_try_tag; #endif #ifdef __NetBSD__ @@ -191,6 +192,12 @@ sdt_create_probe(struct sdt_probe *probe char *to; size_t len; + if (probe->version != (int)sizeof(*probe)) { + printf("ignoring probe %p, version %u expected %u\n", + probe, probe->version, (int)sizeof(*probe)); + return; + } + TAILQ_FOREACH(prov, &sdt_prov_list, prov_entry) if (strcmp(prov->name, probe->prov->name) == 0) break; @@ -214,6 +221,8 @@ sdt_create_probe(struct sdt_probe *probe * in the C compiler, so we have to respect const vs non-const. */ strlcpy(func, probe->func, sizeof(func)); + if (func[0] == '\0') + strcpy(func, "none"); from = probe->name; to = name; @@ -239,7 +248,7 @@ sdt_create_probe(struct sdt_probe *probe * requires one of provide_probes and provide_module to be defined. 
*/ static void -sdt_provide_probes(void *arg, const dtrace_probedesc_t *desc) +sdt_provide_probes(void *arg, dtrace_probedesc_t *desc) { } @@ -248,10 +257,6 @@ sdt_enable(void *arg __unused, dtrace_id { struct sdt_probe *probe = parg; -#ifdef SDT_DEBUG - printf("sdt: %s\n", __func__); -#endif - probe->id = id; #ifdef __FreeBSD__ probe->sdtp_lf->nenabled++; @@ -268,13 +273,6 @@ sdt_disable(void *arg __unused, dtrace_i #ifdef __FreeBSD__ SDT_KASSERT(probe->sdtp_lf->nenabled > 0, ("no probes enabled")); -#endif - -#ifdef SDT_DEBUG - printf("sdt: %s\n", __func__); -#endif - -#ifdef __FreeBSD__ if (strcmp(probe->prov->name, "lockstat") == 0) lockstat_enabled--; probe->sdtp_lf->nenabled--; @@ -288,16 +286,6 @@ sdt_getargdesc(void *arg, dtrace_id_t id struct sdt_argtype *argtype; struct sdt_probe *probe = parg; -#ifdef SDT_DEBUG - printf("sdt: %s probe %d\n", __func__, id); - printf("%s: probe %d (%s:%s:%s:%s).%d\n", - __func__, id, - probe->provider, - probe->module, - probe->function, - probe->name, - desc->dtargd_ndx); -#endif if (desc->dtargd_ndx >= probe->n_args) { desc->dtargd_ndx = DTRACE_ARGNONE; return; @@ -488,12 +476,19 @@ sdt_load(void) { TAILQ_INIT(&sdt_prov_list); - sdt_init(dtrace_probe); #ifdef __FreeBSD__ + sdt_probe_func = dtrace_probe; + + sdt_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, sdt_kld_load, NULL, + EVENTHANDLER_PRI_ANY); + sdt_kld_unload_try_tag = EVENTHANDLER_REGISTER(kld_unload_try, + sdt_kld_unload_try, NULL, EVENTHANDLER_PRI_ANY); + /* Pick up probes from the kernel and already-loaded linker files. 
*/ linker_file_foreach(sdt_linker_file_cb, NULL); #endif #ifdef __NetBSD__ + sdt_init(dtrace_probe); sdt_link_set_load(); #endif } @@ -504,53 +499,51 @@ sdt_unload(void) struct sdt_provider *prov, *tmp; int ret; - sdt_exit(); +#ifdef __FreeBSD__ + EVENTHANDLER_DEREGISTER(kld_load, sdt_kld_load_tag); + EVENTHANDLER_DEREGISTER(kld_unload_try, sdt_kld_unload_try_tag); + + sdt_probe_func = sdt_probe_stub; +#endif #ifdef __NetBSD__ + sdt_exit(); + sdt_link_set_unload(); #endif TAILQ_FOREACH_SAFE(prov, &sdt_prov_list, prov_entry, tmp) { ret = dtrace_unregister(prov->id); if (ret != 0) - return ret; + return (ret); TAILQ_REMOVE(&sdt_prov_list, prov, prov_entry); free(__UNCONST(prov->name), M_SDT); free(prov, M_SDT); } - return 0; + return (0); } #ifdef __FreeBSD__ static int sdt_modevent(module_t mod __unused, int type, void *data __unused) { - int error = 0; switch (type) { case MOD_LOAD: - sdt_load(); - break; - case MOD_UNLOAD: - error = sdt_unload(); - break; - case MOD_SHUTDOWN: - break; - + return (0); default: - error = EOPNOTSUPP; - break; + return (EOPNOTSUPP); } - - return (error); } +SYSINIT(sdt_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, sdt_load, NULL); +SYSUNINIT(sdt_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, sdt_unload, NULL); + DEV_MODULE(sdt, sdt_modevent, NULL); MODULE_VERSION(sdt, 1); MODULE_DEPEND(sdt, dtrace, 1, 1, 1); -MODULE_DEPEND(sdt, opensolaris, 1, 1, 1); #endif #ifdef __NetBSD__ Index: src/external/cddl/osnet/dev/systrace/systrace.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dev/systrace/systrace.c,v retrieving revision 1.9 diff -u -p -r1.9 systrace.c --- src/external/cddl/osnet/dev/systrace/systrace.c 7 Jan 2017 21:39:52 -0000 1.9 +++ src/external/cddl/osnet/dev/systrace/systrace.c 20 Apr 2017 14:31:12 -0000 @@ -22,8 +22,6 @@ * * Portions Copyright 2006-2008 John Birrell jb@freebsd.org * - * $FreeBSD: src/sys/cddl/dev/systrace/systrace.c,v 1.2.2.1 2009/08/03 
08:13:06 kensmith Exp $ - * */ /* @@ -32,6 +30,9 @@ */ #include +/* __FBSDID("$FreeBSD: head/sys/cddl/dev/systrace/systrace.c 306220 2016-09-22 23:22:53Z markj $"); */ + +#include #include #include #include @@ -55,12 +56,82 @@ #include #include +#include "dtrace_cddl.h" #include "emultrace.h" #define CONCAT(x,y) __CONCAT(x,y) #define STRING(s) __STRING(s) +#ifdef __FreeBSD__ +#ifdef LINUX_SYSTRACE +#if defined(__amd64__) +#include +#include +#include +#include +#elif defined(__i386__) +#include +#include +#include +#include +#else +#error Only i386 and amd64 are supported. +#endif +#define MODNAME "linux" +extern struct sysent linux_sysent[]; +#define MAXSYSCALL LINUX_SYS_MAXSYSCALL +#define SYSCALLNAMES linux_syscallnames +#define SYSENT linux_sysent +#elif defined(LINUX32_SYSTRACE) +#if defined(__amd64__) +#include +#include +#include +#include +#else +#error Only amd64 is supported. +#endif +#define MODNAME "linux32" +extern struct sysent linux32_sysent[]; +#define MAXSYSCALL LINUX32_SYS_MAXSYSCALL +#define SYSCALLNAMES linux32_syscallnames +#define SYSENT linux32_sysent +#elif defined(FREEBSD32_SYSTRACE) +/* + * The syscall arguments are processed into a DTrace argument array + * using a generated function. See sys/kern/makesyscalls.sh. + */ +#include +#include +#include +#include +extern const char *freebsd32_syscallnames[]; +#define MODNAME "freebsd32" +#define MAXSYSCALL FREEBSD32_SYS_MAXSYSCALL +#define SYSCALLNAMES freebsd32_syscallnames +#define SYSENT freebsd32_sysent +#else +/* + * The syscall arguments are processed into a DTrace argument array + * using a generated function. See sys/kern/makesyscalls.sh. 
+ */ +#include +#include +#define MODNAME "freebsd" +#define MAXSYSCALL SYS_MAXSYSCALL +#define SYSCALLNAMES syscallnames +#define SYSENT sysent +#define NATIVE_ABI +#endif + +#define PROVNAME "syscall" +#define DEVNAME "dtrace/systrace/" MODNAME +#endif /* __FreeBSD__ */ + +#ifdef __NetBSD__ +#include + #ifndef NATIVE extern const char * const CONCAT(emulname,_syscallnames)[]; extern const char * const CONCAT(alt,CONCAT(emulname,_syscallnames))[]; @@ -87,6 +158,8 @@ extern const char * const altsyscallname #define MODCMD CONCAT(MODNAME,_modcmd) #define EMUL CONCAT(emul_,emulname) extern struct emul EMUL; +#define curthread curlwp +#endif /* __NetBSD__ */ #define SYSTRACE_ARTIFICIAL_FRAMES 1 @@ -102,12 +175,20 @@ extern struct emul EMUL; static int systrace_unload(void); static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); -static void systrace_provide(void *, const dtrace_probedesc_t *); +static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int); +static void systrace_provide(void *, dtrace_probedesc_t *); static void systrace_destroy(void *, dtrace_id_t, void *); static int systrace_enable(void *, dtrace_id_t, void *); static void systrace_disable(void *, dtrace_id_t, void *); static void systrace_load(void *); +#ifdef __FreeBSD__ +static union { + const char **p_constnames; + char **pp_syscallnames; +} uglyhack = { SYSCALLNAMES }; +#endif + static dtrace_pattr_t systrace_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, @@ -124,7 +205,7 @@ static dtrace_pops_t systrace_pops = { NULL, NULL, systrace_getargdesc, - NULL, + systrace_getargval, NULL, systrace_destroy }; @@ -138,6 +219,57 @@ static dtrace_provider_id_t systrace_id; * array the syscall comes from. It could be a standard syscall or a * compat syscall from something like Linux. 
*/ +#ifdef __FreeBSD__ +#ifdef NATIVE_ABI +static void +systrace_probe(struct syscall_args *sa, enum systrace_probe_t type, int retval) +{ + uint64_t uargs[nitems(sa->args)]; + dtrace_id_t id; + int n_args, sysnum; + + sysnum = sa->code; + memset(uargs, 0, sizeof(uargs)); + + if (type == SYSTRACE_ENTRY) { + if ((id = sa->callp->sy_entry) == DTRACE_IDNONE) + return; + + if (sa->callp->sy_systrace_args_func != NULL) + /* + * Convert the syscall parameters using the registered + * function. + */ + (*sa->callp->sy_systrace_args_func)(sysnum, sa->args, + uargs, &n_args); + else + /* + * Use the built-in system call argument conversion + * function to translate the syscall structure fields + * into the array of 64-bit values that DTrace expects. + */ + systrace_args(sysnum, sa->args, uargs, &n_args); + /* + * Save probe arguments now so that we can retrieve them if + * the getargval method is called from further down the stack. + */ + curthread->t_dtrace_systrace_args = uargs; + } else { + if ((id = sa->callp->sy_return) == DTRACE_IDNONE) + return; + + curthread->t_dtrace_systrace_args = NULL; + /* Set arg0 and arg1 as the return value of this syscall. */ + uargs[0] = uargs[1] = retval; + } + + /* Process the probe using the converted argments. */ + dtrace_probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]); +} +#endif /* NATIVE_ABI */ +#endif /* __FreeBSD__ */ + +#ifdef __NetBSD__ static void systrace_probe(uint32_t id, register_t sysnum, const struct sysent *se, const void *params, const register_t *ret, int error) @@ -160,24 +292,47 @@ systrace_probe(uint32_t id, register_t s /* XXX: fix for more arguments! 
*/ dtrace_probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]); } +#endif static void systrace_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); + if (SYSTRACE_ISENTRY((uintptr_t)parg)) - systrace_entry_setargdesc(sysnum, desc->dtargd_ndx, + systrace_entry_setargdesc(sysnum, desc->dtargd_ndx, desc->dtargd_native, sizeof(desc->dtargd_native)); else - systrace_return_setargdesc(sysnum, desc->dtargd_ndx, + systrace_return_setargdesc(sysnum, desc->dtargd_ndx, desc->dtargd_native, sizeof(desc->dtargd_native)); if (desc->dtargd_native[0] == '\0') desc->dtargd_ndx = DTRACE_ARGNONE; } +static uint64_t +systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) +{ + uint64_t *uargs; + + uargs = curthread->t_dtrace_systrace_args; + if (uargs == NULL) + /* This is a return probe. */ + return (0); +#ifdef __FreeBSD__ + if (argno >= nitems(((struct syscall_args *)NULL)->args)) + return (0); +#endif +#ifdef __NetBSD__ + if (argno >= SYS_MAXSYSARGS) + return (0); +#endif + + return (uargs[argno]); +} + static void -systrace_provide(void *arg, const dtrace_probedesc_t *desc) +systrace_provide(void *arg, dtrace_probedesc_t *desc) { int i; @@ -185,6 +340,20 @@ systrace_provide(void *arg, const dtrace return; for (i = 0; i < MAXSYSCALL; i++) { +#ifdef __FreeBSD__ + if (dtrace_probe_lookup(systrace_id, MODNAME, + uglyhack.pp_syscallnames[i], "entry") != 0) + continue; + + (void)dtrace_probe_create(systrace_id, MODNAME, + uglyhack.pp_syscallnames[i], "entry", + SYSTRACE_ARTIFICIAL_FRAMES, + (void *)((uintptr_t)SYSTRACE_ENTRY(i))); + (void)dtrace_probe_create(systrace_id, MODNAME, + uglyhack.pp_syscallnames[i], "return", + SYSTRACE_ARTIFICIAL_FRAMES, + (void *)((uintptr_t)SYSTRACE_RETURN(i))); +#else const char *name = ALTSYSCALLNAMES[i] ? 
ALTSYSCALLNAMES[i] : SYSCALLNAMES[i]; if (dtrace_probe_lookup(systrace_id, NULL, name, "entry") != 0) @@ -196,6 +365,7 @@ systrace_provide(void *arg, const dtrace (void) dtrace_probe_create(systrace_id, NULL, name, "return", SYSTRACE_ARTIFICIAL_FRAMES, (void *)(intptr_t)SYSTRACE_RETURN(i)); +#endif } } @@ -222,10 +392,16 @@ systrace_enable(void *arg, dtrace_id_t i { int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg); +#ifdef __FreeBSD__ + if (SYSENT[sysnum].sy_systrace_args_func == NULL) + SYSENT[sysnum].sy_systrace_args_func = systrace_args; +#endif + if (SYSTRACE_ISENTRY((uintptr_t)parg)) SYSENT[sysnum].sy_entry = id; else SYSENT[sysnum].sy_return = id; + return 0; } @@ -241,11 +417,16 @@ systrace_disable(void *arg, dtrace_id_t static void systrace_load(void *dummy) { - if (dtrace_register(PROVNAME, &systrace_attr, DTRACE_PRIV_USER, - NULL, &systrace_pops, NULL, &systrace_id) != 0) + if (dtrace_register(PROVNAME, &systrace_attr, DTRACE_PRIV_USER, NULL, + &systrace_pops, NULL, &systrace_id) != 0) return; +#ifdef NATIVE_ABI + systrace_probe_func = systrace_probe; +#endif +#ifdef __NetBSD__ EMUL.e_dtrace_syscall = systrace_probe; +#endif } @@ -254,14 +435,80 @@ systrace_unload() { int error; +#ifdef NATIVE_ABI + systrace_probe_func = NULL; +#endif +#ifdef __NetBSD__ + EMUL.e_dtrace_syscall = NULL; +#endif + if ((error = dtrace_unregister(systrace_id)) != 0) return (error); - EMUL.e_dtrace_syscall = NULL; - return error; } +#ifdef __FreeBSD__ +static int +systrace_modevent(module_t mod __unused, int type, void *data __unused) +{ + int error; + + error = 0; + switch (type) { + case MOD_LOAD: + break; + + case MOD_UNLOAD: + break; + + case MOD_SHUTDOWN: + break; + + default: + error = EOPNOTSUPP; + break; + + } + return (error); +} + +SYSINIT(systrace_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, + systrace_load, NULL); +SYSUNINIT(systrace_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, + systrace_unload, NULL); + +#ifdef LINUX_SYSTRACE +DEV_MODULE(systrace_linux, 
systrace_modevent, NULL); +MODULE_VERSION(systrace_linux, 1); +#ifdef __amd64__ +MODULE_DEPEND(systrace_linux, linux64, 1, 1, 1); +#else +MODULE_DEPEND(systrace_linux, linux, 1, 1, 1); +#endif +MODULE_DEPEND(systrace_linux, dtrace, 1, 1, 1); +MODULE_DEPEND(systrace_linux, opensolaris, 1, 1, 1); +#elif defined(LINUX32_SYSTRACE) +DEV_MODULE(systrace_linux32, systrace_modevent, NULL); +MODULE_VERSION(systrace_linux32, 1); +MODULE_DEPEND(systrace_linux32, linux, 1, 1, 1); +MODULE_DEPEND(systrace_linux32, dtrace, 1, 1, 1); +MODULE_DEPEND(systrace_linux32, opensolaris, 1, 1, 1); +#elif defined(FREEBSD32_SYSTRACE) +DEV_MODULE(systrace_freebsd32, systrace_modevent, NULL); +MODULE_VERSION(systrace_freebsd32, 1); +MODULE_DEPEND(systrace_freebsd32, dtrace, 1, 1, 1); +MODULE_DEPEND(systrace_freebsd32, opensolaris, 1, 1, 1); +#else +DEV_MODULE(systrace, systrace_modevent, NULL); +MODULE_VERSION(systrace, 1); +MODULE_DEPEND(systrace, dtrace, 1, 1, 1); +MODULE_DEPEND(systrace, opensolaris, 1, 1, 1); +#endif +#endif /* __FreeBSD__ */ + +#ifdef __NetBSD__ + static int MODCMD(modcmd_t cmd, void *data) { @@ -282,3 +529,5 @@ MODCMD(modcmd_t cmd, void *data) } MODULE(MODULE_CLASS_MISC, MODNAME, MODDEP) + +#endif /* __NetBSD__ */ Index: src/external/cddl/osnet/dist/cmd/dtrace/dtrace.1 =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/dtrace/dtrace.1,v retrieving revision 1.3 diff -u -p -r1.3 dtrace.1 --- src/external/cddl/osnet/dist/cmd/dtrace/dtrace.1 12 May 2017 21:01:36 -0000 1.3 +++ src/external/cddl/osnet/dist/cmd/dtrace/dtrace.1 17 May 2017 00:00:52 -0000 @@ -2,7 +2,7 @@ .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the -.\" Common Development and Distribution License (the "License"). +.\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. 
.\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE @@ -18,653 +18,660 @@ .\" .\" CDDL HEADER END .\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved. -.TH dtrace 1 "5 Sep 2006" "SunOS 5.11" "System Administration Commands" -.SH NAME -dtrace \- DTrace dynamic tracing compiler and tracing utility -.SH SYNOPSIS -.LP -.nf -\fBdtrace\fR [\fB-32\fR | \fB-64\fR] [\fB-aACeFGHhlqSvVwZ\fR] [\fB-b\fR \fIbufsz\fR] [\fB-c\fR \fIcmd\fR] - [\fB-D\fR \fIname\fR [\fI=value\fR]] [\fB-I\fR \fIpath\fR] [\fB-L\fR \fIpath\fR] [\fB-o\fR \fIoutput\fR] - [\fB-s\fR \fIscript\fR] [\fB-U\fR \fIname\fR] [\fB-x\fR \fIarg\fR [\fI=val\fR]] - [\fB-X\fR a | c | s | t] [\fB-p\fR \fIpid\fR] - [\fB-P\fR \fIprovider\fR [[\fIpredicate\fR] \fIaction\fR]] - [\fB-m\fR [\fIprovider:\fR] \fImodule\fR [[\fIpredicate\fR] \fIaction\fR]] - [\fB-f\fR [[\fIprovider:\fR] \fImodule:\fR] \fIfunction\fR [[\fIpredicate\fR] \fIaction\fR]] - [\fB-n\fR [[[\fIprovider:\fR] \fImodule:\fR] \fIfunction:\fR] \fIname\fR [[\fIpredicate\fR] \fIaction\fR]] - [\fB-i\fR \fIprobe-id\fR [[\fIpredicate\fR] \fIaction\fR]] -.fi - -.SH DESCRIPTION -.sp -.LP -DTrace is a comprehensive dynamic tracing framework for the Solaris Operating System. DTrace provides a powerful infrastructure that permits administrators, developers, and service personnel to concisely answer arbitrary questions about the behavior of the operating system and user programs. -.sp -.LP -The \fISolaris Dynamic Tracing Guide\fR describes how to use DTrace to observe, debug, and tune system behavior. Refer to this book for a detailed description of DTrace features, including the bundled DTrace observability -tools, instrumentation providers, and the D programming language. 
-.sp -.LP -The \fBdtrace\fR command provides a generic interface to the essential services provided by the DTrace facility, including: -.RS +4 -.TP -.ie t \(bu -.el o +.\" +.\" $FreeBSD: head/cddl/contrib/opensolaris/cmd/dtrace/dtrace.1 281705 2015-04-18 21:00:36Z markj $ +.\" +.Dd April 18, 2015 +.Dt DTRACE 1 +.Os +.Sh NAME +.Nm dtrace +.Nd dynamic tracing compiler and tracing utility +.Sh SYNOPSIS +.Nm +.Op Fl 32 | Fl 64 +.Op Fl aACeFGhHlqSvVwZ +.Op Fl b Ar bufsz +.Op Fl c Ar cmd +.Op Fl D Ar name Op Ns = Ns value +.Op Fl I Ar path +.Op Fl L Ar path +.Op Fl o Ar output +.Op Fl s Ar script +.Op Fl U Ar name +.Op Fl x Ar arg Op Ns = Ns value +.Op Fl X Cm a | c | s | t +.Op Fl p Ar pid +.Op Fl P Ar provider Oo Oo Ar predicate Oc Ar action Oc +.Op Fl m Oo Ar provider : Oc Ar module Oo Oo Ar predicate Oc Ar action Oc +.Op Fl f Oo Oo Ar provider : Oc Ar module : Oc Ar function Oo Oo Ar predicate \ + Oc Ar action Oc +.Op Fl n Oo Oo Oo Ar provider : Oc Ar module : Oc Ar function : Oc Ar name \ + Oo Oo Ar predicate Oc Ar action Oc +.Op Fl i Ar probe-id Oo Oo Ar predicate Oc Ar action Oc +.Sh DESCRIPTION +DTrace is a comprehensive dynamic tracing framework ported from Solaris. +DTrace provides a powerful infrastructure that permits administrators, +developers, and service personnel to concisely answer arbitrary questions about +the behavior of the operating system and user programs. 
+.Pp +The +.Nm +command provides a generic interface to the essential services provided by the +DTrace facility, including: +.Bl -bullet -offset indent +.It Options that list the set of probes and providers currently published by DTrace -.RE -.RS +4 -.TP -.ie t \(bu -.el o -Options that enable probes directly using any of the probe description specifiers (provider, module, function, name) -.RE -.RS +4 -.TP -.ie t \(bu -.el o -Options that run the D compiler and compile one or more D program files or programs written directly on the command line -.RE -.RS +4 -.TP -.ie t \(bu -.el o +.It +Options that enable probes directly using any of the probe description +specifiers (provider, module, function, name) +.It +Options that run the D compiler and compile one or more D program files or +programs written directly on the command line +.It Options that generate anonymous tracing programs -.RE -.RS +4 -.TP -.ie t \(bu -.el o +.It Options that generate program stability reports -.RE -.RS +4 -.TP -.ie t \(bu -.el o -Options that modify DTrace tracing and buffering behavior and enable additional D compiler features -.RE -.sp -.LP -You can use \fBdtrace\fR to create D scripts by using it in a \fB#!\fR declaration to create an interpreter file. You can also use \fBdtrace\fR to attempt to compile D programs and determine their properties without actually enabling tracing using the \fB-e\fR option. See \fBOPTIONS\fR. See the \fISolaris Dynamic Tracing Guide\fR for detailed examples of how to use the \fBdtrace\fR utility to perform these tasks. -.SH OPTIONS -.sp -.LP -The arguments accepted by the \fB-P\fR, \fB-m\fR, \fB-f\fR, \fB-n\fR, and \fB-i\fR options can include an optional D language \fIpredicate\fR enclosed in slashes \fB//\fR and optional D language \fIaction\fR statement list enclosed in braces \fB{}\fR. D program code specified on the command line must be appropriately quoted to avoid interpretation of meta-characters by the shell. 
-.sp -.LP +.It +Options that modify DTrace tracing and buffering behavior and enable +additional D compiler features +.El +.Pp +You can use +.Nm +to create D scripts by using it in a shebang declaration to create an +interpreter file. +You can also use +.Nm +to attempt to compile D programs and determine their properties without +actually enabling traces using the +.Fl e +option. +.Sh OPTIONS +The arguments accepted by the +.Fl P , +.Fl m , +.Fl f , +.Fl n , +and +.Fl i +options can include an optional D language +.Ar predicate +enclosed in slashes and an optional D language +.Ar action +statement list enclosed in braces. +D program code specified on the command line must be appropriately quoted to +avoid interpretation of meta-characters by the shell. +.Pp The following options are supported: -.sp -.ne 2 -.mk -.na -\fB\fB-32\fR | \fB-64\fR\fR -.ad -.sp .6 -.RS 4n -The D compiler produces programs using the native data model of the operating system kernel. You can use the \fBisainfo\fR \fB-b\fR command to determine the current operating system data model. If the \fB-32\fR option is specified, \fBdtrace\fR forces -the D compiler to compile a D program using the 32-bit data model. If the \fB-64\fR option is specified, \fBdtrace\fR forces the D compiler to compile a D program using the 64-bit data model. These options are typically not required as \fBdtrace\fR selects the -native data model as the default. The data model affects the sizes of integer types and other language properties. D programs compiled for either data model can be executed on both 32-bit and 64-bit kernels. The \fB-32\fR and \fB-64\fR options also determine the ELF file format -(ELF32 or ELF64) produced by the \fB-G\fR option. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-a\fR\fR -.ad -.sp .6 -.RS 4n -Claim anonymous tracing state and display the traced data. 
You can combine the \fB-a\fR option with the \fB-e\fR option to force \fBdtrace\fR to exit immediately after consuming the anonymous tracing state rather than continuing to wait for new -data. See the \fISolaris Dynamic Tracing Guide\fR for more information about anonymous tracing. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-A\fR\fR -.ad -.sp .6 -.RS 4n -Generate \fBdriver.conf\fR(4) directives for anonymous tracing. This option constructs a set of \fBdtrace\fR(7D) configuration file directives to enable the specified probes for anonymous tracing and then exits. By default, \fBdtrace\fR attempts to store the directives to the file \fB/kernel/drv/dtrace.conf\fR. You can modify this behavior if you use the \fB-o\fR option to specify an alternate output file. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-b\fR \fIbufsz\fR\fR -.ad -.sp .6 -.RS 4n -Set principal trace buffer size (\fIbufsz\fR). The trace buffer size can include any of the size suffixes \fBk\fR, \fBm\fR, \fBg\fR, or \fBt\fR. If the buffer space cannot be allocated, \fBdtrace\fR attempts -to reduce the buffer size or exit depending on the setting of the \fBbufresize\fR property. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-c\fR \fIcmd\fR\fR -.ad -.sp .6 -.RS 4n -Run the specified command \fIcmd\fR and exit upon its completion. If more than one \fB-c\fR option is present on the command line, \fBdtrace\fR exits when all commands have exited, reporting the exit status for each child process as it -terminates. The process-ID of the first command is made available to any D programs specified on the command line or using the \fB-s\fR option through the \fB$target\fR macro variable. Refer to the \fISolaris Dynamic Tracing Guide\fR for more information -on macro variables. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-C\fR\fR -.ad -.sp .6 -.RS 4n -Run the C preprocessor \fBcpp\fR(1) over D programs before compiling them. You can pass options to the C preprocessor using the \fB-D\fR, \fB-U\fR, \fB-I\fR, and \fB-H\fR options. 
You can select the degree of C standard conformance if you use the \fB-X\fR option. For a description of the set of tokens defined by the D compiler when invoking the C preprocessor, see \fB-X\fR. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-D\fR \fIname\fR \fB[=\fR\fIvalue\fR\fB]\fR\fR -.ad -.sp .6 -.RS 4n -Define \fIname\fR when invoking \fBcpp\fR(1) (enabled using the \fB-C\fR option). If you specify the equals sign (\fB=\fR) -and additional \fIvalue\fR, the name is assigned the corresponding value. This option passes the \fB-D\fR option to each \fBcpp\fR invocation. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-e\fR\fR -.ad -.sp .6 -.RS 4n -Exit after compiling any requests and consuming anonymous tracing state (\fB-a\fR option) but prior to enabling any probes. You can combine this option with the \fB-a\fR option to print anonymous tracing data and exit. You can also combine this option with D -compiler options. This combination verifies that the programs compile without actually executing them and enabling the corresponding instrumentation. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-f\fR\fB[[\fR\fIprovider\fR\fB:]\fR\fImodule\fR\fB:]\fR\fIfunction\fR\fB[[\fR\fIpredicate\fR\fB]\fR\fIaction\fR\fB]]\fR\fR -.ad -.sp .6 -.RS 4n -Specify function name to trace or list (\fB-l\fR option). The corresponding argument can include any of the probe description forms \fIprovider:module:function\fR, \fImodule:function\fR, or \fIfunction\fR. -Unspecified probe description fields are left blank and match any probes regardless of the values in those fields. If no qualifiers other than \fIfunction\fR are specified in the description, all probes with the corresponding \fIfunction\fR are matched. -The \fB-f\fR argument can be suffixed with an optional D probe clause. You can specify more than one \fB-f\fR option on the command line at a time. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-F\fR\fR -.ad -.sp .6 -.RS 4n -Coalesce trace output by identifying function entry and return. 
Function entry probe reports are indented and their output is prefixed with \fB->\fR. Function return probe reports are unindented and their output is prefixed with \fB<-\fR\&. System call -entry probe reports are indented and their output is prefixed with \fB=>\fR. System call return probe reports are unindented and their output is prefixed with \fB<=\fR\&. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-G\fR\fR -.ad -.sp .6 -.RS 4n -Generate an ELF file containing an embedded DTrace program. The DTrace probes specified in the program are saved inside of a relocatable ELF object which can be linked into another program. If the \fB-o\fR option is present, the ELF file is saved using the pathname specified -as the argument for this operand. If the \fB-o\fR option is not present and the DTrace program is contained with a file whose name is \fB\fIfilename\fR.d\fR, then the ELF file is saved using the name \fB\fIfilename\fR.o\fR. -Otherwise the ELF file is saved using the name \fBd.out\fR. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-H\fR\fR -.ad -.sp .6 -.RS 4n -Print the pathnames of included files when invoking \fBcpp\fR(1) (enabled using the \fB-C\fR option). This option passes the \fB-H\fR option -to each \fBcpp\fR invocation, causing it to display the list of pathnames, one for each line, to \fBstderr\fR. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-h\fR\fR -.ad -.sp .6 -.RS 4n -Generate a header file containing macros that correspond to probes in the specified provider definitions. This option should be used to generate a header file that is included by other source files for later use with the \fB-G\fR option. If the \fB-o\fR option -is present, the header file is saved using the pathname specified as the argument for that option. If the \fB-o\fR option is not present and the DTrace program is contained with a file whose name is \fIfilename\fR\fB\&.d\fR, then the header file is saved -using the name \fIfilename\fR\fB\&.h\fR. 
-.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-i\fR \fIprobe-id\fR\fB[[\fR\fIpredicate\fR] \fIaction\fR\fB]\fR\fR -.ad -.sp .6 -.RS 4n -Specify probe identifier (\fIprobe-id\fR) to trace or list (\fB-l\fR option). You can specify probe IDs using decimal integers as shown by \fBdtrace\fR \fB-l\fR. The \fB-i\fR argument can be suffixed with an optional -D probe clause. You can specify more than one \fB-i\fR option at a time. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-I\fR \fIpath\fR\fR -.ad -.sp .6 -.RS 4n -Add the specified directory \fIpath\fR to the search path for \fB#include\fR files when invoking \fBcpp\fR(1) (enabled -using the \fB-C\fR option). This option passes the \fB-I\fR option to each \fBcpp\fR invocation. The specified \fIpath\fR is inserted into the search path ahead of the default directory list. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-L\fR \fIpath\fR\fR -.ad -.sp .6 -.RS 4n -Add the specified directory \fIpath\fR to the search path for DTrace libraries. DTrace libraries are used to contain common definitions that can be used when writing D programs. The specified \fIpath\fR is added after the default library -search path. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-l\fR\fR -.ad -.sp .6 -.RS 4n -List probes instead of enabling them. If the \fB-l\fR option is specified, \fBdtrace\fR produces a report of the probes matching the descriptions given using the \fB-P\fR, \fB-m\fR, \fB-f\fR, \fB-n\fR, \fB-i\fR, -and \fB-s\fR options. If none of these options are specified, this option lists all probes. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-m\fR [[\fIprovider:\fR] \fImodule:\fR [[\fIpredicate\fR] \fIaction\fR]]\fR -.ad -.sp .6 -.RS 4n -Specify module name to trace or list (\fB-l\fR option). The corresponding argument can include any of the probe description forms \fIprovider:module\fR or \fImodule\fR. Unspecified probe description fields are left blank and match -any probes regardless of the values in those fields. 
If no qualifiers other than \fImodule\fR are specified in the description, all probes with a corresponding \fImodule\fR are matched. The \fB-m\fR argument can be suffixed with an optional D -probe clause. More than one \fB-m\fR option can be specified on the command line at a time. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-n\fR [[[\fIprovider:\fR] \fImodule:\fR] \fIfunction:\fR] \fIname\fR [[\fIpredicate\fR] \fIaction\fR]\fR -.ad -.sp .6 -.RS 4n -Specify probe name to trace or list (\fB-l\fR option). The corresponding argument can include any of the probe description forms \fIprovider:module:function:name\fR, \fImodule:function:name\fR, \fIfunction:name\fR, -or \fIname\fR. Unspecified probe description fields are left blank and match any probes regardless of the values in those fields. If no qualifiers other than \fIname\fR are specified in the description, all probes with a corresponding \fIname\fR are -matched. The \fB-n\fR argument can be suffixed with an optional D probe clause. More than one \fB-n\fR option can be specified on the command line at a time. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-o\fR \fIoutput\fR\fR -.ad -.sp .6 -.RS 4n -Specify the \fIoutput\fR file for the \fB-A\fR , \fB-G\fR, and \fB-l\fR options, or for the traced data itself. If the \fB-A\fR option is present and \fB-o\fR is not present, the default output file is \fB/kernel/drv/dtrace.conf\fR. If the \fB-G\fR option is present and the \fB-s\fR option's argument is of the form \fB\fIfilename\fR.d\fR and \fB-o\fR is not present, the default output file is \fB\fIfilename\fR.o\fR. -Otherwise the default output file is \fBd.out\fR. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-p\fR \fIpid\fR\fR -.ad -.sp .6 -.RS 4n -Grab the specified process-ID \fIpid\fR, cache its symbol tables, and exit upon its completion. If more than one \fB-p\fR option is present on the command line, \fBdtrace\fR exits when all commands have exited, reporting the exit status -for each process as it terminates. 
The first process-ID is made available to any D programs specified on the command line or using the \fB-s\fR option through the \fB$target\fR macro variable. Refer to the \fISolaris Dynamic Tracing Guide\fR for -more information on macro variables. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-P\fR \fIprovider\fR \fB[[\fR\fIpredicate\fR\fB]\fR \fIaction\fR]\fR -.ad -.sp .6 -.RS 4n -Specify provider name to trace or list (\fB-l\fR option). The remaining probe description fields module, function, and name are left blank and match any probes regardless of the values in those fields. The \fB-P\fR argument can be suffixed with an optional D -probe clause. You can specify more than one \fB-P\fR option on the command line at a time. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-q\fR\fR -.ad -.sp .6 -.RS 4n -Set quiet mode. \fBdtrace\fR suppresses messages such as the number of probes matched by the specified options and D programs and does not print column headers, the CPU ID, the probe ID, or insert newlines into the output. Only data traced and formatted by D program -statements such as \fBtrace()\fR and \fBprintf()\fR is displayed to \fBstdout\fR. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-s\fR\fR -.ad -.sp .6 -.RS 4n -Compile the specified D program source file. If the \fB-e\fR option is present, the program is compiled but instrumentation is not enabled. If the \fB-l\fR option is present, the program is compiled and the set of probes matched by it is listed, but instrumentation -is not enabled. If none of \fB-e\fR, \fB-l\fR, \fB-G\fR, or \fB-A\fR are present, the instrumentation specified by the D program is enabled and tracing begins. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-S\fR\fR -.ad -.sp .6 -.RS 4n -Show D compiler intermediate code. The D compiler produces a report of the intermediate code generated for each D program to \fBstderr\fR. 
-.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-U\fR \fIname\fR\fR -.ad -.sp .6 -.RS 4n -Undefine the specified \fIname\fR when invoking \fBcpp\fR(1) (enabled using the \fB-C\fR option). This option passes the \fB-U\fR option to each \fBcpp\fR invocation. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-v\fR\fR -.ad -.sp .6 -.RS 4n -Set verbose mode. If the \fB-v\fR option is specified, \fBdtrace\fR produces a program stability report showing the minimum interface stability and dependency level for the specified D programs. DTrace stability levels are explained in further detail in the \fISolaris Dynamic Tracing Guide\fR. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-V\fR\fR -.ad -.sp .6 -.RS 4n -Report the highest D programming interface version supported by \fBdtrace\fR. The version information is printed to \fBstdout\fR and the \fBdtrace\fR command exits. Refer to the \fISolaris Dynamic Tracing Guide\fR for -more information about DTrace versioning features. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-w\fR\fR -.ad -.sp .6 -.RS 4n -Permit destructive actions in D programs specified using the \fB-s\fR, \fB-P\fR, \fB-m\fR, \fB-f\fR, \fB-n\fR, or \fB-i\fR options. If the \fB-w\fR option is not specified, \fBdtrace\fR does not -permit the compilation or enabling of a D program that contains destructive actions. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-x\fR \fIarg\fR [\fI=val\fR]\fR -.ad -.sp .6 -.RS 4n -Enable or modify a DTrace runtime option or D compiler option. The list of options is found in the \fISolaris Dynamic Tracing Guide\fR. Boolean options are enabled by specifying their name. Options with values are set by separating the option name and -value with an equals sign (\fB=\fR). -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-X\fR \fBa | c | s | t\fR\fR -.ad -.sp .6 -.RS 4n -Specify the degree of conformance to the ISO C standard that should be selected when invoking \fBcpp\fR(1) (enabled using the \fB-C\fR option). 
-The \fB-X\fR option argument affects the value and presence of the \fB__STDC__\fR macro depending upon the value of the argument letter. -.sp -The \fB-X\fR option supports the following arguments: -.sp -.ne 2 -.mk -.na -\fB\fBa\fR\fR -.ad -.RS 5n -.rt -Default. ISO C plus K&R compatibility extensions, with semantic changes required by ISO C. This is the default mode if \fB-X\fR is not specified. The predefined macro \fB__STDC__\fR has a value of 0 when \fBcpp\fR is invoked in conjunction -with the \fB-Xa\fR option. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fBc\fR\fR -.ad -.RS 5n -.rt -Conformance. Strictly conformant ISO C, without K&R C compatibility extensions. The predefined macro \fB__STDC__\fR has a value of 1 when \fBcpp\fR is invoked in conjunction with the \fB-Xc\fR option. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fBs\fR\fR -.ad -.RS 5n -.rt -K&R C only. The macro \fB__STDC__\fR is not defined when \fBcpp\fR is invoked in conjunction with the \fB-Xs\fR option. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fBt\fR\fR -.ad -.RS 5n -.rt -Transition. ISO C plus K&R C compatibility extensions, without semantic changes required by ISO C. The predefined macro \fB__STDC__\fR has a value of 0 when \fBcpp\fR is invoked in conjunction with the \fB-Xt\fR option. -.RE - -As the \fB-X\fR option only affects how the D compiler invokes the C preprocessor, the \fB-Xa\fR and \fB-Xt\fR options are equivalent from the perspective of D and both are provided only to ease re-use of settings from a C build environment. 
-.sp -Regardless of the \fB-X\fR mode, the following additional C preprocessor definitions are always specified and valid in all modes: -.RS +4 -.TP -.ie t \(bu -.el o -\fB__sun\fR -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__unix\fR -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__SVR4\fR -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__sparc\fR (on SPARC systems only) -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__sparcv9\fR (on SPARC systems only when 64-bit programs are compiled) -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__i386\fR (on x86 systems only when 32-bit programs are compiled) -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__amd64\fR (on x86 systems only when 64-bit programs are compiled) -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__\fI`uname -s`\fR_\fI`uname -r`\fR\fR (for example, \fB__SunOS_5_10\fR) -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__SUNW_D=1\fR -.RE -.RS +4 -.TP -.ie t \(bu -.el o -\fB__SUNW_D_VERSION=0x\fIMMmmmuuu\fR\fR -.sp -Where \fIMM\fR is the major release value in hexadecimal, \fImmm\fR is the minor release value in hexadecimal, and \fIuuu\fR is the -micro release value in hexadecimal. Refer to the \fISolaris Dynamic Tracing Guide\fR for more information about DTrace versioning. -.RE -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB-Z\fR\fR -.ad -.sp .6 -.RS 4n -Permit probe descriptions that match zero probes. If the \fB-Z\fR option is not specified, \fBdtrace\fR reports an error and exits if any probe descriptions specified in D program files (\fB-s\fR option) or on the command line (\fB-P\fR, \fB-m\fR, \fB-f\fR, \fB-n\fR, or \fB-i\fR options) contain descriptions that do not match any known probes. -.RE - -.SH OPERANDS -.sp -.LP -You can specify zero or more additional arguments on the \fBdtrace\fR command line to define a set of macro variables (\fB$1\fR, \fB$2\fR, and so forth). The additional arguments can be used in D programs specified using the \fB-s\fR option -or on the command line. 
The use of macro variables is described further in the \fISolaris Dynamic Tracing Guide\fR. -.SH EXIT STATUS -.sp -.LP -The following exit values are returned: -.sp -.ne 2 -.mk -.na -\fB0\fR -.ad -.RS 5n -.rt -Successful completion. -.sp -For D program requests, an exit status of \fB0\fR indicates that programs were successfully compiled, probes were successfully enabled, or anonymous state was successfully retrieved. \fBdtrace\fR returns \fB0\fR even if the specified tracing requests -encountered errors or drops. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB1\fR\fR -.ad -.RS 5n -.rt +.Bl -tag -width indent +.It Fl 32 | Fl 64 +The D compiler produces programs using the native data model of the operating +system kernel. +If the +.Fl 32 +option is specified, +.Nm +forces the D compiler to compile a D program using the 32-bit data model. +If the +.Fl 64 +option is specified, +.Nm +forces the D compiler to compile a D program using the 64-bit data model. +These options are typically not required as +.Nm +selects the native data model as the default. +The data model affects the sizes of integer types and other language properties. +D programs compiled for either data model can be executed on both 32-bit and +64-bit kernels. +The +.Fl 32 +and +.Fl 64 +options also determine the +.Xr elf 5 +file format (ELF32 or ELF64) produced by the +.Fl G +option. +.It Fl a +Claim anonymous tracing state and display the traced data. +You can combine the +.Fl a +option with the +.Fl e +option to force +.Nm +to exit immediately after consuming the anonymous tracing state rather than +continuing to wait for new data. +.It Fl A +Generate directives for anonymous tracing and write them to +.Pa /boot/dtrace.dof . +This option constructs a set of dtrace configuration file directives to enable +the specified probes for anonymous tracing and then exits. +By default, +.Nm +attempts to store the directives to the file +.Pa /boot/dtrace.dof . 
+This behavior can be modified using the +.Fl o +option to specify an alternate output file. +.It Fl b Ar bufsz +Set the principal trace buffer size to +.Ar bufsz . +The trace buffer size can include any of the size suffixes k, m, g, or t. +If the buffer space cannot be allocated, +.Nm dtrace +attempts to reduce the buffer size or exit depending on the setting of the +bufresize property. +.It Fl c Ar cmd +Run the specified command +.Ar cmd +and exit upon its completion. +If more than one +.Fl c +option is present on the command line, +.Nm dtrace +exits when all commands have exited, reporting the exit status for each child +process as it terminates. +The process ID of the first command is made available to any D programs +specified on the command line or using the +.Fl s +option through the +.Li $target +macro variable. +.It Fl C +Run the C preprocessor +.Xr cpp 1 +over D programs before compiling them. +You can pass options to the C preprocessor using the +.Fl D , +.Fl U , +.Fl I , +and +.Fl H +options. +You can select the degree of C standard conformance if you use the +.Fl X +option. +For a description of the set of tokens defined by the D compiler when invoking +the C preprocessor, see +.Fl X . +.It Fl D Ar name Op Ns = Ns value +Define +.Ar name +when invoking +.Xr cpp 1 +(enabled using the +.Fl C +option). +If you specify an additional +.Ar value , +the name is assigned the corresponding value. +This option passes the +.Fl D +option to each +.Xr cpp 1 +invocation. +.It Fl e +Exit after compiling any requests and consuming anonymous tracing state +.Fl ( a +option) but prior to enabling any probes. +You can combine this option with the +.Fl a +option to print anonymous tracing data and exit. +You can also combine this option with D compiler options. +This combination verifies that the programs compile without actually executing +them and enabling the corresponding instrumentation. 
+.It Fl f Oo Oo Ar provider : Oc Ar module : Oc Ar function Oo Oo Ar predicate \ + Oc Ar action Oc +Specify function name to trace or list +.Fl ( l +option). +The corresponding argument can include any of the probe description forms +.Ar provider:module:function , +.Ar module:function , +or +.Ar function . +Unspecified probe description fields are left blank and match any probes +regardless of the values in those fields. +If no qualifiers other than +.Ar function +are specified in the description, all probes with the corresponding +.Ar function +are matched. +The +.Fl f +argument can be suffixed with an optional D probe clause. +You can specify more than one +.Fl f +option on the command line at a time. +.It Fl F +Coalesce trace output by identifying function entry and return. +Function entry probe reports are indented and their output is prefixed with +.Ql -> . +Function return probe reports are unindented and their output is prefixed with +.Ql <- . +System call entry probe reports are indented and their output is prefixed with +.Ql => . +System call return probe reports are unindented and their output is prefixed +with +.Ql <= . +.It Fl G +Generate an ELF file containing an embedded DTrace program. +The DTrace probes specified in the program are saved inside of a relocatable ELF +object which can be linked into another program. +If the +.Fl o +option is present, the ELF file is saved using the pathname specified as the +argument for this operand. +If the +.Fl o +option is not present and the DTrace program is contained within a file whose +name is +.Ar filename.d , +then the ELF file is saved using the name +.Ar filename.o . +Otherwise the ELF file is saved using the name d.out. +.It Fl h +Generate a header file containing macros that correspond to probes in the +specified provider definitions. +This option should be used to generate a header file that is included by other +source files for later use with the +.Fl G +option.
+If the +.Fl o +option is present, the header file is saved using the pathname specified as the +argument for that option. +If the +.Fl o +option is not present and the DTrace program is contained within a file whose +name is +.Ar filename.d , +then the header file is saved using the name +.Ar filename.h . +.It Fl H +Print the pathnames of included files when invoking +.Xr cpp 1 +(enabled using the +.Fl C +option). +This option passes the +.Fl H +option to each +.Xr cpp 1 +invocation, causing it to display the list of pathnames, one for each line, to +standard error. +.It Fl i Ar probe-id Op Oo Ar predicate Oc Ar action +Specify probe identifier +.Ar ( probe-id ) +to trace or list +.Fl ( l +option). +You can specify probe IDs using decimal integers as shown by `dtrace -l`. +The +.Fl i +argument can be suffixed with an optional D probe clause. +You can specify more than one +.Fl i +option at a time. +.It Fl I Ar path +Add the specified directory +.Ar path +to the search path for #include files when invoking +.Xr cpp 1 +(enabled using the +.Fl C +option). +This option passes the +.Fl I +option to each +.Xr cpp 1 +invocation. +The specified +.Ar path +is inserted into the search path ahead of the default directory list. +.It Fl l +List probes instead of enabling them. +If the +.Fl l +option is specified, +.Nm +produces a report of the probes matching the descriptions given using the +.Fl P , m , f , n , i , +and +.Fl s +options. +If none of these options are specified, this option lists all probes. +.It Fl L Ar path +Add the specified directory +.Ar path +to the search path for DTrace libraries. +DTrace libraries are used to contain common definitions that can be used when +writing D programs. +The specified +.Ar path +is added after the default library search path. +.It Fl m Oo Ar provider : Oc Ar module Oo Oo Ar predicate Oc Ar action Oc +Specify module name to trace or list +.Fl ( l +option).
+The corresponding argument can include any of the probe description forms +.Ar provider:module +or +.Ar module . +Unspecified probe description fields are left blank and match any probes +regardless of the values in those fields. +If no qualifiers other than +.Ar module +are specified in the description, all probes with a corresponding +.Ar module +are matched. +The +.Fl m +argument can be suffixed with an optional D probe clause. +More than one +.Fl m +option can be specified on the command line at a time. +.It Fl n Oo Oo Oo Ar provider : Oc Ar module : Oc Ar function : Oc Ar name \ + Oo Oo Ar predicate Oc Ar action Oc +Specify probe name to trace or list +.Fl ( l +option). +The corresponding argument can include any of the probe description forms +.Ar provider:module:function:name , module:function:name , function:name , +or +.Ar name . +Unspecified probe description fields are left blank and match any probes +regardless of the values in those fields. +If no qualifiers other than +.Ar name +are specified in the description, all probes with a corresponding +.Ar name +are matched. +The +.Fl n +argument can be suffixed with an optional D probe clause. +More than one +.Fl n +option can be specified on the command line at a time. +.It Fl o Ar output +Specify the +.Ar output +file for the +.Fl A , G , +and +.Fl l +options, or for the traced data itself. +If the +.Fl A +option is present and +.Fl o +is not present, the default output file is +.Pa /boot/dtrace.dof . +If the +.Fl G +option is present and the +.Fl s +option's argument is of the form +.Ar filename.d +and +.Fl o +is not present, the default output file is +.Ar filename.o . +Otherwise the default output file is +.Ar d.out . +.It Fl p Ar pid +Grab the specified process-ID +.Ar pid , +cache its symbol tables, and exit upon its completion. +If more than one +.Fl p +option is present on the command line, +.Nm +exits when all commands have exited, reporting the exit status for each process +as it terminates. 
+The first process-ID is made available to any D programs specified on the +command line or using the +.Fl s +option through the +.Li $target +macro variable. +.It Fl P Ar provider Oo Oo Ar predicate Oc Ar action Oc +Specify provider name to trace or list +.Fl ( l +option). +The remaining probe description fields module, function, and name are left +blank and match any probes regardless of the values in those fields. +The +.Fl P +argument can be suffixed with an optional D probe clause. +You can specify more than one +.Fl P +option on the command line at a time. +.It Fl q +Set quiet mode. +.Nm +suppresses messages such as the number of probes matched by the specified +options and D programs and does not print column headers, the CPU ID, the probe +ID, or insert newlines into the output. +Only data traced and formatted by D program statements such as +.Ql trace() +and +.Ql printf() +is displayed to standard output. +.It Fl s Ar script +Compile the specified D program source file. +If the +.Fl e +option is present, the program is compiled but instrumentation is not enabled. +If the +.Fl l +option is present, the program is compiled and the set of probes matched by it +is listed, but instrumentation is not enabled. +If none of +.Fl e , l , G , +or +.Fl A +are present, the instrumentation specified by the D program is enabled and +tracing begins. +.It Fl S +Show D compiler intermediate code. +The D compiler produces a report of the intermediate code generated for each D +program to standard error. +.It Fl U Ar name +Undefine the specified +.Ar name +when invoking +.Xr cpp 1 +(enabled using the +.Fl C +option). +This option passes the +.Fl U +option to each +.Xr cpp 1 +invocation. +.It Fl v +Set verbose mode. +If the +.Fl v +option is specified, +.Nm +produces a program stability report showing the minimum interface stability and +dependency level for the specified D programs. +.It Fl V +Report the highest D programming interface version supported by +.Nm .
+The version information is printed to standard output and the +.Nm +command exits. +.It Fl w +Permit destructive actions in D programs specified using the +.Fl s , P , m , f , n , +or +.Fl i +options. +If the +.Fl w +option is not specified, +.Nm +does not permit the compilation or enabling of a D program that contains +destructive actions. +.It Fl x Ar arg Op Ns = Ns value +Enable or modify a DTrace runtime option or D compiler option. +Boolean options are enabled by specifying their name. +Options with values are set by separating the option name and value with an +equals sign (=). +.It Fl X Cm a | c | s | t +Specify the degree of conformance to the ISO C standard that should be selected +when invoking +.Xr cpp 1 +(enabled using the +.Fl C +option). +The +.Fl X +option argument affects the value and presence of the __STDC__ macro depending +upon the value of the argument letter. +.sp +The +.Fl X +option supports the following arguments: +.Bl -tag -width indent +.It a +Default. +ISO C plus K&R compatibility extensions, with semantic changes required by ISO +C. +This is the default mode if +.Fl X +is not specified. +The predefined macro __STDC__ has a value of 0 when +.Xr cpp 1 +is invoked in conjunction with the +.Fl Xa +option. +.It c +Conformance. +Strictly conformant ISO C, without K&R C compatibility extensions. +The predefined macro __STDC__ has a value of 1 when +.Xr cpp 1 +is invoked in conjunction with the +.Fl \&Xc +option. +.It s +K&R C only. +The macro __STDC__ is not defined when +.Xr cpp 1 +is invoked in conjunction with the +.Fl Xs +option. +.It t +Transition. +ISO C plus K&R C compatibility extensions, without semantic changes required by +ISO C. +The predefined macro __STDC__ has a value of 0 when +.Xr cpp 1 +is invoked in conjunction with the +.Fl Xt +option. 
+.El +.Pp +As the +.Fl X +option only affects how the D compiler invokes the C preprocessor, the +.Fl Xa +and +.Fl Xt +options are equivalent from the perspective of D and both are provided only to +ease re-use of settings from a C build environment. +.Pp +Regardless of the +.Fl X +mode, the following additional C preprocessor definitions are always specified +and valid in all modes: +.Bl -bullet -offset indent +.It +__sun +.It +__unix +.It +__SVR4 +.It +__sparc (on SPARC systems only) +.It +__sparcv9 (on SPARC systems only when 64-bit programs are compiled) +.It +__i386 (on x86 systems only when 32-bit programs are compiled) +.It +__amd64 (on x86 systems only when 64-bit programs are compiled) +.It +__`uname -s`_`uname -r` (for example, +.Ql FreeBSD_9.2-RELEASE ) . +.It +__SUNW_D=1 +.It +.No __SUNW_D_VERSION=0x Ns Ar MMmmmuuu +.Pp +Where +.Ar MM +is the major release value in hexadecimal, +.Ar mmm +is the minor release value in hexadecimal, and +.Ar uuu +is the micro release value in hexadecimal. +.El +.It Fl Z +Permit probe descriptions that match zero probes. +If the +.Fl Z +option is not specified, +.Nm +reports an error and exits if any probe descriptions specified in D program +files +.Fl ( s +option) or on the command line +.Fl ( P , m , f , n , +or +.Fl i +options) contain descriptions that do not match any known probes. +.El +.Sh OPERANDS +You can specify zero or more additional arguments on the +.Nm +command line to define a set of macro variables ($1, $2, and so forth). +The additional arguments can be used in D programs specified using the +.Fl s +option or on the command line. +.Sh FILES +.Bl -tag -width /boot/dtrace.dof -compact +.It Pa /boot/dtrace.dof +File for anonymous tracing directives. +.El +.Sh EXIT STATUS +The following exit statuses are returned: +.Bl -tag -width indent +.It 0 +Successful completion.
+.Pp +For D program requests, an exit status of 0 indicates that programs were +successfully compiled, probes were successfully enabled, or anonymous state +was successfully retrieved. +.Nm +returns 0 even if the specified tracing requests encountered errors or drops. +.It 1 An error occurred. -.sp -For D program requests, an exit status of \fB1\fR indicates that program compilation failed or that the specified request could not be satisfied. -.RE - -.sp -.ne 2 -.mk -.na -\fB\fB2\fR\fR -.ad -.RS 5n -.rt +.Pp +For D program requests, an exit status of 1 indicates that program compilation +failed or that the specified request could not be satisfied. +.It 2 Invalid command line options or arguments were specified. -.RE - -.SH ATTRIBUTES -.sp -.LP -See \fBattributes\fR(5) for descriptions of the following attributes: -.sp - -.sp -.TS -tab() box; -cw(2.75i) |cw(2.75i) -lw(2.75i) |lw(2.75i) -. -ATTRIBUTE TYPEATTRIBUTE VALUE -_ -AvailabilitySUNWdtrc -_ -Interface StabilitySee below. -.TE - -.sp -.LP -The command-line syntax is Committed. The human-readable output is Uncommitted. -.SH SEE ALSO -.sp -.LP -\fBcpp\fR(1), \fBisainfo\fR(1), \fBlibdtrace\fR(3LIB), \fBdriver.conf\fR(4), \fBattributes\fR(5), \fBdtrace\fR(7D) -.sp -.LP -\fISolaris Dynamic Tracing Guide\fR +.El +.Sh SEE ALSO +.Xr cpp 1 , +.Xr dtruss 1 , +.Xr elf 5 , +.Xr SDT 9 +.Rs +.%T Solaris Dynamic Tracing Guide +.Re Index: src/external/cddl/osnet/dist/cmd/dtrace/dtrace.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/dtrace/dtrace.c,v retrieving revision 1.10 diff -u -p -r1.10 dtrace.c --- src/external/cddl/osnet/dist/cmd/dtrace/dtrace.c 5 Jun 2017 21:19:32 -0000 1.10 +++ src/external/cddl/osnet/dist/cmd/dtrace/dtrace.c 7 Jun 2017 18:38:01 -0000 @@ -23,8 +23,10 @@ * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ - -#pragma ident "%Z%%M% %I% %E% SMI" +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. + */ #include #include @@ -41,13 +43,16 @@ #include #include #include -#if defined(sun) +#ifdef illumos #include #endif #include -#if defined(sun) +#ifdef illumos #include #endif +#ifdef __FreeBSD__ +#include +#endif typedef struct dtrace_cmd { void (*dc_func)(struct dtrace_cmd *); /* function to compile arg */ @@ -88,6 +93,9 @@ static int g_flowindent; static int g_intr; static int g_impatient; static int g_newline; +#if defined(__FreeBSD__) || defined(__NetBSD__) +static int g_siginfo; +#endif static int g_total; static int g_cflags; static int g_oflags; @@ -99,7 +107,7 @@ static int g_grabanon = 0; static const char *g_ofile = NULL; static FILE *g_ofp; static dtrace_hdl_t *g_dtp; -#if defined(sun) +#ifdef illumos static char *g_etcfile = "/etc/system"; static const char *g_etcbegin = "* vvvv Added by DTrace"; static const char *g_etcend = "* ^^^^ Added by DTrace"; @@ -195,6 +203,13 @@ fatal(const char *fmt, ...) verror(fmt, ap); va_end(ap); + /* + * Close the DTrace handle to ensure that any controlled processes are + * correctly restored and continued. + */ + if (g_dtp) + dtrace_close(g_dtp); + exit(E_ERROR); } @@ -202,7 +217,7 @@ fatal(const char *fmt, ...) static void __printflike(1, 2) __dead dfatal(const char *fmt, ...) { -#if !defined(sun) && defined(NEED_ERRLOC) +#if !defined(illumos) && defined(NEED_ERRLOC) char *p_errfile = NULL; int errline = 0; #endif @@ -223,7 +238,7 @@ dfatal(const char *fmt, ...) 
(void) fprintf(stderr, "%s\n", dtrace_errmsg(g_dtp, dtrace_errno(g_dtp))); } -#if !defined(sun) && defined(NEED_ERRLOC) +#if !defined(illumos) && defined(NEED_ERRLOC) dt_get_errloc(g_dtp, &p_errfile, &errline); if (p_errfile != NULL) printf("File '%s', line %d\n", p_errfile, errline); @@ -388,7 +403,42 @@ dof_prune(const char *fname) free(buf); } -#if defined(sun) +#ifdef __FreeBSD__ +/* + * Use nextboot(8) to tell the loader to load DTrace kernel modules during + * the next boot of the system. The nextboot(8) configuration is removed during + * boot, so it will not persist indefinitely. + */ +static void +bootdof_add(void) +{ + char * const nbargv[] = { + "nextboot", "-a", + "-e", "dtraceall_load=\"YES\"", + "-e", "dtrace_dof_load=\"YES\"", + "-e", "dtrace_dof_name=\"/boot/dtrace.dof\"", + "-e", "dtrace_dof_type=\"dtrace_dof\"", + NULL, + }; + pid_t child; + int err, status; + + err = posix_spawnp(&child, "nextboot", NULL, NULL, nbargv, + NULL); + if (err != 0) { + error("failed to execute nextboot: %s", strerror(err)); + exit(E_ERROR); + } + + if (waitpid(child, &status, 0) != child) + fatal("waiting for nextboot"); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + error("nextboot returned with status %d", status); + exit(E_ERROR); + } +} +#endif +#ifdef illumos static void etcsystem_prune(void) { @@ -499,12 +549,13 @@ etcsystem_add(void) error("added forceload directives to %s\n", g_ofile); } -#endif +#endif /* illumos__ */ static void print_probe_info(const dtrace_probeinfo_t *p) { char buf[BUFSIZ]; + char *user; int i; oprintf("\n\tProbe Description Attributes\n"); @@ -528,10 +579,14 @@ print_probe_info(const dtrace_probeinfo_ oprintf("\n\tArgument Types\n"); for (i = 0; i < p->dtp_argc; i++) { + if (p->dtp_argv[i].dtt_flags & DTT_FL_USER) + user = "userland "; + else + user = ""; if (ctf_type_name(p->dtp_argv[i].dtt_ctfp, p->dtp_argv[i].dtt_type, buf, sizeof (buf)) == NULL) (void) strlcpy(buf, "(unknown)", sizeof (buf)); - oprintf("\t\targs[%d]: 
%s\n", i, buf); + oprintf("\t\targs[%d]: %s%s\n", i, user, buf); } if (p->dtp_argc == 0) @@ -629,25 +684,26 @@ anon_prog(const dtrace_cmd_t *dcp, dof_h p = (uchar_t *)dof; q = p + dof->dofh_loadsz; -#if defined(sun) - oprintf("dof-data-%d=0x%x", n, *p++); - - while (p < q) - oprintf(",0x%x", *p++); - - oprintf(";\n"); -#else +#ifdef __FreeBSD__ /* - * On FreeBSD, the DOF data is handled as a kernel environment (kenv) - * string. We use two hex characters per DOF byte. + * On FreeBSD, the DOF file is read directly during boot - just write + * two hex characters per byte. */ - oprintf("dof-data-%d=%02x", n, *p++); + oprintf("dof-data-%d=", n); while (p < q) oprintf("%02x", *p++); oprintf("\n"); #endif +#ifdef illumos + oprintf("dof-data-%d=0x%x", n, *p++); + + while (p < q) + oprintf(",0x%x", *p++); + + oprintf(";\n"); +#endif dtrace_dof_destroy(g_dtp, dof); } @@ -671,9 +727,12 @@ link_prog(dtrace_cmd_t *dcp) p[0] = '\0'; /* strip .d suffix */ (void) snprintf(dcp->dc_ofile, sizeof (dcp->dc_ofile), "%s.o", basename(dcp->dc_arg)); + } else if (g_cmdc > 1) { + (void) snprintf(dcp->dc_ofile, sizeof (dcp->dc_ofile), + "d.out.%td", dcp - g_cmdv); } else { (void) snprintf(dcp->dc_ofile, sizeof (dcp->dc_ofile), - g_cmdc > 1 ? 
"%s.%d" : "%s", "d.out", (int)(dcp - g_cmdv)); + "d.out"); } if (dtrace_program_link(g_dtp, dcp->dc_prog, DTRACE_D_PROBES, @@ -693,6 +752,9 @@ list_probe(dtrace_hdl_t *dtp, const dtra if (g_verbose && dtrace_probe_info(dtp, pdp, &p) == 0) print_probe_info(&p); + if (g_intr != 0) + return (1); + return (0); } @@ -868,16 +930,16 @@ setopthandler(const dtrace_setoptdata_t #define BUFDUMPSTR(ptr, field) \ (void) printf("%s: %20s => ", g_pname, #field); \ if ((ptr)->field != NULL) { \ - const char *xc = (ptr)->field; \ + const char *c = (ptr)->field; \ (void) printf("\""); \ do { \ - if (*xc == '\n') { \ + if (*c == '\n') { \ (void) printf("\\n"); \ continue; \ } \ \ - (void) printf("%c", *xc); \ - } while (*xc++ != '\0'); \ + (void) printf("%c", *c); \ + } while (*c++ != '\0'); \ (void) printf("\"\n"); \ } else { \ (void) printf("\n"); \ @@ -914,7 +976,7 @@ bufhandler(const dtrace_bufdata_t *bufda { "AGGFORMAT", DTRACE_BUFDATA_AGGFORMAT }, { "AGGLAST", DTRACE_BUFDATA_AGGLAST }, { "???", UINT32_MAX }, - { NULL, 0 } + { NULL } }; if (bufdata->dtbda_probe != NULL) { @@ -971,7 +1033,7 @@ bufhandler(const dtrace_bufdata_t *bufda uint8_t *data; int lim = rec->dtrd_size; - (void) snprintf(buf, end - buf, "%d (data: ", rec->dtrd_offset); + (void) sprintf(buf, "%d (data: ", rec->dtrd_offset); c = buf + strlen(buf); if (lim > sizeof (uint64_t)) @@ -1070,7 +1132,7 @@ chew(const dtrace_probedata_t *data, voi (void) snprintf(name, sizeof (name), "%s:%s", pd->dtpd_func, pd->dtpd_name); - oprintf("%3d %6d %32s ", (int)cpu, pd->dtpd_id, name); + oprintf("%3d %6d %32s ", cpu, pd->dtpd_id, name); } } else { int indent = data->dtpda_indent; @@ -1090,7 +1152,7 @@ chew(const dtrace_probedata_t *data, voi data->dtpda_prefix, pd->dtpd_func); } - oprintf("%3d %-41s ", (int)cpu, name); + oprintf("%3d %-41s ", cpu, name); } return (DTRACE_CONSUME_THIS); @@ -1102,19 +1164,19 @@ go(void) int i; struct { - const char *name; - const char *optname; + char *name; + char *optname; dtrace_optval_t val; 
} bufs[] = { - { "buffer size", "bufsize", 0 }, - { "aggregation size", "aggsize", 0 }, - { "speculation size", "specsize", 0 }, - { "dynamic variable size", "dynvarsize", 0 }, - { NULL, NULL, 0 } + { "buffer size", "bufsize" }, + { "aggregation size", "aggsize" }, + { "speculation size", "specsize" }, + { "dynamic variable size", "dynvarsize" }, + { NULL } }, rates[] = { - { "cleaning rate", "cleanrate", 0 }, - { "status rate", "statusrate", 0 }, - { NULL, NULL ,0 } + { "cleaning rate", "cleanrate" }, + { "status rate", "statusrate" }, + { NULL } }; for (i = 0; bufs[i].name != NULL; i++) { @@ -1159,7 +1221,7 @@ go(void) for (i = 0; rates[i].name != NULL; i++) { dtrace_optval_t nval; - const char *dir; + char *dir; if (rates[i].val == DTRACEOPT_UNSET) continue; @@ -1203,11 +1265,48 @@ intr(int signo) g_impatient = 1; } +#ifdef __FreeBSD__ +static void +siginfo(int signo __unused) +{ + + g_siginfo++; + g_newline = 1; +} +#endif + +static void +installsighands(void) +{ + struct sigaction act, oact; + + (void) sigemptyset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = intr; + + if (sigaction(SIGINT, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) + (void) sigaction(SIGINT, &act, NULL); + + if (sigaction(SIGTERM, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) + (void) sigaction(SIGTERM, &act, NULL); + +#ifdef __FreeBSD__ + if (sigaction(SIGPIPE, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) + (void) sigaction(SIGPIPE, &act, NULL); + + if (sigaction(SIGUSR1, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) + (void) sigaction(SIGUSR1, &act, NULL); + + act.sa_handler = siginfo; + if (sigaction(SIGINFO, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) + (void) sigaction(SIGINFO, &act, NULL); +#endif +} + int main(int argc, char *argv[]) { dtrace_bufdesc_t buf; - struct sigaction act, oact; dtrace_status_t status[2]; dtrace_optval_t opt; dtrace_cmd_t *dcp; @@ -1399,6 +1498,7 @@ main(int argc, char *argv[]) (void) dtrace_setopt(g_dtp, "bufsize", "4m"); (void) 
dtrace_setopt(g_dtp, "aggsize", "4m"); #endif + (void) dtrace_setopt(g_dtp, "temporal", "yes"); /* * If -G is specified, enable -xlink=dynamic and -xunodefs to permit @@ -1676,19 +1776,19 @@ main(int argc, char *argv[]) case DMODE_ANON: if (g_ofile == NULL) -#if defined(sun) +#ifdef illumos g_ofile = "/kernel/drv/dtrace.conf"; -#else +#endif +#ifdef __FreeBSD__ /* * On FreeBSD, anonymous DOF data is written to - * the DTrace DOF file that the boot loader will - * read if booting with the DTrace option. + * the DTrace DOF file. */ g_ofile = "/boot/dtrace.dof"; #endif dof_prune(g_ofile); /* strip out any old DOF directives */ -#if defined(sun) +#ifdef illumos etcsystem_prune(); /* string out any forceload directives */ #endif @@ -1721,7 +1821,11 @@ main(int argc, char *argv[]) * that itself contains a #pragma D option quiet. */ error("saved anonymous enabling in %s\n", g_ofile); -#if defined(sun) + +#ifdef __FreeBSD__ + bootdof_add(); +#endif +#ifdef illumos etcsystem_add(); error("run update_drv(1M) or reboot to enable changes\n"); #endif @@ -1758,6 +1862,8 @@ main(int argc, char *argv[]) if (g_ofile != NULL && (g_ofp = fopen(g_ofile, "a")) == NULL) fatal("failed to open output file '%s'", g_ofile); + installsighands(); + oprintf("%5s %10s %17s %33s %s\n", "ID", "PROVIDER", "MODULE", "FUNCTION", "NAME"); @@ -1779,7 +1885,7 @@ main(int argc, char *argv[]) } if (g_ofile == NULL) { - char *pv; + char *p; if (g_cmdc > 1) { (void) fprintf(stderr, "%s: -h requires an " @@ -1789,8 +1895,8 @@ main(int argc, char *argv[]) return (E_USAGE); } - if ((pv = strrchr(g_cmdv[0].dc_arg, '.')) == NULL || - strcmp(pv, ".d") != 0) { + if ((p = strrchr(g_cmdv[0].dc_arg, '.')) == NULL || + strcmp(p, ".d") != 0) { (void) fprintf(stderr, "%s: -h requires an " "output file if no scripts are " "specified\n", g_pname); @@ -1798,9 +1904,9 @@ main(int argc, char *argv[]) return (E_USAGE); } - pv[0] = '\0'; /* strip .d suffix */ - g_ofile = pv = g_cmdv[0].dc_ofile; - (void) snprintf(pv, sizeof 
(g_cmdv[0].dc_ofile), + p[0] = '\0'; /* strip .d suffix */ + g_ofile = p = g_cmdv[0].dc_ofile; + (void) snprintf(p, sizeof (g_cmdv[0].dc_ofile), "%s.h", basename(g_cmdv[0].dc_arg)); } @@ -1843,20 +1949,7 @@ main(int argc, char *argv[]) if (opt != DTRACEOPT_UNSET) notice("allowing destructive actions\n"); - (void) sigemptyset(&act.sa_mask); - act.sa_flags = 0; - act.sa_handler = intr; - - if (sigaction(SIGINT, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) - (void) sigaction(SIGINT, &act, NULL); - - if (sigaction(SIGTERM, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) - (void) sigaction(SIGTERM, &act, NULL); - -#if !defined(sun) - if (sigaction(SIGUSR1, NULL, &oact) == 0 && oact.sa_handler != SIG_IGN) - (void) sigaction(SIGUSR1, &act, NULL); -#endif + installsighands(); /* * Now that tracing is active and we are ready to consume trace data, @@ -1872,6 +1965,13 @@ main(int argc, char *argv[]) if (!g_intr && !done) dtrace_sleep(g_dtp); +#if defined(__FreeBSD__) || defined(__NetBSD__) + if (g_siginfo) { + (void)dtrace_aggregate_print(g_dtp, g_ofp, NULL); + g_siginfo = 0; + } +#endif + if (g_newline) { /* * Output a newline just to make the output look Index: src/external/cddl/osnet/dist/cmd/zdb/zdb.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zdb/zdb.c,v retrieving revision 1.6 diff -u -p -r1.6 zdb.c --- src/external/cddl/osnet/dist/cmd/zdb/zdb.c 28 Mar 2014 03:18:24 -0000 1.6 +++ src/external/cddl/osnet/dist/cmd/zdb/zdb.c 27 Mar 2017 06:26:21 -0000 @@ -18,12 +18,15 @@ * * CDDL HEADER END */ + /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2016 by Delphix. All rights reserved. 
+ * Copyright (c) 2014 Integros [integros.com] */ #include +#include #include #include #include @@ -34,6 +37,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -52,22 +58,30 @@ #include #include #include -#undef ZFS_MAXNAMELEN +#include +#include #undef verify #include -#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ - zio_compress_table[(idx)].ci_name : "UNKNOWN") -#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ - zio_checksum_table[(idx)].ci_name : "UNKNOWN") -#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \ - dmu_ot[(idx)].ot_name : "UNKNOWN") -#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES) +#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ + zio_compress_table[(idx)].ci_name : "UNKNOWN") +#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ + zio_checksum_table[(idx)].ci_name : "UNKNOWN") +#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \ + dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \ + dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN") +#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \ + (((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ? 
\ + DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES)) #ifndef lint -extern int zfs_recover; +extern boolean_t zfs_recover; +extern uint64_t zfs_arc_max, zfs_arc_meta_limit; +extern int zfs_vdev_async_read_max_active; #else -int zfs_recover; +boolean_t zfs_recover; +uint64_t zfs_arc_max, zfs_arc_meta_limit; +int zfs_vdev_async_read_max_active; #endif const char cmdname[] = "zdb"; @@ -76,9 +90,12 @@ uint8_t dump_opt[256]; typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); extern void dump_intent_log(zilog_t *); -uint64_t *zopt_object = NULL; -int zopt_objects = 0; -libzfs_handle_t *g_zfs; +static uint64_t *zopt_object = NULL; +static int zopt_objects = 0; +static libzfs_handle_t *g_zfs; +static uint64_t max_inflight = 1000; + +static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *); /* * These libumem hooks provide a reasonable set of defaults for the allocator's @@ -100,13 +117,17 @@ static void usage(void) { (void) fprintf(stderr, - "Usage: %s [-CumdibcsvhL] poolname [object...]\n" - " %s [-div] dataset [object...]\n" - " %s -m [-L] poolname [vdev [metaslab...]]\n" - " %s -R poolname vdev:offset:size[:flags]\n" - " %s -S poolname\n" - " %s -l [-u] device\n" - " %s -C\n\n", + "Usage: %s [-CumMdibcsDvhLXFPAG] [-t txg] [-e [-p path...]] " + "[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n" + " %s [-divPA] [-e -p path...] 
[-U config] dataset " + "[object...]\n" + " %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " + "poolname [vdev [metaslab...]]\n" + " %s -R [-A] [-e [-p path...]] poolname " + "vdev:offset:size[:flags]\n" + " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n" + " %s -l [-uA] device\n" + " %s -C [-A] [-U config]\n\n", cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname); (void) fprintf(stderr, " Dataset name must include at least one " @@ -123,9 +144,11 @@ usage(void) (void) fprintf(stderr, " -h pool history\n"); (void) fprintf(stderr, " -b block statistics\n"); (void) fprintf(stderr, " -m metaslabs\n"); + (void) fprintf(stderr, " -M metaslab groups\n"); (void) fprintf(stderr, " -c checksum all metadata (twice for " "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); + (void) fprintf(stderr, " -D dedup statistics\n"); (void) fprintf(stderr, " -S simulate dedup to measure effect\n"); (void) fprintf(stderr, " -v verbose (applies to all others)\n"); (void) fprintf(stderr, " -l dump label contents\n"); @@ -134,7 +157,7 @@ usage(void) (void) fprintf(stderr, " -R read and display block from a " "device\n\n"); (void) fprintf(stderr, " Below options are intended for use " - "with other options (except -l):\n"); + "with other options:\n"); (void) fprintf(stderr, " -A ignore assertions (-A), enable " "panic recovery (-AA) or both (-AAA)\n"); (void) fprintf(stderr, " -F attempt automatic rewind within " @@ -147,14 +170,31 @@ usage(void) "has altroot/not in a cachefile\n"); (void) fprintf(stderr, " -p -- use one or more with " "-e to specify path to vdev dir\n"); + (void) fprintf(stderr, " -x -- " + "dump all read blocks into specified directory\n"); + (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t -- highest txg to use when " "searching for uberblocks\n"); + (void) fprintf(stderr, " -I -- " + "specify the maximum number of " + "checksumming I/Os [default is 200]\n"); + (void) 
fprintf(stderr, " -G dump zfs_dbgmsg buffer before " + "exiting\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); exit(1); } +static void +dump_debug_buffer() +{ + if (dump_opt['G']) { + (void) printf("\n"); + zfs_dbgmsg_print("zdb"); + } +} + /* * Called for usage errors that are discovered after a call to spa_open(), * dmu_bonus_hold(), or pool_match(). abort() is called for other errors. @@ -171,6 +211,8 @@ fatal(const char *fmt, ...) va_end(ap); (void) fprintf(stderr, "\n"); + dump_debug_buffer(); + exit(1); } @@ -193,18 +235,48 @@ dump_packed_nvlist(objset_t *os, uint64_ nvlist_free(nv); } -const char dump_zap_stars[] = "****************************************"; -const int dump_zap_width = sizeof (dump_zap_stars) - 1; +/* ARGSUSED */ +static void +dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) +{ + spa_history_phys_t *shp = data; + + if (shp == NULL) + return; + + (void) printf("\t\tpool_create_len = %llu\n", + (u_longlong_t)shp->sh_pool_create_len); + (void) printf("\t\tphys_max_off = %llu\n", + (u_longlong_t)shp->sh_phys_max_off); + (void) printf("\t\tbof = %llu\n", + (u_longlong_t)shp->sh_bof); + (void) printf("\t\teof = %llu\n", + (u_longlong_t)shp->sh_eof); + (void) printf("\t\trecords_lost = %llu\n", + (u_longlong_t)shp->sh_records_lost); +} static void -dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) +zdb_nicenum(uint64_t num, char *buf) +{ + if (dump_opt['P']) + (void) sprintf(buf, "%llu", (longlong_t)num); + else + nicenum(num, buf); +} + +const char histo_stars[] = "****************************************"; +const int histo_width = sizeof (histo_stars) - 1; + +static void +dump_histogram(const uint64_t *histo, int size, int offset) { int i; - int minidx = ZAP_HISTOGRAM_SIZE - 1; + int minidx = size - 1; int maxidx = 0; uint64_t max = 0; - for (i = 0; i < ZAP_HISTOGRAM_SIZE; 
i++) { + for (i = 0; i < size; i++) { if (histo[i] > max) max = histo[i]; if (histo[i] > 0 && i > maxidx) @@ -213,12 +285,14 @@ dump_zap_histogram(uint64_t histo[ZAP_HI minidx = i; } - if (max < dump_zap_width) - max = dump_zap_width; + if (max < histo_width) + max = histo_width; - for (i = minidx; i <= maxidx; i++) - (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], - &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); + for (i = minidx; i <= maxidx; i++) { + (void) printf("\t\t\t%3u: %6llu %s\n", + i + offset, (u_longlong_t)histo[i], + &histo_stars[(max - histo[i]) * histo_width / max]); + } } static void @@ -269,19 +343,19 @@ dump_zap_stats(objset_t *os, uint64_t ob (u_longlong_t)zs.zs_salt); (void) printf("\t\tLeafs with 2^n pointers:\n"); - dump_zap_histogram(zs.zs_leafs_with_2n_pointers); + dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0); (void) printf("\t\tBlocks with n*5 entries:\n"); - dump_zap_histogram(zs.zs_blocks_with_n5_entries); + dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0); (void) printf("\t\tBlocks n/10 full:\n"); - dump_zap_histogram(zs.zs_blocks_n_tenths_full); + dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0); (void) printf("\t\tEntries with n chunks:\n"); - dump_zap_histogram(zs.zs_entries_using_n_chunks); + dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0); (void) printf("\t\tBuckets with n entries:\n"); - dump_zap_histogram(zs.zs_buckets_with_n_entries); + dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0); } /*ARGSUSED*/ @@ -359,6 +433,79 @@ dump_zap(objset_t *os, uint64_t object, zap_cursor_fini(&zc); } +static void +dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size) +{ + bpobj_phys_t *bpop = data; + char bytes[32], comp[32], uncomp[32]; + + if (bpop == NULL) + return; + + zdb_nicenum(bpop->bpo_bytes, bytes); + zdb_nicenum(bpop->bpo_comp, comp); + zdb_nicenum(bpop->bpo_uncomp, uncomp); + + (void) 
printf("\t\tnum_blkptrs = %llu\n", + (u_longlong_t)bpop->bpo_num_blkptrs); + (void) printf("\t\tbytes = %s\n", bytes); + if (size >= BPOBJ_SIZE_V1) { + (void) printf("\t\tcomp = %s\n", comp); + (void) printf("\t\tuncomp = %s\n", uncomp); + } + if (size >= sizeof (*bpop)) { + (void) printf("\t\tsubobjs = %llu\n", + (u_longlong_t)bpop->bpo_subobjs); + (void) printf("\t\tnum_subobjs = %llu\n", + (u_longlong_t)bpop->bpo_num_subobjs); + } + + if (dump_opt['d'] < 5) + return; + + for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) { + char blkbuf[BP_SPRINTF_LEN]; + blkptr_t bp; + + int err = dmu_read(os, object, + i * sizeof (bp), sizeof (bp), &bp, 0); + if (err != 0) { + (void) printf("got error %u from dmu_read\n", err); + break; + } + snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp); + (void) printf("\t%s\n", blkbuf); + } +} + +/* ARGSUSED */ +static void +dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size) +{ + dmu_object_info_t doi; + + VERIFY0(dmu_object_info(os, object, &doi)); + uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP); + + int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0); + if (err != 0) { + (void) printf("got error %u from dmu_read\n", err); + kmem_free(subobjs, doi.doi_max_offset); + return; + } + + int64_t last_nonzero = -1; + for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) { + if (subobjs[i] != 0) + last_nonzero = i; + } + + for (int64_t i = 0; i <= last_nonzero; i++) { + (void) printf("\t%llu\n", (longlong_t)subobjs[i]); + } + kmem_free(subobjs, doi.doi_max_offset); +} + /*ARGSUSED*/ static void dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size) @@ -369,6 +516,71 @@ dump_ddt_zap(objset_t *os, uint64_t obje /*ARGSUSED*/ static void +dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size) +{ + zap_cursor_t zc; + zap_attribute_t attr; + + dump_zap_stats(os, object); + (void) printf("\n"); + + for (zap_cursor_init(&zc, os, object); + zap_cursor_retrieve(&zc, 
&attr) == 0; + zap_cursor_advance(&zc)) { + (void) printf("\t\t%s = ", attr.za_name); + if (attr.za_num_integers == 0) { + (void) printf("\n"); + continue; + } + (void) printf(" %llx : [%d:%d:%d]\n", + (u_longlong_t)attr.za_first_integer, + (int)ATTR_LENGTH(attr.za_first_integer), + (int)ATTR_BSWAP(attr.za_first_integer), + (int)ATTR_NUM(attr.za_first_integer)); + } + zap_cursor_fini(&zc); +} + +/*ARGSUSED*/ +static void +dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) +{ + zap_cursor_t zc; + zap_attribute_t attr; + uint16_t *layout_attrs; + int i; + + dump_zap_stats(os, object); + (void) printf("\n"); + + for (zap_cursor_init(&zc, os, object); + zap_cursor_retrieve(&zc, &attr) == 0; + zap_cursor_advance(&zc)) { + (void) printf("\t\t%s = [", attr.za_name); + if (attr.za_num_integers == 0) { + (void) printf("\n"); + continue; + } + + VERIFY(attr.za_integer_length == 2); + layout_attrs = umem_zalloc(attr.za_num_integers * + attr.za_integer_length, UMEM_NOFAIL); + + VERIFY(zap_lookup(os, object, attr.za_name, + attr.za_integer_length, + attr.za_num_integers, layout_attrs) == 0); + + for (i = 0; i != attr.za_num_integers; i++) + (void) printf(" %d ", (int)layout_attrs[i]); + (void) printf("]\n"); + umem_free(layout_attrs, + attr.za_num_integers * attr.za_integer_length); + } + zap_cursor_fini(&zc); +} + +/*ARGSUSED*/ +static void dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size) { zap_cursor_t zc; @@ -405,26 +617,89 @@ dump_zpldir(objset_t *os, uint64_t objec zap_cursor_fini(&zc); } +int +get_dtl_refcount(vdev_t *vd) +{ + int refcount = 0; + + if (vd->vdev_ops->vdev_op_leaf) { + space_map_t *sm = vd->vdev_dtl_sm; + + if (sm != NULL && + sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) + return (1); + return (0); + } + + for (int c = 0; c < vd->vdev_children; c++) + refcount += get_dtl_refcount(vd->vdev_child[c]); + return (refcount); +} + +int +get_metaslab_refcount(vdev_t *vd) +{ + int refcount = 0; + + if (vd->vdev_top == 
vd && !vd->vdev_removing) { + for (int m = 0; m < vd->vdev_ms_count; m++) { + space_map_t *sm = vd->vdev_ms[m]->ms_sm; + + if (sm != NULL && + sm->sm_dbuf->db_size == sizeof (space_map_phys_t)) + refcount++; + } + } + for (int c = 0; c < vd->vdev_children; c++) + refcount += get_metaslab_refcount(vd->vdev_child[c]); + + return (refcount); +} + +static int +verify_spacemap_refcounts(spa_t *spa) +{ + uint64_t expected_refcount = 0; + uint64_t actual_refcount; + + (void) feature_get_refcount(spa, + &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM], + &expected_refcount); + actual_refcount = get_dtl_refcount(spa->spa_root_vdev); + actual_refcount += get_metaslab_refcount(spa->spa_root_vdev); + + if (expected_refcount != actual_refcount) { + (void) printf("space map refcount mismatch: expected %lld != " + "actual %lld\n", + (longlong_t)expected_refcount, + (longlong_t)actual_refcount); + return (2); + } + return (0); +} + static void -dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) +dump_spacemap(objset_t *os, space_map_t *sm) { uint64_t alloc, offset, entry; - uint8_t mapshift = sm->sm_shift; - uint64_t mapstart = sm->sm_start; char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID", "INVALID", "INVALID", "INVALID", "INVALID" }; - if (smo->smo_object == 0) + if (sm == NULL) return; /* * Print out the freelist entries in both encoded and decoded form. 
*/ alloc = 0; - for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { - VERIFY(0 == dmu_read(os, smo->smo_object, offset, + for (offset = 0; offset < space_map_length(sm); + offset += sizeof (entry)) { + uint8_t mapshift = sm->sm_shift; + + VERIFY0(dmu_read(os, space_map_object(sm), offset, sizeof (entry), &entry, DMU_READ_PREFETCH)); if (SM_DEBUG_DECODE(entry)) { + (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n", (u_longlong_t)(offset / sizeof (entry)), ddata[SM_DEBUG_ACTION_DECODE(entry)], @@ -436,10 +711,10 @@ dump_spacemap(objset_t *os, space_map_ob (u_longlong_t)(offset / sizeof (entry)), SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', (u_longlong_t)((SM_OFFSET_DECODE(entry) << - mapshift) + mapstart), + mapshift) + sm->sm_start), (u_longlong_t)((SM_OFFSET_DECODE(entry) << - mapshift) + mapstart + (SM_RUN_DECODE(entry) << - mapshift)), + mapshift) + sm->sm_start + + (SM_RUN_DECODE(entry) << mapshift)), (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift)); if (SM_TYPE_DECODE(entry) == SM_ALLOC) alloc += SM_RUN_DECODE(entry) << mapshift; @@ -447,26 +722,28 @@ dump_spacemap(objset_t *os, space_map_ob alloc -= SM_RUN_DECODE(entry) << mapshift; } } - if (alloc != smo->smo_alloc) { + if (alloc != space_map_allocated(sm)) { (void) printf("space_map_object alloc (%llu) INCONSISTENT " "with space map summary (%llu)\n", - (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc); + (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc); } } static void dump_metaslab_stats(metaslab_t *msp) { - char maxbuf[5]; - space_map_t *sm = &msp->ms_map; - avl_tree_t *t = sm->sm_pp_root; - int free_pct = sm->sm_space * 100 / sm->sm_size; + char maxbuf[32]; + range_tree_t *rt = msp->ms_tree; + avl_tree_t *t = &msp->ms_size_tree; + int free_pct = range_tree_space(rt) * 100 / msp->ms_size; - nicenum(space_map_maxsize(sm), maxbuf, sizeof(maxbuf)); + zdb_nicenum(metaslab_block_maxsize(msp), maxbuf); (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n", "segments", 
avl_numnodes(t), "maxsize", maxbuf, "freepct", free_pct); + (void) printf("\tIn-memory histogram:\n"); + dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); } static void @@ -474,33 +751,45 @@ dump_metaslab(metaslab_t *msp) { vdev_t *vd = msp->ms_group->mg_vd; spa_t *spa = vd->vdev_spa; - space_map_t *sm = &msp->ms_map; - space_map_obj_t *smo = &msp->ms_smo; - char freebuf[5]; + space_map_t *sm = msp->ms_sm; + char freebuf[32]; - nicenum(sm->sm_size - smo->smo_alloc, freebuf, sizeof(freebuf)); + zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf); (void) printf( "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n", - (u_longlong_t)(sm->sm_start / sm->sm_size), - (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf); + (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, + (u_longlong_t)space_map_object(sm), freebuf); - if (dump_opt['m'] > 1 && !dump_opt['L']) { + if (dump_opt['m'] > 2 && !dump_opt['L']) { mutex_enter(&msp->ms_lock); - space_map_load_wait(sm); - if (!sm->sm_loaded) - VERIFY(space_map_load(sm, zfs_metaslab_ops, - SM_FREE, smo, spa->spa_meta_objset) == 0); + metaslab_load_wait(msp); + if (!msp->ms_loaded) { + VERIFY0(metaslab_load(msp)); + range_tree_stat_verify(msp->ms_tree); + } dump_metaslab_stats(msp); - space_map_unload(sm); + metaslab_unload(msp); mutex_exit(&msp->ms_lock); } - if (dump_opt['d'] > 5 || dump_opt['m'] > 2) { - ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift)); + if (dump_opt['m'] > 1 && sm != NULL && + spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) { + /* + * The space map histogram represents free space in chunks + * of sm_shift (i.e. bucket 0 refers to 2^sm_shift). 
+ */ + (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n", + (u_longlong_t)msp->ms_fragmentation); + dump_histogram(sm->sm_phys->smp_histogram, + SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); + } + + if (dump_opt['d'] > 5 || dump_opt['m'] > 3) { + ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift)); mutex_enter(&msp->ms_lock); - dump_spacemap(spa->spa_meta_objset, smo, sm); + dump_spacemap(spa->spa_meta_objset, msp->ms_sm); mutex_exit(&msp->ms_lock); } } @@ -518,6 +807,47 @@ print_vdev_metaslab_header(vdev_t *vd) } static void +dump_metaslab_groups(spa_t *spa) +{ + vdev_t *rvd = spa->spa_root_vdev; + metaslab_class_t *mc = spa_normal_class(spa); + uint64_t fragmentation; + + metaslab_class_histogram_verify(mc); + + for (int c = 0; c < rvd->vdev_children; c++) { + vdev_t *tvd = rvd->vdev_child[c]; + metaslab_group_t *mg = tvd->vdev_mg; + + if (mg->mg_class != mc) + continue; + + metaslab_group_histogram_verify(mg); + mg->mg_fragmentation = metaslab_group_fragmentation(mg); + + (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t" + "fragmentation", + (u_longlong_t)tvd->vdev_id, + (u_longlong_t)tvd->vdev_ms_count); + if (mg->mg_fragmentation == ZFS_FRAG_INVALID) { + (void) printf("%3s\n", "-"); + } else { + (void) printf("%3llu%%\n", + (u_longlong_t)mg->mg_fragmentation); + } + dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); + } + + (void) printf("\tpool %s\tfragmentation", spa_name(spa)); + fragmentation = metaslab_class_fragmentation(mc); + if (fragmentation == ZFS_FRAG_INVALID) + (void) printf("\t%3s\n", "-"); + else + (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation); + dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0); +} + +static void dump_metaslabs(spa_t *spa) { vdev_t *vd, *rvd = spa->spa_root_vdev; @@ -572,7 +902,7 @@ dump_dde(const ddt_t *ddt, const ddt_ent if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); - snprintf_blkptr(blkbuf, sizeof(blkbuf), &blk); + 
snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk); (void) printf("index %llx refcnt %llu %s %s\n", (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, types[p], blkbuf); @@ -617,13 +947,15 @@ dump_ddt(ddt_t *ddt, enum ddt_type type, return; ASSERT(error == 0); - count = ddt_object_count(ddt, type, class); + error = ddt_object_count(ddt, type, class, &count); + ASSERT(error == 0); + if (count == 0) + return; + dspace = doi.doi_physical_blocks_512 << 9; mspace = doi.doi_fill_count * doi.doi_data_block_size; - ASSERT(count != 0); /* we should have destroyed it */ - - ddt_object_name(ddt, type, class, name, sizeof(name)); + ddt_object_name(ddt, type, class, name); (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", name, @@ -687,9 +1019,9 @@ dump_all_ddts(spa_t *spa) } static void -dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size) +dump_dtl_seg(void *arg, uint64_t start, uint64_t size) { - char *prefix = (void *)sm; + char *prefix = arg; (void) printf("%s [%llu,%llu) length %llu\n", prefix, @@ -719,28 +1051,32 @@ dump_dtl(vdev_t *vd, int indent) required ? 
"DTL-required" : "DTL-expendable"); for (int t = 0; t < DTL_TYPES; t++) { - space_map_t *sm = &vd->vdev_dtl[t]; - if (sm->sm_space == 0) + range_tree_t *rt = vd->vdev_dtl[t]; + if (range_tree_space(rt) == 0) continue; (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", indent + 2, "", name[t]); - mutex_enter(sm->sm_lock); - space_map_walk(sm, dump_dtl_seg, (void *)prefix); - mutex_exit(sm->sm_lock); + mutex_enter(rt->rt_lock); + range_tree_walk(rt, dump_dtl_seg, prefix); + mutex_exit(rt->rt_lock); if (dump_opt['d'] > 5 && vd->vdev_children == 0) - dump_spacemap(spa->spa_meta_objset, - &vd->vdev_dtl_smo, sm); + dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm); } for (int c = 0; c < vd->vdev_children; c++) dump_dtl(vd->vdev_child[c], indent + 4); } +/* from spa_history.c: spa_history_create_obj() */ +#define HIS_BUF_LEN_DEF (128 << 10) +#define HIS_BUF_LEN_MAX (1 << 30) + static void dump_history(spa_t *spa) { nvlist_t **events = NULL; - char buf[SPA_MAXBLOCKSIZE]; + char *buf = NULL; + uint64_t bufsize = HIS_BUF_LEN_DEF; uint64_t resid, len, off = 0; uint_t num = 0; int error; @@ -749,8 +1085,11 @@ dump_history(spa_t *spa) char tbuf[30]; char internalstr[MAXPATHLEN]; + if ((buf = malloc(bufsize)) == NULL) + (void) fprintf(stderr, "Unable to read history: " + "out of memory\n"); do { - len = sizeof (buf); + len = bufsize; if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { (void) fprintf(stderr, "Unable to read history: " @@ -760,34 +1099,52 @@ dump_history(spa_t *spa) if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) break; - off -= resid; + + /* + * If the history block is too big, double the buffer + * size and try again. 
+ */ + if (resid == len) { + free(buf); + buf = NULL; + + bufsize <<= 1; + if ((bufsize >= HIS_BUF_LEN_MAX) || + ((buf = malloc(bufsize)) == NULL)) { + (void) fprintf(stderr, "Unable to read history: " + "out of memory\n"); + return; + } + } } while (len != 0); + free(buf); (void) printf("\nHistory:\n"); for (int i = 0; i < num; i++) { uint64_t time, txg, ievent; char *cmd, *intstr; + boolean_t printed = B_FALSE; if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME, &time) != 0) - continue; + goto next; if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD, &cmd) != 0) { if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_INT_EVENT, &ievent) != 0) - continue; + goto next; verify(nvlist_lookup_uint64(events[i], ZPOOL_HIST_TXG, &txg) == 0); verify(nvlist_lookup_string(events[i], ZPOOL_HIST_INT_STR, &intstr) == 0); - if (ievent >= LOG_END) - continue; + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) + goto next; (void) snprintf(internalstr, sizeof (internalstr), "[internal %s txg:%lld] %s", - hist_event_table[ievent], txg, + zfs_history_event_names[ievent], txg, intstr); cmd = internalstr; } @@ -795,6 +1152,14 @@ dump_history(spa_t *spa) (void) localtime_r(&tsec, &t); (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); (void) printf("%s %s\n", tbuf, cmd); + printed = B_TRUE; + +next: + if (dump_opt['h'] > 1) { + if (!printed) + (void) printf("unrecognized record:\n"); + dump_nvlist(events[i], 2); + } } } @@ -805,7 +1170,8 @@ dump_dnode(objset_t *os, uint64_t object } static uint64_t -blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb) +blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, + const zbookmark_phys_t *zb) { if (dnp == NULL) { ASSERT(zb->zb_level < 0); @@ -822,47 +1188,63 @@ blkid2offset(const dnode_phys_t *dnp, co } static void -snprintf_blkptr_compact(char *blkbuf, size_t blklen, blkptr_t *bp) +snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp) { - dva_t *dva = bp->blk_dva; + const dva_t *dva = bp->blk_dva; int 
ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; - size_t len; - if (dump_opt['b'] >= 5) { - snprintf_blkptr(blkbuf, blklen, bp); + if (dump_opt['b'] >= 6) { + snprintf_blkptr(blkbuf, buflen, bp); return; } - blkbuf[0] = '\0'; + if (BP_IS_EMBEDDED(bp)) { + (void) sprintf(blkbuf, + "EMBEDDED et=%u %llxL/%llxP B=%llu", + (int)BPE_GET_ETYPE(bp), + (u_longlong_t)BPE_GET_LSIZE(bp), + (u_longlong_t)BPE_GET_PSIZE(bp), + (u_longlong_t)bp->blk_birth); + return; + } - len = 0; - for (int i = 0; i < ndvas; i++) { - len += snprintf(blkbuf + len, blklen - len, "%llu:%llx:%llx ", + blkbuf[0] = '\0'; + for (int i = 0; i < ndvas; i++) + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), "%llu:%llx:%llx ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), (u_longlong_t)DVA_GET_OFFSET(&dva[i]), (u_longlong_t)DVA_GET_ASIZE(&dva[i])); - if (len > blklen) - len = blklen; - } - snprintf(blkbuf + len, blklen - len, - "%llxL/%llxP F=%llu B=%llu/%llu", - (u_longlong_t)BP_GET_LSIZE(bp), - (u_longlong_t)BP_GET_PSIZE(bp), - (u_longlong_t)bp->blk_fill, - (u_longlong_t)bp->blk_birth, - (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + if (BP_IS_HOLE(bp)) { + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), + "%llxL B=%llu", + (u_longlong_t)BP_GET_LSIZE(bp), + (u_longlong_t)bp->blk_birth); + } else { + (void) snprintf(blkbuf + strlen(blkbuf), + buflen - strlen(blkbuf), + "%llxL/%llxP F=%llu B=%llu/%llu", + (u_longlong_t)BP_GET_LSIZE(bp), + (u_longlong_t)BP_GET_PSIZE(bp), + (u_longlong_t)BP_GET_FILL(bp), + (u_longlong_t)bp->blk_birth, + (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); + } } static void -print_indirect(blkptr_t *bp, const zbookmark_t *zb, +print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb, const dnode_phys_t *dnp) { char blkbuf[BP_SPRINTF_LEN]; int l; - ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); - ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + if (!BP_IS_EMBEDDED(bp)) { + ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); + ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); + } 
(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); @@ -876,13 +1258,13 @@ print_indirect(blkptr_t *bp, const zbook } } - snprintf_blkptr_compact(blkbuf, sizeof(blkbuf), bp); + snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); (void) printf("%s\n", blkbuf); } static int visit_indirect(spa_t *spa, const dnode_phys_t *dnp, - blkptr_t *bp, const zbookmark_t *zb) + blkptr_t *bp, const zbookmark_phys_t *zb) { int err = 0; @@ -891,23 +1273,24 @@ visit_indirect(spa_t *spa, const dnode_p print_indirect(bp, zb, dnp); - if (BP_GET_LEVEL(bp) > 0) { - uint32_t flags = ARC_WAIT; + if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) { + arc_flags_t flags = ARC_FLAG_WAIT; int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; arc_buf_t *buf; uint64_t fill = 0; - err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf, + err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err) return (err); + ASSERT(buf->b_data); /* recursively visit blocks below this */ cbp = buf->b_data; for (i = 0; i < epb; i++, cbp++) { - zbookmark_t czb; + zbookmark_phys_t czb; SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, @@ -915,11 +1298,11 @@ visit_indirect(spa_t *spa, const dnode_p err = visit_indirect(spa, dnp, cbp, &czb); if (err) break; - fill += cbp->blk_fill; + fill += BP_GET_FILL(cbp); } if (!err) - ASSERT3U(fill, ==, bp->blk_fill); - (void) arc_buf_remove_ref(buf, &buf); + ASSERT3U(fill, ==, BP_GET_FILL(bp)); + arc_buf_destroy(buf, &buf); } return (err); @@ -931,7 +1314,7 @@ dump_indirect(dnode_t *dn) { dnode_phys_t *dnp = dn->dn_phys; int j; - zbookmark_t czb; + zbookmark_phys_t czb; (void) printf("Indirect blocks:\n"); @@ -952,7 +1335,7 @@ dump_dsl_dir(objset_t *os, uint64_t obje { dsl_dir_phys_t *dd = data; time_t crtime; - char nice[6]; + char nice[32]; if (dd == NULL) return; @@ -969,15 +1352,15 @@ dump_dsl_dir(objset_t *os, uint64_t obje (u_longlong_t)dd->dd_origin_obj); (void) 
printf("\t\tchild_dir_zapobj = %llu\n", (u_longlong_t)dd->dd_child_dir_zapobj); - nicenum(dd->dd_used_bytes, nice, sizeof(nice)); + zdb_nicenum(dd->dd_used_bytes, nice); (void) printf("\t\tused_bytes = %s\n", nice); - nicenum(dd->dd_compressed_bytes, nice, sizeof(nice)); + zdb_nicenum(dd->dd_compressed_bytes, nice); (void) printf("\t\tcompressed_bytes = %s\n", nice); - nicenum(dd->dd_uncompressed_bytes, nice, sizeof(nice)); + zdb_nicenum(dd->dd_uncompressed_bytes, nice); (void) printf("\t\tuncompressed_bytes = %s\n", nice); - nicenum(dd->dd_quota, nice, sizeof(nice)); + zdb_nicenum(dd->dd_quota, nice); (void) printf("\t\tquota = %s\n", nice); - nicenum(dd->dd_reserved, nice, sizeof(nice)); + zdb_nicenum(dd->dd_reserved, nice); (void) printf("\t\treserved = %s\n", nice); (void) printf("\t\tprops_zapobj = %llu\n", (u_longlong_t)dd->dd_props_zapobj); @@ -987,7 +1370,7 @@ dump_dsl_dir(objset_t *os, uint64_t obje (u_longlong_t)dd->dd_flags); #define DO(which) \ - nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, sizeof(nice)); \ + zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \ (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice) DO(HEAD); DO(SNAP); @@ -1003,7 +1386,7 @@ dump_dsl_dataset(objset_t *os, uint64_t { dsl_dataset_phys_t *ds = data; time_t crtime; - char used[6], compressed[6], uncompressed[6], unique[6]; + char used[32], compressed[32], uncompressed[32], unique[32]; char blkbuf[BP_SPRINTF_LEN]; if (ds == NULL) @@ -1011,11 +1394,11 @@ dump_dsl_dataset(objset_t *os, uint64_t ASSERT(size == sizeof (*ds)); crtime = ds->ds_creation_time; - nicenum(ds->ds_used_bytes, used, sizeof(used)); - nicenum(ds->ds_compressed_bytes, compressed, sizeof(compressed)); - nicenum(ds->ds_uncompressed_bytes, uncompressed, sizeof(uncompressed)); - nicenum(ds->ds_unique_bytes, unique, sizeof(unique)); - snprintf_blkptr(blkbuf, sizeof(blkbuf), &ds->ds_bp); + zdb_nicenum(ds->ds_referenced_bytes, used); + zdb_nicenum(ds->ds_compressed_bytes, compressed); + 
zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed); + zdb_nicenum(ds->ds_unique_bytes, unique); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp); (void) printf("\t\tdir_obj = %llu\n", (u_longlong_t)ds->ds_dir_obj); @@ -1053,63 +1436,166 @@ dump_dsl_dataset(objset_t *os, uint64_t (void) printf("\t\tbp = %s\n", blkbuf); } +/* ARGSUSED */ +static int +dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + char blkbuf[BP_SPRINTF_LEN]; + + if (bp->blk_birth != 0) { + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + (void) printf("\t%s\n", blkbuf); + } + return (0); +} + static void -dump_bplist(objset_t *mos, uint64_t object, char *name) +dump_bptree(objset_t *os, uint64_t obj, char *name) { - bplist_t bpl = { 0 }; - blkptr_t blk, *bp = &blk; - uint64_t itor = 0; - char bytes[6]; - char comp[6]; - char uncomp[6]; + char bytes[32]; + bptree_phys_t *bt; + dmu_buf_t *db; if (dump_opt['d'] < 3) return; - bplist_init(&bpl); - VERIFY(0 == bplist_open(&bpl, mos, object)); - if (bplist_empty(&bpl)) { - bplist_close(&bpl); - bplist_fini(&bpl); + VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); + bt = db->db_data; + zdb_nicenum(bt->bt_bytes, bytes); + (void) printf("\n %s: %llu datasets, %s\n", + name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes); + dmu_buf_rele(db, FTAG); + + if (dump_opt['d'] < 5) return; - } - nicenum(bpl.bpl_phys->bpl_bytes, bytes, sizeof(bytes)); - if (bpl.bpl_dbuf->db_size == sizeof (bplist_phys_t)) { - nicenum(bpl.bpl_phys->bpl_comp, comp, sizeof(comp)); - nicenum(bpl.bpl_phys->bpl_uncomp, uncomp, sizeof(uncomp)); - (void) printf("\n %s: %llu entries, %s (%s/%s comp)\n", - name, (u_longlong_t)bpl.bpl_phys->bpl_entries, + (void) printf("\n"); + + (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); +} + +/* ARGSUSED */ +static int +dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + char blkbuf[BP_SPRINTF_LEN]; + + ASSERT(bp->blk_birth != 0); + snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp); 
+ (void) printf("\t%s\n", blkbuf); + return (0); +} + +static void +dump_full_bpobj(bpobj_t *bpo, char *name, int indent) +{ + char bytes[32]; + char comp[32]; + char uncomp[32]; + + if (dump_opt['d'] < 3) + return; + + zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes); + if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) { + zdb_nicenum(bpo->bpo_phys->bpo_comp, comp); + zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp); + (void) printf(" %*s: object %llu, %llu local blkptrs, " + "%llu subobjs in object %llu, %s (%s/%s comp)\n", + indent * 8, name, + (u_longlong_t)bpo->bpo_object, + (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, + (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, + (u_longlong_t)bpo->bpo_phys->bpo_subobjs, bytes, comp, uncomp); + + for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) { + uint64_t subobj; + bpobj_t subbpo; + int error; + VERIFY0(dmu_read(bpo->bpo_os, + bpo->bpo_phys->bpo_subobjs, + i * sizeof (subobj), sizeof (subobj), &subobj, 0)); + error = bpobj_open(&subbpo, bpo->bpo_os, subobj); + if (error != 0) { + (void) printf("ERROR %u while trying to open " + "subobj id %llu\n", + error, (u_longlong_t)subobj); + continue; + } + dump_full_bpobj(&subbpo, "subobj", indent + 1); + bpobj_close(&subbpo); + } } else { - (void) printf("\n %s: %llu entries, %s\n", - name, (u_longlong_t)bpl.bpl_phys->bpl_entries, bytes); + (void) printf(" %*s: object %llu, %llu blkptrs, %s\n", + indent * 8, name, + (u_longlong_t)bpo->bpo_object, + (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, + bytes); } - if (dump_opt['d'] < 5) { - bplist_close(&bpl); - bplist_fini(&bpl); + if (dump_opt['d'] < 5) return; + + + if (indent == 0) { + (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL); + (void) printf("\n"); } +} - (void) printf("\n"); +static void +dump_deadlist(dsl_deadlist_t *dl) +{ + dsl_deadlist_entry_t *dle; + uint64_t unused; + char bytes[32]; + char comp[32]; + char uncomp[32]; - while (bplist_iterate(&bpl, &itor, bp) == 0) { - char 
blkbuf[BP_SPRINTF_LEN]; + if (dump_opt['d'] < 3) + return; - ASSERT(bp->blk_birth != 0); - snprintf_blkptr_compact(blkbuf, sizeof(blkbuf), bp); - (void) printf("\tItem %3llu: %s\n", - (u_longlong_t)itor - 1, blkbuf); + if (dl->dl_oldfmt) { + dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0); + return; } - bplist_close(&bpl); - bplist_fini(&bpl); + zdb_nicenum(dl->dl_phys->dl_used, bytes); + zdb_nicenum(dl->dl_phys->dl_comp, comp); + zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp); + (void) printf("\n Deadlist: %s (%s/%s comp)\n", + bytes, comp, uncomp); + + if (dump_opt['d'] < 4) + return; + + (void) printf("\n"); + + /* force the tree to be loaded */ + dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused); + + for (dle = avl_first(&dl->dl_tree); dle; + dle = AVL_NEXT(&dl->dl_tree, dle)) { + if (dump_opt['d'] >= 5) { + char buf[128]; + (void) snprintf(buf, sizeof (buf), "mintxg %llu -> " + "obj %llu", (longlong_t)dle->dle_mintxg, + (longlong_t)dle->dle_bpobj.bpo_object); + dump_full_bpobj(&dle->dle_bpobj, buf, 0); + } else { + (void) printf("mintxg %llu -> obj %llu\n", + (longlong_t)dle->dle_mintxg, + (longlong_t)dle->dle_bpobj.bpo_object); + } + } } static avl_tree_t idx_tree; static avl_tree_t domain_tree; static boolean_t fuid_table_loaded; +static boolean_t sa_loaded; +sa_attr_type_t *sa_attr_table; static void fuid_table_destroy() @@ -1124,7 +1610,7 @@ fuid_table_destroy() * print uid or gid information. * For normal POSIX id just the id is printed in decimal format. * For CIFS files with FUID the fuid is printed in hex followed by - * the doman-rid string. + * the domain-rid string. 
*/ static void print_idstr(uint64_t id, const char *id_type) @@ -1142,12 +1628,12 @@ print_idstr(uint64_t id, const char *id_ } static void -dump_uidgid(objset_t *os, znode_phys_t *zp) +dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) { uint32_t uid_idx, gid_idx; - uid_idx = FUID_INDEX(zp->zp_uid); - gid_idx = FUID_INDEX(zp->zp_gid); + uid_idx = FUID_INDEX(uid); + gid_idx = FUID_INDEX(gid); /* Load domain table, if not already loaded */ if (!fuid_table_loaded && (uid_idx || gid_idx)) { @@ -1162,50 +1648,111 @@ dump_uidgid(objset_t *os, znode_phys_t * fuid_table_loaded = B_TRUE; } - print_idstr(zp->zp_uid, "uid"); - print_idstr(zp->zp_gid, "gid"); + print_idstr(uid, "uid"); + print_idstr(gid, "gid"); } /*ARGSUSED*/ static void dump_znode(objset_t *os, uint64_t object, void *data, size_t size) { - znode_phys_t *zp = data; - time_t z_crtime, z_atime, z_mtime, z_ctime; char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ + sa_handle_t *hdl; + uint64_t xattr, rdev, gen; + uint64_t uid, gid, mode, fsize, parent, links; + uint64_t pflags; + uint64_t acctm[2], modtm[2], chgtm[2], crtm[2]; + time_t z_crtime, z_atime, z_mtime, z_ctime; + sa_bulk_attr_t bulk[12]; + int idx = 0; int error; - ASSERT(size >= sizeof (znode_phys_t)); + if (!sa_loaded) { + uint64_t sa_attrs = 0; + uint64_t version; + + VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, + 8, 1, &version) == 0); + if (version >= ZPL_VERSION_SA) { + VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, + 8, 1, &sa_attrs) == 0); + } + if ((error = sa_setup(os, sa_attrs, zfs_attr_table, + ZPL_END, &sa_attr_table)) != 0) { + (void) printf("sa_setup failed errno %d, can't " + "display znode contents\n", error); + return; + } + sa_loaded = B_TRUE; + } + + if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) { + (void) printf("Failed to get handle for SA znode\n"); + return; + } + + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8); + SA_ADD_BULK_ATTR(bulk, idx, 
sa_attr_table[ZPL_GID], NULL, &gid, 8); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL, + &links, 8); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL, + &mode, 8); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT], + NULL, &parent, 8); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL, + &fsize, 8); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL, + acctm, 16); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, + modtm, 16); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, + crtm, 16); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, + chgtm, 16); + SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, + &pflags, 8); + + if (sa_bulk_lookup(hdl, bulk, idx)) { + (void) sa_handle_destroy(hdl); + return; + } error = zfs_obj_to_path(os, object, path, sizeof (path)); if (error != 0) { (void) snprintf(path, sizeof (path), "\?\?\?", (u_longlong_t)object); } - if (dump_opt['d'] < 3) { (void) printf("\t%s\n", path); + (void) sa_handle_destroy(hdl); return; } - z_crtime = (time_t)zp->zp_crtime[0]; - z_atime = (time_t)zp->zp_atime[0]; - z_mtime = (time_t)zp->zp_mtime[0]; - z_ctime = (time_t)zp->zp_ctime[0]; + z_crtime = (time_t)crtm[0]; + z_atime = (time_t)acctm[0]; + z_mtime = (time_t)modtm[0]; + z_ctime = (time_t)chgtm[0]; (void) printf("\tpath %s\n", path); - dump_uidgid(os, zp); + dump_uidgid(os, uid, gid); (void) printf("\tatime %s", ctime(&z_atime)); (void) printf("\tmtime %s", ctime(&z_mtime)); (void) printf("\tctime %s", ctime(&z_ctime)); (void) printf("\tcrtime %s", ctime(&z_crtime)); - (void) printf("\tgen %llu\n", (u_longlong_t)zp->zp_gen); - (void) printf("\tmode %llo\n", (u_longlong_t)zp->zp_mode); - (void) printf("\tsize %llu\n", (u_longlong_t)zp->zp_size); - (void) printf("\tparent %llu\n", (u_longlong_t)zp->zp_parent); - (void) printf("\tlinks %llu\n", (u_longlong_t)zp->zp_links); - (void) 
printf("\txattr %llu\n", (u_longlong_t)zp->zp_xattr); - (void) printf("\trdev 0x%016llx\n", (u_longlong_t)zp->zp_rdev); + (void) printf("\tgen %llu\n", (u_longlong_t)gen); + (void) printf("\tmode %llo\n", (u_longlong_t)mode); + (void) printf("\tsize %llu\n", (u_longlong_t)fsize); + (void) printf("\tparent %llu\n", (u_longlong_t)parent); + (void) printf("\tlinks %llu\n", (u_longlong_t)links); + (void) printf("\tpflags %llx\n", (u_longlong_t)pflags); + if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr, + sizeof (uint64_t)) == 0) + (void) printf("\txattr %llu\n", (u_longlong_t)xattr); + if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, + sizeof (uint64_t)) == 0) + (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); + sa_handle_destroy(hdl); } /*ARGSUSED*/ @@ -1226,8 +1773,8 @@ static object_viewer_t *object_viewer[DM dump_uint64, /* object array */ dump_none, /* packed nvlist */ dump_packed_nvlist, /* packed nvlist size */ - dump_none, /* bplist */ - dump_none, /* bplist header */ + dump_none, /* bpobj */ + dump_bpobj, /* bpobj header */ dump_none, /* SPA space map header */ dump_none, /* SPA space map */ dump_none, /* ZIL intent log */ @@ -1251,7 +1798,7 @@ static object_viewer_t *object_viewer[DM dump_zap, /* other ZAP */ dump_zap, /* persistent error log */ dump_uint8, /* SPA history */ - dump_uint64, /* SPA history offsets */ + dump_history_offsets, /* SPA history offsets */ dump_zap, /* Pool properties */ dump_zap, /* DSL permissions */ dump_acl, /* ZFS ACL */ @@ -1265,7 +1812,17 @@ static object_viewer_t *object_viewer[DM dump_zap, /* snapshot refcount tags */ dump_ddt_zap, /* DDT ZAP object */ dump_zap, /* DDT statistics */ - dump_unknown /* Unknown type, must be last */ + dump_znode, /* SA object */ + dump_zap, /* SA Master Node */ + dump_sa_attrs, /* SA attribute registration */ + dump_sa_layouts, /* SA attribute layouts */ + dump_zap, /* DSL scrub translations */ + dump_none, /* fake dedup BP */ + dump_zap, /* deadlist */ + dump_none, /* deadlist hdr 
*/ + dump_zap, /* dsl clones */ + dump_bpobj_subobjs, /* bpobj subobjs */ + dump_unknown, /* Unknown type, must be last */ }; static void @@ -1276,7 +1833,8 @@ dump_object(objset_t *os, uint64_t objec dnode_t *dn; void *bonus = NULL; size_t bsize = 0; - char iblk[6], dblk[6], lsize[6], asize[6], bonus_size[6], fill[7]; + char iblk[32], dblk[32], lsize[32], asize[32], fill[32]; + char bonus_size[32]; char aux[50]; int error; @@ -1288,7 +1846,7 @@ dump_object(objset_t *os, uint64_t objec } if (object == 0) { - dn = os->os_meta_dnode; + dn = DMU_META_DNODE(os); } else { error = dmu_bonus_hold(os, object, FTAG, &db); if (error) @@ -1296,16 +1854,16 @@ dump_object(objset_t *os, uint64_t objec object, error); bonus = db->db_data; bsize = db->db_size; - dn = ((dmu_buf_impl_t *)db)->db_dnode; + dn = DB_DNODE((dmu_buf_impl_t *)db); } dmu_object_info_from_dnode(dn, &doi); - nicenum(doi.doi_metadata_block_size, iblk, sizeof(iblk)); - nicenum(doi.doi_data_block_size, dblk, sizeof(dblk)); - nicenum(doi.doi_max_offset, lsize, sizeof(lsize)); - nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof(asize)); - nicenum(doi.doi_bonus_size, bonus_size, sizeof(bonus_size)); - (void) snprintf(fill, sizeof(fill), "%6.2f", 100.0 * doi.doi_fill_count * + zdb_nicenum(doi.doi_metadata_block_size, iblk); + zdb_nicenum(doi.doi_data_block_size, dblk); + zdb_nicenum(doi.doi_max_offset, lsize); + zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize); + zdb_nicenum(doi.doi_bonus_size, bonus_size); + (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count * doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) / doi.doi_max_offset); @@ -1332,11 +1890,13 @@ dump_object(objset_t *os, uint64_t objec } if (verbosity >= 4) { - (void) printf("\tdnode flags: %s%s\n", + (void) printf("\tdnode flags: %s%s%s\n", (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? "USED_BYTES " : "", (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? 
- "USERUSED_ACCOUNTED " : ""); + "USERUSED_ACCOUNTED " : "", + (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? + "SPILL_BLKPTR" : ""); (void) printf("\tdnode maxblkid: %llu\n", (longlong_t)dn->dn_phys->dn_maxblkid); @@ -1364,7 +1924,7 @@ dump_object(objset_t *os, uint64_t objec } for (;;) { - char segsize[6]; + char segsize[32]; error = dnode_next_offset(dn, 0, &start, minlvl, blkfill, 0); if (error) @@ -1372,7 +1932,7 @@ dump_object(objset_t *os, uint64_t objec end = start; error = dnode_next_offset(dn, DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); - nicenum(end - start, segsize, sizeof(segsize)); + zdb_nicenum(end - start, segsize); (void) printf("\t\tsegment [%016llx, %016llx)" " size %5s\n", (u_longlong_t)start, (u_longlong_t)end, segsize); @@ -1395,39 +1955,38 @@ dump_dir(objset_t *os) dmu_objset_stats_t dds; uint64_t object, object_count; uint64_t refdbytes, usedobjs, scratch; - char numbuf[8]; + char numbuf[32]; char blkbuf[BP_SPRINTF_LEN + 20]; - char osname[MAXNAMELEN]; + char osname[ZFS_MAX_DATASET_NAME_LEN]; char *type = "UNKNOWN"; int verbosity = dump_opt['d']; int print_header = 1; int i, error; - size_t len; + dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); + dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (dds.dds_type < DMU_OST_NUMTYPES) type = objset_types[dds.dds_type]; if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; - usedobjs = os->os_rootbp->blk_fill; - refdbytes = os->os_spa->spa_dsl_pool-> - dp_mos_dir->dd_phys->dd_used_bytes; + usedobjs = BP_GET_FILL(os->os_rootbp); + refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)-> + dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } - ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill); + ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp)); - nicenum(refdbytes, numbuf, sizeof(numbuf)); + zdb_nicenum(refdbytes, numbuf); if (verbosity >= 4) { - size_t blklen = sizeof(blkbuf); - len = 
snprintf(blkbuf, blklen, ", rootbp "); - if (len > blklen) - len = blklen; - snprintf_blkptr(blkbuf + len, blklen - len, os->os_rootbp); + (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp "); + (void) snprintf_blkptr(blkbuf + strlen(blkbuf), + sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp); } else { blkbuf[0] = '\0'; } @@ -1452,19 +2011,18 @@ dump_dir(objset_t *os) dump_intent_log(dmu_objset_zil(os)); if (dmu_objset_ds(os) != NULL) - dump_bplist(dmu_objset_pool(os)->dp_meta_objset, - dmu_objset_ds(os)->ds_phys->ds_deadlist_obj, "Deadlist"); + dump_deadlist(&dmu_objset_ds(os)->ds_deadlist); if (verbosity < 2) return; - if (os->os_rootbp->blk_birth == 0) + if (BP_IS_HOLE(os->os_rootbp)) return; dump_object(os, 0, verbosity, &print_header); object_count = 0; - if (os->os_userused_dnode && - os->os_userused_dnode->dn_type != 0) { + if (DMU_USERUSED_DNODE(os) != NULL && + DMU_USERUSED_DNODE(os)->dn_type != 0) { dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header); dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header); } @@ -1499,7 +2057,7 @@ dump_uberblock(uberblock_t *ub, const ch (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp))); if (dump_opt['u'] >= 3) { char blkbuf[BP_SPRINTF_LEN]; - snprintf_blkptr(blkbuf, sizeof(blkbuf), &ub->ub_rootbp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp); (void) printf("\trootbp = %s\n", blkbuf); } (void) printf(footer ?
footer : ""); @@ -1538,13 +2096,13 @@ dump_cachefile(const char *cachefile) nvlist_t *config; if ((fd = open64(cachefile, O_RDONLY)) < 0) { - (void) printf("cannot open '%s': %s\n", cachefile, + (void) fprintf(stderr, "cannot open '%s': %s\n", cachefile, strerror(errno)); exit(1); } if (fstat64(fd, &statbuf) != 0) { - (void) printf("failed to stat '%s': %s\n", cachefile, + (void) fprintf(stderr, "failed to stat '%s': %s\n", cachefile, strerror(errno)); exit(1); } @@ -1604,19 +2162,41 @@ dump_label(const char *dev) { int fd; vdev_label_t label; - char *buf = label.vl_vdev_phys.vp_nvlist; + char *path, *buf = label.vl_vdev_phys.vp_nvlist; size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); struct stat64 statbuf; uint64_t psize, ashift; + int len = strlen(dev) + 1; + + if (strncmp(dev, ZFS_DISK_ROOTD, strlen(ZFS_DISK_ROOTD)) == 0) { + len++; + path = malloc(len); + (void) snprintf(path, len, "%s%s", ZFS_RDISK_ROOTD, + dev + strlen(ZFS_DISK_ROOTD)); + } else { + path = strdup(dev); + } - if ((fd = open64(dev, O_RDONLY)) < 0) { - (void) printf("cannot open '%s': %s\n", dev, strerror(errno)); + if ((fd = open64(path, O_RDONLY)) < 0) { + (void) printf("cannot open '%s': %s\n", path, strerror(errno)); + free(path); exit(1); } if (fstat64(fd, &statbuf) != 0) { - (void) printf("failed to stat '%s': %s\n", dev, + (void) printf("failed to stat '%s': %s\n", path, strerror(errno)); + free(path); + (void) close(fd); + exit(1); + } + + if (S_ISBLK(statbuf.st_mode)) { + (void) printf("cannot use '%s': character device required\n", + path); + free(path); + (void) close(fd); + exit(1); } psize = statbuf.st_size; @@ -1652,8 +2232,13 @@ dump_label(const char *dev) if (dump_opt['u']) dump_label_uberblocks(&label, ashift); } + + free(path); + (void) close(fd); } +static uint64_t dataset_feature_count[SPA_FEATURES]; + /*ARGSUSED*/ static int dump_one_dir(const char *dsname, void *arg) @@ -1666,20 +2251,34 @@ dump_one_dir(const char *dsname, void *a (void) printf("Could not open %s, 
error %d\n", dsname, error); return (0); } + + for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { + if (!dmu_objset_ds(os)->ds_feature_inuse[f]) + continue; + ASSERT(spa_feature_table[f].fi_flags & + ZFEATURE_FLAG_PER_DATASET); + dataset_feature_count[f]++; + } + dump_dir(os); dmu_objset_disown(os, FTAG); fuid_table_destroy(); + sa_loaded = B_FALSE; return (0); } /* * Block statistics. */ +#define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2) typedef struct zdb_blkstats { - uint64_t zb_asize; - uint64_t zb_lsize; - uint64_t zb_psize; - uint64_t zb_count; + uint64_t zb_asize; + uint64_t zb_lsize; + uint64_t zb_psize; + uint64_t zb_count; + uint64_t zb_gangs; + uint64_t zb_ditto_samevdev; + uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE]; } zdb_blkstats_t; /* @@ -1687,11 +2286,13 @@ typedef struct zdb_blkstats { */ #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) -#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 2) +#define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) +#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) static char *zdb_ot_extname[] = { "deferred free", "dedup ditto", + "other", "Total", }; @@ -1701,13 +2302,20 @@ typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; + uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES]; + uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES] + [BPE_PAYLOAD_SIZE]; + uint64_t zcb_start; + uint64_t zcb_lastprint; + uint64_t zcb_totalasize; uint64_t zcb_errors[256]; int zcb_readfails; int zcb_haderrors; + spa_t *zcb_spa; } zdb_cb_t; static void -zdb_count_block(spa_t *spa, zilog_t *zilog, zdb_cb_t *zcb, const blkptr_t *bp, +zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, dmu_object_type_t type) { uint64_t refcnt = 0; @@ -1720,12 +2328,50 @@ zdb_count_block(spa_t *spa, zilog_t *zil for (int i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; int t = (i & 1) ? 
type : ZDB_OT_TOTAL; + int equal; zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; zb->zb_asize += BP_GET_ASIZE(bp); zb->zb_lsize += BP_GET_LSIZE(bp); zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_count++; + + /* + * The histogram is only big enough to record blocks up to + * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last, + * "other", bucket. + */ + int idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT; + idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1); + zb->zb_psize_histogram[idx]++; + + zb->zb_gangs += BP_COUNT_GANG(bp); + + switch (BP_GET_NDVAS(bp)) { + case 2: + if (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[1])) + zb->zb_ditto_samevdev++; + break; + case 3: + equal = (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[1])) + + (DVA_GET_VDEV(&bp->blk_dva[0]) == + DVA_GET_VDEV(&bp->blk_dva[2])) + + (DVA_GET_VDEV(&bp->blk_dva[1]) == + DVA_GET_VDEV(&bp->blk_dva[2])); + if (equal != 0) + zb->zb_ditto_samevdev++; + break; + } + + } + + if (BP_IS_EMBEDDED(bp)) { + zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++; + zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)] + [BPE_GET_PSIZE(bp)]++; + return; } if (dump_opt['L']) @@ -1735,7 +2381,7 @@ zdb_count_block(spa_t *spa, zilog_t *zil ddt_t *ddt; ddt_entry_t *dde; - ddt = ddt_select(spa, bp); + ddt = ddt_select(zcb->zcb_spa, bp); ddt_enter(ddt); dde = ddt_lookup(ddt, bp, B_FALSE); @@ -1751,115 +2397,149 @@ zdb_count_block(spa_t *spa, zilog_t *zil ddt_exit(ddt); } - VERIFY3U(zio_wait(zio_claim(NULL, spa, - refcnt ? 0 : spa_first_txg(spa), + VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa, + refcnt ? 
0 : spa_first_txg(zcb->zcb_spa), bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); } +/* ARGSUSED */ +static void +zdb_blkptr_done(zio_t *zio) +{ + spa_t *spa = zio->io_spa; + blkptr_t *bp = zio->io_bp; + int ioerr = zio->io_error; + zdb_cb_t *zcb = zio->io_private; + zbookmark_phys_t *zb = &zio->io_bookmark; + + zio_data_buf_free(zio->io_data, zio->io_size); + + mutex_enter(&spa->spa_scrub_lock); + spa->spa_scrub_inflight--; + cv_broadcast(&spa->spa_scrub_io_cv); + + if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { + char blkbuf[BP_SPRINTF_LEN]; + + zcb->zcb_haderrors = 1; + zcb->zcb_errors[ioerr]++; + + if (dump_opt['b'] >= 2) + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + else + blkbuf[0] = '\0'; + + (void) printf("zdb_blkptr_cb: " + "Got error %d reading " + "<%llu, %llu, %lld, %llx> %s -- skipping\n", + ioerr, + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (u_longlong_t)zb->zb_level, + (u_longlong_t)zb->zb_blkid, + blkbuf); + } + mutex_exit(&spa->spa_scrub_lock); +} + +/* ARGSUSED */ static int zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) + const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { zdb_cb_t *zcb = arg; - char blkbuf[BP_SPRINTF_LEN]; dmu_object_type_t type; boolean_t is_metadata; if (bp == NULL) return (0); + if (dump_opt['b'] >= 5 && bp->blk_birth > 0) { + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + (void) printf("objset %llu object %llu " + "level %lld offset 0x%llx %s\n", + (u_longlong_t)zb->zb_objset, + (u_longlong_t)zb->zb_object, + (longlong_t)zb->zb_level, + (u_longlong_t)blkid2offset(dnp, bp, zb), + blkbuf); + } + + if (BP_IS_HOLE(bp)) + return (0); + type = BP_GET_TYPE(bp); - zdb_count_block(spa, zilog, zcb, bp, type); + zdb_count_block(zcb, zilog, bp, + (type & DMU_OT_NEWTYPE) ? 
ZDB_OT_OTHER : type); - is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata); + is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); - if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) { - int ioerr; + if (!BP_IS_EMBEDDED(bp) && + (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { size_t size = BP_GET_PSIZE(bp); - void *data = malloc(size); + void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; /* If it's an intent log block, failure is expected. */ if (zb->zb_level == ZB_ZIL_LEVEL) flags |= ZIO_FLAG_SPECULATIVE; - ioerr = zio_wait(zio_read(NULL, spa, bp, data, size, - NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb)); - - free(data); - - if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) { - zcb->zcb_haderrors = 1; - zcb->zcb_errors[ioerr]++; - - if (dump_opt['b'] >= 2) - snprintf_blkptr(blkbuf, sizeof(blkbuf), bp); - else - blkbuf[0] = '\0'; + mutex_enter(&spa->spa_scrub_lock); + while (spa->spa_scrub_inflight > max_inflight) + cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); + spa->spa_scrub_inflight++; + mutex_exit(&spa->spa_scrub_lock); - (void) printf("zdb_blkptr_cb: " - "Got error %d reading " - "<%llu, %llu, %lld, %llx> %s -- skipping\n", - ioerr, - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (u_longlong_t)zb->zb_level, - (u_longlong_t)zb->zb_blkid, - blkbuf); - } + zio_nowait(zio_read(NULL, spa, bp, data, size, + zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); } zcb->zcb_readfails = 0; - if (dump_opt['b'] >= 4) { - snprintf_blkptr(blkbuf, sizeof(blkbuf), bp); - (void) printf("objset %llu object %llu " - "level %lld offset 0x%llx %s\n", - (u_longlong_t)zb->zb_objset, - (u_longlong_t)zb->zb_object, - (longlong_t)zb->zb_level, - (u_longlong_t)blkid2offset(dnp, bp, zb), - blkbuf); + /* only call gethrtime() every 100 blocks */ + static int iters; + if (++iters > 100) + iters = 0; + else + return (0); + + if (dump_opt['b'] < 5 && gethrtime() > 
zcb->zcb_lastprint + NANOSEC) { + uint64_t now = gethrtime(); + char buf[10]; + uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize; + int kb_per_sec = + 1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000)); + int sec_remaining = + (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec; + + zfs_nicenum(bytes, buf, sizeof (buf)); + (void) fprintf(stderr, + "\r%5s completed (%4dMB/s) " + "estimated time remaining: %uhr %02umin %02usec ", + buf, kb_per_sec / 1024, + sec_remaining / 60 / 60, + sec_remaining / 60 % 60, + sec_remaining % 60); + + zcb->zcb_lastprint = now; } return (0); } static void -zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) +zdb_leak(void *arg, uint64_t start, uint64_t size) { - vdev_t *vd = sm->sm_ppd; + vdev_t *vd = arg; (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); } -/* ARGSUSED */ -static void -zdb_space_map_load(space_map_t *sm) -{ -} - -static void -zdb_space_map_unload(space_map_t *sm) -{ - space_map_vacate(sm, zdb_leak, sm); -} - -/* ARGSUSED */ -static void -zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size) -{ -} - -static space_map_ops_t zdb_space_map_ops = { - zdb_space_map_load, - zdb_space_map_unload, - NULL, /* alloc */ - zdb_space_map_claim, - NULL, /* free */ - NULL /* maxsize */ +static metaslab_ops_t zdb_metaslab_ops = { + NULL /* alloc */ }; static void @@ -1884,8 +2564,7 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t * ddt_bp_create(ddb.ddb_checksum, &dde.dde_key, ddp, &blk); if (p == DDT_PHYS_DITTO) { - zdb_count_block(spa, NULL, zcb, &blk, - ZDB_OT_DITTO); + zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO); } else { zcb->zcb_dedup_asize += BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1); @@ -1906,21 +2585,63 @@ zdb_ddt_leak_init(spa_t *spa, zdb_cb_t * static void zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) { + zcb->zcb_spa = spa; + if (!dump_opt['L']) { vdev_t *rvd = spa->spa_root_vdev; - for (int c = 0; c < 
rvd->vdev_children; c++) { + + /* + * We are going to be changing the meaning of the metaslab's + * ms_tree. Ensure that the allocator doesn't try to + * use the tree. + */ + spa->spa_normal_class->mc_ops = &zdb_metaslab_ops; + spa->spa_log_class->mc_ops = &zdb_metaslab_ops; + + for (uint64_t c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; - for (int m = 0; m < vd->vdev_ms_count; m++) { + metaslab_group_t *mg = vd->vdev_mg; + for (uint64_t m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; + ASSERT3P(msp->ms_group, ==, mg); mutex_enter(&msp->ms_lock); - space_map_unload(&msp->ms_map); - VERIFY(space_map_load(&msp->ms_map, - &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo, - spa->spa_meta_objset) == 0); - msp->ms_map.sm_ppd = vd; + metaslab_unload(msp); + + /* + * For leak detection, we overload the metaslab + * ms_tree to contain allocated segments + * instead of free segments. As a result, + * we can't use the normal metaslab_load/unload + * interfaces. + */ + if (msp->ms_sm != NULL) { + (void) fprintf(stderr, + "\rloading space map for " + "vdev %llu of %llu, " + "metaslab %llu of %llu ...", + (longlong_t)c, + (longlong_t)rvd->vdev_children, + (longlong_t)m, + (longlong_t)vd->vdev_ms_count); + + /* + * We don't want to spend the CPU + * manipulating the size-ordered + * tree, so clear the range_tree + * ops. 
+ */ + msp->ms_tree->rt_ops = NULL; + VERIFY0(space_map_load(msp->ms_sm, + msp->ms_tree, SM_ALLOC)); + + if (!msp->ms_loaded) { + msp->ms_loaded = B_TRUE; + } + } mutex_exit(&msp->ms_lock); } } + (void) fprintf(stderr, "\n"); } spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); @@ -1937,16 +2658,50 @@ zdb_leak_fini(spa_t *spa) vdev_t *rvd = spa->spa_root_vdev; for (int c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; + metaslab_group_t *mg = vd->vdev_mg; for (int m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; + ASSERT3P(mg, ==, msp->ms_group); mutex_enter(&msp->ms_lock); - space_map_unload(&msp->ms_map); + + /* + * The ms_tree has been overloaded to + * contain allocated segments. Now that we + * finished traversing all blocks, any + * block that remains in the ms_tree + * represents an allocated block that we + * did not claim during the traversal. + * Claimed blocks would have been removed + * from the ms_tree. + */ + range_tree_vacate(msp->ms_tree, zdb_leak, vd); + + if (msp->ms_loaded) { + msp->ms_loaded = B_FALSE; + } + mutex_exit(&msp->ms_lock); } } } } +/* ARGSUSED */ +static int +count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) +{ + zdb_cb_t *zcb = arg; + + if (dump_opt['b'] >= 5) { + char blkbuf[BP_SPRINTF_LEN]; + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); + (void) printf("[%s] %s\n", + "deferred free", blkbuf); + } + zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); + return (0); +} + static int dump_block_stats(spa_t *spa) { @@ -1954,9 +2709,9 @@ dump_block_stats(spa_t *spa) zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, norm_space, total_alloc, total_found; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; - int leaks = 0; + boolean_t leaks = B_FALSE; - (void) printf("\nTraversing all blocks %s%s%s%s%s...\n", + (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", (dump_opt['c'] == 1) ? 
"metadata " : "", dump_opt['c'] ? "checksums " : "", @@ -1976,32 +2731,39 @@ dump_block_stats(spa_t *spa) /* * If there's a deferred-free bplist, process that first. */ - if (spa->spa_deferred_bplist_obj != 0) { - bplist_t *bpl = &spa->spa_deferred_bplist; - blkptr_t blk; - uint64_t itor = 0; - - VERIFY(0 == bplist_open(bpl, spa->spa_meta_objset, - spa->spa_deferred_bplist_obj)); - - while (bplist_iterate(bpl, &itor, &blk) == 0) { - if (dump_opt['b'] >= 4) { - char blkbuf[BP_SPRINTF_LEN]; - snprintf_blkptr(blkbuf, sizeof(blkbuf), &blk); - (void) printf("[%s] %s\n", - "deferred free", blkbuf); - } - zdb_count_block(spa, NULL, &zcb, &blk, ZDB_OT_DEFERRED); - } - - bplist_close(bpl); + (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, + count_block_cb, &zcb, NULL); + if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { + (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, + count_block_cb, &zcb, NULL); + } + if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) { + VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, + spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, + &zcb, NULL)); } if (dump_opt['c'] > 1) flags |= TRAVERSE_PREFETCH_DATA; + zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa)); + zcb.zcb_start = zcb.zcb_lastprint = gethrtime(); zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); + /* + * If we've traversed the data blocks then we need to wait for those + * I/Os to complete. We leverage "The Godfather" zio to wait on + * all async I/Os to complete. 
+ */ + if (dump_opt['c']) { + for (int i = 0; i < max_ncpus; i++) { + (void) zio_wait(spa->spa_async_zio_root[i]); + spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL, + ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | + ZIO_FLAG_GODFATHER); + } + } + if (zcb.zcb_haderrors) { (void) printf("\nError counts:\n\n"); (void) printf("\t%5s %s\n", "errno", "count"); @@ -2037,7 +2799,7 @@ dump_block_stats(spa_t *spa) (u_longlong_t)total_alloc, (dump_opt['L']) ? "unreachable" : "leaked", (longlong_t)(total_alloc - total_found)); - leaks = 1; + leaks = B_TRUE; } if (tzb->zb_count == 0) @@ -2046,6 +2808,8 @@ dump_block_stats(spa_t *spa) (void) printf("\n"); (void) printf("\tbp count: %10llu\n", (u_longlong_t)tzb->zb_count); + (void) printf("\tganged count: %10llu\n", + (longlong_t)tzb->zb_gangs); (void) printf("\tbp logical: %10llu avg: %6llu\n", (u_longlong_t)tzb->zb_lsize, (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); @@ -2067,13 +2831,36 @@ dump_block_stats(spa_t *spa) (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n", (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); + for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) { + if (zcb.zcb_embedded_blocks[i] == 0) + continue; + (void) printf("\n"); + (void) printf("\tadditional, non-pointer bps of type %u: " + "%10llu\n", + i, (u_longlong_t)zcb.zcb_embedded_blocks[i]); + + if (dump_opt['b'] >= 3) { + (void) printf("\t number of (compressed) bytes: " + "number of bps\n"); + dump_histogram(zcb.zcb_embedded_histogram[i], + sizeof (zcb.zcb_embedded_histogram[i]) / + sizeof (zcb.zcb_embedded_histogram[i][0]), 0); + } + } + + if (tzb->zb_ditto_samevdev != 0) { + (void) printf("\tDittoed blocks on same vdev: %llu\n", + (longlong_t)tzb->zb_ditto_samevdev); + } + if (dump_opt['b'] >= 2) { int l, t, level; (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" "\t avg\t comp\t%%Total\tType\n"); for (t = 0; t <= ZDB_OT_TOTAL; t++) { - char csize[6], lsize[6], psize[6], asize[6], avg[6]; + char csize[32], lsize[32], 
psize[32], asize[32]; + char avg[32], gang[32]; char *typename; if (t < DMU_OT_NUMTYPES) @@ -2109,11 +2896,12 @@ dump_block_stats(spa_t *spa) zcb.zcb_type[ZB_TOTAL][t].zb_asize) continue; - nicenum(zb->zb_count, csize, sizeof(csize)); - nicenum(zb->zb_lsize, lsize, sizeof(lsize)); - nicenum(zb->zb_psize, psize, sizeof(psize)); - nicenum(zb->zb_asize, asize, sizeof(asize)); - nicenum(zb->zb_asize / zb->zb_count, avg, sizeof(avg)); + zdb_nicenum(zb->zb_count, csize); + zdb_nicenum(zb->zb_lsize, lsize); + zdb_nicenum(zb->zb_psize, psize); + zdb_nicenum(zb->zb_asize, asize); + zdb_nicenum(zb->zb_asize / zb->zb_count, avg); + zdb_nicenum(zb->zb_gangs, gang); (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" "\t%5.2f\t%6.2f\t", @@ -2126,6 +2914,19 @@ dump_block_stats(spa_t *spa) else (void) printf(" L%d %s\n", level, typename); + + if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) { + (void) printf("\t number of ganged " + "blocks: %s\n", gang); + } + + if (dump_opt['b'] >= 4) { + (void) printf("psize " + "(in 512-byte sectors): " + "number of blocks\n"); + dump_histogram(zb->zb_psize_histogram, + PSIZE_HISTO_SIZE, 0); + } } } } @@ -2153,25 +2954,25 @@ typedef struct zdb_ddt_entry { /* ARGSUSED */ static int zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, - const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) + const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) { avl_tree_t *t = arg; avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; - if (bp == NULL) + if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { (void) printf("traversing objset %llu, %llu objects, " "%lu blocks so far\n", (u_longlong_t)zb->zb_objset, - (u_longlong_t)bp->blk_fill, + (u_longlong_t)BP_GET_FILL(bp), avl_numnodes(t)); } if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || - BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) + BP_GET_LEVEL(bp) > 0 || 
DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) return (0); ddt_key_fill(&zdde_search.zdde_key, bp); @@ -2226,7 +3027,8 @@ dump_simulated_ddt(spa_t *spa) dds.dds_ref_psize = zdde->zdde_ref_psize; dds.dds_ref_dsize = zdde->zdde_ref_dsize; - ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0); + ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1], + &dds, 0); umem_free(zdde, sizeof (*zdde)); } @@ -2269,28 +3071,72 @@ dump_zpool(spa_t *spa) if (dump_opt['d'] > 2 || dump_opt['m']) dump_metaslabs(spa); + if (dump_opt['M']) + dump_metaslab_groups(spa); if (dump_opt['d'] || dump_opt['i']) { dump_dir(dp->dp_meta_objset); if (dump_opt['d'] >= 3) { - dump_bplist(dp->dp_meta_objset, - spa->spa_deferred_bplist_obj, "Deferred frees"); + dump_full_bpobj(&spa->spa_deferred_bpobj, + "Deferred frees", 0); + if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { + dump_full_bpobj( + &spa->spa_dsl_pool->dp_free_bpobj, + "Pool snapshot frees", 0); + } + + if (spa_feature_is_active(spa, + SPA_FEATURE_ASYNC_DESTROY)) { + dump_bptree(spa->spa_meta_objset, + spa->spa_dsl_pool->dp_bptree_obj, + "Pool dataset frees"); + } dump_dtl(spa->spa_root_vdev, 0); } (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); + + for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { + uint64_t refcount; + + if (!(spa_feature_table[f].fi_flags & + ZFEATURE_FLAG_PER_DATASET)) { + ASSERT0(dataset_feature_count[f]); + continue; + } + (void) feature_get_refcount(spa, + &spa_feature_table[f], &refcount); + if (dataset_feature_count[f] != refcount) { + (void) printf("%s feature refcount mismatch: " + "%lld datasets != %lld refcount\n", + spa_feature_table[f].fi_uname, + (longlong_t)dataset_feature_count[f], + (longlong_t)refcount); + rc = 2; + } else { + (void) printf("Verified %s feature refcount " + "of %llu is correct\n", + spa_feature_table[f].fi_uname, + (longlong_t)refcount); + } + } } - if (dump_opt['b'] || dump_opt['c']) + if (rc == 0 && (dump_opt['b'] || 
dump_opt['c'])) rc = dump_block_stats(spa); + if (rc == 0) + rc = verify_spacemap_refcounts(spa); + if (dump_opt['s']) show_pool_stats(spa); if (dump_opt['h']) dump_history(spa); - if (rc != 0) + if (rc != 0) { + dump_debug_buffer(); exit(rc); + } } #define ZDB_FLAG_CHECKSUM 0x0001 @@ -2312,7 +3158,7 @@ zdb_print_blkptr(blkptr_t *bp, int flags if (flags & ZDB_FLAG_BSWAP) byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); - snprintf_blkptr(blkbuf, sizeof(blkbuf), bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("%s\n", blkbuf); } @@ -2508,6 +3354,7 @@ zdb_read_block(char *thing, spa_t *spa) free(dup); return; } + i += p - &flagstr[i + 1]; /* skip over the number */ } } @@ -2666,7 +3513,7 @@ find_zpool(char **target, nvlist_t **con nvlist_t *match = NULL; char *name = NULL; char *sepp = NULL; - char sep; + char sep = '\0'; int count = 0; importargs_t args = { 0 }; @@ -2742,13 +3589,25 @@ main(int argc, char **argv) nvlist_t *policy = NULL; uint64_t max_txg = UINT64_MAX; int rewind = ZPOOL_NEVER_REWIND; + char *spa_config_path_env; + boolean_t target_is_spa = B_TRUE; (void) setrlimit(RLIMIT_NOFILE, &rl); (void) enable_extended_FILE_stdio(-1, -1); dprintf_setup(&argc, argv); - while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:")) != -1) { + /* + * If there is an environment variable SPA_CONFIG_PATH it overrides + * default spa_config_path setting. If -U flag is specified it will + * override this environment variable settings once again. 
+ */ + spa_config_path_env = getenv("SPA_CONFIG_PATH"); + if (spa_config_path_env != NULL) + spa_config_path = spa_config_path_env; + + while ((c = getopt(argc, argv, + "bcdhilmMI:suCDRSAFLXx:evp:t:U:PG")) != -1) { switch (c) { case 'b': case 'c': @@ -2761,8 +3620,10 @@ main(int argc, char **argv) case 'u': case 'C': case 'D': + case 'M': case 'R': case 'S': + case 'G': dump_opt[c]++; dump_all = 0; break; @@ -2771,10 +3632,17 @@ main(int argc, char **argv) case 'L': case 'X': case 'e': + case 'P': dump_opt[c]++; break; - case 'v': - verbose++; + case 'I': + max_inflight = strtoull(optarg, NULL, 0); + if (max_inflight == 0) { + (void) fprintf(stderr, "maximum number " + "of inflight I/Os must be greater " + "than 0\n"); + usage(); + } break; case 'p': if (searchdirs == NULL) { @@ -2802,6 +3670,12 @@ main(int argc, char **argv) case 'U': spa_config_path = optarg; break; + case 'v': + verbose++; + break; + case 'x': + vn_dumpdir = optarg; + break; default: usage(); break; @@ -2813,15 +3687,29 @@ main(int argc, char **argv) usage(); } + /* + * ZDB does not typically re-read blocks; therefore limit the ARC + * to 256 MB, which can be used entirely for metadata. + */ + zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024; + + /* + * "zdb -c" uses checksum-verifying scrub i/os which are async reads. + * "zdb -b" uses traversal prefetch which uses async reads. + * For good performance, let several of them be active at once. 
+ */ + zfs_vdev_async_read_max_active = 10; + kernel_init(FREAD); g_zfs = libzfs_init(); - ASSERT(g_zfs != NULL); + if (g_zfs == NULL) + fatal("Fail to initialize zfs"); if (dump_all) verbose = MAX(verbose, 1); for (c = 0; c < 256; c++) { - if (dump_all && !strchr("elAFLRSX", c)) + if (dump_all && !strchr("elAFLRSXP", c)) dump_opt[c] = 1; if (dump_opt[c]) dump_opt[c] += verbose; @@ -2875,13 +3763,31 @@ main(int argc, char **argv) fatal("can't open '%s': %s", target, strerror(ENOMEM)); } - if ((error = spa_import(name, cfg, NULL)) != 0) - error = spa_import_verbatim(name, cfg, NULL); + if ((error = spa_import(name, cfg, NULL, + ZFS_IMPORT_MISSING_LOG)) != 0) { + error = spa_import(name, cfg, NULL, + ZFS_IMPORT_VERBATIM); + } } } + if (strpbrk(target, "/@") != NULL) { + size_t targetlen; + + target_is_spa = B_FALSE; + /* + * Remove any trailing slash. Later code would get confused + * by it, but we want to allow it so that "pool/" can + * indicate that we want to dump the topmost filesystem, + * rather than the whole pool. + */ + targetlen = strlen(target); + if (targetlen != 0 && target[targetlen - 1] == '/') + target[targetlen - 1] = '\0'; + } + if (error == 0) { - if (strpbrk(target, "/@") == NULL || dump_opt['R']) { + if (target_is_spa || dump_opt['R']) { error = spa_open_rewind(target, &spa, FTAG, policy, NULL); if (error) { @@ -2927,7 +3833,13 @@ main(int argc, char **argv) argv[i], strerror(errno)); } } - (os != NULL) ? dump_dir(os) : dump_zpool(spa); + if (os != NULL) { + dump_dir(os); + } else if (zopt_objects > 0 && !dump_opt['m']) { + dump_dir(spa->spa_meta_objset); + } else { + dump_zpool(spa); + } } else { flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR; flagbits['c'] = ZDB_FLAG_CHECKSUM; @@ -2945,6 +3857,9 @@ main(int argc, char **argv) (os != NULL) ? 
dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG); fuid_table_destroy(); + sa_loaded = B_FALSE; + + dump_debug_buffer(); libzfs_fini(g_zfs); kernel_fini(); Index: src/external/cddl/osnet/dist/cmd/zdb/zdb_il.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zdb/zdb_il.c,v retrieving revision 1.2 diff -u -p -r1.2 zdb_il.c --- src/external/cddl/osnet/dist/cmd/zdb/zdb_il.c 28 Mar 2014 03:46:56 -0000 1.2 +++ src/external/cddl/osnet/dist/cmd/zdb/zdb_il.c 17 Jul 2014 16:19:55 -0000 @@ -24,6 +24,10 @@ */ /* + * Copyright (c) 2013, 2014 by Delphix. All rights reserved. + */ + +/* * Print intent log header and statistics. */ @@ -47,7 +51,7 @@ print_log_bp(const blkptr_t *bp, const c { char blkbuf[BP_SPRINTF_LEN]; - snprintf_blkptr(blkbuf, sizeof(blkbuf), bp); + snprintf_blkptr(blkbuf, sizeof (blkbuf), bp); (void) printf("%s%s\n", prefix, blkbuf); } @@ -118,7 +122,7 @@ zil_prt_rec_write(zilog_t *zilog, int tx { char *data, *dlimit; blkptr_t *bp = &lr->lr_blkptr; - zbookmark_t zb; + zbookmark_phys_t zb; char buf[SPA_MAXBLOCKSIZE]; int verbose = MAX(dump_opt['d'], dump_opt['i']); int error; @@ -132,6 +136,7 @@ zil_prt_rec_write(zilog_t *zilog, int tx if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { (void) printf("%shas blkptr, %s\n", prefix, + !BP_IS_HOLE(bp) && bp->blk_birth >= spa_first_txg(zilog->zl_spa) ? 
"will claim" : "won't claim"); print_log_bp(bp, prefix); @@ -139,8 +144,6 @@ zil_prt_rec_write(zilog_t *zilog, int tx if (BP_IS_HOLE(bp)) { (void) printf("\t\t\tLSIZE 0x%llx\n", (u_longlong_t)BP_GET_LSIZE(bp)); - } - if (bp->blk_birth == 0) { bzero(buf, sizeof (buf)); (void) printf("%s\n", prefix); return; @@ -314,7 +317,7 @@ print_log_block(zilog_t *zilog, blkptr_t if (verbose >= 5) { (void) strcpy(blkbuf, ", "); snprintf_blkptr(blkbuf + strlen(blkbuf), - sizeof(blkbuf) - strlen(blkbuf), bp); + sizeof (blkbuf) - strlen(blkbuf), bp); } else { blkbuf[0] = '\0'; } @@ -362,7 +365,7 @@ dump_intent_log(zilog_t *zilog) int verbose = MAX(dump_opt['d'], dump_opt['i']); int i; - if (zh->zh_log.blk_birth == 0 || verbose < 1) + if (BP_IS_HOLE(&zh->zh_log) || verbose < 1) return; (void) printf("\n ZIL header: claim_txg %llu, " Index: src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.c,v retrieving revision 1.1.1.2 diff -u -p -r1.1.1.2 zfs_iter.c --- src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.c 27 Feb 2010 22:29:21 -0000 1.1.1.2 +++ src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.c 10 Oct 2016 11:14:24 -0000 @@ -18,9 +18,12 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. */ #include @@ -69,7 +72,7 @@ uu_avl_pool_t *avl_pool; * Include snaps if they were requested or if this a zfs list where types * were not specified and the "listsnapshots" property is set on this pool. 
*/ -static int +static boolean_t zfs_include_snapshots(zfs_handle_t *zhp, callback_data_t *cb) { zpool_handle_t *zph; @@ -89,8 +92,9 @@ static int zfs_callback(zfs_handle_t *zhp, void *data) { callback_data_t *cb = data; - int dontclose = 0; - int include_snaps = zfs_include_snapshots(zhp, cb); + boolean_t should_close = B_TRUE; + boolean_t include_snaps = zfs_include_snapshots(zhp, cb); + boolean_t include_bmarks = (cb->cb_types & ZFS_TYPE_BOOKMARK); if ((zfs_get_type(zhp) & cb->cb_types) || ((zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) && include_snaps)) { @@ -108,14 +112,15 @@ zfs_callback(zfs_handle_t *zhp, void *da cb->cb_props_table); if (zfs_expand_proplist(zhp, cb->cb_proplist, - (cb->cb_flags & ZFS_ITER_RECVD_PROPS)) + (cb->cb_flags & ZFS_ITER_RECVD_PROPS), + (cb->cb_flags & ZFS_ITER_LITERAL_PROPS)) != 0) { free(node); return (-1); } } uu_avl_insert(cb->cb_avl, node, idx); - dontclose = 1; + should_close = B_FALSE; } else { free(node); } @@ -130,12 +135,18 @@ zfs_callback(zfs_handle_t *zhp, void *da cb->cb_depth++; if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) (void) zfs_iter_filesystems(zhp, zfs_callback, data); - if ((zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) && include_snaps) - (void) zfs_iter_snapshots(zhp, zfs_callback, data); + if (((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | + ZFS_TYPE_BOOKMARK)) == 0) && include_snaps) + (void) zfs_iter_snapshots(zhp, + (cb->cb_flags & ZFS_ITER_SIMPLE) != 0, zfs_callback, + data); + if (((zfs_get_type(zhp) & (ZFS_TYPE_SNAPSHOT | + ZFS_TYPE_BOOKMARK)) == 0) && include_bmarks) + (void) zfs_iter_bookmarks(zhp, zfs_callback, data); cb->cb_depth--; } - if (!dontclose) + if (should_close) zfs_close(zhp); return (0); @@ -185,6 +196,14 @@ zfs_free_sort_columns(zfs_sort_column_t } } +boolean_t +zfs_sort_only_by_name(const zfs_sort_column_t *sc) +{ + + return (sc != NULL && sc->sc_next == NULL && + sc->sc_prop == ZFS_PROP_NAME); +} + /* ARGSUSED */ static int zfs_compare(const void *larg, const void *rarg, void *unused) @@ -225,7 
+244,13 @@ zfs_compare(const void *larg, const void lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); - if (lcreate < rcreate) + /* + * Both lcreate and rcreate being 0 means we don't have + * properties and we should compare full name. + */ + if (lcreate == 0 && rcreate == 0) + ret = strcmp(lat + 1, rat + 1); + else if (lcreate < rcreate) ret = -1; else if (lcreate > rcreate) ret = 1; @@ -291,7 +316,14 @@ zfs_sort(const void *larg, const void *r if (rvalid) verify(nvlist_lookup_string(rval, ZPROP_VALUE, &rstr) == 0); + } else if (psc->sc_prop == ZFS_PROP_NAME) { + lvalid = rvalid = B_TRUE; + + (void) strlcpy(lbuf, zfs_get_name(l), sizeof (lbuf)); + (void) strlcpy(rbuf, zfs_get_name(r), sizeof (rbuf)); + lstr = lbuf; + rstr = rbuf; } else if (zfs_prop_is_string(psc->sc_prop)) { lvalid = (zfs_prop_get(l, psc->sc_prop, lbuf, sizeof (lbuf), NULL, NULL, 0, B_TRUE) == 0); @@ -351,11 +383,8 @@ zfs_for_each(int argc, char **argv, int avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT); - if (avl_pool == NULL) { - (void) fprintf(stderr, - gettext("internal error: out of memory\n")); - exit(1); - } + if (avl_pool == NULL) + nomem(); cb.cb_sortcol = sortcol; cb.cb_flags = flags; @@ -400,11 +429,8 @@ zfs_for_each(int argc, char **argv, int sizeof (cb.cb_props_table)); } - if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) { - (void) fprintf(stderr, - gettext("internal error: out of memory\n")); - exit(1); - } + if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) + nomem(); if (argc == 0) { /* @@ -454,11 +480,8 @@ zfs_for_each(int argc, char **argv, int /* * Finally, clean up the AVL tree. 
*/ - if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) { - (void) fprintf(stderr, - gettext("internal error: out of memory")); - exit(1); - } + if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) + nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(cb.cb_avl, node); Index: src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.h,v retrieving revision 1.1.1.2 diff -u -p -r1.1.1.2 zfs_iter.h --- src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.h 27 Feb 2010 22:29:20 -0000 1.1.1.2 +++ src/external/cddl/osnet/dist/cmd/zfs/zfs_iter.h 10 Oct 2016 11:14:24 -0000 @@ -18,9 +18,12 @@ * * CDDL HEADER END */ + /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
*/ #ifndef ZFS_ITER_H @@ -43,11 +46,14 @@ typedef struct zfs_sort_column { #define ZFS_ITER_PROP_LISTSNAPS (1 << 2) #define ZFS_ITER_DEPTH_LIMIT (1 << 3) #define ZFS_ITER_RECVD_PROPS (1 << 4) +#define ZFS_ITER_SIMPLE (1 << 5) +#define ZFS_ITER_LITERAL_PROPS (1 << 6) int zfs_for_each(int, char **, int options, zfs_type_t, zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *); int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t); void zfs_free_sort_columns(zfs_sort_column_t *); +boolean_t zfs_sort_only_by_name(const zfs_sort_column_t *); #ifdef __cplusplus } Index: src/external/cddl/osnet/dist/cmd/zfs/zfs_main.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zfs/zfs_main.c,v retrieving revision 1.5 diff -u -p -r1.5 zfs_main.c --- src/external/cddl/osnet/dist/cmd/zfs/zfs_main.c 10 Apr 2015 22:28:27 -0000 1.5 +++ src/external/cddl/osnet/dist/cmd/zfs/zfs_main.c 22 Apr 2017 16:42:58 -0000 @@ -20,8 +20,16 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright 2012 Milan Jurik. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved. + * Copyright (c) 2012 Martin Matuska . All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. + * Copyright (c) 2014 Integros [integros.com] + * Copyright 2016 Igor Kozhukhov . + * Copyright 2016 Nexenta Systems, Inc. 
*/ #include @@ -41,24 +49,43 @@ #include #include #include -#include +#include +#include #include #include #include #include #include +#include +#include +#include +#ifdef __FreeBSD__ +#include +#endif +#ifdef __NetBSD__ +#include +#endif #include +#include +#include +#include #include +#ifdef illumos +#include +#include +#include +#endif #include "zfs_iter.h" #include "zfs_util.h" +#include "zfs_comutil.h" libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; -const char *pypath = "/usr/lib/zfs/pyzfs.py"; +static boolean_t log_history = B_TRUE; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); @@ -79,9 +106,17 @@ static int zfs_do_send(int argc, char ** static int zfs_do_receive(int argc, char **argv); static int zfs_do_promote(int argc, char **argv); static int zfs_do_userspace(int argc, char **argv); -static int zfs_do_python(int argc, char **argv); +static int zfs_do_allow(int argc, char **argv); +static int zfs_do_unallow(int argc, char **argv); static int zfs_do_hold(int argc, char **argv); +static int zfs_do_holds(int argc, char **argv); static int zfs_do_release(int argc, char **argv); +static int zfs_do_diff(int argc, char **argv); +#ifdef __FreeBSD__ +static int zfs_do_jail(int argc, char **argv); +static int zfs_do_unjail(int argc, char **argv); +#endif +static int zfs_do_bookmark(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. 
@@ -108,6 +143,8 @@ typedef enum { HELP_GET, HELP_INHERIT, HELP_UPGRADE, + HELP_JAIL, + HELP_UNJAIL, HELP_LIST, HELP_MOUNT, HELP_PROMOTE, @@ -126,7 +163,9 @@ typedef enum { HELP_GROUPSPACE, HELP_HOLD, HELP_HOLDS, - HELP_RELEASE + HELP_RELEASE, + HELP_DIFF, + HELP_BOOKMARK, } zfs_help_t; typedef struct zfs_command { @@ -153,6 +192,7 @@ static zfs_command_t command_table[] = { { "clone", zfs_do_clone, HELP_CLONE }, { "promote", zfs_do_promote, HELP_PROMOTE }, { "rename", zfs_do_rename, HELP_RENAME }, + { "bookmark", zfs_do_bookmark, HELP_BOOKMARK }, { NULL }, { "list", zfs_do_list, HELP_LIST }, { NULL }, @@ -171,13 +211,19 @@ static zfs_command_t command_table[] = { { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, - { "allow", zfs_do_python, HELP_ALLOW }, + { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, - { "unallow", zfs_do_python, HELP_UNALLOW }, + { "unallow", zfs_do_unallow, HELP_UNALLOW }, { NULL }, { "hold", zfs_do_hold, HELP_HOLD }, - { "holds", zfs_do_python, HELP_HOLDS }, + { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, + { "diff", zfs_do_diff, HELP_DIFF }, +#ifdef __FreeBSD__ + { NULL }, + { "jail", zfs_do_jail, HELP_JAIL }, + { "unjail", zfs_do_unjail, HELP_UNJAIL }, +#endif }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -192,16 +238,19 @@ get_usage(zfs_help_t idx) return (gettext("\tclone [-p] [-o property=value] ... " " \n")); case HELP_CREATE: - return (gettext("\tcreate [-p] [-o property=value] ... " + return (gettext("\tcreate [-pu] [-o property=value] ... " "\n" "\tcreate [-ps] [-b blocksize] [-o property=value] ... 
" "-V \n")); case HELP_DESTROY: - return (gettext("\tdestroy [-rRf] \n" - "\tdestroy [-rRd] \n")); + return (gettext("\tdestroy [-fnpRrv] \n" + "\tdestroy [-dnpRrv] " + "@[%][,...]\n" + "\tdestroy #\n")); case HELP_GET: return (gettext("\tget [-rHp] [-d max] " - "[-o \"all\" | field[,...]] [-s source[,...]]\n" + "[-o \"all\" | field[,...]]\n" + "\t [-t type[,...]] [-s source[,...]]\n" "\t <\"all\" | property[,...]> " "[filesystem|volume|snapshot] ...\n")); case HELP_INHERIT: @@ -210,10 +259,13 @@ get_usage(zfs_help_t idx) case HELP_UPGRADE: return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); + case HELP_JAIL: + return (gettext("\tjail \n")); + case HELP_UNJAIL: + return (gettext("\tunjail \n")); case HELP_LIST: - return (gettext("\tlist [-rH][-d max] " - "[-o property[,...]] [-t type[,...]] [-s property] ...\n" - "\t [-S property] ... " + return (gettext("\tlist [-Hp] [-r|-d max] [-o property[,...]] " + "[-s property]...\n\t [-S property]... [-t type[,...]] " "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" @@ -221,28 +273,35 @@ get_usage(zfs_help_t idx) case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: - return (gettext("\treceive [-vnF] \n" - "\treceive [-vnF] -d \n")); + return (gettext("\treceive|recv [-vnsFu] \n" + "\treceive|recv [-vnsFu] [-o origin=] [-d | -e] " + "\n" + "\treceive|recv -A \n")); case HELP_RENAME: - return (gettext("\trename " + return (gettext("\trename [-f] " "\n" - "\trename -p \n" - "\trename -r ")); + "\trename [-f] -p \n" + "\trename -r \n" + "\trename -u [-p] ")); case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: - return (gettext("\tsend [-RDp] [-[iI] snapshot] \n")); + return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] " + "\n" + "\tsend [-Le] [-i snapshot|bookmark] " + "\n" + "\tsend [-nvPe] -t \n")); case HELP_SET: - return (gettext("\tset " + return (gettext("\tset ... 
" " ...\n")); case HELP_SHARE: return (gettext("\tshare <-a | filesystem>\n")); case HELP_SNAPSHOT: - return (gettext("\tsnapshot [-r] [-o property=value] ... " - "\n")); + return (gettext("\tsnapshot|snap [-r] [-o property=value] ... " + "@ ...\n")); case HELP_UNMOUNT: - return (gettext("\tunmount [-f] " + return (gettext("\tunmount|umount [-f] " "<-a | filesystem|mountpoint>\n")); case HELP_UNSHARE: return (gettext("\tunshare " @@ -268,41 +327,66 @@ get_usage(zfs_help_t idx) "\tunallow [-r] -s @setname [[,...]] " "\n")); case HELP_USERSPACE: - return (gettext("\tuserspace [-hniHp] [-o field[,...]] " - "[-sS field] ... [-t type[,...]]\n" - "\t \n")); + return (gettext("\tuserspace [-Hinp] [-o field[,...]] " + "[-s field] ...\n" + "\t [-S field] ... [-t type[,...]] " + "\n")); case HELP_GROUPSPACE: - return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] " - "[-sS field] ... [-t type[,...]]\n" - "\t \n")); + return (gettext("\tgroupspace [-Hinp] [-o field[,...]] " + "[-s field] ...\n" + "\t [-S field] ... [-t type[,...]] " + "\n")); case HELP_HOLD: return (gettext("\thold [-r] ...\n")); case HELP_HOLDS: - return (gettext("\tholds [-r] ...\n")); + return (gettext("\tholds [-Hp] [-r|-d depth] " + " ...\n")); case HELP_RELEASE: return (gettext("\trelease [-r] ...\n")); + case HELP_DIFF: + return (gettext("\tdiff [-FHt] " + "[snapshot|filesystem]\n")); + case HELP_BOOKMARK: + return (gettext("\tbookmark \n")); } abort(); /* NOTREACHED */ } +void +nomem(void) +{ + (void) fprintf(stderr, gettext("internal error: out of memory\n")); + exit(1); +} + /* * Utility function to guarantee malloc() success. 
*/ + void * safe_malloc(size_t size) { void *data; - if ((data = calloc(1, size)) == NULL) { - (void) fprintf(stderr, "internal error: out of memory\n"); - exit(1); - } + if ((data = calloc(1, size)) == NULL) + nomem(); return (data); } +static char * +safe_strdup(char *str) +{ + char *dupstr = strdup(str); + + if (dupstr == NULL) + nomem(); + + return (dupstr); +} + /* * Callback routine that will print out information for each of * the properties. @@ -391,6 +475,8 @@ usage(boolean_t requested) (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "groupquota@..."); (void) fprintf(fp, "YES NO | none\n"); + (void) fprintf(fp, "\t%-15s ", "written@"); + (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, gettext("\nSizes are specified in bytes " "with standard units such as K, M, G, etc.\n")); @@ -423,15 +509,18 @@ usage(boolean_t requested) exit(requested ? 0 : 2); } +/* + * Take a property=value argument string and add it to the given nvlist. + * Modifies the argument inplace. 
+ */ static int -parseprop(nvlist_t *props) +parseprop(nvlist_t *props, char *propname) { - char *propname = optarg; char *propval, *strval; if ((propval = strchr(propname, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " - "'=' for -o option\n")); + "'=' for property=value argument\n")); return (-1); } *propval = '\0'; @@ -441,11 +530,8 @@ parseprop(nvlist_t *props) "specified multiple times\n"), propname); return (-1); } - if (nvlist_add_string(props, propname, propval) != 0) { - (void) fprintf(stderr, gettext("internal " - "error: out of memory\n")); - return (-1); - } + if (nvlist_add_string(props, propname, propval) != 0) + nomem(); return (0); } @@ -458,7 +544,7 @@ parse_depth(char *opt, int *flags) depth = (int)strtol(opt, &tmp, 0); if (*tmp) { (void) fprintf(stderr, - gettext("%s is not an integer\n"), optarg); + gettext("%s is not an integer\n"), opt); usage(B_FALSE); } if (depth < 0) { @@ -470,6 +556,71 @@ parse_depth(char *opt, int *flags) return (depth); } +#define PROGRESS_DELAY 2 /* seconds */ + +static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; +static time_t pt_begin; +static char *pt_header = NULL; +static boolean_t pt_shown; + +static void +start_progress_timer(void) +{ + pt_begin = time(NULL) + PROGRESS_DELAY; + pt_shown = B_FALSE; +} + +static void +set_progress_header(char *header) +{ + assert(pt_header == NULL); + pt_header = safe_strdup(header); + if (pt_shown) { + (void) printf("%s: ", header); + (void) fflush(stdout); + } +} + +static void +update_progress(char *update) +{ + if (!pt_shown && time(NULL) > pt_begin) { + int len = strlen(update); + + (void) printf("%s: %s%*.*s", pt_header, update, len, len, + pt_reverse); + (void) fflush(stdout); + pt_shown = B_TRUE; + } else if (pt_shown) { + int len = strlen(update); + + (void) printf("%s%*.*s", update, len, len, pt_reverse); + (void) fflush(stdout); + } +} + +static void +finish_progress(char *done) +{ + if (pt_shown) { + (void) printf("%s\n", done); 
+ (void) fflush(stdout); + } + free(pt_header); + pt_header = NULL; +} + +/* + * Check if the dataset is mountable and should be automatically mounted. + */ +static boolean_t +should_auto_mount(zfs_handle_t *zhp) +{ + if (!zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, zfs_get_type(zhp))) + return (B_FALSE); + return (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON); +} + /* * zfs clone [-p] [-o prop=value] ... * @@ -486,20 +637,17 @@ zfs_do_clone(int argc, char **argv) zfs_handle_t *zhp = NULL; boolean_t parents = B_FALSE; nvlist_t *props; - int ret; + int ret = 0; int c; - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { - (void) fprintf(stderr, gettext("internal error: " - "out of memory\n")); - return (1); - } + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ while ((c = getopt(argc, argv, "o:p")) != -1) { switch (c) { case 'o': - if (parseprop(props)) + if (parseprop(props, optarg) != 0) return (1); break; case 'p': @@ -558,8 +706,22 @@ zfs_do_clone(int argc, char **argv) clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET); if (clone != NULL) { - if ((ret = zfs_mount(clone, NULL, 0)) == 0) - ret = zfs_share(clone); + /* + * If the user doesn't want the dataset + * automatically mounted, then skip the mount/share + * step. + */ + if (should_auto_mount(clone)) { + if ((ret = zfs_mount(clone, NULL, 0)) != 0) { + (void) fprintf(stderr, gettext("clone " + "successfully created, " + "but not mounted\n")); + } else if ((ret = zfs_share(clone)) != 0) { + (void) fprintf(stderr, gettext("clone " + "successfully created, " + "but not shared\n")); + } + } zfs_close(clone); } } @@ -578,7 +740,7 @@ usage: } /* - * zfs create [-p] [-o prop=value] ... fs + * zfs create [-pu] [-o prop=value] ... fs * zfs create [-ps] [-b blocksize] [-o prop=value] ... -V vol size * * Create a new dataset. This command can be used to create filesystems @@ -591,30 +753,29 @@ usage: * SPA_VERSION_REFRESERVATION, we set a refreservation instead. 
* * The '-p' flag creates all the non-existing ancestors of the target first. + * + * The '-u' flag prevents mounting of newly created file system. */ static int zfs_do_create(int argc, char **argv) { zfs_type_t type = ZFS_TYPE_FILESYSTEM; zfs_handle_t *zhp = NULL; - uint64_t volsize; + uint64_t volsize = 0; int c; boolean_t noreserve = B_FALSE; boolean_t bflag = B_FALSE; boolean_t parents = B_FALSE; + boolean_t nomount = B_FALSE; int ret = 1; nvlist_t *props; uint64_t intval; - int canmount; - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { - (void) fprintf(stderr, gettext("internal error: " - "out of memory\n")); - return (1); - } + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ - while ((c = getopt(argc, argv, ":V:b:so:p")) != -1) { + while ((c = getopt(argc, argv, ":V:b:so:pu")) != -1) { switch (c) { case 'V': type = ZFS_TYPE_VOLUME; @@ -626,12 +787,8 @@ zfs_do_create(int argc, char **argv) } if (nvlist_add_uint64(props, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), - intval) != 0) { - (void) fprintf(stderr, gettext("internal " - "error: out of memory\n")); - goto error; - } + zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0) + nomem(); volsize = intval; break; case 'p': @@ -648,24 +805,23 @@ zfs_do_create(int argc, char **argv) if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - intval) != 0) { - (void) fprintf(stderr, gettext("internal " - "error: out of memory\n")); - goto error; - } + intval) != 0) + nomem(); break; case 'o': - if (parseprop(props)) + if (parseprop(props, optarg) != 0) goto error; break; case 's': noreserve = B_TRUE; break; + case 'u': + nomount = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing size " "argument\n")); goto badusage; - break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -678,6 +834,11 @@ zfs_do_create(int argc, char **argv) "used when creating a volume\n")); goto badusage; } + if (nomount && type != ZFS_TYPE_FILESYSTEM) { + 
(void) fprintf(stderr, gettext("'-u' can only be " + "used when creating a file system\n")); + goto badusage; + } argc -= optind; argv += optind; @@ -695,12 +856,14 @@ zfs_do_create(int argc, char **argv) if (type == ZFS_TYPE_VOLUME && !noreserve) { zpool_handle_t *zpool_handle; + nvlist_t *real_props = NULL; uint64_t spa_version; char *p; zfs_prop_t resv_prop; char *strval; + char msg[1024]; - if (p = strchr(argv[0], '/')) + if ((p = strchr(argv[0], '/')) != NULL) *p = '\0'; zpool_handle = zpool_open(g_zfs, argv[0]); if (p != NULL) @@ -709,21 +872,29 @@ zfs_do_create(int argc, char **argv) goto error; spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); - zpool_close(zpool_handle); if (spa_version >= SPA_VERSION_REFRESERVATION) resv_prop = ZFS_PROP_REFRESERVATION; else resv_prop = ZFS_PROP_RESERVATION; - volsize = zvol_volsize_to_reservation(volsize, props); + + (void) snprintf(msg, sizeof (msg), + gettext("cannot create '%s'"), argv[0]); + if (props && (real_props = zfs_valid_proplist(g_zfs, type, + props, 0, NULL, zpool_handle, msg)) == NULL) { + zpool_close(zpool_handle); + goto error; + } + zpool_close(zpool_handle); + + volsize = zvol_volsize_to_reservation(volsize, real_props); + nvlist_free(real_props); if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), &strval) != 0) { if (nvlist_add_uint64(props, zfs_prop_to_name(resv_prop), volsize) != 0) { - (void) fprintf(stderr, gettext("internal " - "error: out of memory\n")); nvlist_free(props); - return (1); + nomem(); } } } @@ -748,20 +919,17 @@ zfs_do_create(int argc, char **argv) if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) goto error; - /* - * if the user doesn't want the dataset automatically mounted, - * then skip the mount/share step - */ - canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); + ret = 0; /* * Mount and/or share the new filesystem as appropriate. 
We provide a * verbose error message to let the user know that their filesystem was * in fact created, even if we failed to mount or share it. + * If the user doesn't want the dataset automatically mounted, + * then skip the mount/share step altogether. */ - ret = 0; - if (canmount == ZFS_CANMOUNT_ON) { + if (!nomount && should_auto_mount(zhp)) { if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not mounted\n")); @@ -799,15 +967,25 @@ badusage: */ typedef struct destroy_cbdata { boolean_t cb_first; - int cb_force; - int cb_recurse; - int cb_error; - int cb_needforce; - int cb_doclones; - boolean_t cb_closezhp; + boolean_t cb_force; + boolean_t cb_recurse; + boolean_t cb_error; + boolean_t cb_doclones; zfs_handle_t *cb_target; - char *cb_snapname; boolean_t cb_defer_destroy; + boolean_t cb_verbose; + boolean_t cb_parsable; + boolean_t cb_dryrun; + nvlist_t *cb_nvl; + nvlist_t *cb_batchedsnaps; + + /* first snap in contiguous run */ + char *cb_firstsnap; + /* previous snap in contiguous run */ + char *cb_prevsnap; + int64_t cb_snapused; + char *cb_snapspec; + char *cb_bookmark; } destroy_cbdata_t; /* @@ -837,7 +1015,7 @@ destroy_check_dependent(zfs_handle_t *zh (void) fprintf(stderr, gettext("use '-r' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; - cbp->cb_error = 1; + cbp->cb_error = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); @@ -858,7 +1036,8 @@ destroy_check_dependent(zfs_handle_t *zh (void) fprintf(stderr, gettext("use '-R' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; - cbp->cb_error = 1; + cbp->cb_error = B_TRUE; + cbp->cb_dryrun = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); @@ -872,7 +1051,20 @@ out: static int destroy_callback(zfs_handle_t *zhp, void *data) { - destroy_cbdata_t *cbp = data; + destroy_cbdata_t *cb = data; + const char *name = zfs_get_name(zhp); + + if (cb->cb_verbose) { + if (cb->cb_parsable) { + (void) 
printf("destroy\t%s\n", name); + } else if (cb->cb_dryrun) { + (void) printf(gettext("would destroy %s\n"), + name); + } else { + (void) printf(gettext("will destroy %s\n"), + name); + } + } /* * Ignore pools (which we've already flagged as an error before getting @@ -883,14 +1075,31 @@ destroy_callback(zfs_handle_t *zhp, void zfs_close(zhp); return (0); } + if (cb->cb_dryrun) { + zfs_close(zhp); + return (0); + } /* - * Bail out on the first error. + * We batch up all contiguous snapshots (even of different + * filesystems) and destroy them with one ioctl. We can't + * simply do all snap deletions and then all fs deletions, + * because we must delete a clone before its origin. */ - if (zfs_unmount(zhp, NULL, cbp->cb_force ? MS_FORCE : 0) != 0 || - zfs_destroy(zhp, cbp->cb_defer_destroy) != 0) { - zfs_close(zhp); - return (-1); + if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) { + fnvlist_add_boolean(cb->cb_batchedsnaps, name); + } else { + int error = zfs_destroy_snaps_nvl(g_zfs, + cb->cb_batchedsnaps, B_FALSE); + fnvlist_free(cb->cb_batchedsnaps); + cb->cb_batchedsnaps = fnvlist_alloc(); + + if (error != 0 || + zfs_unmount(zhp, NULL, cb->cb_force ? 
MS_FORCE : 0) != 0 || + zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { + zfs_close(zhp); + return (-1); + } } zfs_close(zhp); @@ -898,66 +1107,183 @@ destroy_callback(zfs_handle_t *zhp, void } static int -destroy_snap_clones(zfs_handle_t *zhp, void *arg) +destroy_print_cb(zfs_handle_t *zhp, void *arg) { - destroy_cbdata_t *cbp = arg; - char thissnap[MAXPATHLEN]; - zfs_handle_t *szhp; - boolean_t closezhp = cbp->cb_closezhp; - int rv; + destroy_cbdata_t *cb = arg; + const char *name = zfs_get_name(zhp); + int err = 0; - (void) snprintf(thissnap, sizeof (thissnap), - "%s@%s", zfs_get_name(zhp), cbp->cb_snapname); + if (nvlist_exists(cb->cb_nvl, name)) { + if (cb->cb_firstsnap == NULL) + cb->cb_firstsnap = strdup(name); + if (cb->cb_prevsnap != NULL) + free(cb->cb_prevsnap); + /* this snap continues the current range */ + cb->cb_prevsnap = strdup(name); + if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) + nomem(); + if (cb->cb_verbose) { + if (cb->cb_parsable) { + (void) printf("destroy\t%s\n", name); + } else if (cb->cb_dryrun) { + (void) printf(gettext("would destroy %s\n"), + name); + } else { + (void) printf(gettext("will destroy %s\n"), + name); + } + } + } else if (cb->cb_firstsnap != NULL) { + /* end of this range */ + uint64_t used = 0; + err = lzc_snaprange_space(cb->cb_firstsnap, + cb->cb_prevsnap, &used); + cb->cb_snapused += used; + free(cb->cb_firstsnap); + cb->cb_firstsnap = NULL; + free(cb->cb_prevsnap); + cb->cb_prevsnap = NULL; + } + zfs_close(zhp); + return (err); +} - libzfs_print_on_error(g_zfs, B_FALSE); - szhp = zfs_open(g_zfs, thissnap, ZFS_TYPE_SNAPSHOT); - libzfs_print_on_error(g_zfs, B_TRUE); - if (szhp) { - /* - * Destroy any clones of this snapshot - */ - if (zfs_iter_dependents(szhp, B_FALSE, destroy_callback, - cbp) != 0) { - zfs_close(szhp); - if (closezhp) - zfs_close(zhp); - return (-1); +static int +destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) +{ + int err = 0; + assert(cb->cb_firstsnap == NULL); + 
assert(cb->cb_prevsnap == NULL); + err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb); + if (cb->cb_firstsnap != NULL) { + uint64_t used = 0; + if (err == 0) { + err = lzc_snaprange_space(cb->cb_firstsnap, + cb->cb_prevsnap, &used); } - zfs_close(szhp); + cb->cb_snapused += used; + free(cb->cb_firstsnap); + cb->cb_firstsnap = NULL; + free(cb->cb_prevsnap); + cb->cb_prevsnap = NULL; } + return (err); +} - cbp->cb_closezhp = B_TRUE; - rv = zfs_iter_filesystems(zhp, destroy_snap_clones, arg); - if (closezhp) - zfs_close(zhp); - return (rv); +static int +snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) +{ + destroy_cbdata_t *cb = arg; + int err = 0; + + /* Check for clones. */ + if (!cb->cb_doclones && !cb->cb_defer_destroy) { + cb->cb_target = zhp; + cb->cb_first = B_TRUE; + err = zfs_iter_dependents(zhp, B_TRUE, + destroy_check_dependent, cb); + } + + if (err == 0) { + if (nvlist_add_boolean(cb->cb_nvl, zfs_get_name(zhp))) + nomem(); + } + zfs_close(zhp); + return (err); +} + +static int +gather_snapshots(zfs_handle_t *zhp, void *arg) +{ + destroy_cbdata_t *cb = arg; + int err = 0; + + err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); + if (err == ENOENT) + err = 0; + if (err != 0) + goto out; + + if (cb->cb_verbose) { + err = destroy_print_snapshots(zhp, cb); + if (err != 0) + goto out; + } + + if (cb->cb_recurse) + err = zfs_iter_filesystems(zhp, gather_snapshots, cb); + +out: + zfs_close(zhp); + return (err); +} + +static int +destroy_clones(destroy_cbdata_t *cb) +{ + nvpair_t *pair; + for (pair = nvlist_next_nvpair(cb->cb_nvl, NULL); + pair != NULL; + pair = nvlist_next_nvpair(cb->cb_nvl, pair)) { + zfs_handle_t *zhp = zfs_open(g_zfs, nvpair_name(pair), + ZFS_TYPE_SNAPSHOT); + if (zhp != NULL) { + boolean_t defer = cb->cb_defer_destroy; + int err = 0; + + /* + * We can't defer destroy non-snapshots, so set it to + * false while destroying the clones. 
+ */ + cb->cb_defer_destroy = B_FALSE; + err = zfs_iter_dependents(zhp, B_FALSE, + destroy_callback, cb); + cb->cb_defer_destroy = defer; + zfs_close(zhp); + if (err != 0) + return (err); + } + } + return (0); } static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; + int rv = 0; + int err = 0; int c; - zfs_handle_t *zhp; - char *cp; + zfs_handle_t *zhp = NULL; + char *at, *pound; zfs_type_t type = ZFS_TYPE_DATASET; /* check options */ - while ((c = getopt(argc, argv, "dfrR")) != -1) { + while ((c = getopt(argc, argv, "vpndfrR")) != -1) { switch (c) { + case 'v': + cb.cb_verbose = B_TRUE; + break; + case 'p': + cb.cb_verbose = B_TRUE; + cb.cb_parsable = B_TRUE; + break; + case 'n': + cb.cb_dryrun = B_TRUE; + break; case 'd': cb.cb_defer_destroy = B_TRUE; type = ZFS_TYPE_SNAPSHOT; break; case 'f': - cb.cb_force = 1; + cb.cb_force = B_TRUE; break; case 'r': - cb.cb_recurse = 1; + cb.cb_recurse = B_TRUE; break; case 'R': - cb.cb_recurse = 1; - cb.cb_doclones = 1; + cb.cb_recurse = B_TRUE; + cb.cb_doclones = B_TRUE; break; case '?': default: @@ -972,7 +1298,7 @@ zfs_do_destroy(int argc, char **argv) /* check number of arguments */ if (argc == 0) { - (void) fprintf(stderr, gettext("missing path argument\n")); + (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { @@ -980,112 +1306,195 @@ zfs_do_destroy(int argc, char **argv) usage(B_FALSE); } - /* - * If we are doing recursive destroy of a snapshot, then the - * named snapshot may not exist. Go straight to libzfs. - */ - if (cb.cb_recurse && (cp = strchr(argv[0], '@'))) { - int ret; + at = strchr(argv[0], '@'); + pound = strchr(argv[0], '#'); + if (at != NULL) { + + /* Build the list of snaps to destroy in cb_nvl. 
*/ + cb.cb_nvl = fnvlist_alloc(); - *cp = '\0'; - if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) + *at = '\0'; + zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) return (1); - *cp = '@'; - cp++; - if (cb.cb_doclones) { - boolean_t defer = cb.cb_defer_destroy; + cb.cb_snapspec = at + 1; + if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || + cb.cb_error) { + rv = 1; + goto out; + } + + if (nvlist_empty(cb.cb_nvl)) { + (void) fprintf(stderr, gettext("could not find any " + "snapshots to destroy; check snapshot names.\n")); + rv = 1; + goto out; + } - /* - * Temporarily ignore the defer_destroy setting since - * it's not supported for clones. - */ - cb.cb_defer_destroy = B_FALSE; - cb.cb_snapname = cp; - if (destroy_snap_clones(zhp, &cb) != 0) { - zfs_close(zhp); - return (1); + if (cb.cb_verbose) { + char buf[16]; + zfs_nicenum(cb.cb_snapused, buf, sizeof (buf)); + if (cb.cb_parsable) { + (void) printf("reclaim\t%llu\n", + cb.cb_snapused); + } else if (cb.cb_dryrun) { + (void) printf(gettext("would reclaim %s\n"), + buf); + } else { + (void) printf(gettext("will reclaim %s\n"), + buf); } - cb.cb_defer_destroy = defer; } - ret = zfs_destroy_snaps(zhp, cp, cb.cb_defer_destroy); - zfs_close(zhp); - if (ret) { + if (!cb.cb_dryrun) { + if (cb.cb_doclones) { + cb.cb_batchedsnaps = fnvlist_alloc(); + err = destroy_clones(&cb); + if (err == 0) { + err = zfs_destroy_snaps_nvl(g_zfs, + cb.cb_batchedsnaps, B_FALSE); + } + if (err != 0) { + rv = 1; + goto out; + } + } + if (err == 0) { + err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl, + cb.cb_defer_destroy); + } + } + + if (err != 0) + rv = 1; + } else if (pound != NULL) { + int err; + nvlist_t *nvl; + + if (cb.cb_dryrun) { (void) fprintf(stderr, - gettext("no snapshots destroyed\n")); + "dryrun is not supported with bookmark\n"); + return (-1); } - return (ret != 0); - } - /* Open the given dataset */ - if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) - return (1); 
+ if (cb.cb_defer_destroy) { + (void) fprintf(stderr, + "defer destroy is not supported with bookmark\n"); + return (-1); + } - cb.cb_target = zhp; + if (cb.cb_recurse) { + (void) fprintf(stderr, + "recursive is not supported with bookmark\n"); + return (-1); + } - /* - * Perform an explicit check for pools before going any further. - */ - if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && - zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { - (void) fprintf(stderr, gettext("cannot destroy '%s': " - "operation does not apply to pools\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zfs destroy -r " - "%s' to destroy all datasets in the pool\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use 'zpool destroy %s' " - "to destroy the pool itself\n"), zfs_get_name(zhp)); - zfs_close(zhp); - return (1); - } + if (!zfs_bookmark_exists(argv[0])) { + (void) fprintf(stderr, gettext("bookmark '%s' " + "does not exist.\n"), argv[0]); + return (1); + } - /* - * Check for any dependents and/or clones. - */ - cb.cb_first = B_TRUE; - if (!cb.cb_doclones && !cb.cb_defer_destroy && - zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, - &cb) != 0) { - zfs_close(zhp); - return (1); - } + nvl = fnvlist_alloc(); + fnvlist_add_boolean(nvl, argv[0]); - if (cb.cb_error || (!cb.cb_defer_destroy && - (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0))) { - zfs_close(zhp); - return (1); - } + err = lzc_destroy_bookmarks(nvl, NULL); + if (err != 0) { + (void) zfs_standard_error(g_zfs, err, + "cannot destroy bookmark"); + } - /* - * Do the real thing. The callback will close the handle regardless of - * whether it succeeds or not. 
- */ + nvlist_free(cb.cb_nvl); - if (destroy_callback(zhp, &cb) != 0) - return (1); + return (err); + } else { + /* Open the given dataset */ + if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) + return (1); - return (0); -} + cb.cb_target = zhp; -static boolean_t -is_recvd_column(zprop_get_cbdata_t *cbp) -{ - int i; - zfs_get_column_t col; + /* + * Perform an explicit check for pools before going any further. + */ + if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && + zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { + (void) fprintf(stderr, gettext("cannot destroy '%s': " + "operation does not apply to pools\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zfs destroy -r " + "%s' to destroy all datasets in the pool\n"), + zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use 'zpool destroy %s' " + "to destroy the pool itself\n"), zfs_get_name(zhp)); + rv = 1; + goto out; + } - for (i = 0; i < ZFS_GET_NCOLS && - (col = cbp->cb_columns[i]) != GET_COL_NONE; i++) - if (col == GET_COL_RECVD) - return (B_TRUE); - return (B_FALSE); -} + /* + * Check for any dependents and/or clones. + */ + cb.cb_first = B_TRUE; + if (!cb.cb_doclones && + zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, + &cb) != 0) { + rv = 1; + goto out; + } -/* - * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] - * < all | property[,property]... > < fs | snap | vol > ... - * + if (cb.cb_error) { + rv = 1; + goto out; + } + + cb.cb_batchedsnaps = fnvlist_alloc(); + if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, + &cb) != 0) { + rv = 1; + goto out; + } + + /* + * Do the real thing. The callback will close the + * handle regardless of whether it succeeds or not. 
+ */ + err = destroy_callback(zhp, &cb); + zhp = NULL; + if (err == 0) { + err = zfs_destroy_snaps_nvl(g_zfs, + cb.cb_batchedsnaps, cb.cb_defer_destroy); + } + if (err != 0) + rv = 1; + } + +out: + fnvlist_free(cb.cb_batchedsnaps); + fnvlist_free(cb.cb_nvl); + if (zhp != NULL) + zfs_close(zhp); + return (rv); +} + +static boolean_t +is_recvd_column(zprop_get_cbdata_t *cbp) +{ + int i; + zfs_get_column_t col; + + for (i = 0; i < ZFS_GET_NCOLS && + (col = cbp->cb_columns[i]) != GET_COL_NONE; i++) + if (col == GET_COL_RECVD) + return (B_TRUE); + return (B_FALSE); +} + +/* + * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] + * < all | property[,property]... > < fs | snap | vol > ... + * * -r recurse over any child datasets * -H scripted mode. Headers are stripped, and fields are separated * by tabs instead of spaces. @@ -1110,7 +1519,7 @@ get_callback(zfs_handle_t *zhp, void *da char buf[ZFS_MAXPROPLEN]; char rbuf[ZFS_MAXPROPLEN]; zprop_source_t sourcetype; - char source[ZFS_MAXNAMELEN]; + char source[ZFS_MAX_DATASET_NAME_LEN]; zprop_get_cbdata_t *cbp = data; nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; @@ -1166,6 +1575,17 @@ get_callback(zfs_handle_t *zhp, void *da zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); + } else if (zfs_prop_written(pl->pl_user_prop)) { + sourcetype = ZPROP_SRC_LOCAL; + + if (zfs_prop_get_written(zhp, pl->pl_user_prop, + buf, sizeof (buf), cbp->cb_literal) != 0) { + sourcetype = ZPROP_SRC_NONE; + (void) strlcpy(buf, "-", sizeof (buf)); + } + + zprop_print_one_property(zfs_get_name(zhp), cbp, + pl->pl_user_prop, buf, sourcetype, source, NULL); } else { if (nvlist_lookup_nvlist(user_props, pl->pl_user_prop, &propval) != 0) { @@ -1210,9 +1630,10 @@ static int zfs_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; - int i, c, flags = 0; + int i, c, flags = ZFS_ITER_ARGS_CAN_BE_PATHS; + int types = ZFS_TYPE_DATASET; char 
*value, *fields; - int ret; + int ret = 0; int limit = 0; zprop_list_t fake_name = { 0 }; @@ -1227,7 +1648,7 @@ zfs_do_get(int argc, char **argv) cb.cb_type = ZFS_TYPE_DATASET; /* check options */ - while ((c = getopt(argc, argv, ":d:o:s:rHp")) != -1) { + while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; @@ -1302,7 +1723,7 @@ zfs_do_get(int argc, char **argv) default: (void) fprintf(stderr, gettext("invalid column name " - "'%s'\n"), value); + "'%s'\n"), suboptarg); usage(B_FALSE); } } @@ -1339,7 +1760,43 @@ zfs_do_get(int argc, char **argv) default: (void) fprintf(stderr, gettext("invalid source " - "'%s'\n"), value); + "'%s'\n"), suboptarg); + usage(B_FALSE); + } + } + break; + + case 't': + types = 0; + flags &= ~ZFS_ITER_PROP_LISTSNAPS; + while (*optarg != '\0') { + static char *type_subopts[] = { "filesystem", + "volume", "snapshot", "bookmark", + "all", NULL }; + + switch (getsubopt(&optarg, type_subopts, + &value)) { + case 0: + types |= ZFS_TYPE_FILESYSTEM; + break; + case 1: + types |= ZFS_TYPE_VOLUME; + break; + case 2: + types |= ZFS_TYPE_SNAPSHOT; + break; + case 3: + types |= ZFS_TYPE_BOOKMARK; + break; + case 4: + types = ZFS_TYPE_DATASET | + ZFS_TYPE_BOOKMARK; + break; + + default: + (void) fprintf(stderr, + gettext("invalid type '%s'\n"), + suboptarg); usage(B_FALSE); } } @@ -1388,7 +1845,7 @@ zfs_do_get(int argc, char **argv) cb.cb_first = B_TRUE; /* run for each object */ - ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, 0, + ret = zfs_for_each(argc, argv, flags, types, NULL, &cb.cb_proplist, limit, get_callback, &cb); if (cb.cb_proplist == &fake_name) @@ -1449,7 +1906,7 @@ zfs_do_inherit(int argc, char **argv) zfs_prop_t prop; inherit_cbdata_t cb = { 0 }; char *propname; - int ret; + int ret = 0; int flags = 0; boolean_t received = B_FALSE; @@ -1500,9 +1957,13 @@ zfs_do_inherit(int argc, char **argv) if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == 
ZFS_PROP_REFQUOTA || - prop == ZFS_PROP_REFRESERVATION) + prop == ZFS_PROP_REFRESERVATION) { (void) fprintf(stderr, gettext("use 'zfs set " "%s=none' to clear\n"), propname); + (void) fprintf(stderr, gettext("use 'zfs " + "inherit -S %s' to revert to received " + "value\n"), propname); + } return (1); } if (received && (prop == ZFS_PROP_VOLSIZE || @@ -1538,7 +1999,7 @@ typedef struct upgrade_cbdata { uint64_t cb_version; boolean_t cb_newer; boolean_t cb_foundone; - char cb_lastfs[ZFS_MAXNAMELEN]; + char cb_lastfs[ZFS_MAX_DATASET_NAME_LEN]; } upgrade_cbdata_t; static int @@ -1594,31 +2055,25 @@ upgrade_set_callback(zfs_handle_t *zhp, { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); - int i; - static struct { int zplver; int spaver; } table[] = { - {ZPL_VERSION_FUID, SPA_VERSION_FUID}, - {ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE}, - {0, 0} - }; + int needed_spa_version; + int spa_version; + if (zfs_spa_version(zhp, &spa_version) < 0) + return (-1); - for (i = 0; table[i].zplver; i++) { - if (cb->cb_version >= table[i].zplver) { - int spa_version; + needed_spa_version = zfs_spa_version_map(cb->cb_version); - if (zfs_spa_version(zhp, &spa_version) < 0) - return (-1); + if (needed_spa_version < 0) + return (-1); - if (spa_version < table[i].spaver) { - /* can't upgrade */ - (void) printf(gettext("%s: can not be " - "upgraded; the pool version needs to first " - "be upgraded\nto version %d\n\n"), - zfs_get_name(zhp), table[i].spaver); - cb->cb_numfailed++; - return (0); - } - } + if (spa_version < needed_spa_version) { + /* can't upgrade */ + (void) printf(gettext("%s: can not be " + "upgraded; the pool version needs to first " + "be upgraded\nto version %d\n\n"), + zfs_get_name(zhp), needed_spa_version); + cb->cb_numfailed++; + return (0); } /* upgrade */ @@ -1630,9 +2085,11 @@ upgrade_set_callback(zfs_handle_t *zhp, /* * If they did "zfs upgrade -a", then we could * be doing ioctls to different pools. 
We need - * to log this history once to each pool. + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). */ - verify(zpool_stage_history(g_zfs, history_str) == 0); + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; } if (zfs_prop_set(zhp, "version", verstr) == 0) cb->cb_numupgraded++; @@ -1661,9 +2118,9 @@ zfs_do_upgrade(int argc, char **argv) { boolean_t all = B_FALSE; boolean_t showversions = B_FALSE; - int ret; + int ret = 0; upgrade_cbdata_t cb = { 0 }; - char c; + int c; int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ @@ -1716,15 +2173,14 @@ zfs_do_upgrade(int argc, char **argv) "---------------\n"); (void) printf(gettext(" 1 Initial ZFS filesystem version\n")); (void) printf(gettext(" 2 Enhanced directory entries\n")); - (void) printf(gettext(" 3 Case insensitive and File system " - "unique identifier (FUID)\n")); + (void) printf(gettext(" 3 Case insensitive and filesystem " + "user identifier (FUID)\n")); (void) printf(gettext(" 4 userquota, groupquota " "properties\n")); + (void) printf(gettext(" 5 System attributes\n")); (void) printf(gettext("\nFor more information on a particular " - "version, including supported releases, see:\n\n")); - (void) printf("http://www.opensolaris.org/os/community/zfs/" - "version/zpl/N\n\n"); - (void) printf(gettext("Where 'N' is the version number.\n")); + "version, including supported releases,\n")); + (void) printf("see the ZFS Administration Guide.\n\n"); ret = 0; } else if (argc || all) { /* Upgrade filesystems */ @@ -1768,183 +2224,820 @@ zfs_do_upgrade(int argc, char **argv) } /* - * zfs userspace + * zfs userspace [-Hinp] [-o field[,...]] [-s field [-s field]...] + * [-S field [-S field]...] [-t type[,...]] filesystem | snapshot + * zfs groupspace [-Hinp] [-o field[,...]] [-s field [-s field]...] + * [-S field [-S field]...] [-t type[,...]] filesystem | snapshot + * + * -H Scripted mode; elide headers and separate columns by tabs. 
+ * -i Translate SID to POSIX ID. + * -n Print numeric ID instead of user/group name. + * -o Control which fields to display. + * -p Use exact (parsable) numeric output. + * -s Specify sort columns, descending order. + * -S Specify sort columns, ascending order. + * -t Control which object types to display. + * + * Displays space consumed by, and quotas on, each user in the specified + * filesystem or snapshot. */ -static int -userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) -{ - zfs_userquota_prop_t *typep = arg; - zfs_userquota_prop_t p = *typep; - char *name = NULL; - char *ug, *propname; - char namebuf[32]; - char sizebuf[32]; - if (domain == NULL || domain[0] == '\0') { - if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) { - struct group *g = getgrgid(rid); - if (g) - name = g->gr_name; - } else { - struct passwd *p = getpwuid(rid); - if (p) - name = p->pw_name; - } - } +/* us_field_types, us_field_hdr and us_field_names should be kept in sync */ +enum us_field_types { + USFIELD_TYPE, + USFIELD_NAME, + USFIELD_USED, + USFIELD_QUOTA +}; +static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" }; +static char *us_field_names[] = { "type", "name", "used", "quota" }; +#define USFIELD_LAST (sizeof (us_field_names) / sizeof (char *)) + +#define USTYPE_PSX_GRP (1 << 0) +#define USTYPE_PSX_USR (1 << 1) +#define USTYPE_SMB_GRP (1 << 2) +#define USTYPE_SMB_USR (1 << 3) +#define USTYPE_ALL \ + (USTYPE_PSX_GRP | USTYPE_PSX_USR | USTYPE_SMB_GRP | USTYPE_SMB_USR) + +static int us_type_bits[] = { + USTYPE_PSX_GRP, + USTYPE_PSX_USR, + USTYPE_SMB_GRP, + USTYPE_SMB_USR, + USTYPE_ALL +}; +static char *us_type_names[] = { "posixgroup", "posixuser", "smbgroup", + "smbuser", "all" }; - if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) - ug = "group"; - else - ug = "user"; +typedef struct us_node { + nvlist_t *usn_nvl; + uu_avl_node_t usn_avlnode; + uu_list_node_t usn_listnode; +} us_node_t; + +typedef struct us_cbdata { + nvlist_t 
**cb_nvlp; + uu_avl_pool_t *cb_avl_pool; + uu_avl_t *cb_avl; + boolean_t cb_numname; + boolean_t cb_nicenum; + boolean_t cb_sid2posix; + zfs_userquota_prop_t cb_prop; + zfs_sort_column_t *cb_sortcol; + size_t cb_width[USFIELD_LAST]; +} us_cbdata_t; + +static boolean_t us_populated = B_FALSE; + +typedef struct { + zfs_sort_column_t *si_sortcol; + boolean_t si_numname; +} us_sort_info_t; - if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED) - propname = "used"; - else - propname = "quota"; +static int +us_field_index(char *field) +{ + int i; - if (name == NULL) { - (void) snprintf(namebuf, sizeof (namebuf), - "%llu", (longlong_t)rid); - name = namebuf; + for (i = 0; i < USFIELD_LAST; i++) { + if (strcmp(field, us_field_names[i]) == 0) + return (i); } - zfs_nicenum(space, sizebuf, sizeof (sizebuf)); - (void) printf("%s %s %s%c%s %s\n", propname, ug, domain, - domain[0] ? '-' : ' ', name, sizebuf); - - return (0); + return (-1); } static int -zfs_do_userspace(int argc, char **argv) +us_compare(const void *larg, const void *rarg, void *unused) { - zfs_handle_t *zhp; - zfs_userquota_prop_t p; - int error; + const us_node_t *l = larg; + const us_node_t *r = rarg; + us_sort_info_t *si = (us_sort_info_t *)unused; + zfs_sort_column_t *sortcol = si->si_sortcol; + boolean_t numname = si->si_numname; + nvlist_t *lnvl = l->usn_nvl; + nvlist_t *rnvl = r->usn_nvl; + int rc = 0; + boolean_t lvb, rvb; + + for (; sortcol != NULL; sortcol = sortcol->sc_next) { + char *lvstr = ""; + char *rvstr = ""; + uint32_t lv32 = 0; + uint32_t rv32 = 0; + uint64_t lv64 = 0; + uint64_t rv64 = 0; + zfs_prop_t prop = sortcol->sc_prop; + const char *propname = NULL; + boolean_t reverse = sortcol->sc_reverse; + + switch (prop) { + case ZFS_PROP_TYPE: + propname = "type"; + (void) nvlist_lookup_uint32(lnvl, propname, &lv32); + (void) nvlist_lookup_uint32(rnvl, propname, &rv32); + if (rv32 != lv32) + rc = (rv32 < lv32) ? 
1 : -1; + break; + case ZFS_PROP_NAME: + propname = "name"; + if (numname) { + (void) nvlist_lookup_uint64(lnvl, propname, + &lv64); + (void) nvlist_lookup_uint64(rnvl, propname, + &rv64); + if (rv64 != lv64) + rc = (rv64 < lv64) ? 1 : -1; + } else { + (void) nvlist_lookup_string(lnvl, propname, + &lvstr); + (void) nvlist_lookup_string(rnvl, propname, + &rvstr); + rc = strcmp(lvstr, rvstr); + } + break; + case ZFS_PROP_USED: + case ZFS_PROP_QUOTA: + if (!us_populated) + break; + if (prop == ZFS_PROP_USED) + propname = "used"; + else + propname = "quota"; + (void) nvlist_lookup_uint64(lnvl, propname, &lv64); + (void) nvlist_lookup_uint64(rnvl, propname, &rv64); + if (rv64 != lv64) + rc = (rv64 < lv64) ? 1 : -1; + break; - /* - * Try the python version. If the execv fails, we'll continue - * and do a simplistic implementation. - */ - (void) execv(pypath, argv-1); + default: + break; + } - (void) printf("internal error: %s not found\n" - "falling back on built-in implementation, " - "some features will not work\n", pypath); + if (rc != 0) { + if (rc < 0) + return (reverse ? 1 : -1); + else + return (reverse ? -1 : 1); + } + } - if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL) - return (1); + /* + * If entries still seem to be the same, check if they are of the same + * type (smbentity is added only if we are doing SID to POSIX ID + * translation where we can have duplicate type/name combinations). + */ + if (nvlist_lookup_boolean_value(lnvl, "smbentity", &lvb) == 0 && + nvlist_lookup_boolean_value(rnvl, "smbentity", &rvb) == 0 && + lvb != rvb) + return (lvb < rvb ? 
-1 : 1); - (void) printf("PROP TYPE NAME VALUE\n"); + return (0); +} - for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { - error = zfs_userspace(zhp, p, userspace_cb, &p); - if (error) - break; +static inline const char * +us_type2str(unsigned field_type) +{ + switch (field_type) { + case USTYPE_PSX_USR: + return ("POSIX User"); + case USTYPE_PSX_GRP: + return ("POSIX Group"); + case USTYPE_SMB_USR: + return ("SMB User"); + case USTYPE_SMB_GRP: + return ("SMB Group"); + default: + return ("Undefined"); } - return (error); } -/* - * list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...] - * [-s property [-s property]...] [-S property [-S property]...] - * ... - * - * -r Recurse over all children - * -d Limit recursion by depth. - * -H Scripted mode; elide headers and separate columns by tabs - * -o Control which fields to display. - * -t Control which object types to display. - * -s Specify sort columns, descending order. - * -S Specify sort columns, ascending order. - * - * When given no arguments, lists all filesystems in the system. - * Otherwise, list the specified datasets, optionally recursing down them if - * '-r' is specified. - */ -typedef struct list_cbdata { - boolean_t cb_first; - boolean_t cb_scripted; - zprop_list_t *cb_proplist; -} list_cbdata_t; - -/* - * Given a list of columns to display, output appropriate headers for each one. 
- */ -static void -print_header(zprop_list_t *pl) +static int +userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) { - char headerbuf[ZFS_MAXPROPLEN]; - const char *header; - int i; - boolean_t first = B_TRUE; - boolean_t right_justify; + us_cbdata_t *cb = (us_cbdata_t *)arg; + zfs_userquota_prop_t prop = cb->cb_prop; + char *name = NULL; + char *propname; + char sizebuf[32]; + us_node_t *node; + uu_avl_pool_t *avl_pool = cb->cb_avl_pool; + uu_avl_t *avl = cb->cb_avl; + uu_avl_index_t idx; + nvlist_t *props; + us_node_t *n; + zfs_sort_column_t *sortcol = cb->cb_sortcol; + unsigned type = 0; + const char *typestr; + size_t namelen; + size_t typelen; + size_t sizelen; + int typeidx, nameidx, sizeidx; + us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; + boolean_t smbentity = B_FALSE; + + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); + node = safe_malloc(sizeof (us_node_t)); + uu_avl_node_init(node, &node->usn_avlnode, avl_pool); + node->usn_nvl = props; + + if (domain != NULL && domain[0] != '\0') { + /* SMB */ + char sid[MAXNAMELEN + 32]; + uid_t id; +#ifdef illumos + int err; + int flag = IDMAP_REQ_FLG_USE_CACHE; +#endif - for (; pl != NULL; pl = pl->pl_next) { - if (!first) { - (void) printf(" "); + smbentity = B_TRUE; + + (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); + + if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { + type = USTYPE_SMB_GRP; +#ifdef illumos + err = sid_to_id(sid, B_FALSE, &id); +#endif } else { - first = B_FALSE; + type = USTYPE_SMB_USR; +#ifdef illumos + err = sid_to_id(sid, B_TRUE, &id); +#endif } - right_justify = B_FALSE; - if (pl->pl_prop != ZPROP_INVAL) { - header = zfs_prop_column_name(pl->pl_prop); - right_justify = zfs_prop_align_right(pl->pl_prop); +#ifdef illumos + if (err == 0) { + rid = id; + if (!cb->cb_sid2posix) { + if (type == USTYPE_SMB_USR) { + (void) idmap_getwinnamebyuid(rid, flag, + &name, NULL); + } else { + (void) idmap_getwinnamebygid(rid, flag, + &name, 
NULL); + } + if (name == NULL) + name = sid; + } + } +#endif + } + + if (cb->cb_sid2posix || domain == NULL || domain[0] == '\0') { + /* POSIX or -i */ + if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { + type = USTYPE_PSX_GRP; + if (!cb->cb_numname) { + struct group *g; + + if ((g = getgrgid(rid)) != NULL) + name = g->gr_name; + } } else { - for (i = 0; pl->pl_user_prop[i] != '\0'; i++) - headerbuf[i] = toupper(pl->pl_user_prop[i]); - headerbuf[i] = '\0'; - header = headerbuf; + type = USTYPE_PSX_USR; + if (!cb->cb_numname) { + struct passwd *p; + + if ((p = getpwuid(rid)) != NULL) + name = p->pw_name; + } } + } - if (pl->pl_next == NULL && !right_justify) - (void) printf("%s", header); - else if (right_justify) - (void) printf("%*s", pl->pl_width, header); - else - (void) printf("%-*s", pl->pl_width, header); + /* + * Make sure that the type/name combination is unique when doing + * SID to POSIX ID translation (hence changing the type from SMB to + * POSIX). + */ + if (cb->cb_sid2posix && + nvlist_add_boolean_value(props, "smbentity", smbentity) != 0) + nomem(); + + /* Calculate/update width of TYPE field */ + typestr = us_type2str(type); + typelen = strlen(gettext(typestr)); + typeidx = us_field_index("type"); + if (typelen > cb->cb_width[typeidx]) + cb->cb_width[typeidx] = typelen; + if (nvlist_add_uint32(props, "type", type) != 0) + nomem(); + + /* Calculate/update width of NAME field */ + if ((cb->cb_numname && cb->cb_sid2posix) || name == NULL) { + if (nvlist_add_uint64(props, "name", rid) != 0) + nomem(); + namelen = snprintf(NULL, 0, "%u", rid); + } else { + if (nvlist_add_string(props, "name", name) != 0) + nomem(); + namelen = strlen(name); + } + nameidx = us_field_index("name"); + if (namelen > cb->cb_width[nameidx]) + cb->cb_width[nameidx] = namelen; + + /* + * Check if this type/name combination is in the list and update it; + * otherwise add new node to the list. 
+ */ + if ((n = uu_avl_find(avl, node, &sortinfo, &idx)) == NULL) { + uu_avl_insert(avl, node, idx); + } else { + nvlist_free(props); + free(node); + node = n; + props = node->usn_nvl; } - (void) printf("\n"); + /* Calculate/update width of USED/QUOTA fields */ + if (cb->cb_nicenum) + zfs_nicenum(space, sizebuf, sizeof (sizebuf)); + else + (void) snprintf(sizebuf, sizeof (sizebuf), "%llu", space); + sizelen = strlen(sizebuf); + if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED) { + propname = "used"; + if (!nvlist_exists(props, "quota")) + (void) nvlist_add_uint64(props, "quota", 0); + } else { + propname = "quota"; + if (!nvlist_exists(props, "used")) + (void) nvlist_add_uint64(props, "used", 0); + } + sizeidx = us_field_index(propname); + if (sizelen > cb->cb_width[sizeidx]) + cb->cb_width[sizeidx] = sizelen; + + if (nvlist_add_uint64(props, propname, space) != 0) + nomem(); + + return (0); } -/* - * Given a dataset and a list of fields, print out all the properties according - * to the described layout. 
- */ static void -print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) +print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types, + size_t *width, us_node_t *node) { + nvlist_t *nvl = node->usn_nvl; + char valstr[MAXNAMELEN]; boolean_t first = B_TRUE; - char property[ZFS_MAXPROPLEN]; - nvlist_t *userprops = zfs_get_user_props(zhp); - nvlist_t *propval; - char *propstr; - boolean_t right_justify; - int width; + int cfield = 0; + int field; + uint32_t ustype; + + /* Check type */ + (void) nvlist_lookup_uint32(nvl, "type", &ustype); + if (!(ustype & types)) + return; + + while ((field = fields[cfield]) != USFIELD_LAST) { + nvpair_t *nvp = NULL; + data_type_t type; + uint32_t val32; + uint64_t val64; + char *strval = NULL; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + if (strcmp(nvpair_name(nvp), + us_field_names[field]) == 0) + break; + } + + type = nvpair_type(nvp); + switch (type) { + case DATA_TYPE_UINT32: + (void) nvpair_value_uint32(nvp, &val32); + break; + case DATA_TYPE_UINT64: + (void) nvpair_value_uint64(nvp, &val64); + break; + case DATA_TYPE_STRING: + (void) nvpair_value_string(nvp, &strval); + break; + default: + (void) fprintf(stderr, "invalid data type\n"); + } + + switch (field) { + case USFIELD_TYPE: + strval = (char *)us_type2str(val32); + break; + case USFIELD_NAME: + if (type == DATA_TYPE_UINT64) { + (void) sprintf(valstr, "%llu", val64); + strval = valstr; + } + break; + case USFIELD_USED: + case USFIELD_QUOTA: + if (type == DATA_TYPE_UINT64) { + if (parsable) { + (void) sprintf(valstr, "%llu", val64); + } else { + zfs_nicenum(val64, valstr, + sizeof (valstr)); + } + if (field == USFIELD_QUOTA && + strcmp(valstr, "0") == 0) + strval = "none"; + else + strval = valstr; + } + break; + } - for (; pl != NULL; pl = pl->pl_next) { if (!first) { if (scripted) (void) printf("\t"); else (void) printf(" "); - } else { - first = B_FALSE; } + if (scripted) + (void) printf("%s", strval); + else if (field == 
USFIELD_TYPE || field == USFIELD_NAME) + (void) printf("%-*s", width[field], strval); + else + (void) printf("%*s", width[field], strval); - if (pl->pl_prop != ZPROP_INVAL) { - if (zfs_prop_get(zhp, pl->pl_prop, property, - sizeof (property), NULL, NULL, 0, B_FALSE) != 0) + first = B_FALSE; + cfield++; + } + + (void) printf("\n"); +} + +static void +print_us(boolean_t scripted, boolean_t parsable, int *fields, int types, + size_t *width, boolean_t rmnode, uu_avl_t *avl) +{ + us_node_t *node; + const char *col; + int cfield = 0; + int field; + + if (!scripted) { + boolean_t first = B_TRUE; + + while ((field = fields[cfield]) != USFIELD_LAST) { + col = gettext(us_field_hdr[field]); + if (field == USFIELD_TYPE || field == USFIELD_NAME) { + (void) printf(first ? "%-*s" : " %-*s", + width[field], col); + } else { + (void) printf(first ? "%*s" : " %*s", + width[field], col); + } + first = B_FALSE; + cfield++; + } + (void) printf("\n"); + } + + for (node = uu_avl_first(avl); node; node = uu_avl_next(avl, node)) { + print_us_node(scripted, parsable, fields, types, width, node); + if (rmnode) + nvlist_free(node->usn_nvl); + } +} + +static int +zfs_do_userspace(int argc, char **argv) +{ + zfs_handle_t *zhp; + zfs_userquota_prop_t p; + + uu_avl_pool_t *avl_pool; + uu_avl_t *avl_tree; + uu_avl_walk_t *walk; + char *delim; + char deffields[] = "type,name,used,quota"; + char *ofield = NULL; + char *tfield = NULL; + int cfield = 0; + int fields[256]; + int i; + boolean_t scripted = B_FALSE; + boolean_t prtnum = B_FALSE; + boolean_t parsable = B_FALSE; + boolean_t sid2posix = B_FALSE; + int ret = 0; + int c; + zfs_sort_column_t *sortcol = NULL; + int types = USTYPE_PSX_USR | USTYPE_SMB_USR; + us_cbdata_t cb; + us_node_t *node; + us_node_t *rmnode; + uu_list_pool_t *listpool; + uu_list_t *list; + uu_avl_index_t idx = 0; + uu_list_index_t idx2 = 0; + + if (argc < 2) + usage(B_FALSE); + + if (strcmp(argv[0], "groupspace") == 0) + /* Toggle default group types */ + types = 
USTYPE_PSX_GRP | USTYPE_SMB_GRP; + + while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { + switch (c) { + case 'n': + prtnum = B_TRUE; + break; + case 'H': + scripted = B_TRUE; + break; + case 'p': + parsable = B_TRUE; + break; + case 'o': + ofield = optarg; + break; + case 's': + case 'S': + if (zfs_add_sort_column(&sortcol, optarg, + c == 's' ? B_FALSE : B_TRUE) != 0) { + (void) fprintf(stderr, + gettext("invalid field '%s'\n"), optarg); + usage(B_FALSE); + } + break; + case 't': + tfield = optarg; + break; + case 'i': + sid2posix = B_TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing dataset name\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + /* Use default output fields if not specified using -o */ + if (ofield == NULL) + ofield = deffields; + do { + if ((delim = strchr(ofield, ',')) != NULL) + *delim = '\0'; + if ((fields[cfield++] = us_field_index(ofield)) == -1) { + (void) fprintf(stderr, gettext("invalid type '%s' " + "for -o option\n"), ofield); + return (-1); + } + if (delim != NULL) + ofield = delim + 1; + } while (delim != NULL); + fields[cfield] = USFIELD_LAST; + + /* Override output types (-t option) */ + if (tfield != NULL) { + types = 0; + + do { + boolean_t found = B_FALSE; + + if ((delim = strchr(tfield, ',')) != NULL) + *delim = '\0'; + for (i = 0; i < sizeof (us_type_bits) / sizeof (int); + i++) { + if (strcmp(tfield, us_type_names[i]) == 0) { + found = B_TRUE; + types |= us_type_bits[i]; + break; + } + } + if (!found) { + (void) fprintf(stderr, gettext("invalid type " + "'%s' for -t option\n"), tfield); + return (-1); + } + if (delim != NULL) + tfield = delim + 1; + 
} while (delim != NULL); + } + + if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) + return (1); + + if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), + offsetof(us_node_t, usn_avlnode), us_compare, UU_DEFAULT)) == NULL) + nomem(); + if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) + nomem(); + + /* Always add default sorting columns */ + (void) zfs_add_sort_column(&sortcol, "type", B_FALSE); + (void) zfs_add_sort_column(&sortcol, "name", B_FALSE); + + cb.cb_sortcol = sortcol; + cb.cb_numname = prtnum; + cb.cb_nicenum = !parsable; + cb.cb_avl_pool = avl_pool; + cb.cb_avl = avl_tree; + cb.cb_sid2posix = sid2posix; + + for (i = 0; i < USFIELD_LAST; i++) + cb.cb_width[i] = strlen(gettext(us_field_hdr[i])); + + for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { + if (((p == ZFS_PROP_USERUSED || p == ZFS_PROP_USERQUOTA) && + !(types & (USTYPE_PSX_USR | USTYPE_SMB_USR))) || + ((p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) && + !(types & (USTYPE_PSX_GRP | USTYPE_SMB_GRP)))) + continue; + cb.cb_prop = p; + if ((ret = zfs_userspace(zhp, p, userspace_cb, &cb)) != 0) + return (ret); + } + + /* Sort the list */ + if ((node = uu_avl_first(avl_tree)) == NULL) + return (0); + + us_populated = B_TRUE; + + listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), + offsetof(us_node_t, usn_listnode), NULL, UU_DEFAULT); + list = uu_list_create(listpool, NULL, UU_DEFAULT); + uu_list_node_init(node, &node->usn_listnode, listpool); + + while (node != NULL) { + rmnode = node; + node = uu_avl_next(avl_tree, node); + uu_avl_remove(avl_tree, rmnode); + if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) + uu_list_insert(list, rmnode, idx2); + } + + for (node = uu_list_first(list); node != NULL; + node = uu_list_next(list, node)) { + us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; + + if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == NULL) + uu_avl_insert(avl_tree, node, idx); + } + + uu_list_destroy(list); + 
uu_list_pool_destroy(listpool); + + /* Print and free node nvlist memory */ + print_us(scripted, parsable, fields, types, cb.cb_width, B_TRUE, + cb.cb_avl); + + zfs_free_sort_columns(sortcol); + + /* Clean up the AVL tree */ + if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) + nomem(); + + while ((node = uu_avl_walk_next(walk)) != NULL) { + uu_avl_remove(cb.cb_avl, node); + free(node); + } + + uu_avl_walk_end(walk); + uu_avl_destroy(avl_tree); + uu_avl_pool_destroy(avl_pool); + + return (ret); +} + +/* + * list [-Hp][-r|-d max] [-o property[,...]] [-s property] ... [-S property] ... + * [-t type[,...]] [filesystem|volume|snapshot] ... + * + * -H Scripted mode; elide headers and separate columns by tabs. + * -p Display values in parsable (literal) format. + * -r Recurse over all children. + * -d Limit recursion by depth. + * -o Control which fields to display. + * -s Specify sort columns, descending order. + * -S Specify sort columns, ascending order. + * -t Control which object types to display. + * + * When given no arguments, list all filesystems in the system. + * Otherwise, list the specified datasets, optionally recursing down them if + * '-r' is specified. + */ +typedef struct list_cbdata { + boolean_t cb_first; + boolean_t cb_literal; + boolean_t cb_scripted; + zprop_list_t *cb_proplist; +} list_cbdata_t; + +/* + * Given a list of columns to display, output appropriate headers for each one. 
+ */ +static void +print_header(list_cbdata_t *cb) +{ + zprop_list_t *pl = cb->cb_proplist; + char headerbuf[ZFS_MAXPROPLEN]; + const char *header; + int i; + boolean_t first = B_TRUE; + boolean_t right_justify; + + for (; pl != NULL; pl = pl->pl_next) { + if (!first) { + (void) printf(" "); + } else { + first = B_FALSE; + } + + right_justify = B_FALSE; + if (pl->pl_prop != ZPROP_INVAL) { + header = zfs_prop_column_name(pl->pl_prop); + right_justify = zfs_prop_align_right(pl->pl_prop); + } else { + for (i = 0; pl->pl_user_prop[i] != '\0'; i++) + headerbuf[i] = toupper(pl->pl_user_prop[i]); + headerbuf[i] = '\0'; + header = headerbuf; + } + + if (pl->pl_next == NULL && !right_justify) + (void) printf("%s", header); + else if (right_justify) + (void) printf("%*s", pl->pl_width, header); + else + (void) printf("%-*s", pl->pl_width, header); + } + + (void) printf("\n"); +} + +/* + * Given a dataset and a list of fields, print out all the properties according + * to the described layout. + */ +static void +print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) +{ + zprop_list_t *pl = cb->cb_proplist; + boolean_t first = B_TRUE; + char property[ZFS_MAXPROPLEN]; + nvlist_t *userprops = zfs_get_user_props(zhp); + nvlist_t *propval; + char *propstr; + boolean_t right_justify; + + for (; pl != NULL; pl = pl->pl_next) { + if (!first) { + if (cb->cb_scripted) + (void) printf("\t"); + else + (void) printf(" "); + } else { + first = B_FALSE; + } + + if (pl->pl_prop == ZFS_PROP_NAME) { + (void) strlcpy(property, zfs_get_name(zhp), + sizeof (property)); + propstr = property; + right_justify = zfs_prop_align_right(pl->pl_prop); + } else if (pl->pl_prop != ZPROP_INVAL) { + if (zfs_prop_get(zhp, pl->pl_prop, property, + sizeof (property), NULL, NULL, 0, + cb->cb_literal) != 0) propstr = "-"; else propstr = property; - right_justify = zfs_prop_align_right(pl->pl_prop); } else if (zfs_prop_userquota(pl->pl_user_prop)) { if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, - property, sizeof 
(property), B_FALSE) != 0) + property, sizeof (property), cb->cb_literal) != 0) + propstr = "-"; + else + propstr = property; + right_justify = B_TRUE; + } else if (zfs_prop_written(pl->pl_user_prop)) { + if (zfs_prop_get_written(zhp, pl->pl_user_prop, + property, sizeof (property), cb->cb_literal) != 0) propstr = "-"; else propstr = property; @@ -1959,19 +3052,17 @@ print_dataset(zfs_handle_t *zhp, zprop_l right_justify = B_FALSE; } - width = pl->pl_width; - /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ - if (scripted || (pl->pl_next == NULL && !right_justify)) + if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) - (void) printf("%*s", width, propstr); + (void) printf("%*s", pl->pl_width, propstr); else - (void) printf("%-*s", width, propstr); + (void) printf("%-*s", pl->pl_width, propstr); } (void) printf("\n"); @@ -1987,11 +3078,11 @@ list_callback(zfs_handle_t *zhp, void *d if (cbp->cb_first) { if (!cbp->cb_scripted) - print_header(cbp->cb_proplist); + print_header(cbp); cbp->cb_first = B_FALSE; } - print_dataset(zhp, cbp->cb_proplist, cbp->cb_scripted); + print_dataset(zhp, cbp); return (0); } @@ -2000,7 +3091,6 @@ static int zfs_do_list(int argc, char **argv) { int c; - boolean_t scripted = B_FALSE; static char default_fields[] = "name,used,available,referenced,mountpoint"; int types = ZFS_TYPE_DATASET; @@ -2009,16 +3099,20 @@ zfs_do_list(int argc, char **argv) list_cbdata_t cb = { 0 }; char *value; int limit = 0; - int ret; + int ret = 0; zfs_sort_column_t *sortcol = NULL; int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ - while ((c = getopt(argc, argv, ":d:o:rt:Hs:S:")) != -1) { + while ((c = getopt(argc, argv, "HS:d:o:prs:t:")) != -1) { switch (c) { case 'o': fields = optarg; break; + case 'p': + cb.cb_literal = B_TRUE; + flags |= 
ZFS_ITER_LITERAL_PROPS; + break; case 'd': limit = parse_depth(optarg, &flags); break; @@ -2026,7 +3120,7 @@ zfs_do_list(int argc, char **argv) flags |= ZFS_ITER_RECURSE; break; case 'H': - scripted = B_TRUE; + cb.cb_scripted = B_TRUE; break; case 's': if (zfs_add_sort_column(&sortcol, optarg, @@ -2050,7 +3144,8 @@ zfs_do_list(int argc, char **argv) flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", - "volume", "snapshot", "all", NULL }; + "volume", "snapshot", "snap", "bookmark", + "all", NULL }; switch (getsubopt(&optarg, type_subopts, &value)) { @@ -2061,16 +3156,20 @@ zfs_do_list(int argc, char **argv) types |= ZFS_TYPE_VOLUME; break; case 2: + case 3: types |= ZFS_TYPE_SNAPSHOT; break; - case 3: - types = ZFS_TYPE_DATASET; + case 4: + types |= ZFS_TYPE_BOOKMARK; + break; + case 5: + types = ZFS_TYPE_DATASET | + ZFS_TYPE_BOOKMARK; break; - default: (void) fprintf(stderr, gettext("invalid type '%s'\n"), - value); + suboptarg); usage(B_FALSE); } } @@ -2094,6 +3193,13 @@ zfs_do_list(int argc, char **argv) fields = default_fields; /* + * If we are only going to list snapshot names and sort by name, + * then we can use faster version. + */ + if (strcmp(fields, "name") == 0 && zfs_sort_only_by_name(sortcol)) + flags |= ZFS_ITER_SIMPLE; + + /* * If "-o space" and no types were specified, don't display snapshots. */ if (strcmp(fields, "space") == 0 && types_specified == B_FALSE) @@ -2108,7 +3214,6 @@ zfs_do_list(int argc, char **argv) != 0) usage(B_FALSE); - cb.cb_scripted = scripted; cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, @@ -2124,9 +3229,10 @@ zfs_do_list(int argc, char **argv) } /* - * zfs rename - * zfs rename -p + * zfs rename [-f] + * zfs rename [-f] -p * zfs rename -r + * zfs rename -u [-p] * * Renames the given dataset to another of the same type. 
* @@ -2137,19 +3243,27 @@ static int zfs_do_rename(int argc, char **argv) { zfs_handle_t *zhp; + renameflags_t flags = { 0 }; int c; - int ret; - boolean_t recurse = B_FALSE; + int ret = 0; + int types; boolean_t parents = B_FALSE; + char *snapshot = NULL; /* check options */ - while ((c = getopt(argc, argv, "pr")) != -1) { + while ((c = getopt(argc, argv, "fpru")) != -1) { switch (c) { case 'p': parents = B_TRUE; break; case 'r': - recurse = B_TRUE; + flags.recurse = B_TRUE; + break; + case 'u': + flags.nounmount = B_TRUE; + break; + case 'f': + flags.forceunmount = B_TRUE; break; case '?': default: @@ -2178,20 +3292,45 @@ zfs_do_rename(int argc, char **argv) usage(B_FALSE); } - if (recurse && parents) { + if (flags.recurse && parents) { (void) fprintf(stderr, gettext("-p and -r options are mutually " "exclusive\n")); usage(B_FALSE); } - if (recurse && strchr(argv[0], '@') == 0) { + if (flags.recurse && strchr(argv[0], '@') == 0) { (void) fprintf(stderr, gettext("source dataset for recursive " "rename must be a snapshot\n")); usage(B_FALSE); } - if ((zhp = zfs_open(g_zfs, argv[0], parents ? ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME : ZFS_TYPE_DATASET)) == NULL) + if (flags.nounmount && parents) { + (void) fprintf(stderr, gettext("-u and -p options are mutually " + "exclusive\n")); + usage(B_FALSE); + } + + if (flags.nounmount) + types = ZFS_TYPE_FILESYSTEM; + else if (parents) + types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; + else + types = ZFS_TYPE_DATASET; + + if (flags.recurse) { + /* + * When we do recursive rename we are fine when the given + * snapshot for the given dataset doesn't exist - it can + * still exists below. + */ + + snapshot = strchr(argv[0], '@'); + assert(snapshot != NULL); + *snapshot = '\0'; + snapshot++; + } + + if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) return (1); /* If we were asked and the name looks good, try to create ancestors. 
*/ @@ -2201,7 +3340,7 @@ zfs_do_rename(int argc, char **argv) return (1); } - ret = (zfs_rename(zhp, argv[1], recurse) != 0); + ret = (zfs_rename(zhp, snapshot, argv[1], flags) != 0); zfs_close(zhp); return (ret); @@ -2217,7 +3356,7 @@ static int zfs_do_promote(int argc, char **argv) { zfs_handle_t *zhp; - int ret; + int ret = 0; /* check options */ if (argc > 1 && argv[1][0] == '-') { @@ -2266,9 +3405,30 @@ typedef struct rollback_cbdata { char *cb_target; int cb_error; boolean_t cb_recurse; - boolean_t cb_dependent; } rollback_cbdata_t; +static int +rollback_check_dependent(zfs_handle_t *zhp, void *data) +{ + rollback_cbdata_t *cbp = data; + + if (cbp->cb_first && cbp->cb_recurse) { + (void) fprintf(stderr, gettext("cannot rollback to " + "'%s': clones of previous snapshots exist\n"), + cbp->cb_target); + (void) fprintf(stderr, gettext("use '-R' to " + "force deletion of the following clones and " + "dependents:\n")); + cbp->cb_first = 0; + cbp->cb_error = 1; + } + + (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); + + zfs_close(zhp); + return (0); +} + /* * Report any snapshots more recent than the one specified. Used when '-r' is * not specified. 
We reuse this same callback for the snapshot dependents - if @@ -2285,52 +3445,30 @@ rollback_check(zfs_handle_t *zhp, void * return (0); } - if (!cbp->cb_dependent) { - if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 && - zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && - zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > - cbp->cb_create) { - - if (cbp->cb_first && !cbp->cb_recurse) { - (void) fprintf(stderr, gettext("cannot " - "rollback to '%s': more recent snapshots " - "exist\n"), - cbp->cb_target); - (void) fprintf(stderr, gettext("use '-r' to " - "force deletion of the following " - "snapshots:\n")); - cbp->cb_first = 0; - cbp->cb_error = 1; - } - - if (cbp->cb_recurse) { - cbp->cb_dependent = B_TRUE; - if (zfs_iter_dependents(zhp, B_TRUE, - rollback_check, cbp) != 0) { - zfs_close(zhp); - return (-1); - } - cbp->cb_dependent = B_FALSE; - } else { - (void) fprintf(stderr, "%s\n", - zfs_get_name(zhp)); - } - } - } else { - if (cbp->cb_first && cbp->cb_recurse) { - (void) fprintf(stderr, gettext("cannot rollback to " - "'%s': clones of previous snapshots exist\n"), + if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { + if (cbp->cb_first && !cbp->cb_recurse) { + (void) fprintf(stderr, gettext("cannot " + "rollback to '%s': more recent snapshots " + "or bookmarks exist\n"), cbp->cb_target); - (void) fprintf(stderr, gettext("use '-R' to " - "force deletion of the following clones and " - "dependents:\n")); + (void) fprintf(stderr, gettext("use '-r' to " + "force deletion of the following " + "snapshots and bookmarks:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } - (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); + if (cbp->cb_recurse) { + if (zfs_iter_dependents(zhp, B_TRUE, + rollback_check_dependent, cbp) != 0) { + zfs_close(zhp); + return (-1); + } + } else { + (void) fprintf(stderr, "%s\n", + zfs_get_name(zhp)); + } } - zfs_close(zhp); return (0); } @@ -2338,12 +3476,12 @@ rollback_check(zfs_handle_t *zhp, void * static int zfs_do_rollback(int argc, 
char **argv) { - int ret; + int ret = 0; int c; boolean_t force = B_FALSE; rollback_cbdata_t cb = { 0 }; zfs_handle_t *zhp, *snap; - char parentname[ZFS_MAXNAMELEN]; + char parentname[ZFS_MAX_DATASET_NAME_LEN]; char *delim; /* check options */ @@ -2400,7 +3538,9 @@ zfs_do_rollback(int argc, char **argv) cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); cb.cb_first = B_TRUE; cb.cb_error = 0; - if ((ret = zfs_iter_children(zhp, rollback_check, &cb)) != 0) + if ((ret = zfs_iter_snapshots(zhp, B_FALSE, rollback_check, &cb)) != 0) + goto out; + if ((ret = zfs_iter_bookmarks(zhp, rollback_check, &cb)) != 0) goto out; if ((ret = cb.cb_error) != 0) @@ -2422,21 +3562,17 @@ out: } /* - * zfs set property=value { fs | snap | vol } ... + * zfs set property=value ... { fs | snap | vol } ... * - * Sets the given property for all datasets specified on the command line. + * Sets the given properties for all datasets specified on the command line. */ -typedef struct set_cbdata { - char *cb_propname; - char *cb_value; -} set_cbdata_t; static int set_callback(zfs_handle_t *zhp, void *data) { - set_cbdata_t *cbp = data; + nvlist_t *props = data; - if (zfs_prop_set(zhp, cbp->cb_propname, cbp->cb_value) != 0) { + if (zfs_prop_set_list(zhp, props) != 0) { switch (libzfs_errno(g_zfs)) { case EZFS_MOUNTFAILED: (void) fprintf(stderr, gettext("property may be set " @@ -2455,8 +3591,9 @@ set_callback(zfs_handle_t *zhp, void *da static int zfs_do_set(int argc, char **argv) { - set_cbdata_t cb; - int ret; + nvlist_t *props = NULL; + int ds_start = -1; /* argv idx of first dataset arg */ + int ret = 0; /* check for options */ if (argc > 1 && argv[1][0] == '-') { @@ -2467,39 +3604,86 @@ zfs_do_set(int argc, char **argv) /* check number of arguments */ if (argc < 2) { - (void) fprintf(stderr, gettext("missing property=value " - "argument\n")); + (void) fprintf(stderr, gettext("missing arguments\n")); usage(B_FALSE); } if (argc < 3) { - (void) fprintf(stderr, gettext("missing dataset 
name\n")); + if (strchr(argv[1], '=') == NULL) { + (void) fprintf(stderr, gettext("missing property=value " + "argument(s)\n")); + } else { + (void) fprintf(stderr, gettext("missing dataset " + "name(s)\n")); + } usage(B_FALSE); } - /* validate property=value argument */ - cb.cb_propname = argv[1]; - if (((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) || - (cb.cb_value[1] == '\0')) { - (void) fprintf(stderr, gettext("missing value in " - "property=value argument\n")); + /* validate argument order: prop=val args followed by dataset args */ + for (int i = 1; i < argc; i++) { + if (strchr(argv[i], '=') != NULL) { + if (ds_start > 0) { + /* out-of-order prop=val argument */ + (void) fprintf(stderr, gettext("invalid " + "argument order\n"), i); + usage(B_FALSE); + } + } else if (ds_start < 0) { + ds_start = i; + } + } + if (ds_start < 0) { + (void) fprintf(stderr, gettext("missing dataset name(s)\n")); usage(B_FALSE); } - *cb.cb_value = '\0'; - cb.cb_value++; - - if (*cb.cb_propname == '\0') { - (void) fprintf(stderr, - gettext("missing property in property=value argument\n")); - usage(B_FALSE); + /* Populate a list of property settings */ + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); + for (int i = 1; i < ds_start; i++) { + if ((ret = parseprop(props, argv[i])) != 0) + goto error; } - ret = zfs_for_each(argc - 2, argv + 2, 0, - ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, &cb); + ret = zfs_for_each(argc - ds_start, argv + ds_start, 0, + ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, props); +error: + nvlist_free(props); return (ret); } +typedef struct snap_cbdata { + nvlist_t *sd_nvl; + boolean_t sd_recursive; + const char *sd_snapname; +} snap_cbdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snap_cbdata_t *sd = arg; + char *name; + int rv = 0; + int error; + + if (sd->sd_recursive && + zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) != 0) { + zfs_close(zhp); + return (0); + } + + error = asprintf(&name, "%s@%s", 
zfs_get_name(zhp), sd->sd_snapname); + if (error == -1) + nomem(); + fnvlist_add_boolean(sd->sd_nvl, name); + free(name); + + if (sd->sd_recursive) + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + zfs_close(zhp); + return (rv); +} + /* * zfs snapshot [-r] [-o prop=value] ... * @@ -2509,26 +3693,27 @@ zfs_do_set(int argc, char **argv) static int zfs_do_snapshot(int argc, char **argv) { - boolean_t recursive = B_FALSE; - int ret; - char c; + int ret = 0; + int c; nvlist_t *props; + snap_cbdata_t sd = { 0 }; + boolean_t multiple_snaps = B_FALSE; - if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { - (void) fprintf(stderr, gettext("internal error: " - "out of memory\n")); - return (1); - } + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); + if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ while ((c = getopt(argc, argv, "ro:")) != -1) { switch (c) { case 'o': - if (parseprop(props)) + if (parseprop(props, optarg) != 0) return (1); break; case 'r': - recursive = B_TRUE; + sd.sd_recursive = B_TRUE; + multiple_snaps = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -2545,27 +3730,41 @@ zfs_do_snapshot(int argc, char **argv) (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - goto usage; + + if (argc > 1) + multiple_snaps = B_TRUE; + for (; argc > 0; argc--, argv++) { + char *atp; + zfs_handle_t *zhp; + + atp = strchr(argv[0], '@'); + if (atp == NULL) + goto usage; + *atp = '\0'; + sd.sd_snapname = atp + 1; + zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + goto usage; + if (zfs_snapshot_cb(zhp, &sd) != 0) + goto usage; } - ret = zfs_snapshot(g_zfs, argv[0], recursive, props); + ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); nvlist_free(props); - if (ret && recursive) + if (ret != 0 && multiple_snaps) 
(void) fprintf(stderr, gettext("no snapshots were created\n")); return (ret != 0); usage: + nvlist_free(sd.sd_nvl); nvlist_free(props); usage(B_FALSE); return (-1); } /* - * zfs send [-vDp] -R [-i|-I <@snap>] - * zfs send [-vDp] [-i|-I <@snap>] - * * Send a backup stream to stdout. */ static int @@ -2573,13 +3772,16 @@ zfs_do_send(int argc, char **argv) { char *fromname = NULL; char *toname = NULL; + char *resume_token = NULL; char *cp; zfs_handle_t *zhp; sendflags_t flags = { 0 }; int c, err; + nvlist_t *dbgnv = NULL; + boolean_t extraverbose = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, ":i:I:RDpv")) != -1) { + while ((c = getopt(argc, argv, ":i:I:RDpvnPLet:")) != -1) { switch (c) { case 'i': if (fromname) @@ -2598,12 +3800,31 @@ zfs_do_send(int argc, char **argv) case 'p': flags.props = B_TRUE; break; + case 'P': + flags.parsable = B_TRUE; + flags.verbose = B_TRUE; + break; case 'v': + if (flags.verbose) + extraverbose = B_TRUE; flags.verbose = B_TRUE; + flags.progress = B_TRUE; break; case 'D': flags.dedup = B_TRUE; break; + case 'n': + flags.dryrun = B_TRUE; + break; + case 'L': + flags.largeblock = B_TRUE; + break; + case 'e': + flags.embed_data = B_TRUE; + break; + case 't': + resume_token = optarg; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -2619,29 +3840,87 @@ zfs_do_send(int argc, char **argv) argc -= optind; argv += optind; - /* check number of arguments */ - if (argc < 1) { - (void) fprintf(stderr, gettext("missing snapshot argument\n")); - usage(B_FALSE); - } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - usage(B_FALSE); + if (resume_token != NULL) { + if (fromname != NULL || flags.replicate || flags.props || + flags.dedup) { + (void) fprintf(stderr, + gettext("invalid flags combined with -t\n")); + usage(B_FALSE); + } + if (argc != 0) { + (void) fprintf(stderr, gettext("no additional " + "arguments are permitted with -t\n")); + 
usage(B_FALSE); + } + } else { + if (argc < 1) { + (void) fprintf(stderr, + gettext("missing snapshot argument\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } } - if (isatty(STDOUT_FILENO)) { + if (!flags.dryrun && isatty(STDOUT_FILENO)) { (void) fprintf(stderr, gettext("Error: Stream can not be written to a terminal.\n" "You must redirect standard output.\n")); return (1); } - cp = strchr(argv[0], '@'); - if (cp == NULL) { - (void) fprintf(stderr, - gettext("argument must be a snapshot\n")); - usage(B_FALSE); + if (resume_token != NULL) { + return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO, + resume_token)); + } + + /* + * Special case sending a filesystem, or from a bookmark. + */ + if (strchr(argv[0], '@') == NULL || + (fromname && strchr(fromname, '#') != NULL)) { + char frombuf[ZFS_MAX_DATASET_NAME_LEN]; + enum lzc_send_flags lzc_flags = 0; + + if (flags.replicate || flags.doall || flags.props || + flags.dedup || flags.dryrun || flags.verbose || + flags.progress) { + (void) fprintf(stderr, + gettext("Error: " + "Unsupported flag with filesystem or bookmark.\n")); + return (1); + } + + zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET); + if (zhp == NULL) + return (1); + + if (flags.largeblock) + lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK; + if (flags.embed_data) + lzc_flags |= LZC_SEND_FLAG_EMBED_DATA; + + if (fromname != NULL && + (fromname[0] == '#' || fromname[0] == '@')) { + /* + * Incremental source name begins with # or @. + * Default to same fs as target. 
+ */ + (void) strncpy(frombuf, argv[0], sizeof (frombuf)); + cp = strchr(frombuf, '@'); + if (cp != NULL) + *cp = '\0'; + (void) strlcat(frombuf, fromname, sizeof (frombuf)); + fromname = frombuf; + } + err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags); + zfs_close(zhp); + return (err != 0); } + + cp = strchr(argv[0], '@'); *cp = '\0'; toname = cp + 1; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); @@ -2654,7 +3933,7 @@ zfs_do_send(int argc, char **argv) * case if they specify the origin. */ if (fromname && (cp = strchr(fromname, '@')) != NULL) { - char origin[ZFS_MAXNAMELEN]; + char origin[ZFS_MAX_DATASET_NAME_LEN]; zprop_source_t src; (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, @@ -2683,26 +3962,47 @@ zfs_do_send(int argc, char **argv) if (flags.replicate && fromname == NULL) flags.doall = B_TRUE; - err = zfs_send(zhp, fromname, toname, flags, STDOUT_FILENO, NULL, 0); + err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0, + extraverbose ? &dbgnv : NULL); + + if (extraverbose && dbgnv != NULL) { + /* + * dump_nvlist prints to stdout, but that's been + * redirected to a file. Make it print to stderr + * instead. + */ + (void) dup2(STDERR_FILENO, STDOUT_FILENO); + dump_nvlist(dbgnv, 0); + nvlist_free(dbgnv); + } zfs_close(zhp); return (err != 0); } /* - * zfs receive [-denvF] - * * Restore a backup stream from stdin. 
*/ static int zfs_do_receive(int argc, char **argv) { - int c, err; + int c, err = 0; recvflags_t flags = { 0 }; + boolean_t abort_resumable = B_FALSE; + + nvlist_t *props; + nvpair_t *nvp = NULL; + + if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ - while ((c = getopt(argc, argv, ":denuvF")) != -1) { + while ((c = getopt(argc, argv, ":o:denuvFsA")) != -1) { switch (c) { + case 'o': + if (parseprop(props, optarg) != 0) + return (1); + break; case 'd': flags.isprefix = B_TRUE; break; @@ -2719,9 +4019,15 @@ zfs_do_receive(int argc, char **argv) case 'v': flags.verbose = B_TRUE; break; + case 's': + flags.resumable = B_TRUE; + break; case 'F': flags.force = B_TRUE; break; + case 'A': + abort_resumable = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -2747,6 +4053,51 @@ zfs_do_receive(int argc, char **argv) usage(B_FALSE); } + while ((nvp = nvlist_next_nvpair(props, nvp))) { + if (strcmp(nvpair_name(nvp), "origin") != 0) { + (void) fprintf(stderr, gettext("invalid option")); + usage(B_FALSE); + } + } + + if (abort_resumable) { + if (flags.isprefix || flags.istail || flags.dryrun || + flags.resumable || flags.nomount) { + (void) fprintf(stderr, gettext("invalid option")); + usage(B_FALSE); + } + + char namebuf[ZFS_MAX_DATASET_NAME_LEN]; + (void) snprintf(namebuf, sizeof (namebuf), + "%s/%%recv", argv[0]); + + if (zfs_dataset_exists(g_zfs, namebuf, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { + zfs_handle_t *zhp = zfs_open(g_zfs, + namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + return (1); + err = zfs_destroy(zhp, B_FALSE); + } else { + zfs_handle_t *zhp = zfs_open(g_zfs, + argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + usage(B_FALSE); + if (!zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) || + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == -1) { + (void) fprintf(stderr, + gettext("'%s' does not 
have any " + "resumable receive state to abort\n"), + argv[0]); + return (1); + } + err = zfs_destroy(zhp, B_FALSE); + } + + return (err != 0); + } + if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " @@ -2754,147 +4105,1685 @@ zfs_do_receive(int argc, char **argv) "You must redirect standard input.\n")); return (1); } - - err = zfs_receive(g_zfs, argv[0], flags, STDIN_FILENO, NULL); + err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL); return (err != 0); } -static int -zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) -{ - int errors = 0; - int i; - const char *tag; - boolean_t recursive = B_FALSE; - boolean_t temphold = B_FALSE; - const char *opts = holding ? "rt" : "r"; - int c; +/* + * allow/unallow stuff + */ +/* copied from zfs/sys/dsl_deleg.h */ +#define ZFS_DELEG_PERM_CREATE "create" +#define ZFS_DELEG_PERM_DESTROY "destroy" +#define ZFS_DELEG_PERM_SNAPSHOT "snapshot" +#define ZFS_DELEG_PERM_ROLLBACK "rollback" +#define ZFS_DELEG_PERM_CLONE "clone" +#define ZFS_DELEG_PERM_PROMOTE "promote" +#define ZFS_DELEG_PERM_RENAME "rename" +#define ZFS_DELEG_PERM_MOUNT "mount" +#define ZFS_DELEG_PERM_SHARE "share" +#define ZFS_DELEG_PERM_SEND "send" +#define ZFS_DELEG_PERM_RECEIVE "receive" +#define ZFS_DELEG_PERM_ALLOW "allow" +#define ZFS_DELEG_PERM_USERPROP "userprop" +#define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? 
*/ +#define ZFS_DELEG_PERM_USERQUOTA "userquota" +#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" +#define ZFS_DELEG_PERM_USERUSED "userused" +#define ZFS_DELEG_PERM_GROUPUSED "groupused" +#define ZFS_DELEG_PERM_HOLD "hold" +#define ZFS_DELEG_PERM_RELEASE "release" +#define ZFS_DELEG_PERM_DIFF "diff" +#define ZFS_DELEG_PERM_BOOKMARK "bookmark" + +#define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE + +static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { + { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, + { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, + { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, + { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, + { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, + { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, + { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, + { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, + { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, + { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, + { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, + { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, + { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, + { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, + { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, + { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, + + { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, + { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, + { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, + { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, + { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, + { NULL, ZFS_DELEG_NOTE_NONE } +}; - /* check options */ - while ((c = getopt(argc, argv, opts)) != -1) { - switch (c) { - case 'r': - recursive = B_TRUE; - break; - case 't': - temphold = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } +/* permission structure */ +typedef struct deleg_perm { + zfs_deleg_who_type_t dp_who_type; + const char 
*dp_name; + boolean_t dp_local; + boolean_t dp_descend; +} deleg_perm_t; + +/* */ +typedef struct deleg_perm_node { + deleg_perm_t dpn_perm; + + uu_avl_node_t dpn_avl_node; +} deleg_perm_node_t; + +typedef struct fs_perm fs_perm_t; + +/* permissions set */ +typedef struct who_perm { + zfs_deleg_who_type_t who_type; + const char *who_name; /* id */ + char who_ug_name[256]; /* user/group name */ + fs_perm_t *who_fsperm; /* uplink */ + + uu_avl_t *who_deleg_perm_avl; /* permissions */ +} who_perm_t; + +/* */ +typedef struct who_perm_node { + who_perm_t who_perm; + uu_avl_node_t who_avl_node; +} who_perm_node_t; + +typedef struct fs_perm_set fs_perm_set_t; +/* fs permissions */ +struct fs_perm { + const char *fsp_name; - argc -= optind; - argv += optind; + uu_avl_t *fsp_sc_avl; /* sets,create */ + uu_avl_t *fsp_uge_avl; /* user,group,everyone */ - /* check number of arguments */ - if (argc < 2) - usage(B_FALSE); + fs_perm_set_t *fsp_set; /* uplink */ +}; - tag = argv[0]; - --argc; - ++argv; +/* */ +typedef struct fs_perm_node { + fs_perm_t fspn_fsperm; + uu_avl_t *fspn_avl; + + uu_list_node_t fspn_list_node; +} fs_perm_node_t; + +/* top level structure */ +struct fs_perm_set { + uu_list_pool_t *fsps_list_pool; + uu_list_t *fsps_list; /* list of fs_perms */ + + uu_avl_pool_t *fsps_named_set_avl_pool; + uu_avl_pool_t *fsps_who_perm_avl_pool; + uu_avl_pool_t *fsps_deleg_perm_avl_pool; +}; - if (holding && tag[0] == '.') { - /* tags starting with '.' 
are reserved for libzfs */ - (void) fprintf(stderr, gettext("tag may not start with '.'\n")); - usage(B_FALSE); +static inline const char * +deleg_perm_type(zfs_deleg_note_t note) +{ + /* subcommands */ + switch (note) { + /* SUBCOMMANDS */ + /* OTHER */ + case ZFS_DELEG_NOTE_GROUPQUOTA: + case ZFS_DELEG_NOTE_GROUPUSED: + case ZFS_DELEG_NOTE_USERPROP: + case ZFS_DELEG_NOTE_USERQUOTA: + case ZFS_DELEG_NOTE_USERUSED: + /* other */ + return (gettext("other")); + default: + return (gettext("subcommand")); } +} - for (i = 0; i < argc; ++i) { - zfs_handle_t *zhp; - char parent[ZFS_MAXNAMELEN]; - const char *delim; - char *path = argv[i]; - - delim = strchr(path, '@'); - if (delim == NULL) { - (void) fprintf(stderr, - gettext("'%s' is not a snapshot\n"), path); - ++errors; - continue; - } - (void) strncpy(parent, path, delim - path); - parent[delim - path] = '\0'; - - zhp = zfs_open(g_zfs, parent, - ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); - if (zhp == NULL) { - ++errors; - continue; - } - if (holding) { - if (zfs_hold(zhp, delim+1, tag, recursive, - temphold, B_FALSE) != 0) - ++errors; - } else { - if (zfs_release(zhp, delim+1, tag, recursive) != 0) - ++errors; - } - zfs_close(zhp); +static int +who_type2weight(zfs_deleg_who_type_t who_type) +{ + int res; + switch (who_type) { + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + res = 0; + break; + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_CREATE: + res = 1; + break; + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + res = 2; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + res = 3; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + res = 4; + break; + default: + res = -1; } - return (errors != 0); + return (res); } -/* - * zfs hold [-r] [-t] ... - * - * -r Recursively hold - * -t Temporary hold (hidden option) - * - * Apply a user-hold with the given tag to the list of snapshots. 
- */ +/* ARGSUSED */ static int -zfs_do_hold(int argc, char **argv) +who_perm_compare(const void *larg, const void *rarg, void *unused) { - return (zfs_do_hold_rele_impl(argc, argv, B_TRUE)); + const who_perm_node_t *l = larg; + const who_perm_node_t *r = rarg; + zfs_deleg_who_type_t ltype = l->who_perm.who_type; + zfs_deleg_who_type_t rtype = r->who_perm.who_type; + int lweight = who_type2weight(ltype); + int rweight = who_type2weight(rtype); + int res = lweight - rweight; + if (res == 0) + res = strncmp(l->who_perm.who_name, r->who_perm.who_name, + ZFS_MAX_DELEG_NAME-1); + + if (res == 0) + return (0); + if (res > 0) + return (1); + else + return (-1); } -/* - * zfs release [-r] ... - * - * -r Recursively release - * - * Release a user-hold with the given tag from the list of snapshots. - */ +/* ARGSUSED */ static int -zfs_do_release(int argc, char **argv) +deleg_perm_compare(const void *larg, const void *rarg, void *unused) { - return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); -} + const deleg_perm_node_t *l = larg; + const deleg_perm_node_t *r = rarg; + int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, + ZFS_MAX_DELEG_NAME-1); -typedef struct get_all_cbdata { - zfs_handle_t **cb_handles; - size_t cb_alloc; - size_t cb_used; - uint_t cb_types; - boolean_t cb_verbose; -} get_all_cbdata_t; + if (res == 0) + return (0); -#define CHECK_SPINNER 30 -#define SPINNER_TIME 3 /* seconds */ -#define MOUNT_TIME 5 /* seconds */ + if (res > 0) + return (1); + else + return (-1); +} -static int -get_one_dataset(zfs_handle_t *zhp, void *data) +static inline void +fs_perm_set_init(fs_perm_set_t *fspset) { - static char spin[] = { '-', '\\', '|', '/' }; - static int spinval = 0; - static int spincheck = 0; + bzero(fspset, sizeof (fs_perm_set_t)); + + if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", + sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), + NULL, UU_DEFAULT)) == NULL) + nomem(); + if ((fspset->fsps_list = 
uu_list_create(fspset->fsps_list_pool, NULL, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( + "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( + who_perm_node_t, who_avl_node), who_perm_compare, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( + "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( + who_perm_node_t, who_avl_node), who_perm_compare, + UU_DEFAULT)) == NULL) + nomem(); + + if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( + "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( + deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT)) + == NULL) + nomem(); +} + +static inline void fs_perm_fini(fs_perm_t *); +static inline void who_perm_fini(who_perm_t *); + +static inline void +fs_perm_set_fini(fs_perm_set_t *fspset) +{ + fs_perm_node_t *node = uu_list_first(fspset->fsps_list); + + while (node != NULL) { + fs_perm_node_t *next_node = + uu_list_next(fspset->fsps_list, node); + fs_perm_t *fsperm = &node->fspn_fsperm; + fs_perm_fini(fsperm); + uu_list_remove(fspset->fsps_list, node); + free(node); + node = next_node; + } + + uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); + uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); + uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); +} + +static inline void +deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, + const char *name) +{ + deleg_perm->dp_who_type = type; + deleg_perm->dp_name = name; +} + +static inline void +who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, + zfs_deleg_who_type_t type, const char *name) +{ + uu_avl_pool_t *pool; + pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; + + bzero(who_perm, sizeof (who_perm_t)); + + if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, + UU_DEFAULT)) == NULL) + nomem(); + + who_perm->who_type = type; + who_perm->who_name = name; + who_perm->who_fsperm = fsperm; +} + +static 
inline void +who_perm_fini(who_perm_t *who_perm) +{ + deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); + + while (node != NULL) { + deleg_perm_node_t *next_node = + uu_avl_next(who_perm->who_deleg_perm_avl, node); + + uu_avl_remove(who_perm->who_deleg_perm_avl, node); + free(node); + node = next_node; + } + + uu_avl_destroy(who_perm->who_deleg_perm_avl); +} + +static inline void +fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) +{ + uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; + uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; + + bzero(fsperm, sizeof (fs_perm_t)); + + if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) + == NULL) + nomem(); + + if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) + == NULL) + nomem(); + + fsperm->fsp_set = fspset; + fsperm->fsp_name = fsname; +} + +static inline void +fs_perm_fini(fs_perm_t *fsperm) +{ + who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); + while (node != NULL) { + who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, + node); + who_perm_t *who_perm = &node->who_perm; + who_perm_fini(who_perm); + uu_avl_remove(fsperm->fsp_sc_avl, node); + free(node); + node = next_node; + } + + node = uu_avl_first(fsperm->fsp_uge_avl); + while (node != NULL) { + who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, + node); + who_perm_t *who_perm = &node->who_perm; + who_perm_fini(who_perm); + uu_avl_remove(fsperm->fsp_uge_avl, node); + free(node); + node = next_node; + } + + uu_avl_destroy(fsperm->fsp_sc_avl); + uu_avl_destroy(fsperm->fsp_uge_avl); +} + +static void +set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, + zfs_deleg_who_type_t who_type, const char *name, char locality) +{ + uu_avl_index_t idx = 0; + + deleg_perm_node_t *found_node = NULL; + deleg_perm_t *deleg_perm = &node->dpn_perm; + + deleg_perm_init(deleg_perm, who_type, name); + + if ((found_node = uu_avl_find(avl, node, NULL, 
&idx)) + == NULL) + uu_avl_insert(avl, node, idx); + else { + node = found_node; + deleg_perm = &node->dpn_perm; + } + + + switch (locality) { + case ZFS_DELEG_LOCAL: + deleg_perm->dp_local = B_TRUE; + break; + case ZFS_DELEG_DESCENDENT: + deleg_perm->dp_descend = B_TRUE; + break; + case ZFS_DELEG_NA: + break; + default: + assert(B_FALSE); /* invalid locality */ + } +} + +static inline int +parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) +{ + nvpair_t *nvp = NULL; + fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; + uu_avl_t *avl = who_perm->who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_perm->who_type; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *name = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; + deleg_perm_node_t *node = + safe_malloc(sizeof (deleg_perm_node_t)); + + assert(type == DATA_TYPE_BOOLEAN); + + uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); + set_deleg_perm_node(avl, node, who_type, name, locality); + } + + return (0); +} + +static inline int +parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) +{ + nvpair_t *nvp = NULL; + fs_perm_set_t *fspset = fsperm->fsp_set; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + nvlist_t *nvl2 = NULL; + const char *name = nvpair_name(nvp); + uu_avl_t *avl = NULL; + uu_avl_pool_t *avl_pool = NULL; + zfs_deleg_who_type_t perm_type = name[0]; + char perm_locality = name[1]; + const char *perm_name = name + 3; + boolean_t is_set = B_TRUE; + who_perm_t *who_perm = NULL; + + assert('$' == name[2]); + + if (nvpair_value_nvlist(nvp, &nvl2) != 0) + return (-1); + + switch (perm_type) { + case ZFS_DELEG_CREATE: + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_NAMED_SET: + case ZFS_DELEG_NAMED_SET_SETS: + avl_pool = fspset->fsps_named_set_avl_pool; + avl = fsperm->fsp_sc_avl; + break; + case ZFS_DELEG_USER: + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_GROUP: + case 
ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_EVERYONE: + case ZFS_DELEG_EVERYONE_SETS: + avl_pool = fspset->fsps_who_perm_avl_pool; + avl = fsperm->fsp_uge_avl; + break; + + default: + assert(!"unhandled zfs_deleg_who_type_t"); + } + + if (is_set) { + who_perm_node_t *found_node = NULL; + who_perm_node_t *node = safe_malloc( + sizeof (who_perm_node_t)); + who_perm = &node->who_perm; + uu_avl_index_t idx = 0; + + uu_avl_node_init(node, &node->who_avl_node, avl_pool); + who_perm_init(who_perm, fsperm, perm_type, perm_name); + + if ((found_node = uu_avl_find(avl, node, NULL, &idx)) + == NULL) { + if (avl == fsperm->fsp_uge_avl) { + uid_t rid = 0; + struct passwd *p = NULL; + struct group *g = NULL; + const char *nice_name = NULL; + + switch (perm_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + rid = atoi(perm_name); + p = getpwuid(rid); + if (p) + nice_name = p->pw_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + rid = atoi(perm_name); + g = getgrgid(rid); + if (g) + nice_name = g->gr_name; + break; + + default: + break; + } + + if (nice_name != NULL) + (void) strlcpy( + node->who_perm.who_ug_name, + nice_name, 256); + } + + uu_avl_insert(avl, node, idx); + } else { + node = found_node; + who_perm = &node->who_perm; + } + } + + (void) parse_who_perm(who_perm, nvl2, perm_locality); + } + + return (0); +} + +static inline int +parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) +{ + nvpair_t *nvp = NULL; + uu_avl_index_t idx = 0; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + nvlist_t *nvl2 = NULL; + const char *fsname = nvpair_name(nvp); + data_type_t type = nvpair_type(nvp); + fs_perm_t *fsperm = NULL; + fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); + if (node == NULL) + nomem(); + + fsperm = &node->fspn_fsperm; + + assert(DATA_TYPE_NVLIST == type); + + uu_list_node_init(node, &node->fspn_list_node, + fspset->fsps_list_pool); + + idx = uu_list_numnodes(fspset->fsps_list); + fs_perm_init(fsperm, fspset, 
fsname); + + if (nvpair_value_nvlist(nvp, &nvl2) != 0) + return (-1); + + (void) parse_fs_perm(fsperm, nvl2); + + uu_list_insert(fspset->fsps_list, node, idx); + } + + return (0); +} + +static inline const char * +deleg_perm_comment(zfs_deleg_note_t note) +{ + const char *str = ""; + + /* subcommands */ + switch (note) { + /* SUBCOMMANDS */ + case ZFS_DELEG_NOTE_ALLOW: + str = gettext("Must also have the permission that is being" + "\n\t\t\t\tallowed"); + break; + case ZFS_DELEG_NOTE_CLONE: + str = gettext("Must also have the 'create' ability and 'mount'" + "\n\t\t\t\tability in the origin file system"); + break; + case ZFS_DELEG_NOTE_CREATE: + str = gettext("Must also have the 'mount' ability"); + break; + case ZFS_DELEG_NOTE_DESTROY: + str = gettext("Must also have the 'mount' ability"); + break; + case ZFS_DELEG_NOTE_DIFF: + str = gettext("Allows lookup of paths within a dataset;" + "\n\t\t\t\tgiven an object number. Ordinary users need this" + "\n\t\t\t\tin order to use zfs diff"); + break; + case ZFS_DELEG_NOTE_HOLD: + str = gettext("Allows adding a user hold to a snapshot"); + break; + case ZFS_DELEG_NOTE_MOUNT: + str = gettext("Allows mount/umount of ZFS datasets"); + break; + case ZFS_DELEG_NOTE_PROMOTE: + str = gettext("Must also have the 'mount'\n\t\t\t\tand" + " 'promote' ability in the origin file system"); + break; + case ZFS_DELEG_NOTE_RECEIVE: + str = gettext("Must also have the 'mount' and 'create'" + " ability"); + break; + case ZFS_DELEG_NOTE_RELEASE: + str = gettext("Allows releasing a user hold which\n\t\t\t\t" + "might destroy the snapshot"); + break; + case ZFS_DELEG_NOTE_RENAME: + str = gettext("Must also have the 'mount' and 'create'" + "\n\t\t\t\tability in the new parent"); + break; + case ZFS_DELEG_NOTE_ROLLBACK: + str = gettext(""); + break; + case ZFS_DELEG_NOTE_SEND: + str = gettext(""); + break; + case ZFS_DELEG_NOTE_SHARE: + str = gettext("Allows sharing file systems over NFS or SMB" + "\n\t\t\t\tprotocols"); + break; + case 
ZFS_DELEG_NOTE_SNAPSHOT: + str = gettext(""); + break; +/* + * case ZFS_DELEG_NOTE_VSCAN: + * str = gettext(""); + * break; + */ + /* OTHER */ + case ZFS_DELEG_NOTE_GROUPQUOTA: + str = gettext("Allows accessing any groupquota@... property"); + break; + case ZFS_DELEG_NOTE_GROUPUSED: + str = gettext("Allows reading any groupused@... property"); + break; + case ZFS_DELEG_NOTE_USERPROP: + str = gettext("Allows changing any user property"); + break; + case ZFS_DELEG_NOTE_USERQUOTA: + str = gettext("Allows accessing any userquota@... property"); + break; + case ZFS_DELEG_NOTE_USERUSED: + str = gettext("Allows reading any userused@... property"); + break; + /* other */ + default: + str = ""; + } + + return (str); +} + +struct allow_opts { + boolean_t local; + boolean_t descend; + boolean_t user; + boolean_t group; + boolean_t everyone; + boolean_t create; + boolean_t set; + boolean_t recursive; /* unallow only */ + boolean_t prt_usage; + + boolean_t prt_perms; + char *who; + char *perms; + const char *dataset; +}; + +static inline int +prop_cmp(const void *a, const void *b) +{ + const char *str1 = *(const char **)a; + const char *str2 = *(const char **)b; + return (strcmp(str1, str2)); +} + +static void +allow_usage(boolean_t un, boolean_t requested, const char *msg) +{ + const char *opt_desc[] = { + "-h", gettext("show this help message and exit"), + "-l", gettext("set permission locally"), + "-d", gettext("set permission for descents"), + "-u", gettext("set permission for user"), + "-g", gettext("set permission for group"), + "-e", gettext("set permission for everyone"), + "-c", gettext("set create time permission"), + "-s", gettext("define permission set"), + /* unallow only */ + "-r", gettext("remove permissions recursively"), + }; + size_t unallow_size = sizeof (opt_desc) / sizeof (char *); + size_t allow_size = unallow_size - 2; + const char *props[ZFS_NUM_PROPS]; + int i; + size_t count = 0; + FILE *fp = requested ? 
stdout : stderr; + zprop_desc_t *pdtbl = zfs_prop_get_table(); + const char *fmt = gettext("%-16s %-14s\t%s\n"); + + (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : + HELP_ALLOW)); + (void) fprintf(fp, gettext("Options:\n")); + for (i = 0; i < (un ? unallow_size : allow_size); i++) { + const char *opt = opt_desc[i++]; + const char *optdsc = opt_desc[i]; + (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); + } + + (void) fprintf(fp, gettext("\nThe following permissions are " + "supported:\n\n")); + (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), + gettext("NOTES")); + for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { + const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; + zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; + const char *perm_type = deleg_perm_type(perm_note); + const char *perm_comment = deleg_perm_comment(perm_note); + (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); + } + + for (i = 0; i < ZFS_NUM_PROPS; i++) { + zprop_desc_t *pd = &pdtbl[i]; + if (pd->pd_visible != B_TRUE) + continue; + + if (pd->pd_attr == PROP_READONLY) + continue; + + props[count++] = pd->pd_name; + } + props[count] = NULL; + + qsort(props, count, sizeof (char *), prop_cmp); + + for (i = 0; i < count; i++) + (void) fprintf(fp, fmt, props[i], gettext("property"), ""); + + if (msg != NULL) + (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); + + exit(requested ? 
0 : 2); +} + +static inline const char * +munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, + char **permsp) +{ + if (un && argc == expected_argc - 1) + *permsp = NULL; + else if (argc == expected_argc) + *permsp = argv[argc - 2]; + else + allow_usage(un, B_FALSE, + gettext("wrong number of parameters\n")); + + return (argv[argc - 1]); +} + +static void +parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) +{ + int uge_sum = opts->user + opts->group + opts->everyone; + int csuge_sum = opts->create + opts->set + uge_sum; + int ldcsuge_sum = csuge_sum + opts->local + opts->descend; + int all_sum = un ? ldcsuge_sum + opts->recursive : ldcsuge_sum; + + if (uge_sum > 1) + allow_usage(un, B_FALSE, + gettext("-u, -g, and -e are mutually exclusive\n")); + + if (opts->prt_usage) { + if (argc == 0 && all_sum == 0) + allow_usage(un, B_TRUE, NULL); + else + usage(B_FALSE); + } + + if (opts->set) { + if (csuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -s\n")); + + opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); + if (argv[0][0] != '@') + allow_usage(un, B_FALSE, + gettext("invalid set name: missing '@' prefix\n")); + opts->who = argv[0]; + } else if (opts->create) { + if (ldcsuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -c\n")); + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (opts->everyone) { + if (csuge_sum > 1) + allow_usage(un, B_FALSE, + gettext("invalid options combined with -e\n")); + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") + == 0) { + opts->everyone = B_TRUE; + argc--; + argv++; + opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); + } else if (argc == 1 && !un) { + opts->prt_perms = B_TRUE; + opts->dataset = argv[argc-1]; + } else { + opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); + opts->who = argv[0]; 
+ } + + if (!opts->local && !opts->descend) { + opts->local = B_TRUE; + opts->descend = B_TRUE; + } +} + +static void +store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, + const char *who, char *perms, nvlist_t *top_nvl) +{ + int i; + char ld[2] = { '\0', '\0' }; + char who_buf[MAXNAMELEN + 32]; + char base_type = '\0'; + char set_type = '\0'; + nvlist_t *base_nvl = NULL; + nvlist_t *set_nvl = NULL; + nvlist_t *nvl; + + if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + switch (type) { + case ZFS_DELEG_NAMED_SET_SETS: + case ZFS_DELEG_NAMED_SET: + set_type = ZFS_DELEG_NAMED_SET_SETS; + base_type = ZFS_DELEG_NAMED_SET; + ld[0] = ZFS_DELEG_NA; + break; + case ZFS_DELEG_CREATE_SETS: + case ZFS_DELEG_CREATE: + set_type = ZFS_DELEG_CREATE_SETS; + base_type = ZFS_DELEG_CREATE; + ld[0] = ZFS_DELEG_NA; + break; + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + set_type = ZFS_DELEG_USER_SETS; + base_type = ZFS_DELEG_USER; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + set_type = ZFS_DELEG_GROUP_SETS; + base_type = ZFS_DELEG_GROUP; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + set_type = ZFS_DELEG_EVERYONE_SETS; + base_type = ZFS_DELEG_EVERYONE; + if (local) + ld[0] = ZFS_DELEG_LOCAL; + if (descend) + ld[1] = ZFS_DELEG_DESCENDENT; + break; + + default: + assert(set_type != '\0' && base_type != '\0'); + } + + if (perms != NULL) { + char *curr = perms; + char *end = curr + strlen(perms); + + while (curr < end) { + char *delim = strchr(curr, ','); + if (delim == NULL) + delim = end; + else + *delim = '\0'; + + if (curr[0] == '@') + nvl = set_nvl; + else + nvl = base_nvl; + + (void) nvlist_add_boolean(nvl, curr); + if (delim != end) + *delim = ','; + curr 
= delim + 1; + } + + for (i = 0; i < 2; i++) { + char locality = ld[i]; + if (locality == 0) + continue; + + if (!nvlist_empty(base_nvl)) { + if (who != NULL) + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$%s", + base_type, locality, who); + else + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$", + base_type, locality); + + (void) nvlist_add_nvlist(top_nvl, who_buf, + base_nvl); + } + + + if (!nvlist_empty(set_nvl)) { + if (who != NULL) + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$%s", + set_type, locality, who); + else + (void) snprintf(who_buf, + sizeof (who_buf), "%c%c$", + set_type, locality); + + (void) nvlist_add_nvlist(top_nvl, who_buf, + set_nvl); + } + } + } else { + for (i = 0; i < 2; i++) { + char locality = ld[i]; + if (locality == 0) + continue; + + if (who != NULL) + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$%s", base_type, locality, who); + else + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$", base_type, locality); + (void) nvlist_add_boolean(top_nvl, who_buf); + + if (who != NULL) + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$%s", set_type, locality, who); + else + (void) snprintf(who_buf, sizeof (who_buf), + "%c%c$", set_type, locality); + (void) nvlist_add_boolean(top_nvl, who_buf); + } + } +} + +static int +construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) +{ + if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + if (opts->set) { + store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, + opts->descend, opts->who, opts->perms, *nvlp); + } else if (opts->create) { + store_allow_perm(ZFS_DELEG_CREATE, opts->local, + opts->descend, NULL, opts->perms, *nvlp); + } else if (opts->everyone) { + store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, + opts->descend, NULL, opts->perms, *nvlp); + } else { + char *curr = opts->who; + char *end = curr + strlen(curr); + + while (curr < end) { + const char *who; + zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN; + char *endch; + char 
*delim = strchr(curr, ','); + char errbuf[256]; + char id[64]; + struct passwd *p = NULL; + struct group *g = NULL; + + uid_t rid; + if (delim == NULL) + delim = end; + else + *delim = '\0'; + + rid = (uid_t)strtol(curr, &endch, 0); + if (opts->user) { + who_type = ZFS_DELEG_USER; + if (*endch != '\0') + p = getpwnam(curr); + else + p = getpwuid(rid); + + if (p != NULL) + rid = p->pw_uid; + else { + (void) snprintf(errbuf, 256, gettext( + "invalid user %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } else if (opts->group) { + who_type = ZFS_DELEG_GROUP; + if (*endch != '\0') + g = getgrnam(curr); + else + g = getgrgid(rid); + + if (g != NULL) + rid = g->gr_gid; + else { + (void) snprintf(errbuf, 256, gettext( + "invalid group %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } else { + if (*endch != '\0') { + p = getpwnam(curr); + } else { + p = getpwuid(rid); + } + + if (p == NULL) { + if (*endch != '\0') { + g = getgrnam(curr); + } else { + g = getgrgid(rid); + } + } + + if (p != NULL) { + who_type = ZFS_DELEG_USER; + rid = p->pw_uid; + } else if (g != NULL) { + who_type = ZFS_DELEG_GROUP; + rid = g->gr_gid; + } else { + (void) snprintf(errbuf, 256, gettext( + "invalid user/group %s"), curr); + allow_usage(un, B_TRUE, errbuf); + } + } + + (void) sprintf(id, "%u", rid); + who = id; + + store_allow_perm(who_type, opts->local, + opts->descend, who, opts->perms, *nvlp); + curr = delim + 1; + } + } + + return (0); +} + +static void +print_set_creat_perms(uu_avl_t *who_avl) +{ + const char *sc_title[] = { + gettext("Permission sets:\n"), + gettext("Create time permissions:\n"), + NULL + }; + const char **title_ptr = sc_title; + who_perm_node_t *who_node = NULL; + int prev_weight = -1; + + for (who_node = uu_avl_first(who_avl); who_node != NULL; + who_node = uu_avl_next(who_avl, who_node)) { + uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; + const char *who_name = 
who_node->who_perm.who_name; + int weight = who_type2weight(who_type); + boolean_t first = B_TRUE; + deleg_perm_node_t *deleg_node; + + if (prev_weight != weight) { + (void) printf("%s", *title_ptr++); + prev_weight = weight; + } + + if (who_name == NULL || strnlen(who_name, 1) == 0) + (void) printf("\t"); + else + (void) printf("\t%s ", who_name); + + for (deleg_node = uu_avl_first(avl); deleg_node != NULL; + deleg_node = uu_avl_next(avl, deleg_node)) { + if (first) { + (void) printf("%s", + deleg_node->dpn_perm.dp_name); + first = B_FALSE; + } else + (void) printf(",%s", + deleg_node->dpn_perm.dp_name); + } + + (void) printf("\n"); + } +} + +static void +print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, + const char *title) +{ + who_perm_node_t *who_node = NULL; + boolean_t prt_title = B_TRUE; + uu_avl_walk_t *walk; + + if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) + nomem(); + + while ((who_node = uu_avl_walk_next(walk)) != NULL) { + const char *who_name = who_node->who_perm.who_name; + const char *nice_who_name = who_node->who_perm.who_ug_name; + uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; + zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; + char delim = ' '; + deleg_perm_node_t *deleg_node; + boolean_t prt_who = B_TRUE; + + for (deleg_node = uu_avl_first(avl); + deleg_node != NULL; + deleg_node = uu_avl_next(avl, deleg_node)) { + if (local != deleg_node->dpn_perm.dp_local || + descend != deleg_node->dpn_perm.dp_descend) + continue; + + if (prt_who) { + const char *who = NULL; + if (prt_title) { + prt_title = B_FALSE; + (void) printf("%s", title); + } + + switch (who_type) { + case ZFS_DELEG_USER_SETS: + case ZFS_DELEG_USER: + who = gettext("user"); + if (nice_who_name) + who_name = nice_who_name; + break; + case ZFS_DELEG_GROUP_SETS: + case ZFS_DELEG_GROUP: + who = gettext("group"); + if (nice_who_name) + who_name = nice_who_name; + break; + case ZFS_DELEG_EVERYONE_SETS: + case ZFS_DELEG_EVERYONE: + who
= gettext("everyone"); + who_name = NULL; + break; + + default: + assert(who != NULL); + } + + prt_who = B_FALSE; + if (who_name == NULL) + (void) printf("\t%s", who); + else + (void) printf("\t%s %s", who, who_name); + } + + (void) printf("%c%s", delim, + deleg_node->dpn_perm.dp_name); + delim = ','; + } + + if (!prt_who) + (void) printf("\n"); + } + + uu_avl_walk_end(walk); +} + +static void +print_fs_perms(fs_perm_set_t *fspset) +{ + fs_perm_node_t *node = NULL; + char buf[MAXNAMELEN + 32]; + const char *dsname = buf; + + for (node = uu_list_first(fspset->fsps_list); node != NULL; + node = uu_list_next(fspset->fsps_list, node)) { + uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; + uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; + int left = 0; + + (void) snprintf(buf, sizeof (buf), + gettext("---- Permissions on %s "), + node->fspn_fsperm.fsp_name); + (void) printf("%s", dsname); + left = 70 - strlen(buf); + while (left-- > 0) + (void) printf("-"); + (void) printf("\n"); + + print_set_creat_perms(sc_avl); + print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, + gettext("Local permissions:\n")); + print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, + gettext("Descendent permissions:\n")); + print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, + gettext("Local+Descendent permissions:\n")); + } +} + +static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; + +struct deleg_perms { + boolean_t un; + nvlist_t *nvl; +}; + +static int +set_deleg_perms(zfs_handle_t *zhp, void *data) +{ + struct deleg_perms *perms = (struct deleg_perms *)data; + zfs_type_t zfs_type = zfs_get_type(zhp); + + if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) + return (0); + + return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); +} + +static int +zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) +{ + zfs_handle_t *zhp; + nvlist_t *perm_nvl = NULL; + nvlist_t *update_perm_nvl = NULL; + int error = 1; + int c; + struct allow_opts opts = { 0 }; + + const char *optstr = un ?
"ldugecsrh" : "ldugecsh"; + + /* check opts */ + while ((c = getopt(argc, argv, optstr)) != -1) { + switch (c) { + case 'l': + opts.local = B_TRUE; + break; + case 'd': + opts.descend = B_TRUE; + break; + case 'u': + opts.user = B_TRUE; + break; + case 'g': + opts.group = B_TRUE; + break; + case 'e': + opts.everyone = B_TRUE; + break; + case 's': + opts.set = B_TRUE; + break; + case 'c': + opts.create = B_TRUE; + break; + case 'r': + opts.recursive = B_TRUE; + break; + case ':': + (void) fprintf(stderr, gettext("missing argument for " + "'%c' option\n"), optopt); + usage(B_FALSE); + break; + case 'h': + opts.prt_usage = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check arguments */ + parse_allow_args(argc, argv, un, &opts); + + /* try to open the dataset */ + if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME)) == NULL) { + (void) fprintf(stderr, "Failed to open dataset: %s\n", + opts.dataset); + return (-1); + } + + if (zfs_get_fsacl(zhp, &perm_nvl) != 0) + goto cleanup2; + + fs_perm_set_init(&fs_perm_set); + if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { + (void) fprintf(stderr, "Failed to parse fsacl permissions\n"); + goto cleanup1; + } + + if (opts.prt_perms) + print_fs_perms(&fs_perm_set); + else { + (void) construct_fsacl_list(un, &opts, &update_perm_nvl); + if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) + goto cleanup0; + + if (un && opts.recursive) { + struct deleg_perms data = { un, update_perm_nvl }; + if (zfs_iter_filesystems(zhp, set_deleg_perms, + &data) != 0) + goto cleanup0; + } + } + + error = 0; + +cleanup0: + nvlist_free(perm_nvl); + nvlist_free(update_perm_nvl); +cleanup1: + fs_perm_set_fini(&fs_perm_set); +cleanup2: + zfs_close(zhp); + + return (error); +} + +static int +zfs_do_allow(int argc, char **argv) +{ + return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); +} + +static 
int +zfs_do_unallow(int argc, char **argv) +{ + return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); +} + +static int +zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) +{ + int errors = 0; + int i; + const char *tag; + boolean_t recursive = B_FALSE; + const char *opts = holding ? "rt" : "r"; + int c; + + /* check options */ + while ((c = getopt(argc, argv, opts)) != -1) { + switch (c) { + case 'r': + recursive = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 2) + usage(B_FALSE); + + tag = argv[0]; + --argc; + ++argv; + + if (holding && tag[0] == '.') { + /* tags starting with '.' are reserved for libzfs */ + (void) fprintf(stderr, gettext("tag may not start with '.'\n")); + usage(B_FALSE); + } + + for (i = 0; i < argc; ++i) { + zfs_handle_t *zhp; + char parent[ZFS_MAX_DATASET_NAME_LEN]; + const char *delim; + char *path = argv[i]; + + delim = strchr(path, '@'); + if (delim == NULL) { + (void) fprintf(stderr, + gettext("'%s' is not a snapshot\n"), path); + ++errors; + continue; + } + (void) strncpy(parent, path, delim - path); + parent[delim - path] = '\0'; + + zhp = zfs_open(g_zfs, parent, + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) { + ++errors; + continue; + } + if (holding) { + if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0) + ++errors; + } else { + if (zfs_release(zhp, delim+1, tag, recursive) != 0) + ++errors; + } + zfs_close(zhp); + } + + return (errors != 0); +} + +/* + * zfs hold [-r] [-t] ... + * + * -r Recursively hold + * + * Apply a user-hold with the given tag to the list of snapshots. + */ +static int +zfs_do_hold(int argc, char **argv) +{ + return (zfs_do_hold_rele_impl(argc, argv, B_TRUE)); +} + +/* + * zfs release [-r] ... + * + * -r Recursively release + * + * Release a user-hold with the given tag from the list of snapshots. 
+ */ +static int +zfs_do_release(int argc, char **argv) +{ + return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); +} + +typedef struct holds_cbdata { + boolean_t cb_recursive; + const char *cb_snapname; + nvlist_t **cb_nvlp; + size_t cb_max_namelen; + size_t cb_max_taglen; +} holds_cbdata_t; + +#define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" +#define DATETIME_BUF_LEN (32) +/* + * Print one row per hold in nvl: dataset name, tag and timestamp. + */ +static void +print_holds(boolean_t scripted, boolean_t literal, size_t nwidth, + size_t tagwidth, nvlist_t *nvl) +{ + int i; + nvpair_t *nvp = NULL; + char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; + const char *col; + + if (!scripted) { + for (i = 0; i < 3; i++) { + col = gettext(hdr_cols[i]); + if (i < 2) + (void) printf("%-*s ", (int)(i ? tagwidth : nwidth), + col); + else + (void) printf("%s\n", col); + } + } + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + char *zname = nvpair_name(nvp); + nvlist_t *nvl2; + nvpair_t *nvp2 = NULL; + (void) nvpair_value_nvlist(nvp, &nvl2); + while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { + char tsbuf[DATETIME_BUF_LEN]; + char *tagname = nvpair_name(nvp2); + uint64_t val = 0; + time_t time; + struct tm t; + char sep = scripted ? '\t' : ' '; + int sepnum = scripted ? 1 : 2; + + (void) nvpair_value_uint64(nvp2, &val); + if (literal) + (void) snprintf(tsbuf, DATETIME_BUF_LEN, "%llu", (unsigned long long)val); + else { + time = (time_t)val; + (void) localtime_r(&time, &t); + (void) strftime(tsbuf, DATETIME_BUF_LEN, + gettext(STRFTIME_FMT_STR), &t); + } + + (void) printf("%-*s%*c%-*s%*c%s\n", (int)nwidth, zname, + sepnum, sep, (int)tagwidth, tagname, sepnum, sep, tsbuf); + } + } +} + +/* + * Generic callback function to list a dataset or snapshot.
+ */ +static int +holds_callback(zfs_handle_t *zhp, void *data) +{ + holds_cbdata_t *cbp = data; + nvlist_t *top_nvl = *cbp->cb_nvlp; + nvlist_t *nvl = NULL; + nvpair_t *nvp = NULL; + const char *zname = zfs_get_name(zhp); + size_t znamelen = strlen(zname); + + if (cbp->cb_recursive && cbp->cb_snapname != NULL) { + const char *snapname; + char *delim = strchr(zname, '@'); + if (delim == NULL) + return (0); + + snapname = delim + 1; + if (strcmp(cbp->cb_snapname, snapname)) + return (0); + } + + if (zfs_get_holds(zhp, &nvl) != 0) + return (-1); + + if (znamelen > cbp->cb_max_namelen) + cbp->cb_max_namelen = znamelen; + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + const char *tag = nvpair_name(nvp); + size_t taglen = strlen(tag); + if (taglen > cbp->cb_max_taglen) + cbp->cb_max_taglen = taglen; + } + + return (nvlist_add_nvlist(top_nvl, zname, nvl)); +} + +/* + * zfs holds [-Hp] [-r | -d max] ... + * + * -H Suppress header output + * -p Output literal values + * -r Recursively search for holds + * -d max Limit depth of recursive search + */ +static int +zfs_do_holds(int argc, char **argv) +{ + int errors = 0; + int c; + int i; + boolean_t scripted = B_FALSE; + boolean_t literal = B_FALSE; + boolean_t recursive = B_FALSE; + const char *opts = "d:rHp"; + nvlist_t *nvl; + + int types = ZFS_TYPE_SNAPSHOT; + holds_cbdata_t cb = { 0 }; + + int limit = 0; + int ret = 0; + int flags = 0; + + /* check options */ + while ((c = getopt(argc, argv, opts)) != -1) { + switch (c) { + case 'd': + limit = parse_depth(optarg, &flags); + recursive = B_TRUE; + break; + case 'r': + recursive = B_TRUE; + break; + case 'H': + scripted = B_TRUE; + break; + case 'p': + literal = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + if (recursive) { + types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; + flags |= ZFS_ITER_RECURSE; + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + 
if (argc < 1) + usage(B_FALSE); + + if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); + + for (i = 0; i < argc; ++i) { + char *snapshot = argv[i]; + const char *delim; + const char *snapname = NULL; + + delim = strchr(snapshot, '@'); + if (delim != NULL) { + snapname = delim + 1; + if (recursive) + snapshot[delim - snapshot] = '\0'; + } + + cb.cb_recursive = recursive; + cb.cb_snapname = snapname; + cb.cb_nvlp = &nvl; + + /* + * 1. collect holds data for this argument, set format options + */ + ret = zfs_for_each(1, &argv[i], flags, types, NULL, NULL, limit, + holds_callback, &cb); + if (ret != 0) + ++errors; + } + + /* + * 2. print holds data + */ + print_holds(scripted, literal, cb.cb_max_namelen, cb.cb_max_taglen, + nvl); + + if (nvlist_empty(nvl)) + (void) printf(gettext("no datasets available\n")); + + nvlist_free(nvl); + + return (0 != errors); +} + +#define CHECK_SPINNER 30 +#define SPINNER_TIME 3 /* seconds */ +#define MOUNT_TIME 5 /* seconds */ + +static int +get_one_dataset(zfs_handle_t *zhp, void *data) +{ + static char *spin[] = { "-", "\\", "|", "/" }; + static int spinval = 0; + static int spincheck = 0; static time_t last_spin_time = (time_t)0; - get_all_cbdata_t *cbp = data; + get_all_cb_t *cbp = data; zfs_type_t type = zfs_get_type(zhp); if (cbp->cb_verbose) { if (--spincheck < 0) { time_t now = time(NULL); if (last_spin_time + SPINNER_TIME < now) { - (void) printf("\b%c", spin[spinval++ % 4]); - (void) fflush(stdout); + update_progress(spin[spinval++ % 4]); last_spin_time = now; } spincheck = CHECK_SPINNER; @@ -2904,8 +5793,7 @@ get_one_dataset(zfs_handle_t *zhp, void /* * Interate over any nested datasets. */ - if (type == ZFS_TYPE_FILESYSTEM && - zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { + if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { zfs_close(zhp); return (1); } @@ -2913,83 +5801,32 @@ get_one_dataset(zfs_handle_t *zhp, void /* * Skip any datasets whose type does not match.
*/ - if ((type & cbp->cb_types) == 0) { + if ((type & ZFS_TYPE_FILESYSTEM) == 0) { zfs_close(zhp); return (0); } - - if (cbp->cb_alloc == cbp->cb_used) { - zfs_handle_t **handles; - - if (cbp->cb_alloc == 0) - cbp->cb_alloc = 64; - else - cbp->cb_alloc *= 2; - - handles = safe_malloc(cbp->cb_alloc * sizeof (void *)); - - if (cbp->cb_handles) { - bcopy(cbp->cb_handles, handles, - cbp->cb_used * sizeof (void *)); - free(cbp->cb_handles); - } - - cbp->cb_handles = handles; - } - - cbp->cb_handles[cbp->cb_used++] = zhp; + libzfs_add_handle(cbp, zhp); + assert(cbp->cb_used <= cbp->cb_alloc); return (0); } static void -get_all_datasets(uint_t types, zfs_handle_t ***dslist, size_t *count, - boolean_t verbose) +get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose) { - get_all_cbdata_t cb = { 0 }; - cb.cb_types = types; + get_all_cb_t cb = { 0 }; cb.cb_verbose = verbose; + cb.cb_getone = get_one_dataset; - if (verbose) { - (void) printf("%s: *", gettext("Reading ZFS config")); - (void) fflush(stdout); - } - + if (verbose) + set_progress_header(gettext("Reading ZFS config")); (void) zfs_iter_root(g_zfs, get_one_dataset, &cb); *dslist = cb.cb_handles; *count = cb.cb_used; - if (verbose) { - (void) printf("\b%s\n", gettext("done.")); - } -} - -static int -dataset_cmp(const void *a, const void *b) -{ - zfs_handle_t **za = (zfs_handle_t **)a; - zfs_handle_t **zb = (zfs_handle_t **)b; - char mounta[MAXPATHLEN]; - char mountb[MAXPATHLEN]; - boolean_t gota, gotb; - - if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0) - verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta, - sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); - if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0) - verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb, - sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); - - if (gota && gotb) - return (strcmp(mounta, mountb)); - - if (gota) - return (-1); - if (gotb) - return (1); - - return (strcmp(zfs_get_name(a), 
zfs_get_name(b))); + if (verbose) + finish_progress(gettext("done.")); } /* @@ -3013,216 +5850,197 @@ share_mount_one(zfs_handle_t *zhp, int o const char *cmdname = op == OP_SHARE ? "share" : "mount"; struct mnttab mnt; uint64_t zoned, canmount; - zfs_type_t type = zfs_get_type(zhp); boolean_t shared_nfs, shared_smb; - assert(type & (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)); - - if (type == ZFS_TYPE_FILESYSTEM) { - /* - * Check to make sure we can mount/share this dataset. If we - * are in the global zone and the filesystem is exported to a - * local zone, or if we are in a local zone and the - * filesystem is not exported, then it is an error. - */ - zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); - - if (zoned && getzoneid() == GLOBAL_ZONEID) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': " - "dataset is exported to a local zone\n"), cmdname, - zfs_get_name(zhp)); - return (1); - - } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot %s '%s': " - "permission denied\n"), cmdname, - zfs_get_name(zhp)); - return (1); - } - - /* - * Ignore any filesystems which don't apply to us. This - * includes those with a legacy mountpoint, or those with - * legacy share options. 
- */ - verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, - sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, - sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts, - sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0); - - if (op == OP_SHARE && strcmp(shareopts, "off") == 0 && - strcmp(smbshareopts, "off") == 0) { - if (!explicit) - return (0); - - (void) fprintf(stderr, gettext("cannot share '%s': " - "legacy share\n"), zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use share(1M) to " - "share this filesystem, or set " - "sharenfs property on\n")); - return (1); - } + assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM); - /* - * We cannot share or mount legacy filesystems. If the - * shareopts is non-legacy but the mountpoint is legacy, we - * treat it as a legacy share. - */ - if (strcmp(mountpoint, "legacy") == 0) { - if (!explicit) - return (0); + /* + * Check to make sure we can mount/share this dataset. If we + * are in the global zone and the filesystem is exported to a + * local zone, or if we are in a local zone and the + * filesystem is not exported, then it is an error. 
+ */ + zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); - (void) fprintf(stderr, gettext("cannot %s '%s': " - "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use %s(1M) to " - "%s this filesystem\n"), cmdname, cmdname); - return (1); - } + if (zoned && getzoneid() == GLOBAL_ZONEID) { + if (!explicit) + return (0); - if (strcmp(mountpoint, "none") == 0) { - if (!explicit) - return (0); + (void) fprintf(stderr, gettext("cannot %s '%s': " + "dataset is exported to a local zone\n"), cmdname, + zfs_get_name(zhp)); + return (1); - (void) fprintf(stderr, gettext("cannot %s '%s': no " - "mountpoint set\n"), cmdname, zfs_get_name(zhp)); - return (1); - } + } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { + if (!explicit) + return (0); - /* - * canmount explicit outcome - * on no pass through - * on yes pass through - * off no return 0 - * off yes display error, return 1 - * noauto no return 0 - * noauto yes pass through - */ - canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); - if (canmount == ZFS_CANMOUNT_OFF) { - if (!explicit) - return (0); + (void) fprintf(stderr, gettext("cannot %s '%s': " + "permission denied\n"), cmdname, + zfs_get_name(zhp)); + return (1); + } - (void) fprintf(stderr, gettext("cannot %s '%s': " - "'canmount' property is set to 'off'\n"), cmdname, - zfs_get_name(zhp)); - return (1); - } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) { + /* + * Ignore any filesystems which don't apply to us. This + * includes those with a legacy mountpoint, or those with + * legacy share options. 
+ */ + verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, + sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); + verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, + sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); + verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts, + sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0); + + if (op == OP_SHARE && strcmp(shareopts, "off") == 0 && + strcmp(smbshareopts, "off") == 0) { + if (!explicit) return (0); - } - - /* - * At this point, we have verified that the mountpoint and/or - * shareopts are appropriate for auto management. If the - * filesystem is already mounted or shared, return (failing - * for explicit requests); otherwise mount or share the - * filesystem. - */ - switch (op) { - case OP_SHARE: - shared_nfs = zfs_is_shared_nfs(zhp, NULL); - shared_smb = zfs_is_shared_smb(zhp, NULL); + (void) fprintf(stderr, gettext("cannot share '%s': " + "legacy share\n"), zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("to " + "share this filesystem set " + "sharenfs property on\n")); + return (1); + } - if (shared_nfs && shared_smb || - (shared_nfs && strcmp(shareopts, "on") == 0 && - strcmp(smbshareopts, "off") == 0) || - (shared_smb && strcmp(smbshareopts, "on") == 0 && - strcmp(shareopts, "off") == 0)) { - if (!explicit) - return (0); + /* + * We cannot share or mount legacy filesystems. If the + * shareopts is non-legacy but the mountpoint is legacy, we + * treat it as a legacy share. 
+ */ + if (strcmp(mountpoint, "legacy") == 0) { + if (!explicit) + return (0); - (void) fprintf(stderr, gettext("cannot share " - "'%s': filesystem already shared\n"), - zfs_get_name(zhp)); - return (1); - } + (void) fprintf(stderr, gettext("cannot %s '%s': " + "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use %s(8) to " + "%s this filesystem\n"), cmdname, cmdname); + return (1); + } - if (!zfs_is_mounted(zhp, NULL) && - zfs_mount(zhp, NULL, 0) != 0) - return (1); + if (strcmp(mountpoint, "none") == 0) { + if (!explicit) + return (0); - if (protocol == NULL) { - if (zfs_shareall(zhp) != 0) - return (1); - } else if (strcmp(protocol, "nfs") == 0) { - if (zfs_share_nfs(zhp)) - return (1); - } else if (strcmp(protocol, "smb") == 0) { - if (zfs_share_smb(zhp)) - return (1); - } else { - (void) fprintf(stderr, gettext("cannot share " - "'%s': invalid share type '%s' " - "specified\n"), - zfs_get_name(zhp), protocol); - return (1); - } + (void) fprintf(stderr, gettext("cannot %s '%s': no " + "mountpoint set\n"), cmdname, zfs_get_name(zhp)); + return (1); + } - break; + /* + * canmount explicit outcome + * on no pass through + * on yes pass through + * off no return 0 + * off yes display error, return 1 + * noauto no return 0 + * noauto yes pass through + */ + canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); + if (canmount == ZFS_CANMOUNT_OFF) { + if (!explicit) + return (0); - case OP_MOUNT: - if (options == NULL) - mnt.mnt_mntopts = ""; - else - mnt.mnt_mntopts = (char *)options; + (void) fprintf(stderr, gettext("cannot %s '%s': " + "'canmount' property is set to 'off'\n"), cmdname, + zfs_get_name(zhp)); + return (1); + } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) { + return (0); + } - if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && - zfs_is_mounted(zhp, NULL)) { - if (!explicit) - return (0); + /* + * If this filesystem is inconsistent and has a receive resume + * token, we can not mount it. 
+ */ + if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && + zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, + NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { + if (!explicit) + return (0); - (void) fprintf(stderr, gettext("cannot mount " - "'%s': filesystem already mounted\n"), - zfs_get_name(zhp)); - return (1); - } + (void) fprintf(stderr, gettext("cannot %s '%s': " + "Contains partially-completed state from " + "\"zfs receive -r\", which can be resumed with " + "\"zfs send -t\"\n"), + cmdname, zfs_get_name(zhp)); + return (1); + } - if (zfs_mount(zhp, options, flags) != 0) - return (1); - break; - } - } else { - assert(op == OP_SHARE); + /* + * At this point, we have verified that the mountpoint and/or + * shareopts are appropriate for auto management. If the + * filesystem is already mounted or shared, return (failing + * for explicit requests); otherwise mount or share the + * filesystem. + */ + switch (op) { + case OP_SHARE: - /* - * Ignore any volumes that aren't shared. - */ - verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts, - sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); + shared_nfs = zfs_is_shared_nfs(zhp, NULL); + shared_smb = zfs_is_shared_smb(zhp, NULL); - if (strcmp(shareopts, "off") == 0) { + if ((shared_nfs && shared_smb) || + (shared_nfs && strcmp(shareopts, "on") == 0 && + strcmp(smbshareopts, "off") == 0) || + (shared_smb && strcmp(smbshareopts, "on") == 0 && + strcmp(shareopts, "off") == 0)) { if (!explicit) return (0); - (void) fprintf(stderr, gettext("cannot share '%s': " - "'shareiscsi' property not set\n"), + (void) fprintf(stderr, gettext("cannot share " + "'%s': filesystem already shared\n"), zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("set 'shareiscsi' " - "property or use iscsitadm(1M) to share this " - "volume\n")); return (1); } - if (zfs_is_shared_iscsi(zhp)) { + if (!zfs_is_mounted(zhp, NULL) && + zfs_mount(zhp, NULL, 0) != 0) + return (1); + + if (protocol == NULL) { + if (zfs_shareall(zhp) != 0) + return (1); + } 
else if (strcmp(protocol, "nfs") == 0) { + if (zfs_share_nfs(zhp)) + return (1); + } else if (strcmp(protocol, "smb") == 0) { + if (zfs_share_smb(zhp)) + return (1); + } else { + (void) fprintf(stderr, gettext("cannot share " + "'%s': invalid share type '%s' " + "specified\n"), + zfs_get_name(zhp), protocol); + return (1); + } + + break; + + case OP_MOUNT: + if (options == NULL) + mnt.mnt_mntopts = ""; + else + mnt.mnt_mntopts = (char *)options; + + if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && + zfs_is_mounted(zhp, NULL)) { if (!explicit) return (0); - (void) fprintf(stderr, gettext("cannot share " - "'%s': volume already shared\n"), + (void) fprintf(stderr, gettext("cannot mount " + "'%s': filesystem already mounted\n"), zfs_get_name(zhp)); return (1); } - if (zfs_share_iscsi(zhp) != 0) + if (zfs_mount(zhp, options, flags) != 0) return (1); + break; } return (0); @@ -3234,19 +6052,16 @@ share_mount_one(zfs_handle_t *zhp, int o static void report_mount_progress(int current, int total) { - static int len; - static char *reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" - "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; - static time_t last_progress_time; + static time_t last_progress_time = 0; time_t now = time(NULL); + char info[32]; /* report 1..n instead of 0..n-1 */ ++current; /* display header if we're here for the first time */ if (current == 1) { - (void) printf(gettext("Mounting ZFS filesystems: ")); - len = 0; + set_progress_header(gettext("Mounting ZFS filesystems")); } else if (current != total && last_progress_time + MOUNT_TIME >= now) { /* too soon to report again */ return; @@ -3254,13 +6069,12 @@ report_mount_progress(int current, int t last_progress_time = now; - /* back up to prepare for overwriting */ - if (len) - (void) printf("%*.*s", len, len, reverse); - - /* We put a newline at the end if this is the last one. */ - len = printf("(%d/%d)%s", current, total, current == total ? 
"\n" : ""); - (void) fflush(stdout); + (void) sprintf(info, "(%d/%d)", current, total); + + if (current == total) + finish_progress(info); + else + update_progress(info); } static void @@ -3289,7 +6103,7 @@ share_mount(int op, int argc, char **arg boolean_t verbose = B_FALSE; int c, ret = 0; char *options = NULL; - int types, flags = 0; + int flags = 0; /* check options */ while ((c = getopt(argc, argv, op == OP_MOUNT ? ":avo:O" : "a")) @@ -3316,7 +6130,7 @@ share_mount(int op, int argc, char **arg break; case 'O': - flags |= MS_OVERLAY; + warnx("no overlay mounts support on FreeBSD, ignoring"); break; case ':': (void) fprintf(stderr, gettext("missing argument for " @@ -3339,24 +6153,16 @@ share_mount(int op, int argc, char **arg size_t i, count = 0; char *protocol = NULL; - if (op == OP_MOUNT) { - types = ZFS_TYPE_FILESYSTEM; - } else if (argc > 0) { - if (strcmp(argv[0], "nfs") == 0 || - strcmp(argv[0], "smb") == 0) { - types = ZFS_TYPE_FILESYSTEM; - } else if (strcmp(argv[0], "iscsi") == 0) { - types = ZFS_TYPE_VOLUME; - } else { + if (op == OP_SHARE && argc > 0) { + if (strcmp(argv[0], "nfs") != 0 && + strcmp(argv[0], "smb") != 0) { (void) fprintf(stderr, gettext("share type " - "must be 'nfs', 'smb' or 'iscsi'\n")); + "must be 'nfs' or 'smb'\n")); usage(B_FALSE); } protocol = argv[0]; argc--; argv++; - } else { - types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; } if (argc != 0) { @@ -3364,12 +6170,13 @@ share_mount(int op, int argc, char **arg usage(B_FALSE); } - get_all_datasets(types, &dslist, &count, verbose); + start_progress_timer(); + get_all_datasets(&dslist, &count, verbose); if (count == 0) return (0); - qsort(dslist, count, sizeof (void *), dataset_cmp); + qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp); for (i = 0; i < count; i++) { if (verbose) @@ -3383,8 +6190,7 @@ share_mount(int op, int argc, char **arg free(dslist); } else if (argc == 0) { - struct statvfs *sfs; - int i, n; + struct mnttab entry; if ((op == OP_SHARE) || (options != 
NULL)) { (void) fprintf(stderr, gettext("missing filesystem " @@ -3397,33 +6203,27 @@ share_mount(int op, int argc, char **arg * display any active ZFS mounts. We hide any snapshots, since * they are controlled automatically. */ - if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) { - fprintf(stderr, "getmntinfo(): %s\n", strerror(errno)); - return (0); - } - for (i = 0; i < n; i++) { - if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0 || - strchr(sfs[i].f_mntfromname, '@') != NULL) + rewind(mnttab_file); + while (getmntent(mnttab_file, &entry) == 0) { + if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || + strchr(entry.mnt_special, '@') != NULL) continue; - (void) printf("%-30s %s\n", sfs[i].f_mntfromname, - sfs[i].f_mntonname); + (void) printf("%-30s %s\n", entry.mnt_special, + entry.mnt_mountp); } } else { zfs_handle_t *zhp; - types = ZFS_TYPE_FILESYSTEM; - if (op == OP_SHARE) - types |= ZFS_TYPE_VOLUME; - if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } - if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) { + if ((zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; } else { ret = share_mount_one(zhp, op, flags, NULL, B_TRUE, @@ -3436,7 +6236,7 @@ share_mount(int op, int argc, char **arg } /* - * zfs mount -a [nfs | iscsi] + * zfs mount -a [nfs] * zfs mount filesystem * * Mount all filesystems, or mount the given filesystem. @@ -3448,7 +6248,7 @@ zfs_do_mount(int argc, char **argv) } /* - * zfs share -a [nfs | iscsi | smb] + * zfs share -a [nfs | smb] * zfs share filesystem * * Share all filesystems, or share the given filesystem. @@ -3484,9 +6284,9 @@ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) { zfs_handle_t *zhp; - int ret; + int ret = 0; struct stat64 statbuf; - struct mnttab entry, search = { 0 }; + struct extmnttab entry; const char *cmdname = (op == OP_SHARE) ? 
"unshare" : "unmount"; ino_t path_inode; @@ -3506,9 +6306,39 @@ unshare_unmount_path(int op, char *path, /* * Search for the given (major,minor) pair in the mount table. */ +#ifdef illumos rewind(mnttab_file); - search.mnt_mountp = path; - if ((ret = getmntany(mnttab_file, &entry, &search)) == 0) { + while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) { + if (entry.mnt_major == major(statbuf.st_dev) && + entry.mnt_minor == minor(statbuf.st_dev)) + break; + } +#endif +#ifdef __FreeBSD__ + { + struct statfs sfs; + + if (statfs(path, &sfs) != 0) { + (void) fprintf(stderr, "%s: %s\n", path, + strerror(errno)); + ret = -1; + } + statfs2mnttab(&sfs, &entry); + } +#endif +#ifdef __NetBSD__ + { + struct statvfs sfs; + + if (statvfs(path, &sfs) != 0) { + (void) fprintf(stderr, "%s: %s\n", path, + strerror(errno)); + ret = -1; + } + statvfs2mnttab(&sfs, &entry); + } +#endif + if (ret != 0) { if (op == OP_SHARE) { (void) fprintf(stderr, gettext("cannot %s '%s': not " "currently mounted\n"), cmdname, path); @@ -3556,8 +6386,10 @@ unshare_unmount_path(int op, char *path, strcmp(smbshare_prop, "off") == 0) { (void) fprintf(stderr, gettext("cannot unshare " "'%s': legacy share\n"), path); +#ifdef illumos (void) fprintf(stderr, gettext("use " "unshare(1M) to unshare this filesystem\n")); +#endif } else if (!zfs_is_shared(zhp)) { (void) fprintf(stderr, gettext("cannot unshare '%s': " "not currently shared\n"), path); @@ -3576,7 +6408,7 @@ unshare_unmount_path(int op, char *path, (void) fprintf(stderr, gettext("cannot unmount " "'%s': legacy mountpoint\n"), zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use umount(1M) " + (void) fprintf(stderr, gettext("use umount(8) " "to unmount this filesystem\n")); } else { ret = zfs_unmountall(zhp, flags); @@ -3598,9 +6430,9 @@ unshare_unmount(int op, int argc, char * int do_all = 0; int flags = 0; int ret = 0; - int types, c; + int c; zfs_handle_t *zhp; - char nfsiscsi_mnt_prop[ZFS_MAXPROPLEN]; + char nfs_mnt_prop[ZFS_MAXPROPLEN]; 
char sharesmb[ZFS_MAXPROPLEN]; /* check options */ @@ -3637,10 +6469,9 @@ unshare_unmount(int op, int argc, char * * the special type (dataset name), and walk the result in * reverse to make sure to get any snapshots first. */ - struct statvfs *sfs; - int i, n; + struct mnttab entry; uu_avl_pool_t *pool; - uu_avl_t *tree; + uu_avl_t *tree = NULL; unshare_unmount_node_t *node; uu_avl_index_t idx; uu_avl_walk_t *walk; @@ -3650,66 +6481,61 @@ unshare_unmount(int op, int argc, char * usage(B_FALSE); } - if ((pool = uu_avl_pool_create("unmount_pool", + if (((pool = uu_avl_pool_create("unmount_pool", sizeof (unshare_unmount_node_t), offsetof(unshare_unmount_node_t, un_avlnode), - unshare_unmount_compare, - UU_DEFAULT)) == NULL) { - (void) fprintf(stderr, gettext("internal error: " - "out of memory\n")); - exit(1); - } - - if ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) { - (void) fprintf(stderr, gettext("internal error: " - "out of memory\n")); - exit(1); - } + unshare_unmount_compare, UU_DEFAULT)) == NULL) || + ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL)) + nomem(); rewind(mnttab_file); - if ((n = getmntinfo(&sfs, MNT_WAIT)) == 0) { - (void) fprintf(stderr, gettext("internal error: " - "getmntinfo() failed\n")); - exit(1); - } - for (i = 0; i < n; i++) { + while (getmntent(mnttab_file, &entry) == 0) { /* ignore non-ZFS entries */ - if (strcmp(sfs[i].f_fstypename, MNTTYPE_ZFS) != 0) + if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; /* ignore snapshots */ - if (strchr(sfs[i].f_mntfromname, '@') != NULL) + if (strchr(entry.mnt_special, '@') != NULL) continue; - if ((zhp = zfs_open(g_zfs, sfs[i].f_mntfromname, + if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; continue; } + /* + * Ignore datasets that are excluded/restricted by + * parent pool name. 
+ */ + if (zpool_skip_pool(zfs_get_pool_name(zhp))) { + zfs_close(zhp); + continue; + } + switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, - nfsiscsi_mnt_prop, - sizeof (nfsiscsi_mnt_prop), + nfs_mnt_prop, + sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); - if (strcmp(nfsiscsi_mnt_prop, "off") != 0) + if (strcmp(nfs_mnt_prop, "off") != 0) break; verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, - nfsiscsi_mnt_prop, - sizeof (nfsiscsi_mnt_prop), + nfs_mnt_prop, + sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); - if (strcmp(nfsiscsi_mnt_prop, "off") == 0) + if (strcmp(nfs_mnt_prop, "off") == 0) continue; break; case OP_MOUNT: /* Ignore legacy mounts */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, - nfsiscsi_mnt_prop, - sizeof (nfsiscsi_mnt_prop), + nfs_mnt_prop, + sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); - if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0) + if (strcmp(nfs_mnt_prop, "legacy") == 0) continue; /* Ignore canmount=noauto mounts */ if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == @@ -3721,13 +6547,7 @@ unshare_unmount(int op, int argc, char * node = safe_malloc(sizeof (unshare_unmount_node_t)); node->un_zhp = zhp; - - if ((node->un_mountp = strdup(sfs[i].f_mntonname)) == - NULL) { - (void) fprintf(stderr, gettext("internal error:" - " out of memory\n")); - exit(1); - } + node->un_mountp = safe_strdup(entry.mnt_mountp); uu_avl_node_init(node, &node->un_avlnode, pool); @@ -3745,11 +6565,8 @@ unshare_unmount(int op, int argc, char * * removing it from the AVL tree in the process. 
*/ if ((walk = uu_avl_walk_start(tree, - UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) { - (void) fprintf(stderr, - gettext("internal error: out of memory")); - exit(1); - } + UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) + nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(tree, node); @@ -3777,29 +6594,6 @@ unshare_unmount(int op, int argc, char * uu_avl_destroy(tree); uu_avl_pool_destroy(pool); - if (op == OP_SHARE) { - /* - * Finally, unshare any volumes shared via iSCSI. - */ - zfs_handle_t **dslist = NULL; - size_t i, count = 0; - - get_all_datasets(ZFS_TYPE_VOLUME, &dslist, &count, - B_FALSE); - - if (count != 0) { - qsort(dslist, count, sizeof (void *), - dataset_cmp); - - for (i = 0; i < count; i++) { - if (zfs_unshare_iscsi(dslist[i]) != 0) - ret = 1; - zfs_close(dslist[i]); - } - - free(dslist); - } - } } else { if (argc != 1) { if (argc == 0) @@ -3821,91 +6615,65 @@ unshare_unmount(int op, int argc, char * return (unshare_unmount_path(op, argv[0], flags, B_FALSE)); - types = ZFS_TYPE_FILESYSTEM; - if (op == OP_SHARE) - types |= ZFS_TYPE_VOLUME; - - if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) + if ((zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM)) == NULL) return (1); - if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { - verify(zfs_prop_get(zhp, op == OP_SHARE ? 
- ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, - nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop), NULL, - NULL, 0, B_FALSE) == 0); - - switch (op) { - case OP_SHARE: - verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, - nfsiscsi_mnt_prop, - sizeof (nfsiscsi_mnt_prop), - NULL, NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, - sharesmb, sizeof (sharesmb), NULL, NULL, - 0, B_FALSE) == 0); - - if (strcmp(nfsiscsi_mnt_prop, "off") == 0 && - strcmp(sharesmb, "off") == 0) { - (void) fprintf(stderr, gettext("cannot " - "unshare '%s': legacy share\n"), - zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use " - "unshare(1M) to unshare this " - "filesystem\n")); - ret = 1; - } else if (!zfs_is_shared(zhp)) { - (void) fprintf(stderr, gettext("cannot " - "unshare '%s': not currently " - "shared\n"), zfs_get_name(zhp)); - ret = 1; - } else if (zfs_unshareall(zhp) != 0) { - ret = 1; - } - break; - - case OP_MOUNT: - if (strcmp(nfsiscsi_mnt_prop, "legacy") == 0) { - (void) fprintf(stderr, gettext("cannot " - "unmount '%s': legacy " - "mountpoint\n"), zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("use " - "umount(1M) to unmount this " - "filesystem\n")); - ret = 1; - } else if (!zfs_is_mounted(zhp, NULL)) { - (void) fprintf(stderr, gettext("cannot " - "unmount '%s': not currently " - "mounted\n"), - zfs_get_name(zhp)); - ret = 1; - } else if (zfs_unmountall(zhp, flags) != 0) { - ret = 1; - } - break; - } - } else { - assert(op == OP_SHARE); + verify(zfs_prop_get(zhp, op == OP_SHARE ? 
+ ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, + nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, + NULL, 0, B_FALSE) == 0); - verify(zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, - nfsiscsi_mnt_prop, sizeof (nfsiscsi_mnt_prop), + switch (op) { + case OP_SHARE: + verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, + nfs_mnt_prop, + sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); + verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, + sharesmb, sizeof (sharesmb), NULL, NULL, + 0, B_FALSE) == 0); - if (strcmp(nfsiscsi_mnt_prop, "off") == 0) { - (void) fprintf(stderr, gettext("cannot unshare " - "'%s': 'shareiscsi' property not set\n"), + if (strcmp(nfs_mnt_prop, "off") == 0 && + strcmp(sharesmb, "off") == 0) { + (void) fprintf(stderr, gettext("cannot " + "unshare '%s': legacy share\n"), zfs_get_name(zhp)); - (void) fprintf(stderr, gettext("set " - "'shareiscsi' property or use " - "iscsitadm(1M) to share this volume\n")); +#ifdef illumos + (void) fprintf(stderr, gettext("use " + "unshare(1M) to unshare this " + "filesystem\n")); +#endif + ret = 1; + } else if (!zfs_is_shared(zhp)) { + (void) fprintf(stderr, gettext("cannot " + "unshare '%s': not currently " + "shared\n"), zfs_get_name(zhp)); ret = 1; - } else if (!zfs_is_shared_iscsi(zhp)) { + } else if (zfs_unshareall(zhp) != 0) { + ret = 1; + } + break; + + case OP_MOUNT: + if (strcmp(nfs_mnt_prop, "legacy") == 0) { + (void) fprintf(stderr, gettext("cannot " + "unmount '%s': legacy " + "mountpoint\n"), zfs_get_name(zhp)); + (void) fprintf(stderr, gettext("use " + "umount(8) to unmount this " + "filesystem\n")); + ret = 1; + } else if (!zfs_is_mounted(zhp, NULL)) { (void) fprintf(stderr, gettext("cannot " - "unshare '%s': not currently shared\n"), + "unmount '%s': not currently " + "mounted\n"), zfs_get_name(zhp)); ret = 1; - } else if (zfs_unshare_iscsi(zhp) != 0) { + } else if (zfs_unmountall(zhp, flags) != 0) { ret = 1; } + break; } zfs_close(zhp); @@ -3938,14 +6706,69 @@ zfs_do_unshare(int argc, char **argv) return 
(unshare_unmount(OP_SHARE, argc, argv)); } +#ifdef __FreeBSD__ +/* + * Attach/detach the given dataset to/from the given jail + */ /* ARGSUSED */ static int -zfs_do_python(int argc, char **argv) +do_jail(int argc, char **argv, int attach) { - (void) execv(pypath, argv-1); - (void) printf("internal error: %s not found\n", pypath); - return (-1); + zfs_handle_t *zhp; + int jailid, ret; + + /* check number of arguments */ + if (argc < 3) { + (void) fprintf(stderr, gettext("missing argument(s)\n")); + usage(B_FALSE); + } + if (argc > 3) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + jailid = jail_getid(argv[1]); + if (jailid < 0) { + (void) fprintf(stderr, gettext("invalid jail id or name\n")); + usage(B_FALSE); + } + + zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM); + if (zhp == NULL) + return (1); + + ret = (zfs_jail(zhp, jailid, attach) != 0); + + zfs_close(zhp); + return (ret); +} + +/* + * zfs jail jailid filesystem + * + * Attach the given dataset to the given jail + */ +/* ARGSUSED */ +static int +zfs_do_jail(int argc, char **argv) +{ + + return (do_jail(argc, argv, 1)); +} + +/* + * zfs unjail jailid filesystem + * + * Detach the given dataset from the given jail + */ +/* ARGSUSED */ +static int +zfs_do_unjail(int argc, char **argv) +{ + + return (do_jail(argc, argv, 0)); } +#endif /* __FreeBSD__ */ /* * Called when invoked as /etc/fs/zfs/mount. 
Do the mount if the mountpoint is @@ -3957,7 +6780,7 @@ manual_mount(int argc, char **argv) zfs_handle_t *zhp; char mountpoint[ZFS_MAXPROPLEN]; char mntopts[MNT_LINE_MAX] = { '\0' }; - int ret; + int ret = 0; int c; int flags = 0; char *dataset, *path; @@ -4018,7 +6841,7 @@ manual_mount(int argc, char **argv) /* check for legacy mountpoint and complain appropriately */ ret = 0; if (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) == 0) { - if (mount(dataset, path, MS_OPTIONSTR | flags, MNTTYPE_ZFS, + if (zmount(dataset, path, flags, MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) { (void) fprintf(stderr, gettext("mount failed: %s\n"), strerror(errno)); @@ -4026,12 +6849,12 @@ manual_mount(int argc, char **argv) } } else { (void) fprintf(stderr, gettext("filesystem '%s' cannot be " - "mounted using 'mount -F zfs'\n"), dataset); + "mounted using 'mount -t zfs'\n"), dataset); (void) fprintf(stderr, gettext("Use 'zfs set mountpoint=%s' " "instead.\n"), path); - (void) fprintf(stderr, gettext("If you must use 'mount -F zfs' " - "or /etc/vfstab, use 'zfs set mountpoint=legacy'.\n")); - (void) fprintf(stderr, gettext("See zfs(1M) for more " + (void) fprintf(stderr, gettext("If you must use 'mount -t zfs' " + "or /etc/fstab, use 'zfs set mountpoint=legacy'.\n")); + (void) fprintf(stderr, gettext("See zfs(8) for more " "information.\n")); ret = 1; } @@ -4099,10 +6922,190 @@ find_command_idx(char *command, int *idx return (1); } +static int +zfs_do_diff(int argc, char **argv) +{ + zfs_handle_t *zhp; + int flags = 0; + char *tosnap = NULL; + char *fromsnap = NULL; + char *atp, *copy; + int err = 0; + int c; + + while ((c = getopt(argc, argv, "FHt")) != -1) { + switch (c) { + case 'F': + flags |= ZFS_DIFF_CLASSIFY; + break; + case 'H': + flags |= ZFS_DIFF_PARSEABLE; + break; + case 't': + flags |= ZFS_DIFF_TIMESTAMP; + break; + default: + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if 
(argc < 1) { + (void) fprintf(stderr, + gettext("must provide at least one snapshot name\n")); + usage(B_FALSE); + } + + if (argc > 2) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + fromsnap = argv[0]; + tosnap = (argc == 2) ? argv[1] : NULL; + + copy = NULL; + if (*fromsnap != '@') + copy = strdup(fromsnap); + else if (tosnap) + copy = strdup(tosnap); + if (copy == NULL) + usage(B_FALSE); + + if ((atp = strchr(copy, '@')) != NULL) + *atp = '\0'; + + if ((zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM)) == NULL) + return (1); + + free(copy); + + /* + * Ignore SIGPIPE so that the library can give us + * information on any failure + */ + (void) sigignore(SIGPIPE); + + err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags); + + zfs_close(zhp); + + return (err != 0); +} + +/* + * zfs bookmark + * + * Creates a bookmark with the given name from the given snapshot. + */ +static int +zfs_do_bookmark(int argc, char **argv) +{ + char snapname[ZFS_MAX_DATASET_NAME_LEN]; + zfs_handle_t *zhp; + nvlist_t *nvl; + int ret = 0; + int c; + + /* check options */ + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + (void) fprintf(stderr, + gettext("invalid option '%c'\n"), optopt); + goto usage; + } + } + + argc -= optind; + argv += optind; + + /* check number of arguments */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing snapshot argument\n")); + goto usage; + } + if (argc < 2) { + (void) fprintf(stderr, gettext("missing bookmark argument\n")); + goto usage; + } + + if (strchr(argv[1], '#') == NULL) { + (void) fprintf(stderr, + gettext("invalid bookmark name '%s' -- " + "must contain a '#'\n"), argv[1]); + goto usage; + } + + if (argv[0][0] == '@') { + /* + * Snapshot name begins with @. + * Default to same fs as bookmark. 
+ */ + (void) strncpy(snapname, argv[1], sizeof (snapname)); + *strchr(snapname, '#') = '\0'; + (void) strlcat(snapname, argv[0], sizeof (snapname)); + } else { + (void) strncpy(snapname, argv[0], sizeof (snapname)); + } + zhp = zfs_open(g_zfs, snapname, ZFS_TYPE_SNAPSHOT); + if (zhp == NULL) + goto usage; + zfs_close(zhp); + + + nvl = fnvlist_alloc(); + fnvlist_add_string(nvl, argv[1], snapname); + ret = lzc_bookmark(nvl, NULL); + fnvlist_free(nvl); + + if (ret != 0) { + const char *err_msg; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create bookmark '%s'"), argv[1]); + + switch (ret) { + case EXDEV: + err_msg = "bookmark is in a different pool"; + break; + case EEXIST: + err_msg = "bookmark exists"; + break; + case EINVAL: + err_msg = "invalid argument"; + break; + case ENOTSUP: + err_msg = "bookmark feature not enabled"; + break; + case ENOSPC: + err_msg = "out of space"; + break; + default: + err_msg = "unknown error"; + break; + } + (void) fprintf(stderr, "%s: %s\n", errbuf, + dgettext(TEXT_DOMAIN, err_msg)); + } + + return (ret != 0); + +usage: + usage(B_FALSE); + return (-1); +} + int main(int argc, char **argv) { - int ret; + int ret = 0; int i; char *progname; char *cmdname; @@ -4118,8 +7121,7 @@ main(int argc, char **argv) return (1); } - zpool_set_history_str("zfs", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); libzfs_print_on_error(g_zfs, B_TRUE); @@ -4162,6 +7164,12 @@ main(int argc, char **argv) cmdname = "receive"; /* + * The 'snap' command is an alias for 'snapshot' + */ + if (strcmp(cmdname, "snap") == 0) + cmdname = "snapshot"; + + /* * Special case '-?' 
*/ if (strcmp(cmdname, "-?") == 0) @@ -4188,6 +7196,9 @@ main(int argc, char **argv) (void) fclose(mnttab_file); + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + libzfs_fini(g_zfs); /* Index: src/external/cddl/osnet/dist/cmd/zfs/zfs_util.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zfs/zfs_util.h,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 zfs_util.h --- src/external/cddl/osnet/dist/cmd/zfs/zfs_util.h 7 Aug 2009 18:32:18 -0000 1.1.1.1 +++ src/external/cddl/osnet/dist/cmd/zfs/zfs_util.h 12 Jun 2012 05:55:36 -0000 @@ -19,15 +19,12 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef _ZFS_UTIL_H #define _ZFS_UTIL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #ifdef __cplusplus @@ -35,6 +32,7 @@ extern "C" { #endif void * safe_malloc(size_t size); +void nomem(void); libzfs_handle_t *g_zfs; #ifdef __cplusplus Index: src/external/cddl/osnet/dist/cmd/zpool/zpool_iter.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zpool/zpool_iter.c,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 zpool_iter.c --- src/external/cddl/osnet/dist/cmd/zpool/zpool_iter.c 7 Aug 2009 18:32:18 -0000 1.1.1.1 +++ src/external/cddl/osnet/dist/cmd/zpool/zpool_iter.c 27 Mar 2016 02:48:25 -0000 @@ -22,9 +22,11 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2016 Igor Kozhukhov . 
+ */ -#pragma ident "%Z%%M% %I% %E% SMI" - +#include #include #include #include @@ -131,7 +133,8 @@ pool_list_get(int argc, char **argv, zpr for (i = 0; i < argc; i++) { zpool_handle_t *zhp; - if (zhp = zpool_open_canfail(g_zfs, argv[i])) { + if ((zhp = zpool_open_canfail(g_zfs, argv[i])) != + NULL) { if (add_pool(zhp, zlp) != 0) *err = B_TRUE; } else { Index: src/external/cddl/osnet/dist/cmd/zpool/zpool_main.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zpool/zpool_main.c,v retrieving revision 1.2 diff -u -p -r1.2 zpool_main.c --- src/external/cddl/osnet/dist/cmd/zpool/zpool_main.c 2 Jan 2013 10:33:49 -0000 1.2 +++ src/external/cddl/osnet/dist/cmd/zpool/zpool_main.c 5 May 2017 16:30:16 -0000 @@ -20,10 +20,16 @@ */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2012 by Frederik Wessels. All rights reserved. + * Copyright (c) 2012 Martin Matuska . All rights reserved. + * Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved. + * Copyright 2016 Igor Kozhukhov . + * Copyright 2016 Nexenta Systems, Inc. 
*/ +#include #include #include #include @@ -41,15 +47,16 @@ #include #include #include +#include +#include #include - -#include #include #include #include "zpool_util.h" #include "zfs_comutil.h" +#include "zfeature_common.h" #include "statcommon.h" @@ -58,6 +65,7 @@ static int zpool_do_destroy(int, char ** static int zpool_do_add(int, char **); static int zpool_do_remove(int, char **); +static int zpool_do_labelclear(int, char **); static int zpool_do_list(int, char **); static int zpool_do_iostat(int, char **); @@ -66,6 +74,9 @@ static int zpool_do_status(int, char **) static int zpool_do_online(int, char **); static int zpool_do_offline(int, char **); static int zpool_do_clear(int, char **); +static int zpool_do_reopen(int, char **); + +static int zpool_do_reguid(int, char **); static int zpool_do_attach(int, char **); static int zpool_do_detach(int, char **); @@ -114,6 +125,7 @@ typedef enum { HELP_HISTORY, HELP_IMPORT, HELP_IOSTAT, + HELP_LABELCLEAR, HELP_LIST, HELP_OFFLINE, HELP_ONLINE, @@ -124,7 +136,9 @@ typedef enum { HELP_UPGRADE, HELP_GET, HELP_SET, - HELP_SPLIT + HELP_SPLIT, + HELP_REGUID, + HELP_REOPEN } zpool_help_t; @@ -150,6 +164,8 @@ static zpool_command_t command_table[] = { "add", zpool_do_add, HELP_ADD }, { "remove", zpool_do_remove, HELP_REMOVE }, { NULL }, + { "labelclear", zpool_do_labelclear, HELP_LABELCLEAR }, + { NULL }, { "list", zpool_do_list, HELP_LIST }, { "iostat", zpool_do_iostat, HELP_IOSTAT }, { "status", zpool_do_status, HELP_STATUS }, @@ -157,6 +173,7 @@ static zpool_command_t command_table[] = { "online", zpool_do_online, HELP_ONLINE }, { "offline", zpool_do_offline, HELP_OFFLINE }, { "clear", zpool_do_clear, HELP_CLEAR }, + { "reopen", zpool_do_reopen, HELP_REOPEN }, { NULL }, { "attach", zpool_do_attach, HELP_ATTACH }, { "detach", zpool_do_detach, HELP_DETACH }, @@ -168,6 +185,7 @@ static zpool_command_t command_table[] = { "import", zpool_do_import, HELP_IMPORT }, { "export", zpool_do_export, HELP_EXPORT }, { "upgrade", 
zpool_do_upgrade, HELP_UPGRADE }, + { "reguid", zpool_do_reguid, HELP_REGUID }, { NULL }, { "history", zpool_do_history, HELP_HISTORY }, { "get", zpool_do_get, HELP_GET }, @@ -176,13 +194,14 @@ static zpool_command_t command_table[] = #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) -zpool_command_t *current_command; +static zpool_command_t *current_command; static char history_str[HIS_MAX_RECORD_LEN]; - +static boolean_t log_history = B_TRUE; static uint_t timestamp_fmt = NODATE; static const char * -get_usage(zpool_help_t idx) { +get_usage(zpool_help_t idx) +{ switch (idx) { case HELP_ADD: return (gettext("\tadd [-fn] ...\n")); @@ -192,7 +211,7 @@ get_usage(zpool_help_t idx) { case HELP_CLEAR: return (gettext("\tclear [-nF] [device]\n")); case HELP_CREATE: - return (gettext("\tcreate [-fn] [-o property=value] ... \n" + return (gettext("\tcreate [-fnd] [-o property=value] ... \n" "\t [-O file-system-property=value] ... \n" "\t [-m mountpoint] [-R root] ...\n")); case HELP_DESTROY: @@ -205,44 +224,52 @@ get_usage(zpool_help_t idx) { return (gettext("\thistory [-il] [] ...\n")); case HELP_IMPORT: return (gettext("\timport [-d dir] [-D]\n" - "\timport [-d dir | -c cachefile] [-n] -F \n" + "\timport [-d dir | -c cachefile] [-F [-n]] \n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-R root] -a\n" + "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... \n" - "\t [-d dir | -c cachefile] [-D] [-f] [-R root] " - " [newpool]\n")); + "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " + "[-R root] [-F [-n]]\n" + "\t [newpool]\n")); case HELP_IOSTAT: return (gettext("\tiostat [-v] [-T d|u] [pool] ... 
[interval " "[count]]\n")); + case HELP_LABELCLEAR: + return (gettext("\tlabelclear [-f] \n")); case HELP_LIST: - return (gettext("\tlist [-H] [-o property[,...]] " - "[pool] ...\n")); + return (gettext("\tlist [-Hpv] [-o property[,...]] " + "[-T d|u] [pool] ... [interval [count]]\n")); case HELP_OFFLINE: return (gettext("\toffline [-t] ...\n")); case HELP_ONLINE: - return (gettext("\tonline ...\n")); + return (gettext("\tonline [-e] ...\n")); case HELP_REPLACE: return (gettext("\treplace [-f] " "[new-device]\n")); case HELP_REMOVE: return (gettext("\tremove ...\n")); + case HELP_REOPEN: + return (gettext("\treopen \n")); case HELP_SCRUB: return (gettext("\tscrub [-s] ...\n")); case HELP_STATUS: - return (gettext("\tstatus [-vx] [pool] ...\n")); + return (gettext("\tstatus [-vx] [-T d|u] [pool] ... [interval " + "[count]]\n")); case HELP_UPGRADE: - return (gettext("\tupgrade\n" - "\tupgrade -v\n" + return (gettext("\tupgrade [-v]\n" "\tupgrade [-V version] <-a | pool ...>\n")); case HELP_GET: - return (gettext("\tget <\"all\" | property[,...]> " - " ...\n")); + return (gettext("\tget [-Hp] [-o \"all\" | field[,...]] " + "<\"all\" | property[,...]> ...\n")); case HELP_SET: return (gettext("\tset \n")); case HELP_SPLIT: return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n" "\t [-o property=value] " "[ ...]\n")); + case HELP_REGUID: + return (gettext("\treguid \n")); } abort(); @@ -316,6 +343,12 @@ usage(boolean_t requested) /* Iterate over all properties */ (void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_POOL); + + (void) fprintf(fp, "\t%-15s ", "feature@..."); + (void) fprintf(fp, "YES disabled | enabled | active\n"); + + (void) fprintf(fp, gettext("\nThe feature@ properties must be " + "appended with a feature name.\nSee zpool-features(7).\n")); } /* @@ -359,6 +392,18 @@ print_vdev_tree(zpool_handle_t *zhp, con } } +static boolean_t +prop_list_contains_feature(nvlist_t *proplist) +{ + nvpair_t *nvp; + for (nvp = nvlist_next_nvpair(proplist, 
NULL); NULL != nvp; + nvp = nvlist_next_nvpair(proplist, nvp)) { + if (zpool_prop_feature(nvpair_name(nvp))) + return (B_TRUE); + } + return (B_FALSE); +} + /* * Add a property pair (name, string-value) into a property nvlist. */ @@ -382,12 +427,34 @@ add_prop_list(const char *propname, char proplist = *props; if (poolprop) { - if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) { + const char *vname = zpool_prop_to_name(ZPOOL_PROP_VERSION); + + if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL && + !zpool_prop_feature(propname)) { (void) fprintf(stderr, gettext("property '%s' is " "not a valid pool property\n"), propname); return (2); } - normnm = zpool_prop_to_name(prop); + + /* + * feature@ properties and version should not be specified + * at the same time. + */ + if ((prop == ZPROP_INVAL && zpool_prop_feature(propname) && + nvlist_exists(proplist, vname)) || + (prop == ZPOOL_PROP_VERSION && + prop_list_contains_feature(proplist))) { + (void) fprintf(stderr, gettext("'feature@' and " + "'version' properties cannot be specified " + "together\n")); + return (2); + } + + + if (zpool_prop_feature(propname)) + normnm = propname; + else + normnm = zpool_prop_to_name(prop); } else { if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { normnm = zfs_prop_to_name(fprop); @@ -520,11 +587,10 @@ zpool_do_add(int argc, char **argv) } /* - * zpool remove ... + * zpool remove ... * - * Removes the given vdev from the pool. Currently, this only supports removing - * spares and cache devices from the pool. Eventually, we'll want to support - * removing leaf vdevs (as an alias for 'detach') as well as toplevel vdevs. + * Removes the given vdev from the pool. Currently, this supports removing + * spares, cache, and log devices from the pool. */ int zpool_do_remove(int argc, char **argv) @@ -560,7 +626,154 @@ zpool_do_remove(int argc, char **argv) } /* - * zpool create [-fn] [-o property=value] ... 
+ * zpool labelclear [-f] + * + * -f Force clearing the label for the vdevs which are members of + * the exported or foreign pools. + * + * Verifies that the vdev is not active and zeros out the label information + * on the device. + */ +int +zpool_do_labelclear(int argc, char **argv) +{ + char vdev[MAXPATHLEN]; + char *name = NULL; + struct stat st; + int c, fd, ret = 0; + nvlist_t *config; + pool_state_t state; + boolean_t inuse = B_FALSE; + boolean_t force = B_FALSE; + + /* check options */ + while ((c = getopt(argc, argv, "f")) != -1) { + switch (c) { + case 'f': + force = B_TRUE; + break; + default: + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + /* get vdev name */ + if (argc < 1) { + (void) fprintf(stderr, gettext("missing vdev name\n")); + usage(B_FALSE); + } + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + /* + * Check if we were given absolute path and use it as is. + * Otherwise if the provided vdev name doesn't point to a file, + * try prepending dsk path and appending s0. 
+ */ + (void) strlcpy(vdev, argv[0], sizeof (vdev)); + if (vdev[0] != '/' && stat(vdev, &st) != 0) { + char *s; + + (void) snprintf(vdev, sizeof (vdev), "%s/%s", +#ifdef illumos + ZFS_DISK_ROOT, argv[0]); + if ((s = strrchr(argv[0], 's')) == NULL || + !isdigit(*(s + 1))) + (void) strlcat(vdev, "s0", sizeof (vdev)); +#else + "/dev", argv[0]); +#endif + if (stat(vdev, &st) != 0) { + (void) fprintf(stderr, gettext( + "failed to find device %s, try specifying absolute " + "path instead\n"), argv[0]); + return (1); + } + } + + if ((fd = open(vdev, O_RDWR)) < 0) { + (void) fprintf(stderr, gettext("failed to open %s: %s\n"), + vdev, strerror(errno)); + return (1); + } + + if (zpool_read_label(fd, &config) != 0 || config == NULL) { + (void) fprintf(stderr, + gettext("failed to read label from %s\n"), vdev); + return (1); + } + nvlist_free(config); + + ret = zpool_in_use(g_zfs, fd, &state, &name, &inuse); + if (ret != 0) { + (void) fprintf(stderr, + gettext("failed to check state for %s\n"), vdev); + return (1); + } + + if (!inuse) + goto wipe_label; + + switch (state) { + default: + case POOL_STATE_ACTIVE: + case POOL_STATE_SPARE: + case POOL_STATE_L2CACHE: + (void) fprintf(stderr, gettext( + "%s is a member (%s) of pool \"%s\"\n"), + vdev, zpool_pool_state_to_name(state), name); + ret = 1; + goto errout; + + case POOL_STATE_EXPORTED: + if (force) + break; + (void) fprintf(stderr, gettext( + "use '-f' to override the following error:\n" + "%s is a member of exported pool \"%s\"\n"), + vdev, name); + ret = 1; + goto errout; + + case POOL_STATE_POTENTIALLY_ACTIVE: + if (force) + break; + (void) fprintf(stderr, gettext( + "use '-f' to override the following error:\n" + "%s is a member of potentially active pool \"%s\"\n"), + vdev, name); + ret = 1; + goto errout; + + case POOL_STATE_DESTROYED: + /* inuse should never be set for a destroyed pool */ + assert(0); + break; + } + +wipe_label: + ret = zpool_clear_label(fd); + if (ret != 0) { + (void) fprintf(stderr, + 
gettext("failed to clear label for %s\n"), vdev); + } + +errout: + free(name); + (void) close(fd); + + return (ret); +} + +/* + * zpool create [-fnd] [-o property=value] ... * [-O file-system-property=value] ... * [-R root] [-m mountpoint] ... * @@ -569,8 +782,10 @@ zpool_do_remove(int argc, char **argv) * were to be created. * -R Create a pool under an alternate root * -m Set default mountpoint for the root dataset. By default it's - * '/' + * '/' * -o Set property=value. + * -d Don't automatically enable all supported pool features + * (individual features can be enabled with -o). * -O Set fsproperty=value in the pool's root file system * * Creates the named pool according to the given vdev specification. The @@ -583,6 +798,7 @@ zpool_do_create(int argc, char **argv) { boolean_t force = B_FALSE; boolean_t dryrun = B_FALSE; + boolean_t enable_all_pool_feat = B_TRUE; int c; nvlist_t *nvroot = NULL; char *poolname; @@ -594,7 +810,7 @@ zpool_do_create(int argc, char **argv) char *propval; /* check options */ - while ((c = getopt(argc, argv, ":fnR:m:o:O:")) != -1) { + while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) { switch (c) { case 'f': force = B_TRUE; @@ -602,6 +818,9 @@ zpool_do_create(int argc, char **argv) case 'n': dryrun = B_TRUE; break; + case 'd': + enable_all_pool_feat = B_FALSE; + break; case 'R': altroot = optarg; if (add_prop_list(zpool_prop_to_name( @@ -616,6 +835,7 @@ zpool_do_create(int argc, char **argv) goto errout; break; case 'm': + /* Equivalent to -O mountpoint=optarg */ mountpoint = optarg; break; case 'o': @@ -629,6 +849,23 @@ zpool_do_create(int argc, char **argv) if (add_prop_list(optarg, propval, &props, B_TRUE)) goto errout; + + /* + * If the user is creating a pool that doesn't support + * feature flags, don't enable any features. 
+ */ + if (zpool_name_to_prop(optarg) == ZPOOL_PROP_VERSION) { + char *end; + u_longlong_t ver; + + ver = strtoull(propval, &end, 10); + if (*end == '\0' && + ver < SPA_VERSION_FEATURES) { + enable_all_pool_feat = B_FALSE; + } + } + if (zpool_name_to_prop(optarg) == ZPOOL_PROP_ALTROOT) + altroot = propval; break; case 'O': if ((propval = strchr(optarg, '=')) == NULL) { @@ -639,8 +876,18 @@ zpool_do_create(int argc, char **argv) *propval = '\0'; propval++; - if (add_prop_list(optarg, propval, &fsprops, B_FALSE)) + /* + * Mountpoints are checked and then added later. + * Uniquely among properties, they can be specified + * more than once, to avoid conflict with -m. + */ + if (0 == strcmp(optarg, + zfs_prop_to_name(ZFS_PROP_MOUNTPOINT))) { + mountpoint = propval; + } else if (add_prop_list(optarg, propval, &fsprops, + B_FALSE)) { goto errout; + } break; case ':': (void) fprintf(stderr, gettext("missing argument for " @@ -694,7 +941,6 @@ zpool_do_create(int argc, char **argv) goto errout; } - if (altroot != NULL && altroot[0] != '/') { (void) fprintf(stderr, gettext("invalid alternate root '%s': " "must be an absolute path\n"), altroot); @@ -704,10 +950,11 @@ zpool_do_create(int argc, char **argv) /* * Check the validity of the mountpoint and direct the user to use the * '-m' mountpoint option if it looks like its in use. + * Ignore the checks if the '-f' option is given. */ - if (mountpoint == NULL || + if (!force && (mountpoint == NULL || (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && - strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) { + strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0))) { char buf[MAXPATHLEN]; DIR *dirp; @@ -758,6 +1005,18 @@ zpool_do_create(int argc, char **argv) } } + /* + * Now that the mountpoint's validity has been checked, ensure that + * the property is set appropriately prior to creating the pool. 
+ */ + if (mountpoint != NULL) { + ret = add_prop_list(zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), + mountpoint, &fsprops, B_FALSE); + if (ret != 0) + goto errout; + } + + ret = 1; if (dryrun) { /* * For a dry run invocation, print out a basic message and run @@ -776,16 +1035,35 @@ zpool_do_create(int argc, char **argv) /* * Hand off to libzfs. */ + if (enable_all_pool_feat) { + spa_feature_t i; + for (i = 0; i < SPA_FEATURES; i++) { + char propname[MAXPATHLEN]; + zfeature_info_t *feat = &spa_feature_table[i]; + + (void) snprintf(propname, sizeof (propname), + "feature@%s", feat->fi_uname); + + /* + * Skip feature if user specified it manually + * on the command line. + */ + if (nvlist_exists(props, propname)) + continue; + + ret = add_prop_list(propname, + ZFS_FEATURE_ENABLED, &props, B_TRUE); + if (ret != 0) + goto errout; + } + } + + ret = 1; if (zpool_create(g_zfs, poolname, nvroot, props, fsprops) == 0) { zfs_handle_t *pool = zfs_open(g_zfs, poolname, ZFS_TYPE_FILESYSTEM); if (pool != NULL) { - if (mountpoint != NULL) - verify(zfs_prop_set(pool, - zfs_prop_to_name( - ZFS_PROP_MOUNTPOINT), - mountpoint) == 0); if (zfs_mount(pool, NULL, 0) == 0) ret = zfs_shareall(pool); zfs_close(pool); @@ -869,7 +1147,10 @@ zpool_do_destroy(int argc, char **argv) return (1); } - ret = (zpool_destroy(zhp) != 0); + /* The history must be logged as part of the export */ + log_history = B_FALSE; + + ret = (zpool_destroy(zhp, history_str) != 0); zpool_close(zhp); @@ -933,10 +1214,13 @@ zpool_do_export(int argc, char **argv) continue; } + /* The history must be logged as part of the export */ + log_history = B_FALSE; + if (hardforce) { - if (zpool_export_force(zhp) != 0) + if (zpool_export_force(zhp, history_str) != 0) ret = 1; - } else if (zpool_export(zhp, force) != 0) { + } else if (zpool_export(zhp, force, history_str) != 0) { ret = 1; } @@ -1044,21 +1328,23 @@ print_status_config(zpool_handle_t *zhp, int namewidth, int depth, boolean_t isspare) { nvlist_t **child; - uint_t c, 
children; + uint_t c, vsc, children; + pool_scan_stat_t *ps = NULL; vdev_stat_t *vs; - char rbuf[6], wbuf[6], cbuf[6], repaired[7]; + char rbuf[6], wbuf[6], cbuf[6]; char *vname; uint64_t notpresent; + uint64_t ashift; spare_cbdata_t cb; - char *state; - - verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, - (uint64_t **)&vs, &c) == 0); + const char *state; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) children = 0; + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &vsc) == 0); + state = zpool_state_to_name(vs->vs_state, vs->vs_aux); if (isspare) { /* @@ -1082,10 +1368,11 @@ print_status_config(zpool_handle_t *zhp, } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, - ¬present) == 0) { + ¬present) == 0 || + vs->vs_state <= VDEV_STATE_CANT_OPEN) { char *path; - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - (void) printf(" was %s", path); + if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) + (void) printf(" was %s", path); } else if (vs->vs_aux != 0) { (void) printf(" "); @@ -1106,6 +1393,14 @@ print_status_config(zpool_handle_t *zhp, (void) printf(gettext("newer version")); break; + case VDEV_AUX_UNSUP_FEAT: + (void) printf(gettext("unsupported feature(s)")); + break; + + case VDEV_AUX_ASHIFT_TOO_BIG: + (void) printf(gettext("unsupported minimum blocksize")); + break; + case VDEV_AUX_SPARED: verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &cb.cb_guid) == 0); @@ -1148,14 +1443,22 @@ print_status_config(zpool_handle_t *zhp, (void) printf(gettext("corrupted data")); break; } - } else if (vs->vs_scrub_repaired != 0 && children == 0) { - /* - * Report bytes resilvered/repaired on leaf devices. - */ - zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired)); - (void) printf(gettext(" %s %s"), repaired, - (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ? 
- "resilvered" : "repaired"); + } else if (children == 0 && !isspare && + VDEV_STAT_VALID(vs_physical_ashift, vsc) && + vs->vs_configured_ashift < vs->vs_physical_ashift) { + (void) printf( + gettext(" block size: %dB configured, %dB native"), + 1 << vs->vs_configured_ashift, 1 << vs->vs_physical_ashift); + } + + (void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS, + (uint64_t **)&ps, &c); + + if (ps && ps->pss_state == DSS_SCANNING && + vs->vs_scan_processed != 0 && children == 0) { + (void) printf(gettext(" (%s)"), + (ps->pss_func == POOL_SCAN_RESILVER) ? + "resilvering" : "repairing"); } (void) printf("\n"); @@ -1195,7 +1498,7 @@ print_import_config(const char *name, nv strcmp(type, VDEV_TYPE_HOLE) == 0) return; - verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS, + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); (void) printf("\t%*s%-*s", depth, "", namewidth - depth, name); @@ -1221,6 +1524,10 @@ print_import_config(const char *name, nv (void) printf(gettext("newer version")); break; + case VDEV_AUX_UNSUP_FEAT: + (void) printf(gettext("unsupported feature(s)")); + break; + case VDEV_AUX_ERR_EXCEEDED: (void) printf(gettext("too many errors")); break; @@ -1324,6 +1631,7 @@ show_import(nvlist_t *config) const char *health; uint_t vsc; int namewidth; + char *comment; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); @@ -1334,15 +1642,15 @@ show_import(nvlist_t *config) verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); health = zpool_state_to_name(vs->vs_state, vs->vs_aux); reason = zpool_import_status(config, &msgid); - (void) printf(gettext(" pool: %s\n"), name); - (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); - (void) printf(gettext(" state: %s"), health); + (void) 
printf(gettext(" pool: %s\n"), name); + (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); + (void) printf(gettext(" state: %s"), health); if (pool_state == POOL_STATE_DESTROYED) (void) printf(gettext(" (DESTROYED)")); (void) printf("\n"); @@ -1351,56 +1659,87 @@ show_import(nvlist_t *config) case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: - (void) printf(gettext("status: One or more devices are missing " - "from the system.\n")); + (void) printf(gettext(" status: One or more devices are " + "missing from the system.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_R: case ZPOOL_STATUS_CORRUPT_LABEL_NR: - (void) printf(gettext("status: One or more devices contains " + (void) printf(gettext(" status: One or more devices contains " "corrupted data.\n")); break; case ZPOOL_STATUS_CORRUPT_DATA: - (void) printf(gettext("status: The pool data is corrupted.\n")); + (void) printf( + gettext(" status: The pool data is corrupted.\n")); break; case ZPOOL_STATUS_OFFLINE_DEV: - (void) printf(gettext("status: One or more devices " + (void) printf(gettext(" status: One or more devices " "are offlined.\n")); break; case ZPOOL_STATUS_CORRUPT_POOL: - (void) printf(gettext("status: The pool metadata is " + (void) printf(gettext(" status: The pool metadata is " "corrupted.\n")); break; case ZPOOL_STATUS_VERSION_OLDER: - (void) printf(gettext("status: The pool is formatted using an " - "older on-disk version.\n")); + (void) printf(gettext(" status: The pool is formatted using a " + "legacy on-disk version.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext("status: The pool is formatted using an " + (void) printf(gettext(" status: The pool is formatted using an " "incompatible version.\n")); break; + case ZPOOL_STATUS_FEAT_DISABLED: + (void) printf(gettext(" status: Some supported features are " + "not enabled on the pool.\n")); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_READ: + (void) printf(gettext("status: The 
pool uses the following " + "feature(s) not supported on this sytem:\n")); + zpool_print_unsup_feat(config); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_WRITE: + (void) printf(gettext("status: The pool can only be accessed " + "in read-only mode on this system. It\n\tcannot be " + "accessed in read-write mode because it uses the " + "following\n\tfeature(s) not supported on this system:\n")); + zpool_print_unsup_feat(config); + break; + case ZPOOL_STATUS_HOSTID_MISMATCH: - (void) printf(gettext("status: The pool was last accessed by " + (void) printf(gettext(" status: The pool was last accessed by " "another system.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_R: case ZPOOL_STATUS_FAULTED_DEV_NR: - (void) printf(gettext("status: One or more devices are " + (void) printf(gettext(" status: One or more devices are " "faulted.\n")); break; case ZPOOL_STATUS_BAD_LOG: - (void) printf(gettext("status: An intent log record cannot be " + (void) printf(gettext(" status: An intent log record cannot be " "read.\n")); break; + case ZPOOL_STATUS_RESILVERING: + (void) printf(gettext(" status: One or more devices were being " + "resilvered.\n")); + break; + + case ZPOOL_STATUS_NON_NATIVE_ASHIFT: + (void) printf(gettext("status: One or more devices were " + "configured to use a non-native block size.\n" + "\tExpect reduced performance.\n")); + break; + default: /* * No other status can be seen when importing pools. @@ -1412,44 +1751,64 @@ show_import(nvlist_t *config) * Print out an action according to the overall state of the pool. 
*/ if (vs->vs_state == VDEV_STATE_HEALTHY) { - if (reason == ZPOOL_STATUS_VERSION_OLDER) - (void) printf(gettext("action: The pool can be " + if (reason == ZPOOL_STATUS_VERSION_OLDER || + reason == ZPOOL_STATUS_FEAT_DISABLED) { + (void) printf(gettext(" action: The pool can be " "imported using its name or numeric identifier, " "though\n\tsome features will not be available " "without an explicit 'zpool upgrade'.\n")); - else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) - (void) printf(gettext("action: The pool can be " + } else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) { + (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier and\n\tthe '-f' flag.\n")); - else - (void) printf(gettext("action: The pool can be " + } else { + (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier.\n")); + } } else if (vs->vs_state == VDEV_STATE_DEGRADED) { - (void) printf(gettext("action: The pool can be imported " + (void) printf(gettext(" action: The pool can be imported " "despite missing or damaged devices. The\n\tfault " "tolerance of the pool may be compromised if imported.\n")); } else { switch (reason) { case ZPOOL_STATUS_VERSION_NEWER: - (void) printf(gettext("action: The pool cannot be " + (void) printf(gettext(" action: The pool cannot be " "imported. Access the pool on a system running " "newer\n\tsoftware, or recreate the pool from " "backup.\n")); break; + case ZPOOL_STATUS_UNSUP_FEAT_READ: + (void) printf(gettext("action: The pool cannot be " + "imported. Access the pool on a system that " + "supports\n\tthe required feature(s), or recreate " + "the pool from backup.\n")); + break; + case ZPOOL_STATUS_UNSUP_FEAT_WRITE: + (void) printf(gettext("action: The pool cannot be " + "imported in read-write mode. 
Import the pool " + "with\n" + "\t\"-o readonly=on\", access the pool on a system " + "that supports the\n\trequired feature(s), or " + "recreate the pool from backup.\n")); + break; case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: - (void) printf(gettext("action: The pool cannot be " + (void) printf(gettext(" action: The pool cannot be " "imported. Attach the missing\n\tdevices and try " "again.\n")); break; default: - (void) printf(gettext("action: The pool cannot be " + (void) printf(gettext(" action: The pool cannot be " "imported due to damaged devices or data.\n")); } } + /* Print the comment attached to the pool. */ + if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) + (void) printf(gettext("comment: %s\n"), comment); + /* * If the state is "closed" or "can't open", and the aux state * is "corrupt data": @@ -1467,10 +1826,10 @@ show_import(nvlist_t *config) } if (msgid != NULL) - (void) printf(gettext(" see: http://www.sun.com/msg/%s\n"), + (void) printf(gettext(" see: http://illumos.org/msg/%s\n"), msgid); - (void) printf(gettext("config:\n\n")); + (void) printf(gettext(" config:\n\n")); namewidth = max_width(NULL, nvroot, 0, 0); if (namewidth < 10) @@ -1494,7 +1853,7 @@ show_import(nvlist_t *config) */ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, - int force, nvlist_t *props, boolean_t do_verbatim) + nvlist_t *props, int flags) { zpool_handle_t *zhp; char *name; @@ -1508,11 +1867,12 @@ do_import(nvlist_t *config, const char * ZPOOL_CONFIG_POOL_STATE, &state) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); - if (version > SPA_VERSION) { + if (!SPA_VERSION_IS_SUPPORTED(version)) { (void) fprintf(stderr, gettext("cannot import '%s': pool " - "is formatted using a newer ZFS version\n"), name); + "is formatted using an unsupported ZFS version\n"), name); return (1); - } else if (state != POOL_STATE_EXPORTED && !force) { + 
} else if (state != POOL_STATE_EXPORTED && + !(flags & ZFS_IMPORT_ANY_HOST)) { uint64_t hostid; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, @@ -1546,7 +1906,7 @@ do_import(nvlist_t *config, const char * } } - if (zpool_import_props(g_zfs, config, newname, props, do_verbatim) != 0) + if (zpool_import_props(g_zfs, config, newname, props, flags) != 0) return (1); if (newname != NULL) @@ -1556,6 +1916,7 @@ do_import(nvlist_t *config, const char * return (1); if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && + !(flags & ZFS_IMPORT_ONLY) && zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_close(zhp); return (1); @@ -1597,6 +1958,11 @@ do_import(nvlist_t *config, const char * * * -n See if rewind would work, but don't actually rewind. * + * -N Import the pool but don't mount datasets. + * + * -T Specify a starting txg to use for import. This option is + * intentionally undocumented option for testing purposes. + * * -a Import all pools found. * * -o Set property=value and/or temporary mount options (without '='). 
@@ -1615,7 +1981,6 @@ zpool_do_import(int argc, char **argv) boolean_t do_all = B_FALSE; boolean_t do_destroyed = B_FALSE; char *mntopts = NULL; - boolean_t do_force = B_FALSE; nvpair_t *elem; nvlist_t *config; uint64_t searchguid = 0; @@ -1625,17 +1990,18 @@ zpool_do_import(int argc, char **argv) nvlist_t *policy = NULL; nvlist_t *props = NULL; boolean_t first; - boolean_t do_verbatim = B_FALSE; + int flags = ZFS_IMPORT_NORMAL; uint32_t rewind_policy = ZPOOL_NO_REWIND; boolean_t dryrun = B_FALSE; boolean_t do_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE; - uint64_t pool_state; + uint64_t pool_state, txg = -1ULL; char *cachefile = NULL; importargs_t idata = { 0 }; + char *endptr; /* check options */ - while ((c = getopt(argc, argv, ":aCc:d:DEfFno:rR:VX")) != -1) { + while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:R:T:VX")) != -1) { switch (c) { case 'a': do_all = B_TRUE; @@ -1660,14 +2026,20 @@ zpool_do_import(int argc, char **argv) do_destroyed = B_TRUE; break; case 'f': - do_force = B_TRUE; + flags |= ZFS_IMPORT_ANY_HOST; break; case 'F': do_rewind = B_TRUE; break; + case 'm': + flags |= ZFS_IMPORT_MISSING_LOG; + break; case 'n': dryrun = B_TRUE; break; + case 'N': + flags |= ZFS_IMPORT_ONLY; + break; case 'o': if ((propval = strchr(optarg, '=')) != NULL) { *propval = '\0'; @@ -1691,8 +2063,18 @@ zpool_do_import(int argc, char **argv) ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) goto error; break; + case 'T': + errno = 0; + txg = strtoull(optarg, &endptr, 0); + if (errno != 0 || *endptr != '\0') { + (void) fprintf(stderr, + gettext("invalid txg value\n")); + usage(B_FALSE); + } + rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND; + break; case 'V': - do_verbatim = B_TRUE; + flags |= ZFS_IMPORT_VERBATIM; break; case 'X': xtreme_rewind = B_TRUE; @@ -1731,12 +2113,13 @@ zpool_do_import(int argc, char **argv) /* In the future, we can capture further policy and include it here */ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || + 
nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, txg) != 0 || nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0) goto error; if (searchdirs == NULL) { searchdirs = safe_malloc(sizeof (char *)); - searchdirs[0] = "/dev/dsk"; + searchdirs[0] = "/dev"; nsearch = 1; } @@ -1784,8 +2167,10 @@ zpool_do_import(int argc, char **argv) errno = 0; searchguid = strtoull(argv[0], &endptr, 10); - if (errno != 0 || *endptr != '\0') + if (errno != 0 || *endptr != '\0') { searchname = argv[0]; + searchguid = 0; + } found_config = NULL; /* @@ -1864,7 +2249,7 @@ zpool_do_import(int argc, char **argv) if (do_all) { err |= do_import(config, NULL, mntopts, - do_force, props, do_verbatim); + props, flags); } else { show_import(config); } @@ -1913,7 +2298,7 @@ zpool_do_import(int argc, char **argv) err = B_TRUE; } else { err |= do_import(found_config, argc == 1 ? NULL : - argv[1], mntopts, do_force, props, do_verbatim); + argv[1], mntopts, props, flags); } } @@ -1935,10 +2320,10 @@ error: } typedef struct iostat_cbdata { - zpool_list_t *cb_list; - int cb_verbose; - int cb_iteration; + boolean_t cb_verbose; int cb_namewidth; + int cb_iteration; + zpool_list_t *cb_list; } iostat_cbdata_t; static void @@ -1991,13 +2376,13 @@ print_vdev_stats(zpool_handle_t *zhp, co char *vname; if (oldnv != NULL) { - verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_STATS, - (uint64_t **)&oldvs, &c) == 0); + verify(nvlist_lookup_uint64_array(oldnv, + ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0); } else { oldvs = &zerovs; } - verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_STATS, + verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&newvs, &c) == 0); if (strlen(name) + depth > cb->cb_namewidth) @@ -2047,6 +2432,17 @@ print_vdev_stats(zpool_handle_t *zhp, co return; for (c = 0; c < children; c++) { + uint64_t ishole = B_FALSE, islog = B_FALSE; + + (void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_HOLE, + &ishole); + + (void) 
nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_LOG, + &islog); + + if (ishole || islog) + continue; + vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE); print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); @@ -2054,6 +2450,31 @@ print_vdev_stats(zpool_handle_t *zhp, co } /* + * Log device section + */ + + if (num_logs(newnv) > 0) { + (void) printf("%-*s - - - - - " + "-\n", cb->cb_namewidth, "logs"); + + for (c = 0; c < children; c++) { + uint64_t islog = B_FALSE; + (void) nvlist_lookup_uint64(newchild[c], + ZPOOL_CONFIG_IS_LOG, &islog); + + if (islog) { + vname = zpool_vdev_name(g_zfs, zhp, newchild[c], + B_FALSE); + print_vdev_stats(zhp, vname, oldnv ? + oldchild[c] : NULL, newchild[c], + cb, depth + 2); + free(vname); + } + } + + } + + /* * Include level 2 ARC devices in iostat output */ if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE, @@ -2142,7 +2563,8 @@ get_namewidth(zpool_handle_t *zhp, void if (!cb->cb_verbose) cb->cb_namewidth = strlen(zpool_get_name(zhp)); else - cb->cb_namewidth = max_width(zhp, nvroot, 0, 0); + cb->cb_namewidth = max_width(zhp, nvroot, 0, + cb->cb_namewidth); } /* @@ -2158,55 +2580,14 @@ get_namewidth(zpool_handle_t *zhp, void } /* - * zpool iostat [-T d|u] [-v] [pool] ... [interval [count]] - * - * -T Display a timestamp in date(1) or Unix format - * -v Display statistics for individual vdevs - * - * This command can be tricky because we want to be able to deal with pool - * creation/destruction as well as vdev configuration changes. The bulk of this - * processing is handled by the pool_list_* routines in zpool_iter.c. We rely - * on pool_list_update() to detect the addition of new pools. Configuration - * changes are all handled within libzfs. + * Parse the input string, get the 'interval' and 'count' value if there is one. 
*/ -int -zpool_do_iostat(int argc, char **argv) +static void +get_interval_count(int *argcp, char **argv, unsigned long *iv, + unsigned long *cnt) { - int c; - int ret; - int npools; unsigned long interval = 0, count = 0; - zpool_list_t *list; - boolean_t verbose = B_FALSE; - iostat_cbdata_t cb; - - /* check options */ - while ((c = getopt(argc, argv, "T:v")) != -1) { - switch (c) { - case 'T': - if (optarg) { - if (*optarg == 'u') - timestamp_fmt = UDATE; - else if (*optarg == 'd') - timestamp_fmt = DDATE; - else - usage(B_FALSE); - } else { - usage(B_FALSE); - } - break; - case 'v': - verbose = B_TRUE; - break; - case '?': - (void) fprintf(stderr, gettext("invalid option '%c'\n"), - optopt); - usage(B_FALSE); - } - } - - argc -= optind; - argv += optind; + int argc = *argcp, errno; /* * Determine if the last argument is an integer or a pool name @@ -2223,7 +2604,6 @@ zpool_do_iostat(int argc, char **argv) "cannot be zero\n")); usage(B_FALSE); } - /* * Ignore the last parameter */ @@ -2240,7 +2620,7 @@ zpool_do_iostat(int argc, char **argv) /* * If the last argument is also an integer, then we have both a count - * and an integer. + * and an interval. */ if (argc > 0 && isdigit(argv[argc - 1][0])) { char *end; @@ -2265,23 +2645,83 @@ zpool_do_iostat(int argc, char **argv) } } - /* - * Construct the list of all interesting pools. - */ - ret = 0; - if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL) - return (1); - - if (pool_list_count(list) == 0 && argc != 0) { - pool_list_free(list); - return (1); - } + *iv = interval; + *cnt = count; + *argcp = argc; +} - if (pool_list_count(list) == 0 && interval == 0) { - pool_list_free(list); - (void) fprintf(stderr, gettext("no pools available\n")); - return (1); - } +static void +get_timestamp_arg(char c) +{ + if (c == 'u') + timestamp_fmt = UDATE; + else if (c == 'd') + timestamp_fmt = DDATE; + else + usage(B_FALSE); +} + +/* + * zpool iostat [-v] [-T d|u] [pool] ... 
[interval [count]] + * + * -v Display statistics for individual vdevs + * -T Display a timestamp in date(1) or Unix format + * + * This command can be tricky because we want to be able to deal with pool + * creation/destruction as well as vdev configuration changes. The bulk of this + * processing is handled by the pool_list_* routines in zpool_iter.c. We rely + * on pool_list_update() to detect the addition of new pools. Configuration + * changes are all handled within libzfs. + */ +int +zpool_do_iostat(int argc, char **argv) +{ + int c; + int ret; + int npools; + unsigned long interval = 0, count = 0; + zpool_list_t *list; + boolean_t verbose = B_FALSE; + iostat_cbdata_t cb; + + /* check options */ + while ((c = getopt(argc, argv, "T:v")) != -1) { + switch (c) { + case 'T': + get_timestamp_arg(*optarg); + break; + case 'v': + verbose = B_TRUE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + get_interval_count(&argc, argv, &interval, &count); + + /* + * Construct the list of all interesting pools. + */ + ret = 0; + if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL) + return (1); + + if (pool_list_count(list) == 0 && argc != 0) { + pool_list_free(list); + return (1); + } + + if (pool_list_count(list) == 0 && interval == 0) { + pool_list_free(list); + (void) fprintf(stderr, gettext("no pools available\n")); + return (1); + } /* * Enter the main iostat loop. @@ -2353,39 +2793,61 @@ zpool_do_iostat(int argc, char **argv) } typedef struct list_cbdata { + boolean_t cb_verbose; + int cb_namewidth; boolean_t cb_scripted; - boolean_t cb_first; zprop_list_t *cb_proplist; + boolean_t cb_literal; } list_cbdata_t; /* * Given a list of columns to display, output appropriate headers for each one. 
*/ static void -print_header(zprop_list_t *pl) +print_header(list_cbdata_t *cb) { + zprop_list_t *pl = cb->cb_proplist; + char headerbuf[ZPOOL_MAXPROPLEN]; const char *header; boolean_t first = B_TRUE; boolean_t right_justify; + size_t width = 0; for (; pl != NULL; pl = pl->pl_next) { - if (pl->pl_prop == ZPROP_INVAL) - continue; + width = pl->pl_width; + if (first && cb->cb_verbose) { + /* + * Reset the width to accommodate the verbose listing + * of devices. + */ + width = cb->cb_namewidth; + } if (!first) (void) printf(" "); else first = B_FALSE; - header = zpool_prop_column_name(pl->pl_prop); - right_justify = zpool_prop_align_right(pl->pl_prop); + right_justify = B_FALSE; + if (pl->pl_prop != ZPROP_INVAL) { + header = zpool_prop_column_name(pl->pl_prop); + right_justify = zpool_prop_align_right(pl->pl_prop); + } else { + int i; + + for (i = 0; pl->pl_user_prop[i] != '\0'; i++) + headerbuf[i] = toupper(pl->pl_user_prop[i]); + headerbuf[i] = '\0'; + header = headerbuf; + } if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) - (void) printf("%*s", pl->pl_width, header); + (void) printf("%*s", width, header); else - (void) printf("%-*s", pl->pl_width, header); + (void) printf("%-*s", width, header); + } (void) printf("\n"); @@ -2396,17 +2858,28 @@ print_header(zprop_list_t *pl) * to the described layout. */ static void -print_pool(zpool_handle_t *zhp, zprop_list_t *pl, int scripted) +print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) { + zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; char property[ZPOOL_MAXPROPLEN]; char *propstr; boolean_t right_justify; - int width; + size_t width; for (; pl != NULL; pl = pl->pl_next) { + + width = pl->pl_width; + if (first && cb->cb_verbose) { + /* + * Reset the width to accommodate the verbose listing + * of devices. 
+ */ + width = cb->cb_namewidth; + } + if (!first) { - if (scripted) + if (cb->cb_scripted) (void) printf("\t"); else (void) printf(" "); @@ -2417,24 +2890,28 @@ print_pool(zpool_handle_t *zhp, zprop_li right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { if (zpool_get_prop(zhp, pl->pl_prop, property, - sizeof (property), NULL) != 0) + sizeof (property), NULL, cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = zpool_prop_align_right(pl->pl_prop); + } else if ((zpool_prop_feature(pl->pl_user_prop) || + zpool_prop_unsupported(pl->pl_user_prop)) && + zpool_prop_get_feature(zhp, pl->pl_user_prop, property, + sizeof (property)) == 0) { + propstr = property; } else { propstr = "-"; } - width = pl->pl_width; /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ - if (scripted || (pl->pl_next == NULL && !right_justify)) + if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) (void) printf("%*s", width, propstr); @@ -2445,6 +2922,155 @@ print_pool(zpool_handle_t *zhp, zprop_li (void) printf("\n"); } +static void +print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted, + boolean_t valid) +{ + char propval[64]; + boolean_t fixed; + size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL); + + switch (prop) { + case ZPOOL_PROP_EXPANDSZ: + if (value == 0) + (void) strlcpy(propval, "-", sizeof (propval)); + else + zfs_nicenum(value, propval, sizeof (propval)); + break; + case ZPOOL_PROP_FRAGMENTATION: + if (value == ZFS_FRAG_INVALID) { + (void) strlcpy(propval, "-", sizeof (propval)); + } else { + (void) snprintf(propval, sizeof (propval), "%llu%%", + value); + } + break; + case ZPOOL_PROP_CAPACITY: + (void) snprintf(propval, sizeof (propval), "%llu%%", value); + break; + default: + zfs_nicenum(value, propval, sizeof (propval)); + } + + if (!valid) + (void) strlcpy(propval, 
"-", sizeof (propval)); + + if (scripted) + (void) printf("\t%s", propval); + else + (void) printf(" %*s", width, propval); +} + +void +print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, + list_cbdata_t *cb, int depth) +{ + nvlist_t **child; + vdev_stat_t *vs; + uint_t c, children; + char *vname; + boolean_t scripted = cb->cb_scripted; + uint64_t islog = B_FALSE; + boolean_t haslog = B_FALSE; + char *dashes = "%-*s - - - - - -\n"; + + verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, + (uint64_t **)&vs, &c) == 0); + + if (name != NULL) { + boolean_t toplevel = (vs->vs_space != 0); + uint64_t cap; + + if (scripted) + (void) printf("\t%s", name); + else if (strlen(name) + depth > cb->cb_namewidth) + (void) printf("%*s%s", depth, "", name); + else + (void) printf("%*s%s%*s", depth, "", name, + (int)(cb->cb_namewidth - strlen(name) - depth), ""); + + /* + * Print the properties for the individual vdevs. Some + * properties are only applicable to toplevel vdevs. The + * 'toplevel' boolean value is passed to the print_one_column() + * to indicate that the value is valid. + */ + print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, scripted, + toplevel); + print_one_column(ZPOOL_PROP_ALLOCATED, vs->vs_alloc, scripted, + toplevel); + print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, + scripted, toplevel); + print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted, + B_TRUE); + print_one_column(ZPOOL_PROP_FRAGMENTATION, + vs->vs_fragmentation, scripted, + (vs->vs_fragmentation != ZFS_FRAG_INVALID && toplevel)); + cap = (vs->vs_space == 0) ? 
0 : + (vs->vs_alloc * 100 / vs->vs_space); + print_one_column(ZPOOL_PROP_CAPACITY, cap, scripted, toplevel); + (void) printf("\n"); + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, + &child, &children) != 0) + return; + + for (c = 0; c < children; c++) { + uint64_t ishole = B_FALSE; + + if (nvlist_lookup_uint64(child[c], + ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole) + continue; + + if (nvlist_lookup_uint64(child[c], + ZPOOL_CONFIG_IS_LOG, &islog) == 0 && islog) { + haslog = B_TRUE; + continue; + } + + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + + if (haslog == B_TRUE) { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "log"); + for (c = 0; c < children; c++) { + if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, + &islog) != 0 || !islog) + continue; + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, + &child, &children) == 0 && children > 0) { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "cache"); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + } + + if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, + &children) == 0 && children > 0) { + /* LINTED E_SEC_PRINTF_VAR_FMT */ + (void) printf(dashes, cb->cb_namewidth, "spare"); + for (c = 0; c < children; c++) { + vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); + print_list_stats(zhp, vname, child[c], cb, depth + 2); + free(vname); + } + } +} + + /* * Generic callback function to list a pool. 
*/ @@ -2452,25 +3078,32 @@ int list_callback(zpool_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; + nvlist_t *config; + nvlist_t *nvroot; - if (cbp->cb_first) { - if (!cbp->cb_scripted) - print_header(cbp->cb_proplist); - cbp->cb_first = B_FALSE; - } + config = zpool_get_config(zhp, NULL); + + print_pool(zhp, cbp); + if (!cbp->cb_verbose) + return (0); - print_pool(zhp, cbp->cb_proplist, cbp->cb_scripted); + verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, + &nvroot) == 0); + print_list_stats(zhp, NULL, nvroot, cbp, 0); return (0); } /* - * zpool list [-H] [-o prop[,prop]*] [pool] ... + * zpool list [-Hp] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]] * * -H Scripted mode. Don't display headers, and separate properties * by a single tab. * -o List of properties to display. Defaults to - * "name,size,allocated,free,capacity,health,altroot" + * "name,size,allocated,free,expandsize,fragmentation,capacity," + * "dedupratio,health,altroot" + * -p Diplay values in parsable (exact) format. + * -T Display a timestamp in date(1) or Unix format * * List all pools in the system, whether or not they're healthy. Output space * statistics for each one, as well as health status summary. 
@@ -2482,11 +3115,15 @@ zpool_do_list(int argc, char **argv) int ret; list_cbdata_t cb = { 0 }; static char default_props[] = - "name,size,allocated,free,capacity,dedupratio,health,altroot"; + "name,size,allocated,free,expandsize,fragmentation,capacity," + "dedupratio,health,altroot"; char *props = default_props; + unsigned long interval = 0, count = 0; + zpool_list_t *list; + boolean_t first = B_TRUE; /* check options */ - while ((c = getopt(argc, argv, ":Ho:")) != -1) { + while ((c = getopt(argc, argv, ":Ho:pT:v")) != -1) { switch (c) { case 'H': cb.cb_scripted = B_TRUE; @@ -2494,6 +3131,15 @@ zpool_do_list(int argc, char **argv) case 'o': props = optarg; break; + case 'p': + cb.cb_literal = B_TRUE; + break; + case 'T': + get_timestamp_arg(*optarg); + break; + case 'v': + cb.cb_verbose = B_TRUE; + break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); @@ -2509,49 +3155,49 @@ zpool_do_list(int argc, char **argv) argc -= optind; argv += optind; + get_interval_count(&argc, argv, &interval, &count); + if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0) usage(B_FALSE); - cb.cb_first = B_TRUE; + for (;;) { + if ((list = pool_list_get(argc, argv, &cb.cb_proplist, + &ret)) == NULL) + return (1); - ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, - list_callback, &cb); + if (pool_list_count(list) == 0) + break; - zprop_free_list(cb.cb_proplist); + cb.cb_namewidth = 0; + (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb); - if (argc == 0 && cb.cb_first && !cb.cb_scripted) { - (void) printf(gettext("no pools available\n")); - return (0); - } + if (timestamp_fmt != NODATE) + print_timestamp(timestamp_fmt); - return (ret); -} + if (!cb.cb_scripted && (first || cb.cb_verbose)) { + print_header(&cb); + first = B_FALSE; + } + ret = pool_list_iter(list, B_TRUE, list_callback, &cb); -static nvlist_t * -zpool_get_vdev_by_name(nvlist_t *nv, char *name) -{ - nvlist_t **child; - uint_t c, children; - nvlist_t 
*match; - char *path; + if (interval == 0) + break; - if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, - &child, &children) != 0) { - verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - if (strncmp(name, "/dev/dsk/", 9) == 0) - name += 9; - if (strncmp(path, "/dev/dsk/", 9) == 0) - path += 9; - if (strcmp(name, path) == 0) - return (nv); - return (NULL); - } - - for (c = 0; c < children; c++) - if ((match = zpool_get_vdev_by_name(child[c], name)) != NULL) - return (match); + if (count != 0 && --count == 0) + break; + + pool_list_free(list); + (void) sleep(interval); + } + + if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) { + (void) printf(gettext("no pools available\n")); + ret = 0; + } - return (NULL); + pool_list_free(list); + zprop_free_list(cb.cb_proplist); + return (ret); } static int @@ -2769,8 +3415,7 @@ zpool_do_split(int argc, char **argv) if (add_prop_list( zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE) != 0) { - if (props) - nvlist_free(props); + nvlist_free(props); usage(B_FALSE); } break; @@ -2783,8 +3428,7 @@ zpool_do_split(int argc, char **argv) propval++; if (add_prop_list(optarg, propval, &props, B_TRUE) != 0) { - if (props) - nvlist_free(props); + nvlist_free(props); usage(B_FALSE); } } else { @@ -2857,7 +3501,7 @@ zpool_do_split(int argc, char **argv) if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && zpool_enable_datasets(zhp, mntopts, 0) != 0) { ret = 1; - (void) fprintf(stderr, gettext("Split was succssful, but " + (void) fprintf(stderr, gettext("Split was successful, but " "the datasets could not all be mounted\n")); (void) fprintf(stderr, gettext("Try doing '%s' with a " "different altroot\n"), "zpool import"); @@ -3086,51 +3730,20 @@ zpool_do_clear(int argc, char **argv) return (ret); } -typedef struct scrub_cbdata { - int cb_type; - int cb_argc; - char **cb_argv; -} scrub_cbdata_t; - -int -scrub_callback(zpool_handle_t *zhp, void *data) -{ - scrub_cbdata_t *cb = data; - int err; - - 
/* - * Ignore faulted pools. - */ - if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { - (void) fprintf(stderr, gettext("cannot scrub '%s': pool is " - "currently unavailable\n"), zpool_get_name(zhp)); - return (1); - } - - err = zpool_scrub(zhp, cb->cb_type); - - return (err != 0); -} - /* - * zpool scrub [-s] ... - * - * -s Stop. Stops any in-progress scrub. + * zpool reguid */ int -zpool_do_scrub(int argc, char **argv) +zpool_do_reguid(int argc, char **argv) { int c; - scrub_cbdata_t cb; - - cb.cb_type = POOL_SCRUB_EVERYTHING; + char *poolname; + zpool_handle_t *zhp; + int ret = 0; /* check options */ - while ((c = getopt(argc, argv, "s")) != -1) { + while ((c = getopt(argc, argv, "")) != -1) { switch (c) { - case 's': - cb.cb_type = POOL_SCRUB_NONE; - break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -3138,24 +3751,146 @@ zpool_do_scrub(int argc, char **argv) } } - cb.cb_argc = argc; - cb.cb_argv = argv; argc -= optind; argv += optind; + /* get pool name and check number of arguments */ if (argc < 1) { - (void) fprintf(stderr, gettext("missing pool name argument\n")); + (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } - return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb)); -} + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } -typedef struct status_cbdata { - int cb_count; - boolean_t cb_allpools; - boolean_t cb_verbose; - boolean_t cb_explain; + poolname = argv[0]; + if ((zhp = zpool_open(g_zfs, poolname)) == NULL) + return (1); + + ret = zpool_reguid(zhp); + + zpool_close(zhp); + return (ret); +} + + +/* + * zpool reopen + * + * Reopen the pool so that the kernel can update the sizes of all vdevs. 
+ */ +int +zpool_do_reopen(int argc, char **argv) +{ + int c; + int ret = 0; + zpool_handle_t *zhp; + char *pool; + + /* check options */ + while ((c = getopt(argc, argv, "")) != -1) { + switch (c) { + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc--; + argv++; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name\n")); + usage(B_FALSE); + } + + if (argc > 1) { + (void) fprintf(stderr, gettext("too many arguments\n")); + usage(B_FALSE); + } + + pool = argv[0]; + if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) + return (1); + + ret = zpool_reopen(zhp); + zpool_close(zhp); + return (ret); +} + +typedef struct scrub_cbdata { + int cb_type; + int cb_argc; + char **cb_argv; +} scrub_cbdata_t; + +int +scrub_callback(zpool_handle_t *zhp, void *data) +{ + scrub_cbdata_t *cb = data; + int err; + + /* + * Ignore faulted pools. + */ + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + (void) fprintf(stderr, gettext("cannot scrub '%s': pool is " + "currently unavailable\n"), zpool_get_name(zhp)); + return (1); + } + + err = zpool_scan(zhp, cb->cb_type); + + return (err != 0); +} + +/* + * zpool scrub [-s] ... + * + * -s Stop. Stops any in-progress scrub. 
+ */ +int +zpool_do_scrub(int argc, char **argv) +{ + int c; + scrub_cbdata_t cb; + + cb.cb_type = POOL_SCAN_SCRUB; + + /* check options */ + while ((c = getopt(argc, argv, "s")) != -1) { + switch (c) { + case 's': + cb.cb_type = POOL_SCAN_NONE; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + cb.cb_argc = argc; + cb.cb_argv = argv; + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing pool name argument\n")); + usage(B_FALSE); + } + + return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb)); +} + +typedef struct status_cbdata { + int cb_count; + boolean_t cb_allpools; + boolean_t cb_verbose; + boolean_t cb_explain; boolean_t cb_first; boolean_t cb_dedup_stats; } status_cbdata_t; @@ -3164,62 +3899,112 @@ typedef struct status_cbdata { * Print out detailed scrub status. */ void -print_scrub_status(nvlist_t *nvroot) +print_scan_status(pool_scan_stat_t *ps) { - vdev_stat_t *vs; - uint_t vsc; - time_t start, end, now; + time_t start, end; + uint64_t elapsed, mins_left, hours_left; + uint64_t pass_exam, examined, total; + uint_t rate; double fraction_done; - uint64_t examined, total, minutes_left, minutes_taken; - char *scrub_type; + char processed_buf[7], examined_buf[7], total_buf[7], rate_buf[7]; - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, - (uint64_t **)&vs, &vsc) == 0); + (void) printf(gettext(" scan: ")); - /* - * If there's never been a scrub, there's not much to say. - */ - if (vs->vs_scrub_end == 0 && vs->vs_scrub_type == POOL_SCRUB_NONE) { + /* If there's never been a scan, there's not much to say. */ + if (ps == NULL || ps->pss_func == POOL_SCAN_NONE || + ps->pss_func >= POOL_SCAN_FUNCS) { (void) printf(gettext("none requested\n")); return; } - scrub_type = (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ? 
- "resilver" : "scrub"; + start = ps->pss_start_time; + end = ps->pss_end_time; + zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf)); - start = vs->vs_scrub_start; - end = vs->vs_scrub_end; - now = time(NULL); - examined = vs->vs_scrub_examined; - total = vs->vs_alloc; - - if (end != 0) { - minutes_taken = (uint64_t)((end - start) / 60); - - (void) printf(gettext("%s %s after %lluh%um with %llu errors " - "on %s"), - scrub_type, vs->vs_scrub_complete ? "completed" : "stopped", + assert(ps->pss_func == POOL_SCAN_SCRUB || + ps->pss_func == POOL_SCAN_RESILVER); + /* + * Scan is finished or canceled. + */ + if (ps->pss_state == DSS_FINISHED) { + uint64_t minutes_taken = (end - start) / 60; + char *fmt = NULL; + + if (ps->pss_func == POOL_SCAN_SCRUB) { + fmt = gettext("scrub repaired %s in %lluh%um with " + "%llu errors on %s"); + } else if (ps->pss_func == POOL_SCAN_RESILVER) { + fmt = gettext("resilvered %s in %lluh%um with " + "%llu errors on %s"); + } + /* LINTED */ + (void) printf(fmt, processed_buf, (u_longlong_t)(minutes_taken / 60), (uint_t)(minutes_taken % 60), - (u_longlong_t)vs->vs_scrub_errors, ctime(&end)); + (u_longlong_t)ps->pss_errors, + ctime((time_t *)&end)); + return; + } else if (ps->pss_state == DSS_CANCELED) { + if (ps->pss_func == POOL_SCAN_SCRUB) { + (void) printf(gettext("scrub canceled on %s"), + ctime(&end)); + } else if (ps->pss_func == POOL_SCAN_RESILVER) { + (void) printf(gettext("resilver canceled on %s"), + ctime(&end)); + } return; } - if (examined == 0) - examined = 1; - if (examined > total) - total = examined; + assert(ps->pss_state == DSS_SCANNING); + + /* + * Scan is in progress. + */ + if (ps->pss_func == POOL_SCAN_SCRUB) { + (void) printf(gettext("scrub in progress since %s"), + ctime(&start)); + } else if (ps->pss_func == POOL_SCAN_RESILVER) { + (void) printf(gettext("resilver in progress since %s"), + ctime(&start)); + } + examined = ps->pss_examined ? 
ps->pss_examined : 1; + total = ps->pss_to_examine; fraction_done = (double)examined / total; - minutes_left = (uint64_t)((now - start) * - (1 - fraction_done) / fraction_done / 60); - minutes_taken = (uint64_t)((now - start) / 60); - - (void) printf(gettext("%s in progress for %lluh%um, %.2f%% done, " - "%lluh%um to go\n"), - scrub_type, (u_longlong_t)(minutes_taken / 60), - (uint_t)(minutes_taken % 60), 100 * fraction_done, - (u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60)); + + /* elapsed time for this pass */ + elapsed = time(NULL) - ps->pss_pass_start; + elapsed = elapsed ? elapsed : 1; + pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; + rate = pass_exam / elapsed; + rate = rate ? rate : 1; + mins_left = ((total - examined) / rate) / 60; + hours_left = mins_left / 60; + + zfs_nicenum(examined, examined_buf, sizeof (examined_buf)); + zfs_nicenum(total, total_buf, sizeof (total_buf)); + zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); + + /* + * do not print estimated time if hours_left is more than 30 days + */ + (void) printf(gettext(" %s scanned out of %s at %s/s"), + examined_buf, total_buf, rate_buf); + if (hours_left < (30 * 24)) { + (void) printf(gettext(", %lluh%um to go\n"), + (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); + } else { + (void) printf(gettext( + ", (scan is slow, no estimated time)\n")); + } + + if (ps->pss_func == POOL_SCAN_RESILVER) { + (void) printf(gettext(" %s resilvered, %.2f%% done\n"), + processed_buf, 100 * fraction_done); + } else if (ps->pss_func == POOL_SCAN_SCRUB) { + (void) printf(gettext(" %s repaired, %.2f%% done\n"), + processed_buf, 100 * fraction_done); + } } static void @@ -3307,14 +4092,20 @@ print_dedup_stats(nvlist_t *config) /* * If the pool was faulted then we may not have been able to - * obtain the config. Otherwise, if have anything in the dedup + * obtain the config. Otherwise, if we have anything in the dedup * table continue processing the stats. 
*/ if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, - (uint64_t **)&ddo, &c) != 0 || ddo->ddo_count == 0) + (uint64_t **)&ddo, &c) != 0) return; (void) printf("\n"); + (void) printf(gettext(" dedup: ")); + if (ddo->ddo_count == 0) { + (void) printf(gettext("no DDT entries\n")); + return; + } + (void) printf("DDT entries %llu, size %llu on disk, %llu in core\n", (u_longlong_t)ddo->ddo_count, (u_longlong_t)ddo->ddo_dspace, @@ -3333,7 +4124,7 @@ print_dedup_stats(nvlist_t *config) * pool: tank * status: DEGRADED * reason: One or more devices ... - * see: http://www.sun.com/msg/ZFS-xxxx-01 + * see: http://illumos.org/msg/ZFS-xxxx-01 * config: * mirror DEGRADED * c1t0d0 OK @@ -3362,7 +4153,11 @@ status_callback(zpool_handle_t *zhp, voi * If we were given 'zpool status -x', only report those pools with * problems. */ - if (reason == ZPOOL_STATUS_OK && cbp->cb_explain) { + if (cbp->cb_explain && + (reason == ZPOOL_STATUS_OK || + reason == ZPOOL_STATUS_VERSION_OLDER || + reason == ZPOOL_STATUS_NON_NATIVE_ASHIFT || + reason == ZPOOL_STATUS_FEAT_DISABLED)) { if (!cbp->cb_allpools) { (void) printf(gettext("pool '%s' is healthy\n"), zpool_get_name(zhp)); @@ -3379,7 +4174,7 @@ status_callback(zpool_handle_t *zhp, voi verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); - verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, + verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); health = zpool_state_to_name(vs->vs_state, vs->vs_aux); @@ -3452,7 +4247,6 @@ status_callback(zpool_handle_t *zhp, voi "replace'.\n")); break; - case ZPOOL_STATUS_RESILVERING: (void) printf(gettext("status: One or more devices is " "currently being resilvered. The pool will\n\tcontinue " @@ -3478,12 +4272,13 @@ status_callback(zpool_handle_t *zhp, voi break; case ZPOOL_STATUS_VERSION_OLDER: - (void) printf(gettext("status: The pool is formatted using an " - "older on-disk format. 
The pool can\n\tstill be used, but " - "some features are unavailable.\n")); + (void) printf(gettext("status: The pool is formatted using a " + "legacy on-disk format. The pool can\n\tstill be used, " + "but some features are unavailable.\n")); (void) printf(gettext("action: Upgrade the pool using 'zpool " "upgrade'. Once this is done, the\n\tpool will no longer " - "be accessible on older software versions.\n")); + "be accessible on software that does not support feature\n" + "\tflags.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: @@ -3495,6 +4290,41 @@ status_callback(zpool_handle_t *zhp, voi "backup.\n")); break; + case ZPOOL_STATUS_FEAT_DISABLED: + (void) printf(gettext("status: Some supported features are not " + "enabled on the pool. The pool can\n\tstill be used, but " + "some features are unavailable.\n")); + (void) printf(gettext("action: Enable all features using " + "'zpool upgrade'. Once this is done,\n\tthe pool may no " + "longer be accessible by software that does not support\n\t" + "the features. See zpool-features(7) for details.\n")); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_READ: + (void) printf(gettext("status: The pool cannot be accessed on " + "this system because it uses the\n\tfollowing feature(s) " + "not supported on this system:\n")); + zpool_print_unsup_feat(config); + (void) printf("\n"); + (void) printf(gettext("action: Access the pool from a system " + "that supports the required feature(s),\n\tor restore the " + "pool from backup.\n")); + break; + + case ZPOOL_STATUS_UNSUP_FEAT_WRITE: + (void) printf(gettext("status: The pool can only be accessed " + "in read-only mode on this system. It\n\tcannot be " + "accessed in read-write mode because it uses the " + "following\n\tfeature(s) not supported on this system:\n")); + zpool_print_unsup_feat(config); + (void) printf("\n"); + (void) printf(gettext("action: The pool cannot be accessed in " + "read-write mode. 
Import the pool with\n" + "\t\"-o readonly=on\", access the pool from a system that " + "supports the\n\trequired feature(s), or restore the " + "pool from backup.\n")); + break; + case ZPOOL_STATUS_FAULTED_DEV_R: (void) printf(gettext("status: One or more devices are " "faulted in response to persistent errors.\n\tSufficient " @@ -3534,6 +4364,15 @@ status_callback(zpool_handle_t *zhp, voi "'zpool clear'.\n")); break; + case ZPOOL_STATUS_NON_NATIVE_ASHIFT: + (void) printf(gettext("status: One or more devices are " + "configured to use a non-native block size.\n" + "\tExpect reduced performance.\n")); + (void) printf(gettext("action: Replace affected devices with " + "devices that support the\n\tconfigured block size, or " + "migrate data to a properly configured\n\tpool.\n")); + break; + default: /* * The remaining errors can't actually be generated, yet. @@ -3542,7 +4381,7 @@ status_callback(zpool_handle_t *zhp, voi } if (msgid != NULL) - (void) printf(gettext(" see: http://www.sun.com/msg/%s\n"), + (void) printf(gettext(" see: http://illumos.org/msg/%s\n"), msgid); if (config != NULL) { @@ -3550,10 +4389,11 @@ status_callback(zpool_handle_t *zhp, voi uint64_t nerr; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; + pool_scan_stat_t *ps = NULL; - - (void) printf(gettext(" scrub: ")); - print_scrub_status(nvroot); + (void) nvlist_lookup_uint64_array(nvroot, + ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c); + print_scan_status(ps); namewidth = max_width(zhp, nvroot, 0, 0); if (namewidth < 10) @@ -3621,11 +4461,12 @@ status_callback(zpool_handle_t *zhp, voi } /* - * zpool status [-vx] [pool] ... + * zpool status [-vx] [-T d|u] [pool] ... [interval [count]] * * -v Display complete error logs * -x Display only pools with potential problems * -D Display dedup status (undocumented) + * -T Display a timestamp in date(1) or Unix format * * Describes the health status of all pools or some subset. 
*/ @@ -3634,10 +4475,11 @@ zpool_do_status(int argc, char **argv) { int c; int ret; + unsigned long interval = 0, count = 0; status_cbdata_t cb = { 0 }; /* check options */ - while ((c = getopt(argc, argv, "vxD")) != -1) { + while ((c = getopt(argc, argv, "vxDT:")) != -1) { switch (c) { case 'v': cb.cb_verbose = B_TRUE; @@ -3648,6 +4490,9 @@ zpool_do_status(int argc, char **argv) case 'D': cb.cb_dedup_stats = B_TRUE; break; + case 'T': + get_timestamp_arg(*optarg); + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -3658,72 +4503,280 @@ zpool_do_status(int argc, char **argv) argc -= optind; argv += optind; - cb.cb_first = B_TRUE; + get_interval_count(&argc, argv, &interval, &count); if (argc == 0) cb.cb_allpools = B_TRUE; - ret = for_each_pool(argc, argv, B_TRUE, NULL, status_callback, &cb); + cb.cb_first = B_TRUE; - if (argc == 0 && cb.cb_count == 0) - (void) printf(gettext("no pools available\n")); - else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) - (void) printf(gettext("all pools are healthy\n")); + for (;;) { + if (timestamp_fmt != NODATE) + print_timestamp(timestamp_fmt); - return (ret); + ret = for_each_pool(argc, argv, B_TRUE, NULL, + status_callback, &cb); + + if (argc == 0 && cb.cb_count == 0) + (void) printf(gettext("no pools available\n")); + else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) + (void) printf(gettext("all pools are healthy\n")); + + if (ret != 0) + return (ret); + + if (interval == 0) + break; + + if (count != 0 && --count == 0) + break; + + (void) sleep(interval); + } + + return (0); } typedef struct upgrade_cbdata { - int cb_all; - int cb_first; - int cb_newer; - int cb_argc; - uint64_t cb_version; - char **cb_argv; + boolean_t cb_first; + boolean_t cb_unavail; + char cb_poolname[ZFS_MAX_DATASET_NAME_LEN]; + int cb_argc; + uint64_t cb_version; + char **cb_argv; } upgrade_cbdata_t; +#ifdef __FreeBSD__ +static int +is_root_pool(zpool_handle_t *zhp) +{ + static struct statfs sfs; + 
static char *poolname = NULL; + static boolean_t stated = B_FALSE; + char *slash; + + if (!stated) { + stated = B_TRUE; + if (statfs("/", &sfs) == -1) { + (void) fprintf(stderr, + "Unable to stat root file system: %s.\n", + strerror(errno)); + return (0); + } + if (strcmp(sfs.f_fstypename, "zfs") != 0) + return (0); + poolname = sfs.f_mntfromname; + if ((slash = strchr(poolname, '/')) != NULL) + *slash = '\0'; + } + return (poolname != NULL && strcmp(poolname, zpool_get_name(zhp)) == 0); +} + +static void +root_pool_upgrade_check(zpool_handle_t *zhp, char *poolname, int size) +{ + + if (poolname[0] == '\0' && is_root_pool(zhp)) + (void) strlcpy(poolname, zpool_get_name(zhp), size); +} +#endif /* FreeBSD */ + +static int +upgrade_version(zpool_handle_t *zhp, uint64_t version) +{ + int ret; + nvlist_t *config; + uint64_t oldversion; + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &oldversion) == 0); + + assert(SPA_VERSION_IS_SUPPORTED(oldversion)); + assert(oldversion < version); + + ret = zpool_upgrade(zhp, version); + if (ret != 0) + return (ret); + + if (version >= SPA_VERSION_FEATURES) { + (void) printf(gettext("Successfully upgraded " + "'%s' from version %llu to feature flags.\n"), + zpool_get_name(zhp), oldversion); + } else { + (void) printf(gettext("Successfully upgraded " + "'%s' from version %llu to version %llu.\n"), + zpool_get_name(zhp), oldversion, version); + } + + return (0); +} + +static int +upgrade_enable_all(zpool_handle_t *zhp, int *countp) +{ + int i, ret, count; + boolean_t firstff = B_TRUE; + nvlist_t *enabled = zpool_get_features(zhp); + + count = 0; + for (i = 0; i < SPA_FEATURES; i++) { + const char *fname = spa_feature_table[i].fi_uname; + const char *fguid = spa_feature_table[i].fi_guid; + if (!nvlist_exists(enabled, fguid)) { + char *propname; + verify(-1 != asprintf(&propname, "feature@%s", fname)); + ret = zpool_set_prop(zhp, propname, + ZFS_FEATURE_ENABLED); + if (ret != 0) { + 
free(propname); + return (ret); + } + count++; + + if (firstff) { + (void) printf(gettext("Enabled the " + "following features on '%s':\n"), + zpool_get_name(zhp)); + firstff = B_FALSE; + } + (void) printf(gettext(" %s\n"), fname); + free(propname); + } + } + + if (countp != NULL) + *countp = count; + return (0); +} + static int upgrade_cb(zpool_handle_t *zhp, void *arg) { upgrade_cbdata_t *cbp = arg; nvlist_t *config; uint64_t version; - int ret = 0; + boolean_t printnl = B_FALSE; + int ret; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + (void) fprintf(stderr, gettext("cannot upgrade '%s': pool is " + "currently unavailable.\n\n"), zpool_get_name(zhp)); + cbp->cb_unavail = B_TRUE; + /* Allow iteration to continue. */ + return (0); + } config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); - if (!cbp->cb_newer && version < SPA_VERSION) { - if (!cbp->cb_all) { - if (cbp->cb_first) { - (void) printf(gettext("The following pools are " - "out of date, and can be upgraded. After " - "being\nupgraded, these pools will no " - "longer be accessible by older software " - "versions.\n\n")); - (void) printf(gettext("VER POOL\n")); - (void) printf(gettext("--- ------------\n")); - cbp->cb_first = B_FALSE; - } + assert(SPA_VERSION_IS_SUPPORTED(version)); - (void) printf("%2llu %s\n", (u_longlong_t)version, - zpool_get_name(zhp)); - } else { + if (version < cbp->cb_version) { + cbp->cb_first = B_FALSE; + ret = upgrade_version(zhp, cbp->cb_version); + if (ret != 0) + return (ret); +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD__ */ + printnl = B_TRUE; + +#ifdef illumos + /* + * If they did "zpool upgrade -a", then we could + * be doing ioctls to different pools. We need + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). 
+ */ + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; +#endif + } + + if (cbp->cb_version >= SPA_VERSION_FEATURES) { + int count; + ret = upgrade_enable_all(zhp, &count); + if (ret != 0) + return (ret); + + if (count > 0) { + cbp->cb_first = B_FALSE; + printnl = B_TRUE; +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD__ */ + /* + * If they did "zpool upgrade -a", then we could + * be doing ioctls to different pools. We need + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). + */ + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; + } + } + + if (printnl) { + (void) printf(gettext("\n")); + } + + return (0); +} + +static int +upgrade_list_unavail(zpool_handle_t *zhp, void *arg) +{ + upgrade_cbdata_t *cbp = arg; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + if (cbp->cb_first) { + (void) fprintf(stderr, gettext("The following pools " + "are unavailable and cannot be upgraded as this " + "time.\n\n")); + (void) fprintf(stderr, gettext("POOL\n")); + (void) fprintf(stderr, gettext("------------\n")); cbp->cb_first = B_FALSE; - ret = zpool_upgrade(zhp, cbp->cb_version); - if (!ret) { - (void) printf(gettext("Successfully upgraded " - "'%s'\n\n"), zpool_get_name(zhp)); - } } - } else if (cbp->cb_newer && version > SPA_VERSION) { - assert(!cbp->cb_all); + (void) printf(gettext("%s\n"), zpool_get_name(zhp)); + cbp->cb_unavail = B_TRUE; + } + return (0); +} + +static int +upgrade_list_older_cb(zpool_handle_t *zhp, void *arg) +{ + upgrade_cbdata_t *cbp = arg; + nvlist_t *config; + uint64_t version; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + /* + * This will have been reported by upgrade_list_unavail so + * just allow iteration to continue. 
+ */ + cbp->cb_unavail = B_TRUE; + return (0); + } + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &version) == 0); + + assert(SPA_VERSION_IS_SUPPORTED(version)); + + if (version < SPA_VERSION_FEATURES) { if (cbp->cb_first) { (void) printf(gettext("The following pools are " - "formatted using a newer software version and\n" - "cannot be accessed on the current system.\n\n")); + "formatted with legacy version numbers and can\n" + "be upgraded to use feature flags. After " + "being upgraded, these pools\nwill no " + "longer be accessible by software that does not " + "support feature\nflags.\n\n")); (void) printf(gettext("VER POOL\n")); (void) printf(gettext("--- ------------\n")); cbp->cb_first = B_FALSE; @@ -3733,48 +4786,142 @@ upgrade_cb(zpool_handle_t *zhp, void *ar zpool_get_name(zhp)); } - zpool_close(zhp); - return (ret); + return (0); +} + +static int +upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) +{ + upgrade_cbdata_t *cbp = arg; + nvlist_t *config; + uint64_t version; + + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + /* + * This will have been reported by upgrade_list_unavail so + * just allow iteration to continue. + */ + cbp->cb_unavail = B_TRUE; + return (0); + } + + config = zpool_get_config(zhp, NULL); + verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, + &version) == 0); + + if (version >= SPA_VERSION_FEATURES) { + int i; + boolean_t poolfirst = B_TRUE; + nvlist_t *enabled = zpool_get_features(zhp); + + for (i = 0; i < SPA_FEATURES; i++) { + const char *fguid = spa_feature_table[i].fi_guid; + const char *fname = spa_feature_table[i].fi_uname; + if (!nvlist_exists(enabled, fguid)) { + if (cbp->cb_first) { + (void) printf(gettext("\nSome " + "supported features are not " + "enabled on the following pools. " + "Once a\nfeature is enabled the " + "pool may become incompatible with " + "software\nthat does not support " + "the feature. 
See " + "zpool-features(7) for " + "details.\n\n")); + (void) printf(gettext("POOL " + "FEATURE\n")); + (void) printf(gettext("------" + "---------\n")); + cbp->cb_first = B_FALSE; + } + + if (poolfirst) { + (void) printf(gettext("%s\n"), + zpool_get_name(zhp)); + poolfirst = B_FALSE; + } + + (void) printf(gettext(" %s\n"), fname); + } + } + } + + return (0); } /* ARGSUSED */ static int upgrade_one(zpool_handle_t *zhp, void *data) { + boolean_t printnl = B_FALSE; upgrade_cbdata_t *cbp = data; uint64_t cur_version; int ret; + if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { + (void) fprintf(stderr, gettext("cannot upgrade '%s': pool is " + "is currently unavailable.\n\n"), zpool_get_name(zhp)); + cbp->cb_unavail = B_TRUE; + return (1); + } + if (strcmp("log", zpool_get_name(zhp)) == 0) { (void) printf(gettext("'log' is now a reserved word\n" "Pool 'log' must be renamed using export and import" - " to upgrade.\n")); + " to upgrade.\n\n")); return (1); } cur_version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if (cur_version > cbp->cb_version) { (void) printf(gettext("Pool '%s' is already formatted " - "using more current version '%llu'.\n"), + "using more current version '%llu'.\n\n"), zpool_get_name(zhp), cur_version); return (0); } - if (cur_version == cbp->cb_version) { + + if (cbp->cb_version != SPA_VERSION && cur_version == cbp->cb_version) { (void) printf(gettext("Pool '%s' is already formatted " - "using the current version.\n"), zpool_get_name(zhp)); + "using version %llu.\n\n"), zpool_get_name(zhp), + cbp->cb_version); return (0); } - ret = zpool_upgrade(zhp, cbp->cb_version); + if (cur_version != cbp->cb_version) { + printnl = B_TRUE; + ret = upgrade_version(zhp, cbp->cb_version); + if (ret != 0) + return (ret); +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD__ */ + } + + if (cbp->cb_version >= SPA_VERSION_FEATURES) { + int count = 0; + ret = upgrade_enable_all(zhp, &count); + 
if (ret != 0) + return (ret); + + if (count != 0) { + printnl = B_TRUE; +#ifdef __FreeBSD__ + root_pool_upgrade_check(zhp, cbp->cb_poolname, + sizeof(cbp->cb_poolname)); +#endif /* __FreeBSD __*/ + } else if (cur_version == SPA_VERSION) { + (void) printf(gettext("Pool '%s' already has all " + "supported features enabled.\n\n"), + zpool_get_name(zhp)); + } + } - if (!ret) { - (void) printf(gettext("Successfully upgraded '%s' " - "from version %llu to version %llu\n\n"), - zpool_get_name(zhp), (u_longlong_t)cur_version, - (u_longlong_t)cbp->cb_version); + if (printnl) { + (void) printf(gettext("\n")); } - return (ret != 0); + return (0); } /* @@ -3793,6 +4940,7 @@ zpool_do_upgrade(int argc, char **argv) upgrade_cbdata_t cb = { 0 }; int ret = 0; boolean_t showversions = B_FALSE; + boolean_t upgradeall = B_FALSE; char *end; @@ -3800,15 +4948,15 @@ zpool_do_upgrade(int argc, char **argv) while ((c = getopt(argc, argv, ":avV:")) != -1) { switch (c) { case 'a': - cb.cb_all = B_TRUE; + upgradeall = B_TRUE; break; case 'v': showversions = B_TRUE; break; case 'V': cb.cb_version = strtoll(optarg, &end, 10); - if (*end != '\0' || cb.cb_version > SPA_VERSION || - cb.cb_version < SPA_VERSION_1) { + if (*end != '\0' || + !SPA_VERSION_IS_SUPPORTED(cb.cb_version)) { (void) fprintf(stderr, gettext("invalid version '%s'\n"), optarg); usage(B_FALSE); @@ -3833,19 +4981,19 @@ zpool_do_upgrade(int argc, char **argv) if (cb.cb_version == 0) { cb.cb_version = SPA_VERSION; - } else if (!cb.cb_all && argc == 0) { + } else if (!upgradeall && argc == 0) { (void) fprintf(stderr, gettext("-V option is " "incompatible with other arguments\n")); usage(B_FALSE); } if (showversions) { - if (cb.cb_all || argc != 0) { + if (upgradeall || argc != 0) { (void) fprintf(stderr, gettext("-v option is " "incompatible with other arguments\n")); usage(B_FALSE); } - } else if (cb.cb_all) { + } else if (upgradeall) { if (argc != 0) { (void) fprintf(stderr, gettext("-a option should not " "be used along with a 
pool name\n")); @@ -3853,11 +5001,28 @@ zpool_do_upgrade(int argc, char **argv) } } - (void) printf(gettext("This system is currently running " - "ZFS pool version %llu.\n\n"), SPA_VERSION); - cb.cb_first = B_TRUE; + (void) printf(gettext("This system supports ZFS pool feature " + "flags.\n\n")); if (showversions) { - (void) printf(gettext("The following versions are " + int i; + + (void) printf(gettext("The following features are " + "supported:\n\n")); + (void) printf(gettext("FEAT DESCRIPTION\n")); + (void) printf("----------------------------------------------" + "---------------\n"); + for (i = 0; i < SPA_FEATURES; i++) { + zfeature_info_t *fi = &spa_feature_table[i]; + const char *ro = + (fi->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ? + " (read-only compatible)" : ""; + + (void) printf("%-37s%s\n", fi->fi_uname, ro); + (void) printf(" %s\n", fi->fi_desc); + } + (void) printf("\n"); + + (void) printf(gettext("The following legacy versions are also " "supported:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" @@ -3890,50 +5055,89 @@ zpool_do_upgrade(int argc, char **argv) (void) printf(gettext(" 21 Deduplication\n")); (void) printf(gettext(" 22 Received properties\n")); (void) printf(gettext(" 23 Slim ZIL\n")); + (void) printf(gettext(" 24 System attributes\n")); + (void) printf(gettext(" 25 Improved scrub stats\n")); + (void) printf(gettext(" 26 Improved snapshot deletion " + "performance\n")); + (void) printf(gettext(" 27 Improved snapshot creation " + "performance\n")); + (void) printf(gettext(" 28 Multiple vdev replacements\n")); (void) printf(gettext("\nFor more information on a particular " - "version, including supported releases, see:\n\n")); - (void) printf("http://www.opensolaris.org/os/community/zfs/" - "version/N\n\n"); - (void) printf(gettext("Where 'N' is the version number.\n")); - } else if (argc == 0) { - int notfound; - + "version, including supported releases,\n")); + (void) 
printf(gettext("see the ZFS Administration Guide.\n\n")); + } else if (argc == 0 && upgradeall) { + cb.cb_first = B_TRUE; ret = zpool_iter(g_zfs, upgrade_cb, &cb); - notfound = cb.cb_first; - - if (!cb.cb_all && ret == 0) { - if (!cb.cb_first) - (void) printf("\n"); - cb.cb_first = B_TRUE; - cb.cb_newer = B_TRUE; - ret = zpool_iter(g_zfs, upgrade_cb, &cb); - if (!cb.cb_first) { - notfound = B_FALSE; - (void) printf("\n"); + if (ret == 0 && cb.cb_first) { + if (cb.cb_version == SPA_VERSION) { + (void) printf(gettext("All %spools are already " + "formatted using feature flags.\n\n"), + cb.cb_unavail ? gettext("available ") : ""); + (void) printf(gettext("Every %sfeature flags " + "pool already has all supported features " + "enabled.\n"), + cb.cb_unavail ? gettext("available ") : ""); + } else { + (void) printf(gettext("All pools are already " + "formatted with version %llu or higher.\n"), + cb.cb_version); } } + } else if (argc == 0) { + cb.cb_first = B_TRUE; + ret = zpool_iter(g_zfs, upgrade_list_unavail, &cb); + assert(ret == 0); + + if (!cb.cb_first) { + (void) fprintf(stderr, "\n"); + } + + cb.cb_first = B_TRUE; + ret = zpool_iter(g_zfs, upgrade_list_older_cb, &cb); + assert(ret == 0); + + if (cb.cb_first) { + (void) printf(gettext("All %spools are formatted using " + "feature flags.\n\n"), cb.cb_unavail ? + gettext("available ") : ""); + } else { + (void) printf(gettext("\nUse 'zpool upgrade -v' " + "for a list of available legacy versions.\n")); + } - if (ret == 0) { - if (notfound) - (void) printf(gettext("All pools are formatted " - "using this version.\n")); - else if (!cb.cb_all) - (void) printf(gettext("Use 'zpool upgrade -v' " - "for a list of available versions and " - "their associated\nfeatures.\n")); + cb.cb_first = B_TRUE; + ret = zpool_iter(g_zfs, upgrade_list_disabled_cb, &cb); + assert(ret == 0); + + if (cb.cb_first) { + (void) printf(gettext("Every %sfeature flags pool has " + "all supported features enabled.\n"), + cb.cb_unavail ? 
gettext("available ") : ""); + } else { + (void) printf(gettext("\n")); } } else { - ret = for_each_pool(argc, argv, B_FALSE, NULL, + ret = for_each_pool(argc, argv, B_TRUE, NULL, upgrade_one, &cb); } + if (cb.cb_poolname[0] != '\0') { + (void) printf( + "If you boot from pool '%s', don't forget to update boot code.\n" + "Assuming you use GPT partitioning and da0 is your boot disk\n" + "the following command will do it:\n" + "\n" + "\tgpart bootcode -b /boot/pmbr -p /boot/gptzfsboot -i 1 da0\n\n", + cb.cb_poolname); + } + return (ret); } typedef struct hist_cbdata { boolean_t first; - int longfmt; - int internal; + boolean_t longfmt; + boolean_t internal; } hist_cbdata_t; /* @@ -3945,21 +5149,8 @@ get_history_one(zpool_handle_t *zhp, voi nvlist_t *nvhis; nvlist_t **records; uint_t numrecords; - char *cmdstr; - char *pathstr; - uint64_t dst_time; - time_t tsec; - struct tm t; - char tbuf[30]; int ret, i; - uint64_t who; - struct passwd *pwd; - char *hostname; - char *zonename; - char internalstr[MAXPATHLEN]; hist_cbdata_t *cb = (hist_cbdata_t *)data; - uint64_t txg; - uint64_t ievent; cb->first = B_FALSE; @@ -3971,64 +5162,94 @@ get_history_one(zpool_handle_t *zhp, voi verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, &records, &numrecords) == 0); for (i = 0; i < numrecords; i++) { - if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME, - &dst_time) != 0) - continue; + nvlist_t *rec = records[i]; + char tbuf[30] = ""; - /* is it an internal event or a standard event? 
*/ - if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD, - &cmdstr) != 0) { - if (cb->internal == 0) + if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { + time_t tsec; + struct tm t; + + tsec = fnvlist_lookup_uint64(records[i], + ZPOOL_HIST_TIME); + (void) localtime_r(&tsec, &t); + (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); + } + + if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { + (void) printf("%s %s", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { + int ievent = + fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); + if (!cb->internal) continue; - - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_INT_EVENT, &ievent) != 0) + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { + (void) printf("%s unrecognized record:\n", + tbuf); + dump_nvlist(rec, 4); + continue; + } + (void) printf("%s [internal %s txg:%lld] %s", tbuf, + zfs_history_event_names[ievent], + fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { + if (!cb->internal) + continue; + (void) printf("%s [txg:%lld] %s", tbuf, + fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); + if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { + (void) printf(" %s (%llu)", + fnvlist_lookup_string(rec, + ZPOOL_HIST_DSNAME), + fnvlist_lookup_uint64(rec, + ZPOOL_HIST_DSID)); + } + (void) printf(" %s", fnvlist_lookup_string(rec, + ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) { + if (!cb->internal) continue; - verify(nvlist_lookup_uint64(records[i], - ZPOOL_HIST_TXG, &txg) == 0); - verify(nvlist_lookup_string(records[i], - ZPOOL_HIST_INT_STR, &pathstr) == 0); - if (ievent >= LOG_END) + (void) printf("%s ioctl %s\n", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); + if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { + (void) printf(" input:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + 
ZPOOL_HIST_INPUT_NVL), 8); + } + if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { + (void) printf(" output:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_OUTPUT_NVL), 8); + } + } else { + if (!cb->internal) continue; - (void) snprintf(internalstr, - sizeof (internalstr), - "[internal %s txg:%lld] %s", - hist_event_table[ievent], txg, - pathstr); - cmdstr = internalstr; - } - tsec = dst_time; - (void) localtime_r(&tsec, &t); - (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); - (void) printf("%s %s", tbuf, cmdstr); + (void) printf("%s unrecognized record:\n", tbuf); + dump_nvlist(rec, 4); + } if (!cb->longfmt) { (void) printf("\n"); continue; } (void) printf(" ["); - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_WHO, &who) == 0) { - pwd = getpwuid((uid_t)who); - if (pwd) - (void) printf("user %s on", - pwd->pw_name); - else - (void) printf("user %d on", - (int)who); - } else { - (void) printf(gettext("no info]\n")); - continue; + if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { + uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); + struct passwd *pwd = getpwuid(who); + (void) printf("user %d ", (int)who); + if (pwd != NULL) + (void) printf("(%s) ", pwd->pw_name); + } + if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { + (void) printf("on %s", + fnvlist_lookup_string(rec, ZPOOL_HIST_HOST)); + } + if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { + (void) printf(":%s", + fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE)); } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_HOST, &hostname) == 0) { - (void) printf(" %s", hostname); - } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_ZONE, &zonename) == 0) { - (void) printf(":%s", zonename); - } - (void) printf("]"); (void) printf("\n"); } @@ -4043,8 +5264,6 @@ get_history_one(zpool_handle_t *zhp, voi * * Displays the history of commands that modified pools. 
*/ - - int zpool_do_history(int argc, char **argv) { @@ -4057,10 +5276,10 @@ zpool_do_history(int argc, char **argv) while ((c = getopt(argc, argv, "li")) != -1) { switch (c) { case 'l': - cbdata.longfmt = 1; + cbdata.longfmt = B_TRUE; break; case 'i': - cbdata.internal = 1; + cbdata.internal = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), @@ -4100,28 +5319,56 @@ get_callback(zpool_handle_t *zhp, void * pl == cbp->cb_proplist) continue; - if (zpool_get_prop(zhp, pl->pl_prop, - value, sizeof (value), &srctype) != 0) - continue; + if (pl->pl_prop == ZPROP_INVAL && + (zpool_prop_feature(pl->pl_user_prop) || + zpool_prop_unsupported(pl->pl_user_prop))) { + srctype = ZPROP_SRC_LOCAL; + + if (zpool_prop_get_feature(zhp, pl->pl_user_prop, + value, sizeof (value)) == 0) { + zprop_print_one_property(zpool_get_name(zhp), + cbp, pl->pl_user_prop, value, srctype, + NULL, NULL); + } + } else { + if (zpool_get_prop(zhp, pl->pl_prop, value, + sizeof (value), &srctype, cbp->cb_literal) != 0) + continue; - zprop_print_one_property(zpool_get_name(zhp), cbp, - zpool_prop_to_name(pl->pl_prop), value, srctype, NULL, - NULL); + zprop_print_one_property(zpool_get_name(zhp), cbp, + zpool_prop_to_name(pl->pl_prop), value, srctype, + NULL, NULL); + } } return (0); } +/* + * zpool get [-Hp] [-o "all" | field[,...]] <"all" | property[,...]> ... + * + * -H Scripted mode. Don't display headers, and separate properties + * by a single tab. + * -o List of columns to display. Defaults to + * "name,property,value,source". + * -p Diplay values in parsable (exact) format. + * + * Get properties of pools in the system. Output space statistics + * for each one as well as other attributes. + */ int zpool_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; zprop_list_t fake_name = { 0 }; int ret; - - if (argc < 3) - usage(B_FALSE); + int c, i; + char *value; cb.cb_first = B_TRUE; + + /* + * Set up default columns and sources. 
+ */ cb.cb_sources = ZPROP_SRC_ALL; cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; @@ -4129,10 +5376,89 @@ zpool_do_get(int argc, char **argv) cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_POOL; - if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist, + /* check options */ + while ((c = getopt(argc, argv, ":Hpo:")) != -1) { + switch (c) { + case 'p': + cb.cb_literal = B_TRUE; + break; + case 'H': + cb.cb_scripted = B_TRUE; + break; + case 'o': + bzero(&cb.cb_columns, sizeof (cb.cb_columns)); + i = 0; + while (*optarg != '\0') { + static char *col_subopts[] = + { "name", "property", "value", "source", + "all", NULL }; + + if (i == ZFS_GET_NCOLS) { + (void) fprintf(stderr, gettext("too " + "many fields given to -o " + "option\n")); + usage(B_FALSE); + } + + switch (getsubopt(&optarg, col_subopts, + &value)) { + case 0: + cb.cb_columns[i++] = GET_COL_NAME; + break; + case 1: + cb.cb_columns[i++] = GET_COL_PROPERTY; + break; + case 2: + cb.cb_columns[i++] = GET_COL_VALUE; + break; + case 3: + cb.cb_columns[i++] = GET_COL_SOURCE; + break; + case 4: + if (i > 0) { + (void) fprintf(stderr, + gettext("\"all\" conflicts " + "with specific fields " + "given to -o option\n")); + usage(B_FALSE); + } + cb.cb_columns[0] = GET_COL_NAME; + cb.cb_columns[1] = GET_COL_PROPERTY; + cb.cb_columns[2] = GET_COL_VALUE; + cb.cb_columns[3] = GET_COL_SOURCE; + i = ZFS_GET_NCOLS; + break; + default: + (void) fprintf(stderr, + gettext("invalid column name " + "'%s'\n"), suboptarg); + usage(B_FALSE); + } + } + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + + argc -= optind; + argv += optind; + + if (argc < 1) { + (void) fprintf(stderr, gettext("missing property " + "argument\n")); + usage(B_FALSE); + } + + if (zprop_get_list(g_zfs, argv[0], &cb.cb_proplist, ZFS_TYPE_POOL) != 0) usage(B_FALSE); + argc--; + argv++; + if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZPOOL_PROP_NAME; 
fake_name.pl_width = strlen(gettext("NAME")); @@ -4140,7 +5466,7 @@ zpool_do_get(int argc, char **argv) cb.cb_proplist = &fake_name; } - ret = for_each_pool(argc - 2, argv + 2, B_TRUE, &cb.cb_proplist, + ret = for_each_pool(argc, argv, B_TRUE, &cb.cb_proplist, get_callback, &cb); if (cb.cb_proplist == &fake_name) @@ -4236,7 +5562,7 @@ find_command_idx(char *command, int *idx int main(int argc, char **argv) { - int ret; + int ret = 0; int i; char *cmdname; @@ -4269,8 +5595,7 @@ main(int argc, char **argv) if (strcmp(cmdname, "-?") == 0) usage(B_TRUE); - zpool_set_history_str("zpool", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); /* * Run the appropriate command. @@ -4287,16 +5612,18 @@ main(int argc, char **argv) * 'freeze' is a vile debugging abomination, so we treat * it as such. */ - char buf[16384]; - int fd = open(ZFS_DEV, O_RDWR); - (void) strcpy((void *)buf, argv[2]); - return (!!ioctl(fd, ZFS_IOC_POOL_FREEZE, buf)); + zfs_cmd_t zc = { 0 }; + (void) strlcpy(zc.zc_name, argv[2], sizeof (zc.zc_name)); + return (!!zfs_ioctl(g_zfs, ZFS_IOC_POOL_FREEZE, &zc)); } else { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); usage(B_FALSE); } + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + libzfs_fini(g_zfs); /* Index: src/external/cddl/osnet/dist/cmd/zpool/zpool_util.h =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zpool/zpool_util.h,v retrieving revision 1.1.1.2 diff -u -p -r1.1.1.2 zpool_util.h --- src/external/cddl/osnet/dist/cmd/zpool/zpool_util.h 27 Feb 2010 22:29:23 -0000 1.1.1.2 +++ src/external/cddl/osnet/dist/cmd/zpool/zpool_util.h 12 Jun 2012 05:55:36 -0000 @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #ifndef ZPOOL_UTIL_H @@ -45,7 +44,7 @@ uint_t num_logs(nvlist_t *nv); */ nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, - boolean_t isreplace, boolean_t dryrun, int argc, char **argv); + boolean_t replacing, boolean_t dryrun, int argc, char **argv); nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, splitflags_t flags, int argc, char **argv); Index: src/external/cddl/osnet/dist/cmd/zpool/zpool_vdev.c =================================================================== RCS file: /home/chs/netbsd/cvs/src/external/cddl/osnet/dist/cmd/zpool/zpool_vdev.c,v retrieving revision