Add -p switch to "zpool get"
[zfs.git] / module / zfs / fm.c
index 3cc979d..c004032 100644 (file)
@@ -19,8 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
 
 #include <sys/types.h>
 #include <sys/time.h>
-#include <sys/sysevent.h>
-#include <sys/sysevent_impl.h>
+#include <sys/list.h>
 #include <sys/nvpair.h>
 #include <sys/cmn_err.h>
-#include <sys/cpuvar.h>
 #include <sys/sysmacros.h>
-#include <sys/systm.h>
-#include <sys/ddifm.h>
-#include <sys/ddifm_impl.h>
-#include <sys/spl.h>
-#include <sys/dumphdr.h>
 #include <sys/compress.h>
-#include <sys/cpuvar.h>
-#include <sys/console.h>
-#include <sys/panic.h>
-#include <sys/kobj.h>
 #include <sys/sunddi.h>
 #include <sys/systeminfo.h>
-#include <sys/sysevent/eventdefs.h>
 #include <sys/fm/util.h>
 #include <sys/fm/protocol.h>
+#include <sys/kstat.h>
+#include <sys/zfs_context.h>
+#ifdef _KERNEL
+#include <sys/atomic.h>
+#include <sys/condvar.h>
+#include <sys/cpuvar.h>
+#include <sys/systm.h>
+#include <sys/dumphdr.h>
+#include <sys/cpuvar.h>
+#include <sys/console.h>
+#include <sys/kobj.h>
+#include <sys/time.h>
+#include <sys/zfs_ioctl.h>
 
-/*
- * URL and SUNW-MSG-ID value to display for fm_panic(), defined below.  These
- * values must be kept in sync with the FMA source code in usr/src/cmd/fm.
- */
-static const char *fm_url = "http://www.sun.com/msg";
-static const char *fm_msgid = "SUNOS-8000-0G";
-static char *volatile fm_panicstr = NULL;
+int zfs_zevent_len_max = 0;
+int zfs_zevent_cols = 80;
+int zfs_zevent_console = 0;
 
-errorq_t *ereport_errorq;
-void *ereport_dumpbuf;
-size_t ereport_dumplen;
+static int zevent_len_cur = 0;
+static int zevent_waiters = 0;
+static int zevent_flags = 0;
 
-static uint_t ereport_chanlen = ERPT_EVCH_MAX;
-static evchan_t *ereport_chan = NULL;
-static ulong_t ereport_qlen = 0;
-static size_t ereport_size = 0;
-static int ereport_cols = 80;
+static kmutex_t zevent_lock;
+static list_t zevent_list;
+static kcondvar_t zevent_cv;
+#endif /* _KERNEL */
+
+extern void fastreboot_disable_highpil(void);
 
 /*
- * Common fault management kstats to record ereport generation
- * failures
+ * Common fault management kstats to record event generation failures
  */
 
 struct erpt_kstat {
@@ -113,57 +109,9 @@ static struct erpt_kstat erpt_kstat_data = {
        { "payload-set-failed", KSTAT_DATA_UINT64 }
 };
 
-/*ARGSUSED*/
-static void
-fm_drain(void *private, void *data, errorq_elem_t *eep)
-{
-       nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep);
-
-       if (!panicstr)
-               (void) fm_ereport_post(nvl, EVCH_TRYHARD);
-       else
-               fm_nvprint(nvl);
-}
-
-void
-fm_init(void)
-{
-       kstat_t *ksp;
-
-       (void) sysevent_evc_bind(FM_ERROR_CHAN,
-           &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND);
-
-       (void) sysevent_evc_control(ereport_chan,
-           EVCH_SET_CHAN_LEN, &ereport_chanlen);
-
-       if (ereport_qlen == 0)
-               ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
-
-       if (ereport_size == 0)
-               ereport_size = ERPT_DATA_SZ;
-
-       ereport_errorq = errorq_nvcreate("fm_ereport_queue",
-           (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size,
-           FM_ERR_PIL, ERRORQ_VITAL);
-       if (ereport_errorq == NULL)
-               panic("failed to create required ereport error queue");
-
-       ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP);
-       ereport_dumplen = ereport_size;
-
-       /* Initialize ereport allocation and generation kstats */
-       ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED,
-           sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
-           KSTAT_FLAG_VIRTUAL);
-
-       if (ksp != NULL) {
-               ksp->ks_data = &erpt_kstat_data;
-               kstat_install(ksp);
-       } else {
-               cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
+kstat_t *fm_ksp;
 
-       }
-}
+#ifdef _KERNEL
 
 /*
  * Formatting utility function for fm_nvprintr.  We attempt to wrap chunks of
@@ -182,7 +130,7 @@ fm_printf(int depth, int c, int cols, const char *format, ...)
        va_end(ap);
 
        if (c + width >= cols) {
-               console_printf("\n\r");
+               console_printf("\n");
                c = 0;
                if (format[0] != ' ' && depth > 0) {
                        console_printf(" ");
@@ -244,54 +192,54 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
 
                case DATA_TYPE_BYTE:
                        (void) nvpair_value_byte(nvp, &i8);
-                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;
 
                case DATA_TYPE_INT8:
                        (void) nvpair_value_int8(nvp, (void *)&i8);
-                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;
 
                case DATA_TYPE_UINT8:
                        (void) nvpair_value_uint8(nvp, &i8);
-                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i8);
                        break;
 
                case DATA_TYPE_INT16:
                        (void) nvpair_value_int16(nvp, (void *)&i16);
-                       c = fm_printf(d + 1, c, cols, "%x", i16);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i16);
                        break;
 
                case DATA_TYPE_UINT16:
                        (void) nvpair_value_uint16(nvp, &i16);
-                       c = fm_printf(d + 1, c, cols, "%x", i16);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i16);
                        break;
 
                case DATA_TYPE_INT32:
                        (void) nvpair_value_int32(nvp, (void *)&i32);
-                       c = fm_printf(d + 1, c, cols, "%x", i32);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i32);
                        break;
 
                case DATA_TYPE_UINT32:
                        (void) nvpair_value_uint32(nvp, &i32);
-                       c = fm_printf(d + 1, c, cols, "%x", i32);
+                       c = fm_printf(d + 1, c, cols, "0x%x", i32);
                        break;
 
                case DATA_TYPE_INT64:
                        (void) nvpair_value_int64(nvp, (void *)&i64);
-                       c = fm_printf(d + 1, c, cols, "%llx",
+                       c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;
 
                case DATA_TYPE_UINT64:
                        (void) nvpair_value_uint64(nvp, &i64);
-                       c = fm_printf(d + 1, c, cols, "%llx",
+                       c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;
 
                case DATA_TYPE_HRTIME:
                        (void) nvpair_value_hrtime(nvp, (void *)&i64);
-                       c = fm_printf(d + 1, c, cols, "%llx",
+                       c = fm_printf(d + 1, c, cols, "0x%llx",
                            (u_longlong_t)i64);
                        break;
 
@@ -321,19 +269,124 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
                        }
                        break;
 
+               case DATA_TYPE_INT8_ARRAY: {
+                       int8_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int8_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT8_ARRAY: {
+                       uint8_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint8_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_INT16_ARRAY: {
+                       int16_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int16_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT16_ARRAY: {
+                       uint16_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint16_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_INT32_ARRAY: {
+                       int32_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int32_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT32_ARRAY: {
+                       uint32_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint32_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_INT64_ARRAY: {
+                       int64_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_int64_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_UINT64_ARRAY: {
+                       uint64_t *val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[ ");
+                       (void) nvpair_value_uint64_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++)
+                               c = fm_printf(d + 1, c, cols, "0x%llx ",
+                                             (u_longlong_t)val[i]);
+
+                       c = fm_printf(d + 1, c, cols, "]");
+                       break;
+                       }
+
+               case DATA_TYPE_STRING_ARRAY:
                case DATA_TYPE_BOOLEAN_ARRAY:
                case DATA_TYPE_BYTE_ARRAY:
-               case DATA_TYPE_INT8_ARRAY:
-               case DATA_TYPE_UINT8_ARRAY:
-               case DATA_TYPE_INT16_ARRAY:
-               case DATA_TYPE_UINT16_ARRAY:
-               case DATA_TYPE_INT32_ARRAY:
-               case DATA_TYPE_UINT32_ARRAY:
-               case DATA_TYPE_INT64_ARRAY:
-               case DATA_TYPE_UINT64_ARRAY:
-               case DATA_TYPE_STRING_ARRAY:
                        c = fm_printf(d + 1, c, cols, "[...]");
                        break;
+
                case DATA_TYPE_UNKNOWN:
                        c = fm_printf(d + 1, c, cols, "<unknown>");
                        break;
@@ -349,174 +402,270 @@ fm_nvprint(nvlist_t *nvl)
        char *class;
        int c = 0;
 
-       console_printf("\r");
+       console_printf("\n");
 
        if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
-               c = fm_printf(0, c, ereport_cols, "%s", class);
+               c = fm_printf(0, c, zfs_zevent_cols, "%s", class);
 
-       if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0)
+       if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
                console_printf("\n");
 
        console_printf("\n");
 }
 
+/*
+ * Allocate a zero-filled zevent_t and initialise both its list of
+ * per-stream references (ev_ze_list) and its own queue linkage
+ * (ev_node).  Returns NULL when the allocation fails.
+ */
+static zevent_t *
+zfs_zevent_alloc(void)
+{
+       zevent_t *zev = kmem_zalloc(sizeof (zevent_t), KM_PUSHPAGE);
+
+       if (zev != NULL) {
+               list_create(&zev->ev_ze_list, sizeof (zfs_zevent_t),
+                   offsetof(zfs_zevent_t, ze_node));
+               list_link_init(&zev->ev_node);
+       }
+
+       return (zev);
+}
+
+/*
+ * Dispose of a zevent_t.  The event's nvlist and detector are handed to
+ * the cleanup callback stored in ev_cb, after which the reference list
+ * is destroyed and the structure itself is freed.
+ */
+static void
+zfs_zevent_free(zevent_t *ev)
+{
+       zevent_cb_t *cleanup = ev->ev_cb;
+
+       cleanup(ev->ev_nvl, ev->ev_detector);
+
+       list_destroy(&ev->ev_ze_list);
+       kmem_free(ev, sizeof (*ev));
+}
+
+/*
+ * Unlink 'ev' from the global zevent list and free it.  Every stream
+ * that still points at the event has its pointer cleared and its
+ * dropped counter bumped.  The caller must hold zevent_lock.
+ */
+static void
+zfs_zevent_drain(zevent_t *ev)
+{
+       zfs_zevent_t *ref;
+
+       ASSERT(MUTEX_HELD(&zevent_lock));
+       list_remove(&zevent_list, ev);
+
+       /* Detach each stream (private file data) referencing this event */
+       for (ref = list_head(&ev->ev_ze_list); ref != NULL;
+           ref = list_head(&ev->ev_ze_list)) {
+               list_remove(&ev->ev_ze_list, ref);
+               ref->ze_zevent = NULL;
+               ref->ze_dropped++;
+       }
+
+       zfs_zevent_free(ev);
+}
+
+/*
+ * Drain every queued zevent.  On return '*count' holds the queue
+ * length counter as it stood before being reset to zero.
+ */
+void
+zfs_zevent_drain_all(int *count)
+{
+       zevent_t *head;
+
+       mutex_enter(&zevent_lock);
+       for (;;) {
+               head = list_head(&zevent_list);
+               if (head == NULL)
+                       break;
+               zfs_zevent_drain(head);
+       }
+
+       *count = zevent_len_cur;
+       zevent_len_cur = 0;
+       mutex_exit(&zevent_lock);
+}
+
 /*
- * Wrapper for panic() that first produces an FMA-style message for admins.
- * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this
- * is the one exception to that rule and the only error that gets messaged.
- * This function is intended for use by subsystems that have detected a fatal
- * error and enqueued appropriate ereports and wish to then force a panic.
+ * New zevents are inserted at the head.  If the maximum queue
+ * length is exceeded a zevent will be drained from the tail.
+ * As part of this any user space processes which currently have
+ * a reference to this zevent_t in their private data will have
+ * this reference set to NULL.
  */
-/*PRINTFLIKE1*/
-void
-fm_panic(const char *format, ...)
+static void
+zfs_zevent_insert(zevent_t *ev)
 {
-       va_list ap;
+       ASSERT(MUTEX_HELD(&zevent_lock));
+       list_insert_head(&zevent_list, ev);
 
-       (void) casptr((void *)&fm_panicstr, NULL, (void *)format);
-       va_start(ap, format);
-       vpanic(format, ap);
-       va_end(ap);
+       /* Still below the limit: just account for the new entry */
+       if (zevent_len_cur < zfs_zevent_len_max) {
+               zevent_len_cur++;
+               return;
+       }
+
+       /* Queue is full: keep the length constant by dropping the tail */
+       zfs_zevent_drain(list_tail(&zevent_list));
 }
 
 /*
- * Print any appropriate FMA banner message before the panic message.  This
- * function is called by panicsys() and prints the message for fm_panic().
- * We print the message here so that it comes after the system is quiesced.
- * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix).
- * The rest of the message is for the console only and not needed in the log,
- * so it is printed using console_printf().  We break it up into multiple
- * chunks so as to avoid overflowing any small legacy prom_printf() buffers.
+ * Post a zevent
  */
 void
-fm_banner(void)
+zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
 {
-       timespec_t tod;
-       hrtime_t now;
+       int64_t tv_array[2];
+       timestruc_t tv;
+       size_t nvl_size = 0;
+       zevent_t *ev;
 
-       if (!fm_panicstr)
-               return; /* panic was not initiated by fm_panic(); do nothing */
+       gethrestime(&tv);
+       tv_array[0] = tv.tv_sec;
+       tv_array[1] = tv.tv_nsec;
+       if (nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2)) {
+               atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+               return;
+       }
 
-       if (panicstr) {
-               tod = panic_hrestime;
-               now = panic_hrtime;
-       } else {
-               gethrestime(&tod);
-               now = gethrtime_waitfree();
+       (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
+       if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
+               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+               return;
        }
 
-       cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, "
-           "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid);
+       if (zfs_zevent_console)
+               fm_nvprint(nvl);
 
-       console_printf(
-"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
-"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
-           fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now);
+       ev = zfs_zevent_alloc();
+       if (ev == NULL) {
+               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+               return;
+       }
 
-       console_printf(
-"PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
-"SOURCE: %s, REV: %s %s\n",
-           platform, utsname.nodename, utsname.sysname,
-           utsname.release, utsname.version);
+        ev->ev_nvl = nvl;
+       ev->ev_detector = detector;
+       ev->ev_cb = cb;
 
-       console_printf(
-"DESC: Errors have been detected that require a reboot to ensure system\n"
-"integrity.  See %s/%s for more information.\n",
-           fm_url, fm_msgid);
+       mutex_enter(&zevent_lock);
+       zfs_zevent_insert(ev);
+       cv_broadcast(&zevent_cv);
+       mutex_exit(&zevent_lock);
+}
 
-       console_printf(
-"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n"
-"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
-"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n");
+static int
+zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
+{
+       /* Look up the ZST_ZEVENT state registered for this minor */
+       zfs_zevent_t *state = zfsdev_get_state(minor, ZST_ZEVENT);
 
-       console_printf("\n");
+       *ze = state;
+       return (state == NULL ? EBADF : 0);
+}
+
+/*
+ * Take a hold on 'fd' and resolve it to its zevent stream state.  On
+ * success '*minorp' and '*ze' are filled in and the hold is kept; the
+ * hold is dropped again before returning when the state lookup fails.
+ * Returns EBADF if 'fd' cannot be held or has no zevent state.
+ */
+int
+zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
+{
+       file_t *fp = getf(fd);
+       int error;
+
+       if (fp == NULL)
+               return (EBADF);
+
+       *minorp = zfsdev_getminor(fp->f_file);
+       error = zfs_zevent_minor_to_state(*minorp, ze);
+       if (error != 0)
+               zfs_zevent_fd_rele(fd);
+
+       return (error);
+}
+
+/*
+ * Release the hold on 'fd' via releasef().  zfs_zevent_fd_hold() calls
+ * this itself on its error path; presumably callers use it to undo a
+ * successful zfs_zevent_fd_hold() as well.
+ */
+void
+zfs_zevent_fd_rele(int fd)
+{
+       releasef(fd);
 }
 
 /*
- * Utility function to write all of the pending ereports to the dump device.
- * This function is called at either normal reboot or panic time, and simply
- * iterates over the in-transit messages in the ereport sysevent channel.
+ * Get the next zevent in the stream and place a copy in 'event'.  This
+ * may fail with ENOMEM if the encoded nvlist size exceeds the passed
+ * 'event_size'.  In this case the stream pointer is not advanced and
+ * 'event_size' is set to the minimum required buffer size.
  */
-void
-fm_ereport_dump(void)
+int
+zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
+                uint64_t *dropped)
 {
-       evchanq_t *chq;
-       sysevent_t *sep;
-       erpt_dump_t ed;
-
-       timespec_t tod;
-       hrtime_t now;
-       char *buf;
-       size_t len;
-
-       if (panicstr) {
-               tod = panic_hrestime;
-               now = panic_hrtime;
+       zevent_t *ev;
+       size_t size;
+       int error = 0;
+
+       mutex_enter(&zevent_lock);
+       if (ze->ze_zevent == NULL) {
+               /* New stream start at the beginning/tail */
+               ev = list_tail(&zevent_list);
+               if (ev == NULL) {
+                       error = ENOENT;
+                       goto out;
+               }
        } else {
-               if (ereport_errorq != NULL)
-                       errorq_drain(ereport_errorq);
-               gethrestime(&tod);
-               now = gethrtime_waitfree();
+               /*
+                * Existing stream: continue with the next element, i.e.
+                * the one towards the head of the list.  ENOENT means
+                * there is nothing newer than the current position.
+                */
+               ev = list_prev(&zevent_list, ze->ze_zevent);
+               if (ev == NULL) {
+                       error = ENOENT;
+                       goto out;
+               }
        }
 
-       /*
-        * In the panic case, sysevent_evc_walk_init() will return NULL.
-        */
-       if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL &&
-           !panicstr)
-               return; /* event channel isn't initialized yet */
+       /*
+        * Check the caller's buffer before committing to this event; on
+        * ENOMEM ze->ze_zevent is left untouched and only '*event_size'
+        * is updated.
+        */
+       VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
+       if (size > *event_size) {
+               *event_size = size;
+               error = ENOMEM;
+               goto out;
+       }
 
-       while ((sep = sysevent_evc_walk_step(chq)) != NULL) {
-               if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL)
-                       break;
+       /* Move this stream's reference from the previous event to 'ev' */
+       if (ze->ze_zevent)
+               list_remove(&ze->ze_zevent->ev_ze_list, ze);
+
+       ze->ze_zevent = ev;
+       list_insert_head(&ev->ev_ze_list, ze);
+       /* NOTE(review): the nvlist_dup() return value is not checked */
+       nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
+       /* Report, then reset, the drops counted by zfs_zevent_drain() */
+       *dropped = ze->ze_dropped;
+       ze->ze_dropped = 0;
+out:
+       mutex_exit(&zevent_lock);
+
+       return error;
+}
 
-               ed.ed_magic = ERPT_MAGIC;
-               ed.ed_chksum = checksum32(buf, len);
-               ed.ed_size = (uint32_t)len;
-               ed.ed_pad = 0;
-               ed.ed_hrt_nsec = SE_TIME(sep);
-               ed.ed_hrt_base = now;
-               ed.ed_tod_base.sec = tod.tv_sec;
-               ed.ed_tod_base.nsec = tod.tv_nsec;
-
-               dumpvp_write(&ed, sizeof (ed));
-               dumpvp_write(buf, len);
+/*
+ * Block on zevent_cv until it is signalled or the wait is interrupted.
+ * Returns ESHUTDOWN without waiting when ZEVENT_SHUTDOWN is set in
+ * zevent_flags, EINTR when a signal is pending after the wait, and 0
+ * otherwise.  The 'ze' argument is not referenced.
+ */
+int
+zfs_zevent_wait(zfs_zevent_t *ze)
+{
+       int error = 0;
+
+       mutex_enter(&zevent_lock);
+
+       if ((zevent_flags & ZEVENT_SHUTDOWN) != 0) {
+               error = ESHUTDOWN;
+       } else {
+               zevent_waiters++;
+               cv_wait_interruptible(&zevent_cv, &zevent_lock);
+               if (issig(JUSTLOOKING))
+                       error = EINTR;
+
+               zevent_waiters--;
        }
 
-       sysevent_evc_walk_fini(chq);
+       mutex_exit(&zevent_lock);
+
+       return (error);
 }
 
-/*
- * Post an error report (ereport) to the sysevent error channel.  The error
- * channel must be established with a prior call to sysevent_evc_create()
- * before publication may occur.
- */
 void
-fm_ereport_post(nvlist_t *ereport, int evc_flag)
+zfs_zevent_init(zfs_zevent_t **zep)
 {
-       size_t nvl_size = 0;
-       evchan_t *error_chan;
+       zfs_zevent_t *ze;
 
-       (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE);
-       if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
-               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
-               return;
-       }
+       ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
+       list_link_init(&ze->ze_node);
+}
 
-       if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan,
-           EVCH_CREAT|EVCH_HOLD_PEND) != 0) {
-               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
-               return;
-       }
+/*
+ * Free a stream state created by zfs_zevent_init().  If the stream
+ * still references a queued event it is first removed from that
+ * event's reference list under zevent_lock.
+ */
+void
+zfs_zevent_destroy(zfs_zevent_t *ze)
+{
+       zevent_t *held;
+
+       mutex_enter(&zevent_lock);
+       held = ze->ze_zevent;
+       if (held != NULL)
+               list_remove(&held->ev_ze_list, ze);
+       mutex_exit(&zevent_lock);
 
-       if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR,
-           SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) {
-               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
-               sysevent_evc_unbind(error_chan);
-               return;
-       }
-       sysevent_evc_unbind(error_chan);
+       kmem_free(ze, sizeof (*ze));
 }
+#endif /* _KERNEL */
 
 /*
  * Wrapppers for FM nvlist allocators
@@ -525,7 +674,7 @@ fm_ereport_post(nvlist_t *ereport, int evc_flag)
 static void *
 i_fm_alloc(nv_alloc_t *nva, size_t size)
 {
-       return (kmem_zalloc(size, KM_SLEEP));
+       return (kmem_zalloc(size, KM_PUSHPAGE));
 }
 
 /* ARGSUSED */
@@ -593,7 +742,7 @@ fm_nvlist_create(nv_alloc_t *nva)
        nv_alloc_t *nvhdl;
 
        if (nva == NULL) {
-               nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
+               nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_PUSHPAGE);
 
                if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
                        kmem_free(nvhdl, sizeof (nv_alloc_t));
@@ -606,8 +755,8 @@ fm_nvlist_create(nv_alloc_t *nva)
 
        if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
                if (hdl_alloced) {
-                       kmem_free(nvhdl, sizeof (nv_alloc_t));
                        nv_alloc_fini(nvhdl);
+                       kmem_free(nvhdl, sizeof (nv_alloc_t));
                }
                return (NULL);
        }
@@ -788,6 +937,14 @@ fm_payload_set(nvlist_t *payload, ...)
  *     detector                nvlist_t        <detector>
  *     ereport-payload         nvlist_t        <var args>
  *
+ * We don't actually add a 'version' member to the payload.  Really,
+ * the version quoted to us by our caller is that of the category 1
+ * "ereport" event class (and we require FM_EREPORT_VERS0) but
+ * the payload version of the actual leaf class event under construction
+ * may be something else.  Callers should supply a version in the varargs,
+ * or (better) we could take two version arguments - one for the
+ * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
+ * for the leaf class.
  */
 void
 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
@@ -912,6 +1069,105 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
        }
 }
 
+/*
+ * Build the hc-scheme FMRI 'fmri' from an existing board FMRI plus
+ * additional (name, id) pairs.
+ *
+ * The FM_FMRI_HC_LIST entries of 'bboard' are copied first, followed by
+ * 'npairs' pairs read from the variable arguments as
+ * (const char *name, uint32_t id).  If 'snvl' is non-NULL it is added
+ * as FM_FMRI_HC_SPECIFIC.  Failures detected here are counted in the
+ * fmri_set_failed kstat.
+ *
+ * The combined list is limited to HC_MAXPAIRS entries, the size of the
+ * on-stack pairs[] array: a 'bboard' list that is already longer is
+ * rejected, surplus variable-argument pairs are ignored, and a negative
+ * 'npairs' is treated as zero.
+ */
+void
+fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
+    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
+{
+       nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
+       nvlist_t *pairs[HC_MAXPAIRS];
+       nvlist_t **hcl;
+       uint_t n, i, total;
+       uint_t created = 0;     /* pairs[0 .. created-1] have been assigned */
+       va_list ap;
+       char *hcname, *hcid;
+
+       if (!fm_fmri_hc_set_common(fmri, version, auth))
+               return;
+
+       /*
+        * Fetch the bboard pairs; they must fit in pairs[] by themselves
+        */
+       if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
+           != 0 || n > HC_MAXPAIRS) {
+               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+               return;
+       }
+
+       /*
+        * Clamp the caller's pairs to the space left after the bboard ones
+        */
+       if (npairs < 0)
+               npairs = 0;
+       total = n + MIN((uint_t)npairs, (uint_t)HC_MAXPAIRS - n);
+
+       /*
+        * copy the bboard nvpairs to the pairs array
+        */
+       for (i = 0; i < n; i++) {
+               if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
+                   &hcname) != 0 ||
+                   nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0)
+                       goto fail;
+
+               pairs[i] = fm_nvlist_create(nva);
+               created = i + 1;
+               if (pairs[i] == NULL ||
+                   nvlist_add_string(pairs[i], FM_FMRI_HC_NAME,
+                   hcname) != 0 ||
+                   nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0)
+                       goto fail;
+       }
+
+       /*
+        * create the pairs from passed in pairs
+        */
+       va_start(ap, npairs);
+       for (i = n; i < total; i++) {
+               const char *name = va_arg(ap, const char *);
+               uint32_t id = va_arg(ap, uint32_t);
+               char idstr[11];
+
+               (void) snprintf(idstr, sizeof (idstr), "%u", id);
+               pairs[i] = fm_nvlist_create(nva);
+               created = i + 1;
+               if (pairs[i] == NULL ||
+                   nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
+                   nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0)
+                       break;
+       }
+       /* va_end() must run on the error path too, so leave via break */
+       va_end(ap);
+       if (i < total)
+               goto fail;
+
+       /*
+        * Create the fmri hc list
+        */
+       if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
+           total) != 0)
+               goto fail;
+
+       for (i = 0; i < total; i++)
+               fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
+
+       if (snvl != NULL &&
+           nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
+               atomic_add_64(
+                   &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+       }
+       return;
+
+fail:
+       /* Release whatever was created before the failure */
+       for (i = 0; i < created; i++) {
+               if (pairs[i] != NULL)
+                       fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
+       }
+       atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+}
+
+
 /*
  * Set-up and validate the members of an dev fmri according to:
  *
@@ -920,46 +1176,41 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
  *     version                 uint8_t         0
  *     auth                    nvlist_t        <auth>
  *     devpath                 string          <devpath>
- *     devid                   string          <devid>
+ *     [devid]                 string          <devid>
+ *     [target-port-l0id]      string          <target-port-lun0-id>
  *
- * Note that auth and devid are optional members.
+ * Note that auth, devid and target-port-l0id are optional members.
  */
 void
 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
-    const char *devpath, const char *devid)
+    const char *devpath, const char *devid, const char *tpl0)
 {
-       if (version != DEV_SCHEME_VERSION0) {
-               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-               return;
-       }
+       int err = 0;            /* ORs every nvlist_add_*() result */
 
-       if (nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0) {
+       if (version != DEV_SCHEME_VERSION0) {   /* unsupported version */
                atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
                return;
        }
 
-       if (nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
-           FM_FMRI_SCHEME_DEV) != 0) {
-               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-               return;
-       }
+       err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
+       err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
 
        if (auth != NULL) {
-               if (nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
-                   (nvlist_t *)auth) != 0) {
-                       atomic_add_64(
-                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-               }
+               err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
+                   (nvlist_t *)auth);
        }
 
-       if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0) {
-               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
-       }
+       err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
 
        if (devid != NULL)
-               if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0)
-                       atomic_add_64(
-                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+               err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
+
+       if (tpl0 != NULL)       /* optional member, same as devid */
+               err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
+
+       if (err)                /* counted once, however many adds failed */
+               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+
 }
 
 /*
@@ -1146,7 +1397,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
                        ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                            ((cpuid << ENA_FMT1_CPUID_SHFT) &
                            ENA_FMT1_CPUID_MASK) |
-                           ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) &
+                           ((gethrtime() << ENA_FMT1_TIME_SHFT) &
                            ENA_FMT1_TIME_MASK));
                }
                break;
@@ -1164,7 +1415,13 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
 uint64_t
 fm_ena_generate(uint64_t timestamp, uchar_t format)
 {
-       return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format));
+       uint64_t ena;
+
+       kpreempt_disable();     /* covers getcpuid() and its use below */
+       ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
+       kpreempt_enable();      /* cpu id is not used past this point */
+
+       return (ena);
 }
 
 uint64_t
@@ -1232,35 +1489,68 @@ fm_ena_time_get(uint64_t ena)
        return (time);
 }
 
-/*
- * Convert a getpcstack() trace to symbolic name+offset, and add the resulting
- * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK.
- */
+#ifdef _KERNEL
 void
-fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth)
+fm_init(void)           /* sets up zevent state, fm kstat, lock/list/cv */
 {
-       int i;
-       char *sym;
-       ulong_t off;
-       char *stkpp[FM_STK_DEPTH];
-       char buf[FM_STK_DEPTH * FM_SYM_SZ];
-       char *stkp = buf;
-
-       for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) {
-               if ((sym = kobj_getsymname(stack[i], &off)) != NULL)
-                       (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off);
-               else
-                       (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]);
-               stkpp[i] = stkp;
+       zevent_len_cur = 0;     /* reset zevent counter and flags */
+       zevent_flags = 0;
+
+       if (zfs_zevent_len_max == 0)    /* 0 selects the computed default */
+               zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
+
+       /* Initialize zevent allocation and generation kstats */
+       fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
+           sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
+           KSTAT_FLAG_VIRTUAL);        /* ks_data is set just below */
+
+       if (fm_ksp != NULL) {
+               fm_ksp->ks_data = &erpt_kstat_data;
+               kstat_install(fm_ksp);
+       } else {                /* non-fatal: setup continues below */
+               cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
        }
 
-       fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK,
-           DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL);
+       mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
+       list_create(&zevent_list, sizeof(zevent_t), offsetof(zevent_t, ev_node));
+       cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
 }
 
 void
-print_msg_hwerr(ctid_t ct_id, proc_t *p)
+fm_fini(void)           /* tears down what fm_init() created */
 {
-       uprintf("Killed process %d (%s) in contract id %d "
-           "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id);
+       int count;              /* out-arg for drain_all; not read here */
+
+       zfs_zevent_drain_all(&count);
+
+       mutex_enter(&zevent_lock);
+       cv_broadcast(&zevent_cv);       /* wake every cv waiter */
+
+       zevent_flags |= ZEVENT_SHUTDOWN;        /* still under zevent_lock */
+       while (zevent_waiters > 0) {    /* poll until waiters are gone */
+               mutex_exit(&zevent_lock);       /* unlocked while yielding */
+               schedule();
+               mutex_enter(&zevent_lock);
+       }
+       mutex_exit(&zevent_lock);
+
+       cv_destroy(&zevent_cv);         /* reverse order of fm_init() */
+       list_destroy(&zevent_list);
+       mutex_destroy(&zevent_lock);
+
+       if (fm_ksp != NULL) {           /* NULL if kstat_create() failed */
+               kstat_delete(fm_ksp);
+               fm_ksp = NULL;
+       }
 }
+
+module_param(zfs_zevent_len_max, int, 0644); /* 0: fm_init() sets default */
+MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length");
+
+module_param(zfs_zevent_cols, int, 0644);
+MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width");
+
+module_param(zfs_zevent_console, int, 0644);
+MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console");
+
+#endif /* _KERNEL */