4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Fault Management Architecture (FMA) Resource and Protocol Support
28 * The routines contained herein provide services to support kernel subsystems
29 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
31 * Name-Value Pair Lists
33 * The embodiment of an FMA protocol element (event, fmri or authority) is a
34 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
35 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
36 * to create an nvpair list using custom allocators. Callers may choose to
37 * allocate either from the kernel memory allocator, or from a preallocated
38 * buffer, useful in constrained contexts like high-level interrupt routines.
40 * Protocol Event and FMRI Construction
42 * Convenience routines are provided to construct nvlist events according to
43 * the FMA Event Protocol and Naming Schema specification for ereports and
44 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
48 * Routines to generate ENA formats 0, 1 and 2 are available as well as
49 * routines to increment formats 1 and 2. Individual fields within the
50 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
51 * fm_ena_format_get() and fm_ena_generation_get().
54 #include <sys/types.h>
57 #include <sys/nvpair.h>
58 #include <sys/cmn_err.h>
59 #include <sys/sysmacros.h>
60 #include <sys/compress.h>
61 #include <sys/sunddi.h>
62 #include <sys/systeminfo.h>
63 #include <sys/fm/util.h>
64 #include <sys/fm/protocol.h>
65 #include <sys/kstat.h>
66 #include <sys/zfs_context.h>
68 #include <sys/atomic.h>
69 #include <sys/condvar.h>
70 #include <sys/cpuvar.h>
71 #include <sys/systm.h>
72 #include <sys/dumphdr.h>
73 #include <sys/cpuvar.h>
74 #include <sys/console.h>
77 #include <sys/zfs_ioctl.h>
79 int zfs_zevent_len_max = 0;
80 int zfs_zevent_cols = 80;
81 int zfs_zevent_console = 0;
83 static int zevent_len_cur = 0;
84 static int zevent_waiters = 0;
85 static int zevent_flags = 0;
87 static kmutex_t zevent_lock;
88 static list_t zevent_list;
89 static kcondvar_t zevent_cv;
92 extern void fastreboot_disable_highpil(void);
95 * Common fault management kstats to record event generation failures
99 kstat_named_t erpt_dropped; /* num erpts dropped on post */
100 kstat_named_t erpt_set_failed; /* num erpt set failures */
101 kstat_named_t fmri_set_failed; /* num fmri set failures */
102 kstat_named_t payload_set_failed; /* num payload set failures */
105 static struct erpt_kstat erpt_kstat_data = {
106 { "erpt-dropped", KSTAT_DATA_UINT64 },
107 { "erpt-set-failed", KSTAT_DATA_UINT64 },
108 { "fmri-set-failed", KSTAT_DATA_UINT64 },
109 { "payload-set-failed", KSTAT_DATA_UINT64 }
/*
 * Formatting helper for fm_nvprintr().  Prints one printf-style chunk on the
 * console, first breaking the line if the chunk would not fit in the 'cols'
 * columns that remain after column 'c'.  After a line break, chunks at a
 * nesting depth greater than zero that do not already start with a blank are
 * indented by one space.
 *
 * Returns the column the cursor ends up in.
 *
 * 'cols' normally comes from the run-time writable zfs_zevent_cols tunable,
 * so it may be zero or negative.  In that case no wrapping is attempted and
 * the running column is returned unchanged in scale; previously a value of
 * zero caused a division by zero in the final modulo.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
	va_list ap;
	int width;
	char c1;

	/* Measure the formatted width without storing the output. */
	va_start(ap, format);
	width = vsnprintf(&c1, sizeof (c1), format, ap);
	va_end(ap);

	/* A formatting error must not move the column backwards. */
	if (width < 0)
		width = 0;

	if (cols > 0 && c + width >= cols) {
		console_printf("\n");
		c = 0;
		if (format[0] != ' ' && depth > 0) {
			console_printf(" ");
			c++;
		}
	}

	va_start(ap, format);
	console_vprintf(format, ap);
	va_end(ap);

	if (cols <= 0)
		return (c + width);

	return ((c + width) % cols);
}
149 * Recursively print a nvlist in the specified column width and return the
150 * column we end up in. This function is called recursively by fm_nvprint(),
151 * below. We generically format the entire nvpair using hexadecimal
152 * integers and strings, and elide any integer arrays. Arrays are basically
153 * used for cache dumps right now, so we suppress them so as not to overwhelm
154 * the amount of console output we produce at panic time. This can be further
155 * enhanced as FMA technology grows based upon the needs of consumers. All
156 * FMA telemetry is logged using the dump device transport, so the console
157 * output serves only as a fallback in case this procedure is unsuccessful.
160 fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
164 for (nvp = nvlist_next_nvpair(nvl, NULL);
165 nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
167 data_type_t type = nvpair_type(nvp);
168 const char *name = nvpair_name(nvp);
178 if (strcmp(name, FM_CLASS) == 0)
179 continue; /* already printed by caller */
181 c = fm_printf(d, c, cols, " %s=", name);
184 case DATA_TYPE_BOOLEAN:
185 c = fm_printf(d + 1, c, cols, " 1");
188 case DATA_TYPE_BOOLEAN_VALUE:
189 (void) nvpair_value_boolean_value(nvp, &b);
190 c = fm_printf(d + 1, c, cols, b ? "1" : "0");
194 (void) nvpair_value_byte(nvp, &i8);
195 c = fm_printf(d + 1, c, cols, "0x%x", i8);
199 (void) nvpair_value_int8(nvp, (void *)&i8);
200 c = fm_printf(d + 1, c, cols, "0x%x", i8);
203 case DATA_TYPE_UINT8:
204 (void) nvpair_value_uint8(nvp, &i8);
205 c = fm_printf(d + 1, c, cols, "0x%x", i8);
208 case DATA_TYPE_INT16:
209 (void) nvpair_value_int16(nvp, (void *)&i16);
210 c = fm_printf(d + 1, c, cols, "0x%x", i16);
213 case DATA_TYPE_UINT16:
214 (void) nvpair_value_uint16(nvp, &i16);
215 c = fm_printf(d + 1, c, cols, "0x%x", i16);
218 case DATA_TYPE_INT32:
219 (void) nvpair_value_int32(nvp, (void *)&i32);
220 c = fm_printf(d + 1, c, cols, "0x%x", i32);
223 case DATA_TYPE_UINT32:
224 (void) nvpair_value_uint32(nvp, &i32);
225 c = fm_printf(d + 1, c, cols, "0x%x", i32);
228 case DATA_TYPE_INT64:
229 (void) nvpair_value_int64(nvp, (void *)&i64);
230 c = fm_printf(d + 1, c, cols, "0x%llx",
234 case DATA_TYPE_UINT64:
235 (void) nvpair_value_uint64(nvp, &i64);
236 c = fm_printf(d + 1, c, cols, "0x%llx",
240 case DATA_TYPE_HRTIME:
241 (void) nvpair_value_hrtime(nvp, (void *)&i64);
242 c = fm_printf(d + 1, c, cols, "0x%llx",
246 case DATA_TYPE_STRING:
247 (void) nvpair_value_string(nvp, &str);
248 c = fm_printf(d + 1, c, cols, "\"%s\"",
249 str ? str : "<NULL>");
252 case DATA_TYPE_NVLIST:
253 c = fm_printf(d + 1, c, cols, "[");
254 (void) nvpair_value_nvlist(nvp, &cnv);
255 c = fm_nvprintr(cnv, d + 1, c, cols);
256 c = fm_printf(d + 1, c, cols, " ]");
259 case DATA_TYPE_NVLIST_ARRAY: {
263 c = fm_printf(d + 1, c, cols, "[");
264 (void) nvpair_value_nvlist_array(nvp, &val, &nelem);
265 for (i = 0; i < nelem; i++) {
266 c = fm_nvprintr(val[i], d + 1, c, cols);
268 c = fm_printf(d + 1, c, cols, " ]");
272 case DATA_TYPE_INT8_ARRAY: {
276 c = fm_printf(d + 1, c, cols, "[ ");
277 (void) nvpair_value_int8_array(nvp, &val, &nelem);
278 for (i = 0; i < nelem; i++)
279 c = fm_printf(d + 1, c, cols, "0x%llx ",
280 (u_longlong_t)val[i]);
282 c = fm_printf(d + 1, c, cols, "]");
286 case DATA_TYPE_UINT8_ARRAY: {
290 c = fm_printf(d + 1, c, cols, "[ ");
291 (void) nvpair_value_uint8_array(nvp, &val, &nelem);
292 for (i = 0; i < nelem; i++)
293 c = fm_printf(d + 1, c, cols, "0x%llx ",
294 (u_longlong_t)val[i]);
296 c = fm_printf(d + 1, c, cols, "]");
300 case DATA_TYPE_INT16_ARRAY: {
304 c = fm_printf(d + 1, c, cols, "[ ");
305 (void) nvpair_value_int16_array(nvp, &val, &nelem);
306 for (i = 0; i < nelem; i++)
307 c = fm_printf(d + 1, c, cols, "0x%llx ",
308 (u_longlong_t)val[i]);
310 c = fm_printf(d + 1, c, cols, "]");
314 case DATA_TYPE_UINT16_ARRAY: {
318 c = fm_printf(d + 1, c, cols, "[ ");
319 (void) nvpair_value_uint16_array(nvp, &val, &nelem);
320 for (i = 0; i < nelem; i++)
321 c = fm_printf(d + 1, c, cols, "0x%llx ",
322 (u_longlong_t)val[i]);
324 c = fm_printf(d + 1, c, cols, "]");
328 case DATA_TYPE_INT32_ARRAY: {
332 c = fm_printf(d + 1, c, cols, "[ ");
333 (void) nvpair_value_int32_array(nvp, &val, &nelem);
334 for (i = 0; i < nelem; i++)
335 c = fm_printf(d + 1, c, cols, "0x%llx ",
336 (u_longlong_t)val[i]);
338 c = fm_printf(d + 1, c, cols, "]");
342 case DATA_TYPE_UINT32_ARRAY: {
346 c = fm_printf(d + 1, c, cols, "[ ");
347 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
348 for (i = 0; i < nelem; i++)
349 c = fm_printf(d + 1, c, cols, "0x%llx ",
350 (u_longlong_t)val[i]);
352 c = fm_printf(d + 1, c, cols, "]");
356 case DATA_TYPE_INT64_ARRAY: {
360 c = fm_printf(d + 1, c, cols, "[ ");
361 (void) nvpair_value_int64_array(nvp, &val, &nelem);
362 for (i = 0; i < nelem; i++)
363 c = fm_printf(d + 1, c, cols, "0x%llx ",
364 (u_longlong_t)val[i]);
366 c = fm_printf(d + 1, c, cols, "]");
370 case DATA_TYPE_UINT64_ARRAY: {
374 c = fm_printf(d + 1, c, cols, "[ ");
375 (void) nvpair_value_uint64_array(nvp, &val, &nelem);
376 for (i = 0; i < nelem; i++)
377 c = fm_printf(d + 1, c, cols, "0x%llx ",
378 (u_longlong_t)val[i]);
380 c = fm_printf(d + 1, c, cols, "]");
384 case DATA_TYPE_STRING_ARRAY:
385 case DATA_TYPE_BOOLEAN_ARRAY:
386 case DATA_TYPE_BYTE_ARRAY:
387 c = fm_printf(d + 1, c, cols, "[...]");
390 case DATA_TYPE_UNKNOWN:
391 c = fm_printf(d + 1, c, cols, "<unknown>");
400 fm_nvprint(nvlist_t *nvl)
405 console_printf("\n");
407 if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
408 c = fm_printf(0, c, zfs_zevent_cols, "%s", class);
410 if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
411 console_printf("\n");
413 console_printf("\n");
417 zfs_zevent_alloc(void)
421 ev = kmem_zalloc(sizeof(zevent_t), KM_SLEEP);
425 list_create(&ev->ev_ze_list, sizeof(zfs_zevent_t),
426 offsetof(zfs_zevent_t, ze_node));
427 list_link_init(&ev->ev_node);
433 zfs_zevent_free(zevent_t *ev)
435 /* Run provided cleanup callback */
436 ev->ev_cb(ev->ev_nvl, ev->ev_detector);
438 list_destroy(&ev->ev_ze_list);
439 kmem_free(ev, sizeof(zevent_t));
443 zfs_zevent_drain(zevent_t *ev)
447 ASSERT(MUTEX_HELD(&zevent_lock));
448 list_remove(&zevent_list, ev);
450 /* Remove references to this event in all private file data */
451 while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
452 list_remove(&ev->ev_ze_list, ze);
453 ze->ze_zevent = NULL;
461 zfs_zevent_drain_all(int *count)
465 mutex_enter(&zevent_lock);
466 while ((ev = list_head(&zevent_list)) != NULL)
467 zfs_zevent_drain(ev);
469 *count = zevent_len_cur;
471 mutex_exit(&zevent_lock);
475 * New zevents are inserted at the head. If the maximum queue
476 * length is exceeded a zevent will be drained from the tail.
477 * As part of this any user space processes which currently have
478 * a reference to this zevent_t in their private data will have
479 * this reference set to NULL.
482 zfs_zevent_insert(zevent_t *ev)
484 mutex_enter(&zevent_lock);
485 list_insert_head(&zevent_list, ev);
486 if (zevent_len_cur >= zfs_zevent_len_max)
487 zfs_zevent_drain(list_tail(&zevent_list));
491 mutex_exit(&zevent_lock);
498 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
506 tv_array[0] = tv.tv_sec;
507 tv_array[1] = tv.tv_nsec;
508 if (nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2)) {
509 atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
513 (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
514 if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
515 atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
519 if (zfs_zevent_console)
522 ev = zfs_zevent_alloc();
524 atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
529 ev->ev_detector = detector;
531 zfs_zevent_insert(ev);
532 cv_broadcast(&zevent_cv);
536 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
538 *ze = zfsdev_get_state(minor, ZST_ZEVENT);
546 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
555 *minorp = zfsdev_getminor(fp->f_file);
556 error = zfs_zevent_minor_to_state(*minorp, ze);
559 zfs_zevent_fd_rele(fd);
565 zfs_zevent_fd_rele(int fd)
571 * Get the next zevent in the stream and place a copy in 'event'. This
572 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
573 * 'event_size'. In this case the stream pointer is not advanced and
574 * 'event_size' is set to the minimum required buffer size.
577 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
584 mutex_enter(&zevent_lock);
585 if (ze->ze_zevent == NULL) {
586 /* New stream start at the beginning/tail */
587 ev = list_tail(&zevent_list);
593 /* Existing stream continue with the next element and remove
594 * ourselves from the wait queue for the previous element */
595 ev = list_prev(&zevent_list, ze->ze_zevent);
602 VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
603 if (size > *event_size) {
610 list_remove(&ze->ze_zevent->ev_ze_list, ze);
613 list_insert_head(&ev->ev_ze_list, ze);
614 nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
615 *dropped = ze->ze_dropped;
618 mutex_exit(&zevent_lock);
624 zfs_zevent_wait(zfs_zevent_t *ze)
628 mutex_enter(&zevent_lock);
630 if (zevent_flags & ZEVENT_SHUTDOWN) {
636 cv_wait_interruptible(&zevent_cv, &zevent_lock);
637 if (issig(JUSTLOOKING))
642 mutex_exit(&zevent_lock);
648 zfs_zevent_init(zfs_zevent_t **zep)
652 ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
653 list_link_init(&ze->ze_node);
657 zfs_zevent_destroy(zfs_zevent_t *ze)
659 mutex_enter(&zevent_lock);
661 list_remove(&ze->ze_zevent->ev_ze_list, ze);
662 mutex_exit(&zevent_lock);
664 kmem_free(ze, sizeof (zfs_zevent_t));
669 * Wrapppers for FM nvlist allocators
673 i_fm_alloc(nv_alloc_t *nva, size_t size)
675 return (kmem_zalloc(size, KM_SLEEP));
680 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
682 kmem_free(buf, size);
685 const nv_alloc_ops_t fm_mem_alloc_ops = {
694 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
695 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
696 * is returned to indicate that the nv_alloc structure could not be created.
699 fm_nva_xcreate(char *buf, size_t bufsz)
701 nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
703 if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
704 kmem_free(nvhdl, sizeof (nv_alloc_t));
712 * Destroy a previously allocated nv_alloc structure. The fixed buffer
713 * associated with nva must be freed by the caller.
716 fm_nva_xdestroy(nv_alloc_t *nva)
719 kmem_free(nva, sizeof (nv_alloc_t));
723 * Create a new nv list. A pointer to a new nv list structure is returned
724 * upon success or NULL is returned to indicate that the structure could
725 * not be created. The newly created nv list is created and managed by the
726 * operations installed in nva. If nva is NULL, the default FMA nva
727 * operations are installed and used.
729 * When called from the kernel and nva == NULL, this function must be called
730 * from passive kernel context with no locks held that can prevent a
731 * sleeping memory allocation from occurring. Otherwise, this function may
732 * be called from other kernel contexts as long a valid nva created via
733 * fm_nva_create() is supplied.
736 fm_nvlist_create(nv_alloc_t *nva)
743 nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
745 if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
746 kmem_free(nvhdl, sizeof (nv_alloc_t));
754 if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
756 nv_alloc_fini(nvhdl);
757 kmem_free(nvhdl, sizeof (nv_alloc_t));
766 * Destroy a previously allocated nvlist structure. flag indicates whether
767 * or not the associated nva structure should be freed (FM_NVA_FREE) or
768 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
769 * it to be re-used for future nvlist creation operations.
772 fm_nvlist_destroy(nvlist_t *nvl, int flag)
774 nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);
779 if (flag == FM_NVA_FREE)
780 fm_nva_xdestroy(nva);
785 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
790 while (ret == 0 && name != NULL) {
791 type = va_arg(ap, data_type_t);
794 ret = nvlist_add_byte(payload, name,
797 case DATA_TYPE_BYTE_ARRAY:
798 nelem = va_arg(ap, int);
799 ret = nvlist_add_byte_array(payload, name,
800 va_arg(ap, uchar_t *), nelem);
802 case DATA_TYPE_BOOLEAN_VALUE:
803 ret = nvlist_add_boolean_value(payload, name,
804 va_arg(ap, boolean_t));
806 case DATA_TYPE_BOOLEAN_ARRAY:
807 nelem = va_arg(ap, int);
808 ret = nvlist_add_boolean_array(payload, name,
809 va_arg(ap, boolean_t *), nelem);
812 ret = nvlist_add_int8(payload, name,
815 case DATA_TYPE_INT8_ARRAY:
816 nelem = va_arg(ap, int);
817 ret = nvlist_add_int8_array(payload, name,
818 va_arg(ap, int8_t *), nelem);
820 case DATA_TYPE_UINT8:
821 ret = nvlist_add_uint8(payload, name,
824 case DATA_TYPE_UINT8_ARRAY:
825 nelem = va_arg(ap, int);
826 ret = nvlist_add_uint8_array(payload, name,
827 va_arg(ap, uint8_t *), nelem);
829 case DATA_TYPE_INT16:
830 ret = nvlist_add_int16(payload, name,
833 case DATA_TYPE_INT16_ARRAY:
834 nelem = va_arg(ap, int);
835 ret = nvlist_add_int16_array(payload, name,
836 va_arg(ap, int16_t *), nelem);
838 case DATA_TYPE_UINT16:
839 ret = nvlist_add_uint16(payload, name,
842 case DATA_TYPE_UINT16_ARRAY:
843 nelem = va_arg(ap, int);
844 ret = nvlist_add_uint16_array(payload, name,
845 va_arg(ap, uint16_t *), nelem);
847 case DATA_TYPE_INT32:
848 ret = nvlist_add_int32(payload, name,
849 va_arg(ap, int32_t));
851 case DATA_TYPE_INT32_ARRAY:
852 nelem = va_arg(ap, int);
853 ret = nvlist_add_int32_array(payload, name,
854 va_arg(ap, int32_t *), nelem);
856 case DATA_TYPE_UINT32:
857 ret = nvlist_add_uint32(payload, name,
858 va_arg(ap, uint32_t));
860 case DATA_TYPE_UINT32_ARRAY:
861 nelem = va_arg(ap, int);
862 ret = nvlist_add_uint32_array(payload, name,
863 va_arg(ap, uint32_t *), nelem);
865 case DATA_TYPE_INT64:
866 ret = nvlist_add_int64(payload, name,
867 va_arg(ap, int64_t));
869 case DATA_TYPE_INT64_ARRAY:
870 nelem = va_arg(ap, int);
871 ret = nvlist_add_int64_array(payload, name,
872 va_arg(ap, int64_t *), nelem);
874 case DATA_TYPE_UINT64:
875 ret = nvlist_add_uint64(payload, name,
876 va_arg(ap, uint64_t));
878 case DATA_TYPE_UINT64_ARRAY:
879 nelem = va_arg(ap, int);
880 ret = nvlist_add_uint64_array(payload, name,
881 va_arg(ap, uint64_t *), nelem);
883 case DATA_TYPE_STRING:
884 ret = nvlist_add_string(payload, name,
887 case DATA_TYPE_STRING_ARRAY:
888 nelem = va_arg(ap, int);
889 ret = nvlist_add_string_array(payload, name,
890 va_arg(ap, char **), nelem);
892 case DATA_TYPE_NVLIST:
893 ret = nvlist_add_nvlist(payload, name,
894 va_arg(ap, nvlist_t *));
896 case DATA_TYPE_NVLIST_ARRAY:
897 nelem = va_arg(ap, int);
898 ret = nvlist_add_nvlist_array(payload, name,
899 va_arg(ap, nvlist_t **), nelem);
905 name = va_arg(ap, char *);
911 fm_payload_set(nvlist_t *payload, ...)
917 va_start(ap, payload);
918 name = va_arg(ap, char *);
919 ret = i_fm_payload_set(payload, name, ap);
924 &erpt_kstat_data.payload_set_failed.value.ui64, 1);
928 * Set-up and validate the members of an ereport event according to:
930 * Member name Type Value
931 * ====================================================
932 * class string ereport
935 * detector nvlist_t <detector>
936 * ereport-payload nvlist_t <var args>
938 * We don't actually add a 'version' member to the payload. Really,
939 * the version quoted to us by our caller is that of the category 1
940 * "ereport" event class (and we require FM_EREPORT_VERS0) but
941 * the payload version of the actual leaf class event under construction
942 * may be something else. Callers should supply a version in the varargs,
943 * or (better) we could take two version arguments - one for the
944 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
945 * for the leaf class.
948 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
949 uint64_t ena, const nvlist_t *detector, ...)
951 char ereport_class[FM_MAX_CLASS];
956 if (version != FM_EREPORT_VERS0) {
957 atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
961 (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
962 FM_EREPORT_CLASS, erpt_class);
963 if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
964 atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
968 if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
969 atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
972 if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
973 (nvlist_t *)detector) != 0) {
974 atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
977 va_start(ap, detector);
978 name = va_arg(ap, const char *);
979 ret = i_fm_payload_set(ereport, name, ap);
983 atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
987 * Set-up and validate the members of an hc fmri according to;
989 * Member name Type Value
990 * ===================================================
992 * auth nvlist_t <auth>
993 * hc-name string <name>
996 * Note that auth and hc-id are optional members.
999 #define HC_MAXPAIRS 20
1000 #define HC_MAXNAMELEN 50
1003 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
1005 if (version != FM_HC_SCHEME_VERSION) {
1006 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1010 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
1011 nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
1012 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1016 if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1017 (nvlist_t *)auth) != 0) {
1018 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1026 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1027 nvlist_t *snvl, int npairs, ...)
1029 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
1030 nvlist_t *pairs[HC_MAXPAIRS];
1034 if (!fm_fmri_hc_set_common(fmri, version, auth))
1037 npairs = MIN(npairs, HC_MAXPAIRS);
1039 va_start(ap, npairs);
1040 for (i = 0; i < npairs; i++) {
1041 const char *name = va_arg(ap, const char *);
1042 uint32_t id = va_arg(ap, uint32_t);
1045 (void) snprintf(idstr, sizeof (idstr), "%u", id);
1047 pairs[i] = fm_nvlist_create(nva);
1048 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
1049 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
1051 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1056 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
1057 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1059 for (i = 0; i < npairs; i++)
1060 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
1063 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
1065 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1071 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
1072 nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
1074 nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
1075 nvlist_t *pairs[HC_MAXPAIRS];
1080 char *hcname, *hcid;
1082 if (!fm_fmri_hc_set_common(fmri, version, auth))
1086 * copy the bboard nvpairs to the pairs array
1088 if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
1090 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1094 for (i = 0; i < n; i++) {
1095 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
1098 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1101 if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
1103 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1107 pairs[i] = fm_nvlist_create(nva);
1108 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
1109 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
1110 for (j = 0; j <= i; j++) {
1111 if (pairs[j] != NULL)
1112 fm_nvlist_destroy(pairs[j],
1116 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1122 * create the pairs from passed in pairs
1124 npairs = MIN(npairs, HC_MAXPAIRS);
1126 va_start(ap, npairs);
1127 for (i = n; i < npairs + n; i++) {
1128 const char *name = va_arg(ap, const char *);
1129 uint32_t id = va_arg(ap, uint32_t);
1131 (void) snprintf(idstr, sizeof (idstr), "%u", id);
1132 pairs[i] = fm_nvlist_create(nva);
1133 if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
1134 nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
1135 for (j = 0; j <= i; j++) {
1136 if (pairs[j] != NULL)
1137 fm_nvlist_destroy(pairs[j],
1141 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1148 * Create the fmri hc list
1150 if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
1152 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1156 for (i = 0; i < npairs + n; i++) {
1157 fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
1161 if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
1163 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1170 * Set-up and validate the members of an dev fmri according to:
1172 * Member name Type Value
1173 * ====================================================
1175 * auth nvlist_t <auth>
1176 * devpath string <devpath>
1177 * [devid] string <devid>
1178 * [target-port-l0id] string <target-port-lun0-id>
1180 * Note that auth and devid are optional members.
1183 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
1184 const char *devpath, const char *devid, const char *tpl0)
1188 if (version != DEV_SCHEME_VERSION0) {
1189 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1193 err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
1194 err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
1197 err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
1201 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
1204 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
1207 err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
1210 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1215 * Set-up and validate the members of an cpu fmri according to:
1217 * Member name Type Value
1218 * ====================================================
1220 * auth nvlist_t <auth>
1221 * cpuid uint32_t <cpu_id>
1222 * cpumask uint8_t <cpu_mask>
1223 * serial uint64_t <serial_id>
1225 * Note that auth, cpumask, serial are optional members.
1229 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
1230 uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
1232 uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
1234 if (version < CPU_SCHEME_VERSION1) {
1235 atomic_add_64(failedp, 1);
1239 if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
1240 atomic_add_64(failedp, 1);
1244 if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
1245 FM_FMRI_SCHEME_CPU) != 0) {
1246 atomic_add_64(failedp, 1);
1250 if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
1251 (nvlist_t *)auth) != 0)
1252 atomic_add_64(failedp, 1);
1254 if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
1255 atomic_add_64(failedp, 1);
1257 if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
1259 atomic_add_64(failedp, 1);
1261 if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
1262 FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
1263 atomic_add_64(failedp, 1);
1267 * Set-up and validate the members of a mem according to:
1269 * Member name Type Value
1270 * ====================================================
1272 * auth nvlist_t <auth> [optional]
1273 * unum string <unum>
1274 * serial string <serial> [optional*]
1275 * offset uint64_t <offset> [optional]
1277 * * serial is required if offset is present
1280 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
1281 const char *unum, const char *serial, uint64_t offset)
1283 if (version != MEM_SCHEME_VERSION0) {
1284 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1288 if (!serial && (offset != (uint64_t)-1)) {
1289 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1293 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1294 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1298 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
1299 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1304 if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
1305 (nvlist_t *)auth) != 0) {
1307 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1311 if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
1312 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1315 if (serial != NULL) {
1316 if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
1317 (char **)&serial, 1) != 0) {
1319 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1321 if (offset != (uint64_t)-1) {
1322 if (nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET,
1324 atomic_add_64(&erpt_kstat_data.
1325 fmri_set_failed.value.ui64, 1);
1332 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
1335 if (version != ZFS_SCHEME_VERSION0) {
1336 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1340 if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
1341 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1345 if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
1346 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1350 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
1351 atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1354 if (vdev_guid != 0) {
1355 if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
1357 &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
1363 fm_ena_increment(uint64_t ena)
1367 switch (ENA_FORMAT(ena)) {
1369 new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
1372 new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
1382 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
1389 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1390 ((cpuid << ENA_FMT1_CPUID_SHFT) &
1391 ENA_FMT1_CPUID_MASK) |
1392 ((timestamp << ENA_FMT1_TIME_SHFT) &
1393 ENA_FMT1_TIME_MASK));
1395 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1396 ((cpuid << ENA_FMT1_CPUID_SHFT) &
1397 ENA_FMT1_CPUID_MASK) |
1398 ((gethrtime() << ENA_FMT1_TIME_SHFT) &
1399 ENA_FMT1_TIME_MASK));
1403 ena = (uint64_t)((format & ENA_FORMAT_MASK) |
1404 ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
1414 fm_ena_generate(uint64_t timestamp, uchar_t format)
1416 return (fm_ena_generate_cpu(timestamp, getcpuid(), format));
1420 fm_ena_generation_get(uint64_t ena)
1424 switch (ENA_FORMAT(ena)) {
1426 gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
1429 gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
1440 fm_ena_format_get(uint64_t ena)
1443 return (ENA_FORMAT(ena));
1447 fm_ena_id_get(uint64_t ena)
1451 switch (ENA_FORMAT(ena)) {
1453 id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
1456 id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
1466 fm_ena_time_get(uint64_t ena)
1470 switch (ENA_FORMAT(ena)) {
1472 time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
1475 time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
1491 if (zfs_zevent_len_max == 0)
1492 zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
1494 /* Initialize zevent allocation and generation kstats */
1495 fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
1496 sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
1497 KSTAT_FLAG_VIRTUAL);
1499 if (fm_ksp != NULL) {
1500 fm_ksp->ks_data = &erpt_kstat_data;
1501 kstat_install(fm_ksp);
1503 cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
1506 mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
1507 list_create(&zevent_list, sizeof(zevent_t), offsetof(zevent_t, ev_node));
1508 cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
1516 zfs_zevent_drain_all(&count);
1517 cv_broadcast(&zevent_cv);
1519 mutex_enter(&zevent_lock);
1520 zevent_flags |= ZEVENT_SHUTDOWN;
1521 while (zevent_waiters > 0) {
1522 mutex_exit(&zevent_lock);
1524 mutex_enter(&zevent_lock);
1526 mutex_exit(&zevent_lock);
1528 cv_destroy(&zevent_cv);
1529 list_destroy(&zevent_list);
1530 mutex_destroy(&zevent_lock);
1532 if (fm_ksp != NULL) {
1533 kstat_delete(fm_ksp);
1538 module_param(zfs_zevent_len_max, int, 0644);
1539 MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length");
1541 module_param(zfs_zevent_cols, int, 0644);
1542 MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width");
1544 module_param(zfs_zevent_console, int, 0644);
1545 MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console");
1547 #endif /* _KERNEL */