X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Ffm.c;h=c004032f8dc89f3eb256620be4081404bec15121;hb=refs%2Fheads%2Frertzinger%2Ffeature-zpool-get--p;hp=3cc979d41bd584e17f90a3b17f68e2e0ad54ee50;hpb=fa42225a3d5daa58704bdb920ec92cd95c274011;p=zfs.git diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 3cc979d..c004032 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -19,8 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. */ /* @@ -54,49 +53,46 @@ #include #include -#include -#include +#include #include #include -#include #include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include #include #include -#include #include #include +#include +#include +#ifdef _KERNEL +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -/* - * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These - * values must be kept in sync with the FMA source code in usr/src/cmd/fm. - */ -static const char *fm_url = "http://www.sun.com/msg"; -static const char *fm_msgid = "SUNOS-8000-0G"; -static char *volatile fm_panicstr = NULL; +int zfs_zevent_len_max = 0; +int zfs_zevent_cols = 80; +int zfs_zevent_console = 0; -errorq_t *ereport_errorq; -void *ereport_dumpbuf; -size_t ereport_dumplen; +static int zevent_len_cur = 0; +static int zevent_waiters = 0; +static int zevent_flags = 0; -static uint_t ereport_chanlen = ERPT_EVCH_MAX; -static evchan_t *ereport_chan = NULL; -static ulong_t ereport_qlen = 0; -static size_t ereport_size = 0; -static int ereport_cols = 80; +static kmutex_t zevent_lock; +static list_t zevent_list; +static kcondvar_t zevent_cv; +#endif /* _KERNEL */ + +extern void fastreboot_disable_highpil(void); /* - * Common fault management kstats to record ereport generation - * failures + * Common fault management kstats to record event generation failures */ struct erpt_kstat { @@ -113,57 +109,9 @@ static struct erpt_kstat erpt_kstat_data = { { "payload-set-failed", KSTAT_DATA_UINT64 } }; -/*ARGSUSED*/ -static void -fm_drain(void *private, void *data, errorq_elem_t *eep) -{ - nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep); - - if (!panicstr) - (void) fm_ereport_post(nvl, EVCH_TRYHARD); - else - fm_nvprint(nvl); -} - -void -fm_init(void) -{ - kstat_t *ksp; - - (void) sysevent_evc_bind(FM_ERROR_CHAN, - &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND); - - (void) sysevent_evc_control(ereport_chan, - EVCH_SET_CHAN_LEN, &ereport_chanlen); - - if (ereport_qlen == 0) - ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4); - - if (ereport_size == 0) - ereport_size = ERPT_DATA_SZ; - - ereport_errorq = errorq_nvcreate("fm_ereport_queue", - (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size, - FM_ERR_PIL, ERRORQ_VITAL); - if (ereport_errorq == NULL) - panic("failed to create required ereport error queue"); - - ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP); - ereport_dumplen = ereport_size; - - /* Initialize ereport allocation and generation kstats */ - ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED, - sizeof (struct erpt_kstat) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - - if (ksp != NULL) { - ksp->ks_data = &erpt_kstat_data; - kstat_install(ksp); - } else { - cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); +kstat_t *fm_ksp; - } -} +#ifdef _KERNEL /* * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of @@ -182,7 +130,7 @@ fm_printf(int depth, int c, int cols, const char *format, ...) va_end(ap); if (c + width >= cols) { - console_printf("\n\r"); + console_printf("\n"); c = 0; if (format[0] != ' ' && depth > 0) { console_printf(" "); @@ -244,54 +192,54 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) case DATA_TYPE_BYTE: (void) nvpair_value_byte(nvp, &i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_INT8: (void) nvpair_value_int8(nvp, (void *)&i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_UINT8: (void) nvpair_value_uint8(nvp, &i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_INT16: (void) nvpair_value_int16(nvp, (void *)&i16); - c = fm_printf(d + 1, c, cols, "%x", i16); + c = fm_printf(d + 1, c, cols, "0x%x", i16); break; case DATA_TYPE_UINT16: (void) nvpair_value_uint16(nvp, &i16); - c = fm_printf(d + 1, c, cols, "%x", i16); + c = fm_printf(d + 1, c, cols, "0x%x", i16); break; case DATA_TYPE_INT32: (void) nvpair_value_int32(nvp, (void *)&i32); - c = fm_printf(d + 1, c, cols, "%x", i32); + c = fm_printf(d + 1, c, cols, "0x%x", i32); break; case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &i32); - c = fm_printf(d + 1, c, cols, "%x", i32); + c = fm_printf(d + 1, c, cols, "0x%x", i32); break; case DATA_TYPE_INT64: (void) nvpair_value_int64(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; case DATA_TYPE_HRTIME: (void) nvpair_value_hrtime(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; @@ -321,19 +269,124 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) } break; + case DATA_TYPE_INT8_ARRAY: { + int8_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT8_ARRAY: { + uint8_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT16_ARRAY: { + int16_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT16_ARRAY: { + uint16_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT32_ARRAY: { + int32_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT32_ARRAY: { + uint32_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT64_ARRAY: { + int64_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_STRING_ARRAY: case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_BYTE_ARRAY: - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - case DATA_TYPE_INT16_ARRAY: - case DATA_TYPE_UINT16_ARRAY: - case DATA_TYPE_INT32_ARRAY: - case DATA_TYPE_UINT32_ARRAY: - case DATA_TYPE_INT64_ARRAY: - case DATA_TYPE_UINT64_ARRAY: - case DATA_TYPE_STRING_ARRAY: c = fm_printf(d + 1, c, cols, "[...]"); break; + case DATA_TYPE_UNKNOWN: c = fm_printf(d + 1, c, cols, ""); break; @@ -349,174 +402,270 @@ fm_nvprint(nvlist_t *nvl) char *class; int c = 0; - console_printf("\r"); + console_printf("\n"); if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0) - c = fm_printf(0, c, ereport_cols, "%s", class); + c = fm_printf(0, c, zfs_zevent_cols, "%s", class); - if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0) + if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0) console_printf("\n"); console_printf("\n"); } +static zevent_t * +zfs_zevent_alloc(void) +{ + zevent_t *ev; + + ev = kmem_zalloc(sizeof(zevent_t), KM_PUSHPAGE); + if (ev == NULL) + return NULL; + + list_create(&ev->ev_ze_list, sizeof(zfs_zevent_t), + offsetof(zfs_zevent_t, ze_node)); + list_link_init(&ev->ev_node); + + return ev; +} + +static void +zfs_zevent_free(zevent_t *ev) +{ + /* Run provided cleanup callback */ + ev->ev_cb(ev->ev_nvl, ev->ev_detector); + + list_destroy(&ev->ev_ze_list); + kmem_free(ev, sizeof(zevent_t)); +} + +static void +zfs_zevent_drain(zevent_t *ev) +{ + zfs_zevent_t *ze; + + ASSERT(MUTEX_HELD(&zevent_lock)); + list_remove(&zevent_list, ev); + + /* Remove references to this event in all private file data */ + while ((ze = list_head(&ev->ev_ze_list)) != NULL) { + list_remove(&ev->ev_ze_list, ze); + ze->ze_zevent = NULL; + ze->ze_dropped++; + } + + zfs_zevent_free(ev); +} + +void +zfs_zevent_drain_all(int *count) +{ + zevent_t *ev; + + mutex_enter(&zevent_lock); + while ((ev = list_head(&zevent_list)) != NULL) + zfs_zevent_drain(ev); + + *count = zevent_len_cur; + zevent_len_cur = 0; + mutex_exit(&zevent_lock); +} + /* - * Wrapper for panic() that first produces an FMA-style message for admins. - * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this - * is the one exception to that rule and the only error that gets messaged. - * This function is intended for use by subsystems that have detected a fatal - * error and enqueued appropriate ereports and wish to then force a panic. + * New zevents are inserted at the head. If the maximum queue + * length is exceeded a zevent will be drained from the tail. + * As part of this any user space processes which currently have + * a reference to this zevent_t in their private data will have + * this reference set to NULL. */ -/*PRINTFLIKE1*/ -void -fm_panic(const char *format, ...) +static void +zfs_zevent_insert(zevent_t *ev) { - va_list ap; + ASSERT(MUTEX_HELD(&zevent_lock)); + list_insert_head(&zevent_list, ev); - (void) casptr((void *)&fm_panicstr, NULL, (void *)format); - va_start(ap, format); - vpanic(format, ap); - va_end(ap); + if (zevent_len_cur >= zfs_zevent_len_max) + zfs_zevent_drain(list_tail(&zevent_list)); + else + zevent_len_cur++; } /* - * Print any appropriate FMA banner message before the panic message. This - * function is called by panicsys() and prints the message for fm_panic(). - * We print the message here so that it comes after the system is quiesced. - * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix). - * The rest of the message is for the console only and not needed in the log, - * so it is printed using console_printf(). We break it up into multiple - * chunks so as to avoid overflowing any small legacy prom_printf() buffers. + * Post a zevent */ void -fm_banner(void) +zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb) { - timespec_t tod; - hrtime_t now; + int64_t tv_array[2]; + timestruc_t tv; + size_t nvl_size = 0; + zevent_t *ev; - if (!fm_panicstr) - return; /* panic was not initiated by fm_panic(); do nothing */ + gethrestime(&tv); + tv_array[0] = tv.tv_sec; + tv_array[1] = tv.tv_nsec; + if (nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2)) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + return; + } - if (panicstr) { - tod = panic_hrestime; - now = panic_hrtime; - } else { - gethrestime(&tod); - now = gethrtime_waitfree(); + (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); + if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; } - cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, " - "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid); + if (zfs_zevent_console) + fm_nvprint(nvl); - console_printf( -"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n" -"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n", - fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now); + ev = zfs_zevent_alloc(); + if (ev == NULL) { + atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); + return; + } - console_printf( -"PLATFORM: %s, CSN: -, HOSTNAME: %s\n" -"SOURCE: %s, REV: %s %s\n", - platform, utsname.nodename, utsname.sysname, - utsname.release, utsname.version); + ev->ev_nvl = nvl; + ev->ev_detector = detector; + ev->ev_cb = cb; - console_printf( -"DESC: Errors have been detected that require a reboot to ensure system\n" -"integrity. See %s/%s for more information.\n", - fm_url, fm_msgid); + mutex_enter(&zevent_lock); + zfs_zevent_insert(ev); + cv_broadcast(&zevent_cv); + mutex_exit(&zevent_lock); +} - console_printf( -"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n" -"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n" -"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n"); +static int +zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze) +{ + *ze = zfsdev_get_state(minor, ZST_ZEVENT); + if (*ze == NULL) + return (EBADF); - console_printf("\n"); + return (0); +} + +int +zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze) +{ + file_t *fp; + int error; + + fp = getf(fd); + if (fp == NULL) + return (EBADF); + + *minorp = zfsdev_getminor(fp->f_file); + error = zfs_zevent_minor_to_state(*minorp, ze); + + if (error) + zfs_zevent_fd_rele(fd); + + return (error); +} + +void +zfs_zevent_fd_rele(int fd) +{ + releasef(fd); } /* - * Utility function to write all of the pending ereports to the dump device. - * This function is called at either normal reboot or panic time, and simply - * iterates over the in-transit messages in the ereport sysevent channel. + * Get the next zevent in the stream and place a copy in 'event'. This + * may fail with ENOMEM if the encoded nvlist size exceeds the passed + * 'event_size'. In this case the stream pointer is not advanced and + * and 'event_size' is set to the minimum required buffer size. */ -void -fm_ereport_dump(void) +int +zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size, + uint64_t *dropped) { - evchanq_t *chq; - sysevent_t *sep; - erpt_dump_t ed; - - timespec_t tod; - hrtime_t now; - char *buf; - size_t len; - - if (panicstr) { - tod = panic_hrestime; - now = panic_hrtime; + zevent_t *ev; + size_t size; + int error = 0; + + mutex_enter(&zevent_lock); + if (ze->ze_zevent == NULL) { + /* New stream start at the beginning/tail */ + ev = list_tail(&zevent_list); + if (ev == NULL) { + error = ENOENT; + goto out; + } } else { - if (ereport_errorq != NULL) - errorq_drain(ereport_errorq); - gethrestime(&tod); - now = gethrtime_waitfree(); + /* Existing stream continue with the next element and remove + * ourselves from the wait queue for the previous element */ + ev = list_prev(&zevent_list, ze->ze_zevent); + if (ev == NULL) { + error = ENOENT; + goto out; + } } - /* - * In the panic case, sysevent_evc_walk_init() will return NULL. - */ - if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL && - !panicstr) - return; /* event channel isn't initialized yet */ + VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0); + if (size > *event_size) { + *event_size = size; + error = ENOMEM; + goto out; + } - while ((sep = sysevent_evc_walk_step(chq)) != NULL) { - if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL) - break; + if (ze->ze_zevent) + list_remove(&ze->ze_zevent->ev_ze_list, ze); + + ze->ze_zevent = ev; + list_insert_head(&ev->ev_ze_list, ze); + nvlist_dup(ev->ev_nvl, event, KM_SLEEP); + *dropped = ze->ze_dropped; + ze->ze_dropped = 0; +out: + mutex_exit(&zevent_lock); + + return error; +} - ed.ed_magic = ERPT_MAGIC; - ed.ed_chksum = checksum32(buf, len); - ed.ed_size = (uint32_t)len; - ed.ed_pad = 0; - ed.ed_hrt_nsec = SE_TIME(sep); - ed.ed_hrt_base = now; - ed.ed_tod_base.sec = tod.tv_sec; - ed.ed_tod_base.nsec = tod.tv_nsec; - - dumpvp_write(&ed, sizeof (ed)); - dumpvp_write(buf, len); +int +zfs_zevent_wait(zfs_zevent_t *ze) +{ + int error = 0; + + mutex_enter(&zevent_lock); + + if (zevent_flags & ZEVENT_SHUTDOWN) { + error = ESHUTDOWN; + goto out; } - sysevent_evc_walk_fini(chq); + zevent_waiters++; + cv_wait_interruptible(&zevent_cv, &zevent_lock); + if (issig(JUSTLOOKING)) + error = EINTR; + + zevent_waiters--; +out: + mutex_exit(&zevent_lock); + + return error; } -/* - * Post an error report (ereport) to the sysevent error channel. The error - * channel must be established with a prior call to sysevent_evc_create() - * before publication may occur. - */ void -fm_ereport_post(nvlist_t *ereport, int evc_flag) +zfs_zevent_init(zfs_zevent_t **zep) { - size_t nvl_size = 0; - evchan_t *error_chan; + zfs_zevent_t *ze; - (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE); - if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { - atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); - return; - } + ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP); + list_link_init(&ze->ze_node); +} - if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan, - EVCH_CREAT|EVCH_HOLD_PEND) != 0) { - atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); - return; - } +void +zfs_zevent_destroy(zfs_zevent_t *ze) +{ + mutex_enter(&zevent_lock); + if (ze->ze_zevent) + list_remove(&ze->ze_zevent->ev_ze_list, ze); + mutex_exit(&zevent_lock); - if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR, - SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) { - atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); - sysevent_evc_unbind(error_chan); - return; - } - sysevent_evc_unbind(error_chan); + kmem_free(ze, sizeof (zfs_zevent_t)); } +#endif /* _KERNEL */ /* * Wrapppers for FM nvlist allocators @@ -525,7 +674,7 @@ fm_ereport_post(nvlist_t *ereport, int evc_flag) static void * i_fm_alloc(nv_alloc_t *nva, size_t size) { - return (kmem_zalloc(size, KM_SLEEP)); + return (kmem_zalloc(size, KM_PUSHPAGE)); } /* ARGSUSED */ @@ -593,7 +742,7 @@ fm_nvlist_create(nv_alloc_t *nva) nv_alloc_t *nvhdl; if (nva == NULL) { - nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); + nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_PUSHPAGE); if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) { kmem_free(nvhdl, sizeof (nv_alloc_t)); @@ -606,8 +755,8 @@ fm_nvlist_create(nv_alloc_t *nva) if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) { if (hdl_alloced) { - kmem_free(nvhdl, sizeof (nv_alloc_t)); nv_alloc_fini(nvhdl); + kmem_free(nvhdl, sizeof (nv_alloc_t)); } return (NULL); } @@ -788,6 +937,14 @@ fm_payload_set(nvlist_t *payload, ...) * detector nvlist_t * ereport-payload nvlist_t * + * We don't actually add a 'version' member to the payload. Really, + * the version quoted to us by our caller is that of the category 1 + * "ereport" event class (and we require FM_EREPORT_VERS0) but + * the payload version of the actual leaf class event under construction + * may be something else. Callers should supply a version in the varargs, + * or (better) we could take two version arguments - one for the + * ereport category 1 classification (expect FM_EREPORT_VERS0) and one + * for the leaf class. */ void fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, @@ -912,6 +1069,105 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, } } +void +fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth, + nvlist_t *snvl, nvlist_t *bboard, int npairs, ...) +{ + nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri); + nvlist_t *pairs[HC_MAXPAIRS]; + nvlist_t **hcl; + uint_t n; + int i, j; + va_list ap; + char *hcname, *hcid; + + if (!fm_fmri_hc_set_common(fmri, version, auth)) + return; + + /* + * copy the bboard nvpairs to the pairs array + */ + if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n) + != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + for (i = 0; i < n; i++) { + if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, + &hcname) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) { + for (j = 0; j <= i; j++) { + if (pairs[j] != NULL) + fm_nvlist_destroy(pairs[j], + FM_NVA_RETAIN); + } + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } + + /* + * create the pairs from passed in pairs + */ + npairs = MIN(npairs, HC_MAXPAIRS); + + va_start(ap, npairs); + for (i = n; i < npairs + n; i++) { + const char *name = va_arg(ap, const char *); + uint32_t id = va_arg(ap, uint32_t); + char idstr[11]; + (void) snprintf(idstr, sizeof (idstr), "%u", id); + pairs[i] = fm_nvlist_create(nva); + if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 || + nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) { + for (j = 0; j <= i; j++) { + if (pairs[j] != NULL) + fm_nvlist_destroy(pairs[j], + FM_NVA_RETAIN); + } + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } + va_end(ap); + + /* + * Create the fmri hc list + */ + if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, + npairs + n) != 0) { + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + + for (i = 0; i < npairs + n; i++) { + fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN); + } + + if (snvl != NULL) { + if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) { + atomic_add_64( + &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + return; + } + } +} + /* * Set-up and validate the members of an dev fmri according to: * @@ -920,46 +1176,41 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth, * version uint8_t 0 * auth nvlist_t * devpath string - * devid string + * [devid] string + * [target-port-l0id] string * * Note that auth and devid are optional members. */ void fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth, - const char *devpath, const char *devid) + const char *devpath, const char *devid, const char *tpl0) { - if (version != DEV_SCHEME_VERSION0) { - atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } + int err = 0; - if (nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0) { + if (version != DEV_SCHEME_VERSION0) { atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); return; } - if (nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, - FM_FMRI_SCHEME_DEV) != 0) { - atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); - return; - } + err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version); + err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV); if (auth != NULL) { - if (nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY, - (nvlist_t *)auth) != 0) { - atomic_add_64( - &erpt_kstat_data.fmri_set_failed.value.ui64, 1); - } + err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY, + (nvlist_t *)auth); } - if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0) { - atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); - } + err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath); if (devid != NULL) - if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0) - atomic_add_64( - &erpt_kstat_data.fmri_set_failed.value.ui64, 1); + err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid); + + if (tpl0 != NULL) + err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0); + + if (err) + atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1); + } /* @@ -1146,7 +1397,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) ena = (uint64_t)((format & ENA_FORMAT_MASK) | ((cpuid << ENA_FMT1_CPUID_SHFT) & ENA_FMT1_CPUID_MASK) | - ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) & + ((gethrtime() << ENA_FMT1_TIME_SHFT) & ENA_FMT1_TIME_MASK)); } break; @@ -1164,7 +1415,13 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) uint64_t fm_ena_generate(uint64_t timestamp, uchar_t format) { - return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format)); + uint64_t ena; + + kpreempt_disable(); + ena = fm_ena_generate_cpu(timestamp, getcpuid(), format); + kpreempt_enable(); + + return (ena); } uint64_t @@ -1232,35 +1489,68 @@ fm_ena_time_get(uint64_t ena) return (time); } -/* - * Convert a getpcstack() trace to symbolic name+offset, and add the resulting - * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK. - */ +#ifdef _KERNEL void -fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth) +fm_init(void) { - int i; - char *sym; - ulong_t off; - char *stkpp[FM_STK_DEPTH]; - char buf[FM_STK_DEPTH * FM_SYM_SZ]; - char *stkp = buf; - - for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) { - if ((sym = kobj_getsymname(stack[i], &off)) != NULL) - (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off); - else - (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]); - stkpp[i] = stkp; + zevent_len_cur = 0; + zevent_flags = 0; + + if (zfs_zevent_len_max == 0) + zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4); + + /* Initialize zevent allocation and generation kstats */ + fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED, + sizeof (struct erpt_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (fm_ksp != NULL) { + fm_ksp->ks_data = &erpt_kstat_data; + kstat_install(fm_ksp); + } else { + cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); } - fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK, - DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL); + mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zevent_list, sizeof(zevent_t), offsetof(zevent_t, ev_node)); + cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL); } void -print_msg_hwerr(ctid_t ct_id, proc_t *p) +fm_fini(void) { - uprintf("Killed process %d (%s) in contract id %d " - "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id); + int count; + + zfs_zevent_drain_all(&count); + + mutex_enter(&zevent_lock); + cv_broadcast(&zevent_cv); + + zevent_flags |= ZEVENT_SHUTDOWN; + while (zevent_waiters > 0) { + mutex_exit(&zevent_lock); + schedule(); + mutex_enter(&zevent_lock); + } + mutex_exit(&zevent_lock); + + cv_destroy(&zevent_cv); + list_destroy(&zevent_list); + mutex_destroy(&zevent_lock); + + if (fm_ksp != NULL) { + kstat_delete(fm_ksp); + fm_ksp = NULL; + } } + +module_param(zfs_zevent_len_max, int, 0644); +MODULE_PARM_DESC(zfs_zevent_len_max, "Max event queue length"); + +module_param(zfs_zevent_cols, int, 0644); +MODULE_PARM_DESC(zfs_zevent_cols, "Max event column width"); + +module_param(zfs_zevent_console, int, 0644); +MODULE_PARM_DESC(zfs_zevent_console, "Log events to the console"); + +#endif /* _KERNEL */