* Use is subject to license terms.
*/
-#pragma ident "@(#)zfs_fm.c 1.6 08/04/01 SMI"
-
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
* pool X
*
* If we are in a loading state, all errors are chained together by the same
- * SPA-wide ENA.
+ * SPA-wide ENA (Error Numeric Association).
*
* For isolated I/O requests, we get the ENA from the zio_t. The propagation
* gets very complicated due to RAID-Z, gang blocks, and vdev caching. We want
* We keep track of the ENA for a ZIO chain through the 'io_logical' member.
* When a new logical I/O is issued, we set this to point to itself. Child I/Os
* then inherit this pointer, so that when it is first set subsequent failures
- * will use the same ENA. If a physical I/O is issued (by passing the
- * ZIO_FLAG_NOBOOKMARK flag), then this pointer is reset, guaranteeing that a
- * unique ENA will be generated. For an aggregate I/O, this pointer is set to
- * NULL, and no ereport will be generated (since it doesn't actually correspond
- * to any particular device or piece of data).
+ * will use the same ENA. For vdev cache fill and queue aggregation I/O,
+ * this pointer is set to NULL, and no ereport will be generated (since it
+ * doesn't actually correspond to any particular device or piece of data,
+ * and the caller will always retry without caching or queueing anyway).
*/
void
zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
nvlist_t *ereport, *detector;
uint64_t ena;
char class[64];
+ int state;
/*
* If we are doing a spa_tryimport(), ignore errors.
spa->spa_last_open_failed)
return;
- /*
- * Ignore any errors from I/Os that we are going to retry anyway - we
- * only generate errors from the final failure. Checksum errors are
- * generated after the pipeline stage responsible for retrying the I/O
- * (VDEV_IO_ASSESS), so this only applies to standard I/O errors.
- */
- if (zio && zio_should_retry(zio) && zio->io_error != ECKSUM)
- return;
+ if (zio != NULL) {
+ /*
+ * If this is not a read or write zio, ignore the error. This
+ * can occur if the DKIOCFLUSHWRITECACHE ioctl fails.
+ */
+ if (zio->io_type != ZIO_TYPE_READ &&
+ zio->io_type != ZIO_TYPE_WRITE)
+ return;
- /*
- * If this is not a read or write zio, ignore the error. This can occur
- * if the DKIOCFLUSHWRITECACHE ioctl fails.
- */
- if (zio && zio->io_type != ZIO_TYPE_READ &&
- zio->io_type != ZIO_TYPE_WRITE)
- return;
+ /*
+ * Ignore any errors from speculative I/Os, as failure is an
+ * expected result.
+ */
+ if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
+ return;
+
+ /*
+ * If the vdev has already been marked as failing due to a
+ * failed probe, then ignore any subsequent I/O errors, as the
+ * DE will automatically fault the vdev on the first such
+ * failure.
+ */
+ if (vd != NULL &&
+ (!vdev_readable(vd) || !vdev_writeable(vd)) &&
+ strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) != 0)
+ return;
+ }
if ((ereport = fm_nvlist_create(NULL)) == NULL)
return;
*/
/*
+ * If we are importing a faulted pool, then we treat it like an open,
+ * not an import. Otherwise, the DE will ignore all faults during
+ * import, since the default behavior is to mark the devices as
+ * persistently unavailable, not leave them in the faulted state.
+ */
+ state = spa->spa_import_faulted ? SPA_LOAD_OPEN : spa->spa_load_state;
+
+ /*
* Generic payload members common to all ereports.
- *
- * The direct reference to spa_name is used rather than spa_name()
- * because of the asynchronous nature of the zio pipeline. spa_name()
- * asserts that the config lock is held in some form. This is always
- * the case in I/O context, but because the check for RW_WRITER compares
- * against 'curthread', we may be in an asynchronous context and blow
- * this assert. Rather than loosen this assert, we acknowledge that all
- * contexts in which this function is called (pool open, I/O) are safe,
- * and dereference the name directly.
*/
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL,
- DATA_TYPE_STRING, spa->spa_name, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
+ DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
DATA_TYPE_UINT64, spa_guid(spa),
FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
- spa->spa_load_state, NULL);
+ state, NULL);
+
+ if (spa != NULL) {
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
+ DATA_TYPE_STRING,
+ spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
+ FM_EREPORT_FAILMODE_WAIT :
+ spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
+ FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC,
+ NULL);
+ }
if (vd != NULL) {
vdev_t *pvd = vd->vdev_parent;
}
/*
- * The 'resource.fs.zfs.ok' event is an internal signal that the associated
- * resource (pool or disk) has been identified by ZFS as healthy. This will
- * then trigger the DE to close the associated case, if any.
- */
-void
-zfs_post_ok(spa_t *spa, vdev_t *vd)
-{
- zfs_post_common(spa, vd, FM_RESOURCE_OK);
-}
-
-/*
* The 'resource.fs.zfs.removed' event is an internal signal that the given vdev
* has been removed from the system. This will cause the DE to ignore any
* recent I/O errors, inferring that they are due to the asynchronous device