* Callback vectors for replaying records.
* Only TX_WRITE is needed for zvol.
*/
-zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
- (zil_replay_func_t *)zvol_replay_err, /* no such transaction type */
- (zil_replay_func_t *)zvol_replay_err, /* TX_CREATE */
- (zil_replay_func_t *)zvol_replay_err, /* TX_MKDIR */
- (zil_replay_func_t *)zvol_replay_err, /* TX_MKXATTR */
- (zil_replay_func_t *)zvol_replay_err, /* TX_SYMLINK */
- (zil_replay_func_t *)zvol_replay_err, /* TX_REMOVE */
- (zil_replay_func_t *)zvol_replay_err, /* TX_RMDIR */
- (zil_replay_func_t *)zvol_replay_err, /* TX_LINK */
- (zil_replay_func_t *)zvol_replay_err, /* TX_RENAME */
- (zil_replay_func_t *)zvol_replay_write, /* TX_WRITE */
- (zil_replay_func_t *)zvol_replay_err, /* TX_TRUNCATE */
- (zil_replay_func_t *)zvol_replay_err, /* TX_SETATTR */
- (zil_replay_func_t *)zvol_replay_err, /* TX_ACL */
+zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE] = {
+ (zil_replay_func_t)zvol_replay_err, /* no such transaction type */
+ (zil_replay_func_t)zvol_replay_err, /* TX_CREATE */
+ (zil_replay_func_t)zvol_replay_err, /* TX_MKDIR */
+ (zil_replay_func_t)zvol_replay_err, /* TX_MKXATTR */
+ (zil_replay_func_t)zvol_replay_err, /* TX_SYMLINK */
+ (zil_replay_func_t)zvol_replay_err, /* TX_REMOVE */
+ (zil_replay_func_t)zvol_replay_err, /* TX_RMDIR */
+ (zil_replay_func_t)zvol_replay_err, /* TX_LINK */
+ (zil_replay_func_t)zvol_replay_err, /* TX_RENAME */
+ (zil_replay_func_t)zvol_replay_write, /* TX_WRITE (only op zvols replay) */
+ (zil_replay_func_t)zvol_replay_err, /* TX_TRUNCATE */
+ (zil_replay_func_t)zvol_replay_err, /* TX_SETATTR */
+ (zil_replay_func_t)zvol_replay_err, /* TX_ACL */
};
/*
{
objset_t *os;
uint64_t volsize;
+ int locked = 0; /* nonzero iff this call acquired spa_namespace_lock below */
int error;
uint64_t ro;
+ /*
+ * In all other cases the spa_namespace_lock is taken before the
+ * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
+ * function calls fops->open() with the bdev->bd_mutex lock held.
+ *
+ * To avoid a potential lock inversion deadlock we preemptively
+ * try to take the spa_namespace_lock. Normally it will not
+ * be contended and this is safe because spa_open_common() handles
+ * the case where the caller already holds the spa_namespace_lock.
+ *
+ * When it is contended we risk a lock inversion if we were to
+ * block waiting for the lock. Luckily, the __blkdev_get()
+ * function allows us to return -ERESTARTSYS which will result in
+ * bdev->bd_mutex being dropped, reacquired, and fops->open() being
+ * called again. This process can be repeated safely until both
+ * locks are acquired.
+ */
+ if (!mutex_owned(&spa_namespace_lock)) {
+ locked = mutex_tryenter(&spa_namespace_lock);
+ if (!locked)
+ return (-ERESTARTSYS);
+ }
+
/* lie and say we're read-only */
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
if (error)
- return (-error);
+ goto out_mutex;
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error) {
- dmu_objset_disown(os, zvol_tag);
- return (-error);
+ dmu_objset_disown(os, zvol_tag);
+ goto out_mutex;
}
zv->zv_objset = os;
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
if (error) {
- dmu_objset_disown(os, zvol_tag);
- return (-error);
+ dmu_objset_disown(os, zvol_tag);
+ goto out_mutex;
}
set_capacity(zv->zv_disk, volsize >> 9);
VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL) == 0);
if (ro || dmu_objset_is_snapshot(os)) {
- set_disk_ro(zv->zv_disk, 1);
- zv->zv_flags |= ZVOL_RDONLY;
+ set_disk_ro(zv->zv_disk, 1);
+ zv->zv_flags |= ZVOL_RDONLY;
} else {
- set_disk_ro(zv->zv_disk, 0);
- zv->zv_flags &= ~ZVOL_RDONLY;
+ set_disk_ro(zv->zv_disk, 0);
+ zv->zv_flags &= ~ZVOL_RDONLY;
}
+out_mutex: /* common exit: drop the lock only if this call acquired it */
+ if (locked)
+ mutex_exit(&spa_namespace_lock);
+
return (-error);
}