{
objset_t *os;
uint64_t volsize;
+ int locked = 0;
int error;
uint64_t ro;
+ /*
+ * In all other cases the spa_namespace_lock is taken before the
+ * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
+ * function calls fops->open() with the bdev->bd_mutex lock held.
+ *
+ * To avoid a potential lock inversion deadlock we preemptively
+ * try to take the spa_namespace_lock. Normally it will not
+ * be contended and this is safe because spa_open_common() handles
+ * the case where the caller already holds the spa_namespace_lock.
+ *
+ * When it is contended we risk a lock inversion if we were to
+ * block waiting for the lock. Luckily, the __blkdev_get()
+ * function allows us to return -ERESTARTSYS which will result in
+ * bdev->bd_mutex being dropped, reacquired, and fops->open() being
+ * called again. This process can be repeated safely until both
+ * locks are acquired.
+ */
+ if (!mutex_owned(&spa_namespace_lock)) {
+ locked = mutex_tryenter(&spa_namespace_lock);
+ if (!locked)
+ return (-ERESTARTSYS);
+ }
+
/* lie and say we're read-only */
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
if (error)
- return (-error);
+ goto out_mutex;
error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error) {
- dmu_objset_disown(os, zvol_tag);
- return (-error);
+ dmu_objset_disown(os, zvol_tag);
+ goto out_mutex;
}
zv->zv_objset = os;
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
if (error) {
- dmu_objset_disown(os, zvol_tag);
- return (-error);
+ dmu_objset_disown(os, zvol_tag);
+ goto out_mutex;
}
set_capacity(zv->zv_disk, volsize >> 9);
VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL) == 0);
if (ro || dmu_objset_is_snapshot(os)) {
- set_disk_ro(zv->zv_disk, 1);
- zv->zv_flags |= ZVOL_RDONLY;
+ set_disk_ro(zv->zv_disk, 1);
+ zv->zv_flags |= ZVOL_RDONLY;
} else {
- set_disk_ro(zv->zv_disk, 0);
- zv->zv_flags &= ~ZVOL_RDONLY;
+ set_disk_ro(zv->zv_disk, 0);
+ zv->zv_flags &= ~ZVOL_RDONLY;
}
+out_mutex:
+ if (locked)
+ mutex_exit(&spa_namespace_lock);
+
return (-error);
}