module/zfs/zfs_ctldir.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  *
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
  25  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  26  * LLNL-CODE-403049.
  27  * Rewritten for Linux by:
  28  *   Rohan Puri <rohan.puri15@gmail.com>
  29  *   Brian Behlendorf <behlendorf1@llnl.gov>
  30  */
  31
  32 /*
  33  * ZFS control directory (a.k.a. ".zfs")
  34  *
  35  * This directory provides a common location for all ZFS meta-objects.
  36  * Currently, this is only the 'snapshot' and 'shares' directory, but this may
  37  * expand in the future.  The elements are built dynamically, as the hierarchy
  38  * does not actually exist on disk.
  39  *
  40  * For 'snapshot', we don't want to have all snapshots always mounted, because
  41  * this would take up a huge amount of space in /etc/mnttab.  We have three
  42  * types of objects:
  43  *
  44  *      ctldir ------> snapshotdir -------> snapshot
  45  *                                             |
  46  *                                             |
  47  *                                             V
  48  *                                         mounted fs
  49  *
  50  * The 'snapshot' node contains just enough information to lookup '..' and act
  51  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
  52  * perform an automount of the underlying filesystem and return the
  53  * corresponding inode.
  54  *
  55  * All mounts are handled automatically by a user mode helper which invokes
  56  * the mount procedure.  Unmounts are handled by allowing the mount
  57  * point to expire so the kernel may automatically unmount it.
  58  *
  59  * The '.zfs', '.zfs/snapshot', and all directories created under
  60  * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
  61  * zfs_sb_t as the head filesystem (what '.zfs' lives under).
  62  *
  63  * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
  64  * (ie: snapshots) are complete ZFS filesystems and have their own unique
  65  * zfs_sb_t.  However, the fsid reported by these mounts will be the same
  66  * as that used by the parent zfs_sb_t to make NFS happy.
  67  */
  68
  69 #include <sys/types.h>
  70 #include <sys/param.h>
  71 #include <sys/time.h>
  72 #include <sys/systm.h>
  73 #include <sys/sysmacros.h>
  74 #include <sys/pathname.h>
  75 #include <sys/vfs.h>
  76 #include <sys/vfs_opreg.h>
  77 #include <sys/zfs_ctldir.h>
  78 #include <sys/zfs_ioctl.h>
  79 #include <sys/zfs_vfsops.h>
  80 #include <sys/zfs_vnops.h>
  81 #include <sys/stat.h>
  82 #include <sys/dmu.h>
  83 #include <sys/dsl_deleg.h>
  84 #include <sys/mount.h>
  85 #include <sys/zpl.h>
  86 #include "zfs_namecheck.h"
  87
  88 /*
  89  * Control Directory Tunables (.zfs)
  90  */
  91 int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;	/* seconds; delay used when scheduling snapshot expiry work */
  92
  93 static zfs_snapentry_t *
  94 zfsctl_sep_alloc(void)
  95 {
  96         return kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP);
  97 }
  98
  99 void
 100 zfsctl_sep_free(zfs_snapentry_t *sep)
 101 {
 102         kmem_free(sep->se_name, MAXNAMELEN);
 103         kmem_free(sep->se_path, PATH_MAX);
 104         kmem_free(sep, sizeof (zfs_snapentry_t));
 105 }
 106
 107 /*
 108  * Attempt to expire an automounted snapshot, unmounts are attempted every
 109  * 'zfs_expire_snapshot' seconds until they succeed.  The work request is
 110  * responsible for rescheduling itself and freeing the zfs_expire_snapshot_t.
 111  */
 112 static void
 113 zfsctl_expire_snapshot(void *data)
 114 {
 115         zfs_snapentry_t *sep;
 116         zfs_sb_t *zsb;
 117         int error;
 118
 119         sep = spl_get_work_data(data, zfs_snapentry_t, se_work.work);
 120         zsb = ITOZSB(sep->se_inode);
 121
 122         error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE);
 123         if (error == EBUSY)
 124                 schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ);
 125 }
 126
 127 int
 128 snapentry_compare(const void *a, const void *b)
 129 {
 130         const zfs_snapentry_t *sa = a;
 131         const zfs_snapentry_t *sb = b;
 132         int ret = strcmp(sa->se_name, sb->se_name);
 133
 134         if (ret < 0)
 135                 return (-1);
 136         else if (ret > 0)
 137                 return (1);
 138         else
 139                 return (0);
 140 }
 141
 142 boolean_t
 143 zfsctl_is_node(struct inode *ip)
 144 {
 145         return (ITOZ(ip)->z_is_ctldir);
 146 }
 147
 148 boolean_t
 149 zfsctl_is_snapdir(struct inode *ip)
 150 {
 151         return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
 152 }
 153
 154 /*
 155  * Allocate a new inode with the passed id and ops.
 156  */
 157 static struct inode *
 158 zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
 159     const struct file_operations *fops, const struct inode_operations *ops)
 160 {
 161         struct timespec now = current_fs_time(zsb->z_sb);
 162         struct inode *ip;
 163         znode_t *zp;
 164
 165         ip = new_inode(zsb->z_sb);
 166         if (ip == NULL)
 167                 return (NULL);
 168
 169         zp = ITOZ(ip);
 170         ASSERT3P(zp->z_dirlocks, ==, NULL);
 171         ASSERT3P(zp->z_acl_cached, ==, NULL);
 172         ASSERT3P(zp->z_xattr_cached, ==, NULL);
 173         zp->z_id = id;
 174         zp->z_unlinked = 0;
 175         zp->z_atime_dirty = 0;
 176         zp->z_zn_prefetch = 0;
 177         zp->z_moved = 0;
 178         zp->z_sa_hdl = NULL;
 179         zp->z_blksz = 0;
 180         zp->z_seq = 0;
 181         zp->z_mapcnt = 0;
 182         zp->z_gen = 0;
 183         zp->z_size = 0;
 184         zp->z_atime[0] = 0;
 185         zp->z_atime[1] = 0;
 186         zp->z_links = 0;
 187         zp->z_pflags = 0;
 188         zp->z_uid = 0;
 189         zp->z_gid = 0;
 190         zp->z_mode = 0;
 191         zp->z_sync_cnt = 0;
 192         zp->z_is_zvol = B_FALSE;
 193         zp->z_is_mapped = B_FALSE;
 194         zp->z_is_ctldir = B_TRUE;
 195         zp->z_is_sa = B_FALSE;
 196         ip->i_ino = id;
 197         ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
 198         ip->i_uid = 0;
 199         ip->i_gid = 0;
 200         ip->i_blkbits = SPA_MINBLOCKSHIFT;
 201         ip->i_atime = now;
 202         ip->i_mtime = now;
 203         ip->i_ctime = now;
 204         ip->i_fop = fops;
 205         ip->i_op = ops;
 206
 207         if (insert_inode_locked(ip)) {
 208                 unlock_new_inode(ip);
 209                 iput(ip);
 210                 return (NULL);
 211         }
 212
 213         mutex_enter(&zsb->z_znodes_lock);
 214         list_insert_tail(&zsb->z_all_znodes, zp);
 215         zsb->z_nr_znodes++;
 216         membar_producer();
 217         mutex_exit(&zsb->z_znodes_lock);
 218
 219         unlock_new_inode(ip);
 220
 221         return (ip);
 222 }
 223
 224 /*
 225  * Lookup the inode with given id, it will be allocated if needed.
 226  */
 227 static struct inode *
 228 zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id,
 229     const struct file_operations *fops, const struct inode_operations *ops)
 230 {
 231         struct inode *ip = NULL;
 232
 233         while (ip == NULL) {
 234                 ip = ilookup(zsb->z_sb, (unsigned long)id);
 235                 if (ip)
 236                         break;
 237
 238                 /* May fail due to concurrent zfsctl_inode_alloc() */
 239                 ip = zfsctl_inode_alloc(zsb, id, fops, ops);
 240         }
 241
 242         return (ip);
 243 }
 244
 245 /*
 246  * Free zfsctl inode specific structures, currently there are none.
 247  */
 248 void
 249 zfsctl_inode_destroy(struct inode *ip)
 250 {
 251         return;
 252 }
 253
 254 /*
 255  * An inode is being evicted from the cache.
 256  */
 257 void
 258 zfsctl_inode_inactive(struct inode *ip)
 259 {
 260         if (zfsctl_is_snapdir(ip))
 261                 zfsctl_snapdir_inactive(ip);
 262 }
 263
 264 /*
 265  * Create the '.zfs' directory.  This directory is cached as part of the VFS
 266  * structure.  This results in a hold on the zfs_sb_t.  The code in zfs_umount()
 267  * therefore checks against a vfs_count of 2 instead of 1.  This reference
 268  * is removed when the ctldir is destroyed in the unmount.  All other entities
 269  * under the '.zfs' directory are created dynamically as needed.
 270  *
 271  * Because the dynamically created '.zfs' directory entries assume the use
 272  * of 64-bit inode numbers this support must be disabled on 32-bit systems.
 273  */
 274 int
 275 zfsctl_create(zfs_sb_t *zsb)
 276 {
 277 #if defined(CONFIG_64BIT)
 278         ASSERT(zsb->z_ctldir == NULL);
 279
 280         zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT,
 281             &zpl_fops_root, &zpl_ops_root);
 282         if (zsb->z_ctldir == NULL)
 283                 return (ENOENT);
 284
 285         return (0);
 286 #else
 287         return (EOPNOTSUPP);
 288 #endif /* CONFIG_64BIT */
 289 }
 290
 291 /*
 292  * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
 293  */
 294 void
 295 zfsctl_destroy(zfs_sb_t *zsb)
 296 {
 297         iput(zsb->z_ctldir);
 298         zsb->z_ctldir = NULL;
 299 }
 300
 301 /*
 302  * Given a root znode, retrieve the associated .zfs directory.
 303  * Add a hold to the vnode and return it.
 304  */
 305 struct inode *
 306 zfsctl_root(znode_t *zp)
 307 {
 308         ASSERT(zfs_has_ctldir(zp));
 309         igrab(ZTOZSB(zp)->z_ctldir);
 310         return (ZTOZSB(zp)->z_ctldir);
 311 }
 312
 313 /*ARGSUSED*/
 314 int
 315 zfsctl_fid(struct inode *ip, fid_t *fidp)
 316 {
 317         znode_t         *zp = ITOZ(ip);
 318         zfs_sb_t        *zsb = ITOZSB(ip);
 319         uint64_t        object = zp->z_id;
 320         zfid_short_t    *zfid;
 321         int             i;
 322
 323         ZFS_ENTER(zsb);
 324
 325         if (fidp->fid_len < SHORT_FID_LEN) {
 326                 fidp->fid_len = SHORT_FID_LEN;
 327                 ZFS_EXIT(zsb);
 328                 return (ENOSPC);
 329         }
 330
 331         zfid = (zfid_short_t *)fidp;
 332
 333         zfid->zf_len = SHORT_FID_LEN;
 334
 335         for (i = 0; i < sizeof (zfid->zf_object); i++)
 336                 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 337
 338         /* .zfs znodes always have a generation number of 0 */
 339         for (i = 0; i < sizeof (zfid->zf_gen); i++)
 340                 zfid->zf_gen[i] = 0;
 341
 342         ZFS_EXIT(zsb);
 343         return (0);
 344 }
 345
 346 static int
 347 zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname)
 348 {
 349         objset_t *os = ITOZSB(ip)->z_os;
 350
 351         if (snapshot_namecheck(name, NULL, NULL) != 0)
 352                 return (EILSEQ);
 353
 354         dmu_objset_name(os, zname);
 355         if ((strlen(zname) + 1 + strlen(name)) >= len)
 356                 return (ENAMETOOLONG);
 357
 358         (void) strcat(zname, "@");
 359         (void) strcat(zname, name);
 360
 361         return (0);
 362 }
 363
 364 static int
 365 zfsctl_snapshot_zpath(struct path *path, int len, char *zpath)
 366 {
 367         char *path_buffer, *path_ptr;
 368         int path_len, error = 0;
 369
 370         path_buffer = kmem_alloc(len, KM_SLEEP);
 371
 372         path_ptr = d_path(path, path_buffer, len);
 373         if (IS_ERR(path_ptr)) {
 374                 error = -PTR_ERR(path_ptr);
 375                 goto out;
 376         }
 377
 378         path_len = path_buffer + len - 1 - path_ptr;
 379         if (path_len > len) {
 380                 error = EFAULT;
 381                 goto out;
 382         }
 383
 384         memcpy(zpath, path_ptr, path_len);
 385         zpath[path_len] = '\0';
 386 out:
 387         kmem_free(path_buffer, len);
 388
 389         return (error);
 390 }
 391
 392 /*
 393  * Special case the handling of "..".
 394  */
 395 /* ARGSUSED */
 396 int
 397 zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp,
 398     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
 399 {
 400         zfs_sb_t *zsb = ITOZSB(dip);
 401         int error = 0;
 402
 403         ZFS_ENTER(zsb);
 404
 405         if (strcmp(name, "..") == 0) {
 406                 *ipp = dip->i_sb->s_root->d_inode;
 407         } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
 408                 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR,
 409                     &zpl_fops_snapdir, &zpl_ops_snapdir);
 410         } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
 411                 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES,
 412                     &zpl_fops_shares, &zpl_ops_shares);
 413         } else {
 414                 *ipp = NULL;
 415         }
 416
 417         if (*ipp == NULL)
 418                 error = ENOENT;
 419
 420         ZFS_EXIT(zsb);
 421
 422         return (error);
 423 }
 424
 425 /*
 426  * Lookup entry point for the 'snapshot' directory.  Try to open the
 427  * snapshot if it exist, creating the pseudo filesystem inode as necessary.
 428  * Perform a mount of the associated dataset on top of the inode.
 429  */
 430 /* ARGSUSED */
 431 int
 432 zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp,
 433     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
 434 {
 435         zfs_sb_t *zsb = ITOZSB(dip);
 436         uint64_t id;
 437         int error;
 438
 439         ZFS_ENTER(zsb);
 440
 441         error = dmu_snapshot_id(zsb->z_os, name, &id);
 442         if (error) {
 443                 ZFS_EXIT(zsb);
 444                 return (error);
 445         }
 446
 447         *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id,
 448             &simple_dir_operations, &simple_dir_inode_operations);
 449         if (*ipp) {
 450 #ifdef HAVE_AUTOMOUNT
 451                 (*ipp)->i_flags |= S_AUTOMOUNT;
 452 #endif /* HAVE_AUTOMOUNT */
 453         } else {
 454                 error = ENOENT;
 455         }
 456
 457         ZFS_EXIT(zsb);
 458
 459         return (error);
 460 }
 461
 462 static void
 463 zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name)
 464 {
 465         avl_index_t where;
 466
 467         ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock));
 468         ASSERT(sep != NULL);
 469
 470         /*
 471          * Change the name in the AVL tree.
 472          */
 473         avl_remove(&zsb->z_ctldir_snaps, sep);
 474         (void) strcpy(sep->se_name, name);
 475         VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL);
 476         avl_insert(&zsb->z_ctldir_snaps, sep, where);
 477 }
 478
 479 /*
 480  * Renaming a directory under '.zfs/snapshot' will automatically trigger
 481  * a rename of the snapshot to the new given name.  The rename is confined
 482  * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere.
 483  */
 484 /*ARGSUSED*/
 485 int
 486 zfsctl_snapdir_rename(struct inode *sdip, char *sname,
 487     struct inode *tdip, char *tname, cred_t *cr, int flags)
 488 {
 489         zfs_sb_t *zsb = ITOZSB(sdip);
 490         zfs_snapentry_t search, *sep;
 491         avl_index_t where;
 492         char *to, *from, *real;
 493         int error;
 494
 495         ZFS_ENTER(zsb);
 496
 497         to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 498         from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 499         real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 500
 501         if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
 502                 error = dmu_snapshot_realname(zsb->z_os, sname, real,
 503                     MAXNAMELEN, NULL);
 504                 if (error == 0) {
 505                         sname = real;
 506                 } else if (error != ENOTSUP) {
 507                         goto out;
 508                 }
 509         }
 510
 511         error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from);
 512         if (!error)
 513                 error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to);
 514         if (!error)
 515                 error = zfs_secpolicy_rename_perms(from, to, cr);
 516         if (error)
 517                 goto out;
 518
 519         /*
 520          * Cannot move snapshots out of the snapdir.
 521          */
 522         if (sdip != tdip) {
 523                 error = EINVAL;
 524                 goto out;
 525         }
 526
 527         /*
 528          * No-op when names are identical.
 529          */
 530         if (strcmp(sname, tname) == 0) {
 531                 error = 0;
 532                 goto out;
 533         }
 534
 535         mutex_enter(&zsb->z_ctldir_lock);
 536
 537         error = dmu_objset_rename(from, to, B_FALSE);
 538         if (error)
 539                 goto out_unlock;
 540
 541         search.se_name = (char *)sname;
 542         sep = avl_find(&zsb->z_ctldir_snaps, &search, &where);
 543         if (sep)
 544                 zfsctl_rename_snap(zsb, sep, tname);
 545
 546 out_unlock:
 547         mutex_exit(&zsb->z_ctldir_lock);
 548 out:
 549         kmem_free(from, MAXNAMELEN);
 550         kmem_free(to, MAXNAMELEN);
 551         kmem_free(real, MAXNAMELEN);
 552
 553         ZFS_EXIT(zsb);
 554
 555         return (error);
 556 }
 557
 558 /*
 559  * Removing a directory under '.zfs/snapshot' will automatically trigger
 560  * the removal of the snapshot with the given name.
 561  */
 562 /* ARGSUSED */
 563 int
 564 zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
 565 {
 566         zfs_sb_t *zsb = ITOZSB(dip);
 567         char *snapname, *real;
 568         int error;
 569
 570         ZFS_ENTER(zsb);
 571
 572         snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 573         real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 574
 575         if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
 576                 error = dmu_snapshot_realname(zsb->z_os, name, real,
 577                     MAXNAMELEN, NULL);
 578                 if (error == 0) {
 579                         name = real;
 580                 } else if (error != ENOTSUP) {
 581                         goto out;
 582                 }
 583         }
 584
 585         error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname);
 586         if (!error)
 587                 error = zfs_secpolicy_destroy_perms(snapname, cr);
 588         if (error)
 589                 goto out;
 590
 591         error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE);
 592         if ((error == 0) || (error == ENOENT))
 593                 error = dmu_objset_destroy(snapname, B_FALSE);
 594 out:
 595         kmem_free(snapname, MAXNAMELEN);
 596         kmem_free(real, MAXNAMELEN);
 597
 598         ZFS_EXIT(zsb);
 599
 600         return (error);
 601 }
 602
 603 /*
 604  * Creating a directory under '.zfs/snapshot' will automatically trigger
 605  * the creation of a new snapshot with the given name.
 606  */
 607 /* ARGSUSED */
 608 int
 609 zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
 610         struct inode **ipp, cred_t *cr, int flags)
 611 {
 612         zfs_sb_t *zsb = ITOZSB(dip);
 613         char *dsname;
 614         int error;
 615
 616         dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 617
 618         if (snapshot_namecheck(dirname, NULL, NULL) != 0) {
 619                 error = EILSEQ;
 620                 goto out;
 621         }
 622
 623         dmu_objset_name(zsb->z_os, dsname);
 624
 625         error = zfs_secpolicy_snapshot_perms(dsname, cr);
 626         if (error)
 627                 goto out;
 628
 629         if (error == 0) {
 630                 error = dmu_objset_snapshot(dsname, dirname,
 631                     NULL, NULL, B_FALSE, B_FALSE, -1);
 632                 if (error)
 633                         goto out;
 634
 635                 error = zfsctl_snapdir_lookup(dip, dirname, ipp,
 636                     0, cr, NULL, NULL);
 637         }
 638 out:
 639         kmem_free(dsname, MAXNAMELEN);
 640
 641         return (error);
 642 }
 643
 644 /*
 645  * When a .zfs/snapshot/<snapshot> inode is evicted they must be removed
 646  * from the snapshot list.  This will normally happen as part of the auto
 647  * unmount, however in the case of a manual snapshot unmount this will be
 648  * the only notification we receive.
 649  */
 650 void
 651 zfsctl_snapdir_inactive(struct inode *ip)
 652 {
 653         zfs_sb_t *zsb = ITOZSB(ip);
 654         zfs_snapentry_t *sep, *next;
 655
 656         mutex_enter(&zsb->z_ctldir_lock);
 657
 658         sep = avl_first(&zsb->z_ctldir_snaps);
 659         while (sep != NULL) {
 660                 next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
 661
 662                 if (sep->se_inode == ip) {
 663                         avl_remove(&zsb->z_ctldir_snaps, sep);
 664                         cancel_delayed_work_sync(&sep->se_work);
 665                         zfsctl_sep_free(sep);
 666                         break;
 667                 }
 668                 sep = next;
 669         }
 670
 671         mutex_exit(&zsb->z_ctldir_lock);
 672 }
 673
 674 /*
 675  * Attempt to unmount a snapshot by making a call to user space.
 676  * There is no assurance that this can or will succeed, is just a
 677  * best effort.  In the case where it does fail, perhaps because
 678  * it's in use, the unmount will fail harmlessly.
 679  */
 680 #define SET_UNMOUNT_CMD \
 681         "exec 0</dev/null " \
 682         "     1>/dev/null " \
 683         "     2>/dev/null; " \
 684         "umount -t zfs -n '%s%s'"
 685
 686 static int
 687 __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags)
 688 {
 689         char *argv[] = { "/bin/sh", "-c", NULL, NULL };
 690         char *envp[] = { NULL };
 691         int error;
 692
 693         argv[2] = kmem_asprintf(SET_UNMOUNT_CMD,
 694             flags & MNT_FORCE ? "-f " : "", sep->se_path);
 695         error = call_usermodehelper(argv[0], argv, envp, 1);
 696         strfree(argv[2]);
 697
 698         /*
 699          * The umount system utility will return 256 on error.  We must
 700          * assume this error is because the file system is busy so it is
 701          * converted to the more sensible EBUSY.
 702          */
 703         if (error)
 704                 error = EBUSY;
 705
 706         /*
 707          * This was the result of a manual unmount, cancel the delayed work
 708          * to prevent zfsctl_expire_snapshot() from attempting a unmount.
 709          */
 710         if ((error == 0) && !(flags & MNT_EXPIRE))
 711                 cancel_delayed_work(&sep->se_work);
 712
 713         return (error);
 714 }
 715
 716 int
 717 zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags)
 718 {
 719         zfs_snapentry_t search;
 720         zfs_snapentry_t *sep;
 721         int error = 0;
 722
 723         mutex_enter(&zsb->z_ctldir_lock);
 724
 725         search.se_name = name;
 726         sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
 727         if (sep) {
 728                 avl_remove(&zsb->z_ctldir_snaps, sep);
 729                 error = __zfsctl_unmount_snapshot(sep, flags);
 730                 if (error == EBUSY)
 731                         avl_add(&zsb->z_ctldir_snaps, sep);
 732                 else
 733                         zfsctl_sep_free(sep);
 734         } else {
 735                 error = ENOENT;
 736         }
 737
 738         mutex_exit(&zsb->z_ctldir_lock);
 739         ASSERT3S(error, >=, 0);
 740
 741         return (error);
 742 }
 743
 744 /*
 745  * Traverse all mounted snapshots and attempt to unmount them.  This
 746  * is best effort, on failure EEXIST is returned and count will be set
 747  * to the number of file snapshots which could not be unmounted.
 748  */
 749 int
 750 zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count)
 751 {
 752         zfs_snapentry_t *sep, *next;
 753         int error = 0;
 754
 755         *count = 0;
 756
 757         ASSERT(zsb->z_ctldir != NULL);
 758         mutex_enter(&zsb->z_ctldir_lock);
 759
 760         sep = avl_first(&zsb->z_ctldir_snaps);
 761         while (sep != NULL) {
 762                 next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
 763                 avl_remove(&zsb->z_ctldir_snaps, sep);
 764                 error = __zfsctl_unmount_snapshot(sep, flags);
 765                 if (error == EBUSY) {
 766                         avl_add(&zsb->z_ctldir_snaps, sep);
 767                         (*count)++;
 768                 } else {
 769                         zfsctl_sep_free(sep);
 770                 }
 771
 772                 sep = next;
 773         }
 774
 775         mutex_exit(&zsb->z_ctldir_lock);
 776
 777         return ((*count > 0) ? EEXIST : 0);
 778 }
 779
 780 #define SET_MOUNT_CMD \
 781         "exec 0</dev/null " \
 782         "     1>/dev/null " \
 783         "     2>/dev/null; " \
 784         "mount -t zfs -n '%s' '%s'"
 785
 786 int
 787 zfsctl_mount_snapshot(struct path *path, int flags)
 788 {
 789         struct dentry *dentry = path->dentry;
 790         struct inode *ip = dentry->d_inode;
 791         zfs_sb_t *zsb = ITOZSB(ip);
 792         char *full_name, *full_path;
 793         zfs_snapentry_t *sep;
 794         zfs_snapentry_t search;
 795         char *argv[] = { "/bin/sh", "-c", NULL, NULL };
 796         char *envp[] = { NULL };
 797         int error;
 798
 799         ZFS_ENTER(zsb);
 800
 801         full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
 802         full_path = kmem_zalloc(PATH_MAX, KM_SLEEP);
 803
 804         error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name);
 805         if (error)
 806                 goto error;
 807
 808         error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path);
 809         if (error)
 810                 goto error;
 811
 812         /*
 813          * Attempt to mount the snapshot from user space.  Normally this
 814          * would be done using the vfs_kern_mount() function, however that
 815          * function is marked GPL-only and cannot be used.  On error we
 816          * careful to log the real error to the console and return EISDIR
 817          * to safely abort the automount.  This should be very rare.
 818          */
 819         argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path);
 820         error = call_usermodehelper(argv[0], argv, envp, 1);
 821         strfree(argv[2]);
 822         if (error) {
 823                 printk("ZFS: Unable to automount %s at %s: %d\n",
 824                     full_name, full_path, error);
 825                 error = EISDIR;
 826                 goto error;
 827         }
 828
 829         mutex_enter(&zsb->z_ctldir_lock);
 830
 831         /*
 832          * Ensure a previous entry does not exist, if it does safely remove
 833          * it any cancel the outstanding expiration.  This can occur when a
 834          * snapshot is manually unmounted and then an automount is triggered.
 835          */
 836         search.se_name = full_name;
 837         sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
 838         if (sep) {
 839                 avl_remove(&zsb->z_ctldir_snaps, sep);
 840                 cancel_delayed_work_sync(&sep->se_work);
 841                 zfsctl_sep_free(sep);
 842         }
 843
 844         sep = zfsctl_sep_alloc();
 845         sep->se_name = full_name;
 846         sep->se_path = full_path;
 847         sep->se_inode = ip;
 848         avl_add(&zsb->z_ctldir_snaps, sep);
 849
 850         spl_init_delayed_work(&sep->se_work, zfsctl_expire_snapshot, sep);
 851         schedule_delayed_work(&sep->se_work, zfs_expire_snapshot * HZ);
 852
 853         mutex_exit(&zsb->z_ctldir_lock);
 854 error:
 855         if (error) {
 856                 kmem_free(full_name, MAXNAMELEN);
 857                 kmem_free(full_path, PATH_MAX);
 858         }
 859
 860         ZFS_EXIT(zsb);
 861
 862         return (error);
 863 }
 864
 865 /*
 866  * Check if this super block has a matching objset id.
 867  */
 868 static int
 869 zfsctl_test_super(struct super_block *sb, void *objsetidp)
 870 {
 871         zfs_sb_t *zsb = sb->s_fs_info;
 872         uint64_t objsetid = *(uint64_t *)objsetidp;
 873
 874         return (dmu_objset_id(zsb->z_os) == objsetid);
 875 }
 876
 877 /*
 878  * Prevent a new super block from being allocated if an existing one
 879  * could not be located.  We only want to preform a lookup operation.
 880  */
 881 static int
 882 zfsctl_set_super(struct super_block *sb, void *objsetidp)
 883 {
 884         return (-EEXIST);
 885 }
 886
 887 int
 888 zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
 889 {
 890         zfs_sb_t *zsb = sb->s_fs_info;
 891         struct super_block *sbp;
 892         zfs_snapentry_t *sep;
 893         uint64_t id;
 894         int error;
 895
 896         ASSERT(zsb->z_ctldir != NULL);
 897
 898         mutex_enter(&zsb->z_ctldir_lock);
 899
 900         /*
 901          * Verify that the snapshot is mounted.
 902          */
 903         sep = avl_first(&zsb->z_ctldir_snaps);
 904         while (sep != NULL) {
 905                 error = dmu_snapshot_id(zsb->z_os, sep->se_name, &id);
 906                 if (error)
 907                         goto out;
 908
 909                 if (id == objsetid)
 910                         break;
 911
 912                 sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
 913         }
 914
 915         if (sep != NULL) {
 916                 /*
 917                  * Lookup the mounted root rather than the covered mount
 918                  * point.  This may fail if the snapshot has just been
 919                  * unmounted by an unrelated user space process.  This
 920                  * race cannot occur to an expired mount point because
 921                  * we hold the zsb->z_ctldir_lock to prevent the race.
 922                  */
 923                 sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super,
 924                     zfsctl_set_super, 0, &id);
 925                 if (IS_ERR(sbp)) {
 926                         error = -PTR_ERR(sbp);
 927                 } else {
 928                         *zsbp = sbp->s_fs_info;
 929                         deactivate_super(sbp);
 930                 }
 931         } else {
 932                 error = EINVAL;
 933         }
 934 out:
 935         mutex_exit(&zsb->z_ctldir_lock);
 936         ASSERT3S(error, >=, 0);
 937
 938         return (error);
 939 }
 940
 941 /* ARGSUSED */
 942 int
 943 zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
 944     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
 945 {
 946         zfs_sb_t *zsb = ITOZSB(dip);
 947         struct inode *ip;
 948         znode_t *dzp;
 949         int error;
 950
 951         ZFS_ENTER(zsb);
 952
 953         if (zsb->z_shares_dir == 0) {
 954                 ZFS_EXIT(zsb);
 955                 return (ENOTSUP);
 956         }
 957
 958         error = zfs_zget(zsb, zsb->z_shares_dir, &dzp);
 959         if (error) {
 960                 ZFS_EXIT(zsb);
 961                 return (error);
 962         }
 963
 964         error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL);
 965
 966         iput(ZTOI(dzp));
 967         ZFS_EXIT(zsb);
 968
 969         return (error);
 970 }
 971
 972
 973 /*
 974  * Initialize the various pieces we'll need to create and manipulate .zfs
 975  * directories.  Currently this is unused but available.
 976  */
 977 void
 978 zfsctl_init(void)
 979 {
 980 }
 981
 982 /*
 983  * Cleanup the various pieces we needed for .zfs directories.  In particular
 984  * ensure the expiry timer is canceled safely.
 985  */
 986 void
 987 zfsctl_fini(void)
 988 {
 989 }
 990
 991 module_param(zfs_expire_snapshot, int, 0644);	/* int module parameter, registered with mode 0644 */
 992 MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");