zfs/lib/libudmu/udmu.c

   1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
   2  * vim:expandtab:shiftwidth=8:tabstop=8:
   3  *
   4  *  lustre/dmu/udmu.c
   5  *  Module that interacts with the ZFS DMU and provides an abstraction
   6  *  to the rest of Lustre.
   7  *
   8  *  Copyright (c) 2007 Cluster File Systems, Inc.
   9  *   Author: Alex Tomas <alex@clusterfs.com>
  10  *   Author: Atul Vidwansa <atul.vidwansa@sun.com>
  11  *   Author: Manoj Joseph <manoj.joseph@sun.com>
  12  *   Author: Mike Pershin <tappro@sun.com>
  13  *
  14  *   This file is part of the Lustre file system, http://www.lustre.org
  15  *   Lustre is a trademark of Cluster File Systems, Inc.
  16  *
  17  *   You may have signed or agreed to another license before downloading
  18  *   this software.  If so, you are bound by the terms and conditions
  19  *   of that agreement, and the following does not apply to you.  See the
  20  *   LICENSE file included with this distribution for more information.
  21  *
  22  *   If you did not agree to a different license, then this copy of Lustre
  23  *   is open source software; you can redistribute it and/or modify it
  24  *   under the terms of version 2 of the GNU General Public License as
  25  *   published by the Free Software Foundation.
  26  *
  27  *   In either case, Lustre is distributed in the hope that it will be
  28  *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
  29  *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  30  *   license text for more details.
  31  */
  32
  33 #include <sys/dnode.h>
  34 #include <sys/dbuf.h>
  35 #include <sys/spa.h>
  36 #include <sys/stat.h>
  37 #include <sys/statvfs.h>
  38 #include <sys/zap.h>
  39 #include <sys/spa_impl.h>
  40 #include <sys/zfs_znode.h>
  41 #include <sys/dmu_tx.h>
  42 #include <sys/dmu_objset.h>
  43 #include <udmu.h>
  44 #include <sys/dbuf.h>
  45 #include <sys/dnode.h>
  46 #include <sys/dmu_ctl.h>
  47
  48 enum vtype iftovt_tab[] = {
  49         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
  50         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
  51 };
  52
  53 ushort_t vttoif_tab[] = {
  54         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
  55         S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
  56 };
  57
  58 #define MODEMASK        07777
  59
  60 #define IFTOVT(M)       (iftovt_tab[((M) & S_IFMT) >> 12])
  61 #define VTTOIF(T)       (vttoif_tab[(int)(T)])
  62 #define MAKEIMODE(T, M) (VTTOIF(T) | ((M) & ~S_IFMT))
  63
  64 /*
  65  * Debug levels. Default is LEVEL_CRITICAL.
  66  */
  67 #define LEVEL_CRITICAL  1
  68 #define LEVEL_INFO      2
  69 #define LEVEL_DEBUG     3
  70
  71 static int debug_level = LEVEL_CRITICAL;
  72
  73 #define CONFIG_DIR "/var/run/zfs/udmu"
  74
  75 static char configdir[MAXPATHLEN];
  76
  77 static void udmu_gethrestime(struct timespec *tp)
  78 {
  79         tp->tv_nsec = 0;
  80         time(&tp->tv_sec);
  81 }
  82
  83 static void udmu_printf(int level, FILE *stream, char *message, ...)
  84 {
  85         va_list args;
  86
  87         if (level <= debug_level) {
  88                 va_start(args, message);
  89                 (void) vfprintf(stream, message, args);
  90                 va_end(args);
  91         }
  92 }
  93
  94 void udmu_debug(int level)
  95 {
  96         debug_level = level;
  97 }
  98
  99 void udmu_init()
 100 {
 101         char cmd[MAXPATHLEN];
 102         struct rlimit rl = { 1024, 1024 };
 103         int rc;
 104
 105         /*
 106          * Set spa_config_dir to /var/run/zfs/udmu/$pid.
 107          */
 108         snprintf(configdir, MAXPATHLEN, "%s/%d", CONFIG_DIR, (int)getpid());
 109
 110         snprintf(cmd, MAXPATHLEN, "mkdir -p %s", configdir);
 111         system(cmd);
 112
 113         spa_config_dir = configdir;
 114
 115         (void) setvbuf(stdout, NULL, _IOLBF, 0);
 116         (void) setrlimit(RLIMIT_NOFILE, &rl);
 117
 118         /* Initialize the emulation of kernel services in userland. */
 119         kernel_init(FREAD | FWRITE);
 120
 121         rc = dctl_server_init(configdir, 2, 2);
 122         if (rc != 0)
 123                 fprintf(stderr, "Error calling dctl_server_init(): %i\n"
 124                     "lzpool and lzfs will not be functional!\n", rc);
 125 }
 126
 127 void udmu_fini()
 128 {
 129         int rc;
 130
 131         rc = dctl_server_fini();
 132         if (rc != 0)
 133                 fprintf(stderr, "Error calling dctl_server_fini(): %i!\n", rc);
 134
 135         kernel_fini();
 136 }
 137
 138 int udmu_objset_open(char *osname, char *import_dir, int import, int force,
 139                      udmu_objset_t *uos)
 140 {
 141         int error;
 142         char cmd[MAXPATHLEN];
 143         char *c;
 144         uint64_t version = ZPL_VERSION;
 145         int tried_import = FALSE;
 146
 147         memset(uos, 0, sizeof(udmu_objset_t));
 148
 149         c = strchr(osname, '/');
 150
 151 top:
 152         /* Let's try to open the objset */
 153         error = dmu_objset_open(osname, DMU_OST_ZFS, DS_MODE_STANDARD,
 154                                 &uos->os);
 155
 156         if (error == ENOENT && import && !tried_import) {
 157                 /* objset not found, let's try to import the pool */
 158                 udmu_printf(LEVEL_INFO, stdout, "Importing pool %s\n", osname);
 159
 160                 if (c != NULL)
 161                         *c = '\0';
 162
 163                 snprintf(cmd, sizeof(cmd), "lzpool import%s%s%s %s",
 164                     force ? " -F" : "", import_dir ? " -d " : "",
 165                     import_dir ? import_dir : "", osname);
 166
 167                 if (c != NULL)
 168                         *c = '/';
 169
 170                 error = system(cmd);
 171
 172                 if (error) {
 173                         udmu_printf(LEVEL_CRITICAL, stderr, "\"%s\" failed:"
 174                             " %d\n", cmd, error);
 175                         return(error);
 176                 }
 177
 178                 tried_import = TRUE;
 179                 goto top;
 180         }
 181
 182         if (error) {
 183                 uos->os = NULL;
 184                 goto out;
 185         }
 186
 187         /* Check ZFS version */
 188         error = zap_lookup(uos->os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
 189                            &version);
 190         if (error) {
 191                 udmu_printf(LEVEL_CRITICAL, stderr,
 192                             "Error looking up ZPL VERSION");
 193                 /*
 194                  * We can't return ENOENT because that would mean the objset
 195                  * didn't exist.
 196                  */
 197                 error = EIO;
 198                 goto out;
 199         } else if (version != LUSTRE_ZPL_VERSION) {
 200                 udmu_printf(LEVEL_CRITICAL, stderr,
 201                             "Mismatched versions:  File system "
 202                             "is version %lld on-disk format, which is "
 203                             "incompatible with this software version %lld!",
 204                             (u_longlong_t)version, LUSTRE_ZPL_VERSION);
 205                 error = ENOTSUP;
 206                 goto out;
 207         }
 208
 209         error = zap_lookup(uos->os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ,
 210                            8, 1, &uos->root);
 211         if (error) {
 212                 udmu_printf(LEVEL_CRITICAL, stderr,
 213                             "Error looking up ZFS root object.");
 214                 error = EIO;
 215                 goto out;
 216         }
 217         ASSERT(uos->root != 0);
 218
 219 out:
 220         if (error) {
 221                 if (uos->os == NULL && tried_import) {
 222                         if (c != NULL)
 223                                 *c = '\0';
 224                         spa_export(osname, NULL);
 225                         if (c != NULL)
 226                                 *c = '/';
 227                 } else if(uos->os != NULL)
 228                         udmu_objset_close(uos, tried_import);
 229         }
 230
 231         return (error);
 232 }
 233
 234 void udmu_wait_synced(udmu_objset_t *uos, dmu_tx_t *tx)
 235 {
 236         /* Wait for the pool to be synced */
 237         txg_wait_synced(dmu_objset_pool(uos->os),
 238                         tx ? tx->tx_txg : 0ULL);
 239 }
 240
 241 void udmu_objset_close(udmu_objset_t *uos, int export_pool)
 242 {
 243         spa_t *spa;
 244         char pool_name[MAXPATHLEN];
 245
 246         ASSERT(uos->os != NULL);
 247         spa = uos->os->os->os_spa;
 248
 249         spa_config_enter(spa, RW_READER, FTAG);
 250         strncpy(pool_name, spa_name(spa), sizeof(pool_name));
 251         spa_config_exit(spa, FTAG);
 252
 253         udmu_wait_synced(uos, NULL);
 254         /* close the object set */
 255         dmu_objset_close(uos->os);
 256
 257         uos->os = NULL;
 258
 259         if (export_pool)
 260                 spa_export(pool_name, NULL);
 261 }
 262
 263 int udmu_objset_statvfs(udmu_objset_t *uos, struct statvfs64 *statp)
 264 {
 265         uint64_t refdbytes, availbytes, usedobjs, availobjs;
 266
 267         dmu_objset_space(uos->os, &refdbytes, &availbytes, &usedobjs,
 268                          &availobjs);
 269
 270         /*
 271          * The underlying storage pool actually uses multiple block sizes.
 272          * We report the fragsize as the smallest block size we support,
 273          * and we report our blocksize as the filesystem's maximum blocksize.
 274          */
 275         statp->f_frsize = 1ULL << SPA_MINBLOCKSHIFT;
 276         statp->f_bsize = 1ULL << SPA_MAXBLOCKSHIFT;
 277
 278         /*
 279          * The following report "total" blocks of various kinds in the
 280          * file system, but reported in terms of f_frsize - the
 281          * "fragment" size.
 282          */
 283
 284         statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
 285         statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
 286         statp->f_bavail = statp->f_bfree; /* no root reservation */
 287
 288         /*
 289          * statvfs() should really be called statufs(), because it assumes
 290          * static metadata.  ZFS doesn't preallocate files, so the best
 291          * we can do is report the max that could possibly fit in f_files,
 292          * and that minus the number actually used in f_ffree.
 293          * For f_ffree, report the smaller of the number of object available
 294          * and the number of blocks (each object will take at least a block).
 295          */
 296         statp->f_ffree = MIN(availobjs, statp->f_bfree);
 297         statp->f_favail = statp->f_ffree; /* no "root reservation" */
 298         statp->f_files = statp->f_ffree + usedobjs;
 299
 300         /* ZFSFUSE: not necessary? see 'man statfs' */
 301         /*(void) cmpldev(&d32, vfsp->vfs_dev);
 302         statp->f_fsid = d32;*/
 303
 304         /*
 305          * We're a zfs filesystem.
 306          */
 307         /* ZFSFUSE: not necessary */
 308         /*(void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
 309
 310         statp->f_flag = vf_to_stf(vfsp->vfs_flag);*/
 311
 312         statp->f_namemax = 256;
 313
 314         return (0);
 315 }
 316
 317 static int udmu_obj2dbuf(udmu_objset_t *uos, uint64_t oid, dmu_buf_t **dbp,
 318                          void *tag)
 319 {
 320         dmu_object_info_t doi;
 321         int err;
 322
 323         ASSERT(tag);
 324
 325         err = dmu_bonus_hold(uos->os, oid, tag, dbp);
 326         if (err) {
 327                 return (err);
 328         }
 329
 330         dmu_object_info_from_db(*dbp, &doi);
 331         if (doi.doi_bonus_type != DMU_OT_ZNODE ||
 332             doi.doi_bonus_size < sizeof (znode_phys_t)) {
 333                 dmu_buf_rele(*dbp, tag);
 334                 return (EINVAL);
 335         }
 336
 337         ASSERT(*dbp);
 338         ASSERT((*dbp)->db_object == oid);
 339         ASSERT((*dbp)->db_offset == -1);
 340         ASSERT((*dbp)->db_data != NULL);
 341
 342         return (0);
 343 }
 344
 345 int udmu_objset_root(udmu_objset_t *uos, dmu_buf_t **dbp, void *tag)
 346 {
 347         return (udmu_obj2dbuf(uos, uos->root, dbp, tag));
 348 }
 349
 350 int udmu_zap_lookup(udmu_objset_t *uos, dmu_buf_t *zap_db, const char *name,
 351                     void *value, int value_size, int intsize)
 352 {
 353         uint64_t oid;
 354         oid = zap_db->db_object;
 355
 356         /*
 357          * value_size should be a multiple of intsize.
 358          * intsize is 8 for micro ZAP and 1, 2, 4 or 8 for a fat ZAP.
 359          */
 360         ASSERT(value_size % intsize == 0);
 361         return (zap_lookup(uos->os, oid, name, intsize,
 362                            value_size / intsize, value));
 363 }
 364
 365 /*
 366  * The transaction passed to this routine must have
 367  * udmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
 368  * to a transaction group.
 369  */
 370 void udmu_object_create(udmu_objset_t *uos, dmu_buf_t **dbp, dmu_tx_t *tx,
 371                         void *tag)
 372 {
 373         znode_phys_t    *zp;
 374         uint64_t        oid;
 375         uint64_t        gen;
 376         timestruc_t     now;
 377
 378         ASSERT(tag);
 379
 380         /* Assert that the transaction has been assigned to a
 381            transaction group. */
 382         ASSERT(tx->tx_txg != 0);
 383
 384         udmu_gethrestime(&now);
 385         gen = dmu_tx_get_txg(tx);
 386
 387         /* Create a new DMU object. */
 388         oid = dmu_object_alloc(uos->os, DMU_OT_PLAIN_FILE_CONTENTS, 0,
 389                                DMU_OT_ZNODE, sizeof (znode_phys_t), tx);
 390
 391         dmu_object_set_blocksize(uos->os, oid, 128ULL << 10, 0, tx);
 392
 393         VERIFY(0 == dmu_bonus_hold(uos->os, oid, tag, dbp));
 394
 395         dmu_buf_will_dirty(*dbp, tx);
 396
 397         /* Initialize the znode physical data to zero. */
 398         ASSERT((*dbp)->db_size >= sizeof (znode_phys_t));
 399         bzero((*dbp)->db_data, (*dbp)->db_size);
 400         zp = (*dbp)->db_data;
 401         zp->zp_gen = gen;
 402         zp->zp_links = 1;
 403         ZFS_TIME_ENCODE(&now, zp->zp_crtime);
 404         ZFS_TIME_ENCODE(&now, zp->zp_ctime);
 405         ZFS_TIME_ENCODE(&now, zp->zp_atime);
 406         ZFS_TIME_ENCODE(&now, zp->zp_mtime);
 407         zp->zp_mode = MAKEIMODE(VREG, 0007);
 408 }
 409
 410
 411 /*
 412  * The transaction passed to this routine must have
 413  * udmu_tx_hold_zap(tx, DMU_NEW_OBJECT, ...) called and then assigned
 414  * to a transaction group.
 415  */
 416 void udmu_zap_create(udmu_objset_t *uos, dmu_buf_t **zap_dbp, dmu_tx_t *tx,
 417                      void *tag)
 418 {
 419         znode_phys_t    *zp;
 420         uint64_t        oid;
 421         timestruc_t     now;
 422         uint64_t        gen;
 423
 424         ASSERT(tag);
 425
 426         /* Assert that the transaction has been assigned to a
 427            transaction group. */
 428         ASSERT(tx->tx_txg != 0);
 429
 430         oid = 0;
 431         udmu_gethrestime(&now);
 432         gen = dmu_tx_get_txg(tx);
 433
 434         oid = zap_create(uos->os, DMU_OT_DIRECTORY_CONTENTS, DMU_OT_ZNODE,
 435                          sizeof (znode_phys_t), tx);
 436
 437         VERIFY(0 == dmu_bonus_hold(uos->os, oid, tag, zap_dbp));
 438
 439         dmu_buf_will_dirty(*zap_dbp, tx);
 440
 441         bzero((*zap_dbp)->db_data, (*zap_dbp)->db_size);
 442         zp = (*zap_dbp)->db_data;
 443         zp->zp_size = 2;
 444         zp->zp_links = 1;
 445         zp->zp_gen = gen;
 446         zp->zp_mode = MAKEIMODE(VDIR, 0007);
 447
 448         ZFS_TIME_ENCODE(&now, zp->zp_crtime);
 449         ZFS_TIME_ENCODE(&now, zp->zp_ctime);
 450         ZFS_TIME_ENCODE(&now, zp->zp_atime);
 451         ZFS_TIME_ENCODE(&now, zp->zp_mtime);
 452 }
 453
 454 int udmu_object_get_dmu_buf(udmu_objset_t *uos, uint64_t object,
 455                             dmu_buf_t **dbp, void *tag)
 456 {
 457         return (udmu_obj2dbuf(uos, object, dbp, tag));
 458 }
 459
 460
 461 /*
 462  * The transaction passed to this routine must have
 463  * udmu_tx_hold_bonus(tx, oid) and
 464  * udmu_tx_hold_zap(tx, oid, ...)
 465  * called and then assigned to a transaction group.
 466  */
 467 int udmu_zap_insert(udmu_objset_t *uos, dmu_buf_t *zap_db, dmu_tx_t *tx,
 468                     const char *name, void *value, int len)
 469 {
 470         uint64_t oid = zap_db->db_object;
 471
 472         /* Assert that the transaction has been assigned to a
 473            transaction group. */
 474         ASSERT(tx->tx_txg != 0);
 475
 476         dmu_buf_will_dirty(zap_db, tx);
 477         return (zap_add(uos->os, oid, name, 8, 1, value, tx));
 478 }
 479
 480 /*
 481  * The transaction passed to this routine must have
 482  * udmu_tx_hold_zap(tx, oid, ...) called and then
 483  * assigned to a transaction group.
 484  */
 485 int udmu_zap_delete(udmu_objset_t *uos, dmu_buf_t *zap_db, dmu_tx_t *tx,
 486                     const char *name)
 487 {
 488         uint64_t oid = zap_db->db_object;
 489
 490         /* Assert that the transaction has been assigned to a
 491            transaction group. */
 492         ASSERT(tx->tx_txg != 0);
 493
 494         return (zap_remove(uos->os, oid, name, tx));
 495 }
 496
 497 /*
 498  * Read data from a DMU object
 499  */
 500 int udmu_object_read(udmu_objset_t *uos, dmu_buf_t *db, uint64_t offset,
 501                      uint64_t size, void *buf)
 502 {
 503         uint64_t oid = db->db_object;
 504         vnattr_t va;
 505         int rc;
 506
 507         udmu_printf(LEVEL_INFO, stdout, "udmu_read(%lld, %lld, %lld)\n",
 508                     oid, offset, size);
 509
 510         udmu_object_getattr(db, &va);
 511         if (offset + size > va.va_size) {
 512                 if (va.va_size < offset)
 513                         size = 0;
 514                 else
 515                         size = va.va_size - offset;
 516         }
 517
 518         rc = dmu_read(uos->os, oid, offset, size, buf);
 519         if (rc == 0)
 520                 return size;
 521         else
 522                 return (-rc);
 523 }
 524
 525 /*
 526  * Write data to a DMU object
 527  *
 528  * The transaction passed to this routine must have had
 529  * udmu_tx_hold_write(tx, oid, offset, size) called and then
 530  * assigned to a transaction group.
 531  */
 532 void udmu_object_write(udmu_objset_t *uos, dmu_buf_t *db, struct dmu_tx *tx,
 533                        uint64_t offset, uint64_t size, void *buf)
 534 {
 535         uint64_t oid = db->db_object;
 536
 537         udmu_printf(LEVEL_INFO, stdout, "udmu_write(%lld, %lld, %lld\n",
 538                     oid, offset, size);
 539
 540         dmu_write(uos->os, oid, offset, size, buf, tx);
 541 }
 542
 543 /*
 544  * Retrieve the attributes of a DMU object
 545  */
 546 void udmu_object_getattr(dmu_buf_t *db, vnattr_t *vap)
 547 {
 548         dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode;
 549         znode_phys_t *zp = db->db_data;
 550
 551         vap->va_mask = AT_ATIME | AT_MTIME | AT_CTIME | AT_MODE | AT_SIZE |
 552                        AT_UID | AT_GID | AT_TYPE | AT_NLINK | AT_RDEV;
 553         vap->va_atime.tv_sec    = zp->zp_atime[0];
 554         vap->va_atime.tv_nsec   = 0;
 555         vap->va_mtime.tv_sec    = zp->zp_mtime[0];
 556         vap->va_mtime.tv_nsec   = 0;
 557         vap->va_ctime.tv_sec    = zp->zp_ctime[0];
 558         vap->va_ctime.tv_nsec   = 0;
 559         vap->va_mode     = zp->zp_mode & MODEMASK;;
 560         vap->va_size     = zp->zp_size;
 561         vap->va_uid      = zp->zp_uid;
 562         vap->va_gid      = zp->zp_gid;
 563         vap->va_type     = IFTOVT((mode_t)zp->zp_mode);
 564         vap->va_nlink    = zp->zp_links;
 565         vap->va_rdev     = zp->zp_rdev;
 566
 567         vap->va_blksize = dn->dn_datablksz;
 568         vap->va_blkbits = dn->dn_datablkshift;
 569         /* in 512-bytes units*/
 570         vap->va_nblocks = DN_USED_BYTES(dn->dn_phys) >> SPA_MINBLOCKSHIFT;
 571         vap->va_mask |= AT_NBLOCKS | AT_BLKSIZE;
 572 }
 573
 574 /*
 575  * Set the attributes of an object
 576  *
 577  * The transaction passed to this routine must have
 578  * udmu_tx_hold_bonus(tx, oid) called and then assigned
 579  * to a transaction group.
 580  */
 581 void udmu_object_setattr(dmu_buf_t *db, dmu_tx_t *tx, vnattr_t *vap)
 582 {
 583         znode_phys_t *zp = db->db_data;
 584         uint_t mask = vap->va_mask;
 585
 586         /* Assert that the transaction has been assigned to a
 587            transaction group. */
 588         ASSERT(tx->tx_txg != 0);
 589
 590         if (mask == 0) {
 591                 return;
 592         }
 593
 594         dmu_buf_will_dirty(db, tx);
 595
 596         /*
 597          * Set each attribute requested.
 598          * We group settings according to the locks they need to acquire.
 599          *
 600          * Note: you cannot set ctime directly, although it will be
 601          * updated as a side-effect of calling this function.
 602          */
 603
 604         if (mask & AT_MODE)
 605                 zp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode);
 606
 607         if (mask & AT_UID)
 608                 zp->zp_uid = (uint64_t)vap->va_uid;
 609
 610         if (mask & AT_GID)
 611                 zp->zp_gid = (uint64_t)vap->va_gid;
 612
 613         if (mask & AT_SIZE)
 614                 zp->zp_size = vap->va_size;
 615
 616         if (mask & AT_ATIME)
 617                 ZFS_TIME_ENCODE(&vap->va_atime, zp->zp_atime);
 618
 619         if (mask & AT_MTIME)
 620                 ZFS_TIME_ENCODE(&vap->va_mtime, zp->zp_mtime);
 621
 622         if (mask & AT_CTIME)
 623                 ZFS_TIME_ENCODE(&vap->va_ctime, zp->zp_ctime);
 624
 625         if (mask & AT_NLINK)
 626                 zp->zp_links = vap->va_nlink;
 627 }
 628
 629 /*
 630  * Punch/truncate an object
 631  *
 632  *      IN:     db      - dmu_buf of the object to free data in.
 633  *              off     - start of section to free.
 634  *              len     - length of section to free (0 => to EOF).
 635  *
 636  *      RETURN: 0 if success
 637  *              error code if failure
 638  *
 639  * The transaction passed to this routine must have
 640  * udmu_tx_hold_bonus(tx, oid) and
 641  * if off < size, udmu_tx_hold_free(tx, oid, off, len ? len : DMU_OBJECT_END)
 642  * called and then assigned to a transaction group.
 643  */
 644 void udmu_object_punch(udmu_objset_t *uos, dmu_buf_t *db, dmu_tx_t *tx,
 645                       uint64_t off, uint64_t len)
 646 {
 647         znode_phys_t *zp = db->db_data;
 648         uint64_t oid = db->db_object;
 649         uint64_t end = off + len;
 650         uint64_t size = zp->zp_size;
 651
 652         /* Assert that the transaction has been assigned to a
 653            transaction group. */
 654         ASSERT(tx->tx_txg != 0);
 655
 656         /*
 657          * Nothing to do if file already at desired length.
 658          */
 659         if (len == 0 && size == off) {
 660                 return;
 661         }
 662
 663         if (end > size || len == 0) {
 664                 zp->zp_size = end;
 665         }
 666
 667         if (off < size) {
 668                 uint64_t rlen = len;
 669
 670                 if (len == 0)
 671                         rlen = -1;
 672                 else if (end > size)
 673                         rlen = size - off;
 674
 675                 VERIFY(0 == dmu_free_range(uos->os, oid, off, rlen, tx));
 676         }
 677 }
 678
 679 /*
 680  * Delete a DMU object
 681  *
 682  * The transaction passed to this routine must have
 683  * udmu_tx_hold_free(tx, oid, 0, DMU_OBJECT_END) called
 684  * and then assigned to a transaction group.
 685  *
 686  * This will release db and set it to NULL to prevent further dbuf releases.
 687  */
 688 int udmu_object_delete(udmu_objset_t *uos, dmu_buf_t **db, dmu_tx_t *tx,
 689                        void *tag)
 690 {
 691         int error;
 692         uint64_t oid = (*db)->db_object;
 693
 694         /* Assert that the transaction has been assigned to a
 695            transaction group. */
 696         ASSERT(tx->tx_txg != 0);
 697
 698         udmu_object_put_dmu_buf(*db, tag);
 699         *db = NULL;
 700
 701         error = dmu_object_free(uos->os, oid, tx);
 702
 703         return (error);
 704 }
 705
 706 /*
 707  * Get the object id from dmu_buf_t
 708  */
 709 uint64_t udmu_object_get_id(dmu_buf_t *db)
 710 {
 711         ASSERT(db != NULL);
 712         return (db->db_object);
 713 }
 714
 715 int udmu_object_is_zap(dmu_buf_t *_db)
 716 {
 717         dmu_buf_impl_t *db = (dmu_buf_impl_t *) _db;
 718         if (db->db_dnode->dn_type == DMU_OT_DIRECTORY_CONTENTS)
 719                 return 1;
 720         return 0;
 721 }
 722
 723 /*
 724  * Release the reference to a dmu_buf object.
 725  */
 726 void udmu_object_put_dmu_buf(dmu_buf_t *db, void *tag)
 727 {
 728         ASSERT(tag);
 729         dmu_buf_rele(db, tag);
 730 }
 731
 732 dmu_tx_t *udmu_tx_create(udmu_objset_t *uos)
 733 {
 734         return (dmu_tx_create(uos->os));
 735 }
 736
 737 void udmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
 738 {
 739         dmu_tx_hold_write(tx, object, off, len);
 740 }
 741
 742 void udmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
 743                        uint64_t len)
 744 {
 745         dmu_tx_hold_free(tx, object, off, len);
 746 }
 747
 748 void udmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
 749 {
 750         dmu_tx_hold_zap(tx, object, add, name);
 751 }
 752
 753 void udmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
 754 {
 755         dmu_tx_hold_bonus(tx, object);
 756 }
 757
 758 void udmu_tx_abort(dmu_tx_t *tx)
 759 {
 760         dmu_tx_abort(tx);
 761 }
 762
 763 int udmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
 764 {
 765         return (dmu_tx_assign(tx, txg_how));
 766 }
 767
 768 void udmu_tx_wait(dmu_tx_t *tx)
 769 {
 770         dmu_tx_wait(tx);
 771 }
 772
 773 void udmu_tx_commit(dmu_tx_t *tx)
 774 {
 775         dmu_tx_commit(tx);
 776 }
 777
 778 /* commit callback API */
 779 void * udmu_tx_cb_create(size_t bytes)
 780 {
 781         return dmu_tx_callback_data_create(bytes);
 782 }
 783
 784 int udmu_tx_cb_add(dmu_tx_t *tx, void *func, void *data)
 785 {
 786         return dmu_tx_callback_commit_add(tx, func, data);
 787 }
 788
 789 int udmu_tx_cb_destroy(void *data)
 790 {
 791         return dmu_tx_callback_data_destroy(data);
 792 }
 793
 794 int udmu_indblk_overhead(dmu_buf_t *db, unsigned long *used,
 795                          unsigned long *overhead)
 796 {
 797         dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode;
 798
 799         *overhead = (2 * (*used))/(1 << dn->dn_phys->dn_indblkshift);
 800
 801         return 0;
 802 }
 803
 804 int udmu_get_blocksize(dmu_buf_t *db, long *blksz)
 805 {
 806         dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode;
 807
 808         *blksz = (dn->dn_datablksz);
 809
 810         return 0;
 811 }