X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=module%2Fzfs%2Fzap_micro.c;h=2d89c20c47d7ba8b14aefdc6387f4459a529f7b4;hb=428870ff734fdaccc342b33fc53cf94724409a46;hp=528d31d5e2f7bffdce909ac2f217046ba1447eb6;hpb=6119cb885a976e175a6e827894accf657ff1984f;p=zfs.git diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 528d31d..2d89c20 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -19,10 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ +#include #include #include #include @@ -31,38 +31,98 @@ #include #include #include +#include #ifdef _KERNEL #include #endif -static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx); +static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); +uint64_t +zap_getflags(zap_t *zap) +{ + if (zap->zap_ismicro) + return (0); + return (zap->zap_u.zap_fat.zap_phys->zap_flags); +} + +int +zap_hashbits(zap_t *zap) +{ + if (zap_getflags(zap) & ZAP_FLAG_HASH64) + return (48); + else + return (28); +} + +uint32_t +zap_maxcd(zap_t *zap) +{ + if (zap_getflags(zap) & ZAP_FLAG_HASH64) + return ((1<<16)-1); + else + return (-1U); +} static uint64_t -zap_hash(zap_t *zap, const char *normname) +zap_hash(zap_name_t *zn) { - const uint8_t *cp; - uint8_t c; - uint64_t crc = zap->zap_salt; + zap_t *zap = zn->zn_zap; + uint64_t h = 0; - /* NB: name must already be normalized, if necessary */ + if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { + ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); + h = *(uint64_t *)zn->zn_key_orig; + } else { + h = zap->zap_salt; + ASSERT(h != 0); + ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); + + if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { + int i; + const uint64_t *wp = zn->zn_key_norm; + + ASSERT(zn->zn_key_intlen == 8); + for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { + int j; + uint64_t word = *wp; + + for (j = 0; j < zn->zn_key_intlen; j++) { + h = (h >> 8) ^ + zfs_crc64_table[(h ^ word) & 0xFF]; + word >>= NBBY; + } + } + } else { + int i, len; + const uint8_t *cp = zn->zn_key_norm; - ASSERT(crc != 0); - ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); - for (cp = (const uint8_t *)normname; (c = *cp) != '\0'; cp++) { - crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ c) & 0xFF]; - } + /* + * We previously stored the terminating null on + * disk, but didn't hash it, so we need to + * continue to not hash it. (The + * zn_key_*_numints includes the terminating + * null for non-binary keys.) + */ + len = zn->zn_key_norm_numints - 1; + ASSERT(zn->zn_key_intlen == 1); + for (i = 0; i < len; cp++, i++) { + h = (h >> 8) ^ + zfs_crc64_table[(h ^ *cp) & 0xFF]; + } + } + } /* - * Only use 28 bits, since we need 4 bits in the cookie for the - * collision differentiator. We MUST use the high bits, since - * those are the ones that we first pay attention to when + * Don't use all 64 bits, since we need some in the cookie for + * the collision differentiator. We MUST use the high bits, + * since those are the ones that we first pay attention to when * chosing the bucket. */ - crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); + h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); - return (crc); + return (h); } static int @@ -71,6 +131,8 @@ zap_normalize(zap_t *zap, const char *name, char *namenorm) size_t inlen, outlen; int err; + ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); + inlen = strlen(name) + 1; outlen = ZAP_MAXNAMELEN; @@ -85,16 +147,18 @@ zap_normalize(zap_t *zap, const char *name, char *namenorm) boolean_t zap_match(zap_name_t *zn, const char *matchname) { + ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); + if (zn->zn_matchtype == MT_FIRST) { char norm[ZAP_MAXNAMELEN]; if (zap_normalize(zn->zn_zap, matchname, norm) != 0) return (B_FALSE); - return (strcmp(zn->zn_name_norm, norm) == 0); + return (strcmp(zn->zn_key_norm, norm) == 0); } else { /* MT_BEST or MT_EXACT */ - return (strcmp(zn->zn_name_orij, matchname) == 0); + return (strcmp(zn->zn_key_orig, matchname) == 0); } } @@ -104,30 +168,49 @@ zap_name_free(zap_name_t *zn) kmem_free(zn, sizeof (zap_name_t)); } -/* XXX combine this with zap_lockdir()? */ zap_name_t * -zap_name_alloc(zap_t *zap, const char *name, matchtype_t mt) +zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) { zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); zn->zn_zap = zap; - zn->zn_name_orij = name; + zn->zn_key_intlen = sizeof (*key); + zn->zn_key_orig = key; + zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; zn->zn_matchtype = mt; if (zap->zap_normflags) { - if (zap_normalize(zap, name, zn->zn_normbuf) != 0) { + if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { zap_name_free(zn); return (NULL); } - zn->zn_name_norm = zn->zn_normbuf; + zn->zn_key_norm = zn->zn_normbuf; + zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; } else { if (mt != MT_EXACT) { zap_name_free(zn); return (NULL); } - zn->zn_name_norm = zn->zn_name_orij; + zn->zn_key_norm = zn->zn_key_orig; + zn->zn_key_norm_numints = zn->zn_key_orig_numints; } - zn->zn_hash = zap_hash(zap, zn->zn_name_norm); + zn->zn_hash = zap_hash(zn); + return (zn); +} + +zap_name_t * +zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) +{ + zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); + + ASSERT(zap->zap_normflags == 0); + zn->zn_zap = zap; + zn->zn_key_intlen = sizeof (*key); + zn->zn_key_orig = zn->zn_key_norm = key; + zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; + zn->zn_matchtype = MT_EXACT; + + zn->zn_hash = zap_hash(zn); return (zn); } @@ -172,26 +255,26 @@ mze_compare(const void *arg1, const void *arg2) return (+1); if (mze1->mze_hash < mze2->mze_hash) return (-1); - if (mze1->mze_phys.mze_cd > mze2->mze_phys.mze_cd) + if (mze1->mze_cd > mze2->mze_cd) return (+1); - if (mze1->mze_phys.mze_cd < mze2->mze_phys.mze_cd) + if (mze1->mze_cd < mze2->mze_cd) return (-1); return (0); } static void -mze_insert(zap_t *zap, int chunkid, uint64_t hash, mzap_ent_phys_t *mzep) +mze_insert(zap_t *zap, int chunkid, uint64_t hash) { mzap_ent_t *mze; ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - ASSERT(mzep->mze_cd < ZAP_MAXCD); mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); mze->mze_chunkid = chunkid; mze->mze_hash = hash; - mze->mze_phys = *mzep; + mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; + ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); avl_add(&zap->zap_m.zap_avl, mze); } @@ -206,18 +289,16 @@ mze_find(zap_name_t *zn) ASSERT(zn->zn_zap->zap_ismicro); ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); - if (strlen(zn->zn_name_norm) >= sizeof (mze_tofind.mze_phys.mze_name)) - return (NULL); - mze_tofind.mze_hash = zn->zn_hash; - mze_tofind.mze_phys.mze_cd = 0; + mze_tofind.mze_cd = 0; again: mze = avl_find(avl, &mze_tofind, &idx); if (mze == NULL) mze = avl_nearest(avl, idx, AVL_AFTER); for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { - if (zap_match(zn, mze->mze_phys.mze_name)) + ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); + if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) return (mze); } if (zn->zn_matchtype == MT_BEST) { @@ -240,12 +321,12 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash) ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); mze_tofind.mze_hash = hash; - mze_tofind.mze_phys.mze_cd = 0; + mze_tofind.mze_cd = 0; cd = 0; for (mze = avl_find(avl, &mze_tofind, &idx); mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { - if (mze->mze_phys.mze_cd != cd) + if (mze->mze_cd != cd) break; cd++; } @@ -329,7 +410,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) zap->zap_m.zap_num_entries++; zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); - mze_insert(zap, i, zn->zn_hash, mze); + mze_insert(zap, i, zn->zn_hash); zap_name_free(zn); } } @@ -371,7 +452,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, *zapp = NULL; - err = dmu_buf_hold(os, obj, 0, NULL, &db); + err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); if (err) return (err); @@ -421,7 +502,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, dprintf("upgrading obj %llu: num_entries=%u\n", obj, zap->zap_m.zap_num_entries); *zapp = zap; - return (mzap_upgrade(zapp, tx)); + return (mzap_upgrade(zapp, tx, 0)); } err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); ASSERT3U(err, ==, 0); @@ -441,10 +522,11 @@ zap_unlockdir(zap_t *zap) } static int -mzap_upgrade(zap_t **zapp, dmu_tx_t *tx) +mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) { mzap_phys_t *mzp; - int i, sz, nchunks, err; + int i, sz, nchunks; + int err = 0; zap_t *zap = *zapp; ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); @@ -454,11 +536,13 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx) bcopy(zap->zap_dbuf->db_data, mzp, sz); nchunks = zap->zap_m.zap_num_chunks; - err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, - 1ULL << fzap_default_block_shift, 0, tx); - if (err) { - kmem_free(mzp, sz); - return (err); + if (!flags) { + err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, + 1ULL << fzap_default_block_shift, 0, tx); + if (err) { + kmem_free(mzp, sz); + return (err); + } } dprintf("upgrading obj=%llu with %u chunks\n", @@ -466,10 +550,9 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx) /* XXX destroy the avl later, so we can use the stored hash value */ mze_destroy(zap); - fzap_upgrade(zap, tx); + fzap_upgrade(zap, tx, flags); for (i = 0; i < nchunks; i++) { - int err; mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; zap_name_t *zn; if (mze->mze_name[0] == 0) @@ -489,12 +572,13 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx) } static void -mzap_create_impl(objset_t *os, uint64_t obj, int normflags, dmu_tx_t *tx) +mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, + dmu_tx_t *tx) { dmu_buf_t *db; mzap_phys_t *zp; - VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db)); + VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); #ifdef ZFS_DEBUG { @@ -510,6 +594,15 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, dmu_tx_t *tx) zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; zp->mz_normflags = normflags; dmu_buf_rele(db, FTAG); + + if (flags != 0) { + zap_t *zap; + /* Only fat zap supports flags; upgrade immediately. */ + VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, + B_FALSE, B_FALSE, &zap)); + VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); + zap_unlockdir(zap); + } } int @@ -530,7 +623,7 @@ zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); if (err != 0) return (err); - mzap_create_impl(os, obj, normflags, tx); + mzap_create_impl(os, obj, normflags, 0, tx); return (0); } @@ -547,7 +640,26 @@ zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, { uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); - mzap_create_impl(os, obj, normflags, tx); + mzap_create_impl(os, obj, normflags, 0, tx); + return (obj); +} + +uint64_t +zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, + dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); + + ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && + leaf_blockshift <= SPA_MAXBLOCKSHIFT && + indirect_blockshift >= SPA_MINBLOCKSHIFT && + indirect_blockshift <= SPA_MAXBLOCKSHIFT); + + VERIFY(dmu_object_set_blocksize(os, obj, + 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); + + mzap_create_impl(os, obj, normflags, flags, tx); return (obj); } @@ -617,11 +729,11 @@ again: other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { if (zn == NULL) { - zn = zap_name_alloc(zap, mze->mze_phys.mze_name, + zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, MT_FIRST); allocdzn = B_TRUE; } - if (zap_match(zn, other->mze_phys.mze_name)) { + if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { if (allocdzn) zap_name_free(zn); return (B_TRUE); @@ -683,9 +795,10 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, } else if (integer_size != 8) { err = EINVAL; } else { - *(uint64_t *)buf = mze->mze_phys.mze_value; + *(uint64_t *)buf = + MZE_PHYS(zap, mze)->mze_value; (void) strlcpy(realname, - mze->mze_phys.mze_name, rn_len); + MZE_PHYS(zap, mze)->mze_name, rn_len); if (ncp) { *ncp = mzap_normalization_conflict(zap, zn, mze); @@ -699,6 +812,63 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, } int +zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + + fzap_prefetch(zn); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int +zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + + err = fzap_lookup(zn, integer_size, num_integers, buf, + NULL, 0, NULL); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + +int +zap_contains(objset_t *os, uint64_t zapobj, const char *name) +{ + int err = (zap_lookup_norm(os, zapobj, name, 0, + 0, NULL, MT_EXACT, NULL, 0, NULL)); + if (err == EOVERFLOW || err == EINVAL) + err = 0; /* found, but skipped reading the value */ + return (err); +} + +int zap_length(objset_t *os, uint64_t zapobj, const char *name, uint64_t *integer_size, uint64_t *num_integers) { @@ -733,6 +903,28 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name, return (err); } +int +zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, uint64_t *integer_size, uint64_t *num_integers) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_length(zn, integer_size, num_integers); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + static void mzap_addent(zap_name_t *zn, uint64_t value) { @@ -741,20 +933,18 @@ mzap_addent(zap_name_t *zn, uint64_t value) int start = zap->zap_m.zap_alloc_next; uint32_t cd; - dprintf("obj=%llu %s=%llu\n", zap->zap_object, - zn->zn_name_orij, value); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); #ifdef ZFS_DEBUG for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; - ASSERT(strcmp(zn->zn_name_orij, mze->mze_name) != 0); + ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); } #endif cd = mze_find_unused_cd(zap, zn->zn_hash); /* given the limited size of the microzap, this can't happen */ - ASSERT(cd != ZAP_MAXCD); + ASSERT(cd < zap_maxcd(zap)); again: for (i = start; i < zap->zap_m.zap_num_chunks; i++) { @@ -762,13 +952,13 @@ again: if (mze->mze_name[0] == 0) { mze->mze_value = value; mze->mze_cd = cd; - (void) strcpy(mze->mze_name, zn->zn_name_orij); + (void) strcpy(mze->mze_name, zn->zn_key_orig); zap->zap_m.zap_num_entries++; zap->zap_m.zap_alloc_next = i+1; if (zap->zap_m.zap_alloc_next == zap->zap_m.zap_num_chunks) zap->zap_m.zap_alloc_next = 0; - mze_insert(zap, i, zn->zn_hash, mze); + mze_insert(zap, i, zn->zn_hash); return; } } @@ -780,7 +970,7 @@ again: } int -zap_add(objset_t *os, uint64_t zapobj, const char *name, +zap_add(objset_t *os, uint64_t zapobj, const char *key, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { @@ -793,7 +983,7 @@ zap_add(objset_t *os, uint64_t zapobj, const char *name, err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); - zn = zap_name_alloc(zap, name, MT_EXACT); + zn = zap_name_alloc(zap, key, MT_EXACT); if (zn == NULL) { zap_unlockdir(zap); return (ENOTSUP); @@ -802,10 +992,8 @@ zap_add(objset_t *os, uint64_t zapobj, const char *name, err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ } else if (integer_size != 8 || num_integers != 1 || - strlen(name) >= MZAP_NAME_LEN) { - dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", - zapobj, integer_size, num_integers, name); - err = mzap_upgrade(&zn->zn_zap, tx); + strlen(key) >= MZAP_NAME_LEN) { + err = mzap_upgrade(&zn->zn_zap, tx, 0); if (err == 0) err = fzap_add(zn, integer_size, num_integers, val, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ @@ -825,15 +1013,50 @@ zap_add(objset_t *os, uint64_t zapobj, const char *name, } int +zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, int integer_size, uint64_t num_integers, + const void *val, dmu_tx_t *tx) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_add(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_add() may change zap */ + zap_name_free(zn); + if (zap != NULL) /* may be NULL if fzap_add() failed */ + zap_unlockdir(zap); + return (err); +} + +int zap_update(objset_t *os, uint64_t zapobj, const char *name, int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) { zap_t *zap; mzap_ent_t *mze; + uint64_t oldval; const uint64_t *intval = val; zap_name_t *zn; int err; +#ifdef ZFS_DEBUG + /* + * If there is an old value, it shouldn't change across the + * lockdir (eg, due to bprewrite's xlation). + */ + if (integer_size == 8 && num_integers == 1) + (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); +#endif + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); if (err) return (err); @@ -849,7 +1072,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, strlen(name) >= MZAP_NAME_LEN) { dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", zapobj, integer_size, num_integers, name); - err = mzap_upgrade(&zn->zn_zap, tx); + err = mzap_upgrade(&zn->zn_zap, tx, 0); if (err == 0) err = fzap_update(zn, integer_size, num_integers, val, tx); @@ -857,9 +1080,8 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, } else { mze = mze_find(zn); if (mze != NULL) { - mze->mze_phys.mze_value = *intval; - zap->zap_m.zap_phys->mz_chunk - [mze->mze_chunkid].mze_value = *intval; + ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); + MZE_PHYS(zap, mze)->mze_value = *intval; } else { mzap_addent(zn, *intval); } @@ -872,6 +1094,31 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name, } int +zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, + int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) +{ + zap_t *zap; + zap_name_t *zn; + int err; + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_update(zn, integer_size, num_integers, val, tx); + zap = zn->zn_zap; /* fzap_update() may change zap */ + zap_name_free(zn); + if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ + zap_unlockdir(zap); + return (err); +} + +int zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) { return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); @@ -912,17 +1159,32 @@ zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, return (err); } +int +zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, + int key_numints, dmu_tx_t *tx) +{ + zap_t *zap; + int err; + zap_name_t *zn; + + err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); + if (err) + return (err); + zn = zap_name_alloc_uint64(zap, key, key_numints); + if (zn == NULL) { + zap_unlockdir(zap); + return (ENOTSUP); + } + err = fzap_remove(zn, tx); + zap_name_free(zn); + zap_unlockdir(zap); + return (err); +} + /* * Routines for iterating over the attributes. */ -/* - * We want to keep the high 32 bits of the cursor zero if we can, so - * that 32-bit programs can access this. So use a small hash value so - * we can fit 4 bits of cd into the 32-bit cursor. - * - * [ 4 zero bits | 32-bit collision differentiator | 28-bit hash value ] - */ void zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, uint64_t serialized) @@ -931,15 +1193,9 @@ zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, zc->zc_zap = NULL; zc->zc_leaf = NULL; zc->zc_zapobj = zapobj; - if (serialized == -1ULL) { - zc->zc_hash = -1ULL; - zc->zc_cd = 0; - } else { - zc->zc_hash = serialized << (64-ZAP_HASHBITS); - zc->zc_cd = serialized >> ZAP_HASHBITS; - if (zc->zc_cd >= ZAP_MAXCD) /* corrupt serialized */ - zc->zc_cd = 0; - } + zc->zc_serialized = serialized; + zc->zc_hash = 0; + zc->zc_cd = 0; } void @@ -969,10 +1225,21 @@ zap_cursor_serialize(zap_cursor_t *zc) { if (zc->zc_hash == -1ULL) return (-1ULL); - ASSERT((zc->zc_hash & (ZAP_MAXCD-1)) == 0); - ASSERT(zc->zc_cd < ZAP_MAXCD); - return ((zc->zc_hash >> (64-ZAP_HASHBITS)) | - ((uint64_t)zc->zc_cd << ZAP_HASHBITS)); + if (zc->zc_zap == NULL) + return (zc->zc_serialized); + ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); + ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); + + /* + * We want to keep the high 32 bits of the cursor zero if we can, so + * that 32-bit programs can access this. So usually use a small + * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits + * of the cursor. + * + * [ collision differentiator | zap_hashbits()-bit hash value ] + */ + return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | + ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); } int @@ -987,10 +1254,23 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) return (ENOENT); if (zc->zc_zap == NULL) { + int hb; err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, RW_READER, TRUE, FALSE, &zc->zc_zap); if (err) return (err); + + /* + * To support zap_cursor_init_serialized, advance, retrieve, + * we must add to the existing zc_cd, which may already + * be 1 due to the zap_cursor_advance. + */ + ASSERT(zc->zc_hash == 0); + hb = zap_hashbits(zc->zc_zap); + zc->zc_hash = zc->zc_serialized << (64 - hb); + zc->zc_cd += zc->zc_serialized >> hb; + if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ + zc->zc_cd = 0; } else { rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); } @@ -1000,7 +1280,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) err = ENOENT; mze_tofind.mze_hash = zc->zc_hash; - mze_tofind.mze_phys.mze_cd = zc->zc_cd; + mze_tofind.mze_cd = zc->zc_cd; mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); if (mze == NULL) { @@ -1008,18 +1288,16 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) idx, AVL_AFTER); } if (mze) { - ASSERT(0 == bcmp(&mze->mze_phys, - &zc->zc_zap->zap_m.zap_phys->mz_chunk - [mze->mze_chunkid], sizeof (mze->mze_phys))); - + mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); + ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); za->za_normalization_conflict = mzap_normalization_conflict(zc->zc_zap, NULL, mze); za->za_integer_length = 8; za->za_num_integers = 1; - za->za_first_integer = mze->mze_phys.mze_value; - (void) strcpy(za->za_name, mze->mze_phys.mze_name); + za->za_first_integer = mzep->mze_value; + (void) strcpy(za->za_name, mzep->mze_name); zc->zc_hash = mze->mze_hash; - zc->zc_cd = mze->mze_phys.mze_cd; + zc->zc_cd = mze->mze_cd; err = 0; } else { zc->zc_hash = -1ULL; @@ -1035,12 +1313,46 @@ zap_cursor_advance(zap_cursor_t *zc) if (zc->zc_hash == -1ULL) return; zc->zc_cd++; - if (zc->zc_cd >= ZAP_MAXCD) { - zc->zc_cd = 0; - zc->zc_hash += 1ULL<<(64-ZAP_HASHBITS); - if (zc->zc_hash == 0) /* EOF */ - zc->zc_hash = -1ULL; +} + +int +zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) +{ + int err = 0; + mzap_ent_t *mze; + zap_name_t *zn; + + if (zc->zc_zap == NULL) { + err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, + RW_READER, TRUE, FALSE, &zc->zc_zap); + if (err) + return (err); + } else { + rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); + } + + zn = zap_name_alloc(zc->zc_zap, name, mt); + if (zn == NULL) { + rw_exit(&zc->zc_zap->zap_rwlock); + return (ENOTSUP); + } + + if (!zc->zc_zap->zap_ismicro) { + err = fzap_cursor_move_to_key(zc, zn); + } else { + mze = mze_find(zn); + if (mze == NULL) { + err = ENOENT; + goto out; + } + zc->zc_hash = mze->mze_hash; + zc->zc_cd = mze->mze_cd; } + +out: + zap_name_free(zn); + rw_exit(&zc->zc_zap->zap_rwlock); + return (err); } int