9d8354e73e33555342e220283e8f48d5d393c65f
[zfs.git] / module / zfs / zap_leaf.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 /*
27  * The 512-byte leaf is broken into 32 16-byte chunks.
28  * chunk number n means l_chunk[n], even though the header precedes it.
29  * the names are stored null-terminated.
30  */
31
32 #include <sys/spa.h>
33 #include <sys/dmu.h>
34 #include <sys/zfs_context.h>
35 #include <sys/fs/zfs.h>
36 #include <sys/zap.h>
37 #include <sys/zap_impl.h>
38 #include <sys/zap_leaf.h>
39
40 static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
41
42 #define CHAIN_END 0xffff /* end of the chunk chain */
43
44 /* half the (current) minimum block size */
45 #define MAX_ARRAY_BYTES (8<<10)
46
47 #define LEAF_HASH(l, h) \
48         ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
49         ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len)))
50
51 #define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
52
53
/*
 * Fill n bytes at a with the byte value c.
 * Local equivalent of memset(), kept for the ZAP module's own use.
 */
static void
zap_memset(void *a, int c, size_t n)
{
	size_t i;
	char *p = a;

	for (i = 0; i < n; i++)
		p[i] = c;
}
63
/*
 * Store the low-order "len" bytes of "value" into *addr as a
 * native-endian integer of that width.  len must be 1, 2, 4, or 8;
 * any other width trips the ASSERT and stores nothing.
 */
static void
stv(int len, void *addr, uint64_t value)
{
	switch (len) {
	case 1:
		*(uint8_t *)addr = value;
		return;
	case 2:
		*(uint16_t *)addr = value;
		return;
	case 4:
		*(uint32_t *)addr = value;
		return;
	case 8:
		*(uint64_t *)addr = value;
		return;
	}
	ASSERT(!"bad int len");
}
83
/*
 * Load a native-endian integer of width "len" (1, 2, 4, or 8 bytes)
 * from *addr and zero-extend it to 64 bits.  Any other width trips
 * the ASSERT and returns a poison value.
 *
 * Use const-qualified casts: the parameter is const void *, so the
 * original casts silently discarded the qualifier (CERT EXP05-C).
 */
static uint64_t
ldv(int len, const void *addr)
{
	switch (len) {
	case 1:
		return (*(const uint8_t *)addr);
	case 2:
		return (*(const uint16_t *)addr);
	case 4:
		return (*(const uint32_t *)addr);
	case 8:
		return (*(const uint64_t *)addr);
	}
	ASSERT(!"bad int len");
	/* Not reached for valid widths; poison value aids debugging. */
	return (0xFEEDFACEDEADBEEFULL);
}
100
/*
 * Byteswap an entire on-disk leaf block in place.  "size" is the leaf
 * block size in bytes (a power of 2; l_bs is derived from it via
 * highbit()).  Used when reading a pool created on a machine of the
 * opposite endianness.
 */
void
zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
{
	int i;
	zap_leaf_t l;

	/*
	 * Build a minimal stack zap_leaf_t so the ZAP_LEAF_* layout
	 * macros (which only read l_bs and l_phys) work on "buf".
	 */
	l.l_bs = highbit(size)-1;
	l.l_phys = buf;

	buf->l_hdr.lh_block_type =	BSWAP_64(buf->l_hdr.lh_block_type);
	buf->l_hdr.lh_prefix =		BSWAP_64(buf->l_hdr.lh_prefix);
	buf->l_hdr.lh_magic =		BSWAP_32(buf->l_hdr.lh_magic);
	buf->l_hdr.lh_nfree =		BSWAP_16(buf->l_hdr.lh_nfree);
	buf->l_hdr.lh_nentries =	BSWAP_16(buf->l_hdr.lh_nentries);
	buf->l_hdr.lh_prefix_len =	BSWAP_16(buf->l_hdr.lh_prefix_len);
	buf->l_hdr.lh_freelist =	BSWAP_16(buf->l_hdr.lh_freelist);
	/* NOTE(review): 16-bit lh_flags is not swapped here — confirm
	 * whether that is intentional for this on-disk version. */

	/* Swap every hash-table bucket (chunk indices). */
	for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
		buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);

	/* Swap each chunk according to its type tag (1 byte, unswapped). */
	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
		zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
		struct zap_leaf_entry *le;

		switch (lc->l_free.lf_type) {
		case ZAP_CHUNK_ENTRY:
			le = &lc->l_entry;

			le->le_type =		BSWAP_8(le->le_type);
			le->le_int_size =	BSWAP_8(le->le_int_size);
			le->le_next =		BSWAP_16(le->le_next);
			le->le_name_chunk =	BSWAP_16(le->le_name_chunk);
			le->le_name_length =	BSWAP_16(le->le_name_length);
			le->le_value_chunk =	BSWAP_16(le->le_value_chunk);
			le->le_value_length =	BSWAP_16(le->le_value_length);
			le->le_cd =		BSWAP_32(le->le_cd);
			le->le_hash =		BSWAP_64(le->le_hash);
			break;
		case ZAP_CHUNK_FREE:
			lc->l_free.lf_type =	BSWAP_8(lc->l_free.lf_type);
			lc->l_free.lf_next =	BSWAP_16(lc->l_free.lf_next);
			break;
		case ZAP_CHUNK_ARRAY:
			lc->l_array.la_type =	BSWAP_8(lc->l_array.la_type);
			lc->l_array.la_next =	BSWAP_16(lc->l_array.la_next);
			/* la_array doesn't need swapping */
			break;
		default:
			ASSERT(!"bad leaf type");
		}
	}
}
152
153 void
154 zap_leaf_init(zap_leaf_t *l, boolean_t sort)
155 {
156         int i;
157
158         l->l_bs = highbit(l->l_dbuf->db_size)-1;
159         zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
160         zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
161         for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
162                 ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
163                 ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
164         }
165         ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
166         l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
167         l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
168         l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
169         if (sort)
170                 l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
171 }
172
173 /*
174  * Routines which manipulate leaf chunks (l_chunk[]).
175  */
176
177 static uint16_t
178 zap_leaf_chunk_alloc(zap_leaf_t *l)
179 {
180         int chunk;
181
182         ASSERT(l->l_phys->l_hdr.lh_nfree > 0);
183
184         chunk = l->l_phys->l_hdr.lh_freelist;
185         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
186         ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
187
188         l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
189
190         l->l_phys->l_hdr.lh_nfree--;
191
192         return (chunk);
193 }
194
195 static void
196 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
197 {
198         struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
199         ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
200         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
201         ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
202
203         zlf->lf_type = ZAP_CHUNK_FREE;
204         zlf->lf_next = l->l_phys->l_hdr.lh_freelist;
205         bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
206         l->l_phys->l_hdr.lh_freelist = chunk;
207
208         l->l_phys->l_hdr.lh_nfree++;
209 }
210
211 /*
212  * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
213  */
214
/*
 * Allocate a chain of ZAP_CHUNK_ARRAY chunks and pack num_integers
 * integers of integer_size bytes each (read native-endian from buf)
 * into them as big-endian bytes.  Returns the index of the first
 * chunk in the chain.
 */
static uint16_t
zap_leaf_array_create(zap_leaf_t *l, const char *buf,
	int integer_size, int num_integers)
{
	uint16_t chunk_head;
	/* chunkp points at the link to patch with each new chunk's index. */
	uint16_t *chunkp = &chunk_head;
	int byten = 0;		/* byte position within the current integer */
	uint64_t value;
	/* Shift that brings the most significant byte down to bits 0-7. */
	int shift = (integer_size-1)*8;
	int len = num_integers;	/* integers still to emit */

	ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);

	while (len > 0) {
		uint16_t chunk = zap_leaf_chunk_alloc(l);
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int i;

		la->la_type = ZAP_CHUNK_ARRAY;
		for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
			/* Load the next source integer at its first byte. */
			if (byten == 0)
				value = ldv(integer_size, buf);
			/* Emit bytes most-significant first (big-endian). */
			la->la_array[i] = value >> shift;
			value <<= 8;
			if (++byten == integer_size) {
				byten = 0;
				buf += integer_size;
				if (--len == 0)
					break;
			}
		}

		/* Link the new chunk into the chain. */
		*chunkp = chunk;
		chunkp = &la->la_next;
	}
	*chunkp = CHAIN_END;

	return (chunk_head);
}
254
255 static void
256 zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
257 {
258         uint16_t chunk = *chunkp;
259
260         *chunkp = CHAIN_END;
261
262         while (chunk != CHAIN_END) {
263                 int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
264                 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
265                     ZAP_CHUNK_ARRAY);
266                 zap_leaf_chunk_free(l, chunk);
267                 chunk = nextchunk;
268         }
269 }
270
/*
 * Unpack up to buf_len integers from the big-endian byte chain
 * starting at "chunk" into buf, widening each array_int_len-byte
 * source integer to buf_int_len bytes (native-endian).
 * array_len and buf_len are in integers, not bytes.
 */
static void
zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
    int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
    char *buf)
{
	int len = MIN(array_len, buf_len);
	int byten = 0;		/* byte position within current integer */
	uint64_t value = 0;	/* integer being assembled */

	/* Narrowing conversions are not supported. */
	ASSERT3U(array_int_len, <=, buf_int_len);

	/* Fast path for one 8-byte integer */
	if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		uint8_t *ip = la->la_array;
		uint64_t *buf64 = (uint64_t *)buf;

		/* Assemble the big-endian bytes into a native uint64_t. */
		*buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
		    (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
		    (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
		    (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
		return;
	}

	/* Fast path for an array of 1-byte integers (eg. the entry name) */
	if (array_int_len == 1 && buf_int_len == 1 &&
	    buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
		/*
		 * buf is big enough to take a whole chunk's worth per
		 * iteration, so no per-byte bounds check is needed.
		 */
		while (chunk != CHAIN_END) {
			struct zap_leaf_array *la =
			    &ZAP_LEAF_CHUNK(l, chunk).l_array;
			bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES);
			buf += ZAP_LEAF_ARRAY_BYTES;
			chunk = la->la_next;
		}
		return;
	}

	/* General path: rebuild each integer byte-by-byte across chunks. */
	while (len > 0) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int i;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
			value = (value << 8) | la->la_array[i];
			byten++;
			if (byten == array_int_len) {
				/* One full integer assembled; store it. */
				stv(buf_int_len, buf, value);
				byten = 0;
				len--;
				if (len == 0)
					return;
				buf += buf_int_len;
			}
		}
		chunk = la->la_next;
	}
}
329
/*
 * Compare the name stored in the array chain at "chunk" against the
 * name in zn.  Only to be used on 8-bit arrays.
 * array_len is actual len in bytes (not encoded le_value_length).
 * namenorm is null-terminated.
 */
static boolean_t
zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_len)
{
	int bseen = 0;

	if (zn->zn_matchtype == MT_FIRST) {
		/*
		 * Normalized matching needs the full contiguous name,
		 * so copy it out and let zap_match() do the comparison.
		 */
		char *thisname = kmem_alloc(array_len, KM_SLEEP);
		boolean_t match;

		zap_leaf_array_read(l, chunk, 1, array_len, 1,
		    array_len, thisname);
		match = zap_match(zn, thisname);
		kmem_free(thisname, array_len);
		return (match);
	}

	/* Fast path for exact matching */
	/* Compare chunk-by-chunk against the un-normalized name. */
	while (bseen < array_len) {
		struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		if (bcmp(la->la_array, zn->zn_name_orij + bseen, toread))
			break;
		chunk = la->la_next;
		bseen += toread;
	}
	/* A mismatch breaks out early, leaving bseen short. */
	return (bseen == array_len);
}
363
364 /*
365  * Routines which manipulate leaf entries.
366  */
367
/*
 * Look up the name zn in leaf l.  On success, fill in *zeh (which
 * points at the live hash-chain link, so the entry can later be
 * removed through it) and return 0; otherwise return ENOENT.
 */
int
zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
{
	uint16_t *chunkp;
	struct zap_leaf_entry *le;

	ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

again:
	/* Walk the hash chain for this name's bucket. */
	for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
	    *chunkp != CHAIN_END; chunkp = &le->le_next) {
		uint16_t chunk = *chunkp;
		le = ZAP_LEAF_ENTRY(l, chunk);

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

		/* Cheap hash comparison first; names only on hash match. */
		if (le->le_hash != zn->zn_hash)
			continue;

		/*
		 * NB: the entry chain is always sorted by cd on
		 * normalized zap objects, so this will find the
		 * lowest-cd match for MT_FIRST.
		 */
		ASSERT(zn->zn_matchtype == MT_EXACT ||
		    (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
		if (zap_leaf_array_match(l, zn, le->le_name_chunk,
		    le->le_name_length)) {
			zeh->zeh_num_integers = le->le_value_length;
			zeh->zeh_integer_size = le->le_int_size;
			zeh->zeh_cd = le->le_cd;
			zeh->zeh_hash = le->le_hash;
			zeh->zeh_chunkp = chunkp;
			zeh->zeh_leaf = l;
			return (0);
		}
	}

	/*
	 * NB: we could of course do this in one pass, but that would be
	 * a pain.  We'll see if MT_BEST is even used much.
	 */
	if (zn->zn_matchtype == MT_BEST) {
		/* No exact match; retry accepting a normalized match. */
		zn->zn_matchtype = MT_FIRST;
		goto again;
	}

	return (ENOENT);
}
418
419 /* Return (h1,cd1 >= h2,cd2) */
420 #define HCD_GTEQ(h1, cd1, h2, cd2) \
421         ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))
422
/*
 * Find the entry with the smallest (hash, cd) that is >= (h, cd) —
 * used for ordered iteration (e.g. zap cursors).  Fills in *zeh with
 * a "fake chunk" handle (not linked to the hash chain, so it cannot
 * be used for removal).  Returns ENOENT if no such entry exists.
 */
int
zap_leaf_lookup_closest(zap_leaf_t *l,
    uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
{
	uint16_t chunk;
	uint64_t besth = -1ULL;		/* best candidate so far (max = none) */
	uint32_t bestcd = ZAP_MAXCD;	/* ZAP_MAXCD == no candidate found */
	uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
	uint16_t lh;
	struct zap_leaf_entry *le;

	ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

	/*
	 * Buckets are ordered by hash prefix, so once a candidate is
	 * found in bucket bestlh, later buckets cannot beat it.
	 */
	for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
		for (chunk = l->l_phys->l_hash[lh];
		    chunk != CHAIN_END; chunk = le->le_next) {
			le = ZAP_LEAF_ENTRY(l, chunk);

			ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
			ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

			/* Keep the minimum (hash, cd) that is >= (h, cd). */
			if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
			    HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
				ASSERT3U(bestlh, >=, lh);
				bestlh = lh;
				besth = le->le_hash;
				bestcd = le->le_cd;

				zeh->zeh_num_integers = le->le_value_length;
				zeh->zeh_integer_size = le->le_int_size;
				zeh->zeh_cd = le->le_cd;
				zeh->zeh_hash = le->le_hash;
				zeh->zeh_fakechunk = chunk;
				zeh->zeh_chunkp = &zeh->zeh_fakechunk;
				zeh->zeh_leaf = l;
			}
		}
	}

	return (bestcd == ZAP_MAXCD ? ENOENT : 0);
}
464
/*
 * Read the value of the entry referred to by zeh into buf, widening
 * each stored integer to integer_size bytes.  Returns EINVAL if the
 * stored integers are wider than requested, EOVERFLOW if buf was too
 * small (in which case a truncated value has still been copied out).
 */
int
zap_entry_read(const zap_entry_handle_t *zeh,
    uint8_t integer_size, uint64_t num_integers, void *buf)
{
	struct zap_leaf_entry *le =
	    ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	/* Narrowing is not supported. */
	if (le->le_int_size > integer_size)
		return (EINVAL);

	zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, le->le_int_size,
	    le->le_value_length, integer_size, num_integers, buf);

	/* Copy happens above regardless; overflow is reported after. */
	if (zeh->zeh_num_integers > num_integers)
		return (EOVERFLOW);
	return (0);

}
484
485 int
486 zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf)
487 {
488         struct zap_leaf_entry *le =
489             ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
490         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
491
492         zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
493             le->le_name_length, 1, buflen, buf);
494         if (le->le_name_length > buflen)
495                 return (EOVERFLOW);
496         return (0);
497 }
498
/*
 * Replace the value of an existing entry with a new one.  Returns
 * EAGAIN if this leaf does not have enough free chunks for the new
 * value (the caller must then expand/split and retry).
 */
int
zap_entry_update(zap_entry_handle_t *zeh,
	uint8_t integer_size, uint64_t num_integers, const void *buf)
{
	int delta_chunks;
	zap_leaf_t *l = zeh->zeh_leaf;
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);

	/* Net change in chunk usage: new value size minus old. */
	delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
	    ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length * le->le_int_size);

	if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
		return (EAGAIN);

	/*
	 * We should search other chained leaves (via
	 * zap_entry_remove,create?) otherwise returning EAGAIN will
	 * just send us into an infinite loop if we have to chain
	 * another leaf block, rather than being able to split this
	 * block.
	 */

	/*
	 * Free the old value first so its chunks can be reused; the
	 * nfree check above guarantees the create cannot run short.
	 */
	zap_leaf_array_free(l, &le->le_value_chunk);
	le->le_value_chunk =
	    zap_leaf_array_create(l, buf, integer_size, num_integers);
	le->le_value_length = num_integers;
	le->le_int_size = integer_size;
	return (0);
}
528
529 void
530 zap_entry_remove(zap_entry_handle_t *zeh)
531 {
532         uint16_t entry_chunk;
533         struct zap_leaf_entry *le;
534         zap_leaf_t *l = zeh->zeh_leaf;
535
536         ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);
537
538         entry_chunk = *zeh->zeh_chunkp;
539         le = ZAP_LEAF_ENTRY(l, entry_chunk);
540         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
541
542         zap_leaf_array_free(l, &le->le_name_chunk);
543         zap_leaf_array_free(l, &le->le_value_chunk);
544
545         *zeh->zeh_chunkp = le->le_next;
546         zap_leaf_chunk_free(l, entry_chunk);
547
548         l->l_phys->l_hdr.lh_nentries--;
549 }
550
/*
 * Create a new entry in leaf l with the given name, hash, cd and
 * value.  If cd == ZAP_MAXCD, allocate the lowest unused collision
 * differentiator for hash h.  Returns E2BIG if the entry could never
 * fit in a leaf, EAGAIN if this leaf is currently too full (caller
 * splits and retries), else 0 with *zeh filled in.
 */
int
zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
    uint8_t integer_size, uint64_t num_integers, const void *buf,
    zap_entry_handle_t *zeh)
{
	uint16_t chunk;
	uint16_t *chunkp;
	struct zap_leaf_entry *le;
	uint64_t namelen, valuelen;
	int numchunks;

	valuelen = integer_size * num_integers;
	namelen = strlen(name) + 1;	/* include the terminating NUL */
	ASSERT(namelen >= 2);

	/* One entry chunk plus the name and value array chunks. */
	numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(namelen) +
	    ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
	if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
		return (E2BIG);

	if (cd == ZAP_MAXCD) {
		/* find the lowest unused cd */
		if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
			cd = 0;

			/*
			 * Chain is sorted by cd, so one pass finds the
			 * first gap in the cds used for hash h.
			 */
			for (chunk = *LEAF_HASH_ENTPTR(l, h);
			    chunk != CHAIN_END; chunk = le->le_next) {
				le = ZAP_LEAF_ENTRY(l, chunk);
				if (le->le_cd > cd)
					break;
				if (le->le_hash == h) {
					ASSERT3U(cd, ==, le->le_cd);
					cd++;
				}
			}
		} else {
			/* old unsorted format; do it the O(n^2) way */
			for (cd = 0; cd < ZAP_MAXCD; cd++) {
				for (chunk = *LEAF_HASH_ENTPTR(l, h);
				    chunk != CHAIN_END; chunk = le->le_next) {
					le = ZAP_LEAF_ENTRY(l, chunk);
					if (le->le_hash == h &&
					    le->le_cd == cd) {
						break;
					}
				}
				/* If this cd is not in use, we are good. */
				if (chunk == CHAIN_END)
					break;
			}
		}
		/*
		 * we would run out of space in a block before we could
		 * have ZAP_MAXCD entries
		 */
		ASSERT3U(cd, <, ZAP_MAXCD);
	}

	if (l->l_phys->l_hdr.lh_nfree < numchunks)
		return (EAGAIN);

	/* make the entry */
	chunk = zap_leaf_chunk_alloc(l);
	le = ZAP_LEAF_ENTRY(l, chunk);
	le->le_type = ZAP_CHUNK_ENTRY;
	le->le_name_chunk = zap_leaf_array_create(l, name, 1, namelen);
	le->le_name_length = namelen;
	le->le_value_chunk =
	    zap_leaf_array_create(l, buf, integer_size, num_integers);
	le->le_value_length = num_integers;
	le->le_int_size = integer_size;
	le->le_hash = h;
	le->le_cd = cd;

	/* link it into the hash chain */
	/* XXX if we did the search above, we could just use that */
	chunkp = zap_leaf_rehash_entry(l, chunk);

	l->l_phys->l_hdr.lh_nentries++;

	zeh->zeh_leaf = l;
	zeh->zeh_num_integers = num_integers;
	zeh->zeh_integer_size = le->le_int_size;
	zeh->zeh_cd = le->le_cd;
	zeh->zeh_hash = le->le_hash;
	zeh->zeh_chunkp = chunkp;

	return (0);
}
640
641 /*
642  * Determine if there is another entry with the same normalized form.
643  * For performance purposes, either zn or name must be provided (the
644  * other can be NULL).  Note, there usually won't be any hash
645  * conflicts, in which case we don't need the concatenated/normalized
646  * form of the name.  But all callers have one of these on hand anyway,
647  * so might as well take advantage.  A cleaner but slower interface
648  * would accept neither argument, and compute the normalized name as
649  * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
650  */
boolean_t
zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
    const char *name, zap_t *zap)
{
	uint64_t chunk;
	struct zap_leaf_entry *le;
	boolean_t allocdzn = B_FALSE;	/* did we allocate zn ourselves? */

	/* Non-normalized zaps cannot have normalization conflicts. */
	if (zap->zap_normflags == 0)
		return (B_FALSE);

	/* Check every other entry with the same hash in this bucket. */
	for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
	    chunk != CHAIN_END; chunk = le->le_next) {
		le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
		if (le->le_hash != zeh->zeh_hash)
			continue;
		/* Skip the entry zeh itself refers to. */
		if (le->le_cd == zeh->zeh_cd)
			continue;

		/* Lazily build the normalized name on first hash hit. */
		if (zn == NULL) {
			zn = zap_name_alloc(zap, name, MT_FIRST);
			allocdzn = B_TRUE;
		}
		if (zap_leaf_array_match(zeh->zeh_leaf, zn,
		    le->le_name_chunk, le->le_name_length)) {
			if (allocdzn)
				zap_name_free(zn);
			return (B_TRUE);
		}
	}
	if (allocdzn)
		zap_name_free(zn);
	return (B_FALSE);
}
685
686 /*
687  * Routines for transferring entries between leafs.
688  */
689
/*
 * Insert entry "entry" into the hash chain for its hash value,
 * keeping the chain sorted by cd.  Returns a pointer to the link
 * that now refers to the entry (usable as zeh_chunkp).
 */
static uint16_t *
zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
{
	struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
	struct zap_leaf_entry *le2;
	uint16_t *chunkp;

	/*
	 * keep the entry chain sorted by cd
	 * NB: this will not cause problems for unsorted leafs, though
	 * it is unnecessary there.
	 */
	for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
	    *chunkp != CHAIN_END; chunkp = &le2->le_next) {
		le2 = ZAP_LEAF_ENTRY(l, *chunkp);
		if (le2->le_cd > le->le_cd)
			break;
	}

	/* Splice the entry in just before the first larger cd. */
	le->le_next = *chunkp;
	*chunkp = entry;
	return (chunkp);
}
713
/*
 * Move an entire array chain from leaf l to leaf nl, freeing the
 * source chunks as we go.  Returns the head chunk index in nl.
 */
static uint16_t
zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
{
	uint16_t new_chunk;
	uint16_t *nchunkp = &new_chunk;

	while (chunk != CHAIN_END) {
		uint16_t nchunk = zap_leaf_chunk_alloc(nl);
		struct zap_leaf_array *nla =
		    &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
		struct zap_leaf_array *la =
		    &ZAP_LEAF_CHUNK(l, chunk).l_array;
		int nextchunk = la->la_next;

		ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
		/*
		 * NOTE(review): bounds nchunk against l rather than nl;
		 * harmless only if both leaves share a block size —
		 * confirm.
		 */
		ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));

		*nla = *la; /* structure assignment */

		/* la_next is copied in *nla but re-linked below. */
		zap_leaf_chunk_free(l, chunk);
		chunk = nextchunk;
		*nchunkp = nchunk;
		nchunkp = &nla->la_next;
	}
	*nchunkp = CHAIN_END;
	return (new_chunk);
}
741
/*
 * Move the entry at chunk "entry" (and its name/value arrays) from
 * leaf l to leaf nl, re-hashing it into nl's hash table.
 */
static void
zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
{
	struct zap_leaf_entry *le, *nle;
	uint16_t chunk;

	le = ZAP_LEAF_ENTRY(l, entry);
	ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);

	chunk = zap_leaf_chunk_alloc(nl);
	nle = ZAP_LEAF_ENTRY(nl, chunk);
	*nle = *le; /* structure assignment */

	/* Link the copy into nl's hash chain (fixes nle->le_next). */
	(void) zap_leaf_rehash_entry(nl, chunk);

	/* Transfer the arrays; this also frees the source chunks. */
	nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
	nle->le_value_chunk =
	    zap_leaf_transfer_array(l, le->le_value_chunk, nl);

	zap_leaf_chunk_free(l, entry);

	l->l_phys->l_hdr.lh_nentries--;
	nl->l_phys->l_hdr.lh_nentries++;
}
766
/*
 * Transfer the entries whose hash prefix ends in 1 to the new leaf.
 */
void
zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
{
	int i;
	/* Bit of the hash that decides which child an entry goes to. */
	int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;

	/* set new prefix and prefix_len */
	/* l keeps prefix<<1 | 0; nl takes prefix<<1 | 1. */
	l->l_phys->l_hdr.lh_prefix <<= 1;
	l->l_phys->l_hdr.lh_prefix_len++;
	nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
	nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;

	/* break existing hash chains */
	zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));

	if (sort)
		l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;

	/*
	 * Transfer entries whose hash bit 'bit' is set to nl; rehash
	 * the remaining entries
	 *
	 * NB: We could find entries via the hashtable instead. That
	 * would be O(hashents+numents) rather than O(numblks+numents),
	 * but this accesses memory more sequentially, and when we're
	 * called, the block is usually pretty full.
	 */
	for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
		struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
		if (le->le_type != ZAP_CHUNK_ENTRY)
			continue;

		if (le->le_hash & (1ULL << bit))
			zap_leaf_transfer_entry(l, i, nl);
		else
			(void) zap_leaf_rehash_entry(l, i);
	}
}
808
/*
 * Accumulate histogram statistics about leaf l into *zs
 * (for "zdb"-style reporting).  Each histogram index is clamped
 * to ZAP_HISTOGRAM_SIZE-1.
 */
void
zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
{
	int i, n;

	/* How many pointer-table entries point at this leaf (as 2^n). */
	n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
	    l->l_phys->l_hdr.lh_prefix_len;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_leafs_with_2n_pointers[n]++;


	/* Entry count, bucketed in groups of 5. */
	n = l->l_phys->l_hdr.lh_nentries/5;
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_blocks_with_n5_entries[n]++;

	/* Space utilization in tenths of the block. */
	n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
	    l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
	    (1<<FZAP_BLOCK_SHIFT(zap));
	n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
	zs->zs_blocks_n_tenths_full[n]++;

	/* Per-bucket: chain length and chunks used per entry. */
	for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
		int nentries = 0;
		int chunk = l->l_phys->l_hash[i];

		while (chunk != CHAIN_END) {
			struct zap_leaf_entry *le =
			    ZAP_LEAF_ENTRY(l, chunk);

			/* Entry chunk + name array chunks + value array. */
			n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_length) +
			    ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length *
			    le->le_int_size);
			n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
			zs->zs_entries_using_n_chunks[n]++;

			chunk = le->le_next;
			nentries++;
		}

		n = nentries;
		n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
		zs->zs_buckets_with_n_entries[n]++;
	}
}