Fix minor acl issue
[zfs.git] / lib / libefi / rdwr_efi.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <strings.h>
30 #include <unistd.h>
31 #include <uuid/uuid.h>
32 #include <libintl.h>
33 #include <sys/types.h>
34 #include <sys/dkio.h>
35 #include <sys/vtoc.h>
36 #include <sys/mhd.h>
37 #include <sys/param.h>
38 #include <sys/dktp/fdisk.h>
39 #include <sys/efi_partition.h>
40 #include <sys/byteorder.h>
41 #include <sys/ddi.h>
42
43 static struct uuid_to_ptag {
44         struct uuid     uuid;
45 } conversion_array[] = {
46         { EFI_UNUSED },
47         { EFI_BOOT },
48         { EFI_ROOT },
49         { EFI_SWAP },
50         { EFI_USR },
51         { EFI_BACKUP },
52         { 0 },                  /* STAND is never used */
53         { EFI_VAR },
54         { EFI_HOME },
55         { EFI_ALTSCTR },
56         { 0 },                  /* CACHE (cachefs) is never used */
57         { EFI_RESERVED },
58         { EFI_SYSTEM },
59         { EFI_LEGACY_MBR },
60         { EFI_SYMC_PUB },
61         { EFI_SYMC_CDS },
62         { EFI_MSFT_RESV },
63         { EFI_DELL_BASIC },
64         { EFI_DELL_RAID },
65         { EFI_DELL_SWAP },
66         { EFI_DELL_LVM },
67         { EFI_DELL_RESV },
68         { EFI_AAPL_HFS },
69         { EFI_AAPL_UFS }
70 };
71
72 /*
73  * Default vtoc information for non-SVr4 partitions
74  */
75 struct dk_map2  default_vtoc_map[NDKMAP] = {
76         {       V_ROOT,         0       },              /* a - 0 */
77         {       V_SWAP,         V_UNMNT },              /* b - 1 */
78         {       V_BACKUP,       V_UNMNT },              /* c - 2 */
79         {       V_UNASSIGNED,   0       },              /* d - 3 */
80         {       V_UNASSIGNED,   0       },              /* e - 4 */
81         {       V_UNASSIGNED,   0       },              /* f - 5 */
82         {       V_USR,          0       },              /* g - 6 */
83         {       V_UNASSIGNED,   0       },              /* h - 7 */
84
85 #if defined(_SUNOS_VTOC_16)
86
87 #if defined(i386) || defined(__amd64)
88         {       V_BOOT,         V_UNMNT },              /* i - 8 */
89         {       V_ALTSCTR,      0       },              /* j - 9 */
90
91 #else
92 #error No VTOC format defined.
93 #endif                  /* defined(i386) */
94
95         {       V_UNASSIGNED,   0       },              /* k - 10 */
96         {       V_UNASSIGNED,   0       },              /* l - 11 */
97         {       V_UNASSIGNED,   0       },              /* m - 12 */
98         {       V_UNASSIGNED,   0       },              /* n - 13 */
99         {       V_UNASSIGNED,   0       },              /* o - 14 */
100         {       V_UNASSIGNED,   0       },              /* p - 15 */
101 #endif                  /* defined(_SUNOS_VTOC_16) */
102 };
103
/*
 * When non-zero, the routines in this file report failures on stderr.
 * Enabled by default only in DEBUG builds.
 */
#if defined(DEBUG)
int efi_debug = 1;
#else
int efi_debug = 0;
#endif

/* CRC-32 helper; declared here, not defined in the visible part of this file */
extern unsigned int	efi_crc32(const unsigned char *, unsigned int);
/* forward declaration; the definition appears further down */
static int		efi_read(int, struct dk_gpt *);
112
113 static int
114 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
115 {
116         struct dk_minfo         disk_info;
117
118         if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1)
119                 return (errno);
120         *capacity = disk_info.dki_capacity;
121         *lbsize = disk_info.dki_lbsize;
122         return (0);
123 }
124
/*
 * Number of blocks an EFI label occupies for p partition entries on a
 * device with l-byte blocks: the entry array rounded up to whole
 * blocks, plus one more block.
 */
#define	NBLOCKS(p, l)	(((((p) * (int)sizeof (efi_gpe_t)) + \
				((l) - 1)) / (l)) + 1)
/*
 * Largest partition count for which a dk_gpt plus its dk_part array
 * still fits in 4294967295 bytes -- limited by what we can malloc.
 */
#define	MAX_PARTS	((4294967295UL - sizeof (struct dk_gpt)) / \
			    sizeof (struct dk_part))
134
135 int
136 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
137 {
138         diskaddr_t      capacity;
139         uint_t          lbsize;
140         uint_t          nblocks;
141         size_t          length;
142         struct dk_gpt   *vptr;
143         struct uuid     uuid;
144
145         if (read_disk_info(fd, &capacity, &lbsize) != 0) {
146                 if (efi_debug)
147                         (void) fprintf(stderr,
148                             "couldn't read disk information\n");
149                 return (-1);
150         }
151
152         nblocks = NBLOCKS(nparts, lbsize);
153         if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
154                 /* 16K plus one block for the GPT */
155                 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1;
156         }
157
158         if (nparts > MAX_PARTS) {
159                 if (efi_debug) {
160                         (void) fprintf(stderr,
161                         "the maximum number of partitions supported is %lu\n",
162                             MAX_PARTS);
163                 }
164                 return (-1);
165         }
166
167         length = sizeof (struct dk_gpt) +
168             sizeof (struct dk_part) * (nparts - 1);
169
170         if ((*vtoc = calloc(length, 1)) == NULL)
171                 return (-1);
172
173         vptr = *vtoc;
174
175         vptr->efi_version = EFI_VERSION_CURRENT;
176         vptr->efi_lbasize = lbsize;
177         vptr->efi_nparts = nparts;
178         /*
179          * add one block here for the PMBR; on disks with a 512 byte
180          * block size and 128 or fewer partitions, efi_first_u_lba
181          * should work out to "34"
182          */
183         vptr->efi_first_u_lba = nblocks + 1;
184         vptr->efi_last_lba = capacity - 1;
185         vptr->efi_altern_lba = capacity -1;
186         vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks;
187
188         (void) uuid_generate((uchar_t *)&uuid);
189         UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid);
190         return (0);
191 }
192
193 /*
194  * Read EFI - return partition number upon success.
195  */
196 int
197 efi_alloc_and_read(int fd, struct dk_gpt **vtoc)
198 {
199         int                     rval;
200         uint32_t                nparts;
201         int                     length;
202
203         /* figure out the number of entries that would fit into 16K */
204         nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t);
205         length = (int) sizeof (struct dk_gpt) +
206             (int) sizeof (struct dk_part) * (nparts - 1);
207         if ((*vtoc = calloc(length, 1)) == NULL)
208                 return (VT_ERROR);
209
210         (*vtoc)->efi_nparts = nparts;
211         rval = efi_read(fd, *vtoc);
212
213         if ((rval == VT_EINVAL) && (*vtoc)->efi_nparts > nparts) {
214                 void *tmp;
215                 length = (int) sizeof (struct dk_gpt) +
216                     (int) sizeof (struct dk_part) *
217                     ((*vtoc)->efi_nparts - 1);
218                 nparts = (*vtoc)->efi_nparts;
219                 if ((tmp = realloc(*vtoc, length)) == NULL) {
220                         free (*vtoc);
221                         *vtoc = NULL;
222                         return (VT_ERROR);
223                 } else {
224                         *vtoc = tmp;
225                         rval = efi_read(fd, *vtoc);
226                 }
227         }
228
229         if (rval < 0) {
230                 if (efi_debug) {
231                         (void) fprintf(stderr,
232                             "read of EFI table failed, rval=%d\n", rval);
233                 }
234                 free (*vtoc);
235                 *vtoc = NULL;
236         }
237
238         return (rval);
239 }
240
241 static int
242 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
243 {
244         void *data = dk_ioc->dki_data;
245         int error;
246
247         dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data;
248         error = ioctl(fd, cmd, (void *)dk_ioc);
249         dk_ioc->dki_data = data;
250
251         return (error);
252 }
253
254 static int
255 check_label(int fd, dk_efi_t *dk_ioc)
256 {
257         efi_gpt_t               *efi;
258         uint_t                  crc;
259
260         if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) {
261                 switch (errno) {
262                 case EIO:
263                         return (VT_EIO);
264                 default:
265                         return (VT_ERROR);
266                 }
267         }
268         efi = dk_ioc->dki_data;
269         if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
270                 if (efi_debug)
271                         (void) fprintf(stderr,
272                             "Bad EFI signature: 0x%llx != 0x%llx\n",
273                             (long long)efi->efi_gpt_Signature,
274                             (long long)LE_64(EFI_SIGNATURE));
275                 return (VT_EINVAL);
276         }
277
278         /*
279          * check CRC of the header; the size of the header should
280          * never be larger than one block
281          */
282         crc = efi->efi_gpt_HeaderCRC32;
283         efi->efi_gpt_HeaderCRC32 = 0;
284
285         if (((len_t)LE_32(efi->efi_gpt_HeaderSize) > dk_ioc->dki_length) ||
286             crc != LE_32(efi_crc32((unsigned char *)efi,
287             LE_32(efi->efi_gpt_HeaderSize)))) {
288                 if (efi_debug)
289                         (void) fprintf(stderr,
290                             "Bad EFI CRC: 0x%x != 0x%x\n",
291                             crc,
292                             LE_32(efi_crc32((unsigned char *)efi,
293                             sizeof (struct efi_gpt))));
294                 return (VT_EINVAL);
295         }
296
297         return (0);
298 }
299
300 static int
301 efi_read(int fd, struct dk_gpt *vtoc)
302 {
303         int                     i, j;
304         int                     label_len;
305         int                     rval = 0;
306         int                     md_flag = 0;
307         int                     vdc_flag = 0;
308         struct dk_minfo         disk_info;
309         dk_efi_t                dk_ioc;
310         efi_gpt_t               *efi;
311         efi_gpe_t               *efi_parts;
312         struct dk_cinfo         dki_info;
313         uint32_t                user_length;
314         boolean_t               legacy_label = B_FALSE;
315
316         /*
317          * get the partition number for this file descriptor.
318          */
319         if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
320                 if (efi_debug) {
321                         (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
322                 }
323                 switch (errno) {
324                 case EIO:
325                         return (VT_EIO);
326                 case EINVAL:
327                         return (VT_EINVAL);
328                 default:
329                         return (VT_ERROR);
330                 }
331         }
332         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
333             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
334                 md_flag++;
335         } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) &&
336             (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) {
337                 /*
338                  * The controller and drive name "vdc" (virtual disk client)
339                  * indicates a LDoms virtual disk.
340                  */
341                 vdc_flag++;
342         }
343
344         /* get the LBA size */
345         if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) {
346                 if (efi_debug) {
347                         (void) fprintf(stderr,
348                             "assuming LBA 512 bytes %d\n",
349                             errno);
350                 }
351                 disk_info.dki_lbsize = DEV_BSIZE;
352         }
353         if (disk_info.dki_lbsize == 0) {
354                 if (efi_debug) {
355                         (void) fprintf(stderr,
356                             "efi_read: assuming LBA 512 bytes\n");
357                 }
358                 disk_info.dki_lbsize = DEV_BSIZE;
359         }
360         /*
361          * Read the EFI GPT to figure out how many partitions we need
362          * to deal with.
363          */
364         dk_ioc.dki_lba = 1;
365         if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) {
366                 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize;
367         } else {
368                 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) +
369                     disk_info.dki_lbsize;
370                 if (label_len % disk_info.dki_lbsize) {
371                         /* pad to physical sector size */
372                         label_len += disk_info.dki_lbsize;
373                         label_len &= ~(disk_info.dki_lbsize - 1);
374                 }
375         }
376
377         if ((dk_ioc.dki_data = calloc(label_len, 1)) == NULL)
378                 return (VT_ERROR);
379
380         dk_ioc.dki_length = disk_info.dki_lbsize;
381         user_length = vtoc->efi_nparts;
382         efi = dk_ioc.dki_data;
383         if (md_flag) {
384                 dk_ioc.dki_length = label_len;
385                 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
386                         switch (errno) {
387                         case EIO:
388                                 return (VT_EIO);
389                         default:
390                                 return (VT_ERROR);
391                         }
392                 }
393         } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) {
394                 /*
395                  * No valid label here; try the alternate. Note that here
396                  * we just read GPT header and save it into dk_ioc.data,
397                  * Later, we will read GUID partition entry array if we
398                  * can get valid GPT header.
399                  */
400
401                 /*
402                  * This is a workaround for legacy systems. In the past, the
403                  * last sector of SCSI disk was invisible on x86 platform. At
404                  * that time, backup label was saved on the next to the last
405                  * sector. It is possible for users to move a disk from previous
406                  * solaris system to present system. Here, we attempt to search
407                  * legacy backup EFI label first.
408                  */
409                 dk_ioc.dki_lba = disk_info.dki_capacity - 2;
410                 dk_ioc.dki_length = disk_info.dki_lbsize;
411                 rval = check_label(fd, &dk_ioc);
412                 if (rval == VT_EINVAL) {
413                         /*
414                          * we didn't find legacy backup EFI label, try to
415                          * search backup EFI label in the last block.
416                          */
417                         dk_ioc.dki_lba = disk_info.dki_capacity - 1;
418                         dk_ioc.dki_length = disk_info.dki_lbsize;
419                         rval = check_label(fd, &dk_ioc);
420                         if (rval == 0) {
421                                 legacy_label = B_TRUE;
422                                 if (efi_debug)
423                                         (void) fprintf(stderr,
424                                             "efi_read: primary label corrupt; "
425                                             "using EFI backup label located on"
426                                             " the last block\n");
427                         }
428                 } else {
429                         if ((efi_debug) && (rval == 0))
430                                 (void) fprintf(stderr, "efi_read: primary label"
431                                     " corrupt; using legacy EFI backup label "
432                                     " located on the next to last block\n");
433                 }
434
435                 if (rval == 0) {
436                         dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
437                         vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT;
438                         vtoc->efi_nparts =
439                             LE_32(efi->efi_gpt_NumberOfPartitionEntries);
440                         /*
441                          * Partition tables are between backup GPT header
442                          * table and ParitionEntryLBA (the starting LBA of
443                          * the GUID partition entries array). Now that we
444                          * already got valid GPT header and saved it in
445                          * dk_ioc.dki_data, we try to get GUID partition
446                          * entry array here.
447                          */
448                         /* LINTED */
449                         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
450                             + disk_info.dki_lbsize);
451                         if (legacy_label)
452                                 dk_ioc.dki_length = disk_info.dki_capacity - 1 -
453                                     dk_ioc.dki_lba;
454                         else
455                                 dk_ioc.dki_length = disk_info.dki_capacity - 2 -
456                                     dk_ioc.dki_lba;
457                         dk_ioc.dki_length *= disk_info.dki_lbsize;
458                         if (dk_ioc.dki_length >
459                             ((len_t)label_len - sizeof (*dk_ioc.dki_data))) {
460                                 rval = VT_EINVAL;
461                         } else {
462                                 /*
463                                  * read GUID partition entry array
464                                  */
465                                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
466                         }
467                 }
468
469         } else if (rval == 0) {
470
471                 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
472                 /* LINTED */
473                 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
474                     + disk_info.dki_lbsize);
475                 dk_ioc.dki_length = label_len - disk_info.dki_lbsize;
476                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
477
478         } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) {
479                 /*
480                  * When the device is a LDoms virtual disk, the DKIOCGETEFI
481                  * ioctl can fail with EINVAL if the virtual disk backend
482                  * is a ZFS volume serviced by a domain running an old version
483                  * of Solaris. This is because the DKIOCGETEFI ioctl was
484                  * initially incorrectly implemented for a ZFS volume and it
485                  * expected the GPT and GPE to be retrieved with a single ioctl.
486                  * So we try to read the GPT and the GPE using that old style
487                  * ioctl.
488                  */
489                 dk_ioc.dki_lba = 1;
490                 dk_ioc.dki_length = label_len;
491                 rval = check_label(fd, &dk_ioc);
492         }
493
494         if (rval < 0) {
495                 free(efi);
496                 return (rval);
497         }
498
499         /* LINTED -- always longlong aligned */
500         efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize);
501
502         /*
503          * Assemble this into a "dk_gpt" struct for easier
504          * digestibility by applications.
505          */
506         vtoc->efi_version = LE_32(efi->efi_gpt_Revision);
507         vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries);
508         vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry);
509         vtoc->efi_lbasize = disk_info.dki_lbsize;
510         vtoc->efi_last_lba = disk_info.dki_capacity - 1;
511         vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA);
512         vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA);
513         vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
514         UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID);
515
516         /*
517          * If the array the user passed in is too small, set the length
518          * to what it needs to be and return
519          */
520         if (user_length < vtoc->efi_nparts) {
521                 return (VT_EINVAL);
522         }
523
524         for (i = 0; i < vtoc->efi_nparts; i++) {
525
526                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid,
527                     efi_parts[i].efi_gpe_PartitionTypeGUID);
528
529                 for (j = 0;
530                     j < sizeof (conversion_array)
531                     / sizeof (struct uuid_to_ptag); j++) {
532
533                         if (bcmp(&vtoc->efi_parts[i].p_guid,
534                             &conversion_array[j].uuid,
535                             sizeof (struct uuid)) == 0) {
536                                 vtoc->efi_parts[i].p_tag = j;
537                                 break;
538                         }
539                 }
540                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
541                         continue;
542                 vtoc->efi_parts[i].p_flag =
543                     LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs);
544                 vtoc->efi_parts[i].p_start =
545                     LE_64(efi_parts[i].efi_gpe_StartingLBA);
546                 vtoc->efi_parts[i].p_size =
547                     LE_64(efi_parts[i].efi_gpe_EndingLBA) -
548                     vtoc->efi_parts[i].p_start + 1;
549                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
550                         vtoc->efi_parts[i].p_name[j] =
551                             (uchar_t)LE_16(
552                             efi_parts[i].efi_gpe_PartitionName[j]);
553                 }
554
555                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid,
556                     efi_parts[i].efi_gpe_UniquePartitionGUID);
557         }
558         free(efi);
559
560         return (dki_info.dki_partition);
561 }
562
563 /* writes a "protective" MBR */
564 static int
565 write_pmbr(int fd, struct dk_gpt *vtoc)
566 {
567         dk_efi_t        dk_ioc;
568         struct mboot    mb;
569         uchar_t         *cp;
570         diskaddr_t      size_in_lba;
571         uchar_t         *buf;
572         int             len;
573
574         len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
575         buf = calloc(len, 1);
576
577         /*
578          * Preserve any boot code and disk signature if the first block is
579          * already an MBR.
580          */
581         dk_ioc.dki_lba = 0;
582         dk_ioc.dki_length = len;
583         /* LINTED -- always longlong aligned */
584         dk_ioc.dki_data = (efi_gpt_t *)buf;
585         if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
586                 (void *) memcpy(&mb, buf, sizeof (mb));
587                 bzero(&mb, sizeof (mb));
588                 mb.signature = LE_16(MBB_MAGIC);
589         } else {
590                 (void *) memcpy(&mb, buf, sizeof (mb));
591                 if (mb.signature != LE_16(MBB_MAGIC)) {
592                         bzero(&mb, sizeof (mb));
593                         mb.signature = LE_16(MBB_MAGIC);
594                 }
595         }
596
597         bzero(&mb.parts, sizeof (mb.parts));
598         cp = (uchar_t *)&mb.parts[0];
599         /* bootable or not */
600         *cp++ = 0;
601         /* beginning CHS; 0xffffff if not representable */
602         *cp++ = 0xff;
603         *cp++ = 0xff;
604         *cp++ = 0xff;
605         /* OS type */
606         *cp++ = EFI_PMBR;
607         /* ending CHS; 0xffffff if not representable */
608         *cp++ = 0xff;
609         *cp++ = 0xff;
610         *cp++ = 0xff;
611         /* starting LBA: 1 (little endian format) by EFI definition */
612         *cp++ = 0x01;
613         *cp++ = 0x00;
614         *cp++ = 0x00;
615         *cp++ = 0x00;
616         /* ending LBA: last block on the disk (little endian format) */
617         size_in_lba = vtoc->efi_last_lba;
618         if (size_in_lba < 0xffffffff) {
619                 *cp++ = (size_in_lba & 0x000000ff);
620                 *cp++ = (size_in_lba & 0x0000ff00) >> 8;
621                 *cp++ = (size_in_lba & 0x00ff0000) >> 16;
622                 *cp++ = (size_in_lba & 0xff000000) >> 24;
623         } else {
624                 *cp++ = 0xff;
625                 *cp++ = 0xff;
626                 *cp++ = 0xff;
627                 *cp++ = 0xff;
628         }
629
630         (void *) memcpy(buf, &mb, sizeof (mb));
631         /* LINTED -- always longlong aligned */
632         dk_ioc.dki_data = (efi_gpt_t *)buf;
633         dk_ioc.dki_lba = 0;
634         dk_ioc.dki_length = len;
635         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
636                 free(buf);
637                 switch (errno) {
638                 case EIO:
639                         return (VT_EIO);
640                 case EINVAL:
641                         return (VT_EINVAL);
642                 default:
643                         return (VT_ERROR);
644                 }
645         }
646         free(buf);
647         return (0);
648 }
649
650 /* make sure the user specified something reasonable */
651 static int
652 check_input(struct dk_gpt *vtoc)
653 {
654         int                     resv_part = -1;
655         int                     i, j;
656         diskaddr_t              istart, jstart, isize, jsize, endsect;
657
658         /*
659          * Sanity-check the input (make sure no partitions overlap)
660          */
661         for (i = 0; i < vtoc->efi_nparts; i++) {
662                 /* It can't be unassigned and have an actual size */
663                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
664                     (vtoc->efi_parts[i].p_size != 0)) {
665                         if (efi_debug) {
666                                 (void) fprintf(stderr,
667 "partition %d is \"unassigned\" but has a size of %llu",
668                                     i,
669                                     vtoc->efi_parts[i].p_size);
670                         }
671                         return (VT_EINVAL);
672                 }
673                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
674                         if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
675                                 continue;
676                         /* we have encountered an unknown uuid */
677                         vtoc->efi_parts[i].p_tag = 0xff;
678                 }
679                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
680                         if (resv_part != -1) {
681                                 if (efi_debug) {
682                                         (void) fprintf(stderr,
683 "found duplicate reserved partition at %d\n",
684                                             i);
685                                 }
686                                 return (VT_EINVAL);
687                         }
688                         resv_part = i;
689                 }
690                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
691                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
692                         if (efi_debug) {
693                                 (void) fprintf(stderr,
694                                     "Partition %d starts at %llu.  ",
695                                     i,
696                                     vtoc->efi_parts[i].p_start);
697                                 (void) fprintf(stderr,
698                                     "It must be between %llu and %llu.\n",
699                                     vtoc->efi_first_u_lba,
700                                     vtoc->efi_last_u_lba);
701                         }
702                         return (VT_EINVAL);
703                 }
704                 if ((vtoc->efi_parts[i].p_start +
705                     vtoc->efi_parts[i].p_size <
706                     vtoc->efi_first_u_lba) ||
707                     (vtoc->efi_parts[i].p_start +
708                     vtoc->efi_parts[i].p_size >
709                     vtoc->efi_last_u_lba + 1)) {
710                         if (efi_debug) {
711                                 (void) fprintf(stderr,
712                                     "Partition %d ends at %llu.  ",
713                                     i,
714                                     vtoc->efi_parts[i].p_start +
715                                     vtoc->efi_parts[i].p_size);
716                                 (void) fprintf(stderr,
717                                     "It must be between %llu and %llu.\n",
718                                     vtoc->efi_first_u_lba,
719                                     vtoc->efi_last_u_lba);
720                         }
721                         return (VT_EINVAL);
722                 }
723
724                 for (j = 0; j < vtoc->efi_nparts; j++) {
725                         isize = vtoc->efi_parts[i].p_size;
726                         jsize = vtoc->efi_parts[j].p_size;
727                         istart = vtoc->efi_parts[i].p_start;
728                         jstart = vtoc->efi_parts[j].p_start;
729                         if ((i != j) && (isize != 0) && (jsize != 0)) {
730                                 endsect = jstart + jsize -1;
731                                 if ((jstart <= istart) &&
732                                     (istart <= endsect)) {
733                                         if (efi_debug) {
734                                                 (void) fprintf(stderr,
735 "Partition %d overlaps partition %d.",
736                                                     i, j);
737                                         }
738                                         return (VT_EINVAL);
739                                 }
740                         }
741                 }
742         }
743         /* just a warning for now */
744         if ((resv_part == -1) && efi_debug) {
745                 (void) fprintf(stderr,
746                     "no reserved partition found\n");
747         }
748         return (0);
749 }
750
751 /*
752  * add all the unallocated space to the current label
753  */
754 int
755 efi_use_whole_disk(int fd)
756 {
757         struct dk_gpt           *efi_label;
758         int                     rval;
759         int                     i;
760         uint_t                  phy_last_slice = 0;
761         diskaddr_t              pl_start = 0;
762         diskaddr_t              pl_size;
763
764         rval = efi_alloc_and_read(fd, &efi_label);
765         if (rval < 0) {
766                 return (rval);
767         }
768
769         /* find the last physically non-zero partition */
770         for (i = 0; i < efi_label->efi_nparts - 2; i ++) {
771                 if (pl_start < efi_label->efi_parts[i].p_start) {
772                         pl_start = efi_label->efi_parts[i].p_start;
773                         phy_last_slice = i;
774                 }
775         }
776         pl_size = efi_label->efi_parts[phy_last_slice].p_size;
777
778         /*
779          * If alter_lba is 1, we are using the backup label.
780          * Since we can locate the backup label by disk capacity,
781          * there must be no unallocated space.
782          */
783         if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba
784             >= efi_label->efi_last_lba)) {
785                 if (efi_debug) {
786                         (void) fprintf(stderr,
787                             "efi_use_whole_disk: requested space not found\n");
788                 }
789                 efi_free(efi_label);
790                 return (VT_ENOSPC);
791         }
792
793         /*
794          * If there is space between the last physically non-zero partition
795          * and the reserved partition, just add the unallocated space to this
796          * area. Otherwise, the unallocated space is added to the last
797          * physically non-zero partition.
798          */
799         if (pl_start + pl_size - 1 == efi_label->efi_last_u_lba -
800             EFI_MIN_RESV_SIZE) {
801                 efi_label->efi_parts[phy_last_slice].p_size +=
802                     efi_label->efi_last_lba - efi_label->efi_altern_lba;
803         }
804
805         /*
806          * Move the reserved partition. There is currently no data in
807          * here except fabricated devids (which get generated via
808          * efi_write()). So there is no need to copy data.
809          */
810         efi_label->efi_parts[efi_label->efi_nparts - 1].p_start +=
811             efi_label->efi_last_lba - efi_label->efi_altern_lba;
812         efi_label->efi_last_u_lba += efi_label->efi_last_lba
813             - efi_label->efi_altern_lba;
814
815         rval = efi_write(fd, efi_label);
816         if (rval < 0) {
817                 if (efi_debug) {
818                         (void) fprintf(stderr,
819                             "efi_use_whole_disk:fail to write label, rval=%d\n",
820                             rval);
821                 }
822                 efi_free(efi_label);
823                 return (rval);
824         }
825
826         efi_free(efi_label);
827         return (0);
828 }
829
830
831 /*
832  * write EFI label and backup label
833  */
834 int
835 efi_write(int fd, struct dk_gpt *vtoc)
836 {
837         dk_efi_t                dk_ioc;
838         efi_gpt_t               *efi;
839         efi_gpe_t               *efi_parts;
840         int                     i, j;
841         struct dk_cinfo         dki_info;
842         int                     md_flag = 0;
843         int                     nblocks;
844         diskaddr_t              lba_backup_gpt_hdr;
845
846         if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
847                 if (efi_debug)
848                         (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
849                 switch (errno) {
850                 case EIO:
851                         return (VT_EIO);
852                 case EINVAL:
853                         return (VT_EINVAL);
854                 default:
855                         return (VT_ERROR);
856                 }
857         }
858
859         /* check if we are dealing wih a metadevice */
860         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
861             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
862                 md_flag = 1;
863         }
864
865         if (check_input(vtoc)) {
866                 /*
867                  * not valid; if it's a metadevice just pass it down
868                  * because SVM will do its own checking
869                  */
870                 if (md_flag == 0) {
871                         return (VT_EINVAL);
872                 }
873         }
874
875         dk_ioc.dki_lba = 1;
876         if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) {
877                 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize;
878         } else {
879                 dk_ioc.dki_length = NBLOCKS(vtoc->efi_nparts,
880                     vtoc->efi_lbasize) *
881                     vtoc->efi_lbasize;
882         }
883
884         /*
885          * the number of blocks occupied by GUID partition entry array
886          */
887         nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1;
888
889         /*
890          * Backup GPT header is located on the block after GUID
891          * partition entry array. Here, we calculate the address
892          * for backup GPT header.
893          */
894         lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
895         if ((dk_ioc.dki_data = calloc(dk_ioc.dki_length, 1)) == NULL)
896                 return (VT_ERROR);
897
898         efi = dk_ioc.dki_data;
899
900         /* stuff user's input into EFI struct */
901         efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
902         efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */
903         efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt));
904         efi->efi_gpt_Reserved1 = 0;
905         efi->efi_gpt_MyLBA = LE_64(1ULL);
906         efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr);
907         efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba);
908         efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba);
909         efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL);
910         efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts);
911         efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe));
912         UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid);
913
914         /* LINTED -- always longlong aligned */
915         efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize);
916
917         for (i = 0; i < vtoc->efi_nparts; i++) {
918                 for (j = 0;
919                     j < sizeof (conversion_array) /
920                     sizeof (struct uuid_to_ptag); j++) {
921
922                         if (vtoc->efi_parts[i].p_tag == j) {
923                                 UUID_LE_CONVERT(
924                                     efi_parts[i].efi_gpe_PartitionTypeGUID,
925                                     conversion_array[j].uuid);
926                                 break;
927                         }
928                 }
929
930                 if (j == sizeof (conversion_array) /
931                     sizeof (struct uuid_to_ptag)) {
932                         /*
933                          * If we didn't have a matching uuid match, bail here.
934                          * Don't write a label with unknown uuid.
935                          */
936                         if (efi_debug) {
937                                 (void) fprintf(stderr,
938                                     "Unknown uuid for p_tag %d\n",
939                                     vtoc->efi_parts[i].p_tag);
940                         }
941                         return (VT_EINVAL);
942                 }
943
944                 efi_parts[i].efi_gpe_StartingLBA =
945                     LE_64(vtoc->efi_parts[i].p_start);
946                 efi_parts[i].efi_gpe_EndingLBA =
947                     LE_64(vtoc->efi_parts[i].p_start +
948                     vtoc->efi_parts[i].p_size - 1);
949                 efi_parts[i].efi_gpe_Attributes.PartitionAttrs =
950                     LE_16(vtoc->efi_parts[i].p_flag);
951                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
952                         efi_parts[i].efi_gpe_PartitionName[j] =
953                             LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]);
954                 }
955                 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) &&
956                     uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) {
957                         (void) uuid_generate((uchar_t *)
958                             &vtoc->efi_parts[i].p_uguid);
959                 }
960                 bcopy(&vtoc->efi_parts[i].p_uguid,
961                     &efi_parts[i].efi_gpe_UniquePartitionGUID,
962                     sizeof (uuid_t));
963         }
964         efi->efi_gpt_PartitionEntryArrayCRC32 =
965             LE_32(efi_crc32((unsigned char *)efi_parts,
966             vtoc->efi_nparts * (int)sizeof (struct efi_gpe)));
967         efi->efi_gpt_HeaderCRC32 =
968             LE_32(efi_crc32((unsigned char *)efi, sizeof (struct efi_gpt)));
969
970         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
971                 free(dk_ioc.dki_data);
972                 switch (errno) {
973                 case EIO:
974                         return (VT_EIO);
975                 case EINVAL:
976                         return (VT_EINVAL);
977                 default:
978                         return (VT_ERROR);
979                 }
980         }
981         /* if it's a metadevice we're done */
982         if (md_flag) {
983                 free(dk_ioc.dki_data);
984                 return (0);
985         }
986
987         /* write backup partition array */
988         dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1;
989         dk_ioc.dki_length -= vtoc->efi_lbasize;
990         /* LINTED */
991         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data +
992             vtoc->efi_lbasize);
993
994         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
995                 /*
996                  * we wrote the primary label okay, so don't fail
997                  */
998                 if (efi_debug) {
999                         (void) fprintf(stderr,
1000                             "write of backup partitions to block %llu "
1001                             "failed, errno %d\n",
1002                             vtoc->efi_last_u_lba + 1,
1003                             errno);
1004                 }
1005         }
1006         /*
1007          * now swap MyLBA and AlternateLBA fields and write backup
1008          * partition table header
1009          */
1010         dk_ioc.dki_lba = lba_backup_gpt_hdr;
1011         dk_ioc.dki_length = vtoc->efi_lbasize;
1012         /* LINTED */
1013         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data -
1014             vtoc->efi_lbasize);
1015         efi->efi_gpt_AlternateLBA = LE_64(1ULL);
1016         efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr);
1017         efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1);
1018         efi->efi_gpt_HeaderCRC32 = 0;
1019         efi->efi_gpt_HeaderCRC32 =
1020             LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data,
1021             sizeof (struct efi_gpt)));
1022
1023         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1024                 if (efi_debug) {
1025                         (void) fprintf(stderr,
1026                             "write of backup header to block %llu failed, "
1027                             "errno %d\n",
1028                             lba_backup_gpt_hdr,
1029                             errno);
1030                 }
1031         }
1032         /* write the PMBR */
1033         (void) write_pmbr(fd, vtoc);
1034         free(dk_ioc.dki_data);
1035         return (0);
1036 }
1037
/*
 * Release a label structure.  The pointer is handed straight to free(),
 * so a NULL argument is harmless.
 */
void
efi_free(struct dk_gpt *ptr)
{
        free(ptr);
}
1043
1044 /*
1045  * Input: File descriptor
1046  * Output: 1 if disk has an EFI label, or > 2TB with no VTOC or legacy MBR.
1047  * Otherwise 0.
1048  */
1049 int
1050 efi_type(int fd)
1051 {
1052         struct vtoc vtoc;
1053         struct extvtoc extvtoc;
1054
1055         if (ioctl(fd, DKIOCGEXTVTOC, &extvtoc) == -1) {
1056                 if (errno == ENOTSUP)
1057                         return (1);
1058                 else if (errno == ENOTTY) {
1059                         if (ioctl(fd, DKIOCGVTOC, &vtoc) == -1)
1060                                 if (errno == ENOTSUP)
1061                                         return (1);
1062                 }
1063         }
1064         return (0);
1065 }
1066
1067 void
1068 efi_err_check(struct dk_gpt *vtoc)
1069 {
1070         int                     resv_part = -1;
1071         int                     i, j;
1072         diskaddr_t              istart, jstart, isize, jsize, endsect;
1073         int                     overlap = 0;
1074
1075         /*
1076          * make sure no partitions overlap
1077          */
1078         for (i = 0; i < vtoc->efi_nparts; i++) {
1079                 /* It can't be unassigned and have an actual size */
1080                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
1081                     (vtoc->efi_parts[i].p_size != 0)) {
1082                         (void) fprintf(stderr,
1083                             "partition %d is \"unassigned\" but has a size "
1084                             "of %llu\n", i, vtoc->efi_parts[i].p_size);
1085                 }
1086                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
1087                         continue;
1088                 }
1089                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
1090                         if (resv_part != -1) {
1091                                 (void) fprintf(stderr,
1092                                     "found duplicate reserved partition at "
1093                                     "%d\n", i);
1094                         }
1095                         resv_part = i;
1096                         if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE)
1097                                 (void) fprintf(stderr,
1098                                     "Warning: reserved partition size must "
1099                                     "be %d sectors\n", EFI_MIN_RESV_SIZE);
1100                 }
1101                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
1102                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
1103                         (void) fprintf(stderr,
1104                             "Partition %d starts at %llu\n",
1105                             i,
1106                             vtoc->efi_parts[i].p_start);
1107                         (void) fprintf(stderr,
1108                             "It must be between %llu and %llu.\n",
1109                             vtoc->efi_first_u_lba,
1110                             vtoc->efi_last_u_lba);
1111                 }
1112                 if ((vtoc->efi_parts[i].p_start +
1113                     vtoc->efi_parts[i].p_size <
1114                     vtoc->efi_first_u_lba) ||
1115                     (vtoc->efi_parts[i].p_start +
1116                     vtoc->efi_parts[i].p_size >
1117                     vtoc->efi_last_u_lba + 1)) {
1118                         (void) fprintf(stderr,
1119                             "Partition %d ends at %llu\n",
1120                             i,
1121                             vtoc->efi_parts[i].p_start +
1122                             vtoc->efi_parts[i].p_size);
1123                         (void) fprintf(stderr,
1124                             "It must be between %llu and %llu.\n",
1125                             vtoc->efi_first_u_lba,
1126                             vtoc->efi_last_u_lba);
1127                 }
1128
1129                 for (j = 0; j < vtoc->efi_nparts; j++) {
1130                         isize = vtoc->efi_parts[i].p_size;
1131                         jsize = vtoc->efi_parts[j].p_size;
1132                         istart = vtoc->efi_parts[i].p_start;
1133                         jstart = vtoc->efi_parts[j].p_start;
1134                         if ((i != j) && (isize != 0) && (jsize != 0)) {
1135                                 endsect = jstart + jsize -1;
1136                                 if ((jstart <= istart) &&
1137                                     (istart <= endsect)) {
1138                                         if (!overlap) {
1139                                         (void) fprintf(stderr,
1140                                             "label error: EFI Labels do not "
1141                                             "support overlapping partitions\n");
1142                                         }
1143                                         (void) fprintf(stderr,
1144                                             "Partition %d overlaps partition "
1145                                             "%d.\n", i, j);
1146                                         overlap = 1;
1147                                 }
1148                         }
1149                 }
1150         }
1151         /* make sure there is a reserved partition */
1152         if (resv_part == -1) {
1153                 (void) fprintf(stderr,
1154                     "no reserved partition found\n");
1155         }
1156 }
1157
1158 /*
1159  * We need to get information necessary to construct a *new* efi
1160  * label type
1161  */
1162 int
1163 efi_auto_sense(int fd, struct dk_gpt **vtoc)
1164 {
1165
1166         int     i;
1167
1168         /*
1169          * Now build the default partition table
1170          */
1171         if (efi_alloc_and_init(fd, EFI_NUMPAR, vtoc) != 0) {
1172                 if (efi_debug) {
1173                         (void) fprintf(stderr, "efi_alloc_and_init failed.\n");
1174                 }
1175                 return (-1);
1176         }
1177
1178         for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) {
1179                 (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag;
1180                 (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag;
1181                 (*vtoc)->efi_parts[i].p_start = 0;
1182                 (*vtoc)->efi_parts[i].p_size = 0;
1183         }
1184         /*
1185          * Make constants first
1186          * and variable partitions later
1187          */
1188
1189         /* root partition - s0 128 MB */
1190         (*vtoc)->efi_parts[0].p_start = 34;
1191         (*vtoc)->efi_parts[0].p_size = 262144;
1192
1193         /* partition - s1  128 MB */
1194         (*vtoc)->efi_parts[1].p_start = 262178;
1195         (*vtoc)->efi_parts[1].p_size = 262144;
1196
1197         /* partition -s2 is NOT the Backup disk */
1198         (*vtoc)->efi_parts[2].p_tag = V_UNASSIGNED;
1199
1200         /* partition -s6 /usr partition - HOG */
1201         (*vtoc)->efi_parts[6].p_start = 524322;
1202         (*vtoc)->efi_parts[6].p_size = (*vtoc)->efi_last_u_lba - 524322
1203             - (1024 * 16);
1204
1205         /* efi reserved partition - s9 16K */
1206         (*vtoc)->efi_parts[8].p_start = (*vtoc)->efi_last_u_lba - (1024 * 16);
1207         (*vtoc)->efi_parts[8].p_size = (1024 * 16);
1208         (*vtoc)->efi_parts[8].p_tag = V_RESERVED;
1209         return (0);
1210 }