Script update-zfs.sh updated to include libefi library
[zfs.git] / lib / libefi / rdwr_efi.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <strings.h>
31 #include <unistd.h>
32 #include <uuid/uuid.h>
33 #include <libintl.h>
34 #include <sys/types.h>
35 #include <sys/dkio.h>
36 #include <sys/vtoc.h>
37 #include <sys/mhd.h>
38 #include <sys/param.h>
39 #include <sys/dktp/fdisk.h>
40 #include <sys/efi_partition.h>
41 #include <sys/byteorder.h>
42 #include <sys/ddi.h>
43
44 static struct uuid_to_ptag {
45         struct uuid     uuid;
46 } conversion_array[] = {
47         { EFI_UNUSED },
48         { EFI_BOOT },
49         { EFI_ROOT },
50         { EFI_SWAP },
51         { EFI_USR },
52         { EFI_BACKUP },
53         { 0 },                  /* STAND is never used */
54         { EFI_VAR },
55         { EFI_HOME },
56         { EFI_ALTSCTR },
57         { 0 },                  /* CACHE (cachefs) is never used */
58         { EFI_RESERVED },
59         { EFI_SYSTEM },
60         { EFI_LEGACY_MBR },
61         { EFI_RESV3 },
62         { EFI_RESV4 },
63         { EFI_MSFT_RESV },
64         { EFI_DELL_BASIC },
65         { EFI_DELL_RAID },
66         { EFI_DELL_SWAP },
67         { EFI_DELL_LVM },
68         { EFI_DELL_RESV },
69         { EFI_AAPL_HFS },
70         { EFI_AAPL_UFS }
71 };
72
73 /*
74  * Default vtoc information for non-SVr4 partitions
75  */
76 struct dk_map2  default_vtoc_map[NDKMAP] = {
77         {       V_ROOT,         0       },              /* a - 0 */
78         {       V_SWAP,         V_UNMNT },              /* b - 1 */
79         {       V_BACKUP,       V_UNMNT },              /* c - 2 */
80         {       V_UNASSIGNED,   0       },              /* d - 3 */
81         {       V_UNASSIGNED,   0       },              /* e - 4 */
82         {       V_UNASSIGNED,   0       },              /* f - 5 */
83         {       V_USR,          0       },              /* g - 6 */
84         {       V_UNASSIGNED,   0       },              /* h - 7 */
85
86 #if defined(_SUNOS_VTOC_16)
87
88 #if defined(i386) || defined(__amd64)
89         {       V_BOOT,         V_UNMNT },              /* i - 8 */
90         {       V_ALTSCTR,      0       },              /* j - 9 */
91
92 #else
93 #error No VTOC format defined.
94 #endif                  /* defined(i386) */
95
96         {       V_UNASSIGNED,   0       },              /* k - 10 */
97         {       V_UNASSIGNED,   0       },              /* l - 11 */
98         {       V_UNASSIGNED,   0       },              /* m - 12 */
99         {       V_UNASSIGNED,   0       },              /* n - 13 */
100         {       V_UNASSIGNED,   0       },              /* o - 14 */
101         {       V_UNASSIGNED,   0       },              /* p - 15 */
102 #endif                  /* defined(_SUNOS_VTOC_16) */
103 };
104
/*
 * When non-zero, the routines in this file report failures on stderr.
 * Enabled by default only when the file is built with DEBUG defined.
 */
#ifndef DEBUG
int efi_debug = 0;
#else
int efi_debug = 1;
#endif
110
111 extern unsigned int     efi_crc32(const unsigned char *, unsigned int);
112 static int              efi_read(int, struct dk_gpt *);
113
114 static int
115 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
116 {
117         struct dk_minfo         disk_info;
118
119         if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1)
120                 return (errno);
121         *capacity = disk_info.dki_capacity;
122         *lbsize = disk_info.dki_lbsize;
123         return (0);
124 }
125
/*
 * Number of blocks an EFI label occupies for `p' partition entries on a
 * device with blocks of `l' bytes: one block plus the entry array
 * rounded up to a whole number of blocks.  `l' must be non-zero.
 */
#define	NBLOCKS(p, l)	\
	(1 + ((((p) * (int)sizeof (efi_gpe_t))  + ((l) - 1)) / (l)))

/* Upper bound on the partition count -- limited by what we can malloc. */
#define	MAX_PARTS	\
	((4294967295UL - sizeof (struct dk_gpt)) / sizeof (struct dk_part))
135
136 int
137 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
138 {
139         diskaddr_t      capacity;
140         uint_t          lbsize;
141         uint_t          nblocks;
142         size_t          length;
143         struct dk_gpt   *vptr;
144         struct uuid     uuid;
145
146         if (read_disk_info(fd, &capacity, &lbsize) != 0) {
147                 if (efi_debug)
148                         (void) fprintf(stderr,
149                             "couldn't read disk information\n");
150                 return (-1);
151         }
152
153         nblocks = NBLOCKS(nparts, lbsize);
154         if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
155                 /* 16K plus one block for the GPT */
156                 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1;
157         }
158
159         if (nparts > MAX_PARTS) {
160                 if (efi_debug) {
161                         (void) fprintf(stderr,
162                         "the maximum number of partitions supported is %lu\n",
163                             MAX_PARTS);
164                 }
165                 return (-1);
166         }
167
168         length = sizeof (struct dk_gpt) +
169             sizeof (struct dk_part) * (nparts - 1);
170
171         if ((*vtoc = calloc(length, 1)) == NULL)
172                 return (-1);
173
174         vptr = *vtoc;
175
176         vptr->efi_version = EFI_VERSION_CURRENT;
177         vptr->efi_lbasize = lbsize;
178         vptr->efi_nparts = nparts;
179         /*
180          * add one block here for the PMBR; on disks with a 512 byte
181          * block size and 128 or fewer partitions, efi_first_u_lba
182          * should work out to "34"
183          */
184         vptr->efi_first_u_lba = nblocks + 1;
185         vptr->efi_last_lba = capacity - 1;
186         vptr->efi_altern_lba = capacity -1;
187         vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks;
188
189         (void) uuid_generate((uchar_t *)&uuid);
190         UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid);
191         return (0);
192 }
193
194 /*
195  * Read EFI - return partition number upon success.
196  */
197 int
198 efi_alloc_and_read(int fd, struct dk_gpt **vtoc)
199 {
200         int                     rval;
201         uint32_t                nparts;
202         int                     length;
203
204         /* figure out the number of entries that would fit into 16K */
205         nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t);
206         length = (int) sizeof (struct dk_gpt) +
207             (int) sizeof (struct dk_part) * (nparts - 1);
208         if ((*vtoc = calloc(length, 1)) == NULL)
209                 return (VT_ERROR);
210
211         (*vtoc)->efi_nparts = nparts;
212         rval = efi_read(fd, *vtoc);
213
214         if ((rval == VT_EINVAL) && (*vtoc)->efi_nparts > nparts) {
215                 void *tmp;
216                 length = (int) sizeof (struct dk_gpt) +
217                     (int) sizeof (struct dk_part) *
218                     ((*vtoc)->efi_nparts - 1);
219                 nparts = (*vtoc)->efi_nparts;
220                 if ((tmp = realloc(*vtoc, length)) == NULL) {
221                         free (*vtoc);
222                         *vtoc = NULL;
223                         return (VT_ERROR);
224                 } else {
225                         *vtoc = tmp;
226                         rval = efi_read(fd, *vtoc);
227                 }
228         }
229
230         if (rval < 0) {
231                 if (efi_debug) {
232                         (void) fprintf(stderr,
233                             "read of EFI table failed, rval=%d\n", rval);
234                 }
235                 free (*vtoc);
236                 *vtoc = NULL;
237         }
238
239         return (rval);
240 }
241
242 static int
243 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
244 {
245         void *data = dk_ioc->dki_data;
246         int error;
247
248         dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data;
249         error = ioctl(fd, cmd, (void *)dk_ioc);
250         dk_ioc->dki_data = data;
251
252         return (error);
253 }
254
255 static int
256 check_label(int fd, dk_efi_t *dk_ioc)
257 {
258         efi_gpt_t               *efi;
259         uint_t                  crc;
260
261         if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) {
262                 switch (errno) {
263                 case EIO:
264                         return (VT_EIO);
265                 default:
266                         return (VT_ERROR);
267                 }
268         }
269         efi = dk_ioc->dki_data;
270         if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
271                 if (efi_debug)
272                         (void) fprintf(stderr,
273                             "Bad EFI signature: 0x%llx != 0x%llx\n",
274                             (long long)efi->efi_gpt_Signature,
275                             (long long)LE_64(EFI_SIGNATURE));
276                 return (VT_EINVAL);
277         }
278
279         /*
280          * check CRC of the header; the size of the header should
281          * never be larger than one block
282          */
283         crc = efi->efi_gpt_HeaderCRC32;
284         efi->efi_gpt_HeaderCRC32 = 0;
285
286         if (((len_t)LE_32(efi->efi_gpt_HeaderSize) > dk_ioc->dki_length) ||
287             crc != LE_32(efi_crc32((unsigned char *)efi,
288             LE_32(efi->efi_gpt_HeaderSize)))) {
289                 if (efi_debug)
290                         (void) fprintf(stderr,
291                             "Bad EFI CRC: 0x%x != 0x%x\n",
292                             crc,
293                             LE_32(efi_crc32((unsigned char *)efi,
294                             sizeof (struct efi_gpt))));
295                 return (VT_EINVAL);
296         }
297
298         return (0);
299 }
300
301 static int
302 efi_read(int fd, struct dk_gpt *vtoc)
303 {
304         int                     i, j;
305         int                     label_len;
306         int                     rval = 0;
307         int                     md_flag = 0;
308         int                     vdc_flag = 0;
309         struct dk_minfo         disk_info;
310         dk_efi_t                dk_ioc;
311         efi_gpt_t               *efi;
312         efi_gpe_t               *efi_parts;
313         struct dk_cinfo         dki_info;
314         uint32_t                user_length;
315         boolean_t               legacy_label = B_FALSE;
316
317         /*
318          * get the partition number for this file descriptor.
319          */
320         if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
321                 if (efi_debug) {
322                         (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
323                 }
324                 switch (errno) {
325                 case EIO:
326                         return (VT_EIO);
327                 case EINVAL:
328                         return (VT_EINVAL);
329                 default:
330                         return (VT_ERROR);
331                 }
332         }
333         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
334             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
335                 md_flag++;
336         } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) &&
337             (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) {
338                 /*
339                  * The controller and drive name "vdc" (virtual disk client)
340                  * indicates a LDoms virtual disk.
341                  */
342                 vdc_flag++;
343         }
344
345         /* get the LBA size */
346         if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) {
347                 if (efi_debug) {
348                         (void) fprintf(stderr,
349                             "assuming LBA 512 bytes %d\n",
350                             errno);
351                 }
352                 disk_info.dki_lbsize = DEV_BSIZE;
353         }
354         if (disk_info.dki_lbsize == 0) {
355                 if (efi_debug) {
356                         (void) fprintf(stderr,
357                             "efi_read: assuming LBA 512 bytes\n");
358                 }
359                 disk_info.dki_lbsize = DEV_BSIZE;
360         }
361         /*
362          * Read the EFI GPT to figure out how many partitions we need
363          * to deal with.
364          */
365         dk_ioc.dki_lba = 1;
366         if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) {
367                 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize;
368         } else {
369                 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) +
370                     disk_info.dki_lbsize;
371                 if (label_len % disk_info.dki_lbsize) {
372                         /* pad to physical sector size */
373                         label_len += disk_info.dki_lbsize;
374                         label_len &= ~(disk_info.dki_lbsize - 1);
375                 }
376         }
377
378         if ((dk_ioc.dki_data = calloc(label_len, 1)) == NULL)
379                 return (VT_ERROR);
380
381         dk_ioc.dki_length = disk_info.dki_lbsize;
382         user_length = vtoc->efi_nparts;
383         efi = dk_ioc.dki_data;
384         if (md_flag) {
385                 dk_ioc.dki_length = label_len;
386                 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
387                         switch (errno) {
388                         case EIO:
389                                 return (VT_EIO);
390                         default:
391                                 return (VT_ERROR);
392                         }
393                 }
394         } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) {
395                 /*
396                  * No valid label here; try the alternate. Note that here
397                  * we just read GPT header and save it into dk_ioc.data,
398                  * Later, we will read GUID partition entry array if we
399                  * can get valid GPT header.
400                  */
401
402                 /*
403                  * This is a workaround for legacy systems. In the past, the
404                  * last sector of SCSI disk was invisible on x86 platform. At
405                  * that time, backup label was saved on the next to the last
406                  * sector. It is possible for users to move a disk from previous
407                  * solaris system to present system. Here, we attempt to search
408                  * legacy backup EFI label first.
409                  */
410                 dk_ioc.dki_lba = disk_info.dki_capacity - 2;
411                 dk_ioc.dki_length = disk_info.dki_lbsize;
412                 rval = check_label(fd, &dk_ioc);
413                 if (rval == VT_EINVAL) {
414                         /*
415                          * we didn't find legacy backup EFI label, try to
416                          * search backup EFI label in the last block.
417                          */
418                         dk_ioc.dki_lba = disk_info.dki_capacity - 1;
419                         dk_ioc.dki_length = disk_info.dki_lbsize;
420                         rval = check_label(fd, &dk_ioc);
421                         if (rval == 0) {
422                                 legacy_label = B_TRUE;
423                                 if (efi_debug)
424                                         (void) fprintf(stderr,
425                                             "efi_read: primary label corrupt; "
426                                             "using EFI backup label located on"
427                                             " the last block\n");
428                         }
429                 } else {
430                         if ((efi_debug) && (rval == 0))
431                                 (void) fprintf(stderr, "efi_read: primary label"
432                                     " corrupt; using legacy EFI backup label "
433                                     " located on the next to last block\n");
434                 }
435
436                 if (rval == 0) {
437                         dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
438                         vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT;
439                         vtoc->efi_nparts =
440                             LE_32(efi->efi_gpt_NumberOfPartitionEntries);
441                         /*
442                          * Partition tables are between backup GPT header
443                          * table and ParitionEntryLBA (the starting LBA of
444                          * the GUID partition entries array). Now that we
445                          * already got valid GPT header and saved it in
446                          * dk_ioc.dki_data, we try to get GUID partition
447                          * entry array here.
448                          */
449                         /* LINTED */
450                         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
451                             + disk_info.dki_lbsize);
452                         if (legacy_label)
453                                 dk_ioc.dki_length = disk_info.dki_capacity - 1 -
454                                     dk_ioc.dki_lba;
455                         else
456                                 dk_ioc.dki_length = disk_info.dki_capacity - 2 -
457                                     dk_ioc.dki_lba;
458                         dk_ioc.dki_length *= disk_info.dki_lbsize;
459                         if (dk_ioc.dki_length >
460                             ((len_t)label_len - sizeof (*dk_ioc.dki_data))) {
461                                 rval = VT_EINVAL;
462                         } else {
463                                 /*
464                                  * read GUID partition entry array
465                                  */
466                                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
467                         }
468                 }
469
470         } else if (rval == 0) {
471
472                 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
473                 /* LINTED */
474                 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
475                     + disk_info.dki_lbsize);
476                 dk_ioc.dki_length = label_len - disk_info.dki_lbsize;
477                 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
478
479         } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) {
480                 /*
481                  * When the device is a LDoms virtual disk, the DKIOCGETEFI
482                  * ioctl can fail with EINVAL if the virtual disk backend
483                  * is a ZFS volume serviced by a domain running an old version
484                  * of Solaris. This is because the DKIOCGETEFI ioctl was
485                  * initially incorrectly implemented for a ZFS volume and it
486                  * expected the GPT and GPE to be retrieved with a single ioctl.
487                  * So we try to read the GPT and the GPE using that old style
488                  * ioctl.
489                  */
490                 dk_ioc.dki_lba = 1;
491                 dk_ioc.dki_length = label_len;
492                 rval = check_label(fd, &dk_ioc);
493         }
494
495         if (rval < 0) {
496                 free(efi);
497                 return (rval);
498         }
499
500         /* LINTED -- always longlong aligned */
501         efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize);
502
503         /*
504          * Assemble this into a "dk_gpt" struct for easier
505          * digestibility by applications.
506          */
507         vtoc->efi_version = LE_32(efi->efi_gpt_Revision);
508         vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries);
509         vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry);
510         vtoc->efi_lbasize = disk_info.dki_lbsize;
511         vtoc->efi_last_lba = disk_info.dki_capacity - 1;
512         vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA);
513         vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA);
514         vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
515         UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID);
516
517         /*
518          * If the array the user passed in is too small, set the length
519          * to what it needs to be and return
520          */
521         if (user_length < vtoc->efi_nparts) {
522                 return (VT_EINVAL);
523         }
524
525         for (i = 0; i < vtoc->efi_nparts; i++) {
526
527                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid,
528                     efi_parts[i].efi_gpe_PartitionTypeGUID);
529
530                 for (j = 0;
531                     j < sizeof (conversion_array)
532                     / sizeof (struct uuid_to_ptag); j++) {
533
534                         if (bcmp(&vtoc->efi_parts[i].p_guid,
535                             &conversion_array[j].uuid,
536                             sizeof (struct uuid)) == 0) {
537                                 vtoc->efi_parts[i].p_tag = j;
538                                 break;
539                         }
540                 }
541                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
542                         continue;
543                 vtoc->efi_parts[i].p_flag =
544                     LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs);
545                 vtoc->efi_parts[i].p_start =
546                     LE_64(efi_parts[i].efi_gpe_StartingLBA);
547                 vtoc->efi_parts[i].p_size =
548                     LE_64(efi_parts[i].efi_gpe_EndingLBA) -
549                     vtoc->efi_parts[i].p_start + 1;
550                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
551                         vtoc->efi_parts[i].p_name[j] =
552                             (uchar_t)LE_16(
553                             efi_parts[i].efi_gpe_PartitionName[j]);
554                 }
555
556                 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid,
557                     efi_parts[i].efi_gpe_UniquePartitionGUID);
558         }
559         free(efi);
560
561         return (dki_info.dki_partition);
562 }
563
564 /* writes a "protective" MBR */
565 static int
566 write_pmbr(int fd, struct dk_gpt *vtoc)
567 {
568         dk_efi_t        dk_ioc;
569         struct mboot    mb;
570         uchar_t         *cp;
571         diskaddr_t      size_in_lba;
572         uchar_t         *buf;
573         int             len;
574
575         len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
576         buf = calloc(len, 1);
577
578         /*
579          * Preserve any boot code and disk signature if the first block is
580          * already an MBR.
581          */
582         dk_ioc.dki_lba = 0;
583         dk_ioc.dki_length = len;
584         /* LINTED -- always longlong aligned */
585         dk_ioc.dki_data = (efi_gpt_t *)buf;
586         if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
587                 (void *) memcpy(&mb, buf, sizeof (mb));
588                 bzero(&mb, sizeof (mb));
589                 mb.signature = LE_16(MBB_MAGIC);
590         } else {
591                 (void *) memcpy(&mb, buf, sizeof (mb));
592                 if (mb.signature != LE_16(MBB_MAGIC)) {
593                         bzero(&mb, sizeof (mb));
594                         mb.signature = LE_16(MBB_MAGIC);
595                 }
596         }
597
598         bzero(&mb.parts, sizeof (mb.parts));
599         cp = (uchar_t *)&mb.parts[0];
600         /* bootable or not */
601         *cp++ = 0;
602         /* beginning CHS; 0xffffff if not representable */
603         *cp++ = 0xff;
604         *cp++ = 0xff;
605         *cp++ = 0xff;
606         /* OS type */
607         *cp++ = EFI_PMBR;
608         /* ending CHS; 0xffffff if not representable */
609         *cp++ = 0xff;
610         *cp++ = 0xff;
611         *cp++ = 0xff;
612         /* starting LBA: 1 (little endian format) by EFI definition */
613         *cp++ = 0x01;
614         *cp++ = 0x00;
615         *cp++ = 0x00;
616         *cp++ = 0x00;
617         /* ending LBA: last block on the disk (little endian format) */
618         size_in_lba = vtoc->efi_last_lba;
619         if (size_in_lba < 0xffffffff) {
620                 *cp++ = (size_in_lba & 0x000000ff);
621                 *cp++ = (size_in_lba & 0x0000ff00) >> 8;
622                 *cp++ = (size_in_lba & 0x00ff0000) >> 16;
623                 *cp++ = (size_in_lba & 0xff000000) >> 24;
624         } else {
625                 *cp++ = 0xff;
626                 *cp++ = 0xff;
627                 *cp++ = 0xff;
628                 *cp++ = 0xff;
629         }
630
631         (void *) memcpy(buf, &mb, sizeof (mb));
632         /* LINTED -- always longlong aligned */
633         dk_ioc.dki_data = (efi_gpt_t *)buf;
634         dk_ioc.dki_lba = 0;
635         dk_ioc.dki_length = len;
636         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
637                 free(buf);
638                 switch (errno) {
639                 case EIO:
640                         return (VT_EIO);
641                 case EINVAL:
642                         return (VT_EINVAL);
643                 default:
644                         return (VT_ERROR);
645                 }
646         }
647         free(buf);
648         return (0);
649 }
650
651 /* make sure the user specified something reasonable */
652 static int
653 check_input(struct dk_gpt *vtoc)
654 {
655         int                     resv_part = -1;
656         int                     i, j;
657         diskaddr_t              istart, jstart, isize, jsize, endsect;
658
659         /*
660          * Sanity-check the input (make sure no partitions overlap)
661          */
662         for (i = 0; i < vtoc->efi_nparts; i++) {
663                 /* It can't be unassigned and have an actual size */
664                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
665                     (vtoc->efi_parts[i].p_size != 0)) {
666                         if (efi_debug) {
667                                 (void) fprintf(stderr,
668 "partition %d is \"unassigned\" but has a size of %llu",
669                                     i,
670                                     vtoc->efi_parts[i].p_size);
671                         }
672                         return (VT_EINVAL);
673                 }
674                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
675                         if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
676                                 continue;
677                         /* we have encountered an unknown uuid */
678                         vtoc->efi_parts[i].p_tag = 0xff;
679                 }
680                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
681                         if (resv_part != -1) {
682                                 if (efi_debug) {
683                                         (void) fprintf(stderr,
684 "found duplicate reserved partition at %d\n",
685                                             i);
686                                 }
687                                 return (VT_EINVAL);
688                         }
689                         resv_part = i;
690                 }
691                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
692                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
693                         if (efi_debug) {
694                                 (void) fprintf(stderr,
695                                     "Partition %d starts at %llu.  ",
696                                     i,
697                                     vtoc->efi_parts[i].p_start);
698                                 (void) fprintf(stderr,
699                                     "It must be between %llu and %llu.\n",
700                                     vtoc->efi_first_u_lba,
701                                     vtoc->efi_last_u_lba);
702                         }
703                         return (VT_EINVAL);
704                 }
705                 if ((vtoc->efi_parts[i].p_start +
706                     vtoc->efi_parts[i].p_size <
707                     vtoc->efi_first_u_lba) ||
708                     (vtoc->efi_parts[i].p_start +
709                     vtoc->efi_parts[i].p_size >
710                     vtoc->efi_last_u_lba + 1)) {
711                         if (efi_debug) {
712                                 (void) fprintf(stderr,
713                                     "Partition %d ends at %llu.  ",
714                                     i,
715                                     vtoc->efi_parts[i].p_start +
716                                     vtoc->efi_parts[i].p_size);
717                                 (void) fprintf(stderr,
718                                     "It must be between %llu and %llu.\n",
719                                     vtoc->efi_first_u_lba,
720                                     vtoc->efi_last_u_lba);
721                         }
722                         return (VT_EINVAL);
723                 }
724
725                 for (j = 0; j < vtoc->efi_nparts; j++) {
726                         isize = vtoc->efi_parts[i].p_size;
727                         jsize = vtoc->efi_parts[j].p_size;
728                         istart = vtoc->efi_parts[i].p_start;
729                         jstart = vtoc->efi_parts[j].p_start;
730                         if ((i != j) && (isize != 0) && (jsize != 0)) {
731                                 endsect = jstart + jsize -1;
732                                 if ((jstart <= istart) &&
733                                     (istart <= endsect)) {
734                                         if (efi_debug) {
735                                                 (void) fprintf(stderr,
736 "Partition %d overlaps partition %d.",
737                                                     i, j);
738                                         }
739                                         return (VT_EINVAL);
740                                 }
741                         }
742                 }
743         }
744         /* just a warning for now */
745         if ((resv_part == -1) && efi_debug) {
746                 (void) fprintf(stderr,
747                     "no reserved partition found\n");
748         }
749         return (0);
750 }
751
752 /*
753  * add all the unallocated space to the current label
754  */
755 int
756 efi_use_whole_disk(int fd)
757 {
758         struct dk_gpt           *efi_label;
759         int                     rval;
760         int                     i;
761         uint_t                  phy_last_slice = 0;
762         diskaddr_t              pl_start = 0;
763         diskaddr_t              pl_size;
764
765         rval = efi_alloc_and_read(fd, &efi_label);
766         if (rval < 0) {
767                 return (rval);
768         }
769
770         /* find the last physically non-zero partition */
771         for (i = 0; i < efi_label->efi_nparts - 2; i ++) {
772                 if (pl_start < efi_label->efi_parts[i].p_start) {
773                         pl_start = efi_label->efi_parts[i].p_start;
774                         phy_last_slice = i;
775                 }
776         }
777         pl_size = efi_label->efi_parts[phy_last_slice].p_size;
778
779         /*
780          * If alter_lba is 1, we are using the backup label.
781          * Since we can locate the backup label by disk capacity,
782          * there must be no unallocated space.
783          */
784         if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba
785             >= efi_label->efi_last_lba)) {
786                 if (efi_debug) {
787                         (void) fprintf(stderr,
788                             "efi_use_whole_disk: requested space not found\n");
789                 }
790                 efi_free(efi_label);
791                 return (VT_ENOSPC);
792         }
793
794         /*
795          * If there is space between the last physically non-zero partition
796          * and the reserved partition, just add the unallocated space to this
797          * area. Otherwise, the unallocated space is added to the last
798          * physically non-zero partition.
799          */
800         if (pl_start + pl_size - 1 == efi_label->efi_last_u_lba -
801             EFI_MIN_RESV_SIZE) {
802                 efi_label->efi_parts[phy_last_slice].p_size +=
803                     efi_label->efi_last_lba - efi_label->efi_altern_lba;
804         }
805
806         /*
807          * Move the reserved partition. There is currently no data in
808          * here except fabricated devids (which get generated via
809          * efi_write()). So there is no need to copy data.
810          */
811         efi_label->efi_parts[efi_label->efi_nparts - 1].p_start +=
812             efi_label->efi_last_lba - efi_label->efi_altern_lba;
813         efi_label->efi_last_u_lba += efi_label->efi_last_lba
814             - efi_label->efi_altern_lba;
815
816         rval = efi_write(fd, efi_label);
817         if (rval < 0) {
818                 if (efi_debug) {
819                         (void) fprintf(stderr,
820                             "efi_use_whole_disk:fail to write label, rval=%d\n",
821                             rval);
822                 }
823                 efi_free(efi_label);
824                 return (rval);
825         }
826
827         efi_free(efi_label);
828         return (0);
829 }
830
831
832 /*
833  * write EFI label and backup label
834  */
835 int
836 efi_write(int fd, struct dk_gpt *vtoc)
837 {
838         dk_efi_t                dk_ioc;
839         efi_gpt_t               *efi;
840         efi_gpe_t               *efi_parts;
841         int                     i, j;
842         struct dk_cinfo         dki_info;
843         int                     md_flag = 0;
844         int                     nblocks;
845         diskaddr_t              lba_backup_gpt_hdr;
846
847         if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) {
848                 if (efi_debug)
849                         (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
850                 switch (errno) {
851                 case EIO:
852                         return (VT_EIO);
853                 case EINVAL:
854                         return (VT_EINVAL);
855                 default:
856                         return (VT_ERROR);
857                 }
858         }
859
860         /* check if we are dealing wih a metadevice */
861         if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
862             (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
863                 md_flag = 1;
864         }
865
866         if (check_input(vtoc)) {
867                 /*
868                  * not valid; if it's a metadevice just pass it down
869                  * because SVM will do its own checking
870                  */
871                 if (md_flag == 0) {
872                         return (VT_EINVAL);
873                 }
874         }
875
876         dk_ioc.dki_lba = 1;
877         if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) {
878                 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize;
879         } else {
880                 dk_ioc.dki_length = NBLOCKS(vtoc->efi_nparts,
881                     vtoc->efi_lbasize) *
882                     vtoc->efi_lbasize;
883         }
884
885         /*
886          * the number of blocks occupied by GUID partition entry array
887          */
888         nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1;
889
890         /*
891          * Backup GPT header is located on the block after GUID
892          * partition entry array. Here, we calculate the address
893          * for backup GPT header.
894          */
895         lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
896         if ((dk_ioc.dki_data = calloc(dk_ioc.dki_length, 1)) == NULL)
897                 return (VT_ERROR);
898
899         efi = dk_ioc.dki_data;
900
901         /* stuff user's input into EFI struct */
902         efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
903         efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */
904         efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt));
905         efi->efi_gpt_Reserved1 = 0;
906         efi->efi_gpt_MyLBA = LE_64(1ULL);
907         efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr);
908         efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba);
909         efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba);
910         efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL);
911         efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts);
912         efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe));
913         UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid);
914
915         /* LINTED -- always longlong aligned */
916         efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize);
917
918         for (i = 0; i < vtoc->efi_nparts; i++) {
919                 for (j = 0;
920                     j < sizeof (conversion_array) /
921                     sizeof (struct uuid_to_ptag); j++) {
922
923                         if (vtoc->efi_parts[i].p_tag == j) {
924                                 UUID_LE_CONVERT(
925                                     efi_parts[i].efi_gpe_PartitionTypeGUID,
926                                     conversion_array[j].uuid);
927                                 break;
928                         }
929                 }
930
931                 if (j == sizeof (conversion_array) /
932                     sizeof (struct uuid_to_ptag)) {
933                         /*
934                          * If we didn't have a matching uuid match, bail here.
935                          * Don't write a label with unknown uuid.
936                          */
937                         if (efi_debug) {
938                                 (void) fprintf(stderr,
939                                     "Unknown uuid for p_tag %d\n",
940                                     vtoc->efi_parts[i].p_tag);
941                         }
942                         return (VT_EINVAL);
943                 }
944
945                 efi_parts[i].efi_gpe_StartingLBA =
946                     LE_64(vtoc->efi_parts[i].p_start);
947                 efi_parts[i].efi_gpe_EndingLBA =
948                     LE_64(vtoc->efi_parts[i].p_start +
949                     vtoc->efi_parts[i].p_size - 1);
950                 efi_parts[i].efi_gpe_Attributes.PartitionAttrs =
951                     LE_16(vtoc->efi_parts[i].p_flag);
952                 for (j = 0; j < EFI_PART_NAME_LEN; j++) {
953                         efi_parts[i].efi_gpe_PartitionName[j] =
954                             LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]);
955                 }
956                 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) &&
957                     uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) {
958                         (void) uuid_generate((uchar_t *)
959                             &vtoc->efi_parts[i].p_uguid);
960                 }
961                 bcopy(&vtoc->efi_parts[i].p_uguid,
962                     &efi_parts[i].efi_gpe_UniquePartitionGUID,
963                     sizeof (uuid_t));
964         }
965         efi->efi_gpt_PartitionEntryArrayCRC32 =
966             LE_32(efi_crc32((unsigned char *)efi_parts,
967             vtoc->efi_nparts * (int)sizeof (struct efi_gpe)));
968         efi->efi_gpt_HeaderCRC32 =
969             LE_32(efi_crc32((unsigned char *)efi, sizeof (struct efi_gpt)));
970
971         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
972                 free(dk_ioc.dki_data);
973                 switch (errno) {
974                 case EIO:
975                         return (VT_EIO);
976                 case EINVAL:
977                         return (VT_EINVAL);
978                 default:
979                         return (VT_ERROR);
980                 }
981         }
982         /* if it's a metadevice we're done */
983         if (md_flag) {
984                 free(dk_ioc.dki_data);
985                 return (0);
986         }
987
988         /* write backup partition array */
989         dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1;
990         dk_ioc.dki_length -= vtoc->efi_lbasize;
991         /* LINTED */
992         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data +
993             vtoc->efi_lbasize);
994
995         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
996                 /*
997                  * we wrote the primary label okay, so don't fail
998                  */
999                 if (efi_debug) {
1000                         (void) fprintf(stderr,
1001                             "write of backup partitions to block %llu "
1002                             "failed, errno %d\n",
1003                             vtoc->efi_last_u_lba + 1,
1004                             errno);
1005                 }
1006         }
1007         /*
1008          * now swap MyLBA and AlternateLBA fields and write backup
1009          * partition table header
1010          */
1011         dk_ioc.dki_lba = lba_backup_gpt_hdr;
1012         dk_ioc.dki_length = vtoc->efi_lbasize;
1013         /* LINTED */
1014         dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data -
1015             vtoc->efi_lbasize);
1016         efi->efi_gpt_AlternateLBA = LE_64(1ULL);
1017         efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr);
1018         efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1);
1019         efi->efi_gpt_HeaderCRC32 = 0;
1020         efi->efi_gpt_HeaderCRC32 =
1021             LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data,
1022             sizeof (struct efi_gpt)));
1023
1024         if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1025                 if (efi_debug) {
1026                         (void) fprintf(stderr,
1027                             "write of backup header to block %llu failed, "
1028                             "errno %d\n",
1029                             lba_backup_gpt_hdr,
1030                             errno);
1031                 }
1032         }
1033         /* write the PMBR */
1034         (void) write_pmbr(fd, vtoc);
1035         free(dk_ioc.dki_data);
1036         return (0);
1037 }
1038
/*
 * Release a label structure, such as the one efi_use_whole_disk()
 * obtains from efi_alloc_and_read().  The pointer is handed straight to
 * free(), so passing NULL is harmless.
 */
void
efi_free(struct dk_gpt *ptr)
{
        free(ptr);
}
1044
1045 /*
1046  * Input: File descriptor
1047  * Output: 1 if disk has an EFI label, or > 2TB with no VTOC or legacy MBR.
1048  * Otherwise 0.
1049  */
1050 int
1051 efi_type(int fd)
1052 {
1053         struct vtoc vtoc;
1054         struct extvtoc extvtoc;
1055
1056         if (ioctl(fd, DKIOCGEXTVTOC, &extvtoc) == -1) {
1057                 if (errno == ENOTSUP)
1058                         return (1);
1059                 else if (errno == ENOTTY) {
1060                         if (ioctl(fd, DKIOCGVTOC, &vtoc) == -1)
1061                                 if (errno == ENOTSUP)
1062                                         return (1);
1063                 }
1064         }
1065         return (0);
1066 }
1067
1068 void
1069 efi_err_check(struct dk_gpt *vtoc)
1070 {
1071         int                     resv_part = -1;
1072         int                     i, j;
1073         diskaddr_t              istart, jstart, isize, jsize, endsect;
1074         int                     overlap = 0;
1075
1076         /*
1077          * make sure no partitions overlap
1078          */
1079         for (i = 0; i < vtoc->efi_nparts; i++) {
1080                 /* It can't be unassigned and have an actual size */
1081                 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
1082                     (vtoc->efi_parts[i].p_size != 0)) {
1083                         (void) fprintf(stderr,
1084                             "partition %d is \"unassigned\" but has a size "
1085                             "of %llu\n", i, vtoc->efi_parts[i].p_size);
1086                 }
1087                 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
1088                         continue;
1089                 }
1090                 if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
1091                         if (resv_part != -1) {
1092                                 (void) fprintf(stderr,
1093                                     "found duplicate reserved partition at "
1094                                     "%d\n", i);
1095                         }
1096                         resv_part = i;
1097                         if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE)
1098                                 (void) fprintf(stderr,
1099                                     "Warning: reserved partition size must "
1100                                     "be %d sectors\n", EFI_MIN_RESV_SIZE);
1101                 }
1102                 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
1103                     (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
1104                         (void) fprintf(stderr,
1105                             "Partition %d starts at %llu\n",
1106                             i,
1107                             vtoc->efi_parts[i].p_start);
1108                         (void) fprintf(stderr,
1109                             "It must be between %llu and %llu.\n",
1110                             vtoc->efi_first_u_lba,
1111                             vtoc->efi_last_u_lba);
1112                 }
1113                 if ((vtoc->efi_parts[i].p_start +
1114                     vtoc->efi_parts[i].p_size <
1115                     vtoc->efi_first_u_lba) ||
1116                     (vtoc->efi_parts[i].p_start +
1117                     vtoc->efi_parts[i].p_size >
1118                     vtoc->efi_last_u_lba + 1)) {
1119                         (void) fprintf(stderr,
1120                             "Partition %d ends at %llu\n",
1121                             i,
1122                             vtoc->efi_parts[i].p_start +
1123                             vtoc->efi_parts[i].p_size);
1124                         (void) fprintf(stderr,
1125                             "It must be between %llu and %llu.\n",
1126                             vtoc->efi_first_u_lba,
1127                             vtoc->efi_last_u_lba);
1128                 }
1129
1130                 for (j = 0; j < vtoc->efi_nparts; j++) {
1131                         isize = vtoc->efi_parts[i].p_size;
1132                         jsize = vtoc->efi_parts[j].p_size;
1133                         istart = vtoc->efi_parts[i].p_start;
1134                         jstart = vtoc->efi_parts[j].p_start;
1135                         if ((i != j) && (isize != 0) && (jsize != 0)) {
1136                                 endsect = jstart + jsize -1;
1137                                 if ((jstart <= istart) &&
1138                                     (istart <= endsect)) {
1139                                         if (!overlap) {
1140                                         (void) fprintf(stderr,
1141                                             "label error: EFI Labels do not "
1142                                             "support overlapping partitions\n");
1143                                         }
1144                                         (void) fprintf(stderr,
1145                                             "Partition %d overlaps partition "
1146                                             "%d.\n", i, j);
1147                                         overlap = 1;
1148                                 }
1149                         }
1150                 }
1151         }
1152         /* make sure there is a reserved partition */
1153         if (resv_part == -1) {
1154                 (void) fprintf(stderr,
1155                     "no reserved partition found\n");
1156         }
1157 }
1158
1159 /*
1160  * We need to get information necessary to construct a *new* efi
1161  * label type
1162  */
1163 int
1164 efi_auto_sense(int fd, struct dk_gpt **vtoc)
1165 {
1166
1167         int     i;
1168
1169         /*
1170          * Now build the default partition table
1171          */
1172         if (efi_alloc_and_init(fd, EFI_NUMPAR, vtoc) != 0) {
1173                 if (efi_debug) {
1174                         (void) fprintf(stderr, "efi_alloc_and_init failed.\n");
1175                 }
1176                 return (-1);
1177         }
1178
1179         for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) {
1180                 (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag;
1181                 (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag;
1182                 (*vtoc)->efi_parts[i].p_start = 0;
1183                 (*vtoc)->efi_parts[i].p_size = 0;
1184         }
1185         /*
1186          * Make constants first
1187          * and variable partitions later
1188          */
1189
1190         /* root partition - s0 128 MB */
1191         (*vtoc)->efi_parts[0].p_start = 34;
1192         (*vtoc)->efi_parts[0].p_size = 262144;
1193
1194         /* partition - s1  128 MB */
1195         (*vtoc)->efi_parts[1].p_start = 262178;
1196         (*vtoc)->efi_parts[1].p_size = 262144;
1197
1198         /* partition -s2 is NOT the Backup disk */
1199         (*vtoc)->efi_parts[2].p_tag = V_UNASSIGNED;
1200
1201         /* partition -s6 /usr partition - HOG */
1202         (*vtoc)->efi_parts[6].p_start = 524322;
1203         (*vtoc)->efi_parts[6].p_size = (*vtoc)->efi_last_u_lba - 524322
1204             - (1024 * 16);
1205
1206         /* efi reserved partition - s9 16K */
1207         (*vtoc)->efi_parts[8].p_start = (*vtoc)->efi_last_u_lba - (1024 * 16);
1208         (*vtoc)->efi_parts[8].p_size = (1024 * 16);
1209         (*vtoc)->efi_parts[8].p_tag = V_RESERVED;
1210         return (0);
1211 }