WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] Port over the latest zfs code from opensolaris-grub

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] Port over the latest zfs code from opensolaris-grub to libfsimage
From: Jody Belka <lists-xen@xxxxxxxx>
Date: Thu, 15 Jan 2009 12:43:31 +0000
Delivery-date: Thu, 15 Jan 2009 04:43:25 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
3 files changed, 192 insertions(+), 97 deletions(-)
tools/libfsimage/common/fsimage_grub.h |    1 
tools/libfsimage/zfs/fsys_zfs.c        |  276 ++++++++++++++++++++++----------
tools/libfsimage/zfs/zfs-include/zfs.h |   12 -


# HG changeset patch
# User Jody Belka <dev@xxxxxxxx>
# Date 1232022187 0
# Branch zfs14
# Node ID 74b7416dc04c4bf734c4e8df7d64b2630da6ef0b
# Parent  10a8fae412c5e1b1cd24e58f94f780f180b84ab8
Port over the latest zfs code from opensolaris-grub to libfsimage

The existing libfsimage zfs code is rather old now, and can't read
versions of zfs now in general use. Port over the current code from
the opensolaris-patched grub, so that pygrub can be used again for
booting solaris/zfs based domains.

Signed-off-by: Jody Belka <dev@xxxxxxxx>

diff -r 10a8fae412c5 -r 74b7416dc04c tools/libfsimage/common/fsimage_grub.h
--- a/tools/libfsimage/common/fsimage_grub.h    Wed Jan 14 13:43:17 2009 +0000
+++ b/tools/libfsimage/common/fsimage_grub.h    Thu Jan 15 12:23:07 2009 +0000
@@ -78,6 +78,7 @@
 #define        ERR_DEV_VALUES 1
 #define        ERR_WONT_FIT 1
 #define        ERR_READ 1
+#define        ERR_NEWER_VERSION 1
 
 fsi_plugin_ops_t *fsig_init(fsi_plugin_t *, fsig_plugin_ops_t *);
 
diff -r 10a8fae412c5 -r 74b7416dc04c tools/libfsimage/zfs/fsys_zfs.c
--- a/tools/libfsimage/zfs/fsys_zfs.c   Wed Jan 14 13:43:17 2009 +0000
+++ b/tools/libfsimage/zfs/fsys_zfs.c   Thu Jan 15 12:23:07 2009 +0000
@@ -85,11 +85,15 @@
 
 decomp_entry_t decomp_table[ZIO_COMPRESS_FUNCTIONS] =
 {
-       {"noop", 0},
+       {"inherit", 0},                 /* ZIO_COMPRESS_INHERIT */
        {"on", lzjb_decompress},        /* ZIO_COMPRESS_ON */
-       {"off", 0},
-       {"lzjb", lzjb_decompress}       /* ZIO_COMPRESS_LZJB */
+       {"off", 0},                     /* ZIO_COMPRESS_OFF */
+       {"lzjb", lzjb_decompress},      /* ZIO_COMPRESS_LZJB */
+       {"empty", 0}                    /* ZIO_COMPRESS_EMPTY */
 };
+
+static int zio_read_data(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack);
+
 
 /* From disk_io.c */
 /* ZFS root filesystem for booting */
@@ -170,8 +174,7 @@
 zio_checksum_verify(blkptr_t *bp, char *data, int size)
 {
        zio_cksum_t zc = bp->blk_cksum;
-       uint32_t checksum = BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER :
-           BP_GET_CHECKSUM(bp);
+       uint32_t checksum = BP_GET_CHECKSUM(bp);
        int byteswap = BP_SHOULD_BYTESWAP(bp);
        zio_block_tail_t *zbt = (zio_block_tail_t *)(data + size) - 1;
        zio_checksum_info_t *ci = &zio_checksum_table[checksum];
@@ -185,27 +188,13 @@
                return (-1);
 
        if (ci->ci_zbt) {
-               if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
-                       /*
-                        * 'gang blocks' is not supported.
-                        */
-                       return (-1);
-               }
-
-               if (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)) {
-                       /* byte swapping is not supported */
-                       return (-1);
-               } else {
-                       expected_cksum = zbt->zbt_cksum;
-                       zbt->zbt_cksum = zc;
-                       ci->ci_func[0](data, size, &actual_cksum);
-                       zbt->zbt_cksum = expected_cksum;
-               }
+               expected_cksum = zbt->zbt_cksum;
+               zbt->zbt_cksum = zc;
+               ci->ci_func[0](data, size, &actual_cksum);
+               zbt->zbt_cksum = expected_cksum;
                zc = expected_cksum;
 
        } else {
-               if (BP_IS_GANG(bp))
-                       return (-1);
                ci->ci_func[byteswap](data, size, &actual_cksum);
        }
 
@@ -298,7 +287,7 @@
                return (-1);
 
        if (uber->ub_magic == UBERBLOCK_MAGIC &&
-           uber->ub_version >= SPA_VERSION_1 &&
+           uber->ub_version > 0 &&
            uber->ub_version <= SPA_VERSION)
                return (0);
 
@@ -337,7 +326,93 @@
 }
 
 /*
- * Read in a block and put its uncompressed data in buf.
+ * Read a block of data based on the gang block address dva,
+ * and put its data in buf.
+ *
+ * Return:
+ *     0 - success
+ *     1 - failure
+ */
+static int
+zio_read_gang(fsi_file_t *ffi, blkptr_t *bp, dva_t *dva, void *buf, char *stack)
+{
+       zio_gbh_phys_t *zio_gb;
+       uint64_t offset, sector;
+       blkptr_t tmpbp;
+       int i;
+
+       zio_gb = (zio_gbh_phys_t *)stack;
+       stack += SPA_GANGBLOCKSIZE;
+       offset = DVA_GET_OFFSET(dva);
+       sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
+
+       /* read in the gang block header */
+       if (devread(ffi, sector, 0, SPA_GANGBLOCKSIZE, (char *)zio_gb) == 0) {
+               return (1);
+       }
+
+       /* self-checksumming the gang block header */
+       BP_ZERO(&tmpbp);
+       BP_SET_CHECKSUM(&tmpbp, ZIO_CHECKSUM_GANG_HEADER);
+       BP_SET_BYTEORDER(&tmpbp, ZFS_HOST_BYTEORDER);
+       ZIO_SET_CHECKSUM(&tmpbp.blk_cksum, DVA_GET_VDEV(dva),
+           DVA_GET_OFFSET(dva), bp->blk_birth, 0);
+       if (zio_checksum_verify(&tmpbp, (char *)zio_gb, SPA_GANGBLOCKSIZE)) {
+               return (1);
+       }
+
+       for (i = 0; i < SPA_GBH_NBLKPTRS; i++) {
+               if (zio_gb->zg_blkptr[i].blk_birth == 0)
+                       continue;
+
+               if (zio_read_data(ffi, &zio_gb->zg_blkptr[i], buf, stack))
+                       return (1);
+               buf += BP_GET_PSIZE(&zio_gb->zg_blkptr[i]);
+       }
+
+       return (0);
+}
+
+/*
+ * Read in a block of raw data to buf.
+ *
+ * Return:
+ *     0 - success
+ *     1 - failure
+ */
+static int
+zio_read_data(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack)
+{
+       int i, psize;
+
+       psize = BP_GET_PSIZE(bp);
+
+       /* pick a good dva from the block pointer */
+       for (i = 0; i < SPA_DVAS_PER_BP; i++) {
+               uint64_t offset, sector;
+
+               if (bp->blk_dva[i].dva_word[0] == 0 &&
+                   bp->blk_dva[i].dva_word[1] == 0)
+                       continue;
+
+               if (DVA_GET_GANG(&bp->blk_dva[i])) {
+                       if (zio_read_gang(ffi, bp, &bp->blk_dva[i], buf, stack) == 0)
+                               return (0);
+               } else {
+                       /* read in a data block */
+                       offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
+                       sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
+                       if (devread(ffi, sector, 0, psize, buf))
+                               return (0);
+               }
+       }
+
+       return (1);
+}
+
+/*
+ * Read in a block of data, verify its checksum, decompress if needed,
+ * and put the uncompressed data in buf.
  *
  * Return:
  *     0 - success
@@ -346,49 +421,41 @@
 static int
 zio_read(fsi_file_t *ffi, blkptr_t *bp, void *buf, char *stack)
 {
-       uint64_t offset, sector;
-       int psize, lsize;
-       int i, comp, cksum;
+       int lsize, psize, comp;
+       char *retbuf;
 
+       comp = BP_GET_COMPRESS(bp);
+       lsize = BP_GET_LSIZE(bp);
        psize = BP_GET_PSIZE(bp);
-       lsize = BP_GET_LSIZE(bp);
-       comp = BP_GET_COMPRESS(bp);
-       cksum = BP_GET_CHECKSUM(bp);
 
        if ((unsigned int)comp >= ZIO_COMPRESS_FUNCTIONS ||
            (comp != ZIO_COMPRESS_OFF &&
-           decomp_table[comp].decomp_func == NULL))
+           decomp_table[comp].decomp_func == NULL)) {
                return (ERR_FSYS_CORRUPT);
-
-       /* pick a good dva from the block pointer */
-       for (i = 0; i < SPA_DVAS_PER_BP; i++) {
-
-               if (bp->blk_dva[i].dva_word[0] == 0 &&
-                   bp->blk_dva[i].dva_word[1] == 0)
-                       continue;
-
-               /* read in a block */
-               offset = DVA_GET_OFFSET(&bp->blk_dva[i]);
-               sector =  DVA_OFFSET_TO_PHYS_SECTOR(offset);
-
-               if (comp != ZIO_COMPRESS_OFF) {
-
-                       if (devread(ffi, sector, 0, psize, stack) == 0)
-                               continue;
-                       if (zio_checksum_verify(bp, stack, psize) != 0)
-                               continue;
-                       decomp_table[comp].decomp_func(stack, buf, psize,
-                           lsize);
-               } else {
-                       if (devread(ffi, sector, 0, psize, buf) == 0)
-                               continue;
-                       if (zio_checksum_verify(bp, buf, psize) != 0)
-                               continue;
-               }
-               return (0);
        }
 
-       return (ERR_FSYS_CORRUPT);
+       if ((char *)buf < stack && ((char *)buf) + lsize > stack) {
+               return (ERR_WONT_FIT);
+       }
+
+       retbuf = buf;
+       if (comp != ZIO_COMPRESS_OFF) {
+               buf = stack;
+               stack += psize;
+       }
+
+       if (zio_read_data(ffi, bp, buf, stack)) {
+               return (ERR_FSYS_CORRUPT);
+       }
+
+       if (zio_checksum_verify(bp, buf, psize) != 0) {
+               return (ERR_FSYS_CORRUPT);
+       }
+
+       if (comp != ZIO_COMPRESS_OFF)
+               decomp_table[comp].decomp_func(buf, retbuf, psize, lsize);
+
+       return (0);
 }
 
 /*
@@ -618,6 +685,8 @@
        /* Get the leaf block */
        l = (zap_leaf_phys_t *)stack;
        stack += 1<<blksft;
+       if ((1<<blksft) < sizeof (zap_leaf_phys_t))
+               return (ERR_FSYS_CORRUPT);
        if ((errnum = dmu_read(ffi, zap_dnode, blkid, l, stack)))
                return (errnum);
 
@@ -865,6 +934,8 @@
        char *cname, ch;
        blkptr_t *bp;
        objset_phys_t *osp;
+       int issnapshot = 0;
+       char *snapname = NULL;
 
        if (fsname == NULL && obj) {
                headobj = *obj;
@@ -905,6 +976,13 @@
                ch = *fsname;
                *fsname = 0;
 
+               snapname = cname;
+               while (*snapname && !isspace((uint8_t)*snapname) && *snapname != '@')
+                       snapname++;
+               if (*snapname == '@') {
+                       issnapshot = 1;
+                       *snapname = 0;
+               }
                childobj =
                    ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_child_dir_zapobj;
                if ((errnum = dnode_get(ffi, mosmdn, childobj,
@@ -919,6 +997,8 @@
                        return (errnum);
 
                *fsname = ch;
+               if (issnapshot)
+                       *snapname = '@';
        }
        headobj = ((dsl_dir_phys_t *)DN_BONUS(mdn))->dd_head_dataset_obj;
        if (obj)
@@ -928,8 +1008,23 @@
        if ((errnum = dnode_get(ffi, mosmdn, headobj, DMU_OT_DSL_DATASET, mdn,
            stack)))
                return (errnum);
+       if (issnapshot) {
+               uint64_t snapobj;
 
-       /* TODO: Add snapshot support here - for fsname=snapshot-name */
+               snapobj = ((dsl_dataset_phys_t *)DN_BONUS(mdn))->
+                   ds_snapnames_zapobj;
+
+               if ((errnum = dnode_get(ffi, mosmdn, snapobj,
+                   DMU_OT_DSL_DS_SNAP_MAP, mdn, stack)))
+                       return (errnum);
+               if (zap_lookup(ffi, mdn, snapname + 1, &headobj, stack))
+                       return (ERR_FILESYSTEM_NOT_FOUND);
+               if ((errnum = dnode_get(ffi, mosmdn, headobj,
+                   DMU_OT_DSL_DATASET, mdn, stack)))
+                       return (errnum);
+               if (obj)
+                       *obj = headobj;
+       }
 
        bp = &((dsl_dataset_phys_t *)DN_BONUS(mdn))->ds_bp;
        osp = (objset_phys_t *)stack;
@@ -1073,8 +1168,6 @@
            DATA_TYPE_UINT64, NULL) == 0 ||
            nvlist_lookup_value(nv, ZPOOL_CONFIG_FAULTED, &ival,
            DATA_TYPE_UINT64, NULL) == 0 ||
-           nvlist_lookup_value(nv, ZPOOL_CONFIG_DEGRADED, &ival,
-           DATA_TYPE_UINT64, NULL) == 0 ||
            nvlist_lookup_value(nv, ZPOOL_CONFIG_REMOVED, &ival,
            DATA_TYPE_UINT64, NULL) == 0)
                return (ERR_DEV_VALUES);
@@ -1087,20 +1180,35 @@
  * The caller should already allocate MAXNAMELEN memory for bootpath.
  */
 static int
-vdev_get_bootpath(char *nv, char *bootpath)
+vdev_get_bootpath(char *nv, uint64_t inguid, char *bootpath)
 {
        char type[16];
 
-       bootpath[0] = '\0';
        if (nvlist_lookup_value(nv, ZPOOL_CONFIG_TYPE, &type, DATA_TYPE_STRING,
            NULL))
                return (ERR_FSYS_CORRUPT);
 
        if (strcmp(type, VDEV_TYPE_DISK) == 0) {
-               if (vdev_validate(nv) != 0 ||
-                   nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH, bootpath,
-                   DATA_TYPE_STRING, NULL) != 0)
+               uint64_t guid;
+
+               if (vdev_validate(nv) != 0)
                        return (ERR_NO_BOOTPATH);
+
+               if (nvlist_lookup_value(nv, ZPOOL_CONFIG_GUID,
+                   &guid, DATA_TYPE_UINT64, NULL) != 0)
+                       return (ERR_NO_BOOTPATH);
+
+               if (guid != inguid)
+                       return (ERR_NO_BOOTPATH);
+
+               if (nvlist_lookup_value(nv, ZPOOL_CONFIG_PHYS_PATH,
+                   bootpath, DATA_TYPE_STRING, NULL) != 0)
+                       bootpath[0] = '\0';
+
+               if (strlen(bootpath) >= MAXNAMELEN)
+                       return (ERR_WONT_FIT);
+
+               return (0);
 
        } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
                int nelm, i;
@@ -1111,28 +1219,15 @@
                        return (ERR_FSYS_CORRUPT);
 
                for (i = 0; i < nelm; i++) {
-                       char tmp_path[MAXNAMELEN];
                        char *child_i;
 
                        child_i = nvlist_array(child, i);
-                       if (vdev_validate(child_i) != 0)
-                               continue;
-
-                       if (nvlist_lookup_value(child_i, ZPOOL_CONFIG_PHYS_PATH,
-                           tmp_path, DATA_TYPE_STRING, NULL) != 0)
-                               return (ERR_NO_BOOTPATH);
-
-                       if ((strlen(bootpath) + strlen(tmp_path)) > MAXNAMELEN)
-                               return (ERR_WONT_FIT);
-
-                       if (strlen(bootpath) == 0)
-                               sprintf(bootpath, "%s", tmp_path);
-                       else
-                               sprintf(bootpath, "%s %s", bootpath, tmp_path);
+                       if (vdev_get_bootpath(child_i, inguid, bootpath) == 0)
+                               return (0);
                }
        }
 
-       return (strlen(bootpath) > 0 ? 0 : ERR_NO_BOOTPATH);
+       return (ERR_NO_BOOTPATH);
 }
 
 /*
@@ -1148,6 +1243,8 @@
        vdev_phys_t *vdev;
        uint64_t sector, pool_state, txg = 0;
        char *nvlist, *nv;
+       uint64_t diskguid;
+       uint64_t version;
        zfs_bootarea_t *zfs_ba = (zfs_bootarea_t *)ffi->ff_fsi->f_data;
 
        sector = (label * sizeof (vdev_label_t) + VDEV_SKIP_SIZE +
@@ -1181,11 +1278,18 @@
        if (txg == 0)
                return (ERR_NO_BOOTPATH);
 
+       if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VERSION, &version,
+           DATA_TYPE_UINT64, NULL))
+               return (ERR_FSYS_CORRUPT);
+       if (version > SPA_VERSION)
+               return (ERR_NEWER_VERSION);
        if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_VDEV_TREE, &nv,
            DATA_TYPE_NVLIST, NULL))
                return (ERR_FSYS_CORRUPT);
-
-       if (vdev_get_bootpath(nv, current_bootpath))
+       if (nvlist_lookup_value(nvlist, ZPOOL_CONFIG_GUID, &diskguid,
+           DATA_TYPE_UINT64, NULL))
+               return (ERR_FSYS_CORRUPT);
+       if (vdev_get_bootpath(nv, diskguid, current_bootpath))
                return (ERR_NO_BOOTPATH);
 
        return (0);
diff -r 10a8fae412c5 -r 74b7416dc04c tools/libfsimage/zfs/zfs-include/zfs.h
--- a/tools/libfsimage/zfs/zfs-include/zfs.h    Wed Jan 14 13:43:17 2009 +0000
+++ b/tools/libfsimage/zfs/zfs-include/zfs.h    Thu Jan 15 12:23:07 2009 +0000
@@ -28,17 +28,7 @@
 /*
  * On-disk version number.
  */
-#define        SPA_VERSION_1                   1ULL
-#define        SPA_VERSION_2                   2ULL
-#define        SPA_VERSION_3                   3ULL
-#define        SPA_VERSION_4                   4ULL
-#define        SPA_VERSION_5                   5ULL
-#define        SPA_VERSION_6                   6ULL
-#define        SPA_VERSION_7                   7ULL
-#define        SPA_VERSION_8                   8ULL
-#define        SPA_VERSION_9                   9ULL
-#define        SPA_VERSION_10                  10ULL
-#define        SPA_VERSION                     SPA_VERSION_10
+#define        SPA_VERSION                     14ULL
 
 /*
  * The following are configuration names used in the nvlist describing a pool's
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] Port over the latest zfs code from opensolaris-grub to libfsimage, Jody Belka <=