WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] libxl: sane disk backend selection and validation

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] libxl: sane disk backend selection and validation
From: Ian Jackson <Ian.Jackson@xxxxxxxxxxxxx>
Date: Wed, 6 Jul 2011 16:07:23 +0100
Delivery-date: Wed, 06 Jul 2011 08:08:17 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Introduce a new function libxl__device_disk_set_backend which
does some sanity checks and determines which backend ought to be used.

If the caller specifies LIBXL_DISK_BACKEND_UNKNOWN (which has the
value 0), it tries PHY, TAP and QDISK in that order.  Otherwise it
tries only the specified value.

libxl__device_disk_set_backend (and its helper function
disk_try_backend) inherit the role (and small amounts of the code)
from validate_virtual_disk.  This is called during do_domain_create
and also from libxl_disk_device_add (for the benefit of hotplug
devices).

It also now takes over the role of the scattered fragments of backend
selection found in libxl_device_disk_add,
libxl_device_disk_local_attach and libxl__need_xenpv_qemu.  These
latter functions now simply do the job for the backend they find has
already been specified and checked.

The restrictions on the capabilities of each backend, as expressed in
disk_try_backend (and to an extent in libxl_device_disk_local_attach)
are intended to be identical to the previous arrangements.

In 23618:3173b68c8a94 combined with 23622:160f7f39841b,
23623:c7180c353eb2, "xl" effectively became much more likely to select
TAP as the backend.  With this change to libxl the default backend
selected by the libxl__device_disk_set_backend is intended to once
again to be PHY where possible.

Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>

diff -r 7e4404a8f5f9 tools/libxl/libxl.c
--- a/tools/libxl/libxl.c       Mon Jul 04 07:57:32 2011 +0100
+++ b/tools/libxl/libxl.c       Wed Jul 06 16:06:32 2011 +0100
@@ -902,49 +902,6 @@ int libxl_vncviewer_exec(libxl_ctx *ctx,
 
 
/******************************************************************************/
 
-static int validate_virtual_disk(libxl__gc *gc, char *file_name,
-                                 libxl_device_disk *disk)
-{
-    libxl_ctx *ctx = libxl__gc_owner(gc);
-    struct stat stat_buf;
-    char *delimiter;
-
-    if (disk->format == LIBXL_DISK_FORMAT_EMPTY) {
-        if (disk->is_cdrom)
-            return 0;
-        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Empty disk %s is not a CDROM 
device\n",
-                   disk->vdev);
-        return ERROR_INVAL;
-    }
-
-    if (disk->format == LIBXL_DISK_FORMAT_RAW) {
-        delimiter = strchr(file_name, ':');
-        if (delimiter) {
-            if (!strncmp(file_name, "vhd:", sizeof("vhd:")-1)) {
-                disk->format = LIBXL_DISK_FORMAT_VHD;
-                file_name = ++delimiter;
-            }
-        }
-    }
-
-    if ( stat(file_name, &stat_buf) != 0 ) {
-        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "failed to stat %s", 
file_name);
-        return ERROR_INVAL;
-    }
-    if (disk->backend == LIBXL_DISK_BACKEND_PHY) {
-        if ( !(S_ISBLK(stat_buf.st_mode)) ) {
-            LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Virtual disk %s is not a block 
device!\n",
-                file_name);
-            return ERROR_INVAL;
-        }
-    } else if ( S_ISREG(stat_buf.st_mode) && stat_buf.st_size == 0 ) {
-        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Virtual disk %s size is 0!\n", 
file_name);
-        return ERROR_INVAL;
-    }
-
-    return 0;
-}
-
 int libxl_device_disk_add(libxl_ctx *ctx, uint32_t domid, libxl_device_disk 
*disk)
 {
     libxl__gc gc = LIBXL_INIT_GC(ctx);
@@ -954,10 +911,6 @@ int libxl_device_disk_add(libxl_ctx *ctx
     int devid;
     libxl__device device;
     int major, minor, rc;
-
-    rc = validate_virtual_disk(&gc, disk->pdev_path, disk); 
-    if (rc)
-        return rc;
 
     front = flexarray_make(16, 1);
     if (!front) {
@@ -990,34 +943,6 @@ int libxl_device_disk_add(libxl_ctx *ctx
     device.devid = devid;
     device.domid = domid;
     device.kind = DEVICE_VBD;
-
-
-    /*
-     * Fixing the incoming backend type to try to decide on which
-     * backend to use.  Unfortunately at the moment this code is
-     * utterly broken, but it more or less works.
-     */
-
-    /*
-     * Backend type UNKNOWN should mean "caller does not want to specify",
-     * not "break pointlessely".  (Callers should not be required to
-     * specify the backend if they don't want to.)
-     */
-    if (disk->backend == LIBXL_DISK_BACKEND_UNKNOWN)
-        disk->backend = LIBXL_DISK_BACKEND_TAP;
-
-    /* If blktap is not available then fallback to qdisk */
-    if (disk->backend == LIBXL_DISK_BACKEND_TAP && !libxl__blktap_enabled(&gc))
-        disk->backend = LIBXL_DISK_BACKEND_QDISK;
-
-    /*
-     * blktap cannot handle empty disks (aka cdroms). Fall back to
-     * qdisk because qemu-xen creates the disk based on the xenstore
-     * entries.
-     */
-    if (disk->backend == LIBXL_DISK_BACKEND_TAP &&
-        disk->format == LIBXL_DISK_FORMAT_EMPTY)
-        disk->backend == LIBXL_DISK_BACKEND_QDISK;
 
     switch (disk->backend) {
         case LIBXL_DISK_BACKEND_PHY:
@@ -1138,68 +1063,49 @@ char * libxl_device_disk_local_attach(li
     libxl__gc gc = LIBXL_INIT_GC(ctx);
     char *dev = NULL;
     char *ret = NULL;
+    int rc;
 
-    /*
-     * Fixing the incoming backend type to try to decide on which
-     * backend to use.  Unfortunately at the moment this code is
-     * utterly broken, but it more or less works.
-     */
-
-    /*
-     * Backend type UNKNOWN should mean "caller does not want to specify",
-     * not "break pointlessely".  (Callers should not be required to
-     * specify the backend if they don't want to.)
-     */
-    if (disk->backend == LIBXL_DISK_BACKEND_UNKNOWN)
-        disk->backend = LIBXL_DISK_BACKEND_TAP;
+    rc = libxl__device_disk_set_backend(&gc, disk, 0);
+    if (rc) goto out;
 
     switch (disk->backend) {
         case LIBXL_DISK_BACKEND_PHY:
-            if (disk->format != LIBXL_DISK_FORMAT_RAW) {
-                LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "physical block device must"
-                    " be raw");
-                break;
-            }
-            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "attaching PHY disk %s to domain 
0",
-                disk->pdev_path);
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching PHY disk %s",
+                       disk->pdev_path);
             dev = disk->pdev_path;
             break;
         case LIBXL_DISK_BACKEND_TAP:
-            if (disk->format == LIBXL_DISK_FORMAT_VHD ||
-                disk->format == LIBXL_DISK_FORMAT_RAW)
-            {
-                if (libxl__blktap_enabled(&gc))
-                    dev = libxl__blktap_devpath(&gc, disk->pdev_path, 
disk->format);
-                else {
-                    if (disk->format != LIBXL_DISK_FORMAT_RAW) {
-                        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "tapdisk2 is 
required"
-                            " to open a vhd disk");
-                        break;
-                    } else {
-                        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "attaching tap disk 
%s to domain 0",
-                            disk->pdev_path);
-                        dev = disk->pdev_path;
-                        break;
-                    }
-                }
+            switch (disk->format) {
+            case LIBXL_DISK_FORMAT_RAW:
+                /* optimise away the early tapdisk attach in this case */
+                LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching"
+                           " tap disk %s directly (ie without using blktap)",
+                           disk->pdev_path);
+                dev = disk->pdev_path;
                 break;
-            } else if (disk->format == LIBXL_DISK_FORMAT_QCOW ||
-                       disk->format == LIBXL_DISK_FORMAT_QCOW2) {
-                LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot locally attach a 
qcow or qcow2 disk image");
+            case LIBXL_DISK_FORMAT_VHD:
+                dev = libxl__blktap_devpath(&gc, disk->pdev_path,
+                                            disk->format);
                 break;
-            } else {
-                LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "unrecognized disk backend "
-                    "type: %d", disk->backend);
+            case LIBXL_DISK_FORMAT_QCOW:
+            case LIBXL_DISK_FORMAT_QCOW2:
+                LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot locally attach"
+                           " a qcow or qcow2 disk image");
+                break;
+            default:
+                LIBXL__LOG(ctx, LIBXL__LOG_ERROR,
+                           "unrecognized disk format: %d", disk->format);
                 break;
             }
+            break;
         case LIBXL_DISK_BACKEND_QDISK:
             if (disk->format != LIBXL_DISK_FORMAT_RAW) {
-                LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot locally attach a 
qdisk "
-                    "image if the format is not raw");
+                LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot locally"
+                           " attach a qdisk image if the format is not raw");
                 break;
             }
-            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "attaching qdisk %s to domain 
0\n",
-                disk->pdev_path);
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "locally attaching qdisk %s\n",
+                       disk->pdev_path);
             dev = disk->pdev_path;
             break;
         default:
@@ -1208,6 +1114,7 @@ char * libxl_device_disk_local_attach(li
             break;
     }
 
+ out:
     if (dev != NULL)
         ret = strdup(dev);
     libxl__free_all(&gc);
diff -r 7e4404a8f5f9 tools/libxl/libxl_create.c
--- a/tools/libxl/libxl_create.c        Mon Jul 04 07:57:32 2011 +0100
+++ b/tools/libxl/libxl_create.c        Wed Jul 06 16:06:32 2011 +0100
@@ -424,6 +424,13 @@ static int do_domain_create(libxl__gc *g
             goto error_out;
     }
 
+
+    for (i = 0; i < d_config->num_disks; i++) {
+        ret = libxl__device_disk_set_backend(gc, &d_config->disks[i],
+                                             dm_info);
+        if (ret) goto error_out;
+    }
+
     if ( restore_fd < 0 ) {
         ret = libxl_run_bootloader(ctx, &d_config->b_info, d_config->num_disks 
> 0 ? &d_config->disks[0] : NULL, domid);
         if (ret) {
diff -r 7e4404a8f5f9 tools/libxl/libxl_device.c
--- a/tools/libxl/libxl_device.c        Mon Jul 04 07:57:32 2011 +0100
+++ b/tools/libxl/libxl_device.c        Wed Jul 06 16:06:32 2011 +0100
@@ -116,6 +116,140 @@ retry_transaction:
     rc = 0;
 out:
     return rc;
+}
+
+typedef struct {
+    libxl__gc *gc;
+    libxl_device_disk *disk;
+    const libxl_device_model_info *dm_info;
+    struct stat stab;
+} disk_try_backend_args;
+
+static int disk_try_backend(disk_try_backend_args *a,
+                            libxl_disk_backend backend) {
+    /* returns 0 (ie, DISK_BACKEND_UNKNOWN) on failure, or
+     * backend on success */
+    libxl_ctx *ctx = libxl__gc_owner(a->gc);
+    switch (backend) {
+
+    case LIBXL_DISK_BACKEND_PHY:
+        if (!(a->disk->format == LIBXL_DISK_FORMAT_RAW ||
+              a->disk->format == LIBXL_DISK_FORMAT_EMPTY)) {
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, backend phy"
+                       " unsuitable due to format %s",
+                       a->disk->vdev,
+                       libxl__device_disk_string_of_format(a->disk->format));
+            return 0;
+        }
+        if (a->disk->format != LIBXL_DISK_FORMAT_EMPTY &&
+            !S_ISBLK(a->stab.st_mode)) {
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, backend phy"
+                       " unsuitable as phys path not a block device",
+                       a->disk->vdev);
+            return ERROR_INVAL;
+        }
+
+        return backend;
+
+    case LIBXL_DISK_BACKEND_TAP:
+        if (!libxl__blktap_enabled(a->gc)) {
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, backend tap"
+                       " unsuitable because blktap not available",
+                       a->disk->vdev);
+            return 0;
+        }
+        if (a->disk->format == LIBXL_DISK_FORMAT_EMPTY ||
+            (S_ISREG(a->stab.st_mode) && !a->stab.st_size)) {
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, backend tap"
+                       " unsuitable because empty devices not supported",
+                       a->disk->vdev);
+            return 0;
+        }
+        return backend;
+
+    case LIBXL_DISK_BACKEND_QDISK:
+        if (!a->dm_info) {
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, backend qdisk"
+                       " unsuitable for dynamic attach",
+                       a->disk->vdev);
+            return 0;
+        }
+        switch (a->dm_info->device_model_version) {
+        case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, backend qdisk"
+                       " unsuitable because using old qemu",
+                       a->disk->vdev);
+            return 0;
+        case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
+            return backend;
+        }
+        abort();
+
+    default:
+        LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, backend "
+                   " %d unknown", a->disk->vdev, backend);
+        return 0;
+        
+    }
+}            
+
+int libxl__device_disk_set_backend(libxl__gc *gc, libxl_device_disk *disk,
+                                   const libxl_device_model_info *dm_info) {
+    libxl_ctx *ctx = libxl__gc_owner(gc);
+    libxl_disk_backend ok;
+    disk_try_backend_args a;
+
+    a.gc = gc;
+    a.disk = disk;
+    a.dm_info = dm_info;
+
+    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s spec.backend=%s",
+               disk->vdev,
+               libxl__device_disk_string_of_backend(disk->backend));
+
+    if (disk->format == LIBXL_DISK_FORMAT_EMPTY) {
+        if (!disk->is_cdrom) {
+            LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Disk vdev=%s is empty"
+                       " but not cdrom",
+                       disk->vdev);
+            return ERROR_INVAL;
+        }
+        memset(&a.stab, 0, sizeof(a.stab));
+    } else {
+        if (stat(disk->pdev_path, &a.stab)) {
+            LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Disk vdev=%s "
+                             "failed to stat: %s",
+                             disk->vdev, disk->pdev_path);
+            return ERROR_INVAL;
+        }
+        if (!S_ISBLK(a.stab.st_mode) &
+            !S_ISREG(a.stab.st_mode)) {
+            LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Disk vdev=%s "
+                             "phys path is not a block dev or file: %s",
+                             disk->vdev, disk->pdev_path);
+            return ERROR_INVAL;
+        }
+    }
+
+    if (disk->backend != LIBXL_DISK_BACKEND_UNKNOWN) {
+        ok= disk_try_backend(&a, disk->backend);
+    } else {
+        ok=
+            disk_try_backend(&a, LIBXL_DISK_BACKEND_PHY) ?:
+            disk_try_backend(&a, LIBXL_DISK_BACKEND_TAP) ?:
+            disk_try_backend(&a, LIBXL_DISK_BACKEND_QDISK);
+        if (ok)
+            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Disk vdev=%s, using backend %s",
+                       disk->vdev,
+                       libxl__device_disk_string_of_backend(ok));
+    }
+    if (!ok) {
+        LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "no suitable backend for disk %s",
+                   disk->vdev);
+        return ERROR_INVAL;
+    }
+    disk->backend = ok;
+    return 0;
 }
 
 char *libxl__device_disk_string_of_format(libxl_disk_format format)
diff -r 7e4404a8f5f9 tools/libxl/libxl_dm.c
--- a/tools/libxl/libxl_dm.c    Mon Jul 04 07:57:32 2011 +0100
+++ b/tools/libxl/libxl_dm.c    Wed Jul 06 16:06:32 2011 +0100
@@ -1001,25 +1001,18 @@ int libxl__need_xenpv_qemu(libxl__gc *gc
     }
 
     if (nr_disks > 0) {
-        int blktap_enabled = -1;
         for (i = 0; i < nr_disks; i++) {
             switch (disks[i].backend) {
-            case LIBXL_DISK_BACKEND_TAP:
-                if (blktap_enabled == -1)
-                    blktap_enabled = libxl__blktap_enabled(gc);
-                if (!blktap_enabled) {
-                    ret = 1;
-                    goto out;
-                }
-                break;
-
             case LIBXL_DISK_BACKEND_QDISK:
                 ret = 1;
                 goto out;
 
+            case LIBXL_DISK_BACKEND_TAP:
             case LIBXL_DISK_BACKEND_PHY:
-            case LIBXL_DISK_BACKEND_UNKNOWN:
                 break;
+
+            case LIBXL_DISK_FORMAT_UNKNOWN:
+                abort(); /* impossible due to libxl__device_disk_set_backend */
             }
         }
     }
diff -r 7e4404a8f5f9 tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h      Mon Jul 04 07:57:32 2011 +0100
+++ b/tools/libxl/libxl_internal.h      Wed Jul 06 16:06:32 2011 +0100
@@ -205,6 +205,8 @@ _hidden void libxl__userdata_destroyall(
 /* from xl_device */
 _hidden char *libxl__device_disk_string_of_backend(libxl_disk_backend backend);
 _hidden char *libxl__device_disk_string_of_format(libxl_disk_format format);
+_hidden int libxl__device_disk_set_backend(libxl__gc*, libxl_device_disk *disk,
+                                   const libxl_device_model_info *dm_info);
 
 _hidden int libxl__device_physdisk_major_minor(const char *physpath, int 
*major, int *minor);
 _hidden int libxl__device_disk_dev_number(const char *virtpath,

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel