[Xen-devel] [PATCH 2/5] [POST-4.0]: HVM NUMA guest: pass NUMA information to libxc

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, "Kamble, Nitin A" <nitin.a.kamble@xxxxxxxxx>
Subject: [Xen-devel] [PATCH 2/5] [POST-4.0]: HVM NUMA guest: pass NUMA information to libxc
From: Andre Przywara <andre.przywara@xxxxxxx>
Date: Thu, 4 Feb 2010 22:54:22 +0100
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Thu, 04 Feb 2010 13:53:18 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.18 (X11/20081105)
This patch adds a "struct numa_guest_info" to libxc, which makes it possible to specify a guest's NUMA topology along with the appropriate host binding. For now the information is filled in by the Python wrapper (I left out the libxl part); it populates the struct with sensible default values (equal distribution across the nodes), so the caller only needs to specify the number of guest nodes. The information is then passed on to the hvm_info_table. The corresponding memory allocation is handled in a separate patch.
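
For illustration only (not part of the patch): a minimal sketch of how a C caller might fill the new struct before handing it to libxc. The helper name and its defaults (i % 2 host binding, equal memory split) are assumptions that loosely mirror what the Python wrapper below does.

#include <stdint.h>
#include <stdlib.h>
#include "xenguest.h"   /* struct numa_guest_info, from this patch */

/* Hypothetical helper: spread vcpus and memory evenly over 'nodes'
 * guest nodes and bind guest node i to host node (i % 2), roughly
 * what the Python wrapper defaults to. */
static int fill_numa_defaults(struct numa_guest_info *ni,
                              int nodes, int vcpus, uint64_t mem_kb)
{
    int i;

    if ( nodes < 1 || vcpus < 1 )
        return -1;

    ni->num_nodes = nodes;
    ni->guest_to_host_node = malloc(nodes * sizeof(int));
    ni->vcpu_to_node = malloc(vcpus * sizeof(int));
    ni->node_mem = malloc(nodes * sizeof(uint64_t));
    if ( !ni->guest_to_host_node || !ni->vcpu_to_node || !ni->node_mem )
        return -1;

    for ( i = 0; i < nodes; i++ )
    {
        ni->guest_to_host_node[i] = i % 2;  /* simple host binding */
        ni->node_mem[i] = mem_kb / nodes;   /* equal distribution */
    }
    for ( i = 0; i < vcpus; i++ )
        ni->vcpu_to_node[i] = i % nodes;    /* round-robin; the wrapper
                                             * uses contiguous blocks */
    return 0;
}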

Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>

Regards,
Andre.

--
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 488-3567-12
----to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
Managing Directors: Andrew Bowd; Thomas M. McCoy; Giuliano Meroni
Registered office: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Register court: Munich, HRB No. 43632
commit b4ee75af6f6220bdafccf75c9bc6e4915a413b18
Author: Andre Przywara <andre.przywara@xxxxxxx>
Date:   Mon Feb 1 12:36:28 2010 +0100

    Add struct numa_guest_info to the libxc interface; parse the NUMA
    information passed in by xend and hand it on to libxc.

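As a usage illustration only (again, not part of the patch), a caller of the extended xc_hvm_build() might look roughly like this; build_hvm_numa_guest() and its parameters are invented, and fill_numa_defaults() is the hypothetical helper sketched earlier in this mail. Existing callers that do not care about a NUMA topology can simply pass NULL, as the libxl hunk below does.

/* Sketch of a caller using the extended xc_hvm_build() signature
 * (same includes as the earlier sketch). */
static int build_hvm_numa_guest(int xc_handle, uint32_t domid,
                                int memsize_mb, int vcpus, int nodes,
                                const char *image_name)
{
    struct numa_guest_info numainfo;

    if ( fill_numa_defaults(&numainfo, nodes, vcpus,
                            (uint64_t)memsize_mb << 10) )
        return -1;

    /* New signature: the NUMA info pointer sits between memsize and
     * the image name; non-NUMA callers pass NULL here. */
    return xc_hvm_build(xc_handle, domid, memsize_mb, &numainfo, image_name);
}
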
diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
index 8fc7ac5..02103b1 100644
--- a/tools/libxc/xc_hvm_build.c
+++ b/tools/libxc/xc_hvm_build.c
@@ -106,6 +106,7 @@ static int loadelfimage(
 
 static int setup_guest(int xc_handle,
                        uint32_t dom, int memsize, int target,
+                       struct numa_guest_info *numainfo,
                        char *image, unsigned long image_size)
 {
     xen_pfn_t *page_array = NULL;
@@ -334,6 +335,7 @@ static int xc_hvm_build_internal(int xc_handle,
                                  uint32_t domid,
                                  int memsize,
                                  int target,
+                                 struct numa_guest_info *numainfo,
                                  char *image,
                                  unsigned long image_size)
 {
@@ -343,7 +345,8 @@ static int xc_hvm_build_internal(int xc_handle,
         return -1;
     }
 
-    return setup_guest(xc_handle, domid, memsize, target, image, image_size);
+    return setup_guest(xc_handle, domid, memsize, target,
+        numainfo, image, image_size);
 }
 
 /* xc_hvm_build:
@@ -352,6 +355,7 @@ static int xc_hvm_build_internal(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 struct numa_guest_info *numainfo,
                  const char *image_name)
 {
     char *image;
@@ -362,7 +366,8 @@ int xc_hvm_build(int xc_handle,
          ((image = xc_read_image(image_name, &image_size)) == NULL) )
         return -1;
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize, image, image_size);
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
+                                numainfo, image, image_size);
 
     free(image);
 
@@ -379,6 +384,7 @@ int xc_hvm_build_target_mem(int xc_handle,
                            uint32_t domid,
                            int memsize,
                            int target,
+                           struct numa_guest_info *numainfo,
                            const char *image_name)
 {
     char *image;
@@ -389,7 +395,8 @@ int xc_hvm_build_target_mem(int xc_handle,
          ((image = xc_read_image(image_name, &image_size)) == NULL) )
         return -1;
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target, image, image_size);
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target,
+                                numainfo, image, image_size);
 
     free(image);
 
@@ -402,6 +409,7 @@ int xc_hvm_build_target_mem(int xc_handle,
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     struct numa_guest_info *numainfo,
                      const char *image_buffer,
                      unsigned long image_size)
 {
@@ -425,7 +433,7 @@ int xc_hvm_build_mem(int xc_handle,
     }
 
     sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
-                                img, img_len);
+                                numainfo, img, img_len);
 
     /* xc_inflate_buffer may return the original buffer pointer (for
        for already inflated buffers), so exercise some care in freeing */
diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
index 851f769..110b0c9 100644
--- a/tools/libxc/xenguest.h
+++ b/tools/libxc/xenguest.h
@@ -62,6 +62,13 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
                       unsigned int console_evtchn, unsigned long *console_mfn,
                       unsigned int hvm, unsigned int pae, int superpages);
 
+struct numa_guest_info {
+       int num_nodes;
+       int *guest_to_host_node;
+       int *vcpu_to_node;
+       uint64_t *node_mem;
+};
+
 /**
  * This function will create a domain for a paravirtualized Linux
  * using file names pointing to kernel and ramdisk
@@ -143,17 +150,20 @@ int xc_linux_build_mem(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 struct numa_guest_info *numa_info,
                  const char *image_name);
 
 int xc_hvm_build_target_mem(int xc_handle,
                             uint32_t domid,
                             int memsize,
                             int target,
+                            struct numa_guest_info *numa_info,
                             const char *image_name);
 
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     struct numa_guest_info *numa_info,
                      const char *image_buffer,
                      unsigned long image_size);
 
diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c
index 457001c..15d1b71 100644
--- a/tools/libxc/xg_private.c
+++ b/tools/libxc/xg_private.c
@@ -177,6 +177,7 @@ __attribute__((weak))
     int xc_hvm_build(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     struct numa_guest_info *numainfo,
                      const char *image_name)
 {
     errno = ENOSYS;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 7196aa8..154253a 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -168,7 +168,7 @@ int build_hvm(struct libxl_ctx *ctx, uint32_t domid,
 {
     int ret;
 
-    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, info->kernel);
+    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, NULL, info->kernel);
     if (ret) {
         XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, ret, "hvm building failed");
         return ERROR_FAIL;
diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
index 1932758..ecd7800 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -915,15 +915,19 @@ static PyObject *pyxc_hvm_build(XcObject *self,
     int i;
     char *image;
     int memsize, target=-1, vcpus = 1, acpi = 0, apic = 1;
+    int numanodes;
+    struct numa_guest_info numainfo;
     PyObject *vcpu_avail_handle = NULL;
+    PyObject *nodemem_handle = NULL;
     uint8_t vcpu_avail[HVM_MAX_VCPUS/8];
 
-    static char *kwd_list[] = { "domid",
-                                "memsize", "image", "target", "vcpus", 
-                                "vcpu_avail", "acpi", "apic", NULL };
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOii", kwd_list,
+    static char *kwd_list[] = { "domid", "memsize", "image", "target",
+                                "vcpus", "vcpu_avail", "acpi", "apic",
+                                "nodes", "nodemem", NULL };
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOiiiO", kwd_list,
                                       &dom, &memsize, &image, &target, &vcpus,
-                                      &vcpu_avail_handle, &acpi, &apic) )
+                                      &vcpu_avail_handle, &acpi, &apic,
+                                      &numanodes, &nodemem_handle) )
         return NULL;
 
     memset(vcpu_avail, 0, sizeof(vcpu_avail));
@@ -954,8 +958,50 @@ static PyObject *pyxc_hvm_build(XcObject *self,
     if ( target == -1 )
         target = memsize;
 
-    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
-                                 target, image) != 0 )
+    numainfo.num_nodes = numanodes;
+    numainfo.guest_to_host_node = malloc(numanodes * sizeof(int)); 
+    numainfo.vcpu_to_node = malloc(vcpus * sizeof(int));
+    numainfo.node_mem = malloc(numanodes * sizeof(uint64_t)); 
+    if (numanodes > 1)
+    {
+        int vcpu_per_node, remainder;
+        int n;
+        vcpu_per_node = vcpus / numanodes;
+        remainder = vcpus % numanodes;
+        n = remainder * (vcpu_per_node + 1);
+        for (i = 0; i < vcpus; i++)
+        {
+            if (i < n)
+                numainfo.vcpu_to_node[i] = i / (vcpu_per_node + 1);
+            else
+                numainfo.vcpu_to_node[i] = remainder + (i - n) / vcpu_per_node;
+        }
+        for (i = 0; i < numanodes; i++)
+            numainfo.guest_to_host_node[i] = i % 2;
+        if ( nodemem_handle != NULL && PyList_Check(nodemem_handle) )
+        {
+            PyObject *item;
+            for ( i = 0; i < numanodes; i++)
+            {
+                item = PyList_GetItem(nodemem_handle, i);
+                numainfo.node_mem[i] = PyInt_AsLong(item);
+                fprintf(stderr, "NUMA: node %i has %lu kB\n", i,
+                    numainfo.node_mem[i]);
+            }
+        } else {
+            unsigned int mempernode;
+            if ( nodemem_handle != NULL && PyInt_Check(nodemem_handle) )
+                mempernode = PyInt_AsLong(nodemem_handle);
+            else
+                mempernode = (memsize << 10) / numanodes;
+            fprintf(stderr, "NUMA: setting all nodes to %i KB\n", mempernode);
+            for (i = 0; i < numanodes; i++)
+                numainfo.node_mem[i] = mempernode;
+        }
+    }
+
+    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target,
+                                 &numainfo, image) != 0 )
         return pyxc_error_to_exception();
 
 #if !defined(__ia64__)
@@ -970,6 +1016,15 @@ static PyObject *pyxc_hvm_build(XcObject *self,
     va_hvm->apic_mode    = apic;
     va_hvm->nr_vcpus     = vcpus;
     memcpy(va_hvm->vcpu_online, vcpu_avail, sizeof(vcpu_avail));
+
+    va_hvm->num_nodes    = numanodes;
+    if (numanodes > 0) {
+        for (i = 0; i < vcpus; i++)
+            va_hvm->vcpu_to_node[i] = numainfo.vcpu_to_node[i];
+        for (i = 0; i < numanodes; i++)
+            va_hvm->node_mem[i] = numainfo.node_mem[i] >> 10;
+    }
+
     for ( i = 0, sum = 0; i < va_hvm->length; i++ )
         sum += ((uint8_t *)va_hvm)[i];
     va_hvm->checksum -= sum;
@@ -1174,7 +1229,7 @@ static PyObject *pyxc_physinfo(XcObject *self)
             nr_nodes++;
     }
 
-    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}",
+    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s,s:s}",
                             "nr_nodes",         nr_nodes,
                             "max_node_id",      info.max_node_id,
                             "max_cpu_id",       info.max_cpu_id,
diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
index f3b2cc5..f06d6e2 100644
--- a/tools/python/xen/xend/image.py
+++ b/tools/python/xen/xend/image.py
@@ -961,7 +961,8 @@ class HVMImageHandler(ImageHandler):
                           vcpus          = self.vm.getVCpuCount(),
                           vcpu_avail     = self.vm.getVCpuAvail(),
                           acpi           = self.acpi,
-                          apic           = self.apic)
+                          apic           = self.apic,
+                          nodes          = 0)
         rc['notes'] = { 'SUSPEND_CANCEL': 1 }
 
         rc['store_mfn'] = xc.hvm_get_param(self.vm.getDomid(),
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel