
To: Andre Przywara <andre.przywara@xxxxxxx>
Subject: Re: [Xen-devel] [PATCH 2/5] [POST-4.0]: HVM NUMA guest: pass NUMA information to libxc
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date: Thu, 4 Feb 2010 19:32:29 -0500
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>, Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Delivery-date: Thu, 04 Feb 2010 17:11:25 -0800
In-reply-to: <4B6B420E.2010104@xxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <4B6B420E.2010104@xxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.19 (2009-01-05)
On Thu, Feb 04, 2010 at 10:54:22PM +0100, Andre Przywara wrote:
> This patch adds a "struct numa_guest_info" to libxc, which allows
> specifying a guest's NUMA topology along with the appropriate host
> binding. For now the information is filled out by the Python wrapper
> (I left out the libxl part); it fills the struct with sensible default
> values (equal distribution across nodes), so only the number of guest
> nodes needs to be specified by the caller. The information is passed
> on to the hvm_info_table. The respective memory allocation is in
> another patch.
>
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
>
> Regards,
> Andre.
>
> -- 
> Andre Przywara
> AMD-Operating System Research Center (OSRC), Dresden, Germany
> Tel: +49 351 488-3567-12

> commit b4ee75af6f6220bdafccf75c9bc6e4915a413b18
> Author: Andre Przywara <andre.przywara@xxxxxxx>
> Date:   Mon Feb 1 12:36:28 2010 +0100
> 
>     add struct numainfo to interface and
>     parse xend passed NUMA info and pass on to libxc
> 
> diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
> index 8fc7ac5..02103b1 100644
> --- a/tools/libxc/xc_hvm_build.c
> +++ b/tools/libxc/xc_hvm_build.c
> @@ -106,6 +106,7 @@ static int loadelfimage(
>  
>  static int setup_guest(int xc_handle,
>                         uint32_t dom, int memsize, int target,
> +                       struct numa_guest_info *numainfo,
>                         char *image, unsigned long image_size)
>  {
>      xen_pfn_t *page_array = NULL;
> @@ -334,6 +335,7 @@ static int xc_hvm_build_internal(int xc_handle,
>                                   uint32_t domid,
>                                   int memsize,
>                                   int target,
> +                                 struct numa_guest_info *numainfo,
>                                   char *image,
>                                   unsigned long image_size)
>  {
> @@ -343,7 +345,8 @@ static int xc_hvm_build_internal(int xc_handle,
>          return -1;
>      }
>  
> -    return setup_guest(xc_handle, domid, memsize, target, image, image_size);
> +    return setup_guest(xc_handle, domid, memsize, target,
> +        numainfo, image, image_size);
>  }
>  
>  /* xc_hvm_build:
> @@ -352,6 +355,7 @@ static int xc_hvm_build_internal(int xc_handle,
>  int xc_hvm_build(int xc_handle,
>                   uint32_t domid,
>                   int memsize,
> +                 struct numa_guest_info *numainfo,
>                   const char *image_name)
>  {
>      char *image;
> @@ -362,7 +366,8 @@ int xc_hvm_build(int xc_handle,
>           ((image = xc_read_image(image_name, &image_size)) == NULL) )
>          return -1;
>  
> -    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize, image, image_size);
> +    sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> +                                numainfo, image, image_size);
>  
>      free(image);
>  
> @@ -379,6 +384,7 @@ int xc_hvm_build_target_mem(int xc_handle,
>                             uint32_t domid,
>                             int memsize,
>                             int target,
> +                           struct numa_guest_info *numainfo,
>                             const char *image_name)
>  {
>      char *image;
> @@ -389,7 +395,8 @@ int xc_hvm_build_target_mem(int xc_handle,
>           ((image = xc_read_image(image_name, &image_size)) == NULL) )
>          return -1;
>  
> -    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target, image, image_size);
> +    sts = xc_hvm_build_internal(xc_handle, domid, memsize, target,
> +                                numainfo, image, image_size);
>  
>      free(image);
>  
> @@ -402,6 +409,7 @@ int xc_hvm_build_target_mem(int xc_handle,
>  int xc_hvm_build_mem(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numainfo,
>                       const char *image_buffer,
>                       unsigned long image_size)
>  {
> @@ -425,7 +433,7 @@ int xc_hvm_build_mem(int xc_handle,
>      }
>  
>      sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> -                                img, img_len);
> +                                numainfo, img, img_len);
>  
>      /* xc_inflate_buffer may return the original buffer pointer (for
>         for already inflated buffers), so exercise some care in freeing */
> diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
> index 851f769..110b0c9 100644
> --- a/tools/libxc/xenguest.h
> +++ b/tools/libxc/xenguest.h
> @@ -62,6 +62,13 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
>                        unsigned int console_evtchn, unsigned long *console_mfn,
>                        unsigned int hvm, unsigned int pae, int superpages);
>  
> +struct numa_guest_info {
> +     int num_nodes;
> +     int *guest_to_host_node;
> +     int *vcpu_to_node;
> +     uint64_t *node_mem;
> +};
> +
>  /**
>   * This function will create a domain for a paravirtualized Linux
>   * using file names pointing to kernel and ramdisk
> @@ -143,17 +150,20 @@ int xc_linux_build_mem(int xc_handle,
>  int xc_hvm_build(int xc_handle,
>                   uint32_t domid,
>                   int memsize,
> +                 struct numa_guest_info *numa_info,
>                   const char *image_name);
>  
>  int xc_hvm_build_target_mem(int xc_handle,
>                              uint32_t domid,
>                              int memsize,
>                              int target,
> +                            struct numa_guest_info *numa_info,
>                              const char *image_name);
>  
>  int xc_hvm_build_mem(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numa_info,
>                       const char *image_buffer,
>                       unsigned long image_size);
>  
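A side note on the struct above: it might help to document the intended
semantics of the fields somewhere. As I read it, a caller for a 2-node,
4-vcpu guest would fill it in roughly like this (a hypothetical,
untested sketch; node_mem is in kB, judging by the Python wrapper
below):

    struct numa_guest_info ninfo;
    int g2h[2] = { 0, 1 };           /* guest node -> host node binding */
    int v2n[4] = { 0, 0, 1, 1 };     /* vcpu -> guest node */
    uint64_t mem[2] = { 512 << 10, 512 << 10 };  /* per-node memory, kB */

    ninfo.num_nodes          = 2;
    ninfo.guest_to_host_node = g2h;
    ninfo.vcpu_to_node       = v2n;
    ninfo.node_mem           = mem;
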
> diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c
> index 457001c..15d1b71 100644
> --- a/tools/libxc/xg_private.c
> +++ b/tools/libxc/xg_private.c
> @@ -177,6 +177,7 @@ __attribute__((weak))
>      int xc_hvm_build(int xc_handle,
>                       uint32_t domid,
>                       int memsize,
> +                     struct numa_guest_info *numainfo,
>                       const char *image_name)
>  {
>      errno = ENOSYS;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index 7196aa8..154253a 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -168,7 +168,7 @@ int build_hvm(struct libxl_ctx *ctx, uint32_t domid,
>  {
>      int ret;
>  
> -    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, info->kernel);
> +    ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, NULL, info->kernel);
>      if (ret) {
>          XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, ret, "hvm building failed");
>          return ERROR_FAIL;
> diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
> index 1932758..ecd7800 100644
> --- a/tools/python/xen/lowlevel/xc/xc.c
> +++ b/tools/python/xen/lowlevel/xc/xc.c
> @@ -915,15 +915,19 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      int i;
>      char *image;
>      int memsize, target=-1, vcpus = 1, acpi = 0, apic = 1;
> +    int numanodes;
> +    struct numa_guest_info numainfo;
>      PyObject *vcpu_avail_handle = NULL;
> +    PyObject *nodemem_handle = NULL;
>      uint8_t vcpu_avail[HVM_MAX_VCPUS/8];
>  
> -    static char *kwd_list[] = { "domid",
> -                                "memsize", "image", "target", "vcpus", 
> -                                "vcpu_avail", "acpi", "apic", NULL };
> -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOii", kwd_list,
> +    static char *kwd_list[] = { "domid", "memsize", "image", "target",
> +                                "vcpus", "vcpu_avail", "acpi", "apic",
> +                                "nodes", "nodemem", NULL };
> +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOiiiO", kwd_list,
>                                        &dom, &memsize, &image, &target, &vcpus,
> -                                      &vcpu_avail_handle, &acpi, &apic) )
> +                                      &vcpu_avail_handle, &acpi, &apic,
> +                                      &numanodes, &nodemem_handle) )
>          return NULL;
>  
>      memset(vcpu_avail, 0, sizeof(vcpu_avail));
> @@ -954,8 +958,50 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      if ( target == -1 )
>          target = memsize;
>  
> -    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
> -                                 target, image) != 0 )
> +    numainfo.num_nodes = numanodes;
> +    numainfo.guest_to_host_node = malloc(numanodes * sizeof(int)); 
> +    numainfo.vcpu_to_node = malloc(vcpus * sizeof(int));
> +    numainfo.node_mem = malloc(numanodes * sizeof(uint64_t)); 
> +    if (numanodes > 1)
> +    {
> +        int vcpu_per_node, remainder;
> +        int n;
> +        vcpu_per_node = vcpus / numanodes;
> +        remainder = vcpus % numanodes;
> +        n = remainder * (vcpu_per_node + 1);
> +        for (i = 0; i < vcpus; i++)

I don't know the coding style, but you seem to have two different
versions of it. Here you do 'for (i = ...)' (no spaces inside the
parentheses)
> +        {
> +            if (i < n)
> +                numainfo.vcpu_to_node[i] = i / (vcpu_per_node + 1);
> +            else
> +                numainfo.vcpu_to_node[i] = remainder + (i - n) / vcpu_per_node;
> +        }
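
Just to sanity-check the distribution logic: with, say, vcpus = 5 and
numanodes = 2 this gives vcpu_per_node = 2, remainder = 1, n = 3, so
vcpus 0-2 land on node 0 (i / 3) and vcpus 3-4 on node 1
(1 + (i - 3) / 2), i.e. the first 'remainder' nodes get one extra
vcpu. That looks right.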
> +        for (i = 0; i < numanodes; i++)
> +            numainfo.guest_to_host_node[i] = i % 2;
> +        if ( nodemem_handle != NULL && PyList_Check(nodemem_handle) )
> +        {

> +            PyObject *item;
> +            for ( i = 0; i < numanodes; i++)

and here it is 'for ( i = 0 ... )' (with spaces inside the
parentheses).

I've failed in the past to find a coding style document, so I am not
exactly sure which one is correct.
> +            {
> +                item = PyList_GetItem(nodemem_handle, i);
> +                numainfo.node_mem[i] = PyInt_AsLong(item);
> +                fprintf(stderr, "NUMA: node %i has %lu kB\n", i,
> +                    numainfo.node_mem[i]);

Isn't there any other way to print this out? Can't you use the
xc_dom_printf routines?
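
If the xc_dom.h helper is usable from here, something along these
lines might work (an untested sketch, assuming the printf-style
xc_dom_printf(); note that node_mem is uint64_t, so PRIu64 would also
be safer than %lu on 32-bit builds):

    #include <inttypes.h>   /* PRIu64 */
    #include "xc_dom.h"     /* for xc_dom_printf(), if available here */

    xc_dom_printf("NUMA: node %i has %" PRIu64 " kB\n",
                  i, numainfo.node_mem[i]);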

> +            }
> +        } else {
> +            unsigned int mempernode;
> +            if ( nodemem_handle != NULL && PyInt_Check(nodemem_handle) )
> +                mempernode = PyInt_AsLong(nodemem_handle);
> +            else
> +                mempernode = (memsize << 10) / numanodes;
> +            fprintf(stderr, "NUMA: setting all nodes to %i KB\n", mempernode);

Ditto.
> +            for (i = 0; i < numanodes; i++)

Ditto for the coding guideline.
> +                numainfo.node_mem[i] = mempernode;
> +        }
> +    }
> +
> +    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target,
> +                                 &numainfo, image) != 0 )

and I guess here too.
>          return pyxc_error_to_exception();
>  
>  #if !defined(__ia64__)
> @@ -970,6 +1016,15 @@ static PyObject *pyxc_hvm_build(XcObject *self,
>      va_hvm->apic_mode    = apic;
>      va_hvm->nr_vcpus     = vcpus;
>      memcpy(va_hvm->vcpu_online, vcpu_avail, sizeof(vcpu_avail));
> +
> +    va_hvm->num_nodes    = numanodes;
> +    if (numanodes > 0) {
> +        for (i = 0; i < vcpus; i++)
> +            va_hvm->vcpu_to_node[i] = numainfo.vcpu_to_node[i];
> +        for (i = 0; i < numanodes; i++)
> +            va_hvm->node_mem[i] = numainfo.node_mem[i] >> 10;

Would it make sense to have the 10 #defined somewhere?
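
Something like the following, purely as an illustration (the constant
name is made up):

    /* hypothetical name for the kB -> MB conversion shift */
    #define NUMA_MEM_SHIFT 10

    va_hvm->node_mem[i] = numainfo.node_mem[i] >> NUMA_MEM_SHIFT;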

> +    }
> +
>      for ( i = 0, sum = 0; i < va_hvm->length; i++ )
>          sum += ((uint8_t *)va_hvm)[i];
>      va_hvm->checksum -= sum;
> @@ -1174,7 +1229,7 @@ static PyObject *pyxc_physinfo(XcObject *self)
>              nr_nodes++;
>      }
>  
> -    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}",
> +    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s,s:s}",

What changed in that line? My eyes can't seem to find the difference.

>                              "nr_nodes",         nr_nodes,
>                              "max_node_id",      info.max_node_id,
>                              "max_cpu_id",       info.max_cpu_id,
> diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
> index f3b2cc5..f06d6e2 100644
> --- a/tools/python/xen/xend/image.py
> +++ b/tools/python/xen/xend/image.py
> @@ -961,7 +961,8 @@ class HVMImageHandler(ImageHandler):
>                            vcpus          = self.vm.getVCpuCount(),
>                            vcpu_avail     = self.vm.getVCpuAvail(),
>                            acpi           = self.acpi,
> -                          apic           = self.apic)
> +                          apic           = self.apic,
> +                          nodes          = 0)
>          rc['notes'] = { 'SUSPEND_CANCEL': 1 }
>  
>          rc['store_mfn'] = xc.hvm_get_param(self.vm.getDomid(),



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel