On Thu, Feb 04, 2010 at 10:54:22PM +0100, Andre Przywara wrote:
> This patch adds a "struct numa_guest_info" to libxc, which allows
> specifying a guest's NUMA topology along with the appropriate host binding.
> For now the information is filled in by the Python wrapper (I left out the
> libxl part for now); it populates the struct with sensible default values
> (equal distribution), so the caller only needs to specify the number of
> guest nodes. The information is passed on to the hvm_info_table. The
> respective memory allocation is in another patch.
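Just to check that I read the new interface correctly: below is a minimal
sketch of how a caller could fill the struct with those equal-distribution
defaults. The struct layout is copied from the xenguest.h hunk below; the
helper name and the exact default policy are made up for illustration (the
posted xc.c code gives the first vcpus%nodes nodes one extra vcpu each and
binds guest nodes to host nodes with i % 2).

/* Illustration only -- not part of the patch. */
#include <stdint.h>
#include <stdlib.h>

/* Copied from the patch's addition to tools/libxc/xenguest.h */
struct numa_guest_info {
    int num_nodes;
    int *guest_to_host_node;
    int *vcpu_to_node;
    uint64_t *node_mem;
};

static int fill_numa_defaults(struct numa_guest_info *ni, int nodes,
                              int vcpus, uint64_t total_mem_kb)
{
    int i;

    ni->num_nodes = nodes;
    ni->guest_to_host_node = malloc(nodes * sizeof(int));
    ni->vcpu_to_node = malloc(vcpus * sizeof(int));
    ni->node_mem = malloc(nodes * sizeof(uint64_t));
    if ( !ni->guest_to_host_node || !ni->vcpu_to_node || !ni->node_mem )
        return -1;

    for ( i = 0; i < vcpus; i++ )
        ni->vcpu_to_node[i] = i % nodes;        /* spread vcpus round-robin */
    for ( i = 0; i < nodes; i++ )
    {
        ni->guest_to_host_node[i] = i;          /* 1:1 host binding */
        ni->node_mem[i] = total_mem_kb / nodes; /* equal memory share (kB) */
    }
    return 0;
}

A caller would then hand the struct to the extended xc_hvm_build() /
xc_hvm_build_target_mem(), or pass NULL to keep the old non-NUMA behaviour
(as the libxl hunk below does).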
>
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
>
> Regards,
> Andre.
>
> --
> Andre Przywara
> AMD-Operating System Research Center (OSRC), Dresden, Germany
> Tel: +49 351 488-3567-12
> commit b4ee75af6f6220bdafccf75c9bc6e4915a413b18
> Author: Andre Przywara <andre.przywara@xxxxxxx>
> Date: Mon Feb 1 12:36:28 2010 +0100
>
> add struct numainfo to interface and
> parse xend passed NUMA info and pass on to libxc
>
> diff --git a/tools/libxc/xc_hvm_build.c b/tools/libxc/xc_hvm_build.c
> index 8fc7ac5..02103b1 100644
> --- a/tools/libxc/xc_hvm_build.c
> +++ b/tools/libxc/xc_hvm_build.c
> @@ -106,6 +106,7 @@ static int loadelfimage(
>
> static int setup_guest(int xc_handle,
> uint32_t dom, int memsize, int target,
> + struct numa_guest_info *numainfo,
> char *image, unsigned long image_size)
> {
> xen_pfn_t *page_array = NULL;
> @@ -334,6 +335,7 @@ static int xc_hvm_build_internal(int xc_handle,
> uint32_t domid,
> int memsize,
> int target,
> + struct numa_guest_info *numainfo,
> char *image,
> unsigned long image_size)
> {
> @@ -343,7 +345,8 @@ static int xc_hvm_build_internal(int xc_handle,
> return -1;
> }
>
> - return setup_guest(xc_handle, domid, memsize, target, image, image_size);
> + return setup_guest(xc_handle, domid, memsize, target,
> + numainfo, image, image_size);
> }
>
> /* xc_hvm_build:
> @@ -352,6 +355,7 @@ static int xc_hvm_build_internal(int xc_handle,
> int xc_hvm_build(int xc_handle,
> uint32_t domid,
> int memsize,
> + struct numa_guest_info *numainfo,
> const char *image_name)
> {
> char *image;
> @@ -362,7 +366,8 @@ int xc_hvm_build(int xc_handle,
> ((image = xc_read_image(image_name, &image_size)) == NULL) )
> return -1;
>
> - sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize, image, image_size);
> + sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> + numainfo, image, image_size);
>
> free(image);
>
> @@ -379,6 +384,7 @@ int xc_hvm_build_target_mem(int xc_handle,
> uint32_t domid,
> int memsize,
> int target,
> + struct numa_guest_info *numainfo,
> const char *image_name)
> {
> char *image;
> @@ -389,7 +395,8 @@ int xc_hvm_build_target_mem(int xc_handle,
> ((image = xc_read_image(image_name, &image_size)) == NULL) )
> return -1;
>
> - sts = xc_hvm_build_internal(xc_handle, domid, memsize, target, image, image_size);
> + sts = xc_hvm_build_internal(xc_handle, domid, memsize, target,
> + numainfo, image, image_size);
>
> free(image);
>
> @@ -402,6 +409,7 @@ int xc_hvm_build_target_mem(int xc_handle,
> int xc_hvm_build_mem(int xc_handle,
> uint32_t domid,
> int memsize,
> + struct numa_guest_info *numainfo,
> const char *image_buffer,
> unsigned long image_size)
> {
> @@ -425,7 +433,7 @@ int xc_hvm_build_mem(int xc_handle,
> }
>
> sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
> - img, img_len);
> + numainfo, img, img_len);
>
> /* xc_inflate_buffer may return the original buffer pointer (for
> for already inflated buffers), so exercise some care in freeing */
> diff --git a/tools/libxc/xenguest.h b/tools/libxc/xenguest.h
> index 851f769..110b0c9 100644
> --- a/tools/libxc/xenguest.h
> +++ b/tools/libxc/xenguest.h
> @@ -62,6 +62,13 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
> unsigned int console_evtchn, unsigned long *console_mfn,
> unsigned int hvm, unsigned int pae, int superpages);
>
> +struct numa_guest_info {
> + int num_nodes;
> + int *guest_to_host_node;
> + int *vcpu_to_node;
> + uint64_t *node_mem;
> +};
> +
> /**
> * This function will create a domain for a paravirtualized Linux
> * using file names pointing to kernel and ramdisk
> @@ -143,17 +150,20 @@ int xc_linux_build_mem(int xc_handle,
> int xc_hvm_build(int xc_handle,
> uint32_t domid,
> int memsize,
> + struct numa_guest_info *numa_info,
> const char *image_name);
>
> int xc_hvm_build_target_mem(int xc_handle,
> uint32_t domid,
> int memsize,
> int target,
> + struct numa_guest_info *numa_info,
> const char *image_name);
>
> int xc_hvm_build_mem(int xc_handle,
> uint32_t domid,
> int memsize,
> + struct numa_guest_info *numa_info,
> const char *image_buffer,
> unsigned long image_size);
>
> diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c
> index 457001c..15d1b71 100644
> --- a/tools/libxc/xg_private.c
> +++ b/tools/libxc/xg_private.c
> @@ -177,6 +177,7 @@ __attribute__((weak))
> int xc_hvm_build(int xc_handle,
> uint32_t domid,
> int memsize,
> + struct numa_guest_info *numainfo,
> const char *image_name)
> {
> errno = ENOSYS;
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> index 7196aa8..154253a 100644
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -168,7 +168,7 @@ int build_hvm(struct libxl_ctx *ctx, uint32_t domid,
> {
> int ret;
>
> - ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, info->kernel);
> + ret = xc_hvm_build_target_mem(ctx->xch, domid, (info->max_memkb - info->video_memkb) / 1024, (info->target_memkb - info->video_memkb) / 1024, NULL, info->kernel);
> if (ret) {
> XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, ret, "hvm building failed");
> return ERROR_FAIL;
> diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c
> index 1932758..ecd7800 100644
> --- a/tools/python/xen/lowlevel/xc/xc.c
> +++ b/tools/python/xen/lowlevel/xc/xc.c
> @@ -915,15 +915,19 @@ static PyObject *pyxc_hvm_build(XcObject *self,
> int i;
> char *image;
> int memsize, target=-1, vcpus = 1, acpi = 0, apic = 1;
> + int numanodes;
> + struct numa_guest_info numainfo;
> PyObject *vcpu_avail_handle = NULL;
> + PyObject *nodemem_handle = NULL;
> uint8_t vcpu_avail[HVM_MAX_VCPUS/8];
>
> - static char *kwd_list[] = { "domid",
> - "memsize", "image", "target", "vcpus",
> - "vcpu_avail", "acpi", "apic", NULL };
> - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOii", kwd_list,
> + static char *kwd_list[] = { "domid", "memsize", "image", "target",
> + "vcpus", "vcpu_avail", "acpi", "apic",
> + "nodes", "nodemem", NULL };
> + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiOiiiO", kwd_list,
> &dom, &memsize, &image, &target,
> &vcpus,
> - &vcpu_avail_handle, &acpi, &apic) )
> + &vcpu_avail_handle, &acpi, &apic,
> + &numanodes, &nodemem_handle) )
> return NULL;
>
> memset(vcpu_avail, 0, sizeof(vcpu_avail));
> @@ -954,8 +958,50 @@ static PyObject *pyxc_hvm_build(XcObject *self,
> if ( target == -1 )
> target = memsize;
>
> - if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
> - target, image) != 0 )
> + numainfo.num_nodes = numanodes;
> + numainfo.guest_to_host_node = malloc(numanodes * sizeof(int));
> + numainfo.vcpu_to_node = malloc(vcpus * sizeof(int));
> + numainfo.node_mem = malloc(numanodes * sizeof(uint64_t));
> + if (numanodes > 1)
> + {
> + int vcpu_per_node, remainder;
> + int n;
> + vcpu_per_node = vcpus / numanodes;
> + remainder = vcpus % numanodes;
> + n = remainder * (vcpu_per_node + 1);
> + for (i = 0; i < vcpus; i++)
I don't know the coding style, but you seem to be using two different
versions of it. Here you write 'for (i = ...)'
> + {
> + if (i < n)
> + numainfo.vcpu_to_node[i] = i / (vcpu_per_node + 1);
> + else
> + numainfo.vcpu_to_node[i] = remainder + (i - n) / vcpu_per_node;
> + }
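The split above took me a moment to parse, so here is the same arithmetic
as a standalone sketch with a worked example (6 vcpus over 4 guest nodes);
illustration only, not part of the patch:

#include <stdio.h>

/* Same arithmetic as the loop above: the first 'remainder' nodes
 * receive one extra vcpu so the distribution stays as even as possible. */
int main(void)
{
    int vcpus = 6, numanodes = 4;
    int vcpu_per_node = vcpus / numanodes;        /* 1 */
    int remainder = vcpus % numanodes;            /* 2 */
    int n = remainder * (vcpu_per_node + 1);      /* 4 */
    int i;

    for (i = 0; i < vcpus; i++)
    {
        int node = (i < n) ? i / (vcpu_per_node + 1)
                           : remainder + (i - n) / vcpu_per_node;
        printf("vcpu %d -> node %d\n", i, node);
    }
    /* Prints: vcpus 0,1 -> node 0; 2,3 -> node 1; 4 -> node 2; 5 -> node 3 */
    return 0;
}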
> + for (i = 0; i < numanodes; i++)
> + numainfo.guest_to_host_node[i] = i % 2;
> + if ( nodemem_handle != NULL && PyList_Check(nodemem_handle) )
> + {
> + PyObject *item;
> + for ( i = 0; i < numanodes; i++)
and here it is 'for ( i = 0 ...)' with spaces inside the parentheses.
I've failed in the past to find a written coding style for this code,
so I am not exactly sure which one is correct.
> + {
> + item = PyList_GetItem(nodemem_handle, i);
> + numainfo.node_mem[i] = PyInt_AsLong(item);
> + fprintf(stderr, "NUMA: node %i has %lu kB\n", i,
> + numainfo.node_mem[i]);
Isn't there any other way to print this out? Can't you use the
xc_dom_printf routines?
> + }
> + } else {
> + unsigned int mempernode;
> + if ( nodemem_handle != NULL && PyInt_Check(nodemem_handle) )
> + mempernode = PyInt_AsLong(nodemem_handle);
> + else
> + mempernode = (memsize << 10) / numanodes;
> + fprintf(stderr, "NUMA: setting all nodes to %i KB\n", mempernode);
Ditto for this fprintf.
> + for (i = 0; i < numanodes; i++)
Ditto for the coding style.
> + numainfo.node_mem[i] = mempernode;
> + }
> + }
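For the record, the fallback path works out to memsize (in MB) converted
to kB and split evenly, so node_mem[] is in kB throughout. A tiny
standalone check of that arithmetic, for illustration only:

#include <stdio.h>

int main(void)
{
    int memsize = 4096;              /* MB, as passed to xc_hvm_build */
    int numanodes = 2;
    unsigned int mempernode = (memsize << 10) / numanodes;  /* kB per node */

    printf("each of the %d nodes gets %u kB\n", numanodes, mempernode);
    /* -> each of the 2 nodes gets 2097152 kB (2 GB) */
    return 0;
}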
> +
> + if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target,
> + &numainfo, image) != 0 )
And I guess the same applies here.
> return pyxc_error_to_exception();
>
> #if !defined(__ia64__)
> @@ -970,6 +1016,15 @@ static PyObject *pyxc_hvm_build(XcObject *self,
> va_hvm->apic_mode = apic;
> va_hvm->nr_vcpus = vcpus;
> memcpy(va_hvm->vcpu_online, vcpu_avail, sizeof(vcpu_avail));
> +
> + va_hvm->num_nodes = numanodes;
> + if (numanodes > 0) {
> + for (i = 0; i < vcpus; i++)
> + va_hvm->vcpu_to_node[i] = numainfo.vcpu_to_node[i];
> + for (i = 0; i < numanodes; i++)
> + va_hvm->node_mem[i] = numainfo.node_mem[i] >> 10;
Would it make sense to have 10 be #defined somewhere?
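Something like the sketch below is what I have in mind; the macro name is
invented here, and I'm assuming node_mem[] is in kB (as the fprintf above
suggests) while hvm_info_table's node_mem[] ends up in MB:

#include <stdint.h>

/* Hypothetical name -- the patch currently hardcodes the shift.
 * numa_guest_info.node_mem[] is in kB, hvm_info's node_mem[] in MB. */
#define NUMA_NODE_MEM_SHIFT 10   /* kB -> MB */

static inline uint64_t numa_mem_kb_to_mb(uint64_t kb)
{
    return kb >> NUMA_NODE_MEM_SHIFT;
}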
> + }
> +
> for ( i = 0, sum = 0; i < va_hvm->length; i++ )
> sum += ((uint8_t *)va_hvm)[i];
> va_hvm->checksum -= sum;
> @@ -1174,7 +1229,7 @@ static PyObject *pyxc_physinfo(XcObject *self)
> nr_nodes++;
> }
>
> - ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}",
> + ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s,s:s}",
What changed in that line? My eyes don't seem to be able to spot the
difference.
> "nr_nodes", nr_nodes,
> "max_node_id", info.max_node_id,
> "max_cpu_id", info.max_cpu_id,
> diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py
> index f3b2cc5..f06d6e2 100644
> --- a/tools/python/xen/xend/image.py
> +++ b/tools/python/xen/xend/image.py
> @@ -961,7 +961,8 @@ class HVMImageHandler(ImageHandler):
> vcpus = self.vm.getVCpuCount(),
> vcpu_avail = self.vm.getVCpuAvail(),
> acpi = self.acpi,
> - apic = self.apic)
> + apic = self.apic,
> + nodes = 0)
> rc['notes'] = { 'SUSPEND_CANCEL': 1 }
>
> rc['store_mfn'] = xc.hvm_get_param(self.vm.getDomid(),
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel