WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 3/4] [HVM] allocate HVM guest memory with NUMA in mind

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 3/4] [HVM] allocate HVM guest memory with NUMA in mind
From: "Andre Przywara" <andre.przywara@xxxxxxx>
Date: Mon, 13 Aug 2007 12:02:59 +0200
Delivery-date: Mon, 13 Aug 2007 03:04:27 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 1.5.0.10 (X11/20070409)
Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
# HG changeset patch
# User andre.przywara@xxxxxxx
# Date 1186563732 -7200
# Node ID f5e9f20109d9dc3c82bfadcedd4af77a35e8c5fb
# Parent  e730c1207604414f6f2779cc6adb213e3c1362eb
allocate HVM guest memory according to NUMA setup

diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xc_hvm_build.c        Wed Aug 08 11:02:12 2007 +0200
@@ -152,8 +152,101 @@ static int loadelfimage(
     return rc;
 }
 
+#define MAX_CPU_ID 255
+
+static int setup_numa_affinity (int xc_handle, uint32_t dom, int numanodes)
+{
+    xc_physinfo_t physinfo;
+    xc_cpu_to_node_t *cpumap;
+    uint64_t *nodemasks;
+
+    int nrcpus, i, node;
+    xc_dominfo_t dominfo;
+
+    cpumap=(xc_cpu_to_node_t *)malloc(sizeof(xc_cpu_to_node_t)*MAX_CPU_ID);
+    set_xen_guest_handle(physinfo.cpu_to_node, cpumap);
+
+    xc_physinfo (xc_handle,&physinfo);
+    nrcpus = physinfo.threads_per_core * physinfo.cores_per_socket *
+        physinfo.sockets_per_node * physinfo.nr_nodes;
+
+    nodemasks=malloc(sizeof(uint64_t)*physinfo.nr_nodes);
+    memset (nodemasks,0,sizeof(uint64_t)*physinfo.nr_nodes);
+    for (i=0;i<nrcpus;i++)
+    {
+        nodemasks[cpumap[i]]|=(1<<i);
+    }
+
+    if (xc_domain_getinfo (xc_handle, dom, 1, &dominfo) != 1)
+    {
+        ERROR("Unable to get platform info.");
+        return -1;
+    }
+
+    for (i=0;i<=dominfo.max_vcpu_id;i++)
+    {
+        node= ( i * numanodes ) / (dominfo.max_vcpu_id+1);
+        xc_vcpu_setaffinity (xc_handle, dom, i, nodemasks[node]);
+    }
+
+    return 0;
+}
+
+static int setup_numa_mem ( int xc_handle, uint32_t dom, int nr_pages,
+                       xen_pfn_t *page_array, int numanodes )
+{
+    xc_physinfo_t physinfo;
+    xc_cpu_to_node_t *cpumap;
+
+    int nrcpus, i, j, rc;
+    uint32_t firstcpu;
+    unsigned long offset;
+    unsigned long pages_per_node, curpages;
+
+    cpumap=(xc_cpu_to_node_t *)malloc(sizeof(xc_cpu_to_node_t)*MAX_CPU_ID);
+    set_xen_guest_handle(physinfo.cpu_to_node, cpumap);
+
+
+    xc_physinfo (xc_handle,&physinfo);
+    nrcpus = physinfo.threads_per_core * physinfo.cores_per_socket *
+        physinfo.sockets_per_node * physinfo.nr_nodes;
+
+    offset = 0xc0;
+
+    pages_per_node=((nr_pages+0xFF)&(~0xFFUL))/numanodes;
+    firstcpu=0;
+    for ( i=0 ; i<numanodes ; i++ )
+    {
+        for ( j=0 ; j<nrcpus ; j++ )
+        {
+            if ( cpumap[j] == i )
+            {
+                firstcpu = j;
+                break;
+            }
+        }
+
+        if ( j == nrcpus ) firstcpu=0;
+
+        if ( i == numanodes - 1 )
+            curpages = nr_pages - i * pages_per_node;
+        else curpages = pages_per_node;
+
+        if ( i == 0 ) curpages -= 0xc0;
+
+        rc = xc_domain_memory_populate_physmap(
+            xc_handle, dom, curpages, 0, 0, firstcpu, 
+            &page_array[offset]);
+
+        if ( rc != 0 ) return rc;
+
+        offset+=curpages;
+    }
+    return 0;
+}
+
 static int setup_guest(int xc_handle,
-                       uint32_t dom, int memsize,
+                       uint32_t dom, int memsize, int numanodes,
                        char *image, unsigned long image_size,
                        vcpu_guest_context_either_t *ctxt)
 {
@@ -213,13 +306,24 @@ static int setup_guest(int xc_handle,
     rc = xc_domain_memory_populate_physmap(
         xc_handle, dom, 0xa0, 0, 0, XENMEM_DEFAULT_CPU, &page_array[0x00]);
     if ( rc == 0 )
-        rc = xc_domain_memory_populate_physmap(
-            xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
-            &page_array[0xc0]);
+    {
+        if ( numanodes > 0 )
+            rc = setup_numa_mem (xc_handle, dom, nr_pages, page_array,
+            numanodes);
+        else
+            rc = xc_domain_memory_populate_physmap (
+                xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
+                &page_array[0xc0] );
+    }
     if ( rc != 0 )
     {
         PERROR("Could not allocate memory for HVM guest.\n");
         goto error_out;
+    }
+
+    if ( numanodes > 0 )
+    {
+        setup_numa_affinity (xc_handle, dom, numanodes);
     }
 
     if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
@@ -288,6 +392,7 @@ static int xc_hvm_build_internal(int xc_
 static int xc_hvm_build_internal(int xc_handle,
                                  uint32_t domid,
                                  int memsize,
+                                 int numanodes,
                                  char *image,
                                  unsigned long image_size)
 {
@@ -303,7 +408,8 @@ static int xc_hvm_build_internal(int xc_
 
     memset(&ctxt, 0, sizeof(ctxt));
 
-    if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
+    if ( setup_guest(xc_handle, domid, memsize, numanodes,
+        image, image_size, &ctxt) < 0 )
     {
         goto error_out;
     }
@@ -341,6 +447,7 @@ int xc_hvm_build(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 int numanodes,
                  const char *image_name)
 {
     char *image;
@@ -351,7 +458,8 @@ int xc_hvm_build(int xc_handle,
          ((image = xc_read_image(image_name, &image_size)) == NULL) )
         return -1;
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
+        image, image_size);
 
     free(image);
 
@@ -364,6 +472,7 @@ int xc_hvm_build_mem(int xc_handle,
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_buffer,
                      unsigned long image_size)
 {
@@ -386,7 +495,7 @@ int xc_hvm_build_mem(int xc_handle,
         return -1;
     }
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
                                 img, img_len);
 
     /* xc_inflate_buffer may return the original buffer pointer (for
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xenguest.h    Wed Aug 08 11:02:12 2007 +0200
@@ -128,11 +128,13 @@ int xc_hvm_build(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 int numanodes,
                  const char *image_name);
 
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_buffer,
                      unsigned long image_size);
 
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xg_private.c  Wed Aug 08 11:02:12 2007 +0200
@@ -192,6 +192,7 @@ __attribute__((weak))
     int xc_hvm_build(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_name)
 {
     errno = ENOSYS;
diff -r e730c1207604 -r f5e9f20109d9 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Aug 08 11:02:12 2007 +0200
@@ -549,7 +549,7 @@ static PyObject *pyxc_hvm_build(XcObject
                                       &numanodes) )
         return NULL;
 
-    if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
+    if ( xc_hvm_build(self->xc_handle, dom, memsize, numanodes, image) != 0 )
         return pyxc_error_to_exception();
 
 #if !defined(__ia64__)
diff -r e730c1207604 -r f5e9f20109d9 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Tue Aug 07 15:11:00 2007 +0200
+++ b/xen/common/page_alloc.c   Wed Aug 08 11:02:12 2007 +0200
@@ -806,8 +806,12 @@ struct page_info *__alloc_domheap_pages(
 
     if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
     {
-        pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);
-
+        if (avail_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+            cpu_to_node (cpu)) >= ( 1UL << order ))
+        {
+            pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+                cpu, order);
+        }
         /* Failure? Then check if we can fall back to the DMA pool. */
         if ( unlikely(pg == NULL) &&
              ((order > MAX_ORDER) ||
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-devel] [PATCH 3/4] [HVM] allocate HVM guest memory with NUMA in mind, Andre Przywara <=