Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
# HG changeset patch
# User andre.przywara@xxxxxxx
# Date 1186563732 -7200
# Node ID f5e9f20109d9dc3c82bfadcedd4af77a35e8c5fb
# Parent e730c1207604414f6f2779cc6adb213e3c1362eb
Allocate HVM guest memory and set VCPU affinity according to the requested NUMA setup
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xc_hvm_build.c Wed Aug 08 11:02:12 2007 +0200
@@ -152,8 +152,101 @@ static int loadelfimage(
return rc;
}
+#define MAX_CPU_ID 255 /* entries allocated for each cpu_to_node map below; used as a count, so CPU ids 0..254 fit */
+
+/*
+ * Pin the VCPUs of domain 'dom' to host NUMA nodes.
+ *
+ * The host CPU-to-node map is read via xc_physinfo() and turned into one
+ * 64-bit CPU mask per host node.  The domain's VCPUs are then split into
+ * 'numanodes' equally sized consecutive groups and every VCPU of group k
+ * gets the CPU mask of host node k as its affinity.
+ *
+ * xc_handle: open libxc handle
+ * dom:       domain whose VCPUs are pinned
+ * numanodes: number of nodes the guest is spread over (must be > 0)
+ *
+ * Returns 0 on success and -1 if any step failed.  Affinity setting is
+ * best effort: a failure for one VCPU is reported through the return
+ * value but the remaining VCPUs are still processed.
+ */
+static int setup_numa_affinity (int xc_handle, uint32_t dom, int numanodes)
+{
+    xc_physinfo_t physinfo;
+    xc_cpu_to_node_t *cpumap = NULL;
+    uint64_t *nodemasks = NULL;
+    xc_dominfo_t dominfo;
+    int nrcpus, nrvcpus, i, node;
+    int rc = -1;
+
+    if ( numanodes <= 0 )
+        return -1;
+
+    cpumap = calloc(MAX_CPU_ID, sizeof(*cpumap));
+    if ( cpumap == NULL )
+    {
+        ERROR("Unable to allocate CPU to node map.");
+        goto out;
+    }
+    set_xen_guest_handle(physinfo.cpu_to_node, cpumap);
+
+    if ( xc_physinfo(xc_handle, &physinfo) != 0 )
+    {
+        ERROR("Unable to get platform info.");
+        goto out;
+    }
+
+    if ( physinfo.nr_nodes == 0 )
+    {
+        ERROR("Platform reports no NUMA nodes.");
+        goto out;
+    }
+
+    nrcpus = physinfo.threads_per_core * physinfo.cores_per_socket *
+             physinfo.sockets_per_node * physinfo.nr_nodes;
+    /* Never read past the map allocated above ... */
+    if ( nrcpus > MAX_CPU_ID )
+        nrcpus = MAX_CPU_ID;
+    /* ... and never shift past the width of a node mask. */
+    if ( nrcpus > (int)(8 * sizeof(*nodemasks)) )
+        nrcpus = (int)(8 * sizeof(*nodemasks));
+
+    nodemasks = calloc(physinfo.nr_nodes, sizeof(*nodemasks));
+    if ( nodemasks == NULL )
+    {
+        ERROR("Unable to allocate node CPU masks.");
+        goto out;
+    }
+
+    for ( i = 0; i < nrcpus; i++ )
+    {
+        /* Ignore map entries that do not name a valid node. */
+        if ( cpumap[i] < physinfo.nr_nodes )
+            nodemasks[cpumap[i]] |= (uint64_t)1 << i;
+    }
+
+    /* xc_domain_getinfo() may return a later domain if 'dom' is gone. */
+    if ( (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1) ||
+         (dominfo.domid != dom) )
+    {
+        ERROR("Unable to get domain info.");
+        goto out;
+    }
+
+    nrvcpus = dominfo.max_vcpu_id + 1;
+    rc = 0;
+    for ( i = 0; i < nrvcpus; i++ )
+    {
+        node = ( i * numanodes ) / nrvcpus;
+
+        /* More guest nodes requested than the host has: nothing to pin to. */
+        if ( (unsigned int)node >= physinfo.nr_nodes )
+        {
+            rc = -1;
+            continue;
+        }
+
+        if ( xc_vcpu_setaffinity(xc_handle, dom, i, nodemasks[node]) != 0 )
+            rc = -1;
+    }
+
+ out:
+    free(nodemasks);
+    free(cpumap);
+    return rc;
+}
+
+/*
+ * Populate the guest physmap from page_array[0xc0] upwards, splitting the
+ * pages evenly over 'numanodes' host NUMA nodes.
+ *
+ * For each node the first host CPU belonging to it (according to the
+ * cpu_to_node map read via xc_physinfo()) is handed to
+ * xc_domain_memory_populate_physmap() for that node's share; if the node
+ * has no CPU in the map, CPU 0 is used instead.  Entries below 0xc0 are
+ * not touched here, so the first node's share is reduced by 0xc0 pages.
+ *
+ * xc_handle:  open libxc handle
+ * dom:        domain to populate
+ * nr_pages:   total number of guest pages (including the low 0xc0)
+ * page_array: guest pfn array, at least nr_pages entries
+ * numanodes:  number of nodes to spread the memory over (must be > 0)
+ *
+ * Returns 0 on success, the non-zero result of the failing populate call,
+ * or -1 for invalid arguments, allocation failure or a failed
+ * xc_physinfo().
+ */
+static int setup_numa_mem ( int xc_handle, uint32_t dom, int nr_pages,
+                            xen_pfn_t *page_array, int numanodes )
+{
+    /* First page_array index handled by this function. */
+    const unsigned long low_pages = 0xc0;
+    xc_physinfo_t physinfo;
+    xc_cpu_to_node_t *cpumap;
+    int nrcpus, i, j;
+    int rc = -1;
+    uint32_t firstcpu;
+    unsigned long offset, pages_per_node, curpages;
+
+    /* numanodes is a divisor below; a negative page count makes no sense. */
+    if ( (numanodes <= 0) || (nr_pages < 0) )
+        return -1;
+
+    cpumap = calloc(MAX_CPU_ID, sizeof(*cpumap));
+    if ( cpumap == NULL )
+        return -1;
+    set_xen_guest_handle(physinfo.cpu_to_node, cpumap);
+
+    if ( xc_physinfo(xc_handle, &physinfo) != 0 )
+        goto out;
+
+    nrcpus = physinfo.threads_per_core * physinfo.cores_per_socket *
+             physinfo.sockets_per_node * physinfo.nr_nodes;
+    /* Never scan past the map allocated above. */
+    if ( nrcpus > MAX_CPU_ID )
+        nrcpus = MAX_CPU_ID;
+
+    offset = low_pages;
+
+    /* Per-node share, computed from nr_pages rounded up to 256 pages. */
+    pages_per_node = ((nr_pages + 0xFF) & (~0xFFUL)) / numanodes;
+
+    for ( i = 0; i < numanodes; i++ )
+    {
+        /* Find the first host CPU on node i; fall back to CPU 0. */
+        firstcpu = 0;
+        for ( j = 0; j < nrcpus; j++ )
+        {
+            if ( cpumap[j] == i )
+            {
+                firstcpu = j;
+                break;
+            }
+        }
+
+        if ( i == numanodes - 1 )
+        {
+            /* The last node takes whatever is left after the others. */
+            if ( (unsigned long)i * pages_per_node > (unsigned long)nr_pages )
+            {
+                rc = -1;
+                goto out;
+            }
+            curpages = nr_pages - i * pages_per_node;
+        }
+        else
+            curpages = pages_per_node;
+
+        if ( i == 0 )
+        {
+            /* The low pages are not populated here; avoid wrapping. */
+            if ( curpages < low_pages )
+            {
+                rc = -1;
+                goto out;
+            }
+            curpages -= low_pages;
+        }
+
+        rc = xc_domain_memory_populate_physmap(
+            xc_handle, dom, curpages, 0, 0, firstcpu,
+            &page_array[offset]);
+        if ( rc != 0 )
+            goto out;
+
+        offset += curpages;
+    }
+
+    rc = 0;
+
+ out:
+    free(cpumap);
+    return rc;
+}
+
static int setup_guest(int xc_handle,
- uint32_t dom, int memsize,
+ uint32_t dom, int memsize, int numanodes,
char *image, unsigned long image_size,
vcpu_guest_context_either_t *ctxt)
{
@@ -213,13 +306,24 @@ static int setup_guest(int xc_handle,
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, 0xa0, 0, 0, XENMEM_DEFAULT_CPU, &page_array[0x00]);
if ( rc == 0 )
- rc = xc_domain_memory_populate_physmap(
- xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
- &page_array[0xc0]);
+ {
+ if ( numanodes > 0 )
+ rc = setup_numa_mem (xc_handle, dom, nr_pages, page_array,
+ numanodes);
+ else
+ rc = xc_domain_memory_populate_physmap (
+ xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
+ &page_array[0xc0] );
+ }
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
goto error_out;
+ }
+
+ if ( numanodes > 0 )
+ {
+ setup_numa_affinity (xc_handle, dom, numanodes);
}
if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
@@ -288,6 +392,7 @@ static int xc_hvm_build_internal(int xc_
static int xc_hvm_build_internal(int xc_handle,
uint32_t domid,
int memsize,
+ int numanodes,
char *image,
unsigned long image_size)
{
@@ -303,7 +408,8 @@ static int xc_hvm_build_internal(int xc_
memset(&ctxt, 0, sizeof(ctxt));
- if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
+ if ( setup_guest(xc_handle, domid, memsize, numanodes,
+ image, image_size, &ctxt) < 0 )
{
goto error_out;
}
@@ -341,6 +447,7 @@ int xc_hvm_build(int xc_handle,
int xc_hvm_build(int xc_handle,
uint32_t domid,
int memsize,
+ int numanodes,
const char *image_name)
{
char *image;
@@ -351,7 +458,8 @@ int xc_hvm_build(int xc_handle,
((image = xc_read_image(image_name, &image_size)) == NULL) )
return -1;
- sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);
+ sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
+ image, image_size);
free(image);
@@ -364,6 +472,7 @@ int xc_hvm_build_mem(int xc_handle,
int xc_hvm_build_mem(int xc_handle,
uint32_t domid,
int memsize,
+ int numanodes,
const char *image_buffer,
unsigned long image_size)
{
@@ -386,7 +495,7 @@ int xc_hvm_build_mem(int xc_handle,
return -1;
}
- sts = xc_hvm_build_internal(xc_handle, domid, memsize,
+ sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
img, img_len);
/* xc_inflate_buffer may return the original buffer pointer (for
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xenguest.h Wed Aug 08 11:02:12 2007 +0200
@@ -128,11 +128,13 @@ int xc_hvm_build(int xc_handle,
int xc_hvm_build(int xc_handle,
uint32_t domid,
int memsize,
+ int numanodes,
const char *image_name);
int xc_hvm_build_mem(int xc_handle,
uint32_t domid,
int memsize,
+ int numanodes,
const char *image_buffer,
unsigned long image_size);
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xg_private.c Wed Aug 08 11:02:12 2007 +0200
@@ -192,6 +192,7 @@ __attribute__((weak))
int xc_hvm_build(int xc_handle,
uint32_t domid,
int memsize,
+ int numanodes,
const char *image_name)
{
errno = ENOSYS;
diff -r e730c1207604 -r f5e9f20109d9 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Aug 08 11:02:12 2007 +0200
@@ -549,7 +549,7 @@ static PyObject *pyxc_hvm_build(XcObject
&numanodes) )
return NULL;
- if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
+ if ( xc_hvm_build(self->xc_handle, dom, memsize, numanodes, image) != 0 )
return pyxc_error_to_exception();
#if !defined(__ia64__)
diff -r e730c1207604 -r f5e9f20109d9 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Tue Aug 07 15:11:00 2007 +0200
+++ b/xen/common/page_alloc.c Wed Aug 08 11:02:12 2007 +0200
@@ -806,8 +806,12 @@ struct page_info *__alloc_domheap_pages(
if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
{
- pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);
-
+ if (avail_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+ cpu_to_node (cpu)) >= ( 1UL << order ))
+ {
+ pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+ cpu, order);
+ }
/* Failure? Then check if we can fall back to the DMA pool. */
if ( unlikely(pg == NULL) &&
((order > MAX_ORDER) ||
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|