diff -r aa6ed694499d -r 5f60a383a055 tools/libxc/xc_dom.h
--- a/tools/libxc/xc_dom.h	Sat Apr 03 02:02:05 2010 -0400
+++ b/tools/libxc/xc_dom.h	Sat Apr 03 04:08:44 2010 -0400
@@ -114,6 +114,9 @@
     int (*allocate) (struct xc_dom_image * dom, xen_vaddr_t up_to);
 };
 
+#define XC_DOM_IS_NUMA_GUEST(dom) \
+    (elf_xen_feature_get(XENFEAT_numa_guest_support, (dom)->parms.f_supported))
+
 /* --- pluggable kernel loader ------------------------------------- */
 
 struct xc_dom_loader {
diff -r aa6ed694499d -r 5f60a383a055 tools/libxc/xc_dom_numa.c
--- a/tools/libxc/xc_dom_numa.c	Sat Apr 03 02:02:05 2010 -0400
+++ b/tools/libxc/xc_dom_numa.c	Sat Apr 03 04:08:44 2010 -0400
@@ -409,7 +409,7 @@
 static int xc_select_domain_vnodes(struct xc_dom_image *dom,
     xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *pv_layout)
 {
-    int i;
+    int i, numa_split_supported;
     uint32_t page_shift, numa_strategy;
     xen_pfn_t pv_dom_pages;
 
@@ -439,21 +439,24 @@
         return -1;
     }
 
+    if (!XC_DOM_IS_NUMA_GUEST(dom))
+        xc_dom_printf("%s: Image doesn't support guest numa\n", __FUNCTION__);
+    if (!XC_POWER_OF_2(pv_layout->max_vcpus))
+        xc_dom_printf("%s: #vcpus != 2^n (disable numa split)\n", __FUNCTION__);
+    numa_split_supported =
+        (XC_DOM_IS_NUMA_GUEST(dom) && XC_POWER_OF_2(pv_layout->max_vcpus));
+
     /* Attempt to confine or split the VM */
     for (i = 1; i <= phys_layout->max_nodes; i<<=1)
     {
         uint64_t vnode_size_pages;
 
         vnode_size_pages = pv_dom_pages/i;
-        if ((vnode_size_pages << page_shift) < XC_VNODE_MIN_SIZE)
-            break;
+        if ((vnode_size_pages << page_shift) < XC_VNODE_MIN_SIZE) break;
 
-        /* Not enough vcpus to distribute over */
-        if (pv_layout->max_vcpus < (i*XC_VCPUS_PER_VNODE))
-            break;
-
-        if ((i > 1) && !XC_POWER_OF_2(pv_layout->max_vcpus))
-            break;
+        if ((i > 1) && !numa_split_supported) break;
+        /* Not enough vcpus to distribute */
+        if (pv_layout->max_vcpus < (i*XC_VCPUS_PER_VNODE)) break;
 
         memset(node_pages_selected, 0, sizeof(node_pages_selected));
         if (!xc_select_best_fit_nodes(phys_layout, i, vnode_size_pages,
diff -r aa6ed694499d -r 5f60a383a055 xen/common/kernel.c
--- a/xen/common/kernel.c	Sat Apr 03 02:02:05 2010 -0400
+++ b/xen/common/kernel.c	Sat Apr 03 04:08:44 2010 -0400
@@ -244,6 +244,8 @@
                          (1U << XENFEAT_highmem_assist) |
                          (1U << XENFEAT_gnttab_map_avail_bits);
 #endif
+        if (is_numa_domain(current->domain))
+            fi.submap |= (1U << XENFEAT_numa_guest_support);
         break;
     default:
         return -EINVAL;
diff -r aa6ed694499d -r 5f60a383a055 xen/common/libelf/libelf-dominfo.c
--- a/xen/common/libelf/libelf-dominfo.c	Sat Apr 03 02:02:05 2010 -0400
+++ b/xen/common/libelf/libelf-dominfo.c	Sat Apr 03 04:08:44 2010 -0400
@@ -12,7 +12,8 @@
     [XENFEAT_writable_descriptor_tables] = "writable_descriptor_tables",
     [XENFEAT_auto_translated_physmap] = "auto_translated_physmap",
     [XENFEAT_supervisor_mode_kernel] = "supervisor_mode_kernel",
-    [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb"
+    [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb",
+    [XENFEAT_numa_guest_support] = "numa_guest_support"
 };
 static const int elf_xen_features =
     sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]);
diff -r aa6ed694499d -r 5f60a383a055 xen/include/public/features.h
--- a/xen/include/public/features.h	Sat Apr 03 02:02:05 2010 -0400
+++ b/xen/include/public/features.h	Sat Apr 03 04:08:44 2010 -0400
@@ -68,6 +68,12 @@
  */
 #define XENFEAT_gnttab_map_avail_bits      7
 
+/*
+ * If set, the guest is made aware of the underlying NUMA characteristics
+ * (numa guest enlightenment)
+ */
+#define XENFEAT_numa_guest_support         8
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
diff -r aa6ed694499d -r 5f60a383a055 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h	Sat Apr 03 02:02:05 2010 -0400
+++ b/xen/include/xen/sched.h	Sat Apr 03 04:08:44 2010 -0400
@@ -347,6 +347,7 @@
 #define put_domain(_d) \
   if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)
 
+#define is_numa_domain(d) ((d)->numa_layout != NULL)
 /*
  * Use this when you don't have an existing reference to @d. It returns
  * FALSE if @d is being destroyed.
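
Not part of the patch, for illustration only: a minimal guest-side sketch of how the new
feature bit would be consumed. It matches the xen/common/kernel.c hunk above, which sets
the bit only when is_numa_domain() holds, i.e. when the toolstack actually constructed a
virtual NUMA layout for the domain. The sketch assumes the existing public
XENVER_get_features / xen_feature_info interface and a HYPERVISOR_xen_version() hypercall
wrapper supplied by the guest OS; the Linux-style header paths used here may differ in
other guests.

/* Sketch only: query the feature submap filled in by do_xen_version()
 * (XENVER_get_features) and test the new XENFEAT_numa_guest_support bit. */
#include <xen/interface/version.h>    /* struct xen_feature_info, XENVER_get_features */
#include <xen/interface/features.h>   /* XENFEAT_numa_guest_support */

static int guest_numa_enlightened(void)
{
    struct xen_feature_info fi;

    fi.submap_idx = 0;   /* XENFEAT_numa_guest_support is bit 8 of submap 0 */
    if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
        return 0;

    return !!(fi.submap & (1U << XENFEAT_numa_guest_support));
}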