diff -r c0e32941ee69 tools/include/xen-foreign/reference.size --- a/tools/include/xen-foreign/reference.size Wed Nov 25 14:19:50 2009 +0000 +++ b/tools/include/xen-foreign/reference.size Sat Feb 13 00:55:44 2010 -0500 @@ -1,7 +1,7 @@ structs | x86_32 x86_64 ia64 -start_info | 1112 1168 1168 +start_info | 1120 1176 1176 trap_info | 8 16 - pt_fpreg | - - 16 cpu_user_regs | 68 200 - diff -r c0e32941ee69 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/arch/x86/domain_build.c Sat Feb 13 00:55:44 2010 -0500 @@ -31,6 +31,7 @@ #include #include #include /* for bzimage_parse */ +#include #include @@ -122,8 +123,10 @@ #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) #define round_pgdown(_p) ((_p)&PAGE_MASK) +#define DOM0_BOOT_NODE 0 +#define XEN_MEMF_exact_node(n) (MEMF_node(n) | MEMF_exact_node) static struct page_info * __init alloc_chunk( - struct domain *d, unsigned long max_pages) + struct domain *d, unsigned long max_pages, unsigned int node) { struct page_info *page; unsigned int order; @@ -136,12 +139,14 @@ order = get_order_from_pages(max_pages); if ( (max_pages & (max_pages-1)) != 0 ) order--; - while ( (page = alloc_domheap_pages(d, order, 0)) == NULL ) + while ( (page = alloc_domheap_pages(d, order, XEN_MEMF_exact_node(node))) + == NULL ) if ( order-- == 0 ) - break; + break; return page; } + static unsigned long __init compute_dom0_nr_pages( #ifdef __x86_64__ unsigned long vstart, unsigned long vend, size_t sizeof_long) @@ -241,6 +246,37 @@ } } +void dump_numa_layout(struct xen_domain_numa_layout *layout) +{ + unsigned int i, j; + char vcpumask[128]; + printk("NUMA-LAYOUT(Dom0) : vcpus(%u), vnodes(%u)\n", + layout->max_vcpus, layout->max_vnodes); + for (i = 0; i < layout->max_vnodes; i++) + { + struct xen_vnode_data *vnode_data = &layout->vnode_data[i]; + cpumask_scnprintf(vcpumask, sizeof(vcpumask), + *((cpumask_t *)&vnode_data->vcpu_mask)); + printk("vnode[%u]:mnode(%u), node_nr_pages(%lx), 
vcpu_mask(%s)\n", + vnode_data->vnode_id, vnode_data->mnode_id, + vnode_data->nr_pages, vcpumask); + } + + printk("vnode distances :\n"); + for (i = 0; i < layout->max_vnodes; i++) + printk("\tvnode[%u]", i); + for (i = 0; i < layout->max_vnodes; i++) + { + printk("\nvnode[%u]", i); + for (j = 0; j < layout->max_vnodes; j++) + printk("\t%u", layout->vnode_distance[i*layout->max_vnodes + j]); + printk("\n"); + } + return; +} + +struct xen_domain_numa_layout dom0_numa_layout; + int __init construct_dom0( struct domain *d, unsigned long _image_base, @@ -258,6 +294,7 @@ unsigned long count; struct page_info *page = NULL; start_info_t *si; + struct xen_domain_numa_layout *numa_layout; struct vcpu *v = d->vcpu[0]; unsigned long long value; #if defined(__i386__) @@ -381,7 +418,7 @@ #else nr_pages = compute_dom0_nr_pages(); #endif - + if ( parms.pae == PAEKERN_extended_cr3 ) set_bit(VMASST_TYPE_pae_extended_cr3, &d->vm_assist); @@ -430,7 +467,8 @@ vstartinfo_start = round_pgup(vphysmap_end); vstartinfo_end = (vstartinfo_start + sizeof(struct start_info) + - sizeof(struct dom0_vga_console_info)); + sizeof(struct dom0_vga_console_info) + + sizeof(struct xen_domain_numa_layout)); vpt_start = round_pgup(vstartinfo_end); for ( nr_pt_pages = 2; ; nr_pt_pages++ ) { @@ -473,11 +511,12 @@ vphysmap_start = parms.p2m_base; vphysmap_end = vphysmap_start + nr_pages * sizeof(unsigned long); } - page = alloc_domheap_pages(d, order, 0); + page = alloc_domheap_pages(d, order, XEN_MEMF_exact_node(DOM0_BOOT_NODE)); #endif if ( page == NULL ) panic("Not enough RAM for domain 0 allocation.\n"); alloc_spfn = page_to_mfn(page); + /* XXX: What happens in a layout with holes : pfn_pdx_hole_shift != 0 */ alloc_epfn = alloc_spfn + d->tot_pages; printk("PHYSICAL MEMORY ARRANGEMENT:\n" @@ -503,6 +542,47 @@ _p(v_start), _p(v_end)); printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry)); +#if 0 + if (d->numa) +#endif + { + int i, j; + cpumask_t *vcpumask; + numa_layout = &dom0_numa_layout; + + 
numa_layout->max_vcpus = d->max_vcpus; + numa_layout->max_vnodes = num_online_nodes(); + + BUG_ON(DOM0_BOOT_NODE || num_online_nodes() > XEN_MAX_VNODES); /* layout arrays hold XEN_MAX_VNODES entries */ +#define NR_NODE_PAGES (nr_pages / num_online_nodes()) + for (i = 0; i < numa_layout->max_vnodes; i++) + { + /* XXX: Dom0 eventually looks at the same e820 map, so is it + * right to set Dom0's max_pfn to nr_pages ? */ + struct xen_vnode_data *vnode_data = &numa_layout->vnode_data[i]; + vnode_data->vnode_id = i; + /* We are allocating from all nodes starting at DOM0_BOOT_NODE(0) */ + vnode_data->mnode_id = i; + vnode_data->nr_pages = NR_NODE_PAGES; + } +#undef NR_NODE_PAGES + vcpumask = (cpumask_t *)&numa_layout->vnode_data[0].vcpu_mask; + /* VCPU0 is placed on DOM0_BOOT_NODE(0) */ + cpu_set(0, *vcpumask); + + /* Fill up the vnode<->vnode distances (row stride is max_vnodes, as read by dump_numa_layout) */ + for (i = 0; i < numa_layout->max_vnodes; i++) + { + uint32_t imnode = numa_layout->vnode_data[i].mnode_id; + for (j = 0; j < numa_layout->max_vnodes; j++) + { + uint32_t jmnode = numa_layout->vnode_data[j].mnode_id; + numa_layout->vnode_distance[(i*numa_layout->max_vnodes) + j] = + node_distance(imnode, jmnode); + } + } + } + mpt_alloc = (vpt_start - v_start) + (unsigned long)pfn_to_paddr(alloc_spfn); @@ -625,7 +705,7 @@ } else { - page = alloc_domheap_page(NULL, 0); + page = alloc_domheap_page(NULL, XEN_MEMF_exact_node(DOM0_BOOT_NODE)); if ( !page ) panic("Not enough RAM for domain 0 PML4.\n"); page->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1; @@ -757,8 +837,24 @@ printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus); - for ( i = 1; i < opt_dom0_max_vcpus; i++ ) - (void)alloc_vcpu(d, i, i % num_online_cpus()); + for (i = 1; i < d->max_vcpus; i++) + { + int processor; + + /* Distribute the vcpus (over nodes or cpus); scaling keeps node < num_online_nodes() */ + if((d->is_numa) && (d->max_vcpus >= num_online_nodes())) + { + int node = (i*num_online_nodes())/d->max_vcpus; + cpumask_t *vcpumask = + (cpumask_t *)&numa_layout->vnode_data[node].vcpu_mask; + cpu_set(i, *vcpumask); + processor = first_cpu(node_to_cpumask(node)); + } + else +
processor = i%num_online_cpus(); + + (void)alloc_vcpu(d, i, processor); + } /* Set up CR3 value for write_ptbase */ if ( paging_mode_enabled(d) ) @@ -805,6 +901,14 @@ si->pt_base = vpt_start + 2 * PAGE_SIZE * !!is_pv_32on64_domain(d); si->nr_pt_frames = nr_pt_pages; si->mfn_list = vphysmap_start; + + si->numa_layout_info.info_off = + sizeof(struct start_info) + sizeof(struct dom0_vga_console_info); + si->numa_layout_info.info_size = sizeof(struct xen_domain_numa_layout); + numa_layout = (struct xen_domain_numa_layout *) + (vstartinfo_start + si->numa_layout_info.info_off); + memcpy(numa_layout, &dom0_numa_layout, sizeof(*numa_layout)); + snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s", elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : ""); @@ -827,7 +931,7 @@ l4tab = l4start + l4_table_offset(va); if ( !l4e_get_intpte(*l4tab) ) { - page = alloc_domheap_page(d, 0); + page = alloc_domheap_page(d, XEN_MEMF_exact_node(DOM0_BOOT_NODE)); if ( !page ) break; /* No mapping, PGC_allocated + page-table page. 
*/ @@ -847,14 +951,15 @@ (page = alloc_domheap_pages(d, L3_PAGETABLE_SHIFT - PAGE_SHIFT, - 0)) != NULL ) + XEN_MEMF_exact_node(DOM0_BOOT_NODE))) != NULL ) { *l3tab = l3e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE); va += 1UL << L3_PAGETABLE_SHIFT; continue; } - if ( (page = alloc_domheap_page(d, 0)) == NULL ) + if ( (page = alloc_domheap_page(d, XEN_MEMF_exact_node(DOM0_BOOT_NODE))) + == NULL ) break; else { @@ -875,14 +980,15 @@ (page = alloc_domheap_pages(d, L2_PAGETABLE_SHIFT - PAGE_SHIFT, - 0)) != NULL ) + XEN_MEMF_exact_node(DOM0_BOOT_NODE))) != NULL ) { *l2tab = l2e_from_page(page, L1_PROT|_PAGE_DIRTY|_PAGE_PSE); va += 1UL << L2_PAGETABLE_SHIFT; continue; } - if ( (page = alloc_domheap_page(d, 0)) == NULL ) + if ( (page = alloc_domheap_page(d, XEN_MEMF_exact_node(DOM0_BOOT_NODE))) + == NULL ) break; else { @@ -897,7 +1003,7 @@ l1tab = page_to_virt(l2e_get_page(*l2tab)); l1tab += l1_table_offset(va); BUG_ON(l1e_get_intpte(*l1tab)); - page = alloc_domheap_page(d, 0); + page = alloc_domheap_page(d, XEN_MEMF_exact_node(DOM0_BOOT_NODE)); if ( !page ) break; *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY); @@ -917,6 +1023,7 @@ #define REVERSE_START ((v_end - v_start) >> PAGE_SHIFT) if ( pfn > REVERSE_START ) mfn = alloc_epfn - (pfn - REVERSE_START); +#undef REVERSE_START #endif if ( !is_pv_32on64_domain(d) ) ((unsigned long *)vphysmap_start)[pfn] = mfn; @@ -948,27 +1055,45 @@ } } BUG_ON(pfn != d->tot_pages); - while ( pfn < nr_pages ) + +{ + unsigned int vnode; + for (vnode = 0; vnode < numa_layout->max_vnodes; vnode++) { - if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL ) - panic("Not enough RAM for DOM0 reservation.\n"); - while ( pfn < d->tot_pages ) + struct xen_vnode_data *vnode_data = &numa_layout->vnode_data[vnode]; + unsigned int mnode = vnode_data->mnode_id; +#define NR_NODE_PAGES (nr_pages / num_online_nodes()) + unsigned long vnode_end_pfn = (vnode+1)*NR_NODE_PAGES; + if (pfn > vnode_end_pfn) { - mfn = page_to_mfn(page); + 
dump_numa_layout(numa_layout); + printk("pfn(%lx) > vnode_end_pfn(%lx)\n", pfn, vnode_end_pfn); + panic("pfn(%lx) > vnode_end_pfn(%lx)\n", pfn, vnode_end_pfn); + } +#undef NR_NODE_PAGES + while (pfn < vnode_end_pfn) + { + if (!(page = alloc_chunk(d, vnode_end_pfn - d->tot_pages, mnode))) + panic("Not enough RAM for DOM0 reservation.\n"); + while (pfn < d->tot_pages) + { + mfn = page_to_mfn(page); #ifndef NDEBUG #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn))) #endif - if ( !is_pv_32on64_domain(d) ) - ((unsigned long *)vphysmap_start)[pfn] = mfn; - else - ((unsigned int *)vphysmap_start)[pfn] = mfn; - set_gpfn_from_mfn(mfn, pfn); + if ( !is_pv_32on64_domain(d) ) + ((unsigned long *)vphysmap_start)[pfn] = mfn; + else + ((unsigned int *)vphysmap_start)[pfn] = mfn; + set_gpfn_from_mfn(mfn, pfn); #undef pfn - page++; pfn++; - if (!(pfn & 0xfffff)) - process_pending_timers(); + page++; pfn++; + if (!(pfn & 0xfffff)) + process_pending_timers(); + } } } +} if ( initrd_len != 0 ) { @@ -986,10 +1111,12 @@ si->console.dom0.info_size = sizeof(struct dom0_vga_console_info); } +#if 0 #if defined(__x86_64__) if ( is_pv_32on64_domain(d) ) xlat_start_info(si, XLAT_start_info_console_dom0); #endif +#endif /* Return to idle domain's page tables. 
*/ write_ptbase(current); diff -r c0e32941ee69 xen/arch/x86/e820.c --- a/xen/arch/x86/e820.c Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/arch/x86/e820.c Sat Feb 13 00:55:44 2010 -0500 @@ -647,3 +647,50 @@ return find_max_pfn(); } + +#define round_down(addr, mask) ((addr) & ~(mask)) /* mask = alignment - 1 */ +#define round_up(addr, mask) (((addr) + (mask)) & ~(mask)) /* mask = alignment - 1 */ + +int __init e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + uint64_t align_mask = (PAGE_SIZE-1); /* 64-bit so ~mask keeps the high address bits */ + + *ei_startpfn = round_up(ei->addr, align_mask) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->addr + ei->size, align_mask) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Skip if map is outside the node */ + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || + *ei_startpfn >= last_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; + + return 1; +} + +unsigned long __init e820_hole_size(unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned long ei_startpfn, ei_endpfn, ram = 0; + int i; + + for (i = 0; i < e820.nr_map; i++) { + if (e820_find_active_region(&e820.map[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + } + return (last_pfn - start_pfn - ram); +} diff -r c0e32941ee69 xen/arch/x86/numa.c --- a/xen/arch/x86/numa.c Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/arch/x86/numa.c Sat Feb 13 00:55:44 2010 -0500 @@ -28,8 +28,10 @@ struct node_data node_data[MAX_NUMNODES]; -int memnode_shift; -u8 memnodemap[NODEMAPSIZE]; +struct memnode memnode = {.mapsize = NODEMAPSIZE}; +#define memnode_shift memnode.shift +#define memnodemap memnode.map +#define memnodemapsize memnode.mapsize unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = NUMA_NO_NODE @@ -278,6 +280,9 @@ EXPORT_SYMBOL(memnodemap); EXPORT_SYMBOL(node_data); +extern struct xen_domain_numa_layout dom0_numa_layout; +extern void dump_numa_layout(struct xen_domain_numa_layout *layout); + static void dump_numa(unsigned char key) { s_time_t now = NOW(); @@ -289,6 +294,8 @@ printk("'%c' pressed -> dumping numa info (now-0x%X:%08X)\n", key, (u32)(now>>32), (u32)now); + dump_numa_layout(&dom0_numa_layout); + for_each_online_node(i) { paddr_t pa = (paddr_t)(NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT; printk("idx%d -> NODE%d start->%lu size->%lu\n", diff -r c0e32941ee69 xen/common/compat/xlat.c --- a/xen/common/compat/xlat.c Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/common/compat/xlat.c Sat Feb 13 00:55:44 2010 -0500 @@ -8,6 +8,7 @@ #include #include +#if 0 /* In-place translation functons: */ void xlat_start_info(struct start_info *native, enum XLAT_start_info_console console) @@ -17,6 +18,7 @@ BUILD_BUG_ON(sizeof(*native) < sizeof(*compat)); XLAT_start_info(compat, native); } +#endif void xlat_vcpu_runstate_info(struct vcpu_runstate_info *native) { diff -r c0e32941ee69 xen/common/domain.c --- a/xen/common/domain.c Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/common/domain.c Sat Feb 13 00:55:44 2010 -0500 @@ -241,7 +241,10 @@ if ( domid == 0 ) { + /* should be opt_dom0_numa */ d->is_pinned = opt_dom0_vcpus_pin; + d->is_numa = 1; + d->is_pinned = 1; d->disable_migrate = 1; } diff -r c0e32941ee69 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/common/page_alloc.c Sat Feb 13 00:55:44 2010 -0500 @@ -287,11 +287,17 @@ unsigned int i, j, zone = 0; unsigned int num_nodes = num_online_nodes(); unsigned long request = 1UL << order; + unsigned int exact_node_request; cpumask_t extra_cpus_mask, mask; struct page_info *pg; - if ( node == NUMA_NO_NODE ) + if ( node == NUMA_NO_NODE ) { node = cpu_to_node(smp_processor_id()); + exact_node_request = 0; + } else { + exact_node_request = (memflags & 
MEMF_exact_node); + } + ASSERT(node >= 0); ASSERT(zone_lo <= zone_hi); @@ -321,6 +327,8 @@ goto found; } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */ + if (exact_node_request) + goto not_found; /* Pick next node, wrapping around if needed. */ node = next_node(node, node_online_map); if (node == MAX_NUMNODES) @@ -335,6 +343,7 @@ return pg; } +not_found: /* No suitable memory blocks. Fail the request. */ spin_unlock(&heap_lock); return NULL; @@ -834,6 +843,11 @@ return free_pages; } +unsigned long avail_node_heap_pages(unsigned int node) +{ + return avail_heap_pages(MEMZONE_XEN, NR_ZONES-1, node); +} + void __init end_boot_allocator(void) { unsigned int i; @@ -1188,9 +1202,7 @@ unsigned long avail_domheap_pages(void) { - return avail_heap_pages(MEMZONE_XEN + 1, - NR_ZONES - 1, - -1); + return avail_heap_pages(MEMZONE_XEN + 1, NR_ZONES - 1, -1); } static void pagealloc_info(unsigned char key) diff -r c0e32941ee69 xen/common/schedule.c --- a/xen/common/schedule.c Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/common/schedule.c Sat Feb 13 00:55:44 2010 -0500 @@ -33,6 +33,7 @@ #include #include #include +#include /* opt_sched: scheduler - default to credit */ static char __initdata opt_sched[10] = "credit"; @@ -150,7 +151,7 @@ return state.time[RUNSTATE_running]; } -int sched_init_vcpu(struct vcpu *v, unsigned int processor) +int sched_init_vcpu(struct vcpu *v, unsigned int processor) { struct domain *d = v->domain; @@ -160,7 +161,12 @@ */ v->processor = processor; if ( is_idle_domain(d) || d->is_pinned ) - v->cpu_affinity = cpumask_of_cpu(processor); + { + if (d->is_numa) + v->cpu_affinity = node_to_cpumask(cpu_to_node(processor)); + else + v->cpu_affinity = cpumask_of_cpu(processor); + } else cpus_setall(v->cpu_affinity); diff -r c0e32941ee69 xen/include/asm-x86/e820.h --- a/xen/include/asm-x86/e820.h Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/include/asm-x86/e820.h Sat Feb 13 00:55:44 2010 -0500 @@ -29,6 +29,11 @@ struct e820map *e820, uint64_t s, 
uint64_t e, uint32_t orig_type, uint32_t new_type); extern unsigned long init_e820(const char *, struct e820entry *, int *); +extern int e820_find_active_region(const struct e820entry *ei, + unsigned long start_pfn, unsigned long last_pfn, + unsigned long *ei_startpfn, unsigned long *ei_endpfn); +extern unsigned long e820_hole_size(unsigned long start_pfn, + unsigned long end_pfn); extern struct e820map e820; /* These symbols live in the boot trampoline. */ diff -r c0e32941ee69 xen/include/asm-x86/numa.h --- a/xen/include/asm-x86/numa.h Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/include/asm-x86/numa.h Sat Feb 13 00:55:44 2010 -0500 @@ -12,7 +12,7 @@ #define cpu_to_node(cpu) (cpu_to_node[cpu]) #define parent_node(node) (node) -#define node_to_first_cpu(node) (__ffs(node_to_cpumask[node])) +#define node_to_first_cpu(node) (ffs(node_to_cpumask[node])) #define node_to_cpumask(node) (node_to_cpumask[node]) struct node { @@ -49,8 +49,15 @@ } /* Simple perfect hash to map physical addresses to node numbers */ -extern int memnode_shift; -extern u8 memnodemap[NODEMAPSIZE]; +struct memnode { + int shift; + unsigned int mapsize; + u8 map[NODEMAPSIZE]; +}; +extern struct memnode memnode; +#define memnode_shift memnode.shift +#define memnodemap memnode.map +#define memnodemapsize memnode.mapsize struct node_data { unsigned long node_start_pfn; @@ -69,11 +76,15 @@ return nid; } +int __node_distance(int a, int b); + #define NODE_DATA(nid) (&(node_data[nid])) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) +#define node_distance(a, b) (__node_distance(a, b)) #else diff -r c0e32941ee69 xen/include/public/xen.h --- a/xen/include/public/xen.h Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/include/public/xen.h Sat Feb 13 00:55:44 2010 -0500 @@ -519,6 +519,53 @@ typedef struct shared_info shared_info_t; #endif 
+#define XEN_NR_CPUS 64 +#if defined(__i386__) +#define XEN_BITS_PER_LONG 32 +#define XEN_BYTES_PER_LONG 4 +#define XEN_LONG_BYTEORDER 2 +#elif defined(__x86_64__) +#define XEN_BITS_PER_LONG 64 +#define XEN_BYTES_PER_LONG 8 +#define XEN_LONG_BYTEORDER 3 +#endif + +/* same as cpumask_t - in xen and even Linux (for now) */ +#define XEN_BITS_TO_LONGS(bits) \ + (((bits)+XEN_BITS_PER_LONG-1)/XEN_BITS_PER_LONG) +#define XEN_DECLARE_BITMAP(name,bits) \ + unsigned long name[XEN_BITS_TO_LONGS(bits)] +struct xen_cpumask{ XEN_DECLARE_BITMAP(bits, XEN_NR_CPUS); }; +#ifndef __XEN__ +typedef struct xen_cpumask xen_cpumask_t; +#endif + +#define XEN_MAX_VNODES 8 +struct xen_vnode_data { + uint32_t vnode_id; + uint32_t mnode_id; + uint64_t nr_pages; + /* XXX: Can we use this in xen<->domain interfaces ? */ + struct xen_cpumask vcpu_mask; /* vnode_to_vcpumask */ +}; +#ifndef __XEN__ +typedef struct xen_vnode_data xen_vnode_data_t; +#endif + +/* NUMA layout for the domain at the time of startup. + * Structure has to fit within a page. */ +struct xen_domain_numa_layout { + uint32_t max_vcpus; + uint32_t max_vnodes; + + /* Only (max_vnodes*max_vnodes) entries are filled */ + uint32_t vnode_distance[XEN_MAX_VNODES * XEN_MAX_VNODES]; + struct xen_vnode_data vnode_data[XEN_MAX_VNODES]; +}; +#ifndef __XEN__ +typedef struct xen_domain_numa_layout xen_domain_numa_layout_t; +#endif + /* * Start-of-day memory layout: * 1. The domain is started within contiguous virtual-memory region. @@ -572,6 +619,10 @@ /* The pfn range here covers both page table and p->m table frames. */ unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ + struct { + uint32_t info_off; /* Offset of NUMA layout struct from start. 
*/ + uint32_t info_size; /* Size of NUMA layout struct. */ + } numa_layout_info; }; typedef struct start_info start_info_t; diff -r c0e32941ee69 xen/include/xen/mm.h --- a/xen/include/xen/mm.h Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/include/xen/mm.h Sat Feb 13 00:55:44 2010 -0500 @@ -46,6 +46,7 @@ void init_xenheap_pages(paddr_t ps, paddr_t pe); void *alloc_xenheap_pages(unsigned int order, unsigned int memflags); void free_xenheap_pages(void *v, unsigned int order); +unsigned long avail_node_heap_pages(unsigned int node); #define alloc_xenheap_page() (alloc_xenheap_pages(0,0)) #define free_xenheap_page(v) (free_xenheap_pages(v,0)) @@ -78,6 +79,8 @@ #define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand) #define _MEMF_tmem 2 #define MEMF_tmem (1U<<_MEMF_tmem) +#define _MEMF_exact_node 3 +#define MEMF_exact_node (1U << _MEMF_exact_node) #define _MEMF_node 8 #define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node) #define _MEMF_bits 24 diff -r c0e32941ee69 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Wed Nov 25 14:19:50 2009 +0000 +++ b/xen/include/xen/sched.h Sat Feb 13 00:55:44 2010 -0500 @@ -226,6 +226,8 @@ bool_t is_paused_by_controller; /* Domain's VCPUs are pinned 1:1 to physical CPUs? */ bool_t is_pinned; + /* Domain is numa aware */ + bool_t is_numa; /* Are any VCPUs polling event channels (SCHEDOP_poll)? */ #if MAX_VIRT_CPUS <= BITS_PER_LONG