Hi,
Trying to figure it out, did we get the chicken or the egg first?
This one fixes NUMA memory allocations, at least for me. The problem was
that Xen would register all memory with the buddy allocator before
parsing the SRAT tables and setting up the memblk's 'nid' information.
The result was that all memory would first be put into the node 0 block,
then later when trying to free a page that resides on another node, the
buddy allocator would try and insert it in the given node's heap, but
no heap structures had been allocated and Xen would explode.
I'd be interested to hear if this patch gets Xen on a zx1000 in non-
crippled mode any further?
Cheers,
Jes
# HG changeset patch
# User jes@xxxxxxxxxxxxxxxx
# Date 1180963316 -7200
# Node ID 41dfd6d9d0d4d95b8c7bf3c66edee3041bcf23c2
# Parent 765d90fd0f633e2323ce68001ab950cabe80a8cc
Make ia64 ACPI register memory ranges before all memory is put into
the buddy allocator.
Provide method in page_alloc.c for early setup of a node's avail+heap
pools before the boot allocator is finished. This is required for
buddy allocator setup to avoid all memory ending up in the node 0
pool.
In addition, set PADDR_BITS on ia64 to 48 to match reality a bit
closer and save some memory in the heap arrays.
With this patch a 4-node SGI Altix is able to boot dom0.
Signed-off-by: Jes Sorensen <jes@xxxxxxx>
diff -r 765d90fd0f63 -r 41dfd6d9d0d4 xen/arch/ia64/linux-xen/setup.c
--- a/xen/arch/ia64/linux-xen/setup.c Wed May 30 11:05:04 2007 +0200
+++ b/xen/arch/ia64/linux-xen/setup.c Mon Jun 04 15:21:56 2007 +0200
@@ -514,11 +514,13 @@ late_setup_arch (char **cmdline_p)
{
#endif
#ifdef CONFIG_ACPI_BOOT
+#ifndef XEN
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
# endif
+#endif
#else
# ifdef CONFIG_SMP
smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */
diff -r 765d90fd0f63 -r 41dfd6d9d0d4 xen/arch/ia64/xen/acpi.c
--- a/xen/arch/ia64/xen/acpi.c Wed May 30 11:05:04 2007 +0200
+++ b/xen/arch/ia64/xen/acpi.c Mon Jun 04 15:21:56 2007 +0200
@@ -455,6 +455,9 @@ acpi_numa_memory_affinity_init (struct a
p->start_paddr = paddr;
p->size = size;
p->nid = pxm;
+
+ early_memory_node_init(pxm);
+
num_node_memblks++;
}
diff -r 765d90fd0f63 -r 41dfd6d9d0d4 xen/arch/ia64/xen/xensetup.c
--- a/xen/arch/ia64/xen/xensetup.c Wed May 30 11:05:04 2007 +0200
+++ b/xen/arch/ia64/xen/xensetup.c Mon Jun 04 15:21:56 2007 +0200
@@ -28,6 +28,7 @@
#include <asm/iosapic.h>
#include <xen/softirq.h>
#include <xen/rcupdate.h>
+#include <xen/acpi.h>
#include <acm/acm_hooks.h>
#include <asm/sn/simulator.h>
@@ -433,6 +434,16 @@ void __init start_kernel(void)
alloc_dom0();
+ /*
+ * This needs to be done before we lose the boot allocator
+ */
+#ifdef CONFIG_ACPI_BOOT
+ /* Initialize the ACPI boot-time table parser */
+ acpi_table_init();
+# ifdef CONFIG_ACPI_NUMA
+ acpi_numa_init();
+# endif
+#endif /* CONFIG_ACPI_BOOT */
end_boot_allocator();
init_xenheap_pages(__pa(xen_heap_start), xenheap_phys_end);
diff -r 765d90fd0f63 -r 41dfd6d9d0d4 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Wed May 30 11:05:04 2007 +0200
+++ b/xen/common/page_alloc.c Mon Jun 04 15:21:56 2007 +0200
@@ -509,6 +509,31 @@ static void free_heap_pages(
spin_unlock(&heap_lock);
}
+#ifdef CONFIG_NUMA
+void early_memory_node_init(int nid) /* set up avail[nid] and _heap[nid] from boot memory */
+{
+    if (nid > 0 && !avail[nid]) /* nid 0 is skipped, as are nodes whose avail[] is already set */
+    {
+        void *pageptr;
+        unsigned long pfn, size;
+        /* One region: NR_ZONES longs for avail[nid], then the heap structure. */
+        size = sizeof(long) * NR_ZONES + sizeof(heap_by_zone_and_order_t);
+        size = round_pgup(size);
+        /* Taken from the boot allocator; BUG() if it cannot supply the pages. */
+        pfn = alloc_boot_pages(size / PAGE_SIZE, 1);
+        if (!pfn)
+            BUG();
+        /* Zero the whole region: it can span more than one page. */
+        pageptr = __va(pfn << PAGE_SHIFT);
+        memset(pageptr, 0, size);
+
+        avail[nid] = pageptr;
+        _heap[nid] = pageptr + sizeof(long) * NR_ZONES;
+        init_heap_block(_heap[nid]);
+    }
+}
+#endif
+
/*
* Hand the specified arbitrary page range to the specified heap zone
* checking the node_id of the previous page. If they differ and the
diff -r 765d90fd0f63 -r 41dfd6d9d0d4 xen/include/asm-ia64/config.h
--- a/xen/include/asm-ia64/config.h Wed May 30 11:05:04 2007 +0200
+++ b/xen/include/asm-ia64/config.h Mon Jun 04 15:21:56 2007 +0200
@@ -43,6 +43,8 @@
#define supervisor_mode_kernel (0)
#define CONFIG_DMA_BITSIZE 32
+
+#define PADDR_BITS 48
/* If PERFC is used, include privop maps. */
#ifdef PERF_COUNTERS
diff -r 765d90fd0f63 -r 41dfd6d9d0d4 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h Wed May 30 11:05:04 2007 +0200
+++ b/xen/include/xen/mm.h Mon Jun 04 15:21:56 2007 +0200
@@ -66,6 +66,7 @@ unsigned long avail_domheap_pages(void);
#define free_domheap_page(p) (free_domheap_pages(p,0))
void scrub_heap_pages(void);
+void early_memory_node_init(int nid);
int assign_pages(
struct domain *d,
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|