WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [RFC] [PATCH] Memory allocation from the nearest node.

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [RFC] [PATCH] Memory allocation from the nearest node.
From: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>
Date: Tue, 07 Apr 2009 16:34:13 +0900
Cc:
Delivery-date: Tue, 07 Apr 2009 00:34:49 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
On current Xen, memory is allocated from the NUMA node on which vcpu#0 is
running. If there is no free memory on that node, the next NUMA node is
selected in numeric order. But the second selected node is not always the
node nearest to the first selected node.

For example, if NUMA node #n is selected as the first node on a machine
which has m nodes, free memory is searched for in the following order.

NUMA node #
  (n) -> (n+1) -> ... -> (m-2) -> (m-1) -> (0) -> (1) -> ... -> (n-1)

On memory allocation, if there is no free memory on the first selected
node, the nearest node should be used for the next allocation attempt,
because using memory from a nearby node gives better memory latency.

This patch changes the allocator to select the nearest node for the next
memory allocation attempt.

The BIOS records the distances between NUMA nodes in the ACPI SLIT table.
This patch uses the SLIT values to get the distance between NUMA nodes.
If ACPI does not provide a SLIT table, a distance of "10" is used for the
same node and "20" for a different node.

I would like to ask developers for comments.
I will post a revised patch after Xen 3.4 is released.

Thanks,
--
Yuji Shimada


Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff -r 5a60eb7fad79 xen/arch/x86/srat.c
--- a/xen/arch/x86/srat.c       Thu Apr 02 14:17:19 2009 +0100
+++ b/xen/arch/x86/srat.c       Tue Apr 07 10:40:06 2009 +0900
@@ -307,7 +307,7 @@
        int index;
 
        if (!acpi_slit)
-               return a == b ? 10 : 20;
+               return a == b ? LOCAL_DISTANCE : REMOTE_DISTANCE;
        index = acpi_slit->locality_count * node_to_pxm(a);
        return acpi_slit->entry[index + node_to_pxm(b)];
 }
diff -r 5a60eb7fad79 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Thu Apr 02 14:17:19 2009 +0100
+++ b/xen/common/page_alloc.c   Tue Apr 07 10:40:06 2009 +0900
@@ -332,6 +332,36 @@
     return needed;
 }
 
+/*
+ * find_nearest_node - find the nearest node
+ * @node: NUMA node number
+ * @used_node_mask: already used nodes
+ *
+ * It returns -1 if no node is found.
+ */
+static int find_nearest_node(unsigned int node, nodemask_t *used_node_mask)
+{
+    int n, val;
+    int min_val = INT_MAX;
+    int best_node = -1;
+
+    for_each_online_node(n) {
+        /* Don't want a node more than once */
+        if (node_isset(n, *used_node_mask))
+            continue;
+
+        /* Use the distance array to find the distance */
+        val = node_distance(node, n);
+
+        if (val < min_val) {
+            min_val = val;
+            best_node = n;
+        }
+    }
+
+    return best_node;
+}
+
 /* Allocate 2^@order contiguous pages. */
 static struct page_info *alloc_heap_pages(
     unsigned int zone_lo, unsigned int zone_hi,
@@ -342,6 +372,8 @@
     unsigned long request = 1UL << order;
     cpumask_t extra_cpus_mask, mask;
     struct page_info *pg;
+    int nearest_node;
+    nodemask_t used_node_mask;
 
     if ( node == NUMA_NO_NODE )
         node = cpu_to_node(smp_processor_id());
@@ -354,6 +386,9 @@
     if ( unlikely(order > MAX_ORDER) )
         return NULL;
 
+    nearest_node = node;
+    nodes_clear(used_node_mask);
+
     spin_lock(&heap_lock);
 
     /*
@@ -363,28 +398,33 @@
      */
     for ( i = 0; i < num_nodes; i++ )
     {
+        node_set(nearest_node, used_node_mask);
         zone = zone_hi;
         do {
             /* Check if target node can support the allocation. */
-            if ( !avail[node] || (avail[node][zone] < request) )
+            if ( !avail[nearest_node] || (avail[nearest_node][zone] < request) )
                 continue;
 
             /* Find smallest order which can satisfy the request. */
             for ( j = order; j <= MAX_ORDER; j++ )
-                if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
+                if ( (pg =
+                   page_list_remove_head(&heap(nearest_node, zone, j))) )
                     goto found;
         } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
 
-        /* Pick next node, wrapping around if needed. */
-        if ( ++node == num_nodes )
-            node = 0;
+        /* Pick the next nearest node. */
+        if ( (nearest_node = find_nearest_node(node, &used_node_mask)) < 0 ) {
+            break;
+        }
     }
 
     /* No suitable memory blocks. Fail the request. */
     spin_unlock(&heap_lock);
     return NULL;
 
- found: 
+ found:
+    node = nearest_node;
+
     /* We may have to halve the chunk a number of times. */
     while ( j != order )
     {
diff -r 5a60eb7fad79 xen/include/asm-x86/numa.h
--- a/xen/include/asm-x86/numa.h        Thu Apr 02 14:17:19 2009 +0100
+++ b/xen/include/asm-x86/numa.h        Tue Apr 07 10:40:06 2009 +0900
@@ -23,6 +23,8 @@
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 #define VIRTUAL_BUG_ON(x) 
 #define NODEMAPSIZE 0xfff
+#define LOCAL_DISTANCE 10
+#define REMOTE_DISTANCE 20
 
 extern void numa_add_cpu(int cpu);
 extern void numa_init_array(void);
@@ -67,10 +69,13 @@
 #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
                                 NODE_DATA(nid)->node_spanned_pages)
 
+extern int __node_distance(int, int);
+#define node_distance(a, b) __node_distance(a,b)
 
 #else
 #define init_cpu_to_node() do {} while (0)
 #define clear_node_cpumask(cpu) do {} while (0)
+#define node_distance(a, b) ((a) == (b) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
 #endif
 
 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>