[Xen-devel] [PATCH 4/6] xen: export NUMA topology in physinfo hcall

To: Ian Pratt <m+Ian.Pratt@xxxxxxxxxxxx>, Keir Fraser <Keir.Fraser@xxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 4/6] xen: export NUMA topology in physinfo hcall
From: Ryan Harper <ryanh@xxxxxxxxxx>
Date: Fri, 29 Sep 2006 13:58:49 -0500
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Fri, 29 Sep 2006 12:01:20 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.6+20040907i
This patch modifies the physinfo hcall to export NUMA CPU and memory
topology information.  The new physinfo hcall is integrated into libxc
and xend (xm info specifically).  Also included is a minor tweak to
xm-test's xm info test case.  The new fields in xm info are:

nr_nodes               : 4
mem_chunks             : node0:0x0000000000000000-0x0000000190000000
                         node1:0x0000000190000000-0x0000000300000000
                         node2:0x0000000300000000-0x0000000470000000
                         node3:0x0000000470000000-0x0000000640000000
node_to_cpu            : node0:0-7
                         node1:8-15
                         node2:16-23
                         node3:24-31
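
With this change, an xc_physinfo() caller that wants the NUMA data supplies
its own buffers and points the new guest handles at them before the call;
callers that don't care simply zero the struct, as the xenbaked, xentrace
and xenstat hunks below do.  Purely as illustration (not part of the patch,
and assuming only the types and constants the patch itself introduces), a
libxc caller could follow the same pattern as the pyxc_physinfo() change:

/* sketch only: dump NUMA topology via the extended physinfo hcall;
 * error handling abbreviated */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xenctrl.h>

int dump_numa_topology(int xc_handle)
{
    xc_physinfo_t info;
    xc_memory_chunk_t *chunks;
    xc_node_to_cpu_t *map;
    int i, j;

    /* caller allocates the arrays; the hypervisor fills them in */
    chunks = malloc(sizeof(*chunks) * PUBLIC_MAXCHUNKS);
    map    = malloc(sizeof(*map) * PUBLIC_MAX_NUMNODES);
    if ( (chunks == NULL) || (map == NULL) )
    {
        free(chunks);
        free(map);
        return -1;
    }

    memset(&info, 0, sizeof(info));
    set_xen_guest_handle(info.memory_chunks, chunks);
    set_xen_guest_handle(info.node_to_cpu, map);

    if ( xc_physinfo(xc_handle, &info) != 0 )
    {
        free(chunks);
        free(map);
        return -1;
    }

    for ( i = 0; i < info.nr_nodes; i++ )
    {
        /* node_start_pfn/node_spanned_pages are page frame numbers; shift
         * by XC_PAGE_SHIFT to get physical addresses, as xc.c does */
        printf("node%u: 0x%016llx-0x%016llx cpus:",
               chunks[i].node_id,
               (unsigned long long)chunks[i].node_start_pfn << XC_PAGE_SHIFT,
               (unsigned long long)(chunks[i].node_start_pfn +
                                    chunks[i].node_spanned_pages)
                   << XC_PAGE_SHIFT);

        /* node_to_cpu[i] is a bitmap: bit j set means CPU j is on node i */
        for ( j = 0; j < 64; j++ )
            if ( map[i] & ((uint64_t)1 << j) )
                printf(" %d", j);
        printf("\n");
    }

    free(chunks);
    free(map);
    return 0;
}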



-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@xxxxxxxxxx


diffstat output:
 b/xen/include/public/numa_structs.h                 |   26 ++++++
 tools/libxc/xc_misc.c                               |    4 +
 tools/libxc/xenctrl.h                               |    3 
 tools/python/xen/lowlevel/xc/xc.c                   |   77 ++++++++++++++++----
 tools/python/xen/xend/XendNode.py                   |   67 +++++++++++++++++
 tools/xenmon/xenbaked.c                             |    3 
 tools/xenstat/libxenstat/src/xenstat.c              |    3 
 tools/xentrace/xentrace.c                           |    3 
 tools/xm-test/tests/info/02_info_compiledata_pos.py |    4 -
 xen/arch/x86/sysctl.c                               |   51 ++++++++++++-
 xen/include/public/arch-x86_32.h                    |    1 
 xen/include/public/arch-x86_64.h                    |    1 
 xen/include/public/sysctl.h                         |    3 
 xen/include/xen/numa.h                              |    7 -
 14 files changed, 231 insertions(+), 22 deletions(-)

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
---
Export NUMA topology in physinfo hcall

diff -r 6f3c6fb05af3 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/libxc/xc_misc.c     Mon Sep 11 13:45:23 2006 -0500
@@ -39,6 +39,10 @@ int xc_physinfo(int xc_handle,
     DECLARE_SYSCTL;
 
     sysctl.cmd = XEN_SYSCTL_physinfo;
+
+    /* set pointers to caller's so memcpy doesn't clobber them */
+    sysctl.u.physinfo.memory_chunks = put_info->memory_chunks;
+    sysctl.u.physinfo.node_to_cpu = put_info->node_to_cpu;
 
     if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 )
         return ret;
diff -r 6f3c6fb05af3 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/libxc/xenctrl.h     Sat Sep 16 08:17:10 2006 -0500
@@ -26,6 +26,7 @@
 #include <xen/memory.h>
 #include <xen/acm.h>
 #include <xen/acm_ops.h>
+#include <xen/numa_structs.h>
 
 #ifdef __ia64__
 #define XC_PAGE_SHIFT           14
@@ -387,6 +388,8 @@ int xc_readconsolering(int xc_handle,
                        int clear);
 
 typedef xen_sysctl_physinfo_t xc_physinfo_t;
+typedef node_data_t xc_memory_chunk_t;
+typedef uint64_t xc_node_to_cpu_t;
 int xc_physinfo(int xc_handle,
                 xc_physinfo_t *info);
 
diff -r 6f3c6fb05af3 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/python/xen/lowlevel/xc/xc.c Sat Sep 16 08:17:13 2006 -0500
@@ -474,8 +474,21 @@ static PyObject *pyxc_physinfo(XcObject 
 {
     xc_physinfo_t info;
     char cpu_cap[128], *p=cpu_cap, *q=cpu_cap;
-    int i;
-    
+    int i,j;
+    PyObject *ret_obj, *memchunk_obj, *node_to_cpu_obj;
+    xc_memory_chunk_t *chunks;
+    xc_node_to_cpu_t  *map;
+
+    /* make space for mem chunks */
+    chunks =  (xc_memory_chunk_t *)malloc( sizeof(xc_memory_chunk_t) * 
+                                     PUBLIC_MAXCHUNKS );
+    set_xen_guest_handle(info.memory_chunks, chunks);
+
+    /* make space for node_to_cpu mapping */
+    map = (xc_node_to_cpu_t *)malloc( sizeof(xc_node_to_cpu_t) *
+                                    PUBLIC_MAX_NUMNODES ); 
+    set_xen_guest_handle(info.node_to_cpu, map);
+
     if ( xc_physinfo(self->xc_handle, &info) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
@@ -489,16 +502,56 @@ static PyObject *pyxc_physinfo(XcObject 
     if(q>cpu_cap)
         *(q-1)=0;
 
-    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
-                         "threads_per_core", info.threads_per_core,
-                         "cores_per_socket", info.cores_per_socket,
-                         "sockets_per_node", info.sockets_per_node,
-                         "nr_nodes",         info.nr_nodes,
-                         "total_memory",     pages_to_kib(info.total_pages),
-                         "free_memory",      pages_to_kib(info.free_pages),
-                         "scrub_memory",     pages_to_kib(info.scrub_pages),
-                         "cpu_khz",          info.cpu_khz,
-                         "hw_caps",          cpu_cap);
+    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
+                            "threads_per_core", info.threads_per_core,
+                            "cores_per_socket", info.cores_per_socket,
+                            "sockets_per_node", info.sockets_per_node,
+                            "total_memory",     pages_to_kib(info.total_pages),
+                            "free_memory",      pages_to_kib(info.free_pages),
+                            "scrub_memory",     pages_to_kib(info.scrub_pages),
+                            "cpu_khz",          info.cpu_khz,
+                            "hw_caps",          cpu_cap);
+    /* memchunks */
+    memchunk_obj = PyList_New(0);
+ 
+    /* build list of each memchunk's attributes, converting pfn to paddr */
+    for ( i=0; i<info.nr_nodes; i++ ) 
+    {
+        PyList_Append(memchunk_obj, 
+                      Py_BuildValue("{s:i,s:K,s:K}",
+                      "node"       , chunks[i].node_id,
+                      "start_paddr", chunks[i].node_start_pfn << XC_PAGE_SHIFT,
+                      "end_paddr"  , (chunks[i].node_start_pfn + 
+                      chunks[i].node_spanned_pages) << XC_PAGE_SHIFT ));
+    }
+    PyDict_SetItemString(ret_obj, "mem_chunks", memchunk_obj);
+ 
+    /* node to cpu mappings */
+    node_to_cpu_obj = PyList_New(0);
+    /* build list of node to cpu mappings */
+    for ( i=0; i<info.nr_nodes; i++ )
+    {
+        uint64_t cpumap = (uint64_t)map[i];
+        PyObject *cpus = PyList_New(0);
+ 
+        for ( j=0; cpumap != 0; j++ ) 
+        {
+            if ( cpumap & 1 )
+                PyList_Append(cpus, PyInt_FromLong(j));
+            cpumap >>=1;
+        }
+        PyList_Append(node_to_cpu_obj, cpus); 
+    }
+    /* add list of node to cpu mappings and nr_nodes to physinfo dictionary */
+    PyDict_SetItemString(ret_obj, "node_to_cpu",  node_to_cpu_obj);
+    PyDict_SetItemString(ret_obj, "nr_nodes", 
+             Py_BuildValue("i", info.nr_nodes));
+
+    /* free malloc'd memory */
+    free(chunks);
+    free(map);
+ 
+    return ret_obj;
 }
 
 static PyObject *pyxc_xeninfo(XcObject *self)
diff -r 6f3c6fb05af3 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/python/xen/xend/XendNode.py Sat Sep 16 08:17:13 2006 -0500
@@ -51,6 +51,69 @@ class XendNode:
                 ['version', ver],
                 ['machine', mch]]
 
+    def list_to_rangepairs(self,cmap):
+            cmap.sort()
+            pairs = []
+            x = y = 0
+            for i in range(0,len(cmap)):
+                try:
+                    if ((cmap[y+1] - cmap[i]) > 1):
+                        pairs.append((cmap[x],cmap[y]))
+                        x = y = i+1
+                    else:
+                        y = y + 1
+                # if we go off the end, then just add x to y
+                except IndexError:
+                    pairs.append((cmap[x],cmap[y]))
+
+            return pairs
+
+    def format_pairs(self,pairs):
+            if not pairs:
+                return "no cpus"
+            out = ""
+            for f,s in pairs:
+                if (f==s):
+                    out += '%d'%f
+                else:
+                    out += '%d-%d'%(f,s)
+                out += ','
+            # trim trailing ','
+            return out[:-1]
+
+    def list_to_strrange(self,list):
+        return self.format_pairs(self.list_to_rangepairs(list))
+
+    def format_memchunks(self, pinfo):
+        str=''
+        whitespace=''
+        try:
+            chunk=pinfo['mem_chunks']
+            for i in range(0, pinfo['nr_nodes']):
+                str+='%snode%d:0x%016x-0x%016x\n' % (whitespace,
+                                                    chunk[i]['node'],
+                                                    chunk[i]['start_paddr'], 
+                                                    chunk[i]['end_paddr']) 
+                whitespace='%25s' % ''
+        except:
+            str='none\n' 
+        return str[:-1]
+        
+    def format_node_to_cpu(self, pinfo):
+        str=''
+        whitespace=''
+        try:
+            node_to_cpu=pinfo['node_to_cpu']
+            for i in range(0, pinfo['nr_nodes']):
+                str+='%snode%d:%s\n' % (whitespace,
+                                        i, 
+                                      self.list_to_strrange(node_to_cpu[i]))
+                whitespace='%25s' % ''        
+        except:
+            str='none\n'
+        return str[:-1];
+
+
     def physinfo(self):
         info = self.xc.physinfo()
 
@@ -62,6 +125,8 @@ class XendNode:
         # physinfo is in KiB
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
+        info['mem_chunks']   = self.format_memchunks(info)
+        info['node_to_cpu']  = self.format_node_to_cpu(info)
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -72,6 +137,8 @@ class XendNode:
                       'hw_caps',
                       'total_memory',
                       'free_memory',
+                      'mem_chunks',
+                      'node_to_cpu'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]
diff -r 6f3c6fb05af3 tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c   Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/xenmon/xenbaked.c   Sat Sep 16 08:17:13 2006 -0500
@@ -448,6 +448,9 @@ unsigned int get_num_cpus(void)
     int xc_handle = xc_interface_open();
     int ret;
 
+    /* ensure memory_chunks and node_to_cpu are NULL */
+    memset(&physinfo, 0, sizeof(physinfo));
+
     ret = xc_physinfo(xc_handle, &physinfo);
 
     if ( ret != 0 )
diff -r 6f3c6fb05af3 tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Sat Sep 16 08:17:13 2006 -0500
@@ -222,6 +222,9 @@ xenstat_node *xenstat_get_node(xenstat_h
 
        /* Store the handle in the node for later access */
        node->handle = handle;
+
+   /* ensure memory_chunks and node_to_cpu are NULL */
+   memset(&physinfo, 0, sizeof(physinfo));
 
        /* Get information about the physical system */
        if (xc_physinfo(handle->xc_handle, &physinfo) < 0) {
diff -r 6f3c6fb05af3 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/xentrace/xentrace.c Sat Sep 16 08:17:13 2006 -0500
@@ -259,6 +259,9 @@ unsigned int get_num_cpus(void)
     int xc_handle = xc_interface_open();
     int ret;
     
+    /* ensure memory_chunks and node_to_cpu are NULL */
+    memset(&physinfo, 0, sizeof(physinfo));
+
     ret = xc_physinfo(xc_handle, &physinfo);
     
     if ( ret != 0 )
diff -r 6f3c6fb05af3 tools/xm-test/tests/info/02_info_compiledata_pos.py
--- a/tools/xm-test/tests/info/02_info_compiledata_pos.py       Mon Sep 11 13:45:23 2006 -0500
+++ b/tools/xm-test/tests/info/02_info_compiledata_pos.py       Mon Sep 11 13:45:23 2006 -0500
@@ -18,9 +18,7 @@ for line in lines:
 for line in lines:
     pieces = line.split(" : ", 1)
 
-    if len(pieces) < 2:
-        FAIL("Found invalid line: [%s]" % line)
-    else:
+    if len(pieces) > 1:
         map[pieces[0]] = pieces[1]
 
 for field in ["cores_per_socket", "threads_per_core", "cpu_mhz",
diff -r 6f3c6fb05af3 xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c     Mon Sep 11 13:45:23 2006 -0500
+++ b/xen/arch/x86/sysctl.c     Sat Sep 16 08:17:13 2006 -0500
@@ -24,6 +24,10 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 #include <asm/processor.h>
+#include <asm/numa.h>
+#include <asm/topology.h>
+
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
 
 long arch_do_sysctl(
     struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
@@ -35,6 +39,10 @@ long arch_do_sysctl(
 
     case XEN_SYSCTL_physinfo:
     {
+        int i,j;
+        node_data_t *chunks;
+        u64 *map, node_to_cpu_64[MAX_NUMNODES];
+
         xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo;
 
         pi->threads_per_core =
@@ -44,7 +52,6 @@ long arch_do_sysctl(
         pi->sockets_per_node = 
             num_online_cpus() / cpus_weight(cpu_core_map[0]);
 
-        pi->nr_nodes         = 1;
         pi->total_pages      = total_pages;
         pi->free_pages       = avail_domheap_pages();
         pi->scrub_pages      = avail_scrub_pages();
@@ -52,6 +59,48 @@ long arch_do_sysctl(
         memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
         memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
         ret = 0;
+
+        /* fetch memory_chunk pointer from guest*/
+        get_xen_guest_handle(chunks, sysctl->u.physinfo.memory_chunks);
+
+        /* if it is set, fill out memory chunk array */
+        if ( chunks != NULL )
+            for_each_online_node(i)
+            {
+                /* copy memory chunk structs to guest */
+                if ( copy_to_guest_offset(sysctl->u.physinfo.memory_chunks, i,
+                                          &(node_data[i]), 1) ) {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+
+        /* set number of nodes */
+        pi->nr_nodes = num_online_nodes();
+
+        /* fetch node_to_cpu pointer from guest */
+        get_xen_guest_handle(map, sysctl->u.physinfo.node_to_cpu);
+
+        /* if set, fill out node_to_cpu array */
+        if ( map != NULL )
+        {
+            /* copy cpu to node mapping to domU */
+            /* converting cpumask to u64 b/c userspace doesn't
+             * know about cpumask_t and is accepting a u64 */
+            memset(node_to_cpu_64, 0, sizeof(node_to_cpu_64));
+            for ( i = 0; i < pi->nr_nodes; i++ ) {
+                for ( j = 0; j < num_online_cpus(); j++ )
+                    if ( cpu_isset(j, node_to_cpumask(i)) )
+                        node_to_cpu_64[i] |= (u64)1 << j;
+
+                if ( copy_to_guest_offset(sysctl->u.physinfo.node_to_cpu,
+                                          i, &(node_to_cpu_64[i]), 1) ) {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+        }
+
         if ( copy_to_guest(u_sysctl, sysctl, 1) )
             ret = -EFAULT;
     }
diff -r 6f3c6fb05af3 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Mon Sep 11 13:45:23 2006 -0500
+++ b/xen/include/public/arch-x86_32.h  Mon Sep 11 13:45:23 2006 -0500
@@ -48,6 +48,7 @@ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigne
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
 __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_XEN_GUEST_HANDLE(u64, uint64_t);
 DEFINE_XEN_GUEST_HANDLE(char);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
diff -r 6f3c6fb05af3 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Mon Sep 11 13:45:23 2006 -0500
+++ b/xen/include/public/arch-x86_64.h  Mon Sep 11 13:45:23 2006 -0500
@@ -49,6 +49,7 @@ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigne
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
 __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_XEN_GUEST_HANDLE(u64, uint64_t);
 DEFINE_XEN_GUEST_HANDLE(char);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
diff -r 6f3c6fb05af3 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Mon Sep 11 13:45:23 2006 -0500
+++ b/xen/include/public/sysctl.h       Sat Sep 16 08:17:13 2006 -0500
@@ -15,6 +15,7 @@
 
 #include "xen.h"
 #include "domctl.h"
+#include "numa_structs.h"
 
 #define XEN_SYSCTL_INTERFACE_VERSION 0x00000002
 
@@ -67,6 +68,8 @@ struct xen_sysctl_physinfo {
     uint64_t free_pages;
     uint64_t scrub_pages;
     uint32_t hw_cap[8];
+    XEN_GUEST_HANDLE(node_data_t) memory_chunks;
+    XEN_GUEST_HANDLE(u64) node_to_cpu;
 };
 typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
diff -r 6f3c6fb05af3 xen/include/xen/numa.h
--- a/xen/include/xen/numa.h    Mon Sep 11 13:45:23 2006 -0500
+++ b/xen/include/xen/numa.h    Mon Sep 11 13:45:23 2006 -0500
@@ -2,6 +2,7 @@
 #define _XEN_NUMA_H
 
 #include <xen/config.h>
+#include <public/numa_structs.h>
 
 #ifdef CONFIG_DISCONTIGMEM
 #include <asm/numnodes.h>
@@ -26,10 +27,4 @@ extern unsigned int cpu_to_node[];
 #include <xen/cpumask.h>
 extern cpumask_t node_to_cpumask[];
 
-typedef struct node_data {
-    unsigned long node_start_pfn;
-    unsigned long node_spanned_pages;
-    unsigned int  node_id;
-} node_data_t;
-
 #endif /* _XEN_NUMA_H */
diff -r 6f3c6fb05af3 xen/include/public/numa_structs.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/numa_structs.h Mon Sep 11 13:45:23 2006 -0500
@@ -0,0 +1,26 @@
+/*
+ * Ryan Grimm  <grimm@xxxxxxxxxx>
+ * Ryan Harper <ryanh@xxxxxxxxxx>
+ * Copyright (c) 2006, International Business Machines Corporation.
+ *
+ */
+
+#ifndef __XEN_PUBLIC_NUMA_STRUCTS_H__
+
+#define __XEN_PUBLIC_NUMA_STRUCTS_H__
+
+#include "xen.h"
+
+/* define these for xc to use b/c MAX_NUMNODES and MAX_CHUNKS
+ * are not exposed in /public */
+#define PUBLIC_MAX_NUMNODES 16
+#define PUBLIC_MAXCHUNKS 32
+
+typedef struct node_data {
+    unsigned long node_start_pfn;
+    unsigned long node_spanned_pages;
+    unsigned int  node_id;
+} node_data_t;
+DEFINE_XEN_GUEST_HANDLE(node_data_t);
+
+#endif
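
The node_to_cpu bitmaps above are what xm info renders as ranges such as
node0:0-7; the range formatting itself is done in XendNode.py.  For readers
following along, a rough C equivalent of that formatting (again illustrative
only, not part of the patch) might look like:

/* sketch only: print a node_to_cpu bitmap (bit j set => CPU j) as a
 * range string such as "0-7" or "0-3,8-11", mirroring XendNode.py */
#include <stdint.h>
#include <stdio.h>

static void print_cpu_ranges(uint64_t cpumap)
{
    int cpu, start = -1;
    const char *sep = "";

    for ( cpu = 0; cpu <= 64; cpu++ )
    {
        int set = (cpu < 64) && (cpumap & ((uint64_t)1 << cpu));

        if ( set && (start < 0) )
            start = cpu;                     /* open a new run */
        else if ( !set && (start >= 0) )
        {
            if ( start == cpu - 1 )
                printf("%s%d", sep, start);  /* run of a single CPU */
            else
                printf("%s%d-%d", sep, start, cpu - 1);
            sep = ",";
            start = -1;                      /* close the run */
        }
    }
    if ( *sep == '\0' )
        printf("no cpus");
    printf("\n");
}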

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
