[Xen-changelog] [xen-unstable] Host Numa information in dom0

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Host Numa information in dom0
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 09 Apr 2010 01:00:25 -0700
Delivery-date: Fri, 09 Apr 2010 01:04:19 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1270653725 -3600
# Node ID 28e5409e3fb377830a5f4346fd414d3d158f3483
# Parent  f0ef396d8c334100293fcba75ee89f311811b9f2
Host Numa information in dom0

The 'xm info' command now also reports the CPU topology and host NUMA
information. This will later be used to build guest NUMA support. The
patch reworks the physinfo sysctl, adds the topologyinfo and numainfo
sysctls, and updates the python and libxc code accordingly.

Signed-off-by: Nitin A Kamble <nitin.a.kamble@xxxxxxxxx>
---
 tools/libxc/xc_misc.c             |   37 ++++++
 tools/libxc/xenctrl.h             |   14 ++
 tools/python/xen/lowlevel/xc/xc.c |  215 ++++++++++++++++++++++++++------------
 tools/python/xen/xend/XendNode.py |   63 ++++++-----
 tools/python/xen/xend/balloon.py  |   14 --
 xen/arch/x86/sysctl.c             |  140 ++++++++++++++++++++++--
 xen/common/page_alloc.c           |    6 +
 xen/include/asm-x86/numa.h        |    1 
 xen/include/public/sysctl.h       |   90 ++++++++++++---
 xen/include/xen/mm.h              |    1 
 10 files changed, 447 insertions(+), 134 deletions(-)
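
As a rough illustration of the new libxc interfaces added by this
changeset, the sketch below shows how a C caller might retrieve the
topology and NUMA data. This is an editorial example, not part of the
patch: it mirrors what pyxc_topologyinfo()/pyxc_numainfo() do further
down, and assumes the pre-4.1 libxc API in this tree, where
xc_interface_open() returns a plain int handle.

#include <stdio.h>
#include <inttypes.h>
#include <xenctrl.h>

#define MAX_CPU_INDEX  255
#define MAX_NODE_INDEX  31

int main(void)
{
    int xc_handle = xc_interface_open();
    xc_topologyinfo_t tinfo = { 0 };
    xc_numainfo_t ninfo = { 0 };
    xc_cpu_to_core_t     coremap[MAX_CPU_INDEX + 1];
    xc_cpu_to_socket_t   socketmap[MAX_CPU_INDEX + 1];
    xc_cpu_to_node_t     nodemap[MAX_CPU_INDEX + 1];
    xc_node_to_memsize_t memsize[MAX_NODE_INDEX + 1];
    xc_node_to_memfree_t memfree[MAX_NODE_INDEX + 1];
    int i;

    if ( xc_handle < 0 )
        return 1;

    /* The caller supplies the output arrays and their bound, exactly as
     * pyxc_topologyinfo() does. */
    set_xen_guest_handle(tinfo.cpu_to_core, coremap);
    set_xen_guest_handle(tinfo.cpu_to_socket, socketmap);
    set_xen_guest_handle(tinfo.cpu_to_node, nodemap);
    tinfo.max_cpu_index = MAX_CPU_INDEX;

    if ( xc_topologyinfo(xc_handle, &tinfo) == 0 )
        for ( i = 0; i < tinfo.max_cpu_index && i <= MAX_CPU_INDEX; i++ )
            printf("cpu%3d: core %u socket %u node %u\n",
                   i, coremap[i], socketmap[i], nodemap[i]);

    /* node_to_node_distance is left NULL here; the sysctl skips null
     * guest handles. */
    set_xen_guest_handle(ninfo.node_to_memsize, memsize);
    set_xen_guest_handle(ninfo.node_to_memfree, memfree);
    ninfo.max_node_index = MAX_NODE_INDEX;

    if ( xc_numainfo(xc_handle, &ninfo) == 0 )
        for ( i = 0; i < ninfo.max_node_index && i <= MAX_NODE_INDEX; i++ )
            printf("node%2d: %"PRIu64" MB total, %"PRIu64" MB free\n",
                   i, memsize[i] >> 20, memfree[i] >> 20);

    xc_interface_close(xc_handle);
    return 0;
}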

diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/libxc/xc_misc.c     Wed Apr 07 16:22:05 2010 +0100
@@ -79,6 +79,43 @@ int xc_physinfo(int xc_handle,
 
     return 0;
 }
+
+int xc_topologyinfo(int xc_handle,
+                xc_topologyinfo_t *put_info)
+{
+    int ret;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_topologyinfo;
+
+    memcpy(&sysctl.u.topologyinfo, put_info, sizeof(*put_info));
+
+    if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 )
+        return ret;
+
+    memcpy(put_info, &sysctl.u.topologyinfo, sizeof(*put_info));
+
+    return 0;
+}
+
+int xc_numainfo(int xc_handle,
+                xc_numainfo_t *put_info)
+{
+    int ret;
+    DECLARE_SYSCTL;
+
+    sysctl.cmd = XEN_SYSCTL_numainfo;
+
+    memcpy(&sysctl.u.numainfo, put_info, sizeof(*put_info));
+
+    if ((ret = do_sysctl(xc_handle, &sysctl)) != 0)
+        return ret;
+
+    memcpy(put_info, &sysctl.u.numainfo, sizeof(*put_info));
+
+    return 0;
+}
+
 
 int xc_sched_id(int xc_handle,
                 int *sched_id)
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/libxc/xenctrl.h     Wed Apr 07 16:22:05 2010 +0100
@@ -612,9 +612,19 @@ int xc_send_debug_keys(int xc_handle, ch
 int xc_send_debug_keys(int xc_handle, char *keys);
 
 typedef xen_sysctl_physinfo_t xc_physinfo_t;
+typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
+typedef xen_sysctl_numainfo_t xc_numainfo_t;
+
 typedef uint32_t xc_cpu_to_node_t;
-int xc_physinfo(int xc_handle,
-                xc_physinfo_t *info);
+typedef uint32_t xc_cpu_to_socket_t;
+typedef uint32_t xc_cpu_to_core_t;
+typedef uint64_t xc_node_to_memsize_t;
+typedef uint64_t xc_node_to_memfree_t;
+typedef uint32_t xc_node_to_node_dist_t;
+
+int xc_physinfo(int xc_handle, xc_physinfo_t *info);
+int xc_topologyinfo(int xc_handle, xc_topologyinfo_t *info);
+int xc_numainfo(int xc_handle, xc_numainfo_t *info);
 
 int xc_sched_id(int xc_handle,
                 int *sched_id);
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Apr 07 16:22:05 2010 +0100
@@ -1151,105 +1151,178 @@ static PyObject *pyxc_pages_to_kib(XcObj
     return PyLong_FromUnsignedLong(pages_to_kib(pages));
 }
 
-
 static PyObject *pyxc_physinfo(XcObject *self)
 {
-#define MAX_CPU_ID 255
-    xc_physinfo_t info;
+    xc_physinfo_t pinfo;
     char cpu_cap[128], virt_caps[128], *p;
-    int i, j, max_cpu_id, nr_nodes = 0;
-    uint64_t free_heap;
-    PyObject *ret_obj, *node_to_cpu_obj, *node_to_memory_obj;
-    PyObject *node_to_dma32_mem_obj;
-    xc_cpu_to_node_t map[MAX_CPU_ID + 1];
+    int i;
     const char *virtcap_names[] = { "hvm", "hvm_directio" };
 
-    set_xen_guest_handle(info.cpu_to_node, map);
-    info.max_cpu_id = MAX_CPU_ID;
-
-    if ( xc_physinfo(self->xc_handle, &info) != 0 )
+    if ( xc_physinfo(self->xc_handle, &pinfo) != 0 )
         return pyxc_error_to_exception();
 
     p = cpu_cap;
     *p = '\0';
-    for ( i = 0; i < sizeof(info.hw_cap)/4; i++ )
-        p += sprintf(p, "%08x:", info.hw_cap[i]);
+    for ( i = 0; i < sizeof(pinfo.hw_cap)/4; i++ )
+        p += sprintf(p, "%08x:", pinfo.hw_cap[i]);
     *(p-1) = 0;
 
     p = virt_caps;
     *p = '\0';
     for ( i = 0; i < 2; i++ )
-        if ( (info.capabilities >> i) & 1 )
+        if ( (pinfo.capabilities >> i) & 1 )
           p += sprintf(p, "%s ", virtcap_names[i]);
     if ( p != virt_caps )
       *(p-1) = '\0';
 
-    max_cpu_id = info.max_cpu_id;
-    if ( max_cpu_id > MAX_CPU_ID )
-        max_cpu_id = MAX_CPU_ID;
+    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s,s:s}",
+                            "nr_nodes",         pinfo.nr_nodes,
+                            "threads_per_core", pinfo.threads_per_core,
+                            "cores_per_socket", pinfo.cores_per_socket,
+                            "sockets_per_node", pinfo.sockets_per_node,
+                            "nr_cpus",          pinfo.nr_cpus, 
+                            "total_memory",     pages_to_kib(pinfo.total_pages),
+                            "free_memory",      pages_to_kib(pinfo.free_pages),
+                            "scrub_memory",     pages_to_kib(pinfo.scrub_pages),
+                            "cpu_khz",          pinfo.cpu_khz,
+                            "hw_caps",          cpu_cap,
+                            "virt_caps",        virt_caps);
+}
+
+static PyObject *pyxc_topologyinfo(XcObject *self)
+{
+#define MAX_CPU_INDEX 255
+    xc_topologyinfo_t tinfo;
+    int i, max_cpu_index;
+    PyObject *ret_obj;
+    PyObject *cpu_to_core_obj, *cpu_to_socket_obj, *cpu_to_node_obj;
+    xc_cpu_to_core_t coremap[MAX_CPU_INDEX + 1];
+    xc_cpu_to_socket_t socketmap[MAX_CPU_INDEX + 1];
+    xc_cpu_to_node_t nodemap[MAX_CPU_INDEX + 1];
+
+
+    set_xen_guest_handle(tinfo.cpu_to_core, coremap);
+    set_xen_guest_handle(tinfo.cpu_to_socket, socketmap);
+    set_xen_guest_handle(tinfo.cpu_to_node, nodemap);
+    tinfo.max_cpu_index = MAX_CPU_INDEX;
+
+    if ( xc_topologyinfo(self->xc_handle, &tinfo) != 0 )
+        return pyxc_error_to_exception();
+
+    max_cpu_index = tinfo.max_cpu_index;
+    if ( max_cpu_index > MAX_CPU_INDEX )
+        max_cpu_index = MAX_CPU_INDEX;
+
+    /* Construct cpu-to-* lists. */
+    cpu_to_core_obj = PyList_New(0);
+    cpu_to_socket_obj = PyList_New(0);
+    cpu_to_node_obj = PyList_New(0);
+    for ( i = 0; i < max_cpu_index; i++ )
+    {
+        PyObject *pyint;
+
+        pyint = PyInt_FromLong(coremap[i]);
+        PyList_Append(cpu_to_core_obj, pyint);
+        Py_DECREF(pyint);
+
+        pyint = PyInt_FromLong(socketmap[i]);
+        PyList_Append(cpu_to_socket_obj, pyint);
+        Py_DECREF(pyint);
+
+        pyint = PyInt_FromLong(nodemap[i]);
+        PyList_Append(cpu_to_node_obj, pyint);
+        Py_DECREF(pyint);
+    }
+
+    ret_obj = Py_BuildValue("{s:i}", "max_cpu_index", max_cpu_index);
+
+    PyDict_SetItemString(ret_obj, "cpu_to_core", cpu_to_core_obj);
+    Py_DECREF(cpu_to_core_obj);
+
+    PyDict_SetItemString(ret_obj, "cpu_to_socket", cpu_to_socket_obj);
+    Py_DECREF(cpu_to_socket_obj);
+ 
+    PyDict_SetItemString(ret_obj, "cpu_to_node", cpu_to_node_obj);
+    Py_DECREF(cpu_to_node_obj);
+ 
+    return ret_obj;
+#undef MAX_CPU_INDEX
+}
+
+static PyObject *pyxc_numainfo(XcObject *self)
+{
+#define MAX_NODE_INDEX 31
+    xc_numainfo_t ninfo;
+    int i, j, max_node_index;
+    uint64_t free_heap;
+    PyObject *ret_obj;
+    PyObject *node_to_memsize_obj, *node_to_memfree_obj;
+    PyObject *node_to_dma32_mem_obj, *node_to_node_dist_obj;
+    xc_node_to_memsize_t node_memsize[MAX_NODE_INDEX + 1];
+    xc_node_to_memfree_t node_memfree[MAX_NODE_INDEX + 1];
+    xc_node_to_node_dist_t nodes_dist[(MAX_NODE_INDEX * MAX_NODE_INDEX) + 1];
+
+    set_xen_guest_handle(ninfo.node_to_memsize, node_memsize);
+    set_xen_guest_handle(ninfo.node_to_memfree, node_memfree);
+    set_xen_guest_handle(ninfo.node_to_node_distance, nodes_dist);
+    ninfo.max_node_index = MAX_NODE_INDEX;
+    if( xc_numainfo(self->xc_handle, &ninfo) != 0 )
+        return pyxc_error_to_exception();
+
+    max_node_index = ninfo.max_node_index;
+    if ( max_node_index > MAX_NODE_INDEX )
+        max_node_index = MAX_NODE_INDEX;
 
     /* Construct node-to-* lists. */
-    node_to_cpu_obj = PyList_New(0);
-    node_to_memory_obj = PyList_New(0);
+    node_to_memsize_obj = PyList_New(0);
+    node_to_memfree_obj = PyList_New(0);
     node_to_dma32_mem_obj = PyList_New(0);
-    for ( i = 0; i <= info.max_node_id; i++ )
+    node_to_node_dist_obj = PyList_New(0);
+    for ( i = 0; i < max_node_index; i++ )
     {
-        int node_exists = 0;
         PyObject *pyint;
 
-        /* CPUs. */
-        PyObject *cpus = PyList_New(0);
-        for ( j = 0; j <= max_cpu_id; j++ )
-        {
-            if ( i != map[j] )
-                continue;
-            pyint = PyInt_FromLong(j);
-            PyList_Append(cpus, pyint);
-            Py_DECREF(pyint);
-            node_exists = 1;
-        }
-        PyList_Append(node_to_cpu_obj, cpus); 
-        Py_DECREF(cpus);
-
-        /* Memory. */
-        xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
-        node_exists = node_exists || (free_heap != 0);
-        pyint = PyInt_FromLong(free_heap / 1024);
-        PyList_Append(node_to_memory_obj, pyint);
+        /* Total Memory */
+        pyint = PyInt_FromLong(node_memsize[i] >> 20); /* MB */
+        PyList_Append(node_to_memsize_obj, pyint);
+        Py_DECREF(pyint);
+
+        /* Free Memory */
+        pyint = PyInt_FromLong(node_memfree[i] >> 20); /* MB */
+        PyList_Append(node_to_memfree_obj, pyint);
         Py_DECREF(pyint);
 
         /* DMA memory. */
         xc_availheap(self->xc_handle, 0, 32, i, &free_heap);
-        pyint = PyInt_FromLong(free_heap / 1024);
+        pyint = PyInt_FromLong(free_heap >> 20); /* MB */
         PyList_Append(node_to_dma32_mem_obj, pyint);
         Py_DECREF(pyint);
 
-        if ( node_exists )
-            nr_nodes++;
-    }
-
-    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s:s:s}",
-                            "nr_nodes",         nr_nodes,
-                            "max_node_id",      info.max_node_id,
-                            "max_cpu_id",       info.max_cpu_id,
-                            "threads_per_core", info.threads_per_core,
-                            "cores_per_socket", info.cores_per_socket,
-                            "nr_cpus",          info.nr_cpus, 
-                            "total_memory",     pages_to_kib(info.total_pages),
-                            "free_memory",      pages_to_kib(info.free_pages),
-                            "scrub_memory",     pages_to_kib(info.scrub_pages),
-                            "cpu_khz",          info.cpu_khz,
-                            "hw_caps",          cpu_cap,
-                            "virt_caps",        virt_caps);
-    PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
-    Py_DECREF(node_to_cpu_obj);
-    PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
-    Py_DECREF(node_to_memory_obj);
+        /* Node to Node Distance */
+        for ( j = 0; j < ninfo.max_node_index; j++ )
+        {
+            pyint = PyInt_FromLong(nodes_dist[(i * ninfo.max_node_index) + j]);
+            PyList_Append(node_to_node_dist_obj, pyint);
+            Py_DECREF(pyint);
+        }
+    }
+
+    ret_obj = Py_BuildValue("{s:i}", "max_node_index", max_node_index);
+
+    PyDict_SetItemString(ret_obj, "node_memsize", node_to_memsize_obj);
+    Py_DECREF(node_to_memsize_obj);
+
+    PyDict_SetItemString(ret_obj, "node_memfree", node_to_memfree_obj);
+    Py_DECREF(node_to_memfree_obj);
+
     PyDict_SetItemString(ret_obj, "node_to_dma32_mem", node_to_dma32_mem_obj);
     Py_DECREF(node_to_dma32_mem_obj);
+
+    PyDict_SetItemString(ret_obj, "node_to_node_dist", node_to_node_dist_obj);
+    Py_DECREF(node_to_node_dist_obj);
  
     return ret_obj;
-#undef MAX_CPU_ID
+#undef MAX_NODE_INDEX
 }
 
 static PyObject *pyxc_xeninfo(XcObject *self)
@@ -2056,6 +2129,20 @@ static PyMethodDef pyxc_methods[] = {
       METH_NOARGS, "\n"
       "Get information about the physical host machine\n"
       "Returns [dict]: information about the hardware"
+      "        [None]: on failure.\n" },
+
+    { "topologyinfo",
+      (PyCFunction)pyxc_topologyinfo,
+      METH_NOARGS, "\n"
+      "Get information about the cpu topology on the host machine\n"
+      "Returns [dict]: information about the cpu topology on host"
+      "        [None]: on failure.\n" },
+
+    { "numainfo",
+      (PyCFunction)pyxc_numainfo,
+      METH_NOARGS, "\n"
+      "Get NUMA information on the host machine\n"
+      "Returns [dict]: NUMA information on host"
       "        [None]: on failure.\n" },
 
     { "xeninfo",
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/python/xen/xend/XendNode.py Wed Apr 07 16:22:05 2010 +0100
@@ -878,65 +878,70 @@ class XendNode:
     def list_to_strrange(self,list):
         return self.format_pairs(self.list_to_rangepairs(list))
 
-    def format_node_to_cpu(self, pinfo):
-        str=''
-        whitespace=''
+    def format_cpu_to_core_socket_node(self, tinfo):
         try:
-            node_to_cpu=pinfo['node_to_cpu']
-            for i in range(0, pinfo['max_node_id']+1):
-                str+='%snode%d:%s\n' % (whitespace,
-                                        i, 
-                                      self.list_to_strrange(node_to_cpu[i]))
-                whitespace='%25s' % ''        
+            nr_cpus=tinfo['max_cpu_index']
+            str='\ncpu:    core    socket     node\n'
+            for i in range(0, nr_cpus):
+                str+='%3d:%8d %8d %8d\n' % (i, 
+                                          tinfo['cpu_to_core'][i],
+                                          tinfo['cpu_to_socket'][i],
+                                          tinfo['cpu_to_node'][i])
         except:
             str='none\n'
         return str[:-1];
-    def format_node_to_memory(self, pinfo, key):
-        str=''
-        whitespace=''
+
+    def format_numa_info(self, ninfo):
         try:
-            node_to_memory=pinfo[key]
-            for i in range(0, pinfo['max_node_id']+1):
-                str+='%snode%d:%d\n' % (whitespace,
-                                        i,
-                                        node_to_memory[i] / 1024)
-                whitespace='%25s' % ''
+            nr_nodes=ninfo['max_node_index']
+            str='\nnode: TotalMemory FreeMemory dma32Memory NodeDist:'
+            for i in range(0, nr_nodes):
+                str+='%4d ' % i
+            str+='\n'
+            for i in range(0, nr_nodes):
+                str+='%4d:  %8dMB %8dMB  %8dMB         :' % (i, 
+                                      ninfo['node_memsize'][i],
+                                      ninfo['node_memfree'][i],
+                                      ninfo['node_to_dma32_mem'][i])
+                for j in range(0, nr_nodes):
+                    str+='%4d ' % ninfo['node_to_node_dist'][(i*nr_nodes)+j]
+                str+='\n'
         except:
             str='none\n'
         return str[:-1];
 
-
     def physinfo(self):
         info = self.xc.physinfo()
+        tinfo = self.xc.topologyinfo()
+        ninfo = self.xc.numainfo()
 
         info['cpu_mhz'] = info['cpu_khz'] / 1000
         
         # physinfo is in KiB, need it in MiB
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
-        info['node_to_cpu']  = self.format_node_to_cpu(info)
-        info['node_to_memory'] = \
-            self.format_node_to_memory(info, 'node_to_memory')
-        info['node_to_dma32_mem'] = \
-            self.format_node_to_memory(info, 'node_to_dma32_mem')
+
+        info['cpu_topology']  = \
+             self.format_cpu_to_core_socket_node(tinfo)
+
+        info['numa_info']  = \
+             self.format_numa_info(ninfo)
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
                       'cores_per_socket',
                       'threads_per_core',
+                      'sockets_per_node',
                       'cpu_mhz',
                       'hw_caps',
                       'virt_caps',
                       'total_memory',
                       'free_memory',
-                      'node_to_cpu',
-                      'node_to_memory',
-                      'node_to_dma32_mem',
-                      'max_node_id'
+                      'cpu_topology',
+                      'numa_info',
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]
-
 
     def pciinfo(self):
         from xen.xend.server.pciif import get_all_assigned_pci_devices
diff -r f0ef396d8c33 -r 28e5409e3fb3 tools/python/xen/xend/balloon.py
--- a/tools/python/xen/xend/balloon.py  Wed Apr 07 15:44:29 2010 +0100
+++ b/tools/python/xen/xend/balloon.py  Wed Apr 07 16:22:05 2010 +0100
@@ -184,15 +184,11 @@ def free(need_mem, dominfo):
             waitscrub = 1
             vcpus = dominfo.info['cpus'][0]
             for vcpu in vcpus:
-                nodenum = 0
-                for node in physinfo['node_to_cpu']:
-                    for cpu in node:
-                        if vcpu == cpu:
-                            if oldnode == -1:
-                                oldnode = nodenum
-                            elif oldnode != nodenum:
-                                waitscrub = 0
-                    nodenum = nodenum + 1
+                nodenum = xc.numainfo()['cpu_to_node'][cpu]
+                if oldnode == -1:
+                    oldnode = nodenum
+                elif oldnode != nodenum:
+                    waitscrub = 0
 
             if waitscrub == 1 and scrub_mem > 0:
                 log.debug("wait for scrub %s", scrub_mem)
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c     Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/arch/x86/sysctl.c     Wed Apr 07 16:22:05 2010 +0100
@@ -35,6 +35,8 @@ static long cpu_down_helper(void *data)
     return cpu_down(cpu);
 }
 
+extern int __node_distance(int a, int b);
+
 long arch_do_sysctl(
     struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
 {
@@ -45,25 +47,22 @@ long arch_do_sysctl(
 
     case XEN_SYSCTL_physinfo:
     {
-        uint32_t i, max_array_ent;
-        XEN_GUEST_HANDLE_64(uint32) cpu_to_node_arr;
-
         xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo;
 
         ret = xsm_physinfo();
         if ( ret )
             break;
 
-        max_array_ent = pi->max_cpu_id;
-        cpu_to_node_arr = pi->cpu_to_node;
 
         memset(pi, 0, sizeof(*pi));
-        pi->cpu_to_node = cpu_to_node_arr;
         pi->threads_per_core =
             cpus_weight(per_cpu(cpu_sibling_map, 0));
         pi->cores_per_socket =
             cpus_weight(per_cpu(cpu_core_map, 0)) / pi->threads_per_core;
         pi->nr_cpus = (u32)num_online_cpus();
+        pi->nr_nodes = (u32)num_online_nodes();
+        pi->sockets_per_node =  pi->nr_cpus / 
+                     (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core);
         pi->total_pages = total_pages;
         pi->free_pages = avail_domheap_pages();
         pi->scrub_pages = 0;
@@ -74,15 +73,56 @@ long arch_do_sysctl(
         if ( iommu_enabled )
             pi->capabilities |= XEN_SYSCTL_PHYSCAP_hvm_directio;
 
-        pi->max_node_id = last_node(node_online_map);
-        pi->max_cpu_id = last_cpu(cpu_online_map);
-        max_array_ent = min_t(uint32_t, max_array_ent, pi->max_cpu_id);
+        if ( copy_to_guest(u_sysctl, sysctl, 1) )
+            ret = -EFAULT;
+    }
+    break;
+        
+    case XEN_SYSCTL_topologyinfo:
+    {
+        uint32_t i, max_cpu_index;
+        XEN_GUEST_HANDLE_64(uint32) cpu_to_core_arr;
+        XEN_GUEST_HANDLE_64(uint32) cpu_to_socket_arr;
+        XEN_GUEST_HANDLE_64(uint32) cpu_to_node_arr;
+
+        xen_sysctl_topologyinfo_t *ti = &sysctl->u.topologyinfo;
+
+        max_cpu_index = ti->max_cpu_index;
+        cpu_to_core_arr = ti->cpu_to_core;
+        cpu_to_socket_arr = ti->cpu_to_socket;
+        cpu_to_node_arr = ti->cpu_to_node;
+
+        memset(ti, 0, sizeof(*ti));
+        ti->cpu_to_core = cpu_to_core_arr;
+        ti->cpu_to_socket = cpu_to_socket_arr;
+        ti->cpu_to_node = cpu_to_node_arr;
+
+        max_cpu_index = min_t(uint32_t, max_cpu_index, num_online_cpus());
+        ti->max_cpu_index = max_cpu_index;
 
         ret = 0;
 
-        if ( !guest_handle_is_null(cpu_to_node_arr) )
-        {
-            for ( i = 0; i <= max_array_ent; i++ )
+        for ( i = 0; i < max_cpu_index; i++ )
+        {
+            if ( !guest_handle_is_null(cpu_to_core_arr) )
+            {
+                uint32_t core = cpu_online(i) ? cpu_to_core(i) : ~0u;
+                if ( copy_to_guest_offset(cpu_to_core_arr, i, &core, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+            if ( !guest_handle_is_null(cpu_to_socket_arr) )
+            {
+                uint32_t socket = cpu_online(i) ? cpu_to_socket(i) : ~0u;
+                if ( copy_to_guest_offset(cpu_to_socket_arr, i, &socket, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+            if ( !guest_handle_is_null(cpu_to_node_arr) )
             {
                 uint32_t node = cpu_online(i) ? cpu_to_node(i) : ~0u;
                 if ( copy_to_guest_offset(cpu_to_node_arr, i, &node, 1) )
@@ -92,6 +132,82 @@ long arch_do_sysctl(
                 }
             }
         }
+
+        if (ret)
+            break;
+ 
+        if ( copy_to_guest(u_sysctl, sysctl, 1) )
+            ret = -EFAULT;
+    }
+    break;
+
+    case XEN_SYSCTL_numainfo:
+    {
+        uint32_t i, max_node_index;
+        XEN_GUEST_HANDLE_64(uint64) node_to_memsize_arr;
+        XEN_GUEST_HANDLE_64(uint64) node_to_memfree_arr;
+        XEN_GUEST_HANDLE_64(uint32) node_to_node_distance_arr;
+
+        xen_sysctl_numainfo_t *ni = &sysctl->u.numainfo;
+
+        max_node_index = ni->max_node_index;
+        node_to_memsize_arr = ni->node_to_memsize;
+        node_to_memfree_arr = ni->node_to_memfree;
+        node_to_node_distance_arr = ni->node_to_node_distance;
+
+        memset(ni, 0, sizeof(*ni));
+        ni->node_to_memsize = node_to_memsize_arr;
+        ni->node_to_memfree = node_to_memfree_arr;
+        ni->node_to_node_distance = node_to_node_distance_arr;
+
+        max_node_index = min_t(uint32_t, max_node_index, num_online_nodes());
+        ni->max_node_index = max_node_index;
+
+        ret = 0;
+
+        for ( i = 0; i < max_node_index; i++ )
+        {
+            if ( !guest_handle_is_null(node_to_memsize_arr) )
+            {
+                uint64_t memsize = node_online(i) ? 
+                                   node_spanned_pages(i) << PAGE_SHIFT : 0ul;
+                if ( copy_to_guest_offset(node_to_memsize_arr, i, &memsize, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+            if ( !guest_handle_is_null(node_to_memfree_arr) )
+            {
+                uint64_t memfree = node_online(i) ? 
+                                   avail_node_heap_pages(i) << PAGE_SHIFT : 0ul;
+                if ( copy_to_guest_offset(node_to_memfree_arr, i, &memfree, 1) )
+                {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+
+            if ( !guest_handle_is_null(node_to_node_distance_arr) )
+           {
+                int j;
+                for ( j = 0; j < max_node_index; j++)
+                {
+                    uint32_t distance = ~0u;
+                    if (node_online(i) && node_online (j)) 
+                        distance = __node_distance(i, j);
+                    
+                    if ( copy_to_guest_offset(node_to_node_distance_arr, 
+                         (i * max_node_index + j), &distance, 1) )
+                    {
+                        ret = -EFAULT;
+                        break;
+                    }
+                }
+            }
+        }
+        if (ret)
+            break;
 
         if ( copy_to_guest(u_sysctl, sysctl, 1) )
             ret = -EFAULT;
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/common/page_alloc.c   Wed Apr 07 16:22:05 2010 +0100
@@ -1256,6 +1256,12 @@ unsigned long avail_domheap_pages(void)
                             -1);
 }
 
+unsigned long avail_node_heap_pages(unsigned int nodeid)
+{
+    return avail_heap_pages(MEMZONE_XEN, NR_ZONES -1, nodeid);
+}
+
+
 static void pagealloc_info(unsigned char key)
 {
     unsigned int zone = MEMZONE_XEN;
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/asm-x86/numa.h
--- a/xen/include/asm-x86/numa.h        Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/include/asm-x86/numa.h        Wed Apr 07 16:22:05 2010 +0100
@@ -73,6 +73,7 @@ static inline __attribute__((pure)) int 
 #define NODE_DATA(nid)         (&(node_data[nid]))
 
 #define node_start_pfn(nid)    (NODE_DATA(nid)->node_start_pfn)
+#define node_spanned_pages(nid)        (NODE_DATA(nid)->node_spanned_pages)
 #define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn + \
                                 NODE_DATA(nid)->node_spanned_pages)
 
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/include/public/sysctl.h       Wed Apr 07 16:22:05 2010 +0100
@@ -34,7 +34,7 @@
 #include "xen.h"
 #include "domctl.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000007
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000008
 
 /*
  * Read console content from Xen buffer ring.
@@ -93,29 +93,14 @@ struct xen_sysctl_physinfo {
 struct xen_sysctl_physinfo {
     uint32_t threads_per_core;
     uint32_t cores_per_socket;
+    uint32_t sockets_per_node;
     uint32_t nr_cpus;
-    uint32_t max_node_id;
+    uint32_t nr_nodes;
     uint32_t cpu_khz;
     uint64_aligned_t total_pages;
     uint64_aligned_t free_pages;
     uint64_aligned_t scrub_pages;
     uint32_t hw_cap[8];
-
-    /*
-     * IN: maximum addressable entry in the caller-provided cpu_to_node array.
-     * OUT: largest cpu identifier in the system.
-     * If OUT is greater than IN then the cpu_to_node array is truncated!
-     */
-    uint32_t max_cpu_id;
-    /*
-     * If not NULL, this array is filled with node identifier for each cpu.
-     * If a cpu has no node information (e.g., cpu not present) then the
-     * sentinel value ~0u is written.
-     * The size of this array is specified by the caller in @max_cpu_id.
-     * If the actual @max_cpu_id is smaller than the array then the trailing
-     * elements of the array will not be written by the sysctl.
-     */
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
 
     /* XEN_SYSCTL_PHYSCAP_??? */
     uint32_t capabilities;
@@ -491,6 +476,73 @@ typedef struct xen_sysctl_lockprof_op xe
 typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
 
+#define XEN_SYSCTL_topologyinfo         16 
+struct xen_sysctl_topologyinfo {
+
+    /*
+     * IN: maximum addressable entry in the caller-provided cpu_to_core, 
+     * cpu_to_socket & cpu_to_node arrays.
+     * OUT: largest cpu identifier in the system.
+     * If OUT is greater than IN then the cpu_to_node array is truncated!
+     */
+    uint32_t max_cpu_index;
+
+    /*
+     * If not NULL, this array is filled with core/socket/node identifier for 
+     * each cpu.
+     * If a cpu has no core/socket/node information (e.g., cpu not present) 
+     * then the sentinel value ~0u is written.
+     * The size of this array is specified by the caller in @max_cpu_index.
+     * If the actual @max_cpu_index is smaller than the array then the trailing
+     * elements of the array will not be written by the sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;  /* node_number */
+
+};
+typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t);
+
+#define XEN_SYSCTL_numainfo          17        
+struct xen_sysctl_numainfo {
+    /*
+     * IN: maximum addressable entry in the caller-provided node_numbers, 
+     * node_to_memsize & node_to_memfree arrays.
+     * OUT: largest possible node index for the system.
+     * If OUT is greater than IN then these arrays are truncated!
+     */
+    uint32_t max_node_index;
+
+    /* For node_to_memsize & node_to_memfree arrays, the 
+     * entry with same index corrosponds to the same node.
+     * If a entry has no node information (e.g., node not present) then the 
+     * sentinel value ~0u is written for_node_number, and value 0u is written 
+     * for node_to_memsize & node_to_memfree.
+     * The size of this array is specified by the caller in @max_node_index. 
+     * If the actual @max_node_index is smaller than the array then the 
+     * trailing elements of the array will not be written by the sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint64) node_to_memsize;
+    XEN_GUEST_HANDLE_64(uint64) node_to_memfree;
+
+
+    /* node_to_node_distance is array of size (nr_nodes * nr_nodes) listing
+     * memory access distances between nodes. i'th  entry in the array 
+     * specifies distance between node (i / nr_nodes) & node (i % nr_nodes)
+     * If a entry has no node distance information (e.g., node not present) 
+     * then the sentinel value ~0u is written.
+     * The size of this array is specified by the caller in 
+     * @max_node_distance_index. If the max_node_index*max_node_index is 
+     * smaller than the array then the trailing elements of the array will 
+     * not be written by the sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint32) node_to_node_distance;
+};
+typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t);
+
+
 struct xen_sysctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
@@ -498,6 +550,8 @@ struct xen_sysctl {
         struct xen_sysctl_readconsole       readconsole;
         struct xen_sysctl_tbuf_op           tbuf_op;
         struct xen_sysctl_physinfo          physinfo;
+        struct xen_sysctl_topologyinfo      topologyinfo;
+        struct xen_sysctl_numainfo          numainfo;
         struct xen_sysctl_sched_id          sched_id;
         struct xen_sysctl_perfc_op          perfc_op;
         struct xen_sysctl_getdomaininfolist getdomaininfolist;
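
For reference, a minimal caller-side sketch of the node_to_node_distance
layout described above (editorial, not part of the patch; node_distance()
is a hypothetical helper name): entry i of the flat array holds the
distance between node (i / nr_nodes) and node (i % nr_nodes), so the
distance from node a to node b lives at index a * nr_nodes + b.

#include <stdint.h>

/* Look up the distance from node a to node b in the flat array filled
 * in by XEN_SYSCTL_numainfo.  Offline nodes read back the ~0u sentinel. */
static inline uint32_t node_distance(const uint32_t *dist,
                                     unsigned int nr_nodes,
                                     unsigned int a, unsigned int b)
{
    return dist[a * nr_nodes + b];
}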
diff -r f0ef396d8c33 -r 28e5409e3fb3 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Wed Apr 07 15:44:29 2010 +0100
+++ b/xen/include/xen/mm.h      Wed Apr 07 16:22:05 2010 +0100
@@ -57,6 +57,7 @@ unsigned long avail_domheap_pages_region
 unsigned long avail_domheap_pages_region(
     unsigned int node, unsigned int min_width, unsigned int max_width);
 unsigned long avail_domheap_pages(void);
+unsigned long avail_node_heap_pages(unsigned int);
 #define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f))
 #define free_domheap_page(p)  (free_domheap_pages(p,0))
 unsigned int online_page(unsigned long mfn, uint32_t *status);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
