Testing on an 8-node 128-way NUMA machine has exposed a problem with
Xen's nr_cpus calculation. In this case, since Xen cuts off recognized
CPUs at 32, the machine appears to have 16 CPUs on the first and second
nodes and none on the remaining nodes. Given this asymmetry, the
calculation of sockets_per_node (which is later used to calculate
nr_cpus) is incorrect:
pi->sockets_per_node = num_online_cpus() /(num_online_nodes() *
pi->cores_per_socket * pi->threads_per_core);
The most straightforward solution is to remove sockets_per_node, and
instead determine nr_cpus directly from num_online_cpus.
This patch has been tested on x86_64 NUMA machines.
--
Elizabeth Kon (Beth)
IBM Linux Technology Center
Open Hypervisor Team
email: eak@xxxxxxxxxx
diff -r b4278beaf354 docs/man/xm.pod.1
--- a/docs/man/xm.pod.1 Wed Oct 17 13:12:03 2007 +0100
+++ b/docs/man/xm.pod.1 Wed Oct 17 20:09:46 2007 -0700
@@ -446,7 +446,6 @@ page more readable):
machine : i686
nr_cpus : 2
nr_nodes : 1
- sockets_per_node : 2
cores_per_socket : 1
threads_per_core : 1
cpu_mhz : 696
diff -r b4278beaf354 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Oct 17 20:09:46 2007 -0700
@@ -721,7 +721,7 @@ static PyObject *pyxc_physinfo(XcObject
"max_cpu_id", info.max_cpu_id,
"threads_per_core", info.threads_per_core,
"cores_per_socket", info.cores_per_socket,
- "sockets_per_node", info.sockets_per_node,
+ "nr_cpus", info.nr_cpus,
"total_memory", pages_to_kib(info.total_pages),
"free_memory", pages_to_kib(info.free_pages),
"scrub_memory", pages_to_kib(info.scrub_pages),
diff -r b4278beaf354 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/python/xen/xend/XendNode.py Wed Oct 17 20:09:46 2007 -0700
@@ -475,7 +475,7 @@ class XendNode:
cpu_info = {
"nr_nodes": phys_info["nr_nodes"],
- "sockets_per_node": phys_info["sockets_per_node"],
+ "nr_cpus": phys_info["nr_cpus"],
"cores_per_socket": phys_info["cores_per_socket"],
"threads_per_core": phys_info["threads_per_core"]
}
@@ -580,17 +580,9 @@ class XendNode:
str='none\n'
return str[:-1];
- def count_cpus(self, pinfo):
- count=0
- node_to_cpu=pinfo['node_to_cpu']
- for i in range(0, pinfo['nr_nodes']):
- count+=len(node_to_cpu[i])
- return count;
-
def physinfo(self):
info = self.xc.physinfo()
- info['nr_cpus'] = self.count_cpus(info)
info['cpu_mhz'] = info['cpu_khz'] / 1000
# physinfo is in KiB, need it in MiB
@@ -600,7 +592,6 @@ class XendNode:
ITEM_ORDER = ['nr_cpus',
'nr_nodes',
- 'sockets_per_node',
'cores_per_socket',
'threads_per_core',
'cpu_mhz',
diff -r b4278beaf354 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/python/xen/xm/main.py Wed Oct 17 20:09:46 2007 -0700
@@ -1667,9 +1667,8 @@ def xm_info(args):
"release": getVal(["software_version", "release"]),
"version": getVal(["software_version", "version"]),
"machine": getVal(["software_version", "machine"]),
- "nr_cpus": len(getVal(["host_CPUs"], [])),
+ "nr_cpus": getVal(["cpu_configuration", "nr_cpus"]),
"nr_nodes": getVal(["cpu_configuration", "nr_nodes"]),
- "sockets_per_node": getVal(["cpu_configuration",
- "sockets_per_node"]),
"cores_per_socket": getVal(["cpu_configuration",
"cores_per_socket"]),
"threads_per_core": getVal(["cpu_configuration",
"threads_per_core"]),
"cpu_mhz": getCpuMhz(),
diff -r b4278beaf354 tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xenmon/xenbaked.c Wed Oct 17 20:09:46 2007 -0700
@@ -460,10 +460,7 @@ unsigned int get_num_cpus(void)
xc_interface_close(xc_handle);
opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;
- return (physinfo.threads_per_core *
- physinfo.cores_per_socket *
- physinfo.sockets_per_node *
- physinfo.nr_nodes);
+ return physinfo.nr_cpus;
}
diff -r b4278beaf354 tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xenstat/libxenstat/src/xenstat.c Wed Oct 17 20:09:46 2007 -0700
@@ -155,9 +155,7 @@ xenstat_node *xenstat_get_node(xenstat_h
}
node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
- node->num_cpus =
- (physinfo.threads_per_core * physinfo.cores_per_socket *
- physinfo.sockets_per_node * physinfo.nr_nodes);
+ node->num_cpus = physinfo.nr_cpus;
node->tot_mem = ((unsigned long long)physinfo.total_pages)
* handle->page_size;
node->free_mem = ((unsigned long long)physinfo.free_pages)
diff -r b4278beaf354 tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xentrace/xentrace.c Wed Oct 17 20:09:46 2007 -0700
@@ -309,10 +309,7 @@ unsigned int get_num_cpus(void)
xc_interface_close(xc_handle);
- return (physinfo.threads_per_core *
- physinfo.cores_per_socket *
- physinfo.sockets_per_node *
- physinfo.nr_nodes);
+ return physinfo.nr_cpus;
}
diff -r b4278beaf354 tools/xm-test/lib/XmTestLib/Xm.py
--- a/tools/xm-test/lib/XmTestLib/Xm.py Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xm-test/lib/XmTestLib/Xm.py Wed Oct 17 20:09:46 2007 -0700
@@ -218,11 +218,9 @@ def restartXend():
return status
def smpConcurrencyLevel():
- cores = int(getInfo("cores_per_socket"))
- threads = int(getInfo("threads_per_core"))
- sockets = int(getInfo("sockets_per_node"))
-
- return cores * sockets * threads
+ nr_cpus = int(getInfo("nr_cpus"))
+
+ return nr_cpus
if __name__ == "__main__":
if isDomainRunning("0"):
diff -r b4278beaf354 tools/xm-test/lib/XmTestReport/OSReport.py
--- a/tools/xm-test/lib/XmTestReport/OSReport.py Wed Oct 17 13:12:03 2007 +0100
+++ b/tools/xm-test/lib/XmTestReport/OSReport.py Wed Oct 17 20:09:46 2007 -0700
@@ -92,7 +92,6 @@ class Machine:
xenValues = {"nr_cpus" : "Unknown",
"nr_nodes" : "Unknown",
- "sockets_per_node" : "Unknown",
"cores_per_socket" : "Unknown",
"threads_per_core" : "Unknown",
"cpu_mhz" : "Unknown",
diff -r b4278beaf354 xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/ia64/xen/dom0_ops.c Wed Oct 17 20:09:46 2007 -0700
@@ -234,7 +234,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
{
case XEN_SYSCTL_physinfo:
{
- int i, node_cpus = 0;
+ int i;
uint32_t max_array_ent;
xen_sysctl_physinfo_t *pi = &op->u.physinfo;
@@ -242,18 +242,8 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
pi->threads_per_core = cpus_weight(cpu_sibling_map[0]);
pi->cores_per_socket =
cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+ pi->nr_cpus = (u32)num_online_cpus();
pi->nr_nodes = num_online_nodes();
-
- /*
- * Guess at a sockets_per_node value. Use the maximum number of
- * CPUs per node to avoid deconfigured CPUs breaking the average.
- */
- for_each_online_node(i)
- node_cpus = max(node_cpus, cpus_weight(node_to_cpumask(i)));
-
- pi->sockets_per_node = node_cpus /
- (pi->cores_per_socket * pi->threads_per_core);
-
pi->total_pages = total_pages;
pi->free_pages = avail_domheap_pages();
pi->scrub_pages = avail_scrub_pages();
diff -r b4278beaf354 xen/arch/powerpc/sysctl.c
--- a/xen/arch/powerpc/sysctl.c Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/powerpc/sysctl.c Wed Oct 17 20:09:46 2007 -0700
@@ -45,9 +45,7 @@ long arch_do_sysctl(struct xen_sysctl *s
cpus_weight(cpu_sibling_map[0]);
pi->cores_per_socket =
cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
- pi->sockets_per_node = num_online_cpus() /
- (num_online_nodes() * pi->cores_per_socket * pi->threads_per_core);
-
+ pi->nr_cpus = (u32)num_online_cpus();
pi->nr_nodes = num_online_nodes();
pi->total_pages = total_pages;
pi->free_pages = avail_domheap_pages();
diff -r b4278beaf354 xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/arch/x86/sysctl.c Wed Oct 17 20:09:46 2007 -0700
@@ -51,10 +51,8 @@ long arch_do_sysctl(
cpus_weight(cpu_sibling_map[0]);
pi->cores_per_socket =
cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
+ pi->nr_cpus = (u32)num_online_cpus();
pi->nr_nodes = num_online_nodes();
- pi->sockets_per_node = num_online_cpus() /
- (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core);
-
pi->total_pages = total_pages;
pi->free_pages = avail_domheap_pages();
pi->scrub_pages = avail_scrub_pages();
diff -r b4278beaf354 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h Wed Oct 17 13:12:03 2007 +0100
+++ b/xen/include/public/sysctl.h Wed Oct 17 20:09:46 2007 -0700
@@ -79,7 +79,7 @@ struct xen_sysctl_physinfo {
/* IN variables. */
uint32_t threads_per_core;
uint32_t cores_per_socket;
- uint32_t sockets_per_node;
+ uint32_t nr_cpus;
uint32_t nr_nodes;
uint32_t cpu_khz;
uint64_aligned_t total_pages;
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|