WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 6/6] xen: Add NUMA support to Xen

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 6/6] xen: Add NUMA support to Xen
From: Ryan Harper <ryanh@xxxxxxxxxx>
Date: Mon, 1 May 2006 16:59:21 -0500
Cc: Ryan Grimm <grimm@xxxxxxxxxx>
Delivery-date: Mon, 01 May 2006 15:02:09 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.6+20040907i
Introduce two dom0_ops and a userspace tool used to probe Xen's heap
availability and examine which nodes a domain's memory resides within.

availheap takes a zone and a node value and returns the current number
of free pages in that area of the heap.  Using -1 as a wildcard, one can
probe the total free pages in a zone or node.

getnodestat examines the page ownership of a domain.  By walking the
domain's page list we can determine from which nodes a domain's memory
hails.

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@xxxxxxxxxx


diffstat output:
 b/tools/xen_numastat/Makefile       |   35 +++++
 b/tools/xen_numastat/xen_numastat.1 |   22 +++
 b/tools/xen_numastat/xen_numastat.c |  215 ++++++++++++++++++++++++++++++++++++
 tools/Makefile                      |    1 
 tools/libxc/xc_domain.c             |   45 +++++++
 tools/libxc/xenctrl.h               |   17 ++
 xen/common/dom0_ops.c               |   77 ++++++++++++
 xen/common/page_alloc.c             |    5 
 xen/include/public/dom0_ops.h       |   22 +++
 xen/include/xen/mm.h                |    5 
 10 files changed, 439 insertions(+), 5 deletions(-)

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
---
# HG changeset patch
# User Ryan Harper <ryanh@xxxxxxxxxx>
# Node ID ef7786ee83cfa618f54372a4495b0d35dbd59e32
# Parent  0d1f094caf6f5370a53aa2a4ff9201c1505becff
Introduce two dom0_ops and a userspace tool used to probe Xen's heap
availability and examine which nodes a domain's memory resides within.

availheap takes a zone and a node value and returns the current number of free
pages in that area of the heap.  Using -1 as a wildcard, one can probe the total
free pages in a zone or node.

getnodestat examines the page ownership of a domain.  By walking the domain's
page list we can determine from which nodes a domain's memory hails.

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
Signed-off-by: Ryan Grimm <grimm@xxxxxxxxxx>

diff -r 0d1f094caf6f -r ef7786ee83cf tools/Makefile
--- a/tools/Makefile    Mon May  1 15:57:40 2006
+++ b/tools/Makefile    Mon May  1 20:51:38 2006
@@ -13,6 +13,7 @@
 SUBDIRS += console
 SUBDIRS += xenmon
 SUBDIRS += guest-headers
+SUBDIRS += xen_numastat
 ifeq ($(VTPM_TOOLS),y)
 SUBDIRS += vtpm_manager
 SUBDIRS += vtpm
diff -r 0d1f094caf6f -r ef7786ee83cf tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c   Mon May  1 15:57:40 2006
+++ b/tools/libxc/xc_domain.c   Mon May  1 20:51:38 2006
@@ -513,6 +513,51 @@
     op.u.iomem_permission.allow_access = allow_access;
 
     return do_dom0_op(xc_handle, &op);
+}
+
+int xc_availheap(int xc_handle, 
+                 int zone,
+                 int node,
+                 uint32_t *nr_zones,
+                 uint32_t *nr_nodes,
+                 uint64_t *pages)
+{
+    DECLARE_DOM0_OP;
+    int rc = 0; 
+
+    op.cmd = DOM0_AVAILHEAP;
+    op.u.availheap.zone = zone;
+    op.u.availheap.node = node;
+
+    rc = do_dom0_op(xc_handle, &op);
+    if ( rc >= 0 ) {
+        if (nr_zones)
+            *nr_zones = op.u.availheap.nr_zones;
+        if (nr_nodes)
+            *nr_nodes = op.u.availheap.nr_nodes;
+        *pages = op.u.availheap.pages;
+    }
+
+    return rc;
+}
+                
+int xc_domain_getnodestat(int xc_handle,
+                          uint32_t domid,
+                          uint32_t node,
+                          uint64_t *pages)
+{
+    DECLARE_DOM0_OP;
+    int rc = 0; 
+
+    op.cmd = DOM0_GETDOMNODESTAT;
+    op.u.getdomnodestat.domain = (domid_t) domid;
+    op.u.getdomnodestat.node   = node;
+
+    rc = do_dom0_op(xc_handle, &op);
+    if ( rc >= 0 )
+        *pages = op.u.getdomnodestat.pages;
+
+    return rc;
 }
 
 /*
diff -r 0d1f094caf6f -r ef7786ee83cf tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon May  1 15:57:40 2006
+++ b/tools/libxc/xenctrl.h     Mon May  1 20:51:38 2006
@@ -563,6 +563,23 @@
  */
 int xc_tbuf_get_size(int xc_handle, uint32_t *size);
 
+/**
+ * This function retrieves the number of pages in the
+ * specified domain that are on the specified node.
+ *
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to query, -1 for per-node free list
+ * @parm node the node to query
+ * @parm *pages caller variable to put output
+ * @return 0 on success, <0 on failure.
+ */
+int xc_domain_getnodestat(int xc_handle, 
+                          uint32_t domid,
+                          uint32_t node,
+                          uint64_t *pages);
+
+int xc_availheap(int xc_handle, int zone, int node, 
+                 uint32_t *nr_zones, uint32_t *nr_nodes, uint64_t *pages);
 
 /* Execute a privileged dom0 operation. */
 int xc_dom0_op(int xc_handle, dom0_op_t *op);
diff -r 0d1f094caf6f -r ef7786ee83cf xen/common/dom0_ops.c
--- a/xen/common/dom0_ops.c     Mon May  1 15:57:40 2006
+++ b/xen/common/dom0_ops.c     Mon May  1 20:51:38 2006
@@ -22,6 +22,10 @@
 #include <public/dom0_ops.h>
 #include <public/sched_ctl.h>
 #include <acm/acm_hooks.h>
+#ifdef CONFIG_NUMA
+#include <xen/numa.h>
+#endif
+#include <xen/nodemask.h> 
 
 extern long arch_do_dom0_op(
     struct dom0_op *op, XEN_GUEST_HANDLE(dom0_op_t) u_dom0_op);
@@ -603,6 +607,79 @@
     }
     break;
 
+    case DOM0_AVAILHEAP:
+    {
+        ret = -EINVAL;
+        if ( op->u.availheap.node >= num_online_nodes() )
+            break;
+        if ( op->u.availheap.zone >= NR_ZONES )
+            break;
+
+        /* indicate the number of zones/nodes queried.
+         * NB: -1 is wild card for all zones/nodes */
+        ( op->u.availheap.zone < 0 ) ? 
+            (op->u.availheap.nr_zones=NR_ZONES) : 
+            (op->u.availheap.nr_zones=1);
+
+        ( op->u.availheap.node < 0 ) ?
+            (op->u.availheap.nr_nodes=num_online_nodes()) :
+            (op->u.availheap.nr_nodes=1);
+
+        op->u.availheap.pages = 
+            avail_heap_pages(op->u.availheap.zone, op->u.availheap.node);
+
+        if ( copy_to_guest(u_dom0_op, op, 1) )
+            ret = -EFAULT;
+        else
+            ret = 0;
+    }
+    break;
+
+    case DOM0_GETDOMNODESTAT:
+    {
+        struct domain *d;
+        struct page_info *pg;
+        
+        ret = -EINVAL;
+        if ( op->u.getdomnodestat.node >= num_online_nodes() )
+            break;
+        
+        ret = -ESRCH;
+        d = find_domain_by_id(op->u.getdomnodestat.domain);
+
+        if ( d != NULL )
+        {
+            /* clear out pages count */
+            op->u.getdomnodestat.pages = 0;
+
+            /* walk domain's page list and count pages on node */
+            spin_lock(&d->page_alloc_lock);
+            list_for_each_entry(pg, &d->page_list, list)
+            {
+#ifdef CONFIG_NUMA
+                if ( page_to_node(pg) == (int) op->u.getdomnodestat.node )
+#endif
+                    op->u.getdomnodestat.pages++;
+            }
+            list_for_each_entry(pg, &d->xenpage_list, list)
+            {
+#ifdef CONFIG_NUMA
+                if ( page_to_node(pg) == (int) op->u.getdomnodestat.node )
+#endif
+                    op->u.getdomnodestat.pages++;
+            }
+            spin_unlock(&d->page_alloc_lock);
+
+            put_domain(d);
+
+            if ( copy_to_guest(u_dom0_op, op, 1) )
+                ret = -EFAULT;
+            else
+                ret = 0;
+        }
+    }
+    break;    
+
     case DOM0_SETDOMAINHANDLE:
     {
         struct domain *d;
diff -r 0d1f094caf6f -r ef7786ee83cf xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Mon May  1 15:57:40 2006
+++ b/xen/common/page_alloc.c   Mon May  1 20:51:38 2006
@@ -287,11 +287,6 @@
  * BINARY BUDDY ALLOCATOR
  */
 
-#define MEMZONE_XEN 0
-#define MEMZONE_DOM 1
-#define MEMZONE_DMADOM 2
-#define NR_ZONES    3
-
 #define pfn_dom_zone_type(_pfn)                                 \
     (((_pfn) <= MAX_DMADOM_PFN) ? MEMZONE_DMADOM : MEMZONE_DOM)
 
diff -r 0d1f094caf6f -r ef7786ee83cf xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Mon May  1 15:57:40 2006
+++ b/xen/include/public/dom0_ops.h     Mon May  1 20:51:38 2006
@@ -479,6 +479,26 @@
 } dom0_hypercall_init_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_init_t);
 
+#define DOM0_GETDOMNODESTAT   49
+typedef struct dom0_getdomnodestat {
+    int domain;           /* domain to query, -1 to query per-node free list */
+    int32_t node;         /* node to query */
+    uint64_t pages;       /* number of pages on node */
+} dom0_getdomnodestat_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_getdomnodestat_t);
+
+#define DOM0_AVAILHEAP        50
+typedef struct dom0_availheap {
+    /* in  */
+    int zone;             /* query available pages in zone, -1 for all */
+    int node;             /* query available pages in node, -1 for all */
+    /* out */
+    uint32_t nr_zones;    /* number of zones queried */
+    uint32_t nr_nodes;    /* number of nodes queried */
+    uint64_t pages;
+} dom0_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_availheap_t);
+ 
 typedef struct dom0_op {
     uint32_t cmd;
     uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
@@ -520,6 +540,8 @@
         struct dom0_irq_permission    irq_permission;
         struct dom0_iomem_permission  iomem_permission;
         struct dom0_hypercall_init    hypercall_init;
+        struct dom0_getdomnodestat    getdomnodestat;
+        struct dom0_availheap         availheap;
         uint8_t                       pad[128];
     } u;
 } dom0_op_t;
diff -r 0d1f094caf6f -r ef7786ee83cf xen/include/xen/mm.h
--- a/xen/include/xen/mm.h      Mon May  1 15:57:40 2006
+++ b/xen/include/xen/mm.h      Mon May  1 20:51:38 2006
@@ -32,6 +32,11 @@
 #include <xen/types.h>
 #include <xen/list.h>
 #include <xen/spinlock.h>
+
+#define MEMZONE_XEN 0
+#define MEMZONE_DOM 1
+#define MEMZONE_DMADOM 2
+#define NR_ZONES    3
 
 struct domain;
 struct page_info;
diff -r 0d1f094caf6f -r ef7786ee83cf tools/xen_numastat/Makefile
--- /dev/null   Mon May  1 15:57:40 2006
+++ b/tools/xen_numastat/Makefile       Mon May  1 20:51:38 2006
@@ -0,0 +1,35 @@
+INSTALL                = install
+INSTALL_PROG   = $(INSTALL) -m0755
+INSTALL_DIR    = $(INSTALL) -d -m0755
+INSTALL_DATA   = $(INSTALL) -m0644
+
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CFLAGS  += -Wall -Werror -g
+
+CFLAGS  += -I $(XEN_XC)
+CFLAGS  += -I $(XEN_LIBXC)
+
+HDRS     = $(wildcard *.h)
+OBJS     = $(patsubst %.c,%.o,$(wildcard *.c))
+
+BIN      = xen_numastat
+MAN1     = $(wildcard *.1)
+LIBBIN   = 
+
+all: build
+build: $(BIN)
+
+install: build
+       [ -d $(DESTDIR)/usr/bin ] || $(INSTALL_DIR) $(DESTDIR)/usr/bin
+       $(INSTALL_PROG) $(BIN) $(DESTDIR)/usr/bin
+       [ -d $(DESTDIR)/usr/share/man/man1 ] || \
+               $(INSTALL_DIR) $(DESTDIR)/usr/share/man/man1
+       $(INSTALL_DATA) $(MAN1) $(DESTDIR)/usr/share/man/man1
+
+clean:
+       $(RM) *.a *.so *.o $(BIN)
+
+%: %.c $(HDRS) Makefile
+       $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxenctrl
diff -r 0d1f094caf6f -r ef7786ee83cf tools/xen_numastat/xen_numastat.1
--- /dev/null   Mon May  1 15:57:40 2006
+++ b/tools/xen_numastat/xen_numastat.1 Mon May  1 20:51:38 2006
@@ -0,0 +1,22 @@
+.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.35.
+.TH XEN_NUMASTAT "1" "May 2006" "Usage: xen_numastat [OPTION]" "User Commands"
+.SH NAME
+xen_numastat - Displays NUMA statistics about machine and domains
+.SH SYNOPSIS
+.B xen_numastat
+[\fIOPTIONS\fR]
+.SH DESCRIPTION
+Displays NUMA statistics about machine and domains
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display this help and exit
+.TP
+\fB\-H\fR, \fB\-\-heapinfo\fR
+display info about Xen HEAP
+.TP
+\fB\-d\fR, \fB\-\-domid\fR <D>
+display NUMA info about domain D
+.TP
+\fB\-p\fR, \fB\-\-physinfo\fR
+display NUMA info about this machine
+.PP
diff -r 0d1f094caf6f -r ef7786ee83cf tools/xen_numastat/xen_numastat.c
--- /dev/null   Mon May  1 15:57:40 2006
+++ b/tools/xen_numastat/xen_numastat.c Mon May  1 20:51:38 2006
@@ -0,0 +1,215 @@
+/*\
+ *  Copyright (C) International Business Machines  Corp., 2006
+ *  Author(s): Ryan Harper <ryanh@xxxxxxxxxx>
+ *
+ *  Xen Domain NUMA statistics tool
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ * 
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+\*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <xenctrl.h>
+#include <getopt.h>
+#include <inttypes.h>
+
+#define DOMAIN 0
+#define PHYSICAL 1
+/* this is a bit arbitrary, might be nice to query from xen */
+#define MAX_DOMAINS 256
+
+/* taken from xen/include/xen/mm.h */
+#define MEMZONE_XEN 0
+#define MEMZONE_DOM 1
+#define MEMZONE_DMADOM 2
+
+static void usage(const char *program) {
+       printf("Usage: %s [OPTION]\n"
+              "Displays NUMA statistics about machine and domains \n"
+              "\n"
+              "  -h, --help       display this help and exit\n"
+              "  -H, --heapinfo   display info about Xen HEAP\n"
+              "  -d, --domid <D>  display NUMA info about domain D\n"
+              "  -p, --physinfo   display NUMA info about this machine\n"
+              , program);
+}
+
+int str_to_domid(const char* domidstr) {
+   char *end = NULL;
+   int domid;
+   
+   domid = strtol(domidstr, &end, 10);
+   if (end && *end) {
+      fprintf(stderr, "Invalid DOMID `%s'\n", domidstr);
+      fprintf(stderr, "*end -> %c\n", *end);
+      exit(EINVAL);
+   }
+
+   return domid;
+}
+
+void display_dominfo(int xc_handle, int domid, int nr_nodes)
+{
+   xc_domaininfo_t *dominfo =NULL;
+   uint64_t pages;
+   unsigned int i, j, num_domains = 1;
+
+   /* malloc a large buffer for info */
+   if ( !(dominfo = malloc(MAX_DOMAINS * sizeof(xc_domaininfo_t))) )
+       err(errno, "failed to malloc memory");
+       
+   /* get actual number of domains */
+   num_domains = xc_domain_getinfolist(xc_handle, 0, MAX_DOMAINS, dominfo);
+
+   for (i=0; i<num_domains; i++) {
+      /* show all domains, or only requested domain */
+      if (domid == -1 || domid == dominfo[i].domain) {
+          /* print domain per-node info */
+         for (j=0; j < nr_nodes; j++) {
+            if (xc_domain_getnodestat(xc_handle, 
+                                      dominfo[i].domain, j, &pages) < 0)
+               err(errno, "xc_domain_getnodestat()");
+            printf("DOM%d: NODE%d: PAGES: %"PRIu64"\n", 
+                   dominfo[i].domain, j, pages);
+         }
+      }
+   }
+
+   free(dominfo);
+}
+
+void display_heapinfo(int xc_handle)
+{
+   int i,j;
+   uint32_t nr_zones, nr_nodes;
+   uint64_t pages;
+   char zone[8];
+
+   /* get zones/nodes and total free pages */
+   if( xc_availheap(xc_handle, -1, -1, &nr_zones, &nr_nodes, &pages) )
+      err(errno, "xc_availheap()");
+
+   printf("HEAPINFO: TOTAL_FREE_PAGES: %"PRIu64"\n", pages);
+   printf("HEAPINFO: NR_ZONES: %d\n", nr_zones);
+   printf("HEAPINFO: NR_NODES: %d\n", nr_nodes);
+
+   for (i=0; i<nr_zones; i++) {
+      switch(i) {
+          case MEMZONE_XEN:
+              sprintf(zone, "XEN");
+              break;
+          case MEMZONE_DOM:
+              sprintf(zone, "DOM");
+              break;
+          case MEMZONE_DMADOM:
+              sprintf(zone, "DMA");
+              break;
+      }
+      for (j=0; j<nr_nodes; j++) {
+         if( xc_availheap(xc_handle, i, j, NULL, NULL, &pages) )
+            err(errno, "xc_availheap()");
+         printf("HEAPINFO: ZONE_%s: NODE%d: FREE_PAGES: %"PRIu64"\n", 
+                zone, j, pages);
+      }
+   }
+}
+
+void display_physinfo(int xc_handle, xc_physinfo_t *info) 
+{
+
+   printf("PHYSINFO: NR_NODES: %d\n", info->nr_nodes);
+
+}
+
+int main(int argc, char * argv[])
+{
+    
+       char *sopt = "d:Hhp";
+       int ch;
+       int opt_ind=0, heapinfo=0, dominfo=0, physinfo=0, domid=-1;
+   int xc_handle, rc;
+   xc_physinfo_t info;
+   xc_memory_chunk_t *chunks;
+   xc_node_to_cpu_t *map;
+       struct option lopt[] = {
+      { "domid",    1, 0, 'd' },
+      { "heapinfo", 0, 0, 'H' },
+               { "help",     0, 0, 'h' },
+      { "physinfo", 0, 0, 'p' },
+               { 0, 0, 0, 0 }
+       };
+
+       while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+               switch(ch) {
+      case 'd':
+         /* convert domid */
+         domid = str_to_domid(optarg);
+         dominfo = 1;
+         break;
+      case 'H':
+         heapinfo = 1;
+         break;
+               case 'h':
+                       usage(argv[0]);
+                       exit(0);
+                       break;
+      case 'p':
+         physinfo = 1;
+         break;
+      }
+       }
+
+   /* display help if no options are passed */
+   if (dominfo == 0 && physinfo == 0 && heapinfo == 0) {
+       usage(argv[0]);
+       exit(0);
+   }
+   
+   /* set up */
+   xc_handle = xc_interface_open();
+   if (xc_handle == -1)
+      err(errno, "xc_interface_open()");
+
+   /* make space for mem chunks */
+   chunks = (xc_memory_chunk_t *)
+      malloc( sizeof(xc_memory_chunk_t) * PUBLIC_MAXCHUNKS );
+   set_xen_guest_handle(info.memory_chunks, chunks);
+
+   /* make space for node_to_cpu mapping */
+   map = (xc_node_to_cpu_t *)
+      malloc( sizeof(xc_node_to_cpu_t) * PUBLIC_MAX_NUMNODES );
+   set_xen_guest_handle(info.node_to_cpu, map);
+
+   /* get the machine physinfo */
+   rc = xc_physinfo(xc_handle, &info);
+   if (rc < 0)
+      err(errno, "xc_physinfo()");
+
+   if (physinfo > 0)
+      display_physinfo(xc_handle, &info);
+   if (heapinfo > 0)
+      display_heapinfo(xc_handle);
+   if (dominfo > 0)
+      display_dominfo(xc_handle, domid, info.nr_nodes);
+
+   /* free malloc'd memory */
+   free(chunks);
+   free(map);
+
+   xc_interface_close(xc_handle);
+
+   return 0;
+}

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>