WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] x86: improve reporting through XENMEM_machine_memory_map

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] x86: improve reporting through XENMEM_machine_memory_map
From: "Jan Beulich" <JBeulich@xxxxxxxxxx>
Date: Tue, 03 Nov 2009 10:36:16 +0000
Delivery-date: Tue, 03 Nov 2009 02:36:46 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Since Dom0 derives machine address ranges usable for assigning PCI
device resources from the output of this sub-hypercall, Xen should make
sure it properly reports all ranges not suitable for this (as either
reserved or unusable):
- RAM regions excluded via command line option
- memory regions used by Xen itself (LAPIC, IOAPICs)

While the latter should generally already be excluded by the BIOS
provided E820 table, this apparently isn't always the case at least for
IOAPICs, and with Linux having got changed to account for this it seems
to make sense to also do so in Xen.

Generally the HPET range should also be excluded here, but since it
isn't being reflected in Dom0's iomem_caps (and can't be, as it's a
sub-page range) I wasn't sure whether adding explicit code for doing so
would be reasonable.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

--- 2009-10-27.orig/xen/arch/x86/e820.c 2009-09-22 11:02:53.000000000 +0200
+++ 2009-10-27/xen/arch/x86/e820.c      2009-10-27 12:12:34.000000000 +0100
@@ -373,7 +373,12 @@ static void __init clip_to_limit(uint64_
              ((e820.map[i].addr + e820.map[i].size) <= limit) )
             continue;
         old_limit = e820.map[i].addr + e820.map[i].size;
-        if ( e820.map[i].addr < limit )
+        if ( e820_change_range_type(&e820, max(e820.map[i].addr, limit),
+                                    old_limit, E820_RAM, E820_UNUSABLE) )
+        {
+            i = 0;
+        }
+        else if ( e820.map[i].addr < limit )
         {
             e820.map[i].size = limit - e820.map[i].addr;
         }
--- 2009-10-27.orig/xen/arch/x86/mm.c   2009-10-27 12:07:32.000000000 +0100
+++ 2009-10-27/xen/arch/x86/mm.c        2009-10-27 12:12:34.000000000 +0100
@@ -3962,6 +3962,37 @@ long do_update_descriptor(u64 pa, u64 de
 typedef struct e820entry e820entry_t;
 DEFINE_XEN_GUEST_HANDLE(e820entry_t);
 
+struct memory_map_context
+{
+    unsigned int n;
+    unsigned long s;
+    struct xen_memory_map map;
+};
+
+static int handle_iomem_range(unsigned long s, unsigned long e, void *p)
+{
+    struct memory_map_context *ctxt = p;
+
+    if ( s > ctxt->s )
+    {
+        e820entry_t ent;
+        XEN_GUEST_HANDLE(e820entry_t) buffer;
+
+        if ( ctxt->n + 1 >= ctxt->map.nr_entries )
+            return -EINVAL;
+        ent.addr = (uint64_t)ctxt->s << PAGE_SHIFT;
+        ent.size = (uint64_t)(s - ctxt->s) << PAGE_SHIFT;
+        ent.type = E820_RESERVED;
+        buffer = guest_handle_cast(ctxt->map.buffer, e820entry_t);
+        if ( __copy_to_guest_offset(buffer, ctxt->n, &ent, 1) < 0 )
+            return -EFAULT;
+        ctxt->n++;
+    }
+    ctxt->s = e + 1;
+
+    return 0;
+}
+
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
 {
     struct page_info *page = NULL;
@@ -4123,9 +4154,9 @@ long arch_memory_op(int op, XEN_GUEST_HA
 
     case XENMEM_machine_memory_map:
     {
-        struct xen_memory_map memmap;
+        struct memory_map_context ctxt;
         XEN_GUEST_HANDLE(e820entry_t) buffer;
-        int count;
+        unsigned int i;
         int rc;
 
         if ( !IS_PRIV(current->domain) )
@@ -4135,20 +4166,49 @@ long arch_memory_op(int op, XEN_GUEST_HA
         if ( rc )
             return rc;
 
-        if ( copy_from_guest(&memmap, arg, 1) )
+        if ( copy_from_guest(&ctxt.map, arg, 1) )
             return -EFAULT;
-        if ( memmap.nr_entries < e820.nr_map + 1 )
+        if ( ctxt.map.nr_entries < e820.nr_map + 1 )
             return -EINVAL;
 
-        buffer = guest_handle_cast(memmap.buffer, e820entry_t);
-
-        count = min((unsigned int)e820.nr_map, memmap.nr_entries);
-        if ( copy_to_guest(buffer, e820.map, count) < 0 )
+        buffer = guest_handle_cast(ctxt.map.buffer, e820entry_t);
+        if ( !guest_handle_okay(buffer, ctxt.map.nr_entries) )
             return -EFAULT;
 
-        memmap.nr_entries = count;
+        for ( i = 0, ctxt.n = 0, ctxt.s = 0; i < e820.nr_map; ++i, ++ctxt.n )
+        {
+            unsigned long s = PFN_DOWN(e820.map[i].addr);
+
+            if ( s )
+            {
+                rc = rangeset_report_ranges(current->domain->iomem_caps,
+                                            ctxt.s, s - 1,
+                                            handle_iomem_range, &ctxt);
+                if ( !rc )
+                    rc = handle_iomem_range(s, s, &ctxt);
+                if ( rc )
+                    return rc;
+            }
+            if ( ctxt.map.nr_entries <= ctxt.n + (e820.nr_map - i) )
+                return -EINVAL;
+            if ( __copy_to_guest_offset(buffer, ctxt.n, e820.map + i, 1) < 0 )
+                return -EFAULT;
+            ctxt.s = PFN_UP(e820.map[i].addr + e820.map[i].size);
+        }
+
+        if ( ctxt.s )
+        {
+            rc = rangeset_report_ranges(current->domain->iomem_caps, ctxt.s,
+                                        ~0UL, handle_iomem_range, &ctxt);
+            if ( !rc && ctxt.s )
+                rc = handle_iomem_range(~0UL, ~0UL, &ctxt);
+            if ( rc )
+                return rc;
+        }
+
+        ctxt.map.nr_entries = ctxt.n;
 
-        if ( copy_to_guest(arg, &memmap, 1) )
+        if ( copy_to_guest(arg, &ctxt.map, 1) )
             return -EFAULT;
 
         return 0;
--- 2009-10-27.orig/xen/common/rangeset.c       2008-09-08 12:57:38.000000000 +0200
+++ 2009-10-27/xen/common/rangeset.c    2009-10-27 12:12:34.000000000 +0100
@@ -251,6 +251,29 @@ int rangeset_contains_range(
     return contains;
 }
 
+int rangeset_report_ranges(
+    struct rangeset *r, unsigned long s, unsigned long e,
+    int (*cb)(unsigned long s, unsigned long e, void *), void *ctxt)
+{
+    struct range *x;
+    int rc = 0;
+
+    spin_lock(&r->lock);
+
+    for ( x = find_range(r, s); rc == 0 && x != NULL; x = next_range(r, x) )
+    {
+        if ( x->s > e )
+            break;
+        if ( s > x->e )
+            continue;
+        rc = cb(max(x->s, s), min(x->e, e), ctxt);
+    }
+
+    spin_unlock(&r->lock);
+
+    return rc;
+}
+
 int rangeset_add_singleton(
     struct rangeset *r, unsigned long s)
 {
--- 2009-10-27.orig/xen/include/xen/rangeset.h  2006-01-03 14:25:46.000000000 +0100
+++ 2009-10-27/xen/include/xen/rangeset.h       2009-10-27 12:12:34.000000000 +0100
@@ -53,6 +53,9 @@ int __must_check rangeset_remove_range(
     struct rangeset *r, unsigned long s, unsigned long e);
 int __must_check rangeset_contains_range(
     struct rangeset *r, unsigned long s, unsigned long e);
+int rangeset_report_ranges(
+    struct rangeset *r, unsigned long s, unsigned long e,
+    int (*cb)(unsigned long s, unsigned long e, void *), void *ctxt);
 
 /* Add/remove/query a single number. */
 int __must_check rangeset_add_singleton(



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-devel] [PATCH] x86: improve reporting through XENMEM_machine_memory_map, Jan Beulich <=