[Xen-devel] [PATCH,RFC 9/17] 32-on-64 memory ops

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH,RFC 9/17] 32-on-64 memory ops
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Wed, 04 Oct 2006 17:40:36 +0200
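
Support 32-on-64 hypercall argument translation for the memory-op paths:
each vCPU of a compatibility mode guest gets an argument translation area
mapped at a fixed hypervisor virtual address (PML4 slot 1), set up at vCPU
creation (and during dom0 construction) and freed on domain destruction.
On top of this, compat_memory_op() and compat wrappers for
update_descriptor and update_va_mapping{,_otherdomain} are implemented.
The start-extent continuation encoding of do_memory_op() moves to
xen/hypercall.h as MEMOP_EXTENT_SHIFT/MEMOP_CMD_MASK so the compat path
can share it, and hypercall_xlat_continuation() is added to fix up
already-translated arguments when a continuation gets created.
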
Index: 2006-10-04/xen/arch/x86/domain.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain.c       2006-10-04 15:18:36.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain.c    2006-10-04 15:18:51.000000000 +0200
@@ -114,6 +114,58 @@ void dump_pageframe_info(struct domain *
     }
 }
 
+#ifdef CONFIG_COMPAT
+int setup_arg_xlat_area(struct domain *d, unsigned int vcpu_id, l4_pgentry_t *l4tab)
+{
+    unsigned i;
+    struct page_info *pg;
+
+    if ( !d->arch.mm_arg_xlat_l3 )
+    {
+        pg = alloc_domheap_page(NULL);
+        if ( !pg )
+            return -ENOMEM;
+        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+    }
+
+    l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+        l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), __PAGE_HYPERVISOR);
+
+    for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i )
+    {
+        unsigned long va = COMPAT_ARG_XLAT_VIRT_START(vcpu_id) + i * PAGE_SIZE;
+        l2_pgentry_t *l2tab;
+        l1_pgentry_t *l1tab;
+
+        if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
+        {
+            pg = alloc_domheap_page(NULL);
+            if ( !pg )
+                return -ENOMEM;
+            clear_page(page_to_virt(pg));
+            d->arch.mm_arg_xlat_l3[l3_table_offset(va)] = l3e_from_page(pg, PAGE_HYPERVISOR);
+        }
+        l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
+        if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
+        {
+            pg = alloc_domheap_page(NULL);
+            if ( !pg )
+                return -ENOMEM;
+            clear_page(page_to_virt(pg));
+            l2tab[l2_table_offset(va)] = l2e_from_page(pg, PAGE_HYPERVISOR);
+        }
+        l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
+        BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
+        pg = alloc_domheap_page(NULL);
+        if ( !pg )
+            return -ENOMEM;
+        l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
+    }
+
+    return 0;
+}
+#endif
+
 struct vcpu *alloc_vcpu_struct(struct domain *d, unsigned int vcpu_id)
 {
     struct vcpu *v;
@@ -161,6 +213,13 @@ struct vcpu *alloc_vcpu_struct(struct do
             l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
         v->arch.guest_table = pagetable_from_page(pg);
         v->arch.guest_table_user = v->arch.guest_table;
+
+        if ( setup_arg_xlat_area(d, vcpu_id, l4tab) < 0 )
+        {
+            free_xenheap_page(l4tab);
+            xfree(v);
+            return NULL;
+        }
     }
 #endif
 
@@ -273,6 +332,46 @@ void arch_domain_destroy(struct domain *
     free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
 #endif
 
+#ifdef CONFIG_COMPAT
+    if ( d->arch.mm_arg_xlat_l3 )
+    {
+        struct page_info *pg;
+        unsigned l3;
+
+        for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 )
+        {
+            if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) )
+            {
+                l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]);
+                unsigned l2;
+
+                for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 )
+                {
+                    if ( l2e_get_intpte(l2tab[l2]) )
+                    {
+                        l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]);
+                        unsigned l1;
+
+                        for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 )
+                        {
+                            if ( l1e_get_intpte(l1tab[l1]) )
+                            {
+                                pg = l1e_get_page(l1tab[l1]);
+                                free_domheap_page(pg);
+                            }
+                        }
+                        pg = l2e_get_page(l2tab[l2]);
+                        free_domheap_page(pg);
+                    }
+                }
+                pg = l3e_get_page(d->arch.mm_arg_xlat_l3[l3]);
+                free_domheap_page(pg);
+            }
+        }
+        free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3));
+    }
+#endif
+
     free_xenheap_page(d->shared_info);
 }
 
@@ -931,55 +1030,131 @@ unsigned long hypercall_create_continuat
 
         for ( i = 0; *p != '\0'; i++ )
             mcs->call.args[i] = next_arg(p, args);
+        if ( IS_COMPAT(current->domain) )
+        {
+            for ( ; i < 6; i++ )
+                mcs->call.args[i] = 0;
+        }
     }
     else
     {
         regs       = guest_cpu_user_regs();
-#if defined(__i386__)
         regs->eax  = op;
+        regs->eip -= 2;  /* re-execute 'syscall' / 'int 0x82' */
 
-        if ( supervisor_mode_kernel || hvm_guest(current) )
-            regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+#if defined(__x86_64__)
+        if ( !IS_COMPAT(current->domain) )
+        {
+            for ( i = 0; *p != '\0'; i++ )
+            {
+                arg = next_arg(p, args);
+                switch ( i )
+                {
+                case 0: regs->rdi = arg; break;
+                case 1: regs->rsi = arg; break;
+                case 2: regs->rdx = arg; break;
+                case 3: regs->r10 = arg; break;
+                case 4: regs->r8  = arg; break;
+                case 5: regs->r9  = arg; break;
+                }
+            }
+        }
         else
-            regs->eip -= 2;   /* re-execute 'int 0x82' */
-
-        for ( i = 0; *p != '\0'; i++ )
+#endif
         {
-            arg = next_arg(p, args);
-            switch ( i )
+            if ( supervisor_mode_kernel || hvm_guest(current) )
+                regs->eip &= ~31; /* re-execute entire hypercall entry stub */
+
+            for ( i = 0; *p != '\0'; i++ )
             {
-            case 0: regs->ebx = arg; break;
-            case 1: regs->ecx = arg; break;
-            case 2: regs->edx = arg; break;
-            case 3: regs->esi = arg; break;
-            case 4: regs->edi = arg; break;
-            case 5: regs->ebp = arg; break;
+                arg = next_arg(p, args);
+                switch ( i )
+                {
+                case 0: regs->ebx = arg; break;
+                case 1: regs->ecx = arg; break;
+                case 2: regs->edx = arg; break;
+                case 3: regs->esi = arg; break;
+                case 4: regs->edi = arg; break;
+                case 5: regs->ebp = arg; break;
+                }
             }
         }
-#elif defined(__x86_64__)
-        regs->rax  = op;
-        regs->rip -= 2;  /* re-execute 'syscall' */
+    }
 
-        for ( i = 0; *p != '\0'; i++ )
+    va_end(args);
+
+    return op;
+}
+
+#ifdef CONFIG_COMPAT
+int hypercall_xlat_continuation(unsigned int mask, ...)
+{
+    int rc = 0;
+    struct mc_state *mcs = &this_cpu(mc_state);
+    struct cpu_user_regs *regs = guest_cpu_user_regs();
+    unsigned int i, cval = 0;
+    unsigned long nval = 0;
+    va_list args;
+
+    va_start(args, mask);
+
+    if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
+    {
+        for ( i = 0; i < 6; ++i, mask >>= 1 )
         {
-            arg = next_arg(p, args);
+            if ( (mask & 1) )
+            {
+                nval = va_arg(args, unsigned long);
+                cval = va_arg(args, unsigned int);
+            }
+            if ( (mask & 1) && mcs->call.args[i] == nval )
+            {
+                ++rc;
+            }
+            else
+            {
+                cval = mcs->call.args[i];
+                BUG_ON(mcs->call.args[i] != cval);
+            }
+            mcs->compat_call.args[i] = cval;
+        }
+    }
+    else
+    {
+        for ( i = 0; i < 6; ++i, mask >>= 1 )
+        {
+            unsigned long *reg;
+
             switch ( i )
             {
-            case 0: regs->rdi = arg; break;
-            case 1: regs->rsi = arg; break;
-            case 2: regs->rdx = arg; break;
-            case 3: regs->r10 = arg; break;
-            case 4: regs->r8  = arg; break;
-            case 5: regs->r9  = arg; break;
+            case 0: reg = &regs->ebx; break;
+            case 1: reg = &regs->ecx; break;
+            case 2: reg = &regs->edx; break;
+            case 3: reg = &regs->esi; break;
+            case 4: reg = &regs->edi; break;
+            case 5: reg = &regs->ebp; break;
+            default: BUG(); reg = NULL; break;
+            }
+            if ( (mask & 1) )
+            {
+                nval = va_arg(args, unsigned long);
+                cval = va_arg(args, unsigned int);
+            }
+            if ( (mask & 1) && *reg == nval )
+            {
+                *reg = cval;
+                ++rc;
             }
+            else
+                BUG_ON(*reg != (unsigned int)*reg);
         }
-#endif
     }
 
     va_end(args);
 
-    return op;
+    return rc;
 }
+#endif
 
 static void relinquish_memory(struct domain *d, struct list_head *list)
 {
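
To illustrate the contract of hypercall_xlat_continuation() added above,
here is a small self-contained model (not hypervisor code; "args" stands
in for the six guest argument registers, and LP64 sizes are assumed):

    #include <assert.h>
    #include <stdarg.h>

    /*
     * For each bit set in mask, a (native, compat) value pair is consumed
     * from the varargs.  An argument still holding the translated native
     * value is replaced by the original compat value, so a continuation
     * re-enters the hypercall with 32-bit-clean arguments; every other
     * argument must already fit in 32 bits.
     */
    static int xlat_continuation_model(unsigned long args[6],
                                       unsigned int mask, ...)
    {
        int rc = 0;
        unsigned int i;
        va_list ap;

        va_start(ap, mask);
        for ( i = 0; i < 6; ++i, mask >>= 1 )
        {
            if ( mask & 1 )
            {
                unsigned long nval = va_arg(ap, unsigned long);
                unsigned int cval = va_arg(ap, unsigned int);

                if ( args[i] == nval )
                {
                    args[i] = cval;
                    ++rc;    /* tells the caller a fix-up took place */
                    continue;
                }
            }
            assert(args[i] == (unsigned int)args[i]);
        }
        va_end(ap);

        return rc;
    }
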
Index: 2006-10-04/xen/arch/x86/domain_build.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/domain_build.c 2006-10-04 15:16:05.000000000 +0200
+++ 2006-10-04/xen/arch/x86/domain_build.c      2006-10-04 15:18:51.000000000 +0200
@@ -665,7 +665,11 @@ int construct_dom0(struct domain *d,
         l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
     v->arch.guest_table = pagetable_from_paddr(__pa(l4start));
     if ( IS_COMPAT(d) )
+    {
         v->arch.guest_table_user = v->arch.guest_table;
+        if ( setup_arg_xlat_area(d, 0, l4start) < 0 )
+            panic("Not enough RAM for domain 0 hypercall argument 
translation.\n");
+    }
 
     l4tab += l4_table_offset(dsi.v_start);
     mfn = alloc_spfn;
Index: 2006-10-04/xen/arch/x86/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/mm.c   2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/arch/x86/mm.c        2006-10-04 15:18:51.000000000 +0200
@@ -1106,9 +1106,12 @@ static int alloc_l4_table(struct page_in
     pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
         l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
     pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
-        l4e_from_page(
-            virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3),
-            __PAGE_HYPERVISOR);
+        l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
+                      __PAGE_HYPERVISOR);
+    if ( IS_COMPAT(d) )
+        pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
+            l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
+                          __PAGE_HYPERVISOR);
 
     return 1;
 
@@ -2732,7 +2735,9 @@ int do_update_va_mapping(unsigned long v
             flush_tlb_mask(d->domain_dirty_cpumask);
             break;
         default:
-            if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
+            if ( unlikely(!IS_COMPAT(d) ?
+                          get_user(vmask, (unsigned long *)bmap_ptr) :
+                          get_user(vmask, (unsigned int *)bmap_ptr)) )
                 rc = -EFAULT;
             pmask = vcpumask_to_pcpumask(d, vmask);
             flush_tlb_mask(pmask);
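
The do_update_va_mapping() change above reads the vCPU bitmap at guest
width. A minimal illustration of why (plain C, not Xen code; memcpy()
stands in for get_user()):

    #include <stdint.h>
    #include <string.h>

    /* A 32-bit guest supplies a 32-bit mask; reading a full 64-bit long
     * would consume four bytes beyond what the guest provided. */
    static uint64_t read_guest_vcpumask(const void *guest_ptr, int is_compat)
    {
        if ( is_compat )
        {
            uint32_t mask32;
            memcpy(&mask32, guest_ptr, sizeof(mask32));
            return mask32;
        }
        else
        {
            uint64_t mask64;
            memcpy(&mask64, guest_ptr, sizeof(mask64));
            return mask64;
        }
    }
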
Index: 2006-10-04/xen/arch/x86/x86_64/Makefile
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/Makefile        2006-10-04 15:06:22.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/Makefile     2006-10-04 15:18:51.000000000 +0200
@@ -6,5 +6,6 @@ obj-y += traps.o
 ifeq ($(CONFIG_COMPAT),y)
 # extra dependencies
 entry.o:       compat/entry.S
+mm.o:          compat/mm.c
 traps.o:       compat/traps.c
 endif
Index: 2006-10-04/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/compat/entry.S  2006-10-04 15:11:03.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/compat/entry.S       2006-10-04 15:18:51.000000000 +0200
@@ -282,15 +282,11 @@ CFIX14:
 #define compat_mmu_update domain_crash_synchronous
 #define compat_set_gdt domain_crash_synchronous
 #define compat_platform_op domain_crash_synchronous
-#define compat_update_descriptor domain_crash_synchronous
-#define compat_memory_op domain_crash_synchronous
 #define compat_multicall domain_crash_synchronous
-#define compat_update_va_mapping domain_crash_synchronous
 #define compat_set_timer_op domain_crash_synchronous
 #define compat_event_channel_op_compat domain_crash_synchronous
 #define compat_physdev_op_compat domain_crash_synchronous
 #define compat_grant_table_op domain_crash_synchronous
-#define compat_update_va_mapping_otherdomain domain_crash_synchronous
 #define compat_vcpu_op domain_crash_synchronous
 #define compat_mmuext_op domain_crash_synchronous
 #define compat_acm_op domain_crash_synchronous
Index: 2006-10-04/xen/arch/x86/x86_64/compat/mm.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2006-10-04/xen/arch/x86/x86_64/compat/mm.c  2006-10-04 15:18:51.000000000 +0200
@@ -0,0 +1,128 @@
+#ifdef CONFIG_COMPAT
+
+#include <compat/memory.h>
+
+int compat_update_descriptor(u32 pa_lo, u32 pa_hi, u32 desc_lo, u32 desc_hi)
+{
+    return do_update_descriptor(pa_lo | ((u64)pa_hi << 32),
+                                desc_lo | ((u64)desc_hi << 32));
+}
+
+int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
+{
+    struct compat_machphys_mfn_list xmml;
+    l2_pgentry_t l2e;
+    unsigned long v;
+    compat_pfn_t mfn;
+    unsigned int i;
+    int rc = 0;
+
+    switch ( op )
+    {
+    case XENMEM_add_to_physmap:
+    {
+        struct compat_add_to_physmap cmp;
+        struct xen_add_to_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id);
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+
+        XLAT_add_to_physmap(nat, &cmp);
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+
+        break;
+    }
+
+    case XENMEM_memory_map:
+    case XENMEM_machine_memory_map:
+    {
+        struct compat_memory_map cmp;
+        struct xen_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id);
+
+        if ( copy_from_guest(&cmp, arg, 1) )
+            return -EFAULT;
+#define XLAT_memory_map_HNDL_buffer(_d_, _s_) \
+        guest_from_compat_handle((_d_)->buffer, (_s_)->buffer)
+        XLAT_memory_map(nat, &cmp);
+#undef XLAT_memory_map_HNDL_buffer
+
+        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+        if ( rc < 0 )
+            break;
+
+#define XLAT_memory_map_HNDL_buffer(_d_, _s_) ((void)0)
+        XLAT_memory_map(&cmp, nat);
+#undef XLAT_memory_map_HNDL_buffer
+        if ( copy_to_guest(arg, &cmp, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
+    case XENMEM_machphys_mapping:
+    {
+        static /*const*/ struct compat_machphys_mapping mapping = {
+            .v_start = MACH2PHYS_COMPAT_VIRT_START,
+            .v_end   = MACH2PHYS_COMPAT_VIRT_END,
+            .max_mfn = MACH2PHYS_COMPAT_NR_ENTRIES - 1
+        };
+
+        if ( copy_to_guest(arg, &mapping, 1) )
+            rc = -EFAULT;
+
+        break;
+    }
+
+    case XENMEM_machphys_mfn_list:
+        if ( copy_from_guest(&xmml, arg, 1) )
+            return -EFAULT;
+
+        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START;
+              (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END);
+              i++, v += 1 << L2_PAGETABLE_SHIFT )
+        {
+            l2e = compat_idle_pg_table_l2[l2_table_offset(v)];
+            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+                break;
+            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
+            if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) )
+                return -EFAULT;
+        }
+
+        xmml.nr_extents = i;
+        if ( copy_to_guest(arg, &xmml, 1) )
+            rc = -EFAULT;
+
+        break;
+
+    default:
+        rc = -ENOSYS;
+        break;
+    }
+
+    return rc;
+}
+
+int compat_update_va_mapping(unsigned int va, u32 lo, u32 hi,
+                             unsigned int flags)
+{
+    return do_update_va_mapping(va, lo | ((u64)hi << 32), flags);
+}
+
+int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
+                                         unsigned long flags,
+                                         domid_t domid)
+{
+    return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
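
The wrappers above reassemble 64-bit quantities that the 32-bit ABI passes
as two register halves. A sketch of the recombination (plain C,
illustrative only):

    #include <stdint.h>

    /* Mirrors compat_update_va_mapping(): lo | ((u64)hi << 32). */
    static uint64_t combine_halves(uint32_t lo, uint32_t hi)
    {
        return (uint64_t)lo | ((uint64_t)hi << 32);
    }

    /* E.g. a PTE of 0x80000000000000e3 arrives as lo=0x000000e3,
     * hi=0x80000000. */
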
Index: 2006-10-04/xen/arch/x86/x86_64/mm.c
===================================================================
--- 2006-10-04.orig/xen/arch/x86/x86_64/mm.c    2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/arch/x86/x86_64/mm.c 2006-10-04 15:18:51.000000000 +0200
@@ -28,6 +28,7 @@
 #include <asm/page.h>
 #include <asm/flushtlb.h>
 #include <asm/fixmap.h>
+#include <asm/hypercall.h>
 #include <asm/msr.h>
 #include <public/memory.h>
 
@@ -383,6 +384,8 @@ int check_descriptor(const struct domain
     return 0;
 }
 
+#include "compat/mm.c"
+
 /*
  * Local variables:
  * mode: C
Index: 2006-10-04/xen/common/compat/Makefile
===================================================================
--- 2006-10-04.orig/xen/common/compat/Makefile  2006-10-04 15:10:46.000000000 +0200
+++ 2006-10-04/xen/common/compat/Makefile       2006-10-04 15:18:51.000000000 +0200
@@ -1,4 +1,5 @@
 obj-y += kernel.o
+obj-y += memory.o
 obj-y += xlat.o
 
 # extra dependencies
Index: 2006-10-04/xen/common/compat/memory.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ 2006-10-04/xen/common/compat/memory.c       2006-10-04 15:18:51.000000000 +0200
@@ -0,0 +1,358 @@
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/hypercall.h>
+#include <xen/guest_access.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <asm/current.h>
+#include <compat/memory.h>
+
+int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE(void) compat)
+{
+    int rc, split, op = cmd & MEMOP_CMD_MASK;
+    unsigned int start_extent = cmd >> MEMOP_EXTENT_SHIFT;
+
+    do
+    {
+        unsigned int i, end_extent = 0;
+        union {
+            XEN_GUEST_HANDLE(void) hnd;
+            struct xen_memory_reservation *rsrv;
+            struct xen_memory_exchange *xchg;
+            struct xen_translate_gpfn_list *xlat;
+        } nat;
+        union {
+            struct compat_memory_reservation rsrv;
+            struct compat_memory_exchange xchg;
+            struct compat_translate_gpfn_list xlat;
+        } cmp;
+
+        set_xen_guest_handle(nat.hnd, (void *)COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id));
+        split = 0;
+        switch ( op )
+        {
+            xen_pfn_t *space;
+
+        case XENMEM_increase_reservation:
+        case XENMEM_decrease_reservation:
+        case XENMEM_populate_physmap:
+            if ( copy_from_guest(&cmp.rsrv, compat, 1) )
+                return start_extent;
+
+            /* Is size too large for us to encode a continuation? */
+            if ( cmp.rsrv.nr_extents > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
+                return start_extent;
+
+            if ( !compat_handle_is_null(cmp.rsrv.extent_start) &&
+                 !compat_handle_okay(cmp.rsrv.extent_start, cmp.rsrv.nr_extents) )
+                return start_extent;
+
+            end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.rsrv)) /
+                                        sizeof(*space);
+            if ( end_extent > cmp.rsrv.nr_extents )
+                end_extent = cmp.rsrv.nr_extents;
+
+            space = (xen_pfn_t *)(nat.rsrv + 1);
+#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \
+            do \
+            { \
+                if ( !compat_handle_is_null((_s_)->extent_start) ) \
+                { \
+                    set_xen_guest_handle((_d_)->extent_start, space - start_extent); \
+                    if ( op != XENMEM_increase_reservation ) \
+                    { \
+                        for ( i = start_extent; i < end_extent; ++i ) \
+                        { \
+                            compat_pfn_t pfn; \
+                            if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \
+                            { \
+                                end_extent = i; \
+                                split = -1; \
+                                break; \
+                            } \
+                            *space++ = pfn; \
+                        } \
+                    } \
+                } \
+                else \
+                { \
+                    set_xen_guest_handle((_d_)->extent_start, NULL); \
+                    end_extent = cmp.rsrv.nr_extents; \
+                } \
+            } while (0)
+            XLAT_memory_reservation(nat.rsrv, &cmp.rsrv);
+#undef XLAT_memory_reservation_HNDL_extent_start
+
+            if ( end_extent < cmp.rsrv.nr_extents )
+            {
+                nat.rsrv->nr_extents = end_extent;
+                ++split;
+            }
+
+            break;
+
+        case XENMEM_exchange:
+        {
+            int order_delta;
+
+            if ( copy_from_guest(&cmp.xchg, compat, 1) )
+                return -EFAULT;
+
+            order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order;
+            /* Various sanity checks. */
+            if ( (cmp.xchg.nr_exchanged > cmp.xchg.in.nr_extents) ||
+                 (order_delta > 0 && (cmp.xchg.nr_exchanged & ((1U << order_delta) - 1))) ||
+                 /* Sizes of input and output lists do not overflow an int? */
+                 ((~0U >> cmp.xchg.in.extent_order) < cmp.xchg.in.nr_extents) ||
+                 ((~0U >> cmp.xchg.out.extent_order) < cmp.xchg.out.nr_extents) ||
+                 /* Sizes of input and output lists match? */
+                 ((cmp.xchg.in.nr_extents << cmp.xchg.in.extent_order) !=
+                  (cmp.xchg.out.nr_extents << cmp.xchg.out.extent_order)) )
+                return -EINVAL;
+
+            start_extent = cmp.xchg.nr_exchanged;
+            end_extent = (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xchg)) /
+                         (((1U << __builtin_abs(order_delta)) + 1) *
+                          sizeof(*space));
+            if ( end_extent == 0 )
+            {
+                printk("Cannot translate compatibility mode XENMEM_exchange 
extents (%u,%u)\n",
+                       cmp.xchg.in.extent_order, cmp.xchg.out.extent_order);
+                return -E2BIG;
+            }
+            if ( order_delta > 0 )
+                end_extent <<= order_delta;
+            end_extent += start_extent;
+            if ( end_extent > cmp.xchg.in.nr_extents )
+                end_extent = cmp.xchg.in.nr_extents;
+
+            space = (xen_pfn_t *)(nat.xchg + 1);
+            /* Code below depends upon .in preceding .out. */
+            BUILD_BUG_ON(offsetof(xen_memory_exchange_t, in) > offsetof(xen_memory_exchange_t, out));
+#define XLAT_memory_reservation_HNDL_extent_start(_d_, _s_) \
+            do \
+            { \
+                set_xen_guest_handle((_d_)->extent_start, space - start_extent); \
+                for ( i = start_extent; i < end_extent; ++i ) \
+                { \
+                    compat_pfn_t pfn; \
+                    if ( __copy_from_compat_offset(&pfn, (_s_)->extent_start, i, 1) ) \
+                        return -EFAULT; \
+                    *space++ = pfn; \
+                } \
+                if ( order_delta > 0 ) \
+                { \
+                    start_extent >>= order_delta; \
+                    end_extent >>= order_delta; \
+                } \
+                else \
+                { \
+                    start_extent <<= -order_delta; \
+                    end_extent <<= -order_delta; \
+                } \
+                order_delta = -order_delta; \
+            } while (0)
+            XLAT_memory_exchange(nat.xchg, &cmp.xchg);
+#undef XLAT_memory_reservation_HNDL_extent_start
+
+            if ( end_extent < cmp.xchg.in.nr_extents )
+            {
+                nat.xchg->in.nr_extents = end_extent;
+                if ( order_delta >= 0 )
+                    nat.xchg->out.nr_extents = end_extent >> order_delta;
+                else
+                    nat.xchg->out.nr_extents = end_extent << order_delta;
+                ++split;
+            }
+
+            break;
+        }
+
+        case XENMEM_current_reservation:
+        case XENMEM_maximum_reservation:
+        {
+#define xen_domid_t domid_t
+#define compat_domid_t domid_compat_t
+            CHECK_TYPE(domid);
+#undef compat_domid_t
+#undef xen_domid_t
+        }
+        case XENMEM_maximum_ram_page:
+            nat.hnd = compat;
+            break;
+
+        case XENMEM_translate_gpfn_list:
+            if ( copy_from_guest(&cmp.xlat, compat, 1) )
+                return -EFAULT;
+
+            /* Is size too large for us to encode a continuation? */
+            if ( cmp.xlat.nr_gpfns > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
+                return -EINVAL;
+
+            if ( !compat_handle_okay(cmp.xlat.gpfn_list, cmp.xlat.nr_gpfns) ||
+                 !compat_handle_okay(cmp.xlat.mfn_list,  cmp.xlat.nr_gpfns) )
+                return -EFAULT;
+
+            end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xlat)) /
+                                        sizeof(*space);
+            if ( end_extent > cmp.xlat.nr_gpfns )
+                end_extent = cmp.xlat.nr_gpfns;
+
+            space = (xen_pfn_t *)(nat.xlat + 1);
+            /* Code below depends upon .gpfn_list preceding .mfn_list. */
+            BUILD_BUG_ON(offsetof(xen_translate_gpfn_list_t, gpfn_list) > offsetof(xen_translate_gpfn_list_t, mfn_list));
+#define XLAT_translate_gpfn_list_HNDL_gpfn_list(_d_, _s_) \
+            do \
+            { \
+                set_xen_guest_handle((_d_)->gpfn_list, space - start_extent); \
+                for ( i = start_extent; i < end_extent; ++i ) \
+                { \
+                    compat_pfn_t pfn; \
+                    if ( __copy_from_compat_offset(&pfn, (_s_)->gpfn_list, i, 1) ) \
+                        return -EFAULT; \
+                    *space++ = pfn; \
+                } \
+            } while (0)
+#define XLAT_translate_gpfn_list_HNDL_mfn_list(_d_, _s_) \
+            (_d_)->mfn_list = (_d_)->gpfn_list
+            XLAT_translate_gpfn_list(nat.xlat, &cmp.xlat);
+#undef XLAT_translate_gpfn_list_HNDL_mfn_list
+#undef XLAT_translate_gpfn_list_HNDL_gpfn_list
+
+            if ( end_extent < cmp.xlat.nr_gpfns )
+            {
+                nat.xlat->nr_gpfns = end_extent;
+                ++split;
+            }
+
+            break;
+
+        default:
+            return compat_arch_memory_op(cmd, compat);
+        }
+
+        rc = do_memory_op(cmd, nat.hnd);
+        if ( rc < 0 )
+            return rc;
+
+        if ( hypercall_xlat_continuation(0x02, nat.hnd, compat) )
+            split = -1;
+
+        switch ( op )
+        {
+        case XENMEM_increase_reservation:
+        case XENMEM_decrease_reservation:
+        case XENMEM_populate_physmap:
+            end_extent = split >= 0 ? rc : rc >> MEMOP_EXTENT_SHIFT;
+            if ( op != XENMEM_decrease_reservation &&
+                 !guest_handle_is_null(nat.rsrv->extent_start) )
+            {
+                for ( ; start_extent < end_extent; ++start_extent )
+                {
+                    compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];
+
+                    BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
+                    if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
+                    {
+                        if ( split >= 0 )
+                        {
+                            rc = start_extent;
+                            split = 0;
+                        }
+                        else
+                            /*
+                             * Short of being able to cancel the continuation,
+                             * force it to restart here; eventually we shall
+                             * get out of this state.
+                             */
+                            rc = (start_extent << MEMOP_EXTENT_SHIFT) | op;
+                        break;
+                    }
+                }
+            }
+            else
+                start_extent = end_extent;
+            break;
+
+        case XENMEM_exchange:
+        {
+            DEFINE_XEN_GUEST_HANDLE(compat_memory_exchange_t);
+            int order_delta;
+
+            BUG_ON(rc);
+            BUG_ON(end_extent < nat.xchg->nr_exchanged);
+            end_extent = nat.xchg->nr_exchanged;
+
+            order_delta = cmp.xchg.out.extent_order - cmp.xchg.in.extent_order;
+            if ( order_delta > 0 )
+            {
+                start_extent >>= order_delta;
+                BUG_ON(end_extent & ((1U << order_delta) - 1));
+                end_extent >>= order_delta;
+            }
+            else
+            {
+                start_extent <<= -order_delta;
+                end_extent <<= -order_delta;
+            }
+
+            for ( ; start_extent < end_extent; ++start_extent )
+            {
+                compat_pfn_t pfn = nat.xchg->out.extent_start.p[start_extent];
+
+                BUG_ON(pfn != nat.xchg->out.extent_start.p[start_extent]);
+                /* Note that we ignore errors accessing the output extent list. */
+                __copy_to_compat_offset(cmp.xchg.out.extent_start, start_extent, &pfn, 1);
+            }
+
+            cmp.xchg.nr_exchanged = nat.xchg->nr_exchanged;
+            if ( copy_field_to_guest(guest_handle_cast(compat, compat_memory_exchange_t),
+                                     &cmp.xchg, nr_exchanged) )
+            {
+                if ( split < 0 )
+                    /* Cannot cancel the continuation... */
+                    domain_crash_synchronous();
+                return -EFAULT;
+            }
+            break;
+        }
+
+        case XENMEM_maximum_ram_page:
+        case XENMEM_current_reservation:
+        case XENMEM_maximum_reservation:
+            break;
+
+        case XENMEM_translate_gpfn_list:
+            if ( split < 0 )
+                end_extent = rc >> MEMOP_EXTENT_SHIFT;
+            else
+                BUG_ON(rc);
+
+            for ( ; start_extent < end_extent; ++start_extent )
+            {
+                compat_pfn_t pfn = nat.xlat->mfn_list.p[start_extent];
+
+                BUG_ON(pfn != nat.xlat->mfn_list.p[start_extent]);
+                if ( __copy_to_compat_offset(cmp.xlat.mfn_list, start_extent, &pfn, 1) )
+                {
+                    if ( split < 0 )
+                        /* Cannot cancel the continuation... */
+                        domain_crash_synchronous();
+                    return -EFAULT;
+                }
+            }
+            break;
+
+        default:
+            domain_crash_synchronous();
+            break;
+        }
+
+        cmd = op | (start_extent << MEMOP_EXTENT_SHIFT);
+        if ( split > 0 && hypercall_preempt_check() )
+            return hypercall_create_continuation(
+                __HYPERVISOR_memory_op, "ih", cmd, compat);
+    } while ( split > 0 );
+
+    return rc;
+}
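
The pattern throughout compat_memory_op() above is: copy the compat
structure in, widen any 32-bit PFN arrays into the per-vCPU translation
area, hand the native structure to do_memory_op(), then narrow the results
back, checking for truncation. A self-contained model of the widen/narrow
steps (illustrative only; the _model types are hypothetical stand-ins for
compat_pfn_t/xen_pfn_t):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef uint32_t compat_pfn_model_t;
    typedef uint64_t xen_pfn_model_t;

    /* Widen guest-supplied 32-bit PFNs into a native scratch array. */
    static void widen_pfns(xen_pfn_model_t *nat,
                           const compat_pfn_model_t *cmp, size_t n)
    {
        size_t i;

        for ( i = 0; i < n; ++i )
            nat[i] = cmp[i];
    }

    /* Narrow results back; mirrors the BUG_ON(pfn != ...) checks above. */
    static void narrow_pfns(compat_pfn_model_t *cmp,
                            const xen_pfn_model_t *nat, size_t n)
    {
        size_t i;

        for ( i = 0; i < n; ++i )
        {
            compat_pfn_model_t pfn = (compat_pfn_model_t)nat[i];

            assert((xen_pfn_model_t)pfn == nat[i]);
            cmp[i] = pfn;
        }
    }
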
Index: 2006-10-04/xen/common/memory.c
===================================================================
--- 2006-10-04.orig/xen/common/memory.c 2006-08-21 18:02:24.000000000 +0200
+++ 2006-10-04/xen/common/memory.c      2006-10-04 15:18:51.000000000 +0200
@@ -17,18 +17,12 @@
 #include <xen/shadow.h>
 #include <xen/iocap.h>
 #include <xen/guest_access.h>
+#include <xen/hypercall.h>
 #include <xen/errno.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <public/memory.h>
 
-/*
- * To allow safe resume of do_memory_op() after preemption, we need to know 
- * at what point in the page list to resume. For this purpose I steal the 
- * high-order bits of the @cmd parameter, which are otherwise unused and zero.
- */
-#define START_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
-
 static long
 increase_reservation(
     struct domain *d, 
@@ -236,7 +230,7 @@ translate_gpfn_list(
         return -EFAULT;
 
     /* Is size too large for us to encode a continuation? */
-    if ( op.nr_gpfns > (ULONG_MAX >> START_EXTENT_SHIFT) )
+    if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
         return -EINVAL;
 
     if ( !guest_handle_okay(op.gpfn_list, op.nr_gpfns) ||
@@ -517,20 +511,20 @@ long do_memory_op(unsigned long cmd, XEN
     struct xen_memory_reservation reservation;
     domid_t domid;
 
-    op = cmd & ((1 << START_EXTENT_SHIFT) - 1);
+    op = cmd & MEMOP_CMD_MASK;
 
     switch ( op )
     {
     case XENMEM_increase_reservation:
     case XENMEM_decrease_reservation:
     case XENMEM_populate_physmap:
-        start_extent = cmd >> START_EXTENT_SHIFT;
+        start_extent = cmd >> MEMOP_EXTENT_SHIFT;
 
         if ( copy_from_guest(&reservation, arg, 1) )
             return start_extent;
 
         /* Is size too large for us to encode a continuation? */
-        if ( reservation.nr_extents > (ULONG_MAX >> START_EXTENT_SHIFT) )
+        if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
             return start_extent;
 
         if ( unlikely(start_extent > reservation.nr_extents) )
@@ -594,7 +588,7 @@ long do_memory_op(unsigned long cmd, XEN
         if ( preempted )
             return hypercall_create_continuation(
                 __HYPERVISOR_memory_op, "lh",
-                op | (rc << START_EXTENT_SHIFT), arg);
+                op | (rc << MEMOP_EXTENT_SHIFT), arg);
 
         break;
 
@@ -626,14 +620,14 @@ long do_memory_op(unsigned long cmd, XEN
         break;
 
     case XENMEM_translate_gpfn_list:
-        progress = cmd >> START_EXTENT_SHIFT;
+        progress = cmd >> MEMOP_EXTENT_SHIFT;
         rc = translate_gpfn_list(
             guest_handle_cast(arg, xen_translate_gpfn_list_t),
             &progress);
         if ( rc == -EAGAIN )
             return hypercall_create_continuation(
                 __HYPERVISOR_memory_op, "lh",
-                op | (progress << START_EXTENT_SHIFT), arg);
+                op | (progress << MEMOP_EXTENT_SHIFT), arg);
         break;
 
     default:
Index: 2006-10-04/xen/include/asm-x86/config.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/config.h        2006-10-04 15:16:05.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/config.h     2006-10-04 15:18:51.000000000 +0200
@@ -114,7 +114,7 @@ static inline void FORCE_CRASH(void) 
 /*
  * Memory layout:
  *  0x0000000000000000 - 0x00007fffffffffff [128TB, 2^47 bytes, PML4:0-255]
- *    Guest-defined use.
+ *    Guest-defined use (see below for compatibility mode guests).
  *  0x0000800000000000 - 0xffff7fffffffffff [16EB]
  *    Inaccessible: current arch only supports 48-bit sign-extended VAs.
  *  0xffff800000000000 - 0xffff803fffffffff [256GB, 2^38 bytes, PML4:256]
@@ -147,6 +147,18 @@ static inline void FORCE_CRASH(void) 
  *    Reserved for future use.
  *  0xffff880000000000 - 0xffffffffffffffff [120TB, PML4:272-511]
  *    Guest-defined use.
+ *
+ * Compatibility guest area layout:
+ *  0x0000000000000000 - 0x00000000f57fffff [3928MB,            PML4:0]
+ *    Guest-defined use.
+ *  0x00000000f5800000 - 0x00000000ffffffff [168MB,             PML4:0]
+ *    Read-only machine-to-phys translation table (GUEST ACCESSIBLE).
+ *  0x0000000100000000 - 0x0000007fffffffff [508GB,             PML4:0]
+ *    Unused.
+ *  0x0000008000000000 - 0x000000ffffffffff [512GB, 2^39 bytes, PML4:1]
+ *    Hypercall argument translation area.
+ *  0x0000010000000000 - 0x00007fffffffffff [127TB, 2^46 bytes, PML4:2-255]
+ *    Reserved for future use.
  */
 
 
@@ -216,6 +228,14 @@ static inline void FORCE_CRASH(void) 
 #define COMPAT_L2_PAGETABLE_XEN_SLOTS \
     (COMPAT_L2_PAGETABLE_LAST_XEN_SLOT - COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT + 1)
 
+#define COMPAT_ARG_XLAT_VIRT_BASE      (1UL << ROOT_PAGETABLE_SHIFT)
+#define COMPAT_ARG_XLAT_SHIFT          0
+#define COMPAT_ARG_XLAT_PAGES          (1U << COMPAT_ARG_XLAT_SHIFT)
+#define COMPAT_ARG_XLAT_SIZE           (COMPAT_ARG_XLAT_PAGES << PAGE_SHIFT)
+#define COMPAT_ARG_XLAT_VIRT_START(vcpu_id) \
+    (COMPAT_ARG_XLAT_VIRT_BASE + ((unsigned long)(vcpu_id) << \
+                                  (PAGE_SHIFT + COMPAT_ARG_XLAT_SHIFT + 1)))
+
 #define PGT_base_page_table     PGT_l4_page_table
 
 #define __HYPERVISOR_CS64 0xe008
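
With COMPAT_ARG_XLAT_SHIFT at 0, each vCPU gets one 4kB translation page,
and consecutive vCPUs' areas are spaced two pages apart, all inside PML4
slot 1. A standalone re-computation of the macros above (illustrative
only; assumes PAGE_SHIFT 12 and ROOT_PAGETABLE_SHIFT 39 as on x86-64, with
_M names as hypothetical stand-ins):

    #include <stdio.h>

    #define PAGE_SHIFT_M            12
    #define ROOT_PAGETABLE_SHIFT_M  39
    #define ARG_XLAT_VIRT_BASE_M    (1UL << ROOT_PAGETABLE_SHIFT_M)
    #define ARG_XLAT_SHIFT_M        0
    #define ARG_XLAT_VIRT_START_M(vcpu_id) \
        (ARG_XLAT_VIRT_BASE_M + ((unsigned long)(vcpu_id) << \
            (PAGE_SHIFT_M + ARG_XLAT_SHIFT_M + 1)))

    int main(void)
    {
        unsigned int v;

        /* vCPU 0 -> 0x8000000000, vCPU 1 -> 0x8000002000, ... */
        for ( v = 0; v < 4; ++v )
            printf("vcpu %u: %#lx\n", v, ARG_XLAT_VIRT_START_M(v));
        return 0;
    }
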
Index: 2006-10-04/xen/include/asm-x86/domain.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/domain.h        2006-09-21 11:09:00.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/domain.h     2006-10-04 15:18:51.000000000 +0200
@@ -98,6 +98,10 @@ struct arch_domain
     struct mapcache mapcache;
 #endif
 
+#ifdef CONFIG_COMPAT
+    l3_pgentry_t *mm_arg_xlat_l3;
+#endif
+
     /* I/O-port admin-specified access capabilities. */
     struct rangeset *ioport_caps;
 
Index: 2006-10-04/xen/include/asm-x86/mm.h
===================================================================
--- 2006-10-04.orig/xen/include/asm-x86/mm.h    2006-10-04 15:18:45.000000000 +0200
+++ 2006-10-04/xen/include/asm-x86/mm.h 2006-10-04 15:18:51.000000000 +0200
@@ -394,8 +394,18 @@ int __sync_lazy_execstate(void);
 /* Arch-specific portion of memory_op hypercall. */
 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
 long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
+#ifdef CONFIG_COMPAT
+int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void));
+int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void));
+#endif
 
 int steal_page(
     struct domain *d, struct page_info *page, unsigned int memflags);
 
+#ifdef CONFIG_COMPAT
+int setup_arg_xlat_area(struct domain *, unsigned int vcpu_id, l4_pgentry_t *);
+#else
+# define setup_arg_xlat_area(dom, vcpu_id, l4tab) 0
+#endif
+
 #endif /* __ASM_X86_MM_H__ */
Index: 2006-10-04/xen/include/xen/hypercall.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/hypercall.h 2006-08-28 08:32:38.000000000 +0200
+++ 2006-10-04/xen/include/xen/hypercall.h      2006-10-04 15:18:51.000000000 +0200
@@ -42,9 +42,17 @@ extern long
 do_platform_op(
     XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op);
 
+/*
+ * To allow safe resume of do_memory_op() after preemption, we need to know
+ * at what point in the page list to resume. For this purpose I steal the
+ * high-order bits of the @cmd parameter, which are otherwise unused and zero.
+ */
+#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+#define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)
+
 extern long
 do_memory_op(
-    int cmd,
+    unsigned long cmd,
     XEN_GUEST_HANDLE(void) arg);
 
 extern long
@@ -102,4 +110,13 @@ do_hvm_op(
     unsigned long op,
     XEN_GUEST_HANDLE(void) arg);
 
+#ifdef CONFIG_COMPAT
+
+extern int
+compat_memory_op(
+    unsigned int cmd,
+    XEN_GUEST_HANDLE(void) arg);
+
+#endif
+
 #endif /* __XEN_HYPERCALL_H__ */
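
A round-trip of the continuation encoding now shared via
MEMOP_EXTENT_SHIFT and MEMOP_CMD_MASK (standalone, illustrative only):

    #include <assert.h>

    #define MEMOP_EXTENT_SHIFT 4
    #define MEMOP_CMD_MASK     ((1 << MEMOP_EXTENT_SHIFT) - 1)

    int main(void)
    {
        unsigned long op = 1;              /* a XENMEM_* code (fits in 4 bits) */
        unsigned long start_extent = 100;  /* resume point after preemption */
        unsigned long cmd = op | (start_extent << MEMOP_EXTENT_SHIFT);

        /* do_memory_op()/compat_memory_op() recover both fields from cmd. */
        assert((cmd & MEMOP_CMD_MASK) == op);
        assert((cmd >> MEMOP_EXTENT_SHIFT) == start_extent);
        return 0;
    }
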
Index: 2006-10-04/xen/include/xen/compat.h
===================================================================
--- 2006-10-04.orig/xen/include/xen/compat.h    2006-10-04 15:11:04.000000000 +0200
+++ 2006-10-04/xen/include/xen/compat.h 2006-10-04 15:18:51.000000000 +0200
@@ -143,6 +143,8 @@
                    &((k compat_ ## n *)0)->f1.f2.f3) * 2]
 
 
+int hypercall_xlat_continuation(unsigned int mask, ...);
+
 /* In-place translation functions: */
 struct start_info;
 void xlat_start_info(struct start_info *, enum XLAT_start_info_console);
Index: 2006-10-04/xen/include/xlat.lst
===================================================================
--- 2006-10-04.orig/xen/include/xlat.lst        2006-10-04 15:11:04.000000000 +0200
+++ 2006-10-04/xen/include/xlat.lst     2006-10-04 15:18:51.000000000 +0200
@@ -4,3 +4,8 @@
 ?      dom0_vga_console_info           xen.h
 !      start_info                      xen.h
 ?      vcpu_time_info                  xen.h
+!      add_to_physmap                  memory.h
+!      memory_exchange                 memory.h
+!      memory_map                      memory.h
+!      memory_reservation              memory.h
+!      translate_gpfn_list             memory.h
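
The new xlat.lst entries are marked "!", requesting translation macros
(XLAT_<type>(), as used in compat/mm.c and compat/memory.c above), whereas
"?" entries request layout-equivalence checks (the CHECK_* machinery, as
with CHECK_TYPE(domid) above). A hand-written stand-in for the shape of a
generated translation macro (the real bodies come from the xlat machinery;
the _model structs and their field layout here are only a sketch):

    /* Hypothetical 32-bit vs. native layouts of a translated struct. */
    struct compat_add_to_physmap_model {
        unsigned short domid;
        unsigned int   space;
        unsigned int   idx;      /* 32-bit frame numbers */
        unsigned int   gpfn;
    };
    struct xen_add_to_physmap_model {
        unsigned short domid;
        unsigned int   space;
        unsigned long  idx;      /* widened to native width */
        unsigned long  gpfn;
    };

    #define XLAT_add_to_physmap_model(_d_, _s_) do { \
        (_d_)->domid = (_s_)->domid;                 \
        (_d_)->space = (_s_)->space;                 \
        (_d_)->idx   = (_s_)->idx;                   \
        (_d_)->gpfn  = (_s_)->gpfn;                  \
    } while (0)
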

