WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] The 1:1 page table should be a 3 level PAE page table on x86-64

To: Ian Pratt <Ian.Pratt@xxxxxxxxxxxx>, Keir Fraser <Keir.Fraser@xxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] The 1:1 page table should be a 3 level PAE page table on x86-64
From: Arun Sharma <arun.sharma@xxxxxxxxx>
Date: Mon, 11 Jul 2005 07:24:08 -0700
Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
Delivery-date: Mon, 11 Jul 2005 14:18:17 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.4.1i
The 1:1 page table should be a 3 level PAE page table on x86-64 

This is needed to support > 4GB machine physical addresses.

Signed-off-by: Chengyuan Li <chengyuan.li@xxxxxxxxx>
Signed-off-by: Arun Sharma <arun.sharma@xxxxxxxxx>

--- a/tools/libxc/xc_vmx_build.c        Mon Jul 11 05:02:12 2005
+++ b/tools/libxc/xc_vmx_build.c        Mon Jul 11 05:04:22 2005
@@ -13,6 +13,9 @@
 
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#ifdef __x86_64__
+#define L3_PROT (_PAGE_PRESENT)
+#endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
@@ -91,6 +94,7 @@
     mem_mapp->nr_map = nr_map;
 }
 
+#ifdef __i386__
 static int zap_mmio_range(int xc_handle, u32 dom,
                             l2_pgentry_32_t *vl2tab,
                             unsigned long mmio_range_start,
@@ -138,6 +142,65 @@
     munmap(vl2tab, PAGE_SIZE);
     return 0;
 }
+#else
+static int zap_mmio_range(int xc_handle, u32 dom,
+                           l3_pgentry_t *vl3tab,
+                           unsigned long mmio_range_start,
+                           unsigned long mmio_range_size)
+{
+   unsigned long mmio_addr;
+   unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+   unsigned long vl2e = 0;
+   unsigned long vl3e;
+   l1_pgentry_t *vl1tab;
+   l2_pgentry_t *vl2tab;
+ 
+   mmio_addr = mmio_range_start & PAGE_MASK;
+   for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
+       vl3e = vl3tab[l3_table_offset(mmio_addr)];
+       if (vl3e == 0)
+           continue;
+       vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+               PROT_READ|PROT_WRITE, vl3e >> PAGE_SHIFT);
+       if (vl2tab == 0) {
+           PERROR("Failed zap MMIO range");
+           return -1;
+       }
+       vl2e = vl2tab[l2_table_offset(mmio_addr)];
+       if (vl2e == 0)
+           continue;
+       vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+               PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+
+       vl1tab[l1_table_offset(mmio_addr)] = 0;
+       munmap(vl2tab, PAGE_SIZE);
+       munmap(vl1tab, PAGE_SIZE);
+   }
+   return 0;
+}
+
+static int zap_mmio_ranges(int xc_handle, u32 dom,
+                           unsigned long l3tab,
+                           struct mem_map *mem_mapp)
+{
+   int i;
+   l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                               PROT_READ|PROT_WRITE,
+                                               l3tab >> PAGE_SHIFT);
+   if (vl3tab == 0)
+       return -1;
+   for (i = 0; i < mem_mapp->nr_map; i++) {
+       if ((mem_mapp->map[i].type == E820_IO)
+         && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
+           if (zap_mmio_range(xc_handle, dom, vl3tab,
+                       mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
+               return -1;
+   }
+   munmap(vl3tab, PAGE_SIZE);
+   return 0;
+}
+
+#endif
 
 static int setup_guest(int xc_handle,
                          u32 dom, int memsize,
@@ -151,9 +214,13 @@
                          unsigned long flags,
                          struct mem_map * mem_mapp)
 {
-    l1_pgentry_32_t *vl1tab=NULL, *vl1e=NULL;
-    l2_pgentry_32_t *vl2tab=NULL, *vl2e=NULL;
+    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
     unsigned long *page_array = NULL;
+#ifdef __x86_64__
+    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
+    unsigned long l3tab;
+#endif
     unsigned long l2tab;
     unsigned long l1tab;
     unsigned long count, i;
@@ -212,7 +279,11 @@
     if(initrd_len == 0)
         vinitrd_start = vinitrd_end = 0;
 
+#ifdef __i386__
     nr_pt_pages = 1 + ((memsize + 3) >> 2);
+#else
+    nr_pt_pages = 5 + ((memsize + 1) >> 1);
+#endif
     vpt_start   = v_end;
     vpt_end     = vpt_start + (nr_pt_pages * PAGE_SIZE);
 
@@ -274,6 +345,7 @@
     if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
+#ifdef __i386__
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
     l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
@@ -310,7 +382,64 @@
     }
     munmap(vl1tab, PAGE_SIZE);
     munmap(vl2tab, PAGE_SIZE);
-
+#else
+    /* First allocate pdpt */
+    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+    /* here l3tab means pdpt, only 4 entry is used */
+    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    ctxt->ctrlreg[3] = l3tab;
+
+    /* Initialise the page tables. */
+    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                        PROT_READ|PROT_WRITE, 
+                                        l3tab >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    memset(vl3tab, 0, PAGE_SIZE);
+
+    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];
+
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if (!(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))){
+            l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+
+            if (vl2tab != NULL)
+                munmap(vl2tab, PAGE_SIZE);
+
+            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                      PROT_READ|PROT_WRITE,
+                      l2tab >> PAGE_SHIFT)) == NULL )
+                goto error_out;
+
+            memset(vl2tab, 0, PAGE_SIZE);
+            *vl3e++ = l2tab | L3_PROT;
+            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
+        }
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+        {
+            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                      PROT_READ|PROT_WRITE,
+                      l1tab >> PAGE_SHIFT)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+            *vl2e++ = l1tab | L2_PROT;
+        }
+
+        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        vl1e++;
+    }
+
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
+    munmap(vl3tab, PAGE_SIZE);
+#endif
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
@@ -325,6 +454,7 @@
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
         goto error_out;
+
     memset(boot_paramsp, 0, sizeof(*boot_paramsp));
 
     strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
@@ -381,7 +511,11 @@
 
     /* memsize is in megabytes */
     build_e820map(mem_mapp, memsize << 20);
+#if defined (__i386__)
     if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
+#else
+    if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
+#endif
        goto error_out;
     boot_paramsp->e820_map_nr = mem_mapp->nr_map;
     for (i=0; i<mem_mapp->nr_map; i++) {
diff -r 036c6e463f67 -r 51dd38b2b917 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Mon Jul 11 05:02:12 2005
+++ b/xen/arch/x86/vmx.c        Mon Jul 11 05:04:22 2005
@@ -801,7 +801,11 @@
 skip_cr3:
 
     error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+#if defined (__i386__)
     error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE));
+#else
+    error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE | X86_CR4_PAE));
+#endif
     error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
 
     error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
@@ -860,7 +864,7 @@
 {
     struct vmx_assist_context c;
     u32 magic;
-    unsigned long cp;
+    u32 cp;
 
     /* make sure vmxassist exists (this is not an error) */
     if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
@@ -1191,7 +1195,7 @@
 
         __vmread(CR4_READ_SHADOW, &old_cr);
         if (pae_disabled)
-            __vmwrite(GUEST_CR4, ((value & ~X86_CR4_PAE) | X86_CR4_VMXE));
+            __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
         else
             __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
 
diff -r 036c6e463f67 -r 51dd38b2b917 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Mon Jul 11 05:02:12 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Mon Jul 11 05:04:22 2005
@@ -122,6 +122,7 @@
     struct e820entry *e820p;
     unsigned long gpfn = 0;
 
+    local_flush_tlb_pge();
     regs->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */
 
     n = regs->ecx;
@@ -311,8 +312,7 @@
     error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
     /* CR3 is set in vmx_final_setup_guest */
 #ifdef __x86_64__
-    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PAE);
-    printk("construct_init_vmcs_guest: guest CR4 is %lx\n", host_env->cr4 );
+    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
 #else
     error |= __vmwrite(GUEST_CR4, host_env->cr4);
 #endif
diff -r 036c6e463f67 -r 51dd38b2b917 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Mon Jul 11 05:02:12 2005
+++ b/tools/python/xen/xend/image.py    Mon Jul 11 05:04:22 2005
@@ -351,6 +351,8 @@
         @param mem_mb: size in MB
         @return size in KB
         """
-        # Logic x86-32 specific. 
         # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
-        return (1 + ((mem_mb + 3) >> 2)) * 4
+        if os.uname()[4] == 'x86_64':
+            return (5 + ((mem_mb + 1) >> 1)) * 4
+        else:
+            return (1 + ((mem_mb + 3) >> 2)) * 4
diff -r 036c6e463f67 -r 51dd38b2b917 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Jul 11 05:02:12 2005
+++ b/xen/include/asm-x86/mm.h  Mon Jul 11 05:04:22 2005
@@ -349,4 +349,7 @@
                             l1_pgentry_t _nl1e, 
                             struct domain *d,
                             struct vcpu *v);
+
+void alloc_monitor_pagetable(struct vcpu *v);
+
 #endif /* __ASM_X86_MM_H__ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

[Prev in Thread] Current Thread [Next in Thread]
  • [Xen-devel] [PATCH] The 1:1 page table should be a 3 level PAE page table on x86-64, Arun Sharma <=