WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] x86: make GDT per-CPU

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: make GDT per-CPU
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 22 Sep 2008 11:20:09 -0700
Delivery-date: Mon, 22 Sep 2008 11:20:10 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1222087617 -3600
# Node ID 7f1a36b834e183904f069948d3037d50492d98d2
# Parent  3c42b5ad0a4f607749426f82ecf11f75d84699c5
x86: make GDT per-CPU

The major issue with supporting a significantly larger number of
physical CPUs appears to be the use of per-CPU GDT entries - at
present, x86-64 could support only up to 126 CPUs (with code changes
to also use the top-most GDT page, that would be 254). Instead of
trying to go with incremental steps here, by converting the GDT itself
to be per-CPU, limitations in that respect go away entirely.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/x86/boot/wakeup.S                   |    2 
 xen/arch/x86/boot/x86_32.S                   |    6 +-
 xen/arch/x86/boot/x86_64.S                   |   10 ++--
 xen/arch/x86/cpu/common.c                    |    6 +-
 xen/arch/x86/domain.c                        |   66 ++++++++++++---------------
 xen/arch/x86/domain_build.c                  |   13 -----
 xen/arch/x86/hvm/vmx/vmcs.c                  |    2 
 xen/arch/x86/setup.c                         |   14 +++++
 xen/arch/x86/smpboot.c                       |   36 ++++++++++++++
 xen/arch/x86/traps.c                         |    4 -
 xen/arch/x86/x86_32/mm.c                     |   26 ++++++++++
 xen/arch/x86/x86_32/supervisor_mode_kernel.S |   13 +----
 xen/arch/x86/x86_32/traps.c                  |    8 ++-
 xen/arch/x86/x86_64/mm.c                     |   19 +++++++
 xen/arch/x86/x86_64/traps.c                  |    7 +-
 xen/include/asm-x86/desc.h                   |   34 +++++++------
 xen/include/asm-x86/ldt.h                    |    9 +--
 xen/include/asm-x86/page.h                   |    1 
 18 files changed, 178 insertions(+), 98 deletions(-)

diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/boot/wakeup.S
--- a/xen/arch/x86/boot/wakeup.S        Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/boot/wakeup.S        Mon Sep 22 13:46:57 2008 +0100
@@ -168,7 +168,7 @@ 1:
         .word   0,0,0
 lgdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
-        .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
+        .quad   boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
         
 wakeup_64:
         lgdt    lgdt_descr(%rip)
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S        Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/boot/x86_32.S        Mon Sep 22 13:46:57 2008 +0100
@@ -78,7 +78,7 @@ idt_descr:
         .word   0
 gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
-        .long   gdt_table - FIRST_RESERVED_GDT_BYTE
+        .long   boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
 
 
         .align 32
@@ -94,7 +94,7 @@ ENTRY(idle_pg_table)
 #define GUEST_DESC(d)                                                   \
         .long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff,                \
               ((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
-ENTRY(gdt_table)
+ENTRY(boot_cpu_gdt_table)
         .quad 0x0000000000000000     /* unused */
         .quad 0x00cf9a000000ffff     /* 0xe008 ring 0 4.00GB code at 0x0 */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 4.00GB data at 0x0 */
@@ -102,4 +102,6 @@ ENTRY(gdt_table)
         GUEST_DESC(0x00c0b200)       /* 0xe021 ring 1 3.xxGB data at 0x0 */
         GUEST_DESC(0x00c0fa00)       /* 0xe02b ring 3 3.xxGB code at 0x0 */
         GUEST_DESC(0x00c0f200)       /* 0xe033 ring 3 3.xxGB data at 0x0 */
+        .fill (PER_CPU_GDT_ENTRY - FLAT_RING3_DS / 8 - 1), 8, 0
+        .quad 0x0000910000000000     /* per-CPU entry (limit == cpu) */
         .align PAGE_SIZE,0
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S        Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/boot/x86_64.S        Mon Sep 22 13:46:57 2008 +0100
@@ -85,7 +85,7 @@ multiboot_ptr:
         .word   0
 gdt_descr:
         .word   LAST_RESERVED_GDT_BYTE
-        .quad   gdt_table - FIRST_RESERVED_GDT_BYTE
+        .quad   boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
 
         .word   0,0,0
 idt_descr:
@@ -96,7 +96,7 @@ ENTRY(stack_start)
         .quad   cpu0_stack
 
         .align PAGE_SIZE, 0
-ENTRY(gdt_table)
+ENTRY(boot_cpu_gdt_table)
         .quad 0x0000000000000000     /* unused */
         .quad 0x00af9a000000ffff     /* 0xe008 ring 0 code, 64-bit mode   */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 data                */
@@ -105,11 +105,13 @@ ENTRY(gdt_table)
         .quad 0x00cff2000000ffff     /* 0xe02b ring 3 data                */
         .quad 0x00affa000000ffff     /* 0xe033 ring 3 code, 64-bit mode   */
         .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
+        .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+        .quad 0x0000910000000000     /* per-CPU entry (limit == cpu)      */
 
         .align PAGE_SIZE, 0
 /* NB. Even rings != 0 get access to the full 4Gb, as only the            */
 /*     (compatibility) machine->physical mapping table lives there.       */
-ENTRY(compat_gdt_table)
+ENTRY(boot_cpu_compat_gdt_table)
         .quad 0x0000000000000000     /* unused */
         .quad 0x00af9a000000ffff     /* 0xe008 ring 0 code, 64-bit mode   */
         .quad 0x00cf92000000ffff     /* 0xe010 ring 0 data                */
@@ -118,4 +120,6 @@ ENTRY(compat_gdt_table)
         .quad 0x00cffa000000ffff     /* 0xe02b ring 3 code, compatibility */
         .quad 0x00cff2000000ffff     /* 0xe033 ring 3 data                */
         .quad 0x00cf9a000000ffff     /* 0xe038 ring 0 code, compatibility */
+        .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+        .quad 0x0000910000000000     /* per-CPU entry (limit == cpu)      */
         .align PAGE_SIZE, 0
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/cpu/common.c Mon Sep 22 13:46:57 2008 +0100
@@ -575,6 +575,9 @@ void __cpuinit cpu_init(void)
        if (cpu_has_pat)
                wrmsrl(MSR_IA32_CR_PAT, host_pat);
 
+       /* Install correct page table. */
+       write_ptbase(current);
+
        *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
        *(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(current);
        asm volatile ( "lgdt %0" : "=m" (gdt_load) );
@@ -605,9 +608,6 @@ void __cpuinit cpu_init(void)
 #define CD(register) asm volatile ( "mov %0,%%db" #register : : "r"(0UL) );
        CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
 #undef CD
-
-       /* Install correct page table. */
-       write_ptbase(current);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/domain.c     Mon Sep 22 13:46:57 2008 +0100
@@ -211,7 +211,6 @@ static inline int may_switch_mode(struct
 
 int switch_native(struct domain *d)
 {
-    l1_pgentry_t gdt_l1e;
     unsigned int vcpuid;
 
     if ( d == NULL )
@@ -223,12 +222,8 @@ int switch_native(struct domain *d)
 
     d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
 
-    /* switch gdt */
-    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
     for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
     {
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
         if (d->vcpu[vcpuid])
             release_compat_l4(d->vcpu[vcpuid]);
     }
@@ -238,7 +233,6 @@ int switch_native(struct domain *d)
 
 int switch_compat(struct domain *d)
 {
-    l1_pgentry_t gdt_l1e;
     unsigned int vcpuid;
 
     if ( d == NULL )
@@ -250,15 +244,11 @@ int switch_compat(struct domain *d)
 
     d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
 
-    /* switch gdt */
-    gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
     for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
     {
         if ( (d->vcpu[vcpuid] != NULL) &&
              (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
             goto undo_and_fail;
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
     }
 
     domain_set_alloc_bitsize(d);
@@ -267,13 +257,10 @@ int switch_compat(struct domain *d)
 
  undo_and_fail:
     d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
-    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
     while ( vcpuid-- != 0 )
     {
         if ( d->vcpu[vcpuid] != NULL )
             release_compat_l4(d->vcpu[vcpuid]);
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
     }
     return -ENOMEM;
 }
@@ -322,7 +309,12 @@ int vcpu_initialise(struct vcpu *v)
         if ( is_idle_domain(d) )
         {
             v->arch.schedule_tail = continue_idle_domain;
-            v->arch.cr3           = __pa(idle_pg_table);
+            if ( v->vcpu_id )
+                v->arch.cr3 = d->vcpu[0]->arch.cr3;
+            else if ( !*idle_vcpu )
+                v->arch.cr3 = __pa(idle_pg_table);
+            else if ( !(v->arch.cr3 = clone_idle_pagetable(v)) )
+                return -ENOMEM;
         }
 
         v->arch.guest_context.ctrlreg[4] =
@@ -349,8 +341,7 @@ int arch_domain_create(struct domain *d,
 #ifdef __x86_64__
     struct page_info *pg;
 #endif
-    l1_pgentry_t gdt_l1e;
-    int i, vcpuid, pdpt_order, paging_initialised = 0;
+    int i, pdpt_order, paging_initialised = 0;
     int rc = -ENOMEM;
 
     d->arch.hvm_domain.hap_enabled =
@@ -368,18 +359,6 @@ int arch_domain_create(struct domain *d,
     if ( d->arch.mm_perdomain_pt == NULL )
         goto fail;
     memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
-
-    /*
-     * Map Xen segments into every VCPU's GDT, irrespective of whether every
-     * VCPU will actually be used. This avoids an NMI race during context
-     * switch: if we take an interrupt after switching CR3 but before switching
-     * GDT, and the old VCPU# is invalid in the new domain, we would otherwise
-     * try to load CS from an invalid table.
-     */
-    gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
-    for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
-        d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
-                                 FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
 
 #if defined(__i386__)
 
@@ -1193,9 +1172,12 @@ static void __context_switch(void)
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
-    unsigned int          cpu = smp_processor_id();
+    unsigned int          i, cpu = smp_processor_id();
     struct vcpu          *p = per_cpu(curr_vcpu, cpu);
     struct vcpu          *n = current;
+    struct desc_struct   *gdt;
+    struct page_info     *page;
+    struct desc_ptr       gdt_desc;
 
     ASSERT(p != n);
     ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
@@ -1221,14 +1203,30 @@ static void __context_switch(void)
         cpu_set(cpu, n->domain->domain_dirty_cpumask);
     cpu_set(cpu, n->vcpu_dirty_cpumask);
 
+    gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu) :
+                                  per_cpu(compat_gdt_table, cpu);
+    page = virt_to_page(gdt);
+    for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+    {
+        l1e_write(n->domain->arch.mm_perdomain_pt +
+                  (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+                  FIRST_RESERVED_GDT_PAGE + i,
+                  l1e_from_page(page + i, __PAGE_HYPERVISOR));
+    }
+
+    if ( p->vcpu_id != n->vcpu_id )
+    {
+        gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
+        gdt_desc.base  = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
+        asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+    }
+
     write_ptbase(n);
 
     if ( p->vcpu_id != n->vcpu_id )
     {
-        char gdt_load[10];
-        *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
-        *(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(n);
-        asm volatile ( "lgdt %0" : "=m" (gdt_load) );
+        gdt_desc.base = GDT_VIRT_START(n);
+        asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
     }
 
     if ( p->domain != n->domain )
@@ -1279,8 +1277,6 @@ void context_switch(struct vcpu *prev, s
             uint64_t efer = read_efer();
             if ( !(efer & EFER_SCE) )
                 write_efer(efer | EFER_SCE);
-            flush_tlb_one_local(GDT_VIRT_START(next) +
-                                FIRST_RESERVED_GDT_BYTE);
         }
 #endif
 
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/domain_build.c       Mon Sep 22 13:46:57 2008 +0100
@@ -314,24 +314,11 @@ int __init construct_dom0(
 #if defined(__x86_64__)
     if ( compat32 )
     {
-        l1_pgentry_t gdt_l1e;
-
         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
         v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];
 
         if ( nr_pages != (unsigned int)nr_pages )
             nr_pages = UINT_MAX;
-
-        /*
-         * Map compatibility Xen segments into every VCPU's GDT. See
-         * arch_domain_create() for further comments.
-         */
-        gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
-                                PAGE_HYPERVISOR);
-        for ( i = 0; i < MAX_VIRT_CPUS; i++ )
-            d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
-                                     FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
-        flush_tlb_one_local(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
     }
 #endif
 
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Mon Sep 22 13:46:57 2008 +0100
@@ -446,7 +446,7 @@ static void vmx_set_host_env(struct vcpu
 
     __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
 
-    __vmwrite(HOST_TR_SELECTOR, __TSS(cpu) << 3);
+    __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
     __vmwrite(HOST_TR_BASE, (unsigned long)&init_tss[cpu]);
 
     __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/setup.c      Mon Sep 22 13:46:57 2008 +0100
@@ -115,6 +115,12 @@ extern void vesa_init(void);
 extern void vesa_init(void);
 extern void vesa_mtrr_init(void);
 
+DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
+#ifdef CONFIG_COMPAT
+DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
+    = boot_cpu_compat_gdt_table;
+#endif
+
 struct tss_struct init_tss[NR_CPUS];
 
char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
@@ -224,6 +230,7 @@ static void __init init_idle_domain(void
 static void __init init_idle_domain(void)
 {
     struct domain *idle_domain;
+    unsigned int i;
 
     /* Domain creation requires that scheduler structures are initialised. */
     scheduler_init();
@@ -236,6 +243,12 @@ static void __init init_idle_domain(void
     idle_vcpu[0] = this_cpu(curr_vcpu) = current;
 
     setup_idle_pagetable();
+
+    for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+        idle_domain->arch.mm_perdomain_pt[FIRST_RESERVED_GDT_PAGE + i] =
+            l1e_from_page(virt_to_page(boot_cpu_gdt_table) + i,
+                          __PAGE_HYPERVISOR);
+
 }
 
 static void __init srat_detect_node(int cpu)
@@ -443,7 +456,6 @@ void __init __start_xen(unsigned long mb
     parse_video_info();
 
     set_current((struct vcpu *)0xfffff000); /* debug sanity */
-    idle_vcpu[0] = current;
     set_processor_id(0); /* needed early, for smp_processor_id() */
     if ( cpu_has_efer )
         rdmsrl(MSR_EFER, this_cpu(efer));
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/smpboot.c    Mon Sep 22 13:46:57 2008 +0100
@@ -836,10 +836,15 @@ static int __devinit do_boot_cpu(int api
  */
 {
        unsigned long boot_error;
+       unsigned int i;
        int timeout;
        unsigned long start_eip;
        unsigned short nmi_high = 0, nmi_low = 0;
        struct vcpu *v;
+       struct desc_struct *gdt;
+#ifdef __x86_64__
+        struct page_info *page;
+#endif
 
        /*
         * Save current MTRR state in case it was changed since early boot
@@ -864,6 +869,37 @@ static int __devinit do_boot_cpu(int api
 
        /* Debug build: detect stack overflow by setting up a guard page. */
        memguard_guard_stack(stack_start.esp);
+
+       gdt = per_cpu(gdt_table, cpu);
+       if (gdt == boot_cpu_gdt_table) {
+               i = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+#ifdef __x86_64__
+#ifdef CONFIG_COMPAT
+               page = alloc_domheap_pages(NULL, i,
+                                          MEMF_node(cpu_to_node(cpu)));
+               per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
+               memcpy(gdt, boot_cpu_compat_gdt_table,
+                      NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+               gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+#endif
+               page = alloc_domheap_pages(NULL, i,
+                                          MEMF_node(cpu_to_node(cpu)));
+               per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
+#else
+               per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(i);
+#endif
+               memcpy(gdt, boot_cpu_gdt_table,
+                      NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+               BUILD_BUG_ON(NR_CPUS > 0x10000);
+               gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+       }
+
+       for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+               v->domain->arch.mm_perdomain_pt
+                       [(v->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+                        FIRST_RESERVED_GDT_PAGE + i]
+                       = l1e_from_page(virt_to_page(gdt) + i,
+                                       __PAGE_HYPERVISOR);
 
        /*
         * This grunge runs the startup process for
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/traps.c      Mon Sep 22 13:46:57 2008 +0100
@@ -2965,13 +2965,13 @@ void set_tss_desc(unsigned int n, void *
 void set_tss_desc(unsigned int n, void *addr)
 {
     _set_tssldt_desc(
-        gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+        per_cpu(gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
         (unsigned long)addr,
         offsetof(struct tss_struct, __cacheline_filler) - 1,
         9);
 #ifdef CONFIG_COMPAT
     _set_tssldt_desc(
-        compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+        per_cpu(compat_gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
         (unsigned long)addr,
         offsetof(struct tss_struct, __cacheline_filler) - 1,
         11);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c  Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_32/mm.c  Mon Sep 22 13:46:57 2008 +0100
@@ -132,6 +132,30 @@ void __init setup_idle_pagetable(void)
                                 __PAGE_HYPERVISOR));
 }
 
+unsigned long clone_idle_pagetable(struct vcpu *v)
+{
+    unsigned int i;
+    struct domain *d = v->domain;
+    l3_pgentry_t *l3_table = v->arch.pae_l3_cache.table[0];
+    l2_pgentry_t *l2_table = alloc_xenheap_page();
+
+    if ( !l2_table )
+        return 0;
+
+    memcpy(l3_table, idle_pg_table, L3_PAGETABLE_ENTRIES * sizeof(*l3_table));
+    l3_table[l3_table_offset(PERDOMAIN_VIRT_START)] =
+        l3e_from_page(virt_to_page(l2_table), _PAGE_PRESENT);
+
+    copy_page(l2_table, idle_pg_table_l2 +
+              l3_table_offset(PERDOMAIN_VIRT_START) * L2_PAGETABLE_ENTRIES);
+    for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
+        l2_table[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+            l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
+                          __PAGE_HYPERVISOR);
+
+    return __pa(l3_table);
+}
+
 void __init zap_low_mappings(l2_pgentry_t *dom0_l2)
 {
     int i;
@@ -186,7 +210,7 @@ void __init subarch_init_memory(void)
     {
         /* Guest kernel runs in ring 0, not ring 1. */
         struct desc_struct *d;
-        d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
+        d = &boot_cpu_gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
         d[0].b &= ~_SEGMENT_DPL;
         d[1].b &= ~_SEGMENT_DPL;
     }
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S      Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S      Mon Sep 22 13:46:57 2008 +0100
@@ -100,15 +100,10 @@ ENTRY(fixup_ring0_guest_stack)
         # %gs:%esi now points to the guest stack before the
         # interrupt/exception occured.
 
-        /*
-         * Reverse the __TSS macro, giving us the CPU number.
-         * The TSS for this cpu is at init_tss + ( cpu * 128 ).
-         */
-        str   %ecx
-        shrl  $3,%ecx                                   # Calculate GDT index for TSS.
-        subl  $(FIRST_RESERVED_GDT_ENTRY+8),%ecx        # %ecx = 2*cpu.
-        shll  $6,%ecx                                   # Each TSS entry is 0x80 bytes
-        addl  $init_tss,%ecx                            # but we have 2*cpu from above.
+        movl  $PER_CPU_GDT_ENTRY*8,%ecx
+        lsll  %ecx,%ecx
+        shll  $7,%ecx                                   # Each TSS entry is 0x80 bytes
+        addl  $init_tss,%ecx
 
         # Load Xen stack from TSS.
         movw  TSS_ss0(%ecx),%ax
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c       Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_32/traps.c       Mon Sep 22 13:46:57 2008 +0100
@@ -194,12 +194,14 @@ static unsigned char doublefault_stack[D
 
 asmlinkage void do_double_fault(void)
 {
-    struct tss_struct *tss = &doublefault_tss;
-    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
+    struct tss_struct *tss;
+    unsigned int cpu;
 
     watchdog_disable();
 
     console_force_unlock();
+
+    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
 
     /* Find information saved during fault and dump it to the console. */
     tss = &init_tss[cpu];
@@ -325,7 +327,7 @@ void __devinit subarch_percpu_traps_init
     tss->eflags = 2;
     tss->bitmap = IOBMP_INVALID_OFFSET;
     _set_tssldt_desc(
-        gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+        boot_cpu_gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
         (unsigned long)tss, 235, 9);
 
     set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c  Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_64/mm.c  Mon Sep 22 13:46:57 2008 +0100
@@ -21,6 +21,7 @@
 #include <xen/lib.h>
 #include <xen/init.h>
 #include <xen/mm.h>
+#include <xen/numa.h>
 #include <xen/sched.h>
 #include <xen/guest_access.h>
 #include <asm/current.h>
@@ -206,6 +207,24 @@ void __init setup_idle_pagetable(void)
                   __PAGE_HYPERVISOR));
 }
 
+unsigned long clone_idle_pagetable(struct vcpu *v)
+{
+    struct domain *d = v->domain;
+    struct page_info *page = alloc_domheap_page(NULL,
+                                                MEMF_node(vcpu_to_node(v)));
+    l4_pgentry_t *l4_table = page_to_virt(page);
+
+    if ( !page )
+        return 0;
+
+    copy_page(l4_table, idle_pg_table);
+    l4_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
+        l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
+                      __PAGE_HYPERVISOR);
+
+    return __pa(l4_table);
+}
+
 void __init zap_low_mappings(void)
 {
     BUG_ON(num_online_cpus() != 1);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_64/traps.c       Mon Sep 22 13:46:57 2008 +0100
@@ -213,14 +213,13 @@ asmlinkage void double_fault(void);
 asmlinkage void double_fault(void);
 asmlinkage void do_double_fault(struct cpu_user_regs *regs)
 {
-    unsigned int cpu, tr;
-
-    asm volatile ( "str %0" : "=r" (tr) );
-    cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
+    unsigned int cpu;
 
     watchdog_disable();
 
     console_force_unlock();
+
+    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
 
     /* Find information saved during fault and dump it to the console. */
     printk("*** DOUBLE FAULT ***\n");
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h        Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/include/asm-x86/desc.h        Mon Sep 22 13:46:57 2008 +0100
@@ -34,11 +34,9 @@
 #define FLAT_COMPAT_USER_CS   FLAT_COMPAT_RING3_CS
 #define FLAT_COMPAT_USER_SS   FLAT_COMPAT_RING3_SS
 
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
-
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 2)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 2)
 
 #elif defined(__i386__)
 
@@ -51,17 +49,15 @@
 
 #define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
 
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
-
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 1)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 1)
 
 #endif
 
 #ifndef __ASSEMBLY__
 
-#define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (__TSS(n)<<3) )
+#define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (TSS_ENTRY<<3) )
 
 #if defined(__x86_64__)
 #define GUEST_KERNEL_RPL(d) (is_pv_32bit_domain(d) ? 1 : 3)
@@ -205,11 +201,19 @@ do {                                    
 
 #endif
 
-extern struct desc_struct gdt_table[];
+struct desc_ptr {
+       unsigned short limit;
+       unsigned long base;
+} __attribute__((__packed__)) ;
+
+extern struct desc_struct boot_cpu_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, gdt_table);
 #ifdef CONFIG_COMPAT
-extern struct desc_struct compat_gdt_table[];
-#else
-# define compat_gdt_table gdt_table
+extern struct desc_struct boot_cpu_compat_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, compat_gdt_table);
+#else
+# define boot_cpu_compat_gdt_table boot_cpu_gdt_table
+# define per_cpu__compat_gdt_table per_cpu__gdt_table
 #endif
 
 extern void set_intr_gate(unsigned int irq, void * addr);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/include/asm-x86/ldt.h
--- a/xen/include/asm-x86/ldt.h Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/include/asm-x86/ldt.h Mon Sep 22 13:46:57 2008 +0100
@@ -6,7 +6,6 @@
 
 static inline void load_LDT(struct vcpu *v)
 {
-    unsigned int cpu;
     struct desc_struct *desc;
     unsigned long ents;
 
@@ -16,11 +15,11 @@ static inline void load_LDT(struct vcpu 
     }
     else
     {
-        cpu = smp_processor_id();
-        desc = (!is_pv_32on64_vcpu(v) ? gdt_table : compat_gdt_table)
-               + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
+        desc = (!is_pv_32on64_vcpu(v)
+                ? this_cpu(gdt_table) : this_cpu(compat_gdt_table))
+               + LDT_ENTRY - FIRST_RESERVED_GDT_ENTRY;
         _set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
-        __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
+        __asm__ __volatile__ ( "lldt %%ax" : : "a" (LDT_ENTRY << 3) );
     }
 }
 
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h        Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/include/asm-x86/page.h        Mon Sep 22 13:46:57 2008 +0100
@@ -278,6 +278,7 @@ extern unsigned int   m2p_compat_vstart;
 #endif
 void paging_init(void);
 void setup_idle_pagetable(void);
+unsigned long clone_idle_pagetable(struct vcpu *);
 #endif /* !defined(__ASSEMBLY__) */
 
 #define _PAGE_PRESENT  0x001U

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] x86: make GDT per-CPU, Xen patchbot-unstable <=