# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1222087617 -3600
# Node ID 7f1a36b834e183904f069948d3037d50492d98d2
# Parent 3c42b5ad0a4f607749426f82ecf11f75d84699c5
x86: make GDT per-CPU
The major obstacle to supporting a significantly larger number of
physical CPUs is the use of per-CPU GDT entries: at present, x86-64
can support only up to 126 CPUs (or 254 with code changes to also use
the top-most GDT page). Instead of taking incremental steps here,
converting the GDT itself to be per-CPU makes that limitation go away
entirely.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
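A note on the mechanism: the new PER_CPU_GDT_ENTRY descriptor (the
".quad 0x0000910000000000" lines below) is a present, ring-0,
read-only data segment whose 16-bit limit field gets stamped with the
CPU number at boot ("gdt[...].a = cpu", hence the
BUILD_BUG_ON(NR_CPUS > 0x10000)). Any context can then recover its
CPU number with "lsll" on that selector, replacing the old scheme of
deriving it from the TSS selector via "str". A minimal stand-alone C
sketch of the idea -- the struct and helper names here are
illustrative, not the patch's own:

    #include <stdint.h>

    /* Illustrative stand-in for the patch's desc_struct. */
    struct seg_desc { uint32_t a, b; };

    /*
     * 0x0000910000000000: present, DPL 0, byte-granular, read-only
     * data segment with base 0 and limit 0. Stamping the CPU number
     * into the low 16 limit bits is what the boot paths below do.
     */
    static void stamp_cpu_number(struct seg_desc *e, unsigned int cpu)
    {
        e->a = cpu;          /* limit[15:0] = cpu; base[15:0] stays 0 */
        e->b = 0x00009100u;  /* access byte 0x91; limit[19:16] = 0; G = 0 */
    }

    /* LSL reads a descriptor's limit back through its selector, so
     * any context can ask "which CPU am I on?". */
    static unsigned int cpu_from_gdt(unsigned int per_cpu_selector)
    {
        unsigned int cpu;
        asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (per_cpu_selector) );
        return cpu;
    }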
xen/arch/x86/boot/wakeup.S | 2
xen/arch/x86/boot/x86_32.S | 6 +-
xen/arch/x86/boot/x86_64.S | 10 ++--
xen/arch/x86/cpu/common.c | 6 +-
xen/arch/x86/domain.c | 66 ++++++++++++---------------
xen/arch/x86/domain_build.c | 13 -----
xen/arch/x86/hvm/vmx/vmcs.c | 2
xen/arch/x86/setup.c | 14 +++++
xen/arch/x86/smpboot.c | 36 ++++++++++++++
xen/arch/x86/traps.c | 4 -
xen/arch/x86/x86_32/mm.c | 26 ++++++++++
xen/arch/x86/x86_32/supervisor_mode_kernel.S | 13 +----
xen/arch/x86/x86_32/traps.c | 8 ++-
xen/arch/x86/x86_64/mm.c | 19 +++++++
xen/arch/x86/x86_64/traps.c | 7 +-
xen/include/asm-x86/desc.h | 34 +++++++------
xen/include/asm-x86/ldt.h | 9 +--
xen/include/asm-x86/page.h | 1
18 files changed, 178 insertions(+), 98 deletions(-)
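Also worth calling out before the diff: with the GDT now living in
per-CPU memory, the ordering of the GDT reload against the page-table
switch in __context_switch() is the subtle part. A condensed,
commented sketch of the sequence the xen/arch/x86/domain.c hunk
implements (names as in the patch; this is illustration, not a
drop-in copy):

    /* Map this CPU's GDT pages into n's per-domain area, so that the
     * fixed virtual address GDT_VIRT_START(n) resolves to them once
     * n's page tables are active. */
    gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu)
                                : per_cpu(compat_gdt_table, cpu);
    for ( i = 0; i < NR_RESERVED_GDT_PAGES; ++i )
        l1e_write(n->domain->arch.mm_perdomain_pt +
                  (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
                  FIRST_RESERVED_GDT_PAGE + i,
                  l1e_from_page(virt_to_page(gdt) + i, __PAGE_HYPERVISOR));

    if ( p->vcpu_id != n->vcpu_id )
    {
        /* First reload the GDT via its Xen-heap/directmap address,
         * which remains valid across the CR3 switch below... */
        gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
        gdt_desc.base  = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
        asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
    }

    write_ptbase(n);                      /* switch page tables */

    if ( p->vcpu_id != n->vcpu_id )
    {
        /* ...then re-point it at the per-domain virtual mapping that
         * guest-visible descriptors and the LDT slot expect. */
        gdt_desc.base = GDT_VIRT_START(n);
        asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
    }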
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/boot/wakeup.S
--- a/xen/arch/x86/boot/wakeup.S Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/boot/wakeup.S Mon Sep 22 13:46:57 2008 +0100
@@ -168,7 +168,7 @@ 1:
.word 0,0,0
lgdt_descr:
.word LAST_RESERVED_GDT_BYTE
- .quad gdt_table - FIRST_RESERVED_GDT_BYTE
+ .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
wakeup_64:
lgdt lgdt_descr(%rip)
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/boot/x86_32.S Mon Sep 22 13:46:57 2008 +0100
@@ -78,7 +78,7 @@ idt_descr:
.word 0
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
- .long gdt_table - FIRST_RESERVED_GDT_BYTE
+ .long boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
.align 32
@@ -94,7 +94,7 @@ ENTRY(idle_pg_table)
#define GUEST_DESC(d) \
.long ((MACH2PHYS_VIRT_END - 1) >> 12) & 0xffff, \
((MACH2PHYS_VIRT_END - 1) >> 12) & (0xf << 16) | (d)
-ENTRY(gdt_table)
+ENTRY(boot_cpu_gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00cf9a000000ffff /* 0xe008 ring 0 4.00GB code at 0x0 */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 4.00GB data at 0x0 */
@@ -102,4 +102,6 @@ ENTRY(gdt_table)
GUEST_DESC(0x00c0b200) /* 0xe021 ring 1 3.xxGB data at 0x0 */
GUEST_DESC(0x00c0fa00) /* 0xe02b ring 3 3.xxGB code at 0x0 */
GUEST_DESC(0x00c0f200) /* 0xe033 ring 3 3.xxGB data at 0x0 */
+ .fill (PER_CPU_GDT_ENTRY - FLAT_RING3_DS / 8 - 1), 8, 0
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
.align PAGE_SIZE,0
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/boot/x86_64.S Mon Sep 22 13:46:57 2008 +0100
@@ -85,7 +85,7 @@ multiboot_ptr:
.word 0
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
- .quad gdt_table - FIRST_RESERVED_GDT_BYTE
+ .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
.word 0,0,0
idt_descr:
@@ -96,7 +96,7 @@ ENTRY(stack_start)
.quad cpu0_stack
.align PAGE_SIZE, 0
-ENTRY(gdt_table)
+ENTRY(boot_cpu_gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
@@ -105,11 +105,13 @@ ENTRY(gdt_table)
.quad 0x00cff2000000ffff /* 0xe02b ring 3 data */
.quad 0x00affa000000ffff /* 0xe033 ring 3 code, 64-bit mode */
.quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
.align PAGE_SIZE, 0
/* NB. Even rings != 0 get access to the full 4Gb, as only the */
/* (compatibility) machine->physical mapping table lives there. */
-ENTRY(compat_gdt_table)
+ENTRY(boot_cpu_compat_gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00af9a000000ffff /* 0xe008 ring 0 code, 64-bit mode */
.quad 0x00cf92000000ffff /* 0xe010 ring 0 data */
@@ -118,4 +120,6 @@ ENTRY(compat_gdt_table)
.quad 0x00cffa000000ffff /* 0xe02b ring 3 code, compatibility */
.quad 0x00cff2000000ffff /* 0xe033 ring 3 data */
.quad 0x00cf9a000000ffff /* 0xe038 ring 0 code, compatibility */
+ .fill (PER_CPU_GDT_ENTRY - __HYPERVISOR_CS32 / 8 - 1), 8, 0
+ .quad 0x0000910000000000 /* per-CPU entry (limit == cpu) */
.align PAGE_SIZE, 0
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/cpu/common.c Mon Sep 22 13:46:57 2008 +0100
@@ -575,6 +575,9 @@ void __cpuinit cpu_init(void)
if (cpu_has_pat)
wrmsrl(MSR_IA32_CR_PAT, host_pat);
+ /* Install correct page table. */
+ write_ptbase(current);
+
*(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
*(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(current);
asm volatile ( "lgdt %0" : "=m" (gdt_load) );
@@ -605,9 +608,6 @@ void __cpuinit cpu_init(void)
#define CD(register) asm volatile ( "mov %0,%%db" #register : : "r"(0UL) );
CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
#undef CD
-
- /* Install correct page table. */
- write_ptbase(current);
}
#ifdef CONFIG_HOTPLUG_CPU
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/domain.c Mon Sep 22 13:46:57 2008 +0100
@@ -211,7 +211,6 @@ static inline int may_switch_mode(struct
int switch_native(struct domain *d)
{
- l1_pgentry_t gdt_l1e;
unsigned int vcpuid;
if ( d == NULL )
@@ -223,12 +222,8 @@ int switch_native(struct domain *d)
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
- /* switch gdt */
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
if (d->vcpu[vcpuid])
release_compat_l4(d->vcpu[vcpuid]);
}
@@ -238,7 +233,6 @@ int switch_native(struct domain *d)
int switch_compat(struct domain *d)
{
- l1_pgentry_t gdt_l1e;
unsigned int vcpuid;
if ( d == NULL )
@@ -250,15 +244,11 @@ int switch_compat(struct domain *d)
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
- /* switch gdt */
- gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
if ( (d->vcpu[vcpuid] != NULL) &&
(setup_compat_l4(d->vcpu[vcpuid]) != 0) )
goto undo_and_fail;
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
}
domain_set_alloc_bitsize(d);
@@ -267,13 +257,10 @@ int switch_compat(struct domain *d)
undo_and_fail:
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
while ( vcpuid-- != 0 )
{
if ( d->vcpu[vcpuid] != NULL )
release_compat_l4(d->vcpu[vcpuid]);
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
}
return -ENOMEM;
}
@@ -322,7 +309,12 @@ int vcpu_initialise(struct vcpu *v)
if ( is_idle_domain(d) )
{
v->arch.schedule_tail = continue_idle_domain;
- v->arch.cr3 = __pa(idle_pg_table);
+ if ( v->vcpu_id )
+ v->arch.cr3 = d->vcpu[0]->arch.cr3;
+ else if ( !*idle_vcpu )
+ v->arch.cr3 = __pa(idle_pg_table);
+ else if ( !(v->arch.cr3 = clone_idle_pagetable(v)) )
+ return -ENOMEM;
}
v->arch.guest_context.ctrlreg[4] =
@@ -349,8 +341,7 @@ int arch_domain_create(struct domain *d,
#ifdef __x86_64__
struct page_info *pg;
#endif
- l1_pgentry_t gdt_l1e;
- int i, vcpuid, pdpt_order, paging_initialised = 0;
+ int i, pdpt_order, paging_initialised = 0;
int rc = -ENOMEM;
d->arch.hvm_domain.hap_enabled =
@@ -368,18 +359,6 @@ int arch_domain_create(struct domain *d,
if ( d->arch.mm_perdomain_pt == NULL )
goto fail;
memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
-
- /*
- * Map Xen segments into every VCPU's GDT, irrespective of whether every
- * VCPU will actually be used. This avoids an NMI race during context
- * switch: if we take an interrupt after switching CR3 but before switching
- * GDT, and the old VCPU# is invalid in the new domain, we would otherwise
- * try to load CS from an invalid table.
- */
- gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
- for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
- d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
#if defined(__i386__)
@@ -1193,9 +1172,12 @@ static void __context_switch(void)
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int cpu = smp_processor_id();
+ unsigned int i, cpu = smp_processor_id();
struct vcpu *p = per_cpu(curr_vcpu, cpu);
struct vcpu *n = current;
+ struct desc_struct *gdt;
+ struct page_info *page;
+ struct desc_ptr gdt_desc;
ASSERT(p != n);
ASSERT(cpus_empty(n->vcpu_dirty_cpumask));
@@ -1221,14 +1203,30 @@ static void __context_switch(void)
cpu_set(cpu, n->domain->domain_dirty_cpumask);
cpu_set(cpu, n->vcpu_dirty_cpumask);
+ gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu) :
+ per_cpu(compat_gdt_table, cpu);
+ page = virt_to_page(gdt);
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ {
+ l1e_write(n->domain->arch.mm_perdomain_pt +
+ (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE + i,
+ l1e_from_page(page + i, __PAGE_HYPERVISOR));
+ }
+
+ if ( p->vcpu_id != n->vcpu_id )
+ {
+ gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
+ gdt_desc.base = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+ }
+
write_ptbase(n);
if ( p->vcpu_id != n->vcpu_id )
{
- char gdt_load[10];
- *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
- *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(n);
- asm volatile ( "lgdt %0" : "=m" (gdt_load) );
+ gdt_desc.base = GDT_VIRT_START(n);
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
}
if ( p->domain != n->domain )
@@ -1279,8 +1277,6 @@ void context_switch(struct vcpu *prev, s
uint64_t efer = read_efer();
if ( !(efer & EFER_SCE) )
write_efer(efer | EFER_SCE);
- flush_tlb_one_local(GDT_VIRT_START(next) +
- FIRST_RESERVED_GDT_BYTE);
}
#endif
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/domain_build.c Mon Sep 22 13:46:57 2008 +0100
@@ -314,24 +314,11 @@ int __init construct_dom0(
#if defined(__x86_64__)
if ( compat32 )
{
- l1_pgentry_t gdt_l1e;
-
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
v->vcpu_info = (void *)&d->shared_info->compat.vcpu_info[0];
if ( nr_pages != (unsigned int)nr_pages )
nr_pages = UINT_MAX;
-
- /*
- * Map compatibility Xen segments into every VCPU's GDT. See
- * arch_domain_create() for further comments.
- */
- gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table),
- PAGE_HYPERVISOR);
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
- flush_tlb_one_local(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
}
#endif
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Sep 22 13:46:57 2008 +0100
@@ -446,7 +446,7 @@ static void vmx_set_host_env(struct vcpu
__vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
- __vmwrite(HOST_TR_SELECTOR, __TSS(cpu) << 3);
+ __vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
__vmwrite(HOST_TR_BASE, (unsigned long)&init_tss[cpu]);
__vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/setup.c Mon Sep 22 13:46:57 2008 +0100
@@ -115,6 +115,12 @@ extern void vesa_init(void);
extern void vesa_init(void);
extern void vesa_mtrr_init(void);
+DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
+#ifdef CONFIG_COMPAT
+DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
+ = boot_cpu_compat_gdt_table;
+#endif
+
struct tss_struct init_tss[NR_CPUS];
char __attribute__ ((__section__(".bss.stack_aligned")))
cpu0_stack[STACK_SIZE];
@@ -224,6 +230,7 @@ static void __init init_idle_domain(void
static void __init init_idle_domain(void)
{
struct domain *idle_domain;
+ unsigned int i;
/* Domain creation requires that scheduler structures are initialised. */
scheduler_init();
@@ -236,6 +243,12 @@ static void __init init_idle_domain(void
idle_vcpu[0] = this_cpu(curr_vcpu) = current;
setup_idle_pagetable();
+
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ idle_domain->arch.mm_perdomain_pt[FIRST_RESERVED_GDT_PAGE + i] =
+ l1e_from_page(virt_to_page(boot_cpu_gdt_table) + i,
+ __PAGE_HYPERVISOR);
+
}
static void __init srat_detect_node(int cpu)
@@ -443,7 +456,6 @@ void __init __start_xen(unsigned long mb
parse_video_info();
set_current((struct vcpu *)0xfffff000); /* debug sanity */
- idle_vcpu[0] = current;
set_processor_id(0); /* needed early, for smp_processor_id() */
if ( cpu_has_efer )
rdmsrl(MSR_EFER, this_cpu(efer));
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/smpboot.c Mon Sep 22 13:46:57 2008 +0100
@@ -836,10 +836,15 @@ static int __devinit do_boot_cpu(int api
*/
{
unsigned long boot_error;
+ unsigned int i;
int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
struct vcpu *v;
+ struct desc_struct *gdt;
+#ifdef __x86_64__
+ struct page_info *page;
+#endif
/*
* Save current MTRR state in case it was changed since early boot
@@ -864,6 +869,37 @@ static int __devinit do_boot_cpu(int api
/* Debug build: detect stack overflow by setting up a guard page. */
memguard_guard_stack(stack_start.esp);
+
+ gdt = per_cpu(gdt_table, cpu);
+ if (gdt == boot_cpu_gdt_table) {
+ i = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+#ifdef __x86_64__
+#ifdef CONFIG_COMPAT
+ page = alloc_domheap_pages(NULL, i,
+ MEMF_node(cpu_to_node(cpu)));
+ per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
+ memcpy(gdt, boot_cpu_compat_gdt_table,
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+#endif
+ page = alloc_domheap_pages(NULL, i,
+ MEMF_node(cpu_to_node(cpu)));
+ per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
+#else
+ per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(i);
+#endif
+ memcpy(gdt, boot_cpu_gdt_table,
+ NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+ BUILD_BUG_ON(NR_CPUS > 0x10000);
+ gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+ }
+
+ for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ v->domain->arch.mm_perdomain_pt
+ [(v->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE + i]
+ = l1e_from_page(virt_to_page(gdt) + i,
+ __PAGE_HYPERVISOR);
/*
* This grunge runs the startup process for
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/traps.c Mon Sep 22 13:46:57 2008 +0100
@@ -2965,13 +2965,13 @@ void set_tss_desc(unsigned int n, void *
void set_tss_desc(unsigned int n, void *addr)
{
_set_tssldt_desc(
- gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+ per_cpu(gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
(unsigned long)addr,
offsetof(struct tss_struct, __cacheline_filler) - 1,
9);
#ifdef CONFIG_COMPAT
_set_tssldt_desc(
- compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
+ per_cpu(compat_gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
(unsigned long)addr,
offsetof(struct tss_struct, __cacheline_filler) - 1,
11);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_32/mm.c Mon Sep 22 13:46:57 2008 +0100
@@ -132,6 +132,30 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
+unsigned long clone_idle_pagetable(struct vcpu *v)
+{
+ unsigned int i;
+ struct domain *d = v->domain;
+ l3_pgentry_t *l3_table = v->arch.pae_l3_cache.table[0];
+ l2_pgentry_t *l2_table = alloc_xenheap_page();
+
+ if ( !l2_table )
+ return 0;
+
+ memcpy(l3_table, idle_pg_table, L3_PAGETABLE_ENTRIES * sizeof(*l3_table));
+ l3_table[l3_table_offset(PERDOMAIN_VIRT_START)] =
+ l3e_from_page(virt_to_page(l2_table), _PAGE_PRESENT);
+
+ copy_page(l2_table, idle_pg_table_l2 +
+ l3_table_offset(PERDOMAIN_VIRT_START) * L2_PAGETABLE_ENTRIES);
+ for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
+ l2_table[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
+ l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
+ __PAGE_HYPERVISOR);
+
+ return __pa(l3_table);
+}
+
void __init zap_low_mappings(l2_pgentry_t *dom0_l2)
{
int i;
@@ -186,7 +210,7 @@ void __init subarch_init_memory(void)
{
/* Guest kernel runs in ring 0, not ring 1. */
struct desc_struct *d;
- d = &gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
+ d = &boot_cpu_gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
d[0].b &= ~_SEGMENT_DPL;
d[1].b &= ~_SEGMENT_DPL;
}
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Sep 22 13:46:57 2008 +0100
@@ -100,15 +100,10 @@ ENTRY(fixup_ring0_guest_stack)
# %gs:%esi now points to the guest stack before the
# interrupt/exception occured.
- /*
- * Reverse the __TSS macro, giving us the CPU number.
- * The TSS for this cpu is at init_tss + ( cpu * 128 ).
- */
- str %ecx
- shrl $3,%ecx # Calculate GDT index for TSS.
- subl $(FIRST_RESERVED_GDT_ENTRY+8),%ecx # %ecx = 2*cpu.
- shll $6,%ecx # Each TSS entry is 0x80 bytes
- addl $init_tss,%ecx # but we have 2*cpu from above.
+ movl $PER_CPU_GDT_ENTRY*8,%ecx
+ lsll %ecx,%ecx
+ shll $7,%ecx # Each TSS entry is 0x80 bytes
+ addl $init_tss,%ecx
# Load Xen stack from TSS.
movw TSS_ss0(%ecx),%ax
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_32/traps.c Mon Sep 22 13:46:57 2008 +0100
@@ -194,12 +194,14 @@ static unsigned char doublefault_stack[D
asmlinkage void do_double_fault(void)
{
- struct tss_struct *tss = &doublefault_tss;
- unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;
+ struct tss_struct *tss;
+ unsigned int cpu;
watchdog_disable();
console_force_unlock();
+
+ asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
/* Find information saved during fault and dump it to the console. */
tss = &init_tss[cpu];
@@ -325,7 +327,7 @@ void __devinit subarch_percpu_traps_init
tss->eflags = 2;
tss->bitmap = IOBMP_INVALID_OFFSET;
_set_tssldt_desc(
- gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+ boot_cpu_gdt_table + __DOUBLEFAULT_TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
(unsigned long)tss, 235, 9);
set_task_gate(TRAP_double_fault, __DOUBLEFAULT_TSS_ENTRY<<3);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_64/mm.c Mon Sep 22 13:46:57 2008 +0100
@@ -21,6 +21,7 @@
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
+#include <xen/numa.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
@@ -206,6 +207,24 @@ void __init setup_idle_pagetable(void)
__PAGE_HYPERVISOR));
}
+unsigned long clone_idle_pagetable(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ struct page_info *page = alloc_domheap_page(NULL,
+ MEMF_node(vcpu_to_node(v)));
+ l4_pgentry_t *l4_table = page_to_virt(page);
+
+ if ( !page )
+ return 0;
+
+ copy_page(l4_table, idle_pg_table);
+ l4_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
+ l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
+ __PAGE_HYPERVISOR);
+
+ return __pa(l4_table);
+}
+
void __init zap_low_mappings(void)
{
BUG_ON(num_online_cpus() != 1);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/arch/x86/x86_64/traps.c Mon Sep 22 13:46:57 2008 +0100
@@ -213,14 +213,13 @@ asmlinkage void double_fault(void);
asmlinkage void double_fault(void);
asmlinkage void do_double_fault(struct cpu_user_regs *regs)
{
- unsigned int cpu, tr;
-
- asm volatile ( "str %0" : "=r" (tr) );
- cpu = ((tr >> 3) - __FIRST_TSS_ENTRY) >> 2;
+ unsigned int cpu;
watchdog_disable();
console_force_unlock();
+
+ asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );
/* Find information saved during fault and dump it to the console. */
printk("*** DOUBLE FAULT ***\n");
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/include/asm-x86/desc.h Mon Sep 22 13:46:57 2008 +0100
@@ -34,11 +34,9 @@
#define FLAT_COMPAT_USER_CS FLAT_COMPAT_RING3_CS
#define FLAT_COMPAT_USER_SS FLAT_COMPAT_RING3_SS
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 2)
-
-#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 2)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 2)
#elif defined(__i386__)
@@ -51,17 +49,15 @@
#define __DOUBLEFAULT_TSS_ENTRY FIRST_RESERVED_GDT_ENTRY
-#define __FIRST_TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
-#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY + 1)
-
-#define __TSS(n) (((n)<<1) + __FIRST_TSS_ENTRY)
-#define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
+#define TSS_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
+#define LDT_ENTRY (TSS_ENTRY + 1)
+#define PER_CPU_GDT_ENTRY (LDT_ENTRY + 1)
#endif
#ifndef __ASSEMBLY__
-#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (__TSS(n)<<3) )
+#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (TSS_ENTRY<<3) )
#if defined(__x86_64__)
#define GUEST_KERNEL_RPL(d) (is_pv_32bit_domain(d) ? 1 : 3)
@@ -205,11 +201,19 @@ do {
#endif
-extern struct desc_struct gdt_table[];
+struct desc_ptr {
+ unsigned short limit;
+ unsigned long base;
+} __attribute__((__packed__)) ;
+
+extern struct desc_struct boot_cpu_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, gdt_table);
#ifdef CONFIG_COMPAT
-extern struct desc_struct compat_gdt_table[];
-#else
-# define compat_gdt_table gdt_table
+extern struct desc_struct boot_cpu_compat_gdt_table[];
+DECLARE_PER_CPU(struct desc_struct *, compat_gdt_table);
+#else
+# define boot_cpu_compat_gdt_table boot_cpu_gdt_table
+# define per_cpu__compat_gdt_table per_cpu__gdt_table
#endif
extern void set_intr_gate(unsigned int irq, void * addr);
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/include/asm-x86/ldt.h
--- a/xen/include/asm-x86/ldt.h Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/include/asm-x86/ldt.h Mon Sep 22 13:46:57 2008 +0100
@@ -6,7 +6,6 @@
static inline void load_LDT(struct vcpu *v)
{
- unsigned int cpu;
struct desc_struct *desc;
unsigned long ents;
@@ -16,11 +15,11 @@ static inline void load_LDT(struct vcpu
}
else
{
- cpu = smp_processor_id();
- desc = (!is_pv_32on64_vcpu(v) ? gdt_table : compat_gdt_table)
- + __LDT(cpu) - FIRST_RESERVED_GDT_ENTRY;
+ desc = (!is_pv_32on64_vcpu(v)
+ ? this_cpu(gdt_table) : this_cpu(compat_gdt_table))
+ + LDT_ENTRY - FIRST_RESERVED_GDT_ENTRY;
_set_tssldt_desc(desc, LDT_VIRT_START(v), ents*8-1, 2);
- __asm__ __volatile__ ( "lldt %%ax" : : "a" (__LDT(cpu)<<3) );
+ __asm__ __volatile__ ( "lldt %%ax" : : "a" (LDT_ENTRY << 3) );
}
}
diff -r 3c42b5ad0a4f -r 7f1a36b834e1 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Mon Sep 22 13:41:07 2008 +0100
+++ b/xen/include/asm-x86/page.h Mon Sep 22 13:46:57 2008 +0100
@@ -278,6 +278,7 @@ extern unsigned int m2p_compat_vstart;
#endif
void paging_init(void);
void setup_idle_pagetable(void);
+unsigned long clone_idle_pagetable(struct vcpu *);
#endif /* !defined(__ASSEMBLY__) */
#define _PAGE_PRESENT 0x001U