# HG changeset patch
# User Ian.Campbell@xxxxxxxxxxxxx
# Node ID 697fac283c9e565b4c9697c70a5529d06a488df9
# Parent f1c75df46b46cf2b71d949b63f988d8b781302b0
Add feature flag allowing guest kernel to run in domain 0.
To support this we give kernel GDT entries DPL=0 and use kernel
segment selectors with RPL=0. Xen will crunch these to ring 1 when
they are passed in. When a segment selector is used directly or placed
in a stack frame the guest OS is responsible for crunching the RPL.
Signed-off-by: Ian Campbell <Ian.Campbell@xxxxxxxxxxxxx>
diff -r f1c75df46b46 -r 697fac283c9e linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Wed Feb 22 18:40:19 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Wed Feb 22 19:11:23 2006
@@ -106,8 +106,8 @@
.quad 0x0000000000000000 /* 0x53 reserved */
.quad 0x0000000000000000 /* 0x5b reserved */
- .quad 0x00cfbb000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
- .quad 0x00cfb3000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
.quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */
.quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */
@@ -182,6 +182,7 @@
.ascii ",FEATURES=writable_page_tables"
.ascii "|writable_descriptor_tables"
.ascii "|auto_translated_physmap"
+ .ascii "|ring0_kernel"
#ifdef CONFIG_X86_PAE
.ascii ",PAE=yes"
#else
diff -r f1c75df46b46 -r 697fac283c9e linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Wed Feb 22 18:40:19 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c Wed Feb 22 19:11:23 2006
@@ -272,7 +272,7 @@
regs.xes = __USER_DS;
regs.orig_eax = -1;
regs.eip = (unsigned long) kernel_thread_helper;
- regs.xcs = __KERNEL_CS;
+ regs.xcs = GET_KERNEL_CS();
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
/* Ok, create the new process.. */
diff -r f1c75df46b46 -r 697fac283c9e linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Wed Feb 22 18:40:19 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c Wed Feb 22 19:11:23 2006
@@ -85,7 +85,7 @@
return eip + (seg << 4);
/* By far the most common cases. */
- if (likely(seg == __USER_CS || seg == __KERNEL_CS))
+ if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
return eip;
/* Check the segment exists, is within the current LDT/GDT size,
@@ -396,7 +396,7 @@
switch (error_code & 3) {
default: /* 3: write, present */
#ifdef TEST_VERIFY_AREA
- if (regs->cs == KERNEL_CS)
+ if (regs->cs == GET_KERNEL_CS())
printk("WP fault at %08lx\n", regs->eip);
#endif
/* fall through */
diff -r f1c75df46b46 -r 697fac283c9e linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h Wed Feb 22 18:40:19 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/segment.h Wed Feb 22 19:11:23 2006
@@ -60,10 +60,12 @@
#define GDT_ENTRY_KERNEL_BASE 12
#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8 + 1)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_ring0_kernel)?0:1) )
#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8 + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_ring0_kernel)?0:1) )
#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
diff -r f1c75df46b46 -r 697fac283c9e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Wed Feb 22 18:40:19 2006
+++ b/xen/arch/x86/domain.c Wed Feb 22 19:11:23 2006
@@ -356,7 +356,8 @@
*/
if ( !(c->flags & VGCF_HVM_GUEST) )
{
- if ( ((c->user_regs.ss & 3) == 0) ||
+ if ( !VALID_STACKSEL(c->user_regs.ss) ||
+ !VALID_STACKSEL(c->kernel_ss) ||
!VALID_CODESEL(c->user_regs.cs) )
return -EINVAL;
diff -r f1c75df46b46 -r 697fac283c9e xen/arch/x86/x86_32/mm.c
--- a/xen/arch/x86/x86_32/mm.c Wed Feb 22 18:40:19 2006
+++ b/xen/arch/x86/x86_32/mm.c Wed Feb 22 19:11:23 2006
@@ -223,7 +223,7 @@
int nr = smp_processor_id();
struct tss_struct *t = &init_tss[nr];
- if ( (ss & 3) != 1 )
+ if ( !VALID_STACKSEL(ss) )
return -EPERM;
current->arch.guest_context.kernel_ss = ss;
@@ -239,6 +239,7 @@
{
unsigned long base, limit;
u32 a = d->a, b = d->b;
+ u16 cs = a>>16;
/* A not-present descriptor will always fault, so is safe. */
if ( !(b & _SEGMENT_P) )
@@ -251,7 +252,7 @@
* DPL 0 -- this would get the OS ring-0 privileges).
*/
if ( (b & _SEGMENT_DPL) == 0 )
- goto bad;
+ d->b = b = b | (0x01<<13); /* Force DPL == 1 */
if ( !(b & _SEGMENT_S) )
{
@@ -272,8 +273,16 @@
goto bad;
/* Can't allow far jump to a Xen-private segment. */
- if ( !VALID_CODESEL(a>>16) )
+ if ( !VALID_CODESEL(cs) )
goto bad;
+
+ /*
+ * VALID_CODESEL might have fixed up the RPL for us. So be sure to
+ * update the descriptor.
+ *
+ */
+ d->a &= 0x0000ffff;
+ d->a |= cs<<16;
/* Reserved bits must be zero. */
if ( (b & 0xe0) != 0 )
diff -r f1c75df46b46 -r 697fac283c9e xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Wed Feb 22 18:40:19 2006
+++ b/xen/arch/x86/x86_64/mm.c Wed Feb 22 19:11:23 2006
@@ -292,6 +292,7 @@
int check_descriptor(struct desc_struct *d)
{
u32 a = d->a, b = d->b;
+ u16 cs = a>>16;
/* A not-present descriptor will always fault, so is safe. */
if ( !(b & _SEGMENT_P) )
@@ -314,8 +315,16 @@
goto bad;
/* Can't allow far jump to a Xen-private segment. */
- if ( !VALID_CODESEL(a>>16) )
+ if ( !VALID_CODESEL(cs) )
goto bad;
+
+ /*
+ * VALID_CODESEL might have fixed up the RPL for us. So be sure to
+ * update the descriptor.
+ *
+ */
+ d->a &= 0x0000ffff;
+ d->a |= cs<<16;
/* Reserved bits must be zero. */
if ( (b & 0xe0) != 0 )
diff -r f1c75df46b46 -r 697fac283c9e xen/include/asm-x86/desc.h
--- a/xen/include/asm-x86/desc.h Wed Feb 22 18:40:19 2006
+++ b/xen/include/asm-x86/desc.h Wed Feb 22 19:11:23 2006
@@ -35,7 +35,14 @@
#define VALID_SEL(_s) \
(((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) || ((_s)&4)) && \
(((_s)&3) == GUEST_KERNEL_RPL))
-#define VALID_CODESEL(_s) ((_s) == FLAT_KERNEL_CS || VALID_SEL(_s))
+#define VALID_CODESEL(_s) ({ \
+ if ( ((_s) & 3) == 0 ) \
+ (_s) |= GUEST_KERNEL_RPL; \
+ (_s) == FLAT_KERNEL_CS || VALID_SEL(_s); })
+#define VALID_STACKSEL(_s) ({ \
+ if ( ((_s) & 3) == 0 ) \
+ (_s) |= GUEST_KERNEL_RPL; \
+ (_s) == FLAT_KERNEL_SS || VALID_SEL(_s); })
/* These are bitmasks for the high 32 bits of a descriptor table entry. */
#define _SEGMENT_TYPE (15<< 8)
diff -r f1c75df46b46 -r 697fac283c9e xen/include/public/version.h
--- a/xen/include/public/version.h Wed Feb 22 18:40:19 2006
+++ b/xen/include/public/version.h Wed Feb 22 19:11:23 2006
@@ -51,6 +51,7 @@
#define XENFEAT_writable_page_tables 0
#define XENFEAT_writable_descriptor_tables 1
#define XENFEAT_auto_translated_physmap 2
+#define XENFEAT_ring0_kernel 3
#define XENFEAT_NR_SUBMAPS 1
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|