# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID c4512592a1dc11c76b94d87f14849bcc38871f72
# Parent 69bf77e1b10272ebc40013ae6b8e5b0740df701c
Attached is a patch to x86_64 xenlinux that enables writable page tables
(VMASST_TYPE_writable_pagetables). It also includes cleanups. We are now
also working on SMP + writable pagetable support.
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S Mon Aug 8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S Mon Aug 8 08:18:38 2005
@@ -28,8 +28,6 @@
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
-/* #include <asm/thread_info.h> */
-
/* we are not able to switch in one step to the final KERNEL ADRESS SPACE
* because we need identity-mapped pages on setup so define __START_KERNEL to
@@ -116,15 +114,81 @@
ENTRY(init_level4_user_pgt)
.fill 512,8,0
+ /*
+ * In Xen the following pre-initialized pgt entries are re-initialized.
+ */
+.org 0x3000
+ENTRY(level3_kernel_pgt)
+ .fill 510,8,0
+ /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+ .quad 0x0000000000105007 /* -> level2_kernel_pgt */
+ .fill 1,8,0
+
+.org 0x4000
+ENTRY(level2_ident_pgt)
+ /* 40MB for bootup. */
+ .quad 0x0000000000000283
+ .quad 0x0000000000200183
+ .quad 0x0000000000400183
+ .quad 0x0000000000600183
+ .quad 0x0000000000800183
+ .quad 0x0000000000A00183
+ .quad 0x0000000000C00183
+ .quad 0x0000000000E00183
+ .quad 0x0000000001000183
+ .quad 0x0000000001200183
+ .quad 0x0000000001400183
+ .quad 0x0000000001600183
+ .quad 0x0000000001800183
+ .quad 0x0000000001A00183
+ .quad 0x0000000001C00183
+ .quad 0x0000000001E00183
+ .quad 0x0000000002000183
+ .quad 0x0000000002200183
+ .quad 0x0000000002400183
+ .quad 0x0000000002600183
+ /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
+ .globl temp_boot_pmds
+temp_boot_pmds:
+ .fill 492,8,0
+
+.org 0x5000
+ENTRY(level2_kernel_pgt)
+ /* 40MB kernel mapping. The kernel code cannot be bigger than that.
+ When you change this change KERNEL_TEXT_SIZE in page.h too. */
+ /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
+ .quad 0x0000000000000183
+ .quad 0x0000000000200183
+ .quad 0x0000000000400183
+ .quad 0x0000000000600183
+ .quad 0x0000000000800183
+ .quad 0x0000000000A00183
+ .quad 0x0000000000C00183
+ .quad 0x0000000000E00183
+ .quad 0x0000000001000183
+ .quad 0x0000000001200183
+ .quad 0x0000000001400183
+ .quad 0x0000000001600183
+ .quad 0x0000000001800183
+ .quad 0x0000000001A00183
+ .quad 0x0000000001C00183
+ .quad 0x0000000001E00183
+ .quad 0x0000000002000183
+ .quad 0x0000000002200183
+ .quad 0x0000000002400183
+ .quad 0x0000000002600183
+ /* Module mapping starts here */
+ .fill 492,8,0
+
/*
* This is used for vsyscall area mapping as we have a different
* level4 page table for user.
*/
-.org 0x3000
+.org 0x6000
ENTRY(level3_user_pgt)
.fill 512,8,0
-.org 0x4000
+.org 0x7000
ENTRY(cpu_gdt_table)
/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
@@ -147,19 +211,24 @@
/* GDTs of other CPUs: */
.fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
-.org 0x5000
+.org 0x8000
ENTRY(empty_zero_page)
-.org 0x6000
+.org 0x9000
ENTRY(empty_bad_page)
-.org 0x7000
+.org 0xa000
ENTRY(empty_bad_pte_table)
-.org 0x8000
+.org 0xb000
ENTRY(empty_bad_pmd_table)
- .org 0x9000
+.org 0xc000
+ENTRY(level3_physmem_pgt)
+ .quad 0x0000000000105007 /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
+
+
+ .org 0xd000
#ifdef CONFIG_ACPI_SLEEP
ENTRY(wakeup_level4_pgt)
.quad 0x0000000000102007 /* -> level3_ident_pgt */
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Mon Aug 8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Mon Aug 8 08:18:38 2005
@@ -623,7 +623,9 @@
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
-/* register_console(&xen_console); */
+
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_writable_pagetables);
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
/* This is drawn from a dump from vgacon:startup in standard Linux. */
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Mon Aug 8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Mon Aug 8 08:18:38 2005
@@ -30,9 +30,9 @@
#include <asm/proto.h>
#include <asm/mman.h>
#include <asm/numa.h>
-
+#ifdef CONFIG_XEN
#include <asm-xen/hypervisor.h>
-
+#endif
char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
@@ -123,82 +123,11 @@
}
}
-void pda_init(int cpu)
-{
- pgd_t *old_level4 = (pgd_t *)xen_start_info.pt_base;
- struct x8664_pda *pda = &cpu_pda[cpu];
-
- /* Setup up data that may be needed in __get_free_pages early */
- asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
- HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
- (unsigned long)(cpu_pda + cpu));
-
- pda->me = pda;
- pda->cpunumber = cpu;
- pda->irqcount = -1;
- pda->kernelstack =
- (unsigned long)stack_thread_info() - PDA_STACKOFFSET +
THREAD_SIZE;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
- pda->kernel_mode = 1;
-
- if (cpu == 0) {
- memcpy((void *)init_level4_pgt,
- (void *) xen_start_info.pt_base, PAGE_SIZE);
- /* others are initialized in smpboot.c */
- pda->pcurrent = &init_task;
- pda->irqstackptr = boot_cpu_stack;
- make_page_readonly(init_level4_pgt);
- make_page_readonly(init_level4_user_pgt);
- make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
- xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
- xen_pud_pin(__pa_symbol(level3_user_pgt));
- set_pgd((pgd_t *)(init_level4_user_pgt + 511),
- mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
- } else {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d", cpu);
- }
-
+#ifdef CONFIG_XEN
+static void switch_pt(void)
+{
xen_pt_switch(__pa(init_level4_pgt));
xen_new_user_pt(__pa(init_level4_user_pgt));
-
- if (cpu == 0) {
- xen_pgd_unpin(__pa(old_level4));
-#if 0
- early_printk("__pa: %x, <machine_phys> old_level 4 %x\n",
- __pa(xen_start_info.pt_base),
- pfn_to_mfn(__pa(old_level4) >> PAGE_SHIFT));
-#endif
-// make_page_writable(old_level4);
-// free_bootmem(__pa(old_level4), PAGE_SIZE);
- }
-
- pda->irqstackptr += IRQSTACKSIZE-64;
-}
-
-char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ]
-__attribute__((section(".bss.page_aligned")));
-
-/* May not be marked __init: used by software suspend */
-void syscall_init(void)
-{
-#ifdef CONFIG_IA32_EMULATION
- syscall32_cpu_init ();
-#endif
-}
-
-void __init check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || do_not_nx) {
- __supported_pte_mask &= ~_PAGE_NX;
-
- }
}
void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
@@ -217,7 +146,96 @@
sizeof (struct desc_struct)))
BUG();
}
-
+#else
+static void switch_pt(void)
+{
+ asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+}
+
+void __init cpu_gdt_init(struct desc_ptr *gdt_descr)
+{
+#ifdef CONFIG_SMP
+ int cpu = stack_smp_processor_id();
+#else
+ int cpu = smp_processor_id();
+#endif
+
+ asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
+ asm volatile("lidt %0" :: "m" (idt_descr));
+}
+#endif
+
+
+void pda_init(int cpu)
+{
+ struct x8664_pda *pda = &cpu_pda[cpu];
+
+ /* Setup up data that may be needed in __get_free_pages early */
+ asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
+#ifndef CONFIG_XEN
+ wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
+#else
+ HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
+ (unsigned long)(cpu_pda + cpu));
+#endif
+ pda->me = pda;
+ pda->cpunumber = cpu;
+ pda->irqcount = -1;
+ pda->kernelstack =
+ (unsigned long)stack_thread_info() - PDA_STACKOFFSET +
THREAD_SIZE;
+ pda->active_mm = &init_mm;
+ pda->mmu_state = 0;
+
+ if (cpu == 0) {
+#ifdef CONFIG_XEN
+ xen_init_pt();
+#endif
+ /* others are initialized in smpboot.c */
+ pda->pcurrent = &init_task;
+ pda->irqstackptr = boot_cpu_stack;
+ } else {
+ pda->irqstackptr = (char *)
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
+ if (!pda->irqstackptr)
+ panic("cannot allocate irqstack for cpu %d", cpu);
+ }
+
+ switch_pt();
+ pda->irqstackptr += IRQSTACKSIZE-64;
+}
+
+char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ]
+__attribute__((section(".bss.page_aligned")));
+
+/* May not be marked __init: used by software suspend */
+void syscall_init(void)
+{
+#ifndef CONFIG_XEN
+ /*
+ * LSTAR and STAR live in a bit strange symbiosis.
+ * They both write to the same internal register. STAR allows setting CS/DS
+ * but only a 32bit target. LSTAR sets the 64bit rip.
+ */
+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
+ wrmsrl(MSR_LSTAR, system_call);
+
+ /* Flags to clear on syscall */
+ wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
+#endif
+#ifdef CONFIG_IA32_EMULATION
+ syscall32_cpu_init ();
+#endif
+}
+
+void __init check_efer(void)
+{
+ unsigned long efer;
+
+ rdmsrl(MSR_EFER, efer);
+ if (!(efer & EFER_NX) || do_not_nx) {
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+}
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -247,14 +265,13 @@
me = current;
- if (test_and_set_bit(cpu, &cpu_initialized))
+ if (cpu_test_and_set(cpu, cpu_initialized))
panic("CPU#%d already initialized!\n", cpu);
printk("Initializing CPU#%d\n", cpu);
-#if 0
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-#endif
+
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
@@ -265,18 +282,16 @@
cpu_gdt_descr[cpu].size = GDT_SIZE;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-#if 0
- asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
- asm volatile("lidt %0" :: "m" (idt_descr));
-#endif
+
cpu_gdt_init(&cpu_gdt_descr[cpu]);
-#if 0
+#ifndef CONFIG_XEN
memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES
* 8);
-#endif
+#else
memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
GDT_ENTRY_TLS_ENTRIES * 8);
+#endif
/*
* Delete NT
@@ -284,12 +299,12 @@
asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ;
popfq" ::: "eax");
- if (cpu == 0)
- early_identify_cpu(&boot_cpu_data);
-
syscall_init();
+ wrmsrl(MSR_FS_BASE, 0);
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
barrier();
+
check_efer();
/*
@@ -321,19 +336,22 @@
BUG();
enter_lazy_tlb(&init_mm, me);
+#ifndef CONFIG_XEN
+ set_tss_desc(cpu, t);
+ load_TR_desc();
+#endif
load_LDT(&init_mm.context);
/*
* Clear all 6 debug registers:
*/
-#define CD(register) HYPERVISOR_set_debugreg(register, 0)
-
- CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7);
-
-#undef CD
+
+ set_debug(0UL, 0);
+ set_debug(0UL, 1);
+ set_debug(0UL, 2);
+ set_debug(0UL, 3);
+ set_debug(0UL, 6);
+ set_debug(0UL, 7);
+
fpu_init();
-
-#ifdef CONFIG_NUMA
- numa_add_cpu(cpu);
-#endif
-}
+}
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Mon Aug 8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Mon Aug 8 08:18:38 2005
@@ -280,7 +280,7 @@
if (!pte_none(*pte) &&
pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
pte_ERROR(*pte);
- xen_l1_entry_update(pte, new_pte);
+ set_pte(pte, new_pte);
/*
* It's enough to flush this one mapping.
@@ -511,6 +511,78 @@
round_up(ptes * 8, PAGE_SIZE);
}
+static void xen_copy_pt(void)
+{
+ unsigned long va = __START_KERNEL_map;
+ unsigned long addr, *pte_page;
+ int i;
+ pud_t *pud; pmd_t *pmd; pte_t *pte;
+ unsigned long *page = (unsigned long *) init_level4_pgt;
+
+ addr = (unsigned long) page[pgd_index(va)];
+ addr_to_page(addr, page);
+
+ pud = (pud_t *) &page[pud_index(va)];
+ addr = page[pud_index(va)];
+ addr_to_page(addr, page);
+
+ level3_kernel_pgt[pud_index(va)] =
+ __pud(__pa_symbol(level2_kernel_pgt) | _KERNPG_TABLE |
_PAGE_USER);
+
+ for (;;) {
+ pmd = (pmd_t *) &page[pmd_index(va)];
+ if (pmd_present(*pmd)) {
+ level2_kernel_pgt[pmd_index(va)] = *pmd;
+ /*
+ * if pmd is valid, check pte.
+ */
+ addr = page[pmd_index(va)];
+ addr_to_page(addr, pte_page);
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = (pte_t *) &pte_page[pte_index(va)];
+ if (pte_present(*pte))
+ va += PAGE_SIZE;
+ else
+ break;
+ }
+
+ } else
+ break;
+ }
+
+ init_level4_pgt[pgd_index(__START_KERNEL_map)] =
+ mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
+}
+
+void __init xen_init_pt(void)
+{
+ pgd_t *old_level4 = (pgd_t *)xen_start_info.pt_base;
+
+ memcpy((void *)init_level4_pgt,
+ (void *)xen_start_info.pt_base, PAGE_SIZE);
+
+ memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
+ memset((void *)level2_kernel_pgt, 0, PAGE_SIZE);
+
+ xen_copy_pt();
+
+ make_page_readonly(init_level4_pgt);
+ make_page_readonly(level3_kernel_pgt);
+ make_page_readonly(level2_kernel_pgt);
+ make_page_readonly(init_level4_user_pgt);
+ make_page_readonly(level3_user_pgt); /* for vsyscall stuff */
+
+ xen_pgd_pin(__pa_symbol(init_level4_pgt));
+ xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
+ xen_pud_pin(__pa_symbol(level3_kernel_pgt));
+ xen_pud_pin(__pa_symbol(level3_user_pgt));
+ xen_pmd_pin(__pa_symbol(level2_kernel_pgt));
+
+ set_pgd((pgd_t *)(init_level4_user_pgt + 511),
+ mk_kernel_pgd(__pa_symbol(level3_user_pgt)));
+
+}
/*
* Extend kernel mapping to access pages for page tables. The initial
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Mon Aug 8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Mon Aug 8 08:18:38 2005
@@ -4,31 +4,19 @@
/*
* This file contains the functions and defines necessary to modify and use
* the x86-64 page table tree.
- *
- * x86-64 has a 4 level table setup. Generic linux MM only supports
- * three levels. The fourth level is currently a single static page that
- * is shared by everybody and just contains a pointer to the current
- * three level page setup on the beginning and some kernel mappings at
- * the end. For more details see Documentation/x86_64/mm.txt
*/
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <asm/bitops.h>
#include <linux/threads.h>
#include <asm/pda.h>
+#ifdef CONFIG_XEN
#include <asm-xen/hypervisor.h>
+
extern pud_t level3_user_pgt[512];
-extern pud_t init_level4_pgt[];
extern pud_t init_level4_user_pgt[];
-extern unsigned long __supported_pte_mask;
-
-#define swapper_pg_dir NULL
-
-extern int nonx_setup(char *str);
-extern void paging_init(void);
-extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
-
-extern unsigned long pgkern_mask;
+
+extern void xen_init_pt(void);
#define virt_to_ptep(__va) \
({ \
@@ -44,6 +32,22 @@
unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK; \
__pa | ((unsigned long)(__va) & (PAGE_SIZE-1)); \
})
+#endif
+
+extern pud_t level3_kernel_pgt[512];
+extern pud_t level3_physmem_pgt[512];
+extern pud_t level3_ident_pgt[512];
+extern pmd_t level2_kernel_pgt[512];
+extern pgd_t init_level4_pgt[];
+extern unsigned long __supported_pte_mask;
+
+#define swapper_pg_dir init_level4_pgt
+
+extern int nonx_setup(char *str);
+extern void paging_init(void);
+extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
+
+extern unsigned long pgkern_mask;
/*
* ZERO_PAGE is a global shared page that is always zero: used
@@ -52,11 +56,14 @@
extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
#define PGDIR_SHIFT 39
#define PTRS_PER_PGD 512
/*
- * PUDIR_SHIFT determines what a top-level page table entry can map
+ * 3rd level page
*/
#define PUD_SHIFT 30
#define PTRS_PER_PUD 512
@@ -80,7 +87,7 @@
#define pud_ERROR(e) \
printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e),
pud_val(e))
#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e),
pgd_val(e))
+ printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e),
pgd_val(e))
#define pgd_none(x) (!pgd_val(x))
#define pud_none(x) (!pud_val(x))
@@ -90,18 +97,10 @@
extern inline int pud_present(pud_t pud) { return !pud_none(pud); }
-#ifdef CONFIG_SMP
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-
-#else
-#define set_pte(pteptr, pteval) xen_l1_entry_update(pteptr, (pteval))
-#if 0
static inline void set_pte(pte_t *dst, pte_t val)
{
*dst = val;
}
-#endif
-#endif
#define set_pmd(pmdptr, pmdval) xen_l2_entry_update(pmdptr, (pmdval))
#define set_pud(pudptr, pudval) xen_l3_entry_update(pudptr, (pudval))
@@ -132,6 +131,9 @@
* each domain will have separate page tables, with their own versions of
* accessed & dirty state.
*/
+#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte, 0))
+
+#if 0
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long
addr, pte_t *xp)
{
pte_t pte = *xp;
@@ -139,21 +141,22 @@
set_pte(xp, __pte_ma(0));
return pte;
}
+#endif
#define pte_same(a, b) ((a).pte == (b).pte)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-#define PUD_SIZE (1UL << PUD_SHIFT)
-#define PUD_MASK (~(PUD_SIZE-1))
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
#define FIRST_USER_ADDRESS 0
#ifndef __ASSEMBLY__
-#define MAXMEM 0x3fffffffffffUL
+#define MAXMEM 0x3fffffffffffUL
#define VMALLOC_START 0xffffc20000000000UL
#define VMALLOC_END 0xffffe1ffffffffffUL
#define MODULES_VADDR 0xffffffff88000000UL
@@ -347,7 +350,7 @@
pte_t pte = *ptep;
int ret = pte_dirty(pte);
if (ret)
- xen_l1_entry_update(ptep, pte_mkclean(pte));
+ set_pte(ptep, pte_mkclean(pte));
return ret;
}
@@ -356,7 +359,7 @@
pte_t pte = *ptep;
int ret = pte_young(pte);
if (ret)
- xen_l1_entry_update(ptep, pte_mkold(pte));
+ set_pte(ptep, pte_mkold(pte));
return ret;
}
@@ -398,7 +401,7 @@
/* PUD - Level3 access */
/* to find an entry in a page-table-directory. */
-#define pud_index(address) ((address >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) +
pud_index(address))
static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
{
@@ -413,7 +416,7 @@
{
unsigned long addr;
- addr = pud_val(init_level4_pgt[pud_index(address)]);
+ addr = pgd_val(init_level4_pgt[pud_index(address)]);
addr &= PHYSICAL_PAGE_MASK; /* machine physical */
addr = machine_to_phys(addr);
return __pud_offset_k((pud_t *)__va(addr), address);
@@ -427,9 +430,11 @@
#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
pmd_index(address))
#define pmd_none(x) (!pmd_val(x))
-#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+/* pmd_present doesn't just test the _PAGE_PRESENT bit since writable
+ page tables can temporarily clear it. */
+#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
-#define pmd_bad(x) ((pmd_val(x) & ~PTE_MASK) != _KERNPG_TABLE )
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT))
!= (_KERNPG_TABLE & ~_PAGE_PRESENT))
#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
#define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
@@ -479,11 +484,24 @@
* race with other CPU's that might be updating the dirty
* bit at the same time. */
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#if 0
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
do { \
if (__dirty) { \
set_pte(__ptep, __entry); \
flush_tlb_page(__vma, __address); \
+ } \
+ } while (0)
+#endif
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+ do { \
+ if (__dirty) { \
+ if ( likely((__vma)->vm_mm == current->mm) ) { \
+ HYPERVISOR_update_va_mapping((__address),
(__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned
long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
+ } else { \
+ xen_l1_entry_update((__ptep), (__entry)); \
+ flush_tlb_page((__vma), (__address)); \
+ } \
} \
} while (0)
diff -r 69bf77e1b102 -r c4512592a1dc linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h Mon Aug 8 08:18:06 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h Mon Aug 8 08:18:38 2005
@@ -153,6 +153,20 @@
} while (0);
}
}
+
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+#ifndef CONFIG_XEN
+ mmu_cr4_features &= ~mask;
+ __asm__("movq %%cr4,%%rax\n\t"
+ "andq %0,%%rax\n\t"
+ "movq %%rax,%%cr4\n"
+ : : "irg" (~mask)
+ :"ax");
+#endif
+}
+
#define load_cr3(pgdir) do { \
xen_pt_switch(__pa(pgdir)); \
@@ -283,9 +297,9 @@
load_gs_index(0);
\
(regs)->rip = (new_rip);
\
(regs)->rsp = (new_rsp);
\
- write_pda(oldrsp, (new_rsp));
\
- (regs)->cs = __USER_CS;
\
- (regs)->ss = __USER_DS;
\
+ write_pda(oldrsp, (new_rsp));
\
+ (regs)->cs = __USER_CS;
\
+ (regs)->ss = __USER_DS;
\
(regs)->eflags = 0x200;
\
set_fs(USER_DS);
\
} while(0)
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|