On Fri, 2009-06-05 at 14:19 -0400, Pasi Kärkkäinen wrote:
> On Fri, Jun 05, 2009 at 05:12:33PM +0100, Ian Campbell wrote:
> > On Fri, 2009-06-05 at 12:05 -0400, Ian Campbell wrote:
> > >
> > > I had some patches to unify the 32 and 64 bit versions of dump
> page
> > > table at one point, since the 64 bit version does the right thing.
> > > I'll see if I can find or reproduce them.
> >
> > Couldn't find them but please try this:
> >
>
> I had some problems applying the patch until I figured out it was
> supposed
> to be applied to a clean tree.. hopefully "git checkout file" restores
> (or
> resets) the file to its original form and removes any local changes.
Should work I guess, I usually use "git reset --hard" to undo any local
mods.
>
> Here goes again:
> http://pasik.reaktio.net/xen/pv_ops-dom0-debug/pv_ops-dom0-log-04-with-highpte-no-swap-with-debug2.txt
>
>
> L4 at e1822000 is pinned contains L2 at e1977228 which points at an L1
> which is unpinned low mem address 0x8bf8000
OK so I think that is interesting. A pinned L4 referencing an unpinned
L1 isn't supposed to happen, I don't think (Jeremy?).
The patch at the end (applies to a clean tree again) walks the lowmem
region of every L4 to ensure that every L1 page is pinned just before
pinning the L4. I hope this will catch the L1 in the act.
> PGD 8ef001 PUD 8ef001 PMD 1268067 PTE 207061
This just tells us that the PT which maps the PTE we were trying to
write is mapped R/O, which is not as interesting as I thought it would
be.
> Fixmap KM_PTE0 @ 0xf57f0000
> PGD 8ef001 PUD 8ef001 PMD 207067 PTE 0
> Fixmap KM_PTE1 @ 0xf57ee000
> PGD 8ef001 PUD 8ef001 PMD 207067 PTE 0
So these guys are not at fault, although we are in the middle of filling
in KM_PTE0, I think.
I've just had another go at reproducing this with a xen-3.3-testing.hg
hypervisor (both 32 and 64 bit) with a 32 bit kernel and dom0_mem=1024M.
No luck...
Ian.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f9b252c..538590a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -285,46 +285,12 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
tsk->thread.screen_bitmap |= 1 << bit;
}
-static void dump_pagetable(unsigned long address)
-{
- __typeof__(pte_val(__pte(0))) page;
-
- page = read_cr3();
- page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
-
#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", page);
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && page & _PAGE_PRESENT) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
- printk(KERN_CONT "*pde = %016Lx ", page);
- page &= ~_PAGE_NX;
- }
+#define FMTPTE "ll"
#else
- printk("*pde = %08lx ", page);
+#define FMTPTE "l"
#endif
- /*
- * We must not directly access the pte in the highpte
- * case if the page table is located in highmem.
- * And let's rather not kmap-atomic the pte, just in case
- * it's allocated already:
- */
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && (page & _PAGE_PRESENT)
- && !(page & _PAGE_PSE)) {
-
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
- printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
- }
-
- printk("\n");
-}
-
#else /* CONFIG_X86_64: */
void vmalloc_sync_all(void)
@@ -440,6 +406,10 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
{
}
+#define FMTPTE "ll"
+
+#endif /* CONFIG_X86_64 */
+
static int bad_address(void *p)
{
unsigned long dummy;
@@ -447,7 +417,7 @@ static int bad_address(void *p)
return probe_kernel_address((unsigned long *)p, dummy);
}
-static void dump_pagetable(unsigned long address)
+void dump_pagetable(unsigned long address)
{
pgd_t *pgd;
pud_t *pud;
@@ -462,7 +432,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pgd))
goto bad;
- printk("PGD %lx ", pgd_val(*pgd));
+ printk("PGD %"FMTPTE"x ", pgd_val(*pgd));
if (!pgd_present(*pgd))
goto out;
@@ -471,7 +441,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pud))
goto bad;
- printk("PUD %lx ", pud_val(*pud));
+ printk("PUD %"FMTPTE"x ", pud_val(*pud));
if (!pud_present(*pud) || pud_large(*pud))
goto out;
@@ -479,7 +449,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pmd))
goto bad;
- printk("PMD %lx ", pmd_val(*pmd));
+ printk("PMD %"FMTPTE"x ", pmd_val(*pmd));
if (!pmd_present(*pmd) || pmd_large(*pmd))
goto out;
@@ -487,7 +457,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pte))
goto bad;
- printk("PTE %lx", pte_val(*pte));
+ printk("PTE %"FMTPTE"x", pte_val(*pte));
out:
printk("\n");
return;
@@ -495,8 +465,6 @@ bad:
printk("BAD\n");
}
-#endif /* CONFIG_X86_64 */
-
/*
* Workaround for K8 erratum #93 & buggy BIOS.
*
@@ -598,6 +566,10 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk_address(regs->ip, 1);
dump_pagetable(address);
+ printk(KERN_CRIT "Fixmap KM_PTE0 @ %#lx\n", fix_to_virt(KM_PTE0));
+ dump_pagetable(fix_to_virt(KM_PTE0));
+ printk(KERN_CRIT "Fixmap KM_PTE1 @ %#lx\n", fix_to_virt(KM_PTE1));
+ dump_pagetable(fix_to_virt(KM_PTE1));
}
static noinline void
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 1729178..2c427d3 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1015,13 +1015,34 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
return flush;
}
+static int xen_check_l1_pinned(pte_t *pte, unsigned long s, unsigned long e, struct mm_walk *walk)
+{
+ extern void dump_pagetable(unsigned long address);
+ struct page *pte_page = virt_to_page(pte);
+
+ if (!PagePinned(pte_page)) {
+ printk(KERN_CRIT "PTE @ %p is an L1 page %p covering %#lx-%#lx which is not pinned\n", pte, pte_page, s, e);
+ dump_pagetable((unsigned long)pte);
+ BUG();
+ }
+
+ return 0;
+}
+
/* This is called just after a mm has been created, but it has not
been used yet. We need to make sure that its pagetable is all
read-only, and can be pinned. */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
+ struct mm_walk xen_pin_walk = {
+ .pte_entry = &xen_check_l1_pinned,
+ .mm = mm,
+ };
+
vm_unmap_aliases();
+ walk_page_range(0xc0000000, FIXADDR_TOP, &xen_pin_walk);
+
xen_mc_batch();
if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
diff --git a/init/main.c b/init/main.c
index 33ce929..baf4300 100644
--- a/init/main.c
+++ b/init/main.c
@@ -74,6 +74,8 @@
#include <asm/sections.h>
#include <asm/cacheflush.h>
+#include <asm/xen/page.h>
+
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
#endif
@@ -815,6 +817,54 @@ static noinline int init_post(void)
system_state = SYSTEM_RUNNING;
numa_default_policy();
+ {
+ extern void dump_pagetable(unsigned long address);
+ struct page *pgd_page, *pte_page;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ phys_addr_t pte_phys;
+ unsigned long address = 0xc08ce011UL;//(unsigned long) __builtin_return_address(0);
+
+ pgd = pgd_offset(&init_mm, address);
+ if (!pgd_present(*pgd))
+ goto skip;
+
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ goto skip;
+
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ goto skip;
+
+ pgd_page = virt_to_page(init_mm.pgd);
+ pte_page = pmd_page(*pmd);
+
+ pte_phys = page_to_phys(pte_page) + pte_index(address);
+ printk(KERN_CRIT "Test debug infrastructure on address
%#lx:\n", address);
+ printk(KERN_CRIT "L4 at V:%p/P:%#llx/M:%#llx is %s and contains
L2 at V:%p/P:%#llx/M:%#llx = %#llx "
+ "which points to an L1 P:%#llx/M:%#llx which is %s %s\n",
+ pgd, virt_to_phys(pgd), virt_to_machine(pgd).maddr,
+ PagePinned(pgd_page) ? "pinned" : "unpinned",
+ pmd, virt_to_phys(pmd), virt_to_machine(pmd).maddr,
+ pmd_val(*pmd),
+ pte_phys, phys_to_machine(XPADDR(pte_phys)).maddr,
+ PagePinned(pte_page) ? "pinned" : "unpinned",
+ PageHighMem(pte_page) ? "highmem" : "lowmem");
+ printk(KERN_CRIT "faulting address %#lx\n", address);
+ dump_pagetable(address);
+ if (!PageHighMem(pte_page)) {
+ printk(KERN_CRIT "lowmem mapping of L1 @ P:%#llx is at
V:%p\n", pte_phys, phys_to_virt(page_to_phys(pte_page)));
+ dump_pagetable((unsigned
long)phys_to_virt(page_to_phys(pte_page)));
+ }
+ printk(KERN_CRIT "Fixmap KM_PTE0 @ %#lx\n",
fix_to_virt(KM_PTE0));
+ dump_pagetable(fix_to_virt(KM_PTE0));
+ printk(KERN_CRIT "Fixmap KM_PTE1 @ %#lx\n",
fix_to_virt(KM_PTE1));
+ dump_pagetable(fix_to_virt(KM_PTE1));
+ }
+ skip:
+
if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
printk(KERN_WARNING "Warning: unable to open an initial
console.\n");
diff --git a/mm/rmap.c b/mm/rmap.c
index 1652166..ced5650 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -52,6 +52,9 @@
#include <linux/migrate.h>
#include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#include <asm/xen/page.h>
#include "internal.h"
@@ -267,6 +270,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
pte_t *page_check_address(struct page *page, struct mm_struct *mm,
unsigned long address, spinlock_t **ptlp, int sync)
{
+ struct page *pgd_page, *pte_page;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -285,6 +289,32 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
if (!pmd_present(*pmd))
return NULL;
+ pgd_page = virt_to_page(mm->pgd);
+ pte_page = pmd_page(*pmd);
+
+ if (PagePinned(pgd_page) != PagePinned(pte_page)) {
+ extern void dump_pagetable(unsigned long address);
+ phys_addr_t pte_phys = page_to_phys(pte_page) +
pte_index(address);
+ printk(KERN_CRIT "L4 at V:%p/P:%#llx/M:%#llx is %s and contains
L2 at V:%p/P:%#llx/M:%#llx = %#llx "
+ "which points to an L1 P:%#llx/M:%#llx which is %s %s\n",
+ pgd, virt_to_phys(pgd), virt_to_machine(pgd).maddr,
+ PagePinned(pgd_page) ? "pinned" : "unpinned",
+ pmd, virt_to_phys(pmd), virt_to_machine(pmd).maddr,
+ pmd_val(*pmd),
+ pte_phys, phys_to_machine(XPADDR(pte_phys)).maddr,
+ PagePinned(pte_page) ? "pinned" : "unpinned",
+ PageHighMem(pte_page) ? "highmem" : "lowmem");
+ printk(KERN_CRIT "faulting address %#lx\n", address);
+ dump_pagetable(address);
+ if (!PageHighMem(pte_page)) {
+ printk(KERN_CRIT "lowmem mapping of L1 @ P:%#llx is at
V:%p\n", pte_phys, phys_to_virt(page_to_phys(pte_page)));
+ dump_pagetable((unsigned
long)phys_to_virt(page_to_phys(pte_page)));
+ }
+ printk(KERN_CRIT "Fixmap KM_PTE0 @ %#lx\n",
fix_to_virt(KM_PTE0));
+ dump_pagetable(fix_to_virt(KM_PTE0));
+ printk(KERN_CRIT "Fixmap KM_PTE1 @ %#lx\n",
fix_to_virt(KM_PTE1));
+ dump_pagetable(fix_to_virt(KM_PTE1));
+ }
pte = pte_offset_map(pmd, address);
/* Make a quick check before getting the lock */
if (!sync && !pte_present(*pte)) {
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|