On Wed, Oct 31, 2007 at 10:56:05AM +0900, Simon Horman wrote:
> On Tue, Oct 30, 2007 at 12:52:49PM -0600, Alex Williamson wrote:
> >
> > On Mon, 2007-10-29 at 13:48 +0900, Simon Horman wrote:
> > > Hi,
> > >
> > > here is an update on the patch series to solve the EFI mapping for kexec.
> > > The major change is to remove the C-code portions of the page fault
> > > handling changes and instead implement all of the identity mapping
> > > in assembly, as suggested by Tristan Gingold.
> > >
> > > Although this does make the assembly a little bulkier, removing the
> > > additional complexity from the C code does seem to be a win, and I believe
> > > that overall there are fewer lines of code changed (though I haven't
> > > counted).
> >
> > Hi Simon,
> >
> > This isn't working for me. Dom0 doesn't quite boot, log below. Let
> > me know what you think. This is on an rx6600, I assume this series
> > works on the rx2600 you test with. Has anyone else had success/failure
> > with this series? Thanks,
>
> Hi Alex,
>
> the patch set does indeed work on my rx2620. I am guessing
> that what is happening is that a fault for an EFI address
> is coming through one of the page fault handlers in ivt.S
> that I didn't modify. The fault then falls through to
> ia64_do_page_fault() which the current patch set doesn't teach
> about EFI identity mappings. I'll send a patch which should allow
> us (you :) to isolate which one. I can then scratch my head
> for a bit and make the appropriate adjustments.
Hi Alex,
I've been unable to reproduce this problem on my RX2620 with the default
config (which includes the E1000 driver), so unfortunately I think you
will have to do some testing for me. Hopefully this won't take too many
round trips.
Below is a rather long patch which does two things:
1) Teaches the C code how to handle page faults for EFI addresses.
This is basically the original implementation. I ripped it out as I
thought the ASM was working. By adding it back in it should catch any
faults that the ASM code isn't catching. And hopefully this means
the code will work for you. If it doesn't we need to look deeper
into the problem.
2) Logs whenever a fault for an EFI address hits the C code.
This should enable us to work out which ASM fault handler is
receiving the fault and not handling it. I strongly suspect
alt_itlb_miss (03), but before I stab in the dark in there I'd like
to see what results you get. Look for lines that look like this.
Hopefully they end in 03.
(XEN) EFI memory fault addr=0xe00000407fff0010 kr2=0x020202020c020202
If you could send me the boot log, or at the very least the lines that
look like the one above, that should tell me something interesting.
Your kernel config may also be interesting if it deviates from the
default config significantly.
This patch isn't intended for the tree, just for testing.
Index: xen-unstable.hg/xen/arch/ia64/xen/faults.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/ia64/xen/faults.c 2007-10-31
17:52:40.000000000 +0900
+++ xen-unstable.hg/xen/arch/ia64/xen/faults.c 2007-10-31 17:54:27.000000000
+0900
@@ -164,6 +164,7 @@ void ia64_do_page_fault(unsigned long ad
// FIXME should validate address here
unsigned long pteval;
unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL);
+ unsigned long attr = 0;
IA64FAULT fault;
int is_ptc_l_needed = 0;
ia64_itir_t _itir = {.itir = itir};
@@ -182,17 +183,39 @@ void ia64_do_page_fault(unsigned long ad
}
again:
+ /* All that matters here is that if we are accessing EFI memory
+ * we note that by setting EFI_MEMORY_WB for cached access
+ * or EFI_MEMORY_UC for uncached access. The actual EFI attributes
+ * are not of any interest as we can deduce the needed values.
+ * This avoids an expensive call to efi_mem_attributes().
+ *
+ * The RID check prevents domains from accessing this memory.
+ *
+ * This check is also duplicated in assembly in alt_dtlb_miss
+ */
+ if (address >> 59 == __IA64_EFI_CACHED_OFFSET >> 59 &&
+ ia64_get_rr(7) == XEN_EFI_RID)
+ attr = EFI_MEMORY_WB;
+ else if (address >> 59 == __IA64_EFI_UNCACHED_OFFSET >> 59 &&
+ ia64_get_rr(6) == XEN_EFI_RID)
+ attr = EFI_MEMORY_UC;
+ if ((address >> 59 == __IA64_EFI_CACHED_OFFSET >> 59 ||
+ address >> 59 == __IA64_EFI_UNCACHED_OFFSET >> 59) &&
+ ia64_get_rr(address >> 61) == XEN_EFI_RID)
+ printk("EFI memory fault addr=0x%016lx kr2=0x%016lx\n",
+ address, ia64_get_kr(2));
+
fault = vcpu_translate(current, address, is_data, &pteval,
- &itir, &iha);
+ &itir, &iha, attr);
if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
struct p2m_entry entry;
unsigned long m_pteval;
m_pteval = translate_domain_pte(pteval, address, itir,
- &(_itir.itir), &entry);
+ &(_itir.itir), &entry, attr);
vcpu_itc_no_srlz(current, is_data ? 2 : 1, address,
- m_pteval, pteval, _itir.itir, &entry);
+ m_pteval, pteval, _itir.itir, &entry, attr);
if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) ||
- p2m_entry_retry(&entry)) {
+ (!attr && p2m_entry_retry(&entry))) {
/* dtlb has been purged in-between. This dtlb was
matching. Undo the work. */
vcpu_flush_tlb_vhpt_range(address, _itir.ps);
@@ -215,7 +238,9 @@ void ia64_do_page_fault(unsigned long ad
// indicate a bad xen pointer
printk("*** xen_handle_domain_access: exception table"
" lookup failed, iip=0x%lx, addr=0x%lx, "
- "spinning...\n", iip, address);
+ "rr[%d]=0x%lx kr2=0x%lx spinning...\n", iip,
+ address, (address >> 62),
+ ia64_get_rr(address >> 62), ia64_get_kr(2));
panic_domain(regs, "*** xen_handle_domain_access: "
"exception table lookup failed, "
"iip=0x%lx, addr=0x%lx, spinning...\n",
@@ -770,7 +795,7 @@ ia64_shadow_fault(unsigned long ifa, uns
/* FIXME: gives a chance to tpa, as the TC was valid. */
- fault = vcpu_translate(v, ifa, 1, &pte, &itir, &iha);
+ fault = vcpu_translate(v, ifa, 1, &pte, &itir, &iha, 0);
/* Try again! */
if (fault != IA64_NO_FAULT) {
Index: xen-unstable.hg/xen/arch/ia64/xen/ivt.S
===================================================================
--- xen-unstable.hg.orig/xen/arch/ia64/xen/ivt.S 2007-10-31
17:52:40.000000000 +0900
+++ xen-unstable.hg/xen/arch/ia64/xen/ivt.S 2007-10-31 17:54:48.000000000
+0900
@@ -65,7 +65,7 @@
# define PSR_DEFAULT_BITS 0
#endif
-#if 0
+#if 1
/*
* This lets you track the last eight faults that occurred on the CPU.
* Make sure ar.k2 isn't needed for something else before enabling this...
Index: xen-unstable.hg/xen/arch/ia64/xen/mm.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/ia64/xen/mm.c 2007-10-31 17:52:40.000000000
+0900
+++ xen-unstable.hg/xen/arch/ia64/xen/mm.c 2007-10-31 17:54:27.000000000
+0900
@@ -502,7 +502,7 @@ gmfn_to_mfn_foreign(struct domain *d, un
// Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use
// current->arch.vhpt_pg_shift!)
u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* itir,
- struct p2m_entry* entry)
+ struct p2m_entry* entry, unsigned long efi_attr)
{
struct domain *d = current->domain;
ia64_itir_t _itir = {.itir = itir__};
@@ -511,6 +511,18 @@ u64 translate_domain_pte(u64 pteval, u64
u64 arflags2;
u64 maflags2;
+ /* EFI-Runtime areas are identity mapped into the same location
+ * that they are mapped in Linux with GRANULE size pages.
+ * If efi_attr is non-zero, then the address is EFI-Runtime memory.
+ */
+ if (efi_attr) {
+ /* Copy the whole register. */
+ ((ia64_itir_t*)itir)->itir = _itir.itir;
+ /* Overwrite ps part! */
+ ((ia64_itir_t*)itir)->ps = IA64_GRANULE_SHIFT;
+ return pteval;
+ }
+
pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
// FIXME address had better be pre-validated on insert
Index: xen-unstable.hg/xen/arch/ia64/xen/vcpu.c
===================================================================
--- xen-unstable.hg.orig/xen/arch/ia64/xen/vcpu.c 2007-10-31
17:52:40.000000000 +0900
+++ xen-unstable.hg/xen/arch/ia64/xen/vcpu.c 2007-10-31 17:54:27.000000000
+0900
@@ -7,6 +7,7 @@
*/
#include <linux/sched.h>
+#include <linux/efi.h>
#include <public/xen.h>
#include <xen/mm.h>
#include <asm/ia64_int.h>
@@ -1670,13 +1671,30 @@ vcpu_get_domain_bundle(VCPU * vcpu, REGS
}
IA64FAULT vcpu_translate(VCPU * vcpu, u64 address, BOOLEAN is_data,
- u64 * pteval, u64 * itir, u64 * iha)
+ u64 * pteval, u64 * itir, u64 * iha,
+ unsigned long efi_attr)
{
unsigned long region = address >> 61;
unsigned long pta, rid, rr, key = 0;
union pte_flags pte;
TR_ENTRY *trp;
+ /* EFI-Runtime areas are identity mapped into
+ * the same location that they are mapped in Linux.
+ * If efi_attr is non-zero then (efi_attr & EFI_MEMORY_RUNTIME) is
+ * true and that either (efi_attr & EFI_MEMORY_WB) is true or
+ * (efi_attr & (EFI_MEMORY_UC|EFI_MEMORY_WC|EFI_MEMORY_WT)) is true.
+ */
+ if (efi_attr) {
+ if (efi_attr & EFI_MEMORY_WB)
+ *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS |
+ _PAGE_AR_RWX;
+ else
+ *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS |
+ _PAGE_AR_RWX | _PAGE_PL_PRIV | _PAGE_MA_UC;
+ return IA64_NO_FAULT;
+ }
+
if (PSCB(vcpu, metaphysical_mode) && !(!is_data && region)) {
// dom0 may generate an uncacheable physical address (msb=1)
if (region && ((region != 4) || (vcpu->domain != dom0))) {
@@ -1806,7 +1824,7 @@ IA64FAULT vcpu_tpa(VCPU * vcpu, u64 vadr
u64 pteval, itir, mask, iha;
IA64FAULT fault;
- fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
+ fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha, 0);
if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
mask = itir_mask(itir);
*padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask);
@@ -1820,7 +1838,7 @@ IA64FAULT vcpu_tak(VCPU * vcpu, u64 vadr
u64 pteval, itir, iha;
IA64FAULT fault;
- fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha);
+ fault = vcpu_translate(vcpu, vadr, TRUE, &pteval, &itir, &iha, 0);
if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB)
*key = itir & IA64_ITIR_KEY_MASK;
else
@@ -2315,11 +2333,23 @@ vcpu_rebuild_vhpt(VCPU * vcpu, u64 ps)
void
vcpu_itc_no_srlz(VCPU * vcpu, u64 IorD, u64 vaddr, u64 pte,
- u64 mp_pte, u64 itir, struct p2m_entry *entry)
+ u64 mp_pte, u64 itir, struct p2m_entry *entry,
+ unsigned long efi_attr)
{
ia64_itir_t _itir = {.itir = itir};
unsigned long psr;
+ /* EFI-Runtime areas are identity mapped into
+ * the same location that they are mapped in Linux.
+ * If efi_attr is non-zero, then the address is EFI-Runtime memory
+ */
+ if (efi_attr) {
+ unsigned long psr = ia64_clear_ic();
+ ia64_itc(IorD, vaddr, pte, _itir.itir);
+ ia64_set_psr(psr);
+ return;
+ }
+
check_xen_space_overlap("itc", vaddr, 1UL << _itir.ps);
// FIXME, must be inlined or potential for nested fault here!
@@ -2364,12 +2394,12 @@ IA64FAULT vcpu_itc_d(VCPU * vcpu, u64 pt
again:
//itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
- pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
+ pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry, 0);
if (!pteval)
return IA64_ILLOP_FAULT;
if (swap_rr0)
set_virtual_rr0();
- vcpu_itc_no_srlz(vcpu, 2, ifa, pteval, pte, _itir.itir, &entry);
+ vcpu_itc_no_srlz(vcpu, 2, ifa, pteval, pte, _itir.itir, &entry, 0);
if (swap_rr0)
set_metaphysical_rr0();
if (p2m_entry_retry(&entry)) {
@@ -2392,12 +2422,12 @@ IA64FAULT vcpu_itc_i(VCPU * vcpu, u64 pt
again:
//itir = (itir & ~0xfc) | (vcpu->arch.vhpt_pg_shift<<2); // ign dom pgsz
- pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry);
+ pteval = translate_domain_pte(pte, ifa, itir, &(_itir.itir), &entry, 0);
if (!pteval)
return IA64_ILLOP_FAULT;
if (swap_rr0)
set_virtual_rr0();
- vcpu_itc_no_srlz(vcpu, 1, ifa, pteval, pte, _itir.itir, &entry);
+ vcpu_itc_no_srlz(vcpu, 1, ifa, pteval, pte, _itir.itir, &entry, 0);
if (swap_rr0)
set_metaphysical_rr0();
if (p2m_entry_retry(&entry)) {
Index: xen-unstable.hg/xen/include/asm-ia64/mm.h
===================================================================
--- xen-unstable.hg.orig/xen/include/asm-ia64/mm.h 2007-10-31
17:52:40.000000000 +0900
+++ xen-unstable.hg/xen/include/asm-ia64/mm.h 2007-10-31 17:54:27.000000000
+0900
@@ -459,7 +459,8 @@ extern unsigned long dom0vp_unexpose_for
extern volatile unsigned long *mpt_table;
extern unsigned long gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
extern u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__,
- u64* itir, struct p2m_entry* entry);
+ u64* itir, struct p2m_entry* entry,
+ unsigned long efi_attr);
#define machine_to_phys_mapping mpt_table
#define INVALID_M2P_ENTRY (~0UL)
Index: xen-unstable.hg/xen/include/asm-ia64/vcpu.h
===================================================================
--- xen-unstable.hg.orig/xen/include/asm-ia64/vcpu.h 2007-10-31
17:52:40.000000000 +0900
+++ xen-unstable.hg/xen/include/asm-ia64/vcpu.h 2007-10-31 17:54:27.000000000
+0900
@@ -162,7 +162,8 @@ union U_IA64_BUNDLE;
extern int vcpu_get_domain_bundle(VCPU * vcpu, REGS * regs, u64 gip,
union U_IA64_BUNDLE *bundle);
extern IA64FAULT vcpu_translate(VCPU * vcpu, u64 address, BOOLEAN is_data,
- u64 * pteval, u64 * itir, u64 * iha);
+ u64 * pteval, u64 * itir, u64 * iha,
+ unsigned long efi_attr);
extern IA64FAULT vcpu_tpa(VCPU * vcpu, u64 vadr, u64 * padr);
extern IA64FAULT vcpu_force_inst_miss(VCPU * vcpu, u64 ifa);
extern IA64FAULT vcpu_force_data_miss(VCPU * vcpu, u64 ifa);
@@ -182,7 +183,7 @@ extern BOOLEAN vcpu_timer_expired(VCPU *
extern u64 vcpu_deliverable_interrupts(VCPU * vcpu);
struct p2m_entry;
extern void vcpu_itc_no_srlz(VCPU * vcpu, u64, u64, u64, u64, u64,
- struct p2m_entry *);
+ struct p2m_entry *, unsigned long efi_attr);
extern u64 vcpu_get_tmp(VCPU *, u64);
extern void vcpu_set_tmp(VCPU *, u64, u64);
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|