On Wed, Oct 03, 2007 at 11:17:58AM -0600, Alex Williamson wrote:
> On Thu, 2007-09-27 at 17:17 +0900, Simon Horman wrote:
> > plain text document attachment (ia64-xen-kexec-save-acpi.patch)
> > Xen mangles the MADT tables on boot up. But the pristine tables are needed
> > on kexec. So save the tables and restore them on kexec.
> >
> > Note that this saves all the tables. A trimmed down save could
> > be done if preferred.
>
> Hi Simon,
>
> I made an attempt to re-write this one. The most obvious reason for
> virtualizing the id/eid is for indexing into the domain vcpu array.
> Even if we made use of the real id/eid for that lookup, there are a
> couple other places where we're disabling bits of ACPI for dom0. So, it
> seems like we're stuck with backing up tables before dom0 is built and
> restoring them on kexec. I took a little bit more surgical approach in
> the implementation below (only backing up tables we mangle) and kept the
> changes in architecture specific code. This is a little more space
> efficient, but does require that we explicitly backup tables before we
> mangle them. Let me know what you think (or if it works - I can't seem
> to get a kexec to happen, no console output after machine_shutdown).
> Thanks,
Hi Alex,
if you prefer this approach then it's fine by me. I can confirm that
it works - that is, with it my RX2620 ends up with 2 cpus in the second
kernel, and without it, it doesn't.
The reason that your kexec is locking up is that it's getting stuck
on the call to PA() in purgatory, which is supplied by kexec-tools.
The reason for this is that it assumes that the following is valid:
pa = va - PAGE_OFFSET
where: PAGE_OFFSET = 0xe000000000000000UL;
I'm sorry that I overlooked this when I suggested that Kexec
should work with the patches you suggested and subsequently applied.
I missed this one :-(
This code is appalling (IMHO) but it's there, and it affects
Linux compatibility.
There are several fixes for this, and ultimately the choice is
likely to rely on the choice of solution for the EFI mapping problem -
as it's EFI addresses that PA() operates on.
I imagine that if you just hack PAGE_OFFSET in
purgatory/arch/ia64/purgatory-ia64.c of the kexec-tools-tree such that
it is 0xf000000000000000UL, then the problem will (temporarily) go away.
I can verify that if you like. Though it will break kexec from Linux.
Another (temporary) solution is to simply apply the patches that you
skipped in my series. When I say apply, I mean for the purposes of
testing, not merge into the tree. This works because EFI virtual
addresses do end up satisfying the equation above.
Specifically I am talking about the following patches, which can all be
found at
http://www.vergenet.net/linux/kexec/ia64-xen/20070927/broken_out/linux-xen/
I believe that they will apply to the current xen-ia64-unstable tree
unmodified.
EFI_OFFSET.patch
ia64_do_page_fault-efi-identity-map.patch
efi_enter_virtual_mode.patch
alt_dtlb_miss-efi.patch
I think that the best way forward from here is to look at a solution
for the EFI mapping problem. I will try and find time to investigate
Yamahata-san's RID idea ASAP. If we can get a solution for that problem
in place, then the way to deal with the PA() in purgatory problem
should become obvious, and if Yamahata-san's idea works and
we can map EFI at 0xe000000000000000UL, then the problem will go away.
>
> Alex
>
>
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxx>
Acked-by: Simon Horman <horms@xxxxxxxxxxxx>
> --
>
> diff -r 3165e43ce734 xen/arch/ia64/xen/dom_fw_dom0.c
> --- a/xen/arch/ia64/xen/dom_fw_dom0.c Tue Oct 02 11:31:55 2007 -0600
> +++ b/xen/arch/ia64/xen/dom_fw_dom0.c Wed Oct 03 10:54:50 2007 -0600
> @@ -28,6 +28,7 @@
> #include <xen/acpi.h>
> #include <xen/errno.h>
> #include <xen/sched.h>
> +#include <xen/list.h>
>
> #include <asm/dom_fw.h>
> #include <asm/dom_fw_common.h>
> @@ -35,6 +36,15 @@
> #include <asm/dom_fw_utils.h>
>
> #include <linux/sort.h>
> +
> +struct acpi_backup_table_entry {
> + struct list_head list;
> + unsigned long pa;
> + unsigned long size;
> + unsigned char data[0];
> +};
> +
> +static LIST_HEAD(acpi_backup_table_list);
>
> static u32 lsapic_nbr;
>
> @@ -100,11 +110,67 @@ acpi_update_madt_checksum(unsigned long
> return 0;
> }
>
> +static int __init
> +acpi_backup_table(unsigned long phys_addr, unsigned long size)
> +{
> + struct acpi_backup_table_entry *entry;
> + void *vaddr = __va(phys_addr);
> +
> + if (!phys_addr || !size)
> + return -EINVAL;
> +
> + entry = xmalloc_bytes(sizeof(*entry) + size);
> + if (!entry) {
> + dprintk(XENLOG_WARNING, "Failed to allocate memory for "
> + "%.4s table backup\n",
> + ((struct acpi_table_header *)vaddr)->signature);
> + return -ENOMEM;
> + }
> +
> + entry->pa = phys_addr;
> + entry->size = size;
> +
> + memcpy(entry->data, vaddr, size);
> +
> + list_add(&entry->list, &acpi_backup_table_list);
> +
> + printk(XENLOG_INFO "Backup %.4s table stored @0x%p\n",
> + ((struct acpi_table_header *)entry->data)->signature,
> + entry->data);
> +
> + return 0;
> +}
> +
> +void
> +acpi_restore_tables()
> +{
> + struct acpi_backup_table_entry *entry;
> +
> + list_for_each_entry(entry, &acpi_backup_table_list, list) {
> + printk(XENLOG_INFO "Restoring backup %.4s table @0x%p\n",
> + ((struct acpi_table_header *)entry->data)->signature,
> + entry->data);
> +
> + memcpy(__va(entry->pa), entry->data, entry->size);
> + /* Only called from kexec path, no need to free entries */
> + }
> +}
> +
> /* base is physical address of acpi table */
> static void __init touch_acpi_table(void)
> {
> int result;
> lsapic_nbr = 0;
> +
> + /*
> + * Modify dom0 MADT:
> + * - Disable CPUs that would exceed max vCPUs for the domain
> + * - Virtualize id/eid for indexing into domain vCPU array
> + * - Hide CPEI interrupt source
> + *
> + * ACPI tables must be backed-up before modification!
> + */
> + acpi_table_parse(ACPI_APIC, acpi_backup_table);
>
> if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, 0) < 0)
> printk("Error parsing MADT - no LAPIC entries\n");
> @@ -112,6 +178,17 @@ static void __init touch_acpi_table(void
> acpi_patch_plat_int_src, 0) < 0)
> printk("Error parsing MADT - no PLAT_INT_SRC entries\n");
>
> + acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);
> +
> + /*
> + * SRAT & SLIT tables aren't useful for Dom0 until
> + * we support more NUMA configuration information in Xen.
> + *
> + * NB - backup ACPI tables first.
> + */
> + acpi_table_parse(ACPI_SRAT, acpi_backup_table);
> + acpi_table_parse(ACPI_SLIT, acpi_backup_table);
> +
> result = acpi_table_disable(ACPI_SRAT);
> if ( result == 0 )
> printk("Success Disabling SRAT\n");
> @@ -124,8 +201,6 @@ static void __init touch_acpi_table(void
> else if ( result != -ENOENT )
> printk("ERROR: Failed Disabling SLIT\n");
>
> - acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);
> -
> return;
> }
>
> @@ -133,9 +208,9 @@ void __init efi_systable_init_dom0(struc
> {
> int i = 1;
>
> + touch_acpi_table();
> +
> /* Write messages to the console. */
> - touch_acpi_table();
> -
> printk("Domain0 EFI passthrough:");
> if (efi.mps) {
> tables->efi_tables[i].guid = MPS_TABLE_GUID;
> diff -r 3165e43ce734 xen/arch/ia64/xen/machine_kexec.c
> --- a/xen/arch/ia64/xen/machine_kexec.c Tue Oct 02 11:31:55 2007 -0600
> +++ b/xen/arch/ia64/xen/machine_kexec.c Tue Oct 02 15:41:59 2007 -0600
> @@ -23,6 +23,7 @@
> #include <linux/cpu.h>
> #include <linux/cpu.h>
> #include <linux/notifier.h>
> +#include <asm/dom_fw_dom0.h>
>
> typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
> unsigned long indirection_page,
> @@ -149,6 +150,7 @@ static void machine_shutdown(void)
> }
> #endif
> kexec_disable_iosapic();
> + acpi_restore_tables();
> }
>
> void machine_kexec(xen_kexec_image_t *image)
> diff -r 3165e43ce734 xen/include/asm-ia64/dom_fw_dom0.h
> --- a/xen/include/asm-ia64/dom_fw_dom0.h Tue Oct 02 11:31:55 2007 -0600
> +++ b/xen/include/asm-ia64/dom_fw_dom0.h Tue Oct 02 15:41:32 2007 -0600
> @@ -26,6 +26,7 @@ struct domain;
>
> void efi_systable_init_dom0(struct fw_tables *tables);
> int complete_dom0_memmap(struct domain *d, struct fw_tables *tables);
> +void acpi_restore_tables(void);
>
> #endif /* __ASM_IA64_DOM_FW_DOM0_H__ */
> /*
>
--
Horms
H: http://www.vergenet.net/~horms/
W: http://www.valinux.co.jp/en/
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|