On Thu, Sep 22, 2011 at 09:17:57AM +0100, Jan Beulich wrote:
> Now that the hypercall interface changes are in -unstable, make the
> kernel side code not ignore the segment (aka domain) number anymore
> (which results in pretty odd behavior on such systems). Rather, if
> only the old interfaces are available, don't call them for devices on
> non-zero segments at all.
>
> The one thing I wasn't able to spot was a use of PHYSDEVOP_restore_msi
> (which would also need to be changed), so if there is some other patch
> in some tree that would be introducing this it ought to get adjusted
> to try using PHYSDEVOP_restore_msi_ext first.
Liang,
Can you please apply the same logic that Jan used here to the ACPI S3 patches?
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> ---
> arch/x86/pci/xen.c | 22 ++++++++-
> drivers/xen/pci.c | 94 +++++++++++++++++++++++++++++++++++-----
> include/xen/interface/physdev.h | 34 ++++++++++++++
> 3 files changed, 136 insertions(+), 14 deletions(-)
>
> --- 3.1-rc7/arch/x86/pci/xen.c
> +++ 3.1-rc7-xen-pci-multi-seg/arch/x86/pci/xen.c
> @@ -248,6 +248,8 @@ error:
> }
>
> #ifdef CONFIG_XEN_DOM0
> +static bool __read_mostly pci_seg_supported = true;
> +
> static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int
> type)
> {
> int ret = 0;
> @@ -265,10 +267,11 @@ static int xen_initdom_setup_msi_irqs(st
>
> memset(&map_irq, 0, sizeof(map_irq));
> map_irq.domid = domid;
> - map_irq.type = MAP_PIRQ_TYPE_MSI;
> + map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
> map_irq.index = -1;
> map_irq.pirq = -1;
> - map_irq.bus = dev->bus->number;
> + map_irq.bus = dev->bus->number |
> + (pci_domain_nr(dev->bus) << 16);
> map_irq.devfn = dev->devfn;
>
> if (type == PCI_CAP_ID_MSIX) {
> @@ -285,7 +288,20 @@ static int xen_initdom_setup_msi_irqs(st
> map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
> }
>
> - ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
> + ret = -EINVAL;
> + if (pci_seg_supported)
> + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
> + &map_irq);
> + if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
> + map_irq.type = MAP_PIRQ_TYPE_MSI;
> + map_irq.index = -1;
> + map_irq.pirq = -1;
> + map_irq.bus = dev->bus->number;
> + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
> + &map_irq);
> + if (ret != -EINVAL)
> + pci_seg_supported = false;
> + }
> if (ret) {
> dev_warn(&dev->dev, "xen map irq failed %d for %d
> domain\n",
> ret, domid);
> --- 3.1-rc7/drivers/xen/pci.c
> +++ 3.1-rc7-xen-pci-multi-seg/drivers/xen/pci.c
> @@ -18,6 +18,7 @@
> */
>
> #include <linux/pci.h>
> +#include <linux/acpi.h>
> #include <xen/xen.h>
> #include <xen/interface/physdev.h>
> #include <xen/interface/xen.h>
> @@ -26,26 +27,85 @@
> #include <asm/xen/hypercall.h>
> #include "../pci/pci.h"
>
> +static bool __read_mostly pci_seg_supported = true;
> +
> static int xen_add_device(struct device *dev)
> {
> int r;
> struct pci_dev *pci_dev = to_pci_dev(dev);
> +#ifdef CONFIG_PCI_IOV
> + struct pci_dev *physfn = pci_dev->physfn;
> +#endif
> +
> + if (pci_seg_supported) {
> + struct physdev_pci_device_add add = {
> + .seg = pci_domain_nr(pci_dev->bus),
> + .bus = pci_dev->bus->number,
> + .devfn = pci_dev->devfn
> + };
> +#ifdef CONFIG_ACPI
> + acpi_handle handle;
> +#endif
>
> #ifdef CONFIG_PCI_IOV
> - if (pci_dev->is_virtfn) {
> + if (pci_dev->is_virtfn) {
> + add.flags = XEN_PCI_DEV_VIRTFN;
> + add.physfn.bus = physfn->bus->number;
> + add.physfn.devfn = physfn->devfn;
> + } else
> +#endif
> + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
> + add.flags = XEN_PCI_DEV_EXTFN;
> +
> +#ifdef CONFIG_ACPI
> + handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
> + if (!handle)
> + handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
> +#ifdef CONFIG_PCI_IOV
> + if (!handle && pci_dev->is_virtfn)
> + handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
> +#endif
> + if (handle) {
> + acpi_status status;
> +
> + do {
> + unsigned long long pxm;
> +
> + status = acpi_evaluate_integer(handle, "_PXM",
> + NULL, &pxm);
> + if (ACPI_SUCCESS(status)) {
> + add.optarr[0] = pxm;
> + add.flags |= XEN_PCI_DEV_PXM;
> + break;
> + }
> + status = acpi_get_parent(handle, &handle);
> + } while (ACPI_SUCCESS(status));
> + }
> +#endif /* CONFIG_ACPI */
> +
> + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
> + if (r != -ENOSYS)
> + return r;
> + pci_seg_supported = false;
> + }
> +
> + if (pci_domain_nr(pci_dev->bus))
> + r = -ENOSYS;
> +#ifdef CONFIG_PCI_IOV
> + else if (pci_dev->is_virtfn) {
> struct physdev_manage_pci_ext manage_pci_ext = {
> .bus = pci_dev->bus->number,
> .devfn = pci_dev->devfn,
> .is_virtfn = 1,
> - .physfn.bus = pci_dev->physfn->bus->number,
> - .physfn.devfn = pci_dev->physfn->devfn,
> + .physfn.bus = physfn->bus->number,
> + .physfn.devfn = physfn->devfn,
> };
>
> r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
> &manage_pci_ext);
> - } else
> + }
> #endif
> - if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
> + else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
> struct physdev_manage_pci_ext manage_pci_ext = {
> .bus = pci_dev->bus->number,
> .devfn = pci_dev->devfn,
> @@ -71,13 +131,27 @@ static int xen_remove_device(struct devi
> {
> int r;
> struct pci_dev *pci_dev = to_pci_dev(dev);
> - struct physdev_manage_pci manage_pci;
>
> - manage_pci.bus = pci_dev->bus->number;
> - manage_pci.devfn = pci_dev->devfn;
> + if (pci_seg_supported) {
> + struct physdev_pci_device device = {
> + .seg = pci_domain_nr(pci_dev->bus),
> + .bus = pci_dev->bus->number,
> + .devfn = pci_dev->devfn
> + };
>
> - r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
> - &manage_pci);
> + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
> + &device);
> + } else if (pci_domain_nr(pci_dev->bus))
> + r = -ENOSYS;
> + else {
> + struct physdev_manage_pci manage_pci = {
> + .bus = pci_dev->bus->number,
> + .devfn = pci_dev->devfn
> + };
> +
> + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
> + &manage_pci);
> + }
>
> return r;
> }
> --- 3.1-rc7/include/xen/interface/physdev.h
> +++ 3.1-rc7-xen-pci-multi-seg/include/xen/interface/physdev.h
> @@ -109,6 +109,7 @@ struct physdev_irq {
> #define MAP_PIRQ_TYPE_MSI 0x0
> #define MAP_PIRQ_TYPE_GSI 0x1
> #define MAP_PIRQ_TYPE_UNKNOWN 0x2
> +#define MAP_PIRQ_TYPE_MSI_SEG 0x3
>
> #define PHYSDEVOP_map_pirq 13
> struct physdev_map_pirq {
> @@ -119,7 +120,7 @@ struct physdev_map_pirq {
> int index;
> /* IN or OUT */
> int pirq;
> - /* IN */
> + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
> int bus;
> /* IN */
> int devfn;
> @@ -198,6 +199,37 @@ struct physdev_get_free_pirq {
> uint32_t pirq;
> };
>
> +#define XEN_PCI_DEV_EXTFN 0x1
> +#define XEN_PCI_DEV_VIRTFN 0x2
> +#define XEN_PCI_DEV_PXM 0x4
> +
> +#define PHYSDEVOP_pci_device_add 25
> +struct physdev_pci_device_add {
> + /* IN */
> + uint16_t seg;
> + uint8_t bus;
> + uint8_t devfn;
> + uint32_t flags;
> + struct {
> + uint8_t bus;
> + uint8_t devfn;
> + } physfn;
> +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
> + uint32_t optarr[];
> +#elif defined(__GNUC__)
> + uint32_t optarr[0];
> +#endif
> +};
> +
> +#define PHYSDEVOP_pci_device_remove 26
> +#define PHYSDEVOP_restore_msi_ext 27
> +struct physdev_pci_device {
> + /* IN */
> + uint16_t seg;
> + uint8_t bus;
> + uint8_t devfn;
> +};
> +
> /*
> * Notify that some PIRQ-bound event channels have been unmasked.
> * ** This command is obsolete since interface version 0x00030202 and is **
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|