This patch adds code to use _CRS for PCI resource allocation.
To use _CRS, please add "pci=use_crs" to the dom0 Linux boot parameters.
Without this patch, MMIO resources are allocated from the e820 gap, but the
e820 gap covers only the low MMIO area. _CRS reports the high MMIO area
as well as the low MMIO area, so with this patch we are able to use the
high MMIO area.
Most of the code is backported from 2.6.26.
Thanks,
--
Yuji Shimada
Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>
diff -r cdc6729dc702 arch/i386/pci/acpi.c
--- a/arch/i386/pci/acpi.c Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/acpi.c Mon Dec 01 19:09:12 2008 +0900
@@ -5,27 +5,228 @@
#include <asm/numa.h>
#include "pci.h"
-struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
+/*
+ * This struct is backported from 2.6.26 kernel.
+ *
+ * State handed as the callback argument to count_resource() and
+ * setup_resource() while a root bridge's _CRS is being walked.
+ */
+struct pci_root_info {
+ char *name; /* label assigned to the name of every resource set up */
+ unsigned int res_num; /* number of resources counted / set up so far */
+ struct resource *res; /* array whose entries setup_resource() fills in */
+ struct pci_bus *bus; /* bus whose resource[] slots get populated */
+ int busnum; /* not assigned or read by the code in this patch */
+};
+
+/*
+ * Per-root-bus data reached through bus->sysdata.
+ * NOTE(review): this patch also adds a struct with the same tag and
+ * fields to include/asm-i386/pci.h - confirm that both definitions can
+ * never be visible in the same translation unit.
+ */
+struct pci_sysdata {
+ int domain; /* PCI domain */
+ int node; /* NUMA node */
+};
+
+/*
+ * Backported from the 2.6.26 kernel.
+ *
+ * Convert @resource to its 64-bit address form in @addr and accept it
+ * only when the conversion succeeds and the result is a memory or I/O
+ * range with a non-zero length that is marked ACPI_PRODUCER.
+ *
+ * Returns AE_OK when the resource qualifies, AE_ERROR otherwise.
+ */
+static acpi_status __devinit
+resource_to_addr(struct acpi_resource *resource,
+		 struct acpi_resource_address64 *addr)
+{
+	/* Do not look at *addr unless the conversion itself worked. */
+	if (!ACPI_SUCCESS(acpi_resource_to_address64(resource, addr)))
+		return AE_ERROR;
+
+	if (addr->resource_type != ACPI_MEMORY_RANGE &&
+	    addr->resource_type != ACPI_IO_RANGE)
+		return AE_ERROR;
+
+	if (!(addr->address_length > 0))
+		return AE_ERROR;
+
+	if (addr->producer_consumer != ACPI_PRODUCER)
+		return AE_ERROR;
+
+	return AE_OK;
+}
+
+/*
+ * Backported from the 2.6.26 kernel.
+ *
+ * Resource-walk callback: bump the res_num counter in the
+ * struct pci_root_info passed via @data for every resource that
+ * resource_to_addr() accepts, stopping at PCI_BUS_NUM_RESOURCES.
+ * Always returns AE_OK.
+ */
+static acpi_status __devinit
+count_resource(struct acpi_resource *acpi_res, void *data)
+{
+	struct pci_root_info *root_info = data;
+	struct acpi_resource_address64 addr64;
+
+	if (root_info->res_num < PCI_BUS_NUM_RESOURCES &&
+	    ACPI_SUCCESS(resource_to_addr(acpi_res, &addr64)))
+		root_info->res_num++;
+
+	return AE_OK;
+}
+
+/*
+ * Backported from the 2.6.26 kernel.
+ *
+ * Resource-walk callback: turn one accepted _CRS range into the next
+ * free entry of info->res, insert it under iomem_resource or
+ * ioport_resource, and on success record it in info->bus->resource[]
+ * and advance info->res_num.  Ranges that are rejected, of another
+ * type, or that fail to insert are skipped.  Always returns AE_OK.
+ */
+static acpi_status __devinit
+setup_resource(struct acpi_resource *acpi_res, void *data)
+{
+	struct pci_root_info *root_info = data;
+	struct acpi_resource_address64 addr64;
+	struct resource *parent;
+	struct resource *slot;
+	unsigned long res_flags;
+
+	if (root_info->res_num >= PCI_BUS_NUM_RESOURCES)
+		return AE_OK;
+
+	if (!ACPI_SUCCESS(resource_to_addr(acpi_res, &addr64)))
+		return AE_OK;
+
+	/* Pick the parent resource tree and the flags from the range type. */
+	if (addr64.resource_type == ACPI_MEMORY_RANGE) {
+		parent = &iomem_resource;
+		res_flags = IORESOURCE_MEM;
+		if (addr64.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)
+			res_flags |= IORESOURCE_PREFETCH;
+	} else if (addr64.resource_type == ACPI_IO_RANGE) {
+		parent = &ioport_resource;
+		res_flags = IORESOURCE_IO;
+	} else {
+		return AE_OK;
+	}
+
+	slot = &root_info->res[root_info->res_num];
+	slot->name = root_info->name;
+	slot->flags = res_flags;
+	slot->start = addr64.minimum + addr64.translation_offset;
+	slot->end = slot->start + addr64.address_length - 1;
+	slot->child = NULL;
+	printk(KERN_DEBUG "PCI: ACPI resource [%llx-%llx:%lx] for %s\n",
+	       (unsigned long long)slot->start,
+	       (unsigned long long)slot->end,
+	       (unsigned long)slot->flags, root_info->name);
+
+	if (insert_resource(parent, slot)) {
+		printk(KERN_ERR "PCI: Failed to allocate %llx-%llx from %s"
+		       " for %s\n", (unsigned long long)slot->start,
+		       (unsigned long long)slot->end, parent->name,
+		       root_info->name);
+		/* The slot is not consumed; the next range reuses it. */
+		return AE_OK;
+	}
+
+	root_info->bus->resource[root_info->res_num] = slot;
+	root_info->res_num++;
+	return AE_OK;
+}
+
+/*
+ * Backported from the 2.6.26 kernel.
+ *
+ * For every transparent PCI-to-PCI bridge on @bus, point the bridge's
+ * subordinate bus resource slots 3 .. PCI_BUS_NUM_RESOURCES-1 at the
+ * parent bus resource slots 0 .. PCI_BUS_NUM_RESOURCES-4.
+ */
+static void __devinit adjust_transparent_bridge_resources(struct pci_bus *bus)
+{
+	struct pci_dev *bridge;
+
+	list_for_each_entry(bridge, &bus->devices, bus_list) {
+		int slot;
+
+		if ((u16)(bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI)
+			continue;
+		if (!bridge->transparent)
+			continue;
+
+		for (slot = 0; slot + 3 < PCI_BUS_NUM_RESOURCES; slot++)
+			bridge->subordinate->resource[slot + 3] =
+				bridge->bus->resource[slot];
+	}
+}
+
+/*
+ * Backported from the 2.6.26 kernel.
+ *
+ * Walk the _CRS of @device twice: first to count the usable address
+ * ranges, then to fill in a freshly allocated struct resource array and
+ * attach the ranges to @bus.
+ *
+ * @device: ACPI device whose handle is passed to acpi_walk_resources()
+ * @busnum: bus number, used to build the resource name
+ * @domain: PCI domain, used to build the resource name
+ * @bus:    bus whose resource[] entries are filled in
+ *
+ * Ownership: once at least one range has been set up, the name string
+ * and the resource array are referenced by the inserted resources and
+ * by bus->resource[], so they are deliberately kept.  On every other
+ * path both buffers are released here.
+ */
+static void __devinit
+get_current_resources(struct acpi_device *device, int busnum,
+		      int domain, struct pci_bus *bus)
+{
+	/* Room for "PCI Bus dddd:bb" plus the terminating NUL. */
+	const size_t name_len = sizeof("PCI Bus 0000:00");
+	struct pci_root_info info;
+
+	info.bus = bus;
+	info.busnum = busnum;
+	info.res = NULL;
+	info.res_num = 0;
+	info.name = kmalloc(name_len, GFP_KERNEL);
+	if (!info.name)
+		return;
+	/* Bounded so an unexpectedly wide domain cannot overrun the buffer. */
+	snprintf(info.name, name_len, "PCI Bus %04x:%02x", domain, busnum);
+
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+			    count_resource, &info);
+	if (!info.res_num)
+		goto free_name;
+
+	info.res = kmalloc(sizeof(*info.res) * info.res_num, GFP_KERNEL);
+	if (!info.res) {
+		printk(KERN_ERR "PCI: Failed to allocate resource structure "
+		       "for %s\n", info.name);
+		goto free_name;
+	}
+
+	/* setup_resource() reuses the counter as its fill index. */
+	info.res_num = 0;
+	acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+			    setup_resource, &info);
+	if (info.res_num) {
+		adjust_transparent_bridge_resources(bus);
+		return;
+	}
+
+	/* Nothing was inserted, so nothing references either buffer. */
+	kfree(info.res);
+free_name:
+	kfree(info.name);
+}
+
+/*
+ * This function is backported from 2.6.26 kernel.
+ *
+ * Allocate a struct pci_sysdata holding @domain and the NUMA node (-1
+ * unless CONFIG_ACPI_NUMA yields a proximity domain), then either reuse
+ * the bus that pci_find_bus() returns for (@domain, @busnum) or scan a
+ * new one with pci_scan_bus_parented().  When pci=use_crs was given
+ * (PCI_USE__CRS set in pci_probe) the bus resources are then taken from
+ * _CRS.  Returns the bus, or NULL if the allocation or the scan failed.
+ */
+struct pci_bus * __devinit
+pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
{
struct pci_bus *bus;
+ struct pci_sysdata *sd;
+ int node;
+#ifdef CONFIG_ACPI_NUMA
+ int pxm;
+#endif
- if (domain != 0) {
- printk(KERN_WARNING "PCI: Multiple domains not supported\n");
+ node = -1;
+#ifdef CONFIG_ACPI_NUMA
+ pxm = acpi_get_pxm(device->handle);
+ if (pxm >= 0)
+ node = pxm_to_node(pxm);
+#endif
+
+ /* Allocate per-root-bus (not per bus) arch-specific data.
+ * TODO: leak; this memory is never freed.
+ * It's arguable whether it's worth the trouble to care.
+ */
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd) {
+ printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
return NULL;
}
- bus = pcibios_scan_root(busnum);
+ sd->domain = domain;
+ sd->node = node;
+ /*
+ * Maybe the desired pci bus has been already scanned. In such case
+ * it is unnecessary to scan the pci bus with the given domain,busnum.
+ */
+ bus = pci_find_bus(domain, busnum);
+ if (bus) {
+ /*
+ * If the desired bus exists, the content of bus->sysdata will
+ * be replaced by sd.
+ * NOTE(review): this assumes bus->sysdata already points at a
+ * struct pci_sysdata for a bus found here - confirm against the
+ * other code paths that create root buses.
+ */
+ memcpy(bus->sysdata, sd, sizeof(*sd));
+ kfree(sd);
+ } else
+ bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
+
+ /* Only reachable with bus == NULL when the scan above failed. */
+ if (!bus)
+ kfree(sd);
+
#ifdef CONFIG_ACPI_NUMA
- if (bus != NULL) {
- int pxm = acpi_get_pxm(device->handle);
+ if (bus) {
if (pxm >= 0) {
- bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm);
- printk("bus %d -> pxm %d -> node %ld\n",
- busnum, pxm, (long)(bus->sysdata));
+ printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
+ busnum, pxm, pxm_to_node(pxm));
}
}
#endif
-
+
+ /* Take the bus resources from _CRS only when pci=use_crs was given. */
+ if (bus && (pci_probe & PCI_USE__CRS)) {
+ get_current_resources(device, busnum, domain, bus);
+ }
+
return bus;
}
diff -r cdc6729dc702 arch/i386/pci/common.c
--- a/arch/i386/pci/common.c Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/common.c Mon Dec 01 19:09:12 2008 +0900
@@ -260,6 +260,9 @@ char * __devinit pcibios_setup(char *st
} else if (!strcmp(str, "assign-busses")) {
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
return NULL;
+ } else if (!strcmp(str, "use_crs")) {
+ pci_probe |= PCI_USE__CRS;
+ return NULL;
} else if (!strcmp(str, "routeirq")) {
pci_routeirq = 1;
return NULL;
diff -r cdc6729dc702 arch/i386/pci/pci.h
--- a/arch/i386/pci/pci.h Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/pci.h Mon Dec 01 19:09:12 2008 +0900
@@ -25,6 +25,7 @@
#define PCI_ASSIGN_ROMS 0x1000
#define PCI_BIOS_IRQ_SCAN 0x2000
#define PCI_ASSIGN_ALL_BUSSES 0x4000
+#define PCI_USE__CRS 0x10000
extern unsigned int pci_probe;
extern unsigned long pirq_table_addr;
diff -r cdc6729dc702 include/asm-i386/pci.h
--- a/include/asm-i386/pci.h Fri Nov 28 13:41:38 2008 +0000
+++ b/include/asm-i386/pci.h Mon Dec 01 19:09:12 2008 +0900
@@ -4,6 +4,22 @@
#ifdef __KERNEL__
#include <linux/mm.h> /* for struct page */
+
+/*
+ * Arch-specific data that pci_domain_nr() and __pcibus_to_node() read
+ * through bus->sysdata.
+ */
+struct pci_sysdata {
+ int domain; /* PCI domain */
+ int node; /* NUMA node */
+};
+
+/*
+ * Return the PCI domain recorded in the pci_sysdata of @bus.
+ * NOTE(review): bus->sysdata is dereferenced without a NULL check -
+ * confirm every bus reaching this helper carries a struct pci_sysdata.
+ */
+static inline int pci_domain_nr(struct pci_bus *bus)
+{
+	return ((struct pci_sysdata *)bus->sysdata)->domain;
+}
+
+/* Return the same value as pci_domain_nr() for @bus. */
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	int domain = pci_domain_nr(bus);
+
+	return domain;
+}
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
@@ -116,4 +132,14 @@ static inline void pci_dma_burst_advice(
/* generic pci stuff */
#include <asm-generic/pci.h>
+#ifdef CONFIG_NUMA
+/* Return the NUMA node stored in the pci_sysdata of @bus. */
+static inline int __pcibus_to_node(struct pci_bus *bus)
+{
+	return ((struct pci_sysdata *)bus->sysdata)->node;
+}
+#endif
+
#endif /* __i386_PCI_H */
diff -r cdc6729dc702 include/asm-i386/topology.h
--- a/include/asm-i386/topology.h Fri Nov 28 13:41:38 2008 +0000
+++ b/include/asm-i386/topology.h Mon Dec 01 19:09:12 2008 +0900
@@ -67,7 +67,7 @@ static inline int node_to_first_cpu(int
return first_cpu(mask);
}
-#define pcibus_to_node(bus) ((long) (bus)->sysdata)
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
/* sched_domains SD_NODE_INIT for NUMAQ machines */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|