WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 1/3] dom0 linux: Use _CRS for PCI resource allocation

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 1/3] dom0 linux: Use _CRS for PCI resource allocation.
From: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>
Date: Tue, 02 Dec 2008 15:35:13 +0900
Cc:
Delivery-date: Mon, 01 Dec 2008 22:35:41 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <20081202152033.08A3.SHIMADA-YXB@xxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <20081202152033.08A3.SHIMADA-YXB@xxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This patch adds code to use _CRS for PCI resource allocation.
To use _CRS, please add "pci=use_crs" to the dom0 Linux boot parameters.


Without this patch, MMIO resources are allocated from the e820 gap, but
the e820 gap covers only the low MMIO area. _CRS reports the high MMIO
area as well as the low MMIO area, so with this patch we are able to use
the high MMIO area.


Most of the code is backported from the 2.6.26 kernel.

Thanks,
--
Yuji Shimada


Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff -r cdc6729dc702 arch/i386/pci/acpi.c
--- a/arch/i386/pci/acpi.c      Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/acpi.c      Mon Dec 01 19:09:12 2008 +0900
@@ -5,27 +5,228 @@
 #include <asm/numa.h>
 #include "pci.h"
 
-struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
+/* This struct is backported from 2.6.26 kernel */
+struct pci_root_info {
+       char *name;
+       unsigned int res_num;
+       struct resource *res;
+       struct pci_bus *bus;
+       int busnum;
+};
+
+struct pci_sysdata {
+       int     domain;         /* PCI domain */
+       int     node;           /* NUMA node */
+};
+
+/* This function is backported from 2.6.26 kernel */
+static acpi_status __devinit
+resource_to_addr(struct acpi_resource *resource,
+                       struct acpi_resource_address64 *addr)
+{
+       acpi_status status;
+
+       status = acpi_resource_to_address64(resource, addr);
+       if (ACPI_SUCCESS(status) &&
+           (addr->resource_type == ACPI_MEMORY_RANGE ||
+           addr->resource_type == ACPI_IO_RANGE) &&
+           addr->address_length > 0 &&
+           addr->producer_consumer == ACPI_PRODUCER) {
+               return AE_OK;
+       }
+       return AE_ERROR;
+}
+
+/* This function is backported from 2.6.26 kernel */
+static acpi_status __devinit
+count_resource(struct acpi_resource *acpi_res, void *data)
+{
+       struct pci_root_info *info = data;
+       struct acpi_resource_address64 addr;
+       acpi_status status;
+
+       if (info->res_num >= PCI_BUS_NUM_RESOURCES)
+               return AE_OK;
+
+       status = resource_to_addr(acpi_res, &addr);
+       if (ACPI_SUCCESS(status))
+               info->res_num++;
+
+       return AE_OK;
+}
+
+/* This function is backported from 2.6.26 kernel */
+static acpi_status __devinit
+setup_resource(struct acpi_resource *acpi_res, void *data)
+{
+       struct pci_root_info *info = data;
+       struct resource *res;
+       struct acpi_resource_address64 addr;
+       acpi_status status;
+       unsigned long flags;
+       struct resource *root;
+
+       if (info->res_num >= PCI_BUS_NUM_RESOURCES)
+               return AE_OK;
+
+       status = resource_to_addr(acpi_res, &addr);
+       if (!ACPI_SUCCESS(status)) {
+               return AE_OK;
+       }
+
+       if (addr.resource_type == ACPI_MEMORY_RANGE) {
+               root = &iomem_resource;
+               flags = IORESOURCE_MEM;
+               if (addr.info.mem.caching == ACPI_PREFETCHABLE_MEMORY)
+                       flags |= IORESOURCE_PREFETCH;
+       } else if (addr.resource_type == ACPI_IO_RANGE) {
+               root = &ioport_resource;
+               flags = IORESOURCE_IO;
+       } else
+               return AE_OK;
+
+       res = &info->res[info->res_num];
+       res->name = info->name;
+       res->flags = flags;
+       res->start = addr.minimum + addr.translation_offset;
+       res->end = res->start + addr.address_length - 1;
+       res->child = NULL;
+       printk(KERN_DEBUG "PCI: ACPI resource [%llx-%llx:%lx] for %s\n",
+               (unsigned long long)res->start, (unsigned long long)res->end,
+               (unsigned long)res->flags, info->name);
+
+       if (insert_resource(root, res)) {
+               printk(KERN_ERR "PCI: Failed to allocate %llx-%llx from %s"
+                       " for %s\n", (unsigned long long)res->start,
+                       (unsigned long long)res->end, root->name, info->name);
+       } else {
+               info->bus->resource[info->res_num] = res;
+               info->res_num++;
+       }
+       return AE_OK;
+}
+
+/* This function is backported from 2.6.26 kernel */
+static void __devinit adjust_transparent_bridge_resources(struct pci_bus *bus)
+{
+       struct pci_dev *dev;
+
+       list_for_each_entry(dev, &bus->devices, bus_list) {
+               int i;
+               u16 class = dev->class >> 8;
+
+               if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) {
+                       for(i = 3; i < PCI_BUS_NUM_RESOURCES; i++)
+                               dev->subordinate->resource[i] =
+                                               dev->bus->resource[i - 3];
+               }
+       }
+}
+
+/* This function is backported from 2.6.26 kernel */
+static void __devinit
+get_current_resources(struct acpi_device *device, int busnum,
+                       int domain, struct pci_bus *bus)
+{
+       struct pci_root_info info;
+       size_t size;
+
+       info.bus = bus;
+       info.res_num = 0;
+       info.name = kmalloc(16, GFP_KERNEL);
+       if (!info.name)
+               goto res_alloc_fail;
+       sprintf(info.name, "PCI Bus %04x:%02x", domain, busnum);
+
+       acpi_walk_resources(device->handle, METHOD_NAME__CRS, 
+                               count_resource, &info);
+       if (!info.res_num)
+               return;
+
+       size = sizeof(*info.res) * info.res_num;
+       info.res = kmalloc(size, GFP_KERNEL);
+       if (!info.res) {
+               printk(KERN_ERR "PCI: Failed to allocate resource structure "
+                               "for %s\n", info.name);
+               goto name_alloc_fail;
+       }
+
+       info.res_num = 0;
+       acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+                               setup_resource, &info);
+       if (info.res_num) {
+               adjust_transparent_bridge_resources(bus);
+       }
+
+       return;
+
+name_alloc_fail:
+       kfree(info.res);
+res_alloc_fail:
+       return;
+}
+
+/* This function is backported from 2.6.26 kernel */
+struct pci_bus * __devinit 
+pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
 {
        struct pci_bus *bus;
+       struct pci_sysdata *sd;
+       int node;
+#ifdef CONFIG_ACPI_NUMA
+       int pxm;
+#endif
 
-       if (domain != 0) {
-               printk(KERN_WARNING "PCI: Multiple domains not supported\n");
+       node = -1;
+#ifdef CONFIG_ACPI_NUMA
+       pxm = acpi_get_pxm(device->handle);
+       if (pxm >= 0)
+               node = pxm_to_node(pxm);
+#endif
+
+       /* Allocate per-root-bus (not per bus) arch-specific data.
+        * TODO: leak; this memory is never freed.
+        * It's arguable whether it's worth the trouble to care.
+        */
+       sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+       if (!sd) {
+               printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
                return NULL;
        }
 
-       bus = pcibios_scan_root(busnum);
+       sd->domain = domain;
+       sd->node = node;
+       /*
+        * Maybe the desired pci bus has been already scanned. In such case
+        * it is unnecessary to scan the pci bus with the given domain,busnum.
+        */
+       bus = pci_find_bus(domain, busnum);
+       if (bus) {
+               /*
+                * If the desired bus exists, the content of bus->sysdata will
+                * be replaced by sd.
+                */
+               memcpy(bus->sysdata, sd, sizeof(*sd));
+               kfree(sd);
+       } else
+               bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
+
+       if (!bus)
+               kfree(sd);
+
 #ifdef CONFIG_ACPI_NUMA
-       if (bus != NULL) {
-               int pxm = acpi_get_pxm(device->handle);
+       if (bus) {
                if (pxm >= 0) {
-                       bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm);
-                       printk("bus %d -> pxm %d -> node %ld\n",
-                               busnum, pxm, (long)(bus->sysdata));
+                       printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n",
+                               busnum, pxm, pxm_to_node(pxm));
                }
        }
 #endif
-       
+
+       if (bus && (pci_probe & PCI_USE__CRS)) {
+               get_current_resources(device, busnum, domain, bus);
+       }
+
        return bus;
 }
 
diff -r cdc6729dc702 arch/i386/pci/common.c
--- a/arch/i386/pci/common.c    Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/common.c    Mon Dec 01 19:09:12 2008 +0900
@@ -260,6 +260,9 @@ char * __devinit  pcibios_setup(char *st
        } else if (!strcmp(str, "assign-busses")) {
                pci_probe |= PCI_ASSIGN_ALL_BUSSES;
                return NULL;
+       } else if (!strcmp(str, "use_crs")) {
+               pci_probe |= PCI_USE__CRS;
+               return NULL;
        } else if (!strcmp(str, "routeirq")) {
                pci_routeirq = 1;
                return NULL;
diff -r cdc6729dc702 arch/i386/pci/pci.h
--- a/arch/i386/pci/pci.h       Fri Nov 28 13:41:38 2008 +0000
+++ b/arch/i386/pci/pci.h       Mon Dec 01 19:09:12 2008 +0900
@@ -25,6 +25,7 @@
 #define PCI_ASSIGN_ROMS                0x1000
 #define PCI_BIOS_IRQ_SCAN      0x2000
 #define PCI_ASSIGN_ALL_BUSSES  0x4000
+#define PCI_USE__CRS           0x10000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
diff -r cdc6729dc702 include/asm-i386/pci.h
--- a/include/asm-i386/pci.h    Fri Nov 28 13:41:38 2008 +0000
+++ b/include/asm-i386/pci.h    Mon Dec 01 19:09:12 2008 +0900
@@ -4,6 +4,22 @@
 
 #ifdef __KERNEL__
 #include <linux/mm.h>          /* for struct page */
+
+struct pci_sysdata {
+       int     domain;         /* PCI domain */
+       int     node;           /* NUMA node */
+};
+
+static inline int pci_domain_nr(struct pci_bus *bus)
+{
+       struct pci_sysdata *sd = bus->sysdata;
+       return sd->domain;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+       return pci_domain_nr(bus);
+}
 
 /* Can be used to override the logic in pci_scan_bus for skipping
    already-configured bus numbers - to be used for buggy BIOSes
@@ -116,4 +132,14 @@ static inline void pci_dma_burst_advice(
 /* generic pci stuff */
 #include <asm-generic/pci.h>
 
+#ifdef CONFIG_NUMA
+/* Returns the node based on pci bus */
+static inline int __pcibus_to_node(struct pci_bus *bus)
+{
+       struct pci_sysdata *sd = bus->sysdata;
+
+       return sd->node;
+}
+#endif
+
 #endif /* __i386_PCI_H */
diff -r cdc6729dc702 include/asm-i386/topology.h
--- a/include/asm-i386/topology.h       Fri Nov 28 13:41:38 2008 +0000
+++ b/include/asm-i386/topology.h       Mon Dec 01 19:09:12 2008 +0900
@@ -67,7 +67,7 @@ static inline int node_to_first_cpu(int 
        return first_cpu(mask);
 }
 
-#define pcibus_to_node(bus) ((long) (bus)->sysdata)
+#define pcibus_to_node(bus) __pcibus_to_node(bus)
 #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus))
 
 /* sched_domains SD_NODE_INIT for NUMAQ machines */


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel