On Tue, Nov 8, 2011 at 12:56, Stefano Stabellini
<stefano.stabellini@xxxxxxxxxxxxx> wrote:
> On Fri, 28 Oct 2011, Anthony PERARD wrote:
>> From: Allen Kay <allen.m.kay@xxxxxxxxx>
>>
>> Signed-off-by: Allen Kay <allen.m.kay@xxxxxxxxx>
>> Signed-off-by: Guy Zana <guy@xxxxxxxxxxxx>
>> Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
>> ---
>> Makefile.target | 2 +
>> hw/xen_pci_passthrough.c | 838
>> ++++++++++++++++++++++++++++++++++++++
>> hw/xen_pci_passthrough.h | 223 ++++++++++
>> hw/xen_pci_passthrough_helpers.c | 46 ++
>> 4 files changed, 1109 insertions(+), 0 deletions(-)
>> create mode 100644 hw/xen_pci_passthrough.c
>> create mode 100644 hw/xen_pci_passthrough.h
>> create mode 100644 hw/xen_pci_passthrough_helpers.c
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index 243f9f2..36ea47d 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -217,6 +217,8 @@ obj-i386-$(CONFIG_XEN) += xen_platform.o
>>
>> # Xen PCI Passthrough
>> obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
>> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pci_passthrough.o
>> +obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pci_passthrough_helpers.o
>>
>> # Inter-VM PCI shared memory
>> CONFIG_IVSHMEM =
>> diff --git a/hw/xen_pci_passthrough.c b/hw/xen_pci_passthrough.c
>> new file mode 100644
>> index 0000000..b97c5b6
>> --- /dev/null
>> +++ b/hw/xen_pci_passthrough.c
>> @@ -0,0 +1,838 @@
>> +/*
>> + * Copyright (c) 2007, Neocleus Corporation.
>> + * Copyright (c) 2007, Intel Corporation.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2. See
>> + * the COPYING file in the top-level directory.
>> + *
>> + * Alex Novik <alex@xxxxxxxxxxxx>
>> + * Allen Kay <allen.m.kay@xxxxxxxxx>
>> + * Guy Zana <guy@xxxxxxxxxxxx>
>> + *
>> + * This file implements direct PCI assignment to a HVM guest
>> + */
>> +
>> +/*
>> + * Interrupt Disable policy:
>> + *
>> + * INTx interrupt:
>> + * Initialize(register_real_device)
>> + * Map INTx(xc_physdev_map_pirq):
>> + * <fail>
>> + * - Set real Interrupt Disable bit to '1'.
>> + * - Set machine_irq and assigned_device->machine_irq to '0'.
>> + * * Don't bind INTx.
>> + *
>> + * Bind INTx(xc_domain_bind_pt_pci_irq):
>> + * <fail>
>> + * - Set real Interrupt Disable bit to '1'.
>> + * - Unmap INTx.
>> + * - Decrement mapped_machine_irq[machine_irq]
>> + * - Set assigned_device->machine_irq to '0'.
>> + *
>> + * Write to Interrupt Disable bit by guest software(pt_cmd_reg_write)
>> + * Write '0'
>> + * <ptdev->msi_trans_en is false>
>> + * - Set real bit to '0' if assigned_device->machine_irq isn't '0'.
>> + *
>> + * Write '1'
>> + * <ptdev->msi_trans_en is false>
>> + * - Set real bit to '1'.
>> + *
>> + * MSI-INTx translation.
>> + * Initialize(xc_physdev_map_pirq_msi/pt_msi_setup)
>> + * Bind MSI-INTx(xc_domain_bind_pt_irq)
>> + * <fail>
>> + * - Unmap MSI.
>> + * <success>
>> + * - Set dev->msi->pirq to '-1'.
>> + * <fail>
>> + * - Do nothing.
>> + *
>> + * Write to Interrupt Disable bit by guest software(pt_cmd_reg_write)
>> + * Write '0'
>> + * <ptdev->msi_trans_en is true>
>> + * - Set MSI Enable bit to '1'.
>> + *
>> + * Write '1'
>> + * <ptdev->msi_trans_en is true>
>> + * - Set MSI Enable bit to '0'.
>> + *
>> + * MSI interrupt:
>> + * Initialize MSI register(pt_msi_setup, pt_msi_update)
>> + * Bind MSI(xc_domain_update_msi_irq)
>> + * <fail>
>> + * - Unmap MSI.
>> + * - Set dev->msi->pirq to '-1'.
>> + *
>> + * MSI-X interrupt:
>> + * Initialize MSI-X register(pt_msix_update_one)
>> + * Bind MSI-X(xc_domain_update_msi_irq)
>> + * <fail>
>> + * - Unmap MSI-X.
>> + * - Set entry->pirq to '-1'.
>> + */
>> +
>
> you should move all the MSI related comments to the MSI patch
OK, I will move the MSI comments.
>> +#include <sys/ioctl.h>
>> +
>> +#include "pci.h"
>> +#include "xen.h"
>> +#include "xen_backend.h"
>> +#include "xen_pci_passthrough.h"
>> +
>> +#define PCI_BAR_ENTRIES (6)
>> +
>> +#define PT_NR_IRQS (256)
>> +char mapped_machine_irq[PT_NR_IRQS] = {0};
>> +
>> +/* Config Space */
>> +static int pt_pci_config_access_check(PCIDevice *d, uint32_t address, int
>> len)
>> +{
>> + /* check offset range */
>> + if (address >= 0xFF) {
>> + PT_LOG("Error: Failed to access register with offset exceeding FFh.
>> "
>> + "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
>> + pci_bus_num(d->bus), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
>> + address, len);
>> + return -1;
>> + }
>> +
>> + /* check read size */
>> + if ((len != 1) && (len != 2) && (len != 4)) {
>> + PT_LOG("Error: Failed to access register with invalid access
>> length. "
>> + "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
>> + pci_bus_num(d->bus), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
>> + address, len);
>> + return -1;
>> + }
>> +
>> + /* check offset alignment */
>> + if (address & (len - 1)) {
>> + PT_LOG("Error: Failed to access register with invalid access size "
>> + "alignment. [%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
>> + pci_bus_num(d->bus), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
>> + address, len);
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +int pt_bar_offset_to_index(uint32_t offset)
>> +{
>> + int index = 0;
>> +
>> + /* check Exp ROM BAR */
>> + if (offset == PCI_ROM_ADDRESS) {
>> + return PCI_ROM_SLOT;
>> + }
>> +
>> + /* calculate BAR index */
>> + index = (offset - PCI_BASE_ADDRESS_0) >> 2;
>> + if (index >= PCI_NUM_REGIONS) {
>> + return -1;
>> + }
>> +
>> + return index;
>> +}
>> +
>> +static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
>> +{
>> + XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev, d);
>> + uint32_t val = 0;
>> + XenPTRegGroup *reg_grp_entry = NULL;
>> + XenPTReg *reg_entry = NULL;
>> + int rc = 0;
>> + int emul_len = 0;
>> + uint32_t find_addr = address;
>> +
>> + if (pt_pci_config_access_check(d, address, len)) {
>> + goto exit;
>> + }
>> +
>> + /* check power state transition flags */
>> + if (s->pm_state != NULL && s->pm_state->flags & PT_FLAG_TRANSITING) {
>> + /* can't accept until previous power state transition is completed.
>> + * so finished previous request here.
>> + */
>> + PT_LOG("Warning: guest want to write durring power state
>> transition\n");
>> + goto exit;
>> + }
>> +
>> + /* find register group entry */
>> + reg_grp_entry = pt_find_reg_grp(s, address);
>> + if (reg_grp_entry) {
>> + /* check 0 Hardwired register group */
>> + if (reg_grp_entry->reg_grp->grp_type == GRP_TYPE_HARDWIRED) {
>> + /* no need to emulate, just return 0 */
>> + val = 0;
>> + goto exit;
>> + }
>> + }
>> +
>> + /* read I/O device register value */
>> + rc = host_pci_get_block(s->real_device, address, (uint8_t *)&val, len);
>> + if (!rc) {
>> + PT_LOG("Error: pci_read_block failed. return value[%d].\n", rc);
>> + memset(&val, 0xff, len);
>> + }
>> +
>> + /* just return the I/O device register value for
>> + * passthrough type register group */
>> + if (reg_grp_entry == NULL) {
>> + goto exit;
>> + }
>> +
>> + /* adjust the read value to appropriate CFC-CFF window */
>> + val <<= (address & 3) << 3;
>> + emul_len = len;
>> +
>> + /* loop Guest request size */
>> + while (emul_len > 0) {
>> + /* find register entry to be emulated */
>> + reg_entry = pt_find_reg(reg_grp_entry, find_addr);
>> + if (reg_entry) {
>> + XenPTRegInfo *reg = reg_entry->reg;
>> + uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
>> + uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
>> + uint8_t *ptr_val = NULL;
>> +
>> + valid_mask <<= (find_addr - real_offset) << 3;
>> + ptr_val = (uint8_t *)&val + (real_offset & 3);
>> +
>> + /* do emulation depend on register size */
>> + switch (reg->size) {
>> + case 1:
>> + if (reg->u.b.read) {
>> + rc = reg->u.b.read(s, reg_entry, ptr_val, valid_mask);
>> + }
>> + break;
>> + case 2:
>> + if (reg->u.w.read) {
>> + rc = reg->u.w.read(s, reg_entry,
>> + (uint16_t *)ptr_val, valid_mask);
>> + }
>> + break;
>> + case 4:
>> + if (reg->u.dw.read) {
>> + rc = reg->u.dw.read(s, reg_entry,
>> + (uint32_t *)ptr_val, valid_mask);
>> + }
>> + break;
>> + }
>> +
>> + if (rc < 0) {
>> + hw_error("Internal error: Invalid read emulation "
>> + "return value[%d]. I/O emulator exit.\n", rc);
>> + }
>> +
>> + /* calculate next address to find */
>> + emul_len -= reg->size;
>> + if (emul_len > 0) {
>> + find_addr = real_offset + reg->size;
>> + }
>> + } else {
>> + /* nothing to do with passthrough type register,
>> + * continue to find next byte */
>> + emul_len--;
>> + find_addr++;
>> + }
>> + }
>> +
>> + /* need to shift back before returning them to pci bus emulator */
>> + val >>= ((address & 3) << 3);
>> +
>> +exit:
>> + PT_LOG_CONFIG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
>> + pci_bus_num(d->bus), PCI_SLOT(d->devfn),
>> PCI_FUNC(d->devfn),
>> + address, val, len);
>> + return val;
>> +}
>> +
>> +static void pt_pci_write_config(PCIDevice *d, uint32_t address,
>> + uint32_t val, int len)
>> +{
>> + XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev, d);
>> + int index = 0;
>> + XenPTRegGroup *reg_grp_entry = NULL;
>> + int rc = 0;
>> + uint32_t read_val = 0;
>> + int emul_len = 0;
>> + XenPTReg *reg_entry = NULL;
>> + uint32_t find_addr = address;
>> + XenPTRegInfo *reg = NULL;
>> +
>> + if (pt_pci_config_access_check(d, address, len)) {
>> + return;
>> + }
>> +
>> + PT_LOG_CONFIG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n",
>> + pci_bus_num(d->bus), PCI_SLOT(d->devfn),
>> PCI_FUNC(d->devfn),
>> + address, val, len);
>> +
>> + /* check unused BAR register */
>> + index = pt_bar_offset_to_index(address);
>> + if ((index >= 0) && (val > 0 && val < PT_BAR_ALLF) &&
>> + (s->bases[index].bar_flag == PT_BAR_FLAG_UNUSED)) {
>> + PT_LOG("Warning: Guest attempt to set address to unused Base
>> Address "
>> + "Register. [%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
>> + pci_bus_num(d->bus), PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
>> + address, len);
>> + }
>> +
>> + /* check power state transition flags */
>> + if (s->pm_state != NULL && s->pm_state->flags & PT_FLAG_TRANSITING) {
>> + /* can't accept untill previous power state transition is completed.
>> + * so finished previous request here.
>> + */
>> + PT_LOG("Warning: guest want to write durring power state
>> transition\n");
>> + return;
>> + }
>> +
>> + /* find register group entry */
>> + reg_grp_entry = pt_find_reg_grp(s, address);
>> + if (reg_grp_entry) {
>> + /* check 0 Hardwired register group */
>> + if (reg_grp_entry->reg_grp->grp_type == GRP_TYPE_HARDWIRED) {
>> + /* ignore silently */
>> + PT_LOG("Warning: Access to 0 Hardwired register. "
>> + "[%02x:%02x.%x][Offset:%02xh][Length:%d]\n",
>> + pci_bus_num(d->bus), PCI_SLOT(d->devfn),
>> PCI_FUNC(d->devfn),
>> + address, len);
>> + return;
>> + }
>> + }
>> +
>> + /* read I/O device register value */
>> + rc = host_pci_get_block(s->real_device, address,
>> + (uint8_t *)&read_val, len);
>> + if (!rc) {
>> + PT_LOG("Error: pci_read_block failed. return value[%d].\n", rc);
>> + memset(&read_val, 0xff, len);
>> + }
>> +
>> + /* pass directly to libpci for passthrough type register group */
>> + if (reg_grp_entry == NULL) {
>> + goto out;
>> + }
>> +
>> + /* adjust the read and write value to appropriate CFC-CFF window */
>> + read_val <<= (address & 3) << 3;
>> + val <<= (address & 3) << 3;
>> + emul_len = len;
>> +
>> + /* loop Guest request size */
>> + while (emul_len > 0) {
>> + /* find register entry to be emulated */
>> + reg_entry = pt_find_reg(reg_grp_entry, find_addr);
>> + if (reg_entry) {
>> + reg = reg_entry->reg;
>> + uint32_t real_offset = reg_grp_entry->base_offset + reg->offset;
>> + uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3);
>> + uint8_t *ptr_val = NULL;
>> +
>> + valid_mask <<= (find_addr - real_offset) << 3;
>> + ptr_val = (uint8_t *)&val + (real_offset & 3);
>> +
>> + /* do emulation depend on register size */
>> + switch (reg->size) {
>> + case 1:
>> + if (reg->u.b.write) {
>> + rc = reg->u.b.write(s, reg_entry, ptr_val,
>> + read_val >> ((real_offset & 3) <<
>> 3),
>> + valid_mask);
>> + }
>> + break;
>> + case 2:
>> + if (reg->u.w.write) {
>> + rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val,
>> + (read_val >> ((real_offset & 3) <<
>> 3)),
>> + valid_mask);
>> + }
>> + break;
>> + case 4:
>> + if (reg->u.dw.write) {
>> + rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val,
>> + (read_val >> ((real_offset & 3) <<
>> 3)),
>> + valid_mask);
>> + }
>> + break;
>> + }
>> +
>> + if (rc < 0) {
>> + hw_error("Internal error: Invalid write emulation "
>> + "return value[%d]. I/O emulator exit.\n", rc);
>> + }
>> +
>> + /* calculate next address to find */
>> + emul_len -= reg->size;
>> + if (emul_len > 0) {
>> + find_addr = real_offset + reg->size;
>> + }
>> + } else {
>> + /* nothing to do with passthrough type register,
>> + * continue to find next byte */
>> + emul_len--;
>> + find_addr++;
>> + }
>> + }
>> +
>> + /* need to shift back before passing them to libpci */
>> + val >>= (address & 3) << 3;
>> +
>> +out:
>> + if (!(reg && reg->no_wb)) {
>> + /* unknown regs are passed through */
>> + rc = host_pci_set_block(s->real_device, address, (uint8_t *)&val,
>> len);
>> +
>> + if (!rc) {
>> + PT_LOG("Error: pci_write_block failed. return value[%d].\n",
>> rc);
>> + }
>> + }
>> +
>> + if (s->pm_state != NULL && s->pm_state->flags & PT_FLAG_TRANSITING) {
>> + qemu_mod_timer(s->pm_state->pm_timer,
>> + qemu_get_clock_ms(rt_clock) + s->pm_state->pm_delay);
>> + }
>> +}
>
> Where is this timer allocated and initialized?
It is allocated and initialized in the next patch. I will move these lines to the related patch.
>> +/* ioport/iomem space*/
>> +static void pt_iomem_map(XenPCIPassthroughState *s, int i,
>> + pcibus_t e_phys, pcibus_t e_size, int type)
>> +{
>> + uint32_t old_ebase = s->bases[i].e_physbase;
>> + bool first_map = s->bases[i].e_size == 0;
>> + int ret = 0;
>> +
>> + s->bases[i].e_physbase = e_phys;
>> + s->bases[i].e_size = e_size;
>> +
>> + PT_LOG("e_phys=%#"PRIx64" maddr=%#"PRIx64" type=%%d"
>> + " len=%#"PRIx64" index=%d first_map=%d\n",
>> + e_phys, s->bases[i].access.maddr, /*type,*/
>> + e_size, i, first_map);
>> +
>> + if (e_size == 0) {
>> + return;
>> + }
>> +
>> + if (!first_map && old_ebase != -1) {
>> + /* Remove old mapping */
>> + ret = xc_domain_memory_mapping(xen_xc, xen_domid,
>> + old_ebase >> XC_PAGE_SHIFT,
>> + s->bases[i].access.maddr >> XC_PAGE_SHIFT,
>> + (e_size + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
>> + DPCI_REMOVE_MAPPING);
>> + if (ret != 0) {
>> + PT_LOG("Error: remove old mapping failed!\n");
>> + return;
>> + }
>> + }
>> +
>> + /* map only valid guest address */
>> + if (e_phys != -1) {
>> + /* Create new mapping */
>> + ret = xc_domain_memory_mapping(xen_xc, xen_domid,
>> + s->bases[i].e_physbase >> XC_PAGE_SHIFT,
>> + s->bases[i].access.maddr >>
>> XC_PAGE_SHIFT,
>> + (e_size+XC_PAGE_SIZE-1) >> XC_PAGE_SHIFT,
>> + DPCI_ADD_MAPPING);
>> +
>> + if (ret != 0) {
>> + PT_LOG("Error: create new mapping failed!\n");
>> + }
>> + }
>> +}
>> +
>> +static void pt_ioport_map(XenPCIPassthroughState *s, int i,
>> + pcibus_t e_phys, pcibus_t e_size, int type)
>> +{
>> + uint32_t old_ebase = s->bases[i].e_physbase;
>> + bool first_map = s->bases[i].e_size == 0;
>> + int ret = 0;
>> +
>> + s->bases[i].e_physbase = e_phys;
>> + s->bases[i].e_size = e_size;
>> +
>> + PT_LOG("e_phys=%#04"PRIx64" pio_base=%#04"PRIx64" len=%"PRId64"
>> index=%d"
>> + " first_map=%d\n",
>> + e_phys, s->bases[i].access.pio_base, e_size, i, first_map);
>> +
>> + if (e_size == 0) {
>> + return;
>> + }
>> +
>> + if (!first_map && old_ebase != -1) {
>> + /* Remove old mapping */
>> + ret = xc_domain_ioport_mapping(xen_xc, xen_domid, old_ebase,
>> + s->bases[i].access.pio_base, e_size,
>> + DPCI_REMOVE_MAPPING);
>> + if (ret != 0) {
>> + PT_LOG("Error: remove old mapping failed!\n");
>> + return;
>> + }
>> + }
>> +
>> + /* map only valid guest address (include 0) */
>> + if (e_phys != -1) {
>> + /* Create new mapping */
>> + ret = xc_domain_ioport_mapping(xen_xc, xen_domid, e_phys,
>> + s->bases[i].access.pio_base, e_size,
>> + DPCI_ADD_MAPPING);
>> + if (ret != 0) {
>> + PT_LOG("Error: create new mapping failed!\n");
>> + }
>> + }
>> +
>> +}
>> +
>> +
>> +/* mapping BAR */
>> +
>> +void pt_bar_mapping_one(XenPCIPassthroughState *s, int bar,
>> + int io_enable, int mem_enable)
>> +{
>> + PCIDevice *dev = &s->dev;
>> + PCIIORegion *r;
>> + XenPTRegGroup *reg_grp_entry = NULL;
>> + XenPTReg *reg_entry = NULL;
>> + XenPTRegion *base = NULL;
>> + pcibus_t r_size = 0, r_addr = -1;
>> + int rc = 0;
>> +
>> + r = &dev->io_regions[bar];
>> +
>> + /* check valid region */
>> + if (!r->size) {
>> + return;
>> + }
>> +
>> + base = &s->bases[bar];
>> + /* skip unused BAR or upper 64bit BAR */
>> + if ((base->bar_flag == PT_BAR_FLAG_UNUSED)
>> + || (base->bar_flag == PT_BAR_FLAG_UPPER)) {
>> + return;
>> + }
>> +
>> + /* copy region address to temporary */
>> + r_addr = r->addr;
>> +
>> + /* need unmapping in case I/O Space or Memory Space disable */
>> + if (((base->bar_flag == PT_BAR_FLAG_IO) && !io_enable) ||
>> + ((base->bar_flag == PT_BAR_FLAG_MEM) && !mem_enable)) {
>> + r_addr = -1;
>> + }
>> + if ((bar == PCI_ROM_SLOT) && (r_addr != -1)) {
>> + reg_grp_entry = pt_find_reg_grp(s, PCI_ROM_ADDRESS);
>> + if (reg_grp_entry) {
>> + reg_entry = pt_find_reg(reg_grp_entry, PCI_ROM_ADDRESS);
>> + if (reg_entry && !(reg_entry->data & PCI_ROM_ADDRESS_ENABLE)) {
>> + r_addr = -1;
>> + }
>> + }
>> + }
>> +
>> + /* prevent guest software mapping memory resource to 00000000h */
>> + if ((base->bar_flag == PT_BAR_FLAG_MEM) && (r_addr == 0)) {
>> + r_addr = -1;
>> + }
>> +
>> + r_size = pt_get_emul_size(base->bar_flag, r->size);
>> +
>> + rc = pci_check_bar_overlap(dev, r_addr, r_size, r->type);
>> + if (rc > 0) {
>> + PT_LOG("Warning: s[%02x:%02x.%x][Region:%d][Address:%"FMT_PCIBUS"h]"
>> + "[Size:%"FMT_PCIBUS"h] is overlapped.\n",
>> pci_bus_num(dev->bus),
>> + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), bar,
>> + r_addr, r_size);
>> + }
>> +
>> + /* check whether we need to update the mapping or not */
>> + if (r_addr != s->bases[bar].e_physbase) {
>> + /* mapping BAR */
>> + if (base->bar_flag == PT_BAR_FLAG_IO) {
>> + pt_ioport_map(s, bar, r_addr, r_size, r->type);
>> + } else {
>> + pt_iomem_map(s, bar, r_addr, r_size, r->type);
>> + }
>> + }
>> +}
>> +
>> +void pt_bar_mapping(XenPCIPassthroughState *s, int io_enable, int
>> mem_enable)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < PCI_NUM_REGIONS; i++) {
>> + pt_bar_mapping_one(s, i, io_enable, mem_enable);
>> + }
>> +}
>> +
>> +/* register regions */
>> +static int pt_register_regions(XenPCIPassthroughState *s)
>> +{
>> + int i = 0;
>> + uint32_t bar_data = 0;
>> + HostPCIDevice *d = s->real_device;
>> +
>> + /* Register PIO/MMIO BARs */
>> + for (i = 0; i < PCI_BAR_ENTRIES; i++) {
>> + HostPCIIORegion *r = &d->io_regions[i];
>> +
>> + if (r->base_addr) {
>> + s->bases[i].e_physbase = r->base_addr;
>> + s->bases[i].access.u = r->base_addr;
>> +
>> + /* Register current region */
>> + if (r->flags & IORESOURCE_IO) {
>> + memory_region_init_io(&s->bar[i], NULL, NULL,
>> + "xen-pci-pt-bar", r->size);
>> + pci_register_bar(&s->dev, i, PCI_BASE_ADDRESS_SPACE_IO,
>> + &s->bar[i]);
>> + } else if (r->flags & IORESOURCE_PREFETCH) {
>> + memory_region_init_io(&s->bar[i], NULL, NULL,
>> + "xen-pci-pt-bar", r->size);
>> + pci_register_bar(&s->dev, i, PCI_BASE_ADDRESS_MEM_PREFETCH,
>> + &s->bar[i]);
>> + } else {
>> + memory_region_init_io(&s->bar[i], NULL, NULL,
>> + "xen-pci-pt-bar", r->size);
>> + pci_register_bar(&s->dev, i, PCI_BASE_ADDRESS_SPACE_MEMORY,
>> + &s->bar[i]);
>> + }
>> +
>> + PT_LOG("IO region registered (size=0x%08"PRIx64
>> + " base_addr=0x%08"PRIx64")\n",
>> + r->size, r->base_addr);
>> + }
>> + }
>> +
>> + /* Register expansion ROM address */
>> + if (d->rom.base_addr && d->rom.size) {
>> + /* Re-set BAR reported by OS, otherwise ROM can't be read. */
>> + bar_data = host_pci_get_long(d, PCI_ROM_ADDRESS);
>> + if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) {
>> + bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK;
>> + host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data);
>> + }
>> +
>> + s->bases[PCI_ROM_SLOT].e_physbase = d->rom.base_addr;
>> + s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr;
>> +
>> + memory_region_init_rom_device(&s->rom, NULL, NULL, &s->dev.qdev,
>> + "xen-pci-pt-rom", d->rom.size);
>> + pci_register_bar(&s->dev, PCI_ROM_SLOT,
>> PCI_BASE_ADDRESS_MEM_PREFETCH,
>> + &s->rom);
>> +
>> + PT_LOG("Expansion ROM registered (size=0x%08"PRIx64
>> + " base_addr=0x%08"PRIx64")\n",
>> + d->rom.size, d->rom.base_addr);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static void pt_unregister_regions(XenPCIPassthroughState *s)
>> +{
>> + int i, type, rc;
>> + uint32_t e_size;
>> + PCIDevice *d = &s->dev;
>> +
>> + for (i = 0; i < PCI_NUM_REGIONS; i++) {
>> + e_size = s->bases[i].e_size;
>> + if ((e_size == 0) || (s->bases[i].e_physbase == -1)) {
>> + continue;
>> + }
>> +
>> + type = d->io_regions[i].type;
>> +
>> + if (type == PCI_BASE_ADDRESS_SPACE_MEMORY
>> + || type == PCI_BASE_ADDRESS_MEM_PREFETCH) {
>> + rc = xc_domain_memory_mapping(xen_xc, xen_domid,
>> + s->bases[i].e_physbase >> XC_PAGE_SHIFT,
>> + s->bases[i].access.maddr >> XC_PAGE_SHIFT,
>> + (e_size+XC_PAGE_SIZE-1) >> XC_PAGE_SHIFT,
>> + DPCI_REMOVE_MAPPING);
>> + if (rc != 0) {
>> + PT_LOG("Error: remove old mem mapping failed!\n");
>> + continue;
>> + }
>> +
>> + } else if (type == PCI_BASE_ADDRESS_SPACE_IO) {
>> + rc = xc_domain_ioport_mapping(xen_xc, xen_domid,
>> + s->bases[i].e_physbase,
>> + s->bases[i].access.pio_base,
>> + e_size,
>> + DPCI_REMOVE_MAPPING);
>> + if (rc != 0) {
>> + PT_LOG("Error: remove old io mapping failed!\n");
>> + continue;
>> + }
>> + }
>> + }
>> +}
>> +
>> +static int pt_initfn(PCIDevice *pcidev)
>> +{
>> + XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev,
>> pcidev);
>> + int dom, bus;
>> + unsigned slot, func;
>> + int rc = 0;
>> + uint32_t machine_irq;
>> + int pirq = -1;
>> +
>> + if (pci_parse_devaddr(s->hostaddr, &dom, &bus, &slot, &func) < 0) {
>> + fprintf(stderr, "error parse bdf: %s\n", s->hostaddr);
>> + return -1;
>> + }
>> +
>> + /* register real device */
>> + PT_LOG("Assigning real physical device %02x:%02x.%x to devfn %i ...\n",
>> + bus, slot, func, s->dev.devfn);
>> +
>> + s->real_device = host_pci_device_get(bus, slot, func);
>> + if (!s->real_device) {
>> + return -1;
>> + }
>> +
>> + s->is_virtfn = s->real_device->is_virtfn;
>> + if (s->is_virtfn) {
>> + PT_LOG("%04x:%02x:%02x.%x is a SR-IOV Virtual Function\n",
>> + s->real_device->domain, bus, slot, func);
>> + }
>> +
>> + /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
>> + if (host_pci_get_block(s->real_device, 0, pcidev->config,
>> + PCI_CONFIG_SPACE_SIZE) == -1) {
>> + return -1;
>> + }
>> +
>> + /* Handle real device's MMIO/PIO BARs */
>> + pt_register_regions(s);
>> +
>> + /* reinitialize each config register to be emulated */
>> + pt_config_init(s);
>
> this function is implemented in the next patch, so you might as well add
> this call there
OK, I will move this call to the next patch.
>> + /* Bind interrupt */
>> + if (!s->dev.config[PCI_INTERRUPT_PIN]) {
>> + PT_LOG("no pin interrupt\n");
>> + goto out;
>> + }
>> +
>> + machine_irq = host_pci_get_byte(s->real_device, PCI_INTERRUPT_LINE);
>> + rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq);
>> +
>> + if (rc) {
>> + PT_LOG("Error: Mapping irq failed, rc = %d\n", rc);
>> +
>> + /* Disable PCI intx assertion (turn on bit10 of devctl) */
>> + host_pci_set_word(s->real_device,
>> + PCI_COMMAND,
>> + pci_get_word(s->dev.config + PCI_COMMAND)
>> + | PCI_COMMAND_INTX_DISABLE);
>> + machine_irq = 0;
>> + s->machine_irq = 0;
>> + } else {
>> + machine_irq = pirq;
>> + s->machine_irq = pirq;
>> + mapped_machine_irq[machine_irq]++;
>> + }
>> +
>> + /* bind machine_irq to device */
>> + if (rc < 0 && machine_irq != 0) {
>> + uint8_t e_device = PCI_SLOT(s->dev.devfn);
>> + uint8_t e_intx = pci_intx(s);
>> +
>> + rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq, 0,
>> + e_device, e_intx);
>> + if (rc < 0) {
>> + PT_LOG("Error: Binding of interrupt failed! rc=%d\n", rc);
>> +
>> + /* Disable PCI intx assertion (turn on bit10 of devctl) */
>> + host_pci_set_word(s->real_device, PCI_COMMAND,
>> + *(uint16_t *)(&s->dev.config[PCI_COMMAND])
>> + | PCI_COMMAND_INTX_DISABLE);
>> + mapped_machine_irq[machine_irq]--;
>> +
>> + if (mapped_machine_irq[machine_irq] == 0) {
>> + if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq)) {
>> + PT_LOG("Error: Unmapping of interrupt failed! rc=%d\n",
>> + rc);
>> + }
>> + }
>> + s->machine_irq = 0;
>> + }
>> + }
>> +
>> +out:
>> + PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n"
>> + "IRQ type = %s\n", bus, slot, func, "INTx");
>> +
>> + return 0;
>> +}
>> +
>> +static int pt_unregister_device(PCIDevice *pcidev)
>> +{
>> + XenPCIPassthroughState *s = DO_UPCAST(XenPCIPassthroughState, dev,
>> pcidev);
>> + uint8_t e_device, e_intx;
>> + uint32_t machine_irq;
>> + int rc;
>> +
>> + /* Unbind interrupt */
>> + e_device = PCI_SLOT(s->dev.devfn);
>> + e_intx = pci_intx(s);
>> + machine_irq = s->machine_irq;
>> +
>> + if (machine_irq) {
>> + rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq,
>> + PT_IRQ_TYPE_PCI, 0, e_device, e_intx,
>> 0);
>> + if (rc < 0) {
>> + PT_LOG("Error: Unbinding of interrupt failed! rc=%d\n", rc);
>> + }
>> + }
>> +
>> + if (machine_irq) {
>> + mapped_machine_irq[machine_irq]--;
>> +
>> + if (mapped_machine_irq[machine_irq] == 0) {
>> + rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq);
>> +
>> + if (rc < 0) {
>> + PT_LOG("Error: Unmaping of interrupt failed! rc=%d\n", rc);
>> + }
>> + }
>> + }
>> +
>> + /* delete all emulated config registers */
>> + pt_config_delete(s);
>> +
>> + /* unregister real device's MMIO/PIO BARs */
>> + pt_unregister_regions(s);
>> +
>> + host_pci_device_put(s->real_device);
>> +
>> + return 0;
>> +}
>> +
>> +static PCIDeviceInfo xen_pci_passthrough = {
>> + .init = pt_initfn,
>> + .exit = pt_unregister_device,
>> + .qdev.name = "xen-pci-passthrough",
>> + .qdev.desc = "Assign an host pci device with Xen",
>> + .qdev.size = sizeof(XenPCIPassthroughState),
>> + .config_read = pt_pci_read_config,
>> + .config_write = pt_pci_write_config,
>> + .is_express = 0,
>> + .qdev.props = (Property[]) {
>> + DEFINE_PROP_STRING("hostaddr", XenPCIPassthroughState, hostaddr),
>> + DEFINE_PROP_BIT("power-mgmt", XenPCIPassthroughState, power_mgmt,
>> + 0, false),
>> + DEFINE_PROP_END_OF_LIST(),
>> + }
>> +};
>> +
>> +static void xen_passthrough_register(void)
>> +{
>> + pci_qdev_register(&xen_pci_passthrough);
>> +}
>> +
>> +device_init(xen_passthrough_register);
>> diff --git a/hw/xen_pci_passthrough.h b/hw/xen_pci_passthrough.h
>> new file mode 100644
>> index 0000000..2d1979d
>> --- /dev/null
>> +++ b/hw/xen_pci_passthrough.h
>> @@ -0,0 +1,223 @@
>> +#ifndef QEMU_HW_XEN_PCI_PASSTHROUGH_H
>> +# define QEMU_HW_XEN_PCI_PASSTHROUGH_H
>> +
>> +#include "qemu-common.h"
>> +#include "xen_common.h"
>> +#include "pci.h"
>> +#include "host-pci-device.h"
>> +
>> +#define PT_LOGGING_ENABLED
>> +#define PT_DEBUG_PCI_CONFIG_ACCESS
>> +
>> +#ifdef PT_LOGGING_ENABLED
>> +# define PT_LOG(_f, _a...) fprintf(stderr, "%s: " _f, __func__, ##_a)
>> +#else
>> +# define PT_LOG(_f, _a...)
>> +#endif
>> +
>> +#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
>> +# define PT_LOG_CONFIG(_f, _a...) PT_LOG(_f, ##_a)
>> +#else
>> +# define PT_LOG_CONFIG(_f, _a...)
>> +#endif
>> +
>> +
>> +typedef struct XenPTRegInfo XenPTRegInfo;
>> +typedef struct XenPTReg XenPTReg;
>> +
>> +typedef struct XenPCIPassthroughState XenPCIPassthroughState;
>> +
>> +/* function type for config reg */
>> +typedef uint32_t (*conf_reg_init)
>> + (XenPCIPassthroughState *, XenPTRegInfo *, uint32_t real_offset);
>> +typedef int (*conf_dword_write)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry,
>> + uint32_t *val, uint32_t dev_value, uint32_t valid_mask);
>> +typedef int (*conf_word_write)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry,
>> + uint16_t *val, uint16_t dev_value, uint16_t valid_mask);
>> +typedef int (*conf_byte_write)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry,
>> + uint8_t *val, uint8_t dev_value, uint8_t valid_mask);
>> +typedef int (*conf_dword_read)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry,
>> + uint32_t *val, uint32_t valid_mask);
>> +typedef int (*conf_word_read)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry,
>> + uint16_t *val, uint16_t valid_mask);
>> +typedef int (*conf_byte_read)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry,
>> + uint8_t *val, uint8_t valid_mask);
>> +typedef int (*conf_dword_restore)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry, uint32_t real_offset,
>> + uint32_t dev_value, uint32_t *val);
>> +typedef int (*conf_word_restore)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry, uint32_t real_offset,
>> + uint16_t dev_value, uint16_t *val);
>> +typedef int (*conf_byte_restore)
>> + (XenPCIPassthroughState *, XenPTReg *cfg_entry, uint32_t real_offset,
>> + uint8_t dev_value, uint8_t *val);
>> +
>> +/* power state transition */
>> +#define PT_FLAG_TRANSITING 0x0001
>> +
>> +
>> +typedef enum {
>> + GRP_TYPE_HARDWIRED = 0, /* 0 Hardwired reg group */
>> + GRP_TYPE_EMU, /* emul reg group */
>> +} RegisterGroupType;
>> +
>> +typedef enum {
>> + PT_BAR_FLAG_MEM = 0, /* Memory type BAR */
>> + PT_BAR_FLAG_IO, /* I/O type BAR */
>> + PT_BAR_FLAG_UPPER, /* upper 64bit BAR */
>> + PT_BAR_FLAG_UNUSED, /* unused BAR */
>> +} PTBarFlag;
>> +
>> +
>> +typedef struct XenPTRegion {
>> + /* Virtual phys base & size */
>> + uint32_t e_physbase;
>> + uint32_t e_size;
>> + /* Index of region in qemu */
>> + uint32_t memory_index;
>> + /* BAR flag */
>> + PTBarFlag bar_flag;
>> + /* Translation of the emulated address */
>> + union {
>> + uint64_t maddr;
>> + uint64_t pio_base;
>> + uint64_t u;
>> + } access;
>> +} XenPTRegion;
>> +
>> +/* XenPTRegInfo declaration
>> + * - only for emulated register (either a part or whole bit).
>> + * - for passthrough register that need special behavior (like interacting
>> with
>> + * other component), set emu_mask to all 0 and specify r/w func properly.
>> + * - do NOT use ALL F for init_val, otherwise the tbl will not be
>> registered.
>> + */
>> +
>> +/* emulated register infomation */
>> +struct XenPTRegInfo {
>> + uint32_t offset;
>> + uint32_t size;
>> + uint32_t init_val;
>> + /* reg read only field mask (ON:RO/ROS, OFF:other) */
>> + uint32_t ro_mask;
>> + /* reg emulate field mask (ON:emu, OFF:passthrough) */
>> + uint32_t emu_mask;
>> + /* no write back allowed */
>> + uint32_t no_wb;
>> + conf_reg_init init;
>> + /* read/write/restore function pointer
>> + * for double_word/word/byte size */
>> + union {
>> + struct {
>> + conf_dword_write write;
>> + conf_dword_read read;
>> + conf_dword_restore restore;
>> + } dw;
>> + struct {
>> + conf_word_write write;
>> + conf_word_read read;
>> + conf_word_restore restore;
>> + } w;
>> + struct {
>> + conf_byte_write write;
>> + conf_byte_read read;
>> + conf_byte_restore restore;
>> + } b;
>> + } u;
>> +};
>> +
>> +/* emulated register management */
>> +struct XenPTReg {
>> + QLIST_ENTRY(XenPTReg) entries;
>> + XenPTRegInfo *reg;
>> + uint32_t data;
>> +};
>> +
>> +typedef struct XenPTRegGroupInfo XenPTRegGroupInfo;
>> +
>> +/* emul reg group size initialize method */
>> +typedef uint8_t (*pt_reg_size_init_fn)
>> + (XenPCIPassthroughState *, const XenPTRegGroupInfo *,
>> + uint32_t base_offset);
>> +
>> +/* emulated register group infomation */
>> +struct XenPTRegGroupInfo {
>> + uint8_t grp_id;
>> + RegisterGroupType grp_type;
>> + uint8_t grp_size;
>> + pt_reg_size_init_fn size_init;
>> + XenPTRegInfo *emu_reg_tbl;
>> +};
>> +
>> +/* emul register group management table */
>> +typedef struct XenPTRegGroup {
>> + QLIST_ENTRY(XenPTRegGroup) entries;
>> + const XenPTRegGroupInfo *reg_grp;
>> + uint32_t base_offset;
>> + uint8_t size;
>> + QLIST_HEAD(, XenPTReg) reg_tbl_list;
>> +} XenPTRegGroup;
>> +
>> +
>> +typedef struct XenPTPM {
>> + QEMUTimer *pm_timer; /* QEMUTimer struct */
>> + int no_soft_reset; /* No Soft Reset flags */
>> + uint16_t flags; /* power state transition flags */
>> + uint16_t pmc_field; /* Power Management Capabilities field */
>> + int pm_delay; /* power state transition delay */
>> + uint16_t cur_state; /* current power state */
>> + uint16_t req_state; /* requested power state */
>> + uint32_t pm_base; /* Power Management Capability reg base offset */
>> + uint32_t aer_base; /* AER Capability reg base offset */
>> +} XenPTPM;
>> +
>> +struct XenPCIPassthroughState {
>> + PCIDevice dev;
>> +
>> + char *hostaddr;
>> + bool is_virtfn;
>> + HostPCIDevice *real_device;
>> + XenPTRegion bases[PCI_NUM_REGIONS]; /* Access regions */
>> + QLIST_HEAD(, XenPTRegGroup) reg_grp_tbl;
>> +
>> + uint32_t machine_irq;
>> +
>> + uint32_t power_mgmt;
>> + XenPTPM *pm_state;
>> +
>> + MemoryRegion bar[PCI_NUM_REGIONS - 1];
>> + MemoryRegion rom;
>> +};
>> +
>> +void pt_config_init(XenPCIPassthroughState *s);
>> +void pt_config_delete(XenPCIPassthroughState *s);
>> +void pt_bar_mapping(XenPCIPassthroughState *s, int io_enable, int
>> mem_enable);
>> +void pt_bar_mapping_one(XenPCIPassthroughState *s, int bar,
>> + int io_enable, int mem_enable);
>> +XenPTRegGroup *pt_find_reg_grp(XenPCIPassthroughState *s, uint32_t address);
>> +XenPTReg *pt_find_reg(XenPTRegGroup *reg_grp, uint32_t address);
>> +int pt_bar_offset_to_index(uint32_t offset);
>> +
>> +static inline pcibus_t pt_get_emul_size(PTBarFlag flag, pcibus_t r_size)
>> +{
>> + /* align resource size (memory type only) */
>> + if (flag == PT_BAR_FLAG_MEM) {
>> + return (r_size + XC_PAGE_SIZE - 1) & XC_PAGE_MASK;
>> + } else {
>> + return r_size;
>> + }
>> +}
>> +
>> +/* INTx */
>> +static inline uint8_t pci_read_intx(XenPCIPassthroughState *s)
>> +{
>> + return host_pci_get_byte(s->real_device, PCI_INTERRUPT_PIN);
>> +}
>> +uint8_t pci_intx(XenPCIPassthroughState *ptdev);
>> +
>> +#endif /* !QEMU_HW_XEN_PCI_PASSTHROUGH_H */
>> diff --git a/hw/xen_pci_passthrough_helpers.c
>> b/hw/xen_pci_passthrough_helpers.c
>> new file mode 100644
>> index 0000000..192e918
>> --- /dev/null
>> +++ b/hw/xen_pci_passthrough_helpers.c
>> @@ -0,0 +1,46 @@
>> +#include "xen_pci_passthrough.h"
>> +
>> +/* The PCI Local Bus Specification, Rev. 3.0, {
>> + * Section 6.2.4 Miscellaneous Registers, pp 223
>> + * outlines 5 valid values for the intertupt pin (intx).
>> + * 0: For devices (or device functions) that don't use an interrupt in
>> + * 1: INTA#
>> + * 2: INTB#
>> + * 3: INTC#
>> + * 4: INTD#
>> + *
>> + * Xen uses the following 4 values for intx
>> + * 0: INTA#
>> + * 1: INTB#
>> + * 2: INTC#
>> + * 3: INTD#
>> + *
>> + * Observing that these list of values are not the same, pci_read_intx()
>> + * uses the following mapping from hw to xen values.
>> + * This seems to reflect the current usage within Xen.
>> + *
>> + * PCI hardware | Xen | Notes
>> + *
>> ----------------+-----+----------------------------------------------------
>> + * 0 | 0 | No interrupt
>> + * 1 | 0 | INTA#
>> + * 2 | 1 | INTB#
>> + * 3 | 2 | INTC#
>> + * 4 | 3 | INTD#
>> + * any other value | 0 | This should never happen, log error message
>> +}
>> + */
>> +uint8_t pci_intx(XenPCIPassthroughState *ptdev)
>> +{
>> + uint8_t r_val = pci_read_intx(ptdev);
>> +
>> + PT_LOG("intx=%i\n", r_val);
>> + if (r_val < 1 || r_val > 4) {
>> + PT_LOG("Interrupt pin read from hardware is out of range: "
>> + "value=%i, acceptable range is 1 - 4\n", r_val);
>> + r_val = 0;
>> + } else {
>> + r_val -= 1;
>> + }
>> +
>> + return r_val;
>> +}
>
> if xen_pci_passthrough_helpers.c is only going to contain this function
> you might as well declared it static inline and move it to
> xen_pci_passthrough.h
Ok, I will.
--
Anthony PERARD
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|