# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1210583246 -3600
# Node ID 4afc6023e8eca87590d7d6e89bebad45f299235c
# Parent 5c00188dd159825e47547da59f691b2a3c59ac44
x86 hvm: Support MSI-X for HVM domains.
Signed-off-by: Shan Haitao <Haitao.shan@xxxxxxxxx>
---
tools/ioemu/hw/pass-through.c | 11 +
tools/ioemu/hw/pass-through.h | 21 +-
tools/ioemu/hw/pt-msi.c | 424 +++++++++++++++++++++++++++++++++++++++---
tools/ioemu/hw/pt-msi.h | 15 +
tools/libxc/xc_physdev.c | 2
tools/libxc/xenctrl.h | 1
6 files changed, 445 insertions(+), 29 deletions(-)
diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pass-through.c
--- a/tools/ioemu/hw/pass-through.c Mon May 12 10:06:40 2008 +0100
+++ b/tools/ioemu/hw/pass-through.c Mon May 12 10:07:26 2008 +0100
@@ -205,6 +205,7 @@ void pt_iomem_map(PCIDevice *d, int i, u
if ( !first_map )
{
+ add_msix_mapping(assigned_device, i);
/* Remove old mapping */
ret = xc_domain_memory_mapping(xc_handle, domid,
old_ebase >> XC_PAGE_SHIFT,
@@ -227,6 +228,9 @@ void pt_iomem_map(PCIDevice *d, int i, u
if ( ret != 0 )
PT_LOG("Error: create new mapping failed!\n");
+ ret = remove_msix_mapping(assigned_device, i);
+ if ( ret != 0 )
+ PT_LOG("Error: remove MSX-X mmio mapping failed!\n");
}
/* Being called each time a pio region has been updated */
@@ -289,6 +293,9 @@ static void pt_pci_write_config(PCIDevic
}
if ( pt_msi_write(assigned_device, address, val, len) )
+ return;
+
+ if ( pt_msix_write(assigned_device, address, val, len) )
return;
/* PCI config pass-through */
@@ -338,6 +345,7 @@ static uint32_t pt_pci_read_config(PCIDe
}
pt_msi_read(assigned_device, address, len, &val);
+ pt_msix_read(assigned_device, address, len, &val);
exit:
#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
@@ -549,6 +557,9 @@ struct pt_dev * register_real_device(PCI
if ( (pos = find_cap_offset(pci_dev, PCI_CAP_ID_MSI)) )
pt_msi_init(assigned_device, pos);
+ if ( (pos = find_cap_offset(pci_dev, PCI_CAP_ID_MSIX)) )
+ pt_msix_init(assigned_device, pos);
+
/* Handle real device's MMIO/PIO BARs */
pt_register_regions(assigned_device);
diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pass-through.h
--- a/tools/ioemu/hw/pass-through.h Mon May 12 10:06:40 2008 +0100
+++ b/tools/ioemu/hw/pass-through.h Mon May 12 10:07:26 2008 +0100
@@ -61,8 +61,26 @@ struct pt_msi_info {
uint32_t flags;
int offset;
int size;
- int pvec; /* physical vector used */
int pirq; /* guest pirq corresponding */
+};
+
+/* Emulation state kept for one entry of a device's MSI-X table. */
+struct msix_entry_info {
+ int pirq; /* -1 means unmapped */
+ int flags; /* flags indicating whether MSI ADDR or DATA is updated */
+ uint32_t io_mem[4]; /* guest-written copy of the entry's four dwords */
+};
+
+/* MSI-X emulation state for one passed-through device. */
+struct pt_msix_info {
+ int enabled; /* enable state last applied by pt_msix_enable() */
+ int offset; /* config-space position of the MSI-X capability */
+ int total_entries; /* (control & 0x7ff) + 1, as read in pt_msix_init() */
+ int bar_index; /* BAR selected by the PCI_MSIX_BIR bits */
+ uint32_t table_off; /* table offset inside that BAR */
+ u64 mmio_base_addr; /* guest address of the emulated table */
+ int mmio_index; /* value returned by cpu_register_io_memory() */
+ int fd; /* descriptor of /dev/mem used for the mapping below */
+ void *phys_iomem_base; /* mmap of the device's physical MSI-X table */
+ struct msix_entry_info msix_entry[0]; /* total_entries trailing elements */
};
/*
@@ -74,6 +92,7 @@ struct pt_dev {
struct pci_dev *pci_dev; /* libpci struct */
struct pt_region bases[PCI_NUM_REGIONS]; /* Access regions */
struct pt_msi_info *msi; /* MSI virtualization */
+ struct pt_msix_info *msix; /* MSI-X virtualization */
};
/* Used for formatting PCI BDF into cf8 format */
diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pt-msi.c
--- a/tools/ioemu/hw/pt-msi.c Mon May 12 10:06:40 2008 +0100
+++ b/tools/ioemu/hw/pt-msi.c Mon May 12 10:07:26 2008 +0100
@@ -20,7 +20,9 @@
*/
#include "pt-msi.h"
-
+#include <sys/mman.h>
+#include <unistd.h>
+
+/* MSI virtualization functions */
#define PT_MSI_CTRL_WR_MASK_HI (0x1)
#define PT_MSI_CTRL_WR_MASK_LO (0x8E)
#define PT_MSI_DATA_WR_MASK (0x38)
@@ -76,7 +78,7 @@ int pt_msi_init(struct pt_dev *dev, int
*/
static int pt_msi_setup(struct pt_dev *dev)
{
- int vector = -1, pirq = -1;
+ int pirq = -1;
if ( !(dev->msi->flags & MSI_FLAG_UNINIT) )
{
@@ -85,15 +87,15 @@ static int pt_msi_setup(struct pt_dev *d
}
if ( xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI,
- vector, &pirq,
+ AUTO_ASSIGN, &pirq,
dev->pci_dev->dev << 3
| dev->pci_dev->func,
- dev->pci_dev->bus, 1) )
- {
- PT_LOG("error map vector %x\n", vector);
+ dev->pci_dev->bus, 0,
1) )
+ {
+ PT_LOG("error map msi\n");
return -1;
}
dev->msi->pirq = pirq;
- PT_LOG("vector %x pirq %x\n", vector, pirq);
+ PT_LOG("msi mapped with pirq %x\n", pirq);
return 0;
}
@@ -147,15 +149,10 @@ static uint8_t get_msi_gctrl(struct pt_d
return *(uint8_t *)(pd->config + d->msi->offset + PCI_MSI_FLAGS);
}
-static uint32_t get_msi_gflags(struct pt_dev *d)
+static uint32_t __get_msi_gflags(uint32_t data, uint64_t addr)
{
uint32_t result = 0;
int rh, dm, dest_id, deliv_mode, trig_mode;
- uint16_t data;
- uint64_t addr;
-
- data = get_msi_gdata(d);
- addr = get_msi_gaddr(d);
rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
dm = (addr >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
@@ -170,25 +167,20 @@ static uint32_t get_msi_gflags(struct pt
return result;
}
+/*
+ * Compute the guest MSI flags for device d from the message data and
+ * message address currently held for the guest.
+ */
+static uint32_t get_msi_gflags(struct pt_dev *d)
+{
+    uint16_t gdata;
+    uint64_t gaddr;
+
+    /* Data is fetched before the address, as the callers always did. */
+    gdata = get_msi_gdata(d);
+    gaddr = get_msi_gaddr(d);
+
+    return __get_msi_gflags(gdata, gaddr);
+}
+
/*
* This may be arch different
*/
static inline uint8_t get_msi_gvec(struct pt_dev *d)
{
return get_msi_gdata(d) & 0xff;
-}
-
-static inline uint8_t get_msi_hvec(struct pt_dev *d)
-{
- struct pci_dev *pd = d->pci_dev;
- uint16_t data;
-
- if ( d->msi->flags & PCI_MSI_FLAGS_64BIT )
- data = pci_read_word(pd, PCI_MSI_DATA_64);
- else
- data = pci_read_word(pd, PCI_MSI_DATA_32);
-
- return data & 0xff;
}
/*
@@ -198,7 +190,7 @@ static int pt_msi_update(struct pt_dev *
static int pt_msi_update(struct pt_dev *d)
{
PT_LOG("now update msi with pirq %x gvec %x\n",
- get_msi_gvec(d), d->msi->pirq);
+ d->msi->pirq, get_msi_gvec(d));
return xc_domain_update_msi_irq(xc_handle, domid, get_msi_gvec(d),
d->msi->pirq, get_msi_gflags(d));
}
@@ -266,7 +258,6 @@ static int
static int
pt_msi_map_update(struct pt_dev *d, uint32_t old_data, uint64_t old_addr)
{
- uint16_t pctrl;
uint32_t data;
uint64_t addr;
@@ -301,6 +292,8 @@ static int pt_msi_mask_update(struct pt_
if ( old_mask != mask )
pci_write_long(pd, offset, mask);
+
+ return 0;
}
#define ACCESSED_DATA 0x2
@@ -486,3 +479,378 @@ int pt_msi_read(struct pt_dev *d, int ad
return e_len;
}
+/* MSI-X virtualization functions */
+#define PT_MSIX_CTRL_WR_MASK_HI (0xC0)
+/*
+ * Store the guest's mask setting into one entry of the physical MSI-X table.
+ *
+ * dev:      device whose physical table is mapped at msix->phys_iomem_base
+ * entry_nr: index of the table entry; entries are 16 bytes apart
+ * mask:     value written to the dword at byte offset 12 of that entry
+ */
+static void mask_physical_msix_entry(struct pt_dev *dev, int entry_nr,
+                                     int mask)
+{
+    /*
+     * Use a byte pointer for the address arithmetic (arithmetic on void *
+     * is a GNU extension) and a volatile pointer for the store, so the
+     * compiler can neither drop nor reorder the write to the device.
+     */
+    uint8_t *entry_base =
+        (uint8_t *)dev->msix->phys_iomem_base + 16 * entry_nr;
+    volatile uint32_t *mask_word = (volatile uint32_t *)(entry_base + 12);
+
+    *mask_word = mask;
+}
+
+/*
+ * Bring Xen's view of one MSI-X entry in line with what the guest wrote.
+ *
+ * Nothing is done when the entry is not marked dirty (entry->flags == 0).
+ * Otherwise the entry is mapped to a pirq if it does not have one yet
+ * (pirq == -1), and the guest vector and delivery flags are passed to
+ * xc_domain_update_msi_irq().  The dirty mark is cleared only on success.
+ *
+ * Returns 0 on success or when there was nothing to do, otherwise the
+ * non-zero result of the libxc call that failed.
+ */
+static int pt_msix_update_one(struct pt_dev *dev, int entry_nr)
+{
+    struct msix_entry_info *entry = &dev->msix->msix_entry[entry_nr];
+    int pirq = entry->pirq;
+    int gvec = entry->io_mem[2] & 0xff;
+    /*
+     * Assemble the 64-bit address from its low (dword 0) and high (dword 1)
+     * halves.  Reading the uint32_t array through a uint64_t pointer would
+     * break the aliasing rules and may be misaligned.
+     */
+    uint64_t gaddr = ((uint64_t)entry->io_mem[1] << 32) | entry->io_mem[0];
+    uint32_t gflags = __get_msi_gflags(entry->io_mem[2], gaddr);
+    int ret;
+
+    if ( !entry->flags )
+        return 0;
+
+    /* Check if this entry is already mapped */
+    if ( entry->pirq == -1 )
+    {
+        ret = xc_physdev_map_pirq_msi(xc_handle, domid, MAP_PIRQ_TYPE_MSI,
+                                AUTO_ASSIGN, &pirq,
+                                dev->pci_dev->dev << 3 | dev->pci_dev->func,
+                                dev->pci_dev->bus, entry_nr, 0);
+        if ( ret )
+        {
+            PT_LOG("error map msix entry %x\n", entry_nr);
+            return ret;
+        }
+        entry->pirq = pirq;
+    }
+
+    PT_LOG("now update msix entry %x with pirq %x gvec %x\n",
+           entry_nr, pirq, gvec);
+
+    ret = xc_domain_update_msi_irq(xc_handle, domid, gvec, pirq, gflags);
+    if ( ret )
+    {
+        PT_LOG("error update msix irq info for entry %d\n", entry_nr);
+        return ret;
+    }
+
+    entry->flags = 0;
+
+    return 0;
+}
+
+/*
+ * Run pt_msix_update_one() over every entry of the device's MSI-X table.
+ * Per-entry results are not inspected; the function always returns 0.
+ */
+static int pt_msix_update(struct pt_dev *dev)
+{
+    int entry_nr = 0;
+
+    while ( entry_nr < dev->msix->total_entries )
+    {
+        pt_msix_update_one(dev, entry_nr);
+        entry_nr++;
+    }
+
+    return 0;
+}
+
+/*
+ * Write handler for the access sizes the emulated MSI-X table rejects:
+ * the attempt is logged and the value is discarded.
+ */
+static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr,
+                                   uint32_t val)
+{
+    /*
+     * Adjacent literals rather than a backslash-newline inside the string,
+     * which would put the source indentation into the logged message.
+     */
+    PT_LOG("invalid write to MSI-X table, "
+           "only dword access is allowed.\n");
+}
+
+/*
+ * Dword write handler for the emulated MSI-X table.
+ *
+ * opaque: the struct pt_dev registered with cpu_register_io_memory()
+ * addr:   address of the access; the table starts at msix->mmio_base_addr
+ * val:    dword written by the guest
+ *
+ * Dwords 0-2 of an entry (address low/high, data) are stored and the entry
+ * is marked dirty when the value changed.  A write to dword 3 stores the
+ * value, pushes a dirty entry to Xen when bit 0 (mask) is clear, and
+ * mirrors the mask bit into the physical table.
+ *
+ * Writes that are unaligned, fall outside the table, or modify dwords 0-2
+ * of an unmasked entry while MSI-X is enabled are logged and ignored.
+ */
+static void pci_msix_writel(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    struct pt_dev *dev = (struct pt_dev *)opaque;
+    struct pt_msix_info *msix = dev->msix;
+    struct msix_entry_info *entry;
+    uint64_t table_pos;
+    int entry_nr, offset;
+
+    if ( addr % 4 )
+    {
+        PT_LOG("unaligned dword access to MSI-X table, addr %016lx\n",
+               (unsigned long)addr);
+        return;
+    }
+
+    /*
+     * Reject anything outside the table before indexing msix_entry[].
+     * An address below the base wraps to a huge value and is caught by
+     * the same comparison.
+     */
+    table_pos = (uint64_t)addr - msix->mmio_base_addr;
+    if ( table_pos >= (uint64_t)msix->total_entries * 16 )
+    {
+        PT_LOG("write outside of MSI-X table, addr %016lx\n",
+               (unsigned long)addr);
+        return;
+    }
+
+    entry_nr = table_pos / 16;
+    offset = (table_pos % 16) / 4;
+    entry = &msix->msix_entry[entry_nr];
+
+    /*
+     * Address/data may only change while the entry is masked or MSI-X is
+     * disabled; an enabled, unmasked entry is live and must be left alone.
+     */
+    if ( offset != 3 && msix->enabled && !(entry->io_mem[3] & 0x1) )
+    {
+        PT_LOG("can not update msix entry %d since MSI-X is already "
+               "function now.\n", entry_nr);
+        return;
+    }
+
+    if ( offset != 3 && entry->io_mem[offset] != val )
+        entry->flags = 1;
+    entry->io_mem[offset] = val;
+
+    if ( offset == 3 )
+    {
+        /* Unmasking is the point at which a changed entry takes effect. */
+        if ( !(val & 0x1) )
+            pt_msix_update_one(dev, entry_nr);
+        mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1);
+    }
+}
+
+/*
+ * Write handlers passed to cpu_register_io_memory() for the emulated MSI-X
+ * table.  Only the last slot performs a write; the first two just log.
+ * Presumably the slots are indexed by access size (byte, word, dword) --
+ * confirm against cpu_register_io_memory().
+ */
+static CPUWriteMemoryFunc *pci_msix_write[] = {
+ pci_msix_invalid_write,
+ pci_msix_invalid_write,
+ pci_msix_writel
+};
+
+/*
+ * Read handler for the access sizes the emulated MSI-X table rejects:
+ * the attempt is logged and 0 is returned to the guest.
+ */
+static uint32_t pci_msix_invalid_read(void *opaque, target_phys_addr_t addr)
+{
+    /*
+     * Adjacent literals rather than a backslash-newline inside the string,
+     * which would put the source indentation into the logged message.
+     */
+    PT_LOG("invalid read to MSI-X table, "
+           "only dword access is allowed.\n");
+    return 0;
+}
+
+/*
+ * Dword read handler for the emulated MSI-X table.
+ *
+ * opaque: the struct pt_dev registered with cpu_register_io_memory()
+ * addr:   address of the access; the table starts at msix->mmio_base_addr
+ *
+ * Returns the stored copy of the addressed dword, or 0 (after logging) for
+ * an unaligned access or one that falls outside the table.
+ */
+static uint32_t pci_msix_readl(void *opaque, target_phys_addr_t addr)
+{
+    struct pt_dev *dev = (struct pt_dev *)opaque;
+    struct pt_msix_info *msix = dev->msix;
+    uint64_t table_pos;
+
+    if ( addr % 4 )
+    {
+        PT_LOG("unaligned dword access to MSI-X table, addr %016lx\n",
+               (unsigned long)addr);
+        return 0;
+    }
+
+    /*
+     * Never index msix_entry[] with an offset beyond the table.  An
+     * address below the base wraps to a huge value and is rejected too.
+     */
+    table_pos = (uint64_t)addr - msix->mmio_base_addr;
+    if ( table_pos >= (uint64_t)msix->total_entries * 16 )
+    {
+        PT_LOG("read outside of MSI-X table, addr %016lx\n",
+               (unsigned long)addr);
+        return 0;
+    }
+
+    return msix->msix_entry[table_pos / 16].io_mem[(table_pos % 16) / 4];
+}
+
+/*
+ * Read handlers passed to cpu_register_io_memory() for the emulated MSI-X
+ * table.  Only the last slot returns table contents; the first two log and
+ * return 0.  Presumably the slots are indexed by access size (byte, word,
+ * dword) -- confirm against cpu_register_io_memory().
+ */
+static CPUReadMemoryFunc *pci_msix_read[] = {
+ pci_msix_invalid_read,
+ pci_msix_invalid_read,
+ pci_msix_readl
+};
+
+/*
+ * Issue a DPCI_ADD_MAPPING request for the page(s) that hold the MSI-X
+ * table: guest frame of msix->mmio_base_addr onto the machine frame of the
+ * BAR's maddr plus the table offset.
+ *
+ * Returns 0 without calling Xen when the device has no MSI-X state or
+ * bar_index is not the BAR that holds the table; otherwise returns the
+ * result of xc_domain_memory_mapping().
+ */
+int add_msix_mapping(struct pt_dev *dev, int bar_index)
+{
+    struct pt_msix_info *msix = dev->msix;
+
+    if ( !msix || msix->bar_index != bar_index )
+        return 0;
+
+    return xc_domain_memory_mapping(xc_handle, domid,
+                msix->mmio_base_addr >> XC_PAGE_SHIFT,
+                (dev->bases[bar_index].access.maddr
+                 + msix->table_off) >> XC_PAGE_SHIFT,
+                (msix->total_entries * 16
+                 + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
+                DPCI_ADD_MAPPING);
+}
+
+/*
+ * Place the emulated MSI-X table at the BAR's current guest address:
+ * record e_physbase + table_off as the new msix->mmio_base_addr, register
+ * the table's I/O handlers (msix->mmio_index) over that range, and issue a
+ * DPCI_REMOVE_MAPPING request for the page(s) that hold the table.
+ *
+ * Returns 0 without doing anything when the device has no MSI-X state or
+ * bar_index is not the BAR that holds the table; otherwise returns the
+ * result of xc_domain_memory_mapping().
+ */
+int remove_msix_mapping(struct pt_dev *dev, int bar_index)
+{
+    struct pt_msix_info *msix = dev->msix;
+
+    if ( !msix || msix->bar_index != bar_index )
+        return 0;
+
+    msix->mmio_base_addr = dev->bases[bar_index].e_physbase
+                           + msix->table_off;
+
+    cpu_register_physical_memory(msix->mmio_base_addr,
+                                 msix->total_entries * 16,
+                                 msix->mmio_index);
+
+    return xc_domain_memory_mapping(xc_handle, domid,
+                msix->mmio_base_addr >> XC_PAGE_SHIFT,
+                (dev->bases[bar_index].access.maddr
+                 + msix->table_off) >> XC_PAGE_SHIFT,
+                (msix->total_entries * 16
+                 + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
+                DPCI_REMOVE_MAPPING);
+}
+
+/*
+ * Create the MSI-X emulation state for a passed-through device.
+ *
+ * dev: device being registered; dev->msix is set on success
+ * pos: config-space offset of the MSI-X capability
+ *
+ * The function sizes the table from the control word, allocates the
+ * per-entry state (all entries unmapped), turns MSI-X off on the physical
+ * device if it was left enabled, records which BAR holds the table and at
+ * what offset, maps the physical table through /dev/mem and registers the
+ * MMIO handlers that emulate the table for the guest.
+ *
+ * Returns 0 on success.  Returns -1 when pos does not hold an MSI-X
+ * capability, or when allocation, open() or mmap() fails; in the failure
+ * cases after the capability check dev->msix is left NULL so the MSI-X
+ * paths stay inactive for this device.
+ */
+int pt_msix_init(struct pt_dev *dev, int pos)
+{
+    uint8_t id;
+    uint16_t flags, control;
+    uint32_t table_off;
+    int i, total_entries, bar_index, fd;
+    u64 bar_base, table_phys, page_off;
+    size_t info_size;
+    void *mapping;
+    struct pt_msix_info *msix;
+    struct pci_dev *pd = dev->pci_dev;
+
+    id = pci_read_byte(pd, pos + PCI_CAP_LIST_ID);
+
+    if ( id != PCI_CAP_ID_MSIX )
+    {
+        PT_LOG("error id %x pos %x\n", id, pos);
+        return -1;
+    }
+
+    control = pci_read_word(pd, pos + 2);
+    total_entries = control & 0x7ff;
+    total_entries += 1;
+
+    info_size = sizeof(*msix) + total_entries * sizeof(msix->msix_entry[0]);
+    msix = calloc(1, info_size);
+    if ( !msix )
+    {
+        PT_LOG("error allocation pt_msix_info\n");
+        dev->msix = NULL;
+        return -1;
+    }
+    msix->total_entries = total_entries;
+    msix->offset = pos;
+    msix->fd = -1;
+    for ( i = 0; i < total_entries; i++ )
+        msix->msix_entry[i].pirq = -1;
+
+    flags = pci_read_word(pd, pos + PCI_MSI_FLAGS);
+    if ( flags & PCI_MSIX_ENABLE )
+    {
+        PT_LOG("MSIX enabled already, disable first\n");
+        pci_write_word(pd, pos + PCI_MSI_FLAGS, flags & ~PCI_MSIX_ENABLE);
+        *(uint16_t *)&dev->dev.config[pos + PCI_MSI_FLAGS]
+            = flags & ~(PCI_MSIX_ENABLE | PCI_MSIX_MASK);
+    }
+
+    /*
+     * Split the table register into BAR indicator and offset.  The offset
+     * must be stored in the structure: add_msix_mapping() and
+     * remove_msix_mapping() read msix->table_off.
+     */
+    table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
+    bar_index = msix->bar_index = table_off & PCI_MSIX_BIR;
+    table_off &= ~PCI_MSIX_BIR;
+    msix->table_off = table_off;
+
+    bar_base = pci_read_long(pd, 0x10 + 4 * bar_index);
+    if ( (bar_base & 0x6) == 0x4 )
+        bar_base += (u64)pci_read_long(pd, 0x10 + 4 * (bar_index + 1)) << 32;
+    /* The low four bits of a memory BAR are type flags, not address. */
+    bar_base &= ~(u64)0xf;
+    PT_LOG("get MSI-X table bar base %lx\n", (unsigned long)bar_base);
+
+    /*
+     * mmap() needs a page-aligned file offset, so map from the start of
+     * the page that contains the table and step forward to the table.
+     */
+    table_phys = bar_base + table_off;
+    page_off = table_phys & (XC_PAGE_SIZE - 1);
+
+    fd = open("/dev/mem", O_RDWR);
+    if ( fd == -1 )
+    {
+        PT_LOG("Error: can not open /dev/mem for MSI-X table\n");
+        goto err_free;
+    }
+
+    mapping = mmap(0, total_entries * 16 + page_off,
+                   PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED,
+                   fd, table_phys - page_off);
+    if ( mapping == MAP_FAILED )
+    {
+        PT_LOG("Error: can not map physical MSI-X table\n");
+        goto err_close;
+    }
+
+    msix->fd = fd;
+    msix->phys_iomem_base = (uint8_t *)mapping + page_off;
+    PT_LOG("mapping physical MSI-X table to %lx\n",
+           (unsigned long)msix->phys_iomem_base);
+
+    /* Register the handlers last so no failure path leaves them behind. */
+    msix->mmio_index =
+        cpu_register_io_memory(0, pci_msix_read, pci_msix_write, dev);
+
+    dev->msix = msix;
+    return 0;
+
+err_close:
+    close(fd);
+err_free:
+    free(msix);
+    dev->msix = NULL;
+    return -1;
+}
+
+/*
+ * Set (enable != 0) or clear PCI_MSIX_ENABLE in the control word of the
+ * physical device and remember the resulting state in d->msix->enabled.
+ *
+ * Returns -1 when the device has no libpci handle, 0 otherwise.
+ */
+static int pt_msix_enable(struct pt_dev *d, int enable)
+{
+    struct pci_dev *pd = d->pci_dev;
+    int ctrl_pos;
+    uint16_t ctrl;
+
+    if ( !pd )
+        return -1;
+
+    ctrl_pos = d->msix->offset + PCI_MSI_FLAGS;
+    ctrl = pci_read_word(pd, ctrl_pos);
+    ctrl = enable ? (ctrl | PCI_MSIX_ENABLE) : (ctrl & ~PCI_MSIX_ENABLE);
+    pci_write_word(pd, ctrl_pos, ctrl);
+
+    d->msix->enabled = enable ? 1 : 0;
+
+    return 0;
+}
+
+/*
+ * Set (mask != 0) or clear PCI_MSIX_MASK in the control word of the
+ * physical device.
+ *
+ * Returns -1 when the device has no libpci handle, 0 otherwise.
+ */
+static int pt_msix_func_mask(struct pt_dev *d, int mask)
+{
+    struct pci_dev *pd = d->pci_dev;
+    int ctrl_pos;
+    uint16_t ctrl;
+
+    if ( !pd )
+        return -1;
+
+    ctrl_pos = d->msix->offset + PCI_MSI_FLAGS;
+    ctrl = pci_read_word(pd, ctrl_pos);
+    ctrl = mask ? (ctrl | PCI_MSIX_MASK) : (ctrl & ~PCI_MSIX_MASK);
+    pci_write_word(pd, ctrl_pos, ctrl);
+
+    return 0;
+}
+
+/*
+ * Apply the control word held in the emulated config space to the device:
+ * when MSI-X is enabled and not function-masked, every table entry is
+ * refreshed first; then the function-mask and enable bits are written to
+ * the physical device, in that order.  Always returns 0.
+ */
+static int pt_msix_control_update(struct pt_dev *d)
+{
+    PCIDevice *pd = (PCIDevice *)d;
+    uint16_t ctrl = *(uint16_t *)(&pd->config[d->msix->offset + 2]);
+    int func_masked = ctrl & PCI_MSIX_MASK;
+    int enabled = ctrl & PCI_MSIX_ENABLE;
+
+    if ( enabled && !func_masked )
+        pt_msix_update(d);
+
+    pt_msix_func_mask(d, func_masked);
+    pt_msix_enable(d, enabled);
+
+    return 0;
+}
+
+/*
+ * Config-space write hook for the MSI-X capability.
+ *
+ * Only the byte at capability offset 3 (upper half of the control word) is
+ * emulated.  When the access covers it, the bits selected by
+ * PT_MSIX_CTRL_WR_MASK_HI are taken from the guest value and the remaining
+ * bits from the physical device; the result is stored in the emulated
+ * config space and applied through pt_msix_control_update().
+ *
+ * Returns 1 when that byte was handled, 0 when the device has no MSI-X
+ * state or the access does not cover the byte.
+ */
+int pt_msix_write(struct pt_dev *d, uint32_t addr, uint32_t val, uint32_t len)
+{
+    PCIDevice *dev = (PCIDevice *)d;
+    uint8_t guest_byte, phys_byte;
+    uint32_t idx;
+    int ctrl_hi;
+
+    if ( !d || !d->msix )
+        return 0;
+
+    if ( (addr >= (d->msix->offset + 4) ) ||
+         (addr + len) < d->msix->offset)
+        return 0;
+
+    PT_LOG("addr %x val %x len %x offset %x\n",
+           addr, val, len, d->msix->offset);
+
+    /*
+     * addr is at most offset + 3 here, so the distance to the control
+     * byte is non-negative; the byte is covered only if it lies in len.
+     */
+    ctrl_hi = d->msix->offset + 3;
+    idx = ctrl_hi - addr;
+    if ( idx >= len )
+        return 0;
+
+    guest_byte = (val >> (idx * 8)) & 0xff;
+    phys_byte = pci_read_byte(d->pci_dev, ctrl_hi);
+
+    dev->config[ctrl_hi] = (phys_byte & ~PT_MSIX_CTRL_WR_MASK_HI) |
+                           (guest_byte & PT_MSIX_CTRL_WR_MASK_HI);
+    pt_msix_control_update(d);
+
+    return 1;
+}
+
+/*
+ * Config-space read hook for the MSI-X capability.
+ *
+ * The first four bytes of the capability (offset .. offset + 3) are served
+ * from the emulated config space.  For the part of the access that
+ * overlaps them, the matching bytes of *val are replaced; other bytes of
+ * *val are left as the caller filled them.
+ *
+ * d:    pass-through device
+ * addr: config-space address of the access
+ * len:  length of the access in bytes
+ * val:  in/out value of the access
+ *
+ * Returns the number of bytes supplied, 0 when the device has no MSI-X
+ * state or the access does not overlap the capability header.
+ */
+int pt_msix_read(struct pt_dev *d, int addr, int len, uint32_t *val)
+{
+    int e_addr = addr, e_len = len, offset = 0, i;
+    int cap_start, cap_end;
+    PCIDevice *pd = (PCIDevice *)d;
+
+    if ( !d || !d->msix )
+        return 0;
+
+    cap_start = d->msix->offset;
+    cap_end = cap_start + 4;    /* one past the control word's high byte */
+
+    if ( addr >= cap_end || (addr + len) <= cap_start )
+        return 0;
+
+    /*
+     * Trim the tail against the same four-byte window the check above
+     * uses, so the byte at offset + 3 is served as well.
+     */
+    if ( (addr + len) > cap_end )
+        e_len -= addr + len - cap_end;
+
+    if ( addr < cap_start )
+    {
+        e_addr = cap_start;
+        offset = cap_start - addr;
+        e_len -= offset;
+    }
+
+    for ( i = 0; i < e_len; i++ )
+    {
+        int shift = (offset + i) * 8;
+        uint32_t e_val = pd->config[e_addr + i];
+
+        /* Unsigned operands: a shift by 24 must not overflow an int. */
+        *val &= ~((uint32_t)0xff << shift);
+        *val |= e_val << shift;
+    }
+
+    PT_LOG("addr %x len %x val %x offset %x\n",
+           addr, len, *val, d->msix->offset);
+
+    return e_len;
+}
+
diff -r 5c00188dd159 -r 4afc6023e8ec tools/ioemu/hw/pt-msi.h
--- a/tools/ioemu/hw/pt-msi.h Mon May 12 10:06:40 2008 +0100
+++ b/tools/ioemu/hw/pt-msi.h Mon May 12 10:07:26 2008 +0100
@@ -62,4 +62,19 @@ int
int
pt_msi_read(struct pt_dev *d, int addr, int len, uint32_t *val);
+/*
+ * Register the emulated MSI-X table at the BAR's current guest address and
+ * issue DPCI_REMOVE_MAPPING for the table's page(s).  Returns 0 when
+ * bar_index does not hold the table or the device has no MSI-X state.
+ */
+int
+remove_msix_mapping(struct pt_dev *dev, int bar_index);
+
+/*
+ * Issue DPCI_ADD_MAPPING for the MSI-X table's page(s).  Returns 0 when
+ * bar_index does not hold the table or the device has no MSI-X state.
+ */
+int
+add_msix_mapping(struct pt_dev *dev, int bar_index);
+
+/*
+ * Set up dev->msix for the MSI-X capability at config offset pos.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+pt_msix_init(struct pt_dev *dev, int pos);
+
+/*
+ * Config-space write hook.  Returns 1 when the write was handled here,
+ * 0 when the caller should go on processing it.
+ */
+int
+pt_msix_write(struct pt_dev *d, uint32_t addr, uint32_t val, uint32_t len);
+
+/*
+ * Config-space read hook.  Overwrites in *val the bytes served from the
+ * emulated MSI-X capability and returns how many bytes that was.
+ */
+int
+pt_msix_read(struct pt_dev *d, int addr, int len, uint32_t *val);
+
#endif
diff -r 5c00188dd159 -r 4afc6023e8ec tools/libxc/xc_physdev.c
--- a/tools/libxc/xc_physdev.c Mon May 12 10:06:40 2008 +0100
+++ b/tools/libxc/xc_physdev.c Mon May 12 10:07:26 2008 +0100
@@ -52,6 +52,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
int *pirq,
int devfn,
int bus,
+ int entry_nr,
int msi_type)
{
int rc;
@@ -66,6 +67,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
map.pirq = *pirq;
map.msi_info.devfn = devfn;
map.msi_info.bus = bus;
+ map.msi_info.entry_nr = entry_nr;
map.msi_info.msi = msi_type;
rc = do_physdev_op(xc_handle, PHYSDEVOP_map_pirq, &map);
diff -r 5c00188dd159 -r 4afc6023e8ec tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon May 12 10:06:40 2008 +0100
+++ b/tools/libxc/xenctrl.h Mon May 12 10:07:26 2008 +0100
@@ -859,6 +859,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
int *pirq,
int devfn,
int bus,
+ int entry_nr,
int msi_type);
int xc_physdev_unmap_pirq(int xc_handle,
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|