WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH V4 10/10] Introduce Xen PCI Passthrough, MSI (3/3)

To: QEMU-devel <qemu-devel@xxxxxxxxxx>, Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH V4 10/10] Introduce Xen PCI Passthrough, MSI (3/3)
From: Anthony PERARD <anthony.perard@xxxxxxxxxx>
Date: Thu, 17 Nov 2011 15:17:13 +0000
Cc: Anthony PERARD <anthony.perard@xxxxxxxxxx>, Xen Devel <xen-devel@xxxxxxxxxxxxxxxxxxx>, Shan Haitao <haitao.shan@xxxxxxxxx>, Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Delivery-date: Thu, 17 Nov 2011 07:57:00 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1321543033-22090-1-git-send-email-anthony.perard@xxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <1321543033-22090-1-git-send-email-anthony.perard@xxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
From: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>

A more complete history can be found here:
git://xenbits.xensource.com/qemu-xen-unstable.git

Signed-off-by: Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Shan Haitao <haitao.shan@xxxxxxxxx>
Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
---
 Makefile.target                      |    1 +
 hw/apic-msidef.h                     |    2 +
 hw/xen_pci_passthrough.c             |   60 +++-
 hw/xen_pci_passthrough.h             |   55 +++
 hw/xen_pci_passthrough_config_init.c |  505 +++++++++++++++++++++++++-
 hw/xen_pci_passthrough_msi.c         |  678 ++++++++++++++++++++++++++++++++++
 6 files changed, 1294 insertions(+), 7 deletions(-)
 create mode 100644 hw/xen_pci_passthrough_msi.c

diff --git a/Makefile.target b/Makefile.target
index 33435a3..81cff70 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -223,6 +223,7 @@ obj-i386-$(CONFIG_XEN) += xen_platform.o
 obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += host-pci-device.o
 obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pci_passthrough.o
 obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pci_passthrough_config_init.o
+obj-i386-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pci_passthrough_msi.o
 
 # Inter-VM PCI shared memory
 CONFIG_IVSHMEM =
diff --git a/hw/apic-msidef.h b/hw/apic-msidef.h
index 3182f0b..6e2eb71 100644
--- a/hw/apic-msidef.h
+++ b/hw/apic-msidef.h
@@ -22,6 +22,8 @@
 
 #define MSI_ADDR_DEST_MODE_SHIFT        2
 
+#define MSI_ADDR_REDIRECTION_SHIFT      3
+
 #define MSI_ADDR_DEST_ID_SHIFT          12
 #define  MSI_ADDR_DEST_ID_MASK          0x00ffff0
 
diff --git a/hw/xen_pci_passthrough.c b/hw/xen_pci_passthrough.c
index c816ed5..cd7e3c7 100644
--- a/hw/xen_pci_passthrough.c
+++ b/hw/xen_pci_passthrough.c
@@ -38,6 +38,39 @@
  *     Write '1'
  *       <ptdev->msi_trans_en is false>
  *         - Set real bit to '1'.
+ *
+ * MSI-INTx translation.
+ *   Initialize(xc_physdev_map_pirq_msi/pt_msi_setup)
+ *     Bind MSI-INTx(xc_domain_bind_pt_irq)
+ *       <fail>
+ *         - Unmap MSI.
+ *           <success>
+ *             - Set dev->msi->pirq to '-1'.
+ *           <fail>
+ *             - Do nothing.
+ *
+ *   Write to Interrupt Disable bit by guest software(pt_cmd_reg_write)
+ *     Write '0'
+ *       <ptdev->msi_trans_en is true>
+ *         - Set MSI Enable bit to '1'.
+ *
+ *     Write '1'
+ *       <ptdev->msi_trans_en is true>
+ *         - Set MSI Enable bit to '0'.
+ *
+ * MSI interrupt:
+ *   Initialize MSI register(pt_msi_setup, pt_msi_update)
+ *     Bind MSI(xc_domain_update_msi_irq)
+ *       <fail>
+ *         - Unmap MSI.
+ *         - Set dev->msi->pirq to '-1'.
+ *
+ * MSI-X interrupt:
+ *   Initialize MSI-X register(pt_msix_update_one)
+ *     Bind MSI-X(xc_domain_update_msi_irq)
+ *       <fail>
+ *         - Unmap MSI-X.
+ *         - Set entry->pirq to '-1'.
  */
 
 #include <sys/ioctl.h>
@@ -389,6 +422,7 @@ static void pt_iomem_map(XenPCIPassthroughState *s, int i,
     }
 
     if (!first_map && old_ebase != PT_PCI_BAR_UNMAPPED) {
+        pt_add_msix_mapping(s, i);
         /* Remove old mapping */
         memory_region_del_subregion(r->address_space,
                                     r->memory);
@@ -417,6 +451,15 @@ static void pt_iomem_map(XenPCIPassthroughState *s, int i,
         if (ret != 0) {
             PT_ERR(&s->dev, "create new mapping failed!\n");
         }
+
+        ret = pt_remove_msix_mapping(s, i);
+        if (ret != 0) {
+            PT_ERR(&s->dev, "Remove MSI-X MMIO mapping failed!\n");
+        }
+
+        if (old_ebase != e_phys && old_ebase != -1) {
+            pt_msix_update_remap(s, i);
+        }
     }
 }
 
@@ -744,6 +787,9 @@ static int pt_initfn(PCIDevice *pcidev)
         mapped_machine_irq[machine_irq]++;
     }
 
+    /* setup MSI-INTx translation if support */
+    rc = pt_enable_msi_translate(s);
+
     /* bind machine_irq to device */
     if (rc < 0 && machine_irq != 0) {
         uint8_t e_device = PCI_SLOT(s->dev.devfn);
@@ -773,7 +819,8 @@ static int pt_initfn(PCIDevice *pcidev)
 
 out:
     PT_LOG(pcidev, "Real physical device %02x:%02x.%x registered successfuly!"
-           "\nIRQ type = %s\n", bus, slot, func, "INTx");
+           "\nIRQ type = %s\n", bus, slot, func,
+           s->msi_trans_en ? "MSI-INTx" : "INTx");
 
     return 0;
 }
@@ -790,7 +837,7 @@ static int pt_unregister_device(PCIDevice *pcidev)
     e_intx = pci_intx(s);
     machine_irq = s->machine_irq;
 
-    if (machine_irq) {
+    if (s->msi_trans_en == 0 && machine_irq) {
         rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq,
                                      PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0);
         if (rc < 0) {
@@ -798,6 +845,13 @@ static int pt_unregister_device(PCIDevice *pcidev)
         }
     }
 
+    if (s->msi) {
+        pt_msi_disable(s);
+    }
+    if (s->msix) {
+        pt_msix_disable(s);
+    }
+
     if (machine_irq) {
         mapped_machine_irq[machine_irq]--;
 
@@ -832,6 +886,8 @@ static PCIDeviceInfo xen_pci_passthrough = {
     .is_express = 0,
     .qdev.props = (Property[]) {
         DEFINE_PROP_STRING("hostaddr", XenPCIPassthroughState, hostaddr),
+        DEFINE_PROP_BIT("msitranslate", XenPCIPassthroughState, msi_trans_cap,
+                        0, true),
         DEFINE_PROP_BIT("power-mgmt", XenPCIPassthroughState, power_mgmt,
                         0, false),
         DEFINE_PROP_END_OF_LIST(),
diff --git a/hw/xen_pci_passthrough.h b/hw/xen_pci_passthrough.h
index 110325c..acbbab5 100644
--- a/hw/xen_pci_passthrough.h
+++ b/hw/xen_pci_passthrough.h
@@ -74,6 +74,10 @@ typedef int (*conf_byte_restore)
 #define PT_PCI_BAR_UNMAPPED (-1)
 #define PT_UNASSIGNED_PIRQ (-1)
 
+/* MSI-X */
+#define PT_MSI_FLAG_UNINIT 0x1000
+#define PT_MSI_FLAG_MAPPED 0x2000
+
 
 typedef enum {
     GRP_TYPE_HARDWIRED = 0,                     /* 0 Hardwired reg group */
@@ -177,6 +181,34 @@ typedef struct XenPTRegGroup {
 } XenPTRegGroup;
 
 
+typedef struct XenPTMSI {
+    uint32_t flags;
+    uint32_t ctrl_offset; /* saved control offset */
+    int pirq;          /* guest pirq corresponding */
+    uint32_t addr_lo;  /* guest message address */
+    uint32_t addr_hi;  /* guest message upper address */
+    uint16_t data;     /* guest message data */
+} XenPTMSI;
+
+typedef struct XenMSIXEntry {
+    int pirq;        /* -1 means unmapped */
+    int flags;       /* flags indicting whether MSI ADDR or DATA is updated */
+    uint32_t io_mem[4];
+} XenMSIXEntry;
+typedef struct XenPTMSIX {
+    uint32_t ctrl_offset;
+    int enabled;
+    int total_entries;
+    int bar_index;
+    uint64_t table_base;
+    uint32_t table_off;
+    uint32_t table_offset_adjust; /* page align mmap */
+    uint64_t mmio_base_addr;
+    int mmio_index;
+    void *phys_iomem_base;
+    XenMSIXEntry msix_entry[0];
+} XenPTMSIX;
+
 typedef struct XenPTPM {
     QEMUTimer *pm_timer;  /* QEMUTimer struct */
     int no_soft_reset;    /* No Soft Reset flags */
@@ -200,6 +232,13 @@ struct XenPCIPassthroughState {
 
     uint32_t machine_irq;
 
+    XenPTMSI *msi;
+    XenPTMSIX *msix;
+
+    /* Physical MSI to guest INTx translation when possible */
+    uint32_t msi_trans_cap;
+    bool msi_trans_en;
+
     uint32_t power_mgmt;
     XenPTPM *pm_state;
 
@@ -279,4 +318,20 @@ static inline uint8_t pci_intx(XenPCIPassthroughState *s)
     return r_val;
 }
 
+/* MSI/MSI-X */
+void pt_msi_set_enable(XenPCIPassthroughState *s, int en);
+int pt_msi_setup(XenPCIPassthroughState *s);
+int pt_msi_update(XenPCIPassthroughState *d);
+void pt_msi_disable(XenPCIPassthroughState *s);
+int pt_enable_msi_translate(XenPCIPassthroughState *s);
+void pt_disable_msi_translate(XenPCIPassthroughState *s);
+
+int pt_msix_init(XenPCIPassthroughState *s, int pos);
+void pt_msix_delete(XenPCIPassthroughState *s);
+int pt_msix_update(XenPCIPassthroughState *s);
+int pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index);
+void pt_msix_disable(XenPCIPassthroughState *s);
+int pt_add_msix_mapping(XenPCIPassthroughState *s, int bar_index);
+int pt_remove_msix_mapping(XenPCIPassthroughState *s, int bar_index);
+
 #endif /* !QEMU_HW_XEN_PCI_PASSTHROUGH_H */
diff --git a/hw/xen_pci_passthrough_config_init.c 
b/hw/xen_pci_passthrough_config_init.c
index ae64544..28523f1 100644
--- a/hw/xen_pci_passthrough_config_init.c
+++ b/hw/xen_pci_passthrough_config_init.c
@@ -398,11 +398,19 @@ static int pt_cmd_reg_write(XenPCIPassthroughState *s, 
XenPTReg *cfg_entry,
     throughable_mask = ~emu_mask & valid_mask;
 
     if (*value & PCI_COMMAND_INTX_DISABLE) {
-        throughable_mask |= PCI_COMMAND_INTX_DISABLE;
-    } else {
-        if (s->machine_irq) {
+        if (s->msi_trans_en) {
+            pt_msi_set_enable(s, 0);
+        } else {
             throughable_mask |= PCI_COMMAND_INTX_DISABLE;
         }
+    } else {
+        if (s->msi_trans_en) {
+            pt_msi_set_enable(s, 1);
+        } else {
+            if (s->machine_irq) {
+                throughable_mask |= PCI_COMMAND_INTX_DISABLE;
+            }
+        }
     }
 
     *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
@@ -1282,13 +1290,21 @@ static void 
pt_reset_interrupt_and_io_mapping(XenPCIPassthroughState *s)
     e_device = PCI_SLOT(s->dev.devfn);
     e_intx = pci_intx(s);
 
-    if (s->machine_irq) {
+    if (s->msi_trans_en == 0 && s->machine_irq) {
         if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, s->machine_irq,
                                     PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0)) {
             PT_ERR(d, "Unbinding of interrupt failed!\n");
         }
     }
 
+    /* disable MSI/MSI-X and MSI-INTx translation */
+    if (s->msi) {
+        pt_msi_disable(s);
+    }
+    if (s->msix) {
+        pt_msix_disable(s);
+    }
+
     /* clear all virtual region address */
     for (i = 0; i < PCI_NUM_REGIONS; i++) {
         r = &d->io_regions[i];
@@ -1536,6 +1552,415 @@ static XenPTRegInfo pt_emu_reg_pm_tbl[] = {
     },
 };
 
+/********************************
+ * MSI Capability
+ */
+
+/* Message Control register */
+static int pt_msgctrl_reg_init(XenPCIPassthroughState *s,
+                               XenPTRegInfo *reg, uint32_t real_offset,
+                               uint32_t *data)
+{
+    PCIDevice *d = &s->dev;
+    uint16_t reg_field = 0;
+
+    /* use I/O device register's value as initial value */
+    reg_field = pci_get_word(d->config + real_offset);
+
+    if (reg_field & PCI_MSI_FLAGS_ENABLE) {
+        PT_LOG(&s->dev, "MSI already enabled, disable it first\n");
+        host_pci_set_word(s->real_device, real_offset,
+                          reg_field & ~PCI_MSI_FLAGS_ENABLE);
+    }
+    s->msi->flags |= reg_field | PT_MSI_FLAG_UNINIT;
+    s->msi->ctrl_offset = real_offset;
+
+    *data = reg->init_val;
+    return 0;
+}
+static int pt_msgctrl_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint16_t *value, uint16_t dev_value,
+                                uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    uint16_t val;
+
+    /* Currently no support for multi-vector */
+    if (*value & PCI_MSI_FLAGS_QSIZE) {
+        PT_WARN(&s->dev, "try to set more than 1 vector ctrl %x\n", *value);
+    }
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+    /* update the msi_info too */
+    s->msi->flags |= cfg_entry->data &
+        ~(PT_MSI_FLAG_UNINIT | PT_MSI_FLAG_MAPPED | PCI_MSI_FLAGS_ENABLE);
+
+    /* create value for writing to I/O device register */
+    val = *value;
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (val & PCI_MSI_FLAGS_ENABLE) {
+        /* setup MSI pirq for the first time */
+        if (s->msi->flags & PT_MSI_FLAG_UNINIT) {
+            if (s->msi_trans_en) {
+                PT_LOG(&s->dev,
+                       "guest enabling MSI, disable MSI-INTx translation\n");
+                pt_disable_msi_translate(s);
+            } else {
+                /* Init physical one */
+                PT_LOG(&s->dev, "setup MSI\n");
+                if (pt_msi_setup(s)) {
+                    /* We do not broadcast the error to the framework code, so
+                     * that MSI errors are contained in MSI emulation code and
+                     * QEMU can go on running.
+                     * Guest MSI would be actually not working.
+                     */
+                    *value &= ~PCI_MSI_FLAGS_ENABLE;
+                    PT_WARN(&s->dev, "Can not map MSI.\n");
+                    return 0;
+                }
+            }
+            if (pt_msi_update(s)) {
+                *value &= ~PCI_MSI_FLAGS_ENABLE;
+                PT_WARN(&s->dev, "Can not bind MSI\n");
+                return 0;
+            }
+            s->msi->flags &= ~PT_MSI_FLAG_UNINIT;
+            s->msi->flags |= PT_MSI_FLAG_MAPPED;
+        }
+        s->msi->flags |= PCI_MSI_FLAGS_ENABLE;
+    } else {
+        s->msi->flags &= ~PCI_MSI_FLAGS_ENABLE;
+    }
+
+    /* pass through MSI_ENABLE bit when no MSI-INTx translation */
+    if (!s->msi_trans_en) {
+        *value &= ~PCI_MSI_FLAGS_ENABLE;
+        *value |= val & PCI_MSI_FLAGS_ENABLE;
+    }
+
+    return 0;
+}
+
+/* initialize Message Upper Address register */
+static int pt_msgaddr64_reg_init(XenPCIPassthroughState *ptdev,
+                                 XenPTRegInfo *reg, uint32_t real_offset,
+                                 uint32_t *data)
+{
+    /* no need to initialize in case of 32 bit type */
+    if (!(ptdev->msi->flags & PCI_MSI_FLAGS_64BIT)) {
+        *data = PT_INVALID_REG;
+    } else {
+        *data = reg->init_val;
+    }
+
+    return 0;
+}
+/* this function will be called twice (for 32 bit and 64 bit type) */
+/* initialize Message Data register */
+static int pt_msgdata_reg_init(XenPCIPassthroughState *ptdev,
+                               XenPTRegInfo *reg, uint32_t real_offset,
+                               uint32_t *data)
+{
+    uint32_t flags = ptdev->msi->flags;
+    uint32_t offset = reg->offset;
+
+    /* check the offset whether matches the type or not */
+    if (((offset == PCI_MSI_DATA_64) &&  (flags & PCI_MSI_FLAGS_64BIT)) ||
+        ((offset == PCI_MSI_DATA_32) && !(flags & PCI_MSI_FLAGS_64BIT))) {
+        *data = reg->init_val;
+    } else {
+        *data = PT_INVALID_REG;
+    }
+    return 0;
+}
+
+/* write Message Address register */
+static int pt_msgaddr32_reg_write(XenPCIPassthroughState *s,
+                                  XenPTReg *cfg_entry, uint32_t *value,
+                                  uint32_t dev_value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t writable_mask = 0;
+    uint32_t throughable_mask = 0;
+    uint32_t old_addr = cfg_entry->data;
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+    /* update the msi_info too */
+    s->msi->addr_lo = cfg_entry->data;
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (cfg_entry->data != old_addr) {
+        if (s->msi->flags & PT_MSI_FLAG_MAPPED) {
+            pt_msi_update(s);
+        }
+    }
+
+    return 0;
+}
+/* write Message Upper Address register */
+static int pt_msgaddr64_reg_write(XenPCIPassthroughState *s,
+                                  XenPTReg *cfg_entry, uint32_t *value,
+                                  uint32_t dev_value, uint32_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint32_t writable_mask = 0;
+    uint32_t throughable_mask = 0;
+    uint32_t old_addr = cfg_entry->data;
+
+    /* check whether the type is 64 bit or not */
+    if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) {
+        PT_ERR(&s->dev, "Write to the Upper Address without 64 bit support\n");
+        return -1;
+    }
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+    /* update the msi_info too */
+    s->msi->addr_hi = cfg_entry->data;
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (cfg_entry->data != old_addr) {
+        if (s->msi->flags & PT_MSI_FLAG_MAPPED) {
+            pt_msi_update(s);
+        }
+    }
+
+    return 0;
+}
+
+
+/* this function will be called twice (for 32 bit and 64 bit type) */
+/* write Message Data register */
+static int pt_msgdata_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry,
+                                uint16_t *value, uint16_t dev_value,
+                                uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+    uint16_t old_data = cfg_entry->data;
+    uint32_t flags = s->msi->flags;
+    uint32_t offset = reg->offset;
+
+    /* check the offset whether matches the type or not */
+    if (!((offset == PCI_MSI_DATA_64) &&  (flags & PCI_MSI_FLAGS_64BIT)) &&
+        !((offset == PCI_MSI_DATA_32) && !(flags & PCI_MSI_FLAGS_64BIT))) {
+        /* exit I/O emulator */
+        PT_ERR(&s->dev, "the offset does not match the 32/64 bit type!\n");
+        return -1;
+    }
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+    /* update the msi_info too */
+    s->msi->data = cfg_entry->data;
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
+
+    /* update MSI */
+    if (cfg_entry->data != old_data) {
+        if (flags & PT_MSI_FLAG_MAPPED) {
+            pt_msi_update(s);
+        }
+    }
+
+    return 0;
+}
+
+/* MSI Capability Structure reg static infomation table */
+static XenPTRegInfo pt_emu_reg_msi_tbl[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = pt_ptr_reg_init,
+        .u.b.read   = pt_byte_reg_read,
+        .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
+    },
+    /* Message Control reg */
+    {
+        .offset     = PCI_MSI_FLAGS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0xFF8E,
+        .emu_mask   = 0x007F,
+        .init       = pt_msgctrl_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_msgctrl_reg_write,
+        .u.w.restore  = NULL,
+    },
+    /* Message Address reg */
+    {
+        .offset     = PCI_MSI_ADDRESS_LO,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .ro_mask    = 0x00000003,
+        .emu_mask   = 0xFFFFFFFF,
+        .no_wb      = 1,
+        .init       = pt_common_reg_init,
+        .u.dw.read  = pt_long_reg_read,
+        .u.dw.write = pt_msgaddr32_reg_write,
+        .u.dw.restore = NULL,
+    },
+    /* Message Upper Address reg (if PCI_MSI_FLAGS_64BIT set) */
+    {
+        .offset     = PCI_MSI_ADDRESS_HI,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .ro_mask    = 0x00000000,
+        .emu_mask   = 0xFFFFFFFF,
+        .no_wb      = 1,
+        .init       = pt_msgaddr64_reg_init,
+        .u.dw.read  = pt_long_reg_read,
+        .u.dw.write = pt_msgaddr64_reg_write,
+        .u.dw.restore = NULL,
+    },
+    /* Message Data reg (16 bits of data for 32-bit devices) */
+    {
+        .offset     = PCI_MSI_DATA_32,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x0000,
+        .emu_mask   = 0xFFFF,
+        .no_wb      = 1,
+        .init       = pt_msgdata_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
+    },
+    /* Message Data reg (16 bits of data for 64-bit devices) */
+    {
+        .offset     = PCI_MSI_DATA_64,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x0000,
+        .emu_mask   = 0xFFFF,
+        .no_wb      = 1,
+        .init       = pt_msgdata_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
+    },
+    {
+        .size = 0,
+    },
+};
+
+
+/**************************************
+ * MSI-X Capability
+ */
+
+/* Message Control register for MSI-X */
+static int pt_msixctrl_reg_init(XenPCIPassthroughState *s,
+                                XenPTRegInfo *reg, uint32_t real_offset,
+                                uint32_t *data)
+{
+    PCIDevice *d = &s->dev;
+    uint16_t reg_field = 0;
+
+    /* use I/O device register's value as initial value */
+    reg_field = pci_get_word(d->config + real_offset);
+
+    if (reg_field & PCI_MSIX_FLAGS_ENABLE) {
+        PT_LOG(d, "MSIX already enabled, disable it first\n");
+        host_pci_set_word(s->real_device, real_offset,
+                          reg_field & ~PCI_MSIX_FLAGS_ENABLE);
+    }
+
+    s->msix->ctrl_offset = real_offset;
+
+    *data = reg->init_val;
+    return 0;
+}
+static int pt_msixctrl_reg_write(XenPCIPassthroughState *s,
+                                 XenPTReg *cfg_entry, uint16_t *value,
+                                 uint16_t dev_value, uint16_t valid_mask)
+{
+    XenPTRegInfo *reg = cfg_entry->reg;
+    uint16_t writable_mask = 0;
+    uint16_t throughable_mask = 0;
+
+    /* modify emulate register */
+    writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+    cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
+
+    /* create value for writing to I/O device register */
+    throughable_mask = ~reg->emu_mask & valid_mask;
+    *value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
+
+    /* update MSI-X */
+    if ((*value & PCI_MSIX_FLAGS_ENABLE)
+        && !(*value & PCI_MSIX_FLAGS_MASKALL)) {
+        if (s->msi_trans_en) {
+            PT_LOG(&s->dev,
+                   "guest enabling MSI-X, disable MSI-INTx translation\n");
+            pt_disable_msi_translate(s);
+        }
+        pt_msix_update(s);
+    }
+
+    s->msix->enabled = !!(*value & PCI_MSIX_FLAGS_ENABLE);
+
+    return 0;
+}
+
+/* MSI-X Capability Structure reg static infomation table */
+static XenPTRegInfo pt_emu_reg_msix_tbl[] = {
+    /* Next Pointer reg */
+    {
+        .offset     = PCI_CAP_LIST_NEXT,
+        .size       = 1,
+        .init_val   = 0x00,
+        .ro_mask    = 0xFF,
+        .emu_mask   = 0xFF,
+        .init       = pt_ptr_reg_init,
+        .u.b.read   = pt_byte_reg_read,
+        .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
+    },
+    /* Message Control reg */
+    {
+        .offset     = PCI_MSI_FLAGS,
+        .size       = 2,
+        .init_val   = 0x0000,
+        .ro_mask    = 0x3FFF,
+        .emu_mask   = 0x0000,
+        .init       = pt_msixctrl_reg_init,
+        .u.w.read   = pt_word_reg_read,
+        .u.w.write  = pt_msixctrl_reg_write,
+        .u.w.restore  = NULL,
+    },
+    {
+        .size = 0,
+    },
+};
+
 
 /****************************
  * Capabilities
@@ -1709,6 +2134,49 @@ static int pt_pcie_size_init(XenPCIPassthroughState *s,
     *size = pcie_size;
     return 0;
 }
+/* get MSI Capability Structure register group size */
+static int pt_msi_size_init(XenPCIPassthroughState *s,
+                            const XenPTRegGroupInfo *grp_reg,
+                            uint32_t base_offset, uint8_t *size)
+{
+    PCIDevice *d = &s->dev;
+    uint16_t msg_ctrl = 0;
+    uint8_t msi_size = 0xa;
+
+    msg_ctrl = pci_get_word(d->config + (base_offset + PCI_MSI_FLAGS));
+
+    /* check 64 bit address capable & Per-vector masking capable */
+    if (msg_ctrl & PCI_MSI_FLAGS_64BIT) {
+        msi_size += 4;
+    }
+    if (msg_ctrl & PCI_MSI_FLAGS_MASKBIT) {
+        msi_size += 10;
+    }
+
+    s->msi = g_new0(XenPTMSI, 1);
+    s->msi->pirq = PT_UNASSIGNED_PIRQ;
+
+    *size = msi_size;
+    return 0;
+}
+/* get MSI-X Capability Structure register group size */
+static int pt_msix_size_init(XenPCIPassthroughState *s,
+                             const XenPTRegGroupInfo *grp_reg,
+                             uint32_t base_offset, uint8_t *size)
+{
+    int rc = 0;
+
+    rc = pt_msix_init(s, base_offset);
+
+    if (rc == -1) {
+        PT_ERR(&s->dev, "Internal error: Invalid pt_msix_init.\n");
+        return rc;
+    }
+
+    *size = grp_reg->grp_size;
+    return 0;
+}
+
 
 static const XenPTRegGroupInfo pt_emu_reg_grp_tbl[] = {
     /* Header Type0 reg group */
@@ -1749,6 +2217,14 @@ static const XenPTRegGroupInfo pt_emu_reg_grp_tbl[] = {
         .grp_size   = 0x04,
         .size_init  = pt_reg_grp_size_init,
     },
+    /* MSI Capability Structure reg group */
+    {
+        .grp_id      = PCI_CAP_ID_MSI,
+        .grp_type    = GRP_TYPE_EMU,
+        .grp_size    = 0xFF,
+        .size_init   = pt_msi_size_init,
+        .emu_reg_tbl = pt_emu_reg_msi_tbl,
+    },
     /* PCI-X Capabilities List Item reg group */
     {
         .grp_id     = PCI_CAP_ID_PCIX,
@@ -1793,6 +2269,14 @@ static const XenPTRegGroupInfo pt_emu_reg_grp_tbl[] = {
         .size_init   = pt_pcie_size_init,
         .emu_reg_tbl = pt_emu_reg_pcie_tbl,
     },
+    /* MSI-X Capability Structure reg group */
+    {
+        .grp_id      = PCI_CAP_ID_MSIX,
+        .grp_type    = GRP_TYPE_EMU,
+        .grp_size    = 0x0C,
+        .size_init   = pt_msix_size_init,
+        .emu_reg_tbl = pt_emu_reg_msix_tbl,
+    },
     {
         .grp_size = 0,
     },
@@ -1965,8 +2449,11 @@ static int pt_init_pci_config(XenPCIPassthroughState *s)
         return rc;
     }
 
+    /* setup MSI-INTx translation if support */
+    rc = pt_enable_msi_translate(s);
+
     /* rebind machine_irq to device */
-    if (s->machine_irq != 0) {
+    if (rc < 0 && s->machine_irq != 0) {
         uint8_t e_device = PCI_SLOT(s->dev.devfn);
         uint8_t e_intx = pci_intx(s);
 
@@ -2117,6 +2604,14 @@ void pt_config_delete(XenPCIPassthroughState *s)
     struct XenPTRegGroup *reg_group, *next_grp;
     struct XenPTReg *reg, *next_reg;
 
+    /* free MSI/MSI-X info table */
+    if (s->msix) {
+        pt_msix_delete(s);
+    }
+    if (s->msi) {
+        g_free(s->msi);
+    }
+
     /* free Power Management info table */
     if (s->pm_state) {
         if (s->pm_state->pm_timer) {
diff --git a/hw/xen_pci_passthrough_msi.c b/hw/xen_pci_passthrough_msi.c
new file mode 100644
index 0000000..73d3d9b
--- /dev/null
+++ b/hw/xen_pci_passthrough_msi.c
@@ -0,0 +1,678 @@
+/*
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Jiang Yunhong <yunhong.jiang@xxxxxxxxx>
+ *
+ * This file implements direct PCI assignment to a HVM guest
+ */
+
+#include <sys/mman.h>
+
+#include "xen_backend.h"
+#include "xen_pci_passthrough.h"
+#include "apic-msidef.h"
+
+
+#define AUTO_ASSIGN -1
+
+/* shift count for gflags */
+#define GFLAGS_SHIFT_DEST_ID        0
+#define GFLAGS_SHIFT_RH             8
+#define GFLAGS_SHIFT_DM             9
+#define GLFAGS_SHIFT_DELIV_MODE     12
+#define GLFAGS_SHIFT_TRG_MODE       15
+
+
+/*
+ * Helpers
+ */
+
+static uint32_t get_msi_gflags(uint32_t data, uint64_t addr)
+{
+    uint32_t result = 0;
+    int rh, dm, dest_id, deliv_mode, trig_mode;
+
+    rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1;
+    dm = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+    dest_id = (addr >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
+    deliv_mode = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
+    trig_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
+
+    result = dest_id | (rh << GFLAGS_SHIFT_RH) | (dm << GFLAGS_SHIFT_DM) |
+             (deliv_mode << GLFAGS_SHIFT_DELIV_MODE) |
+             (trig_mode << GLFAGS_SHIFT_TRG_MODE);
+
+    return result;
+}
+
+
+/*
+ * MSI virtualization functions
+ */
+
+void pt_msi_set_enable(XenPCIPassthroughState *s, int en)
+{
+    uint16_t val = 0;
+    uint32_t address = 0;
+
+    PT_LOG(&s->dev, "enable: %i\n", en);
+
+    if (!s->msi) {
+        return;
+    }
+
+    address = s->msi->ctrl_offset;
+    if (!address) {
+        return;
+    }
+
+    host_pci_get_word(s->real_device, address, &val);
+    val &= ~PCI_MSI_FLAGS_ENABLE;
+    val |= en & PCI_MSI_FLAGS_ENABLE;
+    host_pci_set_word(s->real_device, address, val);
+}
+
+/* setup physical msi, but don't enable it */
+int pt_msi_setup(XenPCIPassthroughState *s)
+{
+    int pirq = PT_UNASSIGNED_PIRQ;
+    uint8_t gvec = 0;
+    int rc = 0;
+
+    if (!(s->msi->flags & PT_MSI_FLAG_UNINIT)) {
+        PT_ERR(&s->dev, "Setup physical MSI when it's already initialized.\n");
+        return -1;
+    }
+
+    gvec = s->msi->data & 0xFF;
+    if (!gvec) {
+        /* if gvec is 0, the guest is asking for a particular pirq that
+         * is passed as dest_id */
+        pirq = (s->msi->addr_hi & 0xffffff00) |
+               ((s->msi->addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
+        if (!pirq) {
+            /* this probably identifies an misconfiguration of the guest,
+             * try the emulated path */
+            pirq = PT_UNASSIGNED_PIRQ;
+        } else {
+            PT_LOG(&s->dev, "requested pirq: %d\n", pirq);
+        }
+    }
+
+    rc = xc_physdev_map_pirq_msi(xen_xc, xen_domid, AUTO_ASSIGN, &pirq,
+                                 PCI_DEVFN(s->real_device->dev,
+                                           s->real_device->func),
+                                 s->real_device->bus, 0, 0);
+    if (rc) {
+        PT_ERR(&s->dev, "Mapping of MSI failed. (rc: %d,"
+               " assigned device: %02x:%02x.%x)\n", rc,
+               s->real_device->dev, s->real_device->func, s->real_device->bus);
+        return -1;
+    }
+
+    if (pirq < 0) {
+        PT_ERR(&s->dev, "Invalid pirq number.\n");
+        return -1;
+    }
+
+    s->msi->pirq = pirq;
+    PT_LOG(&s->dev, "MSI mapped with pirq %d.\n", pirq);
+
+    return 0;
+}
+
+int pt_msi_update(XenPCIPassthroughState *s)
+{
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    int rc = 0;
+
+    /* get vector, address, flags info, etc. */
+    gvec = s->msi->data & 0xFF;
+    addr = (uint64_t)s->msi->addr_hi << 32 | s->msi->addr_lo;
+    gflags = get_msi_gflags(s->msi->data, addr);
+
+    PT_LOG(&s->dev, "Update MSI with pirq %d gvec 0x%x gflags 0x%x\n",
+           s->msi->pirq, gvec, gflags);
+
+    rc = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec,
+                                  s->msi->pirq, gflags, 0);
+    if (rc) {
+        PT_ERR(&s->dev, "Binding of MSI failed. (rc: %d)\n", rc);
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, s->msi->pirq)) {
+            PT_ERR(&s->dev, "Unmapping of MSI pirq %d failed.\n",
+                   s->msi->pirq);
+        }
+        s->msi->pirq = PT_UNASSIGNED_PIRQ;
+    }
+    return rc;
+}
+
+void pt_msi_disable(XenPCIPassthroughState *s)
+{
+    PCIDevice *d = &s->dev;
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    pt_msi_set_enable(s, 0);
+
+    e_device = PCI_SLOT(d->devfn);
+    e_intx = pci_intx(s);
+
+    if (s->msi_trans_en) {
+        if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, s->msi->pirq,
+                                    PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                    e_device, e_intx, 0)) {
+            PT_ERR(d, "Unbinding pt irq for MSI-INTx failed!\n");
+            goto out;
+        }
+    } else if (!(s->msi->flags & PT_MSI_FLAG_UNINIT)) {
+        /* get vector, address, flags info, etc. */
+        gvec = s->msi->data & 0xFF;
+        addr = (uint64_t)s->msi->addr_hi << 32 | s->msi->addr_lo;
+        gflags = get_msi_gflags(s->msi->data, addr);
+
+        PT_ERR(&s->dev, "Unbind MSI with pirq %x, gvec %x\n",
+               s->msi->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec,
+                                     s->msi->pirq, gflags)) {
+            PT_ERR(&s->dev, "Unbinding of MSI failed. (pirq: %d)\n",
+                   s->msi->pirq);
+            goto out;
+        }
+    }
+
+    if (s->msi->pirq != PT_UNASSIGNED_PIRQ) {
+        PT_LOG(&s->dev, "Unmap msi with pirq %d\n", s->msi->pirq);
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, s->msi->pirq)) {
+            PT_ERR(&s->dev, "Unmapping of MSI failed. (pirq: %d)\n",
+                   s->msi->pirq);
+            goto out;
+        }
+    }
+
+out:
+    /* clear msi info */
+    s->msi->flags = 0;
+    s->msi->pirq = -1;
+    s->msi_trans_en = 0;
+}
+
+/*
+ * MSI-INTx translation virtualization functions
+ */
+
+int pt_enable_msi_translate(XenPCIPassthroughState *s)
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    if (!(s->msi && s->msi_trans_cap)) {
+        return -1;
+    }
+
+    pt_msi_set_enable(s, 0);
+    s->msi_trans_en = 0;
+
+    if (pt_msi_setup(s)) {
+        PT_ERR(&s->dev, "MSI-INTx translation MSI setup failed, fallback\n");
+        return -1;
+    }
+
+    e_device = PCI_SLOT(s->dev.devfn);
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = pci_intx(s);
+
+    if (xc_domain_bind_pt_irq(xen_xc, xen_domid, s->msi->pirq,
+                              PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                              e_device, e_intx, 0)) {
+        PT_ERR(&s->dev, "MSI-INTx translation bind failed, fallback\n");
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, s->msi->pirq)) {
+            PT_ERR(&s->dev, "Unmapping of MSI failed.\n");
+        }
+        s->msi->pirq = PT_UNASSIGNED_PIRQ;
+        return -1;
+    }
+
+    pt_msi_set_enable(s, 1);
+    s->msi_trans_en = 1;
+
+    return 0;
+}
+
+void pt_disable_msi_translate(XenPCIPassthroughState *s)
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    /* MSI_ENABLE bit should be disabed until the new handler is set */
+    pt_msi_set_enable(s, 0);
+
+    e_device = PCI_SLOT(s->dev.devfn);
+    e_intx = pci_intx(s);
+
+    if (xc_domain_unbind_pt_irq(xen_xc, xen_domid, s->msi->pirq,
+                                PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                e_device, e_intx, 0)) {
+        PT_ERR(&s->dev, "Unbinding pt irq for MSI-INTx failed!\n");
+    }
+
+    if (s->machine_irq) {
+        if (xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, s->machine_irq,
+                                      0, e_device, e_intx)) {
+            PT_ERR(&s->dev, "Rebinding of interrupt failed!\n");
+        }
+    }
+
+    s->msi_trans_en = 0;
+}
+
+/*
+ * MSI-X virtualization functions
+ */
+
+static void msix_set_enable(XenPCIPassthroughState *s, int en)
+{
+    uint16_t val = 0;
+    uint32_t address = 0;
+
+    if (!s->msix) {
+        return;
+    }
+
+    address = s->msix->ctrl_offset;
+    if (!address) {
+        return;
+    }
+
+    host_pci_get_word(s->real_device, address, &val);
+    val &= ~PCI_MSIX_FLAGS_ENABLE;
+    if (en) {
+        val |= PCI_MSIX_FLAGS_ENABLE;
+    }
+    host_pci_set_word(s->real_device, address, val);
+}
+
+static void mask_physical_msix_entry(XenPCIPassthroughState *s,
+                                     int entry_nr, int mask)
+{
+    void *phys_off;
+
+    phys_off = s->msix->phys_iomem_base + 16 * entry_nr + 12;
+    *(uint32_t *)phys_off = mask;
+}
+
+static int pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr)
+{
+    XenMSIXEntry *entry = &s->msix->msix_entry[entry_nr];
+    int pirq = entry->pirq;
+    int gvec = entry->io_mem[2] & 0xff;
+    uint64_t gaddr = *(uint64_t *)&entry->io_mem[0];
+    uint32_t gflags = get_msi_gflags(entry->io_mem[2], gaddr);
+    int rc;
+
+    if (!entry->flags) {
+        return 0;
+    }
+
+    if (!gvec) {
+        /* if gvec is 0, the guest is asking for a particular pirq that
+         * is passed as dest_id */
+        pirq = ((gaddr >> 32) & 0xffffff00) |
+               (((gaddr & 0xffffffff) >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
+        if (!pirq) {
+            /* this probably identifies an misconfiguration of the guest,
+             * try the emulated path */
+            pirq = PT_UNASSIGNED_PIRQ;
+        } else {
+            PT_LOG(&s->dev, "requested pirq: %d\n", pirq);
+        }
+    }
+
+    /* Check if this entry is already mapped */
+    if (entry->pirq == PT_UNASSIGNED_PIRQ) {
+        rc = xc_physdev_map_pirq_msi(xen_xc, xen_domid, AUTO_ASSIGN, &pirq,
+                                     PCI_DEVFN(s->real_device->dev,
+                                               s->real_device->func),
+                                     s->real_device->bus, entry_nr,
+                                     s->msix->table_base);
+        if (rc) {
+            PT_ERR(&s->dev, "Mapping msix entry %x\n", entry_nr);
+            return rc;
+        }
+        entry->pirq = pirq;
+    }
+
+    PT_LOG(&s->dev, "Update msix entry 0x%x with pirq %d gvec 0x%x\n",
+            entry_nr, pirq, gvec);
+
+    rc = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec, pirq, gflags,
+                                  s->msix->mmio_base_addr);
+    if (rc) {
+        PT_ERR(&s->dev, "Updating msix irq %d info for entry %d\n",
+               pirq, entry_nr);
+
+        if (xc_physdev_unmap_pirq(xen_xc, xen_domid, entry->pirq)) {
+            PT_ERR(&s->dev, "Unmapping of MSI-X failed. (pirq: %d)\n",
+                   entry->pirq);
+        }
+        entry->pirq = PT_UNASSIGNED_PIRQ;
+        return rc;
+    }
+
+    entry->flags = 0;
+
+    return 0;
+}
+
+int pt_msix_update(XenPCIPassthroughState *s)
+{
+    XenPTMSIX *msix = s->msix;
+    int i;
+
+    for (i = 0; i < msix->total_entries; i++) {
+        pt_msix_update_one(s, i);
+    }
+
+    return 0;
+}
+
+void pt_msix_disable(XenPCIPassthroughState *s)
+{
+    PCIDevice *d = &s->dev;
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    int i = 0;
+    XenMSIXEntry *entry = NULL;
+
+    msix_set_enable(s, 0);
+
+    for (i = 0; i < s->msix->total_entries; i++) {
+        entry = &s->msix->msix_entry[i];
+
+        if (entry->pirq == PT_UNASSIGNED_PIRQ) {
+            continue;
+        }
+
+        gvec = entry->io_mem[2] & 0xff;
+        addr = *(uint64_t *)&entry->io_mem[0];
+        gflags = get_msi_gflags(entry->io_mem[2], addr);
+
+        PT_LOG(d, "Unbind msix with pirq %d, gvec 0x%x\n",
+                entry->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec,
+                                        entry->pirq, gflags)) {
+            PT_ERR(d, "Unbinding of MSI-X failed.\n");
+        } else {
+            PT_LOG(d, "Unmap msix with pirq %d\n", entry->pirq);
+
+            if (xc_physdev_unmap_pirq(xen_xc, xen_domid, entry->pirq)) {
+                PT_ERR(d, "Unmapping of MSI-X failed.\n");
+            }
+        }
+        /* clear msi-x info */
+        entry->pirq = PT_UNASSIGNED_PIRQ;
+        entry->flags = 0;
+    }
+}
+
+int pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index)
+{
+    XenMSIXEntry *entry;
+    int i, ret;
+
+    if (!(s->msix && s->msix->bar_index == bar_index)) {
+        return 0;
+    }
+
+    for (i = 0; i < s->msix->total_entries; i++) {
+        entry = &s->msix->msix_entry[i];
+        if (entry->pirq != PT_UNASSIGNED_PIRQ) {
+            ret = xc_domain_unbind_pt_irq(xen_xc, xen_domid, entry->pirq,
+                                          PT_IRQ_TYPE_MSI, 0, 0, 0, 0);
+            if (ret) {
+                PT_ERR(&s->dev, "unbind MSI-X entry %d failed\n", entry->pirq);
+            }
+            entry->flags = 1;
+        }
+    }
+    pt_msix_update(s);
+
+    return 0;
+}
+
+static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr,
+                                   uint32_t val)
+{
+    XenPCIPassthroughState *s = opaque;
+    PT_ERR(&s->dev, "Invalid write to MSI-X table,"
+           " only dword access is allowed.\n");
+}
+
+static void pci_msix_writel(void *opaque, target_phys_addr_t addr,
+                            uint32_t val)
+{
+    XenPCIPassthroughState *s = opaque;
+    XenPTMSIX *msix = s->msix;
+    XenMSIXEntry *entry;
+    int entry_nr, offset;
+    void *phys_off;
+    uint32_t vec_ctrl;
+
+    if (addr % 4) {
+        PT_ERR(&s->dev, "Unaligned dword access to MSI-X table, "
+                "addr 0x"TARGET_FMT_plx"\n", addr);
+        return;
+    }
+
+    entry_nr = addr / 16;
+    entry = &msix->msix_entry[entry_nr];
+    offset = (addr % 16) / 4;
+
+    /*
+     * If Xen intercepts the mask bit access, io_mem[3] may not be
+     * up-to-date. Read from hardware directly.
+     */
+    phys_off = s->msix->phys_iomem_base + 16 * entry_nr + 12;
+    vec_ctrl = *(uint32_t *)phys_off;
+
+    if (offset != 3 && msix->enabled && !(vec_ctrl & 0x1)) {
+        PT_ERR(&s->dev, "Can't update msix entry %d since MSI-X is already "
+                "enabled.\n", entry_nr);
+        return;
+    }
+
+    if (offset != 3 && entry->io_mem[offset] != val) {
+        entry->flags = 1;
+    }
+    entry->io_mem[offset] = val;
+
+    if (offset == 3) {
+        if (msix->enabled && !(val & 0x1)) {
+            pt_msix_update_one(s, entry_nr);
+        }
+        mask_physical_msix_entry(s, entry_nr, entry->io_mem[3] & 0x1);
+    }
+}
+
+static CPUWriteMemoryFunc *pci_msix_write[] = {
+    pci_msix_invalid_write,
+    pci_msix_invalid_write,
+    pci_msix_writel
+};
+
+static uint32_t pci_msix_invalid_read(void *opaque, target_phys_addr_t addr)
+{
+    XenPCIPassthroughState *s = opaque;
+    PT_ERR(&s->dev, "Invalid read to MSI-X table,"
+           " only dword access is allowed.\n");
+    return 0;
+}
+
+static uint32_t pci_msix_readl(void *opaque, target_phys_addr_t addr)
+{
+    XenPCIPassthroughState *s = opaque;
+    XenPTMSIX *msix = s->msix;
+    int entry_nr, offset;
+
+    if (addr % 4) {
+        PT_ERR(&s->dev, "Unaligned dword access to MSI-X table, "
+                "addr 0x"TARGET_FMT_plx"\n", addr);
+        return 0;
+    }
+
+    entry_nr = addr / 16;
+    offset = (addr % 16) / 4;
+
+    return msix->msix_entry[entry_nr].io_mem[offset];
+}
+
+static CPUReadMemoryFunc *pci_msix_read[] = {
+    pci_msix_invalid_read,
+    pci_msix_invalid_read,
+    pci_msix_readl
+};
+
+int pt_add_msix_mapping(XenPCIPassthroughState *s, int bar_index)
+{
+    if (!(s->msix && s->msix->bar_index == bar_index)) {
+        return 0;
+    }
+
+    return xc_domain_memory_mapping(xen_xc, xen_domid,
+         s->msix->mmio_base_addr >> XC_PAGE_SHIFT,
+         (s->bases[bar_index].access.maddr + s->msix->table_off)
+             >> XC_PAGE_SHIFT,
+         (s->msix->total_entries * 16 + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
+         DPCI_ADD_MAPPING);
+}
+
+int pt_remove_msix_mapping(XenPCIPassthroughState *s, int bar_index)
+{
+    if (!(s->msix && s->msix->bar_index == bar_index)) {
+        return 0;
+    }
+
+    s->msix->mmio_base_addr = s->bases[bar_index].e_physbase
+        + s->msix->table_off;
+
+    cpu_register_physical_memory(s->msix->mmio_base_addr,
+                                 s->msix->total_entries * 16,
+                                 s->msix->mmio_index);
+
+    return xc_domain_memory_mapping(xen_xc, xen_domid,
+         s->msix->mmio_base_addr >> XC_PAGE_SHIFT,
+         (s->bases[bar_index].access.maddr + s->msix->table_off)
+             >> XC_PAGE_SHIFT,
+         (s->msix->total_entries * 16 + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT,
+         DPCI_REMOVE_MAPPING);
+}
+
+int pt_msix_init(XenPCIPassthroughState *s, int base)
+{
+    uint8_t id = 0;
+    uint16_t control = 0;
+    uint32_t table_off = 0;
+    int i, total_entries, bar_index;
+    HostPCIDevice *d = s->real_device;
+    int fd;
+
+    if (host_pci_get_byte(d, base + PCI_CAP_LIST_ID, &id)) {
+        return -1;
+    }
+
+    if (id != PCI_CAP_ID_MSIX) {
+        PT_ERR(&s->dev, "Invalid id %#x base %#x\n", id, base);
+        return -1;
+    }
+
+    host_pci_get_word(d, base + 2, &control);
+    total_entries = control & 0x7ff;
+    total_entries += 1;
+
+    s->msix = g_malloc0(sizeof (XenPTMSIX)
+                        + total_entries * sizeof (XenMSIXEntry));
+
+    s->msix->total_entries = total_entries;
+    for (i = 0; i < total_entries; i++) {
+        s->msix->msix_entry[i].pirq = PT_UNASSIGNED_PIRQ;
+    }
+
+    s->msix->mmio_index =
+        cpu_register_io_memory(pci_msix_read, pci_msix_write,
+                               s, DEVICE_NATIVE_ENDIAN);
+
+    host_pci_get_long(d, base + PCI_MSIX_TABLE, &table_off);
+    bar_index = s->msix->bar_index = table_off & PCI_MSIX_FLAGS_BIRMASK;
+    table_off = s->msix->table_off = table_off & ~PCI_MSIX_FLAGS_BIRMASK;
+    s->msix->table_base = s->real_device->io_regions[bar_index].base_addr;
+    PT_LOG(&s->dev, "get MSI-X table BAR base 0x%"PRIx64"\n",
+           s->msix->table_base);
+
+    fd = open("/dev/mem", O_RDWR);
+    if (fd == -1) {
+        PT_ERR(&s->dev, "Can't open /dev/mem: %s\n", strerror(errno));
+        goto error_out;
+    }
+    PT_LOG(&s->dev, "table_off = %#x, total_entries = %d\n",
+           table_off, total_entries);
+    s->msix->table_offset_adjust = table_off & 0x0fff;
+    s->msix->phys_iomem_base =
+        mmap(0,
+             total_entries * 16 + s->msix->table_offset_adjust,
+             PROT_WRITE | PROT_READ,
+             MAP_SHARED | MAP_LOCKED,
+             fd,
+             s->msix->table_base + table_off - s->msix->table_offset_adjust);
+
+    if (s->msix->phys_iomem_base == MAP_FAILED) {
+        PT_ERR(&s->dev, "Can't map physical MSI-X table: %s\n",
+               strerror(errno));
+        close(fd);
+        goto error_out;
+    }
+    s->msix->phys_iomem_base = (char *)s->msix->phys_iomem_base
+        + s->msix->table_offset_adjust;
+
+    close(fd);
+
+    PT_LOG(&s->dev, "mapping physical MSI-X table to %p\n",
+           s->msix->phys_iomem_base);
+    return 0;
+
+error_out:
+    g_free(s->msix);
+    s->msix = NULL;
+    return -1;
+}
+
+void pt_msix_delete(XenPCIPassthroughState *s)
+{
+    /* unmap the MSI-X memory mapped register area */
+    if (s->msix->phys_iomem_base) {
+        PT_LOG(&s->dev, "unmapping physical MSI-X table from %p\n",
+           s->msix->phys_iomem_base);
+        munmap(s->msix->phys_iomem_base, s->msix->total_entries * 16 +
+           s->msix->table_offset_adjust);
+    }
+
+    if (s->msix->mmio_index > 0) {
+        cpu_unregister_io_memory(s->msix->mmio_index);
+    }
+
+    g_free(s->msix);
+    s->msix = NULL;
+}
-- 
Anthony PERARD


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel