WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 2/6] ioemu:passthrough: MSI-INTx interrupt translation support

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH 2/6] ioemu:passthrough: MSI-INTx interrupt translation support
From: Qing He <qing.he@xxxxxxxxx>
Date: Thu, 8 Jan 2009 17:06:45 +0800
Cc: Qing He <qing.he@xxxxxxxxx>
Delivery-date: Thu, 08 Jan 2009 01:11:08 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1231405609-23138-1-git-send-email-qing.he@xxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <1231405609-23138-1-git-send-email-qing.he@xxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
passthrough: MSI-INTx interrupt translation support
    
This patch enables Xen to use MSI for MSI-capable devices as the
underlying interrupt source even if the guest does not explicitly
use it. The guest will still see an IO-APIC based INTx interrupt
translated by Xen from the MSI irq.

If the guest enables MSI or MSI-X for the passthrough device, this
translation is automatically turned off. It can also be disabled in
the config file at domain startup time.

Signed-off-by: Qing He <qing.he@xxxxxxxxx>
---

diff --git a/hw/pass-through.c b/hw/pass-through.c
index d280ff6..625e644 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -34,6 +34,7 @@ struct php_dev {
     uint8_t r_bus;
     uint8_t r_dev;
     uint8_t r_func;
+    char *opt;
 };
 struct dpci_infos {
 
@@ -492,7 +493,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .size       = 2,
         .init_val   = 0x0000,
         .ro_mask    = 0x018E,
-        .emu_mask   = 0xFFFE,
+        .emu_mask   = 0xFFFF,
         .init       = pt_msgctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgctrl_reg_write,
@@ -692,7 +693,7 @@ static int token_value(char *token)
     return strtol(token, NULL, 16);
 }
 
-static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func)
+static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func, char **opt)
 {
     char *token;
     const char *delim = ":.-";
@@ -711,18 +712,60 @@ static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func)
     *dev  = token_value(token);
 
     token  = strsep(str, delim);
+    *opt = strchr(token, ',');
+    if (*opt)
+        *(*opt)++ = '\0';
+
     *func  = token_value(token);
 
     return 1;
 }
 
+static int get_next_keyval(char **option, char **key, char **val)
+{
+    char *opt, *k, *v;
+
+    k = *option;
+    opt = strchr(k, ',');
+    if (opt)
+        *opt++ = '\0';
+    v = strchr(k, '=');
+    if (!v)
+        return -1;
+    *v++ = '\0';
+
+    *key = k;
+    *val = v;
+    *option = opt;
+
+    return 0;
+}
+
+static void msi_set_enable(struct pt_dev *ptdev, int en)
+{
+    uint16_t val;
+    uint32_t address;
+    if (!ptdev->msi)
+        return;
+
+    address = ptdev->msi->ctrl_offset;
+    if (!address)
+        return;
+
+    val = pci_read_word(ptdev->pci_dev, address);
+    val &= ~PCI_MSI_FLAGS_ENABLE;
+    val |= en & PCI_MSI_FLAGS_ENABLE;
+    pci_write_word(ptdev->pci_dev, address, val);
+}
+
 /* Insert a new pass-through device into a specific pci slot.
  * input  dom:bus:dev.func@slot, chose free one if slot == 0
  * return -1: required slot not available
  *         0: no free hotplug slots, but normal slot should okay
  *        >0: the new hotplug slot
  */
-static int __insert_to_pci_slot(int bus, int dev, int func, int slot)
+static int __insert_to_pci_slot(int bus, int dev, int func, int slot,
+                                char *opt)
 {
     int i, php_slot;
 
@@ -759,6 +802,7 @@ found:
     dpci_infos.php_devs[php_slot].r_bus  = bus;
     dpci_infos.php_devs[php_slot].r_dev  = dev;
     dpci_infos.php_devs[php_slot].r_func = func;
+    dpci_infos.php_devs[php_slot].opt = opt;
     return PHP_TO_PCI_SLOT(php_slot);
 }
 
@@ -768,19 +812,19 @@ found:
 int insert_to_pci_slot(char *bdf_slt)
 {
     int seg, bus, dev, func, slot;
-    char *bdf_str, *slt_str;
+    char *bdf_str, *slt_str, *opt;
     const char *delim="@";
 
     bdf_str = strsep(&bdf_slt, delim);
     slt_str = bdf_slt;
     slot = token_value(slt_str);
 
-    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func))
+    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func, &opt))
     {
         return -1;
     }
 
-    return __insert_to_pci_slot(bus, dev, func, slot);
+    return __insert_to_pci_slot(bus, dev, func, slot, opt);
 
 }
 
@@ -807,8 +851,9 @@ int test_pci_slot(int slot)
 int bdf_to_slot(char *bdf_str)
 {
     int seg, bus, dev, func, i;
+    char *opt;
 
-    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func))
+    if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func, &opt))
     {
         return -1;
     }
@@ -1960,9 +2005,15 @@ static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
         pci_write_word(pdev, real_offset, reg_field & ~PCI_MSI_FLAGS_ENABLE);
     }
     ptdev->msi->flags |= (reg_field | MSI_FLAG_UNINIT);
+    ptdev->msi->ctrl_offset = real_offset;
     
     /* All register is 0 after reset, except first 4 byte */
     reg_field &= reg->ro_mask;
+
+    if (ptdev->msi_trans_cap) {
+        PT_LOG("Turning on MSI-INTx translation\n");
+        ptdev->msi_trans_en = 1;
+    }
     
     return reg_field;
 }
@@ -2673,6 +2724,34 @@ static int pt_linkctrl2_reg_write(struct pt_dev *ptdev,
     return 0;
 }
 
+static void pt_unmap_msi_translate(struct pt_dev *ptdev)
+{
+    uint16_t e_device, e_intx;
+    int rc;
+
+    /* MSI_ENABLE bit should be disabed until the new handler is set */
+    msi_set_enable(ptdev, 0);
+
+    e_device = (ptdev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+    rc = xc_domain_unbind_pt_irq(xc_handle, domid, ptdev->msi->pirq,
+                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                 e_device, e_intx, 0);
+    if (rc < 0)
+        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc);
+
+    if (ptdev->machine_irq)
+    {
+        rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq,
+                                       0, e_device, e_intx);
+        if ( rc < 0 )
+            PT_LOG("Error: Rebinding of interrupt failed! rc=%d\n", rc);
+    }
+
+    ptdev->msi_trans_en = 0;
+}
+
 /* write Message Control register */
 static int pt_msgctrl_reg_write(struct pt_dev *ptdev, 
     struct pt_reg_tbl *cfg_entry, 
@@ -2682,7 +2761,9 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
     uint16_t old_ctrl = cfg_entry->data;
+    uint8_t e_device, e_intx;
     PCIDevice *pd = (PCIDevice *)ptdev;
+    uint16_t val;
 
     /* Currently no support for multi-vector */
     if ((*value & PCI_MSI_FLAGS_QSIZE) != 0x0)
@@ -2699,21 +2780,29 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
     PT_LOG("old_ctrl:%04xh new_ctrl:%04xh\n", old_ctrl, cfg_entry->data);
     
     /* create value for writing to I/O device register */
+    val = *value;
     throughable_mask = ~reg->emu_mask & valid_mask;
     *value = ((*value & throughable_mask) | (dev_value & ~throughable_mask));
 
     /* update MSI */
-    if (*value & PCI_MSI_FLAGS_ENABLE)
+    if (val & PCI_MSI_FLAGS_ENABLE)
     {
         /* setup MSI pirq for the first time */
         if (ptdev->msi->flags & MSI_FLAG_UNINIT)
         {
-            /* Init physical one */
-            PT_LOG("setup msi for dev %x\n", pd->devfn);
-            if (pt_msi_setup(ptdev))
+            if (ptdev->msi_trans_en) {
+                PT_LOG("guest enabling MSI, disable MSI-INTx translation\n");
+                pt_unmap_msi_translate(ptdev);
+            }
+            else
             {
-                PT_LOG("pt_msi_setup error!!!\n");
-                return -1;
+                /* Init physical one */
+                PT_LOG("setup msi for dev %x\n", pd->devfn);
+                if (pt_msi_setup(ptdev))
+                {
+                    PT_LOG("pt_msi_setup error!!!\n");
+                    return -1;
+                }
             }
             pt_msi_update(ptdev);
 
@@ -2725,6 +2814,12 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
     else
         ptdev->msi->flags &= ~PCI_MSI_FLAGS_ENABLE;
 
+    /* pass through MSI_ENABLE bit when no MSI-INTx translation */
+    if (!ptdev->msi_trans_en) {
+        *value &= ~PCI_MSI_FLAGS_ENABLE;
+        *value |= val & PCI_MSI_FLAGS_ENABLE;
+    }
+
     return 0;
 }
 
@@ -2870,7 +2965,13 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
 
     /* update MSI-X */
     if ((*value & PCI_MSIX_ENABLE) && !(*value & PCI_MSIX_MASK))
+    {
+        if (ptdev->msi_trans_en) {
+            PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n");
+            pt_unmap_msi_translate(ptdev);
+        }
         pt_msix_update(ptdev);
+    }
 
     ptdev->msix->enabled = !!(*value & PCI_MSIX_ENABLE);
 
@@ -2879,7 +2980,8 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
 
 struct pt_dev * register_real_device(PCIBus *e_bus,
         const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev,
-        uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access)
+        uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access,
+        char *opt)
 {
     int rc = -1, i;
     struct pt_dev *assigned_device = NULL;
@@ -2887,6 +2989,8 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     uint8_t e_device, e_intx;
     struct pci_config_cf8 machine_bdf;
     int free_pci_slot = -1;
+    char *key, *val;
+    int msi_translate;
 
     PT_LOG("Assigning real physical device %02x:%02x.%x ...\n",
         r_bus, r_dev, r_func);
@@ -2908,13 +3012,41 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
 
     if ( e_devfn == PT_VIRT_DEVFN_AUTO ) {
         /*indicate a static assignment(not hotplug), so find a free PCI hot plug slot */
-        free_pci_slot = __insert_to_pci_slot(r_bus, r_dev, r_func, 0);
+        free_pci_slot = __insert_to_pci_slot(r_bus, r_dev, r_func, 0, NULL);
         if ( free_pci_slot > 0 )
             e_devfn = free_pci_slot  << 3;
         else
             PT_LOG("Error: no free virtual PCI hot plug slot, thus no live migration.\n");
     }
 
+    msi_translate = direct_pci_msitranslate;
+    while (opt) {
+        if (get_next_keyval(&opt, &key, &val)) {
+            PT_LOG("Error: unrecognized PCI assignment option \"%s\"\n", opt);
+            break;
+        }
+
+        if (strcmp(key, "msitranslate") == 0)
+        {
+            if (strcmp(val, "0") == 0 || strcmp(val, "no") == 0)
+            {
+                PT_LOG("Disable MSI translation via per device option\n");
+                msi_translate = 0;
+            }
+            else if (strcmp(val, "1") == 0 || strcmp(val, "yes") == 0)
+            {
+                PT_LOG("Enable MSI translation via per device option\n");
+                msi_translate = 1;
+            }
+            else
+                PT_LOG("Error: unrecognized value for msitranslate=\n");
+        }
+        else
+            PT_LOG("Error: unrecognized PCI assignment option \"%s=%s\"\n", key, val);
+
+    }
+
+
     /* Register device */
     assigned_device = (struct pt_dev *) pci_register_device(e_bus, e_dev_name,
                                 sizeof(struct pt_dev), e_devfn,
@@ -2929,6 +3061,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
         dpci_infos.php_devs[PCI_TO_PHP_SLOT(free_pci_slot)].pt_dev = assigned_device;
 
     assigned_device->pci_dev = pci_dev;
+    assigned_device->msi_trans_cap = msi_translate;
 
     /* Assign device */
     machine_bdf.reg = 0;
@@ -2960,6 +3093,28 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     /* fix virtual interrupt pin to INTA# */
     e_intx = 0;
 
+    while (assigned_device->msi_trans_en)
+    {
+        if (pt_msi_setup(assigned_device))
+        {
+            PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
+            assigned_device->msi_trans_en = 0;
+            break;
+        }
+
+        rc = xc_domain_bind_pt_irq(xc_handle, domid, assigned_device->msi->pirq,
+                                   PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                   e_device, e_intx, 0);
+        if ( rc < 0)
+        {
+            PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
+            assigned_device->msi_trans_en = 0;
+            break;
+        }
+        msi_set_enable(assigned_device, 1);
+        break;
+    }
+
     if ( PT_MACHINE_IRQ_AUTO == machine_irq )
     {
         int pirq = pci_dev->irq;
@@ -2973,9 +3125,15 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
             PT_LOG("Error: Mapping irq failed, rc = %d\n", rc);
         }
         else
+        {
             machine_irq = pirq;
+            assigned_device->machine_irq = pirq;
+        }
     }
 
+    if (assigned_device->msi_trans_en)
+        goto out;
+
     /* bind machine_irq to device */
     if ( 0 != machine_irq )
     {
@@ -2995,8 +3153,9 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     }
 
 out:
-    PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n", 
-        r_bus, r_dev, r_func);
+    PT_LOG("Real physical device %02x:%02x.%x registered successfuly!\n"
+           "IRQ type = %s\n", r_bus, r_dev, r_func,
+           assigned_device->msi_trans_en? "MSI-INTx":"INTx");
 
     return assigned_device;
 }
@@ -3029,9 +3188,9 @@ int unregister_real_device(int php_slot)
     e_device = (assigned_device->dev.devfn >> 3) & 0x1f;
     /* fix virtual interrupt pin to INTA# */
     e_intx = 0;
-    machine_irq = pci_dev->irq;
+    machine_irq = assigned_device->machine_irq;
 
-    if ( machine_irq != 0 ) {
+    if ( assigned_device->msi_trans_en == 0 && machine_irq ) {
         rc = xc_domain_unbind_pt_irq(xc_handle, domid, machine_irq, PT_IRQ_TYPE_PCI, 0,
                                        e_device, e_intx, 0);
         if ( rc < 0 )
@@ -3040,6 +3199,16 @@ int unregister_real_device(int php_slot)
             PT_LOG("Error: Unbinding of interrupt failed! rc=%d\n", rc);
         }
     }
+    else if (assigned_device->msi_trans_en)
+    {
+        rc = xc_domain_unbind_pt_irq(xc_handle, domid, assigned_device->msi->pirq,
+                                     PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                     e_device, e_intx, 0);
+        if (rc < 0)
+            PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc);
+    }
+
+    /* TODO: unmap passthrough MSI and MSI-X irqs */
 
     /* delete all emulated config registers */
     pt_config_delete(assigned_device);
@@ -3075,7 +3244,10 @@ int power_on_php_slot(int php_slot)
             php_dev->r_dev,
             php_dev->r_func,
             PT_MACHINE_IRQ_AUTO,
-            dpci_infos.pci_access);
+            dpci_infos.pci_access,
+            php_dev->opt);
+
+    php_dev->opt = NULL;
 
     php_dev->pt_dev = pt_dev;
 
@@ -3097,6 +3269,7 @@ int pt_init(PCIBus *e_bus, const char *direct_pci)
     char slot_str[8];
     char *direct_pci_head = NULL;
     char *direct_pci_p = NULL;
+    char *opt;
 
     /* Initialize libpci */
     pci_access = pci_alloc();
@@ -3125,11 +3298,11 @@ int pt_init(PCIBus *e_bus, const char *direct_pci)
     vslots = qemu_mallocz ( strlen(direct_pci) / 3 );
 
     /* Assign given devices to guest */
-    while ( next_bdf(&direct_pci_p, &seg, &b, &d, &f) )
+    while ( next_bdf(&direct_pci_p, &seg, &b, &d, &f, &opt) )
     {
         /* Register real device with the emulated bus */
         pt_dev = register_real_device(e_bus, "DIRECT PCI", PT_VIRT_DEVFN_AUTO,
-            b, d, f, PT_MACHINE_IRQ_AUTO, pci_access);
+            b, d, f, PT_MACHINE_IRQ_AUTO, pci_access, opt);
         if ( pt_dev == NULL )
         {
             PT_LOG("Error: Registration failed (%02x:%02x.%x)\n", b, d, f);
diff --git a/hw/pass-through.h b/hw/pass-through.h
index 8aa664b..a7d2727 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -121,6 +121,7 @@ struct pt_region {
 
 struct pt_msi_info {
     uint32_t flags;
+    uint32_t ctrl_offset; /* saved control offset */
     int pirq;          /* guest pirq corresponding */
     uint32_t addr_lo;  /* guest message address */
     uint32_t addr_hi;  /* guest message upper address */
@@ -158,6 +159,10 @@ struct pt_dev {
                                                 /* emul reg group list */
     struct pt_msi_info *msi;                    /* MSI virtualization */
     struct pt_msix_info *msix;                  /* MSI-X virtualization */
+    int machine_irq;                            /* saved pirq */
+    /* Physical MSI to guest INTx translation when possible */
+    int msi_trans_cap;
+    int msi_trans_en;
 };
 
 /* Used for formatting PCI BDF into cf8 format */
diff --git a/hw/pci.h b/hw/pci.h
index 4adc4d7..a527a39 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -64,6 +64,7 @@ struct PCIDevice {
 };
 
 extern char direct_pci_str[];
+extern int direct_pci_msitranslate;
 
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
                                int instance_size, int devfn,
diff --git a/xenstore.c b/xenstore.c
index 86e8b63..ff3d023 100644
--- a/xenstore.c
+++ b/xenstore.c
@@ -290,8 +290,10 @@ const char *xenstore_get_guest_uuid(void) {
 #endif
 }
 
-#define DIRECT_PCI_STR_LEN 160
+#define DIRECT_PCI_STR_LEN 512
+#define PT_PCI_MSITRANSLATE_DEFAULT 1
 char direct_pci_str[DIRECT_PCI_STR_LEN];
+int direct_pci_msitranslate;
 void xenstore_parse_domain_config(int hvm_domid)
 {
     char **e_danger = NULL;
@@ -556,20 +558,50 @@ void xenstore_parse_domain_config(int hvm_domid)
             free(dev);
             dev = xs_read(xsh, XBT_NULL, buf, &len);
 
-            if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN ) {
+            if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN - 1) {
                 fprintf(stderr, "qemu: too many pci pass-through devices\n");
                 memset(direct_pci_str, 0, DIRECT_PCI_STR_LEN);
                 goto out;
             }
 
+
             /* append to direct_pci_str */
+            if ( !dev )
+                continue;
+
+            strcat(direct_pci_str, dev);
+
+            if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/opts-%d",
+                          hvm_domid, pci_devid, i) != -1) {
+                free(dev);
+                dev = xs_read(xsh, XBT_NULL, buf, &len);
+            }
             if ( dev ) {
+                if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN - 2) {
+                    fprintf(stderr, "qemu: too many pci pass-through devices\n");
+                    memset(direct_pci_str, 0, DIRECT_PCI_STR_LEN);
+                    goto out;
+                }
+                strcat(direct_pci_str, ",");
                 strcat(direct_pci_str, dev);
-                strcat(direct_pci_str, "-");
             }
+
+            strcat(direct_pci_str, "-");
         }
     }
 
+    /* get the pci pass-through parameter */
+    if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/msitranslate",
+                  hvm_domid, pci_devid) == -1)
+        goto out;
+
+    free(params);
+    params = xs_read(xsh, XBT_NULL, buf, &len);
+    if (params)
+        direct_pci_msitranslate = atoi(params);
+    else
+        direct_pci_msitranslate = PT_PCI_MSITRANSLATE_DEFAULT;
+
  out:
     free(danger_type);
     free(params);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel