This is an updated patch per Shimada-san's advice.
Commit 8c771eb6294afc5b3754a9e3de51568d4e5986c2 enables the guest OS
to program the D0-D3hot states of the assigned device. However, on
some PCI devices, entering the D3hot state causes domain
creation/destruction to fail.
With this patch, we can configure, for each PCI passthrough device,
whether or not the guest is allowed to manage the PCI D-states.
A corresponding change is committed to xen-unstable.hg.
changeset: 19279:ec671455fb05ca6714deeaca78aacb1026ca4752
Signed-off-by: Kouya Shimura <kouya@xxxxxxxxxxxxxx>
diff --git a/hw/pass-through.c b/hw/pass-through.c
index 4a86309..78a8e8f 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -105,6 +105,9 @@ static int pt_long_reg_read(struct pt_dev *ptdev,
static int pt_bar_reg_read(struct pt_dev *ptdev,
struct pt_reg_tbl *cfg_entry,
uint32_t *value, uint32_t valid_mask);
+static int pt_pmcsr_reg_read(struct pt_dev *ptdev,
+ struct pt_reg_tbl *cfg_entry,
+ uint16_t *value, uint16_t valid_mask);
static int pt_byte_reg_write(struct pt_dev *ptdev,
struct pt_reg_tbl *cfg_entry,
uint8_t *value, uint8_t dev_value, uint8_t valid_mask);
@@ -407,7 +410,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
.ro_mask = 0xE1FC,
.emu_mask = 0x8100,
.init = pt_pmcsr_reg_init,
- .u.w.read = pt_word_reg_read,
+ .u.w.read = pt_pmcsr_reg_read,
.u.w.write = pt_pmcsr_reg_write,
.u.w.restore = pt_pmcsr_reg_restore,
},
@@ -2341,6 +2344,9 @@ static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev,
{
PCIDevice *d = &ptdev->dev;
+ if (!ptdev->power_mgmt)
+ return reg->init_val;
+
/* set Power Management Capabilities register */
ptdev->pm_state->pmc_field = *(uint16_t *)(d->config + real_offset);
@@ -2354,6 +2360,9 @@ static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev,
PCIDevice *d = &ptdev->dev;
uint16_t cap_ver = 0;
+ if (!ptdev->power_mgmt)
+ return reg->init_val;
+
/* check PCI Power Management support version */
cap_ver = ptdev->pm_state->pmc_field & PCI_PM_CAP_VER_MASK;
@@ -2553,6 +2562,9 @@ static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev,
static uint8_t pt_pm_size_init(struct pt_dev *ptdev,
struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
{
+ if (!ptdev->power_mgmt)
+ return grp_reg->grp_size;
+
ptdev->pm_state = qemu_mallocz(sizeof(struct pt_pm_info));
if (!ptdev->pm_state)
{
@@ -2806,6 +2818,25 @@ static int pt_bar_reg_read(struct pt_dev *ptdev,
return 0;
}
+
+/* read Power Management Control/Status register (PMCSR); the result is handed back through *value */
+static int pt_pmcsr_reg_read(struct pt_dev *ptdev,
+ struct pt_reg_tbl *cfg_entry,
+ uint16_t *value, uint16_t valid_mask)
+{
+ struct pt_reg_info_tbl *reg = cfg_entry->reg;
+ uint16_t valid_emu_mask = reg->emu_mask; /* start from the register's static emulation mask */
+
+ if (!ptdev->power_mgmt) /* power_mgmt disabled for this device: widen the emulation mask */
+ valid_emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET; /* power-state and No_Soft_Reset bits become emulated too */
+
+ valid_emu_mask = valid_emu_mask & valid_mask ; /* keep only the bits selected by valid_mask */
+ *value = PT_MERGE_VALUE(*value, cfg_entry->data, ~valid_emu_mask); /* NOTE(review): presumably emulated bits are taken from cfg_entry->data and the rest stay as passed in *value -- confirm against PT_MERGE_VALUE */
+
+ return 0; /* no failure path in this handler */
+}
+
+
/* write byte size emulate register */
static int pt_byte_reg_write(struct pt_dev *ptdev,
struct pt_reg_tbl *cfg_entry,
@@ -3077,19 +3108,26 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev,
{
struct pt_reg_info_tbl *reg = cfg_entry->reg;
PCIDevice *d = &ptdev->dev;
+ uint16_t emu_mask = reg->emu_mask;
uint16_t writable_mask = 0;
uint16_t throughable_mask = 0;
struct pt_pm_info *pm_state = ptdev->pm_state;
uint16_t read_val = 0;
+ if (!ptdev->power_mgmt)
+ emu_mask |= PCI_PM_CTRL_STATE_MASK | PCI_PM_CTRL_NO_SOFT_RESET;
+
/* modify emulate register */
- writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask;
+ writable_mask = emu_mask & ~reg->ro_mask & valid_mask;
cfg_entry->data = PT_MERGE_VALUE(*value, cfg_entry->data, writable_mask);
/* create value for writing to I/O device register */
- throughable_mask = ~reg->emu_mask & valid_mask;
+ throughable_mask = ~emu_mask & valid_mask;
*value = PT_MERGE_VALUE(*value, dev_value, throughable_mask);
+ if (!ptdev->power_mgmt)
+ return 0;
+
/* set I/O device power state */
pm_state->cur_state = (dev_value & PCI_PM_CTRL_STATE_MASK);
@@ -3564,7 +3602,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
struct pci_config_cf8 machine_bdf;
int free_pci_slot = -1;
char *key, *val;
- int msi_translate;
+ int msi_translate, power_mgmt;
PT_LOG("Assigning real physical device %02x:%02x.%x ...\n",
r_bus, r_dev, r_func);
@@ -3597,6 +3635,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
}
msi_translate = direct_pci_msitranslate;
+ power_mgmt = direct_pci_power_mgmt;
while (opt) {
if (get_next_keyval(&opt, &key, &val)) {
PT_LOG("Error: unrecognized PCI assignment option \"%s\"\n", opt);
@@ -3618,6 +3657,21 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
else
PT_LOG("Error: unrecognized value for msitranslate=\n");
}
+ else if (strcmp(key, "power_mgmt") == 0)
+ {
+ if (strcmp(val, "0") == 0)
+ {
+ PT_LOG("Disable PCI Power Management\n");
+ power_mgmt = 0;
+ }
+ else if (strcmp(val, "1") == 0)
+ {
+ PT_LOG("Enable PCI Power Management\n");
+ power_mgmt = 1;
+ }
+ else
+ PT_LOG("Error: unrecognized value for power_mgmt=\n");
+ }
else
PT_LOG("Error: unrecognized PCI assignment option \"%s=%s\"\n",
key, val);
@@ -3639,6 +3693,7 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
assigned_device->pci_dev = pci_dev;
assigned_device->msi_trans_cap = msi_translate;
+ assigned_device->power_mgmt = power_mgmt;
/* Assign device */
machine_bdf.reg = 0;
diff --git a/hw/pass-through.h b/hw/pass-through.h
index e86d311..b7b5a79 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -217,6 +217,7 @@ struct pt_dev {
/* Physical MSI to guest INTx translation when possible */
int msi_trans_cap;
int msi_trans_en;
+ int power_mgmt;
struct pt_pm_info *pm_state; /* PM virtualization */
};
diff --git a/hw/pci.h b/hw/pci.h
index 2800499..10fa601 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -65,6 +65,7 @@ struct PCIDevice {
extern char direct_pci_str[];
extern int direct_pci_msitranslate;
+extern int direct_pci_power_mgmt;
PCIDevice *pci_register_device(PCIBus *bus, const char *name,
int instance_size, int devfn,
diff --git a/xenstore.c b/xenstore.c
index 928e950..4ee6ceb 100644
--- a/xenstore.c
+++ b/xenstore.c
@@ -303,8 +303,10 @@ const char *xenstore_get_guest_uuid(void)
#define DIRECT_PCI_STR_LEN 512
#define PT_PCI_MSITRANSLATE_DEFAULT 1
+#define PT_PCI_POWER_MANAGEMENT_DEFAULT 0
char direct_pci_str[DIRECT_PCI_STR_LEN];
int direct_pci_msitranslate;
+int direct_pci_power_mgmt;
void xenstore_parse_domain_config(int hvm_domid)
{
char **e_danger = NULL;
@@ -603,15 +605,26 @@ void xenstore_parse_domain_config(int hvm_domid)
/* get the pci pass-through parameter */
if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/msitranslate",
- hvm_domid, pci_devid) == -1)
- goto out;
+ hvm_domid, pci_devid) != -1)
+ {
+ free(params);
+ params = xs_read(xsh, XBT_NULL, buf, &len);
+ if (params)
+ direct_pci_msitranslate = atoi(params);
+ else
+ direct_pci_msitranslate = PT_PCI_MSITRANSLATE_DEFAULT;
+ }
- free(params);
- params = xs_read(xsh, XBT_NULL, buf, &len);
- if (params)
- direct_pci_msitranslate = atoi(params);
- else
- direct_pci_msitranslate = PT_PCI_MSITRANSLATE_DEFAULT;
+ if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/power_mgmt",
+ hvm_domid, pci_devid) != -1)
+ {
+ free(params);
+ params = xs_read(xsh, XBT_NULL, buf, &len);
+ if (params)
+ direct_pci_power_mgmt = atoi(params);
+ else
+ direct_pci_power_mgmt = PT_PCI_POWER_MANAGEMENT_DEFAULT;
+ }
out:
free(danger_type);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|