WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 2/2] ioemu: Enable guest OS to program D0-D3hot state

To: xen-devel@xxxxxxxxxxxxxxxxxxx, Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 2/2] ioemu: Enable guest OS to program D0-D3hot states of an assigned device
From: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>
Date: Thu, 05 Feb 2009 19:23:11 +0900
Cc: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
Delivery-date: Thu, 05 Feb 2009 02:24:03 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <20090205162655.2F57.SHIMADA-YXB@xxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <20090204101439.GN25835%yamahata@xxxxxxxxxxxxx> <20090205162655.2F57.SHIMADA-YXB@xxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
This patch enables the guest OS to program the D0-D3hot power states of
an assigned device.

This patch depends on the following patch, which I sent earlier:

    [PATCH 1/2] libxc: Add xc_domain_unbind_msi_irq


This is a revised version of the patch, based on the review comments.

- Use LIST_FOREACH() instead of accessing lh_first and le_next directly.
- Use pci_{read, write}_block instead of a "switch (len)" over the cases
  1, 2 and 4 that calls pci_{read, write}_{byte, word, long}().

The existing code in pass-through.c can be cleaned up in the same
manner. I will submit a separate patch for that cleanup.

Thanks,
--
Yuji Shimada.


Signed-off-by: Yuji Shimada <shimada-yxb@xxxxxxxxxxxxxxx>

diff --git a/hw/pass-through.c b/hw/pass-through.c
index e76a3c3..a94bd23 100644
--- a/hw/pass-through.c
+++ b/hw/pass-through.c
@@ -27,6 +27,7 @@
 #include "pci/pci.h"
 #include "pt-msi.h"
 #include "qemu-xen.h"
+#include <unistd.h>
 
 struct php_dev {
     struct pt_dev *pt_dev;
@@ -60,6 +61,10 @@ static uint32_t pt_irqpin_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev,
+    struct pt_reg_info_tbl *reg, uint32_t real_offset);
+static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev,
+    struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
@@ -78,6 +83,8 @@ static uint32_t pt_msixctrl_reg_init(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg, uint32_t real_offset);
 static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev,
     struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
+static uint8_t pt_pm_size_init(struct pt_dev *ptdev,
+    struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
 static uint8_t pt_msi_size_init(struct pt_dev *ptdev,
     struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
 static uint8_t pt_msix_size_init(struct pt_dev *ptdev,
@@ -146,6 +153,24 @@ static int pt_msgdata_reg_write(struct pt_dev *ptdev,
 static int pt_msixctrl_reg_write(struct pt_dev *ptdev, 
     struct pt_reg_tbl *cfg_entry, 
     uint16_t *value, uint16_t dev_value, uint16_t valid_mask);
+static int pt_byte_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint8_t dev_value, uint8_t *value);
+static int pt_word_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value);
+static int pt_long_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value);
+static int pt_cmd_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value);
+static int pt_pmcsr_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value);
+static int pt_bar_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value);
 
 /* pt_reg_info_tbl declaration
  * - only for emulated register (either a part or whole bit).
@@ -166,6 +191,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_vendor_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Device ID reg */
     {
@@ -177,6 +203,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_device_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Command reg */
     {
@@ -188,6 +215,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_cmd_reg_write,
+        .u.w.restore  = pt_cmd_reg_restore,
     },
     /* Capabilities Pointer reg */
     {
@@ -199,6 +227,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Status reg */
     /* use emulated Cap Ptr value to initialize, 
@@ -213,6 +242,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_status_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Cache Line Size reg */
     {
@@ -224,6 +254,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = pt_byte_reg_restore,
     },
     /* Latency Timer reg */
     {
@@ -235,6 +266,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = pt_byte_reg_restore,
     },
     /* Header Type reg */
     {
@@ -246,6 +278,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Interrupt Line reg */
     {
@@ -257,6 +290,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_common_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Interrupt Pin reg */
     {
@@ -268,6 +302,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_irqpin_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* BAR 0 reg */
     /* mask of BAR need to be decided later, depends on IO/MEM type */
@@ -278,6 +313,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 1 reg */
     {
@@ -287,6 +323,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 2 reg */
     {
@@ -296,6 +333,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 3 reg */
     {
@@ -305,6 +343,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 4 reg */
     {
@@ -314,6 +353,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* BAR 5 reg */
     {
@@ -323,6 +363,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_bar_reg_read,
         .u.dw.write = pt_bar_reg_write,
+        .u.dw.restore = pt_bar_reg_restore,
     },
     /* Expansion ROM BAR reg */
     {
@@ -334,6 +375,7 @@ static struct pt_reg_info_tbl pt_emu_reg_header0_tbl[] = {
         .init       = pt_bar_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_exp_rom_bar_reg_write,
+        .u.dw.restore = pt_long_reg_restore,
     },
     {
         .size = 0,
@@ -352,6 +394,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Power Management Capabilities reg */
     {
@@ -359,10 +402,11 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
         .size       = 2,
         .init_val   = 0x0000,
         .ro_mask    = 0xFFFF,
-        .emu_mask   = 0xFFE8,
-        .init       = pt_common_reg_init,
+        .emu_mask   = 0xF9C8,
+        .init       = pt_pmc_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_word_reg_write,
+        .u.w.restore  = NULL,
     },
     /* PCI Power Management Control/Status reg */
     {
@@ -370,21 +414,11 @@ static struct pt_reg_info_tbl pt_emu_reg_pm_tbl[] = {
         .size       = 2,
         .init_val   = 0x0008,
         .ro_mask    = 0x60FC,
-        .emu_mask   = 0xFF0B,
-        .init       = pt_common_reg_init,
+        .emu_mask   = 0x8100,
+        .init       = pt_pmcsr_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_pmcsr_reg_write,
-    },
-    /* Data reg */
-    {
-        .offset     = PCI_PM_DATA_REGISTER,
-        .size       = 1,
-        .init_val   = 0x00,
-        .ro_mask    = 0xFF,
-        .emu_mask   = 0xFF,
-        .init       = pt_common_reg_init,
-        .u.b.read   = pt_byte_reg_read,
-        .u.b.write  = pt_byte_reg_write,
+        .u.w.restore  = pt_pmcsr_reg_restore,
     },
     {
         .size = 0,
@@ -403,6 +437,7 @@ static struct pt_reg_info_tbl pt_emu_reg_vpd_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     {
         .size = 0,
@@ -421,6 +456,7 @@ static struct pt_reg_info_tbl pt_emu_reg_vendor_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     {
         .size = 0,
@@ -439,6 +475,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Device Capabilities reg */
     {
@@ -450,6 +487,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_common_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_long_reg_write,
+        .u.dw.restore = NULL,
     },
     /* Device Control reg */
     {
@@ -461,6 +499,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_common_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_devctrl_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     /* Link Control reg */
     {
@@ -472,6 +511,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_linkctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_linkctrl_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     /* Device Control 2 reg */
     {
@@ -483,6 +523,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_devctrl2_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_devctrl2_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     /* Link Control 2 reg */
     {
@@ -494,6 +535,7 @@ static struct pt_reg_info_tbl pt_emu_reg_pcie_tbl[] = {
         .init       = pt_linkctrl2_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_linkctrl2_reg_write,
+        .u.w.restore  = pt_word_reg_restore,
     },
     {
         .size = 0,
@@ -512,6 +554,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Message Control reg */
     {
@@ -523,6 +566,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgctrl_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Message Address reg */
     {
@@ -534,6 +578,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgaddr32_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_msgaddr32_reg_write,
+        .u.dw.restore = NULL,
     },
     /* Message Upper Address reg (if PCI_MSI_FLAGS_64BIT set) */
     {
@@ -545,6 +590,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgaddr64_reg_init,
         .u.dw.read  = pt_long_reg_read,
         .u.dw.write = pt_msgaddr64_reg_write,
+        .u.dw.restore = NULL,
     },
     /* Message Data reg (16 bits of data for 32-bit devices) */
     {
@@ -556,6 +602,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgdata_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
     },
     /* Message Data reg (16 bits of data for 64-bit devices) */
     {
@@ -567,6 +614,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msi_tbl[] = {
         .init       = pt_msgdata_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msgdata_reg_write,
+        .u.w.restore  = NULL,
     },
     {
         .size = 0,
@@ -585,6 +633,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msix_tbl[] = {
         .init       = pt_ptr_reg_init,
         .u.b.read   = pt_byte_reg_read,
         .u.b.write  = pt_byte_reg_write,
+        .u.b.restore  = NULL,
     },
     /* Message Control reg */
     {
@@ -596,6 +645,7 @@ static struct pt_reg_info_tbl pt_emu_reg_msix_tbl[] = {
         .init       = pt_msixctrl_reg_init,
         .u.w.read   = pt_word_reg_read,
         .u.w.write  = pt_msixctrl_reg_write,
+        .u.w.restore  = NULL,
     },
     {
         .size = 0,
@@ -624,7 +674,7 @@ static const struct pt_reg_grp_info_tbl pt_emu_reg_grp_tbl[] = {
         .grp_id     = PCI_CAP_ID_PM,
         .grp_type   = GRP_TYPE_EMU,
         .grp_size   = PCI_PM_SIZEOF,
-        .size_init  = pt_reg_grp_size_init,
+        .size_init  = pt_pm_size_init,
         .emu_reg_tbl= pt_emu_reg_pm_tbl,
     },
     /* AGP Capability Structure reg group */
@@ -777,23 +827,6 @@ static int get_next_keyval(char **option, char **key, char **val)
     return 0;
 }
 
-static void msi_set_enable(struct pt_dev *ptdev, int en)
-{
-    uint16_t val;
-    uint32_t address;
-    if (!ptdev->msi)
-        return;
-
-    address = ptdev->msi->ctrl_offset;
-    if (!address)
-        return;
-
-    val = pci_read_word(ptdev->pci_dev, address);
-    val &= ~PCI_MSI_FLAGS_ENABLE;
-    val |= en & PCI_MSI_FLAGS_ENABLE;
-    pci_write_word(ptdev->pci_dev, address, val);
-}
-
 /* Insert a new pass-through device into a specific pci slot.
  * input  dom:bus:dev.func@slot, chose free one if slot == 0
  * return -1: required slot not available
@@ -1084,6 +1117,7 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val,
 {
     struct pt_dev *assigned_device = (struct pt_dev *)d;
     struct pci_dev *pci_dev = assigned_device->pci_dev;
+    struct pt_pm_info *pm_state = assigned_device->pm_state;
     struct pt_reg_grp_tbl *reg_grp_entry = NULL;
     struct pt_reg_grp_info_tbl *reg_grp = NULL;
     struct pt_reg_tbl *reg_entry = NULL;
@@ -1144,6 +1178,13 @@ static void pt_pci_write_config(PCIDevice *d, uint32_t address, uint32_t val,
             (d->devfn & 0x7), address, len);
     }
 
+    /* check power state transition flags */
+    if (pm_state->flags & PT_FLAG_TRANSITING)
+        /* can't accept untill previous power state transition is completed.
+         * so finished previous request here.
+         */
+        qemu_run_one_timer(pm_state->pm_timer);
+
     /* find register group entry */
     reg_grp_entry = pt_find_reg_grp(assigned_device, address);
     if (reg_grp_entry)
@@ -1274,6 +1315,11 @@ out:
         break;
     }
 
+    if (pm_state->flags & PT_FLAG_TRANSITING)
+        /* set QEMUTimer */
+        qemu_mod_timer(pm_state->pm_timer,
+            (qemu_get_clock(rt_clock) + pm_state->pm_delay));
+
 exit:
     return;
 }
@@ -1282,6 +1328,7 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
 {
     struct pt_dev *assigned_device = (struct pt_dev *)d;
     struct pci_dev *pci_dev = assigned_device->pci_dev;
+    struct pt_pm_info *pm_state = assigned_device->pm_state;
     uint32_t val = 0xFFFFFFFF;
     struct pt_reg_grp_tbl *reg_grp_entry = NULL;
     struct pt_reg_grp_info_tbl *reg_grp = NULL;
@@ -1324,6 +1371,13 @@ static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, int len)
         goto exit;
     }
 
+    /* check power state transition flags */
+    if (pm_state->flags & PT_FLAG_TRANSITING)
+        /* can't accept untill previous power state transition is completed.
+         * so finished previous request here.
+         */
+        qemu_run_one_timer(pm_state->pm_timer);
+
     /* find register group entry */
     reg_grp_entry = pt_find_reg_grp(assigned_device, address);
     if (reg_grp_entry)
@@ -1643,6 +1697,35 @@ uint8_t find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
     return 0;
 }
 
+uint32_t find_ext_cap_offset(struct pci_dev *pci_dev, uint32_t cap)
+{
+    uint32_t header = 0;
+    int max_cap = 480;
+    int pos = 0x100;
+
+    do
+    {
+        header = pci_read_long(pci_dev, pos);
+        /*
+         * If we have no capabilities, this is indicated by cap ID,
+         * cap version and next pointer all being 0.
+         */
+        if (header == 0)
+            break;
+
+        if (PCI_EXT_CAP_ID(header) == cap)
+            return pos;
+
+        pos = PCI_EXT_CAP_NEXT(header);
+        if (pos < 0x100)
+            break;
+
+        max_cap--;
+    }while (max_cap > 0);
+
+    return 0;
+}
+
 /* parse BAR */
 static int pt_bar_reg_parse(
         struct pt_dev *ptdev, struct pt_reg_info_tbl *reg)
@@ -1751,6 +1834,287 @@ static void pt_bar_mapping(struct pt_dev *ptdev, int io_enable, int mem_enable)
     return;
 }
 
+/* check power state transition */
+int check_power_state(struct pt_dev *ptdev)
+{
+    struct pt_pm_info *pm_state = ptdev->pm_state;
+    PCIDevice *d = &ptdev->dev;
+    uint16_t read_val = 0;
+    uint16_t cur_state = 0;
+
+    /* get current power state */
+    read_val = pci_read_word(ptdev->pci_dev,
+                                (pm_state->pm_base + PCI_PM_CTRL));
+    cur_state = read_val & PCI_PM_CTRL_STATE_MASK;
+
+    if (pm_state->req_state != cur_state)
+    {
+        PT_LOG("Error: Failed to change power state. " 
+            "[%02x:%02x.%x][requested state:%d][current state:%d]\n", 
+            pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), 
+            pm_state->req_state, cur_state);
+        return -1;
+    }
+    return 0;
+}
+
+/* save AER register */
+static void pt_aer_reg_save(struct pt_dev *ptdev)
+{
+    PCIDevice *d = &ptdev->dev;
+    uint32_t aer_base = ptdev->pm_state->aer_base;
+    int i = 0;
+    /* Root Port and Root Complex Event Collector need size expansion */
+    int aer_size = 0x2c;
+
+    for (i=0; i < aer_size; i+=4)
+    {
+        switch (i) {
+        /* after reset, following register values should be restored.
+         * So, save them.
+         */
+        case PCI_ERR_UNCOR_MASK:
+        case PCI_ERR_UNCOR_SEVER:
+        case PCI_ERR_COR_MASK:
+        case PCI_ERR_CAP:
+            *(uint32_t*)(d->config + (aer_base + i))
+                 = pci_read_long(ptdev->pci_dev, (aer_base + i));
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+/* restore AER register */
+static void pt_aer_reg_restore(struct pt_dev *ptdev)
+{
+    PCIDevice *d = &ptdev->dev;
+    uint32_t aer_base = ptdev->pm_state->aer_base;
+    int i = 0;
+    uint32_t config = 0;
+    /* Root Port and Root Complex Event Collector need size expansion */
+    int aer_size = 0x2c;
+
+    for (i=0; i < aer_size; i+=4)
+    {
+        switch (i) {
+        /* the following registers should be reconfigured to correct values
+         * after reset. restore them.
+         */
+        case PCI_ERR_UNCOR_MASK:
+        case PCI_ERR_UNCOR_SEVER:
+        case PCI_ERR_COR_MASK:
+        case PCI_ERR_CAP:
+            config = *(uint32_t*)(d->config + (aer_base + i));
+            pci_write_long(ptdev->pci_dev, (aer_base + i), config);
+            break;
+        /* other registers should not be reconfigured after reset 
+         * if there is no reason
+         */
+        default:
+            break;
+        }
+    }
+}
+
+/* reset Interrupt and I/O resource  */
+void pt_reset_interrupt_and_io_mapping(struct pt_dev *ptdev)
+{
+    PCIDevice *d = &ptdev->dev;
+    PCIIORegion *r;
+    int i = 0;
+
+    /* disable MSI/MSI-X and MSI-INTx translation */
+    if (ptdev->msi)
+        pt_msi_disable(ptdev);
+    if (ptdev->msix)
+        pt_msix_disable(ptdev);
+
+    /* clear all virtual region address */
+    for (i=0; i<PCI_NUM_REGIONS; i++)
+    {
+        r = &d->io_regions[i];
+        r->addr = -1;
+    }
+
+    /* unmapping BAR */
+    pt_bar_mapping(ptdev, 0, 0);
+}
+
+/* restore a part of I/O device register */
+static void pt_config_restore(struct pt_dev *ptdev)
+{
+    struct pt_reg_grp_tbl *reg_grp_entry = NULL;
+    struct pt_reg_grp_info_tbl *reg_grp = NULL;
+    struct pt_reg_tbl *reg_entry = NULL;
+    struct pt_reg_info_tbl *reg = NULL;
+    uint32_t real_offset = 0;
+    uint32_t read_val = 0;
+    uint32_t val = 0;
+    int ret = 0;
+    PCIDevice *d = &ptdev->dev;
+
+    /* find emulate register group entry */
+    LIST_FOREACH(reg_grp_entry, &ptdev->reg_grp_tbl_head, entries)
+    {
+        /* find emulate register entry */
+        LIST_FOREACH(reg_entry, &reg_grp_entry->reg_tbl_head, entries)
+        {
+            reg = reg_entry->reg;
+
+            /* check whether restoring is needed */
+            if (!reg->u.b.restore)
+                continue;
+
+            real_offset = (reg_grp_entry->base_offset + reg->offset);
+
+            /* read I/O device register value */
+            ret = pci_read_block(ptdev->pci_dev, real_offset,
+                        (uint8_t *)&read_val, reg->size);
+
+            if (!ret)
+            {
+                PT_LOG("Error: pci_read_block failed. "
+                    "return value[%d].\n", ret);
+                memset((uint8_t *)&read_val, 0xff, reg->size);
+            }
+
+            val = 0;
+
+            /* restore based on register size */
+            switch (reg->size) {
+            case 1:
+                /* byte register */
+                ret = reg->u.b.restore(ptdev, reg_entry, real_offset,
+                           (uint8_t)read_val, (uint8_t *)&val);
+                break;
+            case 2:
+                /* word register */
+                ret = reg->u.w.restore(ptdev, reg_entry, real_offset,
+                           (uint16_t)read_val, (uint16_t *)&val);
+                break;
+            case 4:
+                /* double word register */
+                ret = reg->u.dw.restore(ptdev, reg_entry, real_offset,
+                           (uint32_t)read_val, (uint32_t *)&val);
+                break;
+            }
+
+            /* restoring error */
+            if (ret < 0)
+            {
+                /* exit I/O emulator */
+                PT_LOG("Internal error: Invalid restoring " 
+                    "return value[%d]. I/O emulator exit.\n", ret);
+                exit(1);
+            }
+
+#ifdef PT_DEBUG_PCI_CONFIG_ACCESS
+            PT_LOG("[%02x:%02x.%x]: address=%04x val=0x%08x len=%d\n", 
+                pci_bus_num(d->bus), (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), 
+                real_offset, val, reg->size);
+#endif
+
+            ret = pci_write_block(ptdev->pci_dev, real_offset,
+                            (uint8_t *)&val, reg->size);
+
+            if (!ret)
+                PT_LOG("Error: pci_write_block failed. "
+                    "return value[%d].\n", ret);
+        }
+    }
+
+    /* if AER supported, restore it */
+    if (ptdev->pm_state->aer_base)
+        pt_aer_reg_restore(ptdev);
+}
+
+/* reinitialize all emulate registers */
+static void pt_config_reinit(struct pt_dev *ptdev)
+{
+    struct pt_reg_grp_tbl *reg_grp_entry = NULL;
+    struct pt_reg_grp_info_tbl *reg_grp = NULL;
+    struct pt_reg_tbl *reg_entry = NULL;
+    struct pt_reg_info_tbl *reg = NULL;
+
+    /* find emulate register group entry */
+    LIST_FOREACH(reg_grp_entry, &ptdev->reg_grp_tbl_head, entries)
+    {
+        /* find emulate register entry */
+        LIST_FOREACH(reg_entry, &reg_grp_entry->reg_tbl_head, entries)
+        {
+            reg = reg_entry->reg;
+            if (reg->init)
+                /* initialize emulate register */
+                reg_entry->data = reg->init(ptdev, reg_entry->reg,
+                                   (reg_grp_entry->base_offset + reg->offset));
+        }
+    }
+}
+
+void pt_from_d3hot_to_d0_with_reset(void *opaque)
+{
+    struct pt_dev *ptdev = opaque;
+    PCIDevice *d = &ptdev->dev;
+    struct pt_pm_info *pm_state = ptdev->pm_state;
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+    int ret = 0;
+
+    /* check power state */
+    ret = check_power_state(ptdev);
+
+    if (ret < 0)
+        goto out;
+
+    PT_LOG("Reinitialize PCI configuration registers " 
+        "due to power state transition with internal reset. [%02x:%02x.%x]\n", 
+        pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+
+    /* restore a part of I/O device register */
+    pt_config_restore(ptdev);
+
+    /* reinitialize all emulate register */
+    pt_config_reinit(ptdev);
+
+    /* setup MSI-INTx translation if support */
+    ret = pt_enable_msi_translate(ptdev);
+
+    /* rebind machine_irq to device */
+    if (ret < 0 && ptdev->machine_irq != 0)
+    {
+        e_device = (ptdev->dev.devfn >> 3) & 0x1f;
+        /* fix virtual interrupt pin to INTA# */
+        e_intx = 0;
+
+        ret = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq,
+                                       0, e_device, e_intx);
+        if (ret < 0)
+            PT_LOG("Error: Rebinding of interrupt failed! ret=%d\n", ret);
+    }
+
+out:
+    /* power state transition flags off */
+    pm_state->flags &= ~PT_FLAG_TRANSITING;
+
+    qemu_free_timer(pm_state->pm_timer);
+}
+
+void pt_default_power_transition(void *opaque)
+{
+    struct pt_dev *ptdev = opaque;
+    struct pt_pm_info *pm_state = ptdev->pm_state;
+
+    /* check power state */
+    check_power_state(ptdev);
+
+    /* power state transition flags off */
+    pm_state->flags &= ~PT_FLAG_TRANSITING;
+
+    qemu_free_timer(pm_state->pm_timer);
+}
+
 /* initialize emulate register */
 static int pt_config_reg_init(struct pt_dev *ptdev,
         struct pt_reg_grp_tbl *reg_grp,
@@ -1878,6 +2242,15 @@ static void pt_config_delete(struct pt_dev *ptdev)
     if (ptdev->msi)
         free(ptdev->msi);
 
+    /* free Power Management info table */
+    if (ptdev->pm_state)
+    {
+        if (ptdev->pm_state->pm_timer)
+            qemu_free_timer(ptdev->pm_state->pm_timer);
+
+        free(ptdev->pm_state);
+    }
+
     /* free all register group entry */
     while ((reg_grp_entry = ptdev->reg_grp_tbl_head.lh_first) != NULL)
     {
@@ -2027,6 +2400,36 @@ static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
     return reg_field;
 }
 
+/* initialize Power Management Capabilities register */
+static uint32_t pt_pmc_reg_init(struct pt_dev *ptdev,
+        struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+    PCIDevice *d = &ptdev->dev;
+
+    /* set Power Management Capabilities register */
+    ptdev->pm_state->pmc_field = *(uint16_t *)(d->config + real_offset);
+
+    return reg->init_val;
+}
+
+/* initialize PCI Power Management Control/Status register */
+static uint32_t pt_pmcsr_reg_init(struct pt_dev *ptdev,
+        struct pt_reg_info_tbl *reg, uint32_t real_offset)
+{
+    PCIDevice *d = &ptdev->dev;
+    uint16_t cap_ver  = 0;
+
+    /* check PCI Power Management support version */
+    cap_ver = ptdev->pm_state->pmc_field & PCI_PM_CAP_VER_MASK;
+
+    if (cap_ver > 2)
+        /* set No Soft Reset */
+        ptdev->pm_state->no_soft_reset = (*(uint8_t *)(d->config + real_offset)
+            & (uint8_t)PCI_PM_CTRL_NO_SOFT_RESET);
+
+    return reg->init_val;
+}
+
 /* initialize Link Control register */
 static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
         struct pt_reg_info_tbl *reg, uint32_t real_offset)
@@ -2108,11 +2511,6 @@ static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
     /* All register is 0 after reset, except first 4 byte */
     reg_field &= reg->ro_mask;
 
-    if (ptdev->msi_trans_cap) {
-        PT_LOG("Turning on MSI-INTx translation\n");
-        ptdev->msi_trans_en = 1;
-    }
-    
     return reg_field;
 }
 
@@ -2180,7 +2578,9 @@ static uint32_t pt_msixctrl_reg_init(struct pt_dev *ptdev,
         pci_write_word(pdev, real_offset, reg_field & ~PCI_MSIX_ENABLE);
         reg_field &= ~(PCI_MSIX_ENABLE | PCI_MSIX_MASK);
     }
-    
+
+    ptdev->msix->ctrl_offset = real_offset;
+
     return reg_field;
 }
 
@@ -2191,6 +2591,32 @@ static uint8_t pt_reg_grp_size_init(struct pt_dev *ptdev,
     return grp_reg->grp_size;
 }
 
+/* get Power Management Capability Structure register group size */
+static uint8_t pt_pm_size_init(struct pt_dev *ptdev,
+        struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
+{
+    ptdev->pm_state = qemu_mallocz(sizeof(struct pt_pm_info));
+    if (!ptdev->pm_state)
+    {
+        /* exit I/O emulator */
+        PT_LOG("Error: Allocating pt_pm_info failed. I/O emulator exit.\n");
+        exit(1);
+    }
+
+    /* set Power Management Capability base offset */
+    ptdev->pm_state->pm_base = base_offset;
+
+    /* find AER register and set AER Capability base offset */
+    ptdev->pm_state->aer_base = find_ext_cap_offset(ptdev->pci_dev,
+        (uint32_t)PCI_EXT_CAP_ID_AER);
+
+    /* save AER register */
+    if (ptdev->pm_state->aer_base)
+        pt_aer_reg_save(ptdev);
+
+    return grp_reg->grp_size;
+}
+
 /* get MSI Capability Structure register group size */
 static uint8_t pt_msi_size_init(struct pt_dev *ptdev,
         struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
@@ -2215,7 +2641,8 @@ static uint8_t pt_msi_size_init(struct pt_dev *ptdev,
         exit(1);
     }
     memset(ptdev->msi, 0, sizeof(struct pt_msi_info));
-    
+    ptdev->msi->pirq = -1;
+
     return msi_size;
 }
 
@@ -2705,18 +3132,17 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev,
         uint16_t *value, uint16_t dev_value, uint16_t valid_mask)
 {
     struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
     uint16_t pmcsr_mask = (PCI_PM_CTRL_PME_ENABLE | 
                            PCI_PM_CTRL_DATA_SEL_MASK |
                            PCI_PM_CTRL_PME_STATUS);
+    struct pt_pm_info *pm_state = ptdev->pm_state;
+    uint16_t read_val = 0;
 
     /* modify emulate register */
     writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask & ~pmcsr_mask;
-    /* ignore it when the requested state neither D3 nor D0 */
-    if (((*value & PCI_PM_CTRL_STATE_MASK) != PCI_PM_CTRL_STATE_MASK) &&
-        ((*value & PCI_PM_CTRL_STATE_MASK) != 0))
-        writable_mask &= ~PCI_PM_CTRL_STATE_MASK;
 
     cfg_entry->data = ((*value & writable_mask) |
                        (cfg_entry->data & ~writable_mask));
@@ -2726,6 +3152,100 @@ static int pt_pmcsr_reg_write(struct pt_dev *ptdev,
     *value = ((*value & throughable_mask) |
               (dev_value & ~throughable_mask));
 
+    /* set I/O device power state */
+    pm_state->cur_state = (dev_value & PCI_PM_CTRL_STATE_MASK);
+
+    /* set Guest requested PowerState */
+    pm_state->req_state = (*value & PCI_PM_CTRL_STATE_MASK);
+
+    /* check power state transition or not */
+    if (pm_state->cur_state == pm_state->req_state)
+        /* not power state transition */
+        return 0;
+
+    /* check enable power state transition */
+    if ((pm_state->req_state != 0) &&
+        (pm_state->cur_state > pm_state->req_state))
+    {
+        PT_LOG("Error: Invalid power transition. "
+            "[%02x:%02x.%x][requested state:%d][current state:%d]\n",
+            pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+            pm_state->req_state, pm_state->cur_state);
+
+        return 0;
+    }
+
+    /* check if this device supports the requested power state */
+    if (((pm_state->req_state == 1) && !(pm_state->pmc_field & PCI_PM_CAP_D1))
+        || ((pm_state->req_state == 2) &&
+        !(pm_state->pmc_field & PCI_PM_CAP_D2)))
+    {
+        PT_LOG("Error: Invalid power transition. "
+            "[%02x:%02x.%x][requested state:%d][current state:%d]\n",
+            pci_bus_num(d->bus), ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+            pm_state->req_state, pm_state->cur_state);
+
+        return 0;
+    }
+
+    /* in case of transition related to D3hot, it's necessary to wait 10 ms.
+     * But because writing to register will be performed later on actually,
+     * don't start QEMUTimer right now, just alloc and init QEMUTimer here.
+     */
+    if ((pm_state->cur_state == 3) || (pm_state->req_state == 3))
+    {
+        if (pm_state->req_state == 0)
+        {
+            /* alloc and init QEMUTimer */
+            if (!pm_state->no_soft_reset)
+            {
+                pm_state->pm_timer = qemu_new_timer(rt_clock,
+                    pt_from_d3hot_to_d0_with_reset, ptdev);
+
+                /* reset Interrupt and I/O resource mapping */
+                pt_reset_interrupt_and_io_mapping(ptdev);
+            }
+            else
+                pm_state->pm_timer = qemu_new_timer(rt_clock,
+                    pt_default_power_transition, ptdev);
+        }
+        else
+            /* alloc and init QEMUTimer */
+            pm_state->pm_timer = qemu_new_timer(rt_clock,
+                pt_default_power_transition, ptdev);
+
+        /* set power state transition delay */
+        pm_state->pm_delay = 10;
+
+        /* power state transition flags on */
+        pm_state->flags |= PT_FLAG_TRANSITING;
+    }
+    /* in case of transition related to D0, D1 and D2,
+     * no need to use QEMUTimer.
+     * So, we perfom writing to register here and then read it back.
+     */
+    else
+    {
+        /* write power state to I/O device register */
+        pci_write_word(ptdev->pci_dev,
+                        (pm_state->pm_base + PCI_PM_CTRL), *value);
+
+        /* in case of transition related to D2,
+         * it's necessary to wait 200 usec.
+         * But because QEMUTimer do not support microsec unit right now,
+         * so we do wait ourself here.
+         */
+        if ((pm_state->cur_state == 2) || (pm_state->req_state == 2))
+            usleep(200);
+
+        /* check power state */
+        check_power_state(ptdev);
+
+        /* recreate value for writing to I/O device register */
+        *value = pci_read_word(ptdev->pci_dev,
+                                (pm_state->pm_base + PCI_PM_CTRL));
+    }
+
     return 0;
 }
 
@@ -2760,8 +3280,7 @@ static int pt_linkctrl_reg_write(struct pt_dev *ptdev,
     struct pt_reg_info_tbl *reg = cfg_entry->reg;
     uint16_t writable_mask = 0;
     uint16_t throughable_mask = 0;
-    uint16_t linkctrl_mask = (PCI_EXP_LNKCTL_ASPM | 0x04 |
-                              PCI_EXP_LNKCTL_DISABLE |
+    uint16_t linkctrl_mask = (0x04 | PCI_EXP_LNKCTL_DISABLE |
                               PCI_EXP_LNKCTL_RETRAIN | 
                               0x0400 | 0x0800 | 0xF000);
 
@@ -2825,34 +3344,6 @@ static int pt_linkctrl2_reg_write(struct pt_dev *ptdev,
     return 0;
 }
 
-static void pt_unmap_msi_translate(struct pt_dev *ptdev)
-{
-    uint16_t e_device, e_intx;
-    int rc;
-
-    /* MSI_ENABLE bit should be disabed until the new handler is set */
-    msi_set_enable(ptdev, 0);
-
-    e_device = (ptdev->dev.devfn >> 3) & 0x1f;
-    /* fix virtual interrupt pin to INTA# */
-    e_intx = 0;
-    rc = xc_domain_unbind_pt_irq(xc_handle, domid, ptdev->msi->pirq,
-                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
-                                 e_device, e_intx, 0);
-    if (rc < 0)
-        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed! rc=%d\n", rc);
-
-    if (ptdev->machine_irq)
-    {
-        rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, ptdev->machine_irq,
-                                       0, e_device, e_intx);
-        if ( rc < 0 )
-            PT_LOG("Error: Rebinding of interrupt failed! rc=%d\n", rc);
-    }
-
-    ptdev->msi_trans_en = 0;
-}
-
 /* write Message Control register */
 static int pt_msgctrl_reg_write(struct pt_dev *ptdev, 
     struct pt_reg_tbl *cfg_entry, 
@@ -2893,7 +3384,7 @@ static int pt_msgctrl_reg_write(struct pt_dev *ptdev,
         {
             if (ptdev->msi_trans_en) {
                 PT_LOG("guest enabling MSI, disable MSI-INTx translation\n");
-                pt_unmap_msi_translate(ptdev);
+                pt_disable_msi_translate(ptdev);
             }
             else
             {
@@ -3075,7 +3566,7 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
     {
         if (ptdev->msi_trans_en) {
             PT_LOG("guest enabling MSI-X, disable MSI-INTx translation\n");
-            pt_unmap_msi_translate(ptdev);
+            pt_disable_msi_translate(ptdev);
         }
         pt_msix_update(ptdev);
     }
@@ -3085,6 +3576,141 @@ static int pt_msixctrl_reg_write(struct pt_dev *ptdev,
     return 0;
 }
 
+/* build the restore value for a byte-size emulated register; returns 0 */
+static int pt_byte_reg_restore(struct pt_dev *ptdev, 
+        struct pt_reg_tbl *cfg_entry, 
+        uint32_t real_offset, uint8_t dev_value, uint8_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+
+    /* start from the copy held in ptdev->dev.config at real_offset */
+    *value = *(uint8_t *)(d->config + real_offset);
+
+    /* emu_mask bits come from that copy, all other bits from dev_value */
+    *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask);
+
+    return 0;
+}
+
+/* build the restore value for a word-size emulated register; returns 0 */
+static int pt_word_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+
+    /* start from the copy held in ptdev->dev.config at real_offset */
+    *value = *(uint16_t *)(d->config + real_offset);
+
+    /* emu_mask bits come from that copy, all other bits from dev_value */
+    *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask);
+
+    return 0;
+}
+
+/* build the restore value for a long-size emulated register; returns 0 */
+static int pt_long_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+
+    /* start from the copy held in ptdev->dev.config at real_offset */
+    *value = *(uint32_t *)(d->config + real_offset);
+
+    /* emu_mask bits come from that copy, all other bits from dev_value */
+    *value = PT_MERGE_VALUE(*value, dev_value, reg->emu_mask);
+
+    return 0;
+}
+
+/* build the restore value for the Command register; returns 0 */
+static int pt_cmd_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    PCIDevice *d = &ptdev->dev;
+    uint16_t restorable_mask = 0;
+
+    /* start from the copy held in ptdev->dev.config at real_offset */
+    *value = *(uint16_t *)(d->config + real_offset);
+
+    /* merge emu_mask bits from that copy, except that the Fast
+     * Back-to-Back Enable bit is always taken from dev_value.
+     */
+    restorable_mask = reg->emu_mask & ~PCI_COMMAND_FAST_BACK;
+    *value = PT_MERGE_VALUE(*value, dev_value, restorable_mask);
+
+    return 0;
+}
+
+/* build the restore value for a BAR register; returns 0 */
+static int pt_bar_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint32_t dev_value, uint32_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+    uint32_t bar_emu_mask = 0;
+    int index = 0;
+
+    /* map the register offset to a BAR index; a negative index is fatal */
+    index = pt_bar_offset_to_index(reg->offset);
+    if (index < 0)
+    {
+        /* exit I/O emulator */
+        PT_LOG("Internal error: Invalid BAR index[%d]. "
+            "I/O emulator exit.\n", index);
+        exit(1);
+    }
+
+    /* use value from kernel sysfs (cast: base_addr may be only 32 bits) */
+    if (ptdev->bases[index].bar_flag == PT_BAR_FLAG_UPPER)
+        *value = (uint64_t)ptdev->pci_dev->base_addr[index-1] >> 32;
+    else
+        *value = ptdev->pci_dev->base_addr[index];
+
+    /* choose the emulation mask from the BAR flag; other flags keep 0 */
+    switch (ptdev->bases[index].bar_flag)
+    {
+    case PT_BAR_FLAG_MEM:
+        bar_emu_mask = PT_BAR_MEM_EMU_MASK;
+        break;
+    case PT_BAR_FLAG_IO:
+        bar_emu_mask = PT_BAR_IO_EMU_MASK;
+        break;
+    case PT_BAR_FLAG_UPPER:
+        bar_emu_mask = PT_BAR_ALLF;
+        break;
+    default:
+        break;
+    }
+
+    /* masked bits come from the recorded address, the rest from dev_value */
+    *value = PT_MERGE_VALUE(*value, dev_value, bar_emu_mask);
+
+    return 0;
+}
+
+/* build the PMCSR restore value from dev_value alone; returns 0 */
+static int pt_pmcsr_reg_restore(struct pt_dev *ptdev, 
+    struct pt_reg_tbl *cfg_entry, 
+    uint32_t real_offset, uint16_t dev_value, uint16_t *value)
+{
+    struct pt_reg_info_tbl *reg = cfg_entry->reg;
+
+    /* create value for restoring to I/O device register.
+     * Nothing is restored: just clear PME Enable and PME Status.
+     * Note: PME Status is RW1C, so it is cleared by writing 1b.
+     */
+    *value = (dev_value & ~PCI_PM_CTRL_PME_ENABLE) | PCI_PM_CTRL_PME_STATUS;
+
+    return 0;
+}
+
 struct pt_dev * register_real_device(PCIBus *e_bus,
         const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev,
         uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access,
@@ -3197,32 +3823,6 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
     if (!assigned_device->dev.config[0x3d])
         goto out;
 
-    e_device = (assigned_device->dev.devfn >> 3) & 0x1f;
-    /* fix virtual interrupt pin to INTA# */
-    e_intx = 0;
-
-    while (assigned_device->msi_trans_en)
-    {
-        if (pt_msi_setup(assigned_device))
-        {
-            PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
-            assigned_device->msi_trans_en = 0;
-            break;
-        }
-
-        rc = xc_domain_bind_pt_irq(xc_handle, domid, assigned_device->msi->pirq,
-                                   PT_IRQ_TYPE_MSI_TRANSLATE, 0,
-                                   e_device, e_intx, 0);
-        if ( rc < 0)
-        {
-            PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
-            assigned_device->msi_trans_en = 0;
-            break;
-        }
-        msi_set_enable(assigned_device, 1);
-        break;
-    }
-
     if ( PT_MACHINE_IRQ_AUTO == machine_irq )
     {
         int pirq = pci_dev->irq;
@@ -3242,12 +3842,16 @@ struct pt_dev * register_real_device(PCIBus *e_bus,
         }
     }
 
-    if (assigned_device->msi_trans_en)
-        goto out;
+    /* setup MSI-INTx translation if support */
+    rc = pt_enable_msi_translate(assigned_device);
 
     /* bind machine_irq to device */
-    if ( 0 != machine_irq )
+    if (rc < 0 && machine_irq != 0)
     {
+        e_device = (assigned_device->dev.devfn >> 3) & 0x1f;
+        /* fix virtual interrupt pin to INTA# */
+        e_intx = 0;
+
         rc = xc_domain_bind_pt_pci_irq(xc_handle, domid, machine_irq, 0,
                                        e_device, e_intx);
         if ( rc < 0 )
diff --git a/hw/pass-through.h b/hw/pass-through.h
index 7a623be..4704d83 100644
--- a/hw/pass-through.h
+++ b/hw/pass-through.h
@@ -24,6 +24,7 @@
 #include "pci/pci.h"
 #include "exec-all.h"
 #include "sys-queue.h"
+#include "qemu-timer.h"
 
 /* Log acesss */
 #define PT_LOGGING_ENABLED
@@ -59,6 +60,12 @@
 #define PCI_CAP_ID_SSVID        0x0D
 #endif
 
+#ifdef PCI_PM_CTRL_NO_SOFT_RESET
+#undef PCI_PM_CTRL_NO_SOFT_RESET
+#endif
+/* No Soft Reset for D3hot->D0 */
+#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008
+
 #ifndef PCI_MSI_FLAGS_MASK_BIT
 /* interrupt masking & reporting supported */
 #define PCI_MSI_FLAGS_MASK_BIT  0x0100
@@ -79,6 +86,19 @@
 #define PCI_EXP_TYPE_ROOT_EC     0xa
 #endif
 
+#ifndef PCI_EXT_CAP_ID
+/* Extended Capability ID: low 16 bits of the capability header */
+#define PCI_EXT_CAP_ID(header)   ((header) & 0x0000ffff)
+#endif
+
+#ifndef PCI_EXT_CAP_NEXT
+/* next Extended Capability offset: header bits 31:20, low 2 bits cleared */
+#define PCI_EXT_CAP_NEXT(header) (((header) >> 20) & 0xffc)
+#endif
+
+/* power state transition */
+#define PT_FLAG_TRANSITING 0x0001
+
 #define PT_INVALID_REG          0xFFFFFFFF      /* invalid register value */
 #define PT_BAR_ALLF             0xFFFFFFFF      /* BAR ALLF value */
 #define PT_BAR_MEM_RO_MASK      0x0000000F      /* BAR ReadOnly mask(Memory) */
@@ -102,6 +122,8 @@ enum {
     }\
 } while(0)
 
+#define PT_MERGE_VALUE(value, data, val_mask) \
+    (((value) & (val_mask)) | ((data) & ~(val_mask)))
 
 struct pt_region {
     /* Virtual phys base & size */
@@ -135,6 +157,7 @@ struct msix_entry_info {
 };
 
 struct pt_msix_info {
+    uint32_t ctrl_offset;
     int enabled;
     int total_entries;
     int bar_index;
@@ -147,6 +170,18 @@ struct pt_msix_info {
     struct msix_entry_info msix_entry[0];
 };
 
+struct pt_pm_info {
+    QEMUTimer *pm_timer;  /* timer created for D3hot-related transitions */
+    int no_soft_reset;    /* non-zero: D3hot->D0 is done without reset */
+    uint16_t flags;       /* transition flags, e.g. PT_FLAG_TRANSITING */
+    uint16_t pmc_field;   /* Power Management Capabilities field */
+    int pm_delay;         /* transition delay; set to 10 (ms) for D3hot */
+    uint16_t cur_state;   /* current power state read from the device */
+    uint16_t req_state;   /* power state requested by the guest */
+    uint32_t pm_base;     /* Power Management Capability reg base offset */
+    uint32_t aer_base;    /* AER Capability reg base offset */
+};
+
 /*
     This structure holds the context of the mapping functions
     and data that is relevant for qemu device management.
@@ -163,6 +198,7 @@ struct pt_dev {
     /* Physical MSI to guest INTx translation when possible */
     int msi_trans_cap;
     int msi_trans_en;
+    struct pt_pm_info *pm_state;                /* PM virtualization */
 };
 
 /* Used for formatting PCI BDF into cf8 format */
@@ -260,6 +296,24 @@ typedef int (*conf_byte_read) (struct pt_dev *ptdev,
                                struct pt_reg_tbl *cfg_entry, 
                                uint8_t *value,
                                uint8_t valid_mask);
+/* emul reg long restore method (32-bit dev_value in, *value out) */
+typedef int (*conf_dword_restore) (struct pt_dev *ptdev,
+                                   struct pt_reg_tbl *cfg_entry, 
+                                   uint32_t real_offset,
+                                   uint32_t dev_value,
+                                   uint32_t *value);
+/* emul reg word restore method (16-bit dev_value in, *value out) */
+typedef int (*conf_word_restore) (struct pt_dev *ptdev,
+                                  struct pt_reg_tbl *cfg_entry, 
+                                  uint32_t real_offset,
+                                  uint16_t dev_value,
+                                  uint16_t *value);
+/* emul reg byte restore method (8-bit dev_value in, *value out) */
+typedef int (*conf_byte_restore) (struct pt_dev *ptdev,
+                                  struct pt_reg_tbl *cfg_entry, 
+                                  uint32_t real_offset,
+                                  uint8_t dev_value,
+                                  uint8_t *value);
 
 /* emul reg infomation table */
 struct pt_reg_info_tbl {
@@ -281,18 +335,24 @@ struct pt_reg_info_tbl {
             conf_dword_write write;
             /* emul reg long read method */
             conf_dword_read read;
+            /* emul reg long restore method */
+            conf_dword_restore restore;
         } dw;
         struct {
             /* emul reg word write method */
             conf_word_write write;
             /* emul reg word read method */
             conf_word_read read;
+            /* emul reg word restore method */
+            conf_word_restore restore;
         } w;
         struct {
             /* emul reg byte write method */
             conf_byte_write write;
             /* emul reg byte read method */
             conf_byte_read read;
+            /* emul reg byte restore method */
+            conf_byte_restore restore;
         } b;
     } u;
 };
diff --git a/hw/pci.h b/hw/pci.h
index a527a39..2800499 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -44,7 +44,7 @@ typedef struct PCIIORegion {
 
 struct PCIDevice {
     /* PCI config space */
-    uint8_t config[256];
+    uint8_t config[4096];
 
     /* the following fields are read only */
     PCIBus *bus;
diff --git a/hw/pt-msi.c b/hw/pt-msi.c
index 9898763..c7a8f22 100644
--- a/hw/pt-msi.c
+++ b/hw/pt-msi.c
@@ -22,6 +22,41 @@
 #include "pt-msi.h"
 #include <sys/mman.h>
 
+static void msi_set_enable(struct pt_dev *dev, int en) /* en: 0 = disable */
+{
+    uint16_t val = 0;
+    uint32_t address = 0;
+    if (!dev->msi)              /* no MSI info for this device */
+        return;
+
+    address = dev->msi->ctrl_offset;
+    if (!address)               /* a zero control offset is never written */
+        return;
+
+    val = pci_read_word(dev->pci_dev, address);
+    val &= ~PCI_MSI_FLAGS_ENABLE;
+    val |= en ? PCI_MSI_FLAGS_ENABLE : 0; /* any non-zero en enables MSI */
+    pci_write_word(dev->pci_dev, address, val);
+}
+
+static void msix_set_enable(struct pt_dev *dev, int en) /* en: 0 = disable */
+{
+    uint16_t val = 0;
+    uint32_t address = 0;
+    if (!dev->msix)             /* no MSI-X info for this device */
+        return;
+
+    address = dev->msix->ctrl_offset;
+    if (!address)               /* a zero control offset is never written */
+        return;
+
+    val = pci_read_word(dev->pci_dev, address);
+    val &= ~PCI_MSIX_ENABLE;    /* read-modify-write of the enable bit only */
+    if (en)
+        val |= PCI_MSIX_ENABLE;
+    pci_write_word(dev->pci_dev, address, val);
+}
+
 /* MSI virtuailization functions */
 
 /*
@@ -95,6 +130,141 @@ int pt_msi_update(struct pt_dev *d)
                                      d->msi->pirq, gflags);
 }
 
+void pt_msi_disable(struct pt_dev *dev) /* undo MSI bind/map, reset state */
+{
+    PCIDevice *d = &dev->dev;
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    msi_set_enable(dev, 0);
+
+    e_device = (dev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+
+    if (dev->msi_trans_en)
+    {
+        if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->msi->pirq,
+                                    PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                    e_device, e_intx, 0))
+        {
+            PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
+            goto out;
+        }
+    }
+    else if (!(dev->msi->flags & MSI_FLAG_UNINIT))
+    {
+        /* rebuild guest vector, 64-bit address and flags from dev->msi */
+        gvec = dev->msi->data & 0xFF;
+        addr = (uint64_t)dev->msi->addr_hi << 32 | dev->msi->addr_lo;
+        gflags = __get_msi_gflags(dev->msi->data, addr);
+
+        PT_LOG("Unbind msi with pirq %x, gvec %x\n",
+                dev->msi->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xc_handle, domid, gvec,
+                                        dev->msi->pirq, gflags))
+        {
+            PT_LOG("Error: Unbinding of MSI failed. [%02x:%02x.%x]\n", 
+                pci_bus_num(d->bus), 
+                ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+            goto out;
+        }
+    }
+
+    if (dev->msi->pirq != -1)
+    {
+        PT_LOG("Unmap msi with pirq %x\n", dev->msi->pirq);
+
+        if (xc_physdev_unmap_pirq(xc_handle, domid, dev->msi->pirq))
+        {
+            PT_LOG("Error: Unmapping of MSI failed. [%02x:%02x.%x]\n", 
+               pci_bus_num(d->bus), 
+               ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+            goto out;
+        }
+    }
+    /* unbind INTx when translation is supported but was not enabled */
+    if (dev->msi_trans_cap && !dev->msi_trans_en)
+    {
+        if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->machine_irq,
+                        PT_IRQ_TYPE_PCI, 0, e_device, e_intx, 0))
+            PT_LOG("Error: Unbinding of interrupt failed!\n");
+    }
+
+out:
+    /* always reset the MSI info, even after a failed unbind/unmap */
+    dev->msi->flags = 0;
+    dev->msi->pirq = -1;
+    dev->msi_trans_en = 0;
+}
+
+/* MSI-INTx translation virtualization functions */
+int pt_enable_msi_translate(struct pt_dev* dev) /* 0 on success, else -1 */
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    if (!(dev->msi && dev->msi_trans_cap))
+        return -1; /* no MSI info, or translation not supported */
+
+    msi_set_enable(dev, 0);
+    dev->msi_trans_en = 0;
+
+    if (pt_msi_setup(dev))
+    {
+        PT_LOG("Error: MSI-INTx translation MSI setup failed, fallback\n");
+        return -1;
+    }
+
+    e_device = (dev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+
+    if (xc_domain_bind_pt_irq(xc_handle, domid, dev->msi->pirq,
+                               PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                               e_device, e_intx, 0))
+    {
+        PT_LOG("Error: MSI-INTx translation bind failed, fallback\n");
+        return -1; /* NOTE(review): MSI set up above is not undone - confirm */
+    }
+
+    msi_set_enable(dev, 1);
+    dev->msi_trans_en = 1; /* set only after setup and bind both succeeded */
+
+    return 0;
+}
+
+void pt_disable_msi_translate(struct pt_dev *dev)
+{
+    uint8_t e_device = 0;
+    uint8_t e_intx = 0;
+
+    /* MSI_ENABLE bit should be disabled until the new handler is set */
+    msi_set_enable(dev, 0);
+
+    e_device = (dev->dev.devfn >> 3) & 0x1f;
+    /* fix virtual interrupt pin to INTA# */
+    e_intx = 0;
+
+    if (xc_domain_unbind_pt_irq(xc_handle, domid, dev->msi->pirq,
+                                 PT_IRQ_TYPE_MSI_TRANSLATE, 0,
+                                 e_device, e_intx, 0))
+        PT_LOG("Error: Unbinding pt irq for MSI-INTx failed!\n");
+
+    if (dev->machine_irq) /* rebind the INTx interrupt if one is known */
+    {
+        if (xc_domain_bind_pt_pci_irq(xc_handle, domid, dev->machine_irq,
+                                       0, e_device, e_intx))
+            PT_LOG("Error: Rebinding of interrupt failed!\n");
+    }
+
+    dev->msi_trans_en = 0; /* cleared even if the calls above failed */
+}
+
 /* MSI-X virtulization functions */
 static void mask_physical_msix_entry(struct pt_dev *dev, int entry_nr, int mask)
 {
@@ -159,6 +329,52 @@ int pt_msix_update(struct pt_dev *dev)
     return 0;
 }
 
+void pt_msix_disable(struct pt_dev *dev) /* unbind/unmap all MSI-X pirqs */
+{
+    PCIDevice *d = &dev->dev;
+    uint8_t gvec = 0;
+    uint32_t gflags = 0;
+    uint64_t addr = 0;
+    int i = 0;
+    struct msix_entry_info *entry = NULL;
+
+    msix_set_enable(dev, 0);
+
+    for ( i = 0; i < dev->msix->total_entries; i++ )
+    {
+        entry = &dev->msix->msix_entry[i];
+
+        if (entry->pirq == -1) /* entry has no pirq: nothing to undo */
+            continue;
+
+        gvec = entry->io_mem[2] & 0xff;
+        addr = *(uint64_t *)&entry->io_mem[0];
+        gflags = __get_msi_gflags(entry->io_mem[2], addr);
+
+        PT_LOG("Unbind msix with pirq %x, gvec %x\n",
+                entry->pirq, gvec);
+
+        if (xc_domain_unbind_msi_irq(xc_handle, domid, gvec,
+                                        entry->pirq, gflags))
+            PT_LOG("Error: Unbinding of MSI-X failed. [%02x:%02x.%x]\n", 
+                pci_bus_num(d->bus), 
+                ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+        else
+        {
+            PT_LOG("Unmap msix with pirq %x\n", entry->pirq);
+
+            if (xc_physdev_unmap_pirq(xc_handle,
+                                         domid, entry->pirq))
+                PT_LOG("Error: Unmapping of MSI-X failed. [%02x:%02x.%x]\n",
+                    pci_bus_num(d->bus),
+                    ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7));
+        }
+        /* clear msi-x info whether or not the calls above succeeded */
+        entry->pirq = -1;
+        entry->flags = 0;
+    }
+}
+
 static void pci_msix_invalid_write(void *opaque, target_phys_addr_t addr,
                                    uint32_t val)
 {
diff --git a/hw/pt-msi.h b/hw/pt-msi.h
index a8632d5..dea0848 100644
--- a/hw/pt-msi.h
+++ b/hw/pt-msi.h
@@ -85,9 +85,21 @@ __get_msi_gflags(uint32_t data, uint64_t addr);
 int
 pt_msi_update(struct pt_dev *d);
 
+void
+pt_msi_disable(struct pt_dev *dev);
+
+int
+pt_enable_msi_translate(struct pt_dev* dev);
+
+void
+pt_disable_msi_translate(struct pt_dev *dev);
+
 int
 pt_msix_update(struct pt_dev *dev);
 
+void
+pt_msix_disable(struct pt_dev *dev);
+
 int
 remove_msix_mapping(struct pt_dev *dev, int bar_index);
 
diff --git a/qemu-timer.h b/qemu-timer.h
index 7408edc..181428f 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -31,6 +31,8 @@ extern int64_t ticks_per_sec;
 void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
 void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
 
+void qemu_run_one_timer(QEMUTimer *ts);
+
 /* ptimer.c */
 typedef struct ptimer_state ptimer_state;
 typedef void (*ptimer_cb)(void *opaque);
diff --git a/vl.c b/vl.c
index dd5d155..8539f6d 100644
--- a/vl.c
+++ b/vl.c
@@ -1286,6 +1286,22 @@ void qemu_get_timer(QEMUFile *f, QEMUTimer *ts)
     }
 }
 
+/* synchronously wait for ts to expire, then invoke its callback */
+void qemu_run_one_timer(QEMUTimer *ts)
+{
+    uint64_t current_time;
+
+    /* remove timer from the list before calling the callback */
+    qemu_del_timer(ts);
+
+    while ((current_time = qemu_get_clock(rt_clock)) < ts->expire_time)
+        /* sleep the remainder (x1000: presumably ms to us), then re-check */
+        usleep((ts->expire_time - current_time) * 1000);
+
+    /* run the callback with the timer's own opaque argument */
+    ts->cb(ts->opaque);
+}
+
 static void timer_save(QEMUFile *f, void *opaque)
 {
     if (cpu_ticks_enabled) {


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>