# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1212156368 -3600
# Node ID ecd266cebcab648132d432899eabaecf8a168508
# Parent 121d196b4cc85222dccbd947b372a8c2d218035c
vtd: Various cleanups and fixes:
 * Handle DRHDs with different supported AGAWs. To support this we
   always create 4-level page tables, and skip the top level(s) for
   units which support only 2 or 3 levels (see the worked example
   below this list).
 * Handle systems with mixed DRHD support for cache snooping. We must
   pessimistically CLFLUSH if any DRHD does not support snooping (see
   the flush sketch below).
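
For reference, the AGAW/level arithmetic behind the first point works out
as follows. The standalone program below is an illustration only, not part
of the patch; the macros are adapted (extra parentheses only) from
xen/drivers/passthrough/vtd/iommu.h, and 48 bits is assumed to be Xen's
DEFAULT_DOMAIN_ADDRESS_WIDTH:

    #include <stdio.h>

    #define LEVEL_STRIDE        9
    #define level_to_agaw(val)  ((val) - 2)
    #define agaw_to_level(val)  ((val) + 2)
    #define agaw_to_width(val)  (30 + (val) * LEVEL_STRIDE)
    #define width_to_agaw(w)    (((w) - 30) / LEVEL_STRIDE)

    int main(void)
    {
        int width = 48;                    /* assumed DEFAULT_DOMAIN_ADDRESS_WIDTH */
        int agaw  = width_to_agaw(width);  /* (48 - 30) / 9 = 2 */

        printf("%d-bit width -> agaw %d -> %d-level page tables\n",
               width, agaw, agaw_to_level(agaw));                  /* 4 levels   */
        printf("3-level unit: agaw %d, %d-bit width\n",
               level_to_agaw(3), agaw_to_width(level_to_agaw(3))); /* agaw 1, 39 */
        printf("2-level unit: agaw %d, %d-bit width\n",
               level_to_agaw(2), agaw_to_width(level_to_agaw(2))); /* agaw 0, 30 */
        return 0;
    }

So every domain gets agaw 2 (a 4-level tree), and a unit whose cap_sagaw
advertises only 3 or 2 levels is handed the address of a table one or two
levels down the same tree, which is what the new loop in
domain_context_mapping_one() below does.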
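
The second point amounts to the policy sketched below. This is a standalone
model rather than the Xen code itself: _mm_clflush() and the fixed 64-byte
line size stand in for Xen's clflush() and the CPUID-derived clflush_size.

    #include <immintrin.h>   /* _mm_clflush(), SSE2 */
    #include <stddef.h>

    static int iommus_incoherent;           /* latched at IOMMU discovery if any
                                               unit is not snoop-coherent */
    static unsigned int clflush_size = 64;  /* assumed cache-line size */

    static void flush_iommu_structure(void *addr, size_t size)
    {
        size_t i;

        if ( !iommus_incoherent )           /* every unit snoops: nothing to do */
            return;

        for ( i = 0; i < size; i += clflush_size )
            _mm_clflush((char *)addr + i);  /* write back + invalidate one line */
    }

    int main(void)
    {
        char ctx[128];
        iommus_incoherent = 1;              /* pretend one unit lacks snooping */
        flush_iommu_structure(ctx, sizeof(ctx));
        return 0;
    }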
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
xen/drivers/passthrough/vtd/iommu.c | 204 ++++++++++++++++------------------
xen/drivers/passthrough/vtd/iommu.h | 1
xen/drivers/passthrough/vtd/vtd.h | 4
xen/drivers/passthrough/vtd/x86/vtd.c | 6 -
xen/include/xen/iommu.h | 1
5 files changed, 101 insertions(+), 115 deletions(-)
diff -r 121d196b4cc8 -r ecd266cebcab xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Thu May 29 14:30:48 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c Fri May 30 15:06:08 2008 +0100
@@ -112,28 +112,27 @@ struct iommu_flush *iommu_get_flush(stru
return iommu ? &iommu->intel->flush : NULL;
}
-unsigned int clflush_size;
-void clflush_cache_range(void *adr, int size)
+static unsigned int clflush_size;
+static int iommus_incoherent;
+static void __iommu_flush_cache(void *addr, int size)
{
int i;
+
+ if ( !iommus_incoherent )
+ return;
+
for ( i = 0; i < size; i += clflush_size )
- clflush(adr + i);
-}
-
-static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
-{
- if ( !ecap_coherent(iommu->ecap) )
- clflush_cache_range(addr, size);
-}
-
-void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
-{
- __iommu_flush_cache(iommu, addr, 8);
-}
-
-void iommu_flush_cache_page(struct iommu *iommu, void *addr)
-{
- __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
+ clflush((char *)addr + i);
+}
+
+void iommu_flush_cache_entry(void *addr)
+{
+ __iommu_flush_cache(addr, 8);
+}
+
+void iommu_flush_cache_page(void *addr)
+{
+ __iommu_flush_cache(addr, PAGE_SIZE_4K);
}
int nr_iommus;
@@ -157,7 +156,7 @@ static u64 bus_to_context_maddr(struct i
}
set_root_value(*root, maddr);
set_root_present(*root);
- iommu_flush_cache_entry(iommu, root);
+ iommu_flush_cache_entry(root);
}
maddr = (u64) get_context_addr(*root);
unmap_vtd_domain_page(root_entries);
@@ -194,8 +193,6 @@ static u64 addr_to_dma_page_maddr(struct
static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
- struct acpi_drhd_unit *drhd;
- struct iommu *iommu;
int addr_width = agaw_to_width(hd->agaw);
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(hd->agaw);
@@ -204,19 +201,11 @@ static u64 addr_to_dma_page_maddr(struct
u64 pte_maddr = 0, maddr;
u64 *vaddr = NULL;
- drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
- iommu = drhd->iommu;
-
addr &= (((u64)1) << addr_width) - 1;
spin_lock_irqsave(&hd->mapping_lock, flags);
if ( hd->pgd_maddr == 0 )
- {
- if ( !alloc )
+ if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
goto out;
- hd->pgd_maddr = alloc_pgtable_maddr();
- if ( hd->pgd_maddr == 0 )
- goto out;
- }
parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
while ( level > 1 )
@@ -240,7 +229,7 @@ static u64 addr_to_dma_page_maddr(struct
*/
dma_set_pte_readable(*pte);
dma_set_pte_writable(*pte);
- iommu_flush_cache_entry(iommu, pte);
+ iommu_flush_cache_entry(pte);
}
else
{
@@ -551,8 +540,6 @@ static void dma_pte_clear_one(struct dom
struct dma_pte *page = NULL, *pte = NULL;
u64 pg_maddr;
- drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-
/* get last level pte */
pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
if ( pg_maddr == 0 )
@@ -567,14 +554,14 @@ static void dma_pte_clear_one(struct dom
}
dma_clear_pte(*pte);
- iommu_flush_cache_entry(drhd->iommu, pte);
+ iommu_flush_cache_entry(pte);
for_each_drhd_unit ( drhd )
{
iommu = drhd->iommu;
-
if ( test_bit(iommu->index, &hd->iommu_bitmap) )
- iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain), addr, 1, 0);
+ iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+ addr, 1, 0);
}
unmap_vtd_domain_page(page);
@@ -603,7 +590,6 @@ static void iommu_free_next_pagetable(u6
static void iommu_free_next_pagetable(u64 pt_maddr, unsigned long index,
int level)
{
- struct acpi_drhd_unit *drhd;
unsigned long next_index;
struct dma_pte *pt_vaddr, *pde;
int next_level;
@@ -613,50 +599,38 @@ static void iommu_free_next_pagetable(u6
pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
pde = &pt_vaddr[index];
- if ( dma_pte_addr(*pde) != 0 )
- {
- next_level = level - 1;
- if ( next_level > 1 )
- {
- next_index = 0;
- do
- {
- iommu_free_next_pagetable(pde->val,
- next_index, next_level);
- next_index++;
- } while ( next_index < PTE_NUM );
- }
-
- dma_clear_pte(*pde);
- drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
- iommu_flush_cache_entry(drhd->iommu, pde);
- free_pgtable_maddr(pde->val);
- unmap_vtd_domain_page(pt_vaddr);
- }
- else
- unmap_vtd_domain_page(pt_vaddr);
+ if ( dma_pte_addr(*pde) == 0 )
+ goto out;
+
+ next_level = level - 1;
+ if ( next_level > 1 )
+ {
+ for ( next_index = 0; next_index < PTE_NUM; next_index++ )
+ iommu_free_next_pagetable(pde->val, next_index, next_level);
+ }
+
+ dma_clear_pte(*pde);
+ iommu_flush_cache_entry(pde);
+ free_pgtable_maddr(pde->val);
+
+ out:
+ unmap_vtd_domain_page(pt_vaddr);
}
/* free all VT-d page tables when shut down or destroy domain. */
static void iommu_free_pagetable(struct domain *domain)
{
- unsigned long index;
struct hvm_iommu *hd = domain_hvm_iommu(domain);
- int total_level = agaw_to_level(hd->agaw);
-
- if ( hd->pgd_maddr != 0 )
- {
- index = 0;
- do
- {
- iommu_free_next_pagetable(hd->pgd_maddr,
- index, total_level + 1);
- index++;
- } while ( index < PTE_NUM );
-
- free_pgtable_maddr(hd->pgd_maddr);
- hd->pgd_maddr = 0;
- }
+ int i, total_level = agaw_to_level(hd->agaw);
+
+ if ( hd->pgd_maddr == 0 )
+ return;
+
+ for ( i = 0; i < PTE_NUM; i++ )
+ iommu_free_next_pagetable(hd->pgd_maddr, i, total_level + 1);
+
+ free_pgtable_maddr(hd->pgd_maddr);
+ hd->pgd_maddr = 0;
}
static int iommu_set_root_entry(struct iommu *iommu)
@@ -977,6 +951,8 @@ static int iommu_alloc(struct acpi_drhd_
static int iommu_alloc(struct acpi_drhd_unit *drhd)
{
struct iommu *iommu;
+ unsigned long sagaw;
+ int agaw;
if ( nr_iommus > MAX_IOMMUS )
{
@@ -1003,6 +979,23 @@ static int iommu_alloc(struct acpi_drhd_
iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
+
+ /* Calculate number of pagetable levels: between 2 and 4. */
+ sagaw = cap_sagaw(iommu->cap);
+ for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
+ if ( test_bit(agaw, &sagaw) )
+ break;
+ if ( agaw < 0 )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "IOMMU: unsupported sagaw %lx\n", sagaw);
+ xfree(iommu);
+ return -ENODEV;
+ }
+ iommu->nr_pt_levels = agaw_to_level(agaw);
+
+ if ( !ecap_coherent(iommu->ecap) )
+ iommus_incoherent = 1;
spin_lock_init(&iommu->lock);
spin_lock_init(&iommu->register_lock);
@@ -1045,10 +1038,7 @@ static int intel_iommu_domain_init(struc
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
struct iommu *iommu = NULL;
- int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
- int adjust_width, agaw;
u64 i;
- unsigned long sagaw;
struct acpi_drhd_unit *drhd;
INIT_LIST_HEAD(&hd->pdev_list);
@@ -1056,22 +1046,7 @@ static int intel_iommu_domain_init(struc
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
iommu = drhd->iommu;
- /* Calculate AGAW. */
- if ( guest_width > cap_mgaw(iommu->cap) )
- guest_width = cap_mgaw(iommu->cap);
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- agaw = width_to_agaw(adjust_width);
- /* FIXME: hardware doesn't support it, choose a bigger one? */
- sagaw = cap_sagaw(iommu->cap);
- if ( !test_bit(agaw, &sagaw) )
- {
- gdprintk(XENLOG_ERR VTDPREFIX,
- "IOMMU: hardware doesn't support the agaw\n");
- agaw = find_next_bit(&sagaw, 5, agaw);
- if ( agaw >= 5 )
- return -ENODEV;
- }
- hd->agaw = agaw;
+ hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
if ( d->domain_id == 0 )
{
@@ -1115,7 +1090,8 @@ static int domain_context_mapping_one(
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct context_entry *context, *context_entries;
unsigned long flags;
- u64 maddr;
+ u64 maddr, pgd_maddr;
+ int agaw;
maddr = bus_to_context_maddr(iommu, bus);
context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
@@ -1135,17 +1111,33 @@ static int domain_context_mapping_one(
else
{
#endif
+ /* Ensure we have pagetables allocated down to leaf PTE. */
if ( hd->pgd_maddr == 0 )
{
- hd->pgd_maddr = alloc_pgtable_maddr();
+ addr_to_dma_page_maddr(domain, 0, 1);
if ( hd->pgd_maddr == 0 )
{
+ nomem:
unmap_vtd_domain_page(context_entries);
spin_unlock_irqrestore(&iommu->lock, flags);
return -ENOMEM;
}
}
- context_set_address_root(*context, hd->pgd_maddr);
+
+ /* Skip top levels of page tables for 2- and 3-level DRHDs. */
+ pgd_maddr = hd->pgd_maddr;
+ for ( agaw = level_to_agaw(4);
+ agaw != level_to_agaw(iommu->nr_pt_levels);
+ agaw-- )
+ {
+ struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
+ pgd_maddr = dma_pte_addr(*p);
+ unmap_vtd_domain_page(p);
+ if ( pgd_maddr == 0 )
+ goto nomem;
+ }
+
+ context_set_address_root(*context, pgd_maddr);
context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
#ifdef CONTEXT_PASSTHRU
}
@@ -1156,10 +1148,10 @@ static int domain_context_mapping_one(
* be 1 based as required by intel's iommu hw.
*/
context_set_domain_id(context, domain);
- context_set_address_width(*context, hd->agaw);
+ context_set_address_width(*context, agaw);
context_set_fault_enable(*context);
context_set_present(*context);
- iommu_flush_cache_entry(iommu, context);
+ iommu_flush_cache_entry(context);
unmap_vtd_domain_page(context_entries);
@@ -1316,7 +1308,7 @@ static int domain_context_unmap_one(
spin_lock_irqsave(&iommu->lock, flags);
context_clear_present(*context);
context_clear_entry(*context);
- iommu_flush_cache_entry(iommu, context);
+ iommu_flush_cache_entry(context);
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
unmap_vtd_domain_page(context_entries);
@@ -1499,9 +1491,6 @@ int intel_iommu_map_page(
u64 pg_maddr;
int pte_present;
- drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
- iommu = drhd->iommu;
-
#ifdef CONTEXT_PASSTHRU
/* do nothing if dom0 and iommu supports pass thru */
if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
@@ -1516,7 +1505,7 @@ int intel_iommu_map_page(
pte_present = dma_pte_present(*pte);
dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
- iommu_flush_cache_entry(iommu, pte);
+ iommu_flush_cache_entry(pte);
unmap_vtd_domain_page(page);
for_each_drhd_unit ( drhd )
@@ -1565,10 +1554,9 @@ int iommu_page_mapping(struct domain *do
int index;
u64 pg_maddr;
- drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
- iommu = drhd->iommu;
if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
return -EINVAL;
+
iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
start_pfn = hpa >> PAGE_SHIFT_4K;
end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
@@ -1582,7 +1570,7 @@ int iommu_page_mapping(struct domain *do
pte = page + (start_pfn & LEVEL_MASK);
dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
dma_set_pte_prot(*pte, prot);
- iommu_flush_cache_entry(iommu, pte);
+ iommu_flush_cache_entry(pte);
unmap_vtd_domain_page(page);
start_pfn++;
index++;
diff -r 121d196b4cc8 -r ecd266cebcab xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h Thu May 29 14:30:48 2008 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.h Fri May 30 15:06:08 2008 +0100
@@ -236,6 +236,7 @@ struct context_entry {
#define LEVEL_STRIDE (9)
#define LEVEL_MASK ((1 << LEVEL_STRIDE) - 1)
#define PTE_NUM (1 << LEVEL_STRIDE)
+#define level_to_agaw(val) ((val) - 2)
#define agaw_to_level(val) ((val) + 2)
#define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
#define width_to_agaw(w) ((w - 30)/LEVEL_STRIDE)
diff -r 121d196b4cc8 -r ecd266cebcab xen/drivers/passthrough/vtd/vtd.h
--- a/xen/drivers/passthrough/vtd/vtd.h Thu May 29 14:30:48 2008 +0100
+++ b/xen/drivers/passthrough/vtd/vtd.h Fri May 30 15:06:08 2008 +0100
@@ -66,7 +66,7 @@ void *map_vtd_domain_page(u64 maddr);
void *map_vtd_domain_page(u64 maddr);
void unmap_vtd_domain_page(void *va);
-void iommu_flush_cache_entry(struct iommu *iommu, void *addr);
-void iommu_flush_cache_page(struct iommu *iommu, void *addr);
+void iommu_flush_cache_entry(void *addr);
+void iommu_flush_cache_page(void *addr);
#endif // _VTD_H_
diff -r 121d196b4cc8 -r ecd266cebcab xen/drivers/passthrough/vtd/x86/vtd.c
--- a/xen/drivers/passthrough/vtd/x86/vtd.c Thu May 29 14:30:48 2008 +0100
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c Fri May 30 15:06:08 2008 +0100
@@ -41,8 +41,6 @@ u64 alloc_pgtable_maddr(void)
{
struct page_info *pg;
u64 *vaddr;
- struct acpi_drhd_unit *drhd;
- struct iommu *iommu;
pg = alloc_domheap_page(NULL, 0);
vaddr = map_domain_page(page_to_mfn(pg));
@@ -50,9 +48,7 @@ u64 alloc_pgtable_maddr(void)
return 0;
memset(vaddr, 0, PAGE_SIZE);
- drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
- iommu = drhd->iommu;
- iommu_flush_cache_page(iommu, vaddr);
+ iommu_flush_cache_page(vaddr);
unmap_domain_page(vaddr);
return page_to_maddr(pg);
diff -r 121d196b4cc8 -r ecd266cebcab xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h Thu May 29 14:30:48 2008 +0100
+++ b/xen/include/xen/iommu.h Fri May 30 15:06:08 2008 +0100
@@ -47,6 +47,7 @@ struct iommu {
void __iomem *reg; /* Pointer to hardware regs, virtual addr */
u32 index; /* Sequence number of iommu */
u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+ u32 nr_pt_levels;
u64 cap;
u64 ecap;
spinlock_t lock; /* protect context, domain ids */