VMX: fix freeing of EPT pages when an EPT superpage split fails:
1) implement the EPT superpage split recursively, forming an
EPT sub-tree before the real installation;
2) also free an EPT sub-tree recursively;
3) change the last parameter of ept_next_level() from a shift
bit count to the next walk level;
Signed-off-by: Xin Li <xin.li@xxxxxxxxx>
diff -r f483b5ce7be2 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Jul 02 19:04:57 2010 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Sat Jul 03 21:29:56 2010 +0800
@@ -118,6 +118,74 @@
return 1;
}
+/* Free the EPT sub-tree behind an entry. */
+void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level)
+{
+ /* End if the entry is a leaf entry. */
+ if ( level == 0 || !is_epte_present(ept_entry) ||
+ is_epte_superpage(ept_entry) )
+ return;
+
+ if ( level > 1 )
+ {
+ ept_entry_t *epte = map_domain_page(ept_entry->mfn);
+ for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+ ept_free_entry(d, epte + i, level - 1);
+ unmap_domain_page(epte);
+ }
+
+ d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn));
+}
+
+static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry,
+ int level, int target)
+{
+ ept_entry_t new_ept, *table;
+ uint64_t trunk;
+ int rv = 1;
+
+ /* End if the entry is a leaf entry or reaches the target level. */
+ if ( level == 0 || level == target )
+ return rv;
+
+ ASSERT(is_epte_superpage(ept_entry));
+
+ if ( !ept_set_middle_entry(d, &new_ept) )
+ return 0;
+
+ table = map_domain_page(new_ept.mfn);
+ trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
+
+ for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
+ {
+ ept_entry_t *epte = table + i;
+
+ epte->emt = ept_entry->emt;
+ epte->ipat = ept_entry->ipat;
+ epte->sp = (level > 1) ? 1 : 0;
+ epte->avail1 = ept_entry->avail1;
+ epte->avail2 = 0;
+ epte->mfn = ept_entry->mfn + i * trunk;
+
+ ept_p2m_type_to_flags(epte, epte->avail1);
+
+ if ( (level - 1) == target )
+ continue;
+
+ ASSERT(is_epte_superpage(epte));
+
+ if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) )
+ break;
+ }
+
+ unmap_domain_page(table);
+
+ /* Even on failure we should install the newly allocated ept page. */
+ *ept_entry = new_ept;
+
+ return rv;
+}
+
/* Take the currently mapped table, find the corresponding gfn entry,
* and map the next table, if available. If the entry is empty
* and read_only is set,
@@ -134,14 +202,18 @@
*/
static int ept_next_level(struct domain *d, bool_t read_only,
ept_entry_t **table, unsigned long *gfn_remainder,
- u32 shift)
+ int next_level)
{
ept_entry_t *ept_entry;
- ept_entry_t *next;
- u32 index;
+ u32 shift, index;
+
+ shift = next_level * EPT_TABLE_ORDER;
index = *gfn_remainder >> shift;
+ /* index must fall within the page */
+ ASSERT(index < EPT_PAGETABLE_ENTRIES);
+
ept_entry = (*table) + index;
if ( !is_epte_present(ept_entry) )
@@ -161,69 +233,15 @@
return GUEST_TABLE_SUPER_PAGE;
else
{
+ unsigned long mfn = ept_entry->mfn;
+
+ unmap_domain_page(*table);
+ *table = map_domain_page(mfn);
*gfn_remainder &= (1UL << shift) - 1;
- next = map_domain_page(ept_entry->mfn);
- unmap_domain_page(*table);
- *table = next;
return GUEST_TABLE_NORMAL_PAGE;
}
}
-/* It's super page before and we should break down it now. */
-static int ept_split_large_page(struct domain *d,
- ept_entry_t **table, u32 *index,
- unsigned long gfn, int level)
-{
- ept_entry_t *prev_table = *table;
- ept_entry_t *split_table = NULL;
- ept_entry_t *split_entry = NULL;
- ept_entry_t *ept_entry = (*table) + (*index);
- ept_entry_t temp_ept_entry;
- unsigned long s_gfn, s_mfn;
- unsigned long offset, trunk;
- int i;
-
- /* alloc new page for new ept middle level entry which is
- * before a leaf super entry
- */
-
- if ( !ept_set_middle_entry(d, &temp_ept_entry) )
- return 0;
-
- /* split the super page to small next level pages */
- split_table = map_domain_page(temp_ept_entry.mfn);
- offset = gfn & ((1UL << (level * EPT_TABLE_ORDER)) - 1);
- trunk = (1UL << ((level-1) * EPT_TABLE_ORDER));
-
- for ( i = 0; i < (1UL << EPT_TABLE_ORDER); i++ )
- {
- s_gfn = gfn - offset + i * trunk;
- s_mfn = ept_entry->mfn + i * trunk;
-
- split_entry = split_table + i;
- split_entry->emt = ept_entry->emt;
- split_entry->ipat = ept_entry->ipat;
-
- split_entry->sp = (level > 1) ? 1 : 0;
-
- split_entry->mfn = s_mfn;
-
- split_entry->avail1 = ept_entry->avail1;
- split_entry->avail2 = 0;
- /* last step */
- split_entry->r = split_entry->w = split_entry->x = 1;
- ept_p2m_type_to_flags(split_entry, ept_entry->avail1);
- }
-
- *ept_entry = temp_ept_entry;
-
- *index = offset / trunk;
- *table = split_table;
- unmap_domain_page(prev_table);
-
- return 1;
-}
-
/*
* ept_set_entry() computes 'need_modify_vtd_table' for itself,
* by observing whether any gfn->mfn translations are modified.
@@ -265,7 +283,7 @@
for ( i = ept_get_wl(d); i > target; i-- )
{
- ret = ept_next_level(d, 0, &table, &gfn_remainder, i *
EPT_TABLE_ORDER);
+ ret = ept_next_level(d, 0, &table, &gfn_remainder, i);
if ( !ret )
goto out;
else if ( ret != GUEST_TABLE_NORMAL_PAGE )
@@ -275,12 +293,10 @@
ASSERT(ret != GUEST_TABLE_POD_PAGE || i != target);
index = gfn_remainder >> (i * EPT_TABLE_ORDER);
- gfn_remainder &= (1UL << (i * EPT_TABLE_ORDER)) - 1;
+ offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
ept_entry = table + index;
- offset = gfn_remainder;
-
/*
* When we are here, we must be on a leaf ept entry
* with i == target or i > target.
@@ -301,15 +317,14 @@
direct_mmio);
ept_entry->ipat = ipat;
ept_entry->sp = order ? 1 : 0;
+ ept_entry->avail1 = p2mt;
+ ept_entry->avail2 = 0;
if ( ept_entry->mfn == mfn_x(mfn) )
need_modify_vtd_table = 0;
else
ept_entry->mfn = mfn_x(mfn);
- ept_entry->avail1 = p2mt;
- ept_entry->avail2 = 0;
-
ept_p2m_type_to_flags(ept_entry, p2mt);
}
else
@@ -318,33 +333,50 @@
else
{
/* We need to split the original page. */
- ept_entry_t *split_ept_entry;
+ ept_entry_t split_ept_entry;
ASSERT(is_epte_superpage(ept_entry));
- for ( ; i > target; i-- )
+ split_ept_entry = *ept_entry;
+
+ if ( !ept_split_super_page(d, &split_ept_entry, i, target) )
{
- rv = ept_split_large_page(d, &table, &index, gfn, i);
- if ( !rv )
- goto out;
+ ept_free_entry(d, &split_ept_entry, i);
+ goto out;
}
- split_ept_entry = table + index;
- split_ept_entry->avail1 = p2mt;
- ept_p2m_type_to_flags(split_ept_entry, p2mt);
- split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
- direct_mmio);
- split_ept_entry->ipat = ipat;
+ /* now install the newly split ept sub-tree */
+ /* NB: please make sure the domain is paused and there is no in-flight VT-d DMA. */
+ *ept_entry = split_ept_entry;
- if ( split_ept_entry->mfn == mfn_x(mfn) )
- need_modify_vtd_table = 0;
- else
- split_ept_entry->mfn = mfn_x(mfn);
+ /* then move to the level we want to make real changes */
+ for ( ; i > target; i-- )
+ ept_next_level(d, 0, &table, &gfn_remainder, i);
+
+ ASSERT(i == target);
+
+ index = gfn_remainder >> (i * EPT_TABLE_ORDER);
+ offset = gfn_remainder & ((1UL << (i * EPT_TABLE_ORDER)) - 1);
+
+ ept_entry = table + index;
+
+ ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio);
+ ept_entry->ipat = ipat;
+ ept_entry->sp = i ? 1 : 0;
+ ept_entry->avail1 = p2mt;
+ ept_entry->avail2 = 0;
+
+ if ( ept_entry->mfn == mfn_x(mfn) )
+ need_modify_vtd_table = 0;
+ else /* the caller should take care of the previous page */
+ ept_entry->mfn = mfn_x(mfn);
+
+ ept_p2m_type_to_flags(ept_entry, p2mt);
}
/* Track the highest gfn for which we have ever had a valid mapping */
- if ( mfn_valid(mfn_x(mfn))
- && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+ if ( mfn_valid(mfn_x(mfn)) &&
+ (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
/* Success */
@@ -366,11 +398,11 @@
for ( i = 0; i < (1 << order); i++ )
iommu_map_page(
d, gfn - offset + i, mfn_x(mfn) - offset + i,
- IOMMUF_readable|IOMMUF_writable);
+ IOMMUF_readable | IOMMUF_writable);
}
else if ( !order )
iommu_map_page(
- d, gfn, mfn_x(mfn), IOMMUF_readable|IOMMUF_writable);
+ d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable);
}
else
{
@@ -410,8 +442,7 @@
for ( i = ept_get_wl(d); i > 0; i-- )
{
retry:
- ret = ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER);
+ ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
if ( !ret )
goto out;
else if ( ret == GUEST_TABLE_POD_PAGE )
@@ -498,8 +529,7 @@
for ( i = ept_get_wl(d); i > 0; i-- )
{
- ret = ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER);
+ ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
if ( !ret || ret == GUEST_TABLE_POD_PAGE )
goto out;
else if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -722,8 +752,7 @@
for ( i = ept_get_wl(d); i > 0; i-- )
{
- ret = ept_next_level(d, 1, &table, &gfn_remainder,
- i * EPT_TABLE_ORDER);
+ ret = ept_next_level(d, 1, &table, &gfn_remainder, i);
if ( ret != GUEST_TABLE_NORMAL_PAGE )
break;
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|