Hi Tim,
Thanks for your careful review. I will change the patch
according to your comments in the next version.
Thanks!
Dongxiao
Tim Deegan wrote:
> At 03:20 +0000 on 23 Feb (1266895248), Xu, Dongxiao wrote:
>> EPT: 1GB large page support.
>>
>> Alloc 1GB large page for EPT if possible. It also contains the logic
>> to split
>> large page into small ones (2M or 4k).
>>
>> Signed-off-by: Dongxiao Xu <dongxiao.xu@xxxxxxxxx>
>> Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx>
>
>
>> diff -r af2a0ed32ad5 xen/arch/x86/mm/hap/p2m-ept.c
>> --- a/xen/arch/x86/mm/hap/p2m-ept.c Sat Feb 20 19:25:07 2010 +0800
>> +++ b/xen/arch/x86/mm/hap/p2m-ept.c Sat Feb 20 19:43:49 2010 +0800
>> @@ -183,17 +239,21 @@ ept_set_entry(struct domain *d, unsigned
>> int i; int rv = 0;
>> int ret = 0;
>> + int split_level = 0;
>> int walk_level = order / EPT_TABLE_ORDER;
>> int direct_mmio = (p2mt == p2m_mmio_direct);
>> uint8_t ipat = 0;
>> int need_modify_vtd_table = 1;
>>
>> - /* We only support 4k and 2m pages now */
>> - BUG_ON(order && order != EPT_TABLE_ORDER);
>> + /* We support 4K, 2M, and 1G pages now */
>> + BUG_ON(order % EPT_TABLE_ORDER);
>
> So order 27 allocations are OK? :) Actually, now that I'm looking at
> this BUG_ON, I can't see why it should be true even before this
> change. Is it enforced anywhere?
>
>> if ( order != 0 )
>> if ( (gfn & ((1UL << order) - 1)) )
>> return 1;
>> +
>> + if ( order == 18 )
>> + printk(KERN_INFO "Got 1GB EPT entry.\n");
>
> Definitely not.
>
>> table =
>> map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
>>
>> @@ -208,15 +268,21 @@ ept_set_entry(struct domain *d, unsigned
>> break; }
>>
>> - /* If order == 9, we should never get SUPERPAGE or PoD.
>> + /* If order == 9 or 18, we should never get SUPERPAGE or PoD.
>
> Why is that true? Surely having introduced order-18 entries you can
> now get SUPERPAGE when asking for an order-9 entry.
>
>> * If order == 0, we should only get POD if we have a POD
>> superpage.
>> * If i > walk_level, we need to split the page; otherwise,
>> * just behave as normal. */
>> ASSERT(order == 0 || ret == GUEST_TABLE_NORMAL_PAGE);
>> ASSERT(ret != GUEST_TABLE_POD_PAGE || i != walk_level);
>>
>> + if ( i && i == walk_level && i <=
>> cpu_vmx_ept_super_page_level_limit + && ret ==
>> GUEST_TABLE_NORMAL_PAGE ) + ret = GUEST_TABLE_SUPER_PAGE;
>> +
>
> That needs a comment explaining it.
>
>> index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order);
>> offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
>> + + split_level = i;
>>
>> ept_entry = table + index;
>>
>> @@ -240,7 +306,7 @@ ept_set_entry(struct domain *d, unsigned
>>
>> if ( (ept_entry->avail1 == p2m_ram_logdirty)
>> && (p2mt == p2m_ram_rw) )
>> - for ( i = 0; i < 512; i++ )
>> + for ( i = 0; i < (1UL << order); i++ )
>
> Gosh. What's the performance of *that* like on live migration?
>
>> paging_mark_dirty(d, mfn_x(mfn) - offset +
>> i); } else
>> @@ -261,51 +327,22 @@ ept_set_entry(struct domain *d, unsigned }
>> else
>> {
>> - /*
>> - * It's super page before, now set one of the 4k pages, so
>> - * we should split the 2m page to 4k pages now.
>> - */
>> - /* Pointers to / into new (split) middle-level table */
>> - ept_entry_t *split_table = NULL;
>> - ept_entry_t *split_ept_entry = NULL;
>> - /* Info about old (superpage) table */
>> - unsigned long super_mfn = ept_entry->mfn;
>> - p2m_type_t super_p2mt = ept_entry->avail1;
>> - /* The new l2 entry which we'll write after we've build the
>> new l1 table */
>> - ept_entry_t l2_ept_entry;
>> -
>> - /*
>> - * Allocate new page for new ept middle level entry which is
>> - * before a leaf super entry
>> - */
>> - if ( !ept_set_middle_entry(d, &l2_ept_entry) )
>> - goto out;
>> -
>> - /* Split the super page before to 4k pages */
>> - split_table = map_domain_page(l2_ept_entry.mfn);
>> - offset = gfn & ((1 << EPT_TABLE_ORDER) - 1); -
>> - for ( i = 0; i < 512; i++ )
>> - {
>> - split_ept_entry = split_table + i;
>> - split_ept_entry->emt = epte_get_entry_emt(d, gfn -
>> offset + i,
>> -
>> _mfn(super_mfn + i),
>> - &ipat,
>> direct_mmio);
>> - split_ept_entry->ipat = ipat;
>> - split_ept_entry->sp_avail = 0;
>> - /* Don't increment mfn if it's a PoD mfn */
>> - if ( super_p2mt != p2m_populate_on_demand )
>> - split_ept_entry->mfn = super_mfn + i;
>> - else
>> - split_ept_entry->mfn = super_mfn;
>> - split_ept_entry->avail1 = super_p2mt;
>> - split_ept_entry->avail2 = 0;
>> -
>> - ept_p2m_type_to_flags(split_ept_entry, super_p2mt);
>> - }
>> -
>> - /* Set the destinated 4k page as normal */
>> - split_ept_entry = split_table + offset;
>> + int num = order / EPT_TABLE_ORDER;
>> + int level;
>> + ept_entry_t *split_ept_entry;
>> +
>> + if ( num >= cpu_vmx_ept_super_page_level_limit )
>> + num = cpu_vmx_ept_super_page_level_limit;
>> + for ( level = split_level; level > num ; level-- ) +
>> { + rv = ept_split_large_page(d, &table, &index, gfn,
>> level); + if ( !rv ) + goto out;
>> + }
>> +
>> + split_ept_entry = table + index;
>> + split_ept_entry->avail1 = p2mt;
>> + ept_p2m_type_to_flags(split_ept_entry, p2mt);
>> split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn,
>> &ipat,
>> direct_mmio); split_ept_entry->ipat = ipat;
>> @@ -314,12 +351,6 @@ ept_set_entry(struct domain *d, unsigned
>> need_modify_vtd_table = 0;
>> else
>> split_ept_entry->mfn = mfn_x(mfn);
>> -
>> - split_ept_entry->avail1 = p2mt;
>> - ept_p2m_type_to_flags(split_ept_entry, p2mt); -
>> - unmap_domain_page(split_table);
>> - *ept_entry = l2_ept_entry;
>> }
>>
>> /* Track the highest gfn for which we have ever had a valid
>> mapping */ @@ -336,7 +367,7 @@ out: ept_sync_domain(d);
>>
>> /* Now the p2m table is not shared with vt-d page table */
>> - if ( iommu_enabled && need_iommu(d) && need_modify_vtd_table )
>> + if ( rv && iommu_enabled && need_iommu(d) &&
>> need_modify_vtd_table ) { if ( p2mt == p2m_ram_rw )
>> {
>> @@ -459,7 +490,7 @@ out:
>> /* WARNING: Only caller doesn't care about PoD pages. So this
>> function will
>> * always return 0 for PoD pages, not populate them. If that
>> becomes necessary,
>> * pass a p2m_query_t type along to distinguish. */
>> -static ept_entry_t ept_get_entry_content(struct domain *d, unsigned
>> long gfn) +static ept_entry_t ept_get_entry_content(struct domain
>> *d, unsigned long gfn, int *level) { ept_entry_t *table =
>>
>> map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); @@
>> -487,6 +518,7 @@ static ept_entry_t ept_get_entry_content index
>> = gfn_remainder >> (i * EPT_TABLE_ORDER); ept_entry = table +
>> index; content = *ept_entry;
>> + *level = i;
>>
>> out:
>> unmap_domain_page(table);
>> @@ -579,7 +611,10 @@ void ept_change_entry_emt_with_range(str
>> p2m_lock(d->arch.p2m); for ( gfn = start_gfn; gfn <= end_gfn;
>> gfn++ ) { - e = ept_get_entry_content(d, gfn);
>> + int level = 0;
>> + uint64_t trunk = 0;
>> +
>> + e = ept_get_entry_content(d, gfn, &level);
>> if ( !p2m_has_emt(e.avail1) )
>> continue;
>>
>> @@ -588,25 +623,23 @@ void ept_change_entry_emt_with_range(str
>>
>> if ( e.sp_avail )
>> {
>> - if ( !(gfn & ((1 << EPT_TABLE_ORDER) - 1)) &&
>> - ((gfn + 0x1FF) <= end_gfn) )
>> + while ( level )
>> {
>> - /*
>> - * gfn assigned with 2M, and the end covers more
>> than 2m areas.
>> - * Set emt for super page.
>> - */
>> - order = EPT_TABLE_ORDER;
>> - if ( need_modify_ept_entry(d, gfn, mfn, e.ipat,
>> e.emt, e.avail1) )
>> - ept_set_entry(d, gfn, mfn, order, e.avail1);
>> - gfn += 0x1FF;
>> - }
>> - else
>> - {
>> - /* Change emt for partial entries of the 2m area. */
>> - if ( need_modify_ept_entry(d, gfn, mfn, e.ipat,
>> e.emt, e.avail1) )
>> - ept_set_entry(d, gfn, mfn, order, e.avail1);
>> - gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER)
>> + 0x1FF;
>> - }
>> + trunk = (1UL << (level * EPT_TABLE_ORDER)) - 1;
>> + if ( !(gfn & trunk) && (gfn + trunk <= end_gfn) ) +
>> { + /* gfn assigned with 2M, and the end covers
>> more than 2m areas.
>
> This code covers 2M and 1G superpages; please fix the comment too.
>
>> + * Set emt for super page.
>> + */
>> + order = level * EPT_TABLE_ORDER;
>> + if ( need_modify_ept_entry(d, gfn, mfn,
>> + e.ipat, e.emt, e.avail1) )
>> + ept_set_entry(d, gfn, mfn, order, e.avail1);
>> + gfn += trunk;
>> + break;
>> + }
>> + level--;
>> + }
>> }
>> else /* gfn assigned with 4k */
>> {
>
> Cheers,
>
> Tim.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|