Keir Fraser wrote:
> Perhaps try dropping calls to domain_kill() into
> memory_exchange()? It may
> not complete the destroy, but it will set ->is_dying and
> trigger your error
> paths. Then you can 'xm destroy' from the tools afterwards to
> try to finish
> up the destroy, and see if you get left with a zombie or not.
It will cover some cases, but not all. But yes, I can try that.
>
> Looking at this patch I can immediately see that:
> A. Your extra check(s) of is_dying are pointless. Only the check in
> assign_pages() is critical. So just leave it to that.
I added that to avoid a meaningless loop if the domain is dying. But yes, it is not
important because of the low probability.
> B. Your adjustment of tot_pages is confusing. I can't
> convince myself the
> arithmetic is correct. Furthermore messing with tot_pages
> outside of the
> d->page_alloc_lock is not allowed.
The idea of the adjustment is:
In each loop, we remove some pages (the in_chunk_list) without decreasing
tot_pages. Then, when we find the domain is dying while assigning pages (the
out_chunk_list), we need to decrease the count. Pages that have already been
assigned should be covered by domain_relinquish_resources(), so what we
need to decrease is:
the number of pages that have been removed - the number that have already been
assigned.
Yes, I need to hold the page_alloc_lock when updating tot_pages.
>
> -- Keir
>
> On 30/06/2009 08:55, "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx> wrote:
>
>> Keir, this is the patch based on the discussion before. Can you
>> please have a look on it? I can't triger the corner case of domain
>> being dying, so I hope it can be achieved through code review.
>>
>> Thanks
>> Yunhong Jiang
>>
>> Extend memory_exchange to support exchange memory for foreign domain.
>> When domain is killed during the memory exchange, it will try to
>> decrease domain's page count that has been removed from page_list.
>>
>> Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>
>>
>> diff -r 02003bee3e80 xen/common/memory.c
>> --- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100
>> +++ b/xen/common/memory.c Tue Jun 30 01:44:28 2009 +0800
>> @@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA
>> out_chunk_order = exch.in.extent_order -
>> exch.out.extent_order; }
>>
>> - /*
>> - * Only support exchange on calling domain right now. Otherwise
>> there are
>> - * tricky corner cases to consider (e.g., dying domain).
>> - */
>> - if ( unlikely(exch.in.domid != DOMID_SELF) )
>> - {
>> - rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
>> - goto fail_early;
>> - }
>> - d = current->domain;
>> + if ( likely(exch.in.domid == DOMID_SELF) )
>> + {
>> + d = rcu_lock_current_domain();
>> + }
>> + else
>> + {
>> + if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL ) +
>> goto fail_early; +
>> + if ( !IS_PRIV_FOR(current->domain, d) )
>> + {
>> + rcu_unlock_domain(d);
>> + rc = -EPERM;
>> + goto fail_early;
>> + }
>> + }
>>
>> memflags |= MEMF_bits(domain_clamp_alloc_bitsize( d,
>> @@ -292,11 +298,15 @@ static long memory_exchange(XEN_GUEST_HA
>> if ( hypercall_preempt_check() )
>> {
>> exch.nr_exchanged = i << in_chunk_order;
>> + rcu_unlock_domain(d);
>> if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
>> return -EFAULT;
>> return hypercall_create_continuation(
>> __HYPERVISOR_memory_op, "lh", XENMEM_exchange,
>> arg); } +
>> + if (d->is_dying)
>> + goto dying;
>>
>> /* Steal a chunk's worth of input pages from the domain. */
>> for ( j = 0; j < (1UL << in_chunk_order); j++ )
>> @@ -360,9 +370,24 @@ static long memory_exchange(XEN_GUEST_HA
>> j = 0; while ( (page =
>> page_list_remove_head(&out_chunk_list)) ) {
>> - if ( assign_pages(d, page, exch.out.extent_order,
>> - MEMF_no_refcount) )
>> + rc = (assign_pages(d, page, exch.out.extent_order,
>> + MEMF_no_refcount));
>> +
>> + if (rc == -EINVAL)
>> BUG();
>> + /* Domain is being destroy */
>> + else if (rc == -ENODEV)
>> + {
>> + int dec_count;
>> + dec_count = ( ( 1UL << exch.in.extent_order)*
>> + (1UL << in_chunk_order) -
>> + j * (1UL << exch.out.extent_order));
>> + d->tot_pages -= dec_count;
>> +
>> + if (dec_count && !d->tot_pages)
>> + put_domain(d);
>> + break;
>> + }
>>
>> /* Note that we ignore errors accessing the output
>> extent list. */ (void)__copy_from_guest_offset(
>> @@ -378,15 +403,15 @@ static long memory_exchange(XEN_GUEST_HA
>> (void)__copy_to_guest_offset(
>> exch.out.extent_start,
> (i<<out_chunk_order)+j, &mfn, 1);
>> }
>> -
>> j++;
>> }
>> - BUG_ON(j != (1UL << out_chunk_order));
>> + BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order))
>> ); }
>>
>> exch.nr_exchanged = exch.in.nr_extents;
>> if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
>> rc = -EFAULT; + rcu_unlock_domain(d);
>> return rc;
>>
>> /*
>> @@ -398,7 +423,8 @@ static long memory_exchange(XEN_GUEST_HA
>> while ( (page = page_list_remove_head(&in_chunk_list)) )
>> if ( assign_pages(d, page, 0, MEMF_no_refcount) )
>> BUG(); -
>> + dying:
>> + rcu_unlock_domain(d);
>> /* Free any output pages we managed to allocate. */
>> while ( (page = page_list_remove_head(&out_chunk_list)) )
>> free_domheap_pages(page, exch.out.extent_order);
>> diff -r 02003bee3e80 xen/common/page_alloc.c
>> --- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100
>> +++ b/xen/common/page_alloc.c Sun Jun 28 23:39:28 2009 +0800
>> @@ -1120,6 +1120,7 @@ int assign_pages(
>> unsigned int memflags)
>> {
>> unsigned long i;
>> + int rc = -EINVAL;
>>
>> spin_lock(&d->page_alloc_lock);
>>
>> @@ -1127,6 +1128,7 @@ int assign_pages(
>> {
>> gdprintk(XENLOG_INFO, "Cannot assign page to domain%d --
>> dying.\n", d->domain_id);
>> + rc = -ENODEV;
>> goto fail;
>> }
>>
>> @@ -1136,6 +1138,7 @@ int assign_pages(
>> {
>> gdprintk(XENLOG_INFO, "Over-allocation for domain %u:
>> %u > %u\n", d->domain_id, d->tot_pages + (1 <<
>> order), d->max_pages); + rc = -EINVAL; goto
>> fail; }
>>
>> @@ -1160,7 +1163,7 @@ int assign_pages(
>>
>> fail:
>> spin_unlock(&d->page_alloc_lock);
>> - return -1;
>> + return rc;
>> }
>>
>>
>>
>>
>> Keir Fraser wrote:
>>> On 01/06/2009 09:40, "Jiang, Yunhong"
> <yunhong.jiang@xxxxxxxxx> wrote:
>>>
>>>> Thanks for suggestion. I'm always nervous on API changes.
>>>>
>>>> I'm still considering if any other potential usage mode for patch
>>>> 5/6l (i.e. change page table or exchange memory for other domain),,
>>>> but frustratedly realize no other requirement.
>>>
>>> The API for XENMEM_exchange already permits specifying a
>>> foreign domain, so
>>> you're just filling in the missing implementation. By the way I did
>>> not check your implementation, but the major subtlety is that you
>>> must take care for domains which are dying (d->is_dying). Giving
>>> new pages to
>>> such a domain
>>> is generally a bug because you race
>>> domain_relinquish_resources() which is
>>> walking the domain's page lists and freeing the pages. You
>>> therefore risk leaving a zombie domain which will not die (refcount
>>> never zero).
>>>
>>> See for example how that is handled with page_alloc.c for calls such
>>> as XENMEM_populate_physmap, and how it is handled specially by
>>> MMUEXT_PIN_L*_TABLE.
>>>
>>> -- Keir
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|