Attached is the updated patch. I tried to call domain_kill() duing
memory_exchange, and do hit the situation that assign_pages will fail because
of domain dying. The domain can be killed cleanly still, no zombie domain left.
Changes from previous submission: a) lock protection for tot_pages, b) free the
page that has been moved out of the out_chunk_list.
Thanks
Yunhong Jiang
diff -r 02003bee3e80 xen/common/memory.c
--- a/xen/common/memory.c Thu Jun 25 18:31:10 2009 +0100
+++ b/xen/common/memory.c Wed Jul 01 19:55:23 2009 +0800
@@ -265,16 +265,22 @@ static long memory_exchange(XEN_GUEST_HA
out_chunk_order = exch.in.extent_order - exch.out.extent_order;
}
- /*
- * Only support exchange on calling domain right now. Otherwise there are
- * tricky corner cases to consider (e.g., dying domain).
- */
- if ( unlikely(exch.in.domid != DOMID_SELF) )
- {
- rc = IS_PRIV(current->domain) ? -EINVAL : -EPERM;
- goto fail_early;
- }
- d = current->domain;
+ if ( likely(exch.in.domid == DOMID_SELF) )
+ {
+ d = rcu_lock_current_domain();
+ }
+ else
+ {
+ if ( (d = rcu_lock_domain_by_id(exch.in.domid)) == NULL )
+ goto fail_early;
+
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ {
+ rcu_unlock_domain(d);
+ rc = -EPERM;
+ goto fail_early;
+ }
+ }
memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
d,
@@ -292,6 +298,7 @@ static long memory_exchange(XEN_GUEST_HA
if ( hypercall_preempt_check() )
{
exch.nr_exchanged = i << in_chunk_order;
+ rcu_unlock_domain(d);
if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
return -EFAULT;
return hypercall_create_continuation(
@@ -360,9 +367,39 @@ static long memory_exchange(XEN_GUEST_HA
j = 0;
while ( (page = page_list_remove_head(&out_chunk_list)) )
{
- if ( assign_pages(d, page, exch.out.extent_order,
- MEMF_no_refcount) )
+ rc = (assign_pages(d, page, exch.out.extent_order,
+ MEMF_no_refcount));
+
+ if (rc == -EINVAL)
BUG();
+ /* Domain is being destroy */
+ else if (rc == -ENODEV)
+ {
+ int dec_count, drop_dom_ref = 0;
+
+ /*
+ * Pages in in_chunk_list is stolen without
+ * decreasing the tot_pages. If the domain is dying when
+ * assign pages, we need decrease the count. For those pages
+ * that has been assigned, it should be covered by
+ * domain_relinquish_resources().
+ */
+ dec_count = ( ( 1UL << exch.in.extent_order)*
+ (1UL << in_chunk_order) -
+ j * (1UL << exch.out.extent_order));
+
+ spin_lock(&d->page_alloc_lock);
+ d->tot_pages -= dec_count;
+ if (dec_count && !d->tot_pages)
+ drop_dom_ref = 1;
+ spin_unlock(&d->page_alloc_lock);
+
+ if (drop_dom_ref)
+ put_domain(d);
+
+ free_domheap_pages(page, exch.out.extent_order);
+ goto dying;
+ }
/* Note that we ignore errors accessing the output extent list. */
(void)__copy_from_guest_offset(
@@ -378,15 +415,15 @@ static long memory_exchange(XEN_GUEST_HA
(void)__copy_to_guest_offset(
exch.out.extent_start, (i<<out_chunk_order)+j, &mfn, 1);
}
-
j++;
}
- BUG_ON(j != (1UL << out_chunk_order));
+ BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
}
exch.nr_exchanged = exch.in.nr_extents;
if ( copy_field_to_guest(arg, &exch, nr_exchanged) )
rc = -EFAULT;
+ rcu_unlock_domain(d);
return rc;
/*
@@ -398,7 +435,8 @@ static long memory_exchange(XEN_GUEST_HA
while ( (page = page_list_remove_head(&in_chunk_list)) )
if ( assign_pages(d, page, 0, MEMF_no_refcount) )
BUG();
-
+ dying:
+ rcu_unlock_domain(d);
/* Free any output pages we managed to allocate. */
while ( (page = page_list_remove_head(&out_chunk_list)) )
free_domheap_pages(page, exch.out.extent_order);
diff -r 02003bee3e80 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Thu Jun 25 18:31:10 2009 +0100
+++ b/xen/common/page_alloc.c Tue Jun 30 23:33:47 2009 +0800
@@ -1120,6 +1120,7 @@ int assign_pages(
unsigned int memflags)
{
unsigned long i;
+ int rc = -EINVAL;
spin_lock(&d->page_alloc_lock);
@@ -1127,6 +1128,7 @@ int assign_pages(
{
gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
d->domain_id);
+ rc = -ENODEV;
goto fail;
}
@@ -1136,6 +1138,7 @@ int assign_pages(
{
gdprintk(XENLOG_INFO, "Over-allocation for domain %u: %u > %u\n",
d->domain_id, d->tot_pages + (1 << order), d->max_pages);
+ rc = -EINVAL;
goto fail;
}
@@ -1160,7 +1163,7 @@ int assign_pages(
fail:
spin_unlock(&d->page_alloc_lock);
- return -1;
+ return rc;
}
Keir Fraser wrote:
> On 30/06/2009 10:40, "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx> wrote:
>
>>> B. Your adjustment of tot_pages is confusing. I can't convince
>>> myself the arithmetic is correct. Furthermore messing with
>>> tot_pages outside of the d->page_alloc_lock is not allowed.
>>
>> The idea of the adjustment is:
>>
>> In each loop, we remove some pages (the in_chunk_list) without
>> decrease the tot_pages. Then when we get domain is dying when assign
>> pages (the out_chunk_list), we need decrease the count. For those
>> page that has been assigned, it should be covered by
> domain_relinquish_resources(), so what we
>> need decrease is:
>> the number that has been removed - the number that has been assigned
>> already.
>
> There's still quite a logical leap to the arithmetic in your
> code. Spell it
> out in a comment.
>
> -- Keir
memory_exchange_v2.patch
Description: memory_exchange_v2.patch
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|