As suggested by Keir, simplify general page reference management for
preempted (partially [in-]validated) pages: Reserve one reference that
can be acquired without the risk of overflowing the reference count,
thus allowing a simplified get_page() equivalent that cannot fail (but
must be used with care).
Doing this conversion exposed a latent issue in the earlier changes in
this area: the extra reference must be acquired before the 'normal'
reference gets dropped, so the patch fixes this at once in both the
alloc_page_type() and free_page_type() paths (it is really only the
latter that failed to work with the change described above).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
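
(Illustration only, not part of the patch: a minimal single-threaded
model of the reserved-reference scheme, for readers who want the idea
in isolation.  model_get_page(), model_get_page_light(), COUNT_MASK and
struct fake_page are made-up stand-ins for get_page(), get_page_light(),
PGC_count_mask and struct page_info, and the cmpxchg() retry loops are
omitted.)

#include <assert.h>
#include <stdint.h>

#define COUNT_MASK 0x7fffffffu            /* stand-in for PGC_count_mask */

struct fake_page {
    uint32_t count_info;                  /* general reference count */
};

/* Fails one count early, always leaving one spare slot below the mask. */
static int model_get_page(struct fake_page *pg)
{
    uint32_t x = pg->count_info;

    if ( (x & COUNT_MASK) == 0 ||         /* not allocated? */
         ((x + 2) & COUNT_MASK) <= 1 )    /* would consume the spare slot */
        return 0;
    pg->count_info = x + 1;
    return 1;
}

/* May consume the spare slot, at most once per holder; cannot fail. */
static void model_get_page_light(struct fake_page *pg)
{
    uint32_t x = pg->count_info;

    assert(x & COUNT_MASK);               /* caller already holds a ref */
    assert((x + 1) & COUNT_MASK);         /* spare slot still available */
    pg->count_info = x + 1;
}

int main(void)
{
    struct fake_page pg = { .count_info = 1 };   /* allocated, one ref */

    assert(model_get_page(&pg));          /* ordinary reference */
    model_get_page_light(&pg);            /* spare slot, cannot fail */
    assert(pg.count_info == 3);
    return 0;
}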
Index: 2008-10-27/xen/arch/x86/mm.c
===================================================================
--- 2008-10-27.orig/xen/arch/x86/mm.c 2008-10-29 16:46:52.000000000 +0100
+++ 2008-10-27/xen/arch/x86/mm.c 2008-10-31 16:13:56.000000000 +0100
@@ -1856,7 +1856,8 @@ int get_page(struct page_info *page, str
nx = x + 1;
d = nd;
if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
- unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+ /* Keep one spare reference to be acquired by get_page_light(). */
+ unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
unlikely(d != _domain) ) /* Wrong owner? */
{
if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
@@ -1878,6 +1879,28 @@ int get_page(struct page_info *page, str
return 1;
}
+/*
+ * Special version of get_page() to be used exclusively when
+ * - a page is known to already have a non-zero reference count
+ * - the page does not need its owner to be checked
+ * - it will not be called more than once without dropping the thus
+ * acquired reference again.
+ * Due to get_page() reserving one reference, this call cannot fail.
+ */
+static void get_page_light(struct page_info *page)
+{
+ u32 x, nx, y = page->count_info;
+
+ do {
+ x = y;
+ nx = x + 1;
+ BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
+ BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
+ y = cmpxchg(&page->count_info, x, nx);
+ }
+ while ( unlikely(y != x) );
+}
+
static int alloc_page_type(struct page_info *page, unsigned long type,
int preemptible)
@@ -1885,10 +1908,6 @@ static int alloc_page_type(struct page_i
struct domain *owner = page_get_owner(page);
int rc;
- /* Obtain an extra reference to retain if we set PGT_partial. */
- if ( preemptible && !get_page(page, owner) )
- return -EINVAL;
-
/* A page table is dirtied when its type count becomes non-zero. */
if ( likely(owner != NULL) )
paging_mark_dirty(owner, page_to_mfn(page));
@@ -1922,14 +1941,10 @@ static int alloc_page_type(struct page_i
wmb();
if ( rc == -EAGAIN )
{
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
- return -EAGAIN;
}
-
- if ( preemptible )
- put_page(page);
-
- if ( rc == -EINTR )
+ else if ( rc == -EINTR )
{
ASSERT((page->u.inuse.type_info &
(PGT_count_mask|PGT_validated|PGT_partial)) == 1);
@@ -2037,8 +2052,8 @@ static int __put_page_type_final(struct
page->u.inuse.type_info--;
break;
case -EINTR:
- ASSERT(!(page->u.inuse.type_info &
- (PGT_count_mask|PGT_validated|PGT_partial)));
+ ASSERT((page->u.inuse.type_info &
+ (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
if ( !(shadow_mode_enabled(page_get_owner(page)) &&
(page->count_info & PGC_page_table)) )
page->tlbflush_timestamp = tlbflush_current_time();
@@ -2047,17 +2062,13 @@ static int __put_page_type_final(struct
break;
case -EAGAIN:
wmb();
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
- /* Must skip put_page() below. */
- preemptible = 0;
break;
default:
BUG();
}
- if ( preemptible )
- put_page(page);
-
return rc;
}
@@ -2066,10 +2077,7 @@ static int __put_page_type(struct page_i
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
-
- /* Obtain an extra reference to retain if we set PGT_partial. */
- if ( preemptible && !get_page(page, page_get_owner(page)) )
- return -EINVAL;
+ int rc = 0;
for ( ; ; )
{
@@ -2092,10 +2100,11 @@ static int __put_page_type(struct page_i
if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
x, nx)) != x) )
continue;
+ /* We cleared the 'valid bit' so we do the clean up. */
+ rc = __put_page_type_final(page, x, preemptible);
if ( x & PGT_partial )
put_page(page);
- /* We cleared the 'valid bit' so we do the clean up. */
- return __put_page_type_final(page, x, preemptible);
+ break;
}
/*
@@ -2114,17 +2123,10 @@ static int __put_page_type(struct page_i
break;
if ( preemptible && hypercall_preempt_check() )
- {
- if ( preemptible )
- put_page(page);
return -EINTR;
- }
}
- if ( preemptible )
- put_page(page);
-
- return 0;
+ return rc;
}
@@ -2132,6 +2134,7 @@ static int __get_page_type(struct page_i
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ int rc = 0;
ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
@@ -2227,11 +2230,7 @@ static int __get_page_type(struct page_i
}
if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
- {
- if ( (x & PGT_partial) && !(nx & PGT_partial) )
- put_page(page);
break;
- }
if ( preemptible && hypercall_preempt_check() )
return -EINTR;
@@ -2258,10 +2257,13 @@ static int __get_page_type(struct page_i
page->nr_validated_ptes = 0;
page->partial_pte = 0;
}
- return alloc_page_type(page, type, preemptible);
+ rc = alloc_page_type(page, type, preemptible);
}
- return 0;
+ if ( (x & PGT_partial) && !(nx & PGT_partial) )
+ put_page(page);
+
+ return rc;
}
void put_page_type(struct page_info *page)
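
(Likewise illustration only: a standalone model of the ordering
invariant the patch establishes around PGT_partial.  The extra
reference is taken via get_page_light() before the flag becomes
visible, and whoever clears the flag drops that reference afterwards,
via the put_page() calls in __put_page_type() and __get_page_type().
mark_partial(), clear_partial() and FAKE_PGT_PARTIAL are hypothetical
names used only for this sketch.)

#include <assert.h>
#include <stdint.h>

#define FAKE_PGT_PARTIAL 0x80000000u      /* stand-in for PGT_partial */

struct fake_page {
    uint32_t count_info;                  /* general reference count */
    uint32_t type_info;                   /* type count plus flags */
};

/* Take the extra reference *before* publishing the flag, so the general
 * count cannot reach zero while PGT_partial is visible to others. */
static void mark_partial(struct fake_page *pg)
{
    assert(pg->count_info > 0);           /* get_page_light() precondition */
    pg->count_info++;                     /* the get_page_light() reference */
    pg->type_info |= FAKE_PGT_PARTIAL;
}

/* Whoever clears the flag drops the paired reference again. */
static void clear_partial(struct fake_page *pg)
{
    pg->type_info &= ~FAKE_PGT_PARTIAL;
    pg->count_info--;                     /* the matching put_page() */
}

int main(void)
{
    struct fake_page pg = { .count_info = 1, .type_info = 0 };

    mark_partial(&pg);                    /* e.g. validation hit -EAGAIN */
    assert(pg.count_info == 2 && (pg.type_info & FAKE_PGT_PARTIAL));
    clear_partial(&pg);                   /* continuation finished the job */
    assert(pg.count_info == 1 && !(pg.type_info & FAKE_PGT_PARTIAL));
    return 0;
}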