Implement Xen interface to PoD functionality.
* Increase the number of MEMOP bits from 4 to 6 (increasing the number
of available memory operations from 16 to 64).
* Introduce XENMEMF_populate_on_demand, which will cause
populate_physmap() to fill a range with PoD entries rather than
backing it with RAM.
* Introduce the XENMEM_[sg]et_pod_target operations to the memory
hypercall, to get and set the PoD cache size. set_pod_target() should be
called during domain creation, as well as after modifying the memory
target of any domain which may have outstanding PoD entries.
Signed-off-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
diff -r 90feb993b0b8 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/arch/x86/mm.c Tue Dec 23 11:35:30 2008 +0000
@@ -3976,6 +3976,49 @@
return 0;
}
+ case XENMEM_set_pod_target:
+ case XENMEM_get_pod_target:
+ {
+ xen_pod_target_t target;
+ struct domain *d;
+
+ /* Support DOMID_SELF? */
+ if ( !IS_PRIV(current->domain) )
+ return -EINVAL;
+
+ if ( copy_from_guest(&target, arg, 1) )
+ return -EFAULT;
+
+ rc = rcu_lock_target_domain_by_id(target.domid, &d);
+ if ( rc != 0 )
+ return rc;
+
+ if ( op == XENMEM_set_pod_target )
+ {
+ if ( target.target_pages > d->max_pages )
+ {
+ rc = -EINVAL;
+ goto pod_target_out_unlock;
+ }
+
+ rc = p2m_pod_set_mem_target(d, target.target_pages);
+ }
+
+ target.tot_pages = d->tot_pages;
+ target.pod_cache_pages = d->arch.p2m->pod.count;
+ target.pod_entries = d->arch.p2m->pod.entry_count;
+
+ if ( copy_to_guest(arg, &target, 1) )
+ {
+ rc= -EFAULT;
+ goto pod_target_out_unlock;
+ }
+
+ pod_target_out_unlock:
+ rcu_unlock_domain(d);
+ return rc;
+ }
+
default:
return subarch_memory_op(op, arg);
}
diff -r 90feb993b0b8 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/arch/x86/mm/p2m.c Tue Dec 23 11:35:30 2008 +0000
@@ -387,6 +387,150 @@
return p;
}
+/* Set the size of the cache, allocating or freeing as necessary.
+ * Returns 0 on success, -ENOMEM/-EINVAL if allocating/freeing a page fails. */
+static int
+p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
+{
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+
+    /* Increasing the target */
+    while ( pod_target > p2md->pod.count )
+    {
+        struct page_info * page;
+        /* Allocate an order-9 (512-page) chunk only while at least that many
+         * pages are still missing; otherwise top up one page at a time. */
+        int order = ( (pod_target - p2md->pod.count) >= (1<<9) ) ? 9 : 0;
+
+        page = alloc_domheap_pages(d, order, 0);
+        if ( unlikely(page == NULL) )
+        {
+            ret = -ENOMEM;
+            goto out;
+        }
+
+        p2m_pod_cache_add(d, page, order);
+    }
+
+    /* Decreasing the target.  We hold the p2m lock here, so we don't need to
+     * worry about cache disappearing under our feet. */
+    while ( pod_target < p2md->pod.count )
+    {
+        struct page_info * page;
+        int order, i;
+
+        /* Grab the lock before checking that pod.super is empty, or the last
+         * entries may disappear before we grab the lock. */
+        spin_lock(&d->page_alloc_lock);
+
+        if ( (p2md->pod.count - pod_target) > (1<<9)
+             && !list_empty(&p2md->pod.super) )
+            order = 9;
+        else
+            order = 0;
+
+        page = p2m_pod_cache_get(d, order);
+
+        ASSERT(page != NULL);
+
+        spin_unlock(&d->page_alloc_lock);
+
+        /* Then free them */
+        for ( i = 0 ; i < (1 << order) ; i++ )
+        {
+            /* Copied from common/memory.c:guest_remove_page() */
+            if ( unlikely(!get_page(page+i, d)) )
+            {
+                gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
+                ret = -EINVAL;
+                goto out;
+            }
+
+            if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
+                put_page_and_type(page+i);
+
+            if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
+                put_page(page+i);
+
+            put_page(page+i);
+        }
+    }
+
+out:
+    return ret;
+}
+
+/*
+ * The "right behavior" here requires some careful thought.  First, some
+ * definitions:
+ * + M: static_max
+ * + B: number of pages the balloon driver has ballooned down to.
+ * + P: Number of populated pages.
+ * + T: Old target
+ * + T': New target
+ *
+ * The following equations should hold:
+ *  0 <= P <= T <= B <= M
+ *  d->arch.p2m->pod.entry_count == B - P
+ *  d->tot_pages == P + d->arch.p2m->pod.count
+ *
+ * Now we have the following potential cases to cover:
+ *  B < T'  : Set the PoD cache size equal to the number of outstanding PoD
+ *   entries.  The balloon driver will deflate the balloon to give back
+ *   the remainder of the ram to the guest OS.
+ *  T <T'<B : Increase PoD cache size.
+ *  T'<T<=B : Here we have a choice.  We can decrease the size of the cache,
+ *   get the memory right away.  However, that means every time we
+ *   reduce the memory target we risk the guest attempting to populate the
+ *   memory before the balloon driver has reached its new target.  Safer to
+ *   never reduce the cache size here, but only when the balloon driver frees
+ *   PoD ranges.
+ *
+ * If there are many zero pages, we could reach the target also by doing
+ * zero sweeps and marking the ranges PoD; but the balloon driver will have
+ * to free this memory eventually anyway, so we don't actually gain that much
+ * by doing so.
+ *
+ * NB that the equation (B<T') may require adjustment to the cache
+ * size as PoD pages are freed as well; i.e., freeing a PoD-backed
+ * entry when pod.entry_count == pod.count requires us to reduce both
+ * pod.entry_count and pod.count.
+ */
+int
+p2m_pod_set_mem_target(struct domain *d, unsigned long target)
+{
+    unsigned long pod_target, populated;
+    struct p2m_domain *p2md = d->arch.p2m;
+    int ret = 0;
+
+    /* P == B: Nothing to do. */
+    if ( p2md->pod.entry_count == 0 )
+        goto out;
+
+    /* T' < B: Don't reduce the cache size; let the balloon driver
+     * take care of it. */
+    if ( target < d->tot_pages )
+        goto out;
+
+    populated = d->tot_pages - p2md->pod.count;
+    pod_target = target - populated;
+
+    /* B < T': Set the cache size equal to # of outstanding entries,
+     * let the balloon driver fill in the rest. */
+    if ( pod_target > p2md->pod.entry_count )
+        pod_target = p2md->pod.entry_count;
+
+    /* The cache never shrinks here; pod_target == pod.count is legitimate
+     * (e.g. target == d->tot_pages), so the assertion must allow equality. */
+    ASSERT( pod_target >= p2md->pod.count );
+
+    ret = p2m_pod_set_cache_target(d, pod_target);
+
+out:
+    return ret;
+}
+
void
p2m_pod_empty_cache(struct domain *d)
{
@@ -537,6 +681,13 @@
ram--;
}
}
+
+ /* If we've reduced our "liabilities" beyond our "assets", free some */
+ if ( p2md->pod.entry_count < p2md->pod.count )
+ {
+ printk("b %d\n", p2md->pod.entry_count);
+ p2m_pod_set_cache_target(d, p2md->pod.entry_count);
+ }
/* If there are no more non-PoD entries, tell decrease_reservation() that
* there's nothing left to do. */
@@ -786,7 +937,7 @@
/* Stop if we're past our limit and we have found *something*.
*
* NB that this is a zero-sum game; we're increasing our cache size
- * by re-increasing our 'debt'. Since we hold the p2m lock,
+ * by increasing our 'debt'. Since we hold the p2m lock,
* (entry_count - count) must remain the same. */
if ( !list_empty(&p2md->pod.super) && i < limit )
break;
diff -r 90feb993b0b8 xen/arch/x86/x86_64/compat/mm.c
--- a/xen/arch/x86/x86_64/compat/mm.c Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/arch/x86/x86_64/compat/mm.c Tue Dec 23 11:35:30 2008 +0000
@@ -122,6 +122,29 @@
#define XLAT_memory_map_HNDL_buffer(_d_, _s_) ((void)0)
XLAT_memory_map(&cmp, nat);
#undef XLAT_memory_map_HNDL_buffer
+ if ( copy_to_guest(arg, &cmp, 1) )
+ rc = -EFAULT;
+
+ break;
+ }
+
+ case XENMEM_set_pod_target:
+ case XENMEM_get_pod_target:
+ {
+ struct compat_pod_target cmp;
+ struct xen_pod_target *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+
+ if ( copy_from_guest(&cmp, arg, 1) )
+ return -EFAULT;
+
+ XLAT_pod_target(nat, &cmp);
+
+ rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+ if ( rc < 0 )
+ break;
+
+ XLAT_pod_target(&cmp, nat);
+
if ( copy_to_guest(arg, &cmp, 1) )
rc = -EFAULT;
diff -r 90feb993b0b8 xen/common/memory.c
--- a/xen/common/memory.c Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/common/memory.c Tue Dec 23 11:35:30 2008 +0000
@@ -111,31 +111,40 @@
if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
goto out;
- page = alloc_domheap_pages(d, a->extent_order, a->memflags);
- if ( unlikely(page == NULL) )
+ if ( a->memflags & MEMF_populate_on_demand )
{
- gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
- "id=%d memflags=%x (%ld of %d)\n",
- a->extent_order, d->domain_id, a->memflags,
- i, a->nr_extents);
- goto out;
+ if ( guest_physmap_mark_populate_on_demand(d, gpfn,
+ a->extent_order) < 0 )
+ goto out;
}
+ else
+ {
+ page = alloc_domheap_pages(d, a->extent_order, a->memflags);
+ if ( unlikely(page == NULL) )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
+ "id=%d memflags=%x (%ld of %d)\n",
+ a->extent_order, d->domain_id, a->memflags,
+ i, a->nr_extents);
+ goto out;
+ }
- mfn = page_to_mfn(page);
- guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+ mfn = page_to_mfn(page);
+ guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
- if ( !paging_mode_translate(d) )
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- set_gpfn_from_mfn(mfn + j, gpfn + j);
+ if ( !paging_mode_translate(d) )
+ {
+ for ( j = 0; j < (1 << a->extent_order); j++ )
+ set_gpfn_from_mfn(mfn + j, gpfn + j);
- /* Inform the domain of the new page's machine address. */
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
- goto out;
+ /* Inform the domain of the new page's machine address. */
+ if ( unlikely(__copy_to_guest_offset(a->extent_list,
i, &mfn, 1)) )
+ goto out;
+ }
}
}
- out:
+out:
a->nr_done = i;
}
@@ -527,6 +536,10 @@
args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
+ if ( op == XENMEM_populate_physmap
+ && (reservation.mem_flags & XENMEMF_populate_on_demand) )
+ args.memflags |= MEMF_populate_on_demand;
+
if ( likely(reservation.domid == DOMID_SELF) )
{
d = rcu_lock_current_domain();
diff -r 90feb993b0b8 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/include/asm-x86/p2m.h Tue Dec 23 11:35:30 2008 +0000
@@ -261,6 +261,10 @@
* (usually in preparation for domain destruction) */
void p2m_pod_empty_cache(struct domain *d);
+/* Set populate-on-demand cache size so that the total memory allocated to a
+ * domain matches target */
+int p2m_pod_set_mem_target(struct domain *d, unsigned long target);
+
/* Call when decreasing memory reservation to handle PoD entries properly.
* Will return '1' if all entries were handled and nothing more need be done.*/
int
diff -r 90feb993b0b8 xen/include/public/memory.h
--- a/xen/include/public/memory.h Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/include/public/memory.h Tue Dec 23 11:35:30 2008 +0000
@@ -48,6 +48,8 @@
/* NUMA node to allocate from. */
#define XENMEMF_node(x) (((x) + 1) << 8)
#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
#endif
struct xen_memory_reservation {
@@ -299,6 +301,19 @@
typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+#define XENMEM_set_pod_target 16
+#define XENMEM_get_pod_target 17
+struct xen_pod_target {
+ /* IN: new memory target in pages; consulted only by XENMEM_set_pod_target */
+ uint64_t target_pages;
+ /* OUT: written back by both operations from the domain's current state */
+ uint64_t tot_pages; /* the domain's tot_pages */
+ uint64_t pod_cache_pages; /* the p2m's pod.count */
+ uint64_t pod_entries; /* the p2m's pod.entry_count */
+ /* IN: id of the domain to query or adjust */
+ domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
#endif /* __XEN_PUBLIC_MEMORY_H__ */
/*
diff -r 90feb993b0b8 xen/include/xen/hypercall.h
--- a/xen/include/xen/hypercall.h Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/include/xen/hypercall.h Tue Dec 23 11:35:30 2008 +0000
@@ -48,7 +48,7 @@
* at what point in the page list to resume. For this purpose I steal the
* high-order bits of the @cmd parameter, which are otherwise unused and zero.
*/
-#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+#define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */
#define MEMOP_CMD_MASK ((1 << MEMOP_EXTENT_SHIFT) - 1)
extern long
diff -r 90feb993b0b8 xen/include/xen/mm.h
--- a/xen/include/xen/mm.h Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/include/xen/mm.h Tue Dec 23 11:35:30 2008 +0000
@@ -72,6 +72,8 @@
/* memflags: */
#define _MEMF_no_refcount 0
#define MEMF_no_refcount (1U<<_MEMF_no_refcount)
+#define _MEMF_populate_on_demand 1
+#define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
#define _MEMF_node 8
#define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node)
#define _MEMF_bits 24
diff -r 90feb993b0b8 xen/include/xlat.lst
--- a/xen/include/xlat.lst Fri Dec 19 17:54:23 2008 +0000
+++ b/xen/include/xlat.lst Tue Dec 23 11:35:30 2008 +0000
@@ -38,6 +38,7 @@
! memory_exchange memory.h
! memory_map memory.h
! memory_reservation memory.h
+! pod_target memory.h
! translate_gpfn_list memory.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|