This patch adds hugepage support to the balloon driver. It is activated
by specifying "balloon_hugepages" on the kernel command line. Once activated,
the balloon driver will work entirely in hugepage sized chunks.
If, when returning pages, it finds a hugepage that is not contiguous
at the machine level, it will return each underlying page separately.
When this page is later repopulated it will be contiguous.
Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>
--------
balloon.c | 171 +++++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 125 insertions(+), 46 deletions(-)
--- 2.6-xen/drivers/xen/balloon.c 2009-10-29 17:48:30.000000000 -0500
+++ 2.6-xen-balloon/drivers/xen/balloon.c 2009-10-29 19:14:33.000000000 -0500
@@ -59,7 +59,7 @@
#include <xen/features.h>
#include <xen/page.h>
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT+balloon_order-10))
#define BALLOON_CLASS_NAME "xen_memory"
@@ -85,6 +85,14 @@ static int register_balloon(struct sys_d
static struct balloon_stats balloon_stats;
+/*
+ * Work in pages of this order. Can be either 0 for normal pages
+ * or 9 for hugepages.
+ */
+static int balloon_order;
+static unsigned long balloon_npages;
+static unsigned long discontig_frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
@@ -113,10 +121,41 @@ static struct timer_list balloon_timer;
static void scrub_page(struct page *page)
{
#ifdef CONFIG_XEN_SCRUB_PAGES
- clear_highpage(page);
+ int i;
+
+ for (i = 0; i < balloon_npages; i++)
+ clear_highpage(page++);
#endif
}
+/*
+ * Hand back to Xen, one machine frame at a time (extent_order 0), the
+ * balloon_npages subframes recorded in discontig_frame_list.  Used when
+ * a ballooned hugepage is found not to be machine-contiguous: returning
+ * the pieces individually lets a later repopulate obtain a contiguous
+ * replacement.  Caller must hold xen_reservation_lock (it is invoked
+ * from decrease_reservation() under that lock -- NOTE(review): confirm).
+ */
+static void free_discontig_frame(void)
+{
+ int rc;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .domid = DOMID_SELF,
+ .nr_extents = balloon_npages,
+ .extent_order = 0
+ };
+
+ set_xen_guest_handle(reservation.extent_start, discontig_frame_list);
+ /* Partial success would leave the P2M inconsistent, hence BUG_ON. */
+ rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+ BUG_ON(rc != balloon_npages);
+}
+
+/*
+ * Compact frame_list in place, squeezing out the entries that
+ * decrease_reservation() zeroed after releasing a discontiguous
+ * hugepage piecemeal via free_discontig_frame().
+ *
+ * Returns the number of valid (non-zero) entries remaining, which the
+ * caller uses as the new extent count for XENMEM_decrease_reservation.
+ *
+ * The previous version tested frame_list[i] (the write cursor) instead
+ * of frame_list[j] (the read cursor), could read past nr_pages, and
+ * always returned nr_pages unchanged.
+ */
+static unsigned long shrink_frame(unsigned long nr_pages)
+{
+ unsigned long i, j;
+
+ for (i = 0, j = 0; j < nr_pages; j++) {
+ /* Keep only frames still pending release. */
+ if (frame_list[j] != 0)
+ frame_list[i++] = frame_list[j];
+ }
+ return i;
+}
+
/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
@@ -190,12 +229,11 @@ static unsigned long current_target(void
static int increase_reservation(unsigned long nr_pages)
{
- unsigned long pfn, i, flags;
+ unsigned long pfn, mfn, i, j, flags;
struct page *page;
long rc;
struct xen_memory_reservation reservation = {
.address_bits = 0,
- .extent_order = 0,
.domid = DOMID_SELF
};
@@ -207,12 +245,14 @@ static int increase_reservation(unsigned
page = balloon_first_page();
for (i = 0; i < nr_pages; i++) {
BUG_ON(page == NULL);
- frame_list[i] = page_to_pfn(page);;
+ frame_list[i] = page_to_pfn(page);
page = balloon_next_page(page);
}
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
+ reservation.extent_order = balloon_order;
+
rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
if (rc < 0)
goto out;
@@ -222,19 +262,22 @@ static int increase_reservation(unsigned
BUG_ON(page == NULL);
pfn = page_to_pfn(page);
+ mfn = frame_list[i];
BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
phys_to_machine_mapping_valid(pfn));
- set_phys_to_machine(pfn, frame_list[i]);
+ for (j = 0; j < balloon_npages; j++, pfn++, mfn++) {
+ set_phys_to_machine(pfn, mfn);
- /* Link back into the page tables if not highmem. */
- if (pfn < max_low_pfn) {
- int ret;
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- mfn_pte(frame_list[i], PAGE_KERNEL),
- 0);
- BUG_ON(ret);
+ /* Link back into the page tables if not highmem. */
+ if (pfn < max_low_pfn) {
+ int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(mfn, PAGE_KERNEL),
+ 0);
+ BUG_ON(ret);
+ }
}
/* Relinquish the page back to the allocator. */
@@ -253,13 +296,13 @@ static int increase_reservation(unsigned
static int decrease_reservation(unsigned long nr_pages)
{
- unsigned long pfn, i, flags;
+ unsigned long pfn, lpfn, mfn, i, j, flags;
struct page *page;
int need_sleep = 0;
- int ret;
+ int discontig = 0, discontig_free;
+ int ret;
struct xen_memory_reservation reservation = {
.address_bits = 0,
- .extent_order = 0,
.domid = DOMID_SELF
};
@@ -267,7 +310,7 @@ static int decrease_reservation(unsigned
nr_pages = ARRAY_SIZE(frame_list);
for (i = 0; i < nr_pages; i++) {
- if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+ if ((page = alloc_pages(GFP_BALLOON, balloon_order)) == NULL) {
nr_pages = i;
need_sleep = 1;
break;
@@ -277,14 +320,6 @@ static int decrease_reservation(unsigned
frame_list[i] = pfn_to_mfn(pfn);
scrub_page(page);
-
- if (!PageHighMem(page)) {
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
- BUG_ON(ret);
- }
-
}
/* Ensure that ballooned highmem pages don't have kmaps. */
@@ -295,18 +330,39 @@ static int decrease_reservation(unsigned
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
- pfn = mfn_to_pfn(frame_list[i]);
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ mfn = frame_list[i];
+ lpfn = pfn = mfn_to_pfn(mfn);
balloon_append(pfn_to_page(pfn));
+ discontig_free = 0;
+ for (j = 0; j < balloon_npages; j++, lpfn++, mfn++) {
+ if ((discontig_frame_list[j] = pfn_to_mfn(lpfn)) != mfn)
+ discontig_free = 1;
+
+ set_phys_to_machine(lpfn, INVALID_P2M_ENTRY);
+ if (!PageHighMem(page)) {
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(lpfn << PAGE_SHIFT),
+ __pte_ma(0), 0);
+ BUG_ON(ret);
+ }
+ }
+ if (discontig_free) {
+ free_discontig_frame();
+ frame_list[i] = 0;
+ discontig = 1;
+ }
}
+ balloon_stats.current_pages -= nr_pages;
+
+ if (discontig)
+ nr_pages = shrink_frame(nr_pages);
set_xen_guest_handle(reservation.extent_start, frame_list);
reservation.nr_extents = nr_pages;
+ reservation.extent_order = balloon_order;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
BUG_ON(ret != nr_pages);
- balloon_stats.current_pages -= nr_pages;
-
spin_unlock_irqrestore(&xen_reservation_lock, flags);
return need_sleep;
@@ -374,7 +430,7 @@ static void watch_target(struct xenbus_w
/* The given memory/target value is in KiB, so it needs converting to
* pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
*/
- balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+ balloon_set_new_target(new_target >> ((PAGE_SHIFT - 10) + balloon_order));
}
static int balloon_init_watcher(struct notifier_block *notifier,
@@ -399,9 +455,12 @@ static int __init balloon_init(void)
if (!xen_pv_domain())
return -ENODEV;
- pr_info("xen_balloon: Initialising balloon driver.\n");
+ pr_info("xen_balloon: Initialising balloon driver with page order %d.\n",
+ balloon_order);
+
+ balloon_npages = 1 << balloon_order;
- balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ balloon_stats.current_pages = (min(xen_start_info->nr_pages, max_pfn)) >> balloon_order;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -414,7 +473,7 @@ static int __init balloon_init(void)
register_balloon(&balloon_sysdev);
/* Initialise the balloon with excess memory space. */
- for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn += balloon_npages) {
if (page_is_ram(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
@@ -464,16 +523,20 @@ static int dealloc_pte_fn(pte_t *pte, st
struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
struct page *page, **pagevec;
- int i, ret;
+ int npages;
+ int i, j, ret;
+
+ /* Round up to next number of balloon_order pages */
+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
- pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+ pagevec = kmalloc(sizeof(page) * nr_pages << balloon_order, GFP_KERNEL);
if (pagevec == NULL)
return NULL;
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < npages; i++) {
void *v;
- page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
+ page = alloc_pages(GFP_KERNEL|__GFP_COLD, balloon_order);
if (page == NULL)
goto err;
@@ -484,8 +547,8 @@ struct page **alloc_empty_pages_and_page
v = page_address(page);
ret = apply_to_page_range(&init_mm, (unsigned long)v,
- PAGE_SIZE, dealloc_pte_fn,
- NULL);
+ PAGE_SIZE << balloon_order,
+ dealloc_pte_fn, NULL);
if (ret != 0) {
mutex_unlock(&balloon_mutex);
@@ -493,8 +556,10 @@ struct page **alloc_empty_pages_and_page
__free_page(page);
goto err;
}
+ for (j = 0; j < balloon_npages; j++)
+ pagevec[(i<<balloon_order)+j] = page++;
- totalram_pages = --balloon_stats.current_pages;
+ totalram_pages = balloon_stats.current_pages -= balloon_npages;
mutex_unlock(&balloon_mutex);
}
@@ -507,7 +572,7 @@ struct page **alloc_empty_pages_and_page
err:
mutex_lock(&balloon_mutex);
while (--i >= 0)
- balloon_append(pagevec[i]);
+ balloon_append(pagevec[i << balloon_order]);
mutex_unlock(&balloon_mutex);
kfree(pagevec);
pagevec = NULL;
@@ -517,15 +582,21 @@ EXPORT_SYMBOL_GPL(alloc_empty_pages_and_
void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
{
+ struct page *page;
int i;
+ int npages;
if (pagevec == NULL)
return;
+ /* Round up to next number of balloon_order pages */
+ npages = (nr_pages + (balloon_npages-1)) >> balloon_order;
+
mutex_lock(&balloon_mutex);
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < npages; i++) {
- BUG_ON(page_count(pagevec[i]) != 1);
- balloon_append(pagevec[i]);
+ page = pagevec[i << balloon_order];
+ BUG_ON(page_count(page) != 1);
+ balloon_append(page);
}
mutex_unlock(&balloon_mutex);
@@ -535,6 +606,14 @@ void free_empty_pages_and_pagevec(struct
}
EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
+/*
+ * Early boot parameter: specifying "balloon_hugepages" on the kernel
+ * command line switches the balloon driver to work in order-9 chunks
+ * (512 pages, i.e. 2MB hugepages on x86).  The argument string is
+ * unused; returning 1 tells the __setup machinery the option was
+ * consumed.
+ */
+static int __init balloon_parse_huge(char *s)
+{
+ balloon_order = 9;
+ return 1;
+}
+
+__setup("balloon_hugepages", balloon_parse_huge);
+
#define BALLOON_SHOW(name, format, args...) \
static ssize_t show_##name(struct sys_device *dev, \
struct sysdev_attribute *attr, \
@@ -568,7 +647,7 @@ static ssize_t store_target_kb(struct sy
target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
- balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
return count;
}
@@ -582,7 +661,7 @@ static ssize_t show_target(struct sys_de
{
return sprintf(buf, "%llu\n",
(unsigned long long)balloon_stats.target_pages
- << PAGE_SHIFT);
+ << (PAGE_SHIFT + balloon_order));
}
static ssize_t store_target(struct sys_device *dev,
@@ -598,7 +677,7 @@ static ssize_t store_target(struct sys_d
target_bytes = memparse(buf, &endchar);
- balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+ balloon_set_new_target(target_bytes >> (PAGE_SHIFT + balloon_order));
return count;
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|