# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID 8070050cc30f3d969835d7b1d6eda57959d56842
# Parent 5552bc2c3716e9e0f57dbba80f61332b895a0f2a
[LINUX][PAE] Improve allocation strategy when PAE pgdirs must be below 4GB.
Moving the re-allocation to low memory into pgd_alloc() has several
advantages:
1. Avoids a race with save/restore in which the pgdir may end up above 4GB
after the restore.
2. If pgdir cannot be re-allocated we can return failure to the caller
rather than BUG().
3. Slightly reduces diff against native Linux code.
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 2
linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 57 ++++++++++++++++--------
2 files changed, 40 insertions(+), 19 deletions(-)
diff -r 5552bc2c3716 -r 8070050cc30f linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Wed Jun 14 11:19:53 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c Wed Jun 14 12:36:06 2006 +0100
@@ -763,7 +763,7 @@ void __init pgtable_cache_init(void)
#endif
0,
pgd_ctor,
- pgd_dtor);
+ PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
diff -r 5552bc2c3716 -r 8070050cc30f linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Wed Jun 14 11:19:53 2006 +0100
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Wed Jun 14 12:36:06 2006 +0100
@@ -300,11 +300,6 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
unsigned long flags;
if (PTRS_PER_PMD > 1) {
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
- int rc = xen_create_contiguous_region(
- (unsigned long)pgd, 0, 32);
- BUG_ON(rc);
- }
if (HAVE_SHARED_KERNEL_PMD)
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
@@ -320,26 +315,22 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
}
}
+/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
- if (PTRS_PER_PMD > 1) {
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
- xen_destroy_contiguous_region((unsigned long)pgd, 0);
- } else {
- spin_lock_irqsave(&pgd_lock, flags);
- pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
-
- pgd_test_and_unpin(pgd);
- }
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+
+ pgd_test_and_unpin(pgd);
}
pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+ pgd_t *pgd_tmp = NULL, *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
pgd_test_and_unpin(pgd);
@@ -363,7 +354,26 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
+ /* create_contig_region() loses page data. Make a temp copy. */
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
+ pgd_tmp = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+ if (!pgd_tmp)
+ goto out_oom;
+ memcpy(pgd_tmp, pgd, PAGE_SIZE);
+ }
+
spin_lock_irqsave(&pgd_lock, flags);
+
+ /* Protect against save/restore: move below 4GB with lock. */
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
+ int rc = xen_create_contiguous_region(
+ (unsigned long)pgd, 0, 32);
+ memcpy(pgd, pgd_tmp, PAGE_SIZE);
+ kmem_cache_free(pgd_cache, pgd_tmp);
+ if (rc)
+ goto out_oom;
+ }
+
for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
unsigned long v = (unsigned long)i << PGDIR_SHIFT;
pgd_t *kpgd = pgd_offset_k(v);
@@ -374,7 +384,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
make_lowmem_page_readonly(
pmd, XENFEAT_writable_page_tables);
}
+
pgd_list_add(pgd);
+
spin_unlock_irqrestore(&pgd_lock, flags);
}
@@ -399,11 +411,15 @@ void pgd_free(pgd_t *pgd)
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
kmem_cache_free(pmd_cache, pmd);
}
+
if (!HAVE_SHARED_KERNEL_PMD) {
unsigned long flags;
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
+
+ pgd_test_and_unpin(pgd);
+
for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
make_lowmem_page_writable(
@@ -411,8 +427,13 @@ void pgd_free(pgd_t *pgd)
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
kmem_cache_free(pmd_cache, pmd);
}
- }
- }
+
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
+ xen_destroy_contiguous_region(
+ (unsigned long)pgd, 0);
+ }
+ }
+
/* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
}
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|