xen/arch/x86/mm/hap/private.h | 1 +
xen/arch/x86/mm/mm-locks.h | 20 +-
xen/arch/x86/mm/p2m-ept.c | 1 +
xen/arch/x86/mm/p2m-lock.h | 613 ++++++++++++++++++++++++++++++++++++++++++
xen/arch/x86/mm/p2m-pod.c | 1 +
xen/arch/x86/mm/p2m-pt.c | 1 +
xen/arch/x86/mm/p2m.c | 24 +-
xen/include/asm-x86/p2m.h | 3 +-
8 files changed, 652 insertions(+), 12 deletions(-)
Introduce a fine-grained concurrency control structure for the p2m. This
allows 2M-aligned chunks of the p2m to be locked exclusively, one range at a
time. Recursive locking of a chunk by the same CPU is allowed. Global locking
of the whole p2m is still available for certain operations. Simple
deadlock-detection heuristics are put in place.
Note that the patch provides backwards-compatible shortcuts that lock the
p2m globally, so behaviour should remain functionally identical to what is
currently in place.
Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
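For illustration, a sketch of how a converted caller might use the new range
interface. The function below is hypothetical and not part of the patch; only
get_p2m_gfn/put_p2m_gfn, p2m_lock/p2m_unlock and their expansions come from
p2m-lock.h:

/* Hypothetical caller: query-then-modify a single gfn. Only the 2M-aligned
 * chunk containing gfn is held, so CPUs working on other chunks are not
 * serialized against this one. Assumes asm/p2m.h and "p2m-lock.h" are in
 * scope. */
static void example_fixup_entry(struct p2m_domain *p2m, unsigned long gfn)
{
    get_p2m_gfn(p2m, gfn);        /* expands to get_p2m(p2m, gfn, 0) */

    /* ... look up gfn, then update it. Leaf locks are recursive, so a
     * nested get_p2m_gfn()/put_p2m_gfn() pair on the same range from the
     * lookup path is harmless ... */

    put_p2m_gfn(p2m, gfn);

    /* Big-hammer operations (e.g. log-dirty (re)set) still take the whole
     * p2m exclusively via the backwards-compatible shortcuts: */
    p2m_lock(p2m);                /* get_p2m(p2m, 0, P2M_ORDER_GLOBAL) */
    /* ... */
    p2m_unlock(p2m);
}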
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/hap/private.h
--- a/xen/arch/x86/mm/hap/private.h
+++ b/xen/arch/x86/mm/hap/private.h
@@ -21,6 +21,7 @@
#define __HAP_PRIVATE_H__
#include "../mm-locks.h"
+#include "../p2m-lock.h"
/********************************************/
/* GUEST TRANSLATION FUNCS */
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/mm-locks.h
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -146,14 +146,22 @@ declare_mm_lock(nestedp2m)
/* P2M lock (per-p2m-table)
*
- * This protects all updates to the p2m table. Updates are expected to
- * be safe against concurrent reads, which do *not* require the lock. */
+ * This protects all updates to the p2m table.
+ *
+ * In 64-bit mode we disable this lock: the p2m lock becomes fine-grained
+ * (see p2m-lock.h), and several code paths cause inversion/deadlock:
+ * -- PoD sweeps
+ * -- mem_sharing_unshare_page
+ * -- generally widespread recursive locking, which we don't support
+ * (yet, I guess) on an "external" mm lock. */
+#ifndef __x86_64__
declare_mm_lock(p2m)
-#define p2m_lock(p) mm_lock(p2m, &(p)->lock)
-#define p2m_lock_recursive(p) mm_lock_recursive(p2m, &(p)->lock)
-#define p2m_unlock(p) mm_unlock(&(p)->lock)
-#define p2m_locked_by_me(p) mm_locked_by_me(&(p)->lock)
+#define _p2m_lock(p) mm_lock(p2m, &(p)->lock)
+#define _p2m_lock_recursive(p) mm_lock_recursive(p2m, &(p)->lock)
+#define _p2m_unlock(p) mm_unlock(&(p)->lock)
+#define _p2m_locked_by_me(p) mm_locked_by_me(&(p)->lock)
+#endif /* __x86_64__ */
/* PoD lock (per-p2m-table)
*
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -33,6 +33,7 @@
#include <xen/softirq.h>
#include "mm-locks.h"
+#include "p2m-lock.h"
#define atomic_read_ept_entry(__pepte) \
( (ept_entry_t) { .epte = atomic_read64(&(__pepte)->epte) } )
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-lock.h
--- /dev/null
+++ b/xen/arch/x86/mm/p2m-lock.h
@@ -0,0 +1,613 @@
+/******************************************************************************
+ * arch/x86/mm/p2m-lock.h
+ *
+ * Fine-grained locking of the p2m. Allow for concurrent updates to different
+ * regions of the p2m. Serially synchronize updates and lookups. Mutex
+ * access on p2m entries while a CPU is using them.
+ *
+ * Copyright (c) 2011 Andres Lagar-Cavilla, GridCentric Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_P2M_LOCK_H
+#define _XEN_P2M_LOCK_H
+
+#include <xen/config.h>
+#include <xen/lib.h>
+/* See comment about space consideration for spinlocks below */
+#define NDEBUG
+#undef LOCK_PROFILE
+#include <xen/spinlock.h>
+#include <asm/atomic.h>
+#include <xen/xmalloc.h>
+#include <xen/paging.h>
+#include <asm/page.h>
+#include <asm/p2m.h>
+#include "mm-locks.h"
+
+/* Rationale:
+ *
+ * The motivating scenario is one in which you have at least three CPUs
+ * operating on likely disjoint regions of the p2m: a paging utility, a sharing
+ * utility, and the domU vcpu. With yet another p2m-heavy utility (mem
+ * access?), and/or a migrate/remus utility, the number of CPUs operating
+ * on disjoint regions increases. Not to mention multi-vcpu domUs.
+ *
+ * Therefore, p2m concurrency control is achieved through a hierarchical
+ * tree of locks, to allow all these CPUs to work without bothering each other.
+ * (Without disallowing any other cases, such as a single-vcpu domU.)
+ *
+ * Leaves in the tree of locks are represented by spinlocks.
+ *
+ * Inner nodes (or upper levels) are represented by a spinlock and a count.
+ * The count indicates how many CPUs are locking a node beneath.
+ *
+ * A CPU holds a leaf by grabbing the spinlock and not letting go of it. On
+ * its way to the leaf, for each inner node, it grabs the spinlock, increases
+ * the count, and releases the spinlock.
+ *
+ * Leaf locks are recursive: the same CPU can lock them again.
+ *
+ * A CPU holds an inner node in exclusive mode by busy-waiting until the count
+ * is zero, grabbing the spinlock, and not letting go of it.
+ *
+ * Unlocks work by releasing the current spinlock and working your way up:
+ * grab spinlock, decrease count, release.
+ *
+ * No locker can be preempted. For that reason, there are no atomic promotions:
+ * you would end up with promoters deadlocking on their way up the tree.
+ *
+ * Today, there are effectively two levels: the global lock (an inner node),
+ * and 2M locks, leaf locks for contiguous, aligned, 2M extents (akin to
+ * superpages).
+ *
+ * The global level can be held exclusively for big-hammer operations such as
+ * log-dirty (re)set.
+ *
+ * For non-global locking, the global lock is grabbed non-exclusively. At each
+ * 1G boundary we allocate, if we hadn't before, the corresponding set of 512
+ * 2M locks. Allocation of the 2M locks is itself protected by a regular
+ * spinlock (this is rare enough). Allocation happens on demand because
+ * we can't really know a priori the "total" size of the p2m.
+ *
+ * It is expected that every query or modification to the p2m will lock the
+ * appropriate range. Leaves are recursive for this reason: commonly you
+ * query a range and then you modify it.
+ *
+ * Conversely, all callers of queries and modifications, once done, need to
+ * undo their locking.
+ *
+ * Because we mimic the page-table structure of a 512-radix tree, we run into
+ * space considerations with the spinlocks in this tree. So we need to be
+ * careful about space.
+ *
+ * For 32-bit code, we currently bail out and default to one big lock. Sorry
+ * Atom :(
+ *
+ * Also note that the p2m tree of locks is included in the ordering constraints
+ * enforced by mm-locks.h. It is treated as an "external" lock in that code.
+ *
+ */
+
+#define P2M_ORDER_GLOBAL ~0U
+
+/* The 32 bit case serves as a concise summary of the external API */
+#ifndef __x86_64__
+/* For 32 bits we default to one big lock */
+typedef struct __p2m_lock {
+ mm_lock_t lock;
+} p2m_lock_t;
+
+static inline int p2m_lock_init(struct p2m_domain *p2m)
+{
+ p2m_lock_t *p2ml = xmalloc(p2m_lock_t);
+ if ( !p2ml )
+ return -ENOMEM;
+ mm_lock_init(&p2ml->lock);
+ p2m->lock = p2ml;
+ return 0;
+}
+
+static inline void get_p2m(struct p2m_domain *p2m, unsigned long gfn,
+                           unsigned int order)
+{
+ _p2m_lock(p2m->lock);
+}
+
+static inline void put_p2m(struct p2m_domain *p2m, unsigned long gfn,
+                           unsigned int order)
+{
+ _p2m_unlock(p2m->lock);
+}
+
+static inline void p2m_lock_destroy(struct p2m_domain *p2m)
+{
+ xfree(p2m->lock);
+ p2m->lock = NULL;
+}
+
+/* Backwards compatibility */
+#define p2m_lock(p) _p2m_lock((p)->lock)
+#define p2m_lock_recursive(p) _p2m_lock_recursive((p)->lock)
+#define p2m_locked_by_me(p) _p2m_locked_by_me((p)->lock)
+#define p2m_unlock(p) _p2m_unlock((p)->lock)
+
+#else /* __x86_64__ */
+/* If we were to have intermediate inner locks (say, 1G locks), the space
+ * considerations outlined below for leaf locks would also apply here. */
+typedef struct p2m_inner_lock {
+ spinlock_t lock;
+ atomic_t count;
+} p2m_inner_lock_t;
+
+static inline void init_p2m_inner_lock(p2m_inner_lock_t *inner)
+{
+ spin_lock_init(&inner->lock);
+ _atomic_set(inner->count, 0);
+}
+
+/* We cannot risk reusing the code in common/spinlock.c, because it may
+ * have been compiled with LOCK_DEBUG or LOCK_PROFILE. This is unfortunate. */
+static inline void lock_p2m_inner(p2m_inner_lock_t *inner)
+{
+ spin_lock(&inner->lock);
+}
+
+static inline void unlock_p2m_inner(p2m_inner_lock_t *inner)
+{
+ spin_unlock(&inner->lock);
+}
+
+static inline void get_p2m_inner(p2m_inner_lock_t *inner)
+{
+ lock_p2m_inner(inner);
+ atomic_inc(&inner->count);
+ unlock_p2m_inner(inner);
+}
+
+static inline void put_p2m_inner(p2m_inner_lock_t *inner)
+{
+ lock_p2m_inner(inner);
+ atomic_dec(&inner->count);
+ unlock_p2m_inner(inner);
+}
+
+/* XXX Consider starvation here */
+static inline void get_p2m_inner_exclusive(p2m_inner_lock_t *inner)
+{
+ int count;
+retry:
+ while (1)
+ {
+ mb();
+ count = atomic_read(&inner->count);
+ if ( count == 0 )
+ break;
+ cpu_relax();
+ }
+
+ spin_lock(&inner->lock);
+ mb();
+ count = atomic_read(&inner->count);
+ if ( count )
+ {
+ spin_unlock(&inner->lock);
+ goto retry;
+ }
+ /* We leave holding the spinlock */
+}
+
+static inline void put_p2m_inner_exclusive(p2m_inner_lock_t *inner)
+{
+ spin_unlock(&inner->lock);
+}
+
+/* Because we operate under page-table sizing constraints, we need to be
+ * extremely conscious about the space we're taking up. So we become somewhat
+ * re-inventors of the wheel, and we disable many things. */
+typedef struct p2m_leaf_lock {
+ raw_spinlock_t raw;
+ u16 recurse_cpu:12;
+ u16 recurse_cnt:4;
+/* Padding to confine each leaf lock to its own word */
+#define LEAF_PAD 4
+ uint8_t pad[LEAF_PAD];
+} __attribute__((packed)) p2m_leaf_lock_t;
+
+/* BUILD_BUG_ON(sizeof(p2m_leaf_lock_t) != sizeof(unsigned long)); */
+
+static inline void init_p2m_leaf_lock(p2m_leaf_lock_t *lock)
+{
+ *lock = (p2m_leaf_lock_t) { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, { } };
+}
+
+static inline int __p2m_spin_trylock_recursive(p2m_leaf_lock_t *lock)
+{
+ int cpu = smp_processor_id();
+
+ if ( likely(lock->recurse_cpu != cpu) )
+ {
+ if ( !_raw_spin_trylock(&lock->raw) )
+ return 0;
+ preempt_disable();
+ lock->recurse_cpu = cpu;
+ }
+
+ lock->recurse_cnt++;
+ return 1;
+}
+
+static inline void lock_p2m_leaf(p2m_leaf_lock_t *lock)
+{
+ while ( !__p2m_spin_trylock_recursive(lock) )
+ cpu_relax();
+}
+
+static inline void unlock_p2m_leaf(p2m_leaf_lock_t *lock)
+{
+ if ( likely(--lock->recurse_cnt == 0) )
+ {
+ lock->recurse_cpu = 0xfffu;
+ preempt_enable();
+ _raw_spin_unlock(&lock->raw);
+ }
+}
+
+/* Deadlock book-keeping, see below */
+#define MAX_LOCK_DEPTH 16
+
+/* The lock structure */
+typedef struct __p2m_lock
+{
+ /* To enforce ordering in mm-locks */
+ int unlock_level;
+ /* To protect on-demand allocation of locks
+ * (yeah you heard that right) */
+ spinlock_t alloc_lock;
+ /* Global lock */
+ p2m_inner_lock_t global;
+    /* 2M locks. Allocated on demand: fun */
+    p2m_leaf_lock_t **locks_2m;
+    /* Book-keeping for deadlock detection. Could be made per-cpu. */
+    unsigned long deadlock_guard[NR_CPUS][MAX_LOCK_DEPTH + 1];
+    uint8_t lock_depth[NR_CPUS];
+    /* Is anybody holding this exclusively? */
+    unsigned int exclusive_holder;
+    /* Order of pages allocated for the first level of locks_2m */
+    uint8_t order;
+} p2m_lock_t;
+
+#define EXCLUSIVE_CPU_NULL ~0U
+
+/* Some deadlock book-keeping. Say CPU A holds a lock on range A, and CPU B
+ * holds a lock on range B. Now CPU A wants to lock range B and vice-versa:
+ * deadlock. We detect this by remembering the start of the current locked
+ * range. We keep a fairly small stack of guards (MAX_LOCK_DEPTH), because we
+ * don't anticipate a great deal of recursive locking, because (a) recursive
+ * locking is rare (b) it is evil (c) only PoD seems to do it (is PoD
+ * therefore evil?) */
+
+#define CURRENT_GUARD(l) ((l)->deadlock_guard[current->processor] \
+ [(l)->lock_depth[current->processor]])
+
+#define DEADLOCK_CHECK(cond, action, _f, _a...) \
+do { \
+ if ( (cond) ) \
+ { \
+ printk(_f, ##_a); \
+ action; \
+ } \
+} while(0)
+
+static inline void push_guard(p2m_lock_t *p2ml, unsigned long gfn)
+{
+ int cpu = current->processor;
+
+ DEADLOCK_CHECK(((p2ml->lock_depth[cpu] + 1) > MAX_LOCK_DEPTH),
+ BUG(), "CPU %u exceeded deadlock depth\n", cpu);
+
+ p2ml->lock_depth[cpu]++;
+ p2ml->deadlock_guard[cpu][p2ml->lock_depth[cpu]] = gfn;
+}
+
+static inline void pop_guard(p2m_lock_t *p2ml)
+{
+ int cpu = current->processor;
+
+    DEADLOCK_CHECK((p2ml->lock_depth[cpu] == 0), BUG(),
+                    "CPU %u underflow deadlock depth\n", cpu);
+
+ p2ml->lock_depth[cpu]--;
+}
+
+static inline int p2m_lock_init(struct p2m_domain *p2m)
+{
+ unsigned int i;
+ p2m_lock_t *p2ml;
+
+ p2ml = xmalloc(p2m_lock_t);
+ if ( !p2ml )
+ return -ENOMEM;
+
+ memset(p2ml, 0, sizeof(p2m_lock_t));
+
+ spin_lock_init(&p2ml->alloc_lock);
+ init_p2m_inner_lock(&p2ml->global);
+
+ p2ml->locks_2m = alloc_xenheap_page();
+ if ( !p2ml->locks_2m )
+ {
+ xfree(p2ml);
+ return -ENOMEM;
+ }
+ memset(p2ml->locks_2m, 0, PAGE_SIZE);
+
+ for (i = 0; i < NR_CPUS; i++)
+ p2ml->deadlock_guard[i][0] = DEADLOCK_NULL;
+
+ p2ml->exclusive_holder = EXCLUSIVE_CPU_NULL;
+
+ p2m->lock = p2ml;
+ return 0;
+}
+
+/* Conversion macros for aligned boundaries */
+#define gfn_to_superpage(g, o) (((g) & (~((1 << (o)) - 1))) >> (o))
+#define gfn_to_1g_sp(gfn) gfn_to_superpage(gfn, PAGE_ORDER_1G)
+#define gfn_to_2m_sp(gfn) gfn_to_superpage(gfn, PAGE_ORDER_2M)
+#define gfn_1g_to_2m(gfn_1g)      ((gfn_1g) << (PAGE_ORDER_1G - PAGE_ORDER_2M))
+#define gfn_1g_to_last_2m(gfn_1g) (gfn_1g_to_2m(gfn_1g) + \
+                                    ((1 << (PAGE_ORDER_1G - PAGE_ORDER_2M)) - 1))
+#define gfn_1g_to_4k(gfn_1g)      ((gfn_1g) << PAGE_ORDER_1G)
+#define gfn_1g_to_last_4k(gfn_1g) (gfn_1g_to_4k(gfn_1g) + \
+                                    ((1 << PAGE_ORDER_1G) - 1))
+
+/* Global lock accessors. Global lock is our only "inner" node. */
+#define p2m_exclusive_locked_by_me(l) \
+ ((l)->lock->exclusive_holder == current->processor)
+
+static inline void get_p2m_global_exclusive(struct p2m_domain *p2m)
+{
+ p2m_lock_t *p2ml = p2m->lock;
+    DEADLOCK_CHECK((CURRENT_GUARD(p2ml) != DEADLOCK_NULL), BUG(),
+                    "P2M DEADLOCK: cpu %u prev range start %lx trying global\n",
+                    (unsigned) current->processor, CURRENT_GUARD(p2ml));
+
+ get_p2m_inner_exclusive(&p2ml->global);
+ p2ml->exclusive_holder = current->processor;
+}
+
+static inline void put_p2m_global_exclusive(struct p2m_domain *p2m)
+{
+ p2m_lock_t *p2ml = p2m->lock;
+ p2ml->exclusive_holder = EXCLUSIVE_CPU_NULL;
+ put_p2m_inner_exclusive(&p2ml->global);
+}
+
+/* Not to be confused with shortcut for external use */
+static inline void __get_p2m_global(struct p2m_domain *p2m)
+{
+ get_p2m_inner(&p2m->lock->global);
+}
+
+/* Not to be confused with shortcut for external use */
+static inline void __put_p2m_global(struct p2m_domain *p2m)
+{
+ put_p2m_inner(&p2m->lock->global);
+}
+
+/* 2M lock accessors */
+static inline p2m_leaf_lock_t *__get_2m_lock(p2m_lock_t *p2ml,
+ unsigned long gfn_1g, unsigned long gfn_2m)
+{
+ p2m_leaf_lock_t *lock_2m_l1;
+ BUG_ON(gfn_1g >= (1 << PAGETABLE_ORDER));
+ BUG_ON(gfn_2m >= (1 << PAGETABLE_ORDER));
+ lock_2m_l1 = p2ml->locks_2m[gfn_1g];
+ BUG_ON(lock_2m_l1 == NULL);
+ return (lock_2m_l1 + gfn_2m);
+}
+
+static inline void get_p2m_2m(struct p2m_domain *p2m, unsigned long gfn_1g,
+ unsigned long gfn_2m)
+{
+ lock_p2m_leaf(__get_2m_lock(p2m->lock, gfn_1g, gfn_2m));
+}
+
+static inline void put_p2m_2m(struct p2m_domain *p2m, unsigned long gfn_1g,
+ unsigned long gfn_2m)
+{
+ unlock_p2m_leaf(__get_2m_lock(p2m->lock, gfn_1g, gfn_2m));
+}
+
+/* Allocate 2M locks we may not have allocated yet for this 1G superpage */
+static inline int alloc_locks_2m(struct p2m_domain *p2m, unsigned long gfn_1g)
+{
+ p2m_lock_t *p2ml = p2m->lock;
+
+ /* With a single page for l1, we cover a gfn space of 512GB (39 bits)
+ * Given that current x86_64 processors physically address 40 bits,
+ * we're in no immediate danger of overflowing this table for a domU.
+ * If necessary, the l1 itself can grow subject to proper locking
+ * on the p2ml->alloc_lock */
+
+ /* Quick test for common case */
+ if ( likely(p2ml->locks_2m[gfn_1g] != NULL) )
+ return 0;
+
+ spin_lock(&(p2ml->alloc_lock));
+
+ if ( likely(p2ml->locks_2m[gfn_1g] == NULL) )
+ {
+ unsigned long j;
+ p2m_leaf_lock_t *p = alloc_xenheap_page();
+ if ( !p )
+ {
+ spin_unlock(&(p2ml->alloc_lock));
+ return -ENOMEM;
+ }
+
+ for (j = 0; j < (1 << PAGETABLE_ORDER); j++)
+ init_p2m_leaf_lock(&p[j]);
+
+ p2ml->locks_2m[gfn_1g] = p;
+ }
+
+ spin_unlock(&(p2ml->alloc_lock));
+ return 0;
+}
+
+static inline unsigned long __get_last_gfn(unsigned long gfn,
+                                           unsigned int order)
+{
+    unsigned long last_gfn = gfn + (1 << order) - 1;
+    /* Catch wrap-around */
+    BUG_ON(last_gfn < gfn);
+ return last_gfn;
+}
+
+static inline void get_p2m(struct p2m_domain *p2m, unsigned long gfn,
+                           unsigned int order)
+{
+    unsigned long last_gfn, first_1g, last_1g, first_2m, last_2m, i, j;
+    p2m_lock_t *p2ml = p2m->lock;
+
+    /* Holders of the p2m in exclusive mode can lock sub-ranges; we make that
+     * a no-op. However, locking exclusively again is considered rude and
+     * tasteless. */
+    if ( (p2m_exclusive_locked_by_me(p2m)) && (order != P2M_ORDER_GLOBAL) )
+        return;
+
+    DEADLOCK_CHECK(((CURRENT_GUARD(p2ml) != DEADLOCK_NULL) &&
+                    (CURRENT_GUARD(p2ml) > gfn)), WARN(),
+                    "P2M DEADLOCK: cpu %d prev range start %lx new range start %lx\n",
+                    current->processor, CURRENT_GUARD(p2ml), gfn);
+
+ preempt_disable();
+
+ if ( order == P2M_ORDER_GLOBAL ) {
+ get_p2m_global_exclusive(p2m);
+ goto get_p2m_out;
+ }
+
+ __get_p2m_global(p2m);
+    /* We're non-preemptible, and we've blocked any exclusive (global) locking
+     * of the p2m. We will now (allocate and) lock all relevant 2M leaves */
+
+ last_gfn = __get_last_gfn(gfn, order);
+ first_1g = gfn_to_1g_sp(gfn);
+ last_1g = gfn_to_1g_sp(last_gfn);
+
+ for (i = first_1g; i <= last_1g; i++)
+ {
+        first_2m = (gfn_1g_to_4k(i) > gfn) ? gfn_1g_to_2m(i)
+                                           : gfn_to_2m_sp(gfn);
+ last_2m = min(gfn_to_2m_sp(last_gfn), gfn_1g_to_last_2m(i));
+
+ if ( alloc_locks_2m(p2m, i) )
+ {
+ /* There really isn't much we can do at this point */
+            panic("Fine-grained p2m locking failed to alloc 2M locks"
+                  " for 1G page %lx, domain %hu\n", i, p2m->domain->domain_id);
+ }
+
+ for (j = first_2m; j <= last_2m; j++)
+ {
+ get_p2m_2m(p2m, i, j & ((1 << PAGETABLE_ORDER) - 1));
+ }
+ }
+
+get_p2m_out:
+ push_guard(p2ml, gfn);
+}
+
+/* Conversely to the get method, we unlock all leaves pro-actively here */
+static inline void put_p2m(struct p2m_domain *p2m, unsigned long gfn,
+                           unsigned int order)
+{
+    unsigned long last_gfn, first_1g, last_1g, first_2m, last_2m, i, j;
+    p2m_lock_t *p2ml = p2m->lock;
+
+    /* See the comment in get_p2m about exclusive holders recursively locking
+     * sub-ranges */
+    if ( (p2m_exclusive_locked_by_me(p2m)) && (order != P2M_ORDER_GLOBAL) )
+        return;
+
+    if ( order == P2M_ORDER_GLOBAL )
+    {
+        put_p2m_global_exclusive(p2m);
+        goto cleanup;
+    }
+
+    last_gfn = __get_last_gfn(gfn, order);
+    first_1g = gfn_to_1g_sp(gfn);
+    last_1g = gfn_to_1g_sp(last_gfn);
+
+    for (i = first_1g; i <= last_1g; i++)
+    {
+        first_2m = (gfn_1g_to_4k(i) > gfn) ? gfn_1g_to_2m(i)
+                                           : gfn_to_2m_sp(gfn);
+        last_2m = min(gfn_to_2m_sp(last_gfn), gfn_1g_to_last_2m(i));
+
+ for (j = first_2m; j <= last_2m; j++)
+ {
+ put_p2m_2m(p2m, i, j & ((1 << PAGETABLE_ORDER) - 1));
+ }
+ }
+
+ __put_p2m_global(p2m);
+
+cleanup:
+ pop_guard(p2ml);
+ preempt_enable();
+}
+
+static inline void p2m_lock_destroy(struct p2m_domain *p2m)
+{
+ unsigned int i;
+ p2m_lock_t *p2ml = p2m->lock;
+
+ get_p2m_global_exclusive(p2m);
+
+ for (i = 0; i < (1 << PAGETABLE_ORDER); i++)
+ if ( p2ml->locks_2m[i] )
+ free_xenheap_page(p2ml->locks_2m[i]);
+
+ free_xenheap_page(p2ml->locks_2m);
+
+ put_p2m_global_exclusive(p2m);
+
+ xfree(p2ml);
+ p2m->lock = NULL;
+}
+
+/* Backwards compatibility */
+#define p2m_lock(p) get_p2m((p), 0, P2M_ORDER_GLOBAL)
+#define p2m_unlock(p) put_p2m((p), 0, P2M_ORDER_GLOBAL)
+#define p2m_locked_by_me(p) p2m_exclusive_locked_by_me((p))
+/* There is no backwards compatibility for this, unless we make the
+ * global lock recursive */
+#define p2m_lock_recursive(p) ((void)0)
+
+#endif /* __x86_64__ */
+
+/* Commonly-used shortcuts */
+#define get_p2m_global(p2m) get_p2m((p2m), 0, P2M_ORDER_GLOBAL)
+#define put_p2m_global(p2m) put_p2m((p2m), 0, P2M_ORDER_GLOBAL)
+
+#define get_p2m_gfn(p2m, gfn) get_p2m((p2m), (gfn), 0)
+#define put_p2m_gfn(p2m, gfn) put_p2m((p2m), (gfn), 0)
+
+#endif /* _XEN_P2M_LOCK_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
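To illustrate the ordering rule that the deadlock guard in get_p2m() checks
for (a warning fires when a CPU starts a new range below one it already
holds), a hypothetical caller that needs two 2M ranges at once could sort
them first. example_copy_2m and its parameters are invented for this sketch;
get_p2m/put_p2m, PAGE_ORDER_2M and min/max are real:

/* Hypothetical caller: hold two disjoint 2M extents at once. Taking the
 * lower-gfn range first keeps the per-CPU deadlock guard happy. */
static void example_copy_2m(struct p2m_domain *p2m,
                            unsigned long src_gfn, unsigned long dst_gfn)
{
    unsigned long lo = min(src_gfn, dst_gfn);
    unsigned long hi = max(src_gfn, dst_gfn);

    get_p2m(p2m, lo, PAGE_ORDER_2M);   /* lower range first ...   */
    get_p2m(p2m, hi, PAGE_ORDER_2M);   /* ... then the higher one */

    /* ... copy or remap entries between the two extents ... */

    put_p2m(p2m, hi, PAGE_ORDER_2M);   /* unlock in LIFO order */
    put_p2m(p2m, lo, PAGE_ORDER_2M);
}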
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-pod.c
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -34,6 +34,7 @@
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "mm-locks.h"
+#include "p2m-lock.h"
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -39,6 +39,7 @@
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "mm-locks.h"
+#include "p2m-lock.h"
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
diff -r 981073d78f7f -r a23e1262b124 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -38,6 +38,7 @@
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "mm-locks.h"
+#include "p2m-lock.h"
/* turn on/off 1GB host page table support for hap, default on */
static bool_t __read_mostly opt_hap_1gb = 1;
@@ -69,9 +70,12 @@ boolean_param("hap_2mb", opt_hap_2mb);
/* Init the datastructures for later use by the p2m code */
-static void p2m_initialise(struct domain *d, struct p2m_domain *p2m)
+static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
{
- mm_lock_init(&p2m->lock);
+ if (p2m_lock_init(p2m))
+ {
+ return -ENOMEM;
+ }
mm_lock_init(&p2m->pod.lock);
INIT_LIST_HEAD(&p2m->np2m_list);
INIT_PAGE_LIST_HEAD(&p2m->pages);
@@ -89,7 +93,7 @@ static void p2m_initialise(struct domain
else
p2m_pt_init(p2m);
- return;
+ return 0;
}
static int
@@ -103,7 +107,11 @@ p2m_init_nestedp2m(struct domain *d)
d->arch.nested_p2m[i] = p2m = xzalloc(struct p2m_domain);
if (p2m == NULL)
return -ENOMEM;
- p2m_initialise(d, p2m);
+ if (p2m_initialise(d, p2m))
+ {
+ xfree(p2m);
+ return -ENOMEM;
+ }
p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
}
@@ -118,7 +126,11 @@ int p2m_init(struct domain *d)
p2m_get_hostp2m(d) = p2m = xzalloc(struct p2m_domain);
if ( p2m == NULL )
return -ENOMEM;
- p2m_initialise(d, p2m);
+ if (p2m_initialise(d, p2m))
+ {
+ xfree(p2m);
+ return -ENOMEM;
+ }
/* Must initialise nestedp2m unconditionally
* since nestedhvm_enabled(d) returns false here.
@@ -331,6 +343,7 @@ static void p2m_teardown_nestedp2m(struc
uint8_t i;
for (i = 0; i < MAX_NESTEDP2M; i++) {
+ p2m_lock_destroy(d->arch.nested_p2m[i]);
xfree(d->arch.nested_p2m[i]);
d->arch.nested_p2m[i] = NULL;
}
@@ -338,6 +351,7 @@ static void p2m_teardown_nestedp2m(struc
void p2m_final_teardown(struct domain *d)
{
+ p2m_lock_destroy(d->arch.p2m);
/* Iterate over all p2m tables per domain */
xfree(d->arch.p2m);
d->arch.p2m = NULL;
diff -r 981073d78f7f -r a23e1262b124 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -187,9 +187,10 @@ typedef enum {
#define p2m_is_broken(_t) (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
/* Per-p2m-table state */
+struct __p2m_lock;
struct p2m_domain {
/* Lock that protects updates to the p2m */
- mm_lock_t lock;
+ struct __p2m_lock *lock;
/* Shadow translated domain: p2m mapping */
pagetable_t phys_table;