This doesn't include any of the new features, like copy grants or
transitive grants, but it does include most of the V2 infrastructure.
Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>
---
arch/x86/xen/grant-table.c | 38 +++++++-
drivers/xen/grant-table.c | 177 +++++++++++++++++++++++++++++------
include/xen/grant_table.h | 8 +-
include/xen/interface/grant_table.h | 125 +++++++++++++++++++++++-
4 files changed, 308 insertions(+), 40 deletions(-)
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 49ba9b5..77af9e9 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -54,6 +54,16 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
return 0;
}
+static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
+ unsigned long addr, void *data)
+{
+ uint64_t **frames = (uint64_t **)data; /* data points at the caller's cursor into a uint64_t frame array */
+ /* Install the frame under the cursor at addr in init_mm, then step the cursor so the next PTE gets the next frame. */
+ set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+ (*frames)++;
+ return 0;
+}
+
static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
{
@@ -64,10 +74,10 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
- struct grant_entry **__shared)
+ void **__shared)
{
int rc;
- struct grant_entry *shared = *__shared;
+ void *shared = *__shared;
if (shared == NULL) {
struct vm_struct *area =
@@ -83,8 +93,28 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned
long nr_gframes,
return rc;
}
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
- unsigned long nr_gframes)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared)
+{
+ int rc;
+ grant_status_t *shared = *__shared;
+ /* First use: *__shared is NULL, so reserve a VM area of max_nr_gframes pages and hand its address back to the caller. */
+ if (shared == NULL) {
+ struct vm_struct *area =
+ xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+ BUG_ON(area == NULL);
+ shared = area->addr;
+ *__shared = shared;
+ }
+ /* Map the first nr_gframes pages of the area; &frames is the cursor that map_pte_fn_status advances once per page. */
+ rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+ PAGE_SIZE * nr_gframes,
+ map_pte_fn_status, &frames);
+ return rc; /* result of apply_to_page_range(), passed through unchanged */
+}
+
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
{
apply_to_page_range(&init_mm, (unsigned long)shared,
PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 52183aa..3ac29e3 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -49,7 +49,10 @@
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
#define GNTTAB_LIST_END 0xffffffff
-#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+#define GREFS_PER_GRANT_FRAME \
+(grant_table_version == 1 ? \
+ (PAGE_SIZE / sizeof(struct grant_entry_v1)) : \
+ (PAGE_SIZE / sizeof(struct grant_entry_v2)))
static void pending_free_timer(unsigned long ignore);
@@ -64,13 +67,22 @@ static DEFINE_TIMER(gnttab_delayed_free_timer,
pending_free_timer, 0, 0);
static DEFINE_SPINLOCK(gnttab_pending_free_lock);
static DEFINE_SPINLOCK(gnttab_list_lock);
-static struct grant_entry *shared;
+static union {
+ struct grant_entry_v1 *v1;
+ struct grant_entry_v2 *v2;
+ void *raw;
+} shared;
+
+static grant_status_t *grstatus;
static struct gnttab_free_callback *gnttab_free_callback_list;
+static int grant_table_version;
+
static int gnttab_expand(unsigned int req_entries);
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
@@ -150,15 +162,22 @@ static void update_grant_entry(grant_ref_t ref, domid_t
domid,
* 1. Write ent->domid.
* 2. Write ent->frame:
* GTF_permit_access: Frame to which access is permitted.
- * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
- * frame, or zero if none.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by
+ * new frame, or zero if none.
* 3. Write memory barrier (WMB).
* 4. Write ent->flags, inc. valid type.
*/
- shared[ref].frame = frame;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = flags;
+ if (grant_table_version == 1) {
+ shared.v1[ref].frame = frame;
+ shared.v1[ref].domid = domid;
+ wmb();
+ shared.v1[ref].flags = flags;
+ } else {
+ shared.v2[ref].frame = frame;
+ shared.v2[ref].hdr.domid = domid;
+ wmb();
+ shared.v2[ref].hdr.flags = GTF_permit_access | flags;
+ }
}
/*
@@ -191,7 +210,10 @@ int gnttab_query_foreign_access(grant_ref_t ref)
{
u16 nflags;
- nflags = shared[ref].flags;
+ if (grant_table_version == 1)
+ nflags = shared.v1[ref].flags;
+ else
+ nflags = grstatus[ref];
return (nflags & (GTF_reading|GTF_writing));
}
@@ -200,13 +222,37 @@ EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
static int _gnttab_end_foreign_access_ref(grant_ref_t ref)
{
u16 flags, nflags;
-
- nflags = shared[ref].flags;
- do {
- flags = nflags;
- if (flags & (GTF_reading|GTF_writing))
+ u16 *pflags;
+
+ if (grant_table_version == 1) {
+ pflags = &shared.v1[ref].flags;
+ nflags = *pflags;
+ do {
+ flags = nflags;
+ if (flags & (GTF_reading|GTF_writing))
+ return 0;
+ nflags = sync_cmpxchg(&shared.v1[ref].flags, flags,
+ 0);
+ } while (nflags != flags);
+ } else {
+ shared.v2[ref].hdr.flags = 0;
+ mb();
+ if (grstatus[ref] & (GTF_reading|GTF_writing)) {
return 0;
- } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) !=
flags);
+ } else {
+ /* The read of grstatus needs to have acquire
+ semantics. On x86, reads already have
+ that, and we just need to protect against
+ compiler reorderings. On other
+ architectures we may need a full
+ barrier. */
+#ifdef CONFIG_X86
+ barrier();
+#else
+ mb();
+#endif
+ }
+ }
return 1;
}
@@ -333,25 +379,34 @@ unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t
ref)
{
unsigned long frame;
u16 flags;
+ u16 *pflags;
+
+ if (grant_table_version == 1)
+ pflags = &shared.v1[ref].flags;
+ else
+ pflags = &shared.v2[ref].hdr.flags;
/*
* If a transfer is not even yet started, try to reclaim the grant
* reference and return failure (== 0).
*/
- while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
- if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+ while (!((flags = *pflags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(pflags, flags, 0) == flags)
return 0;
cpu_relax();
}
/* If a transfer is in progress then wait until it is completed. */
while (!(flags & GTF_transfer_completed)) {
- flags = shared[ref].flags;
+ flags = *pflags;
cpu_relax();
}
rmb(); /* Read the frame number /after/ reading completion status. */
- frame = shared[ref].frame;
+ if (grant_table_version == 1)
+ frame = shared.v1[ref].frame;
+ else
+ frame = shared.v2[ref].frame;
BUG_ON(frame == 0);
return frame;
@@ -525,34 +580,98 @@ static inline unsigned int max_nr_grant_frames(void)
return xen_max;
}
+static unsigned nr_status_frames(unsigned nr_grant_frames)
+{
+ return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP; /* grant refs held by nr_grant_frames, divided by grant_status_t entries per page (SPP), rounded up */
+}
+
+static void gnttab_request_version(void)
+{
+ int rc;
+ struct gnttab_set_version gsv;
+ /* Always ask for version 2; a non-zero return selects the version 1 fallback below. */
+ gsv.version = 2;
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+ if (rc == 0) {
+ grant_table_version = 2;
+ printk(KERN_NOTICE "Using V2 grant tables.\n");
+ } else {
+ if (grant_table_version == 2) { /* an earlier call already selected version 2 */
+ /* If we've already used version 2 features,
+ but then suddenly discover that they're not
+ available (e.g. migrating to an older
+ version of Xen), almost unbounded badness
+ can happen. */
+ panic("we need grant tables version 2, but only version
1 is available");
+ }
+ grant_table_version = 1;
+ printk(KERN_WARNING "Using legacy V1 grant tables; upgrade to a
newer version of Xen.\n");
+ }
+}
+
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
- unsigned long *frames;
+ unsigned long *gframes;
+ uint64_t *sframes;
unsigned int nr_gframes = end_idx + 1;
+ unsigned int nr_sframes;
int rc;
- frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
- if (!frames)
+ gframes = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
+ if (!gframes)
return -ENOMEM;
setup.dom = DOMID_SELF;
setup.nr_frames = nr_gframes;
- set_xen_guest_handle(setup.frame_list, frames);
+ set_xen_guest_handle(setup.frame_list, gframes);
rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
if (rc == -ENOSYS) {
- kfree(frames);
+ kfree(gframes);
return -ENOSYS;
}
BUG_ON(rc || setup.status);
- rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(),
- &shared);
+ if (grant_table_version > 1) {
+ struct gnttab_get_status_frames getframes;
+
+ nr_sframes = nr_status_frames(nr_gframes);
+
+ sframes = kmalloc(nr_sframes * sizeof(uint64_t),
+ GFP_ATOMIC);
+ if (!sframes) {
+ kfree(gframes);
+ return -ENOMEM;
+ }
+ getframes.dom = DOMID_SELF;
+ getframes.nr_frames = nr_sframes;
+ getframes.frame_list = (unsigned long)sframes;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+ &getframes, 1);
+ if (rc == -ENOSYS) {
+ kfree(gframes);
+ kfree(sframes);
+ return -ENOSYS;
+ }
+
+ BUG_ON(rc || getframes.status);
+
+ rc = arch_gnttab_map_status(
+ sframes, nr_sframes,
+ nr_status_frames(max_nr_grant_frames()),
+ &grstatus);
+ BUG_ON(rc);
+ kfree(sframes);
+ }
+
+ rc = arch_gnttab_map_shared(gframes, nr_gframes, max_nr_grant_frames(),
+ &shared.raw);
BUG_ON(rc);
- kfree(frames);
+ kfree(gframes);
return 0;
}
@@ -663,6 +782,7 @@ EXPORT_SYMBOL_GPL(gnttab_reset_grant_page);
int gnttab_resume(void)
{
+ gnttab_request_version();
if (max_nr_grant_frames() < nr_grant_frames)
return -ENOSYS;
return gnttab_map(0, nr_grant_frames - 1);
@@ -670,7 +790,11 @@ int gnttab_resume(void)
int gnttab_suspend(void)
{
- arch_gnttab_unmap_shared(shared, nr_grant_frames);
+ arch_gnttab_unmap(shared.raw, nr_grant_frames);
+ /* The status frames are only mapped when version 2 grant tables are
+ in use; with version 1 grstatus is still NULL and must not be unmapped. */
+ if (grant_table_version > 1)
+ arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
return 0;
}
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index b89ee8a..1ebfbd9 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -149,9 +149,11 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap,
unsigned long addr,
int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
unsigned long max_nr_gframes,
- struct grant_entry **__shared);
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
- unsigned long nr_gframes);
+ void **__shared);
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+ unsigned long max_nr_gframes,
+ grant_status_t **__shared);
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
#define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
diff --git a/include/xen/interface/grant_table.h
b/include/xen/interface/grant_table.h
index 8211af8..653f8c7 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -84,12 +84,22 @@
*/
/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
* A grant table comprises a packed array of grant entries in one or more
* page frames shared between Xen and a guest.
* [XEN]: This field is written by Xen and read by the sharing guest.
* [GST]: This field is written by the guest and read by Xen.
*/
-struct grant_entry {
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility. New guests should use version 2.
+ */
+struct grant_entry_v1 {
/* GTF_xxx: various type and flag information. [XEN,GST] */
uint16_t flags;
/* The domain being granted foreign privileges. [GST] */
@@ -107,10 +117,13 @@ struct grant_entry {
* GTF_permit_access: Allow @domid to map/access @frame.
* GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
* to this guest. Xen writes the page number to @frame.
+ * GTF_transitive: Allow @domid to transitively access a subrange of
+ * @trans_grant in @trans_domid. No mappings are allowed.
*/
#define GTF_invalid (0U<<0)
#define GTF_permit_access (1U<<0)
#define GTF_accept_transfer (2U<<0)
+#define GTF_transitive (3U<<0)
#define GTF_type_mask (3U<<0)
/*
@@ -118,6 +131,9 @@ struct grant_entry {
* GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
* GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
* GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * GTF_sub_page: Grant access to only a subrange of the page. @domid
+ * will only be allowed to copy from the grant, and not
+ * map it. [GST]
*/
#define _GTF_readonly (2)
#define GTF_readonly (1U<<_GTF_readonly)
@@ -125,6 +141,8 @@ struct grant_entry {
#define GTF_reading (1U<<_GTF_reading)
#define _GTF_writing (4)
#define GTF_writing (1U<<_GTF_writing)
+#define _GTF_sub_page (8)
+#define GTF_sub_page (1U<<_GTF_sub_page)
/*
* Subflags for GTF_accept_transfer:
@@ -141,15 +159,75 @@ struct grant_entry {
#define _GTF_transfer_completed (3)
#define GTF_transfer_completed (1U<<_GTF_transfer_completed)
+/*
+ * Version 2 grant table entries. These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
-/***********************************
- * GRANT TABLE QUERIES AND USES
+/*
+ * Version 1 and version 2 grant entries share a common prefix. The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
*/
+struct grant_entry_header {
+ uint16_t flags;
+ domid_t domid;
+};
+typedef struct grant_entry_header grant_entry_header_t;
/*
- * Reference to a grant entry in a specified domain's grant table.
+ * Version 2 of the grant entry structure.
+ */
+struct grant_entry_v2 {
+ grant_entry_header_t hdr;
+ union {
+ /*
+ * The frame to which we are granting access. This field has
+ * the same meaning as the grant_entry_v1 field of the same
+ * name.
+ */
+ uint32_t frame;
+
+ /*
+ * If the grant type is GTF_permit_access and GTF_sub_page is
+ * set, @domid is allowed to access bytes
+ * [@page_off,@page_off+@length) in frame @frame.
+ */
+ struct {
+ uint32_t frame;
+ uint16_t page_off;
+ uint16_t length;
+ } sub_page;
+
+ /*
+ * If the grant is GTF_transitive, @domid is allowed to use
+ * the grant @gref in domain @trans_domid, as if it was the
+ * local domain. Obviously, the transitive access must be
+ * compatible with the original grant.
+ *
+ * The current version of Xen does not allow transitive grants
+ * to be mapped.
+ */
+ struct {
+ domid_t trans_domid;
+ uint16_t pad0;
+ grant_ref_t gref;
+ } transitive;
+
+ uint32_t __spacer[3]; /* Pad to a power of two */
+ };
+};
+
+typedef uint16_t grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
*/
-typedef uint32_t grant_ref_t;
/*
* Handle to track a mapping created via a grant reference.
@@ -343,6 +421,43 @@ struct gnttab_unmap_and_replace {
DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure. This operation can only be performed
+ * once in any given domain. It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version 8
+struct gnttab_set_version {
+ /* IN parameters */
+ uint32_t version;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpxchg operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ * 1. <dom> may be specified as DOMID_SELF.
+ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames 9
+struct gnttab_get_status_frames {
+ /* IN parameters. */
+ uint32_t nr_frames;
+ domid_t dom;
+ /* OUT parameters. */
+ int16_t status; /* GNTST_* */
+ uint64_t frame_list;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
+/*
* Bitfield values for update_pin_status.flags.
*/
/* Map the grant entry for access by I/O devices. */
--
1.6.3.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|