[PATCH v2 3/3] x86/ioreq: Extend ioreq server to support multiple ioreq pages



A single shared ioreq page provides PAGE_SIZE/sizeof(ioreq_t) = 128
slots, limiting HVM guests to 128 vCPUs. To support more vCPUs, extend
the ioreq server to use xvzalloc_array() for allocating a contiguous
virtual array of ioreq_t slots sized to d->max_vcpus, backed by
potentially non-contiguous physical pages.
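
As a quick illustration of the sizing (a standalone sketch, not part of
the patch; it assumes 4 KiB pages and the 32-byte ioreq_t noted in the
xen/ioreq.h hunk below):

    #include <stdio.h>

    /* Mirrors the IOREQS_PER_PAGE / nr_ioreq_pages() arithmetic. */
    #define PAGE_SIZE        4096u
    #define SIZEOF_IOREQ_T     32u  /* sizeof(ioreq_t) on x86 */
    #define IOREQS_PER_PAGE  (PAGE_SIZE / SIZEOF_IOREQ_T)  /* 128 */

    static unsigned int nr_ioreq_pages(unsigned int max_vcpus)
    {
        return (max_vcpus + IOREQS_PER_PAGE - 1) / IOREQS_PER_PAGE;
    }

    int main(void)
    {
        printf("128 vCPUs -> %u page(s)\n", nr_ioreq_pages(128)); /* 1 */
        printf("129 vCPUs -> %u page(s)\n", nr_ioreq_pages(129)); /* 2 */
        printf("384 vCPUs -> %u page(s)\n", nr_ioreq_pages(384)); /* 3 */
        return 0;
    }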

For the GFN-mapped path (x86), individual pages are mapped via
prepare_ring_for_helper() and then combined into a single contiguous
VA using vmap(). The number of ioreq pages is computed at runtime
via nr_ioreq_pages(d) = DIV_ROUND_UP(d->max_vcpus, IOREQS_PER_PAGE),
so small VMs only allocate one page. All existing single-page paths
(bufioreq, legacy clients) remain unchanged.
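
From a device model's point of view, the slots for more than 128 vCPUs
span several consecutive frames. A hypothetical lookup helper (names
invented for illustration; ioreq_t is the public struct from
public/hvm/ioreq.h and IOREQS_PER_PAGE is the constant added by this
patch) could look like:

    /*
     * Hypothetical helper, illustration only: given the ioreq frames
     * mapped in order (frame 0 first), return the slot for vcpu_id.
     */
    static ioreq_t *vcpu_ioreq_slot(void *frames[], unsigned int vcpu_id)
    {
        unsigned int frame = vcpu_id / IOREQS_PER_PAGE; /* which page */
        unsigned int slot  = vcpu_id % IOREQS_PER_PAGE; /* slot within it */

        return (ioreq_t *)frames[frame] + slot;
    }

If the frames happen to be mapped at consecutive virtual addresses, this
degenerates to indexing a flat ioreq_t array by vcpu_id, which is exactly
what the vmap()ed view inside Xen provides.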

Mark the now-unused shared_iopage_t in the public header as deprecated.

Signed-off-by: Julian Vetter <julian.vetter@xxxxxxxxxx>
---
Changes in v2:
- Use xvzalloc_array() to allocate the contiguous region
- Remove unnecessary includes
- Base nr_ioreq_pages() on d->max_vcpus rather than the HVM_MAX_VCPUS
  define
- Reduce indentation by one level in hvm_alloc_ioreq_gfns()
- Add blank lines between declarations and statements
- Add a comment explaining why hvm_add_ioreq_gfn() can return without
  rollback
---
 xen/arch/x86/hvm/ioreq.c       | 198 ++++++++++++++++++++++++++++++++-
 xen/common/ioreq.c             |  95 ++++++++++++----
 xen/include/public/hvm/ioreq.h |   5 +
 xen/include/xen/ioreq.h        |  13 ++-
 4 files changed, 285 insertions(+), 26 deletions(-)

diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index 5ebc48dbd4..a77f00dd96 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -6,6 +6,7 @@
  */
 
 #include <xen/domain.h>
+#include <xen/domain_page.h>
 #include <xen/event.h>
 #include <xen/init.h>
 #include <xen/ioreq.h>
@@ -15,6 +16,7 @@
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <xen/trace.h>
+#include <xen/vmap.h>
 #include <xen/vpci.h>
 
 #include <asm/hvm/emulate.h>
@@ -89,6 +91,39 @@ static gfn_t hvm_alloc_ioreq_gfn(struct ioreq_server *s)
     return hvm_alloc_legacy_ioreq_gfn(s);
 }
 
+static gfn_t hvm_alloc_ioreq_gfns(struct ioreq_server *s,
+                                  unsigned int nr_pages)
+{
+    struct domain *d = s->target;
+    unsigned long mask;
+    unsigned int i, run;
+
+    if ( nr_pages == 1 )
+        return hvm_alloc_ioreq_gfn(s);
+
+    /* Find nr_pages consecutive set bits */
+    mask = d->arch.hvm.ioreq_gfn.mask;
+
+    for ( i = 0, run = 0; i < BITS_PER_LONG; i++ )
+    {
+        if ( !test_bit(i, &mask) )
+            run = 0;
+        else if ( ++run == nr_pages )
+        {
+            /* Found a run - clear all bits and return base GFN */
+            unsigned int start = i - nr_pages + 1;
+            unsigned int j;
+
+            for ( j = start; j <= i; j++ )
+                clear_bit(j, &d->arch.hvm.ioreq_gfn.mask);
+
+            return _gfn(d->arch.hvm.ioreq_gfn.base + start);
+        }
+    }
+
+    return INVALID_GFN;
+}
+
 static bool hvm_free_legacy_ioreq_gfn(struct ioreq_server *s,
                                       gfn_t gfn)
 {
@@ -121,11 +156,23 @@ static void hvm_free_ioreq_gfn(struct ioreq_server *s, gfn_t gfn)
     }
 }
 
+static void hvm_free_ioreq_gfns(struct ioreq_server *s, gfn_t gfn,
+                                unsigned int nr_pages)
+{
+    unsigned int i;
+
+    for ( i = 0; i < nr_pages; i++ )
+        hvm_free_ioreq_gfn(s, gfn_add(gfn, i));
+}
+
 static void hvm_unmap_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages;
 
+    if ( buf )
     {
+        struct ioreq_page *iorp = &s->bufioreq;
+
         if ( gfn_eq(iorp->gfn, INVALID_GFN) )
             return;
 
@@ -134,16 +181,41 @@ static void hvm_unmap_ioreq_gfn(struct ioreq_server *s, bool buf)
 
         hvm_free_ioreq_gfn(s, iorp->gfn);
         iorp->gfn = INVALID_GFN;
+        return;
+    }
+
+    if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+        return;
+
+    nr_pages = nr_ioreq_pages(s->target);
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        struct page_info *pg = vmap_to_page((char *)s->ioreq +
+                                            i * PAGE_SIZE);
+
+        put_page_and_type(pg);
+        put_page(pg);
     }
+    vunmap(s->ioreq);
+    s->ioreq = NULL;
+
+    hvm_free_ioreq_gfns(s, s->ioreq_gfn, nr_pages);
+    s->ioreq_gfn = INVALID_GFN;
 }
 
 static int hvm_map_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
     struct domain *d = s->target;
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages;
+    gfn_t base_gfn;
+    mfn_t *mfns;
     int rc;
 
+    if ( buf )
     {
+        struct ioreq_page *iorp = &s->bufioreq;
+
         if ( iorp->page )
         {
             /*
@@ -173,30 +245,122 @@ static int hvm_map_ioreq_gfn(struct ioreq_server *s, bool buf)
 
         return rc;
     }
+
+    /* ioreq: multi-page with contiguous VA */
+    if ( s->ioreq )
+    {
+        if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+            return -EPERM;
+        return 0;
+    }
+
+    if ( d->is_dying )
+        return -EINVAL;
+
+    nr_pages = nr_ioreq_pages(d);
+    base_gfn = hvm_alloc_ioreq_gfns(s, nr_pages);
+
+    if ( gfn_eq(base_gfn, INVALID_GFN) )
+        return -ENOMEM;
+
+    mfns = xmalloc_array(mfn_t, nr_pages);
+    if ( !mfns )
+    {
+        hvm_free_ioreq_gfns(s, base_gfn, nr_pages);
+        return -ENOMEM;
+    }
+
+    /*
+     * Use prepare_ring_for_helper() to obtain page and type references
+     * for each GFN. Discard its per-page VA immediately, as all pages
+     * will be combined into a single contiguous VA via vmap() below.
+     */
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        struct page_info *pg;
+        void *va;
+
+        rc = prepare_ring_for_helper(d, gfn_x(base_gfn) + i, &pg, &va);
+        if ( rc )
+            goto fail;
+
+        /* Discard per-page VA */
+        unmap_domain_page_global(va);
+        mfns[i] = page_to_mfn(pg);
+    }
+
+    /* Map all mfns as single contiguous VA */
+    s->ioreq = vmap(mfns, nr_pages);
+    if ( !s->ioreq )
+    {
+        rc = -ENOMEM;
+        goto fail;
+    }
+
+    s->ioreq_gfn = base_gfn;
+    xfree(mfns);
+
+    return 0;
+
+ fail:
+    while ( i-- > 0 )
+    {
+        struct page_info *pg = mfn_to_page(mfns[i]);
+
+        put_page_and_type(pg);
+        put_page(pg);
+    }
+    hvm_free_ioreq_gfns(s, base_gfn, nr_pages);
+    xfree(mfns);
+
+    return rc;
 }
 
 static void hvm_remove_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
     struct domain *d = s->target;
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages;
 
+    if ( buf )
     {
+        struct ioreq_page *iorp = &s->bufioreq;
+
         if ( gfn_eq(iorp->gfn, INVALID_GFN) )
             return;
 
         if ( p2m_remove_page(d, iorp->gfn, page_to_mfn(iorp->page), 0) )
             domain_crash(d);
         clear_page(iorp->va);
+        return;
+    }
+
+    if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+        return;
+
+    nr_pages = nr_ioreq_pages(d);
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        gfn_t gfn = gfn_add(s->ioreq_gfn, i);
+        struct page_info *pg = vmap_to_page((char *)s->ioreq +
+                                            i * PAGE_SIZE);
+
+        if ( p2m_remove_page(d, gfn, page_to_mfn(pg), 0) )
+            domain_crash(d);
     }
+    memset(s->ioreq, 0, nr_pages * PAGE_SIZE);
 }
 
 static int hvm_add_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
     struct domain *d = s->target;
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages;
     int rc;
 
+    if ( buf )
     {
+        struct ioreq_page *iorp = &s->bufioreq;
+
         if ( gfn_eq(iorp->gfn, INVALID_GFN) )
             return 0;
 
@@ -208,6 +372,32 @@ static int hvm_add_ioreq_gfn(struct ioreq_server *s, bool buf)
 
         return rc;
     }
+
+    if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+        return 0;
+
+    nr_pages = nr_ioreq_pages(d);
+    memset(s->ioreq, 0, nr_pages * PAGE_SIZE);
+
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        gfn_t gfn = gfn_add(s->ioreq_gfn, i);
+        struct page_info *pg = vmap_to_page((char *)s->ioreq +
+                                            i * PAGE_SIZE);
+
+        rc = p2m_add_page(d, gfn, page_to_mfn(pg), 0, p2m_ram_rw);
+        if ( rc )
+            /*
+             * No rollback of previously added pages: The caller
+             * (arch_ioreq_server_disable) has no error handling path,
+             * and partial failure here will be cleaned up when the
+             * ioreq server is eventually destroyed.
+             */
+            return rc;
+
+        paging_mark_pfn_dirty(d, _pfn(gfn_x(gfn)));
+    }
+    return 0;
 }
 
 int arch_ioreq_server_map_pages(struct ioreq_server *s)
diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
index 5d722c8d4e..0ad86d3af3 100644
--- a/xen/common/ioreq.c
+++ b/xen/common/ioreq.c
@@ -26,6 +26,7 @@
 #include <xen/paging.h>
 #include <xen/sched.h>
 #include <xen/trace.h>
+#include <xen/xvmalloc.h>
 
 #include <asm/guest_atomics.h>
 #include <asm/ioreq.h>
@@ -95,12 +96,10 @@ static struct ioreq_server *get_ioreq_server(const struct domain *d,
 
 static ioreq_t *get_ioreq(struct ioreq_server *s, struct vcpu *v)
 {
-    shared_iopage_t *p = s->ioreq.va;
-
     ASSERT((v == current) || !vcpu_runnable(v));
-    ASSERT(p != NULL);
+    ASSERT(s->ioreq != NULL);
 
-    return &p->vcpu_ioreq[v->vcpu_id];
+    return &s->ioreq[v->vcpu_id];
 }
 
 /*
@@ -260,9 +259,32 @@ bool vcpu_ioreq_handle_completion(struct vcpu *v)
 
 static int ioreq_server_alloc_mfn(struct ioreq_server *s, bool buf)
 {
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    struct ioreq_page *iorp;
     struct page_info *page;
 
+    if ( !buf )
+    {
+        if ( s->ioreq )
+        {
+            /*
+             * If a guest frame has already been mapped (which may happen
+             * on demand if ioreq_server_get_info() is called), then
+             * allocating a page is not permitted.
+             */
+            if ( !gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+                return -EPERM;
+
+            return 0;
+        }
+
+        s->ioreq = xvzalloc_array(ioreq_t, s->target->max_vcpus);
+
+        return s->ioreq ? 0 : -ENOMEM;
+    }
+
+    /* bufioreq: single page allocation */
+    iorp = &s->bufioreq;
+
     if ( iorp->page )
     {
         /*
@@ -309,8 +331,17 @@ static int ioreq_server_alloc_mfn(struct ioreq_server *s, bool buf)
 
 static void ioreq_server_free_mfn(struct ioreq_server *s, bool buf)
 {
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
-    struct page_info *page = iorp->page;
+    struct ioreq_page *iorp;
+    struct page_info *page;
+
+    if ( !buf )
+    {
+        XVFREE(s->ioreq);
+        return;
+    }
+
+    iorp = &s->bufioreq;
+    page = iorp->page;
 
     if ( !page )
         return;
@@ -334,11 +365,29 @@ bool is_ioreq_server_page(struct domain *d, const struct page_info *page)
 
     FOR_EACH_IOREQ_SERVER(d, id, s)
     {
-        if ( (s->ioreq.page == page) || (s->bufioreq.page == page) )
+        if ( s->bufioreq.page == page )
         {
             found = true;
             break;
         }
+
+        if ( s->ioreq )
+        {
+            unsigned int i;
+
+            for ( i = 0; i < nr_ioreq_pages(d); i++ )
+            {
+                if ( vmap_to_page((char *)s->ioreq +
+                                  i * PAGE_SIZE) == page )
+                {
+                    found = true;
+                    break;
+                }
+            }
+
+            if ( found )
+                break;
+        }
     }
 
     rspin_unlock(&d->ioreq_server.lock);
@@ -351,7 +400,7 @@ static void ioreq_server_update_evtchn(struct ioreq_server *s,
 {
     ASSERT(spin_is_locked(&s->lock));
 
-    if ( s->ioreq.va != NULL )
+    if ( s->ioreq != NULL )
     {
         ioreq_t *p = get_ioreq(s, sv->vcpu);
 
@@ -591,7 +640,7 @@ static int ioreq_server_init(struct ioreq_server *s,
     INIT_LIST_HEAD(&s->ioreq_vcpu_list);
     spin_lock_init(&s->bufioreq_lock);
 
-    s->ioreq.gfn = INVALID_GFN;
+    s->ioreq_gfn = INVALID_GFN;
     s->bufioreq.gfn = INVALID_GFN;
 
     rc = ioreq_server_alloc_rangesets(s, id);
@@ -770,7 +819,7 @@ static int ioreq_server_get_info(struct domain *d, ioservid_t id,
     }
 
     if ( ioreq_gfn )
-        *ioreq_gfn = gfn_x(s->ioreq.gfn);
+        *ioreq_gfn = gfn_x(s->ioreq_gfn);
 
     if ( HANDLE_BUFIOREQ(s) )
     {
@@ -813,26 +862,30 @@ int ioreq_server_get_frame(struct domain *d, ioservid_t id,
     if ( rc )
         goto out;
 
-    switch ( idx )
+    if ( idx == XENMEM_resource_ioreq_server_frame_bufioreq )
     {
-    case XENMEM_resource_ioreq_server_frame_bufioreq:
         rc = -ENOENT;
         if ( !HANDLE_BUFIOREQ(s) )
             goto out;
 
         *mfn = page_to_mfn(s->bufioreq.page);
         rc = 0;
-        break;
-
-    case XENMEM_resource_ioreq_server_frame_ioreq(0):
-        *mfn = page_to_mfn(s->ioreq.page);
-        rc = 0;
-        break;
+    }
+    else if ( idx >= XENMEM_resource_ioreq_server_frame_ioreq(0) &&
+              idx < XENMEM_resource_ioreq_server_frame_ioreq(nr_ioreq_pages(d)) )
+    {
+        unsigned int page_idx = idx - XENMEM_resource_ioreq_server_frame_ioreq(0);
 
-    default:
         rc = -EINVAL;
-        break;
+        if ( idx >= XENMEM_resource_ioreq_server_frame_ioreq(0) &&
+             page_idx < nr_ioreq_pages(d) && s->ioreq )
+        {
+            *mfn = vmap_to_mfn((char *)s->ioreq + page_idx * PAGE_SIZE);
+            rc = 0;
+        }
     }
+    else
+        rc = -EINVAL;
 
  out:
     rspin_unlock(&d->ioreq_server.lock);
diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h
index 7a6bc760d0..1c1a9e61ae 100644
--- a/xen/include/public/hvm/ioreq.h
+++ b/xen/include/public/hvm/ioreq.h
@@ -49,6 +49,11 @@ struct ioreq {
 };
 typedef struct ioreq ioreq_t;
 
+/*
+ * Deprecated: shared_iopage is no longer used by Xen internally.
+ * The ioreq server now uses a dynamically sized ioreq_t array
+ * to support more than 128 vCPUs.
+ */
 struct shared_iopage {
     struct ioreq vcpu_ioreq[1];
 };
diff --git a/xen/include/xen/ioreq.h b/xen/include/xen/ioreq.h
index e86f0869fa..a4c7621f3f 100644
--- a/xen/include/xen/ioreq.h
+++ b/xen/include/xen/ioreq.h
@@ -19,9 +19,19 @@
 #ifndef __XEN_IOREQ_H__
 #define __XEN_IOREQ_H__
 
+#include <xen/macros.h>
 #include <xen/sched.h>
 
 #include <public/hvm/dm_op.h>
+#include <public/hvm/ioreq.h>
+
+/* 4096 / 32 = 128 ioreq slots per page */
+#define IOREQS_PER_PAGE  (PAGE_SIZE / sizeof(ioreq_t))
+
+static inline unsigned int nr_ioreq_pages(const struct domain *d)
+{
+    return DIV_ROUND_UP(d->max_vcpus, IOREQS_PER_PAGE);
+}
 
 struct ioreq_page {
     gfn_t gfn;
@@ -45,7 +55,8 @@ struct ioreq_server {
     /* Lock to serialize toolstack modifications */
     spinlock_t             lock;
 
-    struct ioreq_page      ioreq;
+    ioreq_t                *ioreq;
+    gfn_t                  ioreq_gfn;
     struct list_head       ioreq_vcpu_list;
     struct ioreq_page      bufioreq;
 
-- 
2.51.0



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech




 

