xen-devel

[Xen-devel] Re: [PATCH 10/15] xen: Introduce the Xen mapcache

To: stefano.stabellini@xxxxxxxxxxxxx
Subject: [Xen-devel] Re: [PATCH 10/15] xen: Introduce the Xen mapcache
From: Anthony Liguori <anthony@xxxxxxxxxxxxx>
Date: Fri, 13 Aug 2010 13:55:38 -0500
Cc: Anthony.Perard@xxxxxxxxxx, xen-devel@xxxxxxxxxxxxxxxxxxx, qemu-devel@xxxxxxxxxx
Delivery-date: Fri, 13 Aug 2010 12:03:05 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <1281622202-3453-10-git-send-email-stefano.stabellini@xxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <alpine.DEB.2.00.1008121244200.2545@kaball-desktop> <1281622202-3453-10-git-send-email-stefano.stabellini@xxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.11) Gecko/20100713 Lightning/1.0b1 Thunderbird/3.0.6
On 08/12/2010 09:09 AM, stefano.stabellini@xxxxxxxxxxxxx wrote:
From: Anthony PERARD <anthony.perard@xxxxxxxxxx>

Introduce a mapcache to handle the 64-bit address space of the guest
from a 32-bit userland process (QEMU).
The mapcache maps chunks of guest memory on demand and unmaps them
when they are no longer needed.
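
(Concretely, with the 32-bit constants introduced below: MCACHE_BUCKET_SHIFT
is 16, so a guest physical address such as 0x42312345 falls in bucket 0x4231
(phys_addr >> 16) at offset 0x2345 (phys_addr & 0xffff), and only that 64KB
chunk has to be mapped into the QEMU address space.)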

Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx>
Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
---
  hw/xen_machine_fv.c       |    7 ++
  target-xen/qemu-xen.h     |   15 +++
  target-xen/xen_mapcache.c |  233 +++++++++++++++++++++++++++++++++++++++++++++
  3 files changed, 255 insertions(+), 0 deletions(-)

diff --git a/hw/xen_machine_fv.c b/hw/xen_machine_fv.c
index b1bc88d..58237d6 100644
--- a/hw/xen_machine_fv.c
+++ b/hw/xen_machine_fv.c
@@ -84,6 +84,13 @@ static void xen_init_fv(ram_addr_t ram_size,
          exit(1);
      }

+#if defined(__i386__) || defined(__x86_64__)
+    if (qemu_map_cache_init()) {
+        fprintf(stderr, "qemu_map_cache_init returned: error %d\n", errno);
+        exit(-1);
+    }
+#endif
+
      xc_get_hvm_param(xen_xc, xen_domid, HVM_PARAM_IOREQ_PFN, &ioreq_pfn);
      fprintf(stderr, "shared page at pfn %lx\n", ioreq_pfn);
      shared_page = xc_map_foreign_range(xen_xc, xen_domid, XC_PAGE_SIZE,
diff --git a/target-xen/qemu-xen.h b/target-xen/qemu-xen.h
index 79a4638..e4a7030 100644
--- a/target-xen/qemu-xen.h
+++ b/target-xen/qemu-xen.h
@@ -13,6 +13,21 @@

  /* xen_mapcache.c */

+#if (defined(__i386__) || defined(__x86_64__)) && !defined(QEMU_TOOL)
+#define MAPCACHE
+
+#if defined(__i386__)
+#define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
+#define MCACHE_BUCKET_SHIFT 16
+#elif defined(__x86_64__)
+#define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
+#define MCACHE_BUCKET_SHIFT 20
+#endif
+
+#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
+#endif
+
+int qemu_map_cache_init(void);
  uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock);
  void     qemu_invalidate_entry(uint8_t *buffer);
  void     qemu_invalidate_map_cache(void);
diff --git a/target-xen/xen_mapcache.c b/target-xen/xen_mapcache.c
index 39daae2..efe036c 100644
--- a/target-xen/xen_mapcache.c
+++ b/target-xen/xen_mapcache.c
@@ -1,5 +1,237 @@
+#include "config.h"
+
+#include "hw/xen_backend.h"
  #include "qemu-xen.h"

+#include <xen/hvm/params.h>
+#include <sys/mman.h>
+
+#if defined(MAPCACHE)
+
+#define BITS_PER_LONG (sizeof(long)*8)
+#define BITS_TO_LONGS(bits) \
+    (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+    unsigned long name[BITS_TO_LONGS(bits)]
+#define test_bit(bit,map) \
+    (!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG))))
+
+struct map_cache {
+    unsigned long paddr_index;
+    uint8_t      *vaddr_base;
+    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
+    uint8_t lock;
+    struct map_cache *next;
+};
+
+struct map_cache_rev {
+    uint8_t      *vaddr_req;
+    unsigned long paddr_index;
+    QTAILQ_ENTRY(map_cache_rev) next;
+};


CODING_STYLE
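E.g. spaces around binary operators, and QEMU generally prefers static
inline helpers over function-like macros. Purely as an illustration (not
necessarily the final shape), test_bit could become:

static inline int test_bit(unsigned int bit, const unsigned long *map)
{
    return !!(map[bit / BITS_PER_LONG] & (1UL << (bit % BITS_PER_LONG)));
}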

+static struct map_cache *mapcache_entry;
+static unsigned long nr_buckets;
+QTAILQ_HEAD(map_cache_head, map_cache_rev) locked_entries = QTAILQ_HEAD_INITIALIZER(locked_entries);
+
+/* For most cases (>99.9%), the page address is the same. */
+static unsigned long last_address_index = ~0UL;
+static uint8_t      *last_address_vaddr;

Should refactor away global state.
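E.g. hang all of this off a context structure that qemu_map_cache_init()
allocates and the other entry points take as an argument (hypothetical
sketch, names illustrative):

typedef struct MapCache {
    struct map_cache *entry;          /* bucket array (mapcache_entry today) */
    unsigned long nr_buckets;
    QTAILQ_HEAD(map_cache_head, map_cache_rev) locked_entries;
    unsigned long last_address_index; /* last-hit cache (two globals today) */
    uint8_t *last_address_vaddr;
} MapCache;

uint8_t *qemu_map_cache(MapCache *mc, target_phys_addr_t phys_addr,
                        uint8_t lock);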

+int qemu_map_cache_init(void)
+{
+    unsigned long size;
+
+    nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) +
+                   (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
+                  (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));
+
+    /*
+     * Use mmap() directly: lets us allocate a big hash table with no up-front
+     * cost in storage space. The OS will allocate memory only for the buckets
+     * that we actually use. All others will contain all zeroes.
+     */
+    size = nr_buckets * sizeof(struct map_cache);
+    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
+    fprintf(stderr, "qemu_map_cache_init nr_buckets = %lx size %lu\n", nr_buckets, size);
+    mapcache_entry = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                          MAP_SHARED|MAP_ANON, -1, 0);
+    if (mapcache_entry == MAP_FAILED) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    return 0;
+}
+
+static void qemu_remap_bucket(struct map_cache *entry,
+                              unsigned long address_index)
+{
+    uint8_t *vaddr_base;
+    xen_pfn_t pfns[MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT];
+    int err[MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT];
+    unsigned int i, j;
+
+    if (entry->vaddr_base != NULL) {
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(stderr, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+    }
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT; i++) {
+        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) + i;
+    }
+
+    vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE,
+                                     pfns, err,
+                                     MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
+    if (vaddr_base == NULL) {
+        fprintf(stderr, "xc_map_foreign_bulk error %d\n", errno);
+        exit(-1);
+    }
+
+    entry->vaddr_base  = vaddr_base;
+    entry->paddr_index = address_index;
+
+    for (i = 0; i < MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT; i += BITS_PER_LONG) {
+        unsigned long word = 0;
+        j = ((i + BITS_PER_LONG) > (MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT)) ?
+            (MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG;
+        while (j > 0) {
+            word = (word << 1) | !err[i + --j];
+        }
+        entry->valid_mapping[i / BITS_PER_LONG] = word;
+    }
+}
+
+uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock)
+{
+    struct map_cache *entry, *pentry = NULL;
+    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
+    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

+    if (address_index == last_address_index && !lock)
+        return last_address_vaddr + address_offset;

+    entry = &mapcache_entry[address_index % nr_buckets];

+    while (entry && entry->lock && entry->paddr_index != address_index &&
+           entry->vaddr_base) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        entry = qemu_mallocz(sizeof(struct map_cache));
+        pentry->next = entry;
+        qemu_remap_bucket(entry, address_index);
+    } else if (!entry->lock) {
+        if (!entry->vaddr_base || entry->paddr_index != address_index ||
+            !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping))
+            qemu_remap_bucket(entry, address_index);
+    }
+
+    if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) {
+        last_address_index = ~0UL;
+        return NULL;
+    }
+
+    last_address_index = address_index;
+    last_address_vaddr = entry->vaddr_base;
+    if (lock) {
+        struct map_cache_rev *reventry = qemu_mallocz(sizeof(struct map_cache_rev));
+        entry->lock++;
+        reventry->vaddr_req = last_address_vaddr + address_offset;
+        reventry->paddr_index = last_address_index;
+        QTAILQ_INSERT_TAIL(&locked_entries, reventry, next);
+    }
+
+    return last_address_vaddr + address_offset;
+}
+
+void qemu_invalidate_entry(uint8_t *buffer)
+{
+    struct map_cache *entry = NULL, *pentry = NULL;
+    struct map_cache_rev *reventry;
+    unsigned long paddr_index;
+    int found = 0;
+
+    if (last_address_vaddr == buffer)
+        last_address_index = ~0UL;
+
+    QTAILQ_FOREACH(reventry, &locked_entries, next) {
+        if (reventry->vaddr_req == buffer) {
+            paddr_index = reventry->paddr_index;
+            found = 1;
+            break;
+        }
+    }
+    if (!found) {
+        fprintf(stderr, "qemu_invalidate_entry: could not find %p\n", buffer);
+        QTAILQ_FOREACH(reventry, &locked_entries, next) {
+            fprintf(stderr, "   %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+        }
+        return;
+    }
+    QTAILQ_REMOVE(&locked_entries, reventry, next);
+    qemu_free(reventry);
+
+    entry = &mapcache_entry[paddr_index % nr_buckets];
+    while (entry && entry->paddr_index != paddr_index) {
+        pentry = entry;
+        entry = entry->next;
+    }
+    if (!entry) {
+        fprintf(stderr, "Trying to unmap address %p that is not in the 
mapcache!\n", buffer);
+        return;
+    }
+    entry->lock--;
+    if (entry->lock > 0 || pentry == NULL)
+        return;
+
+    pentry->next = entry->next;
+    errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+    if (errno) {
+        fprintf(stderr, "unmap fails %d\n", errno);
+        exit(-1);
+    }
+    qemu_free(entry);
+}
+
+void qemu_invalidate_map_cache(void)
+{
+    unsigned long i;
+    struct map_cache_rev *reventry;
+
+    qemu_aio_flush();
+
+    QTAILQ_FOREACH(reventry, &locked_entries, next) {
+        fprintf(stderr, "There should be no locked mappings at this time, but %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req);
+    }
+
+    mapcache_lock();
+
+    for (i = 0; i < nr_buckets; i++) {
+        struct map_cache *entry = &mapcache_entry[i];
+
+        if (entry->vaddr_base == NULL)
+            continue;
+
+        errno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
+        if (errno) {
+            fprintf(stderr, "unmap fails %d\n", errno);
+            exit(-1);
+        }
+
+        entry->paddr_index = 0;
+        entry->vaddr_base  = NULL;
+    }
+
+    last_address_index = ~0UL;
+    last_address_vaddr = NULL;
+
+    mapcache_unlock();
+}
+#else
  uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock)
  {
      return phys_ram_addr(phys_addr);
@@ -12,3 +244,4 @@ void qemu_invalidate_map_cache(void)
  void qemu_invalidate_entry(uint8_t *buffer)
  {
  }
+#endif /* !MAPCACHE */

This should really tie into the RAMBlock infrastructure.
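I.e. rather than growing a parallel Xen-only lookup path, the mapcache would
sit behind the RAMBlock host-pointer resolution. Rough sketch of the idea
(hypothetical helper, assuming the existing RAMBlock host/offset fields):

/* Hypothetical: resolve a ram_addr_t inside a RAMBlock, deferring to the
 * Xen mapcache when the block was registered without an up-front host
 * mapping. */
static void *ram_block_ptr(RAMBlock *block, ram_addr_t addr)
{
    if (block->host) {
        return block->host + (addr - block->offset);
    }
    return qemu_map_cache(addr, 0);
}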

Regards,

Anthony Liguori

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
