From 5759b103332708459561641e58c7e9e716e363cc Mon Sep 17 00:00:00 2001 From: Jun Nakajima Date: Tue, 31 Aug 2010 16:41:25 +0100 Subject: [PATCH] xen: Introduce the Xen mapcache The mapcache maps chucks of guest memory on demand, unmaps them when they are not needed anymore. Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the lock option, so mapcache will not unmap these ram_ptr. Signed-off-by: Anthony PERARD Signed-off-by: Stefano Stabellini --- Makefile.target | 3 + configure | 3 + exec.c | 39 ++++++- hw/xen.h | 10 ++ hw/xen_common.h | 2 + xen-all.c | 64 +++++++++++ xen-mapcache-stub.c | 33 ++++++ xen-mapcache.c | 301 +++++++++++++++++++++++++++++++++++++++++++++++++++ xen-mapcache.h | 14 +++ xen-stub.c | 4 + 10 files changed, 469 insertions(+), 4 deletions(-) create mode 100644 xen-mapcache-stub.c create mode 100644 xen-mapcache.c create mode 100644 xen-mapcache.h diff --git a/Makefile.target b/Makefile.target index db84edb..5646582 100644 --- a/Makefile.target +++ b/Makefile.target @@ -187,8 +187,11 @@ QEMU_CFLAGS += $(VNC_PNG_CFLAGS) obj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o # xen support +CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y) obj-$(CONFIG_XEN) += xen-all.o obj-$(CONFIG_NO_XEN) += xen-stub.o +obj-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o +obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o # xen full virtualized machine obj-i386-$(CONFIG_XEN) += xen_machine_fv.o diff --git a/configure b/configure index 80c249d..5528324 100755 --- a/configure +++ b/configure @@ -2925,6 +2925,9 @@ case "$target_arch2" in i386|x86_64) if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then echo "CONFIG_XEN=y" >> $config_target_mak + if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then + echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak + fi fi esac case "$target_arch2" in diff --git a/exec.c b/exec.c index 1fbe91c..ed42837 100644 --- a/exec.c +++ b/exec.c @@ -39,6 +39,7 @@ #include "hw/qdev.h" #include "osdep.h" #include "kvm.h" +#include "hw/xen.h" #include "qemu-timer.h" #if defined(CONFIG_USER_ONLY) #include @@ -58,8 +59,11 @@ #include #endif #endif +#else /* !CONFIG_USER_ONLY */ +#include "xen-mapcache.h" #endif + //#define DEBUG_TB_INVALIDATE //#define DEBUG_FLUSH //#define DEBUG_TLB @@ -2834,6 +2838,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, } } + new_block->offset = find_ram_offset(size); if (host) { new_block->host = host; } else { @@ -2855,13 +2860,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); #else - new_block->host = qemu_vmalloc(size); + if (xen_mapcache_enabled()) { + xen_ram_alloc(new_block->offset, size); + } else { + new_block->host = qemu_vmalloc(size); + } #endif qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); } } - - new_block->offset = find_ram_offset(size); new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); @@ -2902,7 +2909,11 @@ void qemu_ram_free(ram_addr_t addr) #if defined(TARGET_S390X) && defined(CONFIG_KVM) munmap(block->host, block->length); #else - qemu_vfree(block->host); + if (xen_mapcache_enabled()) { + qemu_invalidate_entry(block->host); + } else { + qemu_vfree(block->host); + } #endif } qemu_free(block); @@ -2928,6 +2939,15 @@ void *qemu_get_ram_ptr(ram_addr_t addr) if (addr - block->offset < block->length) { QLIST_REMOVE(block, next); QLIST_INSERT_HEAD(&ram_list.blocks, block, next); + if (xen_mapcache_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + */ + if (block->offset == 0) { + return qemu_map_cache(addr, 0, 1); + } + block->host = qemu_map_cache(block->offset, block->length, 1); + } return block->host + (addr - block->offset); } } @@ -2946,11 +2966,19 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) uint8_t *host = ptr; QLIST_FOREACH(block, &ram_list.blocks, next) { + /* This case append when the block is not mapped. */ + if (block->host == NULL) { + continue; + } if (host - block->host < block->length) { return block->offset + (host - block->host); } } + if (xen_mapcache_enabled()) { + return qemu_ram_addr_from_mapcache(ptr); + } + fprintf(stderr, "Bad ram pointer %p\n", ptr); abort(); @@ -3725,6 +3753,9 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, if (is_write) { cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len); } + if (xen_enabled()) { + qemu_invalidate_entry(buffer); + } qemu_vfree(bounce.buffer); bounce.buffer = NULL; cpu_notify_map_clients(); diff --git a/hw/xen.h b/hw/xen.h index c5189b1..0261ae6 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -28,10 +28,20 @@ extern int xen_allowed; #define xen_enabled() (0) #endif +#if defined CONFIG_XEN_MAPCACHE +# define xen_mapcache_enabled() (xen_enabled()) +#else +# define xen_mapcache_enabled() (0) +#endif + int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); void xen_piix3_set_irq(void *opaque, int irq_num, int level); void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); int xen_init(int smp_cpus); +#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY) +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size); +#endif + #endif /* QEMU_HW_XEN_H */ diff --git a/hw/xen_common.h b/hw/xen_common.h index a24bcb3..2773b45 100644 --- a/hw/xen_common.h +++ b/hw/xen_common.h @@ -36,6 +36,8 @@ typedef int qemu_xc_interface; xc_gnttab_map_grant_refs(gnt, count, domids, refs, flags) # define xc_gnttab_munmap(xc, gnt, pages, niov) xc_gnttab_munmap(gnt, pages, niov) # define xc_gnttab_close(xc, dev) xc_gnttab_close(dev) +# define xc_map_foreign_bulk(xc, domid, opts, pfns, err, size) \ + xc_map_foreign_batch(xc, domid, opts, pfns, size) #else typedef xc_interface *qemu_xc_interface; # define XC_HANDLER_INITIAL_VALUE NULL diff --git a/xen-all.c b/xen-all.c index 90c03eb..3048c4d 100644 --- a/xen-all.c +++ b/xen-all.c @@ -12,6 +12,8 @@ #include "hw/xen_common.h" #include "hw/xen_backend.h" +#include "xen-mapcache.h" + /* Xen specific function for piix pci */ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) @@ -54,6 +56,64 @@ qemu_irq *i8259_xen_init(void) return qemu_allocate_irqs(i8259_set_irq, NULL, 16); } + +/* Memory Ops */ + +static void xen_ram_init(ram_addr_t ram_size) +{ + RAMBlock *new_block; + ram_addr_t below_4g_mem_size, above_4g_mem_size = 0; + + new_block = qemu_mallocz(sizeof (*new_block)); + pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram"); + new_block->host = NULL; + new_block->offset = 0; + new_block->length = ram_size; + + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + + ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, + new_block->length >> TARGET_PAGE_BITS); + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), + 0xff, new_block->length >> TARGET_PAGE_BITS); + + if (ram_size >= 0xe0000000 ) { + above_4g_mem_size = ram_size - 0xe0000000; + below_4g_mem_size = 0xe0000000; + } else { + below_4g_mem_size = ram_size; + } + + cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset); +#if TARGET_PHYS_ADDR_BITS > 32 + if (above_4g_mem_size > 0) { + cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, + new_block->offset + below_4g_mem_size); + } +#endif +} + +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ + unsigned long nr_pfn; + xen_pfn_t *pfn_list; + int i; + + nr_pfn = size >> TARGET_PAGE_BITS; + pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn); + + for (i = 0; i < nr_pfn; i++) { + pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; + } + + if (xc_domain_memory_populate_physmap(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { + hw_error("xen: failed to populate ram at %lx", ram_addr); + } + + qemu_free(pfn_list); +} + + /* Initialise Xen */ int xen_init(int smp_cpus) @@ -64,5 +124,9 @@ int xen_init(int smp_cpus) return -1; } + /* Init RAM management */ + qemu_map_cache_init(); + xen_ram_init(ram_size); + return 0; } diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c new file mode 100644 index 0000000..69ce2e7 --- /dev/null +++ b/xen-mapcache-stub.c @@ -0,0 +1,33 @@ +#include "config.h" + +#include "exec-all.h" +#include "qemu-common.h" +#include "cpu-common.h" +#include "xen-mapcache.h" + +int qemu_map_cache_init(void) +{ + return 0; +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + return qemu_get_ram_ptr(phys_addr); +} + +void qemu_map_cache_unlock(void *buffer) +{ +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + return -1; +} + +void qemu_invalidate_map_cache(void) +{ +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ +} diff --git a/xen-mapcache.c b/xen-mapcache.c new file mode 100644 index 0000000..3e1cca9 --- /dev/null +++ b/xen-mapcache.c @@ -0,0 +1,301 @@ +#include "config.h" + +#include "hw/xen_backend.h" +#include "blockdev.h" + +#include +#include + +#include "xen-mapcache.h" + + +//#define MAPCACHE_DEBUG + +#ifdef MAPCACHE_DEBUG +# define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +# define DPRINTF(fmt, ...) do { } while (0) +#endif + +#if defined(__i386__) +# define MAX_MCACHE_SIZE 0x40000000 /* 1GB max for x86 */ +# define MCACHE_BUCKET_SHIFT 16 +#elif defined(__x86_64__) +# define MAX_MCACHE_SIZE 0x1000000000 /* 64GB max for x86_64 */ +# define MCACHE_BUCKET_SHIFT 20 +#endif +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) + +#define BITS_PER_LONG (sizeof(long) * 8) +#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)] +#define test_bit(bit, map) \ + (!!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG)))) + +typedef struct MapCacheEntry { + target_phys_addr_t paddr_index; + uint8_t *vaddr_base; + DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT); + uint8_t lock; + struct MapCacheEntry *next; +} MapCacheEntry; + +typedef struct MapCacheRev { + uint8_t *vaddr_req; + target_phys_addr_t paddr_index; + QTAILQ_ENTRY(MapCacheRev) next; +} MapCacheRev; + +typedef struct MapCache { + MapCacheEntry *entry; + unsigned long nr_buckets; + QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries; + + /* For most cases (>99.9%), the page address is the same. */ + target_phys_addr_t last_address_index; + uint8_t *last_address_vaddr; +} MapCache; + +static MapCache *mapcache; + + +int qemu_map_cache_init(void) +{ + unsigned long size; + + mapcache = qemu_mallocz(sizeof (MapCache)); + + QTAILQ_INIT(&mapcache->locked_entries); + mapcache->last_address_index = -1; + + mapcache->nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); + + /* + * Use mmap() directly: lets us allocate a big hash table with no up-front + * cost in storage space. The OS will allocate memory only for the buckets + * that we actually use. All others will contain all zeroes. + */ + size = mapcache->nr_buckets * sizeof (MapCacheEntry); + size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); + DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size); + mapcache->entry = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_ANON, -1, 0); + if (mapcache->entry == MAP_FAILED) { + return -1; + } + + return 0; +} + +static void qemu_remap_bucket(MapCacheEntry *entry, + target_phys_addr_t size, + target_phys_addr_t address_index) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i, j; + target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT; + + pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t)); + err = qemu_mallocz(nb_pfn * sizeof (int)); + + if (entry->vaddr_base != NULL) { + if (munmap(entry->vaddr_base, size) != 0) { + perror("unmap fails"); + exit(-1); + } + } + + for (i = 0; i < nb_pfn; i++) { + pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, nb_pfn); + if (vaddr_base == NULL) { + perror("xc_map_foreign_bulk"); + exit(-1); + } + + entry->vaddr_base = vaddr_base; + entry->paddr_index = address_index; + + for (i = 0; i < nb_pfn; i += BITS_PER_LONG) { + unsigned long word = 0; + if ((i + BITS_PER_LONG) > nb_pfn) { + j = nb_pfn % BITS_PER_LONG; + } else { + j = BITS_PER_LONG; + } + while (j > 0) { + word = (word << 1) | !err[i + --j]; + } + entry->valid_mapping[i / BITS_PER_LONG] = word; + } + + qemu_free(pfns); + qemu_free(err); +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + MapCacheEntry *entry, *pentry = NULL; + target_phys_addr_t address_index = phys_addr >> MCACHE_BUCKET_SHIFT; + target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); + + if (address_index == mapcache->last_address_index && !lock) { + return mapcache->last_address_vaddr + address_offset; + } + + entry = &mapcache->entry[address_index % mapcache->nr_buckets]; + + while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + entry = qemu_mallocz(sizeof (MapCacheEntry)); + pentry->next = entry; + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } else if (!entry->lock) { + if (!entry->vaddr_base || entry->paddr_index != address_index || + !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } + } + + if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + mapcache->last_address_index = -1; + return NULL; + } + + mapcache->last_address_index = address_index; + mapcache->last_address_vaddr = entry->vaddr_base; + if (lock) { + MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev)); + entry->lock++; + reventry->vaddr_req = mapcache->last_address_vaddr + address_offset; + reventry->paddr_index = mapcache->last_address_index; + QTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next); + } + + return mapcache->last_address_vaddr + address_offset; +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == ptr) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" %lx -> %p is present\n", reventry->paddr_index, + reventry->vaddr_req); + } + abort(); + return 0; + } + + return paddr_index << MCACHE_BUCKET_SHIFT; +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ + MapCacheEntry *entry = NULL, *pentry = NULL; + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + if (mapcache->last_address_vaddr == buffer) { + mapcache->last_address_index = -1; + } + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" %lx -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + return; + } + QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); + qemu_free(reventry); + + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && entry->paddr_index != paddr_index) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); + return; + } + entry->lock--; + if (entry->lock > 0 || pentry == NULL) { + return; + } + + pentry->next = entry->next; + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + qemu_free(entry); +} + +void qemu_invalidate_map_cache(void) +{ + unsigned long i; + MapCacheRev *reventry; + + qemu_aio_flush(); + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF("There should be no locked mappings at this time, " + "but %lx -> %p is present\n", + reventry->paddr_index, reventry->vaddr_req); + } + + mapcache_lock(); + + for (i = 0; i < mapcache->nr_buckets; i++) { + MapCacheEntry *entry = &mapcache->entry[i]; + + if (entry->vaddr_base == NULL) { + continue; + } + + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + + entry->paddr_index = 0; + entry->vaddr_base = NULL; + } + + mapcache->last_address_index = -1; + mapcache->last_address_vaddr = NULL; + + mapcache_unlock(); +} diff --git a/xen-mapcache.h b/xen-mapcache.h new file mode 100644 index 0000000..86a017b --- /dev/null +++ b/xen-mapcache.h @@ -0,0 +1,14 @@ +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +int qemu_map_cache_init(void); +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock); +void qemu_map_cache_unlock(void *phys_addr); +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr); +void qemu_invalidate_entry(uint8_t *buffer); +void qemu_invalidate_map_cache(void); + +#define mapcache_lock() ((void)0) +#define mapcache_unlock() ((void)0) + +#endif /* !XEN_MAPCACHE_H */ diff --git a/xen-stub.c b/xen-stub.c index 07e64bc..c9f477d 100644 --- a/xen-stub.c +++ b/xen-stub.c @@ -24,6 +24,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len) { } +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ +} + int xen_init(int smp_cpus) { return -ENOSYS; -- 1.7.1