WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [RFC PATCH 2/4] (Take 2): tmem: Implement precache on top of tmem layer

Tmem [PATCH 2/4] (Take 2): Implement precache on top of tmem layer

Hooks added to existing page cache, VFS, and FS (ext3 only for now)
routines to:
1) create a tmem pool when filesystem is mounted and record its id
2) "put" clean pages that are being evicted
3) attempt to "get" pages prior to reading from a mounted FS and
   fallback to reading from the FS if "get" fails
4) "flush" as necessary to ensure coherency between page cache & precache
5) destroy the tmem pool when the FS is unmounted

Hooks for page cache and VFS placed by Chris Mason

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>


 fs/buffer.c                              |    5 
 fs/ext3/super.c                          |    2 
 fs/mpage.c                               |    8 +
 fs/super.c                               |    5 
 include/linux/fs.h                       |    7 +
 include/linux/precache.h                 |   50 +++++++
 mm/Kconfig                               |    8 +
 mm/Makefile                              |    1 
 mm/filemap.c                             |   11 +
 mm/precache.c                            |  134 +++++++++++++++++++++
 mm/truncate.c                            |   10 +
 11 files changed, 241 insertions(+)

--- linux-2.6.30/fs/super.c     2009-06-09 21:05:27.000000000 -0600
+++ linux-2.6.30-tmem/fs/super.c        2009-06-19 09:33:59.000000000 -0600
@@ -39,6 +39,7 @@
 #include <linux/mutex.h>
 #include <linux/file.h>
 #include <linux/async.h>
+#include <linux/precache.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -110,6 +111,9 @@ static struct super_block *alloc_super(s
                s->s_qcop = sb_quotactl_ops;
                s->s_op = &default_op;
                s->s_time_gran = 1000000000;
+#ifdef CONFIG_PRECACHE
+               s->precache_poolid = -1;
+#endif
        }
 out:
        return s;
@@ -200,6 +204,7 @@ void deactivate_super(struct super_block
                vfs_dq_off(s, 0);
                down_write(&s->s_umount);
                fs->kill_sb(s);
+               precache_flush_filesystem(s);
                put_filesystem(fs);
                put_super(s);
        }
--- linux-2.6.30/fs/ext3/super.c        2009-06-09 21:05:27.000000000 -0600
+++ linux-2.6.30-tmem/fs/ext3/super.c   2009-06-19 09:33:59.000000000 -0600
@@ -37,6 +37,7 @@
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
 #include <linux/log2.h>
+#include <linux/precache.h>
 
 #include <asm/uaccess.h>
 
@@ -1306,6 +1307,7 @@ static int ext3_setup_super(struct super
        } else {
                printk("internal journal\n");
        }
+       precache_init(sb);
        return res;
 }
 
--- linux-2.6.30/include/linux/fs.h     2009-06-09 21:05:27.000000000 -0600
+++ linux-2.6.30-tmem/include/linux/fs.h        2009-06-19 09:33:59.000000000 -0600
@@ -1377,6 +1377,13 @@ struct super_block {
         * storage for asynchronous operations
         */
        struct list_head s_async_list;
+
+#ifdef CONFIG_PRECACHE
+       /*
+        * saved pool identifier for precache (-1 means none)
+        */
+       u32 precache_poolid;
+#endif
 };
 
 extern struct timespec current_fs_time(struct super_block *sb);
--- linux-2.6.30/fs/buffer.c    2009-06-09 21:05:27.000000000 -0600
+++ linux-2.6.30-tmem/fs/buffer.c       2009-06-19 09:33:59.000000000 -0600
@@ -41,6 +41,7 @@
 #include <linux/bitops.h>
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
+#include <linux/precache.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
@@ -271,6 +272,10 @@ void invalidate_bdev(struct block_device
 
        invalidate_bh_lrus();
        invalidate_mapping_pages(mapping, 0, -1);
+       /* 99% of the time, we don't need to flush the precache on the bdev.
+        * But, for the strange corners, lets be cautious
+        */
+       precache_flush_inode(mapping);
 }
 
 /*
--- linux-2.6.30/fs/mpage.c     2009-06-09 21:05:27.000000000 -0600
+++ linux-2.6.30-tmem/fs/mpage.c        2009-06-19 09:33:59.000000000 -0600
@@ -26,6 +26,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/precache.h>
 
 /*
  * I/O completion handler for multipage BIOs.
@@ -285,6 +286,13 @@ do_mpage_readpage(struct bio *bio, struc
                SetPageMappedToDisk(page);
        }
 
+       if (fully_mapped &&
+           blocks_per_page == 1 && !PageUptodate(page) &&
+           precache_get(page->mapping, page->index, page) == 1) {
+               SetPageUptodate(page);
+               goto confused;
+       }
+
        /*
         * This page will go to BIO.  Do we need to send this BIO off first?
         */
--- linux-2.6.30/mm/truncate.c  2009-06-09 21:05:27.000000000 -0600
+++ linux-2.6.30-tmem/mm/truncate.c     2009-06-19 09:37:42.000000000 -0600
@@ -18,6 +18,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/buffer_head.h> /* grr. try_to_release_page,
                                   do_invalidatepage */
+#include <linux/precache.h>
 #include "internal.h"
 
 
@@ -50,6 +51,7 @@ void do_invalidatepage(struct page *page
 static inline void truncate_partial_page(struct page *page, unsigned partial)
 {
        zero_user_segment(page, partial, PAGE_CACHE_SIZE);
+       precache_flush(page->mapping, page->index);
        if (page_has_private(page))
                do_invalidatepage(page, partial);
 }
@@ -107,6 +109,10 @@ truncate_complete_page(struct address_sp
        clear_page_mlock(page);
        remove_from_page_cache(page);
        ClearPageMappedToDisk(page);
+       /* this must be after the remove_from_page_cache which
+        * calls precache_put
+        */
+       precache_flush(mapping, page->index);
        page_cache_release(page);       /* pagecache ref */
 }
 
@@ -168,6 +174,7 @@ void truncate_inode_pages_range(struct a
        pgoff_t next;
        int i;
 
+       precache_flush_inode(mapping);
        if (mapping->nrpages == 0)
                return;
 
@@ -251,6 +258,7 @@ void truncate_inode_pages_range(struct a
                }
                pagevec_release(&pvec);
        }
+       precache_flush_inode(mapping);
 }
 EXPORT_SYMBOL(truncate_inode_pages_range);
 
@@ -398,6 +406,7 @@ int invalidate_inode_pages2_range(struct
        int did_range_unmap = 0;
        int wrapped = 0;
 
+       precache_flush_inode(mapping);
        pagevec_init(&pvec, 0);
        next = start;
        while (next <= end && !wrapped &&
@@ -454,6 +463,7 @@ int invalidate_inode_pages2_range(struct
                pagevec_release(&pvec);
                cond_resched();
        }
+       precache_flush_inode(mapping);
        return ret;
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
--- linux-2.6.30/mm/filemap.c   2009-06-09 21:05:27.000000000 -0600
+++ linux-2.6.30-tmem/mm/filemap.c      2009-06-19 09:33:59.000000000 -0600
@@ -34,6 +34,7 @@
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include <linux/memcontrol.h>
 #include <linux/mm_inline.h> /* for page_is_file_cache() */
+#include <linux/precache.h>
 #include "internal.h"
 
 /*
@@ -116,6 +117,16 @@ void __remove_from_page_cache(struct pag
 {
        struct address_space *mapping = page->mapping;
 
+       /*
+        * if we're uptodate, flush out into the precache, otherwise
+        * invalidate any existing precache entries.  We can't leave
+        * stale data around in the precache once our page is gone
+        */
+       if (PageUptodate(page))
+               precache_put(page->mapping, page->index, page);
+       else
+               precache_flush(page->mapping, page->index);
+
        radix_tree_delete(&mapping->page_tree, page->index);
        page->mapping = NULL;
        mapping->nrpages--;
--- linux-2.6.30/include/linux/precache.h       1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.30-tmem/include/linux/precache.h  2009-07-06 15:46:16.000000000 -0600
@@ -0,0 +1,50 @@
+/*
+ * Precache hooks used by the page cache, VFS and filesystems to put, get and
+ * flush clean pages kept in a per-superblock precache pool.
+ *
+ * With CONFIG_PRECACHE disabled every hook is an empty inline stub (returning
+ * 0 where a value is expected), so call sites need no #ifdefs of their own.
+ */
+#ifndef _LINUX_PRECACHE_H
+/* defined before the nested includes so the guard is active while they run */
+#define _LINUX_PRECACHE_H
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#ifdef CONFIG_PRECACHE
+/* Create a tmem pool for @sb and record its id in sb->precache_poolid. */
+extern void precache_init(struct super_block *sb);
+/* Try to fill @empty_page from the precache; fs/mpage.c treats 1 as a hit. */
+extern int precache_get(struct address_space *mapping, unsigned long index,
+              struct page *empty_page);
+/* Offer the contents of @page at (@mapping, @index) to the precache. */
+extern int precache_put(struct address_space *mapping, unsigned long index,
+               struct page *page);
+/* Flush the precache entry for one page of @mapping. */
+extern int precache_flush(struct address_space *mapping, unsigned long index);
+/* Flush every precache entry belonging to @mapping's host inode. */
+extern int precache_flush_inode(struct address_space *mapping);
+/* Destroy the precache pool recorded in superblock @s, if there is one. */
+extern int precache_flush_filesystem(struct super_block *s);
+#else
+static inline void precache_init(struct super_block *sb)
+{
+}
+
+static inline int precache_get(struct address_space *mapping,
+               unsigned long index, struct page *empty_page)
+{
+       return 0;
+}
+
+static inline int precache_put(struct address_space *mapping,
+               unsigned long index, struct page *page)
+{
+       return 0;
+}
+
+static inline int precache_flush(struct address_space *mapping,
+               unsigned long index)
+{
+       return 0;
+}
+
+static inline int precache_flush_inode(struct address_space *mapping)
+{
+       return 0;
+}
+
+static inline int precache_flush_filesystem(struct super_block *s)
+{
+       return 0;
+}
+#endif /* CONFIG_PRECACHE */
+
+#endif /* _LINUX_PRECACHE_H */
--- linux-2.6.30/mm/precache.c  1969-12-31 17:00:00.000000000 -0700
+++ linux-2.6.30-tmem/mm/precache.c     2009-07-06 15:50:04.000000000 -0600
@@ -0,0 +1,134 @@
+/*
+ * linux/mm/precache.c
+ *
+ * Implements "precache" for filesystems/pagecache on top of transcendent
+ * memory ("tmem") API.  A filesystem creates an "ephemeral tmem pool"
+ * and retains the returned pool_id in its superblock.  Clean pages evicted
+ * from pagecache may be "put" into the pool and associated with a "handle"
+ * consisting of the pool_id, an object (inode) id, and an index (page offset).
+ * Note that the page is copied to tmem; no kernel mappings are changed.
+ * If the page is later needed, the filesystem (or VFS) issues a "get", passing
+ * the same handle and an empty pageframe.  If successful, the page is copied
+ * into the pageframe and a disk read is avoided.  But since the tmem pool
+ * is of indeterminate size, a "put" page has indeterminate longevity
+ * ("ephemeral"), and the "get" may fail, in which case the filesystem must
+ * read the page from disk as before.  Note that the filesystem/pagecache are
+ * responsible for maintaining coherency between the pagecache, precache,
+ * and the disk, for which "flush page" and "flush object" actions are
+ * provided.  And when a filesystem is unmounted, it must "destroy" the pool.
+ *
+ * Tmem supports two different modes for a precache: "private" or "shared".
+ * Shared pools are still under development. For a private pool, a successful
+ * "get" always flushes, implementing "exclusive cache" semantics.  Note
+ * that a failed "duplicate" put (overwrite) always guarantees the old data
+ * is flushed.
+ *
+ * Note also that multiple accesses to a tmem pool may be concurrent and any
+ * ordering must be guaranteed by the caller.
+ *
+ * Copyright (C) 2008,2009 Dan Magenheimer, Oracle Corp.
+ */
+
+#include <linux/precache.h>
+#include <linux/module.h>
+#include <linux/tmem.h>
+
+static int precache_auto_allocate; /* set to 1 to auto_allocate */
+
+/*
+ * precache_put - copy a page into the precache of the filesystem owning it
+ * @mapping: address space of the page; its host inode supplies the superblock
+ *           (which holds the pool id) and the inode number (the object id)
+ * @index:   page offset within @mapping
+ * @page:    page handed to tmem by page frame number
+ *
+ * If the superblock has no pool yet (negative id) a private pool is created
+ * on demand, but only when precache_auto_allocate is non-zero; nothing in
+ * this file sets that flag.
+ *
+ * Returns 0 without calling tmem_put_page() when there is no usable pool or
+ * when @index does not survive truncation to the 32-bit index handed to tmem;
+ * otherwise returns whatever tmem_put_page() returns.
+ */
+int precache_put(struct address_space *mapping, unsigned long index,
+ struct page *page)
+{
+       u32 tmem_pool = mapping->host->i_sb->precache_poolid;
+       u64 obj = (unsigned long) mapping->host->i_ino;
+       u32 ind = (u32) index;
+       unsigned long pfn = page_to_pfn(page);
+       struct tmem_pool_uuid uuid_private = TMEM_POOL_PRIVATE_UUID;
+       int ret;
+
+       if ((s32)tmem_pool < 0) {
+               if (!precache_auto_allocate)
+                       return 0;
+               /* a put on a non-existent precache may auto-allocate one */
+               ret = tmem_new_pool(uuid_private, 0);
+               if (ret < 0)
+                       return 0;
+               /*
+                * Adopt the freshly created pool.  Without this the stale
+                * negative id would be logged, stored back into the
+                * superblock and passed to tmem_put_page() below.
+                */
+               tmem_pool = ret;
+               printk(KERN_INFO
+                       "Mapping superblock for s_id=%s to precache_id=%d\n",
+                       mapping->host->i_sb->s_id, tmem_pool);
+               mapping->host->i_sb->precache_poolid = tmem_pool;
+       }
+       /* the index handed to tmem is 32 bits wide; skip pages beyond that */
+       if (ind != index)
+               return 0;
+       mb(); /* ensure page is quiescent; tmem may address it with an alias */
+       return tmem_put_page(tmem_pool, obj, ind, pfn);
+}
+
+/*
+ * precache_get - try to fill an empty page frame from the precache
+ * @mapping:    address space the page belongs to
+ * @index:      page offset within @mapping
+ * @empty_page: page frame handed to tmem_get_page() by frame number
+ *
+ * Returns 0 without calling tmem when the superblock has no pool (negative
+ * id) or @index does not fit in 32 bits; otherwise returns the result of
+ * tmem_get_page().
+ */
+int precache_get(struct address_space *mapping, unsigned long index,
+ struct page *empty_page)
+{
+       struct inode *inode = mapping->host;
+       u32 pool_id = inode->i_sb->precache_poolid;
+       u64 object_id = (unsigned long) inode->i_ino;
+       u32 page_ind = (u32) index;
+
+       /* nothing to look up: no pool, or index was truncated by the cast */
+       if ((s32)pool_id < 0 || page_ind != index)
+               return 0;
+
+       return tmem_get_page(pool_id, object_id, page_ind,
+                            page_to_pfn(empty_page));
+}
+EXPORT_SYMBOL(precache_get);
+
+/*
+ * precache_flush - flush the precache entry for a single page
+ * @mapping: address space the page belongs to
+ * @index:   page offset within @mapping
+ *
+ * Returns 0 without calling tmem when the superblock has no pool (negative
+ * id) or @index does not fit in 32 bits; otherwise returns the result of
+ * tmem_flush_page().
+ */
+int precache_flush(struct address_space *mapping, unsigned long index)
+{
+       struct inode *inode = mapping->host;
+       u32 pool_id = inode->i_sb->precache_poolid;
+       u64 object_id = (unsigned long) inode->i_ino;
+       u32 page_ind = (u32) index;
+
+       /* nothing to flush: no pool, or index was truncated by the cast */
+       if ((s32)pool_id < 0 || page_ind != index)
+               return 0;
+
+       return tmem_flush_page(pool_id, object_id, page_ind);
+}
+EXPORT_SYMBOL(precache_flush);
+
+/*
+ * precache_flush_inode - flush every precache entry of one inode
+ * @mapping: address space whose host inode number names the tmem object
+ *
+ * Returns 0 without calling tmem when the superblock has no pool (negative
+ * id); otherwise returns the result of tmem_flush_object().
+ */
+int precache_flush_inode(struct address_space *mapping)
+{
+       struct inode *inode = mapping->host;
+       u32 pool_id = inode->i_sb->precache_poolid;
+       u64 object_id = (unsigned long) inode->i_ino;
+
+       if ((s32)pool_id >= 0)
+               return tmem_flush_object(pool_id, object_id);
+
+       return 0;
+}
+EXPORT_SYMBOL(precache_flush_inode);
+
+/*
+ * precache_flush_filesystem - destroy the precache pool of a superblock
+ * @sb: superblock whose precache_poolid names the pool
+ *
+ * Returns 0 when @sb has no pool (negative id) or tmem_destroy_pool()
+ * returns 0.  Otherwise logs the unmapping, marks the superblock as having
+ * no pool and returns 1.
+ *
+ * NOTE(review): a zero return from tmem_destroy_pool() is treated as "nothing
+ * destroyed" and any non-zero value (including a negative one) as success;
+ * confirm against the tmem layer's return convention.
+ */
+int precache_flush_filesystem(struct super_block *sb)
+{
+       u32 tmem_pool = sb->precache_poolid;
+       int ret;
+
+       if ((s32)tmem_pool < 0)
+               return 0;
+       ret = tmem_destroy_pool(tmem_pool);
+       if (!ret)
+               return 0;
+       /* report the pool that was destroyed, not the call's return code */
+       printk(KERN_INFO
+               "Unmapping superblock for s_id=%s from precache_id=%d\n",
+               sb->s_id, tmem_pool);
+       /*
+        * -1 is the "no pool" marker checked by every precache_* entry
+        * point; 0 would be taken for a live pool id by later calls.
+        */
+       sb->precache_poolid = -1;
+       return 1;
+}
+EXPORT_SYMBOL(precache_flush_filesystem);
+
+/*
+ * precache_init - create a private tmem pool for a superblock
+ * @sb: superblock that receives the pool id
+ *
+ * The value returned by tmem_new_pool() is stored unchecked in
+ * sb->precache_poolid; the other precache_* routines treat an id that is
+ * negative when viewed as s32 as "no pool".
+ */
+void precache_init(struct super_block *sb)
+{
+       struct tmem_pool_uuid private_uuid = TMEM_POOL_PRIVATE_UUID;
+       int new_pool = tmem_new_pool(private_uuid, 0);
+
+       sb->precache_poolid = new_pool;
+}
+EXPORT_SYMBOL(precache_init);
--- linux-2.6.30-tmem-tmem/mm/Kconfig   2009-07-06 16:36:31.000000000 -0600
+++ linux-2.6.30-tmem-precache/mm/Kconfig       2009-07-06 16:37:05.000000000 -0600
@@ -263,3 +263,11 @@ config TMEM
          In a virtualized environment, allows unused and underutilized
          system physical memory to be made accessible through a narrow
          well-defined page-copy-based API.
+
+config PRECACHE
+       bool "Cache clean pages in transcendent memory"
+       depends on TMEM
+       help
+         Allows the transcendent memory pool to be used to store clean
+         page-cache pages which, under some circumstances, will greatly
+         reduce paging and thus improve performance.
--- linux-2.6.30-tmem-tmem/mm/Makefile  2009-07-06 16:36:52.000000000 -0600
+++ linux-2.6.30-tmem-precache/mm/Makefile      2009-07-06 16:37:10.000000000 -0600
@@ -17,6 +17,7 @@ obj-$(CONFIG_PROC_PAGE_MONITOR) += pagew
 obj-$(CONFIG_BOUNCE)   += bounce.o
 obj-$(CONFIG_SWAP)     += page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_TMEM)     += tmem.o
+obj-$(CONFIG_PRECACHE) += precache.o
 obj-$(CONFIG_HAS_DMA)  += dmapool.o
 obj-$(CONFIG_HUGETLBFS)        += hugetlb.o
 obj-$(CONFIG_NUMA)     += mempolicy.o

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [RFC PATCH 2/4] (Take 2): tmem: Implement precache on top of tmem layer, Dan Magenheimer <=