# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1243877847 -3600
# Node ID 4294a04b24bc55ffc18215d85a9eda517935b816
# Parent 027f19e97e2852b643c2f05413ba24d8286ff3a5
tmem: shared ephemeral (SE) pool (clustering) fixes

Tmem can share clean page cache pages among Linux domains
in a virtual cluster (currently only the ocfs2 filesystem
has such a patch on the Linux side): when one domain
"puts" (evicts) a page, any domain in the cluster can
"get" it, saving a disk read. This functionality is
already present; these are only bug fixes.
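
To make the put/get flow concrete, here is a minimal,
self-contained C sketch of the shared-pool semantics described
above. The shared_pool struct and the cluster_put/cluster_get
helpers are hypothetical stand-ins for illustration only, not
the actual tmem data structures or hypercall interface:

    /* Hypothetical model of a shared ephemeral pool: any domain that
     * has joined the pool (matching uuid) may get back a page that
     * another domain put. Whether a get also removes the page is a
     * tmem policy detail not modeled here. */
    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    #define DEMO_SLOTS  8
    #define DEMO_PGSIZE 16            /* shrunk from 4096 for the demo */

    struct shared_pool {
        uint64_t uuid_lo, uuid_hi;    /* cluster-wide pool identity */
        uint64_t key[DEMO_SLOTS];     /* (object id, index) folded into one key */
        char data[DEMO_SLOTS][DEMO_PGSIZE];
        int used[DEMO_SLOTS];
    };

    /* "put": a domain evicting a clean page cache page offers it to the pool */
    static int cluster_put(struct shared_pool *p, uint64_t key, const char *pg)
    {
        for (int i = 0; i < DEMO_SLOTS; i++)
            if (!p->used[i]) {
                p->used[i] = 1;
                p->key[i] = key;
                memcpy(p->data[i], pg, DEMO_PGSIZE);
                return 0;
            }
        return -1;  /* ephemeral puts are best-effort; a full pool just misses */
    }

    /* "get": any sharer can retrieve the page and skip the disk read */
    static int cluster_get(struct shared_pool *p, uint64_t key, char *pg)
    {
        for (int i = 0; i < DEMO_SLOTS; i++)
            if (p->used[i] && p->key[i] == key) {
                memcpy(pg, p->data[i], DEMO_PGSIZE);
                return 0;
            }
        return -1;  /* miss: caller falls back to reading the disk */
    }

    int main(void)
    {
        struct shared_pool pool = { .uuid_lo = 0xfeedULL, .uuid_hi = 0xbeefULL };
        char evicted[DEMO_PGSIZE] = "ocfs2 block 42";
        char fetched[DEMO_PGSIZE];

        cluster_put(&pool, 42, evicted);           /* domain A evicts the page */
        if (cluster_get(&pool, 42, fetched) == 0)  /* domain B avoids a disk read */
            printf("cluster hit: %s\n", fetched);
        return 0;
    }

A real guest would issue tmem hypercall operations instead, and
the pool structures live in the hypervisor; the fixes listed
below are on that hypervisor side plus the xm parsing tool.
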
- fix bugs when an SE pool is destroyed
- fix the parsing tool's handling of xm tmem-list output for SE pools
- fix incorrect locking in one case when destroying an SE pool
- clarify the log message for the page transfer when an SE pool is destroyed
- minor cleanup: merge mostly-duplicate routines

Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
---
tools/misc/xen-tmem-list-parse.c | 24 ++++++++++--
xen/common/tmem.c | 73 ++++++++++++++-------------------------
2 files changed, 47 insertions(+), 50 deletions(-)
diff -r 027f19e97e28 -r 4294a04b24bc tools/misc/xen-tmem-list-parse.c
--- a/tools/misc/xen-tmem-list-parse.c Mon Jun 01 15:52:19 2009 +0100
+++ b/tools/misc/xen-tmem-list-parse.c Mon Jun 01 18:37:27 2009 +0100
@@ -29,6 +29,20 @@ unsigned long long parse(char *s,char *m
return ret;
}

+unsigned long long parse_hex(char *s,char *match)
+{
+ char *s1 = strstr(s,match);
+ unsigned long long ret;
+
+ if ( s1 == NULL )
+ return 0LL;
+ s1 += 2;
+ if ( *s1++ != ':' )
+ return 0LL;
+ sscanf(s1,"%llx",&ret);
+ return ret;
+}
+
unsigned long long parse2(char *s,char *match1, char *match2)
{
char match[3];
@@ -64,7 +78,7 @@ void parse_sharers(char *s, char *match,
s1 += 2;
if (*s1++ != ':')
return;
- while (*s1 <= '0' && *s1 <= '9')
+ while (*s1 >= '0' && *s1 <= '9')
*b++ = *s1++;
*b++ = ',';
s1 = strstr(s1,match);
@@ -196,6 +210,8 @@ void parse_pool(char *s)
unsigned long long flush_objs = parse(s,"ot");

parse_string(s,"PT",pool_type,2);
+ if (pool_type[1] == 'S')
+ return; /* no need to repeat print data for shared pools */
printf("domid%lu,id%lu[%s]:pgp=%llu(max=%llu) obj=%llu(%llu) "
"objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) "
"gets=%llu/%llu(%llu%%) "
@@ -216,8 +232,8 @@ void parse_shared_pool(char *s)
char pool_type[3];
char buf[BUFSIZE];
unsigned long pool_id = parse(s,"PI");
- unsigned long long uid0 = parse(s,"U0");
- unsigned long long uid1 = parse(s,"U1");
+ unsigned long long uid0 = parse_hex(s,"U0");
+ unsigned long long uid1 = parse_hex(s,"U1");
unsigned long long pgp_count = parse(s,"Pc");
unsigned long long max_pgp_count = parse(s,"Pm");
unsigned long long obj_count = parse(s,"Oc");
@@ -238,7 +254,7 @@ void parse_shared_pool(char *s)
parse_string(s,"PT",pool_type,2);
parse_sharers(s,"SC",buf,BUFSIZE);
- printf("poolid=%lu[%s] uuid=%llu.%llu, shared-by:%s: "
+ printf("poolid=%lu[%s] uuid=%llx.%llx, shared-by:%s: "
"pgp=%llu(max=%llu) obj=%llu(%llu) "
"objnode=%llu(%llu) puts=%llu/%llu/%llu(dup=%llu/%llu) "
"gets=%llu/%llu(%llu%%) "
diff -r 027f19e97e28 -r 4294a04b24bc xen/common/tmem.c
--- a/xen/common/tmem.c Mon Jun 01 15:52:19 2009 +0100
+++ b/xen/common/tmem.c Mon Jun 01 18:37:27 2009 +0100
@@ -581,21 +581,6 @@ static NOINLINE void obj_free(obj_t *obj
tmem_free(obj,sizeof(obj_t),pool);
}

-static NOINLINE void obj_rb_destroy_node(struct rb_node *node)
-{
- obj_t * obj;
-
- if ( node == NULL )
- return;
- obj_rb_destroy_node(node->rb_left);
- obj_rb_destroy_node(node->rb_right);
- obj = container_of(node, obj_t, rb_tree_node);
- tmem_spin_lock(&obj->obj_spinlock);
- ASSERT(obj->no_evict == 0);
- radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
- obj_free(obj,1);
-}
-
static NOINLINE int obj_rb_insert(struct rb_root *root, obj_t *obj)
{
struct rb_node **new, *parent = NULL;
@@ -650,26 +635,15 @@ static NOINLINE obj_t * obj_new(pool_t *
}

/* free an object after destroying any pgps in it */
-static NOINLINE void obj_destroy(obj_t *obj)
+static NOINLINE void obj_destroy(obj_t *obj, int no_rebalance)
{
ASSERT_WRITELOCK(&obj->pool->pool_rwlock);
radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free);
- obj_free(obj,0);
-}
-
-/* destroy all objects in a pool */
-static NOINLINE void obj_rb_destroy_all(pool_t *pool)
-{
- int i;
-
- tmem_write_lock(&pool->pool_rwlock);
- for (i = 0; i < OBJ_HASH_BUCKETS; i++)
- obj_rb_destroy_node(pool->obj_rb_root[i].rb_node);
- tmem_write_unlock(&pool->pool_rwlock);
-}
-
-/* destroys all objects in a pool that have last_client set to cli_id */
-static void obj_free_selective(pool_t *pool, cli_id_t cli_id)
+ obj_free(obj,no_rebalance);
+}
+
+/* destroys all objs in a pool, or only if obj->last_client matches cli_id */
+static void pool_destroy_objs(pool_t *pool, bool_t selective, cli_id_t cli_id)
{
struct rb_node *node;
obj_t *obj;
@@ -684,8 +658,11 @@ static void obj_free_selective(pool_t *p
obj = container_of(node, obj_t, rb_tree_node);
tmem_spin_lock(&obj->obj_spinlock);
node = rb_next(node);
- if ( obj->last_client == cli_id )
- obj_destroy(obj);
+ ASSERT(obj->no_evict == 0);
+ if ( !selective )
+ obj_destroy(obj,1);
+ else if ( obj->last_client == cli_id )
+ obj_destroy(obj,0);
else
tmem_spin_unlock(&obj->obj_spinlock);
}
@@ -740,8 +717,9 @@ static int shared_pool_join(pool_t *pool
return -1;
sl->client = new_client;
list_add_tail(&sl->share_list, &pool->share_list);
- printk("adding new %s %d to shared pool owned by %s %d\n",
- client_str, new_client->cli_id, client_str, pool->client->cli_id);
+ if ( new_client->cli_id != pool->client->cli_id )
+ printk("adding new %s %d to shared pool owned by %s %d\n",
+ client_str, new_client->cli_id, client_str, pool->client->cli_id);
return ++pool->shared_count;
}

@@ -766,6 +744,10 @@ static NOINLINE void shared_pool_reassig
if (new_client->pools[poolid] == pool)
break;
ASSERT(poolid != MAX_POOLS_PER_DOMAIN);
+ new_client->eph_count += _atomic_read(pool->pgp_count);
+ old_client->eph_count -= _atomic_read(pool->pgp_count);
+ list_splice_init(&old_client->ephemeral_page_list,
+ &new_client->ephemeral_page_list);
printk("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n",
cli_id_str, old_client->cli_id, cli_id_str, new_client->cli_id,
poolid);
pool->pool_id = poolid;
@@ -781,7 +763,8 @@ static NOINLINE int shared_pool_quit(poo
ASSERT(is_shared(pool));
ASSERT(pool->client != NULL);
- obj_free_selective(pool,cli_id);
+ ASSERT_WRITELOCK(&tmem_rwlock);
+ pool_destroy_objs(pool,1,cli_id);
list_for_each_entry(sl,&pool->share_list, share_list)
{
if (sl->client->cli_id != cli_id)
@@ -812,15 +795,15 @@ static void pool_flush(pool_t *pool, cli
ASSERT(pool != NULL);
if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) )
{
- printk("tmem: unshared shared pool %d from %s=%d\n",
- pool->pool_id, cli_id_str,pool->client->cli_id);
+ printk("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n",
+ cli_id_str, cli_id, pool->pool_id, cli_id_str,pool->client->cli_id);
return;
}
printk("%s %s-%s tmem pool ",destroy?"destroying":"flushing",
is_persistent(pool) ? "persistent" : "ephemeral" ,
is_shared(pool) ? "shared" : "private");
printk("%s=%d pool_id=%d\n",
cli_id_str,pool->client->cli_id,pool->pool_id);
- obj_rb_destroy_all(pool);
+ pool_destroy_objs(pool,0,CLI_ID_NULL);
if ( destroy )
{
pool->client->pools[pool->pool_id] = NULL;
@@ -1378,7 +1361,7 @@ static NOINLINE int do_tmem_flush_object
if ( obj == NULL )
goto out;
tmem_write_lock(&pool->pool_rwlock);
- obj_destroy(obj);
+ obj_destroy(obj,0);
pool->flush_objs_found++;
tmem_write_unlock(&pool->pool_rwlock);
@@ -1455,7 +1438,7 @@ static NOINLINE int do_tmem_new_pool(uin
{
if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi )
{
- printk("(matches shared pool uuid=%"PRIx64".%"PRIu64") ",
+ printk("(matches shared pool uuid=%"PRIx64".%"PRIx64") ",
uuid_hi, uuid_lo);
printk("pool_id=%d\n",d_poolid);
client->pools[d_poolid] = global_shared_pools[s_poolid];
@@ -1507,10 +1490,8 @@ static int tmemc_freeze_pools(int cli_id
if ( cli_id == CLI_ID_NULL )
{
list_for_each_entry(client,&global_client_list,client_list)
- {
client->frozen = freeze;
- printk("tmem: all pools %s for all %ss\n",s,client_str);
- }
+ printk("tmem: all pools %s for all %ss\n",s,client_str);
}
else
{
@@ -1878,7 +1859,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
}
}
- if ( op.cmd == TMEM_NEW_POOL )
+ if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL )
{
if ( !tmem_write_lock_set )
{