Just returned from a week of vacation.
I see these patches have not yet been applied to unstable for
4.0-testing, hypervisor or tools. Did I drop the ball on something?
(sorry if I am suffering from post-vacation amnesia).
Thanks,
Dan
> -----Original Message-----
> From: Dan Magenheimer
> Sent: Friday, September 03, 2010 9:48 AM
> To: Xen-Devel (xen-devel@xxxxxxxxxxxxxxxxxxx)
> Cc: Ian Jackson; Keir Fraser
> Subject: [PATCH] tmem (hypervisor-side): ABI v1 to handle long object-
> ids (XEN-4.0-TESTING and XEN-UNSTABLE)
>
> [PATCH] tmem (hypervisor-side): move to new ABI version to handle
> long object-ids
>
> Please apply patch to both xen-4.0-testing and xen-unstable
> (same patch applies cleanly to both).
>
> (Note to Keir/Ian: These patches should be applied
> together, but I'm not clear on how to submit patches
> that cross MAINTAINERS boundaries as this one does.)
>
> After a great deal of discussion and review with linux
> kernel developers, it appears there are "next-generation"
> filesystems (such as btrfs, xfs, Lustre) that will not
> be able to use tmem due to an ABI limitation... a field
> that represents a unique file identifier is 64-bits in
> the tmem ABI and may need to be as large as 192-bits.
> So to support these guest filesystems, the tmem ABI must be
> revised, from "v0" to "v1".
>
> I *think* it is still the case that tmem is experimental
> and is not used anywhere yet in production.
>
> The tmem ABI is designed to support multiple revisions,
> so the Xen tmem implementation could be updated to
> handle both v0 and v1. However this is a bit
> messy and would require data structures for both v0
> and v1 to appear in public Xen header files.
>
> I am inclined to update the Xen tmem implementation
> to only support v1 and gracefully fail v0. This would
> result in only a performance loss (as if tmem were
> disabled) for newly launched tmem-v0-enabled guests,
> but live-migration between old tmem-v0 Xen and new
> tmem-v1 Xen machines would fail, and saved tmem-v0
> guests will not be able to be restored on a tmem-v1
> Xen machine. I would plan to update both pre-4.0.2
> and unstable (future 4.1) to only support v1.
>
> I believe these restrictions are reasonable at this
> point in the tmem lifecycle, though they may not
> be reasonable in the near future; should the tmem
> ABI need to be revised from v1 to v2, I understand
> backwards compatibility will be required.
>
> Signed-off-by: Dan Magenheimer <dan.magenheimer@xxxxxxxxxx>
>
> diff -r 07ac5459b250 xen/common/tmem.c
> --- a/xen/common/tmem.c Wed Aug 25 09:23:31 2010 +0100
> +++ b/xen/common/tmem.c Thu Sep 02 16:43:33 2010 -0600
> @@ -26,7 +26,7 @@
> #define EXPORT /* indicates code other modules are dependent upon */
> #define FORWARD
>
> -#define TMEM_SPEC_VERSION 0
> +#define TMEM_SPEC_VERSION 1
>
> /************ INTERFACE TO TMEM HOST-DEPENDENT (tmh) CODE
> ************/
>
> @@ -149,14 +149,13 @@ typedef struct share_list sharelist_t;
>
> #define OBJ_HASH_BUCKETS 256 /* must be power of two */
> #define OBJ_HASH_BUCKETS_MASK (OBJ_HASH_BUCKETS-1)
> -#define OBJ_HASH(_oid) (tmh_hash(_oid, BITS_PER_LONG) &
> OBJ_HASH_BUCKETS_MASK)
>
> struct tm_pool {
> bool_t shared;
> bool_t persistent;
> bool_t is_dying;
> int pageshift; /* 0 == 2**12 */
> - struct list_head pool_list; /* FIXME do we need this anymore? */
> + struct list_head pool_list;
> client_t *client;
> uint64_t uuid[2]; /* 0 for private, non-zero for shared */
> uint32_t pool_id;
> @@ -189,9 +188,14 @@ typedef struct tm_pool pool_t;
> #define is_shared(_p) (_p->shared)
> #define is_private(_p) (!(_p->shared))
>
> +struct oid {
> + uint64_t oid[3];
> +};
> +typedef struct oid OID;
> +
> struct tmem_object_root {
> DECL_SENTINEL
> - uint64_t oid;
> + OID oid;
> struct rb_node rb_tree_node; /* protected by pool->pool_rwlock */
> unsigned long objnode_count; /* atomicity depends on obj_spinlock
> */
> long pgp_count; /* atomicity depends on obj_spinlock */
> @@ -217,12 +221,14 @@ struct tmem_page_descriptor {
> struct list_head client_inv_pages;
> };
> union {
> - struct list_head client_eph_pages;
> - struct list_head pool_pers_pages;
> - };
> - union {
> - obj_t *obj;
> - uint64_t inv_oid; /* used for invalid list only */
> + struct {
> + union {
> + struct list_head client_eph_pages;
> + struct list_head pool_pers_pages;
> + };
> + obj_t *obj;
> + } us;
> + OID inv_oid; /* used for invalid list only */
> };
> pagesize_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid,
> else compressed data (cdata) */
> @@ -467,9 +473,9 @@ static NOINLINE int pcd_associate(pgp_t
>
> if ( !tmh_dedup_enabled() )
> return 0;
> - ASSERT(pgp->obj != NULL);
> - ASSERT(pgp->obj->pool != NULL);
> - ASSERT(!pgp->obj->pool->persistent);
> + ASSERT(pgp->us.obj != NULL);
> + ASSERT(pgp->us.obj->pool != NULL);
> + ASSERT(!pgp->us.obj->pool->persistent);
> if ( cdata == NULL )
> {
> ASSERT(pgp->pfp != NULL);
> @@ -528,7 +534,7 @@ static NOINLINE int pcd_associate(pgp_t
> /* match! if not compressed, free the no-longer-needed
> page */
> /* but if compressed, data is assumed static so don't
> free! */
> if ( cdata == NULL )
> - tmem_page_free(pgp->obj->pool,pgp->pfp);
> + tmem_page_free(pgp->us.obj->pool,pgp->pfp);
> deduped_puts++;
> goto match;
> }
> @@ -540,7 +546,7 @@ static NOINLINE int pcd_associate(pgp_t
> ret = -ENOMEM;
> goto unlock;
> } else if ( cdata != NULL ) {
> - if ( (pcd->cdata = tmem_malloc_bytes(csize,pgp->obj->pool)) ==
> NULL )
> + if ( (pcd->cdata = tmem_malloc_bytes(csize,pgp->us.obj->pool))
> == NULL )
> {
> tmem_free(pcd,sizeof(pcd_t),NULL);
> ret = -ENOMEM;
> @@ -561,11 +567,11 @@ static NOINLINE int pcd_associate(pgp_t
> pcd->size = 0;
> pcd->tze = NULL;
> } else if ( pfp_size < PAGE_SIZE &&
> - ((pcd->tze = tmem_malloc_bytes(pfp_size,pgp->obj->pool)) !=
> NULL) ) {
> + ((pcd->tze = tmem_malloc_bytes(pfp_size,pgp->us.obj->pool))
> != NULL) ) {
> tmh_tze_copy_from_pfp(pcd->tze,pgp->pfp,pfp_size);
> pcd->size = pfp_size;
> pcd_tot_tze_size += pfp_size;
> - tmem_page_free(pgp->obj->pool,pgp->pfp);
> + tmem_page_free(pgp->us.obj->pool,pgp->pfp);
> } else {
> pcd->pfp = pgp->pfp;
> pcd->size = PAGE_SIZE;
> @@ -602,9 +608,9 @@ static NOINLINE pgp_t *pgp_alloc(obj_t *
> pool = obj->pool;
> if ( (pgp = tmem_malloc(pgp_t, pool)) == NULL )
> return NULL;
> - pgp->obj = obj;
> + pgp->us.obj = obj;
> INIT_LIST_HEAD(&pgp->global_eph_pages);
> - INIT_LIST_HEAD(&pgp->client_eph_pages);
> + INIT_LIST_HEAD(&pgp->us.client_eph_pages);
> pgp->pfp = NULL;
> if ( tmh_dedup_enabled() )
> {
> @@ -642,7 +648,7 @@ static NOINLINE void pgp_free_data(pgp_t
> else if ( pgp_size )
> tmem_free(pgp->cdata,pgp_size,pool);
> else
> - tmem_page_free(pgp->obj->pool,pgp->pfp);
> + tmem_page_free(pgp->us.obj->pool,pgp->pfp);
> if ( pool != NULL && pgp_size )
> {
> pool->client->compressed_pages--;
> @@ -657,18 +663,18 @@ static NOINLINE void pgp_free(pgp_t *pgp
> pool_t *pool = NULL;
>
> ASSERT_SENTINEL(pgp,PGD);
> - ASSERT(pgp->obj != NULL);
> - ASSERT_SENTINEL(pgp->obj,OBJ);
> - ASSERT_SENTINEL(pgp->obj->pool,POOL);
> - ASSERT(pgp->obj->pool->client != NULL);
> + ASSERT(pgp->us.obj != NULL);
> + ASSERT_SENTINEL(pgp->us.obj,OBJ);
> + ASSERT_SENTINEL(pgp->us.obj->pool,POOL);
> + ASSERT(pgp->us.obj->pool->client != NULL);
> if ( from_delete )
> - ASSERT(pgp_lookup_in_obj(pgp->obj,pgp->index) == NULL);
> - ASSERT(pgp->obj->pool != NULL);
> - pool = pgp->obj->pool;
> + ASSERT(pgp_lookup_in_obj(pgp->us.obj,pgp->index) == NULL);
> + ASSERT(pgp->us.obj->pool != NULL);
> + pool = pgp->us.obj->pool;
> if ( is_ephemeral(pool) )
> {
> ASSERT(list_empty(&pgp->global_eph_pages));
> - ASSERT(list_empty(&pgp->client_eph_pages));
> + ASSERT(list_empty(&pgp->us.client_eph_pages));
> }
> pgp_free_data(pgp, pool);
> atomic_dec_and_assert(global_pgp_count);
> @@ -676,12 +682,12 @@ static NOINLINE void pgp_free(pgp_t *pgp
> pgp->size = -1;
> if ( is_persistent(pool) && pool->client->live_migrating )
> {
> - pgp->inv_oid = pgp->obj->oid;
> + pgp->inv_oid = pgp->us.obj->oid;
> pgp->pool_id = pool->pool_id;
> return;
> }
> INVERT_SENTINEL(pgp,PGD);
> - pgp->obj = NULL;
> + pgp->us.obj = NULL;
> pgp->index = -1;
> tmem_free(pgp,sizeof(pgp_t),pool);
> }
> @@ -693,7 +699,7 @@ static NOINLINE void pgp_free_from_inv_l
> ASSERT_SENTINEL(pool,POOL);
> ASSERT_SENTINEL(pgp,PGD);
> INVERT_SENTINEL(pgp,PGD);
> - pgp->obj = NULL;
> + pgp->us.obj = NULL;
> pgp->index = -1;
> tmem_free(pgp,sizeof(pgp_t),pool);
> }
> @@ -704,18 +710,18 @@ static void pgp_delist(pgp_t *pgp, bool_
> client_t *client;
>
> ASSERT(pgp != NULL);
> - ASSERT(pgp->obj != NULL);
> - ASSERT(pgp->obj->pool != NULL);
> - client = pgp->obj->pool->client;
> + ASSERT(pgp->us.obj != NULL);
> + ASSERT(pgp->us.obj->pool != NULL);
> + client = pgp->us.obj->pool->client;
> ASSERT(client != NULL);
> - if ( is_ephemeral(pgp->obj->pool) )
> + if ( is_ephemeral(pgp->us.obj->pool) )
> {
> if ( !no_eph_lock )
> tmem_spin_lock(&eph_lists_spinlock);
> - if ( !list_empty(&pgp->client_eph_pages) )
> + if ( !list_empty(&pgp->us.client_eph_pages) )
> client->eph_count--;
> ASSERT(client->eph_count >= 0);
> - list_del_init(&pgp->client_eph_pages);
> + list_del_init(&pgp->us.client_eph_pages);
> if ( !list_empty(&pgp->global_eph_pages) )
> global_eph_count--;
> ASSERT(global_eph_count >= 0);
> @@ -728,12 +734,12 @@ static void pgp_delist(pgp_t *pgp, bool_
> tmem_spin_lock(&pers_lists_spinlock);
> list_add_tail(&pgp->client_inv_pages,
> &client->persistent_invalidated_list);
> - if ( pgp != pgp->obj->pool->cur_pgp )
> - list_del_init(&pgp->pool_pers_pages);
> + if ( pgp != pgp->us.obj->pool->cur_pgp )
> + list_del_init(&pgp->us.pool_pers_pages);
> tmem_spin_unlock(&pers_lists_spinlock);
> } else {
> tmem_spin_lock(&pers_lists_spinlock);
> - list_del_init(&pgp->pool_pers_pages);
> + list_del_init(&pgp->us.pool_pers_pages);
> tmem_spin_unlock(&pers_lists_spinlock);
> }
> }
> @@ -745,10 +751,10 @@ static NOINLINE void pgp_delete(pgp_t *p
> uint64_t life;
>
> ASSERT(pgp != NULL);
> - ASSERT(pgp->obj != NULL);
> - ASSERT(pgp->obj->pool != NULL);
> + ASSERT(pgp->us.obj != NULL);
> + ASSERT(pgp->us.obj->pool != NULL);
> life = get_cycles() - pgp->timestamp;
> - pgp->obj->pool->sum_life_cycles += life;
> + pgp->us.obj->pool->sum_life_cycles += life;
> pgp_delist(pgp, no_eph_lock);
> pgp_free(pgp,1);
> }
> @@ -758,11 +764,11 @@ static NOINLINE void pgp_destroy(void *v
> {
> pgp_t *pgp = (pgp_t *)v;
>
> - ASSERT_SPINLOCK(&pgp->obj->obj_spinlock);
> + ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock);
> pgp_delist(pgp,0);
> - ASSERT(pgp->obj != NULL);
> - pgp->obj->pgp_count--;
> - ASSERT(pgp->obj->pgp_count >= 0);
> + ASSERT(pgp->us.obj != NULL);
> + pgp->us.obj->pgp_count--;
> + ASSERT(pgp->us.obj->pgp_count >= 0);
> pgp_free(pgp,0);
> }
>
> @@ -849,37 +855,74 @@ static void rtn_free(rtn_t *rtn)
>
> /************ POOL OBJECT COLLECTION MANIPULATION ROUTINES
> *******************/
>
> +int oid_compare(OID *left, OID *right)
> +{
> + if ( left->oid[2] == right->oid[2] )
> + {
> + if ( left->oid[1] == right->oid[1] )
> + {
> + if ( left->oid[0] == right->oid[0] )
> + return 0;
> + else if ( left->oid[0] < right->oid[0] )
> + return -1;
> + else
> + return 1;
> + }
> + else if ( left->oid[1] < right->oid[1] )
> + return -1;
> + else
> + return 1;
> + }
> + else if ( left->oid[2] < right->oid[2] )
> + return -1;
> + else
> + return 1;
> +}
> +
> +void oid_set_invalid(OID *oidp)
> +{
> + oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
> +}
> +
> +unsigned oid_hash(OID *oidp)
> +{
> + return (tmh_hash(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
> + BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK);
> +}
> +
> /* searches for object==oid in pool, returns locked object if found */
> -static NOINLINE obj_t * obj_find(pool_t *pool, uint64_t oid)
> +static NOINLINE obj_t * obj_find(pool_t *pool, OID *oidp)
> {
> struct rb_node *node;
> obj_t *obj;
>
> restart_find:
> tmem_read_lock(&pool->pool_rwlock);
> - node = pool->obj_rb_root[OBJ_HASH(oid)].rb_node;
> + node = pool->obj_rb_root[oid_hash(oidp)].rb_node;
> while ( node )
> {
> obj = container_of(node, obj_t, rb_tree_node);
> - if ( obj->oid == oid )
> + switch ( oid_compare(&obj->oid, oidp) )
> {
> - if ( tmh_lock_all )
> - obj->no_evict = 1;
> - else
> - {
> - if ( !tmem_spin_trylock(&obj->obj_spinlock) )
> + case 0: /* equal */
> + if ( tmh_lock_all )
> + obj->no_evict = 1;
> + else
> {
> + if ( !tmem_spin_trylock(&obj->obj_spinlock) )
> + {
> + tmem_read_unlock(&pool->pool_rwlock);
> + goto restart_find;
> + }
> tmem_read_unlock(&pool->pool_rwlock);
> - goto restart_find;
> }
> - tmem_read_unlock(&pool->pool_rwlock);
> - }
> - return obj;
> + return obj;
> + case -1:
> + node = node->rb_left;
> + break;
> + case 1:
> + node = node->rb_right;
> }
> - else if ( oid < obj->oid )
> - node = node->rb_left;
> - else
> - node = node->rb_right;
> }
> tmem_read_unlock(&pool->pool_rwlock);
> return NULL;
> @@ -889,7 +932,7 @@ static NOINLINE void obj_free(obj_t *obj
> static NOINLINE void obj_free(obj_t *obj, int no_rebalance)
> {
> pool_t *pool;
> - uint64_t old_oid;
> + OID old_oid;
>
> ASSERT_SPINLOCK(&obj->obj_spinlock);
> ASSERT(obj != NULL);
> @@ -908,12 +951,12 @@ static NOINLINE void obj_free(obj_t *obj
> INVERT_SENTINEL(obj,OBJ);
> obj->pool = NULL;
> old_oid = obj->oid;
> - obj->oid = -1;
> + oid_set_invalid(&obj->oid);
> obj->last_client = CLI_ID_NULL;
> atomic_dec_and_assert(global_obj_count);
> /* use no_rebalance only if all objects are being destroyed anyway
> */
> if ( !no_rebalance )
> - rb_erase(&obj->rb_tree_node,&pool-
> >obj_rb_root[OBJ_HASH(old_oid)]);
> + rb_erase(&obj->rb_tree_node,&pool-
> >obj_rb_root[oid_hash(&old_oid)]);
> tmem_free(obj,sizeof(obj_t),pool);
> }
>
> @@ -927,12 +970,17 @@ static NOINLINE int obj_rb_insert(struct
> {
> this = container_of(*new, obj_t, rb_tree_node);
> parent = *new;
> - if ( obj->oid < this->oid )
> - new = &((*new)->rb_left);
> - else if ( obj->oid > this->oid )
> - new = &((*new)->rb_right);
> - else
> - return 0;
> + switch ( oid_compare(&obj->oid, &this->oid) )
> + {
> + case 0:
> + return 0;
> + case -1:
> + new = &((*new)->rb_left);
> + break;
> + case 1:
> + new = &((*new)->rb_right);
> + break;
> + }
> }
> rb_link_node(&obj->rb_tree_node, parent, new);
> rb_insert_color(&obj->rb_tree_node, root);
> @@ -943,7 +991,7 @@ static NOINLINE int obj_rb_insert(struct
> * allocate, initialize, and insert an tmem_object_root
> * (should be called only if find failed)
> */
> -static NOINLINE obj_t * obj_new(pool_t *pool, uint64_t oid)
> +static NOINLINE obj_t * obj_new(pool_t *pool, OID *oidp)
> {
> obj_t *obj;
>
> @@ -958,13 +1006,13 @@ static NOINLINE obj_t * obj_new(pool_t *
> INIT_RADIX_TREE(&obj->tree_root,0);
> spin_lock_init(&obj->obj_spinlock);
> obj->pool = pool;
> - obj->oid = oid;
> + obj->oid = *oidp;
> obj->objnode_count = 0;
> obj->pgp_count = 0;
> obj->last_client = CLI_ID_NULL;
> SET_SENTINEL(obj,OBJ);
> tmem_spin_lock(&obj->obj_spinlock);
> - obj_rb_insert(&pool->obj_rb_root[OBJ_HASH(oid)], obj);
> + obj_rb_insert(&pool->obj_rb_root[oid_hash(oidp)], obj);
> obj->no_evict = 1;
> ASSERT_SPINLOCK(&obj->obj_spinlock);
> return obj;
> @@ -1256,7 +1304,7 @@ static void client_freeze(client_t *clie
>
> static bool_t tmem_try_to_evict_pgp(pgp_t *pgp, bool_t
> *hold_pool_rwlock)
> {
> - obj_t *obj = pgp->obj;
> + obj_t *obj = pgp->us.obj;
> pool_t *pool = obj->pool;
> client_t *client = pool->client;
> uint16_t firstbyte = pgp->firstbyte;
> @@ -1280,8 +1328,8 @@ static bool_t tmem_try_to_evict_pgp(pgp_
> pgp->eviction_attempted++;
> list_del(&pgp->global_eph_pages);
> list_add_tail(&pgp-
> >global_eph_pages,&global_ephemeral_page_list);
> - list_del(&pgp->client_eph_pages);
> - list_add_tail(&pgp->client_eph_pages,&client-
> >ephemeral_page_list);
> + list_del(&pgp->us.client_eph_pages);
> + list_add_tail(&pgp->us.client_eph_pages,&client-
> >ephemeral_page_list);
> goto pcd_unlock;
> }
> }
> @@ -1314,7 +1362,7 @@ static int tmem_evict(void)
> if ( (client != NULL) && client_over_quota(client) &&
> !list_empty(&client->ephemeral_page_list) )
> {
> - list_for_each_entry_safe(pgp,pgp2,&client-
> >ephemeral_page_list,client_eph_pages)
> + list_for_each_entry_safe(pgp,pgp2,&client-
> >ephemeral_page_list,us.client_eph_pages)
> if ( tmem_try_to_evict_pgp(pgp,&hold_pool_rwlock) )
> goto found;
> } else if ( list_empty(&global_ephemeral_page_list) ) {
> @@ -1331,7 +1379,7 @@ found:
> found:
> ASSERT(pgp != NULL);
> ASSERT_SENTINEL(pgp,PGD);
> - obj = pgp->obj;
> + obj = pgp->us.obj;
> ASSERT(obj != NULL);
> ASSERT(obj->no_evict == 0);
> ASSERT(obj->pool != NULL);
> @@ -1407,16 +1455,16 @@ static NOINLINE int do_tmem_put_compress
> DECL_LOCAL_CYC_COUNTER(compress);
>
> ASSERT(pgp != NULL);
> - ASSERT(pgp->obj != NULL);
> - ASSERT_SPINLOCK(&pgp->obj->obj_spinlock);
> - ASSERT(pgp->obj->pool != NULL);
> - ASSERT(pgp->obj->pool->client != NULL);
> + ASSERT(pgp->us.obj != NULL);
> + ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock);
> + ASSERT(pgp->us.obj->pool != NULL);
> + ASSERT(pgp->us.obj->pool->client != NULL);
> #ifdef __i386__
> return -ENOMEM;
> #endif
>
> if ( pgp->pfp != NULL )
> - pgp_free_data(pgp, pgp->obj->pool);
> + pgp_free_data(pgp, pgp->us.obj->pool);
> START_CYC_COUNTER(compress);
> ret = tmh_compress_from_client(cmfn, &dst, &size, cva);
> if ( (ret == -EFAULT) || (ret == 0) )
> @@ -1424,10 +1472,10 @@ static NOINLINE int do_tmem_put_compress
> else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) {
> ret = 0;
> goto out;
> - } else if ( tmh_dedup_enabled() && !is_persistent(pgp->obj->pool)
> ) {
> + } else if ( tmh_dedup_enabled() && !is_persistent(pgp->us.obj-
> >pool) ) {
> if ( (ret = pcd_associate(pgp,dst,size)) == -ENOMEM )
> goto out;
> - } else if ( (p = tmem_malloc_bytes(size,pgp->obj->pool)) == NULL )
> {
> + } else if ( (p = tmem_malloc_bytes(size,pgp->us.obj->pool)) ==
> NULL ) {
> ret = -ENOMEM;
> goto out;
> } else {
> @@ -1435,8 +1483,8 @@ static NOINLINE int do_tmem_put_compress
> pgp->cdata = p;
> }
> pgp->size = size;
> - pgp->obj->pool->client->compressed_pages++;
> - pgp->obj->pool->client->compressed_sum_size += size;
> + pgp->us.obj->pool->client->compressed_pages++;
> + pgp->us.obj->pool->client->compressed_sum_size += size;
> ret = 1;
>
> out:
> @@ -1456,7 +1504,7 @@ static NOINLINE int do_tmem_dup_put(pgp_
> ASSERT(pgp != NULL);
> ASSERT(pgp->pfp != NULL);
> ASSERT(pgp->size != -1);
> - obj = pgp->obj;
> + obj = pgp->us.obj;
> ASSERT_SPINLOCK(&obj->obj_spinlock);
> ASSERT(obj != NULL);
> pool = obj->pool;
> @@ -1535,7 +1583,7 @@ cleanup:
>
>
> static NOINLINE int do_tmem_put(pool_t *pool,
> - uint64_t oid, uint32_t index,
> + OID *oidp, uint32_t index,
> tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
> pagesize_t pfn_offset, pagesize_t len, void *cva)
> {
> @@ -1547,7 +1595,7 @@ static NOINLINE int do_tmem_put(pool_t *
> ASSERT(pool != NULL);
> pool->puts++;
> /* does page already exist (dup)? if so, handle specially */
> - if ( (obj = objfound = obj_find(pool,oid)) != NULL )
> + if ( (obj = objfound = obj_find(pool,oidp)) != NULL )
> {
> ASSERT_SPINLOCK(&objfound->obj_spinlock);
> if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL)
> @@ -1561,7 +1609,7 @@ static NOINLINE int do_tmem_put(pool_t *
> if ( (objfound == NULL) )
> {
> tmem_write_lock(&pool->pool_rwlock);
> - if ( (obj = objnew = obj_new(pool,oid)) == NULL )
> + if ( (obj = objnew = obj_new(pool,oidp)) == NULL )
> {
> tmem_write_unlock(&pool->pool_rwlock);
> return -ENOMEM;
> @@ -1627,14 +1675,14 @@ insert_page:
> &global_ephemeral_page_list);
> if (++global_eph_count > global_eph_count_max)
> global_eph_count_max = global_eph_count;
> - list_add_tail(&pgp->client_eph_pages,
> + list_add_tail(&pgp->us.client_eph_pages,
> &client->ephemeral_page_list);
> if (++client->eph_count > client->eph_count_max)
> client->eph_count_max = client->eph_count;
> tmem_spin_unlock(&eph_lists_spinlock);
> } else { /* is_persistent */
> tmem_spin_lock(&pers_lists_spinlock);
> - list_add_tail(&pgp->pool_pers_pages,
> + list_add_tail(&pgp->us.pool_pers_pages,
> &pool->persistent_page_list);
> tmem_spin_unlock(&pers_lists_spinlock);
> }
> @@ -1678,7 +1726,7 @@ free:
> return ret;
> }
>
> -static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t
> index,
> +static NOINLINE int do_tmem_get(pool_t *pool, OID *oidp, uint32_t
> index,
> tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
> pagesize_t pfn_offset, pagesize_t len, void *cva)
> {
> @@ -1691,7 +1739,7 @@ static NOINLINE int do_tmem_get(pool_t *
> return -EEMPTY;
>
> pool->gets++;
> - obj = obj_find(pool,oid);
> + obj = obj_find(pool,oidp);
> if ( obj == NULL )
> return 0;
>
> @@ -1737,8 +1785,8 @@ static NOINLINE int do_tmem_get(pool_t *
> tmem_spin_lock(&eph_lists_spinlock);
> list_del(&pgp->global_eph_pages);
> list_add_tail(&pgp-
> >global_eph_pages,&global_ephemeral_page_list);
> - list_del(&pgp->client_eph_pages);
> - list_add_tail(&pgp->client_eph_pages,&client-
> >ephemeral_page_list);
> + list_del(&pgp->us.client_eph_pages);
> + list_add_tail(&pgp->us.client_eph_pages,&client-
> >ephemeral_page_list);
> tmem_spin_unlock(&eph_lists_spinlock);
> ASSERT(obj != NULL);
> obj->last_client = tmh_get_cli_id_from_current();
> @@ -1763,13 +1811,13 @@ bad_copy:
>
> }
>
> -static NOINLINE int do_tmem_flush_page(pool_t *pool, uint64_t oid,
> uint32_t index)
> +static NOINLINE int do_tmem_flush_page(pool_t *pool, OID *oidp,
> uint32_t index)
> {
> obj_t *obj;
> pgp_t *pgp;
>
> pool->flushs++;
> - obj = obj_find(pool,oid);
> + obj = obj_find(pool,oidp);
> if ( obj == NULL )
> goto out;
> pgp = pgp_delete_from_obj(obj, index);
> @@ -1798,12 +1846,12 @@ out:
> return 1;
> }
>
> -static NOINLINE int do_tmem_flush_object(pool_t *pool, uint64_t oid)
> +static NOINLINE int do_tmem_flush_object(pool_t *pool, OID *oidp)
> {
> obj_t *obj;
>
> pool->flush_objs++;
> - obj = obj_find(pool,oid);
> + obj = obj_find(pool,oidp);
> if ( obj == NULL )
> goto out;
> tmem_write_lock(&pool->pool_rwlock);
> @@ -1863,6 +1911,16 @@ static NOINLINE int do_tmem_new_pool(cli
> if ( pagebits != (PAGE_SHIFT - 12) )
> {
> printk("failed... unsupported pagesize
> %d\n",1<<(pagebits+12));
> + return -EPERM;
> + }
> + if ( flags & TMEM_POOL_PRECOMPRESSED )
> + {
> + printk("failed... precompression flag set but unsupported\n");
> + return -EPERM;
> + }
> + if ( flags & TMEM_POOL_RESERVED_BITS )
> + {
> + printk("failed... reserved bits must be zero\n");
> return -EPERM;
> }
> if ( (pool = pool_alloc()) == NULL )
> @@ -2369,6 +2427,7 @@ static NOINLINE int tmemc_save_get_next_
> pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN)
> ? NULL : client->pools[pool_id];
> pgp_t *pgp;
> + OID oid;
> int ret = 0;
> struct tmem_handle *h;
> unsigned int pagesize = 1 << (pool->pageshift+12);
> @@ -2389,22 +2448,23 @@ static NOINLINE int tmemc_save_get_next_
> {
> /* process the first one */
> pool->cur_pgp = pgp = list_entry((&pool-
> >persistent_page_list)->next,
> - pgp_t,pool_pers_pages);
> - } else if ( list_is_last(&pool->cur_pgp->pool_pers_pages,
> + pgp_t,us.pool_pers_pages);
> + } else if ( list_is_last(&pool->cur_pgp->us.pool_pers_pages,
> &pool->persistent_page_list) )
> {
> /* already processed the last one in the list */
> ret = -1;
> goto out;
> }
> - pgp = list_entry((&pool->cur_pgp->pool_pers_pages)->next,
> - pgp_t,pool_pers_pages);
> + pgp = list_entry((&pool->cur_pgp->us.pool_pers_pages)->next,
> + pgp_t,us.pool_pers_pages);
> pool->cur_pgp = pgp;
> + oid = pgp->us.obj->oid;
> h = (struct tmem_handle *)buf.p;
> - h->oid = pgp->obj->oid;
> + *(OID *)&h->oid[0] = oid;
> h->index = pgp->index;
> buf.p = (void *)(h+1);
> - ret = do_tmem_get(pool, h->oid, h->index,0,0,0,pagesize,buf.p);
> + ret = do_tmem_get(pool, &oid, h->index,0,0,0,pagesize,buf.p);
>
> out:
> tmem_spin_unlock(&pers_lists_spinlock);
> @@ -2444,7 +2504,7 @@ static NOINLINE int tmemc_save_get_next_
> }
> h = (struct tmem_handle *)buf.p;
> h->pool_id = pgp->pool_id;
> - h->oid = pgp->inv_oid;
> + *(OID *)&h->oid = pgp->inv_oid;
> h->index = pgp->index;
> ret = 1;
> out:
> @@ -2452,7 +2512,7 @@ out:
> return ret;
> }
>
> -static int tmemc_restore_put_page(int cli_id, int pool_id, uint64_t
> oid,
> +static int tmemc_restore_put_page(int cli_id, int pool_id, OID *oidp,
> uint32_t index, tmem_cli_va_t buf, uint32_t
> bufsize)
> {
> client_t *client = tmh_client_from_cli_id(cli_id);
> @@ -2461,10 +2521,10 @@ static int tmemc_restore_put_page(int cl
>
> if ( pool == NULL )
> return -1;
> - return do_tmem_put(pool,oid,index,0,0,0,bufsize,buf.p);
> + return do_tmem_put(pool,oidp,index,0,0,0,bufsize,buf.p);
> }
>
> -static int tmemc_restore_flush_page(int cli_id, int pool_id, uint64_t
> oid,
> +static int tmemc_restore_flush_page(int cli_id, int pool_id, OID
> *oidp,
> uint32_t index)
> {
> client_t *client = tmh_client_from_cli_id(cli_id);
> @@ -2473,7 +2533,7 @@ static int tmemc_restore_flush_page(int
>
> if ( pool == NULL )
> return -1;
> - return do_tmem_flush_page(pool,oid,index);
> + return do_tmem_flush_page(pool,oidp,index);
> }
>
> static NOINLINE int do_tmem_control(struct tmem_op *op)
> @@ -2481,6 +2541,7 @@ static NOINLINE int do_tmem_control(stru
> int ret;
> uint32_t pool_id = op->pool_id;
> uint32_t subop = op->u.ctrl.subop;
> + OID *oidp = (OID *)(&op->u.ctrl.oid[0]);
>
> if (!tmh_current_is_privileged())
> {
> @@ -2533,12 +2594,12 @@ static NOINLINE int do_tmem_control(stru
> break;
> case TMEMC_RESTORE_PUT_PAGE:
> ret = tmemc_restore_put_page(op->u.ctrl.cli_id,pool_id,
> - op->u.ctrl.arg3, op->u.ctrl.arg2,
> + oidp, op->u.ctrl.arg2,
> op->u.ctrl.buf, op->u.ctrl.arg1);
> break;
> case TMEMC_RESTORE_FLUSH_PAGE:
> ret = tmemc_restore_flush_page(op->u.ctrl.cli_id,pool_id,
> - op->u.ctrl.arg3, op-
> >u.ctrl.arg2);
> + oidp, op->u.ctrl.arg2);
> break;
> default:
> ret = -1;
> @@ -2553,6 +2614,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
> struct tmem_op op;
> client_t *client = tmh_client_from_current();
> pool_t *pool = NULL;
> + OID *oidp;
> int rc = 0;
> bool_t succ_get = 0, succ_put = 0;
> bool_t non_succ_get = 0, non_succ_put = 0;
> @@ -2656,6 +2718,7 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
> ASSERT_SENTINEL(pool,POOL);
> }
>
> + oidp = (OID *)&op.u.gen.oid[0];
> switch ( op.cmd )
> {
> case TMEM_NEW_POOL:
> @@ -2664,28 +2727,28 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
> break;
> case TMEM_NEW_PAGE:
> tmem_ensure_avail_pages();
> - rc = do_tmem_put(pool, op.u.gen.object,
> + rc = do_tmem_put(pool, oidp,
> op.u.gen.index, op.u.gen.cmfn, 0, 0, 0,
> NULL);
> break;
> case TMEM_PUT_PAGE:
> tmem_ensure_avail_pages();
> - rc = do_tmem_put(pool, op.u.gen.object,
> + rc = do_tmem_put(pool, oidp,
> op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE,
> NULL);
> if (rc == 1) succ_put = 1;
> else non_succ_put = 1;
> break;
> case TMEM_GET_PAGE:
> - rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index,
> op.u.gen.cmfn,
> + rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
> 0, 0, PAGE_SIZE, 0);
> if (rc == 1) succ_get = 1;
> else non_succ_get = 1;
> break;
> case TMEM_FLUSH_PAGE:
> flush = 1;
> - rc = do_tmem_flush_page(pool, op.u.gen.object,
> op.u.gen.index);
> + rc = do_tmem_flush_page(pool, oidp, op.u.gen.index);
> break;
> case TMEM_FLUSH_OBJECT:
> - rc = do_tmem_flush_object(pool, op.u.gen.object);
> + rc = do_tmem_flush_object(pool, oidp);
> flush_obj = 1;
> break;
> case TMEM_DESTROY_POOL:
> @@ -2693,12 +2756,12 @@ EXPORT long do_tmem_op(tmem_cli_op_t uop
> rc = do_tmem_destroy_pool(op.pool_id);
> break;
> case TMEM_READ:
> - rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index,
> op.u.gen.cmfn,
> + rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn,
> op.u.gen.tmem_offset, op.u.gen.pfn_offset,
> op.u.gen.len,0);
> break;
> case TMEM_WRITE:
> - rc = do_tmem_put(pool, op.u.gen.object,
> + rc = do_tmem_put(pool, oidp,
> op.u.gen.index, op.u.gen.cmfn,
> op.u.gen.tmem_offset, op.u.gen.pfn_offset,
> op.u.gen.len, NULL);
> diff -r 07ac5459b250 xen/include/public/tmem.h
> --- a/xen/include/public/tmem.h Wed Aug 25 09:23:31 2010 +0100
> +++ b/xen/include/public/tmem.h Thu Sep 02 16:43:33 2010 -0600
> @@ -28,6 +28,9 @@
> #define __XEN_PUBLIC_TMEM_H__
>
> #include "xen.h"
> +
> +/* version of ABI */
> +#define TMEM_SPEC_VERSION 1
>
> /* Commands to HYPERVISOR_tmem_op() */
> #define TMEM_CONTROL 0
> @@ -75,10 +78,12 @@
> /* Bits for HYPERVISOR_tmem_op(TMEM_NEW_POOL) */
> #define TMEM_POOL_PERSIST 1
> #define TMEM_POOL_SHARED 2
> +#define TMEM_POOL_PRECOMPRESSED 4
> #define TMEM_POOL_PAGESIZE_SHIFT 4
> #define TMEM_POOL_PAGESIZE_MASK 0xf
> #define TMEM_POOL_VERSION_SHIFT 24
> #define TMEM_POOL_VERSION_MASK 0xff
> +#define TMEM_POOL_RESERVED_BITS 0x00ffff00
>
> /* Bits for client flags (save/restore) */
> #define TMEM_CLIENT_COMPRESS 1
> @@ -106,12 +111,12 @@ struct tmem_op {
> uint32_t cli_id;
> uint32_t arg1;
> uint32_t arg2;
> - uint64_t arg3;
> + uint64_t oid[3];
> tmem_cli_va_t buf;
> } ctrl; /* for cmd == TMEM_CONTROL */
> struct {
>
> - uint64_t object;
> + uint64_t oid[3];
> uint32_t index;
> uint32_t tmem_offset;
> uint32_t pfn_offset;
> @@ -126,9 +131,8 @@ struct tmem_handle {
> struct tmem_handle {
> uint32_t pool_id;
> uint32_t index;
> - uint64_t oid;
> + uint64_t oid[3];
> };
> -
> #endif
>
> #endif /* __XEN_PUBLIC_TMEM_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|