Signed-off-by: Jose Renato Santos # HG changeset patch # User jsantos@hpl.hp.com # Date 1170205232 28800 # Node ID 29b0c275dd3cc421bbc62480ea262e9e2565bd10 # Parent 5e8da0663ff863c9de8948b72399e591f34828a2 Use RCU for domain_list and domain_hash diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/acm/acm_chinesewall_hooks.c --- a/xen/acm/acm_chinesewall_hooks.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/acm/acm_chinesewall_hooks.c Tue Jan 30 17:00:32 2007 -0800 @@ -196,7 +196,7 @@ chwall_init_state(struct acm_chwall_poli ssidref_t chwall_ssidref; struct domain **pd; - write_lock(&domlist_lock); + spin_lock(&domlist_lock); /* go through all domains and adjust policy as if this domain was started now */ pd = &domain_list; for (pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list) @@ -247,7 +247,7 @@ chwall_init_state(struct acm_chwall_poli } } out: - write_unlock(&domlist_lock); + spin_unlock(&domlist_lock); return violation; /* returning "violation != 0" means that the currently running set of domains would * not be possible if the new policy had been enforced before starting them; for chinese diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/acm/acm_simple_type_enforcement_hooks.c --- a/xen/acm/acm_simple_type_enforcement_hooks.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/acm/acm_simple_type_enforcement_hooks.c Tue Jan 30 17:00:32 2007 -0800 @@ -175,36 +175,38 @@ ste_init_state(struct acm_ste_policy_buf int violation = 1; struct ste_ssid *ste_ssid, *ste_rssid; ssidref_t ste_ssidref, ste_rssidref; - struct domain **pd, *rdom; + struct domain *d, *rdom; domid_t rdomid; grant_entry_t sha_copy; int port, i; - read_lock(&domlist_lock); /* go by domain? or directly by global? event/grant list */ + rcu_read_lock(); /* domain_list */ + /* go by domain? or directly by global? event/grant list */ /* go through all domains and adjust policy as if this domain was started now */ - pd = &domain_list; - for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { + for ( d = rcu_dereference(domain_list); + d != NULL; + d = rcu_dereference(d->next_in_list) ) { ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(*pd)->ssid); + (struct acm_ssid_domain *)d->ssid); ste_ssidref = ste_ssid->ste_ssidref; traceprintk("%s: validating policy for eventch domain %x (ste-Ref=%x).\n", - __func__, (*pd)->domain_id, ste_ssidref); + __func__, d->domain_id, ste_ssidref); /* a) check for event channel conflicts */ for (port=0; port < NR_EVTCHN_BUCKETS; port++) { - spin_lock(&(*pd)->evtchn_lock); - if ((*pd)->evtchn[port] == NULL) { - spin_unlock(&(*pd)->evtchn_lock); + spin_lock(&d->evtchn_lock); + if (d->evtchn[port] == NULL) { + spin_unlock(&d->evtchn_lock); continue; } - if ((*pd)->evtchn[port]->state == ECS_INTERDOMAIN) { - rdom = (*pd)->evtchn[port]->u.interdomain.remote_dom; + if (d->evtchn[port]->state == ECS_INTERDOMAIN) { + rdom = d->evtchn[port]->u.interdomain.remote_dom; rdomid = rdom->domain_id; /* rdom now has remote domain */ ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, (struct acm_ssid_domain *)(rdom->ssid)); ste_rssidref = ste_rssid->ste_ssidref; - } else if ((*pd)->evtchn[port]->state == ECS_UNBOUND) { - rdomid = (*pd)->evtchn[port]->u.unbound.remote_domid; + } else if (d->evtchn[port]->state == ECS_UNBOUND) { + rdomid = (d->evtchn[port]->u.unbound.remote_domid; if ((rdom = get_domain_by_id(rdomid)) == NULL) { printk("%s: Error finding domain to id %x!\n", __func__, rdomid); goto out; @@ -215,34 +217,34 @@ ste_init_state(struct acm_ste_policy_buf ste_rssidref = ste_rssid->ste_ssidref; put_domain(rdom); } else { - spin_unlock(&(*pd)->evtchn_lock); + spin_unlock(&d->evtchn_lock); continue; /* port unused */ } - spin_unlock(&(*pd)->evtchn_lock); + spin_unlock(&d->evtchn_lock); /* rdom now has remote domain */ ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, (struct acm_ssid_domain *)(rdom->ssid)); ste_rssidref = ste_rssid->ste_ssidref; traceprintk("%s: eventch: domain %x (ssidref %x) --> domain %x (rssidref %x) used (port %x).\n", - __func__, (*pd)->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port); + __func__, d->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port); /* check whether on subj->ssid, obj->ssid share a common type*/ if (!have_common_type(ste_ssidref, ste_rssidref)) { printkd("%s: Policy violation in event channel domain %x -> domain %x.\n", - __func__, (*pd)->domain_id, rdomid); + __func__, d->domain_id, rdomid); goto out; } } /* b) check for grant table conflicts on shared pages */ - if ((*pd)->grant_table->shared == NULL) { - printkd("%s: Grant ... sharing for domain %x not setup!\n", __func__, (*pd)->domain_id); + if (d->grant_table->shared == NULL) { + printkd("%s: Grant ... sharing for domain %x not setup!\n", __func__, (d->domain_id); continue; } for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) { - sha_copy = (*pd)->grant_table->shared[i]; + sha_copy = (d->grant_table->shared[i]; if ( sha_copy.flags ) { printkd("%s: grant dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n", - __func__, (*pd)->domain_id, i, sha_copy.flags, sha_copy.domid, + __func__, (d->domain_id, i, sha_copy.flags, sha_copy.domid, (unsigned long)sha_copy.frame); rdomid = sha_copy.domid; if ((rdom = get_domain_by_id(rdomid)) == NULL) { @@ -256,7 +258,7 @@ ste_init_state(struct acm_ste_policy_buf put_domain(rdom); if (!have_common_type(ste_ssidref, ste_rssidref)) { printkd("%s: Policy violation in grant table sharing domain %x -> domain %x.\n", - __func__, (*pd)->domain_id, rdomid); + __func__, d->domain_id, rdomid); goto out; } } @@ -264,7 +266,7 @@ ste_init_state(struct acm_ste_policy_buf } violation = 0; out: - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ return violation; /* returning "violation != 0" means that existing sharing between domains would not * have been allowed if the new policy had been enforced before the sharing; for ste, @@ -280,7 +282,7 @@ ste_set_policy(u8 *buf, u32 buf_size) struct acm_ste_policy_buffer *ste_buf = (struct acm_ste_policy_buffer *)buf; void *ssidrefsbuf; struct ste_ssid *ste_ssid; - struct domain **pd; + struct domain *d; int i; if (buf_size < sizeof(struct acm_ste_policy_buffer)) @@ -325,15 +327,16 @@ ste_set_policy(u8 *buf, u32 buf_size) ste_bin_pol.ssidrefs = (domaintype_t *)ssidrefsbuf; /* clear all ste caches */ - read_lock(&domlist_lock); - pd = &domain_list; - for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { + rcu_read_lock(); /* domain_list */ + for ( d = rcu_dereference(domain_list); + d != NULL; + d = rcu_dereference(d->next_in_list) ) { ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(*pd)->ssid); + (struct acm_ssid_domain *)(d)->ssid); for (i=0; iste_cache[i].valid = ACM_STE_free; } - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ return ACM_OK; error_free: @@ -435,14 +438,16 @@ clean_id_from_cache(domid_t id) { struct ste_ssid *ste_ssid; int i; - struct domain **pd; + struct domain *d; struct acm_ssid_domain *ssid; printkd("deleting cache for dom %x.\n", id); - read_lock(&domlist_lock); /* look through caches of all domains */ - pd = &domain_list; - for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { - ssid = (struct acm_ssid_domain *)((*pd)->ssid); + rcu_read_lock(); /* domain_list */ + /* look through caches of all domains */ + for ( d = rcu_dereference(domain_list); + d != NULL; + d = rcu_dereference(d->next_in_list ) { + ssid = (struct acm_ssid_domain *)(d->ssid); if (ssid == NULL) continue; /* hanging domain structure, no ssid any more ... */ @@ -458,7 +463,8 @@ clean_id_from_cache(domid_t id) ste_ssid->ste_cache[i].valid = ACM_STE_free; } out: - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ + } /*************************** diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/arch/ia64/xen/flushtlb.c --- a/xen/arch/ia64/xen/flushtlb.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/arch/ia64/xen/flushtlb.c Tue Jan 30 17:00:32 2007 -0800 @@ -78,7 +78,7 @@ new_tlbflush_clock_period(void) struct domain* d; struct vcpu* v; /* flush all vhpt of vcpu of all existing domain. */ - read_lock(&domlist_lock); + rcu_read_lock(); /* domain_list */ for_each_domain(d) { for_each_vcpu(d, v) { vcpu_purge_tr_entry(&PSCBX(v,dtlb)); @@ -92,7 +92,7 @@ new_tlbflush_clock_period(void) vcpu_vhpt_flush(v); } } - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ /* unlock has release semantics */ /* flush all vhpt of physical cpu and mTLB */ diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/arch/x86/time.c --- a/xen/arch/x86/time.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/arch/x86/time.c Tue Jan 30 17:00:32 2007 -0800 @@ -720,10 +720,11 @@ void do_settime(unsigned long secs, unsi wc_nsec = _wc_nsec = (u32)y; spin_unlock(&wc_lock); - read_lock(&domlist_lock); + rcu_read_lock(); /* domain_list */ for_each_domain ( d ) update_domain_wallclock_time(d); - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ + } static void local_time_calibration(void *unused) diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/common/domain.c --- a/xen/common/domain.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/common/domain.c Tue Jan 30 17:00:32 2007 -0800 @@ -24,14 +24,19 @@ #include #include #include +#include #include #include #include -/* Both these structures are protected by the domlist_lock. */ -DEFINE_RWLOCK(domlist_lock); +/* domain_list and domain_hash are protected by the same RCU functions + * they are not modified atomically and may be inconsistent with each other + * thus, only one of these can be used on each RCU reader-side + * critical section */ struct domain *domain_hash[DOMAIN_HASH_SIZE]; struct domain *domain_list; +/* spin lock for domain_list and domain_hash (RCU) */ +DEFINE_SPINLOCK(domlist_lock); struct domain *dom0; @@ -174,16 +179,20 @@ struct domain *domain_create(domid_t dom if ( !is_idle_domain(d) ) { - write_lock(&domlist_lock); + spin_lock(&domlist_lock); pd = &domain_list; /* NB. domain_list maintained in order of domid. */ for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) if ( (*pd)->domain_id > d->domain_id ) break; d->next_in_list = *pd; - *pd = d; d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)]; - domain_hash[DOMAIN_HASH(domid)] = d; - write_unlock(&domlist_lock); + /* Two rcu assignments are not atomic + * Readers may see inconsistent domlist and hash table + * That is OK as long as each RCU reader-side critical section uses + * only one or them */ + rcu_assign_pointer(*pd, d); + rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d); + spin_unlock(&domlist_lock); } return d; @@ -207,8 +216,8 @@ struct domain *get_domain_by_id(domid_t { struct domain *d; - read_lock(&domlist_lock); - d = domain_hash[DOMAIN_HASH(dom)]; + rcu_read_lock(); + d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]); while ( d != NULL ) { if ( d->domain_id == dom ) @@ -217,9 +226,9 @@ struct domain *get_domain_by_id(domid_t d = NULL; break; } - d = d->next_in_hashbucket; - } - read_unlock(&domlist_lock); + d = rcu_dereference(d->next_in_hashbucket); + } + rcu_read_unlock(); return d; } @@ -314,6 +323,23 @@ void domain_pause_for_debugger(void) send_guest_global_virq(dom0, VIRQ_DEBUGGER); } +/* Complete domain destroy after RCU readers are not holding + old references */ +static void complete_domain_destroy(struct rcu_head *head) +{ + struct domain *d = container_of(head, struct domain, rcu); + + rangeset_domain_destroy(d); + + evtchn_destroy(d); + grant_table_destroy(d); + + arch_domain_destroy(d); + + free_domain(d); + + send_guest_global_virq(dom0, VIRQ_DOM_EXC); +} /* Release resources belonging to task @p. */ void domain_destroy(struct domain *d) @@ -331,27 +357,19 @@ void domain_destroy(struct domain *d) return; /* Delete from task list and task hashtable. */ - write_lock(&domlist_lock); + spin_lock(&domlist_lock); pd = &domain_list; while ( *pd != d ) pd = &(*pd)->next_in_list; - *pd = d->next_in_list; + rcu_assign_pointer(*pd, d->next_in_list); pd = &domain_hash[DOMAIN_HASH(d->domain_id)]; while ( *pd != d ) pd = &(*pd)->next_in_hashbucket; - *pd = d->next_in_hashbucket; - write_unlock(&domlist_lock); - - rangeset_domain_destroy(d); - - evtchn_destroy(d); - grant_table_destroy(d); - - arch_domain_destroy(d); - - free_domain(d); - - send_guest_global_virq(dom0, VIRQ_DOM_EXC); + rcu_assign_pointer(*pd, d->next_in_hashbucket); + spin_unlock(&domlist_lock); + + /* schedule RCU asynchronous completion of domain destroy */ + call_rcu(&d->rcu, complete_domain_destroy); } static void vcpu_pause_setup(struct vcpu *v) diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/common/domctl.c --- a/xen/common/domctl.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/common/domctl.c Tue Jan 30 17:00:32 2007 -0800 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -140,12 +141,12 @@ static unsigned int default_vcpu0_locati cpumask_t cpu_exclude_map; /* Do an initial CPU placement. Pick the least-populated CPU. */ - read_lock(&domlist_lock); + rcu_read_lock(); /* domain_list */ for_each_domain ( d ) for_each_vcpu ( d, v ) if ( !test_bit(_VCPUF_down, &v->vcpu_flags) ) cnt[v->processor]++; - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ /* * If we're on a HT system, we only auto-allocate to a non-primary HT. We @@ -480,7 +481,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc if ( dom == DOMID_SELF ) dom = current->domain->domain_id; - read_lock(&domlist_lock); + rcu_read_lock(); /* domain_list */ for_each_domain ( d ) { @@ -490,12 +491,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc if ( (d == NULL) || !get_domain(d) ) { - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ ret = -ESRCH; break; } - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ getdomaininfo(d, &op->u.getdomaininfo); diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/common/keyhandler.c --- a/xen/common/keyhandler.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/common/keyhandler.c Tue Jan 30 17:00:32 2007 -0800 @@ -145,7 +145,7 @@ static void dump_domains(unsigned char k printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key, (u32)(now>>32), (u32)now); - read_lock(&domlist_lock); + rcu_read_lock(); /* domain_list */ for_each_domain ( d ) { @@ -196,7 +196,7 @@ static void dump_domains(unsigned char k } } - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ } static cpumask_t read_clocks_cpumask = CPU_MASK_NONE; diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/common/sysctl.c --- a/xen/common/sysctl.c Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/common/sysctl.c Tue Jan 30 17:00:32 2007 -0800 @@ -78,7 +78,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc struct xen_domctl_getdomaininfo info; u32 num_domains = 0; - read_lock(&domlist_lock); + rcu_read_lock(); /* domain_list */ for_each_domain ( d ) { @@ -106,7 +106,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc num_domains++; } - read_unlock(&domlist_lock); + rcu_read_unlock(); /* domain_list */ if ( ret != 0 ) break; diff -r 5e8da0663ff8 -r 29b0c275dd3c xen/include/xen/sched.h --- a/xen/include/xen/sched.h Tue Jan 30 01:23:58 2007 +0000 +++ b/xen/include/xen/sched.h Tue Jan 30 17:00:32 2007 -0800 @@ -16,6 +16,7 @@ #include #include #include +#include #include #ifdef CONFIG_COMPAT @@ -24,7 +25,7 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_in #endif extern unsigned long volatile jiffies; -extern rwlock_t domlist_lock; +extern spinlock_t domlist_lock; /* A global pointer to the initial domain (DOM0). */ extern struct domain *dom0; @@ -193,6 +194,8 @@ struct domain /* OProfile support. */ struct xenoprof *xenoprof; int32_t time_offset_seconds; + + struct rcu_head rcu; }; struct domain_setup_info @@ -365,16 +368,19 @@ unsigned long hypercall_create_continuat local_events_need_delivery() \ )) -/* This domain_hash and domain_list are protected by the domlist_lock. */ +/* domain_list and domain_hash are protected by the same RCU functions + * they are not modified atomically and may be inconsistent with each other + * thus, only one of these can be used on each RCU reader-side + * critical section */ #define DOMAIN_HASH_SIZE 256 #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1)) extern struct domain *domain_hash[DOMAIN_HASH_SIZE]; extern struct domain *domain_list; #define for_each_domain(_d) \ - for ( (_d) = domain_list; \ + for ( (_d) = rcu_dereference(domain_list); \ (_d) != NULL; \ - (_d) = (_d)->next_in_list ) + (_d) = rcu_dereference((_d)->next_in_list )) \ #define for_each_vcpu(_d,_v) \ for ( (_v) = (_d)->vcpu[0]; \