WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] Use RCU for domain_list and domain_hash.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Use RCU for domain_list and domain_hash.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 21 Feb 2007 18:00:15 -0800
Delivery-date: Wed, 21 Feb 2007 17:59:55 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1172074429 0
# Node ID 97826d77bd4debec34716c9492aeffefa91b3932
# Parent  3c581edac93accaedde8deacc532dd53e0ffb5c8
Use RCU for domain_list and domain_hash.

Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/acm/acm_chinesewall_hooks.c             |   15 ++---
 xen/acm/acm_simple_type_enforcement_hooks.c |   76 ++++++++++++++--------------
 xen/arch/ia64/linux-xen/mca.c               |    2 
 xen/arch/ia64/linux-xen/perfmon.c           |   12 +---
 xen/arch/powerpc/audit.c                    |    2 
 xen/arch/x86/hvm/svm/vmcb.c                 |    5 +
 xen/arch/x86/hvm/vmx/vmcs.c                 |    5 +
 xen/arch/x86/mm/shadow/common.c             |    4 +
 xen/arch/x86/time.c                         |    4 -
 xen/common/domain.c                         |   72 ++++++++++++++++----------
 xen/common/domctl.c                         |   11 ++--
 xen/common/keyhandler.c                     |    4 -
 xen/common/sched_sedf.c                     |    8 ++
 xen/common/sysctl.c                         |    4 -
 xen/include/xen/rcupdate.h                  |   53 +++++++++++++++++++
 xen/include/xen/sched.h                     |   17 +++---
 16 files changed, 194 insertions(+), 100 deletions(-)

diff -r 3c581edac93a -r 97826d77bd4d xen/acm/acm_chinesewall_hooks.c
--- a/xen/acm/acm_chinesewall_hooks.c   Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/acm/acm_chinesewall_hooks.c   Wed Feb 21 16:13:49 2007 +0000
@@ -194,19 +194,18 @@ chwall_init_state(struct acm_chwall_poli
     int violation = 0, i, j;
     struct chwall_ssid *chwall_ssid;
     ssidref_t chwall_ssidref;
-    struct domain **pd;
-
-    write_lock(&domlist_lock);
+    struct domain *d;
+
+    spin_lock(&domlist_update_lock);
     /* go through all domains and adjust policy as if this domain was started 
now */
-    pd = &domain_list;
-    for (pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list)
+    for_each_domain ( d )
     {
         chwall_ssid =
             GET_SSIDP(ACM_CHINESE_WALL_POLICY,
-                      (struct acm_ssid_domain *) (*pd)->ssid);
+                      (struct acm_ssid_domain *)d->ssid);
         chwall_ssidref = chwall_ssid->chwall_ssidref;
         traceprintk("%s: validating policy for domain %x (chwall-REF=%x).\n",
-                    __func__, (*pd)->domain_id, chwall_ssidref);
+                    __func__, d->domain_id, chwall_ssidref);
         /* a) adjust types ref-count for running domains */
         for (i = 0; i < chwall_buf->chwall_max_types; i++)
             running_types[i] +=
@@ -247,7 +246,7 @@ chwall_init_state(struct acm_chwall_poli
         }
     }
  out:
-    write_unlock(&domlist_lock);
+    spin_unlock(&domlist_update_lock);
     return violation;
     /* returning "violation != 0" means that the currently running set of 
domains would
      * not be possible if the new policy had been enforced before starting 
them; for chinese
diff -r 3c581edac93a -r 97826d77bd4d xen/acm/acm_simple_type_enforcement_hooks.c
--- a/xen/acm/acm_simple_type_enforcement_hooks.c       Wed Feb 21 14:44:09 
2007 +0000
+++ b/xen/acm/acm_simple_type_enforcement_hooks.c       Wed Feb 21 16:13:49 
2007 +0000
@@ -175,36 +175,37 @@ ste_init_state(struct acm_ste_policy_buf
     int violation = 1;
     struct ste_ssid *ste_ssid, *ste_rssid;
     ssidref_t ste_ssidref, ste_rssidref;
-    struct domain **pd, *rdom;
+    struct domain *d, *rdom;
     domid_t rdomid;
     struct grant_entry sha_copy;
     int port, i;
 
-    read_lock(&domlist_lock); /* go by domain? or directly by global? 
event/grant list */
+    rcu_read_lock(&domlist_read_lock);
+    /* go by domain? or directly by global? event/grant list */
     /* go through all domains and adjust policy as if this domain was started 
now */
-    pd = &domain_list;
-    for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
+    for_each_domain ( d )
+    {
         ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
-                             (struct acm_ssid_domain *)(*pd)->ssid);
+                             (struct acm_ssid_domain *)d->ssid);
         ste_ssidref = ste_ssid->ste_ssidref;
         traceprintk("%s: validating policy for eventch domain %x 
(ste-Ref=%x).\n",
-                    __func__, (*pd)->domain_id, ste_ssidref);
+                    __func__, d->domain_id, ste_ssidref);
         /* a) check for event channel conflicts */
         for (port=0; port < NR_EVTCHN_BUCKETS; port++) {
-            spin_lock(&(*pd)->evtchn_lock);
-            if ((*pd)->evtchn[port] == NULL) {
-                spin_unlock(&(*pd)->evtchn_lock);
+            spin_lock(&d->evtchn_lock);
+            if (d->evtchn[port] == NULL) {
+                spin_unlock(&d->evtchn_lock);
                 continue;
             }
-            if ((*pd)->evtchn[port]->state == ECS_INTERDOMAIN) {
-                rdom = (*pd)->evtchn[port]->u.interdomain.remote_dom;
+            if (d->evtchn[port]->state == ECS_INTERDOMAIN) {
+                rdom = d->evtchn[port]->u.interdomain.remote_dom;
                 rdomid = rdom->domain_id;
                 /* rdom now has remote domain */
                 ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
                                       (struct acm_ssid_domain *)(rdom->ssid));
                 ste_rssidref = ste_rssid->ste_ssidref;
-            } else if ((*pd)->evtchn[port]->state == ECS_UNBOUND) {
-                rdomid = (*pd)->evtchn[port]->u.unbound.remote_domid;
+            } else if (d->evtchn[port]->state == ECS_UNBOUND) {
+                rdomid = d->evtchn[port]->u.unbound.remote_domid;
                 if ((rdom = get_domain_by_id(rdomid)) == NULL) {
                     printk("%s: Error finding domain to id %x!\n", __func__, 
rdomid);
                     goto out;
@@ -215,36 +216,36 @@ ste_init_state(struct acm_ste_policy_buf
                 ste_rssidref = ste_rssid->ste_ssidref;
                 put_domain(rdom);
             } else {
-                spin_unlock(&(*pd)->evtchn_lock);
+                spin_unlock(&d->evtchn_lock);
                 continue; /* port unused */
             }
-            spin_unlock(&(*pd)->evtchn_lock);
+            spin_unlock(&d->evtchn_lock);
 
             /* rdom now has remote domain */
             ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
                                   (struct acm_ssid_domain *)(rdom->ssid));
             ste_rssidref = ste_rssid->ste_ssidref;
             traceprintk("%s: eventch: domain %x (ssidref %x) --> domain %x 
(rssidref %x) used (port %x).\n", 
-                        __func__, (*pd)->domain_id, ste_ssidref, 
rdom->domain_id, ste_rssidref, port);  
+                        __func__, d->domain_id, ste_ssidref, rdom->domain_id, 
ste_rssidref, port);  
             /* check whether on subj->ssid, obj->ssid share a common type*/
             if (!have_common_type(ste_ssidref, ste_rssidref)) {
                 printkd("%s: Policy violation in event channel domain %x -> 
domain %x.\n",
-                        __func__, (*pd)->domain_id, rdomid);
+                        __func__, d->domain_id, rdomid);
                 goto out;
             }
         } 
         /* b) check for grant table conflicts on shared pages */
-        spin_lock(&(*pd)->grant_table->lock);
-        for ( i = 0; i < nr_grant_entries((*pd)->grant_table); i++ ) {
+        spin_lock(&d->grant_table->lock);
+        for ( i = 0; i < nr_grant_entries(d->grant_table); i++ ) {
 #define SPP (PAGE_SIZE / sizeof(struct grant_entry))
-            sha_copy = (*pd)->grant_table->shared[i/SPP][i%SPP];
+            sha_copy = d->grant_table->shared[i/SPP][i%SPP];
             if ( sha_copy.flags ) {
                 printkd("%s: grant dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) 
frame:(%lx)\n",
-                        __func__, (*pd)->domain_id, i, sha_copy.flags, 
sha_copy.domid, 
+                        __func__, d->domain_id, i, sha_copy.flags, 
sha_copy.domid, 
                         (unsigned long)sha_copy.frame);
                 rdomid = sha_copy.domid;
                 if ((rdom = get_domain_by_id(rdomid)) == NULL) {
-                    spin_unlock(&(*pd)->grant_table->lock);
+                    spin_unlock(&d->grant_table->lock);
                     printkd("%s: domain not found ERROR!\n", __func__);
                     goto out;
                 };
@@ -254,18 +255,18 @@ ste_init_state(struct acm_ste_policy_buf
                 ste_rssidref = ste_rssid->ste_ssidref;
                 put_domain(rdom);
                 if (!have_common_type(ste_ssidref, ste_rssidref)) {
-                    spin_unlock(&(*pd)->grant_table->lock);
+                    spin_unlock(&d->grant_table->lock);
                     printkd("%s: Policy violation in grant table sharing 
domain %x -> domain %x.\n",
-                            __func__, (*pd)->domain_id, rdomid);
+                            __func__, d->domain_id, rdomid);
                     goto out;
                 }
             }
         }
-        spin_unlock(&(*pd)->grant_table->lock);
+        spin_unlock(&d->grant_table->lock);
     }
     violation = 0;
  out:
-    read_unlock(&domlist_lock);
+    rcu_read_unlock(&domlist_read_lock);
     return violation;
     /* returning "violation != 0" means that existing sharing between domains 
would not 
      * have been allowed if the new policy had been enforced before the 
sharing; for ste, 
@@ -281,7 +282,7 @@ ste_set_policy(u8 *buf, u32 buf_size)
     struct acm_ste_policy_buffer *ste_buf = (struct acm_ste_policy_buffer 
*)buf;
     void *ssidrefsbuf;
     struct ste_ssid *ste_ssid;
-    struct domain **pd;
+    struct domain *d;
     int i;
 
     if (buf_size < sizeof(struct acm_ste_policy_buffer))
@@ -326,15 +327,14 @@ ste_set_policy(u8 *buf, u32 buf_size)
     ste_bin_pol.ssidrefs = (domaintype_t *)ssidrefsbuf;
 
     /* clear all ste caches */
-    read_lock(&domlist_lock);
-    pd = &domain_list;
-    for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
+    rcu_read_lock(&domlist_read_lock);
+    for_each_domain ( d ) {
         ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, 
-                             (struct acm_ssid_domain *)(*pd)->ssid);
+                             (struct acm_ssid_domain *)(d)->ssid);
         for (i=0; i<ACM_TE_CACHE_SIZE; i++)
             ste_ssid->ste_cache[i].valid = ACM_STE_free;
     }
-    read_unlock(&domlist_lock);
+    rcu_read_unlock(&domlist_read_lock);
     return ACM_OK;
 
  error_free:
@@ -436,14 +436,14 @@ clean_id_from_cache(domid_t id)
 {
     struct ste_ssid *ste_ssid;
     int i;
-    struct domain **pd;
+    struct domain *d;
     struct acm_ssid_domain *ssid;
 
     printkd("deleting cache for dom %x.\n", id);
-    read_lock(&domlist_lock); /* look through caches of all domains */
-    pd = &domain_list;
-    for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
-        ssid = (struct acm_ssid_domain *)((*pd)->ssid);
+    rcu_read_lock(&domlist_read_lock);
+    /* look through caches of all domains */
+    for_each_domain ( d ) {
+        ssid = (struct acm_ssid_domain *)(d->ssid);
 
         if (ssid == NULL)
             continue; /* hanging domain structure, no ssid any more ... */
@@ -459,7 +459,7 @@ clean_id_from_cache(domid_t id)
                 ste_ssid->ste_cache[i].valid = ACM_STE_free;
     }
  out:
-    read_unlock(&domlist_lock);
+    rcu_read_unlock(&domlist_read_lock);
 }
 
 /***************************
diff -r 3c581edac93a -r 97826d77bd4d xen/arch/ia64/linux-xen/mca.c
--- a/xen/arch/ia64/linux-xen/mca.c     Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/arch/ia64/linux-xen/mca.c     Wed Feb 21 16:13:49 2007 +0000
@@ -790,6 +790,7 @@ init_handler_platform (pal_min_state_are
                        /* this route is for dump routine */
                        unw_init_running(try_crashdump, pt);
                } else {
+                       rcu_read_lock(&domlist_read_lock);
                        for_each_domain(d) {
                                for_each_vcpu(d, v) {
                                        printk("Backtrace of current vcpu "
@@ -798,6 +799,7 @@ init_handler_platform (pal_min_state_are
                                        show_stack(v, NULL);
                                }
                        }
+                       rcu_read_unlock(&domlist_read_lock);
                }
        }
        unw_init_running(freeze_cpu_osinit, NULL);
diff -r 3c581edac93a -r 97826d77bd4d xen/arch/ia64/linux-xen/perfmon.c
--- a/xen/arch/ia64/linux-xen/perfmon.c Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/arch/ia64/linux-xen/perfmon.c Wed Feb 21 16:13:49 2007 +0000
@@ -7225,7 +7225,6 @@ DEFINE_PER_CPU(pfm_context_t*, xenpfm_co
 /*
  * note: some functions mask interrupt with this lock held
  * so that this lock can't be locked from interrupt handler.
- * lock order domlist_lock => xenpfm_context_lock
  */
 DEFINE_SPINLOCK(xenpfm_context_lock);
 
@@ -7507,10 +7506,8 @@ xenpfm_context_unload(void)
                arg.error[cpu] = 0;
 
        BUG_ON(in_irq());
-       read_lock(&domlist_lock);
        spin_lock(&xenpfm_context_lock);
        error = xenpfm_start_stop_locked(0);
-       read_unlock(&domlist_lock);
        if (error) {
                spin_unlock(&xenpfm_context_lock);
                return error;
@@ -7688,10 +7685,11 @@ xenpfm_start_stop_locked(int is_start)
        while (atomic_read(&arg.started) != cpus)
                cpu_relax();
 
-       for_each_domain(d) {
+       rcu_read_lock(&domlist_read_lock);
+       for_each_domain(d)
                for_each_vcpu(d, v)
                        xenpfm_start_stop_vcpu(v, is_start);
-       }
+       rcu_read_unlock(&domlist_read_lock);
 
        arg.error[smp_processor_id()] = __xenpfm_start_stop(is_start);
        atomic_inc(&arg.finished);
@@ -7716,11 +7714,9 @@ xenpfm_start_stop(int is_start)
        int error;
        
        BUG_ON(in_irq());
-       read_lock(&domlist_lock);
        spin_lock(&xenpfm_context_lock);
-       error =xenpfm_start_stop_locked(is_start);
+       error = xenpfm_start_stop_locked(is_start);
        spin_unlock(&xenpfm_context_lock);
-       read_unlock(&domlist_lock);
 
        return error;
 }
diff -r 3c581edac93a -r 97826d77bd4d xen/arch/powerpc/audit.c
--- a/xen/arch/powerpc/audit.c  Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/arch/powerpc/audit.c  Wed Feb 21 16:13:49 2007 +0000
@@ -34,8 +34,10 @@ void audit_domains(void)
 void audit_domains(void)
 {
     struct domain *d;
+    rcu_read_lock(&domlist_read_lock);
     for_each_domain ( d )
         audit_domain(d);
+    rcu_read_unlock(&domlist_read_lock);
 }
 
 void audit_domains_key(unsigned char key)
diff -r 3c581edac93a -r 97826d77bd4d xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c       Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/arch/x86/hvm/svm/vmcb.c       Wed Feb 21 16:13:49 2007 +0000
@@ -330,6 +330,9 @@ static void vmcb_dump(unsigned char ch)
     struct vcpu *v;
     
     printk("*********** VMCB Areas **************\n");
+
+    rcu_read_lock(&domlist_read_lock);
+
     for_each_domain ( d )
     {
         if ( !is_hvm_domain(d) )
@@ -341,6 +344,8 @@ static void vmcb_dump(unsigned char ch)
             svm_dump_vmcb("key_handler", v->arch.hvm_svm.vmcb);
         }
     }
+
+    rcu_read_unlock(&domlist_read_lock);
 
     printk("**************************************\n");
 }
diff -r 3c581edac93a -r 97826d77bd4d xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c       Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmcs.c       Wed Feb 21 16:13:49 2007 +0000
@@ -567,6 +567,9 @@ static void vmcs_dump(unsigned char ch)
     struct vcpu *v;
     
     printk("*********** VMCS Areas **************\n");
+
+    rcu_read_lock(&domlist_read_lock);
+
     for_each_domain ( d )
     {
         if ( !is_hvm_domain(d) )
@@ -581,6 +584,8 @@ static void vmcs_dump(unsigned char ch)
         }
     }
 
+    rcu_read_unlock(&domlist_read_lock);
+
     printk("**************************************\n");
 }
 
diff -r 3c581edac93a -r 97826d77bd4d xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Wed Feb 21 16:13:49 2007 +0000
@@ -890,13 +890,17 @@ static void shadow_blow_all_tables(unsig
 {
     struct domain *d;
     printk("'%c' pressed -> blowing all shadow tables\n", c);
+    rcu_read_lock(&domlist_read_lock);
     for_each_domain(d)
+    {
         if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
         {
             shadow_lock(d);
             shadow_blow_tables(d);
             shadow_unlock(d);
         }
+    }
+    rcu_read_unlock(&domlist_read_lock);
 }
 
 /* Register this function in the Xen console keypress table */
diff -r 3c581edac93a -r 97826d77bd4d xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/arch/x86/time.c       Wed Feb 21 16:13:49 2007 +0000
@@ -720,10 +720,10 @@ void do_settime(unsigned long secs, unsi
     wc_nsec = _wc_nsec = (u32)y;
     spin_unlock(&wc_lock);
 
-    read_lock(&domlist_lock);
+    rcu_read_lock(&domlist_read_lock);
     for_each_domain ( d )
         update_domain_wallclock_time(d);
-    read_unlock(&domlist_lock);
+    rcu_read_unlock(&domlist_read_lock);
 }
 
 static void local_time_calibration(void *unused)
diff -r 3c581edac93a -r 97826d77bd4d xen/common/domain.c
--- a/xen/common/domain.c       Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/common/domain.c       Wed Feb 21 16:13:49 2007 +0000
@@ -24,13 +24,18 @@
 #include <xen/shutdown.h>
 #include <xen/percpu.h>
 #include <xen/multicall.h>
+#include <xen/rcupdate.h>
 #include <asm/debugger.h>
 #include <public/sched.h>
 #include <public/vcpu.h>
 
-/* Both these structures are protected by the domlist_lock. */
-DEFINE_RWLOCK(domlist_lock);
-struct domain *domain_hash[DOMAIN_HASH_SIZE];
+/* Protect updates/reads (resp.) of domain_list and domain_hash. */
+DEFINE_SPINLOCK(domlist_update_lock);
+DEFINE_RCU_READ_LOCK(domlist_read_lock);
+
+#define DOMAIN_HASH_SIZE 256
+#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
+static struct domain *domain_hash[DOMAIN_HASH_SIZE];
 struct domain *domain_list;
 
 struct domain *dom0;
@@ -174,16 +179,20 @@ struct domain *domain_create(domid_t dom
 
     if ( !is_idle_domain(d) )
     {
-        write_lock(&domlist_lock);
+        spin_lock(&domlist_update_lock);
         pd = &domain_list; /* NB. domain_list maintained in order of domid. */
         for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
             if ( (*pd)->domain_id > d->domain_id )
                 break;
         d->next_in_list = *pd;
-        *pd = d;
         d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
-        domain_hash[DOMAIN_HASH(domid)] = d;
-        write_unlock(&domlist_lock);
+        /* Two rcu assignments are not atomic 
+         * Readers may see inconsistent domlist and hash table
+         * That is OK as long as each RCU reader-side critical section uses
+         * only one of them. */
+        rcu_assign_pointer(*pd, d);
+        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
+        spin_unlock(&domlist_update_lock);
     }
 
     return d;
@@ -207,8 +216,8 @@ struct domain *get_domain_by_id(domid_t 
 {
     struct domain *d;
 
-    read_lock(&domlist_lock);
-    d = domain_hash[DOMAIN_HASH(dom)];
+    rcu_read_lock(&domlist_read_lock);
+    d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
     while ( d != NULL )
     {
         if ( d->domain_id == dom )
@@ -217,9 +226,9 @@ struct domain *get_domain_by_id(domid_t 
                 d = NULL;
             break;
         }
-        d = d->next_in_hashbucket;
-    }
-    read_unlock(&domlist_lock);
+        d = rcu_dereference(d->next_in_hashbucket);
+    }
+    rcu_read_unlock(&domlist_read_lock);
 
     return d;
 }
@@ -314,6 +323,23 @@ void domain_pause_for_debugger(void)
     send_guest_global_virq(dom0, VIRQ_DEBUGGER);
 }
 
+/* Complete domain destruction once RCU readers no longer hold
+   old references. */
+static void complete_domain_destroy(struct rcu_head *head)
+{
+    struct domain *d = container_of(head, struct domain, rcu);
+
+    rangeset_domain_destroy(d);
+
+    evtchn_destroy(d);
+    grant_table_destroy(d);
+
+    arch_domain_destroy(d);
+
+    free_domain(d);
+
+    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+}
 
 /* Release resources belonging to task @p. */
 void domain_destroy(struct domain *d)
@@ -331,27 +357,19 @@ void domain_destroy(struct domain *d)
         return;
 
     /* Delete from task list and task hashtable. */
-    write_lock(&domlist_lock);
+    spin_lock(&domlist_update_lock);
     pd = &domain_list;
     while ( *pd != d ) 
         pd = &(*pd)->next_in_list;
-    *pd = d->next_in_list;
+    rcu_assign_pointer(*pd, d->next_in_list);
     pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
     while ( *pd != d ) 
         pd = &(*pd)->next_in_hashbucket;
-    *pd = d->next_in_hashbucket;
-    write_unlock(&domlist_lock);
-
-    rangeset_domain_destroy(d);
-
-    evtchn_destroy(d);
-    grant_table_destroy(d);
-
-    arch_domain_destroy(d);
-
-    free_domain(d);
-
-    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+    rcu_assign_pointer(*pd, d->next_in_hashbucket);
+    spin_unlock(&domlist_update_lock);
+
+    /* schedule RCU asynchronous completion of domain destroy */
+    call_rcu(&d->rcu, complete_domain_destroy);
 }
 
 static void vcpu_pause_setup(struct vcpu *v)
diff -r 3c581edac93a -r 97826d77bd4d xen/common/domctl.c
--- a/xen/common/domctl.c       Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/common/domctl.c       Wed Feb 21 16:13:49 2007 +0000
@@ -17,6 +17,7 @@
 #include <xen/trace.h>
 #include <xen/console.h>
 #include <xen/iocap.h>
+#include <xen/rcupdate.h>
 #include <xen/guest_access.h>
 #include <xen/bitmap.h>
 #include <asm/current.h>
@@ -140,12 +141,12 @@ static unsigned int default_vcpu0_locati
     cpumask_t      cpu_exclude_map;
 
     /* Do an initial CPU placement. Pick the least-populated CPU. */
-    read_lock(&domlist_lock);
+    rcu_read_lock(&domlist_read_lock);
     for_each_domain ( d )
         for_each_vcpu ( d, v )
         if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
             cnt[v->processor]++;
-    read_unlock(&domlist_lock);
+    rcu_read_unlock(&domlist_read_lock);
 
     /*
      * If we're on a HT system, we only auto-allocate to a non-primary HT. We 
@@ -480,7 +481,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
         if ( dom == DOMID_SELF )
             dom = current->domain->domain_id;
 
-        read_lock(&domlist_lock);
+        rcu_read_lock(&domlist_read_lock);
 
         for_each_domain ( d )
         {
@@ -490,12 +491,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
 
         if ( (d == NULL) || !get_domain(d) )
         {
-            read_unlock(&domlist_lock);
+            rcu_read_unlock(&domlist_read_lock);
             ret = -ESRCH;
             break;
         }
 
-        read_unlock(&domlist_lock);
+        rcu_read_unlock(&domlist_read_lock);
 
         getdomaininfo(d, &op->u.getdomaininfo);
 
diff -r 3c581edac93a -r 97826d77bd4d xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/common/keyhandler.c   Wed Feb 21 16:13:49 2007 +0000
@@ -145,7 +145,7 @@ static void dump_domains(unsigned char k
     printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
            (u32)(now>>32), (u32)now);
 
-    read_lock(&domlist_lock);
+    rcu_read_lock(&domlist_read_lock);
 
     for_each_domain ( d )
     {
@@ -196,7 +196,7 @@ static void dump_domains(unsigned char k
         }
     }
 
-    read_unlock(&domlist_lock);
+    rcu_read_unlock(&domlist_read_lock);
 }
 
 static cpumask_t read_clocks_cpumask = CPU_MASK_NONE;
diff -r 3c581edac93a -r 97826d77bd4d xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c   Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/common/sched_sedf.c   Wed Feb 21 16:13:49 2007 +0000
@@ -1277,6 +1277,7 @@ static void sedf_dump_cpu_state(int i)
     loop = 0;
     printk("\nnot on Q\n");
 
+    rcu_read_lock(&domlist_read_lock);
     for_each_domain ( d )
     {
         for_each_vcpu(d, ed)
@@ -1288,6 +1289,7 @@ static void sedf_dump_cpu_state(int i)
             }
         }
     }
+    rcu_read_unlock(&domlist_read_lock);
 }
 
 
@@ -1298,8 +1300,9 @@ static int sedf_adjust_weights(struct xe
     struct domain      *d;
     int                 sumw[NR_CPUS] = { 0 };
     s_time_t            sumt[NR_CPUS] = { 0 };
- 
+
     /* Sum across all weights. */
+    rcu_read_lock(&domlist_read_lock);
     for_each_domain( d )
     {
         for_each_vcpu( d, p )
@@ -1323,8 +1326,10 @@ static int sedf_adjust_weights(struct xe
             }
         }
     }
+    rcu_read_unlock(&domlist_read_lock);
 
     /* Adjust all slices (and periods) to the new weight. */
+    rcu_read_lock(&domlist_read_lock);
     for_each_domain( d )
     {
         for_each_vcpu ( d, p )
@@ -1341,6 +1346,7 @@ static int sedf_adjust_weights(struct xe
             }
         }
     }
+    rcu_read_unlock(&domlist_read_lock);
 
     return 0;
 }
diff -r 3c581edac93a -r 97826d77bd4d xen/common/sysctl.c
--- a/xen/common/sysctl.c       Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/common/sysctl.c       Wed Feb 21 16:13:49 2007 +0000
@@ -78,7 +78,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
         struct xen_domctl_getdomaininfo info;
         u32 num_domains = 0;
 
-        read_lock(&domlist_lock);
+        rcu_read_lock(&domlist_read_lock);
 
         for_each_domain ( d )
         {
@@ -106,7 +106,7 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
             num_domains++;
         }
         
-        read_unlock(&domlist_lock);
+        rcu_read_unlock(&domlist_read_lock);
         
         if ( ret != 0 )
             break;
diff -r 3c581edac93a -r 97826d77bd4d xen/include/xen/rcupdate.h
--- a/xen/include/xen/rcupdate.h        Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/include/xen/rcupdate.h        Wed Feb 21 16:13:49 2007 +0000
@@ -111,6 +111,59 @@ int rcu_pending(int cpu);
 int rcu_pending(int cpu);
 int rcu_needs_cpu(int cpu);
 
+/*
+ * Dummy lock type for passing to rcu_read_{lock,unlock}. Currently exists
+ * only to document the reason for rcu_read_lock() critical sections.
+ */
+struct _rcu_read_lock {};
+typedef struct _rcu_read_lock rcu_read_lock_t;
+#define DEFINE_RCU_READ_LOCK(x) rcu_read_lock_t x
+
+/**
+ * rcu_read_lock - mark the beginning of an RCU read-side critical section.
+ *
+ * When call_rcu() is invoked
+ * on one CPU while other CPUs are within RCU read-side critical
+ * sections, invocation of the corresponding RCU callback is deferred
+ * until after all the other CPUs exit their critical sections.
+ *
+ * Note, however, that RCU callbacks are permitted to run concurrently
+ * with RCU read-side critical sections.  One way that this can happen
+ * is via the following sequence of events: (1) CPU 0 enters an RCU
+ * read-side critical section, (2) CPU 1 invokes call_rcu() to register
+ * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
+ * (4) CPU 2 enters an RCU read-side critical section, (5) the RCU
+ * callback is invoked.  This is legal, because the RCU read-side critical
+ * section that was running concurrently with the call_rcu() (and which
+ * therefore might be referencing something that the corresponding RCU
+ * callback would free up) has completed before the corresponding
+ * RCU callback is invoked.
+ *
+ * RCU read-side critical sections may be nested.  Any deferred actions
+ * will be deferred until the outermost RCU read-side critical section
+ * completes.
+ *
+ * It is illegal to block while in an RCU read-side critical section.
+ */
+#define rcu_read_lock(x)       do { } while (0)
+
+/**
+ * rcu_read_unlock - marks the end of an RCU read-side critical section.
+ *
+ * See rcu_read_lock() for more information.
+ */
+#define rcu_read_unlock(x)     do { } while (0)
+
+/*
+ * So where is rcu_write_lock()?  It does not exist, as there is no
+ * way for writers to lock out RCU readers.  This is a feature, not
+ * a bug -- this property is what provides RCU's performance benefits.
+ * Of course, writers must coordinate with each other.  The normal
+ * spinlock primitives work well for this, but any other technique may be
+ * used as well.  RCU does not care how the writers keep out of each
+ * others' way, as long as they do so.
+ */
+
 /**
  * rcu_dereference - fetch an RCU-protected pointer in an
  * RCU read-side critical section.  This pointer may later
diff -r 3c581edac93a -r 97826d77bd4d xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Wed Feb 21 14:44:09 2007 +0000
+++ b/xen/include/xen/sched.h   Wed Feb 21 16:13:49 2007 +0000
@@ -16,6 +16,7 @@
 #include <xen/rangeset.h>
 #include <asm/domain.h>
 #include <xen/xenoprof.h>
+#include <xen/rcupdate.h>
 #include <xen/irq.h>
 
 #ifdef CONFIG_COMPAT
@@ -24,7 +25,6 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_in
 #endif
 
 extern unsigned long volatile jiffies;
-extern rwlock_t domlist_lock;
 
 /* A global pointer to the initial domain (DOM0). */
 extern struct domain *dom0;
@@ -193,6 +193,8 @@ struct domain
     /* OProfile support. */
     struct xenoprof *xenoprof;
     int32_t time_offset_seconds;
+
+    struct rcu_head rcu;
 };
 
 struct domain_setup_info
@@ -356,16 +358,17 @@ unsigned long hypercall_create_continuat
         local_events_need_delivery()            \
     ))
 
-/* This domain_hash and domain_list are protected by the domlist_lock. */
-#define DOMAIN_HASH_SIZE 256
-#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
-extern struct domain *domain_hash[DOMAIN_HASH_SIZE];
+/* Protect updates/reads (resp.) of domain_list and domain_hash. */
+extern spinlock_t domlist_update_lock;
+extern rcu_read_lock_t domlist_read_lock;
+
 extern struct domain *domain_list;
 
+/* Caller must hold the domlist_read_lock or domlist_update_lock. */
 #define for_each_domain(_d)                     \
- for ( (_d) = domain_list;                      \
+ for ( (_d) = rcu_dereference(domain_list);     \
        (_d) != NULL;                            \
-       (_d) = (_d)->next_in_list )
+       (_d) = rcu_dereference((_d)->next_in_list )) \
 
 #define for_each_vcpu(_d,_v)                    \
  for ( (_v) = (_d)->vcpu[0];                    \

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] Use RCU for domain_list and domain_hash., Xen patchbot-unstable <=