The following patch fixes a race condition in xend start. If you just
fork and exit from the parent, then when xend start returns, xend might
not be ready to accept connections. On a really slow system, it can
actually take tens of seconds to start xend (this seems like a bug in its
own right, but that's getting ahead of ourselves), so after xend start
returns, xm commands may keep failing for some time.
This patch makes it so that the xend threads notify the parent process
when they are ready to do actual work, which is when xend start
actually returns.
An easy way to see the before/after effect is to run:
xend start && xm list
both before and after applying the patch.
Regards,
Anthony Liguori
# HG changeset patch
# User anthony@xxxxxxxxxxxxxxxxxxxxx
# Node ID 8c866d412673e4169386ee5c96ba10f20f54bf6c
# Parent 1184286a2ee6c8a8d7e564bc533941e702d54ebb
Make sure xend start doesn't return until xend is ready to accept connections.
This means xend start && xm list actually works now (instead of throwing an
exception).
Signed-off-by: Anthony Liguori <aliguori@xxxxxxxxxx>
diff -r 1184286a2ee6 -r 8c866d412673 tools/misc/xend
--- a/tools/misc/xend Wed Sep 14 17:54:43 2005
+++ b/tools/misc/xend Wed Sep 14 23:47:37 2005
@@ -86,9 +86,6 @@
daemon = SrvDaemon.instance()
if not sys.argv[1:]:
print 'usage: %s {start|stop|restart}' % sys.argv[0]
- elif os.fork():
- pid, status = os.wait()
- return status >> 8
elif sys.argv[1] == 'start':
start_xenstored()
start_consoled()
diff -r 1184286a2ee6 -r 8c866d412673 tools/python/xen/web/httpserver.py
--- a/tools/python/xen/web/httpserver.py Wed Sep 14 17:54:43 2005
+++ b/tools/python/xen/web/httpserver.py Wed Sep 14 23:47:37 2005
@@ -273,6 +273,9 @@
self.interface = interface
self.port = port
self.root = root
+ # ready indicates when we are ready to begin accepting connections
+ # it should be set after a successful bind
+ self.ready = False
def getRoot(self):
return self.root
@@ -283,6 +286,7 @@
def run(self):
self.bind()
self.listen()
+ self.ready = True
self.requestLoop()
def stop(self):
diff -r 1184286a2ee6 -r 8c866d412673 tools/python/xen/xend/server/SrvDaemon.py
--- a/tools/python/xen/xend/server/SrvDaemon.py Wed Sep 14 17:54:43 2005
+++ b/tools/python/xen/xend/server/SrvDaemon.py Wed Sep 14 23:47:37 2005
@@ -137,13 +137,6 @@
else:
return 0
- def onSIGCHLD(self, signum, frame):
- if self.child > 0:
- try:
- pid, sts = os.waitpid(self.child, os.WNOHANG)
- except os.error, ex:
- pass
-
def fork_pid(self, pidfile):
"""Fork and write the pid of the child to 'pidfile'.
@@ -200,15 +193,29 @@
# Trying to run an already-running service is a success.
return 0
- signal.signal(signal.SIGCHLD, self.onSIGCHLD)
+ ret = 0
+
+ # we use a pipe to communicate between the parent and the child process
+ # this way we know when the child has actually initialized itself so
+ # we can avoid a race condition during startup
+
+ r,w = os.pipe()
if self.fork_pid(XEND_PID_FILE):
- #Parent. Sleep to give child time to start.
- time.sleep(1)
+ os.close(w)
+ r = os.fdopen(r, 'r')
+ s = r.read()
+ r.close()
+ if not len(s):
+ ret = 1
+ else:
+ ret = int(s)
else:
+ os.close(r)
# Child
self.tracing(trace)
- self.run()
- return 0
+ self.run(os.fdopen(w, 'w'))
+
+ return ret
def tracing(self, traceon):
"""Turn tracing on or off.
@@ -290,7 +297,7 @@
def stop(self):
return self.cleanup(kill=True)
- def run(self):
+ def run(self, status):
_enforce_dom0_cpus()
try:
log.info("Xend Daemon started")
@@ -298,12 +305,14 @@
relocate.listenRelocation()
servers = SrvServer.create()
self.daemonize()
- servers.start()
+ servers.start(status)
except Exception, ex:
print >>sys.stderr, 'Exception starting xend:', ex
if XEND_DEBUG:
traceback.print_exc()
log.exception("Exception starting xend (%s)" % ex)
+ status.write('1')
+ status.close()
self.exit(1)
def exit(self, rc=0):
diff -r 1184286a2ee6 -r 8c866d412673 tools/python/xen/xend/server/SrvServer.py
--- a/tools/python/xen/xend/server/SrvServer.py Wed Sep 14 17:54:43 2005
+++ b/tools/python/xen/xend/server/SrvServer.py Wed Sep 14 23:47:37 2005
@@ -48,6 +48,7 @@
from xen.xend import Vifctl
from xen.xend.XendLogging import log
from xen.web.SrvDir import SrvDir
+import time
from SrvRoot import SrvRoot
@@ -59,13 +60,32 @@
def add(self, server):
self.servers.append(server)
- def start(self):
+ def start(self, status):
Vifctl.network('start')
threads = []
for server in self.servers:
thread = Thread(target=server.run)
thread.start()
threads.append(thread)
+
+
+ # check for when all threads have initialized themselves and then
+ # close the status pipe
+
+ threads_left = True
+ while threads_left:
+ threads_left = False
+
+ for server in self.servers:
+ if not server.ready:
+ threads_left = True
+ break
+
+ if threads_left:
+ time.sleep(.5)
+
+ status.write('0')
+ status.close()
for t in threads:
t.join()
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|