>From b145011be18b700e89234f6ddc12e32872c2c121 Mon Sep 17 00:00:00 2001 From: Michal Novotny Date: Mon, 30 Aug 2010 15:06:05 +0200 Subject: [PATCH] Fix bootloader handling when empty string is being output This is the patch to fix empty string as the output value of the bootloader string. If there is no output then xend is being hung indefinitely unless you press Ctrl + C keys to trigger SIGINT. This patch fixes the issue by implementing timeout to the select() function for reading from pipe and also checking for state of the process - i.e. whether the process is running or whether it's dead or zombie (because fork() call does leave zombies when the process is done but the parent process is still running). I tried it with the bogus bootloader that just returns with error code 0 and also the bogus bootloader that sleeps for 10 seconds and then returns with error code 0. According to my testing when the bootloader process has finished and the output of the process is empty it fails with "bootloader didn't return any data" message which is the expected behaviour. This patch has also been tested with the various timeout values (incl. no timeout specified) for pyGrub and everything was working fine since it was failing *only* in the case both output from pyGrub was empty and the bootloader process was not running according the pid. The check for bootloader running is implemented by checking the presence of /proc/{$PID} and also state in the stat file (/proc/{$PID}/stat). Testing: The patch has been tested on RHEL-5.5 x86_64 dom0 with RHEL-5.4 PV guest both with good and bad bootloader setup. By bad bootloader I mean just the shell script exiting with code 0 and also shell script sleeping for 10 seconds and exiting with code 0. Before my patch applied it got stuck indefinitely in the xend because of waiting for some data on select() call and after my patch applied it's opening the FIFO with O_NDELAY and tries to read the data indefinitely every second using the select() call with 1 second as the timeout argument. If there were no data output using the read call it just fails after the child (bootloader) process exited. Otherwise, when bootloader did return some data, the guest started successfully no matter what the timeout was defined in pyGrub. Signed-off-by: Michal Novotny Michal --- tools/python/xen/xend/XendBootloader.py | 36 ++++++++++++++++++++++++++---- 1 files changed, 31 insertions(+), 5 deletions(-) diff --git a/tools/python/xen/xend/XendBootloader.py b/tools/python/xen/xend/XendBootloader.py index 0cef917..66a688f 100644 --- a/tools/python/xen/xend/XendBootloader.py +++ b/tools/python/xen/xend/XendBootloader.py @@ -24,6 +24,24 @@ from XendError import VmError import pty, termios, fcntl from xen.lowlevel import ptsname +def hasPidInProcFS(pid): + path = "/proc/%s" % pid + return os.path.exists(path) + +def isRunning(pid): + path = "/proc/%s" % pid + if not os.path.exists(path): + return False + fd = open("%s/stat" % path) + val = fd.readline().split()[2] + fd.close() + try: + val.index('Z') + ret = False + except: + ret = True + return ret + def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '', ramdisk = '', kernel_args = ''): """Run the boot loader executable on the given disk and return a @@ -128,16 +146,22 @@ def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '', if os.uname()[0] != 'SunOS': tty.setraw(m2); - fcntl.fcntl(m2, fcntl.F_SETFL, os.O_NDELAY); + # We check whether OS supports procfs in /proc like Linux does + canUseProcfs = hasPidInProcFS( os.getpid() ) + + fcntl.fcntl(m2, fcntl.F_SETFL, os.O_NDELAY) while True: try: - r = os.open(fifo, os.O_RDONLY) + mode = os.O_RDONLY + if canUseProcfs: + mode |= os.O_NDELAY + r = os.open(fifo, mode) except OSError, e: if e.errno == errno.EINTR: continue break - fcntl.fcntl(r, fcntl.F_SETFL, os.O_NDELAY); + fcntl.fcntl(r, fcntl.F_SETFL, os.O_NDELAY) ret = "" inbuf=""; outbuf=""; @@ -156,13 +180,15 @@ def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '', # if there is actual data to write, otherwise this would loop too fast, # eating up CPU time. - while True: + while ((canUseProcfs and isRunning(child)) or not canUseProcfs): wsel = [] if len(outbuf) != 0: wsel = wsel + [m1] if len(inbuf) != 0: wsel = wsel + [m2] - sel = select.select([r, m1, m2], wsel, []) + + sel = select.select([r, m1, m2], wsel, [], 1) + try: if m1 in sel[0]: s = os.read(m1, 16) -- 1.5.5.6