This patch makes xl create check whether qemu-dm has started
correctly, and causes it to fail immediately with appropriate errors
if not. There are other bugfixes too.
More specifically:
* libxl_create_device_model forks twice rather than once so that the
process which calls libxl does not end up being the actual parent
of qemu. That avoids the need for the qemu-dm process to be reaped
at some indefinite time in the future.
* The first fork generates an intermediate process which is
responsible for writing the qemu-dm pid to xenstore and then merely
waits to collect and report on qemu-dm's exit status during
startup. New arguments to libxl_create_device_model allow the
preservation of its pid so that a later call can check whether the
startup is successful.
* xl.c's create_domain checks for errors in its libxl calls.
Consequential changes:
* libxl_wait_for_device_model now takes a callback function parameter
which is called repeatedly in the loop iteration and allows the
caller to abort the wait.
* libxl_exec no longer calls fork; there is a new libxl_fork.
* osdep.[ch] new use #define _GNU_SOURCE. The provided asprintf
declarations are suppressed when not needed (currently, always).
* There is a hook to override waitpid, which will be necessary for
some callers.
Remaining problems and other issues I noticed or we found:
* The error handling is rather inconsistent still and lacking in
places.
* xl_logv is declared but not defined.
* _GNU_SOURCE should be used throughout. The asprintf implementation
should be disabled in favour of the system one.
* XL_LOG_ERROR_ERRNO needs to actually print the errno value.
* destroy_device_model can kill random dom0 processes (!)
* struct libxl_ctx should be defined in libxl_internal.h.
Signed-off-by: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
(Changes since v1:
* Remove new error log level; should be in a future patch
* Properly fixed the asprintf problem
* Indentation no uses literal tabs)
diff -r 49deb113cd40 tools/libxl/Makefile
--- a/tools/libxl/Makefile Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/Makefile Mon Nov 16 12:06:57 2009 +0000
@@ -23,8 +23,7 @@
LIBCONFIG_SOURCE = libconfig-1.3.2
LIBCONFIG_OUTPUT = $(LIBCONFIG_SOURCE)/.libs
-LIBXL_OBJS-y =
-LIBXL_OBJS-$(CONFIG_Linux) += osdeps.o
+LIBXL_OBJS-y = osdeps.o
LIBXL_OBJS = flexarray.o libxl.o libxl_dom.o libxl_exec.o libxl_xshelp.o
libxl_device.o libxl_internal.o xenguest.o libxl_utils.o $(LIBXL_OBJS-y)
CLIENTS = xl
diff -r 49deb113cd40 tools/libxl/libxl.c
--- a/tools/libxl/libxl.c Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/libxl.c Mon Nov 16 12:06:57 2009 +0000
@@ -21,8 +21,10 @@
#include <sys/types.h>
#include <fcntl.h>
#include <sys/select.h>
+#include <sys/wait.h>
#include <signal.h>
#include <unistd.h> /* for write, unlink and close */
+#include <assert.h>
#include "libxl.h"
#include "libxl_utils.h"
#include "libxl_internal.h"
@@ -38,6 +40,8 @@
ctx->xch = xc_interface_open();
ctx->xsh = xs_daemon_open();
+
+ ctx->waitpid_instead= libxl_waitpid_instead_default;
return 0;
}
@@ -496,16 +500,24 @@
return (char **) flexarray_contents(dm_args);
}
+struct libxl_device_model_starting {
+ int domid;
+ pid_t intermediate;
+};
+
int libxl_create_device_model(struct libxl_ctx *ctx,
libxl_device_model_info *info,
- libxl_device_nic *vifs, int num_vifs)
+ libxl_device_nic *vifs, int num_vifs,
+ libxl_device_model_starting **starting_r)
{
char *dom_path, *path, *logfile, *logfile_new;
- char *kvs[3];
struct stat stat_buf;
- int logfile_w, null, pid;
+ int logfile_w, null;
int i;
char **args;
+ pid_t intermediate;
+
+ *starting_r= 0;
args = libxl_build_device_model_args(ctx, info, vifs, num_vifs);
if (!args)
@@ -533,17 +545,121 @@
logfile = libxl_sprintf(ctx, "/var/log/xen/qemu-dm-%s.log",
info->dom_name);
logfile_w = open(logfile, O_WRONLY|O_CREAT, 0644);
null = open("/dev/null", O_RDONLY);
- pid = libxl_exec(ctx, null, logfile_w, logfile_w, info->device_model,
args);
+
+ if (starting_r) {
+ *starting_r= libxl_calloc(ctx, sizeof(**starting_r), 1);
+ if (!*starting_r) return ERROR_NOMEM;
+ }
+
+ intermediate = libxl_fork(ctx);
+ if (intermediate==-1) return ERROR_FAIL;
+
+ if (!intermediate) {
+ struct libxl_ctx clone;
+ char *kvs[3];
+ pid_t child, got;
+ int status;
+
+ child = libxl_fork(ctx);
+ if (!child) {
+ libxl_exec(ctx, null, logfile_w, logfile_w,
+ info->device_model, args);
+ }
+
+ if (!starting_r) _exit(0); /* just detach then */
+
+ clone = *ctx;
+ clone.xsh = xs_daemon_open();
+ /* we mustn't use the parent's handle in the child */
+
+ kvs[0] = libxl_sprintf(ctx, "image/device-model-pid");
+ kvs[1] = libxl_sprintf(ctx, "%d", child);
+ kvs[2] = NULL;
+ libxl_xs_writev(ctx, XBT_NULL, dom_path, kvs);
+
+ got = ctx->waitpid_instead(child, &status, 0);
+ assert(got == child);
+
+ libxl_report_child_exitstatus(ctx, "device model", child, status);
+ _exit(WIFEXITED(status) ? WEXITSTATUS(status) :
+ WIFSIGNALED(status) && WTERMSIG(status)<127
+ ? WTERMSIG(status)+128 : -1);
+ }
+
+ if (starting_r) {
+ (*starting_r)->domid= info->domid;
+ (*starting_r)->intermediate= intermediate;
+ }
+
close(null);
close(logfile_w);
- kvs[0] = libxl_sprintf(ctx, "image/device-model-pid");
- kvs[1] = libxl_sprintf(ctx, "%d", pid);
- kvs[2] = NULL;
- libxl_xs_writev(ctx, XBT_NULL, dom_path, kvs);
-
return 0;
}
+
+static void report_dm_intermediate_status(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting,
+ pid_t got, int status) {
+ if (!WIFEXITED(status))
+ /* intermediate process did the logging itself if it exited */
+ libxl_report_child_exitstatus(ctx,
+ "device model intermediate process"
+ " (startup monitor)", starting->intermediate,
+ status);
+}
+
+int libxl_detach_device_model(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting) {
+ int r, status;
+ int rc = 0;
+ pid_t got;
+
+ if (starting->intermediate) {
+ r = kill(starting->intermediate, SIGKILL);
+ if (r) {
+ XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "could not kill device model"
+ "intermediate process [%ld]",
+ (unsigned long)starting->intermediate);
+ abort(); /* things are very wrong */
+ }
+ got = ctx->waitpid_instead(starting->intermediate, &status, 0);
+ assert(got == starting->intermediate);
+ if (!(WIFSIGNALED(status) && WTERMSIG(status)==SIGKILL)) {
+ report_dm_intermediate_status(ctx, starting, got, status);
+ rc = ERROR_FAIL;
+ }
+ }
+
+ libxl_free(ctx, starting);
+
+ return rc;
+}
+
+static int check_dm_failure(struct libxl_ctx *ctx,
+ void *starting_void) {
+ libxl_device_model_starting *starting = starting_void;
+ pid_t got;
+ int status;
+
+ got = ctx->waitpid_instead(starting->intermediate, &status, WNOHANG);
+ if (!got) return 0;
+
+ assert(got == starting->intermediate);
+ report_dm_intermediate_status(ctx, starting, got, status);
+ starting->intermediate= 0;
+ return ERROR_FAIL;
+}
+
+int libxl_confirm_device_model_startup(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting) {
+ int problem = libxl_wait_for_device_model(ctx, starting->domid, "running",
+ check_dm_failure,
+ starting);
+ int detach = libxl_detach_device_model(ctx, starting);
+ return problem ? problem : detach;
+ return ERROR_FAIL;
+}
+
/******************************************************************************/
int libxl_device_disk_add(struct libxl_ctx *ctx, uint32_t domid,
libxl_device_disk *disk)
@@ -941,7 +1057,7 @@
hvm = is_hvm(ctx, domid);
if (hvm) {
- if (libxl_wait_for_device_model(ctx, domid, "running") < 0) {
+ if (libxl_wait_for_device_model(ctx, domid, "running", 0,0) < 0) {
return -1;
}
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state",
domid);
@@ -955,7 +1071,7 @@
pcidev->bus, pcidev->dev, pcidev->func);
snprintf(path, sizeof(path),
"/local/domain/0/device-model/%d/command", domid);
xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins"));
- if (libxl_wait_for_device_model(ctx, domid, "pci-inserted") < 0)
+ if (libxl_wait_for_device_model(ctx, domid, "pci-inserted", 0,0) < 0)
XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn't respond in time\n");
snprintf(path, sizeof(path),
"/local/domain/0/device-model/%d/parameter", domid);
vdevfn = libxl_xs_read(ctx, XBT_NULL, path);
@@ -1029,7 +1145,7 @@
hvm = is_hvm(ctx, domid);
if (hvm) {
- if (libxl_wait_for_device_model(ctx, domid, "running") < 0) {
+ if (libxl_wait_for_device_model(ctx, domid, "running", 0,0) < 0) {
return -1;
}
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state",
domid);
@@ -1039,7 +1155,7 @@
pcidev->bus, pcidev->dev, pcidev->func);
snprintf(path, sizeof(path),
"/local/domain/0/device-model/%d/command", domid);
xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem"));
- if (libxl_wait_for_device_model(ctx, domid, "pci-removed") < 0) {
+ if (libxl_wait_for_device_model(ctx, domid, "pci-removed", 0,0) < 0) {
XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn't respond in time\n");
return -1;
}
diff -r 49deb113cd40 tools/libxl/libxl.h
--- a/tools/libxl/libxl.h Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/libxl.h Mon Nov 16 12:06:57 2009 +0000
@@ -42,6 +42,11 @@
/* mini-GC */
int alloc_maxsize;
void **alloc_ptrs;
+
+ /* for callers who reap children willy-nilly; caller must only
+ * set this after libxl_init and before any other call - or
+ * may leave them untouched */
+ int (*waitpid_instead)(pid_t pid, int *status, int flags);
};
typedef struct {
@@ -193,9 +198,19 @@
struct libxl_dominfo * libxl_domain_list(struct libxl_ctx *ctx, int
*nb_domain);
xc_dominfo_t * libxl_domain_infolist(struct libxl_ctx *ctx, int *nb_domain);
+typedef struct libxl_device_model_starting libxl_device_model_starting;
int libxl_create_device_model(struct libxl_ctx *ctx,
libxl_device_model_info *info,
- libxl_device_nic *vifs, int num_vifs);
+ libxl_device_nic *vifs, int num_vifs,
+ libxl_device_model_starting **starting_r);
+ /* Caller must either: pass starting_r==0, or on successful
+ * return pass *starting_r to libxl_confirm_device_model
+ * or libxl_detach_device_model */
+int libxl_confirm_device_model_startup(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting);
+int libxl_detach_device_model(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting);
+ /* DM is detached even if error is returned */
int libxl_device_disk_add(struct libxl_ctx *ctx, uint32_t domid,
libxl_device_disk *disk);
int libxl_device_disk_clean_shutdown(struct libxl_ctx *ctx, uint32_t domid);
diff -r 49deb113cd40 tools/libxl/libxl_device.c
--- a/tools/libxl/libxl_device.c Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/libxl_device.c Mon Nov 16 12:06:57 2009 +0000
@@ -260,12 +260,17 @@
return -1;
}
-int libxl_wait_for_device_model(struct libxl_ctx *ctx, uint32_t domid, char
*state)
+int libxl_wait_for_device_model(struct libxl_ctx *ctx,
+ uint32_t domid, char *state,
+ int (*check_callback)(struct libxl_ctx *ctx,
+ void *userdata),
+ void *check_callback_userdata)
{
char path[50];
char *p;
int watchdog = 100;
unsigned int len;
+ int rc;
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state",
domid);
while (watchdog > 0) {
@@ -282,6 +287,10 @@
usleep(100000);
watchdog--;
}
+ }
+ if (check_callback) {
+ rc = check_callback(ctx, check_callback_userdata);
+ if (rc) return rc;
}
}
XL_LOG(ctx, XL_LOG_ERROR, "Device Model not ready\n");
diff -r 49deb113cd40 tools/libxl/libxl_exec.c
--- a/tools/libxl/libxl_exec.c Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/libxl_exec.c Mon Nov 16 12:06:57 2009 +0000
@@ -15,34 +15,81 @@
* GNU Lesser General Public License for more details.
*/
+#include "osdeps.h"
+
#include <stdio.h>
+#include <string.h>
#include <unistd.h>
#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "libxl.h"
#include "libxl_internal.h"
-int libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
- char *arg0, char **args)
+pid_t libxl_fork(struct libxl_ctx *ctx)
{
- int pid, i;
+ pid_t pid;
pid = fork();
if (pid == -1) {
- XL_LOG(ctx, XL_LOG_ERROR, "fork failed");
+ XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "fork failed");
return -1;
}
- if (pid == 0) {
- /* child */
- if (stdinfd != -1)
- dup2(stdinfd, STDIN_FILENO);
- if (stdoutfd != -1)
- dup2(stdoutfd, STDOUT_FILENO);
- if (stderrfd != -1)
- dup2(stderrfd, STDERR_FILENO);
- for (i = 4; i < 256; i++)
- close(i);
- execv(arg0, args);
- exit(256);
- }
+
return pid;
}
+
+void libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
+ char *arg0, char **args)
+ /* call this in the child */
+{
+ int i;
+
+ if (stdinfd != -1)
+ dup2(stdinfd, STDIN_FILENO);
+ if (stdoutfd != -1)
+ dup2(stdoutfd, STDOUT_FILENO);
+ if (stderrfd != -1)
+ dup2(stderrfd, STDERR_FILENO);
+ for (i = 4; i < 256; i++)
+ close(i);
+ execv(arg0, args);
+ XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "exec %s failed", arg0);
+ _exit(-1);
+}
+
+void libxl_report_child_exitstatus(struct libxl_ctx *ctx,
+ const char *what, pid_t pid, int status) {
+ /* treats all exit statuses as errors; if that's not what you want,
+ * check status yourself first */
+
+ if (WIFEXITED(status)) {
+ int st= WEXITSTATUS(status);
+ if (st)
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] exited"
+ " with error status %d", what, (unsigned long)pid, st);
+ else
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] unexpectedly"
+ " exited status zero", what, (unsigned long)pid);
+ } else if (WIFSIGNALED(status)) {
+ int sig= WTERMSIG(status);
+ const char *str= strsignal(sig);
+ const char *coredump= WCOREDUMP(status) ? " (core dumped)" : "";
+ if (str)
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] died due to"
+ " fatal signal %s%s", what, (unsigned long)pid,
+ str, coredump);
+ else
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] died due to unknown"
+ " fatal signal number %d%s", what, (unsigned long)pid,
+ sig, coredump);
+ } else {
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] gave unknown"
+ " wait status 0x%x", what, (unsigned long)pid, status);
+ }
+}
+
+pid_t libxl_waitpid_instead_default(pid_t pid, int *status, int flags) {
+ return waitpid(pid,status,flags);
+}
diff -r 49deb113cd40 tools/libxl/libxl_internal.h
--- a/tools/libxl/libxl_internal.h Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/libxl_internal.h Mon Nov 16 12:06:57 2009 +0000
@@ -45,6 +45,7 @@
#define XL_LOG_WARNING 1
#define XL_LOG_ERROR 0
+void xl_logv(struct libxl_ctx *ctx, int loglevel, const char *file, int line,
const char *func, char *fmt, va_list al);
void xl_log(struct libxl_ctx *ctx, int loglevel, const char *file, int line,
const char *func, char *fmt, ...);
typedef struct {
@@ -126,7 +127,11 @@
char **bents, char **fents);
int libxl_device_destroy(struct libxl_ctx *ctx, char *be_path, int force);
int libxl_devices_destroy(struct libxl_ctx *ctx, uint32_t domid, int force);
-int libxl_wait_for_device_model(struct libxl_ctx *ctx, uint32_t domid, char
*state);
+int libxl_wait_for_device_model(struct libxl_ctx *ctx,
+ uint32_t domid, char *state,
+ int (*check_callback)(struct libxl_ctx *ctx,
+ void *userdata),
+ void *check_callback_userdata);
int libxl_wait_for_backend(struct libxl_ctx *ctx, char *be_path, char *state);
int libxl_device_pci_flr(struct libxl_ctx *ctx, unsigned int domain, unsigned
int bus,
unsigned int dev, unsigned int func);
@@ -137,8 +142,12 @@
int vcpus, int store_evtchn, unsigned long
*store_mfn);
/* xl_exec */
-int libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
- char *arg0, char **args);
+pid_t libxl_fork(struct libxl_ctx *ctx);
+void libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
+ char *arg0, char **args);
+void libxl_report_child_exitstatus(struct libxl_ctx *ctx,
+ const char *what, pid_t pid, int status);
+pid_t libxl_waitpid_instead_default(pid_t pid, int *status, int flags);
#endif
diff -r 49deb113cd40 tools/libxl/osdeps.c
--- a/tools/libxl/osdeps.c Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/osdeps.c Mon Nov 16 12:06:57 2009 +0000
@@ -13,11 +13,15 @@
* GNU Lesser General Public License for more details.
*/
+#include "osdeps.h"
+
#include <unistd.h>
#include <stdarg.h>
#include <stdio.h>
#include <sys/time.h>
#include <stdlib.h>
+
+#ifdef NEED_OWN_ASPRINTF
int vasprintf(char **buffer, const char *fmt, va_list ap)
{
@@ -60,3 +64,5 @@
va_end (ap);
return status;
}
+
+#endif
diff -r 49deb113cd40 tools/libxl/osdeps.h
--- a/tools/libxl/osdeps.h Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/osdeps.h Mon Nov 16 12:06:57 2009 +0000
@@ -16,9 +16,10 @@
#ifndef LIBXL_OSDEP
#define LIBXL_OSDEP
+#define _GNU_SOURCE
+
+#ifdef NEED_OWN_ASPRINTF
#include <stdarg.h>
-
-#if defined(__linux__)
int asprintf(char **buffer, char *fmt, ...);
int vasprintf(char **buffer, const char *fmt, va_list ap);
#endif
diff -r 49deb113cd40 tools/libxl/xl.c
--- a/tools/libxl/xl.c Fri Nov 13 15:46:58 2009 +0000
+++ b/tools/libxl/xl.c Mon Nov 16 12:06:57 2009 +0000
@@ -572,6 +572,15 @@
config_destroy(&config);
}
+#define MUST( call ) ({ \
+ int must_rc = (call); \
+ if (must_rc) { \
+ fprintf(stderr,"xl: fatal error: %s:%d, rc=%d: %s\n", \
+ __FILE__,__LINE__, must_rc, #call); \
+ exit(-must_rc); \
+ } \
+ })
+
static void create_domain(int debug, const char *filename)
{
struct libxl_ctx ctx;
@@ -584,30 +593,35 @@
libxl_device_pci *pcidevs = NULL;
int num_disks = 0, num_vifs = 0, num_pcidevs = 0;
int i;
+ libxl_device_model_starting *dm_starting;
printf("Parsing config file %s\n", filename);
parse_config_file(filename, &info1, &info2, &disks, &num_disks, &vifs,
&num_vifs, &pcidevs, &num_pcidevs, &dm_info);
if (debug)
printf_info(&info1, &info2, disks, num_disks, vifs, num_vifs, pcidevs,
num_pcidevs, &dm_info);
- libxl_ctx_init(&ctx);
- libxl_ctx_set_log(&ctx, log_callback, NULL);
- libxl_domain_make(&ctx, &info1, &domid);
- libxl_domain_build(&ctx, &info2, domid);
+ MUST( libxl_ctx_init(&ctx) );
+ MUST( libxl_ctx_set_log(&ctx, log_callback, NULL) );
+ MUST( libxl_domain_make(&ctx, &info1, &domid) );
+ MUST( libxl_domain_build(&ctx, &info2, domid) );
device_model_info_domid_fixup(&dm_info, domid);
for (i = 0; i < num_disks; i++) {
disk_info_domid_fixup(disks + i, domid);
- libxl_device_disk_add(&ctx, domid, &disks[i]);
+ MUST( libxl_device_disk_add(&ctx, domid, &disks[i]) );
}
for (i = 0; i < num_vifs; i++) {
nic_info_domid_fixup(vifs + i, domid);
- libxl_device_nic_add(&ctx, domid, &vifs[i]);
+ MUST( libxl_device_nic_add(&ctx, domid, &vifs[i]) );
}
- libxl_create_device_model(&ctx, &dm_info, vifs, num_vifs);
+
+ MUST( libxl_create_device_model(&ctx, &dm_info, vifs, num_vifs,
+ &dm_starting) );
for (i = 0; i < num_pcidevs; i++)
libxl_device_pci_add(&ctx, domid, &pcidevs[i]);
+ MUST( libxl_confirm_device_model_startup(&ctx, dm_starting) );
+
libxl_domain_unpause(&ctx, domid);
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|