One more attempt to add kexec/kdump support for a PVonHVM guest.
This is one large patch; if the approach is OK, I will split it into
three parts:
- rebind virq during boot to avoid BUG in bind_virq_to_irq, required for
the timer and debugirq. Loop through all ports to find the virq/cpu
combo.
- add xs_introduce() in xs_init() to shut down all open watches that the
previous kernel had registered. Without this change the kexec may
crash due to unexpected watch events.
This depends on a xenstored change that allows XS_INTRODUCE
from a guest.
This part may need more work, because xs_introduce() may hang
forever on an old dom0 tool stack.
- reset backend state if the frontend devices are either in Connected
(kdump) or Closed (kexec) state.
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
---
drivers/xen/events.c | 40 ++++++++--
drivers/xen/xenbus/xenbus_comms.c | 4 -
drivers/xen/xenbus/xenbus_comms.h | 2
drivers/xen/xenbus/xenbus_probe.c | 2
drivers/xen/xenbus/xenbus_probe_frontend.c | 116 +++++++++++++++++++++++++++++
drivers/xen/xenbus/xenbus_xs.c | 24 +++++-
6 files changed, 179 insertions(+), 9 deletions(-)
Index: linux-3.0/drivers/xen/events.c
===================================================================
--- linux-3.0.orig/drivers/xen/events.c
+++ linux-3.0/drivers/xen/events.c
@@ -877,11 +877,35 @@ static int bind_interdomain_evtchn_to_ir
return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
}
+/* BITS_PER_LONG is used in Xen */
+#define MAX_EVTCHNS (64 * 64)
+
+/*
+ * Look up the event channel port that is bound to @virq on @cpu.
+ *
+ * Called from bind_virq_to_irq() when EVTCHNOP_bind_virq fails with
+ * -EEXIST.  Queries EVTCHNOP_status for every port in [0, MAX_EVTCHNS)
+ * and compares the reported virq/vcpu pair.
+ *
+ * Returns the port number on success, -ENOENT if no port matches.
+ */
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_status status;
+	int port;
+
+	memset(&status, 0, sizeof(status));
+	for (port = 0; port < MAX_EVTCHNS; port++) {
+		status.dom = DOMID_SELF;
+		status.port = port;
+		/* Ports the hypervisor refuses to report on are skipped. */
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_status, &status) < 0)
+			continue;
+		if (status.status != EVTCHNSTAT_virq)
+			continue;
+		if (status.u.virq == virq && status.vcpu == cpu)
+			return port;
+	}
+
+	/*
+	 * Never hand back the result of the last hypercall: a successful
+	 * status query on a non-matching port would be returned as 0 and
+	 * be mistaken for event channel 0 by the caller.
+	 */
+	return -ENOENT;
+}
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
- int evtchn, irq;
+ int evtchn, irq, ret;
spin_lock(&irq_mapping_update_lock);
@@ -897,10 +921,16 @@ int bind_virq_to_irq(unsigned int virq,
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
- &bind_virq) != 0)
- BUG();
- evtchn = bind_virq.port;
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq);
+ if (ret == 0)
+ evtchn = bind_virq.port;
+ else {
+ if (ret == -EEXIST)
+ ret = find_virq(virq, cpu);
+ BUG_ON(ret < 0);
+ evtchn = ret;
+ }
xen_irq_info_virq_init(cpu, irq, evtchn, virq);
Index: linux-3.0/drivers/xen/xenbus/xenbus_comms.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_comms.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@ int xb_init_comms(void)
printk(KERN_WARNING "XENBUS response ring is not quiescent "
"(%08x:%08x): fixing up\n",
intf->rsp_cons, intf->rsp_prod);
- intf->rsp_cons = intf->rsp_prod;
+ /* breaks kdump */
+ if (!reset_devices)
+ intf->rsp_cons = intf->rsp_prod;
}
if (xenbus_irq) {
Index: linux-3.0/drivers/xen/xenbus/xenbus_comms.h
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_comms.h
+++ linux-3.0/drivers/xen/xenbus/xenbus_comms.h
@@ -31,7 +31,7 @@
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
-int xs_init(void);
+int xs_init(unsigned long xen_store_mfn);
int xb_init_comms(void);
/* Low level routines. */
Index: linux-3.0/drivers/xen/xenbus/xenbus_probe.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_probe.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_probe.c
@@ -757,7 +757,7 @@ static int __init xenbus_init(void)
}
/* Initialize the interface to xenstore. */
- err = xs_init();
+ err = xs_init(xen_store_mfn);
if (err) {
printk(KERN_WARNING
"XENBUS: Error initializing xenstore comms: %i\n", err);
Index: linux-3.0/drivers/xen/xenbus/xenbus_probe_frontend.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -252,10 +252,126 @@ int __xenbus_register_frontend(struct xe
}
EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+/* Sleepers waiting for backend_state to reach a given value. */
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+/* Backend state as last read by the watch callback below. */
+static int backend_state;
+
+/*
+ * Watch callback: read the watched node into backend_state, log the new
+ * state and wake up everybody sleeping on backend_state_wq.
+ */
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char **v, unsigned int l)
+{
+	const char *path = v[XS_WATCH_PATH];
+
+	xenbus_scanf(XBT_NIL, path, "", "%i", &backend_state);
+	printk(KERN_DEBUG "XENBUS: %s %s\n",
+	       path, xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+/*
+ * Sleep (interruptibly) on backend_state_wq until backend_state equals
+ * @expected.  The result of the wait is not checked.
+ */
+static void xenbus_reset_wait_for_backend(int expected)
+{
+	(void)wait_event_interruptible(backend_state_wq,
+				       expected == backend_state);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ *
+ * @fe:       xenstore path of the frontend device
+ * @be:       xenstore path of the matching backend
+ * @be_state: backend state as read by the caller
+ *
+ * A temporary watch on "<be>/state" feeds backend_state.  Starting at the
+ * step selected by @be_state, the frontend state is written as Closing,
+ * Closed and Initialising in turn, waiting after each write until the
+ * backend reports Closing, Closed and InitWait respectively.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+	int err;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+	       be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+	err = register_xenbus_watch(&be_watch);
+	/*
+	 * Without a working watch backend_state is never updated, so the
+	 * waits below could not complete, and the watch must not be
+	 * unregistered either.
+	 * NOTE(review): -EEXIST is treated as "watch is registered" on the
+	 * assumption that register_xenbus_watch() keeps the watch listed
+	 * in that case -- confirm against xenbus_xs.c.
+	 */
+	if (err && err != -EEXIST) {
+		printk(KERN_WARNING "XENBUS: watch on %s failed: %d\n",
+		       be_watch.node, err);
+		goto out_free;
+	}
+
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(XenbusStateClosing);
+		/* fall through */
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(XenbusStateClosed);
+		/* fall through */
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d",
+			      XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(XenbusStateInitWait);
+		break;
+
+	default:
+		/* Any other backend state needs no state walk. */
+		break;
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+out_free:
+	kfree(be_watch.node);
+}
+
+/*
+ * Inspect the frontend at "device/<class>/<dev>".  If its state is
+ * Connected or Closed, read the backend path and the backend state and
+ * hand both to xenbus_reset_frontend().  Any read failure ends the check
+ * without touching the device.
+ */
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	if (xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state) != 1)
+		goto out;
+
+	/* Only Connected and Closed frontends are reset. */
+	if (fe_state != XenbusStateConnected && fe_state != XenbusStateClosed)
+		goto out;
+
+	printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+	       frontend, xenbus_strstate(fe_state));
+
+	backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+	if (!backend || IS_ERR(backend))
+		goto out;
+
+	if (xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state) == 1)
+		xenbus_reset_frontend(frontend, backend, be_state);
+	kfree(backend);
+out:
+	kfree(frontend);
+}
+
+/*
+ * Walk every entry below "device/<class>/" in xenstore and run
+ * xenbus_check_frontend() on it.  Classes whose directory cannot be
+ * listed are skipped.
+ */
+static void xenbus_reset_state(void)
+{
+	char **classes, **devs;
+	int nr_classes, nr_devs;
+	int c, d;
+
+	classes = xenbus_directory(XBT_NIL, "device", "", &nr_classes);
+	if (IS_ERR(classes))
+		return;
+
+	for (c = 0; c < nr_classes; c++) {
+		devs = xenbus_directory(XBT_NIL, "device", classes[c],
+					&nr_devs);
+		if (!IS_ERR(devs)) {
+			for (d = 0; d < nr_devs; d++)
+				xenbus_check_frontend(classes[c], devs[d]);
+			kfree(devs);
+		}
+	}
+	kfree(classes);
+}
+
static int frontend_probe_and_watch(struct notifier_block *notifier,
unsigned long event,
void *data)
{
+ /* reset devices in Connected or Closed state */
+ if (xen_hvm_domain())
+ xenbus_reset_state();
/* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
register_xenbus_watch(&fe_watch);
Index: linux-3.0/drivers/xen/xenbus/xenbus_xs.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_xs.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_xs.c
@@ -620,6 +620,20 @@ static struct xenbus_watch *find_watch(c
return NULL;
}
+/*
+ * Send an XS_INTRODUCE request made of three NUL-terminated strings
+ * (@domid, @mfn, @port, in that order) and convert the reply into an
+ * error code via xs_error().
+ */
+static int xs_introduce(const char *domid, const char *mfn, const char *port)
+{
+	const char *args[] = { domid, mfn, port };
+	struct kvec iov[ARRAY_SIZE(args)];
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(args); i++) {
+		iov[i].iov_base = (char *)args[i];
+		/* The terminating NUL is part of each field. */
+		iov[i].iov_len = strlen(args[i]) + 1;
+	}
+
+	return xs_error(xs_talkv(XBT_NIL, XS_INTRODUCE, iov,
+				 ARRAY_SIZE(iov), NULL));
+}
/* Register callback to watch this node. */
int register_xenbus_watch(struct xenbus_watch *watch)
{
@@ -867,10 +881,11 @@ static int xenbus_thread(void *unused)
return 0;
}
-int xs_init(void)
+int xs_init(unsigned long xen_store_mfn)
{
int err;
struct task_struct *task;
+ char domid[12], mfn[24], port[24];
INIT_LIST_HEAD(&xs_state.reply_list);
spin_lock_init(&xs_state.reply_lock);
@@ -897,5 +912,12 @@ int xs_init(void)
if (IS_ERR(task))
return PTR_ERR(task);
+ snprintf(domid, sizeof(domid), "%u", DOMID_SELF);
+ snprintf(mfn, sizeof(mfn), "%lu", xen_store_mfn);
+ snprintf(port, sizeof(port), "%d", xen_store_evtchn);
+ err = xs_introduce(domid, mfn, port);
+ if (err)
+ printk(KERN_ALERT "%s: introduce failed: %d\n", __func__, err);
+
return 0;
}
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|