All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] xen/hvm: add kexec/kdump support
@ 2011-08-01 12:46 Olaf Hering
  2011-08-01 13:28 ` Olaf Hering
  0 siblings, 1 reply; 2+ messages in thread
From: Olaf Hering @ 2011-08-01 12:46 UTC (permalink / raw)
  To: xen-devel

One more attempt to add kexec/kdump support for a PVonHVM guest.
This is one large patch, if the approach is ok I will split it into
three parts:

- rebind virq during boot to avoid BUG in bind_virq_to_irq, required for
  the timer and debugirq. Loop through all ports to find the virq/cpu
  combo.

- add xs_introduce() in xs_init() to shutdown all open watches that the
  previous kernel had registered. Without this change the kexec may
  crash due to unexpected watch events.
  This depends on a xenstored change which has to allow the XS_INTRODUCE
  from a guest.
  And this part may need more work because the xs_introduce may hang
  forever on an old dom0 tool stack

- reset backend state if the frontend devices are either in Connected
  (kdump) or Closed (kexec) state.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

---
 drivers/xen/events.c                       |   40 ++++++++--
 drivers/xen/xenbus/xenbus_comms.c          |    4 -
 drivers/xen/xenbus/xenbus_comms.h          |    2 
 drivers/xen/xenbus/xenbus_probe.c          |    2 
 drivers/xen/xenbus/xenbus_probe_frontend.c |  116 +++++++++++++++++++++++++++++
 drivers/xen/xenbus/xenbus_xs.c             |   24 +++++-
 6 files changed, 179 insertions(+), 9 deletions(-)

Index: linux-3.0/drivers/xen/events.c
===================================================================
--- linux-3.0.orig/drivers/xen/events.c
+++ linux-3.0/drivers/xen/events.c
@@ -877,11 +877,35 @@ static int bind_interdomain_evtchn_to_ir
 	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 }
 
+/* BITS_PER_LONG is used in Xen */
+#define MAX_EVTCHNS (64 * 64)
+
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_status status;
+	int port, rc = -ENOENT;
+
+	memset(&status, 0, sizeof(status));
+	for (port = 0; port <= MAX_EVTCHNS; port++) {
+		status.dom = DOMID_SELF;
+		status.port = port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+		if (rc < 0)
+			continue;
+		if (status.status != EVTCHNSTAT_virq)
+			continue;
+		if (status.u.virq == virq && status.vcpu == cpu) {
+			rc = port;
+			break;
+		}
+	}
+	return rc;
+}
 
 int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
 	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
+	int evtchn, irq, ret;
 
 	spin_lock(&irq_mapping_update_lock);
 
@@ -897,10 +921,16 @@ int bind_virq_to_irq(unsigned int virq,
 
 		bind_virq.virq = virq;
 		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
+		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq);
+		if (ret == 0)
+			evtchn = bind_virq.port;
+		else {
+			if (ret == -EEXIST)
+				ret = find_virq(virq, cpu);
+			BUG_ON(ret < 0);
+			evtchn = ret;
+		}
 
 		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
 
Index: linux-3.0/drivers/xen/xenbus/xenbus_comms.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_comms.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@ int xb_init_comms(void)
 		printk(KERN_WARNING "XENBUS response ring is not quiescent "
 		       "(%08x:%08x): fixing up\n",
 		       intf->rsp_cons, intf->rsp_prod);
-		intf->rsp_cons = intf->rsp_prod;
+		/* breaks kdump */
+		if (!reset_devices)
+			intf->rsp_cons = intf->rsp_prod;
 	}
 
 	if (xenbus_irq) {
Index: linux-3.0/drivers/xen/xenbus/xenbus_comms.h
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_comms.h
+++ linux-3.0/drivers/xen/xenbus/xenbus_comms.h
@@ -31,7 +31,7 @@
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
 
-int xs_init(void);
+int xs_init(unsigned long xen_store_mfn);
 int xb_init_comms(void);
 
 /* Low level routines. */
Index: linux-3.0/drivers/xen/xenbus/xenbus_probe.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_probe.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_probe.c
@@ -757,7 +757,7 @@ static int __init xenbus_init(void)
 	}
 
 	/* Initialize the interface to xenstore. */
-	err = xs_init();
+	err = xs_init(xen_store_mfn);
 	if (err) {
 		printk(KERN_WARNING
 		       "XENBUS: Error initializing xenstore comms: %i\n", err);
Index: linux-3.0/drivers/xen/xenbus/xenbus_probe_frontend.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -252,10 +252,126 @@ int __xenbus_register_frontend(struct xe
 }
 EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
 
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char **v, unsigned int l)
+{
+	xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+	printk(KERN_DEBUG "XENBUS: %s %s\n",
+			v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(int expected)
+{
+	wait_event_interruptible(backend_state_wq, backend_state == expected);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+	register_xenbus_watch(&be_watch);
+
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(XenbusStateClosing);
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(XenbusStateClosed);
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(XenbusStateInitWait);
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+	kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state, err;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+	if (err != 1)
+		goto out;
+
+	switch (fe_state) {
+	case XenbusStateConnected:
+	case XenbusStateClosed:
+		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+				frontend, xenbus_strstate(fe_state));
+		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+		if (!backend || IS_ERR(backend))
+			goto out;
+		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+		if (err == 1)
+			xenbus_reset_frontend(frontend, backend, be_state);
+		kfree(backend);
+		break;
+	default:
+		break;
+	}
+out:
+	kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+	char **devclass, **dev;
+	int devclass_n, dev_n;
+	int i, j;
+
+	devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+	if (IS_ERR(devclass))
+		return;
+
+	for (i = 0; i < devclass_n; i++) {
+		dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+		if (IS_ERR(dev))
+			continue;
+		for (j = 0; j < dev_n; j++)
+			xenbus_check_frontend(devclass[i], dev[j]);
+		kfree(dev);
+	}
+	kfree(devclass);
+}
+
 static int frontend_probe_and_watch(struct notifier_block *notifier,
 				   unsigned long event,
 				   void *data)
 {
+	/* reset devices in Connected or Closed state */
+	if (xen_hvm_domain())
+		xenbus_reset_state();
 	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
 	register_xenbus_watch(&fe_watch);
Index: linux-3.0/drivers/xen/xenbus/xenbus_xs.c
===================================================================
--- linux-3.0.orig/drivers/xen/xenbus/xenbus_xs.c
+++ linux-3.0/drivers/xen/xenbus/xenbus_xs.c
@@ -620,6 +620,20 @@ static struct xenbus_watch *find_watch(c
 	return NULL;
 }
 
+static int xs_introduce(const char *domid, const char *mfn, const char *port)
+{
+	struct kvec iov[3];
+
+	iov[0].iov_base = (char *)domid;
+	iov[0].iov_len = strlen(domid) + 1;
+	iov[1].iov_base = (char *)mfn;
+	iov[1].iov_len = strlen(mfn) + 1;
+	iov[2].iov_base = (char *)port;
+	iov[2].iov_len = strlen(port) + 1;
+
+	return xs_error(xs_talkv(XBT_NIL, XS_INTRODUCE, iov,
+				 ARRAY_SIZE(iov), NULL));
+}
 /* Register callback to watch this node. */
 int register_xenbus_watch(struct xenbus_watch *watch)
 {
@@ -867,10 +881,11 @@ static int xenbus_thread(void *unused)
 	return 0;
 }
 
-int xs_init(void)
+int xs_init(unsigned long xen_store_mfn)
 {
 	int err;
 	struct task_struct *task;
+	char domid[12], mfn[24], port[24];
 
 	INIT_LIST_HEAD(&xs_state.reply_list);
 	spin_lock_init(&xs_state.reply_lock);
@@ -897,5 +912,12 @@ int xs_init(void)
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 
+	snprintf(domid, sizeof(domid), "%u", DOMID_SELF);
+	snprintf(mfn, sizeof(mfn), "%lu", xen_store_mfn);
+	snprintf(port, sizeof(port), "%d", xen_store_evtchn);
+	err = xs_introduce(domid, mfn, port);
+	if (err)
+		printk(KERN_ALERT "%s: introduce failed: %d\n", __func__, err);
+
 	return 0;
 }

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2011-08-01 13:28 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-08-01 12:46 [PATCH v3] xen/hvm: add kexec/kdump support Olaf Hering
2011-08-01 13:28 ` Olaf Hering

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.