[RFC PATCH V2 5/8] xen: implement 3-level event channel routines

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Wei Liu <wei.liu2@citrix.com>
To: xen-devel@lists.xen.org
Cc: david.vrabel@citrix.com, Wei Liu <wei.liu2@citrix.com>,
	ian.campbell@citrix.com, jbeulich@suse.com,
	konrad.wilk@oracle.com
Subject: [RFC PATCH V2 5/8] xen: implement 3-level event channel routines
Date: Mon, 21 Jan 2013 14:58:25 +0000	[thread overview]
Message-ID: <1358780308-30425-6-git-send-email-wei.liu2@citrix.com> (raw)
In-Reply-To: <1358780308-30425-1-git-send-email-wei.liu2@citrix.com>

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |  407 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 385 insertions(+), 22 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 913ef0c..5b45441 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -57,6 +57,16 @@ EXPORT_SYMBOL_GPL(evtchn_level);
 unsigned int nr_event_channels;
 EXPORT_SYMBOL_GPL(nr_event_channels);
 
+/* Per-cpu 2nd level selector: one word per bit of the 1st level selector */
+DEFINE_PER_CPU(unsigned long [BITS_PER_LONG], evtchn_sel_l2);
+/* Shared pending/mask bitmaps for 3-level event channel, one bit per port */
+#define __NR_ELEMS (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)
+unsigned long evtchn_pending[__NR_ELEMS] __page_aligned_bss;
+unsigned long evtchn_mask   [__NR_ELEMS] __page_aligned_bss;
+#undef __NR_ELEMS
+/* log2(BITS_PER_LONG): shift that turns a bit index into a word index */
+#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5)
+
 struct evtchn_ops {
 	unsigned long (*active_evtchns)   (unsigned int cpu,
 					   struct shared_info *sh,
@@ -314,6 +324,15 @@ static inline unsigned long active_evtchns_l2(unsigned int cpu,
 		~sh->evtchn_mask[idx];
 }
 
+static inline unsigned long active_evtchns_l3(unsigned int cpu,
+					      struct shared_info *sh,
+					      unsigned int idx)
+{
+	/* Pending, not masked, and selected by cpu's mask; sh is not used. */
+	return evtchn_pending[idx] & ~evtchn_mask[idx] &
+		per_cpu(cpu_evtchn_mask, cpu)[idx];
+}
+
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 {
 	int irq = evtchn_to_irq[chn];
@@ -353,18 +372,32 @@ static inline void clear_evtchn_l2(int port)
 	sync_clear_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline void clear_evtchn_l3(int port)
+{
+	sync_clear_bit(port, evtchn_pending);	/* 3-level pending bitmap */
+}
+
 static inline void set_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	sync_set_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline void set_evtchn_l3(int port)
+{
+	sync_set_bit(port, evtchn_pending);	/* 3-level pending bitmap */
+}
+
 static inline int test_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	return sync_test_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline int test_evtchn_l3(int port)
+{
+	return sync_test_bit(port, evtchn_pending); /* 3-level pending bitmap */
+}
 
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq
@@ -389,6 +422,11 @@ static void mask_evtchn_l2(int port)
 	sync_set_bit(port, &s->evtchn_mask[0]);
 }
 
+static void mask_evtchn_l3(int port)
+{
+	sync_set_bit(port, evtchn_mask);	/* 3-level mask bitmap */
+}
+
 static void unmask_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
@@ -419,6 +457,40 @@ static void unmask_evtchn_l2(int port)
 	put_cpu();
 }
 
+/* Unmask @port: by hypercall if it is bound to another cpu, else locally,
+ * re-flagging the port for delivery when it is already pending. */
+static void unmask_evtchn_l3(int port)
+{
+	unsigned int cpu = get_cpu();
+	unsigned int l1bit = port >> (LONG_BITORDER << 1); /* L1 selector bit */
+	unsigned int l2bit = port >> LONG_BITORDER;	/* evtchn_sel_l2 bit */
+
+	BUG_ON(!irqs_disabled());
+
+	/* Non-local port: take the slow path and ask the hypervisor. */
+	if (unlikely(cpu != cpu_from_evtchn(port))) {
+		struct evtchn_unmask op = { .port = port };
+		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op);
+	} else {
+		struct vcpu_info *vi = __this_cpu_read(xen_vcpu);
+
+		sync_clear_bit(port, evtchn_mask);
+
+		/*
+		 * Basically the equivalent of 'hw_resend_irq': just like
+		 * a real IO-APIC we 'lose the interrupt edge' if the
+		 * channel is masked, so walk the selectors bottom-up.
+		 */
+		if (sync_test_bit(port, evtchn_pending) &&
+		    !sync_test_and_set_bit(l2bit, per_cpu(evtchn_sel_l2, cpu)) &&
+		    !sync_test_and_set_bit(l1bit, &vi->evtchn_pending_sel))
+			vi->evtchn_upcall_pending = 1;
+	}
+
+	put_cpu();
+}
+
+
 static void xen_irq_init(unsigned irq)
 {
 	struct irq_info *info;
@@ -1190,25 +1262,8 @@ static irqreturn_t debug_interrupt_l2(int irq, void *dev_id)
 	int cpu = smp_processor_id();
 	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
 	int i;
-	unsigned long flags;
-	static DEFINE_SPINLOCK(debug_lock);
 	struct vcpu_info *v;
 
-	spin_lock_irqsave(&debug_lock, flags);
-
-	printk("\nvcpu %d\n  ", cpu);
-
-	for_each_online_cpu(i) {
-		int pending;
-		v = per_cpu(xen_vcpu, i);
-		pending = (get_irq_regs() && i == cpu)
-			? xen_irqs_disabled(get_irq_regs())
-			: v->evtchn_upcall_mask;
-		printk("%d: masked=%d pending=%d event_sel %0*lx\n  ", i,
-		       pending, v->evtchn_upcall_pending,
-		       (int)(sizeof(v->evtchn_pending_sel)*2),
-		       v->evtchn_pending_sel);
-	}
 	v = per_cpu(xen_vcpu, cpu);
 
 	printk("\npending:\n   ");
@@ -1260,18 +1315,143 @@ static irqreturn_t debug_interrupt_l2(int irq, void *dev_id)
 		}
 	}
 
-	spin_unlock_irqrestore(&debug_lock, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Debug dump for 3-level event channels: prints the interesting words of the
+ * pending and mask bitmaps, then one line per pending port with its state.
+ * Always returns IRQ_HANDLED; irq and dev_id are not used.
+ */
+static irqreturn_t debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	struct vcpu_info *v = per_cpu(xen_vcpu, cpu);
+	int i;
+
+	printk("\npending (only show words which have bits set to 1):\n   ");
+	for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--)
+		if (evtchn_pending[i] != 0UL) {
+			printk(" word index %d %0*lx\n", i,
+			       (int)(sizeof(evtchn_pending[0])*2),
+			       evtchn_pending[i]);
+		}
+
+	printk("\nglobal mask (only show words which have bits set to 0):\n   ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		if (evtchn_mask[i] != ~0UL) {
+			printk(" word index %d %0*lx\n", i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_mask[i]);
+		}
+
+	printk("\nglobally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
+			printk(" word index %d %0*lx\n", i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_pending[i] & ~evtchn_mask[i]);
+		}
+
+	printk("\nlocal cpu%d mask (only show words which have bits set to 1):\n   ", cpu);
+	for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)-1; i >= 0; i--)
+		if (cpu_evtchn[i] != 0UL) {
+			printk(" word index %d %0*lx\n", i,
+			       (int)(sizeof(cpu_evtchn[0])*2),
+			       cpu_evtchn[i]);
+		}
+
+	printk("\nlocally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) {
+		unsigned long pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
+			& cpu_evtchn[i];
+		if (pending != 0UL) {
+			printk(" word index %d %0*lx\n", i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       pending);
+		}
+	}
+
+	printk("\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+		if (sync_test_bit(i, evtchn_pending)) {
+			int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG);
+			int word_idx_l2 = i / BITS_PER_LONG;
+			/* Each tag is printed only when it is true: selector
+			 * bit clear, global mask bit set, cpu bit clear. */
+			printk("  %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       evtchn_to_irq[i],
+			       sync_test_bit(word_idx, &v->evtchn_pending_sel)
+					     ? "" : " l1-clear",
+			       sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu))
+					     ? "" : " l2-clear",
+			       !sync_test_bit(i, evtchn_mask)
+					     ? "" : " globally-masked",
+			       sync_test_bit(i, cpu_evtchn)
+					     ? "" : " locally-masked");
+		}
+	}
 
 	return IRQ_HANDLED;
 }
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 {
-	return eops->debug_interrupt(irq, dev_id);
+	irqreturn_t rc; /* result of the level-specific handler */
+	static DEFINE_SPINLOCK(debug_lock); /* held across the whole dump */
+	unsigned long flags;
+	int cpu = smp_processor_id();
+	struct vcpu_info *v;
+	int i;
+
+	spin_lock_irqsave(&debug_lock, flags);
+
+	printk("\nvcpu %d\n  ", cpu);
+
+	for_each_online_cpu(i) { /* level-independent per-vcpu summary */
+		int pending; /* printed as "masked=" below */
+		v = per_cpu(xen_vcpu, i);
+		pending = (get_irq_regs() && i == cpu)
+			? xen_irqs_disabled(get_irq_regs())
+			: v->evtchn_upcall_mask;
+		printk("%d: masked=%d pending=%d event_sel %0*lx\n  ", i,
+		       pending, v->evtchn_upcall_pending,
+		       (int)(sizeof(v->evtchn_pending_sel)*2),
+		       v->evtchn_pending_sel);
+	}
+
+	rc = eops->debug_interrupt(irq, dev_id); /* level-specific dump */
+
+	spin_unlock_irqrestore(&debug_lock, flags);
+
+	return rc;
 }
 
+/* The following per-cpu variables save the current state of the event
+ * processing loop.
+ *
+ * 2-level event channel:
+ *  current_word_idx is the bit index in the L1 selector indicating the word
+ *  of the shared bitmap that is currently being processed.
+ *  current_bit_idx is the bit index within that word of the shared
+ *  bitmap.
+ *  N.B. current_word_idx_l2 is not used.
+ *
+ * 3-level event channel:
+ *  current_word_idx is the bit index in the L1 selector indicating the word
+ *  of the L2 selector that is currently being processed.
+ *  current_word_idx_l2 is the bit index in that L2 selector word indicating
+ *  the word of the shared bitmap that is currently being processed.
+ *  current_bit_idx is the bit index within that word of the shared
+ *  bitmap.
+ *
+ */
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
 static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
 static DEFINE_PER_CPU(unsigned int, current_bit_idx);
 
 /*
@@ -1395,6 +1575,163 @@ out:
 	put_cpu();
 }
 
+/*
+ * In the 3-level event channel implementation, the first level
+ * (vcpu_info->evtchn_pending_sel) has one bit per word of the second level
+ * (per-cpu evtchn_sel_l2), which has one bit per word of the third level
+ * (the shared evtchn_pending bitmap holding the pending events themselves).
+ * Scanning resumes after the last port handled by the previous upcall.
+ */
+static void do_upcall_l3(void)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
+	int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
+	int word_idx_l1, word_idx_l2, bit_idx;
+	int i, j;
+	unsigned long l1cb, l2cb;
+	int cpu = get_cpu();
+
+	l1cb = BITS_PER_LONG * BITS_PER_LONG;	/* ports per L1 bit */
+	l2cb = BITS_PER_LONG;			/* ports per L2 bit */
+
+	do {
+		unsigned long pending_words_l1;
+
+		vcpu_info->evtchn_upcall_pending = 0;
+
+		/* Nested call: the outermost invocation will loop again. */
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
+#ifndef CONFIG_X86
+		/* No need for a barrier -- XCHG is a barrier on x86. */
+		/* Clear master flag /before/ clearing selector flag. */
+		wmb();
+#endif
+		/* here we get l1 pending selector */
+		pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+		start_word_idx_l1 = __this_cpu_read(current_word_idx);
+		start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
+		start_bit_idx = __this_cpu_read(current_bit_idx);
+
+		word_idx_l1 = start_word_idx_l1;
+
+		/* loop through l1, try to pick up l2 */
+		for (i = 0; pending_words_l1 != 0; i++) {
+			unsigned long words_l1;
+			unsigned long pending_words_l2;
+
+			words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
+
+			/* Nothing left after masking: restart from word 0. */
+			if (words_l1 == 0) {
+				word_idx_l1 = 0;
+				start_word_idx_l2 = 0;
+				continue;
+			}
+
+			word_idx_l1 = __ffs(words_l1);
+
+			/* evtchn_sel_l2 holds one word per L1 selector bit. */
+			pending_words_l2 =
+				xchg(&per_cpu(evtchn_sel_l2, cpu)[word_idx_l1],
+				     0);
+
+			/* Resume mid-word only on the first visit. */
+			word_idx_l2 = 0;
+			if (i == 0 && word_idx_l1 == start_word_idx_l1)
+				word_idx_l2 = start_word_idx_l2;
+
+			for (j = 0; pending_words_l2 != 0; j++) {
+				unsigned long pending_bits;
+				unsigned long words_l2;
+				unsigned long idx;
+
+				words_l2 = MASK_LSBS(pending_words_l2,
+						     word_idx_l2);
+
+				if (words_l2 == 0) {
+					word_idx_l2 = 0;
+					bit_idx = 0;
+					continue;
+				}
+
+				word_idx_l2 = __ffs(words_l2);
+
+				idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
+				pending_bits =
+					eops->active_evtchns(cpu, NULL, idx);
+
+				/* Start word: first scan begins mid-word. */
+				bit_idx = 0;
+				if (j == 0 && word_idx_l2 == start_word_idx_l2)
+					bit_idx = start_bit_idx;
+
+				/* process port */
+				do {
+					unsigned long bits;
+					int port, irq, next_l1, next_l2;
+					struct irq_desc *desc;
+
+					bits = MASK_LSBS(pending_bits, bit_idx);
+
+					if (bits == 0)
+						break;
+
+					bit_idx = __ffs(bits);
+
+					port = word_idx_l1 * l1cb +
+						word_idx_l2 * l2cb +
+						bit_idx;
+
+					irq = evtchn_to_irq[port];
+
+					if (irq != -1) {
+						desc = irq_to_desc(irq);
+						if (desc)
+							generic_handle_irq_desc(irq, desc);
+					}
+
+					bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+
+					/*
+					 * Next caller starts at last processed
+					 * + 1; a wrap carries into the next level.
+					 */
+					next_l2 = bit_idx ? word_idx_l2 :
+						(word_idx_l2 + 1) % BITS_PER_LONG;
+					next_l1 = (bit_idx || next_l2) ? word_idx_l1 :
+						(word_idx_l1 + 1) % BITS_PER_LONG;
+					__this_cpu_write(current_bit_idx, bit_idx);
+					__this_cpu_write(current_word_idx_l2, next_l2);
+					__this_cpu_write(current_word_idx, next_l1);
+				} while (bit_idx != 0);
+
+				/* Scan the start word twice; all others once. */
+				if ((word_idx_l2 != start_word_idx_l2) || (j != 0))
+					pending_words_l2 &= ~(1UL << word_idx_l2);
+
+				word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+			}
+
+			/* Likewise: start L1 word twice, all others once. */
+			if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
+				pending_words_l1 &= ~(1UL << word_idx_l1);
+
+			word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+		}
+
+		BUG_ON(!irqs_disabled());
+		count = __this_cpu_read(xed_nesting_count);
+		__this_cpu_write(xed_nesting_count, 0);
+	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
+
+out:
+	put_cpu();
+}
+
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1534,6 +1871,11 @@ static inline int test_and_set_mask_l2(int chn)
 	return sync_test_and_set_bit(chn, sh->evtchn_mask);
 }
 
+static inline int test_and_set_mask_l3(int chn)
+{
+	return sync_test_and_set_bit(chn, evtchn_mask); /* 3-level mask bitmap */
+}
+
 static int retrigger_dynirq(struct irq_data *data)
 {
 	int evtchn = evtchn_from_irq(data->irq);
@@ -1824,14 +2166,35 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = {
 	.debug_interrupt   = debug_interrupt_l2,
 };
 
+static struct evtchn_ops evtchn_ops_l3 __read_mostly = {
+	.active_evtchns    = active_evtchns_l3,
+	.clear_evtchn      = clear_evtchn_l3,
+	.set_evtchn        = set_evtchn_l3,
+	.test_evtchn       = test_evtchn_l3,
+	.mask_evtchn       = mask_evtchn_l3,
+	.unmask_evtchn     = unmask_evtchn_l3,
+	.test_and_set_mask = test_and_set_mask_l3,
+	.do_upcall         = do_upcall_l3,
+	.debug_interrupt   = debug_interrupt_l3,
+}; /* installed as eops by xen_init_IRQ() when evtchn_level == 3 */
+
 void __init xen_init_IRQ(void)
 {
 	int i, rc;
 	int cpu;
 
-	evtchn_level = 2;
-	nr_event_channels = NR_EVENT_CHANNELS_L2;
-	eops = &evtchn_ops_l2;
+	switch (evtchn_level) {
+	case 2:
+		nr_event_channels = NR_EVENT_CHANNELS_L2;
+		eops = &evtchn_ops_l2;
+		break;
+	case 3:
+		nr_event_channels = NR_EVENT_CHANNELS_L3;
+		eops = &evtchn_ops_l3;
+		break;
+	default:
+		BUG();
+	}
 
 	/* Setup 2-level event channel */
 	evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq),
-- 
1.7.10.4

next prev parent reply	other threads:[~2013-01-21 14:58 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 1/8] xen: fix output of xen_debug_interrupt Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 2/8] xen: sync public headers Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 3/8] xen: generalized event channel operations Wei Liu
2013-01-22  9:00   ` Jan Beulich
2013-01-21 14:58 ` [RFC PATCH V2 4/8] xen: dynamically allocate cpu_evtchn_mask Wei Liu
2013-01-21 14:58 ` Wei Liu [this message]
2013-01-22  9:12   ` [RFC PATCH V2 5/8] xen: implement 3-level event channel routines Jan Beulich
2013-01-21 14:58 ` [RFC PATCH V2 6/8] xen: introduce xen_event_channel_register_3level Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 7/8] xen: introduce interfaces to register N-level event channel Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 8/8] xen: register 3-level " Wei Liu

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:913ef0c dfblob:5b45441 )
 OR (
bs:"[RFC PATCH V2 5/8] xen: implement 3-level event channel routines" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1358780308-30425-6-git-send-email-wei.liu2@citrix.com \
    --to=wei.liu2@citrix.com \
    --cc=david.vrabel@citrix.com \
    --cc=ian.campbell@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=konrad.wilk@oracle.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).