xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Wei Liu <wei.liu2@citrix.com>
To: xen-devel@lists.xen.org, konrad.wilk@oracle.com
Cc: Wei Liu <wei.liu2@citrix.com>,
	ian.campbell@citrix.com, jbeulich@suse.com,
	david.vrabel@citrix.com
Subject: [RFC PATCH V5 09/14] xen: implement 3-level event channel routines
Date: Tue, 19 Mar 2013 15:22:03 +0000	[thread overview]
Message-ID: <1363706528-27141-10-git-send-email-wei.liu2@citrix.com> (raw)
In-Reply-To: <1363706528-27141-1-git-send-email-wei.liu2@citrix.com>

Implement several routines for 3-level event channel ABI. Some routines are
shared between 2/3-level ABIs.

For N-level (now only 2 and 3) event channel ABIs, the active events are
processed top-down, i.e. L1 -> L2 -> .. -> L(n-1) -> bitmap. The selectors
are processed recursively; the event bitmap is processed by a dedicated
function called process_port.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |  376 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 293 insertions(+), 83 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ee35ff9..fe1831b 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -76,7 +76,12 @@ static const struct evtchn_ops *eops;
 static xen_ulong_t *evtchn_pending;
 static xen_ulong_t *evtchn_mask;
 /* The following per-cpu var points to selector(s). */
-static DEFINE_PER_CPU(xen_ulong_t *[1], evtchn_sel);
+static DEFINE_PER_CPU(xen_ulong_t *[2], evtchn_sel);
+/*
+ * 2nd level selector for 3-level event channel, '8' stands for 8 bits
+ * per byte.
+ */
+static DEFINE_PER_CPU(xen_ulong_t [sizeof(xen_ulong_t) * 8], evtchn_sel_l2);
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -150,6 +155,11 @@ static bool (*pirq_needs_eoi)(unsigned irq);
  */
 #define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
 /*
+ * If xen_ulong_t is 8 bytes, it's 64 bits wide, 2^6 == 64, otherwise
+ * it is 32 bits wide, 2^5 == 32.
+ */
+#define EVTCHN_WORD_BITORDER (sizeof(xen_ulong_t) == 8 ? 6 : 5)
+/*
  * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
  * array. Primarily to avoid long lines (hence the terse name).
  */
@@ -435,6 +445,29 @@ static inline void __unmask_local_port_l2(int port)
 		vcpu_info->evtchn_upcall_pending = 1;
 }
 
+static inline void __unmask_local_port_l3(int port)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	int cpu = smp_processor_id();
+	unsigned int l1bit = port >> (EVTCHN_WORD_BITORDER << 1);
+	unsigned int l2bit = port >> EVTCHN_WORD_BITORDER;
+
+	sync_clear_bit(port, BM(&evtchn_mask[0]));
+
+	/*
+	 * Like 'hw_resend_irq': a real IO-APIC 'loses the interrupt edge'
+	 * while masked, so for a pending port set its L2 selector bit, then
+	 * (if that was clear) its L1 bit; upcall only if both were clear.
+	 */
+	if (sync_test_bit(port, BM(&evtchn_pending[0])) &&
+	    !sync_test_and_set_bit(l2bit,
+				   BM(per_cpu(evtchn_sel, cpu)[1])) &&
+	    !sync_test_and_set_bit(l1bit,
+				   BM(per_cpu(evtchn_sel, cpu)[0])))
+		vcpu_info->evtchn_upcall_pending = 1;
+
+}
+
 static void unmask_evtchn(int port)
 {
 	unsigned int cpu = get_cpu();
@@ -1326,119 +1359,254 @@ static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+/*
+ * Debug handler for the 3-level ABI: print the interesting bitmap words,
+ * then every pending port, flagged when a selector bit is clear or the port
+ * is masked globally or not set in this cpu's mask.
+ */
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_EVTCHN_WORD;
+	int i;
+
+	printk(KERN_DEBUG "\npending (only show words which have bits set to 1):\n   ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_pending[i] != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_pending[0])*2),
+			       evtchn_pending[i]);
+		}
+
+	printk(KERN_DEBUG "\nglobal mask (only show words which have bits set to 0):\n   ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_mask[i] != ~(xen_ulong_t)0) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_mask[i]);
+		}
+
+	printk(KERN_DEBUG "\nglobally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_pending[i] & ~evtchn_mask[i]);
+		}
+
+	printk(KERN_DEBUG "\nlocal cpu%d mask (only show words which have bits set to 1):\n   ", cpu);
+	for (i = nr_elems-1; i >= 0; i--)
+		if (cpu_evtchn[i] != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(cpu_evtchn[0])*2),
+			       cpu_evtchn[i]);
+		}
+
+	printk(KERN_DEBUG "\nlocally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = nr_elems-1; i >= 0; i--) {
+		xen_ulong_t pending = evtchn_pending[i] & ~evtchn_mask[i]
+			& cpu_evtchn[i];
+		if (pending != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i, (int)(sizeof(evtchn_mask[0])*2), pending);
+		}
+	}
+
+	printk(KERN_DEBUG "\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+		if (sync_test_bit(i, BM(evtchn_pending))) {
+			int word_idx = i / (BITS_PER_EVTCHN_WORD * BITS_PER_EVTCHN_WORD);
+			int word_idx_l2 = i / BITS_PER_EVTCHN_WORD;
+			/* Note clear selector bits and global/per-cpu masking. */
+			printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       evtchn_to_irq[i],
+			       sync_test_bit(word_idx, BM(per_cpu(evtchn_sel, cpu)[0]))
+			       ? "" : " l1-clear",
+			       sync_test_bit(word_idx_l2, BM(per_cpu(evtchn_sel, cpu)[1]))
+			       ? "" : " l2-clear",
+			       !sync_test_bit(i, BM(evtchn_mask))
+			       ? "" : " globally-masked",
+			       sync_test_bit(i, BM(cpu_evtchn))
+			       ? "" : " locally-masked");
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-static DEFINE_PER_CPU(unsigned int, current_word_idx);
-static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+static DEFINE_PER_CPU(unsigned int[3], current_idx);
 
 /*
  * Mask out the i least significant bits of w
  */
 #define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
 
+static __always_inline void process_port(int cpu,
+					 unsigned int base,
+					 unsigned int *idx,
+					 unsigned int *idx_array)
+{
+	xen_ulong_t pending_bits, bits;
+	int port, irq;
+	struct irq_desc *desc;
+
+	pending_bits = active_evtchns(cpu, base >> EVTCHN_WORD_BITORDER);
+
+	do {
+		bits = MASK_LSBS(pending_bits, *idx);
+
+		/* Nothing pending at or above *idx: done with this word. */
+		if (bits == 0)
+			break;
+
+		*idx = EVTCHN_FIRST_BIT(bits);
+
+		/* Run the handler of the irq bound to this port, if any. */
+		port = base + *idx;
+		irq = evtchn_to_irq[port];
+
+		if (irq != -1) {
+			desc = irq_to_desc(irq);
+			if (desc)
+				generic_handle_irq_desc(irq, desc);
+		}
+
+		*idx = (*idx + 1) % BITS_PER_EVTCHN_WORD;
+
+		/*
+		 * Next caller starts at last processed + 1.  Shared by the
+		 * 2- and 3-level ABIs, so all three current_idx elements
+		 * are written; a zero lower-level index bumps the level
+		 * above.  2-level callers /should/ keep idx_array[2] at
+		 * ~0U so that current_idx[1] simply gets idx_array[1].
+		 * NOTE(review): with 3 levels idx_array[1] == 0 need not
+		 * mean a wrap, yet it still bumps current_idx[0] - confirm.
+		 */
+		__this_cpu_write(current_idx[0],
+				 idx_array[1] ? idx_array[0] :
+				 (idx_array[0]+1) % BITS_PER_EVTCHN_WORD);
+		__this_cpu_write(current_idx[1],
+				 idx_array[2] ? idx_array[1] :
+				 (idx_array[1]+1) % BITS_PER_EVTCHN_WORD);
+		__this_cpu_write(current_idx[2], idx_array[2]);
+	} while (*idx != 0);
+}
+
 /*
- * Search the CPUs pending events bitmasks.  For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
+ * This function processes active event channels top-down, L1 -> L2 ->
+ * .. -> L(n-1) -> bitmap. The selectors are processed recursively;
+ * the event bitmap is processed by process_port.
  *
- * Xen uses a two-level bitmap to speed searching.  The first level is
- * a bitset of words which contain pending event bits.  The second
- * level is a bitset of pending events themselves.
+ * @cpu: current cpu id
+ * @base: accumulated offsets along selector processing
+ * @start_idx: array used to resume index
+ * @idx: array of current processing index
+ * @sel_idx: selector word index
+ * @level: current processing level, from 0 to highest_level
+ * @highest_level: highest recursion level
+ *
+ * If level == highest_level, we reach the event bitmap.  level
+ * variable starts from 0, so highest_level for 2-level ABI is 1,
+ * while for 3-level ABI it is 2.
  */
-static void __xen_evtchn_do_upcall_l2(void)
+static void process(int cpu,
+		    unsigned int base,
+		    unsigned int *start_idx,
+		    unsigned int *idx,
+		    unsigned int sel_idx,
+		    unsigned short level,
+		    unsigned short highest_level)
 {
-	int start_word_idx, start_bit_idx;
-	int word_idx, bit_idx;
 	int i;
-	int cpu = get_cpu();
-	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-	unsigned count;
+	xen_ulong_t pending_words;
 
-	do {
-		xen_ulong_t pending_words;
+	if (level == highest_level) {
+		process_port(cpu, base, &idx[level], idx);
+		return;
+	}
 
-		vcpu_info->evtchn_upcall_pending = 0;
+	pending_words =
+		xchg_xen_ulong(&per_cpu(evtchn_sel, cpu)[level][sel_idx], 0);
 
-		if (__this_cpu_inc_return(xed_nesting_count) - 1)
-			goto out;
+	/* This loop is used to process selectors. */
+	for (i = 0; pending_words != 0; i++) {
+		xen_ulong_t words;
+		unsigned int saved_base;
+
+		words = MASK_LSBS(pending_words, idx[level]);
 
 		/*
-		 * Master flag must be cleared /before/ clearing
-		 * selector flag. xchg_xen_ulong must contain an
-		 * appropriate barrier.
+		 * If we masked out all events, wrap to beginning.
 		 */
-		pending_words = xchg_xen_ulong(per_cpu(evtchn_sel, cpu)[0], 0);
-
-		start_word_idx = __this_cpu_read(current_word_idx);
-		start_bit_idx = __this_cpu_read(current_bit_idx);
-
-		word_idx = start_word_idx;
+		if (words == 0) {
+			idx[level] = 0;
+			start_idx[level+1] = 0;
+			continue;
+		}
 
-		for (i = 0; pending_words != 0; i++) {
-			xen_ulong_t pending_bits;
-			xen_ulong_t words;
+		idx[level] = EVTCHN_FIRST_BIT(words);
+
+		idx[level+1] = 0; /* usually scan entire word from start */
+		if (idx[level] == start_idx[level]) {
+			/* We scan the starting word in two parts */
+			if (i == 0)
+				/* 1st time: start in the middle */
+				idx[level+1] = start_idx[level+1];
+			else
+				/* 2nd time: mask bits done already */
+				idx[level+1] &= (1UL << start_idx[level+1]) - 1;
+		}
 
-			words = MASK_LSBS(pending_words, word_idx);
+		saved_base = base;
+		base += (idx[level] <<
+			 (EVTCHN_WORD_BITORDER * (highest_level-level)));
 
-			/*
-			 * If we masked out all events, wrap to beginning.
-			 */
-			if (words == 0) {
-				word_idx = 0;
-				bit_idx = 0;
-				continue;
-			}
-			word_idx = EVTCHN_FIRST_BIT(words);
-
-			pending_bits = active_evtchns(cpu, word_idx);
-			bit_idx = 0; /* usually scan entire word from start */
-			if (word_idx == start_word_idx) {
-				/* We scan the starting word in two parts */
-				if (i == 0)
-					/* 1st time: start in the middle */
-					bit_idx = start_bit_idx;
-				else
-					/* 2nd time: mask bits done already */
-					bit_idx &= (1UL << start_bit_idx) - 1;
-			}
+		process(cpu, base, start_idx, idx, idx[level],
+			level+1, highest_level);
 
-			do {
-				xen_ulong_t bits;
-				int port, irq;
-				struct irq_desc *desc;
+		base = saved_base;
 
-				bits = MASK_LSBS(pending_bits, bit_idx);
+		/* Scan start_l1i twice; all others once. */
+		if ((idx[level] != start_idx[level]) || (i != 0))
+			pending_words &= ~(1UL << idx[level]);
 
-				/* If we masked out all events, move on. */
-				if (bits == 0)
-					break;
+		idx[level] = (idx[level] + 1) % BITS_PER_EVTCHN_WORD;
+	}
+}
 
-				bit_idx = EVTCHN_FIRST_BIT(bits);
 
-				/* Process port. */
-				port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
-				irq = evtchn_to_irq[port];
+/* This routine is shared between 2/3-level ABI */
+static void ___xen_evtchn_do_upcall(unsigned int *start_idx,
+				    unsigned int *idx,
+				    unsigned short highest_level)
+{
+	int cpu = get_cpu();
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
 
-				if (irq != -1) {
-					desc = irq_to_desc(irq);
-					if (desc)
-						generic_handle_irq_desc(irq, desc);
-				}
+	do {
+		vcpu_info->evtchn_upcall_pending = 0;
 
-				bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
 
-				/* Next caller starts at last processed + 1 */
-				__this_cpu_write(current_word_idx,
-						 bit_idx ? word_idx :
-						 (word_idx+1) % BITS_PER_EVTCHN_WORD);
-				__this_cpu_write(current_bit_idx, bit_idx);
-			} while (bit_idx != 0);
+		start_idx[0] = __this_cpu_read(current_idx[0]);
+		start_idx[1] = __this_cpu_read(current_idx[1]);
+		start_idx[2] = __this_cpu_read(current_idx[2]);
 
-			/* Scan start_l1i twice; all others once. */
-			if ((word_idx != start_word_idx) || (i != 0))
-				pending_words &= ~(1UL << word_idx);
+		idx[0] = start_idx[0];
 
-			word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
-		}
+		process(cpu, 0 /* base */, start_idx, idx,
+			0 /* selector index */,
+			0 /* starting from L1 (1-1=0) */,
+			highest_level);
 
 		BUG_ON(!irqs_disabled());
 
@@ -1451,6 +1619,42 @@ out:
 	put_cpu();
 }
 
+/*
+ * Search the CPU's pending events bitmasks.  For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for
+ * handling.
+ *
+ * Xen uses a two-level bitmap to speed searching.  The first level is
+ * a bitset of words which contain pending event bits.  The second
+ * level is a bitset of pending events themselves.
+ */
+static void __xen_evtchn_do_upcall_l2(void)
+{
+	/*
+	 * process_port() reads three index elements.  The 2-level ABI
+	 * has no third level, so the third element must always stay
+	 * ~0U.
+	 */
+	unsigned int start_idx[3] = { 0, 0, ~0U };
+	unsigned int idx[3] = { 0, 0, ~0U };
+
+	___xen_evtchn_do_upcall(start_idx, idx, 1);
+}
+
+static void __xen_evtchn_do_upcall_l3(void)
+{
+	/*
+	 * The 3-level ABI walks all three elements (two selector
+	 * levels plus the event bitmap), so unlike the 2-level case
+	 * every index starts at 0.
+	 */
+	unsigned int start_idx[3] = { 0, 0, 0 };
+	unsigned int idx[3] = { 0, 0, 0 };
+
+	___xen_evtchn_do_upcall(start_idx, idx, 2);
+
+}
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1870,6 +2074,12 @@ const struct evtchn_ops evtchn_l2_ops = {
 	.do_upcall = __xen_evtchn_do_upcall_l2
 };
 
+const struct evtchn_ops evtchn_l3_ops = {
+	.unmask = __unmask_local_port_l3,
+	.debug_interrupt = xen_debug_interrupt_l3,
+	.do_upcall = __xen_evtchn_do_upcall_l3
+};
+
 static int __cpuinit xen_events_notifier_cb(struct notifier_block *self,
 					    unsigned long action,
 					    void *hcpu)
-- 
1.7.10.4

  parent reply	other threads:[~2013-03-19 15:22 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 01/14] xen: remove typedef in event_channel.h Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 02/14] xen: add KERN_DEBUG in printk Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 03/14] xen: fix output of xen_debug_interrupt Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 04/14] xen: sync public headers Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 05/14] xen: introduce test_and_set_mask Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 06/14] xen: replace raw bit ops with functions Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 07/14] xen: generalized event channel operations Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 08/14] xen: dynamically allocate cpu_evtchn_mask Wei Liu
2013-03-19 15:22 ` Wei Liu [this message]
2013-03-19 15:22 ` [RFC PATCH V5 10/14] xen: document 2/3-level event channel ABI Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 11/14] xen: introduce xen_event_channel_query_extended_abis Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 12/14] xen: introduce xen_event_channel_register_3level Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 13/14] xen: introduce xen_event_channel_register_extended Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 14/14] xen: register 3-level event channel Wei Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1363706528-27141-10-git-send-email-wei.liu2@citrix.com \
    --to=wei.liu2@citrix.com \
    --cc=david.vrabel@citrix.com \
    --cc=ian.campbell@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=konrad.wilk@oracle.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).