From: Wei Liu <wei.liu2@citrix.com>
To: xen-devel@lists.xen.org
Cc: david.vrabel@citrix.com, Wei Liu <wei.liu2@citrix.com>,
ian.campbell@citrix.com, jbeulich@suse.com,
konrad.wilk@oracle.com
Subject: [PATCH 09/13] xen: implement 3-level event channel routines
Date: Thu, 31 Jan 2013 14:47:03 +0000 [thread overview]
Message-ID: <1359643627-29486-10-git-send-email-wei.liu2@citrix.com> (raw)
In-Reply-To: <1359643627-29486-1-git-send-email-wei.liu2@citrix.com>
Only do_upcall, debug_interrupt and unmask_evtchn are required.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
drivers/xen/events.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 291 insertions(+)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30ca620..d953e81 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -51,6 +51,9 @@
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/hvm/params.h>
+/* Bit-index-to-word-index shift: 6 for 64-bit longs, 5 otherwise (presumably 32-bit) */
+#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5)
+
/* N-level event channel, starting from 2 */
unsigned int evtchn_level = 2;
EXPORT_SYMBOL_GPL(evtchn_level);
@@ -61,6 +64,9 @@ EXPORT_SYMBOL_GPL(nr_event_channels);
static unsigned long *evtchn_pending;
static unsigned long *evtchn_mask;
+/* Per-cpu 2nd level selector for 3-level event channel, one word per L1 selector bit */
+static DEFINE_PER_CPU(unsigned long[sizeof(unsigned long) * 8], evtchn_sel_l2);
+
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
@@ -396,6 +402,28 @@ static inline void __unmask_local_port_l2(int port)
vcpu_info->evtchn_upcall_pending = 1;
}
+static inline void __unmask_local_port_l3(int port)
+{
+ struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+ int cpu = smp_processor_id();
+ unsigned int l1bit = port >> (LONG_BITORDER << 1);
+ unsigned int l2bit = port >> LONG_BITORDER;
+
+ sync_clear_bit(port, &evtchn_mask[0]);
+
+ /*
+ * The port is now unmasked. Like a real IO-APIC we 'lose the interrupt
+ * edge' while masked, so (as 'hw_resend_irq' would) re-raise a pending
+ * port: set its L2 then L1 selector bit, stopping at one already set.
+ */
+ if (sync_test_bit(port, &evtchn_pending[0]) &&
+ !sync_test_and_set_bit(l2bit,
+ &per_cpu(evtchn_sel_l2, cpu)[0]) &&
+ !sync_test_and_set_bit(l1bit,
+ &vcpu_info->evtchn_pending_sel))
+ vcpu_info->evtchn_upcall_pending = 1;
+}
+
static void unmask_evtchn(int port)
{
unsigned int cpu = get_cpu();
@@ -411,6 +439,9 @@ static void unmask_evtchn(int port)
case 2:
__unmask_local_port_l2(port);
break;
+ case 3:
+ __unmask_local_port_l3(port);
+ break;
default:
BUG();
}
@@ -1185,6 +1216,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
}
static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id);
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
{
@@ -1215,6 +1247,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
case 2:
rc = xen_debug_interrupt_l2(irq, dev_id);
break;
+ case 3:
+ rc = xen_debug_interrupt_l3(irq, dev_id);
+ break;
default:
BUG();
}
@@ -1285,8 +1320,109 @@ static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id)
return IRQ_HANDLED;
}
+/*
+ * Debug dump of the 3-level event channel state as seen from this cpu:
+ * the pending and mask words, their combinations, and one line per
+ * pending port.  Called from xen_debug_interrupt() for the 3-level case.
+ * irq and dev_id are unused; always returns IRQ_HANDLED.
+ */
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_LONG;
+	struct vcpu_info *v = per_cpu(xen_vcpu, cpu);
+	/* printk field width: hex digits in one unsigned long bitmap word */
+	int width = (int)(sizeof(unsigned long) * 2);
+	int i;
+
+	printk(KERN_DEBUG "\npending (only show words which have bits set to 1):\n ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_pending[i] != 0UL)
+			printk(KERN_DEBUG " word index %d %0*lx\n",
+			       i, width, evtchn_pending[i]);
+
+	printk(KERN_DEBUG "\nglobal mask (only show words which have bits set to 0):\n ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_mask[i] != ~0UL)
+			printk(KERN_DEBUG " word index %d %0*lx\n",
+			       i, width, evtchn_mask[i]);
+
+	/* Pending and not globally masked. */
+	printk(KERN_DEBUG "\nglobally unmasked (only show result words which have bits set to 1):\n ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL)
+			printk(KERN_DEBUG " word index %d %0*lx\n",
+			       i, width, evtchn_pending[i] & ~evtchn_mask[i]);
+
+	printk(KERN_DEBUG "\nlocal cpu%d mask (only show words which have bits set to 1):\n ", cpu);
+	for (i = nr_elems-1; i >= 0; i--)
+		if (cpu_evtchn[i] != 0UL)
+			printk(KERN_DEBUG " word index %d %0*lx\n",
+			       i, width, cpu_evtchn[i]);
+
+	/* Pending, not globally masked, and set in this cpu's mask. */
+	printk(KERN_DEBUG "\nlocally unmasked (only show result words which have bits set to 1):\n ");
+	for (i = nr_elems-1; i >= 0; i--) {
+		unsigned long pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
+			& cpu_evtchn[i];
+
+		if (pending != 0UL)
+			printk(KERN_DEBUG " word index %d %0*lx\n",
+			       i, width, pending);
+	}
+
+	/*
+	 * One line per pending port.  A selector bit that is not set is
+	 * reported as "-clear"; a port is "globally-masked" when its
+	 * evtchn_mask bit is set and "locally-masked" when its bit is not
+	 * set in this cpu's cpu_evtchn_mask.
+	 */
+	printk(KERN_DEBUG "\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+		if (sync_test_bit(i, evtchn_pending)) {
+			int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG);
+			int word_idx_l2 = i / BITS_PER_LONG;
+
+			printk(KERN_DEBUG " %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i, evtchn_to_irq[i],
+			       sync_test_bit(word_idx, &v->evtchn_pending_sel)
+			       ? "" : " l1-clear",
+			       sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu))
+			       ? "" : " l2-clear",
+			       !sync_test_bit(i, evtchn_mask)
+			       ? "" : " globally-masked",
+			       sync_test_bit(i, cpu_evtchn)
+			       ? "" : " locally-masked");
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* The following per-cpu variables are used to save the current state of the
+ * event processing loop.
+ *
+ * 2-level event channel:
+ * current_word_idx is the bit index in L1 selector indicating the word
+ * currently being processed in shared bitmap.
+ * current_bit_idx is the bit index in the word currently being processed in
+ * shared bitmap.
+ * N.B. current_word_idx_l2 is not used.
+ *
+ * 3-level event channel:
+ * current_word_idx is the bit index in L1 selector indicating the word
+ * currently being processed in L2 selector.
+ * current_word_idx_l2 is the bit index in L2 selector word indicating the
+ * word currently being processed in shared bitmap.
+ * current_bit_idx is the bit index in the word currently being processed in
+ * shared bitmap.
+ *
+ */
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
static DEFINE_PER_CPU(unsigned int, current_bit_idx);
/*
@@ -1409,6 +1545,155 @@ out:
put_cpu();
}
+/*
+ * In the 3-level event channel implementation, the first level is a
+ * bitset of words which contain pending bits in the second level.
+ * The second level is another bitset of words which contain pending
+ * bits in the third level. The third level is a bitset of the pending
+ * events themselves.
+ *
+ * Scanning resumes from the per-cpu current_word_idx,
+ * current_word_idx_l2 and current_bit_idx saved by the previous call.
+ */
+static void __xen_evtchn_do_upcall_l3(void)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
+	int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
+	int word_idx_l1, word_idx_l2, bit_idx;
+	int i, j;
+	int cpu = get_cpu();
+
+	do {
+		unsigned long pending_words_l1;
+
+		vcpu_info->evtchn_upcall_pending = 0;
+
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
+#ifndef CONFIG_X86
+		/* No need for a barrier -- XCHG is a barrier on x86. */
+		/* Clear master flag /before/ clearing selector flag. */
+		wmb();
+#endif
+		/* here we get l1 pending selector */
+		pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+		start_word_idx_l1 = __this_cpu_read(current_word_idx);
+		start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
+		start_bit_idx = __this_cpu_read(current_bit_idx);
+
+		word_idx_l1 = start_word_idx_l1;
+
+		/* loop through l1, try to pick up l2 */
+		for (i = 0; pending_words_l1 != 0; i++) {
+			unsigned long words_l1;
+			unsigned long pending_words_l2;
+
+			words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
+
+			if (words_l1 == 0) {
+				word_idx_l1 = 0;
+				start_word_idx_l2 = 0;
+				continue;
+			}
+			word_idx_l1 = __ffs(words_l1);
+
+			pending_words_l2 =
+				xchg(&per_cpu(evtchn_sel_l2, cpu)[word_idx_l1], 0);
+
+			word_idx_l2 = 0;
+			if (word_idx_l1 == start_word_idx_l1) {
+				if (i == 0)
+					word_idx_l2 = start_word_idx_l2;
+				else
+					/* NOTE(review): no-op, word_idx_l2 is 0 here */
+					word_idx_l2 &= (1UL << start_word_idx_l2) - 1;
+			}
+
+			for (j = 0; pending_words_l2 != 0; j++) {
+				unsigned long pending_bits;
+				unsigned long words_l2;
+				unsigned long idx;
+
+				words_l2 = MASK_LSBS(pending_words_l2, word_idx_l2);
+
+				if (words_l2 == 0) {
+					word_idx_l2 = 0;
+					bit_idx = 0;
+					continue;
+				}
+				word_idx_l2 = __ffs(words_l2);
+
+				idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
+				pending_bits = active_evtchns(cpu, idx);
+
+				bit_idx = 0;
+				if (word_idx_l2 == start_word_idx_l2) {
+					if (j == 0)
+						bit_idx = start_bit_idx;
+					else
+						/* NOTE(review): no-op, bit_idx is 0 here */
+						bit_idx &= (1UL<<start_bit_idx)-1;
+				}
+
+				/* process port */
+				do {
+					unsigned long bits;
+					int port, irq, next_l1, next_l2;
+					struct irq_desc *desc;
+
+					bits = MASK_LSBS(pending_bits, bit_idx);
+					if (bits == 0)
+						break;
+
+					bit_idx = __ffs(bits);
+
+					port = (word_idx_l1 << (LONG_BITORDER << 1)) +
+						(word_idx_l2 << LONG_BITORDER) + bit_idx;
+					irq = evtchn_to_irq[port];
+					if (irq != -1) {
+						desc = irq_to_desc(irq);
+						if (desc)
+							generic_handle_irq_desc(irq, desc);
+					}
+
+					bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+
+					/*
+					 * Next caller starts at last processed + 1: carry
+					 * into l2 when bit_idx wraps, into l1 when both do.
+					 */
+					next_l2 = bit_idx ? word_idx_l2 :
+						(word_idx_l2 + 1) % BITS_PER_LONG;
+					next_l1 = (bit_idx || next_l2) ? word_idx_l1 :
+						(word_idx_l1 + 1) % BITS_PER_LONG;
+					__this_cpu_write(current_bit_idx, bit_idx);
+					__this_cpu_write(current_word_idx_l2, next_l2);
+					__this_cpu_write(current_word_idx, next_l1);
+				} while (bit_idx != 0);
+
+				if ((word_idx_l2 != start_word_idx_l2) || (j != 0))
+					pending_words_l2 &= ~(1UL << word_idx_l2);
+
+				word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+			}
+
+			if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
+				pending_words_l1 &= ~(1UL << word_idx_l1);
+
+			word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+		}
+
+		BUG_ON(!irqs_disabled());
+		count = __this_cpu_read(xed_nesting_count);
+		__this_cpu_write(xed_nesting_count, 0);
+	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
+
+out:
+	put_cpu();
+}
+
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1420,6 +1705,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
case 2:
__xen_evtchn_do_upcall_l2();
break;
+ case 3:
+ __xen_evtchn_do_upcall_l3();
+ break;
default:
BUG();
}
@@ -1434,6 +1722,9 @@ void xen_hvm_evtchn_do_upcall(void)
case 2:
__xen_evtchn_do_upcall_l2();
break;
+ case 3:
+ __xen_evtchn_do_upcall_l3();
+ break;
default:
BUG();
}
--
1.7.10.4
next prev parent reply other threads:[~2013-01-31 14:47 UTC|newest]
Thread overview: 61+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-31 14:46 [PATCH 00/13] Implement 3-level event channel in Linux Wei Liu
2013-01-31 14:46 ` [PATCH 01/13] xen: fix output of xen_debug_interrupt Wei Liu
2013-02-01 11:06 ` David Vrabel
2013-02-01 13:10 ` Wei Liu
2013-02-01 13:14 ` Wei Liu
2013-01-31 14:46 ` [PATCH 02/13] xen: fix error handling path if xen_allocate_irq_dynamic fails Wei Liu
2013-02-01 11:19 ` David Vrabel
2013-02-01 11:26 ` Ian Campbell
2013-02-01 12:36 ` Wei Liu
2013-02-06 15:49 ` Konrad Rzeszutek Wilk
2013-01-31 14:46 ` [PATCH 03/13] xen: fix evtchn_unbind_from_user Wei Liu
2013-02-01 11:15 ` David Vrabel
2013-02-01 12:33 ` Wei Liu
2013-02-05 16:57 ` Konrad Rzeszutek Wilk
2013-01-31 14:46 ` [PATCH 04/13] xen: sync public headers Wei Liu
2013-02-05 17:00 ` Konrad Rzeszutek Wilk
2013-02-05 17:23 ` Wei Liu
2013-02-06 16:57 ` Konrad Rzeszutek Wilk
2013-02-07 9:22 ` Paul Durrant
2013-02-07 11:57 ` Wei Liu
2013-02-08 16:06 ` Paul Durrant
2013-02-08 16:22 ` Ian Campbell
2013-02-08 16:36 ` Paul Durrant
2013-02-08 16:49 ` Tim Deegan
2013-02-08 16:56 ` Jan Beulich
2013-02-08 16:59 ` Ian Campbell
2013-02-08 17:06 ` Tim Deegan
2013-02-08 17:09 ` Ian Campbell
2013-02-08 19:45 ` David Vrabel
2013-02-08 19:55 ` Ian Campbell
2013-02-08 17:07 ` Paul Durrant
2013-02-08 16:37 ` Wei Liu
2013-02-08 16:40 ` Ian Campbell
2013-02-07 12:23 ` Ian Campbell
2013-01-31 14:46 ` [PATCH 05/13] xen: introduce test_and_set_mask Wei Liu
2013-02-01 11:35 ` David Vrabel
2013-02-01 22:08 ` Ian Campbell
2013-01-31 14:47 ` [PATCH 06/13] xen: replace raw bit ops with functions Wei Liu
2013-01-31 14:47 ` [PATCH 07/13] xen: generalized event channel operations Wei Liu
2013-02-05 17:04 ` Konrad Rzeszutek Wilk
2013-02-05 17:08 ` Wei Liu
2013-02-05 17:19 ` Konrad Rzeszutek Wilk
2013-02-05 17:23 ` Wei Liu
2013-02-05 19:44 ` Konrad Rzeszutek Wilk
2013-01-31 14:47 ` [PATCH 08/13] xen: dynamically allocate cpu_evtchn_mask Wei Liu
2013-02-01 11:29 ` David Vrabel
2013-01-31 14:47 ` Wei Liu [this message]
2013-02-05 17:09 ` [PATCH 09/13] xen: implement 3-level event channel routines Konrad Rzeszutek Wilk
2013-02-05 17:39 ` Wei Liu
2013-02-05 19:46 ` Konrad Rzeszutek Wilk
2013-01-31 14:47 ` [PATCH 10/13] xen: introduce xen_event_channel_register_3level Wei Liu
2013-02-04 8:56 ` Jan Beulich
2013-02-04 10:36 ` Wei Liu
2013-02-05 16:55 ` Konrad Rzeszutek Wilk
2013-02-05 17:05 ` Wei Liu
2013-02-05 17:14 ` Konrad Rzeszutek Wilk
2013-01-31 14:47 ` [PATCH 11/13] xen: introduce xen_event_channel_register_nlevel Wei Liu
2013-01-31 14:47 ` [PATCH 12/13] xen: register 3-level event channel Wei Liu
2013-02-01 11:31 ` David Vrabel
2013-01-31 14:47 ` [PATCH 13/13] xen: only register 3-level event channel for Dom0 Wei Liu
2013-01-31 16:39 ` Wei Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1359643627-29486-10-git-send-email-wei.liu2@citrix.com \
--to=wei.liu2@citrix.com \
--cc=david.vrabel@citrix.com \
--cc=ian.campbell@citrix.com \
--cc=jbeulich@suse.com \
--cc=konrad.wilk@oracle.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).