* [RFC PATCH 2/3] Xen: rework NR_EVENT_CHANNELS related stuffs. @ 2012-12-31 18:37 Wei Liu 2012-12-31 18:37 ` [RFC PATCH 3/3] Xen: implement 3-level event channel routines Wei Liu 0 siblings, 1 reply; 4+ messages in thread From: Wei Liu @ 2012-12-31 18:37 UTC (permalink / raw) To: xen-devel; +Cc: Wei Liu Signed-off-by: Wei Liu <wei.liu2@citrix.com> --- drivers/xen/events.c | 44 +++++++++++++++++++++++++++++-------------- drivers/xen/evtchn.c | 16 +++++++++------- include/xen/events.h | 3 +++ include/xen/interface/xen.h | 17 ++++++++++++++++- 4 files changed, 58 insertions(+), 22 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 835101f..f60ba76 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -52,7 +52,8 @@ #include <xen/interface/hvm/params.h> /* N-level event channel, starting from 2 */ -static unsigned int evtchn_level = 2; +unsigned int evtchn_level = 2; +EXPORT_SYMBOL_GPL(evtchn_level); struct evtchn_ops { unsigned long (*active_evtchns)(unsigned int, @@ -130,8 +131,7 @@ static int *evtchn_to_irq; static unsigned long *pirq_eoi_map; static bool (*pirq_needs_eoi)(unsigned irq); -static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], - cpu_evtchn_mask); +static DEFINE_PER_CPU(unsigned long *, cpu_evtchn_mask); /* Xen will never allocate port zero for any purpose. 
*/ #define VALID_EVTCHN(chn) ((chn) != 0) @@ -913,7 +913,7 @@ static int find_virq(unsigned int virq, unsigned int cpu) int port, rc = -ENOENT; memset(&status, 0, sizeof(status)); - for (port = 0; port <= NR_EVENT_CHANNELS; port++) { + for (port = 0; port <= NR_EVENT_CHANNELS(evtchn_level); port++) { status.dom = DOMID_SELF; status.port = port; rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); @@ -1138,7 +1138,7 @@ int evtchn_get(unsigned int evtchn) struct irq_info *info; int err = -ENOENT; - if (evtchn >= NR_EVENT_CHANNELS) + if (evtchn >= NR_EVENT_CHANNELS(evtchn_level)) return -EINVAL; mutex_lock(&irq_mapping_update_lock); @@ -1227,7 +1227,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) i % 8 == 0 ? "\n " : " "); printk("\nlocal cpu%d mask:\n ", cpu); - for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--) + for (i = (NR_EVENT_CHANNELS(evtchn_level)/BITS_PER_LONG)-1; i >= 0; i--) printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), cpu_evtchn[i], i % 8 == 0 ? "\n " : " "); @@ -1242,7 +1242,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) } printk("\npending list:\n"); - for (i = 0; i < NR_EVENT_CHANNELS; i++) { + for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) { if (sync_test_bit(i, sh->evtchn_pending)) { int word_idx = i / BITS_PER_LONG; printk(" %d: event %d -> irq %d%s%s%s\n", @@ -1709,14 +1709,14 @@ void xen_irq_resume(void) init_evtchn_cpu_bindings(); /* New event-channel space is not 'live' yet. */ - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS(evtchn_level); evtchn++) eops->mask_evtchn(evtchn); /* No IRQ <-> event-channel mappings. 
*/ list_for_each_entry(info, &xen_irq_list_head, list) info->evtchn = 0; /* zap event-channel binding */ - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS(evtchn_level); evtchn++) evtchn_to_irq[evtchn] = -1; for_each_possible_cpu(cpu) { @@ -1824,21 +1824,37 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = { void __init xen_init_IRQ(void) { int i, rc; + int cpu; - evtchn_level = 2; + /* Setup 2-level event channel */ eops = &evtchn_ops_l2; + evtchn_level = 2; - /* Setup 2-level event channel */ - evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq), + evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS(evtchn_level), + sizeof(*evtchn_to_irq), GFP_KERNEL); BUG_ON(!evtchn_to_irq); - for (i = 0; i < NR_EVENT_CHANNELS; i++) + + for_each_possible_cpu(cpu) { + void *p; + unsigned int nr = NR_EVENT_CHANNELS(evtchn_level)/BITS_PER_LONG; + p = kzalloc_node(sizeof(unsigned long) * nr, + GFP_KERNEL, + cpu_to_node(cpu)); + if (!p) + p = kzalloc(sizeof(unsigned long) * nr, + GFP_KERNEL); + BUG_ON(!p); + per_cpu(cpu_evtchn_mask, cpu) = p; + } + + for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) evtchn_to_irq[i] = -1; init_evtchn_cpu_bindings(); /* No event channels are 'live' right now. 
*/ - for (i = 0; i < NR_EVENT_CHANNELS; i++) + for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) eops->mask_evtchn(i); pirq_needs_eoi = pirq_needs_eoi_flag; diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index b1f60a0..cb45ecf 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -232,7 +232,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { unsigned port = kbuf[i]; - if (port < NR_EVENT_CHANNELS && + if (port < NR_EVENT_CHANNELS(evtchn_level) && get_port_user(port) == u && !get_port_enabled(port)) { set_port_enabled(port, true); @@ -364,7 +364,7 @@ static long evtchn_ioctl(struct file *file, break; rc = -EINVAL; - if (unbind.port >= NR_EVENT_CHANNELS) + if (unbind.port >= NR_EVENT_CHANNELS(evtchn_level)) break; spin_lock_irq(&port_user_lock); @@ -392,7 +392,7 @@ static long evtchn_ioctl(struct file *file, if (copy_from_user(&notify, uarg, sizeof(notify))) break; - if (notify.port >= NR_EVENT_CHANNELS) { + if (notify.port >= NR_EVENT_CHANNELS(evtchn_level)) { rc = -EINVAL; } else if (get_port_user(notify.port) != u) { rc = -ENOTCONN; @@ -482,7 +482,7 @@ static int evtchn_release(struct inode *inode, struct file *filp) free_page((unsigned long)u->ring); - for (i = 0; i < NR_EVENT_CHANNELS; i++) { + for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) { if (get_port_user(i) != u) continue; @@ -491,7 +491,7 @@ static int evtchn_release(struct inode *inode, struct file *filp) spin_unlock_irq(&port_user_lock); - for (i = 0; i < NR_EVENT_CHANNELS; i++) { + for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) { if (get_port_user(i) != u) continue; @@ -528,7 +528,8 @@ static int __init evtchn_init(void) if (!xen_domain()) return -ENODEV; - port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); + port_user = kcalloc(NR_EVENT_CHANNELS(evtchn_level), + sizeof(*port_user), GFP_KERNEL); if (port_user == NULL) return -ENOMEM; @@ -541,7 +542,8 @@ static int 
__init evtchn_init(void) return err; } - printk(KERN_INFO "Event-channel device installed.\n"); + printk(KERN_INFO "Event-channel device installed." + " Event-channel level: %d\n", evtchn_level); return 0; } diff --git a/include/xen/events.h b/include/xen/events.h index 04399b2..bc10f22 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -109,4 +109,7 @@ int xen_irq_from_gsi(unsigned gsi); /* Determine whether to ignore this IRQ if it is passed to a guest. */ int xen_test_irq_shared(int irq); +/* N-level event channels */ +extern unsigned int evtchn_level; + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index a890804..c66e1ff 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -283,9 +283,24 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); /* * Event channel endpoints per domain: + * 2-level: * 1024 if a long is 32 bits; 4096 if a long is 64 bits. + * 3-level: + * 32k if a long is 32 bits; 256k if a long is 64 bits. */ -#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) +#define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64) +#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(unsigned long)) +#define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0; \ + switch (x) { \ + case 2: \ + __v = NR_EVENT_CHANNELS_L2; break; \ + case 3: \ + __v = NR_EVENT_CHANNELS_L3; break; \ + default: \ + BUG(); \ + } \ + __v; }) + struct vcpu_time_info { /* -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* [RFC PATCH 3/3] Xen: implement 3-level event channel routines. 2012-12-31 18:37 [RFC PATCH 2/3] Xen: rework NR_EVENT_CHANNELS related stuffs Wei Liu @ 2012-12-31 18:37 ` Wei Liu 0 siblings, 0 replies; 4+ messages in thread From: Wei Liu @ 2012-12-31 18:37 UTC (permalink / raw) To: xen-devel; +Cc: Wei Liu Signed-off-by: Wei Liu <wei.liu2@citrix.com> --- arch/x86/xen/enlighten.c | 7 + drivers/xen/events.c | 419 +++++++++++++++++++++++++++++++-- include/xen/events.h | 2 + include/xen/interface/event_channel.h | 24 ++ include/xen/interface/xen.h | 2 +- 5 files changed, 437 insertions(+), 17 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bc893e7..f471881 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -43,6 +43,7 @@ #include <xen/hvm.h> #include <xen/hvc-console.h> #include <xen/acpi.h> +#include <xen/events.h> #include <asm/paravirt.h> #include <asm/apic.h> @@ -195,6 +196,9 @@ void xen_vcpu_restore(void) HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) BUG(); } + + if (evtchn_level_param == 3) + xen_event_channel_setup_3level(); } static void __init xen_banner(void) @@ -1028,6 +1032,9 @@ void xen_setup_vcpu_info_placement(void) for_each_possible_cpu(cpu) xen_vcpu_setup(cpu); + if (evtchn_level_param == 3) + xen_event_channel_setup_3level(); + /* xen_vcpu_setup managed to place the vcpu_info within the percpu area for all cpus, so make use of it */ if (have_vcpu_info_placement) { diff --git a/drivers/xen/events.c b/drivers/xen/events.c index f60ba76..adb94e9 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -52,9 +52,15 @@ #include <xen/interface/hvm/params.h> /* N-level event channel, starting from 2 */ +unsigned int evtchn_level_param = -1; unsigned int evtchn_level = 2; EXPORT_SYMBOL_GPL(evtchn_level); +/* 3-level event channel */ +DEFINE_PER_CPU(unsigned long [sizeof(unsigned long)*8], evtchn_sel_l2); +unsigned long evtchn_pending[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] __page_aligned_bss; +unsigned 
long evtchn_mask[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] __page_aligned_bss; + struct evtchn_ops { unsigned long (*active_evtchns)(unsigned int, struct shared_info*, unsigned int); @@ -142,6 +148,29 @@ static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); static void disable_dynirq(struct irq_data *data); +static int __init parse_evtchn_level(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "3") == 0) + evtchn_level_param = 3; + + return 0; +} +early_param("evtchn_level", parse_evtchn_level); + +static inline int __is_masked_l2(int chn) +{ + struct shared_info *sh = HYPERVISOR_shared_info; + return sync_test_and_set_bit(chn, sh->evtchn_mask); +} + +static inline int __is_masked_l3(int chn) +{ + return sync_test_and_set_bit(chn, evtchn_mask); +} + /* Get info for IRQ */ static struct irq_info *info_for_irq(unsigned irq) { @@ -311,6 +340,15 @@ static inline unsigned long __active_evtchns_l2(unsigned int cpu, ~sh->evtchn_mask[idx]; } +static inline unsigned long __active_evtchns_l3(unsigned int cpu, + struct shared_info *sh, + unsigned int idx) +{ + return evtchn_pending[idx] & + per_cpu(cpu_evtchn_mask, cpu)[idx] & + ~evtchn_mask[idx]; +} + static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) { int irq = evtchn_to_irq[chn]; @@ -351,18 +389,33 @@ static inline void __clear_evtchn_l2(int port) sync_clear_bit(port, &s->evtchn_pending[0]); } +static inline void __clear_evtchn_l3(int port) +{ + sync_clear_bit(port, &evtchn_pending[0]); +} + static inline void __set_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; sync_set_bit(port, &s->evtchn_pending[0]); } +static inline void __set_evtchn_l3(int port) +{ + sync_set_bit(port, &evtchn_pending[0]); +} + static inline int __test_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; return sync_test_bit(port, &s->evtchn_pending[0]); } +static inline int __test_evtchn_l3(int port) +{ + return sync_test_bit(port, &evtchn_pending[0]); 
+} + /** * notify_remote_via_irq - send event to remote end of event channel via irq * @irq: irq of event channel to send event to @@ -386,6 +439,11 @@ static void __mask_evtchn_l2(int port) sync_set_bit(port, &s->evtchn_mask[0]); } +static void __mask_evtchn_l3(int port) +{ + sync_set_bit(port, &evtchn_mask[0]); +} + static void __unmask_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -416,6 +474,36 @@ static void __unmask_evtchn_l2(int port) put_cpu(); } +static void __unmask_evtchn_l3(int port) +{ + unsigned int cpu = get_cpu(); + int l1cb = BITS_PER_LONG * BITS_PER_LONG; + int l2cb = BITS_PER_LONG; + + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } else { + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + + sync_clear_bit(port, &evtchn_mask[0]); + + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose + * the interrupt edge' if the channel is masked. 
+ */ + if (sync_test_bit(port, &evtchn_pending[0]) && + !sync_test_and_set_bit(port / l2cb, + &per_cpu(evtchn_sel_l2, cpu)[0]) && + !sync_test_and_set_bit(port / l1cb, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } + + put_cpu(); +} + static void xen_irq_init(unsigned irq) { struct irq_info *info; @@ -1181,6 +1269,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) notify_remote_via_irq(irq); } +static DEFINE_SPINLOCK(debug_lock); static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) { struct shared_info *sh = HYPERVISOR_shared_info; @@ -1188,7 +1277,6 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); int i; unsigned long flags; - static DEFINE_SPINLOCK(debug_lock); struct vcpu_info *v; spin_lock_irqsave(&debug_lock, flags); @@ -1196,13 +1284,13 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) printk("\nvcpu %d\n ", cpu); for_each_online_cpu(i) { - int pending; + int masked; v = per_cpu(xen_vcpu, i); - pending = (get_irq_regs() && i == cpu) + masked = (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask; printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, - pending, v->evtchn_upcall_pending, + masked, v->evtchn_upcall_pending, (int)(sizeof(v->evtchn_pending_sel)*2), v->evtchn_pending_sel); } @@ -1227,7 +1315,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) i % 8 == 0 ? "\n " : " "); printk("\nlocal cpu%d mask:\n ", cpu); - for (i = (NR_EVENT_CHANNELS(evtchn_level)/BITS_PER_LONG)-1; i >= 0; i--) + for (i = (NR_EVENT_CHANNELS(2)/BITS_PER_LONG)-1; i >= 0; i--) printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), cpu_evtchn[i], i % 8 == 0 ? 
"\n " : " "); @@ -1242,7 +1330,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) } printk("\npending list:\n"); - for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) { + for (i = 0; i < NR_EVENT_CHANNELS(2); i++) { if (sync_test_bit(i, sh->evtchn_pending)) { int word_idx = i / BITS_PER_LONG; printk(" %d: event %d -> irq %d%s%s%s\n", @@ -1262,15 +1350,110 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t __xen_debug_interrupt_l3(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); + int i, j; + unsigned long flags; + struct vcpu_info *v; + + spin_lock_irqsave(&debug_lock, flags); + + printk("\nvcpu %d\n ", cpu); + + for_each_online_cpu(i) { + int masked; + + v = per_cpu(xen_vcpu, i); + masked = (get_irq_regs() && i == cpu) + ? xen_irqs_disabled(get_irq_regs()) + : v->evtchn_upcall_mask; + printk("%d: masked=%d pending=%d event_sel_l1 %0*lx\n ", i, + masked, v->evtchn_upcall_pending, + (int)(sizeof(v->evtchn_pending_sel)*2), + v->evtchn_pending_sel); + + printk("\nevtchn_sel_l2:\n "); + for (j = (sizeof(unsigned long)*8)-1; j >= 0; j--) + printk("%0*lx%s", + (int)(sizeof(evtchn_sel_l2[0])*2), + per_cpu(evtchn_sel_l2, i)[j], + j % 8 == 0 ? "\n " : " "); + } + + v = per_cpu(xen_vcpu, cpu); + + printk("\npending:\n "); + for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(evtchn_pending[0])*2), + evtchn_pending[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nglobal mask:\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2), + evtchn_mask[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nglobally unmasked:\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2), + evtchn_pending[i] & ~evtchn_mask[i], + i % 8 == 0 ? 
"\n " : " "); + + printk("\nlocal cpu%d mask:\n ", cpu); + for (i = (NR_EVENT_CHANNELS(3)/BITS_PER_LONG)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), + cpu_evtchn[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nlocally unmasked:\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) { + unsigned long pending = evtchn_pending[i] + & ~evtchn_mask[i] + & cpu_evtchn[i]; + printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2), + pending, i % 8 == 0 ? "\n " : " "); + } + + printk("\npending list:\n"); + for (i = 0; i < NR_EVENT_CHANNELS(3); i++) { + if (sync_test_bit(i, evtchn_pending)) { + int word_idx_l1 = i / (BITS_PER_LONG * BITS_PER_LONG); + int word_idx_l2 = i / BITS_PER_LONG; + printk(" %d: event %d -> irq %d%s%s%s%s\n", + cpu_from_evtchn(i), i, + evtchn_to_irq[i], + sync_test_bit(word_idx_l1, &v->evtchn_pending_sel) + ? "" : " l1-clear", + sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu)) + ? "" : " l2-clear", + !sync_test_bit(i, evtchn_mask) + ? "" : " globally-masked", + sync_test_bit(i, cpu_evtchn) + ? "" : " locally-masked"); + } + } + + spin_unlock_irqrestore(&debug_lock, flags); + + return IRQ_HANDLED; +} + irqreturn_t xen_debug_interrupt(int irq, void *dev_id) { return eops->xen_debug_interrupt(irq, dev_id); } static DEFINE_PER_CPU(unsigned, xed_nesting_count); + +/* 2-level event channel does not use current_word_idx_l2 */ static DEFINE_PER_CPU(unsigned int, current_word_idx); +static DEFINE_PER_CPU(unsigned int, current_word_idx_l2); static DEFINE_PER_CPU(unsigned int, current_bit_idx); + /* * Mask out the i least significant bits of w */ @@ -1303,7 +1486,8 @@ static void __xen_evtchn_do_upcall_l2(void) if (__this_cpu_inc_return(xed_nesting_count) - 1) goto out; -#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ +#ifndef CONFIG_X86 + /* No need for a barrier -- XCHG is a barrier on x86. */ /* Clear master flag /before/ clearing selector flag. 
*/ wmb(); #endif @@ -1392,6 +1576,155 @@ out: put_cpu(); } +void __xen_evtchn_do_upcall_l3(void) +{ + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + unsigned count; + int start_word_idx_l1, start_word_idx_l2, start_bit_idx; + int word_idx_l1, word_idx_l2, bit_idx; + int i, j; + unsigned long l1cb, l2cb; + int cpu = get_cpu(); + + l1cb = BITS_PER_LONG * BITS_PER_LONG; + l2cb = BITS_PER_LONG; + + do { + unsigned long pending_words_l1; + + vcpu_info->evtchn_upcall_pending = 0; + + if (__this_cpu_inc_return(xed_nesting_count) - 1) + goto out; +#ifndef CONFIG_X86 + /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. */ + wmb(); +#endif + /* here we get l1 pending selector */ + pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + + start_word_idx_l1 = __this_cpu_read(current_word_idx); + start_word_idx_l2 = __this_cpu_read(current_word_idx_l2); + start_bit_idx = __this_cpu_read(current_bit_idx); + + word_idx_l1 = start_word_idx_l1; + + /* loop through l1, try to pick up l2 */ + for (i = 0; pending_words_l1 != 0; i++) { + unsigned long words_l1; + unsigned long pending_words_l2; + unsigned long pwl2idx; + + words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1); + + if (words_l1 == 0) { + word_idx_l1 = 0; + start_word_idx_l2 = 0; + continue; + } + + word_idx_l1 = __ffs(words_l1); + + pwl2idx = word_idx_l1 * BITS_PER_LONG; + + pending_words_l2 = + xchg(&per_cpu(evtchn_sel_l2, cpu)[pwl2idx], + 0); + + word_idx_l2 = 0; + if (word_idx_l1 == start_word_idx_l1) { + if (i == 0) + word_idx_l2 = start_word_idx_l2; + else + word_idx_l2 &= (1UL << start_word_idx_l2) - 1; + } + + for (j = 0; pending_words_l2 != 0; j++) { + unsigned long pending_bits; + unsigned long words_l2; + unsigned long idx; + + words_l2 = MASK_LSBS(pending_words_l2, + word_idx_l2); + + if (words_l2 == 0) { + word_idx_l2 = 0; + bit_idx = 0; + continue; + } + + word_idx_l2 = __ffs(words_l2); + + idx = 
word_idx_l1*BITS_PER_LONG+word_idx_l2; + pending_bits = + eops->active_evtchns(cpu, NULL, idx); + + bit_idx = 0; + if (word_idx_l2 == start_word_idx_l2) { + if (j == 0) + bit_idx = start_bit_idx; + else + bit_idx &= (1UL<<start_bit_idx)-1; + } + + /* process port */ + do { + unsigned long bits; + int port, irq; + struct irq_desc *desc; + + bits = MASK_LSBS(pending_bits, bit_idx); + + if (bits == 0) + break; + + bit_idx = __ffs(bits); + + port = word_idx_l1 * l1cb + + word_idx_l2 * l2cb + + bit_idx; + + irq = evtchn_to_irq[port]; + + if (irq != -1) { + desc = irq_to_desc(irq); + if (desc) + generic_handle_irq_desc(irq, desc); + } + + bit_idx = (bit_idx + 1) % BITS_PER_LONG; + + __this_cpu_write(current_bit_idx, bit_idx); + __this_cpu_write(current_word_idx_l2, + bit_idx ? word_idx_l2 : + (word_idx_l2+1) % BITS_PER_LONG); + __this_cpu_write(current_word_idx_l2, + word_idx_l2 ? word_idx_l1 : + (word_idx_l1+1) % BITS_PER_LONG); + } while (bit_idx != 0); + + if ((word_idx_l2 != start_word_idx_l2) || (j != 0)) + pending_words_l2 &= ~(1UL << word_idx_l2); + + word_idx_l2 = (word_idx_l2) % BITS_PER_LONG; + } + + if ((word_idx_l1 != start_word_idx_l1) || (i != 0)) + pending_words_l1 &= ~(1UL << word_idx_l1); + + word_idx_l1 = (word_idx_l1) % BITS_PER_LONG; + } + + BUG_ON(!irqs_disabled()); + count = __this_cpu_read(xed_nesting_count); + __this_cpu_write(xed_nesting_count, 0); + } while (count != 1 || vcpu_info->evtchn_upcall_pending); + +out: + put_cpu(); +} + void xen_evtchn_do_upcall(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -1525,12 +1858,6 @@ static void mask_ack_dynirq(struct irq_data *data) ack_dynirq(data); } -static inline int __is_masked_l2(int chn) -{ - struct shared_info *sh = HYPERVISOR_shared_info; - return sync_test_and_set_bit(chn, sh->evtchn_mask); -} - static int retrigger_dynirq(struct irq_data *data) { int evtchn = evtchn_from_irq(data->irq); @@ -1821,14 +2148,74 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = { 
.xen_debug_interrupt = __xen_debug_interrupt_l2, }; +static struct evtchn_ops evtchn_ops_l3 __read_mostly = { + .active_evtchns = __active_evtchns_l3, + .clear_evtchn = __clear_evtchn_l3, + .set_evtchn = __set_evtchn_l3, + .test_evtchn = __test_evtchn_l3, + .mask_evtchn = __mask_evtchn_l3, + .unmask_evtchn = __unmask_evtchn_l3, + .is_masked = __is_masked_l3, + .xen_evtchn_do_upcall = __xen_evtchn_do_upcall_l3, + .xen_debug_interrupt = __xen_debug_interrupt_l3, +}; + +int xen_event_channel_setup_3level(void) +{ + evtchn_register_nlevel_t reg; + int i, nr_pages, cpu; + unsigned long mfns[nr_cpu_ids]; + unsigned long offsets[nr_cpu_ids]; + int rc = -EINVAL; + + memset(&reg, 0, sizeof(reg)); + + reg.level = 3; + nr_pages = (sizeof(unsigned long) == 4 ? 1 : 8); + + for (i = 0; i < nr_pages; i++) { + unsigned long offset = PAGE_SIZE * i; + reg.u.l3.evtchn_pending[i] = + arbitrary_virt_to_mfn( + (void *)((unsigned long)evtchn_pending+offset)); + reg.u.l3.evtchn_mask[i] = + arbitrary_virt_to_mfn( + (void *)((unsigned long)evtchn_mask+offset)); + } + + reg.u.l3.l2sel_mfn = mfns; + reg.u.l3.l2sel_offset = offsets; + reg.u.l3.nr_vcpus = nr_cpu_ids; + + for_each_possible_cpu(cpu) { + reg.u.l3.l2sel_mfn[cpu] = + arbitrary_virt_to_mfn(&per_cpu(evtchn_sel_l2, cpu)); + reg.u.l3.l2sel_offset[cpu] = + offset_in_page(&per_cpu(evtchn_sel_l2, cpu)); + } + + rc = HYPERVISOR_event_channel_op(EVTCHNOP_register_nlevel, &reg); + + if (rc == 0) + evtchn_level = 3; + + return rc; +} +EXPORT_SYMBOL_GPL(xen_event_channel_setup_3level); + void __init xen_init_IRQ(void) { int i, rc; int cpu; - /* Setup 2-level event channel */ - eops = &evtchn_ops_l2; - evtchn_level = 2; + switch (evtchn_level) { + case 2: + eops = &evtchn_ops_l2; break; + case 3: + eops = &evtchn_ops_l3; break; + default: + BUG(); + } evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS(evtchn_level), sizeof(*evtchn_to_irq), diff --git a/include/xen/events.h b/include/xen/events.h index bc10f22..87696fc 100644 --- a/include/xen/events.h +++ 
b/include/xen/events.h @@ -111,5 +111,7 @@ int xen_test_irq_shared(int irq); /* N-level event channels */ extern unsigned int evtchn_level; +extern unsigned int evtchn_level_param; +int xen_event_channel_setup_3level(void); #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h index f494292..f764d21 100644 --- a/include/xen/interface/event_channel.h +++ b/include/xen/interface/event_channel.h @@ -190,6 +190,30 @@ struct evtchn_reset { }; typedef struct evtchn_reset evtchn_reset_t; +/* + * EVTCHNOP_register_nlevel: Register N level event channels. + * NOTES: + * 1. currently only 3-level is supported. + * 2. should fall back to basic 2-level if this call fails. + */ +#define EVTCHNOP_register_nlevel 11 +#define MAX_L3_PAGES 8 /* 8 pages for 64 bits */ +struct evtchn_register_3level { + unsigned long evtchn_pending[MAX_L3_PAGES]; + unsigned long evtchn_mask[MAX_L3_PAGES]; + unsigned long *l2sel_mfn; + unsigned long *l2sel_offset; + unsigned int nr_vcpus; +}; + +struct evtchn_register_nlevel { + uint32_t level; + union { + struct evtchn_register_3level l3; + } u; +}; +typedef struct evtchn_register_nlevel evtchn_register_nlevel_t; + struct evtchn_op { uint32_t cmd; /* EVTCHNOP_* */ union { diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index c66e1ff..7cb9d8f 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -289,7 +289,7 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); * 32k if a long is 32 bits; 256k if a long is 64 bits. */ #define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64) -#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(unsigned long)) +#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * 64) #define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0; \ switch (x) { \ case 2: \ -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Implement 3-level event channel routines in Linux. @ 2012-12-31 18:38 Wei Liu 2012-12-31 18:38 ` [RFC PATCH 3/3] Xen: implement 3-level event channel routines Wei Liu 0 siblings, 1 reply; 4+ messages in thread From: Wei Liu @ 2012-12-31 18:38 UTC (permalink / raw) To: xen-devel; +Cc: konrad.wilk This patch series implements 3-level event channel routines in the Linux kernel. My thought is that the 3-level event channel is only useful for Dom0 or a driver domain, so it is not enabled by default. Enable it with evtchn_level=3 on the kernel command line. HVM is not supported at the moment, as it is not very likely to need this, and I haven't found the right place to issue the hypercall. My understanding is that PVH has more or less the same initialization process as PV, so the current implementation should work for PVH as well. Please correct me if I'm wrong. ^ permalink raw reply [flat|nested] 4+ messages in thread
* [RFC PATCH 3/3] Xen: implement 3-level event channel routines. 2012-12-31 18:38 Implement 3-level event channel routines in Linux Wei Liu @ 2012-12-31 18:38 ` Wei Liu 2013-01-02 14:57 ` David Vrabel 0 siblings, 1 reply; 4+ messages in thread From: Wei Liu @ 2012-12-31 18:38 UTC (permalink / raw) To: xen-devel; +Cc: Wei Liu, konrad.wilk Signed-off-by: Wei Liu <wei.liu2@citrix.com> --- arch/x86/xen/enlighten.c | 7 + drivers/xen/events.c | 419 +++++++++++++++++++++++++++++++-- include/xen/events.h | 2 + include/xen/interface/event_channel.h | 24 ++ include/xen/interface/xen.h | 2 +- 5 files changed, 437 insertions(+), 17 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bc893e7..f471881 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -43,6 +43,7 @@ #include <xen/hvm.h> #include <xen/hvc-console.h> #include <xen/acpi.h> +#include <xen/events.h> #include <asm/paravirt.h> #include <asm/apic.h> @@ -195,6 +196,9 @@ void xen_vcpu_restore(void) HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) BUG(); } + + if (evtchn_level_param == 3) + xen_event_channel_setup_3level(); } static void __init xen_banner(void) @@ -1028,6 +1032,9 @@ void xen_setup_vcpu_info_placement(void) for_each_possible_cpu(cpu) xen_vcpu_setup(cpu); + if (evtchn_level_param == 3) + xen_event_channel_setup_3level(); + /* xen_vcpu_setup managed to place the vcpu_info within the percpu area for all cpus, so make use of it */ if (have_vcpu_info_placement) { diff --git a/drivers/xen/events.c b/drivers/xen/events.c index f60ba76..adb94e9 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -52,9 +52,15 @@ #include <xen/interface/hvm/params.h> /* N-level event channel, starting from 2 */ +unsigned int evtchn_level_param = -1; unsigned int evtchn_level = 2; EXPORT_SYMBOL_GPL(evtchn_level); +/* 3-level event channel */ +DEFINE_PER_CPU(unsigned long [sizeof(unsigned long)*8], evtchn_sel_l2); +unsigned long evtchn_pending[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] 
__page_aligned_bss; +unsigned long evtchn_mask[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] __page_aligned_bss; + struct evtchn_ops { unsigned long (*active_evtchns)(unsigned int, struct shared_info*, unsigned int); @@ -142,6 +148,29 @@ static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); static void disable_dynirq(struct irq_data *data); +static int __init parse_evtchn_level(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "3") == 0) + evtchn_level_param = 3; + + return 0; +} +early_param("evtchn_level", parse_evtchn_level); + +static inline int __is_masked_l2(int chn) +{ + struct shared_info *sh = HYPERVISOR_shared_info; + return sync_test_and_set_bit(chn, sh->evtchn_mask); +} + +static inline int __is_masked_l3(int chn) +{ + return sync_test_and_set_bit(chn, evtchn_mask); +} + /* Get info for IRQ */ static struct irq_info *info_for_irq(unsigned irq) { @@ -311,6 +340,15 @@ static inline unsigned long __active_evtchns_l2(unsigned int cpu, ~sh->evtchn_mask[idx]; } +static inline unsigned long __active_evtchns_l3(unsigned int cpu, + struct shared_info *sh, + unsigned int idx) +{ + return evtchn_pending[idx] & + per_cpu(cpu_evtchn_mask, cpu)[idx] & + ~evtchn_mask[idx]; +} + static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) { int irq = evtchn_to_irq[chn]; @@ -351,18 +389,33 @@ static inline void __clear_evtchn_l2(int port) sync_clear_bit(port, &s->evtchn_pending[0]); } +static inline void __clear_evtchn_l3(int port) +{ + sync_clear_bit(port, &evtchn_pending[0]); +} + static inline void __set_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; sync_set_bit(port, &s->evtchn_pending[0]); } +static inline void __set_evtchn_l3(int port) +{ + sync_set_bit(port, &evtchn_pending[0]); +} + static inline int __test_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; return sync_test_bit(port, &s->evtchn_pending[0]); } +static inline int __test_evtchn_l3(int port) +{ + return 
sync_test_bit(port, &evtchn_pending[0]); +} + /** * notify_remote_via_irq - send event to remote end of event channel via irq * @irq: irq of event channel to send event to @@ -386,6 +439,11 @@ static void __mask_evtchn_l2(int port) sync_set_bit(port, &s->evtchn_mask[0]); } +static void __mask_evtchn_l3(int port) +{ + sync_set_bit(port, &evtchn_mask[0]); +} + static void __unmask_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -416,6 +474,36 @@ static void __unmask_evtchn_l2(int port) put_cpu(); } +static void __unmask_evtchn_l3(int port) +{ + unsigned int cpu = get_cpu(); + int l1cb = BITS_PER_LONG * BITS_PER_LONG; + int l2cb = BITS_PER_LONG; + + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } else { + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + + sync_clear_bit(port, &evtchn_mask[0]); + + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose + * the interrupt edge' if the channel is masked. 
+ */ + if (sync_test_bit(port, &evtchn_pending[0]) && + !sync_test_and_set_bit(port / l2cb, + &per_cpu(evtchn_sel_l2, cpu)[0]) && + !sync_test_and_set_bit(port / l1cb, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } + + put_cpu(); +} + static void xen_irq_init(unsigned irq) { struct irq_info *info; @@ -1181,6 +1269,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) notify_remote_via_irq(irq); } +static DEFINE_SPINLOCK(debug_lock); static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) { struct shared_info *sh = HYPERVISOR_shared_info; @@ -1188,7 +1277,6 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); int i; unsigned long flags; - static DEFINE_SPINLOCK(debug_lock); struct vcpu_info *v; spin_lock_irqsave(&debug_lock, flags); @@ -1196,13 +1284,13 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) printk("\nvcpu %d\n ", cpu); for_each_online_cpu(i) { - int pending; + int masked; v = per_cpu(xen_vcpu, i); - pending = (get_irq_regs() && i == cpu) + masked = (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask; printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, - pending, v->evtchn_upcall_pending, + masked, v->evtchn_upcall_pending, (int)(sizeof(v->evtchn_pending_sel)*2), v->evtchn_pending_sel); } @@ -1227,7 +1315,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) i % 8 == 0 ? "\n " : " "); printk("\nlocal cpu%d mask:\n ", cpu); - for (i = (NR_EVENT_CHANNELS(evtchn_level)/BITS_PER_LONG)-1; i >= 0; i--) + for (i = (NR_EVENT_CHANNELS(2)/BITS_PER_LONG)-1; i >= 0; i--) printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), cpu_evtchn[i], i % 8 == 0 ? 
"\n " : " "); @@ -1242,7 +1330,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) } printk("\npending list:\n"); - for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) { + for (i = 0; i < NR_EVENT_CHANNELS(2); i++) { if (sync_test_bit(i, sh->evtchn_pending)) { int word_idx = i / BITS_PER_LONG; printk(" %d: event %d -> irq %d%s%s%s\n", @@ -1262,15 +1350,110 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t __xen_debug_interrupt_l3(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); + int i, j; + unsigned long flags; + struct vcpu_info *v; + + spin_lock_irqsave(&debug_lock, flags); + + printk("\nvcpu %d\n ", cpu); + + for_each_online_cpu(i) { + int masked; + + v = per_cpu(xen_vcpu, i); + masked = (get_irq_regs() && i == cpu) + ? xen_irqs_disabled(get_irq_regs()) + : v->evtchn_upcall_mask; + printk("%d: masked=%d pending=%d event_sel_l1 %0*lx\n ", i, + masked, v->evtchn_upcall_pending, + (int)(sizeof(v->evtchn_pending_sel)*2), + v->evtchn_pending_sel); + + printk("\nevtchn_sel_l2:\n "); + for (j = (sizeof(unsigned long)*8)-1; j >= 0; j--) + printk("%0*lx%s", + (int)(sizeof(evtchn_sel_l2[0])*2), + per_cpu(evtchn_sel_l2, i)[j], + j % 8 == 0 ? "\n " : " "); + } + + v = per_cpu(xen_vcpu, cpu); + + printk("\npending:\n "); + for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(evtchn_pending[0])*2), + evtchn_pending[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nglobal mask:\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2), + evtchn_mask[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nglobally unmasked:\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2), + evtchn_pending[i] & ~evtchn_mask[i], + i % 8 == 0 ? 
"\n " : " "); + + printk("\nlocal cpu%d mask:\n ", cpu); + for (i = (NR_EVENT_CHANNELS(3)/BITS_PER_LONG)-1; i >= 0; i--) + printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2), + cpu_evtchn[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nlocally unmasked:\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) { + unsigned long pending = evtchn_pending[i] + & ~evtchn_mask[i] + & cpu_evtchn[i]; + printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2), + pending, i % 8 == 0 ? "\n " : " "); + } + + printk("\npending list:\n"); + for (i = 0; i < NR_EVENT_CHANNELS(3); i++) { + if (sync_test_bit(i, evtchn_pending)) { + int word_idx_l1 = i / (BITS_PER_LONG * BITS_PER_LONG); + int word_idx_l2 = i / BITS_PER_LONG; + printk(" %d: event %d -> irq %d%s%s%s%s\n", + cpu_from_evtchn(i), i, + evtchn_to_irq[i], + sync_test_bit(word_idx_l1, &v->evtchn_pending_sel) + ? "" : " l1-clear", + sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu)) + ? "" : " l2-clear", + !sync_test_bit(i, evtchn_mask) + ? "" : " globally-masked", + sync_test_bit(i, cpu_evtchn) + ? "" : " locally-masked"); + } + } + + spin_unlock_irqrestore(&debug_lock, flags); + + return IRQ_HANDLED; +} + irqreturn_t xen_debug_interrupt(int irq, void *dev_id) { return eops->xen_debug_interrupt(irq, dev_id); } static DEFINE_PER_CPU(unsigned, xed_nesting_count); + +/* 2-level event channel does not use current_word_idx_l2 */ static DEFINE_PER_CPU(unsigned int, current_word_idx); +static DEFINE_PER_CPU(unsigned int, current_word_idx_l2); static DEFINE_PER_CPU(unsigned int, current_bit_idx); + /* * Mask out the i least significant bits of w */ @@ -1303,7 +1486,8 @@ static void __xen_evtchn_do_upcall_l2(void) if (__this_cpu_inc_return(xed_nesting_count) - 1) goto out; -#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ +#ifndef CONFIG_X86 + /* No need for a barrier -- XCHG is a barrier on x86. */ /* Clear master flag /before/ clearing selector flag. 
*/ wmb(); #endif @@ -1392,6 +1576,155 @@ out: put_cpu(); } +void __xen_evtchn_do_upcall_l3(void) +{ + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + unsigned count; + int start_word_idx_l1, start_word_idx_l2, start_bit_idx; + int word_idx_l1, word_idx_l2, bit_idx; + int i, j; + unsigned long l1cb, l2cb; + int cpu = get_cpu(); + + l1cb = BITS_PER_LONG * BITS_PER_LONG; + l2cb = BITS_PER_LONG; + + do { + unsigned long pending_words_l1; + + vcpu_info->evtchn_upcall_pending = 0; + + if (__this_cpu_inc_return(xed_nesting_count) - 1) + goto out; +#ifndef CONFIG_X86 + /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. */ + wmb(); +#endif + /* here we get l1 pending selector */ + pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + + start_word_idx_l1 = __this_cpu_read(current_word_idx); + start_word_idx_l2 = __this_cpu_read(current_word_idx_l2); + start_bit_idx = __this_cpu_read(current_bit_idx); + + word_idx_l1 = start_word_idx_l1; + + /* loop through l1, try to pick up l2 */ + for (i = 0; pending_words_l1 != 0; i++) { + unsigned long words_l1; + unsigned long pending_words_l2; + unsigned long pwl2idx; + + words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1); + + if (words_l1 == 0) { + word_idx_l1 = 0; + start_word_idx_l2 = 0; + continue; + } + + word_idx_l1 = __ffs(words_l1); + + pwl2idx = word_idx_l1 * BITS_PER_LONG; + + pending_words_l2 = + xchg(&per_cpu(evtchn_sel_l2, cpu)[pwl2idx], + 0); + + word_idx_l2 = 0; + if (word_idx_l1 == start_word_idx_l1) { + if (i == 0) + word_idx_l2 = start_word_idx_l2; + else + word_idx_l2 &= (1UL << start_word_idx_l2) - 1; + } + + for (j = 0; pending_words_l2 != 0; j++) { + unsigned long pending_bits; + unsigned long words_l2; + unsigned long idx; + + words_l2 = MASK_LSBS(pending_words_l2, + word_idx_l2); + + if (words_l2 == 0) { + word_idx_l2 = 0; + bit_idx = 0; + continue; + } + + word_idx_l2 = __ffs(words_l2); + + idx = 
word_idx_l1*BITS_PER_LONG+word_idx_l2; + pending_bits = + eops->active_evtchns(cpu, NULL, idx); + + bit_idx = 0; + if (word_idx_l2 == start_word_idx_l2) { + if (j == 0) + bit_idx = start_bit_idx; + else + bit_idx &= (1UL<<start_bit_idx)-1; + } + + /* process port */ + do { + unsigned long bits; + int port, irq; + struct irq_desc *desc; + + bits = MASK_LSBS(pending_bits, bit_idx); + + if (bits == 0) + break; + + bit_idx = __ffs(bits); + + port = word_idx_l1 * l1cb + + word_idx_l2 * l2cb + + bit_idx; + + irq = evtchn_to_irq[port]; + + if (irq != -1) { + desc = irq_to_desc(irq); + if (desc) + generic_handle_irq_desc(irq, desc); + } + + bit_idx = (bit_idx + 1) % BITS_PER_LONG; + + __this_cpu_write(current_bit_idx, bit_idx); + __this_cpu_write(current_word_idx_l2, + bit_idx ? word_idx_l2 : + (word_idx_l2+1) % BITS_PER_LONG); + __this_cpu_write(current_word_idx_l2, + word_idx_l2 ? word_idx_l1 : + (word_idx_l1+1) % BITS_PER_LONG); + } while (bit_idx != 0); + + if ((word_idx_l2 != start_word_idx_l2) || (j != 0)) + pending_words_l2 &= ~(1UL << word_idx_l2); + + word_idx_l2 = (word_idx_l2) % BITS_PER_LONG; + } + + if ((word_idx_l1 != start_word_idx_l1) || (i != 0)) + pending_words_l1 &= ~(1UL << word_idx_l1); + + word_idx_l1 = (word_idx_l1) % BITS_PER_LONG; + } + + BUG_ON(!irqs_disabled()); + count = __this_cpu_read(xed_nesting_count); + __this_cpu_write(xed_nesting_count, 0); + } while (count != 1 || vcpu_info->evtchn_upcall_pending); + +out: + put_cpu(); +} + void xen_evtchn_do_upcall(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -1525,12 +1858,6 @@ static void mask_ack_dynirq(struct irq_data *data) ack_dynirq(data); } -static inline int __is_masked_l2(int chn) -{ - struct shared_info *sh = HYPERVISOR_shared_info; - return sync_test_and_set_bit(chn, sh->evtchn_mask); -} - static int retrigger_dynirq(struct irq_data *data) { int evtchn = evtchn_from_irq(data->irq); @@ -1821,14 +2148,74 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = { 
.xen_debug_interrupt = __xen_debug_interrupt_l2, }; +static struct evtchn_ops evtchn_ops_l3 __read_mostly = { + .active_evtchns = __active_evtchns_l3, + .clear_evtchn = __clear_evtchn_l3, + .set_evtchn = __set_evtchn_l3, + .test_evtchn = __test_evtchn_l3, + .mask_evtchn = __mask_evtchn_l3, + .unmask_evtchn = __unmask_evtchn_l3, + .is_masked = __is_masked_l3, + .xen_evtchn_do_upcall = __xen_evtchn_do_upcall_l3, + .xen_debug_interrupt = __xen_debug_interrupt_l3, +}; + +int xen_event_channel_setup_3level(void) +{ + evtchn_register_nlevel_t reg; + int i, nr_pages, cpu; + unsigned long mfns[nr_cpu_ids]; + unsigned long offsets[nr_cpu_ids]; + int rc = -EINVAL; + + memset(&reg, 0, sizeof(reg)); + + reg.level = 3; + nr_pages = (sizeof(unsigned long) == 4 ? 1 : 8); + + for (i = 0; i < nr_pages; i++) { + unsigned long offset = PAGE_SIZE * i; + reg.u.l3.evtchn_pending[i] = + arbitrary_virt_to_mfn( + (void *)((unsigned long)evtchn_pending+offset)); + reg.u.l3.evtchn_mask[i] = + arbitrary_virt_to_mfn( + (void *)((unsigned long)evtchn_mask+offset)); + } + + reg.u.l3.l2sel_mfn = mfns; + reg.u.l3.l2sel_offset = offsets; + reg.u.l3.nr_vcpus = nr_cpu_ids; + + for_each_possible_cpu(cpu) { + reg.u.l3.l2sel_mfn[cpu] = + arbitrary_virt_to_mfn(&per_cpu(evtchn_sel_l2, cpu)); + reg.u.l3.l2sel_offset[cpu] = + offset_in_page(&per_cpu(evtchn_sel_l2, cpu)); + } + + rc = HYPERVISOR_event_channel_op(EVTCHNOP_register_nlevel, &reg); + + if (rc == 0) + evtchn_level = 3; + + return rc; +} +EXPORT_SYMBOL_GPL(xen_event_channel_setup_3level); + void __init xen_init_IRQ(void) { int i, rc; int cpu; - /* Setup 2-level event channel */ - eops = &evtchn_ops_l2; - evtchn_level = 2; + switch (evtchn_level) { + case 2: + eops = &evtchn_ops_l2; break; + case 3: + eops = &evtchn_ops_l3; break; + default: + BUG(); + } evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS(evtchn_level), sizeof(*evtchn_to_irq), diff --git a/include/xen/events.h b/include/xen/events.h index bc10f22..87696fc 100644 --- a/include/xen/events.h +++
b/include/xen/events.h @@ -111,5 +111,7 @@ int xen_test_irq_shared(int irq); /* N-level event channels */ extern unsigned int evtchn_level; +extern unsigned int evtchn_level_param; +int xen_event_channel_setup_3level(void); #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h index f494292..f764d21 100644 --- a/include/xen/interface/event_channel.h +++ b/include/xen/interface/event_channel.h @@ -190,6 +190,30 @@ struct evtchn_reset { }; typedef struct evtchn_reset evtchn_reset_t; +/* + * EVTCHNOP_register_nlevel: Register N level event channels. + * NOTES: + * 1. currently only 3-level is supported. + * 2. should fall back to basic 2-level if this call fails. + */ +#define EVTCHNOP_register_nlevel 11 +#define MAX_L3_PAGES 8 /* 8 pages for 64 bits */ +struct evtchn_register_3level { + unsigned long evtchn_pending[MAX_L3_PAGES]; + unsigned long evtchn_mask[MAX_L3_PAGES]; + unsigned long *l2sel_mfn; + unsigned long *l2sel_offset; + unsigned int nr_vcpus; +}; + +struct evtchn_register_nlevel { + uint32_t level; + union { + struct evtchn_register_3level l3; + } u; +}; +typedef struct evtchn_register_nlevel evtchn_register_nlevel_t; + struct evtchn_op { uint32_t cmd; /* EVTCHNOP_* */ union { diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index c66e1ff..7cb9d8f 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -289,7 +289,7 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); * 32k if a long is 32 bits; 256k if a long is 64 bits. */ #define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64) -#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(unsigned long)) +#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * 64) #define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0; \ switch (x) { \ case 2: \ -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH 3/3] Xen: implement 3-level event channel routines. 2012-12-31 18:38 ` [RFC PATCH 3/3] Xen: implement 3-level event channel routines Wei Liu @ 2013-01-02 14:57 ` David Vrabel 0 siblings, 0 replies; 4+ messages in thread From: David Vrabel @ 2013-01-02 14:57 UTC (permalink / raw) To: Wei Liu; +Cc: konrad.wilk, xen-devel On 31/12/12 18:38, Wei Liu wrote: > Changeset description? > Signed-off-by: Wei Liu <wei.liu2@citrix.com> > --- > arch/x86/xen/enlighten.c | 7 + > drivers/xen/events.c | 419 +++++++++++++++++++++++++++++++-- > include/xen/events.h | 2 + > include/xen/interface/event_channel.h | 24 ++ > include/xen/interface/xen.h | 2 +- > 5 files changed, 437 insertions(+), 17 deletions(-) > > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c > index bc893e7..f471881 100644 > --- a/arch/x86/xen/enlighten.c > +++ b/arch/x86/xen/enlighten.c > @@ -43,6 +43,7 @@ > #include <xen/hvm.h> > #include <xen/hvc-console.h> > #include <xen/acpi.h> > +#include <xen/events.h> > > #include <asm/paravirt.h> > #include <asm/apic.h> > @@ -195,6 +196,9 @@ void xen_vcpu_restore(void) > HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) > BUG(); > } > + > + if (evtchn_level_param == 3) > + xen_event_channel_setup_3level(); Why is this here? > } > > static void __init xen_banner(void) > @@ -1028,6 +1032,9 @@ void xen_setup_vcpu_info_placement(void) > for_each_possible_cpu(cpu) > xen_vcpu_setup(cpu); > > + if (evtchn_level_param == 3) > + xen_event_channel_setup_3level(); > + Why is this here instead of xen_init_IRQ()? > /* xen_vcpu_setup managed to place the vcpu_info within the > percpu area for all cpus, so make use of it */ > if (have_vcpu_info_placement) { > diff --git a/drivers/xen/events.c b/drivers/xen/events.c > index f60ba76..adb94e9 100644 > --- a/drivers/xen/events.c > +++ b/drivers/xen/events.c [...] 
> + > +/* 2-level event channel does not use current_word_idx_l2 */ > static DEFINE_PER_CPU(unsigned int, current_word_idx); > +static DEFINE_PER_CPU(unsigned int, current_word_idx_l2); > static DEFINE_PER_CPU(unsigned int, current_bit_idx); I suggest renaming these to current_word_idx_l3 and current_word_idx_l2. The use of these variables really needs documentation, particularly why they're used. I presume (but am not really sure) that they're to ensure the average event latency is constant independent of which channel it is. > + > /* > * Mask out the i least significant bits of w > */ > @@ -1303,7 +1486,8 @@ static void __xen_evtchn_do_upcall_l2(void) > if (__this_cpu_inc_return(xed_nesting_count) - 1) > goto out; > > -#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ > +#ifndef CONFIG_X86 > + /* No need for a barrier -- XCHG is a barrier on x86. */ > /* Clear master flag /before/ clearing selector flag. */ > wmb(); > #endif > @@ -1392,6 +1576,155 @@ out: > put_cpu(); > } > > +void __xen_evtchn_do_upcall_l3(void) This is one of my least favourite functions... A comment describing the algorithm used here would be nice.
> @@ -1821,14 +2148,74 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = { > .xen_debug_interrupt = __xen_debug_interrupt_l2, > }; > > +static struct evtchn_ops evtchn_ops_l3 __read_mostly = { const > + .active_evtchns = __active_evtchns_l3, > + .clear_evtchn = __clear_evtchn_l3, > + .set_evtchn = __set_evtchn_l3, > + .test_evtchn = __test_evtchn_l3, > + .mask_evtchn = __mask_evtchn_l3, > + .unmask_evtchn = __unmask_evtchn_l3, > + .is_masked = __is_masked_l3, > + .xen_evtchn_do_upcall = __xen_evtchn_do_upcall_l3, > + .xen_debug_interrupt = __xen_debug_interrupt_l3, > +}; > + > +int xen_event_channel_setup_3level(void) > +{ > + evtchn_register_nlevel_t reg; > + int i, nr_pages, cpu; > + unsigned long mfns[nr_cpu_ids]; > + unsigned long offsets[nr_cpu_ids]; These arrays are too large for the stack if the domain has many VCPUs. With 256 VCPUs this uses a page of stack. > diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h > index f494292..f764d21 100644 > --- a/include/xen/interface/event_channel.h > +++ b/include/xen/interface/event_channel.h [...] > diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h > index c66e1ff..7cb9d8f 100644 > --- a/include/xen/interface/xen.h > +++ b/include/xen/interface/xen.h [...] Put these in the patch sync'ing the headers. David ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2013-01-02 14:57 UTC | newest] Thread overview: 4+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-12-31 18:37 [RFC PATCH 2/3] Xen: rework NR_EVENT_CHANNELS related stuffs Wei Liu 2012-12-31 18:37 ` [RFC PATCH 3/3] Xen: implement 3-level event channel routines Wei Liu -- strict thread matches above, loose matches on Subject: below -- 2012-12-31 18:38 Implement 3-level event channel routines in Linux Wei Liu 2012-12-31 18:38 ` [RFC PATCH 3/3] Xen: implement 3-level event channel routines Wei Liu 2013-01-02 14:57 ` David Vrabel
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).