From: mhkelley58@gmail.com
To: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, tglx@linutronix.de, mingo@redhat.com,
bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
hpa@zytor.com, lpieralisi@kernel.org, kw@linux.com,
robh@kernel.org, bhelgaas@google.com,
James.Bottomley@HansenPartnership.com,
martin.petersen@oracle.com, arnd@arndb.de,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, linux-scsi@vger.kernel.org,
linux-arch@vger.kernel.org
Cc: maz@kernel.org, den@valinux.co.jp, jgowans@amazon.com,
dawei.li@shingroup.cn
Subject: [RFC 09/12] Drivers: hv: vmbus: Use Linux IRQs to handle VMBus channel interrupts
Date: Mon, 3 Jun 2024 22:09:37 -0700 [thread overview]
Message-ID: <20240604050940.859909-10-mhklinux@outlook.com> (raw)
In-Reply-To: <20240604050940.859909-1-mhklinux@outlook.com>
From: Michael Kelley <mhklinux@outlook.com>
Do the following:
1) Create an interrupt handler for VMBus channel interrupts by pulling
out portions of vmbus_chan_sched() into vmbus_chan_handler(). The
outer part of vmbus_chan_sched() that loops through the synic event
page bitmap remains unchanged. But when a pending VMBus channel
interrupt is found, call generic_handle_irq_desc() to invoke
handle_simple_irq() and then vmbus_chan_handler() for the channel's
IRQ. handle_simple_irq() does the IRQ stats for that channel's IRQ,
so that per-channel interrupt counts appear in /proc/interrupts. The
overall processing of VMBus channel interrupts is unchanged except
for the intervening handle_simple_irq() that does the stats. No acks
or EOIs are required for VMBus channel IRQs.
2) Update __vmbus_open() to call request_irq(), specifying the previously
set up channel IRQ name and vmbus_chan_handler() as the interrupt
handler. Set the IRQ affinity to the target_cpu assigned when the
channel was created.
3) Update vmbus_isr() to return "false" if it only handles VMBus
channel interrupts, which were passed to the channel IRQ handler. If
vmbus_isr() handles one or more control message interrupts, then
return "true". Update the related definitions to specify a boolean
return value.
4) The callers of vmbus_isr() increment IRQ stats for the top-level
IRQ only if "true" is returned. On x86, the caller is
sysvec_hyperv_callback(), which manages the stats directly. On
arm64, the caller is vmbus_percpu_isr(), which maps the boolean
return value to IRQ_NONE ("false") or IRQ_HANDLED ("true").
Then handle_percpu_demux_irq() conditionally updates the
stats based on the return value from vmbus_percpu_isr().
With these changes, interrupts from VMBus channels are now
processed as Linux IRQs that are demultiplexed from the main
VMBus interrupt.
Signed-off-by: Michael Kelley <mhklinux@outlook.com>
---
arch/x86/kernel/cpu/mshyperv.c | 9 ++--
drivers/hv/channel.c | 25 +++++++++-
drivers/hv/hv_common.c | 2 +-
drivers/hv/vmbus_drv.c | 84 +++++++++++++++++++---------------
include/asm-generic/mshyperv.h | 3 +-
5 files changed, 79 insertions(+), 44 deletions(-)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e0fd57a8ba84..18bc282a99db 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -110,7 +110,7 @@ void hv_set_msr(unsigned int reg, u64 value)
}
EXPORT_SYMBOL_GPL(hv_set_msr);
-static void (*vmbus_handler)(void);
+static bool (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
@@ -119,9 +119,8 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- inc_irq_stat(irq_hv_callback_count);
- if (vmbus_handler)
- vmbus_handler();
+ if (vmbus_handler && vmbus_handler())
+ inc_irq_stat(irq_hv_callback_count);
if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
apic_eoi();
@@ -129,7 +128,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
-void hv_setup_vmbus_handler(void (*handler)(void))
+void hv_setup_vmbus_handler(bool (*handler)(void))
{
vmbus_handler = handler;
}
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index fb8cd8469328..1aa020b538f1 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -638,6 +638,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
struct vmbus_channel_open_channel *open_msg;
struct vmbus_channel_msginfo *open_info = NULL;
struct page *page = newchannel->ringbuffer_page;
+ u32 relid = newchannel->offermsg.child_relid;
u32 send_pages, recv_pages;
unsigned long flags;
int err;
@@ -685,13 +686,31 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
if (err)
goto error_free_gpadl;
+ /* Request the IRQ and assign to target_cpu */
+ err = request_irq(newchannel->irq, vmbus_chan_handler, 0,
+ newchannel->irq_name, newchannel);
+ if (err) {
+ pr_err("request_irq failed with %d for relid %d irq %d\n",
+ err, relid, newchannel->irq);
+ goto error_free_gpadl;
+ }
+ err = irq_set_affinity_and_hint(newchannel->irq,
+ cpumask_of(newchannel->target_cpu));
+ if (err) {
+ pr_err("irq_set_affinity_and_hint failed with %d for relid %d irq %d\n",
+ err, relid, newchannel->irq);
+ free_irq(newchannel->irq, newchannel);
+ goto error_free_gpadl;
+ }
+ newchannel->irq_requested = true;
+
/* Create and init the channel open message */
open_info = kzalloc(sizeof(*open_info) +
sizeof(struct vmbus_channel_open_channel),
GFP_KERNEL);
if (!open_info) {
err = -ENOMEM;
- goto error_free_gpadl;
+ goto error_free_irq;
}
init_completion(&open_info->waitevent);
@@ -759,6 +778,10 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
error_free_info:
kfree(open_info);
+error_free_irq:
+ irq_update_affinity_hint(newchannel->irq, NULL);
+ free_irq(newchannel->irq, newchannel);
+ newchannel->irq_requested = false;
error_free_gpadl:
vmbus_teardown_gpadl(newchannel, &newchannel->ringbuffer_gpadlhandle);
error_clean_ring:
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 9c452bfbd571..38a23add721c 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -610,7 +610,7 @@ bool __weak hv_isolation_type_tdx(void)
}
EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
-void __weak hv_setup_vmbus_handler(void (*handler)(void))
+void __weak hv_setup_vmbus_handler(bool (*handler)(void))
{
}
EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 8fd03d41e71a..b73be7c02d37 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1193,6 +1193,45 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
}
#endif /* CONFIG_PM_SLEEP */
+irqreturn_t vmbus_chan_handler(int irq, void *dev_id)
+{
+ void (*callback_fn)(void *context);
+ struct vmbus_channel *channel = dev_id;
+
+ /*
+ * Make sure that the ring buffer data structure doesn't get
+ * freed while we dereference the ring buffer pointer. Test
+ * for the channel's onchannel_callback being NULL within a
+ * sched_lock critical section. See also the inline comments
+ * in vmbus_reset_channel_cb().
+ */
+ spin_lock(&channel->sched_lock);
+
+ callback_fn = channel->onchannel_callback;
+ if (unlikely(callback_fn == NULL))
+ goto spin_unlock;
+
+ trace_vmbus_chan_sched(channel);
+
+ ++channel->interrupts;
+
+ switch (channel->callback_mode) {
+ case HV_CALL_ISR:
+ (*callback_fn)(channel->channel_callback_context);
+ break;
+
+ case HV_CALL_BATCHED:
+ hv_begin_read(&channel->inbound);
+ fallthrough;
+ case HV_CALL_DIRECT:
+ tasklet_schedule(&channel->callback_event);
+ }
+
+spin_unlock:
+ spin_unlock(&channel->sched_lock);
+ return IRQ_HANDLED;
+}
+
/*
* Schedule all channels with events pending
*/
@@ -1217,7 +1256,6 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
return;
for_each_set_bit(relid, recv_int_page, maxbits) {
- void (*callback_fn)(void *context);
struct vmbus_channel *channel;
struct irq_desc *desc;
@@ -1244,43 +1282,14 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
if (channel->rescind)
goto sched_unlock_rcu;
- /*
- * Make sure that the ring buffer data structure doesn't get
- * freed while we dereference the ring buffer pointer. Test
- * for the channel's onchannel_callback being NULL within a
- * sched_lock critical section. See also the inline comments
- * in vmbus_reset_channel_cb().
- */
- spin_lock(&channel->sched_lock);
-
- callback_fn = channel->onchannel_callback;
- if (unlikely(callback_fn == NULL))
- goto sched_unlock;
-
- trace_vmbus_chan_sched(channel);
-
- ++channel->interrupts;
-
- switch (channel->callback_mode) {
- case HV_CALL_ISR:
- (*callback_fn)(channel->channel_callback_context);
- break;
-
- case HV_CALL_BATCHED:
- hv_begin_read(&channel->inbound);
- fallthrough;
- case HV_CALL_DIRECT:
- tasklet_schedule(&channel->callback_event);
- }
+ generic_handle_irq_desc(desc);
-sched_unlock:
- spin_unlock(&channel->sched_lock);
sched_unlock_rcu:
rcu_read_unlock();
}
}
-static void vmbus_isr(void)
+static bool vmbus_isr(void)
{
struct hv_per_cpu_context *hv_cpu
= this_cpu_ptr(hv_context.cpu_context);
@@ -1299,15 +1308,18 @@ static void vmbus_isr(void)
vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
} else
tasklet_schedule(&hv_cpu->msg_dpc);
- }
- add_interrupt_randomness(vmbus_interrupt);
+ add_interrupt_randomness(vmbus_interrupt);
+ return true;
+ }
+ return false;
}
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
- vmbus_isr();
- return IRQ_HANDLED;
+ if (vmbus_isr())
+ return IRQ_HANDLED;
+ return IRQ_NONE;
}
int vmbus_irq_set_affinity(struct irq_data *data,
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 0488ff8b511f..0a5559b9d5f7 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -178,7 +178,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
int hv_get_hypervisor_version(union hv_hypervisor_version_info *info);
-void hv_setup_vmbus_handler(void (*handler)(void));
+void hv_setup_vmbus_handler(bool (*handler)(void));
void hv_remove_vmbus_handler(void);
void hv_setup_stimer0_handler(void (*handler)(void));
void hv_remove_stimer0_handler(void);
@@ -188,6 +188,7 @@ void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
+extern irqreturn_t vmbus_chan_handler(int irq, void *dev_id);
extern void vmbus_irq_mask(struct irq_data *data);
extern void vmbus_irq_unmask(struct irq_data *data);
extern int vmbus_irq_set_affinity(struct irq_data *data,
--
2.25.1
next prev parent reply other threads:[~2024-06-04 5:10 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-04 5:09 [RFC 00/12] Hyper-V guests use Linux IRQs for channel interrupts mhkelley58
2024-06-04 5:09 ` [RFC 01/12] Drivers: hv: vmbus: Drop unsupported VMBus devices earlier mhkelley58
2024-06-24 7:11 ` Wei Liu
2024-06-04 5:09 ` [RFC 02/12] Drivers: hv: vmbus: Fix error path that deletes non-existent sysfs group mhkelley58
2024-06-04 5:09 ` [RFC 03/12] Drivers: hv: vmbus: Add an IRQ name to VMBus channels mhkelley58
2024-06-04 5:09 ` [RFC 04/12] PCI: hv: Annotate the VMBus channel IRQ name mhkelley58
2024-09-20 23:13 ` Bjorn Helgaas
2024-06-04 5:09 ` [RFC 05/12] scsi: storvsc: " mhkelley58
2024-06-04 5:09 ` [RFC 06/12] genirq: Add per-cpu flow handler with conditional IRQ stats mhkelley58
2024-06-04 18:13 ` Thomas Gleixner
2024-06-04 23:03 ` Michael Kelley
2024-06-05 13:20 ` Thomas Gleixner
2024-06-05 13:45 ` Michael Kelley
2024-06-05 14:19 ` Thomas Gleixner
2024-06-06 3:14 ` Michael Kelley
2024-06-06 9:34 ` Thomas Gleixner
2024-06-06 14:34 ` Michael Kelley
2024-06-04 5:09 ` [RFC 07/12] Drivers: hv: vmbus: Set up irqdomain and irqchip for the VMBus connection mhkelley58
2024-06-04 5:09 ` [RFC 08/12] Drivers: hv: vmbus: Allocate an IRQ per channel and use for relid mapping mhkelley58
2024-06-04 5:09 ` mhkelley58 [this message]
2024-06-04 5:09 ` [RFC 10/12] Drivers: hv: vmbus: Implement vmbus_irq_set_affinity mhkelley58
2024-06-04 5:09 ` [RFC 11/12] Drivers: hv: vmbus: Wait for MODIFYCHANNEL to finish when offlining CPUs mhkelley58
2024-06-24 17:55 ` Boqun Feng
2024-06-24 19:32 ` Michael Kelley
2024-06-04 5:09 ` [RFC 12/12] Drivers: hv: vmbus: Ensure IRQ affinity isn't set to a CPU going offline mhkelley58
2024-09-16 18:15 ` [RFC 00/12] Hyper-V guests use Linux IRQs for channel interrupts Michael Kelley
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240604050940.859909-10-mhklinux@outlook.com \
--to=mhkelley58@gmail.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=arnd@arndb.de \
--cc=bhelgaas@google.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=dawei.li@shingroup.cn \
--cc=decui@microsoft.com \
--cc=den@valinux.co.jp \
--cc=haiyangz@microsoft.com \
--cc=hpa@zytor.com \
--cc=jgowans@amazon.com \
--cc=kw@linux.com \
--cc=kys@microsoft.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=lpieralisi@kernel.org \
--cc=martin.petersen@oracle.com \
--cc=maz@kernel.org \
--cc=mhklinux@outlook.com \
--cc=mingo@redhat.com \
--cc=robh@kernel.org \
--cc=tglx@linutronix.de \
--cc=wei.liu@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox