From: mhkelley58@gmail.com
To: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, tglx@linutronix.de, mingo@redhat.com,
bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
hpa@zytor.com, lpieralisi@kernel.org, kw@linux.com,
robh@kernel.org, bhelgaas@google.com,
James.Bottomley@HansenPartnership.com,
martin.petersen@oracle.com, arnd@arndb.de,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, linux-scsi@vger.kernel.org,
linux-arch@vger.kernel.org
Cc: maz@kernel.org, den@valinux.co.jp, jgowans@amazon.com,
dawei.li@shingroup.cn
Subject: [RFC 10/12] Drivers: hv: vmbus: Implement vmbus_irq_set_affinity
Date: Mon, 3 Jun 2024 22:09:38 -0700 [thread overview]
Message-ID: <20240604050940.859909-11-mhklinux@outlook.com> (raw)
In-Reply-To: <20240604050940.859909-1-mhklinux@outlook.com>
From: Michael Kelley <mhklinux@outlook.com>
Pull out core code from target_cpu_store() to implement
vmbus_irq_set_affinity() so the affinity of VMBus channel interrupts
can be updated from user space via /proc/irq.
Since vmbus_irq_set_affinity() runs with interrupts disabled,
vmbus_send_modifychannel() can't wait for an ACK from Hyper-V. As
such, remove the "wait for ack" version of vmbus_send_modifychannel().
Not waiting isn't a problem unless the old CPU is quickly taken offline
before Hyper-V makes the change, which is dealt with in a subsequent
patch.
Also change target_cpu_store() to call irq_set_affinity() so that
changes made via /sys/bus/vmbus/devices/<guid>/channels/<nn>/cpu
are in sync with the /proc/irq interface. The cpus_read_lock() is
no longer needed in target_cpu_store() because irq_set_affinity()
ensures that the interrupt affinity is not set to an offline
CPU.
Signed-off-by: Michael Kelley <mhklinux@outlook.com>
---
drivers/hv/channel.c | 97 ++++++-------------------
drivers/hv/vmbus_drv.c | 161 +++++++++++++++++++++++++----------------
2 files changed, 121 insertions(+), 137 deletions(-)
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 1aa020b538f1..b7920072e243 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -212,79 +212,6 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
}
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
-static int send_modifychannel_without_ack(struct vmbus_channel *channel, u32 target_vp)
-{
- struct vmbus_channel_modifychannel msg;
- int ret;
-
- memset(&msg, 0, sizeof(msg));
- msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
- msg.child_relid = channel->offermsg.child_relid;
- msg.target_vp = target_vp;
-
- ret = vmbus_post_msg(&msg, sizeof(msg), true);
- trace_vmbus_send_modifychannel(&msg, ret);
-
- return ret;
-}
-
-static int send_modifychannel_with_ack(struct vmbus_channel *channel, u32 target_vp)
-{
- struct vmbus_channel_modifychannel *msg;
- struct vmbus_channel_msginfo *info;
- unsigned long flags;
- int ret;
-
- info = kzalloc(sizeof(struct vmbus_channel_msginfo) +
- sizeof(struct vmbus_channel_modifychannel),
- GFP_KERNEL);
- if (!info)
- return -ENOMEM;
-
- init_completion(&info->waitevent);
- info->waiting_channel = channel;
-
- msg = (struct vmbus_channel_modifychannel *)info->msg;
- msg->header.msgtype = CHANNELMSG_MODIFYCHANNEL;
- msg->child_relid = channel->offermsg.child_relid;
- msg->target_vp = target_vp;
-
- spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
- list_add_tail(&info->msglistentry, &vmbus_connection.chn_msg_list);
- spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-
- ret = vmbus_post_msg(msg, sizeof(*msg), true);
- trace_vmbus_send_modifychannel(msg, ret);
- if (ret != 0) {
- spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
- list_del(&info->msglistentry);
- spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
- goto free_info;
- }
-
- /*
- * Release channel_mutex; otherwise, vmbus_onoffer_rescind() could block on
- * the mutex and be unable to signal the completion.
- *
- * See the caller target_cpu_store() for information about the usage of the
- * mutex.
- */
- mutex_unlock(&vmbus_connection.channel_mutex);
- wait_for_completion(&info->waitevent);
- mutex_lock(&vmbus_connection.channel_mutex);
-
- spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
- list_del(&info->msglistentry);
- spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-
- if (info->response.modify_response.status)
- ret = -EAGAIN;
-
-free_info:
- kfree(info);
- return ret;
-}
-
/*
* Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
*
@@ -294,14 +221,32 @@ static int send_modifychannel_with_ack(struct vmbus_channel *channel, u32 target
* out an ACK, we can not know when the host will stop interrupting the "old"
* vCPU and start interrupting the "new" vCPU for the given channel.
*
+ * But even if Hyper-V provides the ACK, we don't wait for it because the
+ * caller, vmbus_irq_set_affinity(), is running with a spin lock held. The
+ * unknown delay in when the host will start interrupting the new vCPU is not
+ * a problem unless the old vCPU is taken offline, and that situation is dealt
+ * with separately in the CPU offlining path.
+ *
* The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
* VERSION_WIN10_V4_1.
*/
int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp)
{
- if (vmbus_proto_version >= VERSION_WIN10_V5_3)
- return send_modifychannel_with_ack(channel, target_vp);
- return send_modifychannel_without_ack(channel, target_vp);
+ struct vmbus_channel_modifychannel msg;
+ int ret;
+
+ if (vmbus_proto_version < VERSION_WIN10_V4_1)
+ return -EINVAL;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
+ msg.child_relid = channel->offermsg.child_relid;
+ msg.target_vp = target_vp;
+
+ ret = vmbus_post_msg(&msg, sizeof(msg), false);
+ trace_vmbus_send_modifychannel(&msg, ret);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index b73be7c02d37..87f2f3436136 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -22,7 +22,6 @@
#include <linux/kernel_stat.h>
#include <linux/of_address.h>
#include <linux/clockchips.h>
-#include <linux/cpu.h>
#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
@@ -1322,10 +1321,107 @@ static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
return IRQ_NONE;
}
+/*
+ * This function is invoked by user space affinity changes initiated
+ * from /proc/irq/<nn> or from the legacy VMBus-specific interface at
+ * /sys/bus/vmbus/devices/<guid>/channels/<nn>/cpu.
+ *
+ * In the former case, the /proc implementation ensures that unmapping
+ * (i.e., deleting) the IRQ will pend while this function is in progress.
+ * Since deleting the channel unmaps the IRQ first, the channel can't go
+ * away either.
+ *
+ * In the latter case, the VMBus connection channel_mutex is held, which
+ * prevents channel deletion, and therefore IRQ unmapping as well.
+ *
+ * So in both cases, accessing the channel and IRQ data structures is safe.
+ */
int vmbus_irq_set_affinity(struct irq_data *data,
const struct cpumask *dest, bool force)
{
- return 0;
+ static int next_cpu;
+ static cpumask_t tempmask;
+ int origin_cpu, target_cpu;
+ struct vmbus_channel *channel = irq_data_get_irq_handler_data(data);
+ int ret;
+
+ if (!channel) {
+ pr_err("Bad channel in vmbus_irq_set_affinity for relid %ld\n",
+ data->hwirq);
+ return -EINVAL;
+ }
+
+ /* Don't consider CPUs that are isolated */
+ if (housekeeping_enabled(HK_TYPE_MANAGED_IRQ))
+ cpumask_and(&tempmask, dest,
+ housekeeping_cpumask(HK_TYPE_MANAGED_IRQ));
+ else
+ cpumask_copy(&tempmask, dest);
+
+ /*
+ * If Hyper-V is already targeting a CPU in the new affinity mask,
+ * keep that targeting and Hyper-V doesn't need to be updated. But
+ * still set effective affinity as it may be unset when the IRQ is
+ * first created.
+ */
+ origin_cpu = channel->target_cpu;
+ if (cpumask_test_cpu(origin_cpu, &tempmask)) {
+ target_cpu = origin_cpu;
+ goto update_effective;
+ }
+
+ /*
+ * Pick a CPU from the new affinity mask. As a simple heuristic to
+ * spread out the selection when the mask contains multiple CPUs,
+ * start searching just after whatever CPU was last selected.
+ */
+ target_cpu = cpumask_next_wrap(next_cpu, &tempmask, nr_cpu_ids, false);
+ if (target_cpu >= nr_cpu_ids)
+ return -EINVAL;
+ next_cpu = target_cpu;
+
+ /*
+ * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
+ * avoid sending the message and fail here for such channels.
+ */
+ if (channel->state != CHANNEL_OPENED_STATE)
+ return -EIO;
+
+ ret = vmbus_send_modifychannel(channel,
+ hv_cpu_number_to_vp_number(target_cpu));
+ if (ret)
+ return ret;
+
+ /*
+ * Warning. At this point, there is *no* guarantee that the host will
+ * have successfully processed the vmbus_send_modifychannel() request.
+ * See the header comment of vmbus_send_modifychannel() for more info.
+ *
+ * Lags in the processing of the above vmbus_send_modifychannel() can
+ * result in missed interrupts if the "old" target CPU is taken offline
+ * before Hyper-V starts sending interrupts to the "new" target CPU.
+ * hv_synic_cleanup() will ensure no interrupts are missed.
+ *
+ * But apart from this offlining scenario, the code tolerates such
+ * lags. It will function correctly even if a channel interrupt comes
+ * in on a CPU that is different from the channel target_cpu value.
+ */
+
+ channel->target_cpu = target_cpu;
+
+ /* See init_vp_index(). */
+ if (hv_is_perf_channel(channel))
+ hv_update_allocated_cpus(origin_cpu, target_cpu);
+
+ /* Currently set only for storvsc channels. */
+ if (channel->change_target_cpu_callback) {
+ (*channel->change_target_cpu_callback)(channel,
+ origin_cpu, target_cpu);
+ }
+
+update_effective:
+ irq_data_update_effective_affinity(data, cpumask_of(target_cpu));
+ return IRQ_SET_MASK_OK;
}
/*
@@ -1655,7 +1751,7 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
static ssize_t target_cpu_store(struct vmbus_channel *channel,
const char *buf, size_t count)
{
- u32 target_cpu, origin_cpu;
+ u32 target_cpu;
ssize_t ret = count;
if (vmbus_proto_version < VERSION_WIN10_V4_1)
@@ -1668,17 +1764,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (target_cpu >= nr_cpumask_bits)
return -EINVAL;
- if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
- return -EINVAL;
-
- /* No CPUs should come up or down during this. */
- cpus_read_lock();
-
- if (!cpu_online(target_cpu)) {
- cpus_read_unlock();
- return -EINVAL;
- }
-
/*
* Synchronizes target_cpu_store() and channel closure:
*
@@ -1703,55 +1788,9 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
*/
mutex_lock(&vmbus_connection.channel_mutex);
- /*
- * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
- * avoid sending the message and fail here for such channels.
- */
- if (channel->state != CHANNEL_OPENED_STATE) {
- ret = -EIO;
- goto cpu_store_unlock;
- }
-
- origin_cpu = channel->target_cpu;
- if (target_cpu == origin_cpu)
- goto cpu_store_unlock;
-
- if (vmbus_send_modifychannel(channel,
- hv_cpu_number_to_vp_number(target_cpu))) {
- ret = -EIO;
- goto cpu_store_unlock;
- }
-
- /*
- * For version before VERSION_WIN10_V5_3, the following warning holds:
- *
- * Warning. At this point, there is *no* guarantee that the host will
- * have successfully processed the vmbus_send_modifychannel() request.
- * See the header comment of vmbus_send_modifychannel() for more info.
- *
- * Lags in the processing of the above vmbus_send_modifychannel() can
- * result in missed interrupts if the "old" target CPU is taken offline
- * before Hyper-V starts sending interrupts to the "new" target CPU.
- * But apart from this offlining scenario, the code tolerates such
- * lags. It will function correctly even if a channel interrupt comes
- * in on a CPU that is different from the channel target_cpu value.
- */
-
- channel->target_cpu = target_cpu;
-
- /* See init_vp_index(). */
- if (hv_is_perf_channel(channel))
- hv_update_allocated_cpus(origin_cpu, target_cpu);
-
- /* Currently set only for storvsc channels. */
- if (channel->change_target_cpu_callback) {
- (*channel->change_target_cpu_callback)(channel,
- origin_cpu, target_cpu);
- }
+ ret = irq_set_affinity(channel->irq, cpumask_of(target_cpu));
-cpu_store_unlock:
mutex_unlock(&vmbus_connection.channel_mutex);
- cpus_read_unlock();
return ret;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
--
2.25.1
next prev parent reply other threads:[~2024-06-04 5:10 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-04 5:09 [RFC 00/12] Hyper-V guests use Linux IRQs for channel interrupts mhkelley58
2024-06-04 5:09 ` [RFC 01/12] Drivers: hv: vmbus: Drop unsupported VMBus devices earlier mhkelley58
2024-06-24 7:11 ` Wei Liu
2024-06-04 5:09 ` [RFC 02/12] Drivers: hv: vmbus: Fix error path that deletes non-existent sysfs group mhkelley58
2024-06-04 5:09 ` [RFC 03/12] Drivers: hv: vmbus: Add an IRQ name to VMBus channels mhkelley58
2024-06-04 5:09 ` [RFC 04/12] PCI: hv: Annotate the VMBus channel IRQ name mhkelley58
2024-09-20 23:13 ` Bjorn Helgaas
2024-06-04 5:09 ` [RFC 05/12] scsi: storvsc: " mhkelley58
2024-06-04 5:09 ` [RFC 06/12] genirq: Add per-cpu flow handler with conditional IRQ stats mhkelley58
2024-06-04 18:13 ` Thomas Gleixner
2024-06-04 23:03 ` Michael Kelley
2024-06-05 13:20 ` Thomas Gleixner
2024-06-05 13:45 ` Michael Kelley
2024-06-05 14:19 ` Thomas Gleixner
2024-06-06 3:14 ` Michael Kelley
2024-06-06 9:34 ` Thomas Gleixner
2024-06-06 14:34 ` Michael Kelley
2024-06-04 5:09 ` [RFC 07/12] Drivers: hv: vmbus: Set up irqdomain and irqchip for the VMBus connection mhkelley58
2024-06-04 5:09 ` [RFC 08/12] Drivers: hv: vmbus: Allocate an IRQ per channel and use for relid mapping mhkelley58
2024-06-04 5:09 ` [RFC 09/12] Drivers: hv: vmbus: Use Linux IRQs to handle VMBus channel interrupts mhkelley58
2024-06-04 5:09 ` mhkelley58 [this message]
2024-06-04 5:09 ` [RFC 11/12] Drivers: hv: vmbus: Wait for MODIFYCHANNEL to finish when offlining CPUs mhkelley58
2024-06-24 17:55 ` Boqun Feng
2024-06-24 19:32 ` Michael Kelley
2024-06-04 5:09 ` [RFC 12/12] Drivers: hv: vmbus: Ensure IRQ affinity isn't set to a CPU going offline mhkelley58
2024-09-16 18:15 ` [RFC 00/12] Hyper-V guests use Linux IRQs for channel interrupts Michael Kelley
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240604050940.859909-11-mhklinux@outlook.com \
--to=mhkelley58@gmail.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=arnd@arndb.de \
--cc=bhelgaas@google.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=dawei.li@shingroup.cn \
--cc=decui@microsoft.com \
--cc=den@valinux.co.jp \
--cc=haiyangz@microsoft.com \
--cc=hpa@zytor.com \
--cc=jgowans@amazon.com \
--cc=kw@linux.com \
--cc=kys@microsoft.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=lpieralisi@kernel.org \
--cc=martin.petersen@oracle.com \
--cc=maz@kernel.org \
--cc=mhklinux@outlook.com \
--cc=mingo@redhat.com \
--cc=robh@kernel.org \
--cc=tglx@linutronix.de \
--cc=wei.liu@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox