* [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu()
@ 2025-01-16 23:07 Hamza Mahfooz
2025-01-16 23:07 ` [PATCH v5 2/2] drivers/hv: add CPU offlining support Hamza Mahfooz
2025-01-17 3:39 ` [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu() Michael Kelley
0 siblings, 2 replies; 3+ messages in thread
From: Hamza Mahfooz @ 2025-01-16 23:07 UTC (permalink / raw)
To: linux-hyperv
Cc: Hamza Mahfooz, Boqun Feng, Michael Kelley, Wei Liu,
K. Y. Srinivasan, Haiyang Zhang, Dexuan Cui, linux-kernel
The core functionality in target_cpu_store() is also needed in a
subsequent patch for automatically changing the CPU when taking
a CPU offline. As such, factor out the body of target_cpu_store()
into new function vmbus_channel_set_cpu() that can also be used
elsewhere.
No functional change is intended.
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Michael Kelley <mhklinux@outlook.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
---
v2: separate vmbus_channel_set_cpu() changes from
cpu offlining changes.
v3: address comments from Michael.
---
drivers/hv/vmbus_drv.c | 50 +++++++++++++++++++++++++-----------------
include/linux/hyperv.h | 1 +
2 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2892b8da20a5..0ca0e85e6edd 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1611,16 +1611,16 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
{
return sprintf(buf, "%u\n", channel->target_cpu);
}
-static ssize_t target_cpu_store(struct vmbus_channel *channel,
- const char *buf, size_t count)
+
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
{
- u32 target_cpu, origin_cpu;
- ssize_t ret = count;
+ u32 origin_cpu;
+ int ret = 0;
- if (vmbus_proto_version < VERSION_WIN10_V4_1)
- return -EIO;
+ lockdep_assert_cpus_held();
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
- if (sscanf(buf, "%uu", &target_cpu) != 1)
+ if (vmbus_proto_version < VERSION_WIN10_V4_1)
return -EIO;
/* Validate target_cpu for the cpumask_test_cpu() operation below. */
@@ -1630,22 +1630,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
return -EINVAL;
- /* No CPUs should come up or down during this. */
- cpus_read_lock();
-
- if (!cpu_online(target_cpu)) {
- cpus_read_unlock();
+ if (!cpu_online(target_cpu))
return -EINVAL;
- }
/*
- * Synchronizes target_cpu_store() and channel closure:
+ * Synchronizes vmbus_channel_set_cpu() and channel closure:
*
* { Initially: state = CHANNEL_OPENED }
*
* CPU1 CPU2
*
- * [target_cpu_store()] [vmbus_disconnect_ring()]
+ * [vmbus_channel_set_cpu()] [vmbus_disconnect_ring()]
*
* LOCK channel_mutex LOCK channel_mutex
* LOAD r1 = state LOAD r2 = state
@@ -1660,7 +1655,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
* Note. The host processes the channel messages "sequentially", in
* the order in which they are received on a per-partition basis.
*/
- mutex_lock(&vmbus_connection.channel_mutex);
/*
* Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
@@ -1668,17 +1662,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
*/
if (channel->state != CHANNEL_OPENED_STATE) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
origin_cpu = channel->target_cpu;
if (target_cpu == origin_cpu)
- goto cpu_store_unlock;
+ goto end;
if (vmbus_send_modifychannel(channel,
hv_cpu_number_to_vp_number(target_cpu))) {
ret = -EIO;
- goto cpu_store_unlock;
+ goto end;
}
/*
@@ -1708,9 +1702,25 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
origin_cpu, target_cpu);
}
-cpu_store_unlock:
+end:
+ return ret;
+}
+
+static ssize_t target_cpu_store(struct vmbus_channel *channel,
+ const char *buf, size_t count)
+{
+ ssize_t ret = count;
+ u32 target_cpu;
+
+ if (sscanf(buf, "%uu", &target_cpu) != 1)
+ return -EIO;
+
+ cpus_read_lock();
+ mutex_lock(&vmbus_connection.channel_mutex);
+ ret = vmbus_channel_set_cpu(channel, target_cpu);
mutex_unlock(&vmbus_connection.channel_mutex);
cpus_read_unlock();
+
return ret;
}
static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 02a226bcf0ed..25e9e982f1b0 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1670,6 +1670,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
const guid_t *shv_host_servie_id);
int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
void vmbus_set_event(struct vmbus_channel *channel);
+int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu);
/* Get the start of the ring buffer. */
static inline void *
--
2.47.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v5 2/2] drivers/hv: add CPU offlining support
2025-01-16 23:07 [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu() Hamza Mahfooz
@ 2025-01-16 23:07 ` Hamza Mahfooz
2025-01-17 3:39 ` [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu() Michael Kelley
1 sibling, 0 replies; 3+ messages in thread
From: Hamza Mahfooz @ 2025-01-16 23:07 UTC (permalink / raw)
To: linux-hyperv
Cc: Hamza Mahfooz, Boqun Feng, Michael Kelley, Wei Liu,
K. Y. Srinivasan, Haiyang Zhang, Dexuan Cui, linux-kernel
Currently, it is tedious to offline CPUs in a Hyper-V VM since CPUs may
have VMBus channels attached to them that a user would have to manually
rebind elsewhere. So, as mentioned in
commit d570aec0f2154 ("Drivers: hv: vmbus: Synchronize init_vp_index()
vs. CPU hotplug"), rebind channels associated with CPUs that a user is
trying to offline to a new "randomly" selected CPU.
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Michael Kelley <mhklinux@outlook.com>
Cc: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
---
v2: remove cpus_read_{un,}lock() from hv_pick_new_cpu() and add
lockdep_assert_cpus_held().
v3: use for_each_cpu_wrap() in hv_pick_new_cpu().
v4: store get_random_u32_below() in start.
v5: style fixes, use target_cpu and set ret to -EBUSY by default.
---
drivers/hv/hv.c | 72 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 51 insertions(+), 21 deletions(-)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 36d9ba097ff5..fab0690b5c41 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -433,13 +433,47 @@ static bool hv_synic_event_pending(void)
return pending;
}
+static int hv_pick_new_cpu(struct vmbus_channel *channel)
+{
+ int ret = -EBUSY;
+ int start;
+ int cpu;
+
+ lockdep_assert_cpus_held();
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
+
+ /*
+ * We can't assume that the relevant interrupts will be sent before
+ * the cpu is offlined on older versions of hyperv.
+ */
+ if (vmbus_proto_version < VERSION_WIN10_V5_3)
+ return -EBUSY;
+
+ start = get_random_u32_below(nr_cpu_ids);
+
+ for_each_cpu_wrap(cpu, cpu_online_mask, start) {
+ if (channel->target_cpu == cpu ||
+ channel->target_cpu == VMBUS_CONNECT_CPU)
+ continue;
+
+ ret = vmbus_channel_set_cpu(channel, cpu);
+ if (!ret)
+ break;
+ }
+
+ if (ret)
+ ret = vmbus_channel_set_cpu(channel, VMBUS_CONNECT_CPU);
+
+ return ret;
+}
+
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
int hv_synic_cleanup(unsigned int cpu)
{
struct vmbus_channel *channel, *sc;
- bool channel_found = false;
+ int ret = 0;
if (vmbus_connection.conn_state != CONNECTED)
goto always_cleanup;
@@ -456,38 +490,34 @@ int hv_synic_cleanup(unsigned int cpu)
/*
* Search for channels which are bound to the CPU we're about to
- * cleanup. In case we find one and vmbus is still connected, we
- * fail; this will effectively prevent CPU offlining.
- *
- * TODO: Re-bind the channels to different CPUs.
+ * cleanup.
*/
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (channel->target_cpu == cpu) {
- channel_found = true;
- break;
+ ret = hv_pick_new_cpu(channel);
+ if (ret) {
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ return ret;
+ }
}
list_for_each_entry(sc, &channel->sc_list, sc_list) {
if (sc->target_cpu == cpu) {
- channel_found = true;
- break;
+ ret = hv_pick_new_cpu(sc);
+ if (ret) {
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ return ret;
+ }
}
}
- if (channel_found)
- break;
}
mutex_unlock(&vmbus_connection.channel_mutex);
- if (channel_found)
- return -EBUSY;
-
/*
- * channel_found == false means that any channels that were previously
- * assigned to the CPU have been reassigned elsewhere with a call of
- * vmbus_send_modifychannel(). Scan the event flags page looking for
- * bits that are set and waiting with a timeout for vmbus_chan_sched()
- * to process such bits. If bits are still set after this operation
- * and VMBus is connected, fail the CPU offlining operation.
+ * Scan the event flags page looking for bits that are set and waiting
+ * with a timeout for vmbus_chan_sched() to process such bits. If bits
+ * are still set after this operation and VMBus is connected, fail the
+ * CPU offlining operation.
*/
if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
return -EBUSY;
@@ -497,5 +527,5 @@ int hv_synic_cleanup(unsigned int cpu)
hv_synic_disable_regs(cpu);
- return 0;
+ return ret;
}
--
2.47.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* RE: [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu()
2025-01-16 23:07 [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu() Hamza Mahfooz
2025-01-16 23:07 ` [PATCH v5 2/2] drivers/hv: add CPU offlining support Hamza Mahfooz
@ 2025-01-17 3:39 ` Michael Kelley
1 sibling, 0 replies; 3+ messages in thread
From: Michael Kelley @ 2025-01-17 3:39 UTC (permalink / raw)
To: Hamza Mahfooz, linux-hyperv@vger.kernel.org
Cc: Boqun Feng, Wei Liu, K. Y. Srinivasan, Haiyang Zhang, Dexuan Cui,
linux-kernel@vger.kernel.org
From: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com> Sent: Thursday, January 16, 2025 3:07 PM
>
> The core functionality in target_cpu_store() is also needed in a
> subsequent patch for automatically changing the CPU when taking
> a CPU offline. As such, factor out the body of target_cpu_store()
> into new function vmbus_channel_set_cpu() that can also be used
> elsewhere.
>
> No functional change is intended.
>
> Cc: Boqun Feng <boqun.feng@gmail.com>
> Cc: Michael Kelley <mhklinux@outlook.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Signed-off-by: Hamza Mahfooz <hamzamahfooz@linux.microsoft.com>
> ---
> v2: separate vmbus_channel_set_cpu() changes from
> cpu offlining changes.
>
> v3: address comments from Michael.
> ---
> drivers/hv/vmbus_drv.c | 50 +++++++++++++++++++++++++-----------------
> include/linux/hyperv.h | 1 +
> 2 files changed, 31 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 2892b8da20a5..0ca0e85e6edd 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -1611,16 +1611,16 @@ static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf)
> {
> return sprintf(buf, "%u\n", channel->target_cpu);
> }
> -static ssize_t target_cpu_store(struct vmbus_channel *channel,
> - const char *buf, size_t count)
> +
> +int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu)
> {
> - u32 target_cpu, origin_cpu;
> - ssize_t ret = count;
> + u32 origin_cpu;
> + int ret = 0;
>
> - if (vmbus_proto_version < VERSION_WIN10_V4_1)
> - return -EIO;
> + lockdep_assert_cpus_held();
With the previous bugs fixed, I applied both patches in this series
and did a test build. Unfortunately, lockdep_assert_cpus_held() is
not exported, and the build fails if CONFIG_HYPERV=m.
cpus_read_lock(), cpus_read_unlock(), and cpus_read_trylock() are
exported, but lockdep_assert_cpus_held() is not. I don't know why
they are inconsistent in that regard. You'll need to drop the use of
lockdep_assert_cpus_held() here and in hv_pick_new_cpu(), or
add a patch to export lockdep_assert_cpus_held(), assuming it's
valid to do so.
With any Hyper-V related changes, it's necessary to build and
test with CONFIG_HYPERV=m to catch problems like this.
> + lockdep_assert_held(&vmbus_connection.channel_mutex);
>
> - if (sscanf(buf, "%uu", &target_cpu) != 1)
> + if (vmbus_proto_version < VERSION_WIN10_V4_1)
> return -EIO;
>
> /* Validate target_cpu for the cpumask_test_cpu() operation below. */
> @@ -1630,22 +1630,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
> if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
> return -EINVAL;
>
> - /* No CPUs should come up or down during this. */
> - cpus_read_lock();
> -
> - if (!cpu_online(target_cpu)) {
> - cpus_read_unlock();
> + if (!cpu_online(target_cpu))
> return -EINVAL;
> - }
>
> /*
> - * Synchronizes target_cpu_store() and channel closure:
> + * Synchronizes vmbus_channel_set_cpu() and channel closure:
> *
> * { Initially: state = CHANNEL_OPENED }
> *
> * CPU1 CPU2
> *
> - * [target_cpu_store()] [vmbus_disconnect_ring()]
> + * [vmbus_channel_set_cpu()] [vmbus_disconnect_ring()]
> *
> * LOCK channel_mutex LOCK channel_mutex
> * LOAD r1 = state LOAD r2 = state
> @@ -1660,7 +1655,6 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
> * Note. The host processes the channel messages "sequentially", in
> * the order in which they are received on a per-partition basis.
> */
> - mutex_lock(&vmbus_connection.channel_mutex);
>
> /*
> * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels;
> @@ -1668,17 +1662,17 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
> */
> if (channel->state != CHANNEL_OPENED_STATE) {
> ret = -EIO;
> - goto cpu_store_unlock;
> + goto end;
> }
>
> origin_cpu = channel->target_cpu;
> if (target_cpu == origin_cpu)
> - goto cpu_store_unlock;
> + goto end;
>
> if (vmbus_send_modifychannel(channel,
> hv_cpu_number_to_vp_number(target_cpu))) {
> ret = -EIO;
> - goto cpu_store_unlock;
> + goto end;
> }
>
> /*
> @@ -1708,9 +1702,25 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
> origin_cpu, target_cpu);
> }
>
> -cpu_store_unlock:
> +end:
> + return ret;
> +}
> +
> +static ssize_t target_cpu_store(struct vmbus_channel *channel,
> + const char *buf, size_t count)
> +{
> + ssize_t ret = count;
> + u32 target_cpu;
> +
> + if (sscanf(buf, "%uu", &target_cpu) != 1)
> + return -EIO;
> +
> + cpus_read_lock();
> + mutex_lock(&vmbus_connection.channel_mutex);
> + ret = vmbus_channel_set_cpu(channel, target_cpu);
There's a problem here that I didn't previously see. Like all other
functions for setting a sysfs attribute, this function should return
the number of bytes processed, or a negative errno value. That's
why "ret" is initialized to "count". But vmbus_channel_set_cpu()
sets "ret" to zero if it succeeds, and the wrong value will be
returned. The original code correctly returned "count".
This problem would have been evident with a simple test of
writing to /sys/bus/vmbus/devices/<guid>/channels/<nn>/cpu.
Michael
> mutex_unlock(&vmbus_connection.channel_mutex);
> cpus_read_unlock();
> +
> return ret;
> }
> static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store);
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 02a226bcf0ed..25e9e982f1b0 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1670,6 +1670,7 @@ int vmbus_send_tl_connect_request(const guid_t
> *shv_guest_servie_id,
> const guid_t *shv_host_servie_id);
> int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
> void vmbus_set_event(struct vmbus_channel *channel);
> +int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu);
>
> /* Get the start of the ring buffer. */
> static inline void *
> --
> 2.47.1
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-01-17 3:39 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-01-16 23:07 [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu() Hamza Mahfooz
2025-01-16 23:07 ` [PATCH v5 2/2] drivers/hv: add CPU offlining support Hamza Mahfooz
2025-01-17 3:39 ` [PATCH v5 1/2] drivers/hv: introduce vmbus_channel_set_cpu() Michael Kelley
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox