Linux-HyperV List

Linux-HyperV List
 help / color / mirror / Atom feed

* [PATCH v4 11/12] Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

Before suspend, Linux must make sure all the hv_sock channels have been
properly cleaned up, because a hv_sock connection can not persist across
hibernation, and the user-space app must be properly notified of the
state change of the connection.

Before suspend, Linux also must make sure all the sub-channels have been
destroyed, i.e. the related channel structs of the sub-channels must be
properly removed, otherwise they would cause a conflict when the
sub-channels are recreated upon resume.

Add a counter to track such channels, and vmbus_bus_suspend() should wait
for the counter to drop to zero.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/channel_mgmt.c | 26 ++++++++++++++++++++++++++
 drivers/hv/connection.c   |  3 +++
 drivers/hv/hyperv_vmbus.h | 12 ++++++++++++
 drivers/hv/vmbus_drv.c    | 44 +++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 44b92fa..5518d03 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -545,6 +545,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
 	mutex_lock(&vmbus_connection.channel_mutex);
 
+	/* Remember the channels that should be cleaned up upon suspend. */
+	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
 	/*
 	 * Now that we have acquired the channel_mutex,
 	 * we can release the potentially racing rescind thread.
@@ -944,6 +948,16 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 	vmbus_process_offer(newchannel);
 }
 
+static void check_ready_for_suspend_event(void)
+{
+	/*
+	 * If all the sub-channels or hv_sock channels have been cleaned up,
+	 * then it's safe to suspend.
+	 */
+	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+		complete(&vmbus_connection.ready_for_suspend_event);
+}
+
 /*
  * vmbus_onoffer_rescind - Rescind offer handler.
  *
@@ -954,6 +968,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 	struct vmbus_channel_rescind_offer *rescind;
 	struct vmbus_channel *channel;
 	struct device *dev;
+	bool clean_up_chan_for_suspend;
 
 	rescind = (struct vmbus_channel_rescind_offer *)hdr;
 
@@ -993,6 +1008,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		return;
 	}
 
+	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+				    is_sub_channel(channel);
 	/*
 	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
 	 * should make sure the channel callback is not running any more.
@@ -1018,6 +1035,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 	if (channel->device_obj) {
 		if (channel->chn_rescind_callback) {
 			channel->chn_rescind_callback(channel);
+
+			if (clean_up_chan_for_suspend)
+				check_ready_for_suspend_event();
+
 			return;
 		}
 		/*
@@ -1050,6 +1071,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		}
 		mutex_unlock(&vmbus_connection.channel_mutex);
 	}
+
+	/* The "channel" may have been freed. Do not access it any longer. */
+
+	if (clean_up_chan_for_suspend)
+		check_ready_for_suspend_event();
 }
 
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 806319c..99851ea 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -26,6 +26,9 @@
 struct vmbus_connection vmbus_connection = {
 	.conn_state		= DISCONNECTED,
 	.next_gpadl_handle	= ATOMIC_INIT(0xE1E10),
+
+	.ready_for_suspend_event= COMPLETION_INITIALIZER(
+				  vmbus_connection.ready_for_suspend_event),
 };
 EXPORT_SYMBOL_GPL(vmbus_connection);
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 613888e..eedbe59 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -258,6 +258,18 @@ struct vmbus_connection {
 	struct workqueue_struct *work_queue;
 	struct workqueue_struct *handle_primary_chan_wq;
 	struct workqueue_struct *handle_sub_chan_wq;
+
+	/*
+	 * The number of sub-channels and hv_sock channels that should be
+	 * cleaned up upon suspend: sub-channels will be re-created upon
+	 * resume, and hv_sock channels should not survive suspend.
+	 */
+	atomic_t nr_chan_close_on_suspend;
+	/*
+	 * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+	 * drop to zero.
+	 */
+	struct completion ready_for_suspend_event;
 };
 
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 45b976e..32ec951 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2127,7 +2127,8 @@ static int vmbus_acpi_add(struct acpi_device *device)
 
 static int vmbus_bus_suspend(struct device *dev)
 {
-	struct vmbus_channel *channel;
+	struct vmbus_channel *channel, *sc;
+	unsigned long flags;
 
 	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
 		/*
@@ -2146,6 +2147,44 @@ static int vmbus_bus_suspend(struct device *dev)
 	}
 	mutex_unlock(&vmbus_connection.channel_mutex);
 
+	/*
+	 * Wait until all the sub-channels and hv_sock channels have been
+	 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+	 * they would conflict with the new sub-channels that will be created
+	 * in the resume path. hv_sock channels should also be destroyed, but
+	 * a hv_sock channel of an established hv_sock connection can not be
+	 * really destroyed since it may still be referenced by the userspace
+	 * application, so we just force the hv_sock channel to be rescinded
+	 * by vmbus_force_channel_rescinded(), and the userspace application
+	 * will thoroughly destroy the channel after hibernation.
+	 *
+	 * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+	 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+	 */
+	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+		wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+	mutex_lock(&vmbus_connection.channel_mutex);
+
+	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+		if (is_hvsock_channel(channel)) {
+			if (!channel->rescind) {
+				pr_err("hv_sock channel not rescinded!\n");
+				WARN_ON_ONCE(1);
+			}
+			continue;
+		}
+
+		spin_lock_irqsave(&channel->lock, flags);
+		list_for_each_entry(sc, &channel->sc_list, sc_list) {
+			pr_err("Sub-channel not deleted!\n");
+			WARN_ON_ONCE(1);
+		}
+		spin_unlock_irqrestore(&channel->lock, flags);
+	}
+
+	mutex_unlock(&vmbus_connection.channel_mutex);
+
 	vmbus_initiate_unload(false);
 
 	vmbus_connection.conn_state = DISCONNECTED;
@@ -2186,6 +2225,9 @@ static int vmbus_bus_resume(struct device *dev)
 
 	vmbus_request_offers();
 
+	/* Reset the event for the next suspend. */
+	reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
 	return 0;
 }
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 10/12] Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
hibernation. There is no better method to clean up the channels since
some of the channels may still be referenced by the userspace apps when
hibernation is triggered: in this case, with this patch, the "rescind"
fields of the channels are set, and the apps will thoroughly destroy
the channels after hibernation.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/hv/vmbus_drv.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ce9974b..45b976e 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -24,6 +24,7 @@
 #include <linux/sched/task_stack.h>
 
 #include <asm/mshyperv.h>
+#include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
 #include <linux/screen_info.h>
@@ -1069,6 +1070,41 @@ void vmbus_on_msg_dpc(unsigned long data)
 	vmbus_signal_eom(msg, message_type);
 }
 
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+	struct onmessage_work_context *ctx;
+	struct vmbus_channel_rescind_offer *rescind;
+
+	WARN_ON(!is_hvsock_channel(channel));
+
+	/*
+	 * sizeof(*ctx) is small and the allocation should really not fail,
+	 * otherwise the state of the hv_sock connections ends up in limbo.
+	 */
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+	/*
+	 * So far, these are not really used by Linux. Just set them to the
+	 * reasonable values conforming to the definitions of the fields.
+	 */
+	ctx->msg.header.message_type = 1;
+	ctx->msg.header.payload_size = sizeof(*rescind);
+
+	/* These values are actually used by Linux. */
+	rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+	rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+	rescind->child_relid = channel->offermsg.child_relid;
+
+	INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+	queue_work_on(vmbus_connection.connect_cpu,
+		      vmbus_connection.work_queue,
+		      &ctx->work);
+}
 
 /*
  * Direct callback for channels using other deferred processing
@@ -2091,6 +2127,25 @@ static int vmbus_acpi_add(struct acpi_device *device)
 
 static int vmbus_bus_suspend(struct device *dev)
 {
+	struct vmbus_channel *channel;
+
+	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+		/*
+		 * We wait here until the completion of any channel
+		 * offers that are currently in progress.
+		 */
+		msleep(1);
+	}
+
+	mutex_lock(&vmbus_connection.channel_mutex);
+	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+		if (!is_hvsock_channel(channel))
+			continue;
+
+		vmbus_force_channel_rescinded(channel);
+	}
+	mutex_unlock(&vmbus_connection.channel_mutex);
+
 	vmbus_initiate_unload(false);
 
 	vmbus_connection.conn_state = DISCONNECTED;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 08/12] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

When the VM resumes, the host re-sends the offers. We should not add the
offers to the global vmbus_connection.chn_list again.

This patch assumes the RELIDs of the channels don't change across
hibernation. Actually this is not always true, especially in the case of
NIC SR-IOV the VF vmbus device's RELID sometimes can change. A later patch
will address this issue by mapping the new offers to the old channels and
fixing up the old channels, if necessary.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/channel_mgmt.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index addcef5..44b92fa 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -848,18 +848,74 @@ void vmbus_initiate_unload(bool crash)
 }
 
 /*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+	struct vmbus_channel *channel = NULL, *iter;
+	const guid_t *inst1, *inst2;
+
+	/* Ignore sub-channel offers. */
+	if (offer->offer.sub_channel_index != 0)
+		return NULL;
+
+	mutex_lock(&vmbus_connection.channel_mutex);
+
+	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+		inst1 = &iter->offermsg.offer.if_instance;
+		inst2 = &offer->offer.if_instance;
+
+		if (guid_equal(inst1, inst2)) {
+			channel = iter;
+			break;
+		}
+	}
+
+	mutex_unlock(&vmbus_connection.channel_mutex);
+
+	return channel;
+}
+
+/*
  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
  *
  */
 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 {
 	struct vmbus_channel_offer_channel *offer;
-	struct vmbus_channel *newchannel;
+	struct vmbus_channel *oldchannel, *newchannel;
+	size_t offer_sz;
 
 	offer = (struct vmbus_channel_offer_channel *)hdr;
 
 	trace_vmbus_onoffer(offer);
 
+	oldchannel = find_primary_channel_by_offer(offer);
+
+	if (oldchannel != NULL) {
+		atomic_dec(&vmbus_connection.offer_in_progress);
+
+		/*
+		 * We're resuming from hibernation: we expect the host to send
+		 * exactly the same offers that we had before the hibernation.
+		 */
+		offer_sz = sizeof(*offer);
+		if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
+			return;
+
+		pr_debug("Mismatched offer from the host (relid=%d)\n",
+			 offer->child_relid);
+
+		print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+				     16, 4, &oldchannel->offermsg, offer_sz,
+				     false);
+		print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+				     16, 4, offer, offer_sz, false);
+		return;
+	}
+
 	/* Allocate the channel object and save this offer. */
 	newchannel = alloc_channel();
 	if (!newchannel) {
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 07/12] Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

The high-level VSC drivers will implement device-specific callbacks.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hyperv.h |  3 +++
 2 files changed, 49 insertions(+)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2ef375c..a30c70a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -911,6 +911,43 @@ static void vmbus_shutdown(struct device *child_device)
 		drv->shutdown(dev);
 }
 
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+	struct hv_driver *drv;
+	struct hv_device *dev = device_to_hv_device(child_device);
+
+	/* The device may not be attached yet */
+	if (!child_device->driver)
+		return 0;
+
+	drv = drv_to_hv_drv(child_device->driver);
+	if (!drv->suspend)
+		return -EOPNOTSUPP;
+
+	return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+	struct hv_driver *drv;
+	struct hv_device *dev = device_to_hv_device(child_device);
+
+	/* The device may not be attached yet */
+	if (!child_device->driver)
+		return 0;
+
+	drv = drv_to_hv_drv(child_device->driver);
+	if (!drv->resume)
+		return -EOPNOTSUPP;
+
+	return drv->resume(dev);
+}
 
 /*
  * vmbus_device_release - Final callback release of the vmbus child device
@@ -926,6 +963,14 @@ static void vmbus_device_release(struct device *device)
 	kfree(hv_dev);
 }
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
 /* The one and only one */
 static struct bus_type  hv_bus = {
 	.name =		"vmbus",
@@ -936,6 +981,7 @@ static void vmbus_device_release(struct device *device)
 	.uevent =		vmbus_uevent,
 	.dev_groups =		vmbus_dev_groups,
 	.drv_groups =		vmbus_drv_groups,
+	.pm =			&vmbus_pm,
 };
 
 struct onmessage_work_context {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2d39248..8a60e77 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1157,6 +1157,9 @@ struct hv_driver {
 	int (*remove)(struct hv_device *);
 	void (*shutdown)(struct hv_device *);
 
+	int (*suspend)(struct hv_device *);
+	int (*resume)(struct hv_device *);
+
 };
 
 /* Base device object */
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 00/12] Enhance the hv_vmbus driver to support hibernation
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui

Hi all,
The patchset is to enhance hv_vmbus to support hibernation when Linux VM
runs on Hyper-V. A second patchset to enhance the high-level VSC drivers
(hv_netvsc, hv_storvsc, etc.) for hibernation will be posted after this
patchset is acceped. If you want to test this hibernation feaure, all the
needed patches can be found on my github branch:
https://github.com/dcui/linux/commits/decui/hibernation/2019-0830/v4/v5.3-rc6-plus

This patchset is based on v5.3-rc6, and can also cleanly apply to -rc7.

Please review.

Hi tglx,
I hope all the 12 patchset, including the below 3 patches,

[PATCH v4 01/12] x86/hyper-v: Suspend/resume the hypercall page for hibernation
[PATCH v4 02/12] x86/hyper-v: Implement hv_is_hibernation_supported()
[PATCH v4 03/12] clocksource/drivers: Suspend/resume Hyper-V

can go through Sasha's hyperv/linux.git tree, since all the changes belong
to the hv stuff. However, if you think it's better for these 3 patches to go
through your tip.git tree, it also works for me.

Hi Michael,
I added your Reviewed-by's for patch 1, 3~7, 9~10, since you have reviewed
these patches. Please review the v4 of the others: 2, 8, 11 and 12.

Thanks,
Dexuan

Changes in v4:
  Patch 2: Enhanced the changelog.

  Patch 8: Moved find_primary_channel_by_offer() to channel_mgmt.c, where
           it's used.

  Patch 10: Improved the changelog (typo) and comment.

  Patch 11: Removed WARN_ON_ONCE(), added a comment for the case
            nr_chan_close_on_suspend == 0.

  Patch 12: Improved the comment.

Changes in v3:
  Patch 2: Add a new API hv_is_hibernation_supported().

  Patch 6: Add a new helper function is_sub_channel().

  Patch 8: Find the old channels via Instance GUID rather than the RELID.

  Patch 10: Add code to clean up hv_sock channels by force

  Patch 11: Add code to wait in the suspend path: all the hv_sock channels
            and sub-channels should be cleaned up first before Linux sends
            the VMBUS UNLOAD message.

  Patch 12: Add code to fix up the old primary channels before further
            resuming.

Changes in v2:
  Patch 3: Improved the changelog and added a comment.

  Patch 4: Remove the "hv_stimer_cleanup" in hv_synic_suspend(), because I
           suppose https://lkml.org/lkml/2019/7/27/5 will be accepted. Also
           improved changelog and the comment.

  Patch 5: Fixed the third argument of print_hex_dump_debug(). Also improved
           the changelog.

  Patch 6: Improved the changelog and the comment. Added a check for the
           'vmbus_proto_version' in vmbus_bus_resume().

Dexuan Cui (12):
  x86/hyper-v: Suspend/resume the hypercall page for hibernation
  x86/hyper-v: Implement hv_is_hibernation_supported()
  clocksource/drivers: Suspend/resume Hyper-V clocksource for
    hibernation
  Drivers: hv: vmbus: Break out synic enable and disable operations
  Drivers: hv: vmbus: Suspend/resume the synic for hibernation
  Drivers: hv: vmbus: Add a helper function is_sub_channel()
  Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for
    hibernation
  Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
  Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
  Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
  Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
  Drivers: hv: vmbus: Resume after fixing up old primary channels

 arch/x86/hyperv/hv_init.c          |  41 ++++++
 drivers/clocksource/hyperv_timer.c |  25 ++++
 drivers/hv/channel_mgmt.c          | 161 +++++++++++++++++++---
 drivers/hv/connection.c            |   8 +-
 drivers/hv/hv.c                    |  66 +++++----
 drivers/hv/hyperv_vmbus.h          |  30 +++++
 drivers/hv/vmbus_drv.c             | 265 +++++++++++++++++++++++++++++++++++++
 include/asm-generic/mshyperv.h     |   2 +
 include/linux/hyperv.h             |  16 ++-
 9 files changed, 565 insertions(+), 49 deletions(-)

-- 
1.8.3.1

^ permalink raw reply

* [PATCH] irqdomain: Add the missing assignment of domain->fwnode for named fwnode
From: Dexuan Cui @ 2019-09-02 23:14 UTC (permalink / raw)
  To: Marc Zyngier, Thomas Gleixner
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	Lorenzo Pieralisi, Bjorn Helgaas, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org, Michael Kelley,
	Lili Deng (Wicresoft North America Ltd)

Recently device pass-through stops working for Linux VM running on Hyper-V.

git-bisect shows the regression is caused by the recent commit
467a3bb97432 ("PCI: hv: Allocate a named fwnode ..."), but the root cause
is that the commit d59f6617eef0 forgets to set the domain->fwnode for
IRQCHIP_FWNODE_NAMED*, and as a result:

1. The domain->fwnode remains to be NULL.

2. irq_find_matching_fwspec() returns NULL since "h->fwnode == fwnode" is
false, and pci_set_bus_msi_domain() sets the Hyper-V PCI root bus's
msi_domain to NULL.

3. When the device is added onto the root bus, the device's dev->msi_domain
is set to NULL in pci_set_msi_domain().

4. When a device driver tries to enable MSI-X, pci_msi_setup_msi_irqs()
calls arch_setup_msi_irqs(), which uses the native MSI chip (i.e.
arch/x86/kernel/apic/msi.c: pci_msi_controller) to set up the irqs, but
actually pci_msi_setup_msi_irqs() is supposed to call
msi_domain_alloc_irqs() with the hbus->irq_domain, which is created in
hv_pcie_init_irq_domain() and is associated with the Hyper-V chip
hv_msi_irq_chip. Consequently, the irq line is not properly set up, and
the device driver can not receive any interrupt.

Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only")
Fixes: 467a3bb97432 ("PCI: hv: Allocate a named fwnode instead of an address-based one")
Reported-by: Lili Deng <v-lide@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---

Note: the commit 467a3bb97432 ("PCI: hv: Allocate a named fwnode ...") has not
gone in Linus's tree yet (the commit is in linux-next for a while), so the commit ID
in the changelog can change when it goes in Linus's tree.

This patch works in my test, but I'm not 100% sure this is the right fix. 

Looking forward to your comment!

 kernel/irq/irqdomain.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e7bbab149750..132672b74e4b 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -149,6 +149,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 		switch (fwid->type) {
 		case IRQCHIP_FWNODE_NAMED:
 		case IRQCHIP_FWNODE_NAMED_ID:
+			domain->fwnode = fwnode;
 			domain->name = kstrdup(fwid->name, GFP_KERNEL);
 			if (!domain->name) {
 				kfree(domain);
-- 
2.19.1

^ permalink raw reply related

* RE: [PATCH v4 1/2] drivers: hv: vmbus: Introduce latency testing
From: Michael Kelley @ 2019-09-02 18:31 UTC (permalink / raw)
  To: brandonbonaby94, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal@kernel.org
  Cc: brandonbonaby94, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <da1ab5c98ce902ec181a799d8d1f040e8cc39af6.1567047549.git.brandonbonaby94@gmail.com>

From: Branden Bonaby <brandonbonaby94@gmail.com> Sent: Wednesday, August 28, 2019 9:24 PM
> 
> Introduce user specified latency in the packet reception path
> By exposing the test parameters as part of the debugfs channel
> attributes. We will control the testing state via these attributes.
> 
> Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
> ---
> diff --git a/Documentation/ABI/testing/debugfs-hyperv
> b/Documentation/ABI/testing/debugfs-hyperv
> new file mode 100644
> index 000000000000..0903e6427a2d
> --- /dev/null
> +++ b/Documentation/ABI/testing/debugfs-hyperv
> @@ -0,0 +1,23 @@
> +What:           /sys/kernel/debug/hyperv/<UUID>/fuzz_test_state
> +Date:           August 2019
> +KernelVersion:  5.3

Given the way the timing works for adding code to the Linux kernel,
the earliest your new code will be included is 5.4.  The merge window
for 5.4 will open in two weeks, so your code would need to be accepted
by then to be included in 5.4.  Otherwise, it won't make it until the next
merge window, which would be for 5.5.  I would suggest changing this
(and the others below) to 5.4 for now, but you might have to change to
5.5 if additional revisions are needed.

> +Contact:        Branden Bonaby <brandonbonaby94@gmail.com>
> +Description:    Fuzz testing status of a vmbus device, whether its in an ON
> +                state or a OFF state
> +Users:          Debugging tools
> +
> +What:           /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_buffer_interrupt_delay
> +Date:           August 2019
> +KernelVersion:  5.3
> +Contact:        Branden Bonaby <brandonbonaby94@gmail.com>
> +Description:    Fuzz testing buffer interrupt delay value between 0 - 1000
> +		 microseconds (inclusive).
> +Users:          Debugging tools
> +
> +What:           /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_message_delay
> +Date:           August 2019
> +KernelVersion:  5.3
> +Contact:        Branden Bonaby <brandonbonaby94@gmail.com>
> +Description:    Fuzz testing message delay value between 0 - 1000 microseconds
> +		 (inclusive).
> +Users:          Debugging tools
> diff --git a/MAINTAINERS b/MAINTAINERS
> index b1bc9c87838b..6931a4eeaac0 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -7460,6 +7460,7 @@ F:	include/uapi/linux/hyperv.h
>  F:	include/asm-generic/mshyperv.h
>  F:	tools/hv/
>  F:	Documentation/ABI/stable/sysfs-bus-vmbus
> +F:	Documentation/ABI/testing/debugfs-hyperv
> 
>  HYPERBUS SUPPORT
>  M:	Vignesh Raghavendra <vigneshr@ti.com>
> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
> index 9a59957922d4..d97437ba0626 100644
> --- a/drivers/hv/Kconfig
> +++ b/drivers/hv/Kconfig
> @@ -29,4 +29,11 @@ config HYPERV_BALLOON
>  	help
>  	  Select this option to enable Hyper-V Balloon driver.
> 
> +config HYPERV_TESTING
> +        bool "Hyper-V testing"
> +        default n
> +        depends on HYPERV && DEBUG_FS
> +        help
> +          Select this option to enable Hyper-V vmbus testing.
> +
>  endmenu
> diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
> index a1eec7177c2d..d754bd86ca47 100644
> --- a/drivers/hv/Makefile
> +++ b/drivers/hv/Makefile
> @@ -2,6 +2,7 @@
>  obj-$(CONFIG_HYPERV)		+= hv_vmbus.o
>  obj-$(CONFIG_HYPERV_UTILS)	+= hv_utils.o
>  obj-$(CONFIG_HYPERV_BALLOON)	+= hv_balloon.o
> +obj-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o

There's an additional complexity here that I failed to describe to
you in my previous comments.  :-(    The above line makes the
hv_debugfs code part of the main Linux OS image -- i.e., the
vmlinuz file that gets installed into /boot, such that if hv_debugfs
is built, it is always loaded into the memory of the running system.
That's OK, but we should consider the alternative, which is to
make the hv_debugfs code part of the hv_vmbus module that
is specified a bit further down in the same Makefile.   A module
is installed into /lib/modules/<kernel version>/kernel, and
is only loaded into memory on the running system if something
actually references code in the module.  This approach helps avoid
loading code into memory that isn't actually used.

Whether code is built as a separate module or is just built into
the main vmlinuz file is also controlled by the .config file setting.
For example, CONFIG_HYPERV=m says to treat hv_vmbus as a
separate module, while CONFIG_HYPERV=y says to build the
hv_vmbus module directly into the vmlinuz file even though the
Makefile constructs it as a module.

Making hv_debugfs part of the hv_vmbus module is generally better
than just building it into the main vmlinuz because it's best to include
only things that must always be present in the main vmlinuz file.
hv_debugfs doesn't have any reason it needs to always be present.
By comparison, hv_balloon is included in the main vmlinuz, which
might be due to it dealing with memory mgmt and needing to be
in the vmlinuz image, but I'm not sure.

You can make hv_debugfs part of the hv_vmbus module with this line
placed after the line specifying hv_vmbus_y, instead of the line you
currently have:

hv_vmbus-$(CONFIG_HYPERV_TESTING)       += hv_debugfs.o

> +
> +static int hv_debugfs_delay_set(void *data, u64 val)
> +{
> +	int ret = 0;
> +
> +	if (val >= 1 && val <= 1000)
> +		*(u32 *)data = val;
> +	else if (val == 0)
> +		*(u32 *)data = 0;

I think this "else if" clause can be folded into the first
"if" statement by just checking "val >= 0".

> +	else
> +		ret = -EINVAL;
> +
> +	return ret;
> +}
> +

> +/* Remove dentry associated with released hv device */
> +void hv_debug_rm_dev_dir(struct hv_device *dev)
> +{
> +	if (!IS_ERR(hv_debug_root))
> +		debugfs_remove_recursive(dev->debug_dir);
> +}

This function is the first of five functions that are called by
code outside of hv_debugfs.c.   You probably saw the separate
email from the kbuild test robot showing a build error on each
of these five functions.  Here's why.

When CONFIG_HYPERV=m is set, and hv_vmbus is built as a
module, there are references to the five functions from the
module to your hv_debugfs that is built as core code in
vmlinuz.  By default, Linux does not allow such core code to
be called by modules.   Core code must add a statement like:

EXPORT_SYMBOL_GPL(hv_debug_rm_dev_dir);

to allow the module to call it.   This gives the code writer
a very minimal amount of scope control.  However, if you build 
hv_debugfs as part of the module hv_vmbus, and the only
references to the five functions are within the module hv_vmbus,
then the EXPORT statements aren't needed because all
references are internal to the hv_vmbus module.  But if
you envision a function like hv_debug_delay_test() being
called by other Hyper-V drivers that are outside the
hv_vmbus module, then you will need the EXPORT statement
at least for that function.

You probably have your .config file setup with
CONFIG_HYPERV=y.  In that case, the hv_vmbus module is
built-in to the kernel, so you didn't get the errors reported
by the kbuild test robot.  When testing new code, it's a
good practice to build with the HYPERV settings set to
'm' to make sure that any issues with module references
get flushed out and fixed.

Michael

^ permalink raw reply

* RE: [PATCH V2] x86/Hyper-V: Fix overflow issue in the fill_gva_list()
From: Michael Kelley @ 2019-09-02 14:40 UTC (permalink / raw)
  To: lantianyu1986@gmail.com, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, sashal@kernel.org, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, hpa@zytor.com, x86@kernel.org,
	alex.williamson@redhat.com, cohuck@redhat.com
  Cc: Tianyu Lan, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
In-Reply-To: <20190902124143.119478-1-Tianyu.Lan@microsoft.com>

From: Tianyu Lan <Tianyu.Lan@microsoft.com> Sent: Monday, September 2, 2019 5:42 AM
> 
> When the 'start' parameter is >=  0xFF000000 on 32-bit
> systems, or >= 0xFFFFFFFF'FF000000 on 64-bit systems,
> fill_gva_list gets into an infinite loop.  With such inputs,
> 'cur' overflows after adding HV_TLB_FLUSH_UNIT and always
> compares as less than end.  Memory is filled with guest virtual
> addresses until the system crashes
> 
> Fix this by never incrementing 'cur' to be larger than 'end'.
> 
> Reported-by: Jong Hyun Park <park.jonghyun@yonsei.ac.kr>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> Fixes: 2ffd9e33ce4a ("x86/hyper-v: Use hypercall for remote TLB flush")
> ---
> Change since v1:
>      - Simply the commit message
> 
>  arch/x86/hyperv/mmu.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
> index e65d7fe6489f..5208ba49c89a 100644
> --- a/arch/x86/hyperv/mmu.c
> +++ b/arch/x86/hyperv/mmu.c
> @@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
>  		 * Lower 12 bits encode the number of additional
>  		 * pages to flush (in addition to the 'cur' page).
>  		 */
> -		if (diff >= HV_TLB_FLUSH_UNIT)
> +		if (diff >= HV_TLB_FLUSH_UNIT) {
>  			gva_list[gva_n] |= ~PAGE_MASK;
> -		else if (diff)
> +			cur += HV_TLB_FLUSH_UNIT;
> +		}  else if (diff) {
>  			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
> +			cur = end;
> +		}
> 
> -		cur += HV_TLB_FLUSH_UNIT;
>  		gva_n++;
> 
>  	} while (cur < end);
> --
> 2.14.5

Reviewed-by: Michael Kelley <mikelley@microsoft.com>


^ permalink raw reply

* Re: [PATCH] x86/Hyper-V: Fix reference of pv_ops with CONFIG_PARAVIRT=N
From: Wei Liu @ 2019-09-02 14:27 UTC (permalink / raw)
  To: lantianyu1986
  Cc: kys, haiyangz, sthemmin, sashal, tglx, mingo, bp, hpa, x86,
	michael.h.kelley, Tianyu Lan, linux-hyperv, linux-kernel, Wei Liu
In-Reply-To: <20190828080747.204419-1-Tianyu.Lan@microsoft.com>

On Wed, Aug 28, 2019 at 04:07:47PM +0800, lantianyu1986@gmail.com wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> hv_setup_sched_clock() references pv_ops and this should
> be under CONFIG_PARAVIRT=Y. Fix it.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>

Reviewed-by: Wei Liu <wei.liu@kernel.org>

^ permalink raw reply

* Re: [PATCH] x86/Hyper-V: Fix overflow issue in the fill_gva_list()
From: Tianyu Lan @ 2019-09-02 12:46 UTC (permalink / raw)
  To: Michael Kelley
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal@kernel.org, tglx@linutronix.de, mingo@redhat.com,
	bp@alien8.de, hpa@zytor.com, x86@kernel.org,
	gregkh@linuxfoundation.org, alex.williamson@redhat.com,
	cohuck@redhat.com, Tianyu Lan, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
In-Reply-To: <DM5PR21MB0137B7C2AAD0FC65CB3E1306D7BD0@DM5PR21MB0137.namprd21.prod.outlook.com>

On Sat, Aug 31, 2019 at 1:41 AM Michael Kelley <mikelley@microsoft.com> wrote:
>
> From: lantianyu1986@gmail.com  Sent: Thursday, August 29, 2019 11:16 PM
> >
> > From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> >
> > fill_gva_list() populates gva list and adds offset
> > HV_TLB_FLUSH_UNIT(0x1000000) to variable "cur"
> > in the each loop. When diff between "end" and "cur" is
> > less than HV_TLB_FLUSH_UNIT, the gva entry should
> > be the last one and the loop should be end.
> >
> > If cur is equal or greater than 0xFF000000 on 32-bit
> > mode, "cur" will be overflow after adding HV_TLB_FLUSH_UNIT.
> > Its value will be wrapped and less than "end". fill_gva_list()
> > falls into an infinite loop and fill gva list out of
> > border finally.
> >
> > Set "cur" to be "end" to make loop end when diff is
> > less than HV_TLB_FLUSH_UNIT and add HV_TLB_FLUSH_UNIT to
> > "cur" when diff is equal or greater than HV_TLB_FLUSH_UNIT.
> > Fix the overflow issue.
>
> Let me suggest simplifying the commit message a bit.  It
> doesn't need to describe every line of the code change.   I think
> it should also make clear that the same problem could occur on
> 64-bit systems with the right "start" address.  My suggestion:
>
> When the 'start' parameter is >=  0xFF000000 on 32-bit
> systems, or >= 0xFFFFFFFF'FF000000 on 64-bit systems,
> fill_gva_list gets into an infinite loop.  With such inputs,
> 'cur' overflows after adding HV_TLB_FLUSH_UNIT and always
> compares as less than end.  Memory is filled with guest virtual
> addresses until the system crashes
>
> Fix this by never incrementing 'cur' to be larger than 'end'.
>
> >
> > Reported-by: Jong Hyun Park <park.jonghyun@yonsei.ac.kr>
> > Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> > Fixes: 2ffd9e33ce4a ("x86/hyper-v: Use hypercall for remote
> > TLB flush")
>
> The "Fixes:" line needs to not wrap.  It's exempt from the
> "wrap at 75 columns" rule in order to simplify parsing scripts.
>
> The code itself looks good.

Hi Michael:
       Thanks for suggestion. Update commit log in V2.
-- 
Best regards
Tianyu Lan

^ permalink raw reply

* [PATCH V2] x86/Hyper-V: Fix overflow issue in the fill_gva_list()
From: lantianyu1986 @ 2019-09-02 12:41 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, tglx, mingo, bp, hpa, x86,
	alex.williamson, cohuck, michael.h.kelley
  Cc: Tianyu Lan, linux-hyperv, linux-kernel, kvm

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

When the 'start' parameter is >=  0xFF000000 on 32-bit
systems, or >= 0xFFFFFFFF'FF000000 on 64-bit systems,
fill_gva_list gets into an infinite loop.  With such inputs,
'cur' overflows after adding HV_TLB_FLUSH_UNIT and always
compares as less than end.  Memory is filled with guest virtual
addresses until the system crashes

Fix this by never incrementing 'cur' to be larger than 'end'.

Reported-by: Jong Hyun Park <park.jonghyun@yonsei.ac.kr>
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Fixes: 2ffd9e33ce4a ("x86/hyper-v: Use hypercall for remote TLB flush")
---
Change since v1:
     - Simply the commit message 

 arch/x86/hyperv/mmu.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index e65d7fe6489f..5208ba49c89a 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
 		 * Lower 12 bits encode the number of additional
 		 * pages to flush (in addition to the 'cur' page).
 		 */
-		if (diff >= HV_TLB_FLUSH_UNIT)
+		if (diff >= HV_TLB_FLUSH_UNIT) {
 			gva_list[gva_n] |= ~PAGE_MASK;
-		else if (diff)
+			cur += HV_TLB_FLUSH_UNIT;
+		}  else if (diff) {
 			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+			cur = end;
+		}
 
-		cur += HV_TLB_FLUSH_UNIT;
 		gva_n++;
 
 	} while (cur < end);
-- 
2.14.5


^ permalink raw reply related

* RE: [PATCH v4 4/5] HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
From: Jiri Kosina @ 2019-09-02 11:30 UTC (permalink / raw)
  To: Michael Kelley
  Cc: m.maya.nakamura, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
	sashal@kernel.org, benjamin.tissoires@redhat.com, x86@kernel.org,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <DM5PR21MB013708BF1876B7282C049B76D7BC0@DM5PR21MB0137.namprd21.prod.outlook.com>

On Sat, 31 Aug 2019, Michael Kelley wrote:

> From: Maya Nakamura <m.maya.nakamura@gmail.com>  Sent: Friday, July 12, 2019 1:28 AM
> > 
> > Define the ring buffer size as a constant expression because it should
> > not depend on the guest page size.
> > 
> > Signed-off-by: Maya Nakamura <m.maya.nakamura@gmail.com>
> > Reviewed-by: Michael Kelley <mikelley@microsoft.com>
> 
> Jiri and Benjamin -- OK if this small patch for the Hyper-V HID driver
> goes through the Hyper-V tree maintained by Sasha Levin?   It's a purely
> Hyper-V change so the ring buffer size isn't bigger when running
> on ARM64 where the page size might be 16K or 64K.

Yeah; FWIW feel free to add

	Acked-by: Jiri Kosina <jkosina@suse.cz>

Thanks,

-- 
Jiri Kosina
SUSE Labs


^ permalink raw reply

* RE: [PATCH v3 12/12] Drivers: hv: vmbus: Resume after fixing up old primary channels
From: Dexuan Cui @ 2019-08-31  5:08 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <PU1P153MB0169E3DD602FB575C346186DBFBC0@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM>

> From: Dexuan Cui
> Sent: Friday, August 30, 2019 9:37 PM
> > Is the intent to proceed and use the new offer?
> Yes, since this is not an error.
> 
> I'll add a comment before the "Mismatched offer from the host" for this.

Hi Michael, 
I'm going to make the below change in v4.

--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -956,7 +956,13 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
                        return;
                }

-               pr_debug("Mismatched offer from the host (relid=%d)\n",
+               /*
+                * This is not an error, since the host can also change the
+                * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+                * the offer->connection_id of the Mellanox VF vmbus device
+                * can change when the host reoffers the device upon resume.
+                */
+               pr_debug("vmbus offer changed: relid=%d\n",
                         offer->child_relid);

                print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
@@ -965,6 +971,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
                print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
                                     16, 4, offer, offer_sz, false);

+               /* Fix up the old channel. */
                vmbus_setup_channel_state(oldchannel, offer);

                check_ready_for_resume_event();
Thanks,
-- Dexuan

^ permalink raw reply

* RE: [PATCH v3 12/12] Drivers: hv: vmbus: Resume after fixing up old primary channels
From: Dexuan Cui @ 2019-08-31  4:37 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <DM5PR21MB01370691E881D59773B9EF60D7A40@DM5PR21MB0137.namprd21.prod.outlook.com>

> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Friday, August 23, 2019 1:25 PM
> 
> From: Dexuan Cui Sent: Monday, August 19, 2019 6:52  PM
> > @@ -890,6 +937,11 @@ static void vmbus_onoffer(struct
> > vmbus_channel_message_header *hdr)
> >  				     false);
> >  		print_hex_dump_debug("New vmbus offer: ",
> DUMP_PREFIX_OFFSET,
> >  				     16, 4, offer, offer_sz, false);
> > +
> > +		vmbus_setup_channel_state(oldchannel, offer);
> > +
> > +		check_ready_for_resume_event();
> 
> This is the error case where the new offer didn't match some aspect of
> the old offer. 

Actually, this is not an error: besides the RELID, the host can also change
the offer->connection_id when it re-offers a device to the guest: so far,
I only see this host behavior for the VF vmbus device, and in this case, the
first vmbus_setup_channel_state() in vmbus_onoffer() is used to do the
fix-up:
    channel->sig_event = offer->connection_id;
and later channel->sig_event is used in vmbus_set_event().

Despite the host behavior, it looks the VF vmbus device still works fine,
so (IMO) this is not an error. I'll write a separate email to report this to
Hyper-V team.

> Is the intent to proceed and use the new offer? 
Yes, since this is not an error.

I'll add a comment before the "Mismatched offer from the host" for this.

BTW, the 3 debug lines here output nothing, unless we enable the output
by 
  cd /sys/kernel/debug/dynamic_debug/
  echo 'file drivers/hv/channel_mgmt.c +p' > control
.

> I can see that check_ready_for_resume_event() has to be called in
> the error case, otherwise the resume operation will hang forever, but 
> I'm not sure about setting up the channel state and then proceeding as
> if all is good.
> > +
> > 		return;

Thanks,
-- Dexuan

^ permalink raw reply

* RE: [PATCH v3 11/12] Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
From: Dexuan Cui @ 2019-08-31  2:54 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <DM5PR21MB01379C31FC628F4666413804D7A40@DM5PR21MB0137.namprd21.prod.outlook.com>

> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Friday, August 23, 2019 1:17 PM
> 
> From: Dexuan Cui Sent: Monday, August 19, 2019 6:52 PM
> >
> > diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> > @@ -499,6 +499,8 @@ static void vmbus_add_channel_work(struct
> work_struct *work)
> >  	return;
> >
> >  err_deq_chan:
> > +	WARN_ON_ONCE(1);
> > +
> 
> Why this change?  I was thinking maybe it's a debug statement that got
> left in.

This is indeed a debug statement. :-) 
I was not so sure if the error handling is absolutely correct there.
I think I can remove this WARN_ON_ONCE() since almost all of the possible
error paths have a pr_err(). We can make an extra patch to fix any bug in
the existing error handling code. BTW, it should be pretty unlikely to reach
the "err_deq_chan label" in practice.

> > @@ -1021,6 +1044,11 @@ static void vmbus_onoffer_rescind(struct
> > vmbus_channel_message_header *hdr)
> >  		}
> >  		mutex_unlock(&vmbus_connection.channel_mutex);
> >  	}
> > +
> > +	/* The "channel" may have been freed. Do not access it any longer. */
> > +
> > +	if (clean_up_chan_for_suspend)
> > +		check_ready_for_suspend_event();
> >  }
> 
> Having to add the above lines twice is a bit clumsy, but the problem is
> the overall structure of the vmbus_onoffer_rescind.  The early return in
> the case of a rescind_callback function is a bit weird, but I guess it makes
> sense since from what I can tell, only uio and hv_sock have rescind callback
> functions. Some minor restructuring might be warranted, but I don't feel
> strongly about it.

I agree. We should restructure vmbus_onoffer_rescind(), but I think we can
do it in a separate patch. Here in this patch I'm focused on the minimal
required change for hibernation.

> > +	/*
> > +	 * Wait until all the sub-channels and hv_sock channels have been
> > +	 * cleaned up. Sub-channels should be destroyed upon suspend,
> otherwise
> > +	 * they would conflict with the new sub-channels that will be created
> > +	 * in the resume path. hv_sock channels should also be destroyed, but
> > +	 * a hv_sock channel of an established hv_sock connection can not be
> > +	 * really destroyed since it may still be referenced by the userspace
> > +	 * application, so we just force the hv_sock channel to be rescinded
> > +	 * by vmbus_force_channel_rescinded(), and the userspace application
> > +	 * will thoroughly destroy the channel after hibernation.
> > +	 */
> > +	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
> 
> At first glance, the above line seemed useless to me.  You could just do the
> wait_for_completion() unconditionally.  But is the intent to handle the case
> where the VM never had any sub-channels or hv_sock channels, and so
> nr_chan_close_on_suspend never went above 0?  

Yes.

> If so, a comment might be helpful.
> > +wait_for_completion(&vmbus_connection.ready_for_suspend_event);

Ok. I'll add a commnt for this in v4.

Thanks,
-- Dexuan

^ permalink raw reply

* RE: [PATCH v3 10/12] Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
From: Dexuan Cui @ 2019-08-31  2:00 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <DM5PR21MB013722B88011C7D587BB6182D7A40@DM5PR21MB0137.namprd21.prod.outlook.com>

> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Friday, August 23, 2019 1:02 PM
> 
> From: Dexuan Cui Sent: Monday, August 19, 2019 6:52 PM
> >
> > Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
> > hibernation. There is no better method to clean up the channels since
> > some of the channels may still be referenced by the userspace apps when
> > hiberantin is triggered: in this case, the "rescind" fields of the
> 
> s/hiberantin/hibernation/

Thanks! Will fix this in v4.

> > @@ -2091,6 +2127,25 @@ static int vmbus_acpi_add(struct acpi_device
> *device)
> >
> >  static int vmbus_bus_suspend(struct device *dev)
> >  {
> > +	struct vmbus_channel *channel;
> > +
> > +	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
> > +		/*
> > +		 * We wait here until any channel offer is currently
> > +		 * being processed.
> > +		 */
> 
> The wording of the comment is a bit off.  Maybe
> 
> 		/*
> 		 * We wait here until the completion of any channel
> 		 * offers that are currently in progress.
> 		 */

Will use the better version this in v4. 

Thanks,
-- Dexuan

^ permalink raw reply

* RE: [PATCH v3 08/12] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
From: Dexuan Cui @ 2019-08-31  0:23 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <DM5PR21MB01375F5A46E46079DA611622D7A40@DM5PR21MB0137.namprd21.prod.outlook.com>

> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Friday, August 23, 2019 12:57 PM
> 
> From: Dexuan Cui <decui@microsoft.com>  Sent: August 19, 2019 6:52 PM
> >
> > diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> > @@ -337,6 +337,33 @@ struct vmbus_channel *relid2channel(u32 relid)
> >  }
> >
> >  /*
> > + * find_primary_channel_by_offer - Get the channel object given the new
> offer.
> > + * This is only used in the resume path of hibernation.
> > + */
> > +struct vmbus_channel *
> > +find_primary_channel_by_offer(const struct vmbus_channel_offer_channel
> *offer)
> > +{
> > +	struct vmbus_channel *channel;
> > +	const guid_t *inst1, *inst2;
> > +
> > +	WARN_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
> > +
> > +	/* Ignore sub-channel offers. */
> > +	if (offer->offer.sub_channel_index != 0)
> > +		return NULL;
> > +
> > +	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
> > +		inst1 = &channel->offermsg.offer.if_instance;
> > +		inst2 = &offer->offer.if_instance;
> > +
> > +		if (guid_equal(inst1, inst2))
> > +			return channel;
> > +	}
> > +
> > +	return NULL;
> > +}
> 
> Any particular reason this new function is in connection.c instead of
> putting it in channel_mgmt.c where it is called?

There is a similar function relid2channel(), which is in connection.c.

Since the new function is only used in channel_mgmt.c. I'll move it to
channel_mgmt.c in v4.

Thanks,
-- Dexuan

^ permalink raw reply

* RE: [PATCH v3 02/12] x86/hyper-v: Implement hv_is_hibernation_supported()
From: Dexuan Cui @ 2019-08-30 23:37 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <DM5PR21MB0137A7EC5E51EAF8A0667359D7A40@DM5PR21MB0137.namprd21.prod.outlook.com>

> From: Michael Kelley <mikelley@microsoft.com>
> Sent: Friday, August 23, 2019 12:50 PM
> 
> From: Dexuan Cui <decui@microsoft.com> Sent: August 19, 2019 6:52 PM
> >
> > When a Linux VM runs on Hyper-V and hibernates, it must disable the
> > memory hot-add/remove and balloon up/down capabilities in the hv_balloon
> > driver.
> 
> I'm unclear on the above statement.  I think the requirement is that
> ballooning must not be active when hibernation is initiated.  Is hibernation
> blocked in that case?  If not, what happens?

Ballooning (and hot-addition of memory) must not be active if the user
wants the Linux VM to support hibernation, not just when hibernation is
initiated. This is because ballooning and hot-addition of memory are
incompatible with hibernation according to Hyper-V team, e.g. upon
hibernation the balloon VSP doesn't save any info about ballooned-out
pages (if any), so after Linux resumes, Linux balloon VSC expects that the
VSP will return the pages if Linux is under memory pressure, but the VSP
will never return the pages, since the VSP thinks it never stole the
pages from the VM.

So, if the user wants Linux VM to support hibernation, Linux must completely
forbid ballooning and hot-addition of memory, and hence the only
functionality of balloon VSC is reporting the memory pressure to the host.

Ideally, when Linux detects that the user wants it to support hibernation,
the balloon VSC should tell the VSP that it does not support ballooning and
hot-addition; however, the current version of the VSP requires
the VSC should support these capabilities, otherwise the capability negotiation
fails and the VSC can not load at all, so in my changes to the VSC driver, I
still report to the VSP that Linux supports the capabilities, but the VSC
ignores the host's requests of balloon up/down and hot add, and returns an
error to the VSP, when applicable.

BTW, in the future the balloon VSP driver will allow the VSC to not support
the capabilities of balloon up/down and hot add.

The remaining problem is: how can Linux know the user wants Linux VM
to support hibernation?

The ACPI S4 state is not a must for hibernation to work, because Linux is
able to hibernate as long as the system can shut down.

My decision is that: we artificially use the presence of the virtual
ACPI S4 state as the indicator of the user's intent of using hibernation.
BTW, I believe the Windows team made the same decision.

Once all the vmbus and VSC patches are accepted, I'll submit a patch to
forbid hibernation if the virtual ACPI S4 state is absent, e.g.
hv_is_hibernation_supported() is false.

> > By default, Hyper-V does not enable the virtual ACPI S4 state for a VM;
> > on recent Hyper-V hosts, the administrator is able to enable the virtual
> > ACPI S4 state for a VM, so we hope to use the presence of the virtual ACPI
> 
> "we hope" sounds very indefinite. :-(    Does ACPI S4 have to be enabled for
> hibernation to be initiated?  Goes back to my first question ....

Technically, we don't have to enable ACPI S4, but in practice, as I mentioned,
I'll submit a patch to forbid hibernation if ACPI S4 is absent.

> 
> > S4 state as a hint for hv_balloon to disable the aforementioned
> > capabilities.
> >
> > The new API will be used by hv_balloon.

I'll add my above explanation into the changelog in v4.

Thanks,
-- Dexuan

^ permalink raw reply

* RE: [PATCH net-next, 2/2] hv_netvsc: Sync offloading features to VF NIC
From: Haiyang Zhang @ 2019-08-30 23:12 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: sashal@kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, KY Srinivasan, Stephen Hemminger,
	olaf@aepfle.de, vkuznets, davem@davemloft.net,
	linux-kernel@vger.kernel.org, Mark Bloch
In-Reply-To: <20190830160451.43a61cf9@cakuba.netronome.com>



> -----Original Message-----
> From: Jakub Kicinski <jakub.kicinski@netronome.com>
> Sent: Friday, August 30, 2019 4:05 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: sashal@kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; KY Srinivasan <kys@microsoft.com>; Stephen
> Hemminger <sthemmin@microsoft.com>; olaf@aepfle.de; vkuznets
> <vkuznets@redhat.com>; davem@davemloft.net; linux-
> kernel@vger.kernel.org; Mark Bloch <markb@mellanox.com>
> Subject: Re: [PATCH net-next, 2/2] hv_netvsc: Sync offloading features to VF
> NIC
> 
> On Fri, 30 Aug 2019 03:45:38 +0000, Haiyang Zhang wrote:
> > VF NIC may go down then come up during host servicing events. This
> > causes the VF NIC offloading feature settings to roll back to the
> > defaults. This patch can synchronize features from synthetic NIC to
> > the VF NIC during ndo_set_features (ethtool -K), and
> > netvsc_register_vf when VF comes back after host events.
> >
> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > Cc: Mark Bloch <markb@mellanox.com>
> 
> If we want to make this change in behaviour we should change net_failover
> at the same time.

I will check net_failover. Thanks.

^ permalink raw reply

* Re: [PATCH net-next, 1/2] hv_netvsc: Allow scatter-gather feature to be tunable
From: Jakub Kicinski @ 2019-08-30 23:05 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: sashal@kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, KY Srinivasan, Stephen Hemminger,
	olaf@aepfle.de, vkuznets, davem@davemloft.net,
	linux-kernel@vger.kernel.org
In-Reply-To: <1567136656-49288-2-git-send-email-haiyangz@microsoft.com>

On Fri, 30 Aug 2019 03:45:24 +0000, Haiyang Zhang wrote:
> In a previous patch, the NETIF_F_SG was missing after the code changes.
> That caused the SG feature to be "fixed". This patch includes it into
> hw_features, so it is tunable again.
> 
> Fixes: 	23312a3be999 ("netvsc: negotiate checksum and segmentation parameters")
         ^
Looks like a tab sneaked in there.

> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

^ permalink raw reply

* Re: [PATCH net-next, 2/2] hv_netvsc: Sync offloading features to VF NIC
From: Jakub Kicinski @ 2019-08-30 23:04 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: sashal@kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org, KY Srinivasan, Stephen Hemminger,
	olaf@aepfle.de, vkuznets, davem@davemloft.net,
	linux-kernel@vger.kernel.org, Mark Bloch
In-Reply-To: <1567136656-49288-3-git-send-email-haiyangz@microsoft.com>

On Fri, 30 Aug 2019 03:45:38 +0000, Haiyang Zhang wrote:
> VF NIC may go down then come up during host servicing events. This
> causes the VF NIC offloading feature settings to roll back to the
> defaults. This patch can synchronize features from synthetic NIC to
> the VF NIC during ndo_set_features (ethtool -K),
> and netvsc_register_vf when VF comes back after host events.
> 
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: Mark Bloch <markb@mellanox.com>

If we want to make this change in behaviour we should change
net_failover at the same time.

^ permalink raw reply

* RE: [PATCH] x86/Hyper-V: Fix overflow issue in the fill_gva_list()
From: Michael Kelley @ 2019-08-30 17:41 UTC (permalink / raw)
  To: lantianyu1986@gmail.com, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, sashal@kernel.org, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, hpa@zytor.com, x86@kernel.org,
	gregkh@linuxfoundation.org, alex.williamson@redhat.com,
	cohuck@redhat.com
  Cc: Tianyu Lan, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
In-Reply-To: <20190830061540.211072-1-Tianyu.Lan@microsoft.com>

From: lantianyu1986@gmail.com  Sent: Thursday, August 29, 2019 11:16 PM
> 
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> fill_gva_list() populates gva list and adds offset
> HV_TLB_FLUSH_UNIT(0x1000000) to variable "cur"
> in the each loop. When diff between "end" and "cur" is
> less than HV_TLB_FLUSH_UNIT, the gva entry should
> be the last one and the loop should be end.
> 
> If cur is equal or greater than 0xFF000000 on 32-bit
> mode, "cur" will be overflow after adding HV_TLB_FLUSH_UNIT.
> Its value will be wrapped and less than "end". fill_gva_list()
> falls into an infinite loop and fill gva list out of
> border finally.
> 
> Set "cur" to be "end" to make loop end when diff is
> less than HV_TLB_FLUSH_UNIT and add HV_TLB_FLUSH_UNIT to
> "cur" when diff is equal or greater than HV_TLB_FLUSH_UNIT.
> Fix the overflow issue.

Let me suggest simplifying the commit message a bit.  It
doesn't need to describe every line of the code change.   I think
it should also make clear that the same problem could occur on
64-bit systems with the right "start" address.  My suggestion:

When the 'start' parameter is >=  0xFF000000 on 32-bit
systems, or >= 0xFFFFFFFF'FF000000 on 64-bit systems,
fill_gva_list gets into an infinite loop.  With such inputs,
'cur' overflows after adding HV_TLB_FLUSH_UNIT and always
compares as less than end.  Memory is filled with guest virtual
addresses until the system crashes

Fix this by never incrementing 'cur' to be larger than 'end'.

> 
> Reported-by: Jong Hyun Park <park.jonghyun@yonsei.ac.kr>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> Fixes: 2ffd9e33ce4a ("x86/hyper-v: Use hypercall for remote
> TLB flush")

The "Fixes:" line needs to not wrap.  It's exempt from the
"wrap at 75 columns" rule in order to simplify parsing scripts.

The code itself looks good.

Michael

^ permalink raw reply

* [PATCH] x86/Hyper-V: Fix overflow issue in the fill_gva_list()
From: lantianyu1986 @ 2019-08-30  6:15 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, tglx, mingo, bp, hpa, x86,
	gregkh, alex.williamson, cohuck, michael.h.kelley
  Cc: Tianyu Lan, linux-hyperv, linux-kernel, kvm

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

fill_gva_list() populates gva list and adds offset
HV_TLB_FLUSH_UNIT(0x1000000) to variable "cur"
in the each loop. When diff between "end" and "cur" is
less than HV_TLB_FLUSH_UNIT, the gva entry should
be the last one and the loop should be end.

If cur is equal or greater than 0xFF000000 on 32-bit
mode, "cur" will be overflow after adding HV_TLB_FLUSH_UNIT.
Its value will be wrapped and less than "end". fill_gva_list()
falls into an infinite loop and fill gva list out of
border finally.

Set "cur" to be "end" to make loop end when diff is
less than HV_TLB_FLUSH_UNIT and add HV_TLB_FLUSH_UNIT to
"cur" when diff is equal or greater than HV_TLB_FLUSH_UNIT.
Fix the overflow issue.

Reported-by: Jong Hyun Park <park.jonghyun@yonsei.ac.kr>
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Fixes: 2ffd9e33ce4a ("x86/hyper-v: Use hypercall for remote
TLB flush")
---
 arch/x86/hyperv/mmu.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index e65d7fe6489f..5208ba49c89a 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
 		 * Lower 12 bits encode the number of additional
 		 * pages to flush (in addition to the 'cur' page).
 		 */
-		if (diff >= HV_TLB_FLUSH_UNIT)
+		if (diff >= HV_TLB_FLUSH_UNIT) {
 			gva_list[gva_n] |= ~PAGE_MASK;
-		else if (diff)
+			cur += HV_TLB_FLUSH_UNIT;
+		}  else if (diff) {
 			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+			cur = end;
+		}

-		cur += HV_TLB_FLUSH_UNIT;
 		gva_n++;

 	} while (cur < end);
-- 
2.14.5

^ permalink raw reply related

* [PATCH net-next, 2/2] hv_netvsc: Sync offloading features to VF NIC
From: Haiyang Zhang @ 2019-08-30  3:45 UTC (permalink / raw)
  To: sashal@kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger, olaf@aepfle.de,
	vkuznets, davem@davemloft.net, linux-kernel@vger.kernel.org,
	Mark Bloch
In-Reply-To: <1567136656-49288-1-git-send-email-haiyangz@microsoft.com>

VF NIC may go down then come up during host servicing events. This
causes the VF NIC offloading feature settings to roll back to the
defaults. This patch can synchronize features from synthetic NIC to
the VF NIC during ndo_set_features (ethtool -K),
and netvsc_register_vf when VF comes back after host events.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Mark Bloch <markb@mellanox.com>
---
 drivers/net/hyperv/netvsc_drv.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1f1192e..39dddcd 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1785,13 +1785,15 @@ static int netvsc_set_features(struct net_device *ndev,
 	netdev_features_t change = features ^ ndev->features;
 	struct net_device_context *ndevctx = netdev_priv(ndev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+	struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
 	struct ndis_offload_params offloads;
+	int ret = 0;
 
 	if (!nvdev || nvdev->destroy)
 		return -ENODEV;
 
 	if (!(change & NETIF_F_LRO))
-		return 0;
+		goto syncvf;
 
 	memset(&offloads, 0, sizeof(struct ndis_offload_params));
 
@@ -1803,7 +1805,19 @@ static int netvsc_set_features(struct net_device *ndev,
 		offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
 	}
 
-	return rndis_filter_set_offload_params(ndev, nvdev, &offloads);
+	ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
+
+	if (ret)
+		features ^= NETIF_F_LRO;
+
+syncvf:
+	if (!vf_netdev)
+		return ret;
+
+	vf_netdev->wanted_features = features;
+	netdev_update_features(vf_netdev);
+
+	return ret;
 }
 
 static u32 netvsc_get_msglevel(struct net_device *ndev)
@@ -2181,6 +2195,10 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
 
 	dev_hold(vf_netdev);
 	rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
+
+	vf_netdev->wanted_features = ndev->features;
+	netdev_update_features(vf_netdev);
+
 	return NOTIFY_OK;
 }
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next, 1/2] hv_netvsc: Allow scatter-gather feature to be tunable
From: Haiyang Zhang @ 2019-08-30  3:45 UTC (permalink / raw)
  To: sashal@kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger, olaf@aepfle.de,
	vkuznets, davem@davemloft.net, linux-kernel@vger.kernel.org
In-Reply-To: <1567136656-49288-1-git-send-email-haiyangz@microsoft.com>

In a previous patch, the NETIF_F_SG was missing after the code changes.
That caused the SG feature to be "fixed". This patch includes it into
hw_features, so it is tunable again.

Fixes: 	23312a3be999 ("netvsc: negotiate checksum and segmentation parameters")
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   | 2 +-
 drivers/net/hyperv/netvsc_drv.c   | 4 ++--
 drivers/net/hyperv/rndis_filter.c | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index ecc9af0..670ef68 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -822,7 +822,7 @@ struct nvsp_message {
 
 #define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \
 				      NETIF_F_TSO | NETIF_F_IPV6_CSUM | \
-				      NETIF_F_TSO6 | NETIF_F_LRO)
+				      NETIF_F_TSO6 | NETIF_F_LRO | NETIF_F_SG)
 
 #define VRSS_SEND_TAB_SIZE 16  /* must be power of 2 */
 #define VRSS_CHANNEL_MAX 64
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 0a6cd2f..1f1192e 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2313,8 +2313,8 @@ static int netvsc_probe(struct hv_device *dev,
 
 	/* hw_features computed in rndis_netdev_set_hwcaps() */
 	net->features = net->hw_features |
-		NETIF_F_HIGHDMA | NETIF_F_SG |
-		NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+		NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX |
+		NETIF_F_HW_VLAN_CTAG_RX;
 	net->vlan_features = net->features;
 
 	netdev_lockdep_set_classes(net);
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 317dbe9..abaf815 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1207,6 +1207,7 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 
 	/* Compute tx offload settings based on hw capabilities */
 	net->hw_features |= NETIF_F_RXCSUM;
+	net->hw_features |= NETIF_F_SG;
 
 	if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) {
 		/* Can checksum TCP */
-- 
1.8.3.1


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox