Linux-HyperV List
 help / color / mirror / Atom feed
* [PATCH v4] video: hyperv: hyperv_fb: Obtain screen resolution from Hyper-V host
From: Wei Hu @ 2019-09-05  9:11 UTC (permalink / raw)
  To: Michael Kelley, b.zolnierkie@samsung.com,
	linux-hyperv@vger.kernel.org, dri-devel@lists.freedesktop.org,
	linux-fbdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Stephen Hemminger, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Dexuan Cui
  Cc: Wei Hu, Iouri Tarassov

Beginning with Windows 10 RS5, the VM screen resolution is obtained from
the host. The "video=hyperv_fb" boot time option is no longer needed, but
can still be used to override what the host specifies. The VM resolution
can be set on the host by executing the PowerShell "set-vmvideo" command.

Signed-off-by: Iouri Tarassov <iourit@microsoft.com>
Signed-off-by: Wei Hu <weh@microsoft.com>
---
    v2:
    - Implemented fallback when version negotiation failed.
    - Defined full size for supported_resolution array.

    v3:
    - Corrected the synthvid major and minor version comparison problem.

    v4:
    - Changed function name to synthvid_ver_ge().

 drivers/video/fbdev/hyperv_fb.c | 159 +++++++++++++++++++++++++++++---
 1 file changed, 147 insertions(+), 12 deletions(-)

diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
index 00f5bdcc6c6f..fe319fc39bec 100644
--- a/drivers/video/fbdev/hyperv_fb.c
+++ b/drivers/video/fbdev/hyperv_fb.c
@@ -23,6 +23,14 @@
  *
  * Portrait orientation is also supported:
  *     For example: video=hyperv_fb:864x1152
+ *
+ * When a Windows 10 RS5+ host is used, the virtual machine screen
+ * resolution is obtained from the host. The "video=hyperv_fb" option is
+ * not needed, but still can be used to overwrite what the host specifies.
+ * The VM resolution on the host could be set by executing the powershell
+ * "set-vmvideo" command. For example
+ *     set-vmvideo -vmname name -horizontalresolution:1920 \
+ * -verticalresolution:1200 -resolutiontype single
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -44,6 +52,10 @@
 #define SYNTHVID_VERSION(major, minor) ((minor) << 16 | (major))
 #define SYNTHVID_VERSION_WIN7 SYNTHVID_VERSION(3, 0)
 #define SYNTHVID_VERSION_WIN8 SYNTHVID_VERSION(3, 2)
+#define SYNTHVID_VERSION_WIN10 SYNTHVID_VERSION(3, 5)
+
+#define SYNTHVID_VER_GET_MAJOR(ver) (ver & 0x0000ffff)
+#define SYNTHVID_VER_GET_MINOR(ver) ((ver & 0xffff0000) >> 16)
 
 #define SYNTHVID_DEPTH_WIN7 16
 #define SYNTHVID_DEPTH_WIN8 32
@@ -82,16 +94,25 @@ enum synthvid_msg_type {
 	SYNTHVID_POINTER_SHAPE		= 8,
 	SYNTHVID_FEATURE_CHANGE		= 9,
 	SYNTHVID_DIRT			= 10,
+	SYNTHVID_RESOLUTION_REQUEST	= 13,
+	SYNTHVID_RESOLUTION_RESPONSE	= 14,
 
-	SYNTHVID_MAX			= 11
+	SYNTHVID_MAX			= 15
 };
 
+#define		SYNTHVID_EDID_BLOCK_SIZE	128
+#define		SYNTHVID_MAX_RESOLUTION_COUNT	64
+
+struct hvd_screen_info {
+	u16 width;
+	u16 height;
+} __packed;
+
 struct synthvid_msg_hdr {
 	u32 type;
 	u32 size;  /* size of this header + payload after this field*/
 } __packed;
 
-
 struct synthvid_version_req {
 	u32 version;
 } __packed;
@@ -102,6 +123,19 @@ struct synthvid_version_resp {
 	u8 max_video_outputs;
 } __packed;
 
+struct synthvid_supported_resolution_req {
+	u8 maximum_resolution_count;
+} __packed;
+
+struct synthvid_supported_resolution_resp {
+	u8 edid_block[SYNTHVID_EDID_BLOCK_SIZE];
+	u8 resolution_count;
+	u8 default_resolution_index;
+	u8 is_standard;
+	struct hvd_screen_info
+		supported_resolution[SYNTHVID_MAX_RESOLUTION_COUNT];
+} __packed;
+
 struct synthvid_vram_location {
 	u64 user_ctx;
 	u8 is_vram_gpa_specified;
@@ -187,6 +221,8 @@ struct synthvid_msg {
 		struct synthvid_pointer_shape ptr_shape;
 		struct synthvid_feature_change feature_chg;
 		struct synthvid_dirt dirt;
+		struct synthvid_supported_resolution_req resolution_req;
+		struct synthvid_supported_resolution_resp resolution_resp;
 	};
 } __packed;
 
@@ -224,6 +260,8 @@ struct hvfb_par {
 
 static uint screen_width = HVFB_WIDTH;
 static uint screen_height = HVFB_HEIGHT;
+static uint screen_width_max = HVFB_WIDTH;
+static uint screen_height_max = HVFB_HEIGHT;
 static uint screen_depth;
 static uint screen_fb_size;
 
@@ -354,6 +392,7 @@ static void synthvid_recv_sub(struct hv_device *hdev)
 
 	/* Complete the wait event */
 	if (msg->vid_hdr.type == SYNTHVID_VERSION_RESPONSE ||
+	    msg->vid_hdr.type == SYNTHVID_RESOLUTION_RESPONSE ||
 	    msg->vid_hdr.type == SYNTHVID_VRAM_LOCATION_ACK) {
 		memcpy(par->init_buf, msg, MAX_VMBUS_PKT_SIZE);
 		complete(&par->wait);
@@ -400,6 +439,17 @@ static void synthvid_receive(void *ctx)
 	} while (bytes_recvd > 0 && ret == 0);
 }
 
+/* Check if the ver1 version is equal or greater than ver2 */
+static inline bool synthvid_ver_ge(u32 ver1, u32 ver2)
+{
+	if (SYNTHVID_VER_GET_MAJOR(ver1) > SYNTHVID_VER_GET_MAJOR(ver2) ||
+	    (SYNTHVID_VER_GET_MAJOR(ver1) == SYNTHVID_VER_GET_MAJOR(ver2) &&
+	     SYNTHVID_VER_GET_MINOR(ver1) >= SYNTHVID_VER_GET_MINOR(ver2)))
+		return true;
+
+	return false;
+}
+
 /* Check synthetic video protocol version with the host */
 static int synthvid_negotiate_ver(struct hv_device *hdev, u32 ver)
 {
@@ -428,6 +478,64 @@ static int synthvid_negotiate_ver(struct hv_device *hdev, u32 ver)
 	}
 
 	par->synthvid_version = ver;
+	pr_info("Synthvid Version major %d, minor %d\n",
+		SYNTHVID_VER_GET_MAJOR(ver), SYNTHVID_VER_GET_MINOR(ver));
+
+out:
+	return ret;
+}
+
+/* Get current resolution from the host */
+static int synthvid_get_supported_resolution(struct hv_device *hdev)
+{
+	struct fb_info *info = hv_get_drvdata(hdev);
+	struct hvfb_par *par = info->par;
+	struct synthvid_msg *msg = (struct synthvid_msg *)par->init_buf;
+	int ret = 0;
+	unsigned long t;
+	u8 index;
+	int i;
+
+	memset(msg, 0, sizeof(struct synthvid_msg));
+	msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST;
+	msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
+		sizeof(struct synthvid_supported_resolution_req);
+
+	msg->resolution_req.maximum_resolution_count =
+		SYNTHVID_MAX_RESOLUTION_COUNT;
+	synthvid_send(hdev, msg);
+
+	t = wait_for_completion_timeout(&par->wait, VSP_TIMEOUT);
+	if (!t) {
+		pr_err("Time out on waiting resolution response\n");
+			ret = -ETIMEDOUT;
+			goto out;
+	}
+
+	if (msg->resolution_resp.resolution_count == 0) {
+		pr_err("No supported resolutions\n");
+		ret = -ENODEV;
+		goto out;
+	}
+
+	index = msg->resolution_resp.default_resolution_index;
+	if (index >= msg->resolution_resp.resolution_count) {
+		pr_err("Invalid resolution index: %d\n", index);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	for (i = 0; i < msg->resolution_resp.resolution_count; i++) {
+		screen_width_max = max_t(unsigned int, screen_width_max,
+		    msg->resolution_resp.supported_resolution[i].width);
+		screen_height_max = max_t(unsigned int, screen_height_max,
+		    msg->resolution_resp.supported_resolution[i].height);
+	}
+
+	screen_width =
+		msg->resolution_resp.supported_resolution[index].width;
+	screen_height =
+		msg->resolution_resp.supported_resolution[index].height;
 
 out:
 	return ret;
@@ -448,11 +556,27 @@ static int synthvid_connect_vsp(struct hv_device *hdev)
 	}
 
 	/* Negotiate the protocol version with host */
-	if (vmbus_proto_version == VERSION_WS2008 ||
-	    vmbus_proto_version == VERSION_WIN7)
-		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN7);
-	else
+	switch (vmbus_proto_version) {
+	case VERSION_WIN10:
+	case VERSION_WIN10_V5:
+		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN10);
+		if (!ret)
+			break;
+		/* Fallthrough */
+	case VERSION_WIN8:
+	case VERSION_WIN8_1:
 		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN8);
+		if (!ret)
+			break;
+		/* Fallthrough */
+	case VERSION_WS2008:
+	case VERSION_WIN7:
+		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN7);
+		break;
+	default:
+		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN10);
+		break;
+	}
 
 	if (ret) {
 		pr_err("Synthetic video device version not accepted\n");
@@ -464,6 +588,12 @@ static int synthvid_connect_vsp(struct hv_device *hdev)
 	else
 		screen_depth = SYNTHVID_DEPTH_WIN8;
 
+	if (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10)) {
+		ret = synthvid_get_supported_resolution(hdev);
+		if (ret)
+			pr_info("Failed to get supported resolution from host, use default\n");
+	}
+
 	screen_fb_size = hdev->channel->offermsg.offer.
 				mmio_megabytes * 1024 * 1024;
 
@@ -653,6 +783,8 @@ static void hvfb_get_option(struct fb_info *info)
 	}
 
 	if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN ||
+	    (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) &&
+	    (x > screen_width_max || y > screen_height_max)) ||
 	    (par->synthvid_version == SYNTHVID_VERSION_WIN8 &&
 	     x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) ||
 	    (par->synthvid_version == SYNTHVID_VERSION_WIN7 &&
@@ -689,8 +821,12 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
 		}
 
 		if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
-		    pci_resource_len(pdev, 0) < screen_fb_size)
+		    pci_resource_len(pdev, 0) < screen_fb_size) {
+			pr_err("Resource not available or (0x%lx < 0x%lx)\n",
+			       (unsigned long) pci_resource_len(pdev, 0),
+			       (unsigned long) screen_fb_size);
 			goto err1;
+		}
 
 		pot_end = pci_resource_end(pdev, 0);
 		pot_start = pot_end - screen_fb_size + 1;
@@ -781,17 +917,16 @@ static int hvfb_probe(struct hv_device *hdev,
 		goto error1;
 	}
 
+	hvfb_get_option(info);
+	pr_info("Screen resolution: %dx%d, Color depth: %d\n",
+		screen_width, screen_height, screen_depth);
+
 	ret = hvfb_getmem(hdev, info);
 	if (ret) {
 		pr_err("No memory for framebuffer\n");
 		goto error2;
 	}
 
-	hvfb_get_option(info);
-	pr_info("Screen resolution: %dx%d, Color depth: %d\n",
-		screen_width, screen_height, screen_depth);
-
-
 	/* Set up fb_info */
 	info->flags = FBINFO_DEFAULT;
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH v3] video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver
From: Wei Hu @ 2019-09-05  8:28 UTC (permalink / raw)
  To: Michael Kelley, rdunlap@infradead.org, shc_work@mail.ru,
	gregkh@linuxfoundation.org, lee.jones@linaro.org,
	alexandre.belloni@bootlin.com, baijiaju1990@gmail.com,
	fthain@telegraphics.com.au, info@metux.net,
	linux-hyperv@vger.kernel.org, dri-devel@lists.freedesktop.org,
	linux-fbdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	sashal@kernel.org, Stephen Hemminger, Haiyang Zhang,
	KY Srinivasan, Dexuan Cui
  Cc: Wei Hu

Without deferred IO support, the hyperv_fb driver tells the host to refresh
the entire guest frame buffer at a fixed rate, e.g. 20Hz, regardless of
whether the screen has been updated. This patch adds deferred IO support
for screens in graphics mode and also enables on-demand refresh of the
frame buffer. The highest refresh rate is still 20Hz.

Currently Hyper-V only takes a physical address from the guest as the
starting address of the frame buffer. This implies the guest must allocate
contiguous physical memory for the frame buffer. In addition, Hyper-V Gen 2
VMs only accept an address from the MMIO region as the frame buffer
address. Due to these limitations on the Hyper-V host, we keep a shadow
copy of the frame buffer in the guest. This means one more copy of the
dirty rectangle inside the guest when doing the on-demand refresh. This can
be optimized in the future with help from the host. For now the host
performance gain from deferred IO outweighs the shadow copy impact in the
guest.

Signed-off-by: Wei Hu <weh@microsoft.com>
---
    v2: Incorporated review comments from Michael Kelley
    - Increased dirty rectangle by one row in deferred IO case when sending
    to Hyper-V.
    - Corrected the dirty rectangle size in the text mode.
    - Added more comments.
    - Other minor code cleanups.

    v3: Incorporated more review comments
    - Removed a few unnecessary variable tests

 drivers/video/fbdev/Kconfig     |   1 +
 drivers/video/fbdev/hyperv_fb.c | 216 +++++++++++++++++++++++++++++---
 2 files changed, 197 insertions(+), 20 deletions(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 1b2f5f31fb6f..e781f89a1824 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -2241,6 +2241,7 @@ config FB_HYPERV
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
+	select FB_DEFERRED_IO
 	help
 	  This framebuffer driver supports Microsoft Hyper-V Synthetic Video.
 
diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
index 1464c6f14687..63bfd35e392c 100644
--- a/drivers/video/fbdev/hyperv_fb.c
+++ b/drivers/video/fbdev/hyperv_fb.c
@@ -237,6 +237,7 @@ struct synthvid_msg {
 #define RING_BUFSIZE (256 * 1024)
 #define VSP_TIMEOUT (10 * HZ)
 #define HVFB_UPDATE_DELAY (HZ / 20)
+#define HVFB_ONDEMAND_THROTTLE (HZ / 20)
 
 struct hvfb_par {
 	struct fb_info *info;
@@ -256,6 +257,17 @@ struct hvfb_par {
 	bool synchronous_fb;
 
 	struct notifier_block hvfb_panic_nb;
+
+	/* Memory for deferred IO and frame buffer itself */
+	unsigned char *dio_vp;
+	unsigned char *mmio_vp;
+	unsigned long mmio_pp;
+	spinlock_t docopy_lock; /* Lock to protect memory copy */
+
+	/* Dirty rectangle, protected by delayed_refresh_lock */
+	int x1, y1, x2, y2;
+	bool delayed_refresh;
+	spinlock_t delayed_refresh_lock;
 };
 
 static uint screen_width = HVFB_WIDTH;
@@ -264,6 +276,7 @@ static uint screen_width_max = HVFB_WIDTH;
 static uint screen_height_max = HVFB_HEIGHT;
 static uint screen_depth;
 static uint screen_fb_size;
+static uint dio_fb_size; /* FB size for deferred IO */
 
 /* Send message to Hyper-V host */
 static inline int synthvid_send(struct hv_device *hdev,
@@ -350,28 +363,92 @@ static int synthvid_send_ptr(struct hv_device *hdev)
 }
 
 /* Send updated screen area (dirty rectangle) location to host */
-static int synthvid_update(struct fb_info *info)
+static int
+synthvid_update(struct fb_info *info, int x1, int y1, int x2, int y2)
 {
 	struct hv_device *hdev = device_to_hv_device(info->device);
 	struct synthvid_msg msg;
 
 	memset(&msg, 0, sizeof(struct synthvid_msg));
+	if (x2 == INT_MAX)
+		x2 = info->var.xres;
+	if (y2 == INT_MAX)
+		y2 = info->var.yres;
 
 	msg.vid_hdr.type = SYNTHVID_DIRT;
 	msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
 		sizeof(struct synthvid_dirt);
 	msg.dirt.video_output = 0;
 	msg.dirt.dirt_count = 1;
-	msg.dirt.rect[0].x1 = 0;
-	msg.dirt.rect[0].y1 = 0;
-	msg.dirt.rect[0].x2 = info->var.xres;
-	msg.dirt.rect[0].y2 = info->var.yres;
+	msg.dirt.rect[0].x1 = (x1 > x2) ? 0 : x1;
+	msg.dirt.rect[0].y1 = (y1 > y2) ? 0 : y1;
+	msg.dirt.rect[0].x2 =
+		(x2 < x1 || x2 > info->var.xres) ? info->var.xres : x2;
+	msg.dirt.rect[0].y2 =
+		(y2 < y1 || y2 > info->var.yres) ? info->var.yres : y2;
 
 	synthvid_send(hdev, &msg);
 
 	return 0;
 }
 
+static void hvfb_docopy(struct hvfb_par *par,
+			unsigned long offset,
+			unsigned long size)
+{
+	if (!par || !par->mmio_vp || !par->dio_vp || !par->fb_ready ||
+	    size == 0 || offset >= dio_fb_size)
+		return;
+
+	if (offset + size > dio_fb_size)
+		size = dio_fb_size - offset;
+
+	memcpy(par->mmio_vp + offset, par->dio_vp + offset, size);
+}
+
+/* Deferred IO callback */
+static void synthvid_deferred_io(struct fb_info *p,
+				 struct list_head *pagelist)
+{
+	struct hvfb_par *par = p->par;
+	struct page *page;
+	unsigned long start, end;
+	int y1, y2, miny, maxy;
+	unsigned long flags;
+
+	miny = INT_MAX;
+	maxy = 0;
+
+	/*
+	 * Merge dirty pages. It is possible that last page cross
+	 * over the end of frame buffer row yres. This is taken care of
+	 * in synthvid_update function by clamping the y2
+	 * value to yres.
+	 */
+	list_for_each_entry(page, pagelist, lru) {
+		start = page->index << PAGE_SHIFT;
+		end = start + PAGE_SIZE - 1;
+		y1 = start / p->fix.line_length;
+		y2 = end / p->fix.line_length;
+		miny = min_t(int, miny, y1);
+		maxy = max_t(int, maxy, y2);
+
+		/* Copy from dio space to mmio address */
+		if (par->fb_ready) {
+			spin_lock_irqsave(&par->docopy_lock, flags);
+			hvfb_docopy(par, start, PAGE_SIZE);
+			spin_unlock_irqrestore(&par->docopy_lock, flags);
+		}
+	}
+
+	if (par->fb_ready)
+		synthvid_update(p, 0, miny, p->var.xres, maxy + 1);
+}
+
+static struct fb_deferred_io synthvid_defio = {
+	.delay		= HZ / 20,
+	.deferred_io	= synthvid_deferred_io,
+};
 
 /*
  * Actions on received messages from host:
@@ -618,7 +695,7 @@ static int synthvid_send_config(struct hv_device *hdev)
 	msg->vid_hdr.type = SYNTHVID_VRAM_LOCATION;
 	msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
 		sizeof(struct synthvid_vram_location);
-	msg->vram.user_ctx = msg->vram.vram_gpa = info->fix.smem_start;
+	msg->vram.user_ctx = msg->vram.vram_gpa = par->mmio_pp;
 	msg->vram.is_vram_gpa_specified = 1;
 	synthvid_send(hdev, msg);
 
@@ -628,7 +705,7 @@ static int synthvid_send_config(struct hv_device *hdev)
 		ret = -ETIMEDOUT;
 		goto out;
 	}
-	if (msg->vram_ack.user_ctx != info->fix.smem_start) {
+	if (msg->vram_ack.user_ctx != par->mmio_pp) {
 		pr_err("Unable to set VRAM location\n");
 		ret = -ENODEV;
 		goto out;
@@ -645,19 +722,79 @@ static int synthvid_send_config(struct hv_device *hdev)
 
 /*
  * Delayed work callback:
- * It is called at HVFB_UPDATE_DELAY or longer time interval to process
- * screen updates. It is re-scheduled if further update is necessary.
+ * It is scheduled to call whenever update request is received and it has
+ * not been called in last HVFB_ONDEMAND_THROTTLE time interval.
  */
 static void hvfb_update_work(struct work_struct *w)
 {
 	struct hvfb_par *par = container_of(w, struct hvfb_par, dwork.work);
 	struct fb_info *info = par->info;
+	unsigned long flags;
+	int x1, x2, y1, y2;
+	int j;
+
+	spin_lock_irqsave(&par->delayed_refresh_lock, flags);
+	/* Reset the request flag */
+	par->delayed_refresh = false;
+
+	/* Store the dirty rectangle to local variables */
+	x1 = par->x1;
+	x2 = par->x2;
+	y1 = par->y1;
+	y2 = par->y2;
+
+	/* Clear dirty rectangle */
+	par->x1 = par->y1 = INT_MAX;
+	par->x2 = par->y2 = 0;
+
+	spin_unlock_irqrestore(&par->delayed_refresh_lock, flags);
 
+	if (x1 > info->var.xres || x2 > info->var.xres ||
+	    y1 > info->var.yres || y2 > info->var.yres || x2 <= x1)
+		return;
+
+	/* Copy the dirty rectangle to frame buffer memory */
+	spin_lock_irqsave(&par->docopy_lock, flags);
+	for (j = y1; j < y2; j++) {
+		hvfb_docopy(par,
+			    j * info->fix.line_length +
+			    (x1 * screen_depth / 8),
+			    (x2 - x1) * screen_depth / 8);
+	}
+	spin_unlock_irqrestore(&par->docopy_lock, flags);
+
+	/* Refresh */
 	if (par->fb_ready)
-		synthvid_update(info);
+		synthvid_update(info, x1, y1, x2, y2);
+}
 
-	if (par->update)
-		schedule_delayed_work(&par->dwork, HVFB_UPDATE_DELAY);
+/*
+ * Control the on-demand refresh frequency. It schedules a delayed
+ * screen update if it has not yet.
+ */
+static void hvfb_ondemand_refresh_throttle(struct hvfb_par *par,
+					   int x1, int y1, int w, int h)
+{
+	unsigned long flags;
+	int x2 = x1 + w;
+	int y2 = y1 + h;
+
+	spin_lock_irqsave(&par->delayed_refresh_lock, flags);
+
+	/* Merge dirty rectangle */
+	par->x1 = min_t(int, par->x1, x1);
+	par->y1 = min_t(int, par->y1, y1);
+	par->x2 = max_t(int, par->x2, x2);
+	par->y2 = max_t(int, par->y2, y2);
+
+	/* Schedule a delayed screen update if not yet */
+	if (par->delayed_refresh == false) {
+		schedule_delayed_work(&par->dwork,
+				      HVFB_ONDEMAND_THROTTLE);
+		par->delayed_refresh = true;
+	}
+
+	spin_unlock_irqrestore(&par->delayed_refresh_lock, flags);
 }
 
 static int hvfb_on_panic(struct notifier_block *nb,
@@ -669,7 +806,8 @@ static int hvfb_on_panic(struct notifier_block *nb,
 	par = container_of(nb, struct hvfb_par, hvfb_panic_nb);
 	par->synchronous_fb = true;
 	info = par->info;
-	synthvid_update(info);
+	hvfb_docopy(par, 0, dio_fb_size);
+	synthvid_update(info, 0, 0, INT_MAX, INT_MAX);
 
 	return NOTIFY_DONE;
 }
@@ -730,7 +868,10 @@ static void hvfb_cfb_fillrect(struct fb_info *p,
 
 	cfb_fillrect(p, rect);
 	if (par->synchronous_fb)
-		synthvid_update(p);
+		synthvid_update(p, 0, 0, INT_MAX, INT_MAX);
+	else
+		hvfb_ondemand_refresh_throttle(par, rect->dx, rect->dy,
+					       rect->width, rect->height);
 }
 
 static void hvfb_cfb_copyarea(struct fb_info *p,
@@ -740,7 +881,10 @@ static void hvfb_cfb_copyarea(struct fb_info *p,
 
 	cfb_copyarea(p, area);
 	if (par->synchronous_fb)
-		synthvid_update(p);
+		synthvid_update(p, 0, 0, INT_MAX, INT_MAX);
+	else
+		hvfb_ondemand_refresh_throttle(par, area->dx, area->dy,
+					       area->width, area->height);
 }
 
 static void hvfb_cfb_imageblit(struct fb_info *p,
@@ -750,7 +894,10 @@ static void hvfb_cfb_imageblit(struct fb_info *p,
 
 	cfb_imageblit(p, image);
 	if (par->synchronous_fb)
-		synthvid_update(p);
+		synthvid_update(p, 0, 0, INT_MAX, INT_MAX);
+	else
+		hvfb_ondemand_refresh_throttle(par, image->dx, image->dy,
+					       image->width, image->height);
 }
 
 static struct fb_ops hvfb_ops = {
@@ -809,6 +956,9 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
 	resource_size_t pot_start, pot_end;
 	int ret;
 
+	dio_fb_size =
+		screen_width * screen_height * screen_depth / 8;
+
 	if (gen2vm) {
 		pot_start = 0;
 		pot_end = -1;
@@ -843,9 +993,14 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
 	if (!fb_virt)
 		goto err2;
 
+	/* Allocate memory for deferred IO */
+	par->dio_vp = vzalloc(round_up(dio_fb_size, PAGE_SIZE));
+	if (par->dio_vp == NULL)
+		goto err3;
+
 	info->apertures = alloc_apertures(1);
 	if (!info->apertures)
-		goto err3;
+		goto err4;
 
 	if (gen2vm) {
 		info->apertures->ranges[0].base = screen_info.lfb_base;
@@ -857,16 +1012,23 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
 		info->apertures->ranges[0].size = pci_resource_len(pdev, 0);
 	}
 
+	/* Physical address of FB device */
+	par->mmio_pp = par->mem->start;
+	/* Virtual address of FB device */
+	par->mmio_vp = (unsigned char *) fb_virt;
+
 	info->fix.smem_start = par->mem->start;
-	info->fix.smem_len = screen_fb_size;
-	info->screen_base = fb_virt;
-	info->screen_size = screen_fb_size;
+	info->fix.smem_len = dio_fb_size;
+	info->screen_base = par->dio_vp;
+	info->screen_size = dio_fb_size;
 
 	if (!gen2vm)
 		pci_dev_put(pdev);
 
 	return 0;
 
+err4:
+	vfree(par->dio_vp);
 err3:
 	iounmap(fb_virt);
 err2:
@@ -884,6 +1046,7 @@ static void hvfb_putmem(struct fb_info *info)
 {
 	struct hvfb_par *par = info->par;
 
+	vfree(par->dio_vp);
 	iounmap(info->screen_base);
 	vmbus_free_mmio(par->mem->start, screen_fb_size);
 	par->mem = NULL;
@@ -909,6 +1072,12 @@ static int hvfb_probe(struct hv_device *hdev,
 	init_completion(&par->wait);
 	INIT_DELAYED_WORK(&par->dwork, hvfb_update_work);
 
+	par->delayed_refresh = false;
+	spin_lock_init(&par->delayed_refresh_lock);
+	spin_lock_init(&par->docopy_lock);
+	par->x1 = par->y1 = INT_MAX;
+	par->x2 = par->y2 = 0;
+
 	/* Connect to VSP */
 	hv_set_drvdata(hdev, info);
 	ret = synthvid_connect_vsp(hdev);
@@ -960,6 +1129,10 @@ static int hvfb_probe(struct hv_device *hdev,
 	info->fbops = &hvfb_ops;
 	info->pseudo_palette = par->pseudo_palette;
 
+	/* Initialize deferred IO */
+	info->fbdefio = &synthvid_defio;
+	fb_deferred_io_init(info);
+
 	/* Send config to host */
 	ret = synthvid_send_config(hdev);
 	if (ret)
@@ -981,6 +1154,7 @@ static int hvfb_probe(struct hv_device *hdev,
 	return 0;
 
 error:
+	fb_deferred_io_cleanup(info);
 	hvfb_putmem(info);
 error2:
 	vmbus_close(hdev->channel);
@@ -1003,6 +1177,8 @@ static int hvfb_remove(struct hv_device *hdev)
 	par->update = false;
 	par->fb_ready = false;
 
+	fb_deferred_io_cleanup(info);
+
 	unregister_framebuffer(info);
 	cancel_delayed_work_sync(&par->dwork);
 
-- 
2.20.1


^ permalink raw reply related

* RE: [PATCH v4 12/12] Drivers: hv: vmbus: Resume after fixing up old primary channels
From: Michael Kelley @ 2019-09-04 16:45 UTC (permalink / raw)
  To: Dexuan Cui, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1567470139-119355-13-git-send-email-decui@microsoft.com>

From: Dexuan Cui <decui@microsoft.com> Sent: Monday, September 2, 2019 5:23 PM
> 
> When the host re-offers the primary channels upon resume, the host only
> guarantees the Instance GUID  doesn't change, so vmbus_bus_suspend()
> should invalidate channel->offermsg.child_relid and figure out the
> number of primary channels that need to be fixed up upon resume.
> 
> Upon resume, vmbus_onoffer() finds the old channel structs, and maps
> the new offers to the old channels, and fixes up the old structs,
> and finally the resume callbacks of the VSC drivers will re-open
> the channels.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/hv/channel_mgmt.c | 85 ++++++++++++++++++++++++++++++++++++-----------
>  drivers/hv/connection.c   |  2 ++
>  drivers/hv/hyperv_vmbus.h | 14 ++++++++
>  drivers/hv/vmbus_drv.c    | 17 ++++++++++
>  include/linux/hyperv.h    |  3 ++
>  5 files changed, 101 insertions(+), 20 deletions(-)
> 

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply

* RE: [PATCH v4 11/12] Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
From: Michael Kelley @ 2019-09-04 16:44 UTC (permalink / raw)
  To: Dexuan Cui, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1567470139-119355-12-git-send-email-decui@microsoft.com>

From: Dexuan Cui <decui@microsoft.com> Sent: Monday, September 2, 2019 5:23 PM
> 
> Before suspend, Linux must make sure all the hv_sock channels have been
> properly cleaned up, because a hv_sock connection can not persist across
> hibernation, and the user-space app must be properly notified of the
> state change of the connection.
> 
> Before suspend, Linux also must make sure all the sub-channels have been
> destroyed, i.e. the related channel structs of the sub-channels must be
> properly removed, otherwise they would cause a conflict when the
> sub-channels are recreated upon resume.
> 
> Add a counter to track such channels, and vmbus_bus_suspend() should wait
> for the counter to drop to zero.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/hv/channel_mgmt.c | 26 ++++++++++++++++++++++++++
>  drivers/hv/connection.c   |  3 +++
>  drivers/hv/hyperv_vmbus.h | 12 ++++++++++++
>  drivers/hv/vmbus_drv.c    | 44 +++++++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 84 insertions(+), 1 deletion(-)
> 

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply

* RE: [PATCH v4 08/12] Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
From: Michael Kelley @ 2019-09-04 16:43 UTC (permalink / raw)
  To: Dexuan Cui, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1567470139-119355-9-git-send-email-decui@microsoft.com>

From: Dexuan Cui <decui@microsoft.com> Sent: Monday, September 2, 2019 5:23 PM
> 
> When the VM resumes, the host re-sends the offers. We should not add the
> offers to the global vmbus_connection.chn_list again.
> 
> This patch assumes the RELIDs of the channels don't change across
> hibernation. Actually this is not always true, especially in the case of
> NIC SR-IOV the VF vmbus device's RELID sometimes can change. A later patch
> will address this issue by mapping the new offers to the old channels and
> fixing up the old channels, if necessary.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/hv/channel_mgmt.c | 58
> ++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 57 insertions(+), 1 deletion(-)
> 

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply

* RE: [PATCH v4 02/12] x86/hyper-v: Implement hv_is_hibernation_supported()
From: Michael Kelley @ 2019-09-04 16:43 UTC (permalink / raw)
  To: Dexuan Cui, linux-hyperv@vger.kernel.org,
	gregkh@linuxfoundation.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan,
	tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org
In-Reply-To: <1567470139-119355-3-git-send-email-decui@microsoft.com>

From: Dexuan Cui <decui@microsoft.com> Sent: Monday, September 2, 2019 5:23 PM
> 
> The API will be used by the hv_balloon and hv_vmbus drivers.
> 
> Balloon up/down and hot-add of memory must not be active if the user
> wants the Linux VM to support hibernation, because they are incompatible
> with hibernation according to Hyper-V team, e.g. upon suspend the
> balloon VSP doesn't save any info about the ballooned-out pages (if any);
> so, after Linux resumes, Linux balloon VSC expects that the VSP will
> return the pages if Linux is under memory pressure, but the VSP will
> never do that, since the VSP thinks it never stole the pages from the VM.
> 
> So, if the user wants Linux VM to support hibernation, Linux must forbid
> balloon up/down and hot-add, and the only functionality of the balloon VSC
> driver is reporting the VM's memory pressure to the host.
> 
> Ideally, when Linux detects that the user wants it to support hibernation,
> the balloon VSC should tell the VSP that it does not support ballooning
> and hot-add. However, the current version of the VSP requires the VSC
> should support these capabilities, otherwise the capability negotiation
> fails and the VSC can not load at all, so with the later changes to the
> VSC driver, Linux VM still reports to the VSP that the VSC supports these
> capabilities, but the VSC ignores the VSP's requests of balloon up/down
> and hot add, and reports an error to the VSP, when applicable. BTW, in
> the future the balloon VSP driver will allow the VSC to not support the
> capabilities of balloon up/down and hot add.
> 
> The ACPI S4 state is not a must for hibernation to work, because Linux is
> able to hibernate as long as the system can shut down. However in practice
> we decide to artificially use the presence of the virtual ACPI S4 state as
> an indicator of the user's intent of using hibernation, because Linux VM
> must find a way to know if the user wants to use the hibernation feature
> or not.
> 
> By default, Hyper-V does not enable the virtual ACPI S4 state; on recent
> Hyper-V hosts (e.g. RS5, 19H1), the administrator is able to enable the
> state for a VM by WMI commands.
> 
> Once all the vmbus and VSC patches for the hibernation feature are
> accepted, an extra patch will be submitted to forbid hibernation if the
> virtual ACPI S4 state is absent, i.e. hv_is_hibernation_supported() is
> false.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  arch/x86/hyperv/hv_init.c      | 7 +++++++
>  include/asm-generic/mshyperv.h | 2 ++
>  2 files changed, 9 insertions(+)

Reviewed-by: Michael Kelley <mikelley@microsoft.com>

^ permalink raw reply

* Re: [PATCH v3] PCI: hv: Make functions static
From: Krzysztof Wilczynski @ 2019-09-04 14:36 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: Haiyang Zhang, Bjorn Helgaas, KY Srinivasan, Stephen Hemminger,
	Sasha Levin, linux-pci@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org
In-Reply-To: <20190904142737.GA28184@e121166-lin.cambridge.arm.com>

Hello Lorenzo,

[...]
> This patch should go via the net tree - the code it is fixing
> is queued there, I will drop this patch from the PCI review
> queue.
[...]

Thank you!  Appreciated.

Krzysztof

^ permalink raw reply

* Re: [PATCH v3] PCI: hv: Make functions static
From: Lorenzo Pieralisi @ 2019-09-04 14:27 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: Krzysztof Wilczynski, Bjorn Helgaas, KY Srinivasan,
	Stephen Hemminger, Sasha Levin, linux-pci@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org
In-Reply-To: <DM6PR21MB13372349374A473FF98AD7BCCAA20@DM6PR21MB1337.namprd21.prod.outlook.com>

On Thu, Aug 29, 2019 at 03:50:47PM +0000, Haiyang Zhang wrote:
> 
> 
> > -----Original Message-----
> > From: Krzysztof Wilczynski <kswilczynski@gmail.com> On Behalf Of Krzysztof
> > Wilczynski
> > Sent: Thursday, August 29, 2019 2:17 AM
> > To: Bjorn Helgaas <helgaas@kernel.org>
> > Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> > <haiyangz@microsoft.com>; Stephen Hemminger
> > <sthemmin@microsoft.com>; Sasha Levin <sashal@kernel.org>; Lorenzo
> > Pieralisi <lorenzo.pieralisi@arm.com>; linux-pci@vger.kernel.org; linux-
> > kernel@vger.kernel.org; linux-hyperv@vger.kernel.org
> > Subject: [PATCH v3] PCI: hv: Make functions static
> > 
> > Functions hv_read_config_block(), hv_write_config_block() and
> > hv_register_block_invalidate() are not used anywhere else and are local to
> > drivers/pci/controller/pci-hyperv.c,
> > and do not need to be in global scope, so make these static.
> > 
> > Resolve following compiler warning that can be seen when building with
> > warnings enabled (W=1):
> > 
> > drivers/pci/controller/pci-hyperv.c:933:5: warning:
> >  no previous prototype for ‘hv_read_config_block’
> >   [-Wmissing-prototypes]
> > 
> > drivers/pci/controller/pci-hyperv.c:1013:5: warning:
> >  no previous prototype for ‘hv_write_config_block’
> >   [-Wmissing-prototypes]
> > 
> > drivers/pci/controller/pci-hyperv.c:1082:5: warning:
> >  no previous prototype for ‘hv_register_block_invalidate’
> >   [-Wmissing-prototypes]
> > 
> > Signed-off-by: Krzysztof Wilczynski <kw@linux.com>
> 
> Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>

This patch should go via the net tree - the code it is fixing
is queued there, I will drop this patch from the PCI review
queue.

If it helps:

Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

^ permalink raw reply

* RE: [PATCH] HID: hyperv: Use in-place iterator API in the channel callback
From: Jiri Kosina @ 2019-09-04 14:23 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: benjamin.tissoires@redhat.com, linux-input@vger.kernel.org,
	linux-hyperv@vger.kernel.org, Stephen Hemminger, Sasha Levin,
	sashal@kernel.org, Haiyang Zhang, KY Srinivasan, Michael Kelley,
	gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org
In-Reply-To: <KU1P153MB016679060F4360071B751AF0BFB90@KU1P153MB0166.APCP153.PROD.OUTLOOK.COM>

On Tue, 3 Sep 2019, Dexuan Cui wrote:

> > Hi Jiri, Benjamin, can this patch go through Sasha's hyperv tree:
> > https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
> > 
> > This is a purely Hyper-V specific change.
> 
> Hi Jiri, Benjamin,
> Are you OK if this patch for the Hyper-V HID driver goes through the Hyper-V
> tree maintained by Sasha Levin? It's a purely Hyper-V change, and I have
> been using the patch for several months and there is no regression.

No problem with that. Feel free to add

	Acked-by: Jiri Kosina <jkosina@suse.cz>

in that case.

Thanks,

-- 
Jiri Kosina
SUSE Labs


^ permalink raw reply

* [PATCH AUTOSEL 4.19 006/167] Drivers: hv: kvp: Fix two "this statement may fall through" warnings
From: Sasha Levin @ 2019-09-03 16:22 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Dexuan Cui, K . Y . Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Stable, Greg Kroah-Hartman, Sasha Levin, linux-hyperv
In-Reply-To: <20190903162519.7136-1-sashal@kernel.org>

From: Dexuan Cui <decui@microsoft.com>

[ Upstream commit fc62c3b1977d62e6374fd6e28d371bb42dfa5c9d ]

We don't need to call process_ib_ipinfo() if message->kvp_hdr.operation is
KVP_OP_GET_IP_INFO in kvp_send_key(), because here we just need to pass on
the op code from the host to the userspace; when the userspace returns
the info requested by the host, we pass the info on to the host in
kvp_respond_to_host() -> process_ob_ipinfo(). BTW, the current buggy code
actually doesn't cause any harm, because only message->kvp_hdr.operation
is used by the userspace, in the case of KVP_OP_GET_IP_INFO.

The patch also adds a missing "break;" in kvp_send_key(). BTW, the current
buggy code actually doesn't cause any harm, because in the case of
KVP_OP_SET, the unexpected fall-through corrupts
message->body.kvp_set.data.key_size, but that is not really used: see
the definition of struct hv_kvp_exchg_msg_value.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/hv_kvp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 5eed1e7da15c4..57715a0c81202 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -353,7 +353,6 @@ static void process_ib_ipinfo(void *in_msg, void *out_msg, int op)
 
 		out->body.kvp_ip_val.dhcp_enabled = in->kvp_ip_val.dhcp_enabled;
 
-	default:
 		utf16s_to_utf8s((wchar_t *)in->kvp_ip_val.adapter_id,
 				MAX_ADAPTER_ID_SIZE,
 				UTF16_LITTLE_ENDIAN,
@@ -406,7 +405,7 @@ kvp_send_key(struct work_struct *dummy)
 		process_ib_ipinfo(in_msg, message, KVP_OP_SET_IP_INFO);
 		break;
 	case KVP_OP_GET_IP_INFO:
-		process_ib_ipinfo(in_msg, message, KVP_OP_GET_IP_INFO);
+		/* We only need to pass on message->kvp_hdr.operation.  */
 		break;
 	case KVP_OP_SET:
 		switch (in_msg->body.kvp_set.data.value_type) {
@@ -446,6 +445,9 @@ kvp_send_key(struct work_struct *dummy)
 			break;
 
 		}
+
+		break;
+
 	case KVP_OP_GET:
 		message->body.kvp_set.data.key_size =
 			utf16s_to_utf8s(
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.19 020/167] Drivers: hv: kvp: Fix the indentation of some "break" statements
From: Sasha Levin @ 2019-09-03 16:22 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Dexuan Cui, K . Y . Srinivasan, Haiyang Zhang, Stephen Hemminger,
	Greg Kroah-Hartman, Sasha Levin, linux-hyperv
In-Reply-To: <20190903162519.7136-1-sashal@kernel.org>

From: Dexuan Cui <decui@microsoft.com>

[ Upstream commit d544c22d6951be3386ac59bb9a99c9bc566b3f09 ]

No functional change.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/hv_kvp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 57715a0c81202..a7513a8a8e372 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -420,7 +420,7 @@ kvp_send_key(struct work_struct *dummy)
 				UTF16_LITTLE_ENDIAN,
 				message->body.kvp_set.data.value,
 				HV_KVP_EXCHANGE_MAX_VALUE_SIZE - 1) + 1;
-				break;
+			break;
 
 		case REG_U32:
 			/*
@@ -456,7 +456,7 @@ kvp_send_key(struct work_struct *dummy)
 			UTF16_LITTLE_ENDIAN,
 			message->body.kvp_set.data.key,
 			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
-			break;
+		break;
 
 	case KVP_OP_DELETE:
 		message->body.kvp_delete.key_size =
@@ -466,12 +466,12 @@ kvp_send_key(struct work_struct *dummy)
 			UTF16_LITTLE_ENDIAN,
 			message->body.kvp_delete.key,
 			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
-			break;
+		break;
 
 	case KVP_OP_ENUMERATE:
 		message->body.kvp_enum_data.index =
 			in_msg->body.kvp_enum_data.index;
-			break;
+		break;
 	}
 
 	kvp_transaction.state = HVUTIL_USERSPACE_REQ;
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.19 021/167] Drivers: hv: kvp: Fix the recent regression caused by incorrect clean-up
From: Sasha Levin @ 2019-09-03 16:22 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Dexuan Cui, K . Y . Srinivasan, Stephen Hemminger, Haiyang Zhang,
	Stable, Greg Kroah-Hartman, Sasha Levin, linux-hyperv
In-Reply-To: <20190903162519.7136-1-sashal@kernel.org>

From: Dexuan Cui <decui@microsoft.com>

[ Upstream commit e670de54c813b5bc3672dd1c67871dc60e9206f4 ]

In kvp_send_key(), we do need to call process_ib_ipinfo() if
message->kvp_hdr.operation is KVP_OP_GET_IP_INFO, because it turns out
the userland hv_kvp_daemon needs the info of operation, adapter_id and
addr_family. With the incorrect fc62c3b1977d, the host can't get the
VM's IP via KVP.

And, fc62c3b1977d added a "break;", but actually forgot to initialize
the key_size/value in the case of KVP_OP_SET, so the default key_size of
0 is passed to the kvp daemon, and the pool files
/var/lib/hyperv/.kvp_pool_* can't be updated.

This patch effectively rolls back the previous fc62c3b1977d, and
correctly fixes the "this statement may fall through" warnings.

This patch is tested on WS 2012 R2 and 2016.

Fixes: fc62c3b1977d ("Drivers: hv: kvp: Fix two "this statement may fall through" warnings")
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/hv_kvp.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index a7513a8a8e372..d6106e1a0d4af 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -353,6 +353,9 @@ static void process_ib_ipinfo(void *in_msg, void *out_msg, int op)
 
 		out->body.kvp_ip_val.dhcp_enabled = in->kvp_ip_val.dhcp_enabled;
 
+		/* fallthrough */
+
+	case KVP_OP_GET_IP_INFO:
 		utf16s_to_utf8s((wchar_t *)in->kvp_ip_val.adapter_id,
 				MAX_ADAPTER_ID_SIZE,
 				UTF16_LITTLE_ENDIAN,
@@ -405,7 +408,11 @@ kvp_send_key(struct work_struct *dummy)
 		process_ib_ipinfo(in_msg, message, KVP_OP_SET_IP_INFO);
 		break;
 	case KVP_OP_GET_IP_INFO:
-		/* We only need to pass on message->kvp_hdr.operation.  */
+		/*
+		 * We only need to pass on the info of operation, adapter_id
+		 * and addr_family to the userland kvp daemon.
+		 */
+		process_ib_ipinfo(in_msg, message, KVP_OP_GET_IP_INFO);
 		break;
 	case KVP_OP_SET:
 		switch (in_msg->body.kvp_set.data.value_type) {
@@ -446,9 +453,9 @@ kvp_send_key(struct work_struct *dummy)
 
 		}
 
-		break;
-
-	case KVP_OP_GET:
+		/*
+		 * The key is always a string - utf16 encoding.
+		 */
 		message->body.kvp_set.data.key_size =
 			utf16s_to_utf8s(
 			(wchar_t *)in_msg->body.kvp_set.data.key,
@@ -456,6 +463,17 @@ kvp_send_key(struct work_struct *dummy)
 			UTF16_LITTLE_ENDIAN,
 			message->body.kvp_set.data.key,
 			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
+
+		break;
+
+	case KVP_OP_GET:
+		message->body.kvp_get.data.key_size =
+			utf16s_to_utf8s(
+			(wchar_t *)in_msg->body.kvp_get.data.key,
+			in_msg->body.kvp_get.data.key_size,
+			UTF16_LITTLE_ENDIAN,
+			message->body.kvp_get.data.key,
+			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
 		break;
 
 	case KVP_OP_DELETE:
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH] irqdomain: Add the missing assignment of domain->fwnode for named fwnode
From: Marc Zyngier @ 2019-09-03  8:22 UTC (permalink / raw)
  To: Dexuan Cui, Thomas Gleixner
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	Lorenzo Pieralisi, Bjorn Helgaas, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org, Michael Kelley,
	Lili Deng (Wicresoft North America Ltd)
In-Reply-To: <PU1P153MB01694D9AF625AC335C600C5FBFBE0@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM>

Hi Dexuan,

On 03/09/2019 00:14, Dexuan Cui wrote:
> 
> Recently device pass-through stopped working for Linux VMs running on Hyper-V.
> 
> git-bisect shows the regression is caused by the recent commit
> 467a3bb97432 ("PCI: hv: Allocate a named fwnode ..."), but the root cause
> is that the commit d59f6617eef0 forgets to set the domain->fwnode for
> IRQCHIP_FWNODE_NAMED*, and as a result:
> 
> 1. The domain->fwnode remains NULL.
> 
> 2. irq_find_matching_fwspec() returns NULL since "h->fwnode == fwnode" is
> false, and pci_set_bus_msi_domain() sets the Hyper-V PCI root bus's
> msi_domain to NULL.
> 
> 3. When the device is added onto the root bus, the device's dev->msi_domain
> is set to NULL in pci_set_msi_domain().
> 
> 4. When a device driver tries to enable MSI-X, pci_msi_setup_msi_irqs()
> calls arch_setup_msi_irqs(), which uses the native MSI chip (i.e.
> arch/x86/kernel/apic/msi.c: pci_msi_controller) to set up the irqs, but
> actually pci_msi_setup_msi_irqs() is supposed to call
> msi_domain_alloc_irqs() with the hbus->irq_domain, which is created in
> hv_pcie_init_irq_domain() and is associated with the Hyper-V chip
> hv_msi_irq_chip. Consequently, the irq line is not properly set up, and
> the device driver can not receive any interrupt.
> 
> Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only")
> Fixes: 467a3bb97432 ("PCI: hv: Allocate a named fwnode instead of an address-based one")
> Reported-by: Lili Deng <v-lide@microsoft.com>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> Note: the commit 467a3bb97432 ("PCI: hv: Allocate a named fwnode ...") has not
> gone in Linus's tree yet (the commit is in linux-next for a while), so the commit ID
> in the changelog can change when it goes in Linus's tree.

This branch is supposed to be stable, and I try to only apply fixes to
it. This normally ensures that commit IDs are the same once they land in
Linus' tree.

> This patch works in my test, but I'm not 100% sure this is the right fix. 
> 
> Looking forward to your comment!
> 
>  kernel/irq/irqdomain.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index e7bbab149750..132672b74e4b 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -149,6 +149,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
>  		switch (fwid->type) {
>  		case IRQCHIP_FWNODE_NAMED:
>  		case IRQCHIP_FWNODE_NAMED_ID:
> +			domain->fwnode = fwnode;
>  			domain->name = kstrdup(fwid->name, GFP_KERNEL);
>  			if (!domain->name) {
>  				kfree(domain);
> 

Looks absolutely correct to me, thanks for fixing it. I've applied it on
top of irqchip-next.

Thanks,

	M.
-- 
Jazz is not dead, it just smells funny...

^ permalink raw reply

* RE: [PATCH] HID: hyperv: Use in-place iterator API in the channel callback
From: Dexuan Cui @ 2019-09-03  0:43 UTC (permalink / raw)
  To: jikos@kernel.org, benjamin.tissoires@redhat.com,
	linux-input@vger.kernel.org, linux-hyperv@vger.kernel.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley
  Cc: gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org
In-Reply-To: <1566269763-26817-1-git-send-email-decui@microsoft.com>

> -----Original Message-----
> From: Dexuan Cui <decui@microsoft.com>
> Sent: Monday, August 19, 2019 7:57 PM
> To: jikos@kernel.org; benjamin.tissoires@redhat.com;
> linux-input@vger.kernel.org; linux-hyperv@vger.kernel.org; Stephen
> Hemminger <sthemmin@microsoft.com>; Sasha Levin
> <Alexander.Levin@microsoft.com>; sashal@kernel.org; Haiyang Zhang
> <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Michael
> Kelley <mikelley@microsoft.com>
> Cc: gregkh@linuxfoundation.org; linux-kernel@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>
> Subject: [PATCH] HID: hyperv: Use in-place iterator API in the channel callback
> 
> Simplify the ring buffer handling with the in-place API.
> 
> Also avoid the dynamic allocation and the memory leak in the channel
> callback function.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> Hi Jiri, Benjamin, can this patch go through Sasha's hyperv tree:
> https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
> 
> This is a purely Hyper-V specific change.

Hi Jiri, Benjamin,
Are you OK if this patch for the Hyper-V HID driver goes through the Hyper-V
tree maintained by Sasha Levin? It's a purely Hyper-V change, and I have
been using the patch for several months and there is no regression.

Thanks,
-- Dexuan

^ permalink raw reply

* RE: [PATCH v2] PCI: PM: Move to D0 before calling pci_legacy_resume_early()
From: Dexuan Cui @ 2019-09-03  0:34 UTC (permalink / raw)
  To: lorenzo.pieralisi@arm.com, bhelgaas@google.com,
	linux-pci@vger.kernel.org
  Cc: Michael Kelley, linux-hyperv@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	driverdev-devel@linuxdriverproject.org, Sasha Levin,
	Haiyang Zhang, KY Srinivasan, olaf@aepfle.de, apw@canonical.com,
	jasowang@redhat.com, vkuznets, marcelo.cerri@canonical.com,
	Stephen Hemminger, jackm@mellanox.com
In-Reply-To: <KU1P153MB016637CAEAD346F0AA8E3801BFAD0@KU1P153MB0166.APCP153.PROD.OUTLOOK.COM>

> From: Dexuan Cui <decui@microsoft.com>
> Sent: Tuesday, August 13, 2019 6:07 PM
> To: lorenzo.pieralisi@arm.com; bhelgaas@google.com;
> linux-pci@vger.kernel.org
> Cc: Michael Kelley <mikelley@microsoft.com>; linux-hyperv@vger.kernel.org;
> linux-kernel@vger.kernel.org; driverdev-devel@linuxdriverproject.org; Sasha
> Levin <Alexander.Levin@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; KY Srinivasan <kys@microsoft.com>;
> olaf@aepfle.de; apw@canonical.com; jasowang@redhat.com; vkuznets
> <vkuznets@redhat.com>; marcelo.cerri@canonical.com; Stephen Hemminger
> <sthemmin@microsoft.com>; jackm@mellanox.com; Dexuan Cui
> <decui@microsoft.com>
> Subject: [PATCH v2] PCI: PM: Move to D0 before calling
> pci_legacy_resume_early()
> 
> 
> In pci_legacy_suspend_late(), the device state is moved to PCI_UNKNOWN.
> 
> In pci_pm_thaw_noirq(), the state is supposed to be moved back to PCI_D0,
> but the current code misses the pci_legacy_resume_early() path, so the
> state remains in PCI_UNKNOWN in that path. As a result, in the resume
> phase of hibernation, this causes an error for the Mellanox VF driver,
> which fails to enable MSI-X because pci_msi_supported() is false due
> to dev->current_state != PCI_D0:
> 
> mlx4_core a6d1:00:02.0: Detected virtual function - running in slave mode
> mlx4_core a6d1:00:02.0: Sending reset
> mlx4_core a6d1:00:02.0: Sending vhcr0
> mlx4_core a6d1:00:02.0: HCA minimum page size:512
> mlx4_core a6d1:00:02.0: Timestamping is not supported in slave mode
> mlx4_core a6d1:00:02.0: INTx is not supported in multi-function mode,
> aborting
> PM: dpm_run_callback(): pci_pm_thaw+0x0/0xd7 returns -95
> PM: Device a6d1:00:02.0 failed to thaw: error -95
> 
> To be more accurate, the "resume" phase means the "thaw" callbacks which
> run before the system enters hibernation: when the user runs the command
> "echo disk > /sys/power/state" for hibernation, first the kernel "freezes"
> all the devices and creates a hibernation image, then the kernel "thaws"
> the devices including the disk/NIC, writes the memory to the disk, and
> powers down. This patch fixes the error message for the Mellanox VF driver
> in this phase.
> 
> When the system starts again, a fresh kernel starts to run, and when the
> kernel detects that a hibernation image was saved, the kernel "quiesces"
> the devices, and then "restores" the devices from the saved image. In this
> path:
> device_resume_noirq() -> ... ->
>   pci_pm_restore_noirq() ->
>     pci_pm_default_resume_early() ->
>       pci_power_up() moves the device states back to PCI_D0. This path is
> not broken and doesn't need my patch.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
> 
> changes in v2:
> 	Updated the changelog with more details.
> 
>  drivers/pci/pci-driver.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
> index 36dbe960306b..27dfc68db9e7 100644
> --- a/drivers/pci/pci-driver.c
> +++ b/drivers/pci/pci-driver.c
> @@ -1074,15 +1074,16 @@ static int pci_pm_thaw_noirq(struct device *dev)
>  			return error;
>  	}
> 
> -	if (pci_has_legacy_pm_support(pci_dev))
> -		return pci_legacy_resume_early(dev);
> -
>  	/*
>  	 * pci_restore_state() requires the device to be in D0 (because of MSI
>  	 * restoration among other things), so force it into D0 in case the
>  	 * driver's "freeze" callbacks put it into a low-power state directly.
>  	 */
>  	pci_set_power_state(pci_dev, PCI_D0);
> +
> +	if (pci_has_legacy_pm_support(pci_dev))
> +		return pci_legacy_resume_early(dev);
> +
>  	pci_restore_state(pci_dev);
> 
>  	if (drv && drv->pm && drv->pm->thaw_noirq)
> --

Hi, Lorenzo, Bjorn,

Can you please take a look at the v2 ?

Thanks,
-- Dexuan

^ permalink raw reply

* Re: [PATCH v4 4/5] HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
From: Sasha Levin @ 2019-09-03  0:26 UTC (permalink / raw)
  To: Jiri Kosina
  Cc: Michael Kelley, m.maya.nakamura, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, benjamin.tissoires@redhat.com, x86@kernel.org,
	linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <nycvar.YFH.7.76.1909021330230.27147@cbobk.fhfr.pm>

On Mon, Sep 02, 2019 at 01:30:44PM +0200, Jiri Kosina wrote:
>On Sat, 31 Aug 2019, Michael Kelley wrote:
>
>> From: Maya Nakamura <m.maya.nakamura@gmail.com>  Sent: Friday, July 12, 2019 1:28 AM
>> >
>> > Define the ring buffer size as a constant expression because it should
>> > not depend on the guest page size.
>> >
>> > Signed-off-by: Maya Nakamura <m.maya.nakamura@gmail.com>
>> > Reviewed-by: Michael Kelley <mikelley@microsoft.com>
>>
>> Jiri and Benjamin -- OK if this small patch for the Hyper-V HID driver
>> goes through the Hyper-V tree maintained by Sasha Levin?   It's a purely
>> Hyper-V change so the ring buffer size isn't bigger when running
>> on ARM64 where the page size might be 16K or 64K.
>
>Yeah; FWIW feel free to add
>
>	Acked-by: Jiri Kosina <jkosina@suse.cz>

Queued up for hyperv-next, thanks!

--
Thanks,
Sasha

^ permalink raw reply

* [PATCH v4 01/12] x86/hyper-v: Suspend/resume the hypercall page for hibernation
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

This is needed for hibernation, e.g. when we resume the old kernel, we need
to disable the "current" kernel's hypercall page and then resume the old
kernel's.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/hyperv/hv_init.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0d25868..78e53d9 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -20,6 +20,7 @@
 #include <linux/hyperv.h>
 #include <linux/slab.h>
 #include <linux/cpuhotplug.h>
+#include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
 
 void *hv_hypercall_pg;
@@ -223,6 +224,34 @@ static int __init hv_pci_init(void)
 	return 1;
 }
 
+static int hv_suspend(void)
+{
+	union hv_x64_msr_hypercall_contents hypercall_msr;
+
+	/* Reset the hypercall page */
+	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+	hypercall_msr.enable = 0;
+	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+	return 0;
+}
+
+static void hv_resume(void)
+{
+	union hv_x64_msr_hypercall_contents hypercall_msr;
+
+	/* Re-enable the hypercall page */
+	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+	hypercall_msr.enable = 1;
+	hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
+	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+}
+
+static struct syscore_ops hv_syscore_ops = {
+	.suspend = hv_suspend,
+	.resume = hv_resume,
+};
+
 /*
  * This function is to be invoked early in the boot sequence after the
  * hypervisor has been detected.
@@ -303,6 +332,9 @@ void __init hyperv_init(void)
 
 	/* Register Hyper-V specific clocksource */
 	hv_init_clocksource();
+
+	register_syscore_ops(&hv_syscore_ops);
+
 	return;
 
 remove_cpuhp_state:
@@ -322,6 +354,8 @@ void hyperv_cleanup(void)
 {
 	union hv_x64_msr_hypercall_contents hypercall_msr;
 
+	unregister_syscore_ops(&hv_syscore_ops);
+
 	/* Reset our OS id */
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 02/12] x86/hyper-v: Implement hv_is_hibernation_supported()
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

The API will be used by the hv_balloon and hv_vmbus drivers.

Balloon up/down and hot-add of memory must not be active if the user
wants the Linux VM to support hibernation, because they are incompatible
with hibernation according to the Hyper-V team, e.g. upon suspend the
balloon VSP doesn't save any info about the ballooned-out pages (if any);
so, after Linux resumes, Linux balloon VSC expects that the VSP will
return the pages if Linux is under memory pressure, but the VSP will
never do that, since the VSP thinks it never stole the pages from the VM.

So, if the user wants Linux VM to support hibernation, Linux must forbid
balloon up/down and hot-add, and the only functionality of the balloon VSC
driver is reporting the VM's memory pressure to the host.

Ideally, when Linux detects that the user wants it to support hibernation,
the balloon VSC should tell the VSP that it does not support ballooning
and hot-add. However, the current version of the VSP requires that the VSC
support these capabilities; otherwise the capability negotiation
fails and the VSC cannot load at all, so with the later changes to the
VSC driver, Linux VM still reports to the VSP that the VSC supports these
capabilities, but the VSC ignores the VSP's requests of balloon up/down
and hot add, and reports an error to the VSP, when applicable. BTW, in
the future the balloon VSP driver will allow the VSC to not support the
capabilities of balloon up/down and hot add.

The ACPI S4 state is not a must for hibernation to work, because Linux is
able to hibernate as long as the system can shut down. However in practice
we decide to artificially use the presence of the virtual ACPI S4 state as
an indicator of the user's intent of using hibernation, because Linux VM
must find a way to know if the user wants to use the hibernation feature
or not.

By default, Hyper-V does not enable the virtual ACPI S4 state; on recent
Hyper-V hosts (e.g. RS5, 19H1), the administrator is able to enable the
state for a VM by WMI commands.

Once all the vmbus and VSC patches for the hibernation feature are
accepted, an extra patch will be submitted to forbid hibernation if the
virtual ACPI S4 state is absent, i.e. hv_is_hibernation_supported() is
false.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 arch/x86/hyperv/hv_init.c      | 7 +++++++
 include/asm-generic/mshyperv.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 78e53d9..6735e45 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -7,6 +7,7 @@
  * Author : K. Y. Srinivasan <kys@microsoft.com>
  */
 
+#include <linux/acpi.h>
 #include <linux/efi.h>
 #include <linux/types.h>
 #include <asm/apic.h>
@@ -453,3 +454,9 @@ bool hv_is_hyperv_initialized(void)
 	return hypercall_msr.enable;
 }
 EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
+
+bool hv_is_hibernation_supported(void)
+{
+	return acpi_sleep_state_supported(ACPI_STATE_S4);
+}
+EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 0becb7d..1cb4001 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -166,9 +166,11 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
 void hyperv_report_panic(struct pt_regs *regs, long err);
 void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
 bool hv_is_hyperv_initialized(void);
+bool hv_is_hibernation_supported(void);
 void hyperv_cleanup(void);
 #else /* CONFIG_HYPERV */
 static inline bool hv_is_hyperv_initialized(void) { return false; }
+static inline bool hv_is_hibernation_supported(void) { return false; }
 static inline void hyperv_cleanup(void) {}
 #endif /* CONFIG_HYPERV */
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 03/12] clocksource/drivers: Suspend/resume Hyper-V clocksource for hibernation
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

This is needed for hibernation, e.g. when we resume the old kernel, we need
to disable the "current" kernel's TSC page and then resume the old kernel's.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/clocksource/hyperv_timer.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 17b96f9..8f3422c 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -238,12 +238,37 @@ static u64 read_hv_clock_tsc(struct clocksource *arg)
 	return read_hv_sched_clock_tsc();
 }
 
+static void suspend_hv_clock_tsc(struct clocksource *arg)
+{
+	u64 tsc_msr;
+
+	/* Disable the TSC page */
+	hv_get_reference_tsc(tsc_msr);
+	tsc_msr &= ~BIT_ULL(0);
+	hv_set_reference_tsc(tsc_msr);
+}
+
+
+static void resume_hv_clock_tsc(struct clocksource *arg)
+{
+	phys_addr_t phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
+	u64 tsc_msr;
+
+	/* Re-enable the TSC page */
+	hv_get_reference_tsc(tsc_msr);
+	tsc_msr &= GENMASK_ULL(11, 0);
+	tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
+	hv_set_reference_tsc(tsc_msr);
+}
+
 static struct clocksource hyperv_cs_tsc = {
 	.name	= "hyperv_clocksource_tsc_page",
 	.rating	= 400,
 	.read	= read_hv_clock_tsc,
 	.mask	= CLOCKSOURCE_MASK(64),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+	.suspend= suspend_hv_clock_tsc,
+	.resume	= resume_hv_clock_tsc,
 };
 #endif
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 04/12] Drivers: hv: vmbus: Break out synic enable and disable operations
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

Break out synic enable and disable operations into separate
hv_synic_disable_regs() and hv_synic_enable_regs() functions for use by a
later patch to support hibernation.

There is no functional change except the unnecessary check
"if (sctrl.enable != 1) return -EFAULT;" which is removed, because when
we're in hv_synic_cleanup(), we're absolutely sure sctrl.enable must be 1.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/hv/hv.c           | 66 ++++++++++++++++++++++++++---------------------
 drivers/hv/hyperv_vmbus.h |  2 ++
 2 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 6188fb7..fcc5279 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -154,7 +154,7 @@ void hv_synic_free(void)
  * retrieve the initialized message and event pages.  Otherwise, we create and
  * initialize the message and event pages.
  */
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
 {
 	struct hv_per_cpu_context *hv_cpu
 		= per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
 	sctrl.enable = 1;
 
 	hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_init(unsigned int cpu)
+{
+	hv_synic_enable_regs(cpu);
 
 	hv_stimer_init(cpu);
 
@@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
 /*
  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
  */
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
 {
 	union hv_synic_sint shared_sint;
 	union hv_synic_simp simp;
 	union hv_synic_siefp siefp;
 	union hv_synic_scontrol sctrl;
+
+	hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+	shared_sint.masked = 1;
+
+	/* Need to correctly cleanup in the case of SMP!!! */
+	/* Disable the interrupt */
+	hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+	hv_get_simp(simp.as_uint64);
+	simp.simp_enabled = 0;
+	simp.base_simp_gpa = 0;
+
+	hv_set_simp(simp.as_uint64);
+
+	hv_get_siefp(siefp.as_uint64);
+	siefp.siefp_enabled = 0;
+	siefp.base_siefp_gpa = 0;
+
+	hv_set_siefp(siefp.as_uint64);
+
+	/* Disable the global synic bit */
+	hv_get_synic_state(sctrl.as_uint64);
+	sctrl.enable = 0;
+	hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
 	struct vmbus_channel *channel, *sc;
 	bool channel_found = false;
 	unsigned long flags;
 
-	hv_get_synic_state(sctrl.as_uint64);
-	if (sctrl.enable != 1)
-		return -EFAULT;
-
 	/*
 	 * Search for channels which are bound to the CPU we're about to
 	 * cleanup. In case we find one and vmbus is still connected we need to
@@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
 
 	hv_stimer_cleanup(cpu);
 
-	hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-	shared_sint.masked = 1;
-
-	/* Need to correctly cleanup in the case of SMP!!! */
-	/* Disable the interrupt */
-	hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-	hv_get_simp(simp.as_uint64);
-	simp.simp_enabled = 0;
-	simp.base_simp_gpa = 0;
-
-	hv_set_simp(simp.as_uint64);
-
-	hv_get_siefp(siefp.as_uint64);
-	siefp.siefp_enabled = 0;
-	siefp.base_siefp_gpa = 0;
-
-	hv_set_siefp(siefp.as_uint64);
-
-	/* Disable the global synic bit */
-	sctrl.enable = 0;
-	hv_set_synic_state(sctrl.as_uint64);
+	hv_synic_disable_regs(cpu);
 
 	return 0;
 }
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index fb16a62..9f7fb6d 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -169,8 +169,10 @@ extern int hv_post_message(union hv_connection_id connection_id,
 
 extern void hv_synic_free(void);
 
+extern void hv_synic_enable_regs(unsigned int cpu);
 extern int hv_synic_init(unsigned int cpu);
 
+extern void hv_synic_disable_regs(unsigned int cpu);
 extern int hv_synic_cleanup(unsigned int cpu);
 
 /* Interface */
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 06/12] Drivers: hv: vmbus: Add a helper function is_sub_channel()
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

The existing method of telling whether a channel is a sub-channel in
vmbus_process_offer() is cumbersome. This new, simple helper function
should be preferred in the future.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 include/linux/hyperv.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..2d39248 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -245,7 +245,10 @@ struct vmbus_channel_offer {
 		} pipe;
 	} u;
 	/*
-	 * The sub_channel_index is defined in win8.
+	 * The sub_channel_index is defined in Win8: a value of zero means a
+	 * primary channel and a value of non-zero means a sub-channel.
+	 *
+	 * Before Win8, the field is reserved, meaning it's always zero.
 	 */
 	u16 sub_channel_index;
 	u16 reserved3;
@@ -934,6 +937,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
 		  VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
 }
 
+static inline bool is_sub_channel(const struct vmbus_channel *c)
+{
+	return c->offermsg.offer.sub_channel_index != 0;
+}
+
 static inline void set_channel_affinity_state(struct vmbus_channel *c,
 					      enum hv_numa_policy policy)
 {
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 09/12] Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

Before Linux enters hibernation, it sends the CHANNELMSG_UNLOAD message to
the host so all the offers are gone. After hibernation, Linux needs to
re-negotiate with the host using the same vmbus protocol version (which
was in use before hibernation), and ask the host to re-offer the vmbus
devices.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/hv/connection.c   |  3 +--
 drivers/hv/hyperv_vmbus.h |  2 ++
 drivers/hv/vmbus_drv.c    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e1..806319c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -59,8 +59,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
 	}
 }
 
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
-					__u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
 {
 	int ret = 0;
 	unsigned int cur_cpu;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 9f7fb6d..613888e 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -272,6 +272,8 @@ struct vmbus_msginfo {
 
 extern struct vmbus_connection vmbus_connection;
 
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
 static inline void vmbus_send_interrupt(u32 relid)
 {
 	sync_set_bit(relid, vmbus_connection.send_int_page);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index a30c70a..ce9974b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2089,6 +2089,51 @@ static int vmbus_acpi_add(struct acpi_device *device)
 	return ret_val;
 }
 
+static int vmbus_bus_suspend(struct device *dev)
+{
+	vmbus_initiate_unload(false);
+
+	vmbus_connection.conn_state = DISCONNECTED;
+
+	return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+	struct vmbus_channel_msginfo *msginfo;
+	size_t msgsize;
+	int ret;
+
+	/*
+	 * We only use the 'vmbus_proto_version', which was in use before
+	 * hibernation, to re-negotiate with the host.
+	 */
+	if (vmbus_proto_version == VERSION_INVAL ||
+	    vmbus_proto_version == 0) {
+		pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+		return -EINVAL;
+	}
+
+	msgsize = sizeof(*msginfo) +
+		  sizeof(struct vmbus_channel_initiate_contact);
+
+	msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+	if (msginfo == NULL)
+		return -ENOMEM;
+
+	ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+	kfree(msginfo);
+
+	if (ret != 0)
+		return ret;
+
+	vmbus_request_offers();
+
+	return 0;
+}
+
 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
 	{"VMBUS", 0},
 	{"VMBus", 0},
@@ -2096,6 +2141,19 @@ static int vmbus_acpi_add(struct acpi_device *device)
 };
 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
 static struct acpi_driver vmbus_acpi_driver = {
 	.name = "vmbus",
 	.ids = vmbus_acpi_device_ids,
@@ -2103,6 +2161,7 @@ static int vmbus_acpi_add(struct acpi_device *device)
 		.add = vmbus_acpi_add,
 		.remove = vmbus_acpi_remove,
 	},
+	.drv.pm = &vmbus_bus_pm,
 };
 
 static void hv_kexec_handler(void)
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 12/12] Drivers: hv: vmbus: Resume after fixing up old primary channels
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

When the host re-offers the primary channels upon resume, the host only
guarantees the Instance GUID doesn't change, so vmbus_bus_suspend()
should invalidate channel->offermsg.child_relid and figure out the
number of primary channels that need to be fixed up upon resume.

Upon resume, vmbus_onoffer() finds the old channel structs, maps the
new offers to the old channels, and fixes up the old structs; finally,
the resume callbacks of the VSC drivers re-open the channels.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/channel_mgmt.c | 85 ++++++++++++++++++++++++++++++++++++-----------
 drivers/hv/connection.c   |  2 ++
 drivers/hv/hyperv_vmbus.h | 14 ++++++++
 drivers/hv/vmbus_drv.c    | 17 ++++++++++
 include/linux/hyperv.h    |  3 ++
 5 files changed, 101 insertions(+), 20 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 5518d03..8eb1675 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
 		cpumask_clear_cpu(channel->target_cpu,
 				  &primary_channel->alloced_cpus_in_node);
 
-	vmbus_release_relid(channel->offermsg.child_relid);
+	/*
+	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+	 * the relid is invalidated; after hibernation, when the user-space app
+	 * destroys the channel, the relid is INVALID_RELID, and in this case
+	 * it's unnecessary and unsafe to release the old relid, since the same
+	 * relid can refer to a completely different channel now.
+	 */
+	if (channel->offermsg.child_relid != INVALID_RELID)
+		vmbus_release_relid(channel->offermsg.child_relid);
 
 	free_channel(channel);
 }
@@ -851,6 +859,36 @@ void vmbus_initiate_unload(bool crash)
 		vmbus_wait_for_unload();
 }
 
+static void check_ready_for_resume_event(void)
+{
+	/*
+	 * If all the old primary channels have been fixed up, then it's safe
+	 * to resume.
+	 */
+	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+		complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+				      struct vmbus_channel_offer_channel *offer)
+{
+	/*
+	 * Setup state for signalling the host.
+	 */
+	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+	if (vmbus_proto_version != VERSION_WS2008) {
+		channel->is_dedicated_interrupt =
+				(offer->is_dedicated_interrupt != 0);
+		channel->sig_event = offer->connection_id;
+	}
+
+	memcpy(&channel->offermsg, offer,
+	       sizeof(struct vmbus_channel_offer_channel));
+	channel->monitor_grp = (u8)offer->monitorid / 32;
+	channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
 /*
  * find_primary_channel_by_offer - Get the channel object given the new offer.
  * This is only used in the resume path of hibernation.
@@ -902,14 +940,29 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 		atomic_dec(&vmbus_connection.offer_in_progress);
 
 		/*
-		 * We're resuming from hibernation: we expect the host to send
-		 * exactly the same offers that we had before the hibernation.
+		 * We're resuming from hibernation: all the sub-channel and
+		 * hv_sock channels we had before the hibernation should have
+		 * been cleaned up, and now we must be seeing a re-offered
+		 * primary channel that we had before the hibernation.
 		 */
+
+		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+		/* Fix up the relid. */
+		oldchannel->offermsg.child_relid = offer->child_relid;
+
 		offer_sz = sizeof(*offer);
-		if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
+		if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+			check_ready_for_resume_event();
 			return;
+		}
 
-		pr_debug("Mismatched offer from the host (relid=%d)\n",
+		/*
+		 * This is not an error, since the host can also change the
+		 * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+		 * the offer->connection_id of the Mellanox VF vmbus device
+		 * can change when the host reoffers the device upon resume.
+		 */
+		pr_debug("vmbus offer changed: relid=%d\n",
 			 offer->child_relid);
 
 		print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
@@ -917,6 +970,12 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 				     false);
 		print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
 				     16, 4, offer, offer_sz, false);
+
+		/* Fix up the old channel. */
+		vmbus_setup_channel_state(oldchannel, offer);
+
+		check_ready_for_resume_event();
+
 		return;
 	}
 
@@ -929,21 +988,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 		return;
 	}
 
-	/*
-	 * Setup state for signalling the host.
-	 */
-	newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
-	if (vmbus_proto_version != VERSION_WS2008) {
-		newchannel->is_dedicated_interrupt =
-				(offer->is_dedicated_interrupt != 0);
-		newchannel->sig_event = offer->connection_id;
-	}
-
-	memcpy(&newchannel->offermsg, offer,
-	       sizeof(struct vmbus_channel_offer_channel));
-	newchannel->monitor_grp = (u8)offer->monitorid / 32;
-	newchannel->monitor_bit = (u8)offer->monitorid % 32;
+	vmbus_setup_channel_state(newchannel, offer);
 
 	vmbus_process_offer(newchannel);
 }
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 99851ea..6e4c015 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -29,6 +29,8 @@ struct vmbus_connection vmbus_connection = {
 
 	.ready_for_suspend_event= COMPLETION_INITIALIZER(
 				  vmbus_connection.ready_for_suspend_event),
+	.ready_for_resume_event	= COMPLETION_INITIALIZER(
+				  vmbus_connection.ready_for_resume_event),
 };
 EXPORT_SYMBOL_GPL(vmbus_connection);
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index eedbe59..ca9ef7c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -270,6 +270,20 @@ struct vmbus_connection {
 	 * drop to zero.
 	 */
 	struct completion ready_for_suspend_event;
+
+	/*
+	 * The number of primary channels that should be "fixed up"
+	 * upon resume: these channels are re-offered upon resume, and some
+	 * fields of the channel offers (i.e. child_relid and connection_id)
+	 * can change, so the old offermsg must be fixed up, before the resume
+	 * callbacks of the VSC drivers start to further touch the channels.
+	 */
+	atomic_t nr_chan_fixup_on_resume;
+	/*
+	 * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+	 * drop to zero.
+	 */
+	struct completion ready_for_resume_event;
 };
 
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 32ec951..391f0b2 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2164,9 +2164,17 @@ static int vmbus_bus_suspend(struct device *dev)
 	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
 		wait_for_completion(&vmbus_connection.ready_for_suspend_event);
 
+	WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
 	mutex_lock(&vmbus_connection.channel_mutex);
 
 	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+		/*
+		 * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+		 * up the field, and the other fields (if necessary).
+		 */
+		channel->offermsg.child_relid = INVALID_RELID;
+
 		if (is_hvsock_channel(channel)) {
 			if (!channel->rescind) {
 				pr_err("hv_sock channel not rescinded!\n");
@@ -2181,6 +2189,8 @@ static int vmbus_bus_suspend(struct device *dev)
 			WARN_ON_ONCE(1);
 		}
 		spin_unlock_irqrestore(&channel->lock, flags);
+
+		atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
 	}
 
 	mutex_unlock(&vmbus_connection.channel_mutex);
@@ -2189,6 +2199,9 @@ static int vmbus_bus_suspend(struct device *dev)
 
 	vmbus_connection.conn_state = DISCONNECTED;
 
+	/* Reset the event for the next resume. */
+	reinit_completion(&vmbus_connection.ready_for_resume_event);
+
 	return 0;
 }
 
@@ -2223,8 +2236,12 @@ static int vmbus_bus_resume(struct device *dev)
 	if (ret != 0)
 		return ret;
 
+	WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
 	vmbus_request_offers();
 
+	wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
 	/* Reset the event for the next suspend. */
 	reinit_completion(&vmbus_connection.ready_for_suspend_event);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 8a60e77..a3aa9e9 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -426,6 +426,9 @@ enum vmbus_channel_message_type {
 	CHANNELMSG_COUNT
 };
 
+/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
+#define INVALID_RELID	U32_MAX
+
 struct vmbus_channel_message_header {
 	enum vmbus_channel_message_type msgtype;
 	u32 padding;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 05/12] Drivers: hv: vmbus: Suspend/resume the synic for hibernation
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

This is needed when we resume the old kernel from the "current" kernel.

Note: when hv_synic_suspend() and hv_synic_resume() run, all the
non-boot CPUs have been offlined, and interrupts are disabled on CPU0.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc..2ef375c 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -30,6 +30,7 @@
 #include <linux/kdebug.h>
 #include <linux/efi.h>
 #include <linux/random.h>
+#include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
 #include "hyperv_vmbus.h"
 
@@ -2086,6 +2087,47 @@ static void hv_crash_handler(struct pt_regs *regs)
 	hyperv_cleanup();
 };
 
+static int hv_synic_suspend(void)
+{
+	/*
+	 * When we reach here, all the non-boot CPUs have been offlined, and
+	 * the stimers on them have been unbound in hv_synic_cleanup() ->
+	 * hv_stimer_cleanup() -> clockevents_unbind_device().
+	 *
+	 * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
+	 * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
+	 * because the interrupts remain disabled between syscore_suspend()
+	 * and syscore_resume(): see create_image() and resume_target_kernel();
+	 * 2) the stimer on CPU0 is automatically disabled later by
+	 * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+	 * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
+	 * would be triggered if we call clockevents_unbind_device(), which
+	 * may sleep, in an interrupts-disabled context. So, we intentionally
+	 * don't call hv_stimer_cleanup(0) here.
+	 */
+
+	hv_synic_disable_regs(0);
+
+	return 0;
+}
+
+static void hv_synic_resume(void)
+{
+	hv_synic_enable_regs(0);
+
+	/*
+	 * Note: we don't need to call hv_stimer_init(0), because the timer
+	 * on CPU0 is not unbound in hv_synic_suspend(), and the timer is
+	 * automatically re-enabled in timekeeping_resume().
+	 */
+}
+
+/* The callbacks run only on CPU0, with irqs_disabled. */
+static struct syscore_ops hv_synic_syscore_ops = {
+	.suspend = hv_synic_suspend,
+	.resume = hv_synic_resume,
+};
+
 static int __init hv_acpi_init(void)
 {
 	int ret, t;
@@ -2116,6 +2158,8 @@ static int __init hv_acpi_init(void)
 	hv_setup_kexec_handler(hv_kexec_handler);
 	hv_setup_crash_handler(hv_crash_handler);
 
+	register_syscore_ops(&hv_synic_syscore_ops);
+
 	return 0;
 
 cleanup:
@@ -2128,6 +2172,8 @@ static void __exit vmbus_exit(void)
 {
 	int cpu;
 
+	unregister_syscore_ops(&hv_synic_syscore_ops);
+
 	hv_remove_kexec_handler();
 	hv_remove_crash_handler();
 	vmbus_connection.conn_state = DISCONNECTED;
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v4 11/12] Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
From: Dexuan Cui @ 2019-09-03  0:23 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1567470139-119355-1-git-send-email-decui@microsoft.com>

Before suspend, Linux must make sure all the hv_sock channels have been
properly cleaned up, because a hv_sock connection can not persist across
hibernation, and the user-space app must be properly notified of the
state change of the connection.

Before suspend, Linux also must make sure all the sub-channels have been
destroyed, i.e. the related channel structs of the sub-channels must be
properly removed, otherwise they would cause a conflict when the
sub-channels are recreated upon resume.

Add a counter to track such channels, and vmbus_bus_suspend() should wait
for the counter to drop to zero.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/hv/channel_mgmt.c | 26 ++++++++++++++++++++++++++
 drivers/hv/connection.c   |  3 +++
 drivers/hv/hyperv_vmbus.h | 12 ++++++++++++
 drivers/hv/vmbus_drv.c    | 44 +++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 44b92fa..5518d03 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -545,6 +545,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
 	mutex_lock(&vmbus_connection.channel_mutex);
 
+	/* Remember the channels that should be cleaned up upon suspend. */
+	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
 	/*
 	 * Now that we have acquired the channel_mutex,
 	 * we can release the potentially racing rescind thread.
@@ -944,6 +948,16 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 	vmbus_process_offer(newchannel);
 }
 
+static void check_ready_for_suspend_event(void)
+{
+	/*
+	 * If all the sub-channels or hv_sock channels have been cleaned up,
+	 * then it's safe to suspend.
+	 */
+	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+		complete(&vmbus_connection.ready_for_suspend_event);
+}
+
 /*
  * vmbus_onoffer_rescind - Rescind offer handler.
  *
@@ -954,6 +968,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 	struct vmbus_channel_rescind_offer *rescind;
 	struct vmbus_channel *channel;
 	struct device *dev;
+	bool clean_up_chan_for_suspend;
 
 	rescind = (struct vmbus_channel_rescind_offer *)hdr;
 
@@ -993,6 +1008,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		return;
 	}
 
+	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+				    is_sub_channel(channel);
 	/*
 	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
 	 * should make sure the channel callback is not running any more.
@@ -1018,6 +1035,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 	if (channel->device_obj) {
 		if (channel->chn_rescind_callback) {
 			channel->chn_rescind_callback(channel);
+
+			if (clean_up_chan_for_suspend)
+				check_ready_for_suspend_event();
+
 			return;
 		}
 		/*
@@ -1050,6 +1071,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		}
 		mutex_unlock(&vmbus_connection.channel_mutex);
 	}
+
+	/* The "channel" may have been freed. Do not access it any longer. */
+
+	if (clean_up_chan_for_suspend)
+		check_ready_for_suspend_event();
 }
 
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 806319c..99851ea 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -26,6 +26,9 @@
 struct vmbus_connection vmbus_connection = {
 	.conn_state		= DISCONNECTED,
 	.next_gpadl_handle	= ATOMIC_INIT(0xE1E10),
+
+	.ready_for_suspend_event= COMPLETION_INITIALIZER(
+				  vmbus_connection.ready_for_suspend_event),
 };
 EXPORT_SYMBOL_GPL(vmbus_connection);
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 613888e..eedbe59 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -258,6 +258,18 @@ struct vmbus_connection {
 	struct workqueue_struct *work_queue;
 	struct workqueue_struct *handle_primary_chan_wq;
 	struct workqueue_struct *handle_sub_chan_wq;
+
+	/*
+	 * The number of sub-channels and hv_sock channels that should be
+	 * cleaned up upon suspend: sub-channels will be re-created upon
+	 * resume, and hv_sock channels should not survive suspend.
+	 */
+	atomic_t nr_chan_close_on_suspend;
+	/*
+	 * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+	 * drop to zero.
+	 */
+	struct completion ready_for_suspend_event;
 };
 
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 45b976e..32ec951 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2127,7 +2127,8 @@ static int vmbus_acpi_add(struct acpi_device *device)
 
 static int vmbus_bus_suspend(struct device *dev)
 {
-	struct vmbus_channel *channel;
+	struct vmbus_channel *channel, *sc;
+	unsigned long flags;
 
 	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
 		/*
@@ -2146,6 +2147,44 @@ static int vmbus_bus_suspend(struct device *dev)
 	}
 	mutex_unlock(&vmbus_connection.channel_mutex);
 
+	/*
+	 * Wait until all the sub-channels and hv_sock channels have been
+	 * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+	 * they would conflict with the new sub-channels that will be created
+	 * in the resume path. hv_sock channels should also be destroyed, but
+	 * a hv_sock channel of an established hv_sock connection can not be
+	 * really destroyed since it may still be referenced by the userspace
+	 * application, so we just force the hv_sock channel to be rescinded
+	 * by vmbus_force_channel_rescinded(), and the userspace application
+	 * will thoroughly destroy the channel after hibernation.
+	 *
+	 * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+	 * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+	 */
+	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+		wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+	mutex_lock(&vmbus_connection.channel_mutex);
+
+	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+		if (is_hvsock_channel(channel)) {
+			if (!channel->rescind) {
+				pr_err("hv_sock channel not rescinded!\n");
+				WARN_ON_ONCE(1);
+			}
+			continue;
+		}
+
+		spin_lock_irqsave(&channel->lock, flags);
+		list_for_each_entry(sc, &channel->sc_list, sc_list) {
+			pr_err("Sub-channel not deleted!\n");
+			WARN_ON_ONCE(1);
+		}
+		spin_unlock_irqrestore(&channel->lock, flags);
+	}
+
+	mutex_unlock(&vmbus_connection.channel_mutex);
+
 	vmbus_initiate_unload(false);
 
 	vmbus_connection.conn_state = DISCONNECTED;
@@ -2186,6 +2225,9 @@ static int vmbus_bus_resume(struct device *dev)
 
 	vmbus_request_offers();
 
+	/* Reset the event for the next suspend. */
+	reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
 	return 0;
 }
 
-- 
1.8.3.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox