Linux-HyperV List
 help / color / mirror / Atom feed
* [GIT PULL] Hyper-V fixes for v5.3-rc
From: Sasha Levin @ 2019-08-17 19:59 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, linux-hyperv, kys, sthemmin, linux-kernel

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

The following changes since commit 5f9e832c137075045d15cd6899ab0505cfb2ca4b:

  Linus 5.3-rc1 (2019-07-21 14:05:38 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git tags/hyperv-fixes-signed

for you to fetch changes up to bafe1e79e05de725e26b3f60c90b49e635b686b9:

  MAINTAINERS: Fix Hyperv vIOMMU driver file name (2019-08-17 15:29:39 -0400)

- ----------------------------------------------------------------
- - A few fixes for the userspace hyper-v tools from Adrian Vladu.
- - A fix for the hyper-v MAINTAINERs entry from Lan Tianyu.
- - Fix for SPDX license identifier in the userspace tools from Nishad
Kamdar.

- ----------------------------------------------------------------
Adrian Vladu (3):
      tools: hv: fixed Python pep8/flake8 warnings for lsvmbus
      tools: hv: fix KVP and VSS daemons exit code
      tools: hv: fix typos in toolchain

Lan Tianyu (1):
      MAINTAINERS: Fix Hyperv vIOMMU driver file name

Nishad Kamdar (1):
      tools: hv: Use the correct style for SPDX License Identifier

 MAINTAINERS                  |  2 +-
 drivers/hv/hv_trace.h        |  2 +-
 tools/hv/hv_get_dhcp_info.sh |  2 +-
 tools/hv/hv_kvp_daemon.c     |  8 +++--
 tools/hv/hv_set_ifconfig.sh  |  2 +-
 tools/hv/hv_vss_daemon.c     |  4 ++-
 tools/hv/lsvmbus             | 75 +++++++++++++++++++++++++-------------------
 7 files changed, 54 insertions(+), 41 deletions(-)
-----BEGIN PGP SIGNATURE-----

iQIzBAEBCgAdFiEE4n5dijQDou9mhzu83qZv95d3LNwFAl1YXDEACgkQ3qZv95d3
LNwzQxAAlJgFE5AQ0etqM4ns/wx05AOePVHR90ua+FCU0W3umL5vGeYxCbl9dJ08
zYUhnoq/y4nbccIH7edJlrxb/J9+Slsp0FxWBbPSGjvbLK0yjaxHup8bcdyq6/pP
UM/fvaPzd7NnK/LPehDlV1l2skqbsimm2wbv7P1sXYZ8aQwowXxJkeVeKvfipiCw
MAB1KMCZGJg1n9w6xi2j+wnV4cuRgcMX/n+0C5Qc2AfFAVPrPzEJoGiRJBhQgqf0
JDg1+hWKrAyTAJvKHQf8o/8EsvLr/Itm1t9Q+s3eQUFqbvVPilbFR8OltSgPHgUM
PJG+Kur49jjOpUR1OJ8MeRQXqblRqSxpe7POsjP1vnGTCvuO/sLSHiCDT7+3YIEN
+bXaONOTCQSH5j8KgNE6MN/wfGIpoSEgMJEoN16OyELsQa2zIhRCwHdz72DkojJq
QzrGkChLwAz8Dw3Ul/NX36MJMAyT9DTM3GE1IXY4LzOL0381JRbvms1vwP2dIkL4
c3+tGJ7iBi23KEfkBdD3fq2JBs3KVCucIMOZX9RWDqbRhl61015GKPSALAD7r9vR
BKYo3hdaDxk7DpSTHTupUds/EmSnkS+7poaXl2iY2jVJTUXXIyg7Ig3IBI7Vqhc9
3SgunAaaKdgrh60JxkVTJ7WQUAPbOf3h/a/P1WAhSrpkxHB0XXI=
=BevR
-----END PGP SIGNATURE-----

^ permalink raw reply

* RE: [PATCH] video: hyperv: hyperv_fb: Obtain screen resolution from Hyper-V host
From: Michael Kelley @ 2019-08-18 20:06 UTC (permalink / raw)
  To: Wei Hu, b.zolnierkie@samsung.com, linux-hyperv@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linux-fbdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, sashal@kernel.org,
	Stephen Hemminger, Haiyang Zhang, KY Srinivasan, Dexuan Cui,
	Iouri Tarassov
In-Reply-To: <20190813095351.1780-1-weh@microsoft.com>

From: Wei Hu <weh@microsoft.com> Sent: Tuesday, August 13, 2019 2:55 AM
> 
> Beginning from Windows 10 RS5+, VM screen resolution is obtained from host.
> The "video=hyperv_fb" boot time option is not needed, but still can be
> used to overwrite the VM resolution. The VM resolution on the host could be

I would word this as "used to override what the host specifies."

> set by executing the powershell "set-vmvideo" command.
> 
> Signed-off-by: Iouri Tarassov <iourit@microsoft.com>
> Signed-off-by: Wei Hu <weh@microsoft.com>
> ---
>  drivers/video/fbdev/hyperv_fb.c | 136 +++++++++++++++++++++++++++++---
>  1 file changed, 125 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
> index 00f5bdcc6c6f..1042f3311fa2 100644
> --- a/drivers/video/fbdev/hyperv_fb.c
> +++ b/drivers/video/fbdev/hyperv_fb.c
> @@ -23,6 +23,14 @@
>   *
>   * Portrait orientation is also supported:
>   *     For example: video=hyperv_fb:864x1152
> + *
> + * When a Windows 10 RS5+ host is used, the virtual machine screen
> + * resolution is obtained from the host. The "video=hyperv_fb" option is
> + * not needed, but still can be used to overwrite the VM resolution. The

As above, "but still can be used to override what the host specifies."

> + * VM resolution on the host could be set by executing the powershell
> + * "set-vmvideo" command. For example
> + *     set-vmvideo -vmname name -horizontalresolution:1920 \
> + * -verticalresolution:1200 -resolutiontype single
>   */
> 
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> @@ -44,6 +52,7 @@
>  #define SYNTHVID_VERSION(major, minor) ((minor) << 16 | (major))
>  #define SYNTHVID_VERSION_WIN7 SYNTHVID_VERSION(3, 0)
>  #define SYNTHVID_VERSION_WIN8 SYNTHVID_VERSION(3, 2)
> +#define SYNTHVID_VERSION_WIN10 SYNTHVID_VERSION(3, 5)
> 
>  #define SYNTHVID_DEPTH_WIN7 16
>  #define SYNTHVID_DEPTH_WIN8 32
> @@ -82,16 +91,25 @@ enum synthvid_msg_type {
>  	SYNTHVID_POINTER_SHAPE		= 8,
>  	SYNTHVID_FEATURE_CHANGE		= 9,
>  	SYNTHVID_DIRT			= 10,
> +	SYNTHVID_RESOLUTION_REQUEST	= 13,
> +	SYNTHVID_RESOLUTION_RESPONSE	= 14,
> 
> -	SYNTHVID_MAX			= 11
> +	SYNTHVID_MAX			= 15
>  };
> 
> +#define		SYNTHVID_EDID_BLOCK_SIZE	128
> +#define		SYNTHVID_MAX_RESOLUTION_COUNT	64
> +
> +struct hvd_screen_info {
> +	u16 width;
> +	u16 height;
> +} __packed;
> +
>  struct synthvid_msg_hdr {
>  	u32 type;
>  	u32 size;  /* size of this header + payload after this field*/
>  } __packed;
> 
> -
>  struct synthvid_version_req {
>  	u32 version;
>  } __packed;
> @@ -102,6 +120,18 @@ struct synthvid_version_resp {
>  	u8 max_video_outputs;
>  } __packed;
> 
> +struct synthvid_supported_resolution_req {
> +	u8 maximum_resolution_count;
> +} __packed;
> +
> +struct synthvid_supported_resolution_resp {
> +	u8 edid_block[SYNTHVID_EDID_BLOCK_SIZE];
> +	u8 resolution_count;
> +	u8 default_resolution_index;
> +	u8 is_standard;
> +	struct hvd_screen_info supported_resolution[1];

It seems like the array size should be SYNTHVID_MAX_RESOLUTION_COUNT.
Otherwise code might not factor in the full size of the data structure, such
as in the memset() call in synthvid_get_supported_resolution().

> +} __packed;
> +
>  struct synthvid_vram_location {
>  	u64 user_ctx;
>  	u8 is_vram_gpa_specified;
> @@ -187,6 +217,8 @@ struct synthvid_msg {
>  		struct synthvid_pointer_shape ptr_shape;
>  		struct synthvid_feature_change feature_chg;
>  		struct synthvid_dirt dirt;
> +		struct synthvid_supported_resolution_req resolution_req;
> +		struct synthvid_supported_resolution_resp resolution_resp;
>  	};
>  } __packed;
> 
> @@ -224,6 +256,8 @@ struct hvfb_par {
> 
>  static uint screen_width = HVFB_WIDTH;
>  static uint screen_height = HVFB_HEIGHT;
> +static uint screen_width_max = HVFB_WIDTH;
> +static uint screen_height_max = HVFB_HEIGHT;
>  static uint screen_depth;
>  static uint screen_fb_size;
> 
> @@ -354,6 +388,7 @@ static void synthvid_recv_sub(struct hv_device *hdev)
> 
>  	/* Complete the wait event */
>  	if (msg->vid_hdr.type == SYNTHVID_VERSION_RESPONSE ||
> +	    msg->vid_hdr.type == SYNTHVID_RESOLUTION_RESPONSE ||
>  	    msg->vid_hdr.type == SYNTHVID_VRAM_LOCATION_ACK) {
>  		memcpy(par->init_buf, msg, MAX_VMBUS_PKT_SIZE);
>  		complete(&par->wait);
> @@ -428,6 +463,64 @@ static int synthvid_negotiate_ver(struct hv_device *hdev, u32 ver)
>  	}
> 
>  	par->synthvid_version = ver;
> +	pr_info("Synthvid Version major %d, minor %d\n",
> +		ver & 0x0000ffff, (ver & 0xffff0000) >> 16);
> +
> +out:
> +	return ret;
> +}
> +
> +/* Get current resolution from the host */
> +static int synthvid_get_supported_resolution(struct hv_device *hdev)
> +{
> +	struct fb_info *info = hv_get_drvdata(hdev);
> +	struct hvfb_par *par = info->par;
> +	struct synthvid_msg *msg = (struct synthvid_msg *)par->init_buf;
> +	int ret = 0;
> +	unsigned long t;
> +	u8 index;
> +	int i;
> +
> +	memset(msg, 0, sizeof(struct synthvid_msg));
> +	msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST;
> +	msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
> +		sizeof(struct synthvid_supported_resolution_req);
> +
> +	msg->resolution_req.maximum_resolution_count =
> +		SYNTHVID_MAX_RESOLUTION_COUNT;
> +	synthvid_send(hdev, msg);
> +
> +	t = wait_for_completion_timeout(&par->wait, VSP_TIMEOUT);
> +	if (!t) {
> +		pr_err("Time out on waiting resolution response\n");
> +			ret = -ETIMEDOUT;
> +			goto out;
> +	}
> +
> +	if (msg->resolution_resp.resolution_count == 0) {
> +		pr_err("No supported resolutions\n");
> +		ret = -ENODEV;
> +		goto out;
> +	}
> +
> +	index = msg->resolution_resp.default_resolution_index;
> +	if (index >= msg->resolution_resp.resolution_count) {
> +		pr_err("Invalid resolution index: %d\n", index);
> +		ret = -ENODEV;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < msg->resolution_resp.resolution_count; i++) {
> +		screen_width_max = max_t(unsigned int, screen_width_max,
> +		    msg->resolution_resp.supported_resolution[i].width);
> +		screen_height_max = max_t(unsigned int, screen_height_max,
> +		    msg->resolution_resp.supported_resolution[i].height);
> +	}
> +
> +	screen_width =
> +		msg->resolution_resp.supported_resolution[index].width;
> +	screen_height =
> +		msg->resolution_resp.supported_resolution[index].height;
> 
>  out:
>  	return ret;
> @@ -448,11 +541,21 @@ static int synthvid_connect_vsp(struct hv_device *hdev)
>  	}
> 
>  	/* Negotiate the protocol version with host */
> -	if (vmbus_proto_version == VERSION_WS2008 ||
> -	    vmbus_proto_version == VERSION_WIN7)
> +	switch (vmbus_proto_version) {
> +	case VERSION_WS2008:
> +	case VERSION_WIN7:
>  		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN7);
> -	else
> +		break;
> +	case VERSION_WIN8:
> +	case VERSION_WIN8_1:
>  		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN8);
> +		break;
> +	case VERSION_WIN10:

I wonder if this does the right thing on a system with VERSION_WIN10.  The existing
code would treat this like VERSION_WIN8.  Your commit message says that the new
functionality of getting the resolution from the host came as part of RS5, and I suspect
there are host versions that report VERSION_WIN10 but that aren't RS5.  You may have
already clarified this with the Hyper-V people, but if not, we should do so.  The
version negotiation here doesn't fallback to an earlier version if Hyper-V doesn't accept
what this code requests.  However, the more robust approach might be to implement
fallback on the SYNTHVID_VERSION setting.

> +	case VERSION_WIN10_V5:
> +	default:
> +		ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN10);
> +		break;
> +	}
> 

Michael

^ permalink raw reply

* RE: [PATCH] video: hyperv: hyperv_fb: Support deferred IO for Hyper-V frame buffer driver
From: Michael Kelley @ 2019-08-18 22:41 UTC (permalink / raw)
  To: Wei Hu, rdunlap@infradead.org, shc_work@mail.ru,
	gregkh@linuxfoundation.org, lee.jones@linaro.org,
	alexandre.belloni@bootlin.com, baijiaju1990@gmail.com,
	fthain@telegraphics.com.au, info@metux.net,
	linux-hyperv@vger.kernel.org, dri-devel@lists.freedesktop.org,
	linux-fbdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	sashal@kernel.org, Stephen Hemminger, Haiyang Zhang,
	KY Srinivasan, Dexuan Cui, Iouri Tarassov
In-Reply-To: <20190813103548.2008-1-weh@microsoft.com>

From: Wei Hu <weh@microsoft.com> Sent: Tuesday, August 13, 2019 3:37 AM
> 
> Without deferred IO support, hyperv_fb driver informs the host to refresh
> the entire guest frame buffer at fixed rate, e.g. at 20Hz, no matter there
> is screen update or not. This patch supports defered IO for screens in

s/defered/deferred/

> graphic mode and also enables the framme buffer on-demand refresh. The

s/graphic/graphics/
s/framme/frame/

> highest refresh rate is still set at 20Hz.
> 
> Due to limitation on Hyper-V host, we keep a shadow copy of frame buffer

I think it might be worthwhile to explain exactly what the issue is so that
there's a record kept.

> in the guest. This means one more copy of the dirty rectangle inside
> guest when doing the on-demand refresh. This can be optimized in the
> future with help from host. For now the host performance gain from deferred
> IO outweighs the shadow copy impact in the guest.
> 
> Signed-off-by: Wei Hu <weh@microsoft.com>
> ---
>  drivers/video/fbdev/Kconfig     |   1 +
>  drivers/video/fbdev/hyperv_fb.c | 217 +++++++++++++++++++++++++++++---
>  2 files changed, 198 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
> index 1b2f5f31fb6f..e781f89a1824 100644
> --- a/drivers/video/fbdev/Kconfig
> +++ b/drivers/video/fbdev/Kconfig
> @@ -2241,6 +2241,7 @@ config FB_HYPERV
>  	select FB_CFB_FILLRECT
>  	select FB_CFB_COPYAREA
>  	select FB_CFB_IMAGEBLIT
> +	select FB_DEFERRED_IO
>  	help
>  	  This framebuffer driver supports Microsoft Hyper-V Synthetic Video.
> 
> diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
> index 1042f3311fa2..85198a6ea8e7 100644
> --- a/drivers/video/fbdev/hyperv_fb.c
> +++ b/drivers/video/fbdev/hyperv_fb.c
> @@ -233,6 +233,7 @@ struct synthvid_msg {
>  #define RING_BUFSIZE (256 * 1024)
>  #define VSP_TIMEOUT (10 * HZ)
>  #define HVFB_UPDATE_DELAY (HZ / 20)
> +#define HVFB_ONDEMAND_THROTTLE (HZ / 20)
> 
>  struct hvfb_par {
>  	struct fb_info *info;
> @@ -252,6 +253,17 @@ struct hvfb_par {
>  	bool synchronous_fb;
> 
>  	struct notifier_block hvfb_panic_nb;
> +
> +	/* Memory for deferred IO and frame buffer itself */
> +	unsigned char *dio_vp;
> +	unsigned char *mmio_vp;
> +	unsigned long mmio_pp;
> +	spinlock_t docopy_lock; /* Lock to protect memory copy */
> +
> +	/* Dirty rectangle, protected by delayed_refresh_lock */
> +	int x1, y1, x2, y2;
> +	bool delayed_refresh;
> +	spinlock_t delayed_refresh_lock;
>  };
> 
>  static uint screen_width = HVFB_WIDTH;
> @@ -260,6 +272,7 @@ static uint screen_width_max = HVFB_WIDTH;
>  static uint screen_height_max = HVFB_HEIGHT;
>  static uint screen_depth;
>  static uint screen_fb_size;
> +static uint dio_fb_size; /* FB size for deferred IO */
> 
>  /* Send message to Hyper-V host */
>  static inline int synthvid_send(struct hv_device *hdev,
> @@ -346,28 +359,88 @@ static int synthvid_send_ptr(struct hv_device *hdev)
>  }
> 
>  /* Send updated screen area (dirty rectangle) location to host */
> -static int synthvid_update(struct fb_info *info)
> +static int
> +synthvid_update(struct fb_info *info, int x1, int y1, int x2, int y2)
>  {
>  	struct hv_device *hdev = device_to_hv_device(info->device);
>  	struct synthvid_msg msg;
> 
>  	memset(&msg, 0, sizeof(struct synthvid_msg));
> +	if (x2 == INT_MAX)
> +		x2 = info->var.xres;
> +	if (y2 == INT_MAX)
> +		y2 = info->var.yres;
> 
>  	msg.vid_hdr.type = SYNTHVID_DIRT;
>  	msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
>  		sizeof(struct synthvid_dirt);
>  	msg.dirt.video_output = 0;
>  	msg.dirt.dirt_count = 1;
> -	msg.dirt.rect[0].x1 = 0;
> -	msg.dirt.rect[0].y1 = 0;
> -	msg.dirt.rect[0].x2 = info->var.xres;
> -	msg.dirt.rect[0].y2 = info->var.yres;
> +	msg.dirt.rect[0].x1 = (x1 < 0 || x1 > x2) ? 0 : x1;
> +	msg.dirt.rect[0].y1 = (y2 < 0 || y1 > y2) ? 0 : y1;

This should be:

	msg.dirt.rect[0].y1 = (y1 < 0 || y1 > y2) ? 0 : y1;

Also, throughout the code, I don't think there are any places where
x or y coordinate values are ever negative.  INT_MAX or 0 is used as the
sentinel value indicating "not set".  So can all the tests for less than 0
now be eliminated, both in this function and in other functions?

> +	msg.dirt.rect[0].x2 =
> +		(x2 < x1 || x2 > info->var.xres) ? info->var.xres : x2;
> +	msg.dirt.rect[0].y2 =
> +		(y2 < y1 || y2 > info->var.yres) ? info->var.yres : y2;

How exactly is the dirty rectangle specified to Hyper-V?  Suppose the frame
buffer resolution is 100x200.  If you want to specify the entire rectangle, the
first coordinate is (0, 0).  But what is the second coordinate?  Should it be
(99, 199) or (100, 200)?  The above code (and original code) implies it
should specified as (100, 200), which is actually a point outside the
maximum resolution, which is counter-intuitive and makes me wonder
if the code is correct.

> 
>  	synthvid_send(hdev, &msg);
> 
>  	return 0;
>  }
> 
> +static void hvfb_docopy(struct hvfb_par *par,
> +			unsigned long offset,
> +			unsigned long size)
> +{
> +	if (!par || !par->mmio_vp || !par->dio_vp || !par->fb_ready ||
> +	    size == 0 || offset >= dio_fb_size)
> +		return;
> +
> +	if (offset + size > dio_fb_size)
> +		size = dio_fb_size - offset;
> +
> +	memcpy(par->mmio_vp + offset, par->dio_vp + offset, size);
> +}
> +
> +/* Deferred IO callback */
> +static void synthvid_deferred_io(struct fb_info *p,
> +				 struct list_head *pagelist)
> +{
> +	struct hvfb_par *par = p->par;
> +	struct page *page;
> +	unsigned long start, end;
> +	int y1, y2, miny, maxy;
> +	unsigned long flags;
> +
> +	miny = INT_MAX;
> +	maxy = 0;
> +
> +	list_for_each_entry(page, pagelist, lru) {
> +		start = page->index << PAGE_SHIFT;
> +		end = start + PAGE_SIZE - 1;
> +		y1 = start / p->fix.line_length;
> +		y2 = end / p->fix.line_length;

The above division rounds down because any remainder is discarded.  I
wondered whether rounding down is correct, which got me to thinking
about how the dirty rectangle is specified.  Is y2 the index of the last
dirty row?  If so, that's not consistent with the code in synthvid_update(),
which might choose var.yres as y2, and that's the index of a row outside
of the frame buffer.

> +		if (y2 > p->var.yres)
> +			y2 = p->var.yres;
> +		miny = min_t(int, miny, y1);
> +		maxy = max_t(int, maxy, y2);
> +
> +		/* Copy from dio space to mmio address */
> +		if (par->fb_ready) {
> +			spin_lock_irqsave(&par->docopy_lock, flags);
> +			hvfb_docopy(par, start, PAGE_SIZE);
> +			spin_unlock_irqrestore(&par->docopy_lock, flags);
> +		}
> +	}
> +
> +	if (par->fb_ready)
> +		synthvid_update(p, 0, miny, p->var.xres, maxy);
> +}
> +
> +static struct fb_deferred_io synthvid_defio = {
> +	.delay		= HZ / 20,
> +	.deferred_io	= synthvid_deferred_io,
> +};
> 
>  /*
>   * Actions on received messages from host:
> @@ -597,7 +670,7 @@ static int synthvid_send_config(struct hv_device *hdev)
>  	msg->vid_hdr.type = SYNTHVID_VRAM_LOCATION;
>  	msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
>  		sizeof(struct synthvid_vram_location);
> -	msg->vram.user_ctx = msg->vram.vram_gpa = info->fix.smem_start;
> +	msg->vram.user_ctx = msg->vram.vram_gpa = par->mmio_pp;
>  	msg->vram.is_vram_gpa_specified = 1;
>  	synthvid_send(hdev, msg);
> 
> @@ -607,7 +680,7 @@ static int synthvid_send_config(struct hv_device *hdev)
>  		ret = -ETIMEDOUT;
>  		goto out;
>  	}
> -	if (msg->vram_ack.user_ctx != info->fix.smem_start) {
> +	if (msg->vram_ack.user_ctx != par->mmio_pp) {
>  		pr_err("Unable to set VRAM location\n");
>  		ret = -ENODEV;
>  		goto out;
> @@ -624,19 +697,85 @@ static int synthvid_send_config(struct hv_device *hdev)
> 
>  /*
>   * Delayed work callback:
> - * It is called at HVFB_UPDATE_DELAY or longer time interval to process
> - * screen updates. It is re-scheduled if further update is necessary.
> + * It is scheduled to call whenever update request is received and it has
> + * not been called in last HVFB_ONDEMAND_THROTTLE time interval.
>   */
>  static void hvfb_update_work(struct work_struct *w)
>  {
>  	struct hvfb_par *par = container_of(w, struct hvfb_par, dwork.work);
>  	struct fb_info *info = par->info;
> +	unsigned long flags;
> +	int x1, x2, y1, y2;
> +	int j;
> +
> +	x1 = y1 = 0;
> +	x2 = y2 = INT_MAX;

The above two lines seem superfluous since all four values
are unconditionally set below when storing the dirty
rectangle to local variables.

> +
> +	spin_lock_irqsave(&par->delayed_refresh_lock, flags);
> +	/* Reset the request flag */
> +	par->delayed_refresh = false;
> +
> +	/* Store the dirty rectangle to local variables */
> +	x1 = par->x1;
> +	x2 = par->x2;
> +	y1 = par->y1;
> +	y2 = par->y2;
> +
> +	/* Clear dirty rectangle */
> +	par->x1 = par->y1 = INT_MAX;
> +	par->x2 = par->y2 = 0;
> 
> +	spin_unlock_irqrestore(&par->delayed_refresh_lock, flags);
> +
> +	if (x1 < 0 || x1 > info->var.xres || x2 < 0 ||
> +	    x2 > info->var.xres || y1 < 0 || y1 > info->var.yres ||
> +	    y2 < 0 || y2 > info->var.yres)
> +		return;
> +
> +	/* Copy the dirty rectangle to frame buffer memory */
> +	spin_lock_irqsave(&par->docopy_lock, flags);
> +	for (j = y1; j <= y2 && x1 < x2; j++) {

x1 < x2 doesn't seem to be needed as a loop control test as
neither value is changed in the loop.

> +		if (j == info->var.yres)
> +			break;
> +		hvfb_docopy(par,
> +			    j * info->fix.line_length +
> +			    (x1 * screen_depth / 8),
> +			    (x2 - x1 + 1) * screen_depth / 8);

Whether the +1 is needed above gets back to the question I
raised earlier about how to interpret the coordinates -- whether
the (x2, y2) coordinate is just outside the dirty rectangle or
just inside the dirty rectangle.  Most of the code seems to treat
it as being just outside the dirty rectangle, in which case the +1
should not be used.

> +	}
> +	spin_unlock_irqrestore(&par->docopy_lock, flags);
> +
> +	/* Refresh */
>  	if (par->fb_ready)
> -		synthvid_update(info);
> +		synthvid_update(info, x1, y1, x2, y2);
> +}
> +
> +/*
> + * Control the on-demand refresh frequency. It schedules a delayed
> + * screen update if it has not yet.
> + */
> +static void hvfb_ondemand_refresh_throttle(struct hvfb_par *par,
> +					   int x1, int y1, int w, int h)
> +{
> +	unsigned long flags;
> +	int x2 = x1 + w;
> +	int y2 = y1 + h;
> +
> +	spin_lock_irqsave(&par->delayed_refresh_lock, flags);
> +
> +	/* Merge dirty rectangle */
> +	par->x1 = min_t(int, par->x1, x1);
> +	par->y1 = min_t(int, par->y1, y1);
> +	par->x2 = max_t(int, par->x2, x2);
> +	par->y2 = max_t(int, par->y2, y2);
> +
> +	/* Schedule a delayed screen update if not yet */
> +	if (par->delayed_refresh == false) {
> +		schedule_delayed_work(&par->dwork,
> +				      HVFB_ONDEMAND_THROTTLE);
> +		par->delayed_refresh = true;
> +	}
> 
> -	if (par->update)
> -		schedule_delayed_work(&par->dwork, HVFB_UPDATE_DELAY);
> +	spin_unlock_irqrestore(&par->delayed_refresh_lock, flags);
>  }
> 
>  static int hvfb_on_panic(struct notifier_block *nb,
> @@ -648,7 +787,8 @@ static int hvfb_on_panic(struct notifier_block *nb,
>  	par = container_of(nb, struct hvfb_par, hvfb_panic_nb);
>  	par->synchronous_fb = true;
>  	info = par->info;
> -	synthvid_update(info);
> +	hvfb_docopy(par, 0, dio_fb_size);
> +	synthvid_update(info, 0, 0, INT_MAX, INT_MAX);
> 
>  	return NOTIFY_DONE;
>  }
> @@ -709,7 +849,10 @@ static void hvfb_cfb_fillrect(struct fb_info *p,
> 
>  	cfb_fillrect(p, rect);
>  	if (par->synchronous_fb)
> -		synthvid_update(p);
> +		synthvid_update(p, 0, 0, INT_MAX, INT_MAX);
> +	else
> +		hvfb_ondemand_refresh_throttle(par, rect->dx, rect->dy,
> +					       rect->width, rect->height);
>  }
> 
>  static void hvfb_cfb_copyarea(struct fb_info *p,
> @@ -719,7 +862,10 @@ static void hvfb_cfb_copyarea(struct fb_info *p,
> 
>  	cfb_copyarea(p, area);
>  	if (par->synchronous_fb)
> -		synthvid_update(p);
> +		synthvid_update(p, 0, 0, INT_MAX, INT_MAX);
> +	else
> +		hvfb_ondemand_refresh_throttle(par, area->dx, area->dy,
> +					       area->width, area->height);
>  }
> 
>  static void hvfb_cfb_imageblit(struct fb_info *p,
> @@ -729,7 +875,10 @@ static void hvfb_cfb_imageblit(struct fb_info *p,
> 
>  	cfb_imageblit(p, image);
>  	if (par->synchronous_fb)
> -		synthvid_update(p);
> +		synthvid_update(p, 0, 0, INT_MAX, INT_MAX);
> +	else
> +		hvfb_ondemand_refresh_throttle(par, image->dx, image->dy,
> +					       image->width, image->height);
>  }
> 
>  static struct fb_ops hvfb_ops = {
> @@ -788,6 +937,9 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info
> *info)
>  	resource_size_t pot_start, pot_end;
>  	int ret;
> 
> +	dio_fb_size =
> +		screen_width * screen_height * screen_depth / 8;
> +
>  	if (gen2vm) {
>  		pot_start = 0;
>  		pot_end = -1;
> @@ -822,9 +974,15 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info
> *info)
>  	if (!fb_virt)
>  		goto err2;
> 
> +	/* Allocate memory for deferred IO */
> +	par->dio_vp = vzalloc(((dio_fb_size >> PAGE_SHIFT) + 1)
> +				 << PAGE_SHIFT);

I'd suggest using the round_up() function so that what you are doing
is explicit.

> +	if (par->dio_vp == NULL)
> +		goto err3;
> +
>  	info->apertures = alloc_apertures(1);
>  	if (!info->apertures)
> -		goto err3;
> +		goto err4;
> 
>  	if (gen2vm) {
>  		info->apertures->ranges[0].base = screen_info.lfb_base;
> @@ -836,16 +994,23 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info
> *info)
>  		info->apertures->ranges[0].size = pci_resource_len(pdev, 0);
>  	}
> 
> +	/* Physical address of FB device */
> +	par->mmio_pp = par->mem->start;
> +	/* Virtual address of FB device */
> +	par->mmio_vp = (unsigned char *) fb_virt;
> +
>  	info->fix.smem_start = par->mem->start;
> -	info->fix.smem_len = screen_fb_size;
> -	info->screen_base = fb_virt;
> -	info->screen_size = screen_fb_size;
> +	info->fix.smem_len = dio_fb_size;
> +	info->screen_base = par->dio_vp;
> +	info->screen_size = dio_fb_size;
> 
>  	if (!gen2vm)
>  		pci_dev_put(pdev);
> 
>  	return 0;
> 
> +err4:
> +	vfree(par->dio_vp);
>  err3:
>  	iounmap(fb_virt);
>  err2:
> @@ -863,6 +1028,7 @@ static void hvfb_putmem(struct fb_info *info)
>  {
>  	struct hvfb_par *par = info->par;
> 
> +	vfree(par->dio_vp);
>  	iounmap(info->screen_base);
>  	vmbus_free_mmio(par->mem->start, screen_fb_size);
>  	par->mem = NULL;
> @@ -888,6 +1054,12 @@ static int hvfb_probe(struct hv_device *hdev,
>  	init_completion(&par->wait);
>  	INIT_DELAYED_WORK(&par->dwork, hvfb_update_work);
> 
> +	par->delayed_refresh = false;
> +	spin_lock_init(&par->delayed_refresh_lock);
> +	spin_lock_init(&par->docopy_lock);
> +	par->x1 = par->y1 = INT_MAX;
> +	par->x2 = par->y2 = 0;
> +
>  	/* Connect to VSP */
>  	hv_set_drvdata(hdev, info);
>  	ret = synthvid_connect_vsp(hdev);
> @@ -939,6 +1111,10 @@ static int hvfb_probe(struct hv_device *hdev,
>  	info->fbops = &hvfb_ops;
>  	info->pseudo_palette = par->pseudo_palette;
> 
> +	/* Initialize deferred IO */
> +	info->fbdefio = &synthvid_defio;
> +	fb_deferred_io_init(info);
> +
>  	/* Send config to host */
>  	ret = synthvid_send_config(hdev);
>  	if (ret)
> @@ -960,6 +1136,7 @@ static int hvfb_probe(struct hv_device *hdev,
>  	return 0;
> 
>  error:
> +	fb_deferred_io_cleanup(info);
>  	hvfb_putmem(info);
>  error2:
>  	vmbus_close(hdev->channel);
> --
> 2.20.1


^ permalink raw reply

* [PATCH v2] Tools: hv: move to tools buildsystem
From: Andy Shevchenko @ 2019-08-19 12:41 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, linux-hyperv,
	Sasha Levin
  Cc: Andy Shevchenko, Vitaly Kuznetsov

There is a nice buildsystem dedicated for userspace tools in Linux kernel tree.
Switch Hyper-V daemons to be built by it.

Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
- fix commit message (Vitaly)

 tools/hv/Build    |  3 +++
 tools/hv/Makefile | 51 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 44 insertions(+), 10 deletions(-)
 create mode 100644 tools/hv/Build

diff --git a/tools/hv/Build b/tools/hv/Build
new file mode 100644
index 000000000000..6cf51fa4b306
--- /dev/null
+++ b/tools/hv/Build
@@ -0,0 +1,3 @@
+hv_kvp_daemon-y += hv_kvp_daemon.o
+hv_vss_daemon-y += hv_vss_daemon.o
+hv_fcopy_daemon-y += hv_fcopy_daemon.o
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 5db5e62cebda..b57143d9459c 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -1,28 +1,55 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Hyper-V tools
-
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
-
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+include ../scripts/Makefile.include
 
 sbindir ?= /usr/sbin
 libexecdir ?= /usr/libexec
 sharedstatedir ?= /var/lib
 
-ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
 
 ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
 
 all: $(ALL_PROGRAMS)
 
-%: %.c
-	$(CC) $(CFLAGS) -o $@ $^
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o
+$(HV_KVP_DAEMON_IN): FORCE
+	$(Q)$(MAKE) $(build)=hv_kvp_daemon
+$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o
+$(HV_VSS_DAEMON_IN): FORCE
+	$(Q)$(MAKE) $(build)=hv_vss_daemon
+$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o
+$(HV_FCOPY_DAEMON_IN): FORCE
+	$(Q)$(MAKE) $(build)=hv_fcopy_daemon
+$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
 
 clean:
-	$(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+	rm -f $(ALL_PROGRAMS)
+	find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
 
-install: all
+install: $(ALL_PROGRAMS)
 	install -d -m 755 $(DESTDIR)$(sbindir); \
 	install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
 	install -d -m 755 $(DESTDIR)$(sharedstatedir); \
@@ -33,3 +60,7 @@ install: all
 	for script in $(ALL_SCRIPTS); do \
 		install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
 	done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
-- 
2.23.0.rc1


^ permalink raw reply related

* [PATCH V3 0/3] KVM/Hyper-V: Add Hyper-V direct tlb flush support
From: lantianyu1986 @ 2019-08-19 13:17 UTC (permalink / raw)
  To: pbonzini, rkrcmar, corbet, kys, haiyangz, sthemmin, sashal, tglx,
	mingo, bp, hpa, x86, michael.h.kelley
  Cc: Tianyu Lan, kvm, linux-doc, linux-hyperv, linux-kernel, vkuznets

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

This patchset is to add Hyper-V direct tlb support in KVM. Hyper-V
in L0 can delegate L1 hypervisor to handle tlb flush request from
L2 guest when direct tlb flush is enabled in L1.

Patch 2 introduces new cap KVM_CAP_HYPERV_DIRECT_TLBFLUSH to enable
feature from user space. User space should enable this feature only
when Hyper-V hypervisor capability is exposed to guest and KVM profile
is hided. There is a parameter conflict between KVM and Hyper-V hypercall.
We hope L2 guest doesn't use KVM hypercall when the feature is
enabled. Detail please see comment of new API "KVM_CAP_HYPERV_DIRECT_TLBFLUSH"

Change since v2:
       - Move hv assist page(hv_pa_pg) from struct kvm  to struct kvm_hv.

Change since v1:
       - Fix offset issue in the patch 1.
       - Update description of KVM KVM_CAP_HYPERV_DIRECT_TLBFLUSH.


Tianyu Lan (2):
  x86/Hyper-V: Fix definition of struct hv_vp_assist_page
  KVM/Hyper-V: Add new KVM cap KVM_CAP_HYPERV_DIRECT_TLBFLUSH

Vitaly Kuznetsov (1):
  KVM/Hyper-V/VMX: Add direct tlb flush support

 Documentation/virtual/kvm/api.txt  | 13 +++++++++++++
 arch/x86/include/asm/hyperv-tlfs.h | 24 ++++++++++++++++++-----
 arch/x86/include/asm/kvm_host.h    |  4 ++++
 arch/x86/kvm/vmx/evmcs.h           |  2 ++
 arch/x86/kvm/vmx/vmx.c             | 39 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c                 |  8 ++++++++
 include/uapi/linux/kvm.h           |  1 +
 7 files changed, 86 insertions(+), 5 deletions(-)

-- 
2.14.5


^ permalink raw reply

* [PATCH V3 1/3] x86/Hyper-V: Fix definition of struct hv_vp_assist_page
From: lantianyu1986 @ 2019-08-19 13:17 UTC (permalink / raw)
  To: pbonzini, rkrcmar, corbet, kys, haiyangz, sthemmin, sashal, tglx,
	mingo, bp, hpa, x86, michael.h.kelley
  Cc: Tianyu Lan, kvm, linux-doc, linux-kernel, linux-hyperv, vkuznets
In-Reply-To: <20190819131737.26942-1-Tianyu.Lan@microsoft.com>

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

The struct hv_vp_assist_page was defined incorrectly.
The "vtl_control" should be u64[3], "nested_enlightenments
_control" should be a u64 and there is 7 reserved bytes
following "enlighten_vmentry". This patch is to fix it.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
--
Change since v1:
       Move definition of struct hv_nested_enlightenments_control
       into this patch to fix offset issue.
---
 arch/x86/include/asm/hyperv-tlfs.h | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index af78cd72b8f3..cf0b2a04271d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -514,14 +514,24 @@ struct hv_timer_message_payload {
 	__u64 delivery_time;	/* When the message was delivered */
 } __packed;
 
+struct hv_nested_enlightenments_control {
+	struct {
+		__u32 directhypercall:1;
+		__u32 reserved:31;
+	} features;
+	struct {
+		__u32 reserved;
+	} hypercallControls;
+} __packed;
+
 /* Define virtual processor assist page structure. */
 struct hv_vp_assist_page {
 	__u32 apic_assist;
-	__u32 reserved;
-	__u64 vtl_control[2];
-	__u64 nested_enlightenments_control[2];
-	__u32 enlighten_vmentry;
-	__u32 padding;
+	__u32 reserved1;
+	__u64 vtl_control[3];
+	struct hv_nested_enlightenments_control nested_control;
+	__u8 enlighten_vmentry;
+	__u8 reserved2[7];
 	__u64 current_nested_vmcs;
 } __packed;
 
-- 
2.14.5


^ permalink raw reply related

* [PATCH V3 2/3] KVM/Hyper-V: Add new KVM cap KVM_CAP_HYPERV_DIRECT_TLBFLUSH
From: lantianyu1986 @ 2019-08-19 13:17 UTC (permalink / raw)
  To: pbonzini, rkrcmar, corbet, kys, haiyangz, sthemmin, sashal, tglx,
	mingo, bp, hpa, x86, michael.h.kelley
  Cc: Tianyu Lan, kvm, linux-doc, linux-kernel, linux-hyperv, vkuznets
In-Reply-To: <20190819131737.26942-1-Tianyu.Lan@microsoft.com>

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

This patch adds new KVM cap KVM_CAP_HYPERV_DIRECT_TLBFLUSH and let
user space to enable direct tlb flush function when only Hyper-V
hypervsior capability is exposed to VM. This patch also adds
enable_direct_tlbflush callback in the struct kvm_x86_ops and
platforms may use it to implement direct tlb flush support.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
Change since v1:
       Update description of KVM_CAP_HYPERV_DIRECT_TLBFLUSH
       in the KVM API doc.
---
 Documentation/virtual/kvm/api.txt | 13 +++++++++++++
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/x86.c                |  8 ++++++++
 include/uapi/linux/kvm.h          |  1 +
 4 files changed, 24 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 2cd6250b2896..0c6e1b25d0c8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -5289,3 +5289,16 @@ Architectures: x86
 This capability indicates that KVM supports paravirtualized Hyper-V IPI send
 hypercalls:
 HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
+8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+
+Architecture: x86
+
+This capability indicates that KVM running on top of Hyper-V hypervisor
+enables Direct TLB flush for its guests meaning that TLB flush
+hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
+Due to the different ABI for hypercall parameters between Hyper-V and
+KVM, enabling this capability effectively disables all hypercall
+handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
+flush hypercalls by Hyper-V) so userspace should disable KVM identification
+in CPUID and only exposes Hyper-V identification. In this case, guest
+thinks it's running on Hyper-V and only use Hyper-V hypercalls.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0cc5b611a113..667d154e89d4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1205,6 +1205,8 @@ struct kvm_x86_ops {
 	uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu);
 
 	bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
+
+	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d7b9e6a0939..a9d8ee7f7bf0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3183,6 +3183,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = kvm_x86_ops->get_nested_state ?
 			kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
 		break;
+	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+		r = kvm_x86_ops->enable_direct_tlbflush ? 1 : 0;
+		break;
 	default:
 		break;
 	}
@@ -3953,6 +3956,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				r = -EFAULT;
 		}
 		return r;
+	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+		if (!kvm_x86_ops->enable_direct_tlbflush)
+			return -ENOTTY;
+
+		return kvm_x86_ops->enable_direct_tlbflush(vcpu);
 
 	default:
 		return -EINVAL;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7c19540ce21..cb959bc925b1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -996,6 +996,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
 #define KVM_CAP_PMU_EVENT_FILTER 173
+#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 174
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
2.14.5


^ permalink raw reply related

* [PATCH V3 3/3] KVM/Hyper-V/VMX: Add direct tlb flush support
From: lantianyu1986 @ 2019-08-19 13:17 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, tglx, mingo, bp, hpa, x86,
	pbonzini, rkrcmar, michael.h.kelley
  Cc: Vitaly Kuznetsov, linux-hyperv, linux-kernel, kvm, Tianyu Lan
In-Reply-To: <20190819131737.26942-1-Tianyu.Lan@microsoft.com>

From: Vitaly Kuznetsov <vkuznets@redhat.com>


This patch is to enable Hyper-V direct tlb flush function
for vmx.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
Change since v2:
       - Move hv assist page(hv_pa_pg) from struct kvm to struct kvm_hv.
---
 arch/x86/include/asm/hyperv-tlfs.h |  4 ++++
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/vmx/evmcs.h           |  2 ++
 arch/x86/kvm/vmx/vmx.c             | 39 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index cf0b2a04271d..d53d6e4a6210 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -171,6 +171,7 @@
 #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED		BIT(14)
 
 /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
+#define HV_X64_NESTED_DIRECT_FLUSH			BIT(17)
 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH		BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP			BIT(19)
 
@@ -882,4 +883,7 @@ struct hv_tlb_flush_ex {
 	u64 gva_list[];
 } __packed;
 
+struct hv_partition_assist_pg {
+	u32 tlb_lock_count;
+};
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 667d154e89d4..ad4b5c02db0e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -840,6 +840,8 @@ struct kvm_hv {
 
 	/* How many vCPUs have VP index != vCPU index */
 	atomic_t num_mismatched_vp_indexes;
+
+	struct hv_partition_assist_pg *hv_pa_pg;
 };
 
 enum kvm_irqchip_mode {
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 39a24eec8884..07ebf6882a45 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr)
 	struct hv_vp_assist_page *vp_ap =
 		hv_get_vp_assist_page(smp_processor_id());
 
+	if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall)
+		vp_ap->nested_control.features.directhypercall = 1;
 	vp_ap->current_nested_vmcs = phys_addr;
 	vp_ap->enlighten_vmentry = 1;
 }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 84f8d49a2fd2..ed8056049070 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -486,6 +486,35 @@ static int hv_remote_flush_tlb(struct kvm *kvm)
 	return hv_remote_flush_tlb_with_range(kvm, NULL);
 }
 
+static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+	struct hv_enlightened_vmcs *evmcs;
+	struct hv_partition_assist_pg **p_hv_pa_pg =
+			&vcpu->kvm->arch.hyperv.hv_pa_pg;
+	/*
+	 * Synthetic VM-Exit is not enabled in current code and so All
+	 * evmcs in singe VM shares same assist page.
+	 */
+	if (!*p_hv_pa_pg) {
+		*p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!*p_hv_pa_pg)
+			return -ENOMEM;
+		pr_debug("KVM: Hyper-V: allocated PA_PG for %llx\n",
+		       (u64)&vcpu->kvm);
+	}
+
+	evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
+
+	evmcs->partition_assist_page =
+		__pa(*p_hv_pa_pg);
+	evmcs->hv_vm_id = (u64)vcpu->kvm;
+	evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+
+	pr_debug("KVM: Hyper-V: enabled DIRECT flush for %llx\n",
+		 (u64)vcpu->kvm);
+	return 0;
+}
+
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 /*
@@ -6516,6 +6545,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		current_evmcs->hv_clean_fields |=
 			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 
+	if (static_branch_unlikely(&enable_evmcs))
+		current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
+
 	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
 	if (vmx->host_debugctlmsr)
 		update_debugctlmsr(vmx->host_debugctlmsr);
@@ -6583,6 +6615,7 @@ static struct kvm *vmx_vm_alloc(void)
 
 static void vmx_vm_free(struct kvm *kvm)
 {
+	kfree(kvm->arch.hyperv.hv_pa_pg);
 	vfree(to_kvm_vmx(kvm));
 }
 
@@ -7815,6 +7848,7 @@ static void vmx_exit(void)
 			if (!vp_ap)
 				continue;
 
+			vp_ap->nested_control.features.directhypercall = 0;
 			vp_ap->current_nested_vmcs = 0;
 			vp_ap->enlighten_vmentry = 0;
 		}
@@ -7854,6 +7888,11 @@ static int __init vmx_init(void)
 			pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
 			static_branch_enable(&enable_evmcs);
 		}
+
+		if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
+			vmx_x86_ops.enable_direct_tlbflush
+				= hv_enable_direct_tlbflush;
+
 	} else {
 		enlightened_vmcs = false;
 	}
-- 
2.14.5


^ permalink raw reply related

* Re: [PATCH V3 2/3] KVM/Hyper-V: Add new KVM cap KVM_CAP_HYPERV_DIRECT_TLBFLUSH
From: Thomas Gleixner @ 2019-08-19 13:27 UTC (permalink / raw)
  To: lantianyu1986
  Cc: pbonzini, rkrcmar, corbet, kys, haiyangz, sthemmin, sashal, mingo,
	bp, hpa, x86, michael.h.kelley, Tianyu Lan, kvm, linux-doc,
	linux-kernel, linux-hyperv, vkuznets
In-Reply-To: <20190819131737.26942-3-Tianyu.Lan@microsoft.com>

On Mon, 19 Aug 2019, lantianyu1986@gmail.com wrote:

> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> This patch adds

Same git grep command as before

>  new KVM cap KVM_CAP_HYPERV_DIRECT_TLBFLUSH and let

baseball cap? Please do not use weird acronyms. This is text and there is
not limitation on characters.

> user space to enable direct tlb flush function when only Hyper-V
> hypervsior capability is exposed to VM.

Sorry, but I'm not understanding this sentence.

> This patch also adds

Once more

> enable_direct_tlbflush callback in the struct kvm_x86_ops and
> platforms may use it to implement direct tlb flush support.

Please tell in the changelog WHY you are doing things not what. The what is
obviously in the patch.

So you want to explain what you are trying to achieve and why it is
useful. Then you can add a short note about what you are adding, but not at
the level of detail which is available from the diff itself.

Thanks,

	tglx

^ permalink raw reply

* Re: [PATCH V3 3/3] KVM/Hyper-V/VMX: Add direct tlb flush support
From: Thomas Gleixner @ 2019-08-19 13:29 UTC (permalink / raw)
  To: lantianyu1986
  Cc: kys, haiyangz, sthemmin, sashal, mingo, bp, hpa, x86, pbonzini,
	rkrcmar, michael.h.kelley, Vitaly Kuznetsov, linux-hyperv,
	linux-kernel, kvm, Tianyu Lan
In-Reply-To: <20190819131737.26942-4-Tianyu.Lan@microsoft.com>

On Mon, 19 Aug 2019, lantianyu1986@gmail.com wrote:
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> 
> 
> This patch is to enable Hyper-V direct tlb flush function
> for vmx.

Groan. This sentence is not any different from the subject line.

Thanks,

	tglx

^ permalink raw reply

* Re: [PATCH V3 1/3] x86/Hyper-V: Fix definition of struct hv_vp_assist_page
From: Thomas Gleixner @ 2019-08-19 13:30 UTC (permalink / raw)
  To: lantianyu1986
  Cc: pbonzini, rkrcmar, corbet, kys, haiyangz, sthemmin, sashal, mingo,
	bp, hpa, x86, michael.h.kelley, Tianyu Lan, kvm, linux-doc,
	linux-kernel, linux-hyperv, vkuznets
In-Reply-To: <20190819131737.26942-2-Tianyu.Lan@microsoft.com>

On Mon, 19 Aug 2019, lantianyu1986@gmail.com wrote:

> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> The struct hv_vp_assist_page was defined incorrectly.
> The "vtl_control" should be u64[3], "nested_enlightenments

s/The /The member/

> _control" should be a u64 and there is 7 reserved bytes

s/is/are/

> following "enlighten_vmentry". This patch is to fix it.

git grep 'This patch' Documentation/process/

Thanks,

	tglx

^ permalink raw reply

* Re: [PATCH v2] Tools: hv: move to tools buildsystem
From: Vitaly Kuznetsov @ 2019-08-19 14:01 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, linux-hyperv,
	Sasha Levin
In-Reply-To: <20190819124100.81289-1-andriy.shevchenko@linux.intel.com>

Andy Shevchenko <andriy.shevchenko@linux.intel.com> writes:

> There is a nice buildsystem dedicated for userspace tools in Linux kernel tree.
> Switch Hyper-V daemons to be built by it.
>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>

Tested-by: Vitaly Kuznetsov <vkuznets@redhat.com>

While testing this I noticed that we get a warning (gcc-8.3):

hv_kvp_daemon.c: In function ‘kvp_get_ip_info.constprop’:
hv_kvp_daemon.c:812:30: warning: ‘ip_buffer’ may be used uninitialized in this function [-Wmaybe-uninitialized]
  struct hv_kvp_ipaddr_value *ip_buffer;

while this is clearly a false positive, I'd still want it to
disappear. I'll send a patch.

> ---
> - fix commit message (Vitaly)

Thanks!

>
>  tools/hv/Build    |  3 +++
>  tools/hv/Makefile | 51 +++++++++++++++++++++++++++++++++++++----------
>  2 files changed, 44 insertions(+), 10 deletions(-)
>  create mode 100644 tools/hv/Build
>
> diff --git a/tools/hv/Build b/tools/hv/Build
> new file mode 100644
> index 000000000000..6cf51fa4b306
> --- /dev/null
> +++ b/tools/hv/Build
> @@ -0,0 +1,3 @@
> +hv_kvp_daemon-y += hv_kvp_daemon.o
> +hv_vss_daemon-y += hv_vss_daemon.o
> +hv_fcopy_daemon-y += hv_fcopy_daemon.o
> diff --git a/tools/hv/Makefile b/tools/hv/Makefile
> index 5db5e62cebda..b57143d9459c 100644
> --- a/tools/hv/Makefile
> +++ b/tools/hv/Makefile
> @@ -1,28 +1,55 @@
>  # SPDX-License-Identifier: GPL-2.0
>  # Makefile for Hyper-V tools
> -
> -WARNINGS = -Wall -Wextra
> -CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
> -
> -CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
> +include ../scripts/Makefile.include
>  
>  sbindir ?= /usr/sbin
>  libexecdir ?= /usr/libexec
>  sharedstatedir ?= /var/lib
>  
> -ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
> +ifeq ($(srctree),)
> +srctree := $(patsubst %/,%,$(dir $(CURDIR)))
> +srctree := $(patsubst %/,%,$(dir $(srctree)))
> +endif
> +
> +# Do not use make's built-in rules
> +# (this improves performance and avoids hard-to-debug behaviour);
> +MAKEFLAGS += -r
> +
> +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
> +
> +ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
> +ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
>  
>  ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
>  
>  all: $(ALL_PROGRAMS)
>  
> -%: %.c
> -	$(CC) $(CFLAGS) -o $@ $^
> +export srctree OUTPUT CC LD CFLAGS
> +include $(srctree)/tools/build/Makefile.include
> +
> +HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o
> +$(HV_KVP_DAEMON_IN): FORCE
> +	$(Q)$(MAKE) $(build)=hv_kvp_daemon
> +$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN)
> +	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
> +
> +HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o
> +$(HV_VSS_DAEMON_IN): FORCE
> +	$(Q)$(MAKE) $(build)=hv_vss_daemon
> +$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN)
> +	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
> +
> +HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o
> +$(HV_FCOPY_DAEMON_IN): FORCE
> +	$(Q)$(MAKE) $(build)=hv_fcopy_daemon
> +$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN)
> +	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
>  
>  clean:
> -	$(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
> +	rm -f $(ALL_PROGRAMS)
> +	find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
>  
> -install: all
> +install: $(ALL_PROGRAMS)
>  	install -d -m 755 $(DESTDIR)$(sbindir); \
>  	install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
>  	install -d -m 755 $(DESTDIR)$(sharedstatedir); \
> @@ -33,3 +60,7 @@ install: all
>  	for script in $(ALL_SCRIPTS); do \
>  		install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
>  	done
> +
> +FORCE:
> +
> +.PHONY: all install clean FORCE prepare

-- 
Vitaly

^ permalink raw reply

* [PATCH] Tools: hv: kvp: eliminate 'may be used uninitialized' warning
From: Vitaly Kuznetsov @ 2019-08-19 14:44 UTC (permalink / raw)
  To: linux-hyperv
  Cc: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	linux-kernel

When building hv_kvp_daemon GCC-8.3 complains:

hv_kvp_daemon.c: In function ‘kvp_get_ip_info.constprop’:
hv_kvp_daemon.c:812:30: warning: ‘ip_buffer’ may be used uninitialized in this function [-Wmaybe-uninitialized]
  struct hv_kvp_ipaddr_value *ip_buffer;

this seems to be a false positive: we only use ip_buffer when
op == KVP_OP_GET_IP_INFO and it is only unset when op == KVP_OP_ENUMERATE.

Silence the warning by initializing ip_buffer to NULL.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 tools/hv/hv_kvp_daemon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index d7e06fe0270e..6d809abaf338 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -809,7 +809,7 @@ kvp_get_ip_info(int family, char *if_name, int op,
 	int sn_offset = 0;
 	int error = 0;
 	char *buffer;
-	struct hv_kvp_ipaddr_value *ip_buffer;
+	struct hv_kvp_ipaddr_value *ip_buffer = NULL;
 	char cidr_mask[5]; /* /xyz */
 	int weight;
 	int i;
-- 
2.20.1


^ permalink raw reply related

* RE: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Haiyang Zhang @ 2019-08-19 15:04 UTC (permalink / raw)
  To: Eran Ben Elisha, Mark Bloch, sashal@kernel.org,
	davem@davemloft.net, Saeed Mahameed, leon@kernel.org,
	lorenzo.pieralisi@arm.com, bhelgaas@google.com,
	linux-pci@vger.kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org
  Cc: KY Srinivasan, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <f81d2a27-cedc-40ca-daf6-8f3053cc2d38@mellanox.com>



> -----Original Message-----
> From: Eran Ben Elisha <eranbe@mellanox.com>
> Sent: Thursday, August 15, 2019 7:35 AM
> To: Mark Bloch <markb@mellanox.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; sashal@kernel.org; davem@davemloft.net;
> Saeed Mahameed <saeedm@mellanox.com>; leon@kernel.org;
> lorenzo.pieralisi@arm.com; bhelgaas@google.com; linux-
> pci@vger.kernel.org; linux-hyperv@vger.kernel.org; netdev@vger.kernel.org
> Cc: KY Srinivasan <kys@microsoft.com>; Stephen Hemminger
> <sthemmin@microsoft.com>; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe
> operations
> 
> 
> 
> On 8/14/2019 11:41 PM, Mark Bloch wrote:
> >
> >
> > On 8/14/19 12:08 PM, Haiyang Zhang wrote:
> >> From: Eran Ben Elisha <eranbe@mellanox.com>
> >>
> >> Add wrapper functions for HyperV PCIe read / write /
> >> block_invalidate_register operations.  This will be used as an
> >> infrastructure in the downstream patch for software communication.
> >>
> >> This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.
> >>
> >> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
> >> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> >> ---
> >>   drivers/net/ethernet/mellanox/mlx5/core/Makefile |  1 +
> >>   drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64
> ++++++++++++++++++++++++
> >>   drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
> >>   3 files changed, 87 insertions(+)
> >>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> >>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
> >>
> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> >> index 8b7edaa..a8950b1 100644
> >> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> >> @@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   +=
> eswitch.o eswitch_offloads.o eswitch_offlo
> >>   mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
> >>   mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
> >>   mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> >> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI)     += lib/hv.o
> >>
> >>   #
> >>   # Ipoib netdev
> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> >> new file mode 100644
> >> index 0000000..cf08d02
> >> --- /dev/null
> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> >> @@ -0,0 +1,64 @@
> >> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> >> +// Copyright (c) 2018 Mellanox Technologies
> >> +
> >> +#include <linux/hyperv.h>
> >> +#include "mlx5_core.h"
> >> +#include "lib/hv.h"
> >> +
> >> +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void
> *buf, int len,
> >> +				 int offset, bool read)
> >> +{
> >> +	int rc = -EOPNOTSUPP;
> >> +	int bytes_returned;
> >> +	int block_id;
> >> +
> >> +	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len %
> HV_CONFIG_BLOCK_SIZE_MAX)
> >> +		return -EINVAL;
> >> +
> >> +	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
> >> +
> >> +	rc = read ?
> >> +	     hyperv_read_cfg_blk(dev->pdev, buf,
> >> +				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
> >> +				 &bytes_returned) :
> >> +	     hyperv_write_cfg_blk(dev->pdev, buf,
> >> +				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
> >> +
> >> +	/* Make sure len bytes were read successfully  */
> >> +	if (read)
> >> +		rc |= !(len == bytes_returned);
> >> +
> >> +	if (rc) {
> >> +		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len
> = %d, offset = %d\n",
> >> +			      read ? "read" : "write", rc, len,
> >> +			      offset);
> >> +		return rc;
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >
> > This seems out of place why not expose this function as part of hyperv and
> mlx5
> > will just pass the pdev.
> >
> The HV driver works with block chunks. I found it less convenience to do
> so directly, so I add a small wrapper for mlx5 core.
> 
> Haiyangz,
> Do you see a reason to export this callback style from the HYPERV level
> instead?
I don’t think the wrapper has to be in the hv interface. 
One function for read, another for write, are pretty straight forward. 
Users (other drivers) may use or wrap them in the way they like to.

Thanks,
- Haiyang

^ permalink raw reply

* [PATCH net-next,v2 0/6] Add software backchannel and mlx5e HV VHCA stats
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org

This patch set adds paravirtual backchannel in software in pci_hyperv,
which is required by the mlx5e driver HV VHCA stats agent.

The stats agent is responsible on running a periodic rx/tx packets/bytes
stats update.

Dexuan Cui (1):
  PCI: hv: Add a paravirtual backchannel in software

Haiyang Zhang (5):
  PCI: hv: Add a Hyper-V PCI interface driver for software backchannel
    interface
  net/mlx5: Add wrappers for HyperV PCIe operations
  net/mlx5: Add HV VHCA infrastructure
  net/mlx5: Add HV VHCA control agent
  net/mlx5e: Add mlx5e HV VHCA stats agent

 MAINTAINERS                                        |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  13 +
 .../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++
 .../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h |  25 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c   |  64 ++++
 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h   |  22 ++
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 371 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  | 104 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   7 +
 drivers/pci/Kconfig                                |   1 +
 drivers/pci/controller/Kconfig                     |   7 +
 drivers/pci/controller/Makefile                    |   1 +
 drivers/pci/controller/pci-hyperv-intf.c           |  70 ++++
 drivers/pci/controller/pci-hyperv.c                | 308 +++++++++++++++++
 include/linux/hyperv.h                             |  29 ++
 include/linux/mlx5/driver.h                        |   2 +
 18 files changed, 1192 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
 create mode 100644 drivers/pci/controller/pci-hyperv-intf.c

-- 
1.8.3.1


^ permalink raw reply

* [PATCH net-next,v2 2/6] PCI: hv: Add a Hyper-V PCI interface driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>

This interface driver is a helper driver allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 MAINTAINERS                              |  1 +
 drivers/pci/Kconfig                      |  1 +
 drivers/pci/controller/Kconfig           |  7 ++++
 drivers/pci/controller/Makefile          |  1 +
 drivers/pci/controller/pci-hyperv-intf.c | 70 ++++++++++++++++++++++++++++++++
 drivers/pci/controller/pci-hyperv.c      | 12 ++++--
 include/linux/hyperv.h                   | 30 ++++++++++----
 7 files changed, 111 insertions(+), 11 deletions(-)
 create mode 100644 drivers/pci/controller/pci-hyperv-intf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e352550..866ae88 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7453,6 +7453,7 @@ F:	drivers/hid/hid-hyperv.c
 F:	drivers/hv/
 F:	drivers/input/serio/hyperv-keyboard.c
 F:	drivers/pci/controller/pci-hyperv.c
+F:	drivers/pci/controller/pci-hyperv-intf.c
 F:	drivers/net/hyperv/
 F:	drivers/scsi/storvsc_drv.c
 F:	drivers/uio/uio_hv_generic.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab9240..c313de9 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -182,6 +182,7 @@ config PCI_LABEL
 config PCI_HYPERV
         tristate "Hyper-V PCI Frontend"
         depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+	select PCI_HYPERV_INTERFACE
         help
           The PCI device frontend driver allows the kernel to import arbitrary
           PCI devices from a PCI backend to support PCI driver domains.
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index fe9f9f1..70e0782 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -281,5 +281,12 @@ config VMD
 	  To compile this driver as a module, choose M here: the
 	  module will be called vmd.
 
+config PCI_HYPERV_INTERFACE
+	tristate "Hyper-V PCI Interface"
+	depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+	help
+	  The Hyper-V PCI Interface is a helper driver allows other drivers to
+	  have a common interface with the Hyper-V PCI frontend driver.
+
 source "drivers/pci/controller/dwc/Kconfig"
 endmenu
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index d56a507..a2a22c9 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
 obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
 obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
 obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
+obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o
 obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
 obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
 obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c
new file mode 100644
index 0000000..087b7eb
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv-intf.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ *   Haiyang Zhang <haiyangz@microsoft.com>
+ *
+ * This small module is a helper driver allows other drivers to
+ * have a common interface with the Hyper-V PCI frontend driver.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hyperv.h>
+
+struct hyperv_pci_block_ops hvpci_block_ops;
+EXPORT_SYMBOL(hvpci_block_ops);
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			unsigned int block_id, unsigned int *bytes_returned)
+{
+	if (!hvpci_block_ops.read_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
+					  bytes_returned);
+}
+EXPORT_SYMBOL(hyperv_read_cfg_blk);
+
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+			 unsigned int block_id)
+{
+	if (!hvpci_block_ops.write_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.write_block(dev, buf, len, block_id);
+}
+EXPORT_SYMBOL(hyperv_write_cfg_blk);
+
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	if (!hvpci_block_ops.reg_blk_invalidate)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.reg_blk_invalidate(dev, context,
+						  block_invalidate);
+}
+EXPORT_SYMBOL(hyperv_reg_block_invalidate);
+
+static void __exit exit_hv_pci_intf(void)
+{
+	pr_info("unloaded\n");
+}
+
+static int __init init_hv_pci_intf(void)
+{
+	pr_info("loaded\n");
+
+	return 0;
+}
+
+module_init(init_hv_pci_intf);
+module_exit(exit_hv_pci_intf);
+
+MODULE_DESCRIPTION("Hyper-V PCI Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 57adeca..9c93ac2 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
 	*bytes_returned = comp_pkt.bytes_returned;
 	return 0;
 }
-EXPORT_SYMBOL(hv_read_config_block);
 
 /**
  * hv_pci_write_config_compl() - Invoked when a response packet for a write
@@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
 
 	return 0;
 }
-EXPORT_SYMBOL(hv_write_config_block);
 
 /**
  * hv_register_block_invalidate() - Invoked when a config block invalidation
@@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
 	return 0;
 
 }
-EXPORT_SYMBOL(hv_register_block_invalidate);
 
 /* Interrupt management hooks */
 static void hv_int_desc_free(struct hv_pci_dev *hpdev,
@@ -3045,10 +3042,19 @@ static int hv_pci_remove(struct hv_device *hdev)
 static void __exit exit_hv_pci_drv(void)
 {
 	vmbus_driver_unregister(&hv_pci_drv);
+
+	hvpci_block_ops.read_block = NULL;
+	hvpci_block_ops.write_block = NULL;
+	hvpci_block_ops.reg_blk_invalidate = NULL;
 }
 
 static int __init init_hv_pci_drv(void)
 {
+	/* Initialize PCI block r/w interface */
+	hvpci_block_ops.read_block = hv_read_config_block;
+	hvpci_block_ops.write_block = hv_write_config_block;
+	hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
+
 	return vmbus_driver_register(&hv_pci_drv);
 }
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9d37f8c..2afe6fd 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1579,18 +1579,32 @@ struct vmpacket_descriptor *
 	    pkt = hv_pkt_iter_next(channel, pkt))
 
 /*
- * Functions for passing data between SR-IOV PF and VF drivers.  The VF driver
+ * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
  * sends requests to read and write blocks. Each block must be 128 bytes or
  * smaller. Optionally, the VF driver can register a callback function which
  * will be invoked when the host says that one or more of the first 64 block
  * IDs is "invalid" which means that the VF driver should reread them.
  */
 #define HV_CONFIG_BLOCK_SIZE_MAX 128
-int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
-			 unsigned int block_id, unsigned int *bytes_returned);
-int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
-			  unsigned int block_id);
-int hv_register_block_invalidate(struct pci_dev *dev, void *context,
-				 void (*block_invalidate)(void *context,
-							  u64 block_mask));
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			unsigned int block_id, unsigned int *bytes_returned);
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+			 unsigned int block_id);
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask));
+
+struct hyperv_pci_block_ops {
+	int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			  unsigned int block_id, unsigned int *bytes_returned);
+	int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
+			   unsigned int block_id);
+	int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
+				  void (*block_invalidate)(void *context,
+							   u64 block_mask));
+};
+
+extern struct hyperv_pci_block_ops hvpci_block_ops;
+
 #endif /* _HYPERV_H */
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next,v2 6/6] net/mlx5e: Add mlx5e HV VHCA stats agent
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

HV VHCA stats agent is responsible on running a preiodic rx/tx
packets/bytes stats update. Currently the supported format is version
MLX5_HV_VHCA_STATS_VERSION. Block ID 1 is dedicated for statistics data
transfer from the VF to the PF.

The reporter fetch the statistics data from all opened channels, fill it
in a buffer and send it to mlx5_hv_vhca_write_agent.

As the stats layer should include some metadata per block (sequence and
offset), the HV VHCA layer shall modify the buffer before actually send it
over block 1.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  13 ++
 .../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h |  25 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |   3 +
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  |   1 +
 6 files changed, 205 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index fc59a40..a889a38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -36,6 +36,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
 					lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
 					en/tc_tun_geneve.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
 
 #
 # Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8fc5107..4a8008b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -54,6 +54,7 @@
 #include "mlx5_core.h"
 #include "en_stats.h"
 #include "en/fs.h"
+#include "lib/hv_vhca.h"
 
 extern const struct net_device_ops mlx5e_netdev_ops;
 struct page_pool;
@@ -777,6 +778,15 @@ struct mlx5e_modify_sq_param {
 	int rl_index;
 };
 
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+struct mlx5e_hv_vhca_stats_agent {
+	struct mlx5_hv_vhca_agent *agent;
+	struct delayed_work        work;
+	u16                        delay;
+	void                      *buf;
+};
+#endif
+
 struct mlx5e_xsk {
 	/* UMEMs are stored separately from channels, because we don't want to
 	 * lose them when channels are recreated. The kernel also stores UMEMs,
@@ -848,6 +858,9 @@ struct mlx5e_priv {
 	struct devlink_health_reporter *tx_reporter;
 	struct devlink_health_reporter *rx_reporter;
 	struct mlx5e_xsk           xsk;
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+	struct mlx5e_hv_vhca_stats_agent stats_agent;
+#endif
 };
 
 struct mlx5e_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 0000000..c37b4ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+	u64     rx_packets;
+	u64     rx_bytes;
+	u64     tx_packets;
+	u64     tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+			      struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+	struct mlx5e_channel_stats *stats;
+	int tc;
+
+	stats = &priv->channel_stats[ch];
+	data->rx_packets = stats->rq.packets;
+	data->rx_bytes   = stats->rq.bytes;
+
+	for (tc = 0; tc < priv->max_opened_tc; tc++) {
+		data->tx_packets += stats->sq[tc].packets;
+		data->tx_bytes   += stats->sq[tc].bytes;
+	}
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, u64 *data,
+				     int buf_len)
+{
+	int ch, i = 0;
+
+	for (ch = 0; ch < priv->max_nch; ch++) {
+		u64 *buf = data + i;
+
+		if (WARN_ON_ONCE(buf +
+				 sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+				 data + buf_len))
+			return;
+
+		mlx5e_hv_vhca_fill_ring_stats(priv, ch,
+					      (struct mlx5e_hv_vhca_per_ring_stats *)buf);
+		i += sizeof(struct mlx5e_hv_vhca_per_ring_stats) / sizeof(u64);
+	}
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+	return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+		priv->max_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+	struct mlx5e_hv_vhca_stats_agent *sagent;
+	struct mlx5_hv_vhca_agent *agent;
+	struct delayed_work *dwork;
+	struct mlx5e_priv *priv;
+	int buf_len, rc;
+	void *buf;
+
+	dwork = to_delayed_work(work);
+	sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+	priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+	buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+	agent = sagent->agent;
+	buf = sagent->buf;
+
+	memset(buf, 0, buf_len);
+	mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+	rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+	if (rc) {
+		mlx5_core_err(priv->mdev,
+			      "%s: Failed to write stats, err = %d\n",
+			      __func__, rc);
+		return;
+	}
+
+	if (sagent->delay)
+		queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+	MLX5_HV_VHCA_STATS_VERSION     = 1,
+	MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+					struct mlx5_hv_vhca_control_block *block)
+{
+	struct mlx5e_hv_vhca_stats_agent *sagent;
+	struct mlx5e_priv *priv;
+
+	priv = mlx5_hv_vhca_agent_priv(agent);
+	sagent = &priv->stats_agent;
+
+	block->version = MLX5_HV_VHCA_STATS_VERSION;
+	block->rings   = priv->max_nch;
+
+	if (!block->command) {
+		cancel_delayed_work_sync(&priv->stats_agent.work);
+		return;
+	}
+
+	sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+			msecs_to_jiffies(block->command * 100);
+
+	queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+	struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+	cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+	int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+	struct mlx5_hv_vhca_agent *agent;
+
+	priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+	if (!priv->stats_agent.buf)
+		return -ENOMEM;
+
+	agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+					  MLX5_HV_VHCA_AGENT_STATS,
+					  mlx5e_hv_vhca_stats_control, NULL,
+					  mlx5e_hv_vhca_stats_cleanup,
+					  priv);
+
+	if (IS_ERR_OR_NULL(agent)) {
+		if (IS_ERR(agent))
+			netdev_warn(priv->netdev,
+				    "Failed to create hv vhca stats agent, err = %ld\n",
+				    PTR_ERR(agent));
+
+		kfree(priv->stats_agent.buf);
+		return IS_ERR_OR_NULL(agent);
+	}
+
+	priv->stats_agent.agent = agent;
+	INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+
+	return 0;
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+	if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+		return;
+
+	mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+	kfree(priv->stats_agent.buf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 0000000..664463f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+
+static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+	return 0;
+}
+
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5721d3d..fac8455 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -64,6 +64,7 @@
 #include "en/xsk/setup.h"
 #include "en/xsk/rx.h"
 #include "en/xsk/tx.h"
+#include "en/hv_vhca_stats.h"
 
 
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
@@ -5103,6 +5104,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 	if (mlx5e_monitor_counter_supported(priv))
 		mlx5e_monitor_counter_init(priv);
 
+	mlx5e_hv_vhca_stats_create(priv);
 	if (netdev->reg_state != NETREG_REGISTERED)
 		return;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -5135,6 +5137,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 
+	mlx5e_hv_vhca_stats_destroy(priv);
 	if (mlx5e_monitor_counter_supported(priv))
 		mlx5e_monitor_counter_cleanup(priv);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index 984e7ad..4bad6a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -13,6 +13,7 @@
 
 enum mlx5_hv_vhca_agent_type {
 	MLX5_HV_VHCA_AGENT_CONTROL = 0,
+	MLX5_HV_VHCA_AGENT_STATS   = 1,
 	MLX5_HV_VHCA_AGENT_MAX = 32,
 };
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next,v2 5/6] net/mlx5: Add HV VHCA control agent
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

Control agent is responsible over of the control block (ID 0). It should
update the PF via this block about every capability change. In addition,
upon block 0 invalidate, it should activate all other supported agents
with data requests from the PF.

Upon agent create/destroy, the invalidate callback of the control agent
is being called in order to update the PF driver about this change.

The control agent is an integral part of HV VHCA and will be created
and destroy as part of the HV VHCA init/cleanup flow.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 122 ++++++++++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  |   1 +
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
index 84d1d75..4047629 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -109,22 +109,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
 	queue_work(hv_vhca->work_queue, &work->invalidate_work);
 }
 
+#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
+
+static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
+					struct mlx5_hv_vhca_control_block *block)
+{
+	int i;
+
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (!agent || !agent->control)
+			continue;
+
+		if (!(AGENT_MASK(agent->type) & block->control))
+			continue;
+
+		agent->control(agent, block);
+	}
+}
+
+static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
+				      u32 *capabilities)
+{
+	int i;
+
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (agent)
+			*capabilities |= AGENT_MASK(agent->type);
+	}
+}
+
+static void
+mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
+				      u64 block_mask)
+{
+	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+	struct mlx5_core_dev *dev = hv_vhca->dev;
+	struct mlx5_hv_vhca_control_block *block;
+	u32 capabilities = 0;
+	int err;
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (!block)
+		return;
+
+	err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
+	if (err)
+		goto free_block;
+
+	mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
+
+	/* In case no capabilities, send empty block in return */
+	if (!capabilities) {
+		memset(block, 0, sizeof(*block));
+		goto write;
+	}
+
+	if (block->capabilities != capabilities)
+		block->capabilities = capabilities;
+
+	if (block->control & ~capabilities)
+		goto free_block;
+
+	mlx5_hv_vhca_agents_control(hv_vhca, block);
+	block->command_ack = block->command;
+
+write:
+	mlx5_hv_write_config(dev, block, sizeof(*block), 0);
+
+free_block:
+	kfree(block);
+}
+
+static struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
+{
+	return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
+					 NULL,
+					 mlx5_hv_vhca_control_agent_invalidate,
+					 NULL, NULL);
+}
+
+static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+	mlx5_hv_vhca_agent_destroy(agent);
+}
+
 int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
 {
+	struct mlx5_hv_vhca_agent *agent;
+	int err;
+
 	if (IS_ERR_OR_NULL(hv_vhca))
 		return IS_ERR_OR_NULL(hv_vhca);
 
-	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
-					   mlx5_hv_vhca_invalidate);
+	err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+					  mlx5_hv_vhca_invalidate);
+	if (err)
+		return err;
+
+	agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
+	if (IS_ERR_OR_NULL(agent)) {
+		mlx5_hv_unregister_invalidate(hv_vhca->dev);
+		return IS_ERR_OR_NULL(agent);
+	}
+
+	hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
+
+	return 0;
 }
 
 void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
 {
+	struct mlx5_hv_vhca_agent *agent;
 	int i;
 
 	if (IS_ERR_OR_NULL(hv_vhca))
 		return;
 
+	agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
+	if (agent)
+		mlx5_hv_vhca_control_agent_destroy(agent);
+
 	mutex_lock(&hv_vhca->agents_lock);
 	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
 		WARN_ON(hv_vhca->agents[i]);
@@ -134,6 +243,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
 	mlx5_hv_unregister_invalidate(hv_vhca->dev);
 }
 
+static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
+{
+	mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
+}
+
 struct mlx5_hv_vhca_agent *
 mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
 			  enum mlx5_hv_vhca_agent_type type,
@@ -174,6 +288,8 @@ struct mlx5_hv_vhca_agent *
 	hv_vhca->agents[type] = agent;
 	mutex_unlock(&hv_vhca->agents_lock);
 
+	mlx5_hv_vhca_agents_update(hv_vhca);
+
 	return agent;
 }
 
@@ -195,6 +311,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
 		agent->cleanup(agent);
 
 	kfree(agent);
+
+	mlx5_hv_vhca_agents_update(hv_vhca);
 }
 
 static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index cdf1303..984e7ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -12,6 +12,7 @@
 struct mlx5_hv_vhca_control_block;
 
 enum mlx5_hv_vhca_agent_type {
+	MLX5_HV_VHCA_AGENT_CONTROL = 0,
 	MLX5_HV_VHCA_AGENT_MAX = 32,
 };
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next,v2 4/6] net/mlx5: Add HV VHCA infrastructure
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

HV VHCA is a layer which provides PF to VF communication channel based on
HyperV PCI config channel. It implements Mellanox's Inter VHCA control
communication protocol. The protocol contains control block in order to
pass messages between the PF and VF drivers, and data blocks in order to
pass actual data.

The infrastructure is agent based. Each agent will be responsible of
contiguous buffer blocks in the VHCA config space. This infrastructure will
bind agents to their blocks, and those agents can only access read/write
the buffer blocks assigned to them. Each agent will provide three
callbacks (control, invalidate, cleanup). Control will be invoked when
block-0 is invalidated with a command that concerns this agent. Invalidate
callback will be invoked if one of the blocks assigned to this agent was
invalidated. Cleanup will be invoked before the agent is being freed in
order to clean all of its open resources or deferred works.

Block-0 serves as the control block. All execution commands from the PF
will be written by the PF over this block. VF will ack on those by
writing on block-0 as well. Its format is described by struct
mlx5_hv_vhca_control_block layout.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 253 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  | 102 +++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   7 +
 include/linux/mlx5/driver.h                        |   2 +
 5 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 247295b..fc59a40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
-mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
 
 #
 # Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 0000000..84d1d75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+	struct mlx5_core_dev       *dev;
+	struct workqueue_struct    *work_queue;
+	struct mlx5_hv_vhca_agent  *agents[MLX5_HV_VHCA_AGENT_MAX];
+	struct mutex                agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+	struct work_struct     invalidate_work;
+	struct mlx5_hv_vhca   *hv_vhca;
+	u64                    block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+	u16     sequence;
+	u16     offset;
+	u8      reserved[4];
+	u64     data[15];
+};
+
+struct mlx5_hv_vhca_agent {
+	enum mlx5_hv_vhca_agent_type	 type;
+	struct mlx5_hv_vhca		*hv_vhca;
+	void				*priv;
+	u16                              seq;
+	void (*control)(struct mlx5_hv_vhca_agent *agent,
+			struct mlx5_hv_vhca_control_block *block);
+	void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+			   u64 block_mask);
+	void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_hv_vhca *hv_vhca = NULL;
+
+	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+	if (!hv_vhca)
+		return ERR_PTR(-ENOMEM);
+
+	hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+	if (!hv_vhca->work_queue) {
+		kfree(hv_vhca);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	hv_vhca->dev = dev;
+	mutex_init(&hv_vhca->agents_lock);
+
+	return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	destroy_workqueue(hv_vhca->work_queue);
+	kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+	struct mlx5_hv_vhca_work *hwork;
+	struct mlx5_hv_vhca *hv_vhca;
+	int i;
+
+	hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+	hv_vhca = hwork->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (!agent || !agent->invalidate)
+			continue;
+
+		if (!(BIT(agent->type) & hwork->block_mask))
+			continue;
+
+		agent->invalidate(agent, hwork->block_mask);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+	struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+	struct mlx5_hv_vhca_work *work;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+	work->hv_vhca    = hv_vhca;
+	work->block_mask = block_mask;
+
+	queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return IS_ERR_OR_NULL(hv_vhca);
+
+	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+					   mlx5_hv_vhca_invalidate);
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+		WARN_ON(hv_vhca->agents[i]);
+
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
+			  void *priv)
+{
+	struct mlx5_hv_vhca_agent *agent;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return ERR_PTR(-ENOMEM);
+
+	if (type >= MLX5_HV_VHCA_AGENT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&hv_vhca->agents_lock);
+	if (hv_vhca->agents[type]) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return ERR_PTR(-EINVAL);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+	if (!agent)
+		return ERR_PTR(-ENOMEM);
+
+	agent->type      = type;
+	agent->hv_vhca   = hv_vhca;
+	agent->priv      = priv;
+	agent->control   = control;
+	agent->invalidate = invalidate;
+	agent->cleanup   = cleaup;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	hv_vhca->agents[type] = agent;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+
+	if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return;
+	}
+
+	hv_vhca->agents[agent->type] = NULL;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	if (agent->cleanup)
+		agent->cleanup(agent);
+
+	kfree(agent);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+					   struct mlx5_hv_vhca_data_block *data_block,
+					   void *src, int len, int *offset)
+{
+	int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+	data_block->sequence = agent->seq;
+	data_block->offset   = (*offset)++;
+	memcpy(data_block->data, src, bytes);
+
+	return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+	agent->seq++;
+}
+
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len)
+{
+	int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+	int block_offset = 0;
+	int total = 0;
+	int err;
+
+	while (len) {
+		struct mlx5_hv_vhca_data_block data_block = {0};
+		int bytes;
+
+		bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+							buf + total,
+							len, &block_offset);
+		if (!bytes)
+			return -ENOMEM;
+
+		err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+					   sizeof(data_block), offset);
+		if (err)
+			return err;
+
+		total += bytes;
+		len   -= bytes;
+	}
+
+	mlx5_hv_vhca_agent_seq_update(agent);
+
+	return 0;
+}
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+	return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 0000000..cdf1303
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+	MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+struct mlx5_hv_vhca_control_block {
+	u32     capabilities;
+	u32     control;
+	u16     command;
+	u16     command_ack;
+	u16     version;
+	u16     rings;
+	u32     reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+					   u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+
+static inline int
+mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
+			 void *buf, int len)
+{
+	return 0;
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 80437d4..4b74315 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -69,6 +69,7 @@
 #include "lib/pci_vsc.h"
 #include "diag/fw_tracer.h"
 #include "ecpf.h"
+#include "lib/hv_vhca.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -868,6 +869,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 	}
 
 	dev->tracer = mlx5_fw_tracer_create(dev);
+	dev->hv_vhca = mlx5_hv_vhca_create(dev);
 
 	return 0;
 
@@ -897,6 +899,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+	mlx5_hv_vhca_destroy(dev->hv_vhca);
 	mlx5_fw_tracer_destroy(dev->tracer);
 	mlx5_fpga_cleanup(dev);
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1063,6 +1066,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 		goto err_fw_tracer;
 	}
 
+	mlx5_hv_vhca_init(dev->hv_vhca);
+
 	err = mlx5_fpga_device_start(dev);
 	if (err) {
 		mlx5_core_err(dev, "fpga device start failed %d\n", err);
@@ -1118,6 +1123,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 err_ipsec_start:
 	mlx5_fpga_device_stop(dev);
 err_fpga_start:
+	mlx5_hv_vhca_cleanup(dev->hv_vhca);
 	mlx5_fw_tracer_cleanup(dev->tracer);
 err_fw_tracer:
 	mlx5_eq_table_destroy(dev);
@@ -1138,6 +1144,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
 	mlx5_accel_ipsec_cleanup(dev);
 	mlx5_accel_tls_cleanup(dev);
 	mlx5_fpga_device_stop(dev);
+	mlx5_hv_vhca_cleanup(dev->hv_vhca);
 	mlx5_fw_tracer_cleanup(dev->tracer);
 	mlx5_eq_table_destroy(dev);
 	mlx5_irq_table_destroy(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 467de34..0e00cbc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -638,6 +638,7 @@ struct mlx5_clock {
 struct mlx5_fw_tracer;
 struct mlx5_vxlan;
 struct mlx5_geneve;
+struct mlx5_hv_vhca;
 
 struct mlx5_core_dev {
 	struct device *device;
@@ -685,6 +686,7 @@ struct mlx5_core_dev {
 	struct mlx5_ib_clock_info  *clock_info;
 	struct mlx5_fw_tracer   *tracer;
 	u32                      vsc_addr;
+	struct mlx5_hv_vhca	*hv_vhca;
 };
 
 struct mlx5_db {
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next,v2 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

Add wrapper functions for HyperV PCIe read / write /
block_invalidate_register operations.  This will be used as an
infrastructure in the downstream patch for software communication.

This will be enabled by default if CONFIG_PCI_HYPERV_INTERFACE is set.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8b7edaa..247295b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
 
 #
 # Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000..cf08d02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+				 int offset, bool read)
+{
+	int rc = -EOPNOTSUPP;
+	int bytes_returned;
+	int block_id;
+
+	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+	rc = read ?
+	     hyperv_read_cfg_blk(dev->pdev, buf,
+				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+				 &bytes_returned) :
+	     hyperv_write_cfg_blk(dev->pdev, buf,
+				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+	/* Make sure len bytes were read successfully  */
+	if (read)
+		rc |= !(len == bytes_returned);
+
+	if (rc) {
+		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+			      read ? "read" : "write", rc, len,
+			      offset);
+		return rc;
+	}
+
+	return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	return hyperv_reg_block_invalidate(dev->pdev, context,
+					   block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+	hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000..f9a4557
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next,v2 1/6] PCI: hv: Add a paravirtual backchannel in software
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org, Dexuan Cui, Jake Oshins
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>

From: Dexuan Cui <decui@microsoft.com>

Windows SR-IOV provides a backchannel mechanism in software for communication
between a VF driver and a PF driver.  These "configuration blocks" are
similar in concept to PCI configuration space, but instead of doing reads and
writes in 32-bit chunks through a very slow path, packets of up to 128 bytes
can be sent or received asynchronously.

Nearly every SR-IOV device contains just such a communications channel in
hardware, so using this one in software is usually optional.  Using the
software channel, however, allows driver implementers to leverage software
tools that fuzz the communications channel looking for vulnerabilities.

The usage model for these packets puts the responsibility for reading or
writing on the VF driver.  The VF driver sends a read or a write packet,
indicating which "block" is being referred to by number.

If the PF driver wishes to initiate communication, it can "invalidate" one or
more of the first 64 blocks.  This invalidation is delivered via a callback
supplied by the VF driver by this driver.

No protocol is implied, except that supplied by the PF and VF drivers.

Signed-off-by: Jake Oshins <jakeo@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/pci/controller/pci-hyperv.c | 302 ++++++++++++++++++++++++++++++++++++
 include/linux/hyperv.h              |  15 ++
 2 files changed, 317 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b6254..57adeca 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
 	struct tran_int_desc int_desc;
 } __packed;
 
+/*
+ * Note: the VM must pass a valid block id, wslot and bytes_requested.
+ */
+struct pci_read_block {
+	struct pci_message message_type;
+	u32 block_id;
+	union win_slot_encoding wslot;
+	u32 bytes_requested;
+} __packed;
+
+struct pci_read_block_response {
+	struct vmpacket_descriptor hdr;
+	u32 status;
+	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and byte_count.
+ */
+struct pci_write_block {
+	struct pci_message message_type;
+	u32 block_id;
+	union win_slot_encoding wslot;
+	u32 byte_count;
+	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+struct pci_dev_inval_block {
+	struct pci_incoming_message incoming;
+	union win_slot_encoding wslot;
+	u64 block_mask;
+} __packed;
+
 struct pci_dev_incoming {
 	struct pci_incoming_message incoming;
 	union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
 	struct hv_pcibus_device *hbus;
 	struct work_struct wrk;
 
+	void (*block_invalidate)(void *context, u64 block_mask);
+	void *invalidate_context;
+
 	/*
 	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
 	 * read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,256 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
 	.write = hv_pcifront_write_config,
 };
 
+/*
+ * Paravirtual backchannel
+ *
+ * Hyper-V SR-IOV provides a backchannel mechanism in software for
+ * communication between a VF driver and a PF driver.  These
+ * "configuration blocks" are similar in concept to PCI configuration space,
+ * but instead of doing reads and writes in 32-bit chunks through a very slow
+ * path, packets of up to 128 bytes can be sent or received asynchronously.
+ *
+ * Nearly every SR-IOV device contains just such a communications channel in
+ * hardware, so using this one in software is usually optional.  Using the
+ * software channel, however, allows driver implementers to leverage software
+ * tools that fuzz the communications channel looking for vulnerabilities.
+ *
+ * The usage model for these packets puts the responsibility for reading or
+ * writing on the VF driver.  The VF driver sends a read or a write packet,
+ * indicating which "block" is being referred to by number.
+ *
+ * If the PF driver wishes to initiate communication, it can "invalidate" one or
+ * more of the first 64 blocks.  This invalidation is delivered via a callback
+ * supplied by the VF driver by this driver.
+ *
+ * No protocol is implied, except that supplied by the PF and VF drivers.
+ */
+
+struct hv_read_config_compl {
+	struct hv_pci_compl comp_pkt;
+	void *buf;
+	unsigned int len;
+	unsigned int bytes_returned;
+};
+
+/**
+ * hv_pci_read_config_compl() - Invoked when a response packet
+ * for a read config block operation arrives.
+ * @context:		Identifies the read config operation
+ * @resp:		The response packet itself
+ * @resp_packet_size:	Size in bytes of the response packet
+ */
+static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
+				     int resp_packet_size)
+{
+	struct hv_read_config_compl *comp = context;
+	struct pci_read_block_response *read_resp =
+		(struct pci_read_block_response *)resp;
+	unsigned int data_len, hdr_len;
+
+	hdr_len = offsetof(struct pci_read_block_response, bytes);
+	if (resp_packet_size < hdr_len) {
+		comp->comp_pkt.completion_status = -1;
+		goto out;
+	}
+
+	data_len = resp_packet_size - hdr_len;
+	if (data_len > 0 && read_resp->status == 0) {
+		comp->bytes_returned = min(comp->len, data_len);
+		memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
+	} else {
+		comp->bytes_returned = 0;
+	}
+
+	comp->comp_pkt.completion_status = read_resp->status;
+out:
+	complete(&comp->comp_pkt.host_event);
+}
+
+/**
+ * hv_read_config_block() - Sends a read config block request to
+ * the back-end driver running in the Hyper-V parent partition.
+ * @pdev:		The PCI driver's representation for this device.
+ * @buf:		Buffer into which the config block will be copied.
+ * @len:		Size in bytes of buf.
+ * @block_id:		Identifies the config block which has been requested.
+ * @bytes_returned:	Size which came back from the back-end driver.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+			 unsigned int block_id, unsigned int *bytes_returned)
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct {
+		struct pci_packet pkt;
+		char buf[sizeof(struct pci_read_block)];
+	} pkt;
+	struct hv_read_config_compl comp_pkt;
+	struct pci_read_block *read_blk;
+	int ret;
+
+	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	init_completion(&comp_pkt.comp_pkt.host_event);
+	comp_pkt.buf = buf;
+	comp_pkt.len = len;
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.pkt.completion_func = hv_pci_read_config_compl;
+	pkt.pkt.compl_ctxt = &comp_pkt;
+	read_blk = (struct pci_read_block *)&pkt.pkt.message;
+	read_blk->message_type.type = PCI_READ_BLOCK;
+	read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+	read_blk->block_id = block_id;
+	read_blk->bytes_requested = len;
+
+	ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
+			       sizeof(*read_blk), (unsigned long)&pkt.pkt,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		return ret;
+
+	ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
+	if (ret)
+		return ret;
+
+	if (comp_pkt.comp_pkt.completion_status != 0 ||
+	    comp_pkt.bytes_returned == 0) {
+		dev_err(&hbus->hdev->device,
+			"Read Config Block failed: 0x%x, bytes_returned=%d\n",
+			comp_pkt.comp_pkt.completion_status,
+			comp_pkt.bytes_returned);
+		return -EIO;
+	}
+
+	*bytes_returned = comp_pkt.bytes_returned;
+	return 0;
+}
+EXPORT_SYMBOL(hv_read_config_block);
+
+/**
+ * hv_pci_write_config_compl() - Invoked when a response packet for a write
+ * config block operation arrives.
+ * @context:		Identifies the write config operation
+ * @resp:		The response packet itself
+ * @resp_packet_size:	Size in bytes of the response packet
+ */
+static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
+				      int resp_packet_size)
+{
+	struct hv_pci_compl *comp_pkt = context;
+
+	comp_pkt->completion_status = resp->status;
+	complete(&comp_pkt->host_event);
+}
+
+/**
+ * hv_write_config_block() - Sends a write config block request to the
+ * back-end driver running in the Hyper-V parent partition.
+ * @pdev:		The PCI driver's representation for this device.
+ * @buf:		Buffer from which the config block will	be copied.
+ * @len:		Size in bytes of buf.
+ * @block_id:		Identifies the config block which is being written.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+			  unsigned int block_id)
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct {
+		struct pci_packet pkt;
+		char buf[sizeof(struct pci_write_block)];
+		u32 reserved;
+	} pkt;
+	struct hv_pci_compl comp_pkt;
+	struct pci_write_block *write_blk;
+	u32 pkt_size;
+	int ret;
+
+	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	init_completion(&comp_pkt.host_event);
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.pkt.completion_func = hv_pci_write_config_compl;
+	pkt.pkt.compl_ctxt = &comp_pkt;
+	write_blk = (struct pci_write_block *)&pkt.pkt.message;
+	write_blk->message_type.type = PCI_WRITE_BLOCK;
+	write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+	write_blk->block_id = block_id;
+	write_blk->byte_count = len;
+	memcpy(write_blk->bytes, buf, len);
+	pkt_size = offsetof(struct pci_write_block, bytes) + len;
+	/*
+	 * This quirk is required on some hosts shipped around 2018, because
+	 * these hosts don't check the pkt_size correctly (new hosts have been
+	 * fixed since early 2019). The quirk is also safe on very old hosts
+	 * and new hosts, because, on them, what really matters is the length
+	 * specified in write_blk->byte_count.
+	 */
+	pkt_size += sizeof(pkt.reserved);
+
+	ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
+			       (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		return ret;
+
+	ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
+	if (ret)
+		return ret;
+
+	if (comp_pkt.completion_status != 0) {
+		dev_err(&hbus->hdev->device,
+			"Write Config Block failed: 0x%x\n",
+			comp_pkt.completion_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(hv_write_config_block);
+
+/**
+ * hv_register_block_invalidate() - Invoked when a config block invalidation
+ * arrives from the back-end driver.
+ * @pdev:		The PCI driver's representation for this device.
+ * @context:		Identifies the device.
+ * @block_invalidate:	Identifies all of the blocks being invalidated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
+				 void (*block_invalidate)(void *context,
+							  u64 block_mask))
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct hv_pci_dev *hpdev;
+
+	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+	if (!hpdev)
+		return -ENODEV;
+
+	hpdev->block_invalidate = block_invalidate;
+	hpdev->invalidate_context = context;
+
+	put_pcichild(hpdev);
+	return 0;
+
+}
+EXPORT_SYMBOL(hv_register_block_invalidate);
+
 /* Interrupt management hooks */
 static void hv_int_desc_free(struct hv_pci_dev *hpdev,
 			     struct tran_int_desc *int_desc)
@@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context)
 	struct pci_response *response;
 	struct pci_incoming_message *new_message;
 	struct pci_bus_relations *bus_rel;
+	struct pci_dev_inval_block *inval;
 	struct pci_dev_incoming *dev_message;
 	struct hv_pci_dev *hpdev;
 
@@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context)
 				}
 				break;
 
+			case PCI_INVALIDATE_BLOCK:
+
+				inval = (struct pci_dev_inval_block *)buffer;
+				hpdev = get_pcichild_wslot(hbus,
+							   inval->wslot.slot);
+				if (hpdev) {
+					if (hpdev->block_invalidate) {
+						hpdev->block_invalidate(
+						    hpdev->invalidate_context,
+						    inval->block_mask);
+					}
+					put_pcichild(hpdev);
+				}
+				break;
+
 			default:
 				dev_warn(&hbus->hdev->device,
 					"Unimplemented protocol message %x\n",
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..9d37f8c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1578,4 +1578,19 @@ struct vmpacket_descriptor *
 	for (pkt = hv_pkt_iter_first(channel); pkt; \
 	    pkt = hv_pkt_iter_next(channel, pkt))
 
+/*
+ * Functions for passing data between SR-IOV PF and VF drivers.  The VF driver
+ * sends requests to read and write blocks. Each block must be 128 bytes or
+ * smaller. Optionally, the VF driver can register a callback function which
+ * will be invoked when the host says that one or more of the first 64 block
+ * IDs is "invalid" which means that the VF driver should reread them.
+ */
+#define HV_CONFIG_BLOCK_SIZE_MAX 128
+int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			 unsigned int block_id, unsigned int *bytes_returned);
+int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
+			  unsigned int block_id);
+int hv_register_block_invalidate(struct pci_dev *dev, void *context,
+				 void (*block_invalidate)(void *context,
+							  u64 block_mask));
 #endif /* _HYPERV_H */
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH] storvsc: setup 1:1 mapping between hardware queue and CPU queue
From: longli @ 2019-08-19 19:35 UTC (permalink / raw)
  To: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	James E.J. Bottomley, Martin K. Petersen, linux-hyperv,
	linux-scsi, linux-kernel
  Cc: Long Li

From: Long Li <longli@microsoft.com>

storvsc doesn't use a dedicated hardware queue for a given CPU queue. When
issuing I/O, it selects returning CPU (hardware queue) dynamically based on
vmbus channel usage across all channels.

This patch sets up a 1:1 mapping between hardware queue and CPU queue, thus
avoiding unnecessary locking at upper layer when issuing I/O.

Signed-off-by: Long Li <longli@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index b89269120a2d..26c16d40ec46 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1682,6 +1682,18 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	return 0;
 }
 
+static int storvsc_map_queues(struct Scsi_Host *shost)
+{
+	unsigned int cpu;
+	struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+
+	for_each_possible_cpu(cpu) {
+		qmap->mq_map[cpu] = cpu;
+	}
+
+	return 0;
+}
+
 static struct scsi_host_template scsi_driver = {
 	.module	=		THIS_MODULE,
 	.name =			"storvsc_host_t",
@@ -1697,6 +1709,7 @@ static struct scsi_host_template scsi_driver = {
 	.this_id =		-1,
 	/* Make sure we dont get a sg segment crosses a page boundary */
 	.dma_boundary =		PAGE_SIZE-1,
+	.map_queues =		storvsc_map_queues,
 	.no_write_same =	1,
 	.track_queue_depth =	1,
 };
@@ -1836,8 +1849,7 @@ static int storvsc_probe(struct hv_device *device,
 	/*
 	 * Set the number of HW queues we are supporting.
 	 */
-	if (stor_device->num_sc != 0)
-		host->nr_hw_queues = stor_device->num_sc + 1;
+	host->nr_hw_queues = num_possible_cpus();
 
 	/*
 	 * Set the error handler work queue.
-- 
2.17.1


^ permalink raw reply related

* [PATCH v3 00/12] Enhance the hv_vmbus driver to support hibernation
From: Dexuan Cui @ 2019-08-20  1:51 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui

Hi all,
The patchset is to enhance hv_vmbus to support hibernation when Linux VM
runs on Hyper-V. A second patchset to enhance the high-level VSC drivers
(hv_netvsc, hv_storvsc, etc.) for hibernation will be posted after this
patchset is acceped. If you want to test this hibernation feaure, all the
needed patches can be found on my github branch:
https://github.com/dcui/linux/commits/decui/hibernation/2019-0818/v5.3-rc5

This patchset is based on v5.3-rc5.  

Please review.

Hi tglx,
I hope all the 12 patchset, including the below 3 patches,

[PATCH v3 01/12] x86/hyper-v: Suspend/resume the hypercall page for hibernation
[PATCH v3 02/12] x86/hyper-v: Implement hv_is_hibernation_supported()
[PATCH v3 03/12] clocksource/drivers: Suspend/resume Hyper-V

can go through Sasha's hyperv/linux.git tree, since all the changes belong
to the hv stuff. However, if you think it's better for these 3 patches to go
through the tip.git tree, it also works for me.

Hi Michael Kelley,
I added your Reviewed-by's for patch 1, 3, 4, 5, 7 and 9, since you reviewed
these patches. Please review the others.

Looking forward to your comments!

Thanks,
Dexuan

Changes in v2:
  Patch 3: Improved the changelog and added a comment.

  Patch 4: Remove the "hv_stimer_cleanup" in hv_synic_suspend(), because I
           suppose https://lkml.org/lkml/2019/7/27/5 will be accepted. Also
           improved changelog and the comment.

  Patch 5: Fixed the third argument of print_hex_dump_debug(). Also improved
           the changelog.

  Patch 6: Improved the changelog and the comment. Added a check for the
           'vmbus_proto_version' in vmbus_bus_resume().

Changes in v3:
  Patch 2: Add a new API hv_is_hibernation_supported().

  Patch 6: Add a new helper function is_sub_channel().

  Patch 8: Find the old channels via Instance GUID rather than the RELID.

  Patch 10: Add code to clean up  hv_sock channels by force

  Patch 11: Add code to wait in the suspend path: all the hv_sock channels
            and sub-channels should be cleaned up first before Linux sends
            the VMBUS UNLOAD message.

  Patch 12: Add code to fix up the old primary channels before further
            resuming.

Dexuan Cui (12):
  x86/hyper-v: Suspend/resume the hypercall page for hibernation
  x86/hyper-v: Implement hv_is_hibernation_supported()
  clocksource/drivers: Suspend/resume Hyper-V clocksource for
    hibernation
  Drivers: hv: vmbus: Break out synic enable and disable operations
  Drivers: hv: vmbus: Suspend/resume the synic for hibernation
  Drivers: hv: vmbus: Add a helper function is_sub_channel()
  Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for
    hibernation
  Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
  Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
  Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
  Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
  Drivers: hv: vmbus: Resume after fixing up old primary channels

 arch/x86/hyperv/hv_init.c          |  41 ++++++
 drivers/clocksource/hyperv_timer.c |  25 ++++
 drivers/hv/channel_mgmt.c          | 127 +++++++++++++++---
 drivers/hv/connection.c            |  35 ++++-
 drivers/hv/hv.c                    |  66 ++++++----
 drivers/hv/hyperv_vmbus.h          |  33 +++++
 drivers/hv/vmbus_drv.c             | 262 +++++++++++++++++++++++++++++++++++++
 include/asm-generic/mshyperv.h     |   2 +
 include/linux/hyperv.h             |  16 ++-
 9 files changed, 558 insertions(+), 49 deletions(-)

-- 
1.8.3.1


^ permalink raw reply

* [PATCH v3 01/12] x86/hyper-v: Suspend/resume the hypercall page for hibernation
From: Dexuan Cui @ 2019-08-20  1:51 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>

This is needed for hibernation, e.g. when we resume the old kernel, we need
to disable the "current" kernel's hypercall page and then resume the old
kernel's.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 arch/x86/hyperv/hv_init.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0d25868..78e53d9 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -20,6 +20,7 @@
 #include <linux/hyperv.h>
 #include <linux/slab.h>
 #include <linux/cpuhotplug.h>
+#include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
 
 void *hv_hypercall_pg;
@@ -223,6 +224,34 @@ static int __init hv_pci_init(void)
 	return 1;
 }
 
+static int hv_suspend(void)
+{
+	union hv_x64_msr_hypercall_contents hypercall_msr;
+
+	/* Reset the hypercall page */
+	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+	hypercall_msr.enable = 0;
+	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+
+	return 0;
+}
+
+static void hv_resume(void)
+{
+	union hv_x64_msr_hypercall_contents hypercall_msr;
+
+	/* Re-enable the hypercall page */
+	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+	hypercall_msr.enable = 1;
+	hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
+	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+}
+
+static struct syscore_ops hv_syscore_ops = {
+	.suspend = hv_suspend,
+	.resume = hv_resume,
+};
+
 /*
  * This function is to be invoked early in the boot sequence after the
  * hypervisor has been detected.
@@ -303,6 +332,9 @@ void __init hyperv_init(void)
 
 	/* Register Hyper-V specific clocksource */
 	hv_init_clocksource();
+
+	register_syscore_ops(&hv_syscore_ops);
+
 	return;
 
 remove_cpuhp_state:
@@ -322,6 +354,8 @@ void hyperv_cleanup(void)
 {
 	union hv_x64_msr_hypercall_contents hypercall_msr;
 
+	unregister_syscore_ops(&hv_syscore_ops);
+
 	/* Reset our OS id */
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v3 07/12] Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
From: Dexuan Cui @ 2019-08-20  1:52 UTC (permalink / raw)
  To: linux-hyperv@vger.kernel.org, gregkh@linuxfoundation.org,
	Stephen Hemminger, Sasha Levin, sashal@kernel.org, Haiyang Zhang,
	KY Srinivasan, Michael Kelley, tglx@linutronix.de
  Cc: linux-kernel@vger.kernel.org, Dexuan Cui
In-Reply-To: <1566265863-21252-1-git-send-email-decui@microsoft.com>

The high-level VSC drivers will implement device-specific callbacks.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
---
 drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hyperv.h |  3 +++
 2 files changed, 49 insertions(+)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2ef375c..a30c70a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -911,6 +911,43 @@ static void vmbus_shutdown(struct device *child_device)
 		drv->shutdown(dev);
 }
 
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+	struct hv_driver *drv;
+	struct hv_device *dev = device_to_hv_device(child_device);
+
+	/* The device may not be attached yet */
+	if (!child_device->driver)
+		return 0;
+
+	drv = drv_to_hv_drv(child_device->driver);
+	if (!drv->suspend)
+		return -EOPNOTSUPP;
+
+	return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+	struct hv_driver *drv;
+	struct hv_device *dev = device_to_hv_device(child_device);
+
+	/* The device may not be attached yet */
+	if (!child_device->driver)
+		return 0;
+
+	drv = drv_to_hv_drv(child_device->driver);
+	if (!drv->resume)
+		return -EOPNOTSUPP;
+
+	return drv->resume(dev);
+}
 
 /*
  * vmbus_device_release - Final callback release of the vmbus child device
@@ -926,6 +963,14 @@ static void vmbus_device_release(struct device *device)
 	kfree(hv_dev);
 }
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
 /* The one and only one */
 static struct bus_type  hv_bus = {
 	.name =		"vmbus",
@@ -936,6 +981,7 @@ static void vmbus_device_release(struct device *device)
 	.uevent =		vmbus_uevent,
 	.dev_groups =		vmbus_dev_groups,
 	.drv_groups =		vmbus_drv_groups,
+	.pm =			&vmbus_pm,
 };
 
 struct onmessage_work_context {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2d39248..8a60e77 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1157,6 +1157,9 @@ struct hv_driver {
 	int (*remove)(struct hv_device *);
 	void (*shutdown)(struct hv_device *);
 
+	int (*suspend)(struct hv_device *);
+	int (*resume)(struct hv_device *);
+
 };
 
 /* Base device object */
-- 
1.8.3.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox