Linux-HyperV List
 help / color / mirror / Atom feed
* [GIT PULL] Hyper-V fixes for v5.3-rc
From: Sasha Levin @ 2019-08-17 19:59 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: linux-kernel, linux-hyperv, kys, sthemmin, linux-kernel

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

The following changes since commit 5f9e832c137075045d15cd6899ab0505cfb2ca4b:

  Linus 5.3-rc1 (2019-07-21 14:05:38 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git tags/hyperv-fixes-signed

for you to fetch changes up to bafe1e79e05de725e26b3f60c90b49e635b686b9:

  MAINTAINERS: Fix Hyperv vIOMMU driver file name (2019-08-17 15:29:39 -0400)

- ----------------------------------------------------------------
- - A few fixes for the userspace hyper-v tools from Adrian Vladu.
- - A fix for the hyper-v MAINTAINERs entry from Lan Tianyu.
- - Fix for SPDX license identifier in the userspace tools from Nishad
Kamdar.

- ----------------------------------------------------------------
Adrian Vladu (3):
      tools: hv: fixed Python pep8/flake8 warnings for lsvmbus
      tools: hv: fix KVP and VSS daemons exit code
      tools: hv: fix typos in toolchain

Lan Tianyu (1):
      MAINTAINERS: Fix Hyperv vIOMMU driver file name

Nishad Kamdar (1):
      tools: hv: Use the correct style for SPDX License Identifier

 MAINTAINERS                  |  2 +-
 drivers/hv/hv_trace.h        |  2 +-
 tools/hv/hv_get_dhcp_info.sh |  2 +-
 tools/hv/hv_kvp_daemon.c     |  8 +++--
 tools/hv/hv_set_ifconfig.sh  |  2 +-
 tools/hv/hv_vss_daemon.c     |  4 ++-
 tools/hv/lsvmbus             | 75 +++++++++++++++++++++++++-------------------
 7 files changed, 54 insertions(+), 41 deletions(-)
-----BEGIN PGP SIGNATURE-----

iQIzBAEBCgAdFiEE4n5dijQDou9mhzu83qZv95d3LNwFAl1YXDEACgkQ3qZv95d3
LNwzQxAAlJgFE5AQ0etqM4ns/wx05AOePVHR90ua+FCU0W3umL5vGeYxCbl9dJ08
zYUhnoq/y4nbccIH7edJlrxb/J9+Slsp0FxWBbPSGjvbLK0yjaxHup8bcdyq6/pP
UM/fvaPzd7NnK/LPehDlV1l2skqbsimm2wbv7P1sXYZ8aQwowXxJkeVeKvfipiCw
MAB1KMCZGJg1n9w6xi2j+wnV4cuRgcMX/n+0C5Qc2AfFAVPrPzEJoGiRJBhQgqf0
JDg1+hWKrAyTAJvKHQf8o/8EsvLr/Itm1t9Q+s3eQUFqbvVPilbFR8OltSgPHgUM
PJG+Kur49jjOpUR1OJ8MeRQXqblRqSxpe7POsjP1vnGTCvuO/sLSHiCDT7+3YIEN
+bXaONOTCQSH5j8KgNE6MN/wfGIpoSEgMJEoN16OyELsQa2zIhRCwHdz72DkojJq
QzrGkChLwAz8Dw3Ul/NX36MJMAyT9DTM3GE1IXY4LzOL0381JRbvms1vwP2dIkL4
c3+tGJ7iBi23KEfkBdD3fq2JBs3KVCucIMOZX9RWDqbRhl61015GKPSALAD7r9vR
BKYo3hdaDxk7DpSTHTupUds/EmSnkS+7poaXl2iY2jVJTUXXIyg7Ig3IBI7Vqhc9
3SgunAaaKdgrh60JxkVTJ7WQUAPbOf3h/a/P1WAhSrpkxHB0XXI=
=BevR
-----END PGP SIGNATURE-----

^ permalink raw reply

* [PATCH] Drivers: hv: balloon: Remove dependencies on guest page size
From: Himadri Pandya @ 2019-08-17  4:08 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, sashal, mikelley
  Cc: linux-hyperv, linux-kernel, Himadri Pandya

Hyper-V assumes page size to be 4K. This might not be the case for
ARM64 architecture. Hence use hyper-v specific page size and page
shift definitions to avoid conflicts between different host and guest
page sizes on ARM64.

Also, remove some old and incorrect comments and redefine ballooning
granularities to handle larger page sizes correctly.

Signed-off-by: Himadri Pandya <himadri18.07@gmail.com>
---
 drivers/hv/hv_balloon.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 34bd73526afd..935904830d42 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -23,6 +23,7 @@
 #include <linux/percpu_counter.h>
 
 #include <linux/hyperv.h>
+#include <asm/hyperv-tlfs.h>
 
 #define CREATE_TRACE_POINTS
 #include "hv_trace_balloon.h"
@@ -341,8 +342,6 @@ struct dm_unballoon_response {
  *
  * mem_range: Memory range to hot add.
  *
- * On Linux we currently don't support this since we cannot hot add
- * arbitrary granularity of memory.
  */
 
 struct dm_hot_add {
@@ -477,7 +476,7 @@ module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
 MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
 static atomic_t trans_id = ATOMIC_INIT(0);
 
-static int dm_ring_size = (5 * PAGE_SIZE);
+static int dm_ring_size = 20 * 1024;
 
 /*
  * Driver specific state.
@@ -493,10 +492,10 @@ enum hv_dm_state {
 };
 
 
-static __u8 recv_buffer[PAGE_SIZE];
-static __u8 balloon_up_send_buffer[PAGE_SIZE];
-#define PAGES_IN_2M	512
-#define HA_CHUNK (32 * 1024)
+static __u8 recv_buffer[HV_HYP_PAGE_SIZE];
+static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE];
+#define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE)
+#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE)
 
 struct hv_dynmem_device {
 	struct hv_device *dev;
@@ -1076,7 +1075,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
 			__u64 *max_page_count = (__u64 *)&info_hdr[1];
 
 			pr_info("Max. dynamic memory size: %llu MB\n",
-				(*max_page_count) >> (20 - PAGE_SHIFT));
+				(*max_page_count) >> (20 - HV_HYP_PAGE_SHIFT));
 		}
 
 		break;
@@ -1218,7 +1217,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
 
 	for (i = 0; (i * alloc_unit) < num_pages; i++) {
 		if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
-			PAGE_SIZE)
+			HV_HYP_PAGE_SIZE)
 			return i * alloc_unit;
 
 		/*
@@ -1274,9 +1273,9 @@ static void balloon_up(struct work_struct *dummy)
 
 	/*
 	 * We will attempt 2M allocations. However, if we fail to
-	 * allocate 2M chunks, we will go back to 4k allocations.
+	 * allocate 2M chunks, we will go back to PAGE_SIZE allocations.
 	 */
-	alloc_unit = 512;
+	alloc_unit = PAGES_IN_2M;
 
 	avail_pages = si_mem_available();
 	floor = compute_balloon_floor();
@@ -1292,7 +1291,7 @@ static void balloon_up(struct work_struct *dummy)
 	}
 
 	while (!done) {
-		memset(balloon_up_send_buffer, 0, PAGE_SIZE);
+		memset(balloon_up_send_buffer, 0, HV_HYP_PAGE_SIZE);
 		bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
 		bl_resp->hdr.type = DM_BALLOON_RESPONSE;
 		bl_resp->hdr.size = sizeof(struct dm_balloon_response);
@@ -1491,7 +1490,7 @@ static void balloon_onchannelcallback(void *context)
 
 	memset(recv_buffer, 0, sizeof(recv_buffer));
 	vmbus_recvpacket(dev->channel, recv_buffer,
-			 PAGE_SIZE, &recvlen, &requestid);
+			 HV_HYP_PAGE_SIZE, &recvlen, &requestid);
 
 	if (recvlen > 0) {
 		dm_msg = (struct dm_message *)recv_buffer;
-- 
2.17.1


^ permalink raw reply related

* RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-16 19:50 UTC (permalink / raw)
  To: vkuznets
  Cc: KY Srinivasan, Stephen Hemminger, linux-kernel@vger.kernel.org,
	sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <871rxl84ry.fsf@vitty.brq.redhat.com>



> -----Original Message-----
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Friday, August 16, 2019 12:16 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: KY Srinivasan <kys@microsoft.com>; Stephen Hemminger
> <sthemmin@microsoft.com>; linux-kernel@vger.kernel.org;
> sashal@kernel.org; davem@davemloft.net; saeedm@mellanox.com;
> leon@kernel.org; eranbe@mellanox.com; lorenzo.pieralisi@arm.com;
> bhelgaas@google.com; linux-pci@vger.kernel.org; linux-
> hyperv@vger.kernel.org; netdev@vger.kernel.org
> Subject: RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for
> software backchannel interface
> 
> Haiyang Zhang <haiyangz@microsoft.com> writes:
> 
> >
> > The pci_hyperv can only be loaded on VMs on Hyper-V and Azure. Other
> > drivers like MLX5e will have symbolic dependency of pci_hyperv if they
> > use functions exported by pci_hyperv. This dependency will cause other
> > drivers fail to load on other platforms, like VMs on KVM. So we
> > created this mini driver, which can be loaded on any platforms to
> > provide the symbolic dependency.
> 
> (/me wondering is there a nicer way around this, by using __weak or
> something like that...)
> 
> In case this stub is the best solution I'd suggest to rename it to something like
> PCI_HYPERV_INTERFACE to make it clear it is not a separate driver (_MINI
> makes me think so).

Thanks! I will consider those options.

^ permalink raw reply

* RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface
From: Vitaly Kuznetsov @ 2019-08-16 16:16 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: KY Srinivasan, Stephen Hemminger, linux-kernel@vger.kernel.org,
	sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <DM6PR21MB13375FA0BA0220A91EF448E1CAAF0@DM6PR21MB1337.namprd21.prod.outlook.com>

Haiyang Zhang <haiyangz@microsoft.com> writes:

>
> The pci_hyperv can only be loaded on VMs on Hyper-V and Azure. Other 
> drivers like MLX5e will have symbolic dependency of pci_hyperv if they 
> use functions exported by pci_hyperv. This dependency will cause other 
> drivers fail to load on other platforms, like VMs on KVM. So we created 
> this mini driver, which can be loaded on any platforms to provide the 
> symbolic dependency.

(/me wondering is there a nicer way around this, by using __weak or
something like that...)

In case this stub is the best solution I'd suggest to rename it to
something like PCI_HYPERV_INTERFACE to make it clear it is not a
separate driver (_MINI makes me think so).

-- 
Vitaly

^ permalink raw reply

* Re: [PATCH V2 1/2] clocksource/Hyper-v: Allocate Hyper-V tsc page statically
From: Daniel Lezcano @ 2019-08-16 15:00 UTC (permalink / raw)
  To: lantianyu1986, luto, tglx, mingo, bp, hpa, x86, kys, haiyangz,
	sthemmin, sashal, arnd, michael.h.kelley
  Cc: Tianyu Lan, linux-kernel, linux-hyperv, linux-arch
In-Reply-To: <20190814123216.32245-2-Tianyu.Lan@microsoft.com>

On 14/08/2019 14:32, lantianyu1986@gmail.com wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> Prepare to add Hyper-V sched clock callback and move Hyper-V
> Reference TSC initialization much earlier in the boot process.  Earlier
> initialization is needed so that it happens while the timestamp value
> is still 0 and no discontinuity in the timestamp will occur when
> pv_ops.time.sched_clock calculates its offset.  The earlier
> initialization requires that the Hyper-V TSC page be allocated
> statically instead of with vmalloc(), so fixup the references
> to the TSC page and the method of getting its physical address.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>

Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> ---
>     Change since v1:
>            - Update commit log
>            - Remove and operation of tsc page's va with PAGE_MASK
> 
>  arch/x86/entry/vdso/vma.c          |  2 +-
>  drivers/clocksource/hyperv_timer.c | 12 ++++--------
>  2 files changed, 5 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
> index 349a61d8bf34..f5937742b290 100644
> --- a/arch/x86/entry/vdso/vma.c
> +++ b/arch/x86/entry/vdso/vma.c
> @@ -122,7 +122,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
>  
>  		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
>  			return vmf_insert_pfn(vma, vmf->address,
> -					vmalloc_to_pfn(tsc_pg));
> +					virt_to_phys(tsc_pg) >> PAGE_SHIFT);
>  	}
>  
>  	return VM_FAULT_SIGBUS;
> diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
> index ba2c79e6a0ee..432aa331df04 100644
> --- a/drivers/clocksource/hyperv_timer.c
> +++ b/drivers/clocksource/hyperv_timer.c
> @@ -214,17 +214,17 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
>  
>  #ifdef CONFIG_HYPERV_TSCPAGE
>  
> -static struct ms_hyperv_tsc_page *tsc_pg;
> +static struct ms_hyperv_tsc_page tsc_pg __aligned(PAGE_SIZE);
>  
>  struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
>  {
> -	return tsc_pg;
> +	return &tsc_pg;
>  }
>  EXPORT_SYMBOL_GPL(hv_get_tsc_page);
>  
>  static u64 notrace read_hv_sched_clock_tsc(void)
>  {
> -	u64 current_tick = hv_read_tsc_page(tsc_pg);
> +	u64 current_tick = hv_read_tsc_page(&tsc_pg);
>  
>  	if (current_tick == U64_MAX)
>  		hv_get_time_ref_count(current_tick);
> @@ -280,12 +280,8 @@ static bool __init hv_init_tsc_clocksource(void)
>  	if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
>  		return false;
>  
> -	tsc_pg = vmalloc(PAGE_SIZE);
> -	if (!tsc_pg)
> -		return false;
> -
>  	hyperv_cs = &hyperv_cs_tsc;
> -	phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
> +	phys_addr = virt_to_phys(&tsc_pg);
>  
>  	/*
>  	 * The Hyper-V TLFS specifies to preserve the value of reserved
> 


-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog


^ permalink raw reply

* RE: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-16 14:48 UTC (permalink / raw)
  To: vkuznets, sashal@kernel.org, davem@davemloft.net,
	saeedm@mellanox.com, leon@kernel.org, eranbe@mellanox.com,
	lorenzo.pieralisi@arm.com, bhelgaas@google.com,
	linux-pci@vger.kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org
  Cc: KY Srinivasan, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <878srt8fd8.fsf@vitty.brq.redhat.com>



> -----Original Message-----
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Friday, August 16, 2019 8:28 AM
> To: Haiyang Zhang <haiyangz@microsoft.com>; sashal@kernel.org;
> davem@davemloft.net; saeedm@mellanox.com; leon@kernel.org;
> eranbe@mellanox.com; lorenzo.pieralisi@arm.com; bhelgaas@google.com;
> linux-pci@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org
> Cc: Haiyang Zhang <haiyangz@microsoft.com>; KY Srinivasan
> <kys@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for
> software backchannel interface
> 
> Haiyang Zhang <haiyangz@microsoft.com> writes:
> 
> > This mini driver is a helper driver allows other drivers to have a
> > common interface with the Hyper-V PCI frontend driver.
> >
> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> > ---
> >  MAINTAINERS                              |  1 +
> >  drivers/pci/Kconfig                      |  1 +
> >  drivers/pci/controller/Kconfig           |  7 ++++
> >  drivers/pci/controller/Makefile          |  1 +
> >  drivers/pci/controller/pci-hyperv-mini.c | 70
> ++++++++++++++++++++++++++++++++
> >  drivers/pci/controller/pci-hyperv.c      | 12 ++++--
> >  include/linux/hyperv.h                   | 30 ++++++++++----
> >  7 files changed, 111 insertions(+), 11 deletions(-)  create mode
> > 100644 drivers/pci/controller/pci-hyperv-mini.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS index e352550..c4962b9 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -7453,6 +7453,7 @@ F:	drivers/hid/hid-hyperv.c
> >  F:	drivers/hv/
> >  F:	drivers/input/serio/hyperv-keyboard.c
> >  F:	drivers/pci/controller/pci-hyperv.c
> > +F:	drivers/pci/controller/pci-hyperv-mini.c
> >  F:	drivers/net/hyperv/
> >  F:	drivers/scsi/storvsc_drv.c
> >  F:	drivers/uio/uio_hv_generic.c
> > diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index
> > 2ab9240..bb852f5 100644
> > --- a/drivers/pci/Kconfig
> > +++ b/drivers/pci/Kconfig
> > @@ -182,6 +182,7 @@ config PCI_LABEL
> >  config PCI_HYPERV
> >          tristate "Hyper-V PCI Frontend"
> >          depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN
> &&
> > X86_64
> > +	select PCI_HYPERV_MINI
> >          help
> >            The PCI device frontend driver allows the kernel to import arbitrary
> >            PCI devices from a PCI backend to support PCI driver domains.
> > diff --git a/drivers/pci/controller/Kconfig
> > b/drivers/pci/controller/Kconfig index fe9f9f1..8e31cba 100644
> > --- a/drivers/pci/controller/Kconfig
> > +++ b/drivers/pci/controller/Kconfig
> > @@ -281,5 +281,12 @@ config VMD
> >  	  To compile this driver as a module, choose M here: the
> >  	  module will be called vmd.
> >
> > +config PCI_HYPERV_MINI
> > +	tristate "Hyper-V PCI Mini"
> > +	depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN
> && X86_64
> > +	help
> > +	  The Hyper-V PCI Mini is a helper driver allows other drivers to
> > +	  have a common interface with the Hyper-V PCI frontend driver.
> > +
> 
> Out of pure curiosity, why not just export this interface from PCI_HYPERV
> directly? Why do we need this stub?

The pci_hyperv can only be loaded on VMs on Hyper-V and Azure. Other 
drivers like MLX5e will have symbolic dependency of pci_hyperv if they 
use functions exported by pci_hyperv. This dependency will cause other 
drivers fail to load on other platforms, like VMs on KVM. So we created 
this mini driver, which can be loaded on any platforms to provide the 
symbolic dependency.

Thanks,
- Haiyang

^ permalink raw reply

* Re: [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface
From: Vitaly Kuznetsov @ 2019-08-16 12:27 UTC (permalink / raw)
  To: Haiyang Zhang, sashal@kernel.org, davem@davemloft.net,
	saeedm@mellanox.com, leon@kernel.org, eranbe@mellanox.com,
	lorenzo.pieralisi@arm.com, bhelgaas@google.com,
	linux-pci@vger.kernel.org, linux-hyperv@vger.kernel.org,
	netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1565809632-39138-3-git-send-email-haiyangz@microsoft.com>

Haiyang Zhang <haiyangz@microsoft.com> writes:

> This mini driver is a helper driver allows other drivers to
> have a common interface with the Hyper-V PCI frontend driver.
>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  MAINTAINERS                              |  1 +
>  drivers/pci/Kconfig                      |  1 +
>  drivers/pci/controller/Kconfig           |  7 ++++
>  drivers/pci/controller/Makefile          |  1 +
>  drivers/pci/controller/pci-hyperv-mini.c | 70 ++++++++++++++++++++++++++++++++
>  drivers/pci/controller/pci-hyperv.c      | 12 ++++--
>  include/linux/hyperv.h                   | 30 ++++++++++----
>  7 files changed, 111 insertions(+), 11 deletions(-)
>  create mode 100644 drivers/pci/controller/pci-hyperv-mini.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e352550..c4962b9 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -7453,6 +7453,7 @@ F:	drivers/hid/hid-hyperv.c
>  F:	drivers/hv/
>  F:	drivers/input/serio/hyperv-keyboard.c
>  F:	drivers/pci/controller/pci-hyperv.c
> +F:	drivers/pci/controller/pci-hyperv-mini.c
>  F:	drivers/net/hyperv/
>  F:	drivers/scsi/storvsc_drv.c
>  F:	drivers/uio/uio_hv_generic.c
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index 2ab9240..bb852f5 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -182,6 +182,7 @@ config PCI_LABEL
>  config PCI_HYPERV
>          tristate "Hyper-V PCI Frontend"
>          depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
> +	select PCI_HYPERV_MINI
>          help
>            The PCI device frontend driver allows the kernel to import arbitrary
>            PCI devices from a PCI backend to support PCI driver domains.
> diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
> index fe9f9f1..8e31cba 100644
> --- a/drivers/pci/controller/Kconfig
> +++ b/drivers/pci/controller/Kconfig
> @@ -281,5 +281,12 @@ config VMD
>  	  To compile this driver as a module, choose M here: the
>  	  module will be called vmd.
>  
> +config PCI_HYPERV_MINI
> +	tristate "Hyper-V PCI Mini"
> +	depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
> +	help
> +	  The Hyper-V PCI Mini is a helper driver allows other drivers to
> +	  have a common interface with the Hyper-V PCI frontend driver.
> +

Out of pure curiosity, why not just export this interface from
PCI_HYPERV directly? Why do we need this stub?

>  source "drivers/pci/controller/dwc/Kconfig"
>  endmenu
> diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
> index d56a507..77e0132 100644
> --- a/drivers/pci/controller/Makefile
> +++ b/drivers/pci/controller/Makefile
> @@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
>  obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
>  obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
>  obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
> +obj-$(CONFIG_PCI_HYPERV_MINI) += pci-hyperv-mini.o
>  obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
>  obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
>  obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
> diff --git a/drivers/pci/controller/pci-hyperv-mini.c b/drivers/pci/controller/pci-hyperv-mini.c
> new file mode 100644
> index 0000000..9b6cd1c
> --- /dev/null
> +++ b/drivers/pci/controller/pci-hyperv-mini.c
> @@ -0,0 +1,70 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) Microsoft Corporation.
> + *
> + * Author:
> + *   Haiyang Zhang <haiyangz@microsoft.com>
> + *
> + * This mini driver is a helper driver allows other drivers to
> + * have a common interface with the Hyper-V PCI frontend driver.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/hyperv.h>
> +
> +struct hyperv_pci_block_ops hvpci_block_ops;
> +EXPORT_SYMBOL(hvpci_block_ops);
> +
> +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
> +			unsigned int block_id, unsigned int *bytes_returned)
> +{
> +	if (!hvpci_block_ops.read_block)
> +		return -EOPNOTSUPP;
> +
> +	return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
> +					  bytes_returned);
> +}
> +EXPORT_SYMBOL(hyperv_read_cfg_blk);
> +
> +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
> +			 unsigned int block_id)
> +{
> +	if (!hvpci_block_ops.write_block)
> +		return -EOPNOTSUPP;
> +
> +	return hvpci_block_ops.write_block(dev, buf, len, block_id);
> +}
> +EXPORT_SYMBOL(hyperv_write_cfg_blk);
> +
> +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
> +				void (*block_invalidate)(void *context,
> +							 u64 block_mask))
> +{
> +	if (!hvpci_block_ops.reg_blk_invalidate)
> +		return -EOPNOTSUPP;
> +
> +	return hvpci_block_ops.reg_blk_invalidate(dev, context,
> +						  block_invalidate);
> +}
> +EXPORT_SYMBOL(hyperv_reg_block_invalidate);
> +
> +static void __exit exit_hv_pci_mini(void)
> +{
> +	pr_info("unloaded\n");
> +}
> +
> +static int __init init_hv_pci_mini(void)
> +{
> +	pr_info("loaded\n");
> +
> +	return 0;
> +}
> +
> +module_init(init_hv_pci_mini);
> +module_exit(exit_hv_pci_mini);
> +
> +MODULE_DESCRIPTION("Hyper-V PCI Mini");
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 57adeca..9c93ac2 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
>  	*bytes_returned = comp_pkt.bytes_returned;
>  	return 0;
>  }
> -EXPORT_SYMBOL(hv_read_config_block);
>  
>  /**
>   * hv_pci_write_config_compl() - Invoked when a response packet for a write
> @@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
>  
>  	return 0;
>  }
> -EXPORT_SYMBOL(hv_write_config_block);
>  
>  /**
>   * hv_register_block_invalidate() - Invoked when a config block invalidation
> @@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
>  	return 0;
>  
>  }
> -EXPORT_SYMBOL(hv_register_block_invalidate);
>  
>  /* Interrupt management hooks */
>  static void hv_int_desc_free(struct hv_pci_dev *hpdev,
> @@ -3045,10 +3042,19 @@ static int hv_pci_remove(struct hv_device *hdev)
>  static void __exit exit_hv_pci_drv(void)
>  {
>  	vmbus_driver_unregister(&hv_pci_drv);
> +
> +	hvpci_block_ops.read_block = NULL;
> +	hvpci_block_ops.write_block = NULL;
> +	hvpci_block_ops.reg_blk_invalidate = NULL;
>  }
>  
>  static int __init init_hv_pci_drv(void)
>  {
> +	/* Initialize PCI block r/w interface */
> +	hvpci_block_ops.read_block = hv_read_config_block;
> +	hvpci_block_ops.write_block = hv_write_config_block;
> +	hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
> +
>  	return vmbus_driver_register(&hv_pci_drv);
>  }
>  
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 9d37f8c..2afe6fd 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1579,18 +1579,32 @@ struct vmpacket_descriptor *
>  	    pkt = hv_pkt_iter_next(channel, pkt))
>  
>  /*
> - * Functions for passing data between SR-IOV PF and VF drivers.  The VF driver
> + * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
>   * sends requests to read and write blocks. Each block must be 128 bytes or
>   * smaller. Optionally, the VF driver can register a callback function which
>   * will be invoked when the host says that one or more of the first 64 block
>   * IDs is "invalid" which means that the VF driver should reread them.
>   */
>  #define HV_CONFIG_BLOCK_SIZE_MAX 128
> -int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
> -			 unsigned int block_id, unsigned int *bytes_returned);
> -int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
> -			  unsigned int block_id);
> -int hv_register_block_invalidate(struct pci_dev *dev, void *context,
> -				 void (*block_invalidate)(void *context,
> -							  u64 block_mask));
> +
> +int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
> +			unsigned int block_id, unsigned int *bytes_returned);
> +int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
> +			 unsigned int block_id);
> +int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
> +				void (*block_invalidate)(void *context,
> +							 u64 block_mask));
> +
> +struct hyperv_pci_block_ops {
> +	int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
> +			  unsigned int block_id, unsigned int *bytes_returned);
> +	int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
> +			   unsigned int block_id);
> +	int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
> +				  void (*block_invalidate)(void *context,
> +							   u64 block_mask));
> +};
> +
> +extern struct hyperv_pci_block_ops hvpci_block_ops;
> +
>  #endif /* _HYPERV_H */

-- 
Vitaly

^ permalink raw reply

* RE: [PATCH v6,1/2] PCI: hv: Detect and fix Hyper-V PCI domain number collision
From: Haiyang Zhang @ 2019-08-16 12:10 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: sashal@kernel.org, bhelgaas@google.com,
	linux-hyperv@vger.kernel.org, linux-pci@vger.kernel.org,
	KY Srinivasan, Stephen Hemminger, olaf@aepfle.de, vkuznets,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190816095208.GA23677@e121166-lin.cambridge.arm.com>



> -----Original Message-----
> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Friday, August 16, 2019 5:52 AM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: sashal@kernel.org; bhelgaas@google.com; linux-
> hyperv@vger.kernel.org; linux-pci@vger.kernel.org; KY Srinivasan
> <kys@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> olaf@aepfle.de; vkuznets <vkuznets@redhat.com>; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v6,1/2] PCI: hv: Detect and fix Hyper-V PCI domain
> number collision
> 
> On Thu, Aug 15, 2019 at 05:01:37PM +0000, Haiyang Zhang wrote:
> > Currently in Azure cloud, for passthrough devices, the host sets the
> > device instance ID's bytes 8 - 15 to a value derived from the host
> > HWID, which is the same on all devices in a VM. So, the device
> > instance ID's bytes 8 and 9 provided by the host are no longer unique.
> > This affects all Azure hosts since July 2018, and can cause device
> > passthrough to VMs to fail because the bytes 8 and 9 are used as PCI
> > domain number. Collision of domain numbers will cause the second
> > device with the same domain number fail to load.
> >
> > In the cases of collision, we will detect and find another number that
> > is not in use.
> >
> > Suggested-by: Michael Kelley <mikelley@microsoft.com>
> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > Acked-by: Sasha Levin <sashal@kernel.org>
> 
> I assume you will take care of backporting and sending this patch to stable
> kernels given that you have not applied any tag with such request.
> 
> I appreciate it may not be easy to define but a Fixes: tag would help.

Sure, I will add a Fixes tag, and Cc stable. Usually Sasha from our team will
do the stable porting in batches.

Thanks,
- Haiyang

^ permalink raw reply

* Re: [PATCH v6,1/2] PCI: hv: Detect and fix Hyper-V PCI domain number collision
From: Lorenzo Pieralisi @ 2019-08-16  9:52 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: sashal@kernel.org, bhelgaas@google.com,
	linux-hyperv@vger.kernel.org, linux-pci@vger.kernel.org,
	KY Srinivasan, Stephen Hemminger, olaf@aepfle.de, vkuznets,
	linux-kernel@vger.kernel.org
In-Reply-To: <1565888460-38694-1-git-send-email-haiyangz@microsoft.com>

On Thu, Aug 15, 2019 at 05:01:37PM +0000, Haiyang Zhang wrote:
> Currently in Azure cloud, for passthrough devices, the host sets the device
> instance ID's bytes 8 - 15 to a value derived from the host HWID, which is
> the same on all devices in a VM. So, the device instance ID's bytes 8 and 9
> provided by the host are no longer unique. This affects all Azure hosts
> since July 2018, and can cause device passthrough to VMs to fail because
> the bytes 8 and 9 are used as PCI domain number. Collision of domain
> numbers will cause the second device with the same domain number fail to
> load.
> 
> In the cases of collision, we will detect and find another number that is
> not in use.
> 
> Suggested-by: Michael Kelley <mikelley@microsoft.com>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Acked-by: Sasha Levin <sashal@kernel.org>

I assume you will take care of backporting and sending this patch
to stable kernels given that you have not applied any tag with
such request.

I appreciate it may not be easy to define but a Fixes: tag would help.

Thanks,
Lorenzo

> ---
>  drivers/pci/controller/pci-hyperv.c | 92 +++++++++++++++++++++++++++++++------
>  1 file changed, 79 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 40b6254..31b8fd5 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus)
>  		complete(&hbus->remove_event);
>  }
>  
> +#define HVPCI_DOM_MAP_SIZE (64 * 1024)
> +static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
> +
> +/*
> + * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
> + * as invalid for passthrough PCI devices of this driver.
> + */
> +#define HVPCI_DOM_INVALID 0
> +
> +/**
> + * hv_get_dom_num() - Get a valid PCI domain number
> + * Check if the PCI domain number is in use, and return another number if
> + * it is in use.
> + *
> + * @dom: Requested domain number
> + *
> + * return: domain number on success, HVPCI_DOM_INVALID on failure
> + */
> +static u16 hv_get_dom_num(u16 dom)
> +{
> +	unsigned int i;
> +
> +	if (test_and_set_bit(dom, hvpci_dom_map) == 0)
> +		return dom;
> +
> +	for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
> +		if (test_and_set_bit(i, hvpci_dom_map) == 0)
> +			return i;
> +	}
> +
> +	return HVPCI_DOM_INVALID;
> +}
> +
> +/**
> + * hv_put_dom_num() - Mark the PCI domain number as free
> + * @dom: Domain number to be freed
> + */
> +static void hv_put_dom_num(u16 dom)
> +{
> +	clear_bit(dom, hvpci_dom_map);
> +}
> +
>  /**
>   * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
>   * @hdev:	VMBus's tracking struct for this root PCI bus
> @@ -2521,6 +2563,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  			const struct hv_vmbus_device_id *dev_id)
>  {
>  	struct hv_pcibus_device *hbus;
> +	u16 dom_req, dom;
>  	int ret;
>  
>  	/*
> @@ -2535,19 +2578,34 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	hbus->state = hv_pcibus_init;
>  
>  	/*
> -	 * The PCI bus "domain" is what is called "segment" in ACPI and
> -	 * other specs.  Pull it from the instance ID, to get something
> -	 * unique.  Bytes 8 and 9 are what is used in Windows guests, so
> -	 * do the same thing for consistency.  Note that, since this code
> -	 * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee
> -	 * that (1) the only domain in use for something that looks like
> -	 * a physical PCI bus (which is actually emulated by the
> -	 * hypervisor) is domain 0 and (2) there will be no overlap
> -	 * between domains derived from these instance IDs in the same
> -	 * VM.
> +	 * The PCI bus "domain" is what is called "segment" in ACPI and other
> +	 * specs. Pull it from the instance ID, to get something usually
> +	 * unique. In rare cases of collision, we will find out another number
> +	 * not in use.
> +	 *
> +	 * Note that, since this code only runs in a Hyper-V VM, Hyper-V
> +	 * together with this guest driver can guarantee that (1) The only
> +	 * domain used by Gen1 VMs for something that looks like a physical
> +	 * PCI bus (which is actually emulated by the hypervisor) is domain 0.
> +	 * (2) There will be no overlap between domains (after fixing possible
> +	 * collisions) in the same VM.
>  	 */
> -	hbus->sysdata.domain = hdev->dev_instance.b[9] |
> -			       hdev->dev_instance.b[8] << 8;
> +	dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9];
> +	dom = hv_get_dom_num(dom_req);
> +
> +	if (dom == HVPCI_DOM_INVALID) {
> +		dev_err(&hdev->device,
> +			"Unable to use dom# 0x%hx or other numbers", dom_req);
> +		ret = -EINVAL;
> +		goto free_bus;
> +	}
> +
> +	if (dom != dom_req)
> +		dev_info(&hdev->device,
> +			 "PCI dom# 0x%hx has collision, using 0x%hx",
> +			 dom_req, dom);
> +
> +	hbus->sysdata.domain = dom;
>  
>  	hbus->hdev = hdev;
>  	refcount_set(&hbus->remove_lock, 1);
> @@ -2562,7 +2620,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  					   hbus->sysdata.domain);
>  	if (!hbus->wq) {
>  		ret = -ENOMEM;
> -		goto free_bus;
> +		goto free_dom;
>  	}
>  
>  	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
> @@ -2639,6 +2697,8 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	vmbus_close(hdev->channel);
>  destroy_wq:
>  	destroy_workqueue(hbus->wq);
> +free_dom:
> +	hv_put_dom_num(hbus->sysdata.domain);
>  free_bus:
>  	free_page((unsigned long)hbus);
>  	return ret;
> @@ -2720,6 +2780,9 @@ static int hv_pci_remove(struct hv_device *hdev)
>  	put_hvpcibus(hbus);
>  	wait_for_completion(&hbus->remove_event);
>  	destroy_workqueue(hbus->wq);
> +
> +	hv_put_dom_num(hbus->sysdata.domain);
> +
>  	free_page((unsigned long)hbus);
>  	return 0;
>  }
> @@ -2747,6 +2810,9 @@ static void __exit exit_hv_pci_drv(void)
>  
>  static int __init init_hv_pci_drv(void)
>  {
> +	/* Set the invalid domain number's bit, so it will not be used */
> +	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
> +
>  	return vmbus_driver_register(&hv_pci_drv);
>  }
>  
> -- 
> 1.8.3.1
> 

^ permalink raw reply

* [PATCH v6,2/2] PCI: hv: Use bytes 4 and 5 from instance ID as the PCI domain numbers
From: Haiyang Zhang @ 2019-08-15 17:01 UTC (permalink / raw)
  To: sashal@kernel.org, bhelgaas@google.com, lorenzo.pieralisi@arm.com,
	linux-hyperv@vger.kernel.org, linux-pci@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger, olaf@aepfle.de,
	vkuznets, linux-kernel@vger.kernel.org
In-Reply-To: <1565888460-38694-1-git-send-email-haiyangz@microsoft.com>

As recommended by Azure host team, the bytes 4, 5 have more uniqueness
(info entropy) than bytes 8, 9. So now we use bytes 4, 5 as the PCI domain
numbers. On older hosts, bytes 4, 5 can also be used -- no backward
compatibility issues here. The chance of collision is greatly reduced.

In the rare cases of collision, the driver code detects and finds another
number that is not in use.

Suggested-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Acked-by: Sasha Levin <sashal@kernel.org>
---
 drivers/pci/controller/pci-hyperv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 31b8fd5..4f3d97e 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -2590,7 +2590,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 	 * (2) There will be no overlap between domains (after fixing possible
 	 * collisions) in the same VM.
 	 */
-	dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9];
+	dom_req = hdev->dev_instance.b[5] << 8 | hdev->dev_instance.b[4];
 	dom = hv_get_dom_num(dom_req);
 
 	if (dom == HVPCI_DOM_INVALID) {
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH v6,1/2] PCI: hv: Detect and fix Hyper-V PCI domain number collision
From: Haiyang Zhang @ 2019-08-15 17:01 UTC (permalink / raw)
  To: sashal@kernel.org, bhelgaas@google.com, lorenzo.pieralisi@arm.com,
	linux-hyperv@vger.kernel.org, linux-pci@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger, olaf@aepfle.de,
	vkuznets, linux-kernel@vger.kernel.org

Currently in Azure cloud, for passthrough devices, the host sets the device
instance ID's bytes 8 - 15 to a value derived from the host HWID, which is
the same on all devices in a VM. So, the device instance ID's bytes 8 and 9
provided by the host are no longer unique. This affects all Azure hosts
since July 2018, and can cause device passthrough to VMs to fail because
the bytes 8 and 9 are used as PCI domain number. Collision of domain
numbers will cause the second device with the same domain number fail to
load.

In the cases of collision, we will detect and find another number that is
not in use.

Suggested-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Acked-by: Sasha Levin <sashal@kernel.org>
---
 drivers/pci/controller/pci-hyperv.c | 92 +++++++++++++++++++++++++++++++------
 1 file changed, 79 insertions(+), 13 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b6254..31b8fd5 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus)
 		complete(&hbus->remove_event);
 }
 
+#define HVPCI_DOM_MAP_SIZE (64 * 1024)
+static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
+
+/*
+ * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
+ * as invalid for passthrough PCI devices of this driver.
+ */
+#define HVPCI_DOM_INVALID 0
+
+/**
+ * hv_get_dom_num() - Get a valid PCI domain number
+ * Check if the PCI domain number is in use, and return another number if
+ * it is in use.
+ *
+ * @dom: Requested domain number
+ *
+ * return: domain number on success, HVPCI_DOM_INVALID on failure
+ */
+static u16 hv_get_dom_num(u16 dom)
+{
+	unsigned int i;
+
+	if (test_and_set_bit(dom, hvpci_dom_map) == 0)
+		return dom;
+
+	for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
+		if (test_and_set_bit(i, hvpci_dom_map) == 0)
+			return i;
+	}
+
+	return HVPCI_DOM_INVALID;
+}
+
+/**
+ * hv_put_dom_num() - Mark the PCI domain number as free
+ * @dom: Domain number to be freed
+ */
+static void hv_put_dom_num(u16 dom)
+{
+	clear_bit(dom, hvpci_dom_map);
+}
+
 /**
  * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
  * @hdev:	VMBus's tracking struct for this root PCI bus
@@ -2521,6 +2563,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 			const struct hv_vmbus_device_id *dev_id)
 {
 	struct hv_pcibus_device *hbus;
+	u16 dom_req, dom;
 	int ret;
 
 	/*
@@ -2535,19 +2578,34 @@ static int hv_pci_probe(struct hv_device *hdev,
 	hbus->state = hv_pcibus_init;
 
 	/*
-	 * The PCI bus "domain" is what is called "segment" in ACPI and
-	 * other specs.  Pull it from the instance ID, to get something
-	 * unique.  Bytes 8 and 9 are what is used in Windows guests, so
-	 * do the same thing for consistency.  Note that, since this code
-	 * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee
-	 * that (1) the only domain in use for something that looks like
-	 * a physical PCI bus (which is actually emulated by the
-	 * hypervisor) is domain 0 and (2) there will be no overlap
-	 * between domains derived from these instance IDs in the same
-	 * VM.
+	 * The PCI bus "domain" is what is called "segment" in ACPI and other
+	 * specs. Pull it from the instance ID, to get something usually
+	 * unique. In rare cases of collision, we will find out another number
+	 * not in use.
+	 *
+	 * Note that, since this code only runs in a Hyper-V VM, Hyper-V
+	 * together with this guest driver can guarantee that (1) The only
+	 * domain used by Gen1 VMs for something that looks like a physical
+	 * PCI bus (which is actually emulated by the hypervisor) is domain 0.
+	 * (2) There will be no overlap between domains (after fixing possible
+	 * collisions) in the same VM.
 	 */
-	hbus->sysdata.domain = hdev->dev_instance.b[9] |
-			       hdev->dev_instance.b[8] << 8;
+	dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9];
+	dom = hv_get_dom_num(dom_req);
+
+	if (dom == HVPCI_DOM_INVALID) {
+		dev_err(&hdev->device,
+			"Unable to use dom# 0x%hx or other numbers", dom_req);
+		ret = -EINVAL;
+		goto free_bus;
+	}
+
+	if (dom != dom_req)
+		dev_info(&hdev->device,
+			 "PCI dom# 0x%hx has collision, using 0x%hx",
+			 dom_req, dom);
+
+	hbus->sysdata.domain = dom;
 
 	hbus->hdev = hdev;
 	refcount_set(&hbus->remove_lock, 1);
@@ -2562,7 +2620,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 					   hbus->sysdata.domain);
 	if (!hbus->wq) {
 		ret = -ENOMEM;
-		goto free_bus;
+		goto free_dom;
 	}
 
 	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
@@ -2639,6 +2697,8 @@ static int hv_pci_probe(struct hv_device *hdev,
 	vmbus_close(hdev->channel);
 destroy_wq:
 	destroy_workqueue(hbus->wq);
+free_dom:
+	hv_put_dom_num(hbus->sysdata.domain);
 free_bus:
 	free_page((unsigned long)hbus);
 	return ret;
@@ -2720,6 +2780,9 @@ static int hv_pci_remove(struct hv_device *hdev)
 	put_hvpcibus(hbus);
 	wait_for_completion(&hbus->remove_event);
 	destroy_workqueue(hbus->wq);
+
+	hv_put_dom_num(hbus->sysdata.domain);
+
 	free_page((unsigned long)hbus);
 	return 0;
 }
@@ -2747,6 +2810,9 @@ static void __exit exit_hv_pci_drv(void)
 
 static int __init init_hv_pci_drv(void)
 {
+	/* Set the invalid domain number's bit, so it will not be used */
+	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
+
 	return vmbus_driver_register(&hv_pci_drv);
 }
 
-- 
1.8.3.1


^ permalink raw reply related

* RE: [PATCH v5,1/2] PCI: hv: Detect and fix Hyper-V PCI domain number collision
From: Haiyang Zhang @ 2019-08-15 16:55 UTC (permalink / raw)
  To: Lorenzo Pieralisi
  Cc: sashal@kernel.org, bhelgaas@google.com,
	linux-hyperv@vger.kernel.org, linux-pci@vger.kernel.org,
	KY Srinivasan, Stephen Hemminger, olaf@aepfle.de, vkuznets,
	linux-kernel@vger.kernel.org
In-Reply-To: <20190815160908.GA29157@e121166-lin.cambridge.arm.com>



> -----Original Message-----
> From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Sent: Thursday, August 15, 2019 12:11 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: sashal@kernel.org; bhelgaas@google.com; linux-
> hyperv@vger.kernel.org; linux-pci@vger.kernel.org; KY Srinivasan
> <kys@microsoft.com>; Stephen Hemminger <sthemmin@microsoft.com>;
> olaf@aepfle.de; vkuznets <vkuznets@redhat.com>; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v5,1/2] PCI: hv: Detect and fix Hyper-V PCI domain
> number collision
> 
> On Wed, Aug 14, 2019 at 03:52:15PM +0000, Haiyang Zhang wrote:
> > Currently in Azure cloud, for passthrough devices, the host sets the device
> > instance ID's bytes 8 - 15 to a value derived from the host HWID, which is
> > the same on all devices in a VM. So, the device instance ID's bytes 8 and 9
> > provided by the host are no longer unique. This affects all Azure hosts
> > since last year, and can cause device passthrough to VMs to fail because
> 
> Bjorn already asked, can you be a bit more specific than "since last
> year" here please ?
> 
> It would be useful to understand when/how this became an issue.
The host change happens around July 2018. The Azure roll out takes
multi weeks, so there is no specific date. I will include the Month
Year in the log.

> 
> > the bytes 8 and 9 are used as PCI domain number. Collision of domain
> > numbers will cause the second device with the same domain number fail to
> > load.
> >
> > In the cases of collision, we will detect and find another number that is
> > not in use.
> >
> > Suggested-by: Michael Kelley <mikelley@microsoft.com>
> > Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> > Acked-by: Sasha Levin <sashal@kernel.org>
> > ---
> >  drivers/pci/controller/pci-hyperv.c | 92
> +++++++++++++++++++++++++++++++------
> >  1 file changed, 79 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-
> hyperv.c
> > index 40b6254..31b8fd5 100644
> > --- a/drivers/pci/controller/pci-hyperv.c
> > +++ b/drivers/pci/controller/pci-hyperv.c
> > @@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct
> hv_pcibus_device *hbus)
> >  		complete(&hbus->remove_event);
> >  }
> >
> > +#define HVPCI_DOM_MAP_SIZE (64 * 1024)
> > +static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
> > +
> > +/*
> > + * PCI domain number 0 is used by emulated devices on Gen1 VMs, so
> define 0
> > + * as invalid for passthrough PCI devices of this driver.
> > + */
> > +#define HVPCI_DOM_INVALID 0
> > +
> > +/**
> > + * hv_get_dom_num() - Get a valid PCI domain number
> > + * Check if the PCI domain number is in use, and return another number if
> > + * it is in use.
> > + *
> > + * @dom: Requested domain number
> > + *
> > + * return: domain number on success, HVPCI_DOM_INVALID on failure
> > + */
> > +static u16 hv_get_dom_num(u16 dom)
> > +{
> > +	unsigned int i;
> 
> > +
> > +	if (test_and_set_bit(dom, hvpci_dom_map) == 0)
> > +		return dom;
> > +
> > +	for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
> > +		if (test_and_set_bit(i, hvpci_dom_map) == 0)
> > +			return i;
> > +	}
> 
> Don't you need locking around code reading/updating hvpci_dom_map ?

If the bit changes after for_each_clear_bit() considers it as a "clear bit" - the
test_and_set_bit() does test&set in an atomic operation - the return value
will be 1 instead of 0. Then the loop will continue to the next clear bit, until
the test_and_set_bit() is successful. So no locking is necessary here.

Thanks,
- Haiyang


^ permalink raw reply

* Re: [PATCH v5,1/2] PCI: hv: Detect and fix Hyper-V PCI domain number collision
From: Lorenzo Pieralisi @ 2019-08-15 16:10 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: sashal@kernel.org, bhelgaas@google.com,
	linux-hyperv@vger.kernel.org, linux-pci@vger.kernel.org,
	KY Srinivasan, Stephen Hemminger, olaf@aepfle.de, vkuznets,
	linux-kernel@vger.kernel.org
In-Reply-To: <1565797908-5970-1-git-send-email-haiyangz@microsoft.com>

On Wed, Aug 14, 2019 at 03:52:15PM +0000, Haiyang Zhang wrote:
> Currently in Azure cloud, for passthrough devices, the host sets the device
> instance ID's bytes 8 - 15 to a value derived from the host HWID, which is
> the same on all devices in a VM. So, the device instance ID's bytes 8 and 9
> provided by the host are no longer unique. This affects all Azure hosts
> since last year, and can cause device passthrough to VMs to fail because

Bjorn already asked, can you be a bit more specific than "since last
year" here please ?

It would be useful to understand when/how this became an issue.

> the bytes 8 and 9 are used as PCI domain number. Collision of domain
> numbers will cause the second device with the same domain number fail to
> load.
> 
> In the cases of collision, we will detect and find another number that is
> not in use.
> 
> Suggested-by: Michael Kelley <mikelley@microsoft.com>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Acked-by: Sasha Levin <sashal@kernel.org>
> ---
>  drivers/pci/controller/pci-hyperv.c | 92 +++++++++++++++++++++++++++++++------
>  1 file changed, 79 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 40b6254..31b8fd5 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus)
>  		complete(&hbus->remove_event);
>  }
>  
> +#define HVPCI_DOM_MAP_SIZE (64 * 1024)
> +static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
> +
> +/*
> + * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
> + * as invalid for passthrough PCI devices of this driver.
> + */
> +#define HVPCI_DOM_INVALID 0
> +
> +/**
> + * hv_get_dom_num() - Get a valid PCI domain number
> + * Check if the PCI domain number is in use, and return another number if
> + * it is in use.
> + *
> + * @dom: Requested domain number
> + *
> + * return: domain number on success, HVPCI_DOM_INVALID on failure
> + */
> +static u16 hv_get_dom_num(u16 dom)
> +{
> +	unsigned int i;

> +
> +	if (test_and_set_bit(dom, hvpci_dom_map) == 0)
> +		return dom;
> +
> +	for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
> +		if (test_and_set_bit(i, hvpci_dom_map) == 0)
> +			return i;
> +	}

Don't you need locking around code reading/updating hvpci_dom_map ?

Thanks,
Lorenzo

> +
> +	return HVPCI_DOM_INVALID;
> +}
> +
> +/**
> + * hv_put_dom_num() - Mark the PCI domain number as free
> + * @dom: Domain number to be freed
> + */
> +static void hv_put_dom_num(u16 dom)
> +{
> +	clear_bit(dom, hvpci_dom_map);
> +}
> +
>  /**
>   * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
>   * @hdev:	VMBus's tracking struct for this root PCI bus
> @@ -2521,6 +2563,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  			const struct hv_vmbus_device_id *dev_id)
>  {
>  	struct hv_pcibus_device *hbus;
> +	u16 dom_req, dom;
>  	int ret;
>  
>  	/*
> @@ -2535,19 +2578,34 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	hbus->state = hv_pcibus_init;
>  
>  	/*
> -	 * The PCI bus "domain" is what is called "segment" in ACPI and
> -	 * other specs.  Pull it from the instance ID, to get something
> -	 * unique.  Bytes 8 and 9 are what is used in Windows guests, so
> -	 * do the same thing for consistency.  Note that, since this code
> -	 * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee
> -	 * that (1) the only domain in use for something that looks like
> -	 * a physical PCI bus (which is actually emulated by the
> -	 * hypervisor) is domain 0 and (2) there will be no overlap
> -	 * between domains derived from these instance IDs in the same
> -	 * VM.
> +	 * The PCI bus "domain" is what is called "segment" in ACPI and other
> +	 * specs. Pull it from the instance ID, to get something usually
> +	 * unique. In rare cases of collision, we will find out another number
> +	 * not in use.
> +	 *
> +	 * Note that, since this code only runs in a Hyper-V VM, Hyper-V
> +	 * together with this guest driver can guarantee that (1) The only
> +	 * domain used by Gen1 VMs for something that looks like a physical
> +	 * PCI bus (which is actually emulated by the hypervisor) is domain 0.
> +	 * (2) There will be no overlap between domains (after fixing possible
> +	 * collisions) in the same VM.
>  	 */
> -	hbus->sysdata.domain = hdev->dev_instance.b[9] |
> -			       hdev->dev_instance.b[8] << 8;
> +	dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9];
> +	dom = hv_get_dom_num(dom_req);
> +
> +	if (dom == HVPCI_DOM_INVALID) {
> +		dev_err(&hdev->device,
> +			"Unable to use dom# 0x%hx or other numbers", dom_req);
> +		ret = -EINVAL;
> +		goto free_bus;
> +	}
> +
> +	if (dom != dom_req)
> +		dev_info(&hdev->device,
> +			 "PCI dom# 0x%hx has collision, using 0x%hx",
> +			 dom_req, dom);
> +
> +	hbus->sysdata.domain = dom;
>  
>  	hbus->hdev = hdev;
>  	refcount_set(&hbus->remove_lock, 1);
> @@ -2562,7 +2620,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  					   hbus->sysdata.domain);
>  	if (!hbus->wq) {
>  		ret = -ENOMEM;
> -		goto free_bus;
> +		goto free_dom;
>  	}
>  
>  	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
> @@ -2639,6 +2697,8 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	vmbus_close(hdev->channel);
>  destroy_wq:
>  	destroy_workqueue(hbus->wq);
> +free_dom:
> +	hv_put_dom_num(hbus->sysdata.domain);
>  free_bus:
>  	free_page((unsigned long)hbus);
>  	return ret;
> @@ -2720,6 +2780,9 @@ static int hv_pci_remove(struct hv_device *hdev)
>  	put_hvpcibus(hbus);
>  	wait_for_completion(&hbus->remove_event);
>  	destroy_workqueue(hbus->wq);
> +
> +	hv_put_dom_num(hbus->sysdata.domain);
> +
>  	free_page((unsigned long)hbus);
>  	return 0;
>  }
> @@ -2747,6 +2810,9 @@ static void __exit exit_hv_pci_drv(void)
>  
>  static int __init init_hv_pci_drv(void)
>  {
> +	/* Set the invalid domain number's bit, so it will not be used */
> +	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
> +
>  	return vmbus_driver_register(&hv_pci_drv);
>  }
>  
> -- 
> 1.8.3.1
> 

^ permalink raw reply

* Re: [PATCH V2 3/3] KVM/Hyper-V/VMX: Add direct tlb flush support
From: Tianyu Lan @ 2019-08-15 12:24 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, H. Peter Anvin,
	the arch/x86 maintainers, Radim Krcmar, michael.h.kelley,
	Vitaly Kuznetsov, linux-hyperv, linux-kernel@vger kernel org, kvm,
	Tianyu Lan
In-Reply-To: <1a1410a7-e2dc-904e-a271-3e2017d42bae@redhat.com>

Hi Paolo:
          Thanks for your review.

On Wed, Aug 14, 2019 at 9:33 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 14/08/19 09:34, lantianyu1986@gmail.com wrote:
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index c5da875f19e3..479ad76661e6 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -500,6 +500,7 @@ struct kvm {
> >       struct srcu_struct srcu;
> >       struct srcu_struct irq_srcu;
> >       pid_t userspace_pid;
> > +     struct hv_partition_assist_pg *hv_pa_pg;
> >  };
> >
> >  #define kvm_err(fmt, ...) \
>
> This does not exist on non-x86 architectures.  Please move it to struct
> kvm_arch.
>
Nice catch. Will update in the next version. Thanks.
-- 
Best regards
Tianyu Lan

^ permalink raw reply

* Re: [PATCH net-next, 5/6] net/mlx5: Add HV VHCA control agent
From: Eran Ben Elisha @ 2019-08-15 11:35 UTC (permalink / raw)
  To: Mark Bloch, haiyangz, sashal@kernel.org, davem@davemloft.net,
	Saeed Mahameed, leon@kernel.org, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: kys, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <745f663e-0c56-84d0-a02b-106f788e3e8f@mellanox.com>



On 8/14/2019 11:41 PM, Mark Bloch wrote:
> 
> 
> On 8/14/19 12:09 PM, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <eranbe@mellanox.com>
>>
>> Control agent is responsible over of the control block (ID 0). It should
>> update the PF via this block about every capability change. In addition,
>> upon block 0 invalidate, it should activate all other supported agents
>> with data requests from the PF.
>>
>> Upon agent create/destroy, the invalidate callback of the control agent
>> is being called in order to update the PF driver about this change.
>>
>> The control agent is an integral part of HV VHCA and will be created
>> and destroy as part of the HV VHCA init/cleanup flow.
>>
>> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>> ---
>>   .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 122 ++++++++++++++++++++-
>>   .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  |   1 +
>>   2 files changed, 121 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> index b2eebdf..3c7fffa 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> @@ -110,22 +110,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
>>   	queue_work(hv_vhca->work_queue, &work->invalidate_work);
>>   }
>>   
>> +#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
>> +
>> +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
>> +					struct mlx5_hv_vhca_control_block *block)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
>> +		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
>> +
>> +		if (!agent || !agent->control)
>> +			continue;
>> +
>> +		if (!(AGENT_MASK(agent->type) & block->control))
>> +			continue;
>> +
>> +		agent->control(agent, block);
>> +	}
>> +}
>> +
>> +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
>> +				      u32 *capabilities)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
>> +		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
>> +
>> +		if (agent)
>> +			*capabilities |= AGENT_MASK(agent->type);
>> +	}
>> +}
>> +
>> +static void
>> +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
>> +				      u64 block_mask)
>> +{
>> +	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
>> +	struct mlx5_core_dev *dev = hv_vhca->dev;
>> +	struct mlx5_hv_vhca_control_block *block;
>> +	u32 capabilities = 0;
>> +	int err;
>> +
>> +	block = kzalloc(sizeof(*block), GFP_KERNEL);
>> +	if (!block)
>> +		return;
>> +
>> +	err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
>> +	if (err)
>> +		goto free_block;
>> +
>> +	mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
>> +
>> +	/* In case no capabilities, send empty block in return */
>> +	if (!capabilities) {
>> +		memset(block, 0, sizeof(*block));
>> +		goto write;
>> +	}
>> +
>> +	if (block->capabilities != capabilities)
>> +		block->capabilities = capabilities;
>> +
>> +	if (block->control & ~capabilities)
>> +		goto free_block;
>> +
>> +	mlx5_hv_vhca_agents_control(hv_vhca, block);
>> +	block->command_ack = block->command;
>> +
>> +write:
>> +	mlx5_hv_write_config(dev, block, sizeof(*block), 0);
>> +
>> +free_block:
>> +	kfree(block);
>> +}
>> +
>> +static struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
>> +					 NULL,
>> +					 mlx5_hv_vhca_control_agent_invalidate,
>> +					 NULL, NULL);
>> +}
>> +
>> +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +	mlx5_hv_vhca_agent_destroy(agent);
>> +}
>> +
>>   int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>>   {
>> +	struct mlx5_hv_vhca_agent *agent;
>> +	int err;
>> +
>>   	if (IS_ERR_OR_NULL(hv_vhca))
>>   		return IS_ERR_OR_NULL(hv_vhca);
>>   
>> -	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
>> -					   mlx5_hv_vhca_invalidate);
>> +	err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
>> +					  mlx5_hv_vhca_invalidate);
>> +	if (err)
>> +		return err;
>> +
>> +	agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
>> +	if (IS_ERR_OR_NULL(agent)) {
>> +		mlx5_hv_unregister_invalidate(hv_vhca->dev);
>> +		return IS_ERR_OR_NULL(agent);
>> +	}
>> +
>> +	hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
>> +
>> +	return 0;
>>   }
>>   
>>   void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>>   {
>> +	struct mlx5_hv_vhca_agent *agent;
>>   	int i;
>>   
>>   	if (IS_ERR_OR_NULL(hv_vhca))
>>   		return;
>>   
>> +	agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
>> +	if (!IS_ERR_OR_NULL(agent))
>> +		mlx5_hv_vhca_control_agent_destroy(agent);
> 
> Can the agent be err ptr here?

Only NULL, will fix.

> 
>> +
>>   	mutex_lock(&hv_vhca->agents_lock);
>>   	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
>>   		WARN_ON(hv_vhca->agents[i]);
> 
> With the comment above in mind, here you check only for not null

Comment above was right... after fixing it, all is aligned here.

> 
>> @@ -135,6 +244,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>>   	mlx5_hv_unregister_invalidate(hv_vhca->dev);
>>   }
>>   
>> +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
>> +}
>> +
>>   struct mlx5_hv_vhca_agent *
>>   mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>>   			  enum mlx5_hv_vhca_agent_type type,
>> @@ -168,6 +282,8 @@ struct mlx5_hv_vhca_agent *
>>   	hv_vhca->agents[type] = agent;
>>   	mutex_unlock(&hv_vhca->agents_lock);
>>   
>> +	mlx5_hv_vhca_agents_update(hv_vhca);
>> +
>>   	return agent;
>>   }
>>   
>> @@ -189,6 +305,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>>   		agent->cleanup(agent);
>>   
>>   	kfree(agent);
>> +
>> +	mlx5_hv_vhca_agents_update(hv_vhca);
>>   }
>>   
>>   static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> index fa7ee85..6f4bfb1 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> @@ -12,6 +12,7 @@
>>   struct mlx5_hv_vhca_control_block;
>>   
>>   enum mlx5_hv_vhca_agent_type {
>> +	MLX5_HV_VHCA_AGENT_CONTROL = 0,
> 
> No need to start value

I find it more easy to read when having the value explicitly.
If you or Saeed has a strong opinion against it, this can be easily fixed.

> 
>>   	MLX5_HV_VHCA_AGENT_MAX = 32,
>>   };
>>   
>>
> 
> Mark
> 

^ permalink raw reply

* Re: [PATCH net-next, 4/6] net/mlx5: Add HV VHCA infrastructure
From: Eran Ben Elisha @ 2019-08-15 11:35 UTC (permalink / raw)
  To: Mark Bloch, haiyangz, sashal@kernel.org, davem@davemloft.net,
	Saeed Mahameed, leon@kernel.org, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: kys, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <65561e09-6fa5-b223-b547-36736c4a9d83@mellanox.com>



On 8/14/2019 11:41 PM, Mark Bloch wrote:
> 
> 
> On 8/14/19 12:08 PM, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <eranbe@mellanox.com>
>>
>> HV VHCA is a layer which provides PF to VF communication channel based on
>> HyperV PCI config channel. It implements Mellanox's Inter VHCA control
>> communication protocol. The protocol contains control block in order to
>> pass messages between the PF and VF drivers, and data blocks in order to
>> pass actual data.
>>
>> The infrastructure is agent based. Each agent will be responsible of
>> contiguous buffer blocks in the VHCA config space. This infrastructure will
>> bind agents to their blocks, and those agents can only access read/write
>> the buffer blocks assigned to them. Each agent will provide three
>> callbacks (control, invalidate, cleanup). Control will be invoked when
>> block-0 is invalidated with a command that concerns this agent. Invalidate
>> callback will be invoked if one of the blocks assigned to this agent was
>> invalidated. Cleanup will be invoked before the agent is being freed in
>> order to clean all of its open resources or deferred works.
>>
>> Block-0 serves as the control block. All execution commands from the PF
>> will be written by the PF over this block. VF will ack on those by
>> writing on block-0 as well. Its format is described by struct
>> mlx5_hv_vhca_control_block layout.
>>
>> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
>>   .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 247 +++++++++++++++++++++
>>   .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  | 102 +++++++++
>>   drivers/net/ethernet/mellanox/mlx5/core/main.c     |   7 +
>>   include/linux/mlx5/driver.h                        |   2 +
>>   5 files changed, 359 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> index a8950b1..e0a1056 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> @@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
>>   mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>>   mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>>   mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
>> -mlx5_core-$(CONFIG_PCI_HYPERV_MINI)     += lib/hv.o
>> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI)+= lib/hv.o lib/hv_vhca.o
>>   
>>   #
>>   # Ipoib netdev
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> new file mode 100644
>> index 0000000..b2eebdf
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>> @@ -0,0 +1,247 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +// Copyright (c) 2018 Mellanox Technologies
>> +
>> +#include <linux/hyperv.h>
>> +#include "mlx5_core.h"
>> +#include "lib/hv.h"
>> +#include "lib/hv_vhca.h"
>> +
>> +struct mlx5_hv_vhca {
>> +	struct mlx5_core_dev       *dev;
>> +	struct workqueue_struct    *work_queue;
>> +	struct mlx5_hv_vhca_agent  *agents[MLX5_HV_VHCA_AGENT_MAX];
>> +	struct mutex                agents_lock; /* Protect agents array */
>> +};
>> +
>> +struct mlx5_hv_vhca_work {
>> +	struct work_struct     invalidate_work;
>> +	struct mlx5_hv_vhca   *hv_vhca;
>> +	u64                    block_mask;
>> +};
>> +
>> +struct mlx5_hv_vhca_data_block {
>> +	u16     sequence;
>> +	u16     offset;
>> +	u8      reserved[4];
>> +	u64     data[15];
>> +};
>> +
>> +struct mlx5_hv_vhca_agent {
>> +	enum mlx5_hv_vhca_agent_type	 type;
>> +	struct mlx5_hv_vhca		*hv_vhca;
>> +	void				*priv;
>> +	int                              seq;
> Why is this int? and in data block is u16?

No good reason. Should be changed to u16.

> 
>> +	void (*control)(struct mlx5_hv_vhca_agent *agent,
>> +			struct mlx5_hv_vhca_control_block *block);
>> +	void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
>> +			   u64 block_mask);
>> +	void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
>> +};
>> +
>> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
>> +{
>> +	struct mlx5_hv_vhca *hv_vhca = NULL;
>> +
>> +	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
>> +	if (!hv_vhca)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
>> +	if (!hv_vhca->work_queue) {
>> +		kfree(hv_vhca);
>> +		return ERR_PTR(-ENOMEM);
>> +	}
>> +
>> +	hv_vhca->dev = dev;
>> +	mutex_init(&hv_vhca->agents_lock);
>> +
>> +	return hv_vhca;
>> +}
>> +
>> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return;
>> +
>> +	flush_workqueue(hv_vhca->work_queue);
>> +	destroy_workqueue(hv_vhca->work_queue);
> 
> Why not just destroy?

Will fix.

> 
>> +	kfree(hv_vhca);
>> +}
>> +
>> +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
>> +{
>> +	struct mlx5_hv_vhca_work *hwork;
>> +	struct mlx5_hv_vhca *hv_vhca;
>> +	int i;
>> +
>> +	hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
>> +	hv_vhca = hwork->hv_vhca;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
>> +		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
>> +
>> +		if (!agent || !agent->invalidate)
>> +			continue;
>> +
>> +		if (!(BIT(agent->type) & hwork->block_mask))
>> +			continue;
>> +
>> +		agent->invalidate(agent, hwork->block_mask);
>> +	}
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	kfree(hwork);
>> +}
>> +
>> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
>> +{
>> +	struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
>> +	struct mlx5_hv_vhca_work *work;
>> +
>> +	work = kzalloc(sizeof(*work), GFP_ATOMIC);
>> +	if (!work)
>> +		return;
>> +
>> +	INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
>> +	work->hv_vhca    = hv_vhca;
>> +	work->block_mask = block_mask;
>> +
>> +	queue_work(hv_vhca->work_queue, &work->invalidate_work);
>> +}
>> +
>> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return IS_ERR_OR_NULL(hv_vhca);
>> +
>> +	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
>> +					   mlx5_hv_vhca_invalidate);
>> +}
>> +
>> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	int i;
>> +
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
>> +		WARN_ON(hv_vhca->agents[i]);
>> +
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	mlx5_hv_unregister_invalidate(hv_vhca->dev);
>> +}
>> +
>> +struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> +			  enum mlx5_hv_vhca_agent_type type,
>> +			  void (*control)(struct mlx5_hv_vhca_agent*,
>> +					  struct mlx5_hv_vhca_control_block *block),
>> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> +					     u64 block_mask),
>> +			  void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
>> +			  void *priv)
>> +{
>> +	struct mlx5_hv_vhca_agent *agent;
>> +
>> +	if (IS_ERR_OR_NULL(hv_vhca))
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	if (hv_vhca->agents[type])
>> +		return ERR_PTR(-EINVAL);> +
>> +	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
>> +	if (!agent)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	agent->type      = type;
>> +	agent->hv_vhca   = hv_vhca;
>> +	agent->priv      = priv;
>> +	agent->control   = control;
>> +	agent->invalidate = invalidate;
>> +	agent->cleanup   = cleaup;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +	hv_vhca->agents[type] = agent;
>> +	mutex_unlock(&hv_vhca->agents_lock);
> 
> You have a check for this not under a lock a few lines up,
> but assign under a lock?

good point, will add.

> 
> Mark
> 
>> +
>> +	return agent;
>> +}
>> +
>> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
>> +
>> +	mutex_lock(&hv_vhca->agents_lock);
>> +
>> +	if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
>> +		mutex_unlock(&hv_vhca->agents_lock);
>> +		return;
>> +	}
>> +
>> +	hv_vhca->agents[agent->type] = NULL;
>> +	mutex_unlock(&hv_vhca->agents_lock);
>> +
>> +	if (agent->cleanup)
>> +		agent->cleanup(agent);
>> +
>> +	kfree(agent);
>> +}
>> +
>> +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
>> +					   struct mlx5_hv_vhca_data_block *data_block,
>> +					   void *src, int len, int *offset)
>> +{
>> +	int bytes = min_t(int, (int)sizeof(data_block->data), len);
>> +
>> +	data_block->sequence = agent->seq;
>> +	data_block->offset   = (*offset)++;
>> +	memcpy(data_block->data, src, bytes);
>> +
>> +	return bytes;
>> +}
>> +
>> +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +	agent->seq++;
>> +}
>> +
>> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
>> +			     void *buf, int len)
>> +{
>> +	int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
>> +	int block_offset = 0;
>> +	int total = 0;
>> +	int err;
>> +
>> +	while (len) {
>> +		struct mlx5_hv_vhca_data_block data_block = {0};
>> +		int bytes;
>> +
>> +		bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
>> +							buf + total,
>> +							len, &block_offset);
>> +		if (!bytes)
>> +			return -ENOMEM;
>> +
>> +		err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
>> +					   sizeof(data_block), offset);
>> +		if (err)
>> +			return err;
>> +
>> +		total += bytes;
>> +		len   -= bytes;
>> +	}
>> +
>> +	mlx5_hv_vhca_agent_seq_update(agent);
>> +
>> +	return 0;
>> +}
>> +
>> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +	return agent->priv;
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> new file mode 100644
>> index 0000000..fa7ee85
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
>> +/* Copyright (c) 2019 Mellanox Technologies. */
>> +
>> +#ifndef __LIB_HV_VHCA_H__
>> +#define __LIB_HV_VHCA_H__
>> +
>> +#include "en.h"
>> +#include "lib/hv.h"
>> +
>> +struct mlx5_hv_vhca_agent;
>> +struct mlx5_hv_vhca;
>> +struct mlx5_hv_vhca_control_block;
>> +
>> +enum mlx5_hv_vhca_agent_type {
>> +	MLX5_HV_VHCA_AGENT_MAX = 32,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
>> +
>> +struct mlx5_hv_vhca_control_block {
>> +	u32     capabilities;
>> +	u32     control;
>> +	u16     command;
>> +	u16     command_ack;
>> +	u16     version;
>> +	u16     rings;
>> +	u32     reserved1[28];
>> +};
>> +
>> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
>> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
>> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
>> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
>> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
>> +
>> +struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> +			  enum mlx5_hv_vhca_agent_type type,
>> +			  void (*control)(struct mlx5_hv_vhca_agent*,
>> +					  struct mlx5_hv_vhca_control_block *block),
>> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> +					     u64 block_mask),
>> +			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
>> +			  void *context);
>> +
>> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
>> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
>> +			     void *buf, int len);
>> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
>> +
>> +#else
>> +
>> +static inline struct mlx5_hv_vhca *
>> +mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
>> +{
>> +	return NULL;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +}
>> +
>> +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +	return 0;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>> +{
>> +}
>> +
>> +static inline void mlx5_hv_vhca_invalidate(void *context,
>> +					   u64 block_mask)
>> +{
>> +}
>> +
>> +static inline struct mlx5_hv_vhca_agent *
>> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>> +			  enum mlx5_hv_vhca_agent_type type,
>> +			  void (*control)(struct mlx5_hv_vhca_agent*,
>> +					  struct mlx5_hv_vhca_control_block *block),
>> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
>> +					     u64 block_mask),
>> +			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
>> +			  void *context)
>> +{
>> +	return NULL;
>> +}
>> +
>> +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>> +{
>> +}
>> +
>> +static inline int
>> +mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
>> +			 void *buf, int len)
>> +{
>> +	return 0;
>> +}
>> +#endif
>> +
>> +#endif /* __LIB_HV_VHCA_H__ */
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> index 4cc90eb..50ee38b 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
>> @@ -69,6 +69,7 @@
>>   #include "lib/pci_vsc.h"
>>   #include "diag/fw_tracer.h"
>>   #include "ecpf.h"
>> +#include "lib/hv_vhca.h"
>>   
>>   MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
>>   MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
>> @@ -872,6 +873,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>>   	}
>>   
>>   	dev->tracer = mlx5_fw_tracer_create(dev);
>> +	dev->hv_vhca = mlx5_hv_vhca_create(dev);
>>   
>>   	return 0;
>>   
>> @@ -902,6 +904,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>>   
>>   static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
>>   {
>> +	mlx5_hv_vhca_destroy(dev->hv_vhca);
>>   	mlx5_fw_tracer_destroy(dev->tracer);
>>   	mlx5_fpga_cleanup(dev);
>>   	mlx5_eswitch_cleanup(dev->priv.eswitch);
>> @@ -1068,6 +1071,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>>   		goto err_fw_tracer;
>>   	}
>>   
>> +	mlx5_hv_vhca_init(dev->hv_vhca);
>> +
>>   	err = mlx5_fpga_device_start(dev);
>>   	if (err) {
>>   		mlx5_core_err(dev, "fpga device start failed %d\n", err);
>> @@ -1123,6 +1128,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>>   err_ipsec_start:
>>   	mlx5_fpga_device_stop(dev);
>>   err_fpga_start:
>> +	mlx5_hv_vhca_cleanup(dev->hv_vhca);
>>   	mlx5_fw_tracer_cleanup(dev->tracer);
>>   err_fw_tracer:
>>   	mlx5_eq_table_destroy(dev);
>> @@ -1143,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
>>   	mlx5_accel_ipsec_cleanup(dev);
>>   	mlx5_accel_tls_cleanup(dev);
>>   	mlx5_fpga_device_stop(dev);
>> +	mlx5_hv_vhca_cleanup(dev->hv_vhca);
>>   	mlx5_fw_tracer_cleanup(dev->tracer);
>>   	mlx5_eq_table_destroy(dev);
>>   	mlx5_irq_table_destroy(dev);
>> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
>> index 2b84ee9..97bb98c 100644
>> --- a/include/linux/mlx5/driver.h
>> +++ b/include/linux/mlx5/driver.h
>> @@ -646,6 +646,7 @@ struct mlx5_clock {
>>   struct mlx5_fw_tracer;
>>   struct mlx5_vxlan;
>>   struct mlx5_geneve;
>> +struct mlx5_hv_vhca;
>>   
>>   struct mlx5_core_dev {
>>   	struct device *device;
>> @@ -693,6 +694,7 @@ struct mlx5_core_dev {
>>   	struct mlx5_ib_clock_info  *clock_info;
>>   	struct mlx5_fw_tracer   *tracer;
>>   	u32                      vsc_addr;
>> +	struct mlx5_hv_vhca	*hv_vhca;
>>   };
>>   
>>   struct mlx5_db {
>>

^ permalink raw reply

* Re: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Eran Ben Elisha @ 2019-08-15 11:34 UTC (permalink / raw)
  To: Mark Bloch, haiyangz, sashal@kernel.org, davem@davemloft.net,
	Saeed Mahameed, leon@kernel.org, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: kys, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <e2a38f24-5a63-ef89-9d69-6a0f2770a9e4@mellanox.com>



On 8/14/2019 11:41 PM, Mark Bloch wrote:
> 
> 
> On 8/14/19 12:08 PM, Haiyang Zhang wrote:
>> From: Eran Ben Elisha <eranbe@mellanox.com>
>>
>> Add wrapper functions for HyperV PCIe read / write /
>> block_invalidate_register operations.  This will be used as an
>> infrastructure in the downstream patch for software communication.
>>
>> This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.
>>
>> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx5/core/Makefile |  1 +
>>   drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
>>   drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
>>   3 files changed, 87 insertions(+)
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>>   create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> index 8b7edaa..a8950b1 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>> @@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
>>   mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>>   mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>>   mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
>> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI)     += lib/hv.o
>>   
>>   #
>>   # Ipoib netdev
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>> new file mode 100644
>> index 0000000..cf08d02
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>> @@ -0,0 +1,64 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>> +// Copyright (c) 2018 Mellanox Technologies
>> +
>> +#include <linux/hyperv.h>
>> +#include "mlx5_core.h"
>> +#include "lib/hv.h"
>> +
>> +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
>> +				 int offset, bool read)
>> +{
>> +	int rc = -EOPNOTSUPP;
>> +	int bytes_returned;
>> +	int block_id;
>> +
>> +	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
>> +		return -EINVAL;
>> +
>> +	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
>> +
>> +	rc = read ?
>> +	     hyperv_read_cfg_blk(dev->pdev, buf,
>> +				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
>> +				 &bytes_returned) :
>> +	     hyperv_write_cfg_blk(dev->pdev, buf,
>> +				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
>> +
>> +	/* Make sure len bytes were read successfully  */
>> +	if (read)
>> +		rc |= !(len == bytes_returned);
>> +
>> +	if (rc) {
>> +		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
>> +			      read ? "read" : "write", rc, len,
>> +			      offset);
>> +		return rc;
>> +	}
>> +
>> +	return 0;
>> +}
> 
> This seems out of place why not expose this function as part of hyperv and mlx5
> will just pass the pdev.
> 
The HV driver works with block chunks. I found it less convenience to do 
so directly, so I add a small wrapper for mlx5 core.

Haiyangz,
Do you see a reason to export this callback style from the HYPERV level 
instead?

>> +
>> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			int offset)
>> +{
>> +	return mlx5_hv_config_common(dev, buf, len, offset, true);
>> +}
>> +
>> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			 int offset)
>> +{
>> +	return mlx5_hv_config_common(dev, buf, len, offset, false);
>> +}
>> +
>> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
>> +				void (*block_invalidate)(void *context,
>> +							 u64 block_mask))
>> +{
>> +	return hyperv_reg_block_invalidate(dev->pdev, context,
>> +					   block_invalidate);
>> +}
>> +
>> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
>> +{
>> +	hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>> new file mode 100644
>> index 0000000..7f69771
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
>> @@ -0,0 +1,22 @@
>> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
>> +/* Copyright (c) 2019 Mellanox Technologies. */
>> +
>> +#ifndef __LIB_HV_H__
>> +#define __LIB_HV_H__
>> +
>> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
>> +
>> +#include <linux/hyperv.h>
>> +#include <linux/mlx5/driver.h>
>> +
>> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			int offset);
>> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
>> +			 int offset);
>> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
>> +				void (*block_invalidate)(void *context,
>> +							 u64 block_mask));
>> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
>> +#endif
>> +
>> +#endif /* __LIB_HV_H__ */
>>
> 
> Mark
> 

^ permalink raw reply

* Re: [PATCH net-next, 4/6] net/mlx5: Add HV VHCA infrastructure
From: Mark Bloch @ 2019-08-14 20:41 UTC (permalink / raw)
  To: haiyangz, sashal@kernel.org, davem@davemloft.net, Saeed Mahameed,
	leon@kernel.org, Eran Ben Elisha, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: kys, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <1565809632-39138-5-git-send-email-haiyangz@microsoft.com>



On 8/14/19 12:08 PM, Haiyang Zhang wrote:
> From: Eran Ben Elisha <eranbe@mellanox.com>
> 
> HV VHCA is a layer which provides PF to VF communication channel based on
> HyperV PCI config channel. It implements Mellanox's Inter VHCA control
> communication protocol. The protocol contains control block in order to
> pass messages between the PF and VF drivers, and data blocks in order to
> pass actual data.
> 
> The infrastructure is agent based. Each agent will be responsible of
> contiguous buffer blocks in the VHCA config space. This infrastructure will
> bind agents to their blocks, and those agents can only access read/write
> the buffer blocks assigned to them. Each agent will provide three
> callbacks (control, invalidate, cleanup). Control will be invoked when
> block-0 is invalidated with a command that concerns this agent. Invalidate
> callback will be invoked if one of the blocks assigned to this agent was
> invalidated. Cleanup will be invoked before the agent is being freed in
> order to clean all of its open resources or deferred works.
> 
> Block-0 serves as the control block. All execution commands from the PF
> will be written by the PF over this block. VF will ack on those by
> writing on block-0 as well. Its format is described by struct
> mlx5_hv_vhca_control_block layout.
> 
> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
>  .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 247 +++++++++++++++++++++
>  .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  | 102 +++++++++
>  drivers/net/ethernet/mellanox/mlx5/core/main.c     |   7 +
>  include/linux/mlx5/driver.h                        |   2 +
>  5 files changed, 359 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index a8950b1..e0a1056 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
>  mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>  mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>  mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> -mlx5_core-$(CONFIG_PCI_HYPERV_MINI)     += lib/hv.o
> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI)+= lib/hv.o lib/hv_vhca.o
>  
>  #
>  # Ipoib netdev
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> new file mode 100644
> index 0000000..b2eebdf
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> @@ -0,0 +1,247 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +// Copyright (c) 2018 Mellanox Technologies
> +
> +#include <linux/hyperv.h>
> +#include "mlx5_core.h"
> +#include "lib/hv.h"
> +#include "lib/hv_vhca.h"
> +
> +struct mlx5_hv_vhca {
> +	struct mlx5_core_dev       *dev;
> +	struct workqueue_struct    *work_queue;
> +	struct mlx5_hv_vhca_agent  *agents[MLX5_HV_VHCA_AGENT_MAX];
> +	struct mutex                agents_lock; /* Protect agents array */
> +};
> +
> +struct mlx5_hv_vhca_work {
> +	struct work_struct     invalidate_work;
> +	struct mlx5_hv_vhca   *hv_vhca;
> +	u64                    block_mask;
> +};
> +
> +struct mlx5_hv_vhca_data_block {
> +	u16     sequence;
> +	u16     offset;
> +	u8      reserved[4];
> +	u64     data[15];
> +};
> +
> +struct mlx5_hv_vhca_agent {
> +	enum mlx5_hv_vhca_agent_type	 type;
> +	struct mlx5_hv_vhca		*hv_vhca;
> +	void				*priv;
> +	int                              seq;
Why is this int? and in data block is u16?

> +	void (*control)(struct mlx5_hv_vhca_agent *agent,
> +			struct mlx5_hv_vhca_control_block *block);
> +	void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
> +			   u64 block_mask);
> +	void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
> +};
> +
> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
> +{
> +	struct mlx5_hv_vhca *hv_vhca = NULL;
> +
> +	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
> +	if (!hv_vhca)
> +		return ERR_PTR(-ENOMEM);
> +
> +	hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
> +	if (!hv_vhca->work_queue) {
> +		kfree(hv_vhca);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	hv_vhca->dev = dev;
> +	mutex_init(&hv_vhca->agents_lock);
> +
> +	return hv_vhca;
> +}
> +
> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
> +{
> +	if (IS_ERR_OR_NULL(hv_vhca))
> +		return;
> +
> +	flush_workqueue(hv_vhca->work_queue);
> +	destroy_workqueue(hv_vhca->work_queue);

Why not just destroy?

> +	kfree(hv_vhca);
> +}
> +
> +static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
> +{
> +	struct mlx5_hv_vhca_work *hwork;
> +	struct mlx5_hv_vhca *hv_vhca;
> +	int i;
> +
> +	hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
> +	hv_vhca = hwork->hv_vhca;
> +
> +	mutex_lock(&hv_vhca->agents_lock);
> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
> +		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
> +
> +		if (!agent || !agent->invalidate)
> +			continue;
> +
> +		if (!(BIT(agent->type) & hwork->block_mask))
> +			continue;
> +
> +		agent->invalidate(agent, hwork->block_mask);
> +	}
> +	mutex_unlock(&hv_vhca->agents_lock);
> +
> +	kfree(hwork);
> +}
> +
> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
> +{
> +	struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
> +	struct mlx5_hv_vhca_work *work;
> +
> +	work = kzalloc(sizeof(*work), GFP_ATOMIC);
> +	if (!work)
> +		return;
> +
> +	INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
> +	work->hv_vhca    = hv_vhca;
> +	work->block_mask = block_mask;
> +
> +	queue_work(hv_vhca->work_queue, &work->invalidate_work);
> +}
> +
> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
> +{
> +	if (IS_ERR_OR_NULL(hv_vhca))
> +		return IS_ERR_OR_NULL(hv_vhca);
> +
> +	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
> +					   mlx5_hv_vhca_invalidate);
> +}
> +
> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
> +{
> +	int i;
> +
> +	if (IS_ERR_OR_NULL(hv_vhca))
> +		return;
> +
> +	mutex_lock(&hv_vhca->agents_lock);
> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
> +		WARN_ON(hv_vhca->agents[i]);
> +
> +	mutex_unlock(&hv_vhca->agents_lock);
> +
> +	mlx5_hv_unregister_invalidate(hv_vhca->dev);
> +}
> +
> +struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
> +			  enum mlx5_hv_vhca_agent_type type,
> +			  void (*control)(struct mlx5_hv_vhca_agent*,
> +					  struct mlx5_hv_vhca_control_block *block),
> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
> +					     u64 block_mask),
> +			  void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
> +			  void *priv)
> +{
> +	struct mlx5_hv_vhca_agent *agent;
> +
> +	if (IS_ERR_OR_NULL(hv_vhca))
> +		return ERR_PTR(-ENOMEM);
> +
> +	if (hv_vhca->agents[type])
> +		return ERR_PTR(-EINVAL);> +
> +	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
> +	if (!agent)
> +		return ERR_PTR(-ENOMEM);
> +
> +	agent->type      = type;
> +	agent->hv_vhca   = hv_vhca;
> +	agent->priv      = priv;
> +	agent->control   = control;
> +	agent->invalidate = invalidate;
> +	agent->cleanup   = cleaup;
> +
> +	mutex_lock(&hv_vhca->agents_lock);
> +	hv_vhca->agents[type] = agent;
> +	mutex_unlock(&hv_vhca->agents_lock);

You have a check for this not under a lock a few lines up,
but assign under a lock?

Mark

> +
> +	return agent;
> +}
> +
> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
> +{
> +	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
> +
> +	mutex_lock(&hv_vhca->agents_lock);
> +
> +	if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
> +		mutex_unlock(&hv_vhca->agents_lock);
> +		return;
> +	}
> +
> +	hv_vhca->agents[agent->type] = NULL;
> +	mutex_unlock(&hv_vhca->agents_lock);
> +
> +	if (agent->cleanup)
> +		agent->cleanup(agent);
> +
> +	kfree(agent);
> +}
> +
> +static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
> +					   struct mlx5_hv_vhca_data_block *data_block,
> +					   void *src, int len, int *offset)
> +{
> +	int bytes = min_t(int, (int)sizeof(data_block->data), len);
> +
> +	data_block->sequence = agent->seq;
> +	data_block->offset   = (*offset)++;
> +	memcpy(data_block->data, src, bytes);
> +
> +	return bytes;
> +}
> +
> +static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
> +{
> +	agent->seq++;
> +}
> +
> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
> +			     void *buf, int len)
> +{
> +	int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
> +	int block_offset = 0;
> +	int total = 0;
> +	int err;
> +
> +	while (len) {
> +		struct mlx5_hv_vhca_data_block data_block = {0};
> +		int bytes;
> +
> +		bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
> +							buf + total,
> +							len, &block_offset);
> +		if (!bytes)
> +			return -ENOMEM;
> +
> +		err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
> +					   sizeof(data_block), offset);
> +		if (err)
> +			return err;
> +
> +		total += bytes;
> +		len   -= bytes;
> +	}
> +
> +	mlx5_hv_vhca_agent_seq_update(agent);
> +
> +	return 0;
> +}
> +
> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
> +{
> +	return agent->priv;
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> new file mode 100644
> index 0000000..fa7ee85
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2019 Mellanox Technologies. */
> +
> +#ifndef __LIB_HV_VHCA_H__
> +#define __LIB_HV_VHCA_H__
> +
> +#include "en.h"
> +#include "lib/hv.h"
> +
> +struct mlx5_hv_vhca_agent;
> +struct mlx5_hv_vhca;
> +struct mlx5_hv_vhca_control_block;
> +
> +enum mlx5_hv_vhca_agent_type {
> +	MLX5_HV_VHCA_AGENT_MAX = 32,
> +};
> +
> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
> +
> +struct mlx5_hv_vhca_control_block {
> +	u32     capabilities;
> +	u32     control;
> +	u16     command;
> +	u16     command_ack;
> +	u16     version;
> +	u16     rings;
> +	u32     reserved1[28];
> +};
> +
> +struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
> +void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
> +int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
> +void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
> +void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
> +
> +struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
> +			  enum mlx5_hv_vhca_agent_type type,
> +			  void (*control)(struct mlx5_hv_vhca_agent*,
> +					  struct mlx5_hv_vhca_control_block *block),
> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
> +					     u64 block_mask),
> +			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
> +			  void *context);
> +
> +void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
> +int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
> +			     void *buf, int len);
> +void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
> +
> +#else
> +
> +static inline struct mlx5_hv_vhca *
> +mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
> +{
> +	return NULL;
> +}
> +
> +static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
> +{
> +}
> +
> +static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
> +{
> +	return 0;
> +}
> +
> +static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
> +{
> +}
> +
> +static inline void mlx5_hv_vhca_invalidate(void *context,
> +					   u64 block_mask)
> +{
> +}
> +
> +static inline struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
> +			  enum mlx5_hv_vhca_agent_type type,
> +			  void (*control)(struct mlx5_hv_vhca_agent*,
> +					  struct mlx5_hv_vhca_control_block *block),
> +			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
> +					     u64 block_mask),
> +			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
> +			  void *context)
> +{
> +	return NULL;
> +}
> +
> +static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
> +{
> +}
> +
> +static inline int
> +mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
> +			 void *buf, int len)
> +{
> +	return 0;
> +}
> +#endif
> +
> +#endif /* __LIB_HV_VHCA_H__ */
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> index 4cc90eb..50ee38b 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
> @@ -69,6 +69,7 @@
>  #include "lib/pci_vsc.h"
>  #include "diag/fw_tracer.h"
>  #include "ecpf.h"
> +#include "lib/hv_vhca.h"
>  
>  MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
>  MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
> @@ -872,6 +873,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>  	}
>  
>  	dev->tracer = mlx5_fw_tracer_create(dev);
> +	dev->hv_vhca = mlx5_hv_vhca_create(dev);
>  
>  	return 0;
>  
> @@ -902,6 +904,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
>  
>  static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
>  {
> +	mlx5_hv_vhca_destroy(dev->hv_vhca);
>  	mlx5_fw_tracer_destroy(dev->tracer);
>  	mlx5_fpga_cleanup(dev);
>  	mlx5_eswitch_cleanup(dev->priv.eswitch);
> @@ -1068,6 +1071,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>  		goto err_fw_tracer;
>  	}
>  
> +	mlx5_hv_vhca_init(dev->hv_vhca);
> +
>  	err = mlx5_fpga_device_start(dev);
>  	if (err) {
>  		mlx5_core_err(dev, "fpga device start failed %d\n", err);
> @@ -1123,6 +1128,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
>  err_ipsec_start:
>  	mlx5_fpga_device_stop(dev);
>  err_fpga_start:
> +	mlx5_hv_vhca_cleanup(dev->hv_vhca);
>  	mlx5_fw_tracer_cleanup(dev->tracer);
>  err_fw_tracer:
>  	mlx5_eq_table_destroy(dev);
> @@ -1143,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
>  	mlx5_accel_ipsec_cleanup(dev);
>  	mlx5_accel_tls_cleanup(dev);
>  	mlx5_fpga_device_stop(dev);
> +	mlx5_hv_vhca_cleanup(dev->hv_vhca);
>  	mlx5_fw_tracer_cleanup(dev->tracer);
>  	mlx5_eq_table_destroy(dev);
>  	mlx5_irq_table_destroy(dev);
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 2b84ee9..97bb98c 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -646,6 +646,7 @@ struct mlx5_clock {
>  struct mlx5_fw_tracer;
>  struct mlx5_vxlan;
>  struct mlx5_geneve;
> +struct mlx5_hv_vhca;
>  
>  struct mlx5_core_dev {
>  	struct device *device;
> @@ -693,6 +694,7 @@ struct mlx5_core_dev {
>  	struct mlx5_ib_clock_info  *clock_info;
>  	struct mlx5_fw_tracer   *tracer;
>  	u32                      vsc_addr;
> +	struct mlx5_hv_vhca	*hv_vhca;
>  };
>  
>  struct mlx5_db {
> 

^ permalink raw reply

* Re: [PATCH net-next, 5/6] net/mlx5: Add HV VHCA control agent
From: Mark Bloch @ 2019-08-14 20:41 UTC (permalink / raw)
  To: haiyangz, sashal@kernel.org, davem@davemloft.net, Saeed Mahameed,
	leon@kernel.org, Eran Ben Elisha, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: kys, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <1565809632-39138-6-git-send-email-haiyangz@microsoft.com>



On 8/14/19 12:09 PM, Haiyang Zhang wrote:
> From: Eran Ben Elisha <eranbe@mellanox.com>
> 
> Control agent is responsible over of the control block (ID 0). It should
> update the PF via this block about every capability change. In addition,
> upon block 0 invalidate, it should activate all other supported agents
> with data requests from the PF.
> 
> Upon agent create/destroy, the invalidate callback of the control agent
> is being called in order to update the PF driver about this change.
> 
> The control agent is an integral part of HV VHCA and will be created
> and destroy as part of the HV VHCA init/cleanup flow.
> 
> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 122 ++++++++++++++++++++-
>  .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  |   1 +
>  2 files changed, 121 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> index b2eebdf..3c7fffa 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
> @@ -110,22 +110,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
>  	queue_work(hv_vhca->work_queue, &work->invalidate_work);
>  }
>  
> +#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
> +
> +static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
> +					struct mlx5_hv_vhca_control_block *block)
> +{
> +	int i;
> +
> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
> +		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
> +
> +		if (!agent || !agent->control)
> +			continue;
> +
> +		if (!(AGENT_MASK(agent->type) & block->control))
> +			continue;
> +
> +		agent->control(agent, block);
> +	}
> +}
> +
> +static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
> +				      u32 *capabilities)
> +{
> +	int i;
> +
> +	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
> +		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
> +
> +		if (agent)
> +			*capabilities |= AGENT_MASK(agent->type);
> +	}
> +}
> +
> +static void
> +mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
> +				      u64 block_mask)
> +{
> +	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
> +	struct mlx5_core_dev *dev = hv_vhca->dev;
> +	struct mlx5_hv_vhca_control_block *block;
> +	u32 capabilities = 0;
> +	int err;
> +
> +	block = kzalloc(sizeof(*block), GFP_KERNEL);
> +	if (!block)
> +		return;
> +
> +	err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
> +	if (err)
> +		goto free_block;
> +
> +	mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
> +
> +	/* In case no capabilities, send empty block in return */
> +	if (!capabilities) {
> +		memset(block, 0, sizeof(*block));
> +		goto write;
> +	}
> +
> +	if (block->capabilities != capabilities)
> +		block->capabilities = capabilities;
> +
> +	if (block->control & ~capabilities)
> +		goto free_block;
> +
> +	mlx5_hv_vhca_agents_control(hv_vhca, block);
> +	block->command_ack = block->command;
> +
> +write:
> +	mlx5_hv_write_config(dev, block, sizeof(*block), 0);
> +
> +free_block:
> +	kfree(block);
> +}
> +
> +static struct mlx5_hv_vhca_agent *
> +mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
> +{
> +	return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
> +					 NULL,
> +					 mlx5_hv_vhca_control_agent_invalidate,
> +					 NULL, NULL);
> +}
> +
> +static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
> +{
> +	mlx5_hv_vhca_agent_destroy(agent);
> +}
> +
>  int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
>  {
> +	struct mlx5_hv_vhca_agent *agent;
> +	int err;
> +
>  	if (IS_ERR_OR_NULL(hv_vhca))
>  		return IS_ERR_OR_NULL(hv_vhca);
>  
> -	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
> -					   mlx5_hv_vhca_invalidate);
> +	err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
> +					  mlx5_hv_vhca_invalidate);
> +	if (err)
> +		return err;
> +
> +	agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
> +	if (IS_ERR_OR_NULL(agent)) {
> +		mlx5_hv_unregister_invalidate(hv_vhca->dev);
> +		return IS_ERR_OR_NULL(agent);
> +	}
> +
> +	hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
> +
> +	return 0;
>  }
>  
>  void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>  {
> +	struct mlx5_hv_vhca_agent *agent;
>  	int i;
>  
>  	if (IS_ERR_OR_NULL(hv_vhca))
>  		return;
>  
> +	agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
> +	if (!IS_ERR_OR_NULL(agent))
> +		mlx5_hv_vhca_control_agent_destroy(agent);

Can the agent be err ptr here?

> +
>  	mutex_lock(&hv_vhca->agents_lock);
>  	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
>  		WARN_ON(hv_vhca->agents[i]);

With the comment above in mind, here you check only for not null

> @@ -135,6 +244,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
>  	mlx5_hv_unregister_invalidate(hv_vhca->dev);
>  }
>  
> +static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
> +{
> +	mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
> +}
> +
>  struct mlx5_hv_vhca_agent *
>  mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
>  			  enum mlx5_hv_vhca_agent_type type,
> @@ -168,6 +282,8 @@ struct mlx5_hv_vhca_agent *
>  	hv_vhca->agents[type] = agent;
>  	mutex_unlock(&hv_vhca->agents_lock);
>  
> +	mlx5_hv_vhca_agents_update(hv_vhca);
> +
>  	return agent;
>  }
>  
> @@ -189,6 +305,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
>  		agent->cleanup(agent);
>  
>  	kfree(agent);
> +
> +	mlx5_hv_vhca_agents_update(hv_vhca);
>  }
>  
>  static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> index fa7ee85..6f4bfb1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
> @@ -12,6 +12,7 @@
>  struct mlx5_hv_vhca_control_block;
>  
>  enum mlx5_hv_vhca_agent_type {
> +	MLX5_HV_VHCA_AGENT_CONTROL = 0,

No need to start value

>  	MLX5_HV_VHCA_AGENT_MAX = 32,
>  };
>  
> 

Mark

^ permalink raw reply

* Re: [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Mark Bloch @ 2019-08-14 20:41 UTC (permalink / raw)
  To: haiyangz, sashal@kernel.org, davem@davemloft.net, Saeed Mahameed,
	leon@kernel.org, Eran Ben Elisha, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: kys, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <1565809632-39138-4-git-send-email-haiyangz@microsoft.com>



On 8/14/19 12:08 PM, Haiyang Zhang wrote:
> From: Eran Ben Elisha <eranbe@mellanox.com>
> 
> Add wrapper functions for HyperV PCIe read / write /
> block_invalidate_register operations.  This will be used as an
> infrastructure in the downstream patch for software communication.
> 
> This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.
> 
> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/Makefile |  1 +
>  drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
>  drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
>  3 files changed, 87 insertions(+)
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index 8b7edaa..a8950b1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
>  mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>  mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>  mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> +mlx5_core-$(CONFIG_PCI_HYPERV_MINI)     += lib/hv.o
>  
>  #
>  # Ipoib netdev
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> new file mode 100644
> index 0000000..cf08d02
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
> @@ -0,0 +1,64 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +// Copyright (c) 2018 Mellanox Technologies
> +
> +#include <linux/hyperv.h>
> +#include "mlx5_core.h"
> +#include "lib/hv.h"
> +
> +static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
> +				 int offset, bool read)
> +{
> +	int rc = -EOPNOTSUPP;
> +	int bytes_returned;
> +	int block_id;
> +
> +	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
> +		return -EINVAL;
> +
> +	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
> +
> +	rc = read ?
> +	     hyperv_read_cfg_blk(dev->pdev, buf,
> +				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
> +				 &bytes_returned) :
> +	     hyperv_write_cfg_blk(dev->pdev, buf,
> +				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
> +
> +	/* Make sure len bytes were read successfully  */
> +	if (read)
> +		rc |= !(len == bytes_returned);
> +
> +	if (rc) {
> +		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
> +			      read ? "read" : "write", rc, len,
> +			      offset);
> +		return rc;
> +	}
> +
> +	return 0;
> +}

This seems out of place why not expose this function as part of hyperv and mlx5
will just pass the pdev.

> +
> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
> +			int offset)
> +{
> +	return mlx5_hv_config_common(dev, buf, len, offset, true);
> +}
> +
> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
> +			 int offset)
> +{
> +	return mlx5_hv_config_common(dev, buf, len, offset, false);
> +}
> +
> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
> +				void (*block_invalidate)(void *context,
> +							 u64 block_mask))
> +{
> +	return hyperv_reg_block_invalidate(dev->pdev, context,
> +					   block_invalidate);
> +}
> +
> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
> +{
> +	hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
> new file mode 100644
> index 0000000..7f69771
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2019 Mellanox Technologies. */
> +
> +#ifndef __LIB_HV_H__
> +#define __LIB_HV_H__
> +
> +#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
> +
> +#include <linux/hyperv.h>
> +#include <linux/mlx5/driver.h>
> +
> +int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
> +			int offset);
> +int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
> +			 int offset);
> +int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
> +				void (*block_invalidate)(void *context,
> +							 u64 block_mask));
> +void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
> +#endif
> +
> +#endif /* __LIB_HV_H__ */
> 

Mark

^ permalink raw reply

* Re: [PATCH] hv_netvsc: Fix a memory leak bug
From: Stephen Hemminger @ 2019-08-14 20:37 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	David S. Miller, open list:Hyper-V CORE AND DRIVERS,
	open list:NETWORKING DRIVERS, open list
In-Reply-To: <1565813771-8967-1-git-send-email-wenwen@cs.uga.edu>

On Wed, 14 Aug 2019 15:16:11 -0500
Wenwen Wang <wenwen@cs.uga.edu> wrote:

> In rndis_filter_device_add(), 'rndis_device' is allocated through kzalloc()
> by invoking get_rndis_device(). In the following execution, if an error
> occurs, the execution will go to the 'err_dev_remv' label. However, the
> allocated 'rndis_device' is not deallocated, leading to a memory leak bug.
> 
> Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
> ---
>  drivers/net/hyperv/rndis_filter.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
> index 317dbe9..ed35085 100644
> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -1420,6 +1420,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
>  
>  err_dev_remv:
>  	rndis_filter_device_remove(dev, net_device);
> +	kfree(rndis_device);
>  	return ERR_PTR(ret);
>  }
>  

The rndis_device is already freed by:

rndis_filter_device_remove
	netvsc_device_remove
		free_netvsc_device_rcu

free_netvsc_device called by rcu

static void free_netvsc_device(struct rcu_head *head)
{
	struct netvsc_device *nvdev
		= container_of(head, struct netvsc_device, rcu);
	int i;

	kfree(nvdev->extension);  << here

^ permalink raw reply

* RE: [PATCH] hv_netvsc: Fix a memory leak bug
From: Haiyang Zhang @ 2019-08-14 20:35 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: KY Srinivasan, Stephen Hemminger, Sasha Levin, David S. Miller,
	open list:Hyper-V CORE AND DRIVERS, open list:NETWORKING DRIVERS,
	open list
In-Reply-To: <1565813771-8967-1-git-send-email-wenwen@cs.uga.edu>



> -----Original Message-----
> From: Wenwen Wang <wenwen@cs.uga.edu>
> Sent: Wednesday, August 14, 2019 4:16 PM
> To: Wenwen Wang <wenwen@cs.uga.edu>
> Cc: KY Srinivasan <kys@microsoft.com>; Haiyang Zhang
> <haiyangz@microsoft.com>; Stephen Hemminger
> <sthemmin@microsoft.com>; Sasha Levin <sashal@kernel.org>; David S.
> Miller <davem@davemloft.net>; open list:Hyper-V CORE AND DRIVERS
> <linux-hyperv@vger.kernel.org>; open list:NETWORKING DRIVERS
> <netdev@vger.kernel.org>; open list <linux-kernel@vger.kernel.org>
> Subject: [PATCH] hv_netvsc: Fix a memory leak bug
> 
> In rndis_filter_device_add(), 'rndis_device' is allocated through kzalloc()
> by invoking get_rndis_device(). In the following execution, if an error
> occurs, the execution will go to the 'err_dev_remv' label. However, the
> allocated 'rndis_device' is not deallocated, leading to a memory leak bug.
> 
> Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
> ---
>  drivers/net/hyperv/rndis_filter.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/hyperv/rndis_filter.c
> b/drivers/net/hyperv/rndis_filter.c
> index 317dbe9..ed35085 100644
> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -1420,6 +1420,7 @@ struct netvsc_device
> *rndis_filter_device_add(struct hv_device *dev,
> 
>  err_dev_remv:
>  	rndis_filter_device_remove(dev, net_device);
> +	kfree(rndis_device);

The kfree() is not necessary here. 
Because it is already freed by --
rndis_filter_device_remove() --> netvsc_device_remove() 
--> free_netvsc_device_rcu() --> free_netvsc_device()
--> kfree(nvdev->extension);  //This frees rndis_device.

Thanks,
- Haiyang

^ permalink raw reply

* [PATCH] hv_netvsc: Fix a memory leak bug
From: Wenwen Wang @ 2019-08-14 20:16 UTC (permalink / raw)
  To: Wenwen Wang
  Cc: K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin,
	David S. Miller, open list:Hyper-V CORE AND DRIVERS,
	open list:NETWORKING DRIVERS, open list

In rndis_filter_device_add(), 'rndis_device' is allocated through kzalloc()
by invoking get_rndis_device(). In the following execution, if an error
occurs, the execution will go to the 'err_dev_remv' label. However, the
allocated 'rndis_device' is not deallocated, leading to a memory leak bug.

Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
---
 drivers/net/hyperv/rndis_filter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 317dbe9..ed35085 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1420,6 +1420,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 
 err_dev_remv:
 	rndis_filter_device_remove(dev, net_device);
+	kfree(rndis_device);
 	return ERR_PTR(ret);
 }
 
-- 
2.7.4


^ permalink raw reply related

* [PATCH net-next, 2/6] PCI: hv: Add a Hyper-V PCI mini driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-14 19:08 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1565809632-39138-1-git-send-email-haiyangz@microsoft.com>

This mini driver is a helper driver allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 MAINTAINERS                              |  1 +
 drivers/pci/Kconfig                      |  1 +
 drivers/pci/controller/Kconfig           |  7 ++++
 drivers/pci/controller/Makefile          |  1 +
 drivers/pci/controller/pci-hyperv-mini.c | 70 ++++++++++++++++++++++++++++++++
 drivers/pci/controller/pci-hyperv.c      | 12 ++++--
 include/linux/hyperv.h                   | 30 ++++++++++----
 7 files changed, 111 insertions(+), 11 deletions(-)
 create mode 100644 drivers/pci/controller/pci-hyperv-mini.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e352550..c4962b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7453,6 +7453,7 @@ F:	drivers/hid/hid-hyperv.c
 F:	drivers/hv/
 F:	drivers/input/serio/hyperv-keyboard.c
 F:	drivers/pci/controller/pci-hyperv.c
+F:	drivers/pci/controller/pci-hyperv-mini.c
 F:	drivers/net/hyperv/
 F:	drivers/scsi/storvsc_drv.c
 F:	drivers/uio/uio_hv_generic.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab9240..bb852f5 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -182,6 +182,7 @@ config PCI_LABEL
 config PCI_HYPERV
         tristate "Hyper-V PCI Frontend"
         depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+	select PCI_HYPERV_MINI
         help
           The PCI device frontend driver allows the kernel to import arbitrary
           PCI devices from a PCI backend to support PCI driver domains.
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index fe9f9f1..8e31cba 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -281,5 +281,12 @@ config VMD
 	  To compile this driver as a module, choose M here: the
 	  module will be called vmd.
 
+config PCI_HYPERV_MINI
+	tristate "Hyper-V PCI Mini"
+	depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+	help
+	  The Hyper-V PCI Mini is a helper driver allows other drivers to
+	  have a common interface with the Hyper-V PCI frontend driver.
+
 source "drivers/pci/controller/dwc/Kconfig"
 endmenu
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index d56a507..77e0132 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
 obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
 obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
 obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
+obj-$(CONFIG_PCI_HYPERV_MINI) += pci-hyperv-mini.o
 obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
 obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
 obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
diff --git a/drivers/pci/controller/pci-hyperv-mini.c b/drivers/pci/controller/pci-hyperv-mini.c
new file mode 100644
index 0000000..9b6cd1c
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv-mini.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ *   Haiyang Zhang <haiyangz@microsoft.com>
+ *
+ * This mini driver is a helper driver allows other drivers to
+ * have a common interface with the Hyper-V PCI frontend driver.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hyperv.h>
+
+struct hyperv_pci_block_ops hvpci_block_ops;
+EXPORT_SYMBOL(hvpci_block_ops);
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			unsigned int block_id, unsigned int *bytes_returned)
+{
+	if (!hvpci_block_ops.read_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
+					  bytes_returned);
+}
+EXPORT_SYMBOL(hyperv_read_cfg_blk);
+
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+			 unsigned int block_id)
+{
+	if (!hvpci_block_ops.write_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.write_block(dev, buf, len, block_id);
+}
+EXPORT_SYMBOL(hyperv_write_cfg_blk);
+
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	if (!hvpci_block_ops.reg_blk_invalidate)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.reg_blk_invalidate(dev, context,
+						  block_invalidate);
+}
+EXPORT_SYMBOL(hyperv_reg_block_invalidate);
+
+static void __exit exit_hv_pci_mini(void)
+{
+	pr_info("unloaded\n");
+}
+
+static int __init init_hv_pci_mini(void)
+{
+	pr_info("loaded\n");
+
+	return 0;
+}
+
+module_init(init_hv_pci_mini);
+module_exit(exit_hv_pci_mini);
+
+MODULE_DESCRIPTION("Hyper-V PCI Mini");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 57adeca..9c93ac2 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
 	*bytes_returned = comp_pkt.bytes_returned;
 	return 0;
 }
-EXPORT_SYMBOL(hv_read_config_block);
 
 /**
  * hv_pci_write_config_compl() - Invoked when a response packet for a write
@@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
 
 	return 0;
 }
-EXPORT_SYMBOL(hv_write_config_block);
 
 /**
  * hv_register_block_invalidate() - Invoked when a config block invalidation
@@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
 	return 0;
 
 }
-EXPORT_SYMBOL(hv_register_block_invalidate);
 
 /* Interrupt management hooks */
 static void hv_int_desc_free(struct hv_pci_dev *hpdev,
@@ -3045,10 +3042,19 @@ static int hv_pci_remove(struct hv_device *hdev)
 static void __exit exit_hv_pci_drv(void)
 {
 	vmbus_driver_unregister(&hv_pci_drv);
+
+	hvpci_block_ops.read_block = NULL;
+	hvpci_block_ops.write_block = NULL;
+	hvpci_block_ops.reg_blk_invalidate = NULL;
 }
 
 static int __init init_hv_pci_drv(void)
 {
+	/* Initialize PCI block r/w interface */
+	hvpci_block_ops.read_block = hv_read_config_block;
+	hvpci_block_ops.write_block = hv_write_config_block;
+	hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
+
 	return vmbus_driver_register(&hv_pci_drv);
 }
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9d37f8c..2afe6fd 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1579,18 +1579,32 @@ struct vmpacket_descriptor *
 	    pkt = hv_pkt_iter_next(channel, pkt))
 
 /*
- * Functions for passing data between SR-IOV PF and VF drivers.  The VF driver
+ * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
  * sends requests to read and write blocks. Each block must be 128 bytes or
  * smaller. Optionally, the VF driver can register a callback function which
  * will be invoked when the host says that one or more of the first 64 block
  * IDs is "invalid" which means that the VF driver should reread them.
  */
 #define HV_CONFIG_BLOCK_SIZE_MAX 128
-int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
-			 unsigned int block_id, unsigned int *bytes_returned);
-int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
-			  unsigned int block_id);
-int hv_register_block_invalidate(struct pci_dev *dev, void *context,
-				 void (*block_invalidate)(void *context,
-							  u64 block_mask));
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			unsigned int block_id, unsigned int *bytes_returned);
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+			 unsigned int block_id);
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask));
+
+struct hyperv_pci_block_ops {
+	int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			  unsigned int block_id, unsigned int *bytes_returned);
+	int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
+			   unsigned int block_id);
+	int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
+				  void (*block_invalidate)(void *context,
+							   u64 block_mask));
+};
+
+extern struct hyperv_pci_block_ops hvpci_block_ops;
+
 #endif /* _HYPERV_H */
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Haiyang Zhang @ 2019-08-14 19:08 UTC (permalink / raw)
  To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
	leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
	bhelgaas@google.com, linux-pci@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
  Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
	linux-kernel@vger.kernel.org
In-Reply-To: <1565809632-39138-1-git-send-email-haiyangz@microsoft.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

Add wrapper functions for HyperV PCIe read / write /
block_invalidate_register operations.  This will be used as an
infrastructure in the downstream patch for software communication.

This will be enabled by default if CONFIG_PCI_HYPERV_MINI is set.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8b7edaa..a8950b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_MINI)     += lib/hv.o
 
 #
 # Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000..cf08d02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+				 int offset, bool read)
+{
+	int rc = -EOPNOTSUPP;
+	int bytes_returned;
+	int block_id;
+
+	if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+	rc = read ?
+	     hyperv_read_cfg_blk(dev->pdev, buf,
+				 HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+				 &bytes_returned) :
+	     hyperv_write_cfg_blk(dev->pdev, buf,
+				  HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+	/* Make sure len bytes were read successfully  */
+	if (read)
+		rc |= !(len == bytes_returned);
+
+	if (rc) {
+		mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+			      read ? "read" : "write", rc, len,
+			      offset);
+		return rc;
+	}
+
+	return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset)
+{
+	return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	return hyperv_reg_block_invalidate(dev->pdev, context,
+					   block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+	hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000..7f69771
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_MINI)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+			int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+			 int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
-- 
1.8.3.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox