* [PATCH AUTOSEL 5.2 44/44] tools: hv: fix KVP and VSS daemons exit code
From: Sasha Levin @ 2019-08-20 13:40 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Adrian Vladu, K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
Sasha Levin, Alessandro Pilotti, linux-hyperv
In-Reply-To: <20190820134028.10829-1-sashal@kernel.org>
From: Adrian Vladu <avladu@cloudbasesolutions.com>
[ Upstream commit b0995156071b0ff29a5902964a9dc8cfad6f81c0 ]
HyperV KVP and VSS daemons should exit with 0 when the '--help'
or '-h' flags are used.
Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Sasha Levin <sashal@kernel.org>
Cc: Alessandro Pilotti <apilotti@cloudbasesolutions.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
tools/hv/hv_kvp_daemon.c | 2 ++
tools/hv/hv_vss_daemon.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index d7e06fe0270ee..0ce50c319cfd6 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1386,6 +1386,8 @@ int main(int argc, char *argv[])
daemonize = 0;
break;
case 'h':
+ print_usage(argv);
+ exit(0);
default:
print_usage(argv);
exit(EXIT_FAILURE);
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index efe1e34dd91b4..8f813f5233d48 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -218,6 +218,8 @@ int main(int argc, char *argv[])
daemonize = 0;
break;
case 'h':
+ print_usage(argv);
+ exit(0);
default:
print_usage(argv);
exit(EXIT_FAILURE);
--
2.20.1
^ permalink raw reply related
* [PATCH AUTOSEL 5.2 43/44] tools: hv: fixed Python pep8/flake8 warnings for lsvmbus
From: Sasha Levin @ 2019-08-20 13:40 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Adrian Vladu, K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
Sasha Levin, Dexuan Cui, Alessandro Pilotti, linux-hyperv
In-Reply-To: <20190820134028.10829-1-sashal@kernel.org>
From: Adrian Vladu <avladu@cloudbasesolutions.com>
[ Upstream commit 5912e791f3018de0a007c8cfa9cb38c97d3e5f5c ]
Fixed pep8/flake8 python style code for lsvmbus tool.
The TAB indentation was on purpose ignored (pep8 rule W191) to make
sure the code is complying with the Linux code guideline.
The following command doe not show any warnings now:
pep8 --ignore=W191 lsvmbus
flake8 --ignore=W191 lsvmbus
Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Sasha Levin <sashal@kernel.org>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Alessandro Pilotti <apilotti@cloudbasesolutions.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
tools/hv/lsvmbus | 75 +++++++++++++++++++++++++++---------------------
1 file changed, 42 insertions(+), 33 deletions(-)
diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
index 55e7374bade0d..099f2c44dbed2 100644
--- a/tools/hv/lsvmbus
+++ b/tools/hv/lsvmbus
@@ -4,10 +4,10 @@
import os
from optparse import OptionParser
+help_msg = "print verbose messages. Try -vv, -vvv for more verbose messages"
parser = OptionParser()
-parser.add_option("-v", "--verbose", dest="verbose",
- help="print verbose messages. Try -vv, -vvv for \
- more verbose messages", action="count")
+parser.add_option(
+ "-v", "--verbose", dest="verbose", help=help_msg, action="count")
(options, args) = parser.parse_args()
@@ -21,27 +21,28 @@ if not os.path.isdir(vmbus_sys_path):
exit(-1)
vmbus_dev_dict = {
- '{0e0b6031-5213-4934-818b-38d90ced39db}' : '[Operating system shutdown]',
- '{9527e630-d0ae-497b-adce-e80ab0175caf}' : '[Time Synchronization]',
- '{57164f39-9115-4e78-ab55-382f3bd5422d}' : '[Heartbeat]',
- '{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}' : '[Data Exchange]',
- '{35fa2e29-ea23-4236-96ae-3a6ebacba440}' : '[Backup (volume checkpoint)]',
- '{34d14be3-dee4-41c8-9ae7-6b174977c192}' : '[Guest services]',
- '{525074dc-8985-46e2-8057-a307dc18a502}' : '[Dynamic Memory]',
- '{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}' : 'Synthetic mouse',
- '{f912ad6d-2b17-48ea-bd65-f927a61c7684}' : 'Synthetic keyboard',
- '{da0a7802-e377-4aac-8e77-0558eb1073f8}' : 'Synthetic framebuffer adapter',
- '{f8615163-df3e-46c5-913f-f2d2f965ed0e}' : 'Synthetic network adapter',
- '{32412632-86cb-44a2-9b5c-50d1417354f5}' : 'Synthetic IDE Controller',
- '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}' : 'Synthetic SCSI Controller',
- '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}' : 'Synthetic fiber channel adapter',
- '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}' : 'Synthetic RDMA adapter',
- '{44c4f61d-4444-4400-9d52-802e27ede19f}' : 'PCI Express pass-through',
- '{276aacf4-ac15-426c-98dd-7521ad3f01fe}' : '[Reserved system device]',
- '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}' : '[Reserved system device]',
- '{3375baf4-9e15-4b30-b765-67acb10d607b}' : '[Reserved system device]',
+ '{0e0b6031-5213-4934-818b-38d90ced39db}': '[Operating system shutdown]',
+ '{9527e630-d0ae-497b-adce-e80ab0175caf}': '[Time Synchronization]',
+ '{57164f39-9115-4e78-ab55-382f3bd5422d}': '[Heartbeat]',
+ '{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}': '[Data Exchange]',
+ '{35fa2e29-ea23-4236-96ae-3a6ebacba440}': '[Backup (volume checkpoint)]',
+ '{34d14be3-dee4-41c8-9ae7-6b174977c192}': '[Guest services]',
+ '{525074dc-8985-46e2-8057-a307dc18a502}': '[Dynamic Memory]',
+ '{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}': 'Synthetic mouse',
+ '{f912ad6d-2b17-48ea-bd65-f927a61c7684}': 'Synthetic keyboard',
+ '{da0a7802-e377-4aac-8e77-0558eb1073f8}': 'Synthetic framebuffer adapter',
+ '{f8615163-df3e-46c5-913f-f2d2f965ed0e}': 'Synthetic network adapter',
+ '{32412632-86cb-44a2-9b5c-50d1417354f5}': 'Synthetic IDE Controller',
+ '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}': 'Synthetic SCSI Controller',
+ '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}': 'Synthetic fiber channel adapter',
+ '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}': 'Synthetic RDMA adapter',
+ '{44c4f61d-4444-4400-9d52-802e27ede19f}': 'PCI Express pass-through',
+ '{276aacf4-ac15-426c-98dd-7521ad3f01fe}': '[Reserved system device]',
+ '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}': '[Reserved system device]',
+ '{3375baf4-9e15-4b30-b765-67acb10d607b}': '[Reserved system device]',
}
+
def get_vmbus_dev_attr(dev_name, attr):
try:
f = open('%s/%s/%s' % (vmbus_sys_path, dev_name, attr), 'r')
@@ -52,6 +53,7 @@ def get_vmbus_dev_attr(dev_name, attr):
return lines
+
class VMBus_Dev:
pass
@@ -66,12 +68,13 @@ for f in os.listdir(vmbus_sys_path):
chn_vp_mapping = get_vmbus_dev_attr(f, 'channel_vp_mapping')
chn_vp_mapping = [c.strip() for c in chn_vp_mapping]
- chn_vp_mapping = sorted(chn_vp_mapping,
- key = lambda c : int(c.split(':')[0]))
+ chn_vp_mapping = sorted(
+ chn_vp_mapping, key=lambda c: int(c.split(':')[0]))
- chn_vp_mapping = ['\tRel_ID=%s, target_cpu=%s' %
- (c.split(':')[0], c.split(':')[1])
- for c in chn_vp_mapping]
+ chn_vp_mapping = [
+ '\tRel_ID=%s, target_cpu=%s' %
+ (c.split(':')[0], c.split(':')[1]) for c in chn_vp_mapping
+ ]
d = VMBus_Dev()
d.sysfs_path = '%s/%s' % (vmbus_sys_path, f)
d.vmbus_id = vmbus_id
@@ -85,7 +88,7 @@ for f in os.listdir(vmbus_sys_path):
vmbus_dev_list.append(d)
-vmbus_dev_list = sorted(vmbus_dev_list, key = lambda d : int(d.vmbus_id))
+vmbus_dev_list = sorted(vmbus_dev_list, key=lambda d: int(d.vmbus_id))
format0 = '%2s: %s'
format1 = '%2s: Class_ID = %s - %s\n%s'
@@ -95,9 +98,15 @@ for d in vmbus_dev_list:
if verbose == 0:
print(('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc))
elif verbose == 1:
- print (('VMBUS ID ' + format1) % \
- (d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping))
+ print(
+ ('VMBUS ID ' + format1) %
+ (d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping)
+ )
else:
- print (('VMBUS ID ' + format2) % \
- (d.vmbus_id, d.class_id, d.dev_desc, \
- d.device_id, d.sysfs_path, d.chn_vp_mapping))
+ print(
+ ('VMBUS ID ' + format2) %
+ (
+ d.vmbus_id, d.class_id, d.dev_desc,
+ d.device_id, d.sysfs_path, d.chn_vp_mapping
+ )
+ )
--
2.20.1
^ permalink raw reply related
* RE: [PATCH 0/2] clocksource/Hyper-V: Add Hyper-V specific sched clock function
From: Michael Kelley @ 2019-08-20 14:32 UTC (permalink / raw)
To: vkuznets, Tianyu Lan
Cc: Peter Zijlstra, Tianyu Lan, linux-arch@vger.kernel.org,
linux-hyperv@vger.kernel.org, linux-kernel@vger kernel org,
Andy Lutomirski, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
H. Peter Anvin, the arch/x86 maintainers, KY Srinivasan,
Haiyang Zhang, Stephen Hemminger, Sasha Levin, Daniel Lezcano,
Arnd Bergmann, ashal@kernel.org
In-Reply-To: <87sgq5a2hq.fsf@vitty.brq.redhat.com>
From: Vitaly Kuznetsov <vkuznets@redhat.com> Sent: Tuesday, August 13, 2019 1:34 AM
>
> Michael Kelley <mikelley@microsoft.com> writes:
>
> > From: Tianyu Lan <lantianyu1986@gmail.com> Sent: Tuesday, July 30, 2019 6:41 AM
> >>
> >> On Mon, Jul 29, 2019 at 8:13 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
> >> >
> >> > Peter Zijlstra <peterz@infradead.org> writes:
> >> >
> >> > > On Mon, Jul 29, 2019 at 12:59:26PM +0200, Vitaly Kuznetsov wrote:
> >> > >> lantianyu1986@gmail.com writes:
> >> > >>
> >> > >> > From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> >> > >> >
> >> > >> > Hyper-V guests use the default native_sched_clock() in pv_ops.time.sched_clock
> >> > >> > on x86. But native_sched_clock() directly uses the raw TSC value, which
> >> > >> > can be discontinuous in a Hyper-V VM. Add the generic hv_setup_sched_clock()
> >> > >> > to set the sched clock function appropriately. On x86, this sets
> >> > >> > pv_ops.time.sched_clock to read the Hyper-V reference TSC value that is
> >> > >> > scaled and adjusted to be continuous.
> >> > >>
> >> > >> Hypervisor can, in theory, disable TSC page and then we're forced to use
> >> > >> MSR-based clocksource but using it as sched_clock() can be very slow,
> >> > >> I'm afraid.
> >> > >>
> >> > >> On the other hand, what we have now is probably worse: TSC can,
> >> > >> actually, jump backwards (e.g. on migration) and we're breaking the
> >> > >> requirements for sched_clock().
> >> > >
> >> > > That (obviously) also breaks the requirements for using TSC as
> >> > > clocksource.
> >> > >
> >> > > IOW, it breaks the entire purpose of having TSC in the first place.
> >> >
> >> > Currently, we mark raw TSC as unstable when running on Hyper-V (see
> >> > 88c9281a9fba6), 'TSC page' (which is TSC * scale + offset) is being used
> >> > instead. The problem is that 'TSC page' can be disabled by the
> >> > hypervisor and in that case the only remaining clocksource is MSR-based
> >> > (slow).
> >> >
> >>
> >> Yes, that will be slow if Hyper-V doesn't expose hv tsc page and
> >> kernel uses MSR based
> >> clocksource. Each MSR read will trigger one VM-EXIT. This also happens on other
> >> hypervisors (e,g, KVM doesn't expose KVM clock). Hypervisor should
> >> take this into
> >> account and determine which clocksource should be exposed or not.
> >>
> >
> > We've confirmed with the Hyper-V team that the TSC page is always available
> > on Hyper-V 2016 and later, and on Hyper-V 2012 R2 when the physical
> > hardware presents an InvariantTSC.
>
> Currently we check that TSC page is valid on every read and it seems
> this is redundant, right? It is either available on boot or not. I can
> only imagine migrating a VM to a non-InvariantTSC host when Hyper-V will
> likely disable the page (and we can get reenlightenment notification
> then).
I think Hyper-V can have brief intervals when the TSC page is not valid, so
the code checks for the "sequence" value being zero. Otherwise, yes, it
should always be there or not be there. Is there some other validity
check on every read that you are thinking of?
>
> > But the Linux Kconfig's are set up so
> > the TSC page is not used for 32-bit guests -- all clock reads are synthetic MSR
> > reads. For 32-bit, this set of changes will add more overhead because the
> > sched clock reads will now be MSR reads.
> >
> > I would be inclined to fix the problem, even with the perf hit on 32-bit Linux.
> > I don’t have any data on 32-bit Linux being used in a Hyper-V guest, but it's not
> > supported in Azure so usage is pretty small. The alternative would be to continue
> > to use the raw TSC value on 32-bit, even with the risk of a discontinuity in case of
> > live migration or similar scenarios.
>
> The issue needs fixing, I agree, however using MSR based clocksource as
> sched clock may give us too big of a performance hit (not sure who cares
> about 32 bit guest performance nowadays but still). What stops us from
> enabling TSC page for 32 bit guests if it is available?
I talked to KY Srinivasan for any history about TSC page on 32-bit. He said
there was no technical reason not to implement it, but our focus was always
64-bit Linux, so the 32-bit was much less important. Also, on 32-bit Linux,
the required 64x64 multiply and shift is more complex and takes more
more cycles (compare 32-bit implementation of mul_u64_u64_shr vs.
the 64-bit implementation), so the win over a MSR read is less. I
don't know of any actual measurements being made to compare vs.
MSR read.
Michael
>
> --
> Vitaly
^ permalink raw reply
* Re: [PATCH] Drivers: hv: vmbus: Remove the unused "tsc_page" from struct hv_context
From: Sasha Levin @ 2019-08-20 15:28 UTC (permalink / raw)
To: Dexuan Cui
Cc: linux-hyperv@vger.kernel.org, Stephen Hemminger, Sasha Levin,
Haiyang Zhang, KY Srinivasan, Michael Kelley,
gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org
In-Reply-To: <1566270393-28009-1-git-send-email-decui@microsoft.com>
On Tue, Aug 20, 2019 at 03:06:40AM +0000, Dexuan Cui wrote:
>This field is no longer used after the commit
>63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code")
>, because it's replaced by the global variable
>"struct ms_hyperv_tsc_page *tsc_pg;" (now, the variable is in
>drivers/clocksource/hyperv_timer.c).
>
>Fixes: 63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code")
>Signed-off-by: Dexuan Cui <decui@microsoft.com>
Queued up for hyperv-fixes, thank you.
--
Thanks,
Sasha
^ permalink raw reply
* Re: [PATCH] Input: hyperv-keyboard: Use in-place iterator API in the channel callback
From: Sasha Levin @ 2019-08-20 15:29 UTC (permalink / raw)
To: dmitry.torokhov@gmail.com
Cc: Dexuan Cui, linux-input@vger.kernel.org,
linux-hyperv@vger.kernel.org, Stephen Hemminger, Sasha Levin,
Haiyang Zhang, KY Srinivasan, Michael Kelley,
gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org
In-Reply-To: <20190820031805.GO121898@dtor-ws>
On Mon, Aug 19, 2019 at 08:18:05PM -0700, dmitry.torokhov@gmail.com wrote:
>On Tue, Aug 20, 2019 at 03:01:23AM +0000, Dexuan Cui wrote:
>> Simplify the ring buffer handling with the in-place API.
>>
>> Also avoid the dynamic allocation and the memory leak in the channel
>> callback function.
>>
>> Signed-off-by: Dexuan Cui <decui@microsoft.com>
>> ---
>>
>> Hi Dmitry, can this patch go through Sasha's hyperv tree:
>> https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
>>
>> This is a purely Hyper-V specific change.
>
>Sure, no problem.
>
>Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Queued up for hyperv-fixes, thank you.
--
Thanks,
Sasha
^ permalink raw reply
* Re: [PATCH] Tools: hv: kvp: eliminate 'may be used uninitialized' warning
From: Sasha Levin @ 2019-08-20 15:30 UTC (permalink / raw)
To: Vitaly Kuznetsov
Cc: linux-hyperv, K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger,
linux-kernel
In-Reply-To: <20190819144409.13349-1-vkuznets@redhat.com>
On Mon, Aug 19, 2019 at 04:44:09PM +0200, Vitaly Kuznetsov wrote:
>When building hv_kvp_daemon GCC-8.3 complains:
>
>hv_kvp_daemon.c: In function ‘kvp_get_ip_info.constprop’:
>hv_kvp_daemon.c:812:30: warning: ‘ip_buffer’ may be used uninitialized in this function [-Wmaybe-uninitialized]
> struct hv_kvp_ipaddr_value *ip_buffer;
>
>this seems to be a false positive: we only use ip_buffer when
>op == KVP_OP_GET_IP_INFO and it is only unset when op == KVP_OP_ENUMERATE.
>
>Silence the warning by initializing ip_buffer to NULL.
>
>Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Queued up for hyperv-fixes, thank you.
--
Thanks,
Sasha
^ permalink raw reply
* Re: [PATCH v2] Tools: hv: move to tools buildsystem
From: Sasha Levin @ 2019-08-20 15:32 UTC (permalink / raw)
To: Vitaly Kuznetsov
Cc: Andy Shevchenko, K. Y. Srinivasan, Haiyang Zhang,
Stephen Hemminger, linux-hyperv
In-Reply-To: <87pnl16ypo.fsf@vitty.brq.redhat.com>
On Mon, Aug 19, 2019 at 04:01:39PM +0200, Vitaly Kuznetsov wrote:
>Andy Shevchenko <andriy.shevchenko@linux.intel.com> writes:
>
>> There is a nice buildsystem dedicated for userspace tools in Linux kernel tree.
>> Switch Hyper-V daemons to be built by it.
>>
>> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
>> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
>
>Tested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Queued up for hyperv-next, thank you.
--
Thanks,
Sasha
^ permalink raw reply
* Re: [PATCH] Drivers: hv: vmbus: Fix virt_to_hvpfn() for X86_PAE
From: Juliana Rodrigueiro @ 2019-08-20 15:51 UTC (permalink / raw)
To: Sasha Levin; +Cc: linux-hyperv@vger.kernel.org
In-Reply-To: <20190509010600.GQ1747@sasha-vm>
Hi Sasha.
I haven't spotted the following patch in the hyperv-fixes branch.
Did I look in the wrong place or it was not applied?
Thanks,
Juliana.
On 5/9/19 3:06 AM, Sasha Levin wrote:
> On Tue, May 07, 2019 at 12:51:51PM +0000, Michael Kelley wrote:
>> From: Dexuan Cui <decui@microsoft.com> Sent: Tuesday, May 7, 2019
>> 12:47 AM
>>>
>>> In the case of X86_PAE, unsigned long is u32, but the physical
>>> address type
>>> should be u64. Due to the bug here, the netvsc driver can not load
>>> successfully, and sometimes the VM can panic due to memory corruption
>>> (the
>>> hypervisor writes data to the wrong location).
>>>
>>> Fixes: 6ba34171bcbd ("Drivers: hv: vmbus: Remove use of
>>> slow_virt_to_phys()")
>>> Cc: stable@vger.kernel.org
>>> Cc: Michael Kelley <mikelley@microsoft.com>
>>> Reported-and-tested-by: Juliana Rodrigueiro
>>> <juliana.rodrigueiro@intra2net.com>
>>> Signed-off-by: Dexuan Cui <decui@microsoft.com>
>>
>> Reviewed-by: Michael Kelley <mikelley@microsoft.com>
>
> Queued for hyperv-fixes, thanks!
>
> --
> Thanks,
> Sasha
^ permalink raw reply
* Re: [PATCH] Drivers: hv: vmbus: Fix virt_to_hvpfn() for X86_PAE
From: Sasha Levin @ 2019-08-20 16:51 UTC (permalink / raw)
To: Juliana Rodrigueiro; +Cc: linux-hyperv@vger.kernel.org
In-Reply-To: <06e0ae5e-2fb0-5dac-a1a5-5583fdda334f@intra2net.com>
On Tue, Aug 20, 2019 at 05:51:39PM +0200, Juliana Rodrigueiro wrote:
>Hi Sasha.
>
>I haven't spotted the following patch in the hyperv-fixes branch.
>
>Did I look in the wrong place or it was not applied?
I've previously dropped it, it's in there now.
--
Thanks,
Sasha
^ permalink raw reply
* Re: [PATCH v6,1/2] PCI: hv: Detect and fix Hyper-V PCI domain number collision
From: Lorenzo Pieralisi @ 2019-08-20 16:58 UTC (permalink / raw)
To: Haiyang Zhang
Cc: sashal@kernel.org, bhelgaas@google.com,
linux-hyperv@vger.kernel.org, linux-pci@vger.kernel.org,
KY Srinivasan, Stephen Hemminger, olaf@aepfle.de, vkuznets,
linux-kernel@vger.kernel.org
In-Reply-To: <1565888460-38694-1-git-send-email-haiyangz@microsoft.com>
On Thu, Aug 15, 2019 at 05:01:37PM +0000, Haiyang Zhang wrote:
> Currently in Azure cloud, for passthrough devices, the host sets the device
> instance ID's bytes 8 - 15 to a value derived from the host HWID, which is
> the same on all devices in a VM. So, the device instance ID's bytes 8 and 9
> provided by the host are no longer unique. This affects all Azure hosts
> since July 2018, and can cause device passthrough to VMs to fail because
> the bytes 8 and 9 are used as PCI domain number. Collision of domain
> numbers will cause the second device with the same domain number fail to
> load.
>
> In the cases of collision, we will detect and find another number that is
> not in use.
>
> Suggested-by: Michael Kelley <mikelley@microsoft.com>
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
> Acked-by: Sasha Levin <sashal@kernel.org>
> ---
> drivers/pci/controller/pci-hyperv.c | 92 +++++++++++++++++++++++++++++++------
> 1 file changed, 79 insertions(+), 13 deletions(-)
I have applied both patches to pci/hv for v5.4.
Thanks,
Lorenzo
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 40b6254..31b8fd5 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -2510,6 +2510,48 @@ static void put_hvpcibus(struct hv_pcibus_device *hbus)
> complete(&hbus->remove_event);
> }
>
> +#define HVPCI_DOM_MAP_SIZE (64 * 1024)
> +static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
> +
> +/*
> + * PCI domain number 0 is used by emulated devices on Gen1 VMs, so define 0
> + * as invalid for passthrough PCI devices of this driver.
> + */
> +#define HVPCI_DOM_INVALID 0
> +
> +/**
> + * hv_get_dom_num() - Get a valid PCI domain number
> + * Check if the PCI domain number is in use, and return another number if
> + * it is in use.
> + *
> + * @dom: Requested domain number
> + *
> + * return: domain number on success, HVPCI_DOM_INVALID on failure
> + */
> +static u16 hv_get_dom_num(u16 dom)
> +{
> + unsigned int i;
> +
> + if (test_and_set_bit(dom, hvpci_dom_map) == 0)
> + return dom;
> +
> + for_each_clear_bit(i, hvpci_dom_map, HVPCI_DOM_MAP_SIZE) {
> + if (test_and_set_bit(i, hvpci_dom_map) == 0)
> + return i;
> + }
> +
> + return HVPCI_DOM_INVALID;
> +}
> +
> +/**
> + * hv_put_dom_num() - Mark the PCI domain number as free
> + * @dom: Domain number to be freed
> + */
> +static void hv_put_dom_num(u16 dom)
> +{
> + clear_bit(dom, hvpci_dom_map);
> +}
> +
> /**
> * hv_pci_probe() - New VMBus channel probe, for a root PCI bus
> * @hdev: VMBus's tracking struct for this root PCI bus
> @@ -2521,6 +2563,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> const struct hv_vmbus_device_id *dev_id)
> {
> struct hv_pcibus_device *hbus;
> + u16 dom_req, dom;
> int ret;
>
> /*
> @@ -2535,19 +2578,34 @@ static int hv_pci_probe(struct hv_device *hdev,
> hbus->state = hv_pcibus_init;
>
> /*
> - * The PCI bus "domain" is what is called "segment" in ACPI and
> - * other specs. Pull it from the instance ID, to get something
> - * unique. Bytes 8 and 9 are what is used in Windows guests, so
> - * do the same thing for consistency. Note that, since this code
> - * only runs in a Hyper-V VM, Hyper-V can (and does) guarantee
> - * that (1) the only domain in use for something that looks like
> - * a physical PCI bus (which is actually emulated by the
> - * hypervisor) is domain 0 and (2) there will be no overlap
> - * between domains derived from these instance IDs in the same
> - * VM.
> + * The PCI bus "domain" is what is called "segment" in ACPI and other
> + * specs. Pull it from the instance ID, to get something usually
> + * unique. In rare cases of collision, we will find out another number
> + * not in use.
> + *
> + * Note that, since this code only runs in a Hyper-V VM, Hyper-V
> + * together with this guest driver can guarantee that (1) The only
> + * domain used by Gen1 VMs for something that looks like a physical
> + * PCI bus (which is actually emulated by the hypervisor) is domain 0.
> + * (2) There will be no overlap between domains (after fixing possible
> + * collisions) in the same VM.
> */
> - hbus->sysdata.domain = hdev->dev_instance.b[9] |
> - hdev->dev_instance.b[8] << 8;
> + dom_req = hdev->dev_instance.b[8] << 8 | hdev->dev_instance.b[9];
> + dom = hv_get_dom_num(dom_req);
> +
> + if (dom == HVPCI_DOM_INVALID) {
> + dev_err(&hdev->device,
> + "Unable to use dom# 0x%hx or other numbers", dom_req);
> + ret = -EINVAL;
> + goto free_bus;
> + }
> +
> + if (dom != dom_req)
> + dev_info(&hdev->device,
> + "PCI dom# 0x%hx has collision, using 0x%hx",
> + dom_req, dom);
> +
> + hbus->sysdata.domain = dom;
>
> hbus->hdev = hdev;
> refcount_set(&hbus->remove_lock, 1);
> @@ -2562,7 +2620,7 @@ static int hv_pci_probe(struct hv_device *hdev,
> hbus->sysdata.domain);
> if (!hbus->wq) {
> ret = -ENOMEM;
> - goto free_bus;
> + goto free_dom;
> }
>
> ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
> @@ -2639,6 +2697,8 @@ static int hv_pci_probe(struct hv_device *hdev,
> vmbus_close(hdev->channel);
> destroy_wq:
> destroy_workqueue(hbus->wq);
> +free_dom:
> + hv_put_dom_num(hbus->sysdata.domain);
> free_bus:
> free_page((unsigned long)hbus);
> return ret;
> @@ -2720,6 +2780,9 @@ static int hv_pci_remove(struct hv_device *hdev)
> put_hvpcibus(hbus);
> wait_for_completion(&hbus->remove_event);
> destroy_workqueue(hbus->wq);
> +
> + hv_put_dom_num(hbus->sysdata.domain);
> +
> free_page((unsigned long)hbus);
> return 0;
> }
> @@ -2747,6 +2810,9 @@ static void __exit exit_hv_pci_drv(void)
>
> static int __init init_hv_pci_drv(void)
> {
> + /* Set the invalid domain number's bit, so it will not be used */
> + set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
> +
> return vmbus_driver_register(&hv_pci_drv);
> }
>
> --
> 1.8.3.1
>
^ permalink raw reply
* Re: [PATCH v2 2/3] drivers: hv: vmbus: add test attributes to debugfs
From: Branden Bonaby @ 2019-08-20 18:17 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal; +Cc: linux-hyperv, linux-kernel
In-Reply-To: <e055f27ffc37a9a6a756a3329a60da608db5a04f.1566266609.git.brandonbonaby94@gmail.com>
On Mon, Aug 19, 2019 at 10:44:48PM -0400, Branden Bonaby wrote:
> Expose the test parameters as part of the debugfs channel attributes.
> We will control the testing state via these attributes.
>
> Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
> ---
> Changes in v2:
> - Move test attributes to debugfs.
> - Wrap test code under #ifdef statements.
> - Add new documentation file under Documentation/ABI/testing.
> - Make commit message reflect the change from from sysfs to debugfs.
>
>
> + if (IS_ERR_OR_NULL(dev_root)) {
> + pr_debug("debugfs_hyperv: %s/%s/ not created\n",
> + TESTING, device);
> + return PTR_ERR(dev_root);
> + }
Whoops, that single IS_ERR_OR_NULL shouldn't be there, it should just
be IS_ERR I'll change and resend the patch.
^ permalink raw reply
* Re: [PATCH net-next,v2 2/6] PCI: hv: Add a Hyper-V PCI interface driver for software backchannel interface
From: David Miller @ 2019-08-20 19:29 UTC (permalink / raw)
To: haiyangz
Cc: sashal, saeedm, leon, eranbe, lorenzo.pieralisi, bhelgaas,
linux-pci, linux-hyperv, netdev, kys, sthemmin, linux-kernel
In-Reply-To: <1566242976-108801-3-git-send-email-haiyangz@microsoft.com>
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Mon, 19 Aug 2019 19:30:47 +0000
> +static void __exit exit_hv_pci_intf(void)
> +{
> + pr_info("unloaded\n");
> +}
> +
> +static int __init init_hv_pci_intf(void)
> +{
> + pr_info("loaded\n");
> +
Clogging up the logs with useless messages like this is inappropriate.
Please remove these pr_info() calls.
Also, all of these symbols should probably be GPL exported.
^ permalink raw reply
* Re: [PATCH] vsock: Fix a lockdep warning in __vsock_release()
From: David Miller @ 2019-08-20 19:45 UTC (permalink / raw)
To: decui
Cc: jhansen, stefanha, sgarzare, netdev, sthemmin, Alexander.Levin,
sashal, haiyangz, kys, mikelley, linux-hyperv, gregkh,
linux-kernel
In-Reply-To: <1566270830-28981-1-git-send-email-decui@microsoft.com>
From: Dexuan Cui <decui@microsoft.com>
Date: Tue, 20 Aug 2019 03:14:22 +0000
> +static void __vsock_release2(struct sock *sk)
Do not duplicate an entire function just to adjust some aspect of the
lock debugging, please find a cleaner and more minimal way to
implement this fix.
^ permalink raw reply
* RE: [PATCH net-next,v2 2/6] PCI: hv: Add a Hyper-V PCI interface driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-20 23:00 UTC (permalink / raw)
To: David Miller
Cc: sashal@kernel.org, saeedm@mellanox.com, leon@kernel.org,
eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org,
KY Srinivasan, Stephen Hemminger, linux-kernel@vger.kernel.org
In-Reply-To: <20190820.122925.1080288470348205792.davem@davemloft.net>
> -----Original Message-----
> From: David Miller <davem@davemloft.net>
> Sent: Tuesday, August 20, 2019 3:29 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: sashal@kernel.org; saeedm@mellanox.com; leon@kernel.org;
> eranbe@mellanox.com; lorenzo.pieralisi@arm.com; bhelgaas@google.com;
> linux-pci@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; KY Srinivasan <kys@microsoft.com>; Stephen
> Hemminger <sthemmin@microsoft.com>; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next,v2 2/6] PCI: hv: Add a Hyper-V PCI interface
> driver for software backchannel interface
>
> From: Haiyang Zhang <haiyangz@microsoft.com>
> Date: Mon, 19 Aug 2019 19:30:47 +0000
>
> > +static void __exit exit_hv_pci_intf(void) {
> > + pr_info("unloaded\n");
> > +}
> > +
> > +static int __init init_hv_pci_intf(void) {
> > + pr_info("loaded\n");
> > +
>
> Clogging up the logs with useless messages like this is inappropriate.
> Please remove these pr_info() calls.
>
> Also, all of these symbols should probably be GPL exported.
I will update the patch -- remove the pr_info, and use EXPORT_SYMBOL_GPL()
for the symbols.
Thanks,
- Haiyang
^ permalink raw reply
* [PATCH v3 0/3] hv: vmbus: add fuzz testing to hv device
From: Branden Bonaby @ 2019-08-20 23:38 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
This patchset introduces a testing framework for Hyper-V drivers.
This framework allows us to introduce delays in the packet receive
path on a per-device basis. While the current code only supports
introducing arbitrary delays in the host/guest communication path,
we intend to expand this to support error injection in the future.
changes in v3:
patch 2: change call to IS_ERR_OR_NULL, to IS_ERR.
patch 3: Align python tool to match Linux coding style.
Changes in v2:
Patch 1: As per Vitaly's suggestion, wrapped the test code under an
#ifdef and updated the Kconfig file, so that the test code
will only be used when the config option is set to true.
(default is false).
Updated hyperv_vmbus header to contain new #ifdef with new
new functions for the test code.
Patch 2: Moved code from under sysfs to debugfs and wrapped it under
the new ifdef.
Updated MAINTAINERS file with new debugfs-hyperv file under
the section for hyperv.
Patch 3: Updated testing tool with new debugfs location.
Branden Bonaby (3):
drivers: hv: vmbus: Introduce latency testing
drivers: hv: vmbus: add fuzz test attributes to debugfs
tools: hv: add vmbus testing tool
Documentation/ABI/testing/debugfs-hyperv | 21 ++
MAINTAINERS | 1 +
drivers/hv/Kconfig | 7 +
drivers/hv/connection.c | 3 +
drivers/hv/hyperv_vmbus.h | 20 ++
drivers/hv/ring_buffer.c | 7 +
drivers/hv/vmbus_drv.c | 167 +++++++++++
include/linux/hyperv.h | 21 ++
tools/hv/vmbus_testing | 342 +++++++++++++++++++++++
9 files changed, 589 insertions(+)
create mode 100644 Documentation/ABI/testing/debugfs-hyperv
create mode 100644 tools/hv/vmbus_testing
--
2.17.1
^ permalink raw reply
* [PATCH v3 1/3] drivers: hv: vmbus: Introduce latency testing
From: Branden Bonaby @ 2019-08-20 23:39 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
In-Reply-To: <cover.1566340843.git.brandonbonaby94@gmail.com>
Introduce user specified latency in the packet reception path.
Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
---
Changes in v2:
- Add #ifdef in Kconfig file so test code will not interfere
with non-test code.
- Move test code functions for delay to hyperv_vmbus header
file.
- Wrap test code under #ifdef statement.
drivers/hv/Kconfig | 7 +++++++
drivers/hv/connection.c | 3 +++
drivers/hv/hyperv_vmbus.h | 20 ++++++++++++++++++++
drivers/hv/ring_buffer.c | 7 +++++++
include/linux/hyperv.h | 21 +++++++++++++++++++++
5 files changed, 58 insertions(+)
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 9a59957922d4..d97437ba0626 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -29,4 +29,11 @@ config HYPERV_BALLOON
help
Select this option to enable Hyper-V Balloon driver.
+config HYPERV_TESTING
+ bool "Hyper-V testing"
+ default n
+ depends on HYPERV && DEBUG_FS
+ help
+ Select this option to enable Hyper-V vmbus testing.
+
endmenu
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e15d4a0..c9c63a4033cd 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -357,6 +357,9 @@ void vmbus_on_event(unsigned long data)
trace_vmbus_on_event(channel);
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_delay_test(channel, INTERRUPT_DELAY);
+#endif /* CONFIG_HYPERV_TESTING */
do {
void (*callback_fn)(void *);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 362e70e9d145..edf14f596d8c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -357,4 +357,24 @@ enum hvutil_device_state {
HVUTIL_DEVICE_DYING, /* driver unload is in progress */
};
+#ifdef CONFIG_HYPERV_TESTING
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#define TESTING "hyperv"
+
+enum delay {
+ INTERRUPT_DELAY = 0,
+ MESSAGE_DELAY = 1,
+};
+
+int hv_debug_delay_files(struct hv_device *dev, struct dentry *root);
+int hv_debug_add_dev_dir(struct hv_device *dev);
+void hv_debug_rm_dev_dir(struct hv_device *dev);
+void hv_debug_rm_all_dir(void);
+void hv_debug_set_dir_dentry(struct hv_device *dev, struct dentry *root);
+void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type);
+
+#endif /* CONFIG_HYPERV_TESTING */
+
#endif /* _HYPERV_VMBUS_H */
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 9a03b163cbbd..51adda23b398 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -396,6 +396,10 @@ struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
struct hv_ring_buffer_info *rbi = &channel->inbound;
struct vmpacket_descriptor *desc;
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_delay_test(channel, MESSAGE_DELAY);
+#endif /* CONFIG_HYPERV_TESTING */
+
if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
return NULL;
@@ -421,6 +425,9 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
u32 packetlen = desc->len8 << 3;
u32 dsize = rbi->ring_datasize;
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_delay_test(channel, MESSAGE_DELAY);
+#endif /* CONFIG_HYPERV_TESTING */
/* bump offset to next potential packet */
rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
if (rbi->priv_read_index >= dsize)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc34c4a6..6bf8ef5c780c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -926,6 +926,21 @@ struct vmbus_channel {
* full outbound ring buffer.
*/
u64 out_full_first;
+
+#ifdef CONFIG_HYPERV_TESTING
+ /* enabling/disabling fuzz testing on the channel (default is false)*/
+ bool fuzz_testing_state;
+
+ /* Interrupt delay will delay the guest from emptying the ring buffer
+ * for a specific amount of time. The delay is in microseconds and will
+ * be between 1 to a maximum of 1000, its default is 0 (no delay).
+ * The Message delay will delay guest reading on a per message basis
+ * in microseconds between 1 to 1000 with the default being 0
+ * (no delay).
+ */
+ u32 fuzz_testing_interrupt_delay;
+ u32 fuzz_testing_message_delay;
+#endif /* CONFIG_HYPERV_TESTING */
};
static inline bool is_hvsock_channel(const struct vmbus_channel *c)
@@ -1166,6 +1181,12 @@ struct hv_device {
struct vmbus_channel *channel;
struct kset *channels_kset;
+
+#ifdef CONFIG_HYPERV_TESTING
+ /* place holder to keep track of the dir for hv device in debugfs */
+ struct dentry *debug_dir;
+#endif /* CONFIG_HYPERV_TESTING */
+
};
--
2.17.1
^ permalink raw reply related
* [PATCH v3 2/3] drivers: hv: vmbus: add test attributes to debugfs
From: Branden Bonaby @ 2019-08-20 23:39 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
In-Reply-To: <cover.1566340843.git.brandonbonaby94@gmail.com>
Expose the test parameters as part of the debugfs channel attributes.
We will control the testing state via these attributes.
Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
---
Changes in v3:
- Change call to IS_ERR_OR_NULL, to IS_ERR.
Changes in v2:
- Move test attributes to debugfs.
- Wrap test code under #ifdef statements.
- Add new documentation file under Documentation/ABI/testing.
- Make commit message reflect the change from from sysfs to debugfs.
Documentation/ABI/testing/debugfs-hyperv | 21 +++
MAINTAINERS | 1 +
drivers/hv/vmbus_drv.c | 167 +++++++++++++++++++++++
3 files changed, 189 insertions(+)
create mode 100644 Documentation/ABI/testing/debugfs-hyperv
diff --git a/Documentation/ABI/testing/debugfs-hyperv b/Documentation/ABI/testing/debugfs-hyperv
new file mode 100644
index 000000000000..b25f751fafa8
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hyperv
@@ -0,0 +1,21 @@
+What: /sys/kernel/debug/hyperv/<UUID>/fuzz_test_state
+Date: August 2019
+KernelVersion: 5.3
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing status of a vmbus device, whether its in an ON
+ state or a OFF state
+Users: Debugging tools
+
+What: /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_buffer_interrupt_delay
+Date: August 2019
+KernelVersion: 5.3
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing buffer delay value between 0 - 1000
+Users: Debugging tools
+
+What: /sys/kernel/debug/hyperv/<UUID>/delay/fuzz_test_message_delay
+Date: August 2019
+KernelVersion: 5.3
+Contact: Branden Bonaby <brandonbonaby94@gmail.com>
+Description: Fuzz testing message delay value between 0 - 1000
+Users: Debugging tools
diff --git a/MAINTAINERS b/MAINTAINERS
index e81e60bd7c26..120284a8185f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7460,6 +7460,7 @@ F: include/uapi/linux/hyperv.h
F: include/asm-generic/mshyperv.h
F: tools/hv/
F: Documentation/ABI/stable/sysfs-bus-vmbus
+F: Documentation/ABI/testing/debugfs-hyperv
HYPERBUS SUPPORT
M: Vignesh Raghavendra <vigneshr@ti.com>
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc35290..d2e47f04d172 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -919,6 +919,10 @@ static void vmbus_device_release(struct device *device)
struct hv_device *hv_dev = device_to_hv_device(device);
struct vmbus_channel *channel = hv_dev->channel;
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_rm_dev_dir(hv_dev);
+#endif /* CONFIG_HYPERV_TESTING */
+
mutex_lock(&vmbus_connection.channel_mutex);
hv_process_channel_removal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
@@ -1727,6 +1731,9 @@ int vmbus_device_register(struct hv_device *child_device_obj)
pr_err("Unable to register primary channeln");
goto err_kset_unregister;
}
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_add_dev_dir(child_device_obj);
+#endif /* CONFIG_HYPERV_TESTING */
return 0;
@@ -2086,6 +2093,159 @@ static void hv_crash_handler(struct pt_regs *regs)
hyperv_cleanup();
};
+#ifdef CONFIG_HYPERV_TESTING
+
+struct dentry *hv_root;
+
+static int hv_debugfs_delay_get(void *data, u64 *val)
+{
+ *val = *(u32 *)data;
+ return 0;
+}
+
+static int hv_debugfs_delay_set(void *data, u64 val)
+{
+ if (val >= 1 && val <= 1000)
+ *(u32 *)data = val;
+ /*Best to not use else statement here since we want
+ * the delay to remain the same if val > 1000
+ */
+ else if (val <= 0)
+ *(u32 *)data = 0;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(hv_debugfs_delay_fops, hv_debugfs_delay_get,
+ hv_debugfs_delay_set, "%llu\n");
+
+/* Setup delay files to store test values */
+int hv_debug_delay_files(struct hv_device *dev, struct dentry *root)
+{
+ struct vmbus_channel *channel = dev->channel;
+ char *buffer = "fuzz_test_buffer_interrupt_delay";
+ char *message = "fuzz_test_message_delay";
+ int *buffer_val = &channel->fuzz_testing_interrupt_delay;
+ int *message_val = &channel->fuzz_testing_message_delay;
+ struct dentry *buffer_file, *message_file;
+
+ buffer_file = debugfs_create_file(buffer, 0644, root,
+ buffer_val,
+ &hv_debugfs_delay_fops);
+ if (IS_ERR(buffer_file)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", buffer);
+ return PTR_ERR(buffer_file);
+ }
+
+ message_file = debugfs_create_file(message, 0644, root,
+ message_val,
+ &hv_debugfs_delay_fops);
+ if (IS_ERR(message_file)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", message);
+ return PTR_ERR(message_file);
+ }
+
+ return 0;
+}
+
+/* Setup test state value for vmbus device */
+int hv_debug_set_test_state(struct hv_device *dev, struct dentry *root)
+{
+ struct vmbus_channel *channel = dev->channel;
+ bool *state = &channel->fuzz_testing_state;
+ char *status = "fuzz_test_state";
+ struct dentry *test_state;
+
+ test_state = debugfs_create_bool(status, 0644, root, state);
+ if (IS_ERR(test_state)) {
+ pr_debug("debugfs_hyperv: file %s not created\n", status);
+ return PTR_ERR(test_state);
+ }
+
+ return 0;
+}
+
+/* Bind hv device to a dentry for debugfs */
+void hv_debug_set_dir_dentry(struct hv_device *dev, struct dentry *root)
+{
+ if (hv_root)
+ dev->debug_dir = root;
+}
+
+/* Create all test dentry's and names for fuzz testing */
+int hv_debug_add_dev_dir(struct hv_device *dev)
+{
+ const char *device = dev_name(&dev->device);
+ char *delay_name = "delay";
+ struct dentry *delay, *dev_root;
+ int ret;
+
+ if (!IS_ERR(hv_root)) {
+ dev_root = debugfs_create_dir(device, hv_root);
+ if (IS_ERR(dev_root)) {
+ pr_debug("debugfs_hyperv: %s/%s/ not created\n",
+ TESTING, device);
+ return PTR_ERR(dev_root);
+ }
+
+ hv_debug_set_test_state(dev, dev_root);
+ hv_debug_set_dir_dentry(dev, dev_root);
+ delay = debugfs_create_dir(delay_name, dev_root);
+
+ if (IS_ERR(delay)) {
+ pr_debug("debugfs_hyperv: %s/%s/%s/ not created\n",
+ TESTING, device, delay_name);
+ return PTR_ERR(delay);
+ }
+ ret = hv_debug_delay_files(dev, delay);
+
+ return ret;
+ }
+ pr_debug("debugfs_hyperv: %s/ not in root debugfs path\n", TESTING);
+ return PTR_ERR(hv_root);
+}
+
+/* Remove dentry associated with released hv device */
+void hv_debug_rm_dev_dir(struct hv_device *dev)
+{
+ if (!IS_ERR(hv_root))
+ debugfs_remove_recursive(dev->debug_dir);
+}
+
+/* Remove all dentrys associated with vmbus testing */
+void hv_debug_rm_all_dir(void)
+{
+ debugfs_remove_recursive(hv_root);
+}
+
+/* Delay buffer/message reads on a vmbus channel */
+void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type)
+{
+ struct vmbus_channel *test_channel = channel->primary_channel ?
+ channel->primary_channel :
+ channel;
+ bool state = test_channel->fuzz_testing_state;
+
+ if (state) {
+ if (delay_type == 0)
+ udelay(test_channel->fuzz_testing_interrupt_delay);
+ else
+ udelay(test_channel->fuzz_testing_message_delay);
+ }
+}
+
+/* Initialize top dentry for vmbus testing */
+int hv_debug_init(void)
+{
+ hv_root = debugfs_create_dir(TESTING, NULL);
+ if (IS_ERR(hv_root)) {
+ pr_debug("debugfs_hyperv: %s/ not created\n", TESTING);
+ return PTR_ERR(hv_root);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_HYPERV_TESTING */
+
static int __init hv_acpi_init(void)
{
int ret, t;
@@ -2108,6 +2268,9 @@ static int __init hv_acpi_init(void)
ret = -ETIMEDOUT;
goto cleanup;
}
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_init();
+#endif /* CONFIG_HYPERV_TESTING */
ret = vmbus_bus_init();
if (ret)
@@ -2140,6 +2303,10 @@ static void __exit vmbus_exit(void)
tasklet_kill(&hv_cpu->msg_dpc);
}
+#ifdef CONFIG_HYPERV_TESTING
+ hv_debug_rm_all_dir();
+#endif /* CONFIG_HYPERV_TESTING */
+
vmbus_free_channels();
if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
--
2.17.1
^ permalink raw reply related
* [PATCH v3 3/3] tools: hv: add vmbus testing tool
From: Branden Bonaby @ 2019-08-20 23:40 UTC (permalink / raw)
To: kys, haiyangz, sthemmin, sashal
Cc: Branden Bonaby, linux-hyperv, linux-kernel
In-Reply-To: <cover.1566340843.git.brandonbonaby94@gmail.com>
This is a userspace tool to drive the testing. Currently it supports
introducing user specified delay in the host to guest communication
path on a per-channel basis.
Signed-off-by: Branden Bonaby <brandonbonaby94@gmail.com>
---
Changes in v3:
- Align python tool to match Linux coding style.
Changes in v2:
- Move testing location to new location in debugfs.
tools/hv/vmbus_testing | 342 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 342 insertions(+)
create mode 100644 tools/hv/vmbus_testing
diff --git a/tools/hv/vmbus_testing b/tools/hv/vmbus_testing
new file mode 100644
index 000000000000..0f249f6ee698
--- /dev/null
+++ b/tools/hv/vmbus_testing
@@ -0,0 +1,342 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Program to allow users to fuzz test Hyper-V drivers
+# by interfacing with Hyper-V debugfs directories
+# author: Branden Bonaby
+
+import os
+import cmd
+import argparse
+from collections import defaultdict
+from argparse import RawDescriptionHelpFormatter
+
+# debugfs paths for vmbus must exist (same as in lsvmbus)
+debugfs_sys_path = "/sys/kernel/debug/hyperv"
+if not os.path.isdir(debugfs_sys_path):
+ print("{} doesn't exist/check permissions".format(debugfs_sys_path))
+ exit(-1)
+# Do not change unless, you change the debugfs attributes
+# in "/sys/kernel/debug/hyperv/<UUID>/". All fuzz testing
+# attributes will start with "fuzz_test".
+pathlen = len(debugfs_sys_path)
+fuzz_state_location = "fuzz_test_state"
+fuzz_states = {
+ 0 : "Disable",
+ 1 : "Enable"
+}
+
+fuzz_methods = {
+ 1 : "Delay_testing"
+}
+
+fuzz_delay_types = {
+ 1 : "fuzz_test_buffer_interrupt_delay",
+ 2 : "fuzz_test_message_delay"
+}
+
+def parse_args():
+ parser = argparse.ArgumentParser(description = "vmbus_testing "
+ "[-s] [0|1] [-q] [-p] <debugfs-path>\n""vmbus_testing [-s]"
+ " [0|1] [-q][-p] <debugfs-path> delay [-d] [val][val] [-E|-D]\n"
+ "vmbus_testing [-q] disable-all\n"
+ "vmbus_testing [-q] view [-v|-V]\n"
+ "vmbus_testing --version",
+ epilog = "Current testing options {}".format(fuzz_methods),
+ prog = 'vmbus_testing',
+ formatter_class = RawDescriptionHelpFormatter)
+ subparsers = parser.add_subparsers(dest = "action")
+ parser.add_argument("--version", action = "version",
+ version = '%(prog)s 1.0')
+ parser.add_argument("-q","--quiet", action = "store_true",
+ help = "silence none important test messages")
+ parser.add_argument("-s","--state", metavar = "", type = int,
+ choices = range(0, 2),
+ help = "Turn testing ON or OFF for a single device."
+ " The value (1) will turn testing ON. The value"
+ " of (0) will turn testing OFF with the default set"
+ " to (0).")
+ parser.add_argument("-p","--path", metavar = "",
+ help = "Refers to the debugfs path to a vmbus device."
+ " If the path is not a valid path to a vmbus device,"
+ " the program will exit. The path must be the"
+ " absolute path; use the lsvmbus command to find"
+ " the path.")
+ parser_delay = subparsers.add_parser("delay",
+ help = "Delay buffer/message reads in microseconds.",
+ description = "vmbus_testing -s [0|1] [-q] -p "
+ "<debugfs-path> delay -d "
+ "[buffer-delay-value] [message-delay-value]\n"
+ "vmbus_testing [-q] delay [buffer-delay-value] "
+ "[message-delay-value] -E\n"
+ "vmbus_testing [-q] delay [buffer-delay-value] "
+ "[message-delay-value] -D",
+ formatter_class = RawDescriptionHelpFormatter)
+ delay_group = parser_delay.add_mutually_exclusive_group()
+ delay_group.add_argument("-E", "--en_all", action = "store_true",
+ help = "Enable Buffer/Message Delay testing on ALL"
+ " devices. Use -d option with this to set the values"
+ " for both the buffer delay and the message delay. No"
+ " value can be (0) or less than (-1). If testing is"
+ " disabled on a device prior to running this command,"
+ " testing will be enabled on the device as a result"
+ " of this command.")
+ delay_group.add_argument("-D", "--dis_all", action = "store_true",
+ help = "Disable Buffer/Message delay testing on ALL"
+ " devices. A value equal to (-1) will keep the"
+ " current delay value, and a value equal to (0) will"
+ " remove delay testing for the specfied delay column."
+ " only values (-1) and (0) will be accepted but at"
+ " least one value must be a (0) or a (-1).")
+ parser_delay.add_argument("-d", "--delay_time", metavar = "", nargs = 2,
+ type = check_range, default = [0, 0], required = (True),
+ help = "Buffer/message delay time. A value of (0) will"
+ "disable delay testing on the specified delay column,"
+ " while a value of (-1) will ignore the specified"
+ " delay column. The default values are [0] & [0]."
+ " The first column represents the buffer delay value"
+ " and the second represents the message delay value."
+ " Value constraints: -1 <= value <= 1000.")
+ parser_dis_all = subparsers.add_parser("disable-all",
+ help = "Disable ALL testing on all vmbus devices.",
+ description = "vmbus_testing disable-all",
+ formatter_class = RawDescriptionHelpFormatter)
+ parser_view = subparsers.add_parser("view",
+ help = "View testing on vmbus devices.",
+ description = "vmbus_testing view -V\n"
+ "vmbus_testing -p <debugfs-path> view -v",
+ formatter_class = RawDescriptionHelpFormatter)
+ view_group = parser_view.add_mutually_exclusive_group()
+ view_group.add_argument("-V", "--view_all", action = "store_true",
+ help = "View the test status for all vmbus devices.")
+ view_group.add_argument("-v", "--view_single", action = "store_true",
+ help = "View test values for a single vmbus device.")
+
+ return parser.parse_args()
+
+# value checking for range checking input in parser
+def check_range(arg1):
+ try:
+ val = int(arg1)
+ except ValueError as err:
+ raise argparse.ArgumentTypeError(str(err))
+ if val < -1 or val > 1000:
+ message = ("\n\nExpected -1 <= value <= 1000, got value"
+ " {}\n").format(val)
+ raise argparse.ArgumentTypeError(message)
+ return val
+
+def main():
+ try:
+ dev_list = []
+ for dir in os.listdir(debugfs_sys_path):
+ dev_list.append(os.path.join(debugfs_sys_path, dir))
+ #key value, pairs
+ #key = debugfs device path
+ #value = list of fuzz testing attributes.
+ dev_files = defaultdict(list)
+ for dev in dev_list:
+ path = os.path.join(dev, "delay")
+ for f in os.listdir(path):
+ if (f.startswith("fuzz_test")):
+ dev_files[path].append(f)
+
+ dev_files.default_factory = None
+ args = parse_args()
+ path = args.path
+ state = args.state
+ quiet = args.quiet
+ if (not quiet):
+ print("*** Use lsvmbus to get vmbus device type"
+ " information.*** ")
+ if (state is not None and validate_args_path(path, dev_list)):
+ if (state is not get_test_state(path)):
+ change_test_state(path, quiet)
+ state = get_test_state(path)
+ if (state == 0 and path is not None):
+ disable_testing_single_device(path, 0, quiet)
+ return
+ #Use subparsers as the key for different fuzz testing methods
+ if (args.action == "delay"):
+ delay = args.delay_time
+ if (validate_delay_values(args, delay)):
+ delay_test_all_devices(dev_list, delay, quiet)
+ elif (validate_args_path(path, dev_list)):
+ if(get_test_state(path) == 1):
+ delay_test_store(path, delay, quiet)
+ return
+ print("device testing OFF, use -s 1 to turn ON")
+ elif (args.action == "disable-all"):
+ disable_all_testing(dev_list, quiet)
+ elif (args.action == "view"):
+ if (args.view_all):
+ all_devices_test_status(dev_list)
+ elif (args.view_single):
+ if (validate_args_path(path, dev_list)):
+ device_test_values(dev_files, path)
+ return
+ print("Error,(check path) usage: -p"\
+ " <debugfs device path> view -v")
+ except AttributeError:
+ print("check usage, 1 or more elements not provided")
+ exit(-1)
+
+# Validate delay values to make sure they are acceptable to
+# to either enable all delays on a device or disable all
+# delays on a device
+def validate_delay_values(args, delay):
+ if (args.en_all):
+ for i in delay:
+ if (i < -1 or i == 0):
+ print("\nError, Values must be"
+ " equal to -1 or be > 0, use"
+ " -d option")
+ exit(-1)
+ return True
+ elif (args.dis_all):
+ for i in delay:
+ if (i < -1 or i > 0):
+ print("\nError, at least 1 value"
+ " is not a (0) or a (-1)")
+ exit(-1)
+ return True
+ else:
+ return False
+
+
+# Validate argument path
+def validate_args_path(path, dev_list):
+ if (path in dev_list):
+ return True
+ else:
+ return False
+
+# display Testing status of single device
+def device_test_values(dev_files, path):
+
+ delay_path = os.path.join(path, 'delay')
+ for test in dev_files.get(delay_path):
+ print("{}".format(test), end = '')
+ print((" value = {}")\
+ .format(read_test_files(os.path.join(delay_path, test))))
+
+# display Testing state of devices
+def all_devices_test_status(dev_list):
+ for device in dev_list:
+ if (get_test_state(device) is 1):
+ print("Testing = ON for: {}".format(device.split("/")[5]))
+ else:
+ print("Testing = OFF for: {}".format(device.split("/")[5]))
+
+# read the vmbus device files, path must be absolute path before calling
+def read_test_files(path):
+ try:
+ with open(path,"r") as f:
+ state = f.readline().strip()
+ if (state == 'N'):
+ state = 0
+ elif (state == 'Y'):
+ state = 1
+ return int(state)
+
+ except IOError as e:
+ errno, strerror = e.args
+ print("I/O error({0}): {1} on file {2}"
+ .format(errno, strerror, path))
+ exit(-1)
+ except ValueError:
+ print ("Element to int conversion error in: \n{}".format(path))
+ exit(-1)
+
+# writing to vmbus device files, path must be absolute path before calling
+def write_test_files(path, value):
+ try:
+ with open(path,"w") as f:
+ f.write("{}".format(value))
+ except IOError as e:
+ errno, strerror = e.args
+ print("I/O error({0}): {1} on file {2}"
+ .format(errno, strerror, path))
+ exit(-1)
+
+# change testing state of device
+def change_test_state(device, quiet):
+ state_path = os.path.join(device, fuzz_state_location)
+ if (get_test_state(device) is 0):
+ write_test_files(state_path, 1)
+ if (not quiet):
+ print("Testing = ON for device: {}"
+ .format(state_path.split("/")[5]))
+ else:
+ write_test_files(state_path, 0)
+ if (not quiet):
+ print("Testing = OFF for device: {}"
+ .format(state_path.split("/")[5]))
+
+# get testing state of device
+def get_test_state(device):
+ #state == 1 - test = ON
+ #state == 0 - test = OFF
+ return read_test_files(os.path.join(device, fuzz_state_location))
+
+# Enter 1 - 1000 microseconds, into a single device using the
+# fuzz_test_buffer_interrupt_delay and fuzz_test_message_delay
+# debugfs attributes
+def delay_test_store(device,delay_length, quiet):
+
+ try:
+ # delay[0]- buffer delay, delay[1]- message delay
+ buff_test = os.path.join(os.path.sep,device, 'delay',
+ fuzz_delay_types.get(1))
+ mess_test = os.path.join(os.path.sep,device, 'delay',
+ fuzz_delay_types.get(2))
+
+ if (delay_length[0] >= 0):
+ write_test_files(buff_test, delay_length[0])
+ if (delay_length[1] >= 0):
+ write_test_files(mess_test, delay_length[1])
+ if (not quiet):
+ print("Buffer delay testing = {} for: {}"
+ .format(read_test_files(buff_test),
+ buff_test.split("/")[5]))
+ print("Message delay testing = {} for: {}"
+ .format(read_test_files(mess_test),
+ mess_test.split("/")[5]))
+ except IOError as e:
+ errno, strerror = e.args
+ print("I/O error({0}): {1} on files {2}{3}"
+ .format(errno, strerror, buff_test, mess_test))
+ exit(-1)
+
+#enabling/disabling delay testing on all devices
+def delay_test_all_devices(dev_list,delay,quiet):
+
+ for device in (dev_list):
+ if (get_test_state(device) is 0):
+ change_test_state(device,quiet)
+ delay_test_store(device, delay, quiet)
+
+#disabling testing on single device
+def disable_testing_single_device(device,test_type,quiet):
+
+ #test_type represents corresponding key
+ #delay method in delay_methods dict.
+ #special type 0 , used to disable all
+ #testing on SINGLE device.
+
+ if (test_type is 1 or test_type is 0):
+ #disable list [buffer,message]
+ disable_delay = [0, 0]
+ if (get_test_state(device) is 1):
+ change_test_state(device, quiet)
+ delay_test_store(device, disable_delay, quiet)
+
+#disabling testing on ALL devices
+def disable_all_testing(dev_list,quiet):
+
+ #delay disable list [buffer,message]
+ for device in dev_list:
+ disable_testing_single_device(device, 0, quiet)
+
+if __name__ == "__main__":
+ main()
--
2.17.1
^ permalink raw reply related
* [PATCH net-next,v3, 0/6] Add software backchannel and mlx5e HV VHCA stats
From: Haiyang Zhang @ 2019-08-21 0:23 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
This patch set adds paravirtual backchannel in software in pci_hyperv,
which is required by the mlx5e driver HV VHCA stats agent.
The stats agent is responsible on running a periodic rx/tx packets/bytes
stats update.
Dexuan Cui (1):
PCI: hv: Add a paravirtual backchannel in software
Eran Ben Elisha (4):
net/mlx5: Add wrappers for HyperV PCIe operations
net/mlx5: Add HV VHCA infrastructure
net/mlx5: Add HV VHCA control agent
net/mlx5e: Add mlx5e HV VHCA stats agent
Haiyang Zhang (1):
PCI: hv: Add a Hyper-V PCI interface driver for software backchannel
interface
MAINTAINERS | 1 +
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +
drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 +
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 25 ++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 371 +++++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 104 ++++++
drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
drivers/pci/Kconfig | 1 +
drivers/pci/controller/Kconfig | 7 +
drivers/pci/controller/Makefile | 1 +
drivers/pci/controller/pci-hyperv-intf.c | 67 ++++
drivers/pci/controller/pci-hyperv.c | 308 +++++++++++++++++
include/linux/hyperv.h | 29 ++
include/linux/mlx5/driver.h | 2 +
18 files changed, 1189 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
create mode 100644 drivers/pci/controller/pci-hyperv-intf.c
--
1.8.3.1
^ permalink raw reply
* [PATCH net-next,v3, 6/6] net/mlx5e: Add mlx5e HV VHCA stats agent
From: Haiyang Zhang @ 2019-08-21 0:23 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566346948-69497-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
HV VHCA stats agent is responsible on running a preiodic rx/tx
packets/bytes stats update. Currently the supported format is version
MLX5_HV_VHCA_STATS_VERSION. Block ID 1 is dedicated for statistics data
transfer from the VF to the PF.
The reporter fetch the statistics data from all opened channels, fill it
in a buffer and send it to mlx5_hv_vhca_write_agent.
As the stats layer should include some metadata per block (sequence and
offset), the HV VHCA layer shall modify the buffer before actually send it
over block 1.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 ++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++++++++++++++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 25 ++++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
6 files changed, 205 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index fc59a40..a889a38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -36,6 +36,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
en/tc_tun_geneve.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
#
# Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8fc5107..4a8008b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -54,6 +54,7 @@
#include "mlx5_core.h"
#include "en_stats.h"
#include "en/fs.h"
+#include "lib/hv_vhca.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -777,6 +778,15 @@ struct mlx5e_modify_sq_param {
int rl_index;
};
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+struct mlx5e_hv_vhca_stats_agent {
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work work;
+ u16 delay;
+ void *buf;
+};
+#endif
+
struct mlx5e_xsk {
/* UMEMs are stored separately from channels, because we don't want to
* lose them when channels are recreated. The kernel also stores UMEMs,
@@ -848,6 +858,9 @@ struct mlx5e_priv {
struct devlink_health_reporter *tx_reporter;
struct devlink_health_reporter *rx_reporter;
struct mlx5e_xsk xsk;
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+ struct mlx5e_hv_vhca_stats_agent stats_agent;
+#endif
};
struct mlx5e_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 0000000..c37b4ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+ struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+ struct mlx5e_channel_stats *stats;
+ int tc;
+
+ stats = &priv->channel_stats[ch];
+ data->rx_packets = stats->rq.packets;
+ data->rx_bytes = stats->rq.bytes;
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++) {
+ data->tx_packets += stats->sq[tc].packets;
+ data->tx_bytes += stats->sq[tc].bytes;
+ }
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int buf_len)
+{
+ int ch, i = 0;
+
+ for (ch = 0; ch < priv->max_nch; ch++) {
+ u64 *buf = data + i;
+
+ if (WARN_ON_ONCE(buf +
+ sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+ data + buf_len))
+ return;
+
+ mlx5e_hv_vhca_fill_ring_stats(priv, ch,
+ (struct mlx5e_hv_vhca_per_ring_stats *)buf);
+ i += sizeof(struct mlx5e_hv_vhca_per_ring_stats) / sizeof(u64);
+ }
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+ return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+ priv->max_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work *dwork;
+ struct mlx5e_priv *priv;
+ int buf_len, rc;
+ void *buf;
+
+ dwork = to_delayed_work(work);
+ sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+ priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+ buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ agent = sagent->agent;
+ buf = sagent->buf;
+
+ memset(buf, 0, buf_len);
+ mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+ rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+ if (rc) {
+ mlx5_core_err(priv->mdev,
+ "%s: Failed to write stats, err = %d\n",
+ __func__, rc);
+ return;
+ }
+
+ if (sagent->delay)
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+ MLX5_HV_VHCA_STATS_VERSION = 1,
+ MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5e_priv *priv;
+
+ priv = mlx5_hv_vhca_agent_priv(agent);
+ sagent = &priv->stats_agent;
+
+ block->version = MLX5_HV_VHCA_STATS_VERSION;
+ block->rings = priv->max_nch;
+
+ if (!block->command) {
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+ return;
+ }
+
+ sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+ msecs_to_jiffies(block->command * 100);
+
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ struct mlx5_hv_vhca_agent *agent;
+
+ priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+ if (!priv->stats_agent.buf)
+ return -ENOMEM;
+
+ agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+ MLX5_HV_VHCA_AGENT_STATS,
+ mlx5e_hv_vhca_stats_control, NULL,
+ mlx5e_hv_vhca_stats_cleanup,
+ priv);
+
+ if (IS_ERR_OR_NULL(agent)) {
+ if (IS_ERR(agent))
+ netdev_warn(priv->netdev,
+ "Failed to create hv vhca stats agent, err = %ld\n",
+ PTR_ERR(agent));
+
+ kfree(priv->stats_agent.buf);
+ return IS_ERR_OR_NULL(agent);
+ }
+
+ priv->stats_agent.agent = agent;
+ INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+
+ return 0;
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+ if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+ return;
+
+ mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+ kfree(priv->stats_agent.buf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 0000000..664463f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+
+static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5721d3d..fac8455 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -64,6 +64,7 @@
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
+#include "en/hv_vhca_stats.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
@@ -5103,6 +5104,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_init(priv);
+ mlx5e_hv_vhca_stats_create(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -5135,6 +5137,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work);
+ mlx5e_hv_vhca_stats_destroy(priv);
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_cleanup(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index 984e7ad..4bad6a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -13,6 +13,7 @@
enum mlx5_hv_vhca_agent_type {
MLX5_HV_VHCA_AGENT_CONTROL = 0,
+ MLX5_HV_VHCA_AGENT_STATS = 1,
MLX5_HV_VHCA_AGENT_MAX = 32,
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v3, 5/6] net/mlx5: Add HV VHCA control agent
From: Haiyang Zhang @ 2019-08-21 0:23 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566346948-69497-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
Control agent is responsible over of the control block (ID 0). It should
update the PF via this block about every capability change. In addition,
upon block 0 invalidate, it should activate all other supported agents
with data requests from the PF.
Upon agent create/destroy, the invalidate callback of the control agent
is being called in order to update the PF driver about this change.
The control agent is an integral part of HV VHCA and will be created
and destroy as part of the HV VHCA init/cleanup flow.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 122 ++++++++++++++++++++-
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
2 files changed, 121 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
index 84d1d75..4047629 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -109,22 +109,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
queue_work(hv_vhca->work_queue, &work->invalidate_work);
}
+#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
+
+static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->control)
+ continue;
+
+ if (!(AGENT_MASK(agent->type) & block->control))
+ continue;
+
+ agent->control(agent, block);
+ }
+}
+
+static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
+ u32 *capabilities)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (agent)
+ *capabilities |= AGENT_MASK(agent->type);
+ }
+}
+
+static void
+mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+ struct mlx5_core_dev *dev = hv_vhca->dev;
+ struct mlx5_hv_vhca_control_block *block;
+ u32 capabilities = 0;
+ int err;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return;
+
+ err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
+ if (err)
+ goto free_block;
+
+ mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
+
+ /* In case no capabilities, send empty block in return */
+ if (!capabilities) {
+ memset(block, 0, sizeof(*block));
+ goto write;
+ }
+
+ if (block->capabilities != capabilities)
+ block->capabilities = capabilities;
+
+ if (block->control & ~capabilities)
+ goto free_block;
+
+ mlx5_hv_vhca_agents_control(hv_vhca, block);
+ block->command_ack = block->command;
+
+write:
+ mlx5_hv_write_config(dev, block, sizeof(*block), 0);
+
+free_block:
+ kfree(block);
+}
+
+static struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
+{
+ return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
+ NULL,
+ mlx5_hv_vhca_control_agent_invalidate,
+ NULL, NULL);
+}
+
+static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ mlx5_hv_vhca_agent_destroy(agent);
+}
+
int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
{
+ struct mlx5_hv_vhca_agent *agent;
+ int err;
+
if (IS_ERR_OR_NULL(hv_vhca))
return IS_ERR_OR_NULL(hv_vhca);
- return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
- mlx5_hv_vhca_invalidate);
+ err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+ mlx5_hv_vhca_invalidate);
+ if (err)
+ return err;
+
+ agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
+ if (IS_ERR_OR_NULL(agent)) {
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+ return IS_ERR_OR_NULL(agent);
+ }
+
+ hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
+
+ return 0;
}
void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
{
+ struct mlx5_hv_vhca_agent *agent;
int i;
if (IS_ERR_OR_NULL(hv_vhca))
return;
+ agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
+ if (agent)
+ mlx5_hv_vhca_control_agent_destroy(agent);
+
mutex_lock(&hv_vhca->agents_lock);
for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
WARN_ON(hv_vhca->agents[i]);
@@ -134,6 +243,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
mlx5_hv_unregister_invalidate(hv_vhca->dev);
}
+static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
+{
+ mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
+}
+
struct mlx5_hv_vhca_agent *
mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
enum mlx5_hv_vhca_agent_type type,
@@ -174,6 +288,8 @@ struct mlx5_hv_vhca_agent *
hv_vhca->agents[type] = agent;
mutex_unlock(&hv_vhca->agents_lock);
+ mlx5_hv_vhca_agents_update(hv_vhca);
+
return agent;
}
@@ -195,6 +311,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
agent->cleanup(agent);
kfree(agent);
+
+ mlx5_hv_vhca_agents_update(hv_vhca);
}
static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index cdf1303..984e7ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -12,6 +12,7 @@
struct mlx5_hv_vhca_control_block;
enum mlx5_hv_vhca_agent_type {
+ MLX5_HV_VHCA_AGENT_CONTROL = 0,
MLX5_HV_VHCA_AGENT_MAX = 32,
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v3, 4/6] net/mlx5: Add HV VHCA infrastructure
From: Haiyang Zhang @ 2019-08-21 0:23 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566346948-69497-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
HV VHCA is a layer which provides PF to VF communication channel based on
HyperV PCI config channel. It implements Mellanox's Inter VHCA control
communication protocol. The protocol contains control block in order to
pass messages between the PF and VF drivers, and data blocks in order to
pass actual data.
The infrastructure is agent based. Each agent will be responsible of
contiguous buffer blocks in the VHCA config space. This infrastructure will
bind agents to their blocks, and those agents can only access read/write
the buffer blocks assigned to them. Each agent will provide three
callbacks (control, invalidate, cleanup). Control will be invoked when
block-0 is invalidated with a command that concerns this agent. Invalidate
callback will be invoked if one of the blocks assigned to this agent was
invalidated. Cleanup will be invoked before the agent is being freed in
order to clean all of its open resources or deferred works.
Block-0 serves as the control block. All execution commands from the PF
will be written by the PF over this block. VF will ack on those by
writing on block-0 as well. Its format is described by struct
mlx5_hv_vhca_control_block layout.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +-
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 253 +++++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 102 +++++++++
drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
include/linux/mlx5/driver.h | 2 +
5 files changed, 365 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 247295b..fc59a40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
-mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
#
# Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 0000000..84d1d75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+ struct mlx5_core_dev *dev;
+ struct workqueue_struct *work_queue;
+ struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX];
+ struct mutex agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+ struct work_struct invalidate_work;
+ struct mlx5_hv_vhca *hv_vhca;
+ u64 block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+ u16 sequence;
+ u16 offset;
+ u8 reserved[4];
+ u64 data[15];
+};
+
+struct mlx5_hv_vhca_agent {
+ enum mlx5_hv_vhca_agent_type type;
+ struct mlx5_hv_vhca *hv_vhca;
+ void *priv;
+ u16 seq;
+ void (*control)(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block);
+ void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask);
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_hv_vhca *hv_vhca = NULL;
+
+ hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+ if (!hv_vhca)
+ return ERR_PTR(-ENOMEM);
+
+ hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+ if (!hv_vhca->work_queue) {
+ kfree(hv_vhca);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ hv_vhca->dev = dev;
+ mutex_init(&hv_vhca->agents_lock);
+
+ return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ destroy_workqueue(hv_vhca->work_queue);
+ kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+ struct mlx5_hv_vhca_work *hwork;
+ struct mlx5_hv_vhca *hv_vhca;
+ int i;
+
+ hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+ hv_vhca = hwork->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->invalidate)
+ continue;
+
+ if (!(BIT(agent->type) & hwork->block_mask))
+ continue;
+
+ agent->invalidate(agent, hwork->block_mask);
+ }
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+ struct mlx5_hv_vhca_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+ work->hv_vhca = hv_vhca;
+ work->block_mask = block_mask;
+
+ queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return IS_ERR_OR_NULL(hv_vhca);
+
+ return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+ mlx5_hv_vhca_invalidate);
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+ int i;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+ WARN_ON(hv_vhca->agents[i]);
+
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
+ void *priv)
+{
+ struct mlx5_hv_vhca_agent *agent;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return ERR_PTR(-ENOMEM);
+
+ if (type >= MLX5_HV_VHCA_AGENT_MAX)
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&hv_vhca->agents_lock);
+ if (hv_vhca->agents[type]) {
+ mutex_unlock(&hv_vhca->agents_lock);
+ return ERR_PTR(-EINVAL);
+ }
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+ if (!agent)
+ return ERR_PTR(-ENOMEM);
+
+ agent->type = type;
+ agent->hv_vhca = hv_vhca;
+ agent->priv = priv;
+ agent->control = control;
+ agent->invalidate = invalidate;
+ agent->cleanup = cleaup;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ hv_vhca->agents[type] = agent;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+
+ if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+ mutex_unlock(&hv_vhca->agents_lock);
+ return;
+ }
+
+ hv_vhca->agents[agent->type] = NULL;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ if (agent->cleanup)
+ agent->cleanup(agent);
+
+ kfree(agent);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_data_block *data_block,
+ void *src, int len, int *offset)
+{
+ int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+ data_block->sequence = agent->seq;
+ data_block->offset = (*offset)++;
+ memcpy(data_block->data, src, bytes);
+
+ return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+ agent->seq++;
+}
+
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len)
+{
+ int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+ int block_offset = 0;
+ int total = 0;
+ int err;
+
+ while (len) {
+ struct mlx5_hv_vhca_data_block data_block = {0};
+ int bytes;
+
+ bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+ buf + total,
+ len, &block_offset);
+ if (!bytes)
+ return -ENOMEM;
+
+ err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+ sizeof(data_block), offset);
+ if (err)
+ return err;
+
+ total += bytes;
+ len -= bytes;
+ }
+
+ mlx5_hv_vhca_agent_seq_update(agent);
+
+ return 0;
+}
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+ return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 0000000..cdf1303
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+ MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+struct mlx5_hv_vhca_control_block {
+ u32 capabilities;
+ u32 control;
+ u16 command;
+ u16 command_ack;
+ u16 version;
+ u16 rings;
+ u32 reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+ u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+
+static inline int
+mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len)
+{
+ return 0;
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 80437d4..4b74315 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -69,6 +69,7 @@
#include "lib/pci_vsc.h"
#include "diag/fw_tracer.h"
#include "ecpf.h"
+#include "lib/hv_vhca.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -868,6 +869,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
}
dev->tracer = mlx5_fw_tracer_create(dev);
+ dev->hv_vhca = mlx5_hv_vhca_create(dev);
return 0;
@@ -897,6 +899,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
+ mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_fpga_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1063,6 +1066,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_fw_tracer;
}
+ mlx5_hv_vhca_init(dev->hv_vhca);
+
err = mlx5_fpga_device_start(dev);
if (err) {
mlx5_core_err(dev, "fpga device start failed %d\n", err);
@@ -1118,6 +1123,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
mlx5_eq_table_destroy(dev);
@@ -1138,6 +1144,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 467de34..0e00cbc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -638,6 +638,7 @@ struct mlx5_clock {
struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
+struct mlx5_hv_vhca;
struct mlx5_core_dev {
struct device *device;
@@ -685,6 +686,7 @@ struct mlx5_core_dev {
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
u32 vsc_addr;
+ struct mlx5_hv_vhca *hv_vhca;
};
struct mlx5_db {
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v3, 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Haiyang Zhang @ 2019-08-21 0:23 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566346948-69497-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
Add wrapper functions for HyperV PCIe read / write /
block_invalidate_register operations. This will be used as an
infrastructure in the downstream patch for software communication.
This will be enabled by default if CONFIG_PCI_HYPERV_INTERFACE is set.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
3 files changed, 87 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8b7edaa..247295b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
#
# Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000..cf08d02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset, bool read)
+{
+ int rc = -EOPNOTSUPP;
+ int bytes_returned;
+ int block_id;
+
+ if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+ rc = read ?
+ hyperv_read_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+ &bytes_returned) :
+ hyperv_write_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+ /* Make sure len bytes were read successfully */
+ if (read)
+ rc |= !(len == bytes_returned);
+
+ if (rc) {
+ mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+ read ? "read" : "write", rc, len,
+ offset);
+ return rc;
+ }
+
+ return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ return hyperv_reg_block_invalidate(dev->pdev, context,
+ block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+ hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000..f9a4557
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v3, 2/6] PCI: hv: Add a Hyper-V PCI interface driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-21 0:23 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566346948-69497-1-git-send-email-haiyangz@microsoft.com>
This interface driver is a helper driver allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
MAINTAINERS | 1 +
drivers/pci/Kconfig | 1 +
drivers/pci/controller/Kconfig | 7 ++++
drivers/pci/controller/Makefile | 1 +
drivers/pci/controller/pci-hyperv-intf.c | 67 ++++++++++++++++++++++++++++++++
drivers/pci/controller/pci-hyperv.c | 12 ++++--
include/linux/hyperv.h | 30 ++++++++++----
7 files changed, 108 insertions(+), 11 deletions(-)
create mode 100644 drivers/pci/controller/pci-hyperv-intf.c
diff --git a/MAINTAINERS b/MAINTAINERS
index e352550..866ae88 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7453,6 +7453,7 @@ F: drivers/hid/hid-hyperv.c
F: drivers/hv/
F: drivers/input/serio/hyperv-keyboard.c
F: drivers/pci/controller/pci-hyperv.c
+F: drivers/pci/controller/pci-hyperv-intf.c
F: drivers/net/hyperv/
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab9240..c313de9 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -182,6 +182,7 @@ config PCI_LABEL
config PCI_HYPERV
tristate "Hyper-V PCI Frontend"
depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ select PCI_HYPERV_INTERFACE
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index fe9f9f1..70e0782 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -281,5 +281,12 @@ config VMD
To compile this driver as a module, choose M here: the
module will be called vmd.
+config PCI_HYPERV_INTERFACE
+ tristate "Hyper-V PCI Interface"
+ depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ help
+ The Hyper-V PCI Interface is a helper driver allows other drivers to
+ have a common interface with the Hyper-V PCI frontend driver.
+
source "drivers/pci/controller/dwc/Kconfig"
endmenu
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index d56a507..a2a22c9 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
+obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o
obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c
new file mode 100644
index 0000000..cc96be4
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv-intf.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ * Haiyang Zhang <haiyangz@microsoft.com>
+ *
+ * This small module is a helper driver allows other drivers to
+ * have a common interface with the Hyper-V PCI frontend driver.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hyperv.h>
+
+struct hyperv_pci_block_ops hvpci_block_ops;
+EXPORT_SYMBOL_GPL(hvpci_block_ops);
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned)
+{
+ if (!hvpci_block_ops.read_block)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
+ bytes_returned);
+}
+EXPORT_SYMBOL_GPL(hyperv_read_cfg_blk);
+
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id)
+{
+ if (!hvpci_block_ops.write_block)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.write_block(dev, buf, len, block_id);
+}
+EXPORT_SYMBOL_GPL(hyperv_write_cfg_blk);
+
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ if (!hvpci_block_ops.reg_blk_invalidate)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.reg_blk_invalidate(dev, context,
+ block_invalidate);
+}
+EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate);
+
+static void __exit exit_hv_pci_intf(void)
+{
+}
+
+static int __init init_hv_pci_intf(void)
+{
+ return 0;
+}
+
+module_init(init_hv_pci_intf);
+module_exit(exit_hv_pci_intf);
+
+MODULE_DESCRIPTION("Hyper-V PCI Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 57adeca..9c93ac2 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
*bytes_returned = comp_pkt.bytes_returned;
return 0;
}
-EXPORT_SYMBOL(hv_read_config_block);
/**
* hv_pci_write_config_compl() - Invoked when a response packet for a write
@@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
return 0;
}
-EXPORT_SYMBOL(hv_write_config_block);
/**
* hv_register_block_invalidate() - Invoked when a config block invalidation
@@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
return 0;
}
-EXPORT_SYMBOL(hv_register_block_invalidate);
/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
@@ -3045,10 +3042,19 @@ static int hv_pci_remove(struct hv_device *hdev)
static void __exit exit_hv_pci_drv(void)
{
vmbus_driver_unregister(&hv_pci_drv);
+
+ hvpci_block_ops.read_block = NULL;
+ hvpci_block_ops.write_block = NULL;
+ hvpci_block_ops.reg_blk_invalidate = NULL;
}
static int __init init_hv_pci_drv(void)
{
+ /* Initialize PCI block r/w interface */
+ hvpci_block_ops.read_block = hv_read_config_block;
+ hvpci_block_ops.write_block = hv_write_config_block;
+ hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
+
return vmbus_driver_register(&hv_pci_drv);
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9d37f8c..2afe6fd 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1579,18 +1579,32 @@ struct vmpacket_descriptor *
pkt = hv_pkt_iter_next(channel, pkt))
/*
- * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
+ * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
* sends requests to read and write blocks. Each block must be 128 bytes or
* smaller. Optionally, the VF driver can register a callback function which
* will be invoked when the host says that one or more of the first 64 block
* IDs is "invalid" which means that the VF driver should reread them.
*/
#define HV_CONFIG_BLOCK_SIZE_MAX 128
-int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
- unsigned int block_id, unsigned int *bytes_returned);
-int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
- unsigned int block_id);
-int hv_register_block_invalidate(struct pci_dev *dev, void *context,
- void (*block_invalidate)(void *context,
- u64 block_mask));
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+
+struct hyperv_pci_block_ops {
+ int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+ int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+ int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+};
+
+extern struct hyperv_pci_block_ops hvpci_block_ops;
+
#endif /* _HYPERV_H */
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v3, 1/6] PCI: hv: Add a paravirtual backchannel in software
From: Haiyang Zhang @ 2019-08-21 0:23 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org, Dexuan Cui, Jake Oshins
In-Reply-To: <1566346948-69497-1-git-send-email-haiyangz@microsoft.com>
From: Dexuan Cui <decui@microsoft.com>
Windows SR-IOV provides a backchannel mechanism in software for communication
between a VF driver and a PF driver. These "configuration blocks" are
similar in concept to PCI configuration space, but instead of doing reads and
writes in 32-bit chunks through a very slow path, packets of up to 128 bytes
can be sent or received asynchronously.
Nearly every SR-IOV device contains just such a communications channel in
hardware, so using this one in software is usually optional. Using the
software channel, however, allows driver implementers to leverage software
tools that fuzz the communications channel looking for vulnerabilities.
The usage model for these packets puts the responsibility for reading or
writing on the VF driver. The VF driver sends a read or a write packet,
indicating which "block" is being referred to by number.
If the PF driver wishes to initiate communication, it can "invalidate" one or
more of the first 64 blocks. This invalidation is delivered via a callback
supplied by the VF driver by this driver.
No protocol is implied, except that supplied by the PF and VF drivers.
Signed-off-by: Jake Oshins <jakeo@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
drivers/pci/controller/pci-hyperv.c | 302 ++++++++++++++++++++++++++++++++++++
include/linux/hyperv.h | 15 ++
2 files changed, 317 insertions(+)
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b6254..57adeca 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
struct tran_int_desc int_desc;
} __packed;
+/*
+ * Note: the VM must pass a valid block id, wslot and bytes_requested.
+ */
+struct pci_read_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 bytes_requested;
+} __packed;
+
+struct pci_read_block_response {
+ struct vmpacket_descriptor hdr;
+ u32 status;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and byte_count.
+ */
+struct pci_write_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 byte_count;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+struct pci_dev_inval_block {
+ struct pci_incoming_message incoming;
+ union win_slot_encoding wslot;
+ u64 block_mask;
+} __packed;
+
struct pci_dev_incoming {
struct pci_incoming_message incoming;
union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
struct hv_pcibus_device *hbus;
struct work_struct wrk;
+ void (*block_invalidate)(void *context, u64 block_mask);
+ void *invalidate_context;
+
/*
* What would be observed if one wrote 0xFFFFFFFF to a BAR and then
* read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,256 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
.write = hv_pcifront_write_config,
};
+/*
+ * Paravirtual backchannel
+ *
+ * Hyper-V SR-IOV provides a backchannel mechanism in software for
+ * communication between a VF driver and a PF driver. These
+ * "configuration blocks" are similar in concept to PCI configuration space,
+ * but instead of doing reads and writes in 32-bit chunks through a very slow
+ * path, packets of up to 128 bytes can be sent or received asynchronously.
+ *
+ * Nearly every SR-IOV device contains just such a communications channel in
+ * hardware, so using this one in software is usually optional. Using the
+ * software channel, however, allows driver implementers to leverage software
+ * tools that fuzz the communications channel looking for vulnerabilities.
+ *
+ * The usage model for these packets puts the responsibility for reading or
+ * writing on the VF driver. The VF driver sends a read or a write packet,
+ * indicating which "block" is being referred to by number.
+ *
+ * If the PF driver wishes to initiate communication, it can "invalidate" one or
+ * more of the first 64 blocks. This invalidation is delivered via a callback
+ * supplied by the VF driver by this driver.
+ *
+ * No protocol is implied, except that supplied by the PF and VF drivers.
+ */
+
+struct hv_read_config_compl {
+ struct hv_pci_compl comp_pkt;
+ void *buf;
+ unsigned int len;
+ unsigned int bytes_returned;
+};
+
+/**
+ * hv_pci_read_config_compl() - Invoked when a response packet
+ * for a read config block operation arrives.
+ * @context: Identifies the read config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_read_config_compl *comp = context;
+ struct pci_read_block_response *read_resp =
+ (struct pci_read_block_response *)resp;
+ unsigned int data_len, hdr_len;
+
+ hdr_len = offsetof(struct pci_read_block_response, bytes);
+ if (resp_packet_size < hdr_len) {
+ comp->comp_pkt.completion_status = -1;
+ goto out;
+ }
+
+ data_len = resp_packet_size - hdr_len;
+ if (data_len > 0 && read_resp->status == 0) {
+ comp->bytes_returned = min(comp->len, data_len);
+ memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
+ } else {
+ comp->bytes_returned = 0;
+ }
+
+ comp->comp_pkt.completion_status = read_resp->status;
+out:
+ complete(&comp->comp_pkt.host_event);
+}
+
+/**
+ * hv_read_config_block() - Sends a read config block request to
+ * the back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer into which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which has been requested.
+ * @bytes_returned: Size which came back from the back-end driver.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id, unsigned int *bytes_returned)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_read_block)];
+ } pkt;
+ struct hv_read_config_compl comp_pkt;
+ struct pci_read_block *read_blk;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.comp_pkt.host_event);
+ comp_pkt.buf = buf;
+ comp_pkt.len = len;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_read_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ read_blk = (struct pci_read_block *)&pkt.pkt.message;
+ read_blk->message_type.type = PCI_READ_BLOCK;
+ read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ read_blk->block_id = block_id;
+ read_blk->bytes_requested = len;
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
+ sizeof(*read_blk), (unsigned long)&pkt.pkt,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.comp_pkt.completion_status != 0 ||
+ comp_pkt.bytes_returned == 0) {
+ dev_err(&hbus->hdev->device,
+ "Read Config Block failed: 0x%x, bytes_returned=%d\n",
+ comp_pkt.comp_pkt.completion_status,
+ comp_pkt.bytes_returned);
+ return -EIO;
+ }
+
+ *bytes_returned = comp_pkt.bytes_returned;
+ return 0;
+}
+EXPORT_SYMBOL(hv_read_config_block);
+
+/**
+ * hv_pci_write_config_compl() - Invoked when a response packet for a write
+ * config block operation arrives.
+ * @context: Identifies the write config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_pci_compl *comp_pkt = context;
+
+ comp_pkt->completion_status = resp->status;
+ complete(&comp_pkt->host_event);
+}
+
+/**
+ * hv_write_config_block() - Sends a write config block request to the
+ * back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer from which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which is being written.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_write_block)];
+ u32 reserved;
+ } pkt;
+ struct hv_pci_compl comp_pkt;
+ struct pci_write_block *write_blk;
+ u32 pkt_size;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.host_event);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_write_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ write_blk = (struct pci_write_block *)&pkt.pkt.message;
+ write_blk->message_type.type = PCI_WRITE_BLOCK;
+ write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ write_blk->block_id = block_id;
+ write_blk->byte_count = len;
+ memcpy(write_blk->bytes, buf, len);
+ pkt_size = offsetof(struct pci_write_block, bytes) + len;
+ /*
+ * This quirk is required on some hosts shipped around 2018, because
+ * these hosts don't check the pkt_size correctly (new hosts have been
+ * fixed since early 2019). The quirk is also safe on very old hosts
+ * and new hosts, because, on them, what really matters is the length
+ * specified in write_blk->byte_count.
+ */
+ pkt_size += sizeof(pkt.reserved);
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
+ (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.completion_status != 0) {
+ dev_err(&hbus->hdev->device,
+ "Write Config Block failed: 0x%x\n",
+ comp_pkt.completion_status);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(hv_write_config_block);
+
+/**
+ * hv_register_block_invalidate() - Invoked when a config block invalidation
+ * arrives from the back-end driver.
+ * @pdev: The PCI driver's representation for this device.
+ * @context: Identifies the device.
+ * @block_invalidate: Identifies all of the blocks being invalidated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct hv_pci_dev *hpdev;
+
+ hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+ if (!hpdev)
+ return -ENODEV;
+
+ hpdev->block_invalidate = block_invalidate;
+ hpdev->invalidate_context = context;
+
+ put_pcichild(hpdev);
+ return 0;
+
+}
+EXPORT_SYMBOL(hv_register_block_invalidate);
+
/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
struct tran_int_desc *int_desc)
@@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context)
struct pci_response *response;
struct pci_incoming_message *new_message;
struct pci_bus_relations *bus_rel;
+ struct pci_dev_inval_block *inval;
struct pci_dev_incoming *dev_message;
struct hv_pci_dev *hpdev;
@@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context)
}
break;
+ case PCI_INVALIDATE_BLOCK:
+
+ inval = (struct pci_dev_inval_block *)buffer;
+ hpdev = get_pcichild_wslot(hbus,
+ inval->wslot.slot);
+ if (hpdev) {
+ if (hpdev->block_invalidate) {
+ hpdev->block_invalidate(
+ hpdev->invalidate_context,
+ inval->block_mask);
+ }
+ put_pcichild(hpdev);
+ }
+ break;
+
default:
dev_warn(&hbus->hdev->device,
"Unimplemented protocol message %x\n",
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..9d37f8c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1578,4 +1578,19 @@ struct vmpacket_descriptor *
for (pkt = hv_pkt_iter_first(channel); pkt; \
pkt = hv_pkt_iter_next(channel, pkt))
+/*
+ * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
+ * sends requests to read and write blocks. Each block must be 128 bytes or
+ * smaller. Optionally, the VF driver can register a callback function which
+ * will be invoked when the host says that one or more of the first 64 block
+ * IDs is "invalid" which means that the VF driver should reread them.
+ */
+#define HV_CONFIG_BLOCK_SIZE_MAX 128
+int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+int hv_register_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
#endif /* _HYPERV_H */
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox