* [PATCH 09/15] Drivers: hv: Introduce hv_output_arg_exists in hv_common
From: Nuno Das Neves @ 2023-07-27 19:54 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
Introduce hv_output_arg_exists() as a more flexible way of determining
whether to allocate the per-CPU output arg page.
The output arg will be used by both mshv_vtl and the root partition.
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
drivers/hv/hv_common.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 99d9b262b8a7..16f069beda78 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -57,6 +57,18 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
void * __percpu *hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
+/*
+ * Determine whether the per-CPU output arg is in use, for
+ * allocation/deallocation.
+ *
+ * Returns true unconditionally when the kernel is built with
+ * CONFIG_MSHV_VTL, and otherwise only when running as the root partition.
+ */
+static bool hv_output_arg_exists(void)
+{
+	return IS_ENABLED(CONFIG_MSHV_VTL) || hv_root_partition;
+}
+
static void hv_kmsg_dump_unregister(void);
static struct ctl_table_header *hv_ctl_table_hdr;
@@ -338,10 +350,12 @@ int __init hv_common_init(void)
hyperv_pcpu_input_arg = alloc_percpu(void *);
BUG_ON(!hyperv_pcpu_input_arg);
- /* Allocate the per-CPU state for output arg for root */
- if (hv_root_partition) {
+ if (hv_output_arg_exists()) {
hyperv_pcpu_output_arg = alloc_percpu(void *);
BUG_ON(!hyperv_pcpu_output_arg);
+ }
+
+ if (hv_root_partition) {
hv_synic_eventring_tail = alloc_percpu(u8 *);
BUG_ON(hv_synic_eventring_tail == NULL);
}
@@ -371,7 +385,7 @@ int hv_common_cpu_init(unsigned int cpu)
u8 **synic_eventring_tail;
u64 msr_vp_index;
gfp_t flags;
- int pgcount = hv_root_partition ? 2 : 1;
+ int pgcount = hv_output_arg_exists() ? 2 : 1;
/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
@@ -381,9 +395,12 @@ int hv_common_cpu_init(unsigned int cpu)
if (!(*inputarg))
return -ENOMEM;
- if (hv_root_partition) {
+ if (hv_output_arg_exists()) {
outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
*outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+ }
+
+ if (hv_root_partition) {
synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
*synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT, sizeof(u8),
flags);
@@ -417,9 +434,12 @@ int hv_common_cpu_die(unsigned int cpu)
mem = *inputarg;
*inputarg = NULL;
- if (hv_root_partition) {
+ if (hv_output_arg_exists()) {
outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
*outputarg = NULL;
+ }
+
+ if (hv_root_partition) {
synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
kfree(*synic_eventring_tail);
*synic_eventring_tail = NULL;
--
2.25.1
^ permalink raw reply related
* [PATCH 12/15] Documentation: Reserve ioctl number for mshv driver
From: Nuno Das Neves @ 2023-07-27 19:54 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
Documentation/userspace-api/ioctl/ioctl-number.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 0a1882e296ae..ca6b82419118 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -355,6 +355,8 @@ Code Seq# Include File Comments
0xB6 all linux/fpga-dfl.h
0xB7 all uapi/linux/remoteproc_cdev.h <mailto:linux-remoteproc@vger.kernel.org>
0xB7 all uapi/linux/nsfs.h <mailto:Andrei Vagin <avagin@openvz.org>>
+0xB8 all uapi/linux/mshv.h Microsoft Hypervisor VM management APIs
+ <mailto:linux-hyperv@vger.kernel.org>
0xC0 00-0F linux/usb/iowarrior.h
0xCA 00-0F uapi/misc/cxl.h
0xCA 10-2F uapi/misc/ocxl.h
--
2.25.1
^ permalink raw reply related
* [PATCH 10/15] x86: hyperv: Add mshv_handler irq handler and setup function
From: Nuno Das Neves @ 2023-07-27 19:54 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
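Add mshv_handler, called from sysvec_hyperv_callback, and
hv_setup_mshv_irq() to register it. The handler will process SYNIC
interrupts such as intercepts, doorbells, and scheduling messages
intended for the mshv driver.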
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
arch/x86/kernel/cpu/mshyperv.c | 9 +++++++++
drivers/hv/hv_common.c | 5 +++++
include/asm-generic/mshyperv.h | 2 ++
3 files changed, 16 insertions(+)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e44291e902ae..442d00fe70f1 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -105,6 +105,7 @@ void hv_set_register(unsigned int reg, u64 value)
}
EXPORT_SYMBOL_GPL(hv_set_register);
+static void (*mshv_handler)(void);
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
@@ -115,6 +116,9 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
struct pt_regs *old_regs = set_irq_regs(regs);
inc_irq_stat(irq_hv_callback_count);
+ if (mshv_handler)
+ mshv_handler();
+
if (vmbus_handler)
vmbus_handler();
@@ -139,6 +143,11 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
set_irq_regs(old_regs);
}
+void hv_setup_mshv_irq(void (*handler)(void))
+{
+ mshv_handler = handler;
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 16f069beda78..678a3f2e1dc1 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -557,6 +557,11 @@ void __weak hv_remove_vmbus_handler(void)
}
EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
+void __weak hv_setup_mshv_irq(void (*handler)(void))
+{
+}
+EXPORT_SYMBOL_GPL(hv_setup_mshv_irq);
+
void __weak hv_setup_kexec_handler(void (*handler)(void))
{
}
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 9118d678b27a..2b20994d809e 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -193,6 +193,8 @@ void hv_remove_vmbus_handler(void);
void hv_setup_stimer0_handler(void (*handler)(void));
void hv_remove_stimer0_handler(void);
+void hv_setup_mshv_irq(void (*handler)(void));
+
void hv_setup_kexec_handler(void (*handler)(void));
void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
--
2.25.1
^ permalink raw reply related
* [PATCH 14/15] asm-generic: hyperv: Use mshv headers conditionally. Add asm-generic/hyperv-defs.h
From: Nuno Das Neves @ 2023-07-27 19:54 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
In order to keep unstable Hyper-V interfaces independent of
hyperv-tlfs.h, hvhdk.h must replace hyperv-tlfs.h everywhere it will be
used in the mshv driver.
Add hyperv-defs.h to replace some inclusions of hyperv-tlfs.h.
It includes hyperv-tlfs.h or hvhdk.h depending on a compile-time constant
HV_HYPERV_DEFS which will be defined in the mshv driver.
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
arch/arm64/include/asm/mshyperv.h | 2 +-
arch/x86/include/asm/mshyperv.h | 3 +--
drivers/hv/hyperv_vmbus.h | 1 -
include/asm-generic/hyperv-defs.h | 26 ++++++++++++++++++++++++++
include/asm-generic/mshyperv.h | 2 +-
include/linux/hyperv.h | 2 +-
6 files changed, 30 insertions(+), 6 deletions(-)
create mode 100644 include/asm-generic/hyperv-defs.h
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
index 20070a847304..8ec14caf3d4f 100644
--- a/arch/arm64/include/asm/mshyperv.h
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -20,7 +20,7 @@
#include <linux/types.h>
#include <linux/arm-smccc.h>
-#include <asm/hyperv-tlfs.h>
+#include <asm-generic/hyperv-defs.h>
/*
* Declare calls to get and set Hyper-V VP register values on ARM64, which
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 1a0655a93672..d4fa6e46e7e2 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,10 +6,9 @@
#include <linux/nmi.h>
#include <linux/msi.h>
#include <asm/io.h>
-#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
-#include <asm/mshyperv.h>
+#include <asm-generic/hyperv-defs.h>
/*
* Hyper-V always provides a single IO-APIC at this MMIO address.
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 85f3c08bb42b..49529f0b8b99 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -15,7 +15,6 @@
#include <linux/list.h>
#include <linux/bitops.h>
#include <asm/sync_bitops.h>
-#include <asm/hyperv-tlfs.h>
#include <linux/atomic.h>
#include <linux/hyperv.h>
#include <linux/interrupt.h>
diff --git a/include/asm-generic/hyperv-defs.h b/include/asm-generic/hyperv-defs.h
new file mode 100644
index 000000000000..ac6fcba35c8c
--- /dev/null
+++ b/include/asm-generic/hyperv-defs.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_HYPERV_DEFS_H
+#define _ASM_GENERIC_HYPERV_DEFS_H
+
+/*
+ * There are cases where Microsoft Hypervisor ABIs are needed which may not be
+ * stable or present in the Hyper-V TLFS document. E.g. the mshv_root driver.
+ *
+ * As these interfaces are unstable and may differ from hyperv-tlfs.h, they
+ * must be kept separate and independent.
+ *
+ * However, code from files that depend on hyperv-tlfs.h (such as mshyperv.h)
+ * is still needed, so work around the issue by conditionally including the
+ * correct definitions.
+ *
+ * Note: Since they are independent of each other, there are many definitions
+ * duplicated in both hyperv-tlfs.h and uapi/hyperv/hv*.h files.
+ */
+#ifdef HV_HYPERV_DEFS
+#include <uapi/hyperv/hvhdk.h>
+#else
+#include <asm/hyperv-tlfs.h>
+#endif
+
+#endif /* _ASM_GENERIC_HYPERV_DEFS_H */
+
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 2b20994d809e..e86b6f51fb64 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -25,7 +25,7 @@
#include <linux/cpumask.h>
#include <linux/nmi.h>
#include <asm/ptrace.h>
-#include <asm/hyperv-tlfs.h>
+#include <asm-generic/hyperv-defs.h>
#define VTPM_BASE_ADDRESS 0xfed40000
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index f90de5abcd50..66ed8b3e5d89 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -24,7 +24,7 @@
#include <linux/mod_devicetable.h>
#include <linux/interrupt.h>
#include <linux/reciprocal_div.h>
-#include <asm/hyperv-tlfs.h>
+#include <asm-generic/hyperv-defs.h>
#define MAX_PAGE_BUFFER_COUNT 32
#define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */
--
2.25.1
^ permalink raw reply related
* [PATCH 11/15] Drivers: hv: export vmbus_isr, hv_context and hv_post_message
From: Nuno Das Neves @ 2023-07-27 19:54 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
These will be used by the mshv_vtl driver.
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
drivers/hv/hv.c | 2 ++
drivers/hv/hyperv_vmbus.h | 1 +
drivers/hv/vmbus_drv.c | 3 ++-
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a897951634af..6c64ef8c8aff 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -24,6 +24,7 @@
/* The one and only */
struct hv_context hv_context;
+EXPORT_SYMBOL_GPL(hv_context);
/*
* hv_init - Main initialization routine.
@@ -80,6 +81,7 @@ int hv_post_message(union hv_connection_id connection_id,
return hv_result(status);
}
+EXPORT_SYMBOL_GPL(hv_post_message);
int hv_synic_alloc(void)
{
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 55f2086841ae..85f3c08bb42b 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -32,6 +32,7 @@
*/
#define HV_UTIL_NEGO_TIMEOUT 55
+void vmbus_isr(void);
/* Definitions for the monitored notification facility */
union hv_monitor_trigger_group {
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4757d8c23ac1..bfd367df7b60 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1286,7 +1286,7 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu)
}
}
-static void vmbus_isr(void)
+void vmbus_isr(void)
{
struct hv_per_cpu_context *hv_cpu
= this_cpu_ptr(hv_context.cpu_context);
@@ -1309,6 +1309,7 @@ static void vmbus_isr(void)
add_interrupt_randomness(vmbus_interrupt);
}
+EXPORT_SYMBOL_GPL(vmbus_isr);
static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id)
{
--
2.25.1
^ permalink raw reply related
* [PATCH 02/15] mshyperv: Introduce hv_get_hypervisor_version
From: Nuno Das Neves @ 2023-07-27 19:54 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
Add x86_64 and arm64 implementations of hv_get_hypervisor_version() to
get the hypervisor version information.
Also introduce the hv_hypervisor_version_info union to simplify parsing
the fields.
Replace the existing parsing when printing the version numbers.
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
arch/arm64/hyperv/mshyperv.c | 23 +++++++++++-------
arch/x86/kernel/cpu/mshyperv.c | 40 ++++++++++++++++++++-----------
include/asm-generic/hyperv-tlfs.h | 23 ++++++++++++++++++
include/asm-generic/mshyperv.h | 2 ++
4 files changed, 66 insertions(+), 22 deletions(-)
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index a406454578f0..d44c358ce45c 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -19,10 +19,19 @@
static bool hyperv_initialized;
+int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
+{
+ hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION,
+ (struct hv_get_vp_registers_output *)info);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
+
static int __init hyperv_init(void)
{
- struct hv_get_vp_registers_output result;
- u32 a, b, c, d;
+ struct hv_get_vp_registers_output result;
+ union hv_hypervisor_version_info version;
u64 guest_id;
int ret;
@@ -55,13 +64,11 @@ static int __init hyperv_init(void)
ms_hyperv.misc_features);
/* Get information about the Hyper-V host version */
- hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION, &result);
- a = result.as32.a;
- b = result.as32.b;
- c = result.as32.c;
- d = result.as32.d;
+ hv_get_hypervisor_version(&version);
pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
- b >> 16, b & 0xFFFF, a, d & 0xFFFFFF, c, d >> 24);
+ version.major_version, version.minor_version,
+ version.build_number, version.service_number,
+ version.service_pack, version.service_branch);
ret = hv_common_init();
if (ret)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 57f6a5879b30..e44291e902ae 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -328,13 +328,30 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
}
#endif
+/*
+ * Read the hypervisor version from the HYPERV_CPUID_VERSION leaf.
+ *
+ * @info: filled with the raw eax/ebx/ecx/edx values of the leaf on success;
+ *        left untouched on failure.
+ *
+ * Returns 0 on success, or -ENODEV if the maximum CPUID function reported by
+ * the hypervisor is below HYPERV_CPUID_VERSION.
+ */
+int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
+{
+	unsigned int hv_max_functions;
+
+	hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
+	if (hv_max_functions < HYPERV_CPUID_VERSION) {
+		pr_err("%s: Could not detect Hyper-V version\n", __func__);
+		return -ENODEV;
+	}
+
+	/* One CPUID execution fills all four registers of the leaf. */
+	cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx,
+	      &info->edx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hv_get_hypervisor_version);
+
static void __init ms_hyperv_init_platform(void)
{
int hv_max_functions_eax;
- int hv_host_info_eax;
- int hv_host_info_ebx;
- int hv_host_info_ecx;
- int hv_host_info_edx;
+ union hv_hypervisor_version_info version;
#ifdef CONFIG_PARAVIRT
pv_info.name = "Hyper-V";
@@ -388,16 +405,11 @@ static void __init ms_hyperv_init_platform(void)
/*
* Extract host information.
*/
- if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) {
- hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
- hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
- hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
- hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
-
- pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
- hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF,
- hv_host_info_eax, hv_host_info_edx & 0xFFFFFF,
- hv_host_info_ecx, hv_host_info_edx >> 24);
+	if (hv_get_hypervisor_version(&version) == 0) {
+		/* Keep the established "Host Build" log format; same as arm64. */
+		pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
+			version.major_version, version.minor_version,
+			version.build_number, version.service_number,
+			version.service_pack, version.service_branch);
}
if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index f4e4cc4f965f..373f26efa18a 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -786,6 +786,29 @@ struct hv_input_unmap_device_interrupt {
#define HV_SOURCE_SHADOW_NONE 0x0
#define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE 0x1
+/*
+ * Version info reported by hypervisor
+ */
+union hv_hypervisor_version_info {
+ struct {
+ u32 build_number;
+
+ u32 minor_version : 16;
+ u32 major_version : 16;
+
+ u32 service_pack;
+
+ u32 service_number : 24;
+ u32 service_branch : 8;
+ };
+ struct {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+ };
+};
+
/*
* The whole argument should fit in a page to be able to pass to the hypervisor
* in one hypercall.
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 094c57320ed1..233c976344e5 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -153,6 +153,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
}
}
+int hv_get_hypervisor_version(union hv_hypervisor_version_info *info);
+
void hv_setup_vmbus_handler(void (*handler)(void));
void hv_remove_vmbus_handler(void);
void hv_setup_stimer0_handler(void (*handler)(void));
--
2.25.1
^ permalink raw reply related
* [PATCH 13/15] uapi: hyperv: Add mshv driver headers hvhdk.h, hvhdk_mini.h, hvgdk.h, hvgdk_mini.h
From: Nuno Das Neves @ 2023-07-27 19:54 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
Add headers containing hypervisor ABI definitions for use in the mshv driver.
Version numbers for each file:
hvhdk.h 25212
hvhdk_mini.h 25294
hvgdk.h 25125
hvgdk_mini.h 25294
These are unstable interfaces and as such must be compiled independently
from published interfaces found in hyperv-tlfs.h.
These are in uapi because they will be used in the mshv ioctl API.
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
include/uapi/hyperv/hvgdk.h | 41 +
include/uapi/hyperv/hvgdk_mini.h | 1077 ++++++++++++++++++++++++
include/uapi/hyperv/hvhdk.h | 1352 ++++++++++++++++++++++++++++++
include/uapi/hyperv/hvhdk_mini.h | 164 ++++
4 files changed, 2634 insertions(+)
create mode 100644 include/uapi/hyperv/hvgdk.h
create mode 100644 include/uapi/hyperv/hvgdk_mini.h
create mode 100644 include/uapi/hyperv/hvhdk.h
create mode 100644 include/uapi/hyperv/hvhdk_mini.h
diff --git a/include/uapi/hyperv/hvgdk.h b/include/uapi/hyperv/hvgdk.h
new file mode 100644
index 000000000000..9bcbb7d902b2
--- /dev/null
+++ b/include/uapi/hyperv/hvgdk.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2023, Microsoft Corporation.
+ *
+ * These files (hvhdk.h, hvhdk_mini.h, hvgdk.h, hvgdk_mini.h) define APIs for
+ * communicating with the Microsoft Hypervisor.
+ *
+ * These definitions are subject to change across hypervisor versions, and as
+ * such are separate and independent from hyperv-tlfs.h.
+ *
+ * The naming of these headers reflects conventions used in the Microsoft
+ * Hypervisor.
+ */
+#ifndef _UAPI_HV_HVGDK_H
+#define _UAPI_HV_HVGDK_H
+
+#include "hvgdk_mini.h"
+
+enum hv_unimplemented_msr_action {
+ HV_UNIMPLEMENTED_MSR_ACTION_FAULT = 0,
+ HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO = 1,
+ HV_UNIMPLEMENTED_MSR_ACTION_COUNT = 2,
+};
+
+/* Define connection identifier type. */
+union hv_connection_id {
+ __u32 asu32;
+ struct {
+ __u32 id:24;
+ __u32 reserved:8;
+ } __packed u;
+};
+
+struct hv_input_unmap_gpa_pages {
+ __u64 target_partition_id;
+ __u64 target_gpa_base;
+ __u32 unmap_flags;
+ __u32 padding;
+} __packed;
+
+#endif /* #ifndef _UAPI_HV_HVGDK_H */
diff --git a/include/uapi/hyperv/hvgdk_mini.h b/include/uapi/hyperv/hvgdk_mini.h
new file mode 100644
index 000000000000..86d825ef6062
--- /dev/null
+++ b/include/uapi/hyperv/hvgdk_mini.h
@@ -0,0 +1,1077 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2023, Microsoft Corporation.
+ *
+ * These files (hvhdk.h, hvhdk_mini.h, hvgdk.h, hvgdk_mini.h) define APIs for
+ * communicating with the Microsoft Hypervisor.
+ *
+ * These definitions are subject to change across hypervisor versions, and as
+ * such are separate and independent from hyperv-tlfs.h.
+ *
+ * The naming of these headers reflects conventions used in the Microsoft
+ * Hypervisor.
+ */
+#ifndef _UAPI_HV_HVGDK_MINI_H
+#define _UAPI_HV_HVGDK_MINI_H
+
+struct hv_u128 {
+ __u64 low_part;
+ __u64 high_part;
+} __packed;
+
+/* hypercall status code */
+#define __HV_STATUS_DEF(OP) \
+ OP(HV_STATUS_SUCCESS, 0x0) \
+ OP(HV_STATUS_INVALID_HYPERCALL_CODE, 0x2) \
+ OP(HV_STATUS_INVALID_HYPERCALL_INPUT, 0x3) \
+ OP(HV_STATUS_INVALID_ALIGNMENT, 0x4) \
+ OP(HV_STATUS_INVALID_PARAMETER, 0x5) \
+ OP(HV_STATUS_ACCESS_DENIED, 0x6) \
+ OP(HV_STATUS_INVALID_PARTITION_STATE, 0x7) \
+ OP(HV_STATUS_OPERATION_DENIED, 0x8) \
+ OP(HV_STATUS_UNKNOWN_PROPERTY, 0x9) \
+ OP(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE, 0xA) \
+ OP(HV_STATUS_INSUFFICIENT_MEMORY, 0xB) \
+ OP(HV_STATUS_INVALID_PARTITION_ID, 0xD) \
+ OP(HV_STATUS_INVALID_VP_INDEX, 0xE) \
+ OP(HV_STATUS_NOT_FOUND, 0x10) \
+ OP(HV_STATUS_INVALID_PORT_ID, 0x11) \
+ OP(HV_STATUS_INVALID_CONNECTION_ID, 0x12) \
+ OP(HV_STATUS_INSUFFICIENT_BUFFERS, 0x13) \
+ OP(HV_STATUS_NOT_ACKNOWLEDGED, 0x14) \
+ OP(HV_STATUS_INVALID_VP_STATE, 0x15) \
+ OP(HV_STATUS_NO_RESOURCES, 0x1D) \
+ OP(HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED, 0x20) \
+ OP(HV_STATUS_INVALID_LP_INDEX, 0x41) \
+ OP(HV_STATUS_INVALID_REGISTER_VALUE, 0x50) \
+ OP(HV_STATUS_CALL_PENDING, 0x79)
+
+#define __HV_MAKE_HV_STATUS_ENUM(NAME, VAL) NAME = (VAL),
+#define __HV_MAKE_HV_STATUS_CASE(NAME, VAL) case (NAME): return (#NAME);
+
+enum hv_status {
+ __HV_STATUS_DEF(__HV_MAKE_HV_STATUS_ENUM)
+};
+
+/* TODO not in hv headers */
+#define HV_LINUX_VENDOR_ID 0x8100
+#define HV_HYP_PAGE_SHIFT 12
+#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
+#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
+
+#define HV_PARTITION_ID_INVALID ((__u64) 0)
+#define HV_PARTITION_ID_SELF ((__u64)-1)
+
+/* Hyper-V specific model specific registers (MSRs) */
+
+/* HV_X64_SYNTHETIC_MSR */
+/* MSR used to identify the guest OS. */
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+
+/* MSR used to setup pages used to communicate with the hypervisor. */
+#define HV_X64_MSR_HYPERCALL 0x40000001
+
+/* MSR used to provide vcpu index */
+#define HV_X64_MSR_VP_INDEX 0x40000002
+
+/* MSR used to reset the guest OS. */
+#define HV_X64_MSR_RESET 0x40000003
+
+/* MSR used to provide vcpu runtime in 100ns units */
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+
+/* MSR used to read the per-partition time reference counter */
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+
+/* A partition's reference time stamp counter (TSC) page */
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+
+/* MSR used to retrieve the TSC frequency */
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+
+/* MSR used to retrieve the local APIC timer frequency */
+#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
+
+/* Define the virtual APIC registers */
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+
+/* Define synthetic interrupt controller model specific registers. */
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SIRBP 0x40000085
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+
+/* Define synthetic interrupt controller model specific registers for nested hypervisor */
+#define HV_X64_MSR_NESTED_SCONTROL 0x40001080
+#define HV_X64_MSR_NESTED_SVERSION 0x40001081
+#define HV_X64_MSR_NESTED_SIEFP 0x40001082
+#define HV_X64_MSR_NESTED_SIMP 0x40001083
+#define HV_X64_MSR_NESTED_EOM 0x40001084
+#define HV_X64_MSR_NESTED_SINT0 0x40001090
+
+/*
+ * Synthetic Timer MSRs. Four timers per vcpu.
+ */
+#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+
+/* Hyper-V guest idle MSR */
+#define HV_X64_MSR_GUEST_IDLE 0x400000F0
+
+/* Hyper-V guest crash notification MSR's */
+#define HV_X64_MSR_CRASH_P0 0x40000100
+#define HV_X64_MSR_CRASH_P1 0x40000101
+#define HV_X64_MSR_CRASH_P2 0x40000102
+#define HV_X64_MSR_CRASH_P3 0x40000103
+#define HV_X64_MSR_CRASH_P4 0x40000104
+#define HV_X64_MSR_CRASH_CTL 0x40000105
+
+/* TSC emulation after migration */
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+
+/* TSC invariant control */
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
+/*
+ * Version info reported by hypervisor
+ * Changed to a union for convenience
+ */
+union hv_hypervisor_version_info {
+ struct {
+ __u32 build_number;
+
+ __u32 minor_version : 16;
+ __u32 major_version : 16;
+
+ __u32 service_pack;
+
+ __u32 service_number : 24;
+ __u32 service_branch : 8;
+ };
+ struct {
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ };
+};
+
+/* HV_CPUID_FUNCTION */
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_VERSION 0x40000002
+
+/* HV_X64_ENLIGHTENMENT_INFORMATION */
+
+/* DeprecateAutoEoi */
+#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
+
+#define HV_MAXIMUM_PROCESSORS 2048
+
+#define HV_MAX_VP_INDEX (HV_MAXIMUM_PROCESSORS - 1)
+#define HV_VP_INDEX_SELF ((__u32)-2)
+#define HV_ANY_VP ((__u32)-1)
+
+/* Declare the various hypercall operations. */
+/* HV_CALL_CODE */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_CREATE_PARTITION 0x0040
+#define HVCALL_INITIALIZE_PARTITION 0x0041
+#define HVCALL_FINALIZE_PARTITION 0x0042
+#define HVCALL_DELETE_PARTITION 0x0043
+#define HVCALL_GET_PARTITION_PROPERTY 0x0044
+#define HVCALL_SET_PARTITION_PROPERTY 0x0045
+#define HVCALL_GET_PARTITION_ID 0x0046
+#define HVCALL_DEPOSIT_MEMORY 0x0048
+#define HVCALL_WITHDRAW_MEMORY 0x0049
+#define HVCALL_MAP_GPA_PAGES 0x004b
+#define HVCALL_UNMAP_GPA_PAGES 0x004c
+#define HVCALL_INSTALL_INTERCEPT 0x004d
+#define HVCALL_CREATE_VP 0x004e
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
+#define HVCALL_CLEAR_VIRTUAL_INTERRUPT 0x0056
+#define HVCALL_DELETE_PORT 0x0058
+#define HVCALL_DISCONNECT_PORT 0x005b
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
+#define HVCALL_GET_SYSTEM_PROPERTY 0x007b
+#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b
+#define HVCALL_REGISTER_INTERCEPT_RESULT 0x0091
+#define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
+#define HVCALL_CREATE_PORT 0x0095
+#define HVCALL_CONNECT_PORT 0x0096
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+#define HVCALL_GET_GPA_PAGES_ACCESS_STATES 0x00c9
+#define HVCALL_SIGNAL_EVENT_DIRECT 0x00c0
+#define HVCALL_POST_MESSAGE_DIRECT 0x00c1
+#define HVCALL_DISPATCH_VP 0x00c2
+#define HVCALL_MAP_VP_STATE_PAGE 0x00e1
+#define HVCALL_UNMAP_VP_STATE_PAGE 0x00e2
+#define HVCALL_GET_VP_STATE 0x00e3
+#define HVCALL_SET_VP_STATE 0x00e4
+#define HVCALL_GET_VP_CPUID_VALUES 0x00f4
+
+/*
+ * Some macros - i.e. GENMASK_ULL and BIT_ULL - are not currently supported by
+ * userspace rust bindings generation tool.
+ * As the below are not currently needed in userspace, don't export them and
+ * avoid the issue altogether for now.
+ */
+#if defined(__KERNEL__)
+
+/* HV_HYPERCALL_INPUT */
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_NESTED BIT(31)
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
+#endif /* __KERNEL__ */
+
+/* Uses __u64 rather than u64, as this header is exported to userspace. */
+union hv_gpa_page_range {
+	__u64 address_space;
+	struct {
+		__u64 additional_pages:11;
+		__u64 largepage:1;
+		__u64 basepfn:52;
+	} page;
+	struct {
+		__u64 reserved:12;
+		__u64 page_size:1;
+		__u64 reserved1:8;
+		__u64 base_large_pfn:43;
+	};
+};
+
+/* Define the number of synthetic interrupt sources. */
+#define HV_SYNIC_SINT_COUNT (16)
+
+/* Hyper-V defined statically assigned SINTs */
+#define HV_SYNIC_INTERCEPTION_SINT_INDEX 0x00000000
+#define HV_SYNIC_IOMMU_FAULT_SINT_INDEX 0x00000001
+#define HV_SYNIC_VMBUS_SINT_INDEX 0x00000002
+#define HV_SYNIC_FIRST_UNUSED_SINT_INDEX 0x00000005
+
+/* mshv assigned SINT for doorbell */
+#define HV_SYNIC_DOORBELL_SINT_INDEX HV_SYNIC_FIRST_UNUSED_SINT_INDEX
+
+enum hv_interrupt_type {
+ HV_X64_INTERRUPT_TYPE_FIXED = 0x0000,
+ HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001,
+ HV_X64_INTERRUPT_TYPE_SMI = 0x0002,
+ HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003,
+ HV_X64_INTERRUPT_TYPE_NMI = 0x0004,
+ HV_X64_INTERRUPT_TYPE_INIT = 0x0005,
+ HV_X64_INTERRUPT_TYPE_SIPI = 0x0006,
+ HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007,
+ HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008,
+ HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009,
+ HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A,
+};
+
+/* Define synthetic interrupt source. */
+union hv_synic_sint {
+ __u64 as_uint64;
+ struct {
+ __u64 vector : 8;
+ __u64 reserved1 : 8;
+ __u64 masked : 1;
+ __u64 auto_eoi : 1;
+ __u64 polling : 1;
+ __u64 as_intercept : 1;
+ __u64 proxy : 1;
+ __u64 reserved2 : 43;
+ } __packed;
+};
+
+union hv_x64_xsave_xfem_register {
+ __u64 as_uint64;
+ struct {
+ __u32 low_uint32;
+ __u32 high_uint32;
+ } __packed;
+ struct {
+ __u64 legacy_x87 : 1;
+ __u64 legacy_sse : 1;
+ __u64 avx : 1;
+ __u64 mpx_bndreg : 1;
+ __u64 mpx_bndcsr : 1;
+ __u64 avx_512_op_mask : 1;
+ __u64 avx_512_zmmhi : 1;
+ __u64 avx_512_zmm16_31 : 1;
+ __u64 rsvd8_9 : 2;
+ __u64 pasid : 1;
+ __u64 cet_u : 1;
+ __u64 cet_s : 1;
+ __u64 rsvd13_16 : 4;
+ __u64 xtile_cfg : 1;
+ __u64 xtile_data : 1;
+ __u64 rsvd19_63 : 45;
+ } __packed;
+};
+
+/* Define the number of synthetic timers */
+#define HV_SYNIC_STIMER_COUNT (4)
+
+/* Define port identifier type. */
+union hv_port_id {
+ __u32 asu32;
+ struct {
+ __u32 id : 24;
+ __u32 reserved : 8;
+ } __packed u; // TODO remove this u
+};
+
+#define HV_MESSAGE_SIZE (256)
+#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
+#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
+
+/* Define hypervisor message types. */
+enum hv_message_type {
+ HVMSG_NONE = 0x00000000,
+
+ /* Memory access messages. */
+ HVMSG_UNMAPPED_GPA = 0x80000000,
+ HVMSG_GPA_INTERCEPT = 0x80000001,
+
+ /* Timer notification messages. */
+ HVMSG_TIMER_EXPIRED = 0x80000010,
+
+ /* Error messages. */
+ HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
+ HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
+ HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
+
+ /*
+ * Opaque intercept message. The original intercept message is only
+ * accessible from the mapped intercept message page.
+ */
+ HVMSG_OPAQUE_INTERCEPT = 0x8000003F,
+
+ /* Trace buffer complete messages. */
+ HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
+
+ /* Hypercall intercept */
+ HVMSG_HYPERCALL_INTERCEPT = 0x80000050,
+
+ /* SynIC intercepts */
+ HVMSG_SYNIC_EVENT_INTERCEPT = 0x80000060,
+ HVMSG_SYNIC_SINT_INTERCEPT = 0x80000061,
+ HVMSG_SYNIC_SINT_DELIVERABLE = 0x80000062,
+
+ /* Async call completion intercept */
+ HVMSG_ASYNC_CALL_COMPLETION = 0x80000070,
+
+ /* Root scheduler messages */
+ HVMSG_SCHEDULER_VP_SIGNAL_BITSET = 0x80000100,
+ HVMSG_SCHEDULER_VP_SIGNAL_PAIR = 0x80000101,
+
+ /* Platform-specific processor intercept messages. */
+ HVMSG_X64_IO_PORT_INTERCEPT = 0x80010000,
+ HVMSG_X64_MSR_INTERCEPT = 0x80010001,
+ HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
+ HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
+ HVMSG_X64_APIC_EOI = 0x80010004,
+ HVMSG_X64_LEGACY_FP_ERROR = 0x80010005,
+ HVMSG_X64_IOMMU_PRQ = 0x80010006,
+ HVMSG_X64_HALT = 0x80010007,
+ HVMSG_X64_INTERRUPTION_DELIVERABLE = 0x80010008,
+ HVMSG_X64_SIPI_INTERCEPT = 0x80010009,
+};
+
+/* Define the format of the SIMP register */
+union hv_synic_simp {
+ __u64 as_uint64;
+ struct {
+ __u64 simp_enabled : 1;
+ __u64 preserved : 11;
+ __u64 base_simp_gpa : 52;
+ } __packed;
+};
+
+union hv_message_flags {
+ __u8 asu8;
+ struct {
+ __u8 msg_pending : 1;
+ __u8 reserved : 7;
+ } __packed;
+};
+
+struct hv_message_header {
+ __u32 message_type;
+ __u8 payload_size;
+ union hv_message_flags message_flags;
+ __u8 reserved[2];
+ union {
+ __u64 sender;
+ union hv_port_id port;
+ };
+} __packed;
+
+/*
+ * Message format for notifications delivered via
+ * an intercept message (as_intercept=1)
+ */
+struct hv_notification_message_payload {
+ __u32 sint_index;
+} __packed;
+
+struct hv_message {
+ struct hv_message_header header;
+ union {
+ __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
+ } u;
+} __packed;
+
+/* Define the synthetic interrupt message page layout. */
+struct hv_message_page {
+ struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
+} __packed;
+
+struct hv_x64_segment_register {
+ __u64 base;
+ __u32 limit;
+ __u16 selector;
+ union {
+ struct {
+ __u16 segment_type : 4;
+ __u16 non_system_segment : 1;
+ __u16 descriptor_privilege_level : 2;
+ __u16 present : 1;
+ __u16 reserved : 4;
+ __u16 available : 1;
+ __u16 _long : 1;
+ __u16 _default : 1;
+ __u16 granularity : 1;
+ } __packed;
+ __u16 attributes;
+ };
+} __packed;
+
+struct hv_x64_table_register {
+ __u16 pad[3];
+ __u16 limit;
+ __u64 base;
+} __packed;
+
+union hv_x64_fp_control_status_register {
+ struct hv_u128 as_uint128;
+ struct {
+ __u16 fp_control;
+ __u16 fp_status;
+ __u8 fp_tag;
+ __u8 reserved;
+ __u16 last_fp_op;
+ union {
+ /* long mode */
+ __u64 last_fp_rip;
+ /* 32 bit mode */
+ struct {
+ __u32 last_fp_eip;
+ __u16 last_fp_cs;
+ __u16 padding;
+ } __packed;
+ };
+ } __packed;
+} __packed;
+
+union hv_x64_xmm_control_status_register {
+ struct hv_u128 as_uint128;
+ struct {
+ union {
+ /* long mode */
+ __u64 last_fp_rdp;
+ /* 32 bit mode */
+ struct {
+ __u32 last_fp_dp;
+ __u16 last_fp_ds;
+ __u16 padding;
+ } __packed;
+ };
+ __u32 xmm_status_control;
+ __u32 xmm_status_control_mask;
+ } __packed;
+} __packed;
+
+union hv_x64_fp_register {
+ struct hv_u128 as_uint128;
+ struct {
+ __u64 mantissa;
+ __u64 biased_exponent : 15;
+ __u64 sign : 1;
+ __u64 reserved : 48;
+ } __packed;
+} __packed;
+
+union hv_x64_msr_npiep_config_contents {
+ __u64 as_uint64;
+ struct {
+ /*
+ * These bits enable instruction execution prevention for
+ * specific instructions.
+ */
+ __u64 prevents_gdt : 1;
+ __u64 prevents_idt : 1;
+ __u64 prevents_ldt : 1;
+ __u64 prevents_tr : 1;
+
+ /* The reserved bits must always be 0. */
+ __u64 reserved : 60;
+ } __packed;
+};
+
+union hv_input_vtl {
+ __u8 as_uint8;
+ struct {
+ __u8 target_vtl : 4;
+ __u8 use_target_vtl : 1;
+ __u8 reserved_z : 3;
+ };
+} __packed;
+
+/* Note: not in hvgdk_mini.h */
+#define HV_SUPPORTS_REGISTER_DELIVERABILITY_NOTIFICATIONS
+
+union hv_register_vsm_partition_config {
+ __u64 as_u64;
+ struct {
+ __u64 enable_vtl_protection : 1;
+ __u64 default_vtl_protection_mask : 4;
+ __u64 zero_memory_on_reset : 1;
+ __u64 deny_lower_vtl_startup : 1;
+ __u64 intercept_acceptance : 1;
+ __u64 intercept_enable_vtl_protection : 1;
+ __u64 intercept_vp_startup : 1;
+ __u64 intercept_cpuid_unimplemented : 1;
+ __u64 intercept_unrecoverable_exception : 1;
+ __u64 intercept_page : 1;
+ __u64 mbz : 51;
+ };
+};
+
+struct hv_nested_enlightenments_control {
+ struct {
+ __u32 directhypercall : 1;
+ __u32 reserved : 31;
+ } __packed features;
+ struct {
+ __u32 inter_partition_comm : 1;
+ __u32 reserved : 31;
+ } __packed hypercall_controls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+ __u32 apic_assist;
+ __u32 reserved1;
+ __u32 vtl_entry_reason;
+ __u32 vtl_reserved;
+ __u64 vtl_ret_x64rax;
+ __u64 vtl_ret_x64rcx;
+ struct hv_nested_enlightenments_control nested_control;
+ __u8 enlighten_vmentry;
+ __u8 reserved2[7];
+ __u64 current_nested_vmcs;
+ __u8 synthetic_time_unhalted_timer_expired;
+ __u8 reserved3[7];
+ __u8 virtualization_fault_information[40];
+ __u8 reserved4[8];
+ __u8 intercept_message[256];
+ __u8 vtl_ret_actions[256];
+} __packed;
+
+enum hv_register_name {
+ /* Suspend Registers */
+ HV_REGISTER_EXPLICIT_SUSPEND = 0x00000000,
+ HV_REGISTER_INTERCEPT_SUSPEND = 0x00000001,
+ HV_REGISTER_DISPATCH_SUSPEND = 0x00000003,
+
+ HV_REGISTER_VP_ROOT_SIGNAL_COUNT = 0x00090014,
+
+ /* Synthetic VSM registers */
+ HV_REGISTER_VSM_CODE_PAGE_OFFSETS = 0x000D0002,
+ HV_REGISTER_VSM_CAPABILITIES = 0x000D0006,
+ HV_REGISTER_VSM_PARTITION_CONFIG = 0x000D0007,
+
+ /* Interruptible notification register */
+ HV_X64_REGISTER_DELIVERABILITY_NOTIFICATIONS = 0x00010006,
+
+ /* X64 User-Mode Registers */
+ HV_X64_REGISTER_RAX = 0x00020000,
+ HV_X64_REGISTER_RCX = 0x00020001,
+ HV_X64_REGISTER_RDX = 0x00020002,
+ HV_X64_REGISTER_RBX = 0x00020003,
+ HV_X64_REGISTER_RSP = 0x00020004,
+ HV_X64_REGISTER_RBP = 0x00020005,
+ HV_X64_REGISTER_RSI = 0x00020006,
+ HV_X64_REGISTER_RDI = 0x00020007,
+ HV_X64_REGISTER_R8 = 0x00020008,
+ HV_X64_REGISTER_R9 = 0x00020009,
+ HV_X64_REGISTER_R10 = 0x0002000A,
+ HV_X64_REGISTER_R11 = 0x0002000B,
+ HV_X64_REGISTER_R12 = 0x0002000C,
+ HV_X64_REGISTER_R13 = 0x0002000D,
+ HV_X64_REGISTER_R14 = 0x0002000E,
+ HV_X64_REGISTER_R15 = 0x0002000F,
+ HV_X64_REGISTER_RIP = 0x00020010,
+ HV_X64_REGISTER_RFLAGS = 0x00020011,
+
+ /* X64 Floating Point and Vector Registers */
+ HV_X64_REGISTER_XMM0 = 0x00030000,
+ HV_X64_REGISTER_XMM1 = 0x00030001,
+ HV_X64_REGISTER_XMM2 = 0x00030002,
+ HV_X64_REGISTER_XMM3 = 0x00030003,
+ HV_X64_REGISTER_XMM4 = 0x00030004,
+ HV_X64_REGISTER_XMM5 = 0x00030005,
+ HV_X64_REGISTER_XMM6 = 0x00030006,
+ HV_X64_REGISTER_XMM7 = 0x00030007,
+ HV_X64_REGISTER_XMM8 = 0x00030008,
+ HV_X64_REGISTER_XMM9 = 0x00030009,
+ HV_X64_REGISTER_XMM10 = 0x0003000A,
+ HV_X64_REGISTER_XMM11 = 0x0003000B,
+ HV_X64_REGISTER_XMM12 = 0x0003000C,
+ HV_X64_REGISTER_XMM13 = 0x0003000D,
+ HV_X64_REGISTER_XMM14 = 0x0003000E,
+ HV_X64_REGISTER_XMM15 = 0x0003000F,
+ HV_X64_REGISTER_FP_MMX0 = 0x00030010,
+ HV_X64_REGISTER_FP_MMX1 = 0x00030011,
+ HV_X64_REGISTER_FP_MMX2 = 0x00030012,
+ HV_X64_REGISTER_FP_MMX3 = 0x00030013,
+ HV_X64_REGISTER_FP_MMX4 = 0x00030014,
+ HV_X64_REGISTER_FP_MMX5 = 0x00030015,
+ HV_X64_REGISTER_FP_MMX6 = 0x00030016,
+ HV_X64_REGISTER_FP_MMX7 = 0x00030017,
+ HV_X64_REGISTER_FP_CONTROL_STATUS = 0x00030018,
+ HV_X64_REGISTER_XMM_CONTROL_STATUS = 0x00030019,
+
+ /* X64 Control Registers */
+ HV_X64_REGISTER_CR0 = 0x00040000,
+ HV_X64_REGISTER_CR2 = 0x00040001,
+ HV_X64_REGISTER_CR3 = 0x00040002,
+ HV_X64_REGISTER_CR4 = 0x00040003,
+ HV_X64_REGISTER_CR8 = 0x00040004,
+ HV_X64_REGISTER_XFEM = 0x00040005,
+
+ /* X64 Intermediate Control Registers */
+ HV_X64_REGISTER_INTERMEDIATE_CR0 = 0x00041000,
+ HV_X64_REGISTER_INTERMEDIATE_CR4 = 0x00041003,
+ HV_X64_REGISTER_INTERMEDIATE_CR8 = 0x00041004,
+
+ /* X64 Debug Registers */
+ HV_X64_REGISTER_DR0 = 0x00050000,
+ HV_X64_REGISTER_DR1 = 0x00050001,
+ HV_X64_REGISTER_DR2 = 0x00050002,
+ HV_X64_REGISTER_DR3 = 0x00050003,
+ HV_X64_REGISTER_DR6 = 0x00050004,
+ HV_X64_REGISTER_DR7 = 0x00050005,
+
+ /* X64 Segment Registers */
+ HV_X64_REGISTER_ES = 0x00060000,
+ HV_X64_REGISTER_CS = 0x00060001,
+ HV_X64_REGISTER_SS = 0x00060002,
+ HV_X64_REGISTER_DS = 0x00060003,
+ HV_X64_REGISTER_FS = 0x00060004,
+ HV_X64_REGISTER_GS = 0x00060005,
+ HV_X64_REGISTER_LDTR = 0x00060006,
+ HV_X64_REGISTER_TR = 0x00060007,
+
+ /* X64 Table Registers */
+ HV_X64_REGISTER_IDTR = 0x00070000,
+ HV_X64_REGISTER_GDTR = 0x00070001,
+
+ /* X64 Virtualized MSRs */
+ HV_X64_REGISTER_TSC = 0x00080000,
+ HV_X64_REGISTER_EFER = 0x00080001,
+ HV_X64_REGISTER_KERNEL_GS_BASE = 0x00080002,
+ HV_X64_REGISTER_APIC_BASE = 0x00080003,
+ HV_X64_REGISTER_PAT = 0x00080004,
+ HV_X64_REGISTER_SYSENTER_CS = 0x00080005,
+ HV_X64_REGISTER_SYSENTER_EIP = 0x00080006,
+ HV_X64_REGISTER_SYSENTER_ESP = 0x00080007,
+ HV_X64_REGISTER_STAR = 0x00080008,
+ HV_X64_REGISTER_LSTAR = 0x00080009,
+ HV_X64_REGISTER_CSTAR = 0x0008000A,
+ HV_X64_REGISTER_SFMASK = 0x0008000B,
+ HV_X64_REGISTER_INITIAL_APIC_ID = 0x0008000C,
+
+ /* X64 Cache control MSRs */
+ HV_X64_REGISTER_MSR_MTRR_CAP = 0x0008000D,
+ HV_X64_REGISTER_MSR_MTRR_DEF_TYPE = 0x0008000E,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0 = 0x00080010,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1 = 0x00080011,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2 = 0x00080012,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3 = 0x00080013,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4 = 0x00080014,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5 = 0x00080015,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6 = 0x00080016,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7 = 0x00080017,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8 = 0x00080018,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9 = 0x00080019,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA = 0x0008001A,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB = 0x0008001B,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC = 0x0008001C,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASED = 0x0008001D,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE = 0x0008001E,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF = 0x0008001F,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0 = 0x00080040,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1 = 0x00080041,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2 = 0x00080042,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3 = 0x00080043,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4 = 0x00080044,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5 = 0x00080045,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6 = 0x00080046,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7 = 0x00080047,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8 = 0x00080048,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9 = 0x00080049,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA = 0x0008004A,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB = 0x0008004B,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC = 0x0008004C,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD = 0x0008004D,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE = 0x0008004E,
+ HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF = 0x0008004F,
+ HV_X64_REGISTER_MSR_MTRR_FIX64K00000 = 0x00080070,
+ HV_X64_REGISTER_MSR_MTRR_FIX16K80000 = 0x00080071,
+ HV_X64_REGISTER_MSR_MTRR_FIX16KA0000 = 0x00080072,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KC0000 = 0x00080073,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KC8000 = 0x00080074,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KD0000 = 0x00080075,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KD8000 = 0x00080076,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KE0000 = 0x00080077,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KE8000 = 0x00080078,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KF0000 = 0x00080079,
+ HV_X64_REGISTER_MSR_MTRR_FIX4KF8000 = 0x0008007A,
+
+ HV_X64_REGISTER_TSC_AUX = 0x0008007B,
+ HV_X64_REGISTER_BNDCFGS = 0x0008007C,
+ HV_X64_REGISTER_DEBUG_CTL = 0x0008007D,
+
+ HV_X64_REGISTER_SGX_LAUNCH_CONTROL0 = 0x00080080,
+ HV_X64_REGISTER_SGX_LAUNCH_CONTROL1 = 0x00080081,
+ HV_X64_REGISTER_SGX_LAUNCH_CONTROL2 = 0x00080082,
+ HV_X64_REGISTER_SGX_LAUNCH_CONTROL3 = 0x00080083,
+ HV_X64_REGISTER_SPEC_CTRL = 0x00080084,
+ HV_X64_REGISTER_PRED_CMD = 0x00080085,
+ HV_X64_REGISTER_VIRT_SPEC_CTRL = 0x00080086,
+ HV_X64_REGISTER_TSC_ADJUST = 0x00080096,
+
+ /* Other MSRs */
+ HV_X64_REGISTER_MSR_IA32_MISC_ENABLE = 0x000800A0,
+ HV_X64_REGISTER_IA32_FEATURE_CONTROL = 0x000800A1,
+ HV_X64_REGISTER_IA32_VMX_BASIC = 0x000800A2,
+ HV_X64_REGISTER_IA32_VMX_PINBASED_CTLS = 0x000800A3,
+ HV_X64_REGISTER_IA32_VMX_PROCBASED_CTLS = 0x000800A4,
+ HV_X64_REGISTER_IA32_VMX_EXIT_CTLS = 0x000800A5,
+ HV_X64_REGISTER_IA32_VMX_ENTRY_CTLS = 0x000800A6,
+ HV_X64_REGISTER_IA32_VMX_MISC = 0x000800A7,
+ HV_X64_REGISTER_IA32_VMX_CR0_FIXED0 = 0x000800A8,
+ HV_X64_REGISTER_IA32_VMX_CR0_FIXED1 = 0x000800A9,
+ HV_X64_REGISTER_IA32_VMX_CR4_FIXED0 = 0x000800AA,
+ HV_X64_REGISTER_IA32_VMX_CR4_FIXED1 = 0x000800AB,
+ HV_X64_REGISTER_IA32_VMX_VMCS_ENUM = 0x000800AC,
+ HV_X64_REGISTER_IA32_VMX_PROCBASED_CTLS2 = 0x000800AD,
+ HV_X64_REGISTER_IA32_VMX_EPT_VPID_CAP = 0x000800AE,
+ HV_X64_REGISTER_IA32_VMX_TRUE_PINBASED_CTLS = 0x000800AF,
+ HV_X64_REGISTER_IA32_VMX_TRUE_PROCBASED_CTLS = 0x000800B0,
+ HV_X64_REGISTER_IA32_VMX_TRUE_EXIT_CTLS = 0x000800B1,
+ HV_X64_REGISTER_IA32_VMX_TRUE_ENTRY_CTLS = 0x000800B2,
+
+ HV_X64_REGISTER_REG_PAGE = 0x0009001C,
+};
+
+
+/*
+ * Arch compatibility regs for use with hv_set/get_register
+ */
+#define HV_MSR_VP_INDEX (HV_X64_MSR_VP_INDEX)
+#define HV_MSR_TIME_REF_COUNT (HV_X64_MSR_TIME_REF_COUNT)
+#define HV_MSR_REFERENCE_TSC (HV_X64_MSR_REFERENCE_TSC)
+#define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG)
+#define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT)
+
+#define HV_MSR_SCONTROL (HV_X64_MSR_SCONTROL)
+#define HV_MSR_SIEFP (HV_X64_MSR_SIEFP)
+#define HV_MSR_SIMP (HV_X64_MSR_SIMP)
+#define HV_MSR_SIRBP (HV_X64_MSR_SIRBP)
+#define HV_MSR_EOM (HV_X64_MSR_EOM)
+#define HV_MSR_SINT0 (HV_X64_MSR_SINT0)
+
+#define HV_MSR_NESTED_SCONTROL (HV_X64_MSR_NESTED_SCONTROL)
+#define HV_MSR_NESTED_SIEFP (HV_X64_MSR_NESTED_SIEFP)
+#define HV_MSR_NESTED_SIMP (HV_X64_MSR_NESTED_SIMP)
+#define HV_MSR_NESTED_EOM (HV_X64_MSR_NESTED_EOM)
+#define HV_MSR_NESTED_SINT0 (HV_X64_MSR_NESTED_SINT0)
+
+#define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0)
+#define HV_MSR_CRASH_P1 (HV_X64_MSR_CRASH_P1)
+#define HV_MSR_CRASH_P2 (HV_X64_MSR_CRASH_P2)
+#define HV_MSR_CRASH_P3 (HV_X64_MSR_CRASH_P3)
+#define HV_MSR_CRASH_P4 (HV_X64_MSR_CRASH_P4)
+#define HV_MSR_CRASH_CTL (HV_X64_MSR_CRASH_CTL)
+
+/* General Hypervisor Register Content Definitions */
+
+union hv_explicit_suspend_register {
+ __u64 as_uint64;
+ struct {
+ __u64 suspended : 1;
+ __u64 reserved : 63;
+ } __packed;
+};
+
+union hv_intercept_suspend_register {
+ __u64 as_uint64;
+ struct {
+ __u64 suspended : 1;
+ __u64 reserved : 63;
+ } __packed;
+};
+
+union hv_dispatch_suspend_register {
+ __u64 as_uint64;
+ struct {
+ __u64 suspended : 1;
+ __u64 reserved : 63;
+ } __packed;
+};
+
+union hv_x64_interrupt_state_register {
+ __u64 as_uint64;
+ struct {
+ __u64 interrupt_shadow : 1;
+ __u64 nmi_masked : 1;
+ __u64 reserved : 62;
+ } __packed;
+};
+
+union hv_x64_pending_exception_event {
+ __u64 as_uint64[2];
+ struct {
+ __u32 event_pending : 1;
+ __u32 event_type : 3;
+ __u32 reserved0 : 4;
+ __u32 deliver_error_code : 1;
+ __u32 reserved1 : 7;
+ __u32 vector : 16;
+ __u32 error_code;
+ __u64 exception_parameter;
+ } __packed;
+};
+
+union hv_x64_pending_virtualization_fault_event {
+ __u64 as_uint64[2];
+ struct {
+ __u32 event_pending : 1;
+ __u32 event_type : 3;
+ __u32 reserved0 : 4;
+ __u32 reserved1 : 8;
+ __u32 parameter0 : 16;
+ __u32 code;
+ __u64 parameter1;
+ } __packed;
+};
+
+/* NOTE: other definitions presumably belong between these; omitted here. */
+
+union hv_x64_pending_interruption_register {
+ __u64 as_uint64;
+ struct {
+ __u32 interruption_pending : 1;
+ __u32 interruption_type : 3;
+ __u32 deliver_error_code : 1;
+ __u32 instruction_length : 4;
+ __u32 nested_event : 1;
+ __u32 reserved : 6;
+ __u32 interruption_vector : 16;
+ __u32 error_code;
+ } __packed;
+};
+
+union hv_register_value {
+ struct hv_u128 reg128;
+ __u64 reg64;
+ __u32 reg32;
+ __u16 reg16;
+ __u8 reg8;
+
+ union hv_x64_fp_register fp;
+ union hv_x64_fp_control_status_register fp_control_status;
+ union hv_x64_xmm_control_status_register xmm_control_status;
+ struct hv_x64_segment_register segment;
+ struct hv_x64_table_register table;
+ union hv_explicit_suspend_register explicit_suspend;
+ union hv_intercept_suspend_register intercept_suspend;
+ union hv_dispatch_suspend_register dispatch_suspend;
+ union hv_x64_interrupt_state_register interrupt_state;
+ union hv_x64_pending_interruption_register pending_interruption;
+ union hv_x64_msr_npiep_config_contents npiep_config;
+ union hv_x64_pending_exception_event pending_exception_event;
+ union hv_x64_pending_virtualization_fault_event
+ pending_virtualization_fault_event;
+};
+
+struct hv_register_assoc {
+ __u32 name; /* enum hv_register_name */
+ __u32 reserved1;
+ __u64 reserved2;
+ union hv_register_value value;
+} __packed;
+
+struct hv_input_get_vp_registers {
+ __u64 partition_id;
+ __u32 vp_index;
+ union hv_input_vtl input_vtl;
+ __u8 rsvd_z8;
+ __u16 rsvd_z16;
+ __u32 names[];
+} __packed;
+
+struct hv_input_set_vp_registers {
+ __u64 partition_id;
+ __u32 vp_index;
+ union hv_input_vtl input_vtl;
+ __u8 rsvd_z8;
+ __u16 rsvd_z16;
+ struct hv_register_assoc elements[];
+} __packed;
+
+union hv_msi_entry {
+	__u64 as_uint64;	/* address and data together as one 64-bit value */
+	struct {
+		__u32 address;
+		__u32 data;
+	} __packed;
+};
+
+enum hv_interrupt_source {
+ HV_INTERRUPT_SOURCE_MSI = 1, /* MSI and MSI-X */
+ HV_INTERRUPT_SOURCE_IOAPIC,
+};
+
+union hv_ioapic_rte {
+	__u64 as_uint64;	/* the whole entry as one 64-bit value */
+
+	struct {		/* bit-field view: two 32-bit words */
+		__u32 vector:8;
+		__u32 delivery_mode:3;
+		__u32 destination_mode:1;
+		__u32 delivery_status:1;
+		__u32 interrupt_polarity:1;
+		__u32 remote_irr:1;
+		__u32 trigger_mode:1;
+		__u32 interrupt_mask:1;
+		__u32 reserved1:15;
+
+		__u32 reserved2:24;
+		__u32 destination_id:8;
+	};
+
+	struct {		/* the same entry as two plain 32-bit words */
+		__u32 low_uint32;
+		__u32 high_uint32;
+	};
+} __packed;
+
+struct hv_interrupt_entry {
+	__u32 source; /* enum hv_interrupt_source */
+	__u32 reserved1;
+	union {	/* one 64-bit payload, viewed as either entry type */
+		union hv_msi_entry msi_entry;
+		union hv_ioapic_rte ioapic_rte;
+	};
+} __packed;
+
+enum hv_intercept_type {
+	HV_INTERCEPT_TYPE_X64_IO_PORT = 0x00000000,
+	HV_INTERCEPT_TYPE_X64_MSR = 0x00000001,
+	HV_INTERCEPT_TYPE_X64_CPUID = 0x00000002,
+	HV_INTERCEPT_TYPE_EXCEPTION = 0x00000003,
+	HV_INTERCEPT_TYPE_REGISTER = 0x00000004,
+	HV_INTERCEPT_TYPE_MMIO = 0x00000005,
+	HV_INTERCEPT_TYPE_X64_GLOBAL_CPUID = 0x00000006,
+	HV_INTERCEPT_TYPE_X64_APIC_SMI = 0x00000007,
+	HV_INTERCEPT_TYPE_HYPERCALL = 0x00000008,
+	HV_INTERCEPT_TYPE_X64_APIC_INIT_SIPI = 0x00000009,
+	HV_INTERCEPT_TYPE_X64_APIC_WRITE = 0x0000000B,
+	HV_INTERCEPT_TYPE_X64_MSR_INDEX = 0x0000000C,
+	HV_INTERCEPT_TYPE_MAX,	/* one past the last explicitly numbered type */
+	HV_INTERCEPT_TYPE_INVALID = 0xFFFFFFFF,
+};
+
+union hv_intercept_parameters {
+ /* HV_INTERCEPT_PARAMETERS is defined to be an 8-byte field. */
+ __u64 as_uint64;
+ /* HV_INTERCEPT_TYPE_X64_IO_PORT */
+ __u16 io_port;
+ /* HV_INTERCEPT_TYPE_X64_CPUID */
+ __u32 cpuid_index;
+ /* HV_INTERCEPT_TYPE_X64_APIC_WRITE */
+ __u32 apic_write_mask;
+ /* HV_INTERCEPT_TYPE_EXCEPTION */
+ __u16 exception_vector;
+ /* HV_INTERCEPT_TYPE_X64_MSR_INDEX */
+ __u32 msr_index;
+ /* N.B. Other intercept types do not have any parameters. */
+};
+
+/* Access types for the install intercept hypercall parameter */
+#define HV_INTERCEPT_ACCESS_MASK_NONE 0x00
+#define HV_INTERCEPT_ACCESS_MASK_READ 0x01
+#define HV_INTERCEPT_ACCESS_MASK_WRITE 0x02
+#define HV_INTERCEPT_ACCESS_MASK_EXECUTE 0x04
+
+struct hv_input_install_intercept {
+ __u64 partition_id;
+ __u32 access_type; /* mask */
+ __u32 intercept_type; /* hv_intercept_type */
+ union hv_intercept_parameters intercept_parameter;
+} __packed;
+
+#endif /* _UAPI_HV_HVGDK_MINI_H */
diff --git a/include/uapi/hyperv/hvhdk.h b/include/uapi/hyperv/hvhdk.h
new file mode 100644
index 000000000000..90184628db8b
--- /dev/null
+++ b/include/uapi/hyperv/hvhdk.h
@@ -0,0 +1,1352 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2023, Microsoft Corporation.
+ *
+ * These files (hvhdk.h, hvhdk_mini.h, hvgdk.h, hvgdk_mini.h) define APIs for
+ * communicating with the Microsoft Hypervisor.
+ *
+ * These definitions are subject to change across hypervisor versions, and as
+ * such are separate and independent from hyperv-tlfs.h.
+ *
+ * The naming of these headers reflects conventions used in the Microsoft
+ * Hypervisor.
+ */
+#ifndef _UAPI_HV_HVHDK_H
+#define _UAPI_HV_HVHDK_H
+
+#include "hvhdk_mini.h"
+#include "hvgdk.h"
+
+/* Bits for dirty mask of hv_vp_register_page */
+#define HV_X64_REGISTER_CLASS_GENERAL 0
+#define HV_X64_REGISTER_CLASS_IP 1
+#define HV_X64_REGISTER_CLASS_XMM 2
+#define HV_X64_REGISTER_CLASS_SEGMENT 3
+#define HV_X64_REGISTER_CLASS_FLAGS 4
+
+#define HV_VP_REGISTER_PAGE_VERSION_1 1u
+
+struct hv_vp_register_page {
+ __u16 version;
+ __u8 isvalid;
+ __u8 rsvdz;
+ __u32 dirty;
+ union {
+ struct {
+ /* General purpose registers
+ * (HV_X64_REGISTER_CLASS_GENERAL)
+ */
+ union {
+ struct {
+ __u64 rax;
+ __u64 rcx;
+ __u64 rdx;
+ __u64 rbx;
+ __u64 rsp;
+ __u64 rbp;
+ __u64 rsi;
+ __u64 rdi;
+ __u64 r8;
+ __u64 r9;
+ __u64 r10;
+ __u64 r11;
+ __u64 r12;
+ __u64 r13;
+ __u64 r14;
+ __u64 r15;
+ } __packed;
+
+ __u64 gp_registers[16];
+ };
+ /* Instruction pointer (HV_X64_REGISTER_CLASS_IP) */
+ __u64 rip;
+ /* Flags (HV_X64_REGISTER_CLASS_FLAGS) */
+ __u64 rflags;
+ } __packed;
+
+ __u64 registers[18];
+ };
+ /* Volatile XMM registers (HV_X64_REGISTER_CLASS_XMM) */
+ union {
+ struct {
+ struct hv_u128 xmm0;
+ struct hv_u128 xmm1;
+ struct hv_u128 xmm2;
+ struct hv_u128 xmm3;
+ struct hv_u128 xmm4;
+ struct hv_u128 xmm5;
+ } __packed;
+
+ struct hv_u128 xmm_registers[6];
+ };
+ /* Segment registers (HV_X64_REGISTER_CLASS_SEGMENT) */
+ union {
+ struct {
+ struct hv_x64_segment_register es;
+ struct hv_x64_segment_register cs;
+ struct hv_x64_segment_register ss;
+ struct hv_x64_segment_register ds;
+ struct hv_x64_segment_register fs;
+ struct hv_x64_segment_register gs;
+ } __packed;
+
+ struct hv_x64_segment_register segment_registers[6];
+ };
+ /* Misc. control registers (cannot be set via this interface) */
+ __u64 cr0;
+ __u64 cr3;
+ __u64 cr4;
+ __u64 cr8;
+ __u64 efer;
+ __u64 dr7;
+ union hv_x64_pending_interruption_register pending_interruption;
+ union hv_x64_interrupt_state_register interrupt_state;
+ __u64 instruction_emulation_hints;
+} __packed;
+
+#define HV_PARTITION_PROCESSOR_FEATURES_BANKS 2
+
+union hv_partition_processor_features {
+ __u64 as_uint64[HV_PARTITION_PROCESSOR_FEATURES_BANKS];
+ struct {
+ __u64 sse3_support:1;
+ __u64 lahf_sahf_support:1;
+ __u64 ssse3_support:1;
+ __u64 sse4_1_support:1;
+ __u64 sse4_2_support:1;
+ __u64 sse4a_support:1;
+ __u64 xop_support:1;
+ __u64 pop_cnt_support:1;
+ __u64 cmpxchg16b_support:1;
+ __u64 altmovcr8_support:1;
+ __u64 lzcnt_support:1;
+ __u64 mis_align_sse_support:1;
+ __u64 mmx_ext_support:1;
+ __u64 amd3dnow_support:1;
+ __u64 extended_amd3dnow_support:1;
+ __u64 page_1gb_support:1;
+ __u64 aes_support:1;
+ __u64 pclmulqdq_support:1;
+ __u64 pcid_support:1;
+ __u64 fma4_support:1;
+ __u64 f16c_support:1;
+ __u64 rd_rand_support:1;
+ __u64 rd_wr_fs_gs_support:1;
+ __u64 smep_support:1;
+ __u64 enhanced_fast_string_support:1;
+ __u64 bmi1_support:1;
+ __u64 bmi2_support:1;
+ __u64 hle_support_deprecated:1;
+ __u64 rtm_support_deprecated:1;
+ __u64 movbe_support:1;
+ __u64 npiep1_support:1;
+ __u64 dep_x87_fpu_save_support:1;
+ __u64 rd_seed_support:1;
+ __u64 adx_support:1;
+ __u64 intel_prefetch_support:1;
+ __u64 smap_support:1;
+ __u64 hle_support:1;
+ __u64 rtm_support:1;
+ __u64 rdtscp_support:1;
+ __u64 clflushopt_support:1;
+ __u64 clwb_support:1;
+ __u64 sha_support:1;
+ __u64 x87_pointers_saved_support:1;
+ __u64 invpcid_support:1;
+ __u64 ibrs_support:1;
+ __u64 stibp_support:1;
+ __u64 ibpb_support: 1;
+ __u64 unrestricted_guest_support:1;
+ __u64 mdd_support:1;
+ __u64 fast_short_rep_mov_support:1;
+ __u64 l1dcache_flush_support:1;
+ __u64 rdcl_no_support:1;
+ __u64 ibrs_all_support:1;
+ __u64 skip_l1df_support:1;
+ __u64 ssb_no_support:1;
+ __u64 rsb_a_no_support:1;
+ __u64 virt_spec_ctrl_support:1;
+ __u64 rd_pid_support:1;
+ __u64 umip_support:1;
+ __u64 mbs_no_support:1;
+ __u64 mb_clear_support:1;
+ __u64 taa_no_support:1;
+ __u64 tsx_ctrl_support:1;
+ /*
+ * N.B. The final processor feature bit in bank 0 is reserved to
+ * simplify potential downlevel backports.
+ */
+ __u64 reserved_bank0:1;
+
+ /* N.B. Begin bank 1 processor features. */
+ __u64 acount_mcount_support:1;
+ __u64 tsc_invariant_support:1;
+ __u64 cl_zero_support:1;
+ __u64 rdpru_support:1;
+ __u64 la57_support:1;
+ __u64 mbec_support:1;
+ __u64 nested_virt_support:1;
+ __u64 psfd_support:1;
+ __u64 cet_ss_support:1;
+ __u64 cet_ibt_support:1;
+ __u64 vmx_exception_inject_support:1;
+ __u64 enqcmd_support:1;
+ __u64 umwait_tpause_support:1;
+ __u64 movdiri_support:1;
+ __u64 movdir64b_support:1;
+ __u64 cldemote_support:1;
+ __u64 serialize_support:1;
+ __u64 tsc_deadline_tmr_support:1;
+ __u64 tsc_adjust_support:1;
+ __u64 fzlrep_movsb:1;
+ __u64 fsrep_stosb:1;
+ __u64 fsrep_cmpsb:1;
+ __u64 reserved_bank1:42;
+ } __packed;
+};
+
+union hv_partition_processor_xsave_features {
+ struct {
+ __u64 xsave_support : 1;
+ __u64 xsaveopt_support : 1;
+ __u64 avx_support : 1;
+ __u64 reserved1 : 61;
+ } __packed;
+ __u64 as_uint64;
+};
+
+struct hv_partition_creation_properties {
+ union hv_partition_processor_features disabled_processor_features;
+ union hv_partition_processor_xsave_features
+ disabled_processor_xsave_features;
+} __packed;
+
+
+/*
+ * Definition of the partition isolation state. Used for
+ * HV_PARTITION_PROPERTY_ISOLATION_STATE.
+ *
+ *
+ * The isolation states (hv_partition_isolation_state) are sub-states of
+ * ObPartitionActive that apply to VBS and hardware isolated partitions.
+ * For VBS isolation, the trusted host VTL 1 component uses the isolation
+ * state to establish a binding between a hypervisor partition and its
+ * own partition context, and to enforce certain invariants.
+ *
+ * Hardware-isolated partitions (including partitions that simulate
+ * hardware isolation) also use isolation states to track the progression
+ * of the partition security state through the architectural state machine.
+ * Insecure states indicate that there is no architectural state
+ * associated with the partition, and Secure indicates that the partition
+ * has secure architectural state.
+ *
+ * ObPartitionRestoring is treated differently for isolated partitions.
+ * Only the trusted host component is allowed to restore partition state,
+ * and ObPartitionRestoring can only transition directly to/from secure.
+ *
+ *
+ * ..................................................................
+ * .         UNINITIALIZED     FINALIZED                            .
+ * .               |           ^       ^                            .
+ * .    Initialize |          /         \                           .
+ * .               |         /           \                          .
+ * . --------------|--------/--- ACTIVE --\------------------------ .
+ * . |             |       /               \                      | .
+ * . |             |      / Finalize        \ Finalize            | .
+ * . |             v     /                   \                    | .
+ * . |      INSECURE-CLEAN <---------------- INSECURE-DIRTY       | .
+ * . |                   \       Scrub       ^                    | .
+ * . |                    \                 /                     | .
+ * . |                     \               /                      | .
+ * . |               Secure \             / Unsecure              | .
+ * . |                       \           /                        | .
+ * . |                        \         /                         | .
+ * . |                         v       /                          | .
+ * . |                           SECURE                           | .
+ * . |                             ^                              | .
+ * . |_____________________________|______________________________| .
+ * .                               |                                .
+ * .                               v                                .
+ * .                           RESTORING                            .
+ * ..................................................................
+ */
+enum hv_partition_isolation_state {
+	/*
+	 * Initial and final state for all non-isolated partitions.
+	 */
+	HV_PARTITION_ISOLATION_INVALID = 0,
+
+	/*
+	 * An "Insecure" partition is not being used by the trusted host
+	 * component. In this state, VPs can be created and deleted. VPs cannot
+	 * be started, and VP registers cannot be modified.
+	 *
+	 * Initial state of an isolated partition as a result of Initialize or
+	 * Scrub hypercalls. Guest-visible partition and VP state is considered
+	 * "clean", in the sense that a call to ObScrubPartition should not
+	 * result in any changes. Also, there are no accepted or confidential
+	 * pages assigned to the partition. InsecureRundown is enabled.
+	 */
+	HV_PARTITION_ISOLATION_INSECURE_CLEAN = 1,
+
+	/*
+	 * Guest-visible partition and VP state is not "clean". Hence it must
+	 * be scrubbed first. One of 2 explicit states the trusted host
+	 * component can request. It cannot transition the state to Secure. In
+	 * this state,
+	 * - IsolationControl is clear.
+	 * - Secure rundowns are completely disabled.
+	 * - No assigned pages exist.
+	 */
+	HV_PARTITION_ISOLATION_INSECURE_DIRTY = 2,
+
+	/*
+	 * The partition is being used by the trusted host component (and is
+	 * typically bound to a single partition context in that component).
+	 * One of 2 explicit states the trusted host component can request. In
+	 * this state,
+	 * - VPs cannot be created or deleted.
+	 * - Partition cannot be finalized or scrubbed.
+	 * - Insecure rundowns are completely disabled.
+	 */
+	HV_PARTITION_ISOLATION_SECURE = 3,
+
+	/*
+	 * Represents a failed attempt to transition to Secure state. A
+	 * partition in this state cannot be finalized or scrubbed, since one
+	 * or more pages may be assigned.
+	 */
+	HV_PARTITION_ISOLATION_SECURE_DIRTY = 4,
+
+	/*
+	 * An internal state indicating that a partition is in the process of
+	 * transitioning from Secure to InsecureDirty.
+	 */
+	HV_PARTITION_ISOLATION_SECURE_TERMINATING = 5,
+};
+
+
+#define HV_PARTITION_SYNTHETIC_PROCESSOR_FEATURES_BANKS 1
+
+union hv_partition_synthetic_processor_features {
+ __u64 as_uint64[HV_PARTITION_SYNTHETIC_PROCESSOR_FEATURES_BANKS];
+
+ struct {
+ /* Report a hypervisor is present. CPUID leaves
+ * 0x40000000 and 0x40000001 are supported.
+ */
+ __u64 hypervisor_present:1;
+
+ /*
+ * Features associated with HV#1:
+ */
+
+ /* Report support for Hv1 (CPUID leaves 0x40000000 - 0x40000006). */
+ __u64 hv1:1;
+
+ /* Access to HV_X64_MSR_VP_RUNTIME.
+ * Corresponds to access_vp_run_time_reg privilege.
+ */
+ __u64 access_vp_run_time_reg:1;
+
+ /* Access to HV_X64_MSR_TIME_REF_COUNT.
+ * Corresponds to access_partition_reference_counter privilege.
+ */
+ __u64 access_partition_reference_counter:1;
+
+ /* Access to SINT-related registers (HV_X64_MSR_SCONTROL through
+ * HV_X64_MSR_EOM and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15).
+ * Corresponds to access_synic_regs privilege.
+ */
+ __u64 access_synic_regs:1;
+
+ /* Access to synthetic timers and associated MSRs
+ * (HV_X64_MSR_STIMER0_CONFIG through HV_X64_MSR_STIMER3_COUNT).
+ * Corresponds to access_synthetic_timer_regs privilege.
+ */
+ __u64 access_synthetic_timer_regs:1;
+
+ /* Access to APIC MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
+ * as well as the VP assist page.
+ * Corresponds to access_intr_ctrl_regs privilege.
+ */
+ __u64 access_intr_ctrl_regs:1;
+
+ /* Access to registers associated with hypercalls (HV_X64_MSR_GUEST_OS_ID
+ * and HV_X64_MSR_HYPERCALL).
+ * Corresponds to access_hypercall_msrs privilege.
+ */
+ __u64 access_hypercall_regs:1;
+
+ /* VP index can be queried. Corresponds to access_vp_index privilege. */
+ __u64 access_vp_index:1;
+
+ /* Access to the reference TSC. Corresponds to access_partition_reference_tsc
+ * privilege.
+ */
+ __u64 access_partition_reference_tsc:1;
+
+ /* Partition has access to the guest idle reg. Corresponds to
+ * access_guest_idle_reg privilege.
+ */
+ __u64 access_guest_idle_reg:1;
+
+ /* Partition has access to frequency regs. Corresponds to access_frequency_regs
+ * privilege.
+ */
+ __u64 access_frequency_regs:1;
+
+ __u64 reserved_z12:1; /* Reserved for access_reenlightenment_controls. */
+ __u64 reserved_z13:1; /* Reserved for access_root_scheduler_reg. */
+ __u64 reserved_z14:1; /* Reserved for access_tsc_invariant_controls. */
+
+ /* Extended GVA ranges for HvCallFlushVirtualAddressList hypercall.
+ * Corresponds to privilege.
+ */
+ __u64 enable_extended_gva_ranges_for_flush_virtual_address_list:1;
+
+ __u64 reserved_z16:1; /* Reserved for access_vsm. */
+ __u64 reserved_z17:1; /* Reserved for access_vp_registers. */
+
+ /* Use fast hypercall output. Corresponds to privilege. */
+ __u64 fast_hypercall_output:1;
+
+ __u64 reserved_z19:1; /* Reserved for enable_extended_hypercalls. */
+
+ /*
+ * HvStartVirtualProcessor can be used to start virtual processors.
+ * Corresponds to privilege.
+ */
+ __u64 start_virtual_processor:1;
+
+ __u64 reserved_z21:1; /* Reserved for Isolation. */
+
+ /* Synthetic timers in direct mode. */
+ __u64 direct_synthetic_timers:1;
+
+ __u64 reserved_z23:1; /* Reserved for synthetic time unhalted timer */
+
+ /* Use extended processor masks. */
+ __u64 extended_processor_masks:1;
+
+ /* HvCallFlushVirtualAddressSpace / HvCallFlushVirtualAddressList are supported. */
+ __u64 tb_flush_hypercalls:1;
+
+ /* HvCallSendSyntheticClusterIpi is supported. */
+ __u64 synthetic_cluster_ipi:1;
+
+ /* HvCallNotifyLongSpinWait is supported. */
+ __u64 notify_long_spin_wait:1;
+
+ /* HvCallQueryNumaDistance is supported. */
+ __u64 query_numa_distance:1;
+
+ /* HvCallSignalEvent is supported. Corresponds to privilege. */
+ __u64 signal_events:1;
+
+ /* HvCallRetargetDeviceInterrupt is supported. */
+ __u64 retarget_device_interrupt:1;
+
+ /* HvCallRestorePartitionTime is supported. */
+ __u64 restore_time:1;
+
+ /* EnlightenedVmcs nested enlightenment is supported. */
+ __u64 enlightened_vmcs:1;
+
+ __u64 reserved:31;
+ } __packed;
+};
+
+#define HV_MAKE_COMPATIBILITY_VERSION(major_, minor_) \
+ ((__u32)((major_) << 8 | (minor_)))
+
+#define HV_COMPATIBILITY_21_H2 HV_MAKE_COMPATIBILITY_VERSION(0X6, 0X9)
+
+union hv_partition_isolation_properties {
+ __u64 as_uint64;
+ struct {
+ __u64 isolation_type: 5;
+ __u64 isolation_host_type : 2;
+ __u64 rsvd_z: 5;
+ __u64 shared_gpa_boundary_page_number: 52;
+ } __packed;
+};
+
+/*
+ * Various isolation types supported by MSHV.
+ */
+#define HV_PARTITION_ISOLATION_TYPE_NONE 0
+#define HV_PARTITION_ISOLATION_TYPE_SNP 2
+#define HV_PARTITION_ISOLATION_TYPE_TDX 3
+
+/*
+ * Various host isolation types supported by MSHV.
+ */
+#define HV_PARTITION_ISOLATION_HOST_TYPE_NONE 0x0
+#define HV_PARTITION_ISOLATION_HOST_TYPE_HARDWARE 0x1
+#define HV_PARTITION_ISOLATION_HOST_TYPE_RESERVED 0x2
+
+/* Note: Exo partition is enabled by default */
+#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION (1 << 8)
+#define HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED (1 << 13)
+#define HV_PARTITION_CREATION_FLAG_INTERCEPT_MESSAGE_PAGE_ENABLED (1 << 19)
+#define HV_PARTITION_CREATION_FLAG_X2APIC_CAPABLE (1 << 22)
+
+struct hv_input_create_partition {
+ __u64 flags;
+ union hv_proximity_domain_info proximity_domain_info;
+ __u32 compatibility_version;
+ __u32 padding;
+ struct hv_partition_creation_properties partition_creation_properties;
+ union hv_partition_isolation_properties isolation_properties;
+} __packed;
+
+struct hv_output_create_partition {
+ __u64 partition_id;
+} __packed;
+
+struct hv_input_initialize_partition {
+ __u64 partition_id;
+} __packed;
+
+struct hv_input_finalize_partition {
+ __u64 partition_id;
+} __packed;
+
+struct hv_input_delete_partition {
+ __u64 partition_id;
+} __packed;
+
+struct hv_input_get_partition_property {
+ __u64 partition_id;
+ __u32 property_code; /* enum hv_partition_property_code */
+ __u32 padding;
+} __packed;
+
+struct hv_output_get_partition_property {
+ __u64 property_value;
+} __packed;
+
+struct hv_input_set_partition_property {
+ __u64 partition_id;
+ __u32 property_code; /* enum hv_partition_property_code */
+ __u32 padding;
+ __u64 property_value;
+} __packed;
+
+enum hv_vp_state_page_type {
+ HV_VP_STATE_PAGE_REGISTERS = 0,
+ HV_VP_STATE_PAGE_INTERCEPT_MESSAGE = 1,
+ HV_VP_STATE_PAGE_COUNT
+};
+
+struct hv_input_map_vp_state_page {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u32 type; /* enum hv_vp_state_page_type */
+} __packed;
+
+struct hv_output_map_vp_state_page {
+ __u64 map_location; /* GPA page number */
+} __packed;
+
+struct hv_input_unmap_vp_state_page {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u32 type; /* enum hv_vp_state_page_type */
+} __packed;
+
+struct hv_cpuid_leaf_info {
+ __u32 eax;
+ __u32 ecx;
+ __u64 xfem;
+ __u64 xss;
+} __packed;
+
+union hv_get_vp_cpuid_values_flags {
+ __u32 as_uint32;
+ struct {
+ __u32 use_vp_xfem_xss: 1;
+ __u32 apply_registered_values: 1;
+ __u32 reserved: 30;
+ } __packed;
+} __packed;
+
+struct hv_input_get_vp_cpuid_values {
+ __u64 partition_id;
+ __u32 vp_index;
+ union hv_get_vp_cpuid_values_flags flags;
+ __u32 reserved;
+ __u32 padding;
+ struct hv_cpuid_leaf_info cpuid_leaf_info[];
+} __packed;
+
+/* NOTE: Not in hvhdk headers */
+union hv_output_get_vp_cpuid_values {
+ __u32 as_uint32[4];
+ struct {
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ } __packed;
+};
+
+enum hv_translate_gva_result_code {
+ HV_TRANSLATE_GVA_SUCCESS = 0,
+
+ /* Translation failures. */
+ HV_TRANSLATE_GVA_PAGE_NOT_PRESENT = 1,
+ HV_TRANSLATE_GVA_PRIVILEGE_VIOLATION = 2,
+ HV_TRANSLATE_GVA_INVALID_PAGE_TABLE_FLAGS = 3,
+
+ /* GPA access failures. */
+ HV_TRANSLATE_GVA_GPA_UNMAPPED = 4,
+ HV_TRANSLATE_GVA_GPA_NO_READ_ACCESS = 5,
+ HV_TRANSLATE_GVA_GPA_NO_WRITE_ACCESS = 6,
+ HV_TRANSLATE_GVA_GPA_ILLEGAL_OVERLAY_ACCESS = 7,
+
+ /*
+ * Intercept for memory access by either
+ * - a higher VTL
+ * - a nested hypervisor (due to a violation of the nested page table)
+ */
+ HV_TRANSLATE_GVA_INTERCEPT = 8,
+
+ HV_TRANSLATE_GVA_GPA_UNACCEPTED = 9,
+};
+
+union hv_translate_gva_result {
+ __u64 as_uint64;
+ struct {
+ __u32 result_code; /* enum hv_translate_gva_result_code */
+ __u32 cache_type : 8; /* presumably enum hv_cache_type - TODO confirm */
+ __u32 overlay_page : 1;
+ __u32 reserved : 23;
+ } __packed;
+};
+
+/* Define synthetic interrupt controller flag constants. */
+#define HV_EVENT_FLAGS_COUNT (256 * 8)
+#define HV_EVENT_FLAGS_BYTE_COUNT (256)
+#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(__u32))
+
+struct hv_x64_apic_eoi_message {
+ __u32 vp_index;
+ __u32 interrupt_vector;
+} __packed;
+
+/*
+ * Extract the interrupt vector from an APIC EOI message.
+ *
+ * @payload is interpreted as the address of a struct hv_x64_apic_eoi_message,
+ * not as the message contents themselves. It is converted through
+ * unsigned long so that the integer-to-pointer cast is well-formed where
+ * pointers are narrower than 64 bits.
+ *
+ * NOTE(review): confirm that callers really pass an address here rather than
+ * the first 8 bytes of the message payload.
+ *
+ * Returns the interrupt_vector field of the message.
+ */
+static inline int hv_get_interrupt_vector_from_payload(__u64 payload)
+{
+ const struct hv_x64_apic_eoi_message *eoi_msg =
+ (const struct hv_x64_apic_eoi_message *)(unsigned long)payload;
+
+ return eoi_msg->interrupt_vector;
+}
+
+struct hv_opaque_intercept_message {
+ __u32 vp_index;
+} __packed;
+
+enum hv_port_type {
+ HV_PORT_TYPE_MESSAGE = 1,
+ HV_PORT_TYPE_EVENT = 2,
+ HV_PORT_TYPE_MONITOR = 3,
+ HV_PORT_TYPE_DOORBELL = 4 /* Root Partition only */
+};
+
+struct hv_port_info {
+ __u32 port_type; /* enum hv_port_type */
+ __u32 padding;
+ union {
+ struct {
+ __u32 target_sint;
+ __u32 target_vp;
+ __u64 rsvdz;
+ } message_port_info;
+ struct {
+ __u32 target_sint;
+ __u32 target_vp;
+ __u16 base_flag_number;
+ __u16 flag_count;
+ __u32 rsvdz;
+ } event_port_info;
+ struct {
+ __u64 monitor_address;
+ __u64 rsvdz;
+ } monitor_port_info;
+ struct {
+ __u32 target_sint;
+ __u32 target_vp;
+ __u64 rsvdz;
+ } doorbell_port_info;
+ };
+} __packed;
+
+struct hv_connection_info {
+ __u32 port_type;
+ __u32 padding;
+ union {
+ struct {
+ __u64 rsvdz;
+ } message_connection_info;
+ struct {
+ __u64 rsvdz;
+ } event_connection_info;
+ struct {
+ __u64 monitor_address;
+ } monitor_connection_info;
+ struct {
+ __u64 gpa;
+ __u64 trigger_value;
+ __u64 flags;
+ } doorbell_connection_info;
+ };
+} __packed;
+
+/*
+ * Define the synthetic interrupt controller event flags format.
+ *
+ * Both members are views of the same HV_EVENT_FLAGS_BYTE_COUNT bytes. The
+ * unsigned long view is sized from the byte count rather than from
+ * HV_EVENT_FLAGS_LONG_COUNT: that macro divides by sizeof(__u32), so pairing
+ * it with unsigned long elements would double the size of the union wherever
+ * unsigned long is 8 bytes.
+ */
+union hv_synic_event_flags {
+ unsigned char flags8[HV_EVENT_FLAGS_BYTE_COUNT];
+ unsigned long flags[HV_EVENT_FLAGS_BYTE_COUNT / sizeof(unsigned long)];
+};
+
+struct hv_synic_event_flags_page {
+ union hv_synic_event_flags event_flags[HV_SYNIC_SINT_COUNT];
+};
+
+#define HV_SYNIC_EVENT_RING_MESSAGE_COUNT 63
+
+struct hv_synic_event_ring {
+ __u8 signal_masked;
+ __u8 ring_full;
+ __u16 reserved_z;
+ __u32 data[HV_SYNIC_EVENT_RING_MESSAGE_COUNT];
+} __packed;
+
+struct hv_synic_event_ring_page {
+ struct hv_synic_event_ring sint_event_ring[HV_SYNIC_SINT_COUNT];
+};
+
+union hv_synic_scontrol {
+ __u64 as_uint64;
+ struct {
+ __u64 enable:1;
+ __u64 reserved:63;
+ } __packed;
+};
+
+union hv_synic_siefp {
+ __u64 as_uint64;
+ struct {
+ __u64 siefp_enabled:1;
+ __u64 preserved:11;
+ __u64 base_siefp_gpa:52;
+ } __packed;
+};
+
+union hv_synic_sirbp {
+ __u64 as_uint64;
+ struct {
+ __u64 sirbp_enabled:1;
+ __u64 preserved:11;
+ __u64 base_sirbp_gpa:52;
+ } __packed;
+};
+
+union hv_interrupt_control {
+ __u64 as_uint64;
+ struct {
+ __u32 interrupt_type; /* enum hv_interrupt_type */
+ __u32 level_triggered : 1;
+ __u32 logical_dest_mode : 1;
+ __u32 rsvd : 30;
+ } __packed;
+};
+
+struct hv_local_interrupt_controller_state {
+ /* HV_X64_INTERRUPT_CONTROLLER_STATE */
+ __u32 apic_id;
+ __u32 apic_version;
+ __u32 apic_ldr;
+ __u32 apic_dfr;
+ __u32 apic_spurious;
+ __u32 apic_isr[8];
+ __u32 apic_tmr[8];
+ __u32 apic_irr[8];
+ __u32 apic_esr;
+ __u32 apic_icr_high;
+ __u32 apic_icr_low;
+ __u32 apic_lvt_timer;
+ __u32 apic_lvt_thermal;
+ __u32 apic_lvt_perfmon;
+ __u32 apic_lvt_lint0;
+ __u32 apic_lvt_lint1;
+ __u32 apic_lvt_error;
+ __u32 apic_lvt_cmci;
+ __u32 apic_error_status;
+ __u32 apic_initial_count;
+ __u32 apic_counter_value;
+ __u32 apic_divide_configuration;
+ __u32 apic_remote_read;
+} __packed;
+
+struct hv_stimer_state {
+ struct {
+ /*
+ * Indicates if there is an undelivered timer expiry message.
+ */
+ __u32 undelivered_msg_pending:1;
+ __u32 reserved:31;
+ } __packed flags;
+
+ __u32 resvd;
+
+ /* Timer configuration and count. */
+ __u64 config;
+ __u64 count;
+
+ /* Timer adjustment. */
+ __u64 adjustment;
+
+ /* Expiration time of the undelivered message. */
+ __u64 undelivered_exp_time;
+} __packed;
+
+struct hv_synthetic_timers_state {
+ struct hv_stimer_state timers[HV_SYNIC_STIMER_COUNT];
+
+ /* Reserved space for time unhalted timer. */
+ __u64 reserved[5];
+} __packed;
+
+union hv_x64_vp_execution_state {
+ __u16 as_uint16;
+ struct {
+ __u16 cpl:2;
+ __u16 cr0_pe:1;
+ __u16 cr0_am:1;
+ __u16 efer_lma:1;
+ __u16 debug_active:1;
+ __u16 interruption_pending:1;
+ __u16 vtl:4;
+ __u16 enclave_mode:1;
+ __u16 interrupt_shadow:1;
+ __u16 virtualization_fault_active:1;
+ __u16 reserved:2;
+ } __packed;
+};
+
+struct hv_x64_intercept_message_header {
+ __u32 vp_index;
+ __u8 instruction_length:4;
+ __u8 cr8:4; /* Only set for exo partitions */
+ __u8 intercept_access_type;
+ union hv_x64_vp_execution_state execution_state;
+ struct hv_x64_segment_register cs_segment;
+ __u64 rip;
+ __u64 rflags;
+} __packed;
+
+#define HV_HYPERCALL_INTERCEPT_MAX_XMM_REGISTERS 6
+
+struct hv_x64_hypercall_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ __u64 rax;
+ __u64 rbx;
+ __u64 rcx;
+ __u64 rdx;
+ __u64 r8;
+ __u64 rsi;
+ __u64 rdi;
+ struct hv_u128 xmmregisters[HV_HYPERCALL_INTERCEPT_MAX_XMM_REGISTERS];
+ struct {
+ __u32 isolated:1;
+ __u32 reserved:31;
+ } __packed;
+} __packed;
+
+union hv_x64_register_access_info {
+ union hv_register_value source_value;
+ __u32 destination_register;
+ __u64 source_address;
+ __u64 destination_address;
+};
+
+struct hv_x64_register_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ struct {
+ __u8 is_memory_op:1;
+ __u8 reserved:7;
+ } __packed;
+ __u8 reserved8;
+ __u16 reserved16;
+ __u32 register_name;
+ union hv_x64_register_access_info access_info;
+} __packed;
+
+union hv_x64_memory_access_info {
+ __u8 as_uint8;
+ struct {
+ __u8 gva_valid:1;
+ __u8 gva_gpa_valid:1;
+ __u8 hypercall_output_pending:1;
+ __u8 tlb_locked_no_overlay:1;
+ __u8 reserved:4;
+ } __packed;
+};
+
+union hv_x64_io_port_access_info {
+ __u8 as_uint8;
+ struct {
+ __u8 access_size:3;
+ __u8 string_op:1;
+ __u8 rep_prefix:1;
+ __u8 reserved:3;
+ } __packed;
+};
+
+union hv_x64_exception_info {
+ __u8 as_uint8;
+ struct {
+ __u8 error_code_valid:1;
+ __u8 software_exception:1;
+ __u8 reserved:6;
+ } __packed;
+};
+
+struct hv_x64_memory_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ __u32 cache_type; /* enum hv_cache_type */
+ __u8 instruction_byte_count;
+ union hv_x64_memory_access_info memory_access_info;
+ __u8 tpr_priority;
+ __u8 reserved1;
+ __u64 guest_virtual_address;
+ __u64 guest_physical_address;
+ __u8 instruction_bytes[16];
+} __packed;
+
+struct hv_x64_cpuid_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ __u64 rax;
+ __u64 rcx;
+ __u64 rdx;
+ __u64 rbx;
+ __u64 default_result_rax;
+ __u64 default_result_rcx;
+ __u64 default_result_rdx;
+ __u64 default_result_rbx;
+} __packed;
+
+struct hv_x64_msr_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ __u32 msr_number;
+ __u32 reserved;
+ __u64 rdx;
+ __u64 rax;
+} __packed;
+
+struct hv_x64_io_port_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ __u16 port_number;
+ union hv_x64_io_port_access_info access_info;
+ __u8 instruction_byte_count;
+ __u32 reserved;
+ __u64 rax;
+ __u8 instruction_bytes[16];
+ struct hv_x64_segment_register ds_segment;
+ struct hv_x64_segment_register es_segment;
+ __u64 rcx;
+ __u64 rsi;
+ __u64 rdi;
+} __packed;
+
+struct hv_x64_exception_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ __u16 exception_vector;
+ union hv_x64_exception_info exception_info;
+ __u8 instruction_byte_count;
+ __u32 error_code;
+ __u64 exception_parameter;
+ __u64 reserved;
+ __u8 instruction_bytes[16];
+ struct hv_x64_segment_register ds_segment;
+ struct hv_x64_segment_register ss_segment;
+ __u64 rax;
+ __u64 rcx;
+ __u64 rdx;
+ __u64 rbx;
+ __u64 rsp;
+ __u64 rbp;
+ __u64 rsi;
+ __u64 rdi;
+ __u64 r8;
+ __u64 r9;
+ __u64 r10;
+ __u64 r11;
+ __u64 r12;
+ __u64 r13;
+ __u64 r14;
+ __u64 r15;
+} __packed;
+
+struct hv_x64_invalid_vp_register_message {
+ __u32 vp_index;
+ __u32 reserved;
+} __packed;
+
+struct hv_x64_unrecoverable_exception_message {
+ struct hv_x64_intercept_message_header header;
+} __packed;
+
+#define HV_UNSUPPORTED_FEATURE_INTERCEPT 1
+#define HV_UNSUPPORTED_FEATURE_TASK_SWITCH_TSS 2
+
+struct hv_x64_unsupported_feature_message {
+ __u32 vp_index;
+ __u32 feature_code;
+ __u64 feature_parameter;
+} __packed;
+
+struct hv_x64_halt_message {
+ struct hv_x64_intercept_message_header header;
+} __packed;
+
+#define HV_X64_PENDING_INTERRUPT 0
+#define HV_X64_PENDING_NMI 2
+#define HV_X64_PENDING_EXCEPTION 3
+
+struct hv_x64_interruption_deliverable_message {
+ struct hv_x64_intercept_message_header header;
+ __u32 deliverable_type; /* pending interruption type */
+ __u32 rsvd;
+} __packed;
+
+struct hv_x64_sint_deliverable_message {
+ struct hv_x64_intercept_message_header header;
+ __u16 deliverable_sints;
+ __u16 rsvd1;
+ __u32 rsvd2;
+} __packed;
+
+struct hv_x64_sipi_intercept_message {
+ struct hv_x64_intercept_message_header header;
+ __u32 target_vp_index;
+ __u32 interrupt_vector;
+} __packed;
+
+struct hv_register_x64_cpuid_result_parameters {
+ struct {
+ __u32 eax;
+ __u32 ecx;
+ __u8 subleaf_specific;
+ __u8 always_override;
+ __u16 padding;
+ } __packed input;
+ struct {
+ __u32 eax;
+ __u32 eax_mask;
+ __u32 ebx;
+ __u32 ebx_mask;
+ __u32 ecx;
+ __u32 ecx_mask;
+ __u32 edx;
+ __u32 edx_mask;
+ } __packed result;
+} __packed;
+
+struct hv_register_x64_msr_result_parameters {
+ __u32 msr_index;
+ __u32 access_type;
+ __u32 action; /* enum hv_unimplemented_msr_action */
+} __packed;
+
+union hv_register_intercept_result_parameters {
+ struct hv_register_x64_cpuid_result_parameters cpuid;
+ struct hv_register_x64_msr_result_parameters msr;
+} __packed;
+
+struct hv_async_completion_message_payload {
+ __u64 partition_id;
+ __u32 status;
+ __u32 completion_count;
+ __u64 sub_status;
+} __packed;
+
+struct hv_input_translate_virtual_address {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u32 padding;
+ __u64 control_flags;
+ __u64 gva_page;
+} __packed;
+
+struct hv_output_translate_virtual_address {
+ union hv_translate_gva_result translation_result;
+ __u64 gpa_page;
+} __packed;
+
+enum hv_cache_type {
+ HV_CACHE_TYPE_UNCACHED = 0,
+ HV_CACHE_TYPE_WRITE_COMBINING = 1,
+ HV_CACHE_TYPE_WRITE_THROUGH = 4,
+ HV_CACHE_TYPE_WRITE_PROTECTED = 5,
+ HV_CACHE_TYPE_WRITE_BACK = 6,
+};
+
+#define HV_SUPPORTS_REGISTER_INTERCEPT
+
+struct hv_input_register_intercept_result {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u32 intercept_type; /* enum hv_intercept_type */
+ union hv_register_intercept_result_parameters parameters;
+} __packed;
+
+struct hv_input_assert_virtual_interrupt {
+ __u64 partition_id;
+ union hv_interrupt_control control;
+ __u64 dest_addr; /* cpu's apic id */
+ __u32 vector;
+ __u8 target_vtl;
+ __u8 rsvd_z0;
+ __u16 rsvd_z1;
+} __packed;
+
+struct hv_input_create_port {
+ __u64 port_partition_id;
+ union hv_port_id port_id;
+ __u8 port_vtl;
+ __u8 min_connection_vtl;
+ __u16 padding;
+ __u64 connection_partition_id;
+ struct hv_port_info port_info;
+ union hv_proximity_domain_info proximity_domain_info;
+} __packed;
+
+union hv_input_delete_port {
+ __u64 as_uint64[2];
+ struct {
+ __u64 port_partition_id;
+ union hv_port_id port_id;
+ __u32 reserved;
+ };
+} __packed;
+
+struct hv_input_connect_port {
+ __u64 connection_partition_id;
+ union hv_connection_id connection_id;
+ __u8 connection_vtl;
+ __u8 rsvdz0;
+ __u16 rsvdz1;
+ __u64 port_partition_id;
+ union hv_port_id port_id;
+ __u32 reserved2;
+ struct hv_connection_info connection_info;
+ union hv_proximity_domain_info proximity_domain_info;
+} __packed;
+
+union hv_input_disconnect_port {
+ __u64 as_uint64[2];
+ struct {
+ __u64 connection_partition_id;
+ union hv_connection_id connection_id;
+ __u32 is_doorbell: 1;
+ __u32 reserved: 31;
+ } __packed;
+} __packed;
+
+union hv_input_notify_port_ring_empty {
+ __u64 as_uint64;
+ struct {
+ __u32 sint_index;
+ __u32 reserved;
+ };
+} __packed;
+
+struct hv_input_signal_event_direct {
+ __u64 target_partition;
+ __u32 target_vp;
+ __u8 target_vtl;
+ __u8 target_sint;
+ __u16 flag_number;
+} __packed;
+
+struct hv_output_signal_event_direct {
+ __u8 newly_signaled;
+ __u8 reserved[7];
+} __packed;
+
+struct hv_input_post_message_direct {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u8 vtl;
+ __u8 padding[3];
+ __u32 sint_index;
+ __u8 message[HV_MESSAGE_SIZE];
+ __u32 padding2;
+} __packed;
+
+/*
+ * Header followed by a variable number of GPA page ranges.
+ *
+ * Uses the __u64 exported type, like every other definition in this header,
+ * so the struct also compiles outside the kernel where plain u64 is not
+ * defined.
+ */
+struct hv_guest_mapping_flush_list { /* HV_INPUT_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST */
+ __u64 address_space;
+ __u64 flags;
+ union hv_gpa_page_range gpa_list[];
+};
+
+#define HV_SUPPORTS_VP_STATE
+
+struct hv_vp_state_data_xsave {
+ __u64 flags;
+ union hv_x64_xsave_xfem_register states;
+} __packed;
+
+/*
+ * For getting and setting VP state, there are two options based on the state type:
+ *
+ * 1.) Data that is accessed by PFNs in the input hypercall page. This is used
+ * for state which may not fit into the hypercall pages.
+ * 2.) Data that is accessed directly in the input/output hypercall pages.
+ * This is used for state that will always fit into the hypercall pages.
+ *
+ * In the future this could be dynamic based on the size if needed.
+ *
+ * Note these hypercalls have an 8-byte aligned variable header size as per the TLFS.
+ */
+
+/*
+ * Flag bit OR-ed into the state types below; presumably it marks the types
+ * whose data is accessed by PFNs - TODO confirm.
+ *
+ * Written as an unsigned constant so that bit 31 is not shifted into the sign
+ * bit of a signed int, and so that the flagged values do not sign-extend when
+ * widened to 64 bits.
+ */
+#define HV_GET_SET_VP_STATE_TYPE_PFN (1U << 31)
+
+/*
+ * State types for getting and setting VP state.
+ *
+ * NOTE: the flagged enumerators exceed INT_MAX, which GCC and Clang accept as
+ * an extension to ISO C.
+ */
+enum hv_get_set_vp_state_type {
+ HV_GET_SET_VP_STATE_LOCAL_INTERRUPT_CONTROLLER_STATE = 0 | HV_GET_SET_VP_STATE_TYPE_PFN,
+
+ HV_GET_SET_VP_STATE_XSAVE = 1 | HV_GET_SET_VP_STATE_TYPE_PFN,
+ /* Synthetic message page */
+ HV_GET_SET_VP_STATE_SIM_PAGE = 2 | HV_GET_SET_VP_STATE_TYPE_PFN,
+ /* Synthetic interrupt event flags page. */
+ HV_GET_SET_VP_STATE_SIEF_PAGE = 3 | HV_GET_SET_VP_STATE_TYPE_PFN,
+
+ /* Synthetic timers. */
+ HV_GET_SET_VP_STATE_SYNTHETIC_TIMERS = 4,
+};
+
+struct hv_vp_state_data {
+ __u32 type;
+ __u32 rsvd;
+ struct hv_vp_state_data_xsave xsave;
+} __packed;
+
+struct hv_input_get_vp_state {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u8 input_vtl;
+ __u8 rsvd0;
+ __u16 rsvd1;
+ struct hv_vp_state_data state_data;
+ __u64 output_data_pfns[];
+} __packed;
+
+union hv_output_get_vp_state {
+ struct hv_local_interrupt_controller_state interrupt_controller_state;
+ struct hv_synthetic_timers_state synthetic_timers_state;
+} __packed;
+
+union hv_input_set_vp_state_data {
+ __u64 pfns;
+ __u8 bytes;
+} __packed;
+
+struct hv_input_set_vp_state {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u8 input_vtl;
+ __u8 rsvd0;
+ __u16 rsvd1;
+ struct hv_vp_state_data state_data;
+ union hv_input_set_vp_state_data data[];
+} __packed;
+
+/*
+ * Dispatch state for the VP communicated by the hypervisor to the
+ * VP-dispatching thread in the root on return from HVCALL_DISPATCH_VP.
+ */
+enum hv_vp_dispatch_state {
+ HV_VP_DISPATCH_STATE_INVALID = 0,
+ HV_VP_DISPATCH_STATE_BLOCKED = 1,
+ HV_VP_DISPATCH_STATE_READY = 2,
+};
+
+/*
+ * Dispatch event that caused the current dispatch state on return from
+ * HVCALL_DISPATCH_VP.
+ */
+enum hv_vp_dispatch_event {
+ HV_VP_DISPATCH_EVENT_INVALID = 0x00000000,
+ HV_VP_DISPATCH_EVENT_SUSPEND = 0x00000001,
+ HV_VP_DISPATCH_EVENT_INTERCEPT = 0x00000002,
+};
+
+#define HV_ROOT_SCHEDULER_MAX_VPS_PER_CHILD_PARTITION 1024
+/* The maximum array size of HV_GENERIC_SET (vp_set) buffer */
+#define HV_GENERIC_SET_QWORD_COUNT(max) (((((max) - 1) >> 6) + 1) + 2)
+
+struct hv_vp_signal_bitset_scheduler_message {
+ __u64 partition_id;
+ __u32 overflow_count;
+ __u16 vp_count;
+ __u16 reserved;
+
+#define BITSET_BUFFER_SIZE \
+ HV_GENERIC_SET_QWORD_COUNT(HV_ROOT_SCHEDULER_MAX_VPS_PER_CHILD_PARTITION)
+ union {
+ struct hv_vpset bitset; /* typed view; its flexible bank_contents[] overlays bitset_buffer */
+ __u64 bitset_buffer[BITSET_BUFFER_SIZE]; /* backing storage: 2 qwords + 1 qword per 64 VPs */
+ } vp_bitset;
+#undef BITSET_BUFFER_SIZE
+} __packed;
+
+#if defined(__KERNEL__)
+static_assert(sizeof(struct hv_vp_signal_bitset_scheduler_message) <=
+ (sizeof(struct hv_message) - sizeof(struct hv_message_header)));
+#endif
+
+#define HV_MESSAGE_MAX_PARTITION_VP_PAIR_COUNT \
+ (((sizeof(struct hv_message) - sizeof(struct hv_message_header)) / \
+ (sizeof(__u64 /* partition id */) + sizeof(__u32 /* vp index */))) - 1)
+
+struct hv_vp_signal_pair_scheduler_message {
+ __u32 overflow_count;
+ __u8 vp_count;
+ __u8 reserved1[3];
+
+ __u64 partition_ids[HV_MESSAGE_MAX_PARTITION_VP_PAIR_COUNT];
+ __u32 vp_indexes[HV_MESSAGE_MAX_PARTITION_VP_PAIR_COUNT];
+
+ __u8 reserved2[4];
+} __packed;
+
+#if defined(__KERNEL__)
+static_assert(sizeof(struct hv_vp_signal_pair_scheduler_message) ==
+ (sizeof(struct hv_message) - sizeof(struct hv_message_header)));
+#endif
+
+/* Input and output structures for HVCALL_DISPATCH_VP */
+#define HV_DISPATCH_VP_FLAG_CLEAR_INTERCEPT_SUSPEND 0x1
+#define HV_DISPATCH_VP_FLAG_ENABLE_CALLER_INTERRUPTS 0x2
+#define HV_DISPATCH_VP_FLAG_SET_CALLER_SPEC_CTRL 0x4
+#define HV_DISPATCH_VP_FLAG_SKIP_VP_SPEC_FLUSH 0x8
+#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_SPEC_FLUSH 0x10
+#define HV_DISPATCH_VP_FLAG_SKIP_CALLER_USER_SPEC_FLUSH 0x20
+
+struct hv_input_dispatch_vp {
+ __u64 partition_id;
+ __u32 vp_index;
+ __u32 flags;
+ __u64 time_slice; /* in 100ns */
+ __u64 spec_ctrl;
+} __packed;
+
+struct hv_output_dispatch_vp {
+ __u32 dispatch_state; /* enum hv_vp_dispatch_state */
+ __u32 dispatch_event; /* enum hv_vp_dispatch_event */
+} __packed;
+
+#endif /* _UAPI_HV_HVHDK_H */
diff --git a/include/uapi/hyperv/hvhdk_mini.h b/include/uapi/hyperv/hvhdk_mini.h
new file mode 100644
index 000000000000..c1c1cae127e5
--- /dev/null
+++ b/include/uapi/hyperv/hvhdk_mini.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (c) 2023, Microsoft Corporation.
+ *
+ * These files (hvhdk.h, hvhdk_mini.h, hvgdk.h, hvgdk_mini.h) define APIs for
+ * communicating with the Microsoft Hypervisor.
+ *
+ * These definitions are subject to change across hypervisor versions, and as
+ * such are separate and independent from hyperv-tlfs.h.
+ *
+ * The naming of these headers reflects conventions used in the Microsoft
+ * Hypervisor.
+ */
+#ifndef _UAPI_HV_HVHDK_MINI_H
+#define _UAPI_HV_HVHDK_MINI_H
+
+#include "hvgdk_mini.h"
+
+/*
+ * Doorbell connection_info flags.
+ */
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_MASK 0x00000007
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY 0x00000000
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE 0x00000001
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD 0x00000002
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD 0x00000003
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD 0x00000004
+#define HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE 0x80000000
+
+/* Each generic set contains 64 elements */
+#define HV_GENERIC_SET_SHIFT (6)
+#define HV_GENERIC_SET_MASK (63)
+
+enum hv_generic_set_format {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+
+/* NOTE: following two #defines are not defined in Hyper-V code */
+/* The maximum number of sparse vCPU banks which can be encoded by 'struct hv_vpset' */
+#define HV_MAX_SPARSE_VCPU_BANKS (64)
+/* The number of vCPUs in one sparse bank */
+#define HV_VCPUS_PER_SPARSE_BANK (64)
+
+enum hv_scheduler_type {
+ HV_SCHEDULER_TYPE_LP = 1, /* Classic scheduler w/o SMT */
+ HV_SCHEDULER_TYPE_LP_SMT = 2, /* Classic scheduler w/ SMT */
+ HV_SCHEDULER_TYPE_CORE_SMT = 3, /* Core scheduler */
+ HV_SCHEDULER_TYPE_ROOT = 4, /* Root / integrated scheduler */
+ HV_SCHEDULER_TYPE_MAX
+};
+
+struct hv_vpset { /* HV_VP_SET */
+ __u64 format;
+ __u64 valid_bank_mask;
+ __u64 bank_contents[];
+} __packed;
+
+enum hv_partition_property_code {
+ /* Privilege properties */
+ HV_PARTITION_PROPERTY_PRIVILEGE_FLAGS = 0x00010000,
+ HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES = 0x00010001,
+
+ /* Resource properties */
+ HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING = 0x00050005,
+ HV_PARTITION_PROPERTY_ISOLATION_STATE = 0x0005000c,
+ HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION = 0x00050017,
+
+ /* Compatibility properties */
+ HV_PARTITION_PROPERTY_PROCESSOR_XSAVE_FEATURES = 0x00060002,
+ HV_PARTITION_PROPERTY_MAX_XSAVE_DATA_SIZE = 0x00060008,
+ HV_PARTITION_PROPERTY_PROCESSOR_CLOCK_FREQUENCY = 0x00060009,
+};
+
+enum hv_system_property {
+ /* Add more values when needed */
+ HV_SYSTEM_PROPERTY_SCHEDULER_TYPE = 15,
+};
+struct hv_input_get_system_property {
+ __u32 property_id; /* enum hv_system_property */
+ union {
+ __u32 as_uint32;
+ /* More fields to be filled in when needed */
+ };
+} __packed;
+
+struct hv_output_get_system_property {
+ union {
+ __u32 scheduler_type; /* enum hv_scheduler_type */
+ };
+} __packed;
+
+struct hv_proximity_domain_flags {
+ __u32 proximity_preferred : 1;
+ __u32 reserved : 30;
+ __u32 proximity_info_valid : 1;
+} __packed;
+
+/* Not a union in windows but useful for zeroing */
+union hv_proximity_domain_info {
+ struct {
+ __u32 domain_id;
+ struct hv_proximity_domain_flags flags;
+ };
+ __u64 as_uint64;
+} __packed;
+
+struct hv_input_withdraw_memory {
+ __u64 partition_id;
+ union hv_proximity_domain_info proximity_domain_info;
+} __packed;
+
+struct hv_output_withdraw_memory {
+ /*
+ * Zero-length array used in place of a flexible array member: the
+ * compiler rejects "[]" in a struct that has no other members.
+ */
+ __u64 gpa_page_list[0];
+} __packed;
+
+/* HV Map GPA (Guest Physical Address) Flags */
+#define HV_MAP_GPA_PERMISSIONS_NONE 0x0
+#define HV_MAP_GPA_READABLE 0x1
+#define HV_MAP_GPA_WRITABLE 0x2
+#define HV_MAP_GPA_KERNEL_EXECUTABLE 0x4
+#define HV_MAP_GPA_USER_EXECUTABLE 0x8
+#define HV_MAP_GPA_EXECUTABLE 0xC
+#define HV_MAP_GPA_PERMISSIONS_MASK 0xF
+
+struct hv_input_map_gpa_pages {
+ __u64 target_partition_id;
+ __u64 target_gpa_base;
+ __u32 map_flags;
+ __u32 padding;
+ __u64 source_gpa_page_list[];
+} __packed;
+
+union hv_gpa_page_access_state_flags {
+ struct {
+ __u64 clear_accessed : 1;
+ __u64 set_access : 1; /* NOTE(review): siblings suggest "set_accessed" was intended - confirm */
+ __u64 clear_dirty : 1;
+ __u64 set_dirty : 1;
+ __u64 reserved : 60;
+ } __packed;
+ __u64 as_uint64;
+};
+
+struct hv_input_get_gpa_pages_access_state {
+ __u64 partition_id;
+ union hv_gpa_page_access_state_flags flags;
+ __u64 hv_gpa_page_number;
+} __packed;
+
+union hv_gpa_page_access_state {
+ struct {
+ __u8 accessed : 1;
+ __u8 dirty : 1;
+ __u8 reserved: 6;
+ };
+ __u8 as_uint8;
+} __packed;
+
+#endif /* _UAPI_HV_HVHDK_MINI_H */
--
2.25.1
^ permalink raw reply related
* Re: [PATCH 00/15] Introduce /dev/mshv drivers
From: Nuno Das Neves @ 2023-07-27 21:05 UTC (permalink / raw)
To: linux-hyperv, linux-kernel, x86, linux-arm-kernel, linux-arch
Cc: mikelley, kys, wei.liu, haiyangz, decui, ssengar, mukeshrathor,
stanislav.kinsburskiy, jinankjain, apais, Tianyu.Lan, vkuznets,
tglx, mingo, bp, dave.hansen, hpa, will, catalin.marinas
In-Reply-To: <1690487690-2428-1-git-send-email-nunodasneves@linux.microsoft.com>
Apologies, it seems patch 15 is bouncing from some mailing lists.
As a workaround, here is a link to the patch on github while we figure
out a solution:
https://github.com/NunoDasNeves/linux/commit/54329d18d68b0cbff4f1e0aeba00bc6dd9a21288
On 7/27/2023 12:54 PM, Nuno Das Neves wrote:
> This series introduces support for creating and running guest machines
> while running on the Microsoft Hypervisor. [0]
> This is done via an IOCTL interface accessed through /dev/mshv, similar to
> /dev/kvm. Another series introducing this support was previously posted.
> [1]
>
> These interfaces support VMMs running in:
> 1. The root partition - provided in the mshv_root module, and
> 2. VTL 2 - provided in the mshv_vtl module [2]
>
> Patches breakdown
> -----------------
> The first 7 patches are refactoring and adding some helper functions.
> They provide some benefit on their own and could be applied independently
> as cleanup patches.
>
> The following 5 patches just set things up for the driver code to come.
> These are very small. They are separated so that the remaining patches are
> more self-contained.
>
> The final 3 patches are the meat of the series:
> - Patch 13 contains new header files used by the driver.
> These are designed to mirror the ABI headers exported by Hyper-V. This is
> done to avoid polluting hyperv-tlfs.h and help track changes to the ABIs
> that are still unstable. (See FAQ below).
> - Patch 14 conditionally includes these new header files into mshyperv.h
> and linux/hyperv.h, in order to be able to use these files in the new
> drivers while remaining independent from hyperv-tlfs.h.
> - Patch 15 contains the new driver code located in drivers/hv. This is a
> large amount of code and new files, but it is mostly self-contained and
> all within drivers/hv - apart from the IOCTL interface itself in uapi.
>
> FAQ on include/uapi/hyperv/*.h
> ------------------------------
> Q:
> Why not just add these definitions to hyperv-tlfs.h?
> A:
> The intention of hyperv-tlfs.h is to contain stable definitions documented
> in the public TLFS document. These new definitions don't fit that criterion,
> so they should be separate.
>
> Q:
> Why are these files named hvgdk.h, hvgdk_mini.h, hvhdk.h and hvhdk_mini.h?
> A:
> The precise meaning of the names reflects conventions used internally at
> Microsoft.
> Naming them this way makes it easy to find where particular Hyper-V
> definitions come from, and check their correctness.
> It also facilitates the future work of automatically generating these files.
>
> Q:
> Why are they in uapi?
> A:
> In short, to keep things simple. There are many definitions needed in both
> the kernel and the VMM in userspace. Separating them doesn't serve much
> purpose, and makes it more laborious to import definitions from Hyper-V
> code.
>
> Q:
> The new headers redefine many things that are already in hyperv-tlfs.h - why?
> A:
> Some definitions are extended compared to what is documented in the TLFS.
> In order to avoid adding undocumented or unstable definitions to hyperv-tlfs.h,
> the new headers must compile independently.
> Therefore, the new headers must redefine many things in hyperv-tlfs.h in order
> to compile.
>
> --------------------------
> [0] "Hyper-V" is more well-known, but it really refers to the whole stack
> including the hypervisor and other components that run in Windows
> kernel and userspace.
> [1] Previous /dev/mshv patch series and discussion:
> https://lore.kernel.org/linux-hyperv/1632853875-20261-1-git-send-email-nunodasneves@linux.microsoft.com/
> [2] Virtual Secure Mode (VSM) and Virtual Trust Levels (VTL):
> https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/vsm
>
> Nuno Das Neves (15):
> hyperv-tlfs: Change shared HV_REGISTER_* defines to HV_MSR_*
> mshyperv: Introduce hv_get_hypervisor_version
> mshyperv: Introduce numa_node_to_proximity_domain_info
> asm-generic/mshyperv: Introduce hv_recommend_using_aeoi()
> hyperv: Move hv_connection_id to hyperv-tlfs
> hyperv-tlfs: Introduce hv_status_to_string and hv_status_to_errno
> Drivers: hv: Move hv_call_deposit_pages and hv_call_create_vp to
> common code
> Drivers: hv: Introduce per-cpu event ring tail
> Drivers: hv: Introduce hv_output_arg_exists in hv_common
> x86: hyperv: Add mshv_handler irq handler and setup function
> Drivers: hv: export vmbus_isr, hv_context and hv_post_message
> Documentation: Reserve ioctl number for mshv driver
> uapi: hyperv: Add mshv driver headers hvhdk.h, hvhdk_mini.h, hvgdk.h,
> hvgdk_mini.h
> asm-generic: hyperv: Use mshv headers conditionally. Add
> asm-generic/hyperv-defs.h
> Drivers: hv: Add modules to expose /dev/mshv to VMMs running on
> Hyper-V
>
> .../userspace-api/ioctl/ioctl-number.rst | 2 +
> arch/arm64/hyperv/mshyperv.c | 23 +-
> arch/arm64/include/asm/hyperv-tlfs.h | 25 +
> arch/arm64/include/asm/mshyperv.h | 2 +-
> arch/x86/hyperv/hv_init.c | 2 +-
> arch/x86/hyperv/hv_proc.c | 166 +-
> arch/x86/include/asm/hyperv-tlfs.h | 137 +-
> arch/x86/include/asm/mshyperv.h | 13 +-
> arch/x86/kernel/cpu/mshyperv.c | 71 +-
> drivers/acpi/numa/srat.c | 1 +
> drivers/clocksource/hyperv_timer.c | 24 +-
> drivers/hv/Kconfig | 54 +
> drivers/hv/Makefile | 21 +
> drivers/hv/hv.c | 46 +-
> drivers/hv/hv_call.c | 119 +
> drivers/hv/hv_common.c | 225 +-
> drivers/hv/hyperv_vmbus.h | 2 +-
> drivers/hv/mshv.h | 156 ++
> drivers/hv/mshv_eventfd.c | 758 +++++++
> drivers/hv/mshv_eventfd.h | 80 +
> drivers/hv/mshv_main.c | 208 ++
> drivers/hv/mshv_msi.c | 129 ++
> drivers/hv/mshv_portid_table.c | 84 +
> drivers/hv/mshv_root.h | 194 ++
> drivers/hv/mshv_root_hv_call.c | 1064 +++++++++
> drivers/hv/mshv_root_main.c | 1964 +++++++++++++++++
> drivers/hv/mshv_synic.c | 689 ++++++
> drivers/hv/mshv_vtl.h | 52 +
> drivers/hv/mshv_vtl_main.c | 1541 +++++++++++++
> drivers/hv/vmbus_drv.c | 3 +-
> drivers/hv/xfer_to_guest.c | 28 +
> include/asm-generic/hyperv-defs.h | 26 +
> include/asm-generic/hyperv-tlfs.h | 77 +-
> include/asm-generic/mshyperv.h | 76 +-
> include/linux/hyperv.h | 11 +-
> include/uapi/hyperv/hvgdk.h | 41 +
> include/uapi/hyperv/hvgdk_mini.h | 1077 +++++++++
> include/uapi/hyperv/hvhdk.h | 1352 ++++++++++++
> include/uapi/hyperv/hvhdk_mini.h | 164 ++
> include/uapi/linux/mshv.h | 298 +++
> 40 files changed, 10653 insertions(+), 352 deletions(-)
> create mode 100644 drivers/hv/hv_call.c
> create mode 100644 drivers/hv/mshv.h
> create mode 100644 drivers/hv/mshv_eventfd.c
> create mode 100644 drivers/hv/mshv_eventfd.h
> create mode 100644 drivers/hv/mshv_main.c
> create mode 100644 drivers/hv/mshv_msi.c
> create mode 100644 drivers/hv/mshv_portid_table.c
> create mode 100644 drivers/hv/mshv_root.h
> create mode 100644 drivers/hv/mshv_root_hv_call.c
> create mode 100644 drivers/hv/mshv_root_main.c
> create mode 100644 drivers/hv/mshv_synic.c
> create mode 100644 drivers/hv/mshv_vtl.h
> create mode 100644 drivers/hv/mshv_vtl_main.c
> create mode 100644 drivers/hv/xfer_to_guest.c
> create mode 100644 include/asm-generic/hyperv-defs.h
> create mode 100644 include/uapi/hyperv/hvgdk.h
> create mode 100644 include/uapi/hyperv/hvgdk_mini.h
> create mode 100644 include/uapi/hyperv/hvhdk.h
> create mode 100644 include/uapi/hyperv/hvhdk_mini.h
> create mode 100644 include/uapi/linux/mshv.h
>
^ permalink raw reply
* RE: [PATCH V2] x86/hyperv: Rename hv_isolation_type_snp/en_snp() to isol_type_snp_paravisor/enlightened()
From: Michael Kelley (LINUX) @ 2023-07-28 2:53 UTC (permalink / raw)
To: Tianyu Lan, KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org,
Dexuan Cui, tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
arnd@arndb.de
Cc: Tianyu Lan, linux-arch@vger.kernel.org,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
vkuznets@redhat.com
In-Reply-To: <20230726124900.300258-1-ltykernel@gmail.com>
From: Tianyu Lan <ltykernel@gmail.com> Sent: Wednesday, July 26, 2023 5:49 AM
>
> Rename hv_isolation_type_snp and hv_isolation_type_en_snp()
> to make them much more intuitive.
>
> Suggested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
> This patch is based on the patchset "x86/hyperv: Add AMD sev-snp
> enlightened guest support on hyperv" https://lore.kernel.org/lkml/20230718032304.136888-1-ltykernel@gmail.com/
>
> Change since v1:
> Add "hv_" prefix to isol_type_snp_paravisor/enlightened()
> ---
> arch/x86/hyperv/hv_init.c | 6 +++---
> arch/x86/hyperv/ivm.c | 17 +++++++++--------
> arch/x86/include/asm/mshyperv.h | 8 ++++----
> arch/x86/kernel/cpu/mshyperv.c | 12 ++++++------
> drivers/hv/connection.c | 2 +-
> drivers/hv/hv.c | 16 ++++++++--------
> drivers/hv/hv_common.c | 10 +++++-----
> include/asm-generic/mshyperv.h | 4 ++--
> 8 files changed, 38 insertions(+), 37 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index b004370d3b01..3df948c69cff 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -52,7 +52,7 @@ static int hyperv_init_ghcb(void)
> void *ghcb_va;
> void **ghcb_base;
>
> - if (!hv_isolation_type_snp())
> + if (!hv_isol_type_snp_paravisor())
> return 0;
>
> if (!hv_ghcb_pg)
> @@ -116,7 +116,7 @@ static int hv_cpu_init(unsigned int cpu)
> * is blocked to run in Confidential VM. So only decrypt assist
> * page in non-root partition here.
> */
> - if (*hvp && hv_isolation_type_en_snp()) {
> + if (*hvp && hv_isol_type_snp_enlightened()) {
> WARN_ON_ONCE(set_memory_decrypted((unsigned
> long)(*hvp), 1));
> memset(*hvp, 0, PAGE_SIZE);
> }
> @@ -453,7 +453,7 @@ void __init hyperv_init(void)
> goto common_free;
> }
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isol_type_snp_paravisor()) {
> /* Negotiate GHCB Version. */
> if (!hv_ghcb_negotiate_protocol())
> hv_ghcb_terminate(SEV_TERM_SET_GEN,
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 2eda4e69849d..2548d904e45a 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -591,24 +591,25 @@ bool hv_is_isolation_supported(void)
> return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
> }
>
> -DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
> +DEFINE_STATIC_KEY_FALSE(isol_type_snp_paravisor);
>
> /*
> - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based
> + * hv_isol_type_snp_paravisor - Check system runs in the AMD SEV-SNP based
> * isolation VM.
> */
> -bool hv_isolation_type_snp(void)
> +bool hv_isol_type_snp_paravisor(void)
> {
> - return static_branch_unlikely(&isolation_type_snp);
> + return static_branch_unlikely(&isol_type_snp_paravisor);
> }
>
> -DEFINE_STATIC_KEY_FALSE(isolation_type_en_snp);
> +DEFINE_STATIC_KEY_FALSE(isol_type_snp_enlightened);
> +
> /*
> - * hv_isolation_type_en_snp - Check system runs in the AMD SEV-SNP based
> + * hv_isol_type_snp_enlightened - Check system runs in the AMD SEV-SNP based
> * isolation enlightened VM.
> */
> -bool hv_isolation_type_en_snp(void)
> +bool hv_isol_type_snp_enlightened(void)
> {
> - return static_branch_unlikely(&isolation_type_en_snp);
> + return static_branch_unlikely(&isol_type_snp_enlightened);
> }
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index c5a3c29fad01..e543a5a1b007 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -25,8 +25,8 @@
>
> union hv_ghcb;
>
> -DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
> -DECLARE_STATIC_KEY_FALSE(isolation_type_en_snp);
> +DECLARE_STATIC_KEY_FALSE(isol_type_snp_paravisor);
> +DECLARE_STATIC_KEY_FALSE(isol_type_snp_enlightened);
>
> typedef int (*hyperv_fill_flush_list_func)(
> struct hv_guest_mapping_flush_list *flush,
> @@ -46,7 +46,7 @@ extern void *hv_hypercall_pg;
>
> extern u64 hv_current_partition_id;
>
> -extern bool hv_isolation_type_en_snp(void);
> +extern bool hv_isol_type_snp_enlightened(void);
>
> extern union hv_ghcb * __percpu *hv_ghcb_pg;
>
> @@ -268,7 +268,7 @@ static inline void hv_sev_init_mem_and_cpu(void) {}
> static int hv_snp_boot_ap(int cpu, unsigned long start_ip) {}
> #endif
>
> -extern bool hv_isolation_type_snp(void);
> +extern bool hv_isol_type_snp_paravisor(void);
This declaration of hv_isolation_type_snp() also occurs twice
in include/asm-generic/mshyperv.h. I think this one can be
dropped entirely rather than renamed since
include/asm-generic/mshyperv.h is #include'd at the bottom of
this file, and there is no user in between.
hv_isolation_type_snp() is used in several architecture
independent source code files, so having it declared in
include/asm-generic/mshyperv.h makes sense rather than
being in an architecture-specific version of mshyperv.h.
>
> static inline bool hv_is_synic_reg(unsigned int reg)
> {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 6ff0b60d30f9..3c61b4b6a5e3 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -66,7 +66,7 @@ u64 hv_get_non_nested_register(unsigned int reg)
> {
> u64 value;
>
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + if (hv_is_synic_reg(reg) && hv_isol_type_snp_paravisor())
> hv_ghcb_msr_read(reg, &value);
> else
> rdmsrl(reg, value);
> @@ -76,7 +76,7 @@ EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
>
> void hv_set_non_nested_register(unsigned int reg, u64 value)
> {
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + if (hv_is_synic_reg(reg) && hv_isol_type_snp_paravisor()) {
> hv_ghcb_msr_write(reg, value);
>
> /* Write proxy bit via wrmsl instruction */
> @@ -300,7 +300,7 @@ static void __init hv_smp_prepare_cpus(unsigned int
> max_cpus)
> * Override wakeup_secondary_cpu_64 callback for SEV-SNP
> * enlightened guest.
> */
> - if (hv_isolation_type_en_snp())
> + if (hv_isol_type_snp_enlightened())
> apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap;
>
> if (!hv_root_partition)
> @@ -421,9 +421,9 @@ static void __init ms_hyperv_init_platform(void)
>
>
> if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
> - static_branch_enable(&isolation_type_en_snp);
> + static_branch_enable(&isol_type_snp_enlightened);
> } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) {
> - static_branch_enable(&isolation_type_snp);
> + static_branch_enable(&isol_type_snp_paravisor);
> }
> }
>
> @@ -545,7 +545,7 @@ static void __init ms_hyperv_init_platform(void)
> if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT))
> mark_tsc_unstable("running on Hyper-V");
>
> - if (hv_isolation_type_en_snp())
> + if (hv_isol_type_snp_enlightened())
> hv_sev_init_mem_and_cpu();
>
> hardlockup_detector_disable();
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index 02b54f85dc60..f86570f3bc1e 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -484,7 +484,7 @@ void vmbus_set_event(struct vmbus_channel *channel)
>
> ++channel->sig_events;
>
> - if (hv_isolation_type_snp())
> + if (hv_isol_type_snp_paravisor())
> hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
> NULL, sizeof(channel->sig_event));
> else
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index ec6e35a0d9bf..3a6e5ecd03d8 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -64,7 +64,7 @@ int hv_post_message(union hv_connection_id connection_id,
> aligned_msg->payload_size = payload_size;
> memcpy((void *)aligned_msg->payload, payload, payload_size);
>
> - if (hv_isolation_type_snp())
> + if (hv_isol_type_snp_paravisor())
> status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
> (void *)aligned_msg, NULL,
> sizeof(*aligned_msg));
> @@ -109,7 +109,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp() && !hv_root_partition) {
> + if (!hv_isol_type_snp_paravisor() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -125,7 +125,7 @@ int hv_synic_alloc(void)
> }
> }
>
> - if (hv_isolation_type_en_snp()) {
> + if (hv_isol_type_snp_enlightened()) {
> ret = set_memory_decrypted((unsigned long)
> hv_cpu->synic_message_page, 1);
> if (ret) {
> @@ -174,7 +174,7 @@ void hv_synic_free(void)
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> /* It's better to leak the page if the encryption fails. */
> - if (hv_isolation_type_en_snp()) {
> + if (hv_isol_type_snp_enlightened()) {
> if (hv_cpu->synic_message_page) {
> ret = set_memory_encrypted((unsigned long)
> hv_cpu->synic_message_page, 1);
> @@ -221,7 +221,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hv_isol_type_snp_paravisor() || hv_root_partition) {
> /* Mask out vTOM bit. ioremap_cache() maps decrypted */
> u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
> ~ms_hyperv.shared_gpa_boundary;
> @@ -240,7 +240,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hv_isol_type_snp_paravisor() || hv_root_partition) {
> /* Mask out vTOM bit. ioremap_cache() maps decrypted */
> u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
> ~ms_hyperv.shared_gpa_boundary;
> @@ -323,7 +323,7 @@ void hv_synic_disable_regs(unsigned int cpu)
> * addresses.
> */
> simp.simp_enabled = 0;
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hv_isol_type_snp_paravisor() || hv_root_partition) {
> iounmap(hv_cpu->synic_message_page);
> hv_cpu->synic_message_page = NULL;
> } else {
> @@ -335,7 +335,7 @@ void hv_synic_disable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 0;
>
> - if (hv_isolation_type_snp() || hv_root_partition) {
> + if (hv_isol_type_snp_paravisor() || hv_root_partition) {
> iounmap(hv_cpu->synic_event_page);
> hv_cpu->synic_event_page = NULL;
> } else {
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index 2d43ba2bc925..e205f85709ad 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -381,7 +381,7 @@ int hv_common_cpu_init(unsigned int cpu)
> *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
> }
>
> - if (hv_isolation_type_en_snp()) {
> + if (hv_isol_type_snp_enlightened()) {
> ret = set_memory_decrypted((unsigned long)*inputarg, pgcount);
> if (ret) {
> kfree(*inputarg);
> @@ -509,17 +509,17 @@ bool __weak hv_is_isolation_supported(void)
> }
> EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
>
> -bool __weak hv_isolation_type_snp(void)
> +bool __weak hv_isol_type_snp_paravisor(void)
> {
> return false;
> }
> -EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
> +EXPORT_SYMBOL_GPL(hv_isol_type_snp_paravisor);
>
> -bool __weak hv_isolation_type_en_snp(void)
> +bool __weak hv_isol_type_snp_enlightened(void)
> {
> return false;
> }
> -EXPORT_SYMBOL_GPL(hv_isolation_type_en_snp);
> +EXPORT_SYMBOL_GPL(hv_isol_type_snp_enlightened);
>
> void __weak hv_setup_vmbus_handler(void (*handler)(void))
> {
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index f73a044ecaa7..b8f2b48b640f 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -64,7 +64,7 @@ extern void * __percpu *hyperv_pcpu_output_arg;
>
> extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
> extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
> -extern bool hv_isolation_type_snp(void);
> +extern bool hv_isol_type_snp_paravisor(void);
This declaration duplicates the same declaration below in this
same file. One of the two can be deleted entirely instead of
being renamed.
>
> /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall
> status. */
> static inline int hv_result(u64 status)
> @@ -279,7 +279,7 @@ bool hv_is_hyperv_initialized(void);
> bool hv_is_hibernation_supported(void);
> enum hv_isolation_type hv_get_isolation_type(void);
> bool hv_is_isolation_supported(void);
> -bool hv_isolation_type_snp(void);
> +bool hv_isol_type_snp_paravisor(void);
Duplicate of above.
> u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
> void hyperv_cleanup(void);
> bool hv_query_ext_cap(u64 cap_query);
> --
> 2.25.1
^ permalink raw reply
* RE: [EXTERNAL] [Patch v3 0/4] RDMA/mana_ib Read Capabilities
From: Ajay Sharma @ 2023-07-28 3:01 UTC (permalink / raw)
To: Long Li, sharmaajay@linuxonhyperv.com, Jason Gunthorpe,
Leon Romanovsky, Dexuan Cui, Wei Liu, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Ajay Sharma
In-Reply-To: <1690402104-29518-1-git-send-email-sharmaajay@linuxonhyperv.com>
+Long
> -----Original Message-----
> From: sharmaajay@linuxonhyperv.com <sharmaajay@linuxonhyperv.com>
> Sent: Wednesday, July 26, 2023 3:08 PM
> To: Jason Gunthorpe <jgg@ziepe.ca>; Leon Romanovsky <leon@kernel.org>;
> Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S.
> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>
> Cc: linux-rdma@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>
> Subject: [EXTERNAL] [Patch v3 0/4] RDMA/mana_ib Read Capabilities
>
> From: Ajay Sharma <sharmaajay@microsoft.com>
>
> This patch series introduces some cleanup changes and resource control
> changes. The mana and mana_ib devices are used at common places so a
> consistent naming is introduced. Adapter object container to have a common
> point of object release for resources and query the management software to
> prevent resource overflow.
> It also introduces async channel for management to notify the clients in case of
> errors/info.
>
> Ajay Sharma (4):
> RDMA/mana_ib : Rename all mana_ib_dev type variables to mib_dev
> RDMA/mana_ib : Register Mana IB device with Management SW
> RDMA/mana_ib : Create adapter and Add error eq
> RDMA/mana_ib : Query adapter capabilities
>
> drivers/infiniband/hw/mana/cq.c | 12 +-
> drivers/infiniband/hw/mana/device.c | 72 +++--
> drivers/infiniband/hw/mana/main.c | 282 +++++++++++++-----
> drivers/infiniband/hw/mana/mana_ib.h | 96 +++++-
> drivers/infiniband/hw/mana/mr.c | 42 ++-
> drivers/infiniband/hw/mana/qp.c | 82 ++---
> drivers/infiniband/hw/mana/wq.c | 21 +-
> .../net/ethernet/microsoft/mana/gdma_main.c | 151 ++++++----
> drivers/net/ethernet/microsoft/mana/mana_en.c | 3 +
> include/net/mana/gdma.h | 16 +-
> 10 files changed, 529 insertions(+), 248 deletions(-)
>
> --
> 2.25.1
^ permalink raw reply
* RE: [EXTERNAL] [Patch v3 2/4] RDMA/mana_ib : Register Mana IB device with Management SW
From: Ajay Sharma @ 2023-07-28 3:01 UTC (permalink / raw)
To: Long Li, sharmaajay@linuxonhyperv.com, Jason Gunthorpe,
Leon Romanovsky, Dexuan Cui, Wei Liu, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Ajay Sharma
In-Reply-To: <1690402104-29518-3-git-send-email-sharmaajay@linuxonhyperv.com>
+Long
> -----Original Message-----
> From: sharmaajay@linuxonhyperv.com <sharmaajay@linuxonhyperv.com>
> Sent: Wednesday, July 26, 2023 3:08 PM
> To: Jason Gunthorpe <jgg@ziepe.ca>; Leon Romanovsky <leon@kernel.org>;
> Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S.
> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>
> Cc: linux-rdma@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>
> Subject: [EXTERNAL] [Patch v3 2/4] RDMA/mana_ib : Register Mana IB device
> with Management SW
>
> From: Ajay Sharma <sharmaajay@microsoft.com>
>
> Each of the MANA infiniband devices must be registered with the management
> software to request services/resources.
> Register the Mana IB device with Management which would later help get an
> adapter handle.
>
> Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> ---
> drivers/infiniband/hw/mana/device.c | 20 +++++--
> drivers/infiniband/hw/mana/main.c | 58 ++++++-------------
> drivers/infiniband/hw/mana/mana_ib.h | 1 +
> drivers/infiniband/hw/mana/mr.c | 17 ++----
> drivers/infiniband/hw/mana/qp.c | 10 ++--
> .../net/ethernet/microsoft/mana/gdma_main.c | 5 ++
> include/net/mana/gdma.h | 3 +
> 7 files changed, 55 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mana/device.c
> b/drivers/infiniband/hw/mana/device.c
> index 083f27246ba8..ea4c8c8fc10d 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -78,22 +78,34 @@ static int mana_ib_probe(struct auxiliary_device
> *adev,
> mib_dev->ib_dev.num_comp_vectors = 1;
> mib_dev->ib_dev.dev.parent = mdev->gdma_context->dev;
>
> - ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
> - mdev->gdma_context->dev);
> + ret = mana_gd_register_device(&mib_dev->gc->mana_ib);
> if (ret) {
> - ib_dealloc_device(&mib_dev->ib_dev);
> - return ret;
> + ibdev_err(&mib_dev->ib_dev, "Failed to register device, ret
> %d",
> + ret);
> + goto free_ib_device;
> }
>
> + ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
> + mdev->gdma_context->dev);
> + if (ret)
> + goto deregister_device;
> +
> dev_set_drvdata(&adev->dev, mib_dev);
>
> return 0;
> +
> +deregister_device:
> + mana_gd_deregister_device(&mib_dev->gc->mana_ib);
> +free_ib_device:
> + ib_dealloc_device(&mib_dev->ib_dev);
> + return ret;
> }
>
> static void mana_ib_remove(struct auxiliary_device *adev) {
> struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
>
> + mana_gd_deregister_device(&mib_dev->gc->mana_ib);
> ib_unregister_device(&mib_dev->ib_dev);
> ib_dealloc_device(&mib_dev->ib_dev);
> }
> diff --git a/drivers/infiniband/hw/mana/main.c
> b/drivers/infiniband/hw/mana/main.c
> index 189e774cdab6..2c4e3c496644 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -8,7 +8,7 @@
> void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct mana_ib_pd
> *pd,
> u32 port)
> {
> - struct gdma_dev *gd = mib_dev->gdma_dev;
> + struct gdma_dev *gd = &mib_dev->gc->mana;
> struct mana_port_context *mpc;
> struct net_device *ndev;
> struct mana_context *mc;
> @@ -32,7 +32,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev,
> u32 port,
> struct mana_ib_pd *pd,
> u32 doorbell_id)
> {
> - struct gdma_dev *mdev = mib_dev->gdma_dev;
> + struct gdma_dev *mdev = &mib_dev->gc->mana;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> struct net_device *ndev;
> @@ -81,17 +81,16 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> struct gdma_create_pd_req req = {};
> enum gdma_pd_flags flags = 0;
> struct mana_ib_dev *mib_dev;
> - struct gdma_dev *mdev;
> +
> int err;
>
> mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - mdev = mib_dev->gdma_dev;
>
> mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
> sizeof(resp));
>
> req.flags = flags;
> - err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
> + err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
> sizeof(resp), &resp);
>
> if (err || resp.hdr.status) {
> @@ -121,17 +120,15 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> struct gdma_destory_pd_resp resp = {};
> struct gdma_destroy_pd_req req = {};
> struct mana_ib_dev *mib_dev;
> - struct gdma_dev *mdev;
> int err;
>
> mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - mdev = mib_dev->gdma_dev;
>
> mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
> sizeof(resp));
>
> req.pd_handle = pd->pd_handle;
> - err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
> + err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
> sizeof(resp), &resp);
>
> if (err || resp.hdr.status) {
> @@ -207,17 +204,13 @@ int mana_ib_alloc_ucontext(struct ib_ucontext
> *ibcontext,
> container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> struct mana_ib_dev *mib_dev;
> - struct gdma_context *gc;
> - struct gdma_dev *dev;
> int doorbell_page;
> int ret;
>
> mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - dev = mib_dev->gdma_dev;
> - gc = dev->gdma_context;
>
> /* Allocate a doorbell page index */
> - ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
> + ret = mana_gd_allocate_doorbell_page(mib_dev->gc,
> &doorbell_page);
> if (ret) {
> ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
> return ret;
> @@ -236,20 +229,17 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext
> *ibcontext)
> container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> struct mana_ib_dev *mib_dev;
> - struct gdma_context *gc;
> int ret;
>
> mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - gc = mib_dev->gdma_dev->gdma_context;
>
> - ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
> + ret = mana_gd_destroy_doorbell_page(mib_dev->gc,
> +mana_ucontext->doorbell);
> if (ret)
> ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
> }
>
> static int
> mana_ib_gd_first_dma_region(struct mana_ib_dev *mib_dev,
> - struct gdma_context *gc,
> struct gdma_create_dma_region_req *create_req,
> size_t num_pages, mana_handle_t *gdma_region,
> u32 expected_status)
> @@ -262,7 +252,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev
> *mib_dev,
> struct_size(create_req, page_addr_list, num_pages);
> create_req->page_addr_list_len = num_pages;
>
> - err = mana_gd_send_request(gc, create_req_msg_size, create_req,
> + err = mana_gd_send_request(mib_dev->gc, create_req_msg_size,
> +create_req,
> sizeof(create_resp), &create_resp);
> if (err || create_resp.hdr.status != expected_status) {
> ibdev_dbg(&mib_dev->ib_dev,
> @@ -282,7 +272,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev
> *mib_dev, }
>
> static int
> -mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev, struct
> gdma_context *gc,
> +mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev,
> struct gdma_dma_region_add_pages_req *add_req,
> unsigned int num_pages, u32 expected_status) { @@
> -295,7 +285,7 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev
> *mib_dev, struct gdma_context *gc,
> add_req_msg_size, sizeof(add_resp));
> add_req->page_addr_list_len = num_pages;
>
> - err = mana_gd_send_request(gc, add_req_msg_size, add_req,
> + err = mana_gd_send_request(mib_dev->gc, add_req_msg_size,
> add_req,
> sizeof(add_resp), &add_resp);
> if (err || add_resp.hdr.status != expected_status) {
> ibdev_dbg(&mib_dev->ib_dev,
> @@ -323,18 +313,14 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *mib_dev,
> struct ib_block_iter biter;
> size_t max_pgs_add_cmd = 0;
> size_t max_pgs_create_cmd;
> - struct gdma_context *gc;
> size_t num_pages_total;
> - struct gdma_dev *mdev;
> unsigned long page_sz;
> unsigned int tail = 0;
> u64 *page_addr_list;
> void *request_buf;
> int err;
>
> - mdev = mib_dev->gdma_dev;
> - gc = mdev->gdma_context;
> - hwc = gc->hwc.driver_data;
> + hwc = mib_dev->gc->hwc.driver_data;
>
> /* Hardware requires dma region to align to chosen page size */
> page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0); @@ -
> 388,7 +374,7 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev
> *mib_dev,
>
> if (!num_pages_processed) {
> /* First create message */
> - err = mana_ib_gd_first_dma_region(mib_dev, gc,
> create_req,
> + err = mana_ib_gd_first_dma_region(mib_dev,
> create_req,
> tail, gdma_region,
> expected_status);
> if (err)
> @@ -403,7 +389,7 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *mib_dev,
> page_addr_list = add_req->page_addr_list;
> } else {
> /* Subsequent create messages */
> - err = mana_ib_gd_add_dma_region(mib_dev, gc,
> add_req, tail,
> + err = mana_ib_gd_add_dma_region(mib_dev,
> add_req, tail,
> expected_status);
> if (err)
> break;
> @@ -429,13 +415,9 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *mib_dev,
>
> int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev, u64
> gdma_region) {
> - struct gdma_dev *mdev = mib_dev->gdma_dev;
> - struct gdma_context *gc;
> -
> - gc = mdev->gdma_context;
> ibdev_dbg(&mib_dev->ib_dev, "destroy dma region 0x%llx\n",
> gdma_region);
>
> - return mana_gd_destroy_dma_region(gc, gdma_region);
> + return mana_gd_destroy_dma_region(mib_dev->gc, gdma_region);
> }
>
> int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct
> *vma) @@ -444,13 +426,11 @@ int mana_ib_mmap(struct ib_ucontext
> *ibcontext, struct vm_area_struct *vma)
> container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> struct mana_ib_dev *mib_dev;
> - struct gdma_context *gc;
> phys_addr_t pfn;
> pgprot_t prot;
> int ret;
>
> mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - gc = mib_dev->gdma_dev->gdma_context;
>
> if (vma->vm_pgoff != 0) {
> ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma-
> >vm_pgoff); @@ -458,18 +438,18 @@ int mana_ib_mmap(struct ib_ucontext
> *ibcontext, struct vm_area_struct *vma)
> }
>
> /* Map to the page indexed by ucontext->doorbell */
> - pfn = (gc->phys_db_page_base +
> - gc->db_page_size * mana_ucontext->doorbell) >>
> + pfn = (mib_dev->gc->phys_db_page_base +
> + mib_dev->gc->db_page_size * mana_ucontext->doorbell) >>
> PAGE_SHIFT;
> prot = pgprot_writecombine(vma->vm_page_prot);
>
> - ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size,
> prot,
> - NULL);
> + ret = rdma_user_mmap_io(ibcontext, vma, pfn, mib_dev->gc-
> >db_page_size,
> + prot, NULL);
> if (ret)
> ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
> else
> ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret
> %d\n",
> - pfn, gc->db_page_size, ret);
> + pfn, mib_dev->gc->db_page_size, ret);
>
> return ret;
> }
> diff --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index ee4efd0af278..3a2ba6b96f15 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -30,6 +30,7 @@
> struct mana_ib_dev {
> struct ib_device ib_dev;
> struct gdma_dev *gdma_dev;
> + struct gdma_context *gc;
> };
>
> struct mana_ib_wq {
> diff --git a/drivers/infiniband/hw/mana/mr.c
> b/drivers/infiniband/hw/mana/mr.c index f6a53906204d..3106d1bce837
> 100644
> --- a/drivers/infiniband/hw/mana/mr.c
> +++ b/drivers/infiniband/hw/mana/mr.c
> @@ -29,13 +29,10 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *mib_dev,
> struct mana_ib_mr *mr,
> struct gdma_create_mr_params *mr_params)
> {
> - struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_create_mr_response resp = {};
> struct gdma_create_mr_request req = {};
> - struct gdma_context *gc;
> int err;
>
> - gc = mdev->gdma_context;
>
> mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
> sizeof(resp));
> @@ -56,7 +53,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *mib_dev,
> return -EINVAL;
> }
>
> - err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp),
> &resp);
> + err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
> + sizeof(resp), &resp);
>
> if (err || resp.hdr.status) {
> ibdev_dbg(&mib_dev->ib_dev, "Failed to create mr %d, %u",
> err, @@ -77,22 +75,19 @@ static int mana_ib_gd_create_mr(struct
> mana_ib_dev *mib_dev, static int mana_ib_gd_destroy_mr(struct
> mana_ib_dev *mib_dev, u64 mr_handle) {
> struct gdma_destroy_mr_response resp = {};
> - struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_destroy_mr_request req = {};
> - struct gdma_context *gc;
> int err;
>
> - gc = mdev->gdma_context;
> -
> mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
> sizeof(resp));
>
> req.mr_handle = mr_handle;
>
> - err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp),
> &resp);
> + err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
> + sizeof(resp), &resp);
> if (err || resp.hdr.status) {
> - dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err,
> - resp.hdr.status);
> + dev_err(mib_dev->gc->dev, "Failed to destroy MR: %d,
> 0x%x\n",
> + err, resp.hdr.status);
> if (!err)
> err = -EPROTO;
> return err;
> diff --git a/drivers/infiniband/hw/mana/qp.c
> b/drivers/infiniband/hw/mana/qp.c index 2e3a57123ed7..874cfd794825
> 100644
> --- a/drivers/infiniband/hw/mana/qp.c
> +++ b/drivers/infiniband/hw/mana/qp.c
> @@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct
> mana_ib_dev *mib_dev,
> u32 req_buf_size;
> int i, err;
>
> - mdev = mib_dev->gdma_dev;
> + mdev = &mib_dev->gc->mana;
> gc = mdev->gdma_context;
>
> req_buf_size =
> @@ -102,7 +102,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
> struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
> struct mana_ib_create_qp_rss_resp resp = {};
> struct mana_ib_create_qp_rss ucmd = {};
> - struct gdma_dev *gd = mib_dev->gdma_dev;
> + struct gdma_dev *gd = &mib_dev->gc->mana;
> mana_handle_t *mana_ind_table;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> @@ -267,7 +267,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
> rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> ibucontext);
> struct mana_ib_create_qp_resp resp = {};
> - struct gdma_dev *gd = mib_dev->gdma_dev;
> + struct gdma_dev *gd = &mib_dev->gc->mana;
> struct mana_ib_create_qp ucmd = {};
> struct mana_obj_spec wq_spec = {};
> struct mana_obj_spec cq_spec = {};
> @@ -437,7 +437,7 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp
> *qp, {
> struct mana_ib_dev *mib_dev =
> container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
> - struct gdma_dev *gd = mib_dev->gdma_dev;
> + struct gdma_dev *gd = &mib_dev->gc->mana;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> struct net_device *ndev;
> @@ -464,7 +464,7 @@ static int mana_ib_destroy_qp_raw(struct
> mana_ib_qp *qp, struct ib_udata *udata) {
> struct mana_ib_dev *mib_dev =
> container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
> - struct gdma_dev *gd = mib_dev->gdma_dev;
> + struct gdma_dev *gd = &mib_dev->gc->mana;
> struct ib_pd *ibpd = qp->ibqp.pd;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 8f3f78b68592..9fa7a2d6c2b2 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -139,6 +139,9 @@ static int mana_gd_detect_devices(struct pci_dev
> *pdev)
> if (dev_type == GDMA_DEVICE_MANA) {
> gc->mana.gdma_context = gc;
> gc->mana.dev_id = dev;
> + } else if (dev_type == GDMA_DEVICE_MANA_IB) {
> + gc->mana_ib.dev_id = dev;
> + gc->mana_ib.gdma_context = gc;
> }
> }
>
> @@ -940,6 +943,7 @@ int mana_gd_register_device(struct gdma_dev *gd)
>
> return 0;
> }
> +EXPORT_SYMBOL(mana_gd_register_device);
>
> int mana_gd_deregister_device(struct gdma_dev *gd)
> {
> @@ -970,6 +974,7 @@ int mana_gd_deregister_device(struct gdma_dev *gd)
>
> return err;
> }
> +EXPORT_SYMBOL(mana_gd_deregister_device);
>
> u32 mana_gd_wq_avail_space(struct gdma_queue *wq)
> {
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
> index 96c120160f15..e2b212dd722b 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -63,6 +63,7 @@ enum {
> GDMA_DEVICE_NONE = 0,
> GDMA_DEVICE_HWC = 1,
> GDMA_DEVICE_MANA = 2,
> + GDMA_DEVICE_MANA_IB = 3,
> };
>
> struct gdma_resource {
> @@ -384,6 +385,8 @@ struct gdma_context {
>
> /* Azure network adapter */
> struct gdma_dev mana;
> + /* rdma device */
> + struct gdma_dev mana_ib;
> };
>
> #define MAX_NUM_GDMA_DEVICES 4
> --
> 2.25.1
^ permalink raw reply
* RE: [EXTERNAL] [Patch v3 1/4] RDMA/mana_ib : Rename all mana_ib_dev type variables to mib_dev
From: Ajay Sharma @ 2023-07-28 3:02 UTC (permalink / raw)
To: Long Li, sharmaajay@linuxonhyperv.com, Jason Gunthorpe,
Leon Romanovsky, Dexuan Cui, Wei Liu, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Ajay Sharma
In-Reply-To: <1690402104-29518-2-git-send-email-sharmaajay@linuxonhyperv.com>
+Long
> -----Original Message-----
> From: sharmaajay@linuxonhyperv.com <sharmaajay@linuxonhyperv.com>
> Sent: Wednesday, July 26, 2023 3:08 PM
> To: Jason Gunthorpe <jgg@ziepe.ca>; Leon Romanovsky <leon@kernel.org>;
> Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S.
> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>
> Cc: linux-rdma@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>
> Subject: [EXTERNAL] [Patch v3 1/4] RDMA/mana_ib : Rename all mana_ib_dev
> type variables to mib_dev
>
> From: Ajay Sharma <sharmaajay@microsoft.com>
>
> This patch does not introduce any functional changes. It establishes a naming
> convention to distinguish the two device types, especially when both are used
> in the same function. Rename all mana_ib_dev type variables to mib_dev to
> have a clean separation between eth dev and ibdev variables.
>
> Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> ---
> drivers/infiniband/hw/mana/cq.c | 12 ++--
> drivers/infiniband/hw/mana/device.c | 34 +++++------
> drivers/infiniband/hw/mana/main.c | 87 ++++++++++++++--------------
> drivers/infiniband/hw/mana/mana_ib.h | 9 +--
> drivers/infiniband/hw/mana/mr.c | 29 +++++-----
> drivers/infiniband/hw/mana/qp.c | 82 +++++++++++++-------------
> drivers/infiniband/hw/mana/wq.c | 21 +++----
> 7 files changed, 140 insertions(+), 134 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mana/cq.c
> b/drivers/infiniband/hw/mana/cq.c index d141cab8a1e6..1aed4e6360ba
> 100644
> --- a/drivers/infiniband/hw/mana/cq.c
> +++ b/drivers/infiniband/hw/mana/cq.c
> @@ -11,10 +11,10 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> ib_cq_init_attr *attr,
> struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> struct ib_device *ibdev = ibcq->device;
> struct mana_ib_create_cq ucmd = {};
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> int err;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> if (udata->inlen < sizeof(ucmd))
> return -EINVAL;
> @@ -41,7 +41,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> ib_cq_init_attr *attr,
> return err;
> }
>
> - err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq-
> >gdma_region);
> + err = mana_ib_gd_create_dma_region(mib_dev, cq->umem,
> +&cq->gdma_region);
> if (err) {
> ibdev_dbg(ibdev,
> "Failed to create dma region for create cq, %d\n",
> @@ -68,11 +68,11 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct
> ib_udata *udata) {
> struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> struct ib_device *ibdev = ibcq->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> - mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, cq->gdma_region);
> ib_umem_release(cq->umem);
>
> return 0;
> diff --git a/drivers/infiniband/hw/mana/device.c
> b/drivers/infiniband/hw/mana/device.c
> index d4541b8707e4..083f27246ba8 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -51,51 +51,51 @@ static int mana_ib_probe(struct auxiliary_device
> *adev, {
> struct mana_adev *madev = container_of(adev, struct mana_adev,
> adev);
> struct gdma_dev *mdev = madev->mdev;
> + struct mana_ib_dev *mib_dev;
> struct mana_context *mc;
> - struct mana_ib_dev *dev;
> int ret;
>
> mc = mdev->driver_data;
>
> - dev = ib_alloc_device(mana_ib_dev, ib_dev);
> - if (!dev)
> + mib_dev = ib_alloc_device(mana_ib_dev, ib_dev);
> + if (!mib_dev)
> return -ENOMEM;
>
> - ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
> + ib_set_device_ops(&mib_dev->ib_dev, &mana_ib_dev_ops);
>
> - dev->ib_dev.phys_port_cnt = mc->num_ports;
> + mib_dev->ib_dev.phys_port_cnt = mc->num_ports;
>
> - ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
> - mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
> + ibdev_dbg(&mib_dev->ib_dev, "mdev=%p id=%d num_ports=%d\n",
> mdev,
> + mdev->dev_id.as_uint32, mib_dev->ib_dev.phys_port_cnt);
>
> - dev->gdma_dev = mdev;
> - dev->ib_dev.node_type = RDMA_NODE_IB_CA;
> + mib_dev->gdma_dev = mdev;
> + mib_dev->ib_dev.node_type = RDMA_NODE_IB_CA;
>
> /*
> * num_comp_vectors needs to set to the max MSIX index
> * when interrupts and event queues are implemented
> */
> - dev->ib_dev.num_comp_vectors = 1;
> - dev->ib_dev.dev.parent = mdev->gdma_context->dev;
> + mib_dev->ib_dev.num_comp_vectors = 1;
> + mib_dev->ib_dev.dev.parent = mdev->gdma_context->dev;
>
> - ret = ib_register_device(&dev->ib_dev, "mana_%d",
> + ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
> mdev->gdma_context->dev);
> if (ret) {
> - ib_dealloc_device(&dev->ib_dev);
> + ib_dealloc_device(&mib_dev->ib_dev);
> return ret;
> }
>
> - dev_set_drvdata(&adev->dev, dev);
> + dev_set_drvdata(&adev->dev, mib_dev);
>
> return 0;
> }
>
> static void mana_ib_remove(struct auxiliary_device *adev) {
> - struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
> + struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
>
> - ib_unregister_device(&dev->ib_dev);
> - ib_dealloc_device(&dev->ib_dev);
> + ib_unregister_device(&mib_dev->ib_dev);
> + ib_dealloc_device(&mib_dev->ib_dev);
> }
>
> static const struct auxiliary_device_id mana_id_table[] = { diff --git
> a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
> index 7be4c3adb4e2..189e774cdab6 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -5,10 +5,10 @@
>
> #include "mana_ib.h"
>
> -void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
> +void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct mana_ib_pd
> +*pd,
> u32 port)
> {
> - struct gdma_dev *gd = dev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct mana_port_context *mpc;
> struct net_device *ndev;
> struct mana_context *mc;
> @@ -28,10 +28,11 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev,
> struct mana_ib_pd *pd,
> mutex_unlock(&pd->vport_mutex);
> }
>
> -int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd
> *pd,
> +int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev, u32 port,
> + struct mana_ib_pd *pd,
> u32 doorbell_id)
> {
> - struct gdma_dev *mdev = dev->gdma_dev;
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> struct net_device *ndev;
> @@ -45,7 +46,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32
> port, struct mana_ib_pd *pd,
>
> pd->vport_use_count++;
> if (pd->vport_use_count > 1) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Skip as this PD is already configured vport\n");
> mutex_unlock(&pd->vport_mutex);
> return 0;
> @@ -56,7 +57,8 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32
> port, struct mana_ib_pd *pd,
> pd->vport_use_count--;
> mutex_unlock(&pd->vport_mutex);
>
> - ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n",
> err);
> + ibdev_dbg(&mib_dev->ib_dev, "Failed to configure vPort
> %d\n",
> + err);
> return err;
> }
>
> @@ -65,7 +67,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32
> port, struct mana_ib_pd *pd,
> pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
> pd->tx_vp_offset = mpc->tx_vp_offset;
>
> - ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id
> %x\n",
> + ibdev_dbg(&mib_dev->ib_dev, "vport handle %llx pdid %x doorbell_id
> +%x\n",
> mpc->port_handle, pd->pdn, doorbell_id);
>
> return 0;
> @@ -78,12 +80,12 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> struct gdma_create_pd_resp resp = {};
> struct gdma_create_pd_req req = {};
> enum gdma_pd_flags flags = 0;
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_dev *mdev;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - mdev = dev->gdma_dev;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mdev = mib_dev->gdma_dev;
>
> mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
> sizeof(resp));
> @@ -93,7 +95,7 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata
> *udata)
> sizeof(resp), &resp);
>
> if (err || resp.hdr.status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to get pd_id err %d status %u\n", err,
> resp.hdr.status);
> if (!err)
> @@ -104,7 +106,7 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
>
> pd->pd_handle = resp.pd_handle;
> pd->pdn = resp.pd_id;
> - ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
> + ibdev_dbg(&mib_dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
> pd->pd_handle, pd->pdn);
>
> mutex_init(&pd->vport_mutex);
> @@ -118,12 +120,12 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> struct ib_device *ibdev = ibpd->device;
> struct gdma_destory_pd_resp resp = {};
> struct gdma_destroy_pd_req req = {};
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_dev *mdev;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - mdev = dev->gdma_dev;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mdev = mib_dev->gdma_dev;
>
> mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
> sizeof(resp));
> @@ -133,7 +135,7 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> sizeof(resp), &resp);
>
> if (err || resp.hdr.status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to destroy pd_handle 0x%llx err %d status
> %u",
> pd->pd_handle, err, resp.hdr.status);
> if (!err)
> @@ -204,14 +206,14 @@ int mana_ib_alloc_ucontext(struct ib_ucontext
> *ibcontext,
> struct mana_ib_ucontext *ucontext =
> container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_context *gc;
> struct gdma_dev *dev;
> int doorbell_page;
> int ret;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - dev = mdev->gdma_dev;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + dev = mib_dev->gdma_dev;
> gc = dev->gdma_context;
>
> /* Allocate a doorbell page index */
> @@ -233,12 +235,12 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext
> *ibcontext)
> struct mana_ib_ucontext *mana_ucontext =
> container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_context *gc;
> int ret;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - gc = mdev->gdma_dev->gdma_context;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + gc = mib_dev->gdma_dev->gdma_context;
>
> ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
> if (ret)
> @@ -246,7 +248,7 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext
> *ibcontext) }
>
> static int
> -mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
> +mana_ib_gd_first_dma_region(struct mana_ib_dev *mib_dev,
> struct gdma_context *gc,
> struct gdma_create_dma_region_req *create_req,
> size_t num_pages, mana_handle_t *gdma_region,
> @@ -263,7 +265,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev
> *dev,
> err = mana_gd_send_request(gc, create_req_msg_size, create_req,
> sizeof(create_resp), &create_resp);
> if (err || create_resp.hdr.status != expected_status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create DMA region: %d, 0x%x\n",
> err, create_resp.hdr.status);
> if (!err)
> @@ -273,14 +275,14 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev
> *dev,
> }
>
> *gdma_region = create_resp.dma_region_handle;
> - ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
> + ibdev_dbg(&mib_dev->ib_dev, "Created DMA region handle 0x%llx\n",
> *gdma_region);
>
> return 0;
> }
>
> static int
> -mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context
> *gc,
> +mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev, struct
> +gdma_context *gc,
> struct gdma_dma_region_add_pages_req *add_req,
> unsigned int num_pages, u32 expected_status) { @@
> -296,7 +298,7 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev *dev,
> struct gdma_context *gc,
> err = mana_gd_send_request(gc, add_req_msg_size, add_req,
> sizeof(add_resp), &add_resp);
> if (err || add_resp.hdr.status != expected_status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create DMA region: %d, 0x%x\n",
> err, add_resp.hdr.status);
>
> @@ -309,7 +311,8 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev
> *dev, struct gdma_context *gc,
> return 0;
> }
>
> -int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct
> ib_umem *umem,
> +int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
> + struct ib_umem *umem,
> mana_handle_t *gdma_region)
> {
> struct gdma_dma_region_add_pages_req *add_req = NULL; @@ -
> 329,14 +332,14 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev
> *dev, struct ib_umem *umem,
> void *request_buf;
> int err;
>
> - mdev = dev->gdma_dev;
> + mdev = mib_dev->gdma_dev;
> gc = mdev->gdma_context;
> hwc = gc->hwc.driver_data;
>
> /* Hardware requires dma region to align to chosen page size */
> page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
> if (!page_sz) {
> - ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
> + ibdev_dbg(&mib_dev->ib_dev, "failed to find page size.\n");
> return -ENOMEM;
> }
> num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
> @@ -362,13 +365,13 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *dev, struct ib_umem *umem,
> create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
> create_req->page_count = num_pages_total;
>
> - ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total
> %lu\n",
> + ibdev_dbg(&mib_dev->ib_dev, "size_dma_region %lu
> num_pages_total
> +%lu\n",
> umem->length, num_pages_total);
>
> - ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
> + ibdev_dbg(&mib_dev->ib_dev, "page_sz %lu offset_in_page %u\n",
> page_sz, create_req->offset_in_page);
>
> - ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu,
> gdma_page_type %u",
> + ibdev_dbg(&mib_dev->ib_dev, "num_pages_to_handle %lu,
> gdma_page_type
> +%u",
> num_pages_to_handle, create_req->gdma_page_type);
>
> page_addr_list = create_req->page_addr_list; @@ -385,7 +388,7 @@
> int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct
> ib_umem *umem,
>
> if (!num_pages_processed) {
> /* First create message */
> - err = mana_ib_gd_first_dma_region(dev, gc,
> create_req,
> + err = mana_ib_gd_first_dma_region(mib_dev, gc,
> create_req,
> tail, gdma_region,
> expected_status);
> if (err)
> @@ -400,7 +403,7 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *dev, struct ib_umem *umem,
> page_addr_list = add_req->page_addr_list;
> } else {
> /* Subsequent create messages */
> - err = mana_ib_gd_add_dma_region(dev, gc, add_req,
> tail,
> + err = mana_ib_gd_add_dma_region(mib_dev, gc,
> add_req, tail,
> expected_status);
> if (err)
> break;
> @@ -417,20 +420,20 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *dev, struct ib_umem *umem,
> }
>
> if (err)
> - mana_ib_gd_destroy_dma_region(dev, *gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, *gdma_region);
>
> out:
> kfree(request_buf);
> return err;
> }
>
> -int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64
> gdma_region)
> +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev, u64
> +gdma_region)
> {
> - struct gdma_dev *mdev = dev->gdma_dev;
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_context *gc;
>
> gc = mdev->gdma_context;
> - ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n",
> gdma_region);
> + ibdev_dbg(&mib_dev->ib_dev, "destroy dma region 0x%llx\n",
> +gdma_region);
>
> return mana_gd_destroy_dma_region(gc, gdma_region); } @@ -
> 440,14 +443,14 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct
> vm_area_struct *vma)
> struct mana_ib_ucontext *mana_ucontext =
> container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_context *gc;
> phys_addr_t pfn;
> pgprot_t prot;
> int ret;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - gc = mdev->gdma_dev->gdma_context;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + gc = mib_dev->gdma_dev->gdma_context;
>
> if (vma->vm_pgoff != 0) {
> ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma-
> >vm_pgoff); diff --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index 502cc8672eef..ee4efd0af278 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -92,10 +92,11 @@ struct mana_ib_rwq_ind_table {
> struct ib_rwq_ind_table ib_ind_table;
> };
>
> -int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct
> ib_umem *umem,
> +int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
> + struct ib_umem *umem,
> mana_handle_t *gdma_region);
>
> -int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
> +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev,
> mana_handle_t gdma_region);
>
> struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, @@ -129,9 +130,9 @@
> int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
>
> int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
>
> -int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
> +int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev, u32 port_id,
> struct mana_ib_pd *pd, u32 doorbell_id); -void
> mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
> +void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct mana_ib_pd
> +*pd,
> u32 port);
>
> int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> diff --git a/drivers/infiniband/hw/mana/mr.c
> b/drivers/infiniband/hw/mana/mr.c index 351207c60eb6..f6a53906204d
> 100644
> --- a/drivers/infiniband/hw/mana/mr.c
> +++ b/drivers/infiniband/hw/mana/mr.c
> @@ -25,12 +25,13 @@ mana_ib_verbs_to_gdma_access_flags(int
> access_flags)
> return flags;
> }
>
> -static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct
> mana_ib_mr *mr,
> +static int mana_ib_gd_create_mr(struct mana_ib_dev *mib_dev,
> + struct mana_ib_mr *mr,
> struct gdma_create_mr_params *mr_params)
> {
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_create_mr_response resp = {};
> struct gdma_create_mr_request req = {};
> - struct gdma_dev *mdev = dev->gdma_dev;
> struct gdma_context *gc;
> int err;
>
> @@ -49,7 +50,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *dev, struct mana_ib_mr *mr,
> break;
>
> default:
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "invalid param (GDMA_MR_TYPE) passed, type
> %d\n",
> req.mr_type);
> return -EINVAL;
> @@ -58,7 +59,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *dev, struct mana_ib_mr *mr,
> err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp),
> &resp);
>
> if (err || resp.hdr.status) {
> - ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err,
> + ibdev_dbg(&mib_dev->ib_dev, "Failed to create mr %d, %u",
> err,
> resp.hdr.status);
> if (!err)
> err = -EPROTO;
> @@ -73,11 +74,11 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *dev, struct mana_ib_mr *mr,
> return 0;
> }
>
> -static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
> +static int mana_ib_gd_destroy_mr(struct mana_ib_dev *mib_dev, u64
> +mr_handle)
> {
> struct gdma_destroy_mr_response resp = {};
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_destroy_mr_request req = {};
> - struct gdma_dev *mdev = dev->gdma_dev;
> struct gdma_context *gc;
> int err;
>
> @@ -107,12 +108,12 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
> struct gdma_create_mr_params mr_params = {};
> struct ib_device *ibdev = ibpd->device;
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> struct mana_ib_mr *mr;
> u64 dma_region_handle;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> ibdev_dbg(ibdev,
> "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
> @@ -133,7 +134,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> goto err_free;
> }
>
> - err = mana_ib_gd_create_dma_region(dev, mr->umem,
> &dma_region_handle);
> + err = mana_ib_gd_create_dma_region(mib_dev, mr->umem,
> +&dma_region_handle);
> if (err) {
> ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
> err);
> @@ -151,7 +152,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> mr_params.gva.access_flags =
> mana_ib_verbs_to_gdma_access_flags(access_flags);
>
> - err = mana_ib_gd_create_mr(dev, mr, &mr_params);
> + err = mana_ib_gd_create_mr(mib_dev, mr, &mr_params);
> if (err)
> goto err_dma_region;
>
> @@ -164,7 +165,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> return &mr->ibmr;
>
> err_dma_region:
> - mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
> + mana_gd_destroy_dma_region(mib_dev->gdma_dev->gdma_context,
> dma_region_handle);
>
> err_umem:
> @@ -179,12 +180,12 @@ int mana_ib_dereg_mr(struct ib_mr *ibmr, struct
> ib_udata *udata) {
> struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr,
> ibmr);
> struct ib_device *ibdev = ibmr->device;
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> - err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
> + err = mana_ib_gd_destroy_mr(mib_dev, mr->mr_handle);
> if (err)
> return err;
>
> diff --git a/drivers/infiniband/hw/mana/qp.c
> b/drivers/infiniband/hw/mana/qp.c index 4b3b5b274e84..2e3a57123ed7
> 100644
> --- a/drivers/infiniband/hw/mana/qp.c
> +++ b/drivers/infiniband/hw/mana/qp.c
> @@ -5,7 +5,7 @@
>
> #include "mana_ib.h"
>
> -static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
> +static int mana_ib_cfg_vport_steering(struct mana_ib_dev *mib_dev,
> struct net_device *ndev,
> mana_handle_t default_rxobj,
> mana_handle_t ind_table[],
> @@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct
> mana_ib_dev *dev,
> u32 req_buf_size;
> int i, err;
>
> - mdev = dev->gdma_dev;
> + mdev = mib_dev->gdma_dev;
> gc = mdev->gdma_context;
>
> req_buf_size =
> @@ -55,10 +55,10 @@ static int mana_ib_cfg_vport_steering(struct
> mana_ib_dev *dev,
> * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
> * ind_table to MANA_INDIRECT_TABLE_SIZE if required
> */
> - ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
> + ibdev_dbg(&mib_dev->ib_dev, "ind table size %u\n", 1 <<
> +log_ind_tbl_size);
> for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
> req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
> - ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
> + ibdev_dbg(&mib_dev->ib_dev, "index %u handle 0x%llx\n", i,
> req_indir_tab[i]);
> }
>
> @@ -68,7 +68,7 @@ static int mana_ib_cfg_vport_steering(struct
> mana_ib_dev *dev,
> else
> netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
>
> - ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
> + ibdev_dbg(&mib_dev->ib_dev, "vport handle %llu default_rxobj
> +0x%llx\n",
> req->vport, default_rxobj);
>
> err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp),
> &resp); @@ -97,12 +97,12 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
> struct ib_udata *udata)
> {
> struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(pd->device, struct mana_ib_dev, ib_dev);
> struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
> struct mana_ib_create_qp_rss_resp resp = {};
> struct mana_ib_create_qp_rss ucmd = {};
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> mana_handle_t *mana_ind_table;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> @@ -123,21 +123,21 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
>
> ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
> if (ret) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed copy from udata for create rss-qp, err %d\n",
> ret);
> return ret;
> }
>
> if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_recv_wr %d exceeding limit\n",
> attr->cap.max_recv_wr);
> return -EINVAL;
> }
>
> if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_recv_sge %d exceeding limit\n",
> attr->cap.max_recv_sge);
> return -EINVAL;
> @@ -145,14 +145,14 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
>
> ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
> if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Indirect table size %d exceeding limit\n",
> ind_tbl_size);
> return -EINVAL;
> }
>
> if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "RX Hash function is not supported, %d\n",
> ucmd.rx_hash_function);
> return -EINVAL;
> @@ -161,14 +161,14 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
> /* IB ports start with 1, MANA start with 0 */
> port = ucmd.port;
> if (port < 1 || port > mc->num_ports) {
> - ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
> + ibdev_dbg(&mib_dev->ib_dev, "Invalid port %u in creating
> qp\n",
> port);
> return -EINVAL;
> }
> ndev = mc->ports[port - 1];
> mpc = netdev_priv(ndev);
>
> - ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
> + ibdev_dbg(&mib_dev->ib_dev, "rx_hash_function %d port %d\n",
> ucmd.rx_hash_function, port);
>
> mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t), @@ -
> 210,7 +210,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct
> ib_pd *pd,
> wq->id = wq_spec.queue_index;
> cq->id = cq_spec.queue_index;
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
> ret, wq->rx_object, wq->id, cq->id);
>
> @@ -221,7 +221,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
> }
> resp.num_entries = i;
>
> - ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
> + ret = mana_ib_cfg_vport_steering(mib_dev, ndev, wq->rx_object,
> mana_ind_table,
> ind_tbl->log_ind_tbl_size,
> ucmd.rx_hash_key_len,
> @@ -231,7 +231,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
>
> ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
> if (ret) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to copy to udata create rss-qp, %d\n",
> ret);
> goto fail;
> @@ -259,7 +259,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd, {
> struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
> struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(ibpd->device, struct mana_ib_dev, ib_dev);
> struct mana_ib_cq *send_cq =
> container_of(attr->send_cq, struct mana_ib_cq, ibcq); @@ -
> 267,7 +267,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct
> ib_pd *ibpd,
> rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
> ibucontext);
> struct mana_ib_create_qp_resp resp = {};
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct mana_ib_create_qp ucmd = {};
> struct mana_obj_spec wq_spec = {};
> struct mana_obj_spec cq_spec = {};
> @@ -285,7 +285,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
>
> err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to copy from udata create qp-raw, %d\n", err);
> return err;
> }
> @@ -296,14 +296,14 @@ static int mana_ib_create_qp_raw(struct ib_qp
> *ibqp, struct ib_pd *ibpd,
> return -EINVAL;
>
> if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_send_wr %d exceeding limit\n",
> attr->cap.max_send_wr);
> return -EINVAL;
> }
>
> if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_send_sge %d exceeding limit\n",
> attr->cap.max_send_sge);
> return -EINVAL;
> @@ -311,38 +311,38 @@ static int mana_ib_create_qp_raw(struct ib_qp
> *ibqp, struct ib_pd *ibpd,
>
> ndev = mc->ports[port - 1];
> mpc = netdev_priv(ndev);
> - ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev,
> mpc);
> + ibdev_dbg(&mib_dev->ib_dev, "port %u ndev %p mpc %p\n", port,
> ndev,
> +mpc);
>
> - err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext-
> >doorbell);
> + err = mana_ib_cfg_vport(mib_dev, port - 1, pd,
> +mana_ucontext->doorbell);
> if (err)
> return -ENODEV;
>
> qp->port = port;
>
> - ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
> + ibdev_dbg(&mib_dev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
> ucmd.sq_buf_addr, ucmd.port);
>
> umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr,
> ucmd.sq_buf_size,
> IB_ACCESS_LOCAL_WRITE);
> if (IS_ERR(umem)) {
> err = PTR_ERR(umem);
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to get umem for create qp-raw, err %d\n",
> err);
> goto err_free_vport;
> }
> qp->sq_umem = umem;
>
> - err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
> + err = mana_ib_gd_create_dma_region(mib_dev, qp->sq_umem,
> &qp->sq_gdma_region);
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create dma region for create qp-raw,
> %d\n",
> err);
> goto err_release_umem;
> }
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "mana_ib_gd_create_dma_region ret %d gdma_region
> 0x%llx\n",
> err, qp->sq_gdma_region);
>
> @@ -358,7 +358,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
> err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ,
> &wq_spec,
> &cq_spec, &qp->tx_object);
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create wq for create raw-qp, err %d\n",
> err);
> goto err_destroy_dma_region;
> @@ -371,7 +371,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
> qp->sq_id = wq_spec.queue_index;
> send_cq->id = cq_spec.queue_index;
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
> qp->tx_object, qp->sq_id, send_cq->id);
>
> @@ -381,7 +381,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
>
> err = ib_copy_to_udata(udata, &resp, sizeof(resp));
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed copy udata for create qp-raw, %d\n",
> err);
> goto err_destroy_wq_obj;
> @@ -393,13 +393,13 @@ static int mana_ib_create_qp_raw(struct ib_qp
> *ibqp, struct ib_pd *ibpd,
> mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
>
> err_destroy_dma_region:
> - mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, qp->sq_gdma_region);
>
> err_release_umem:
> ib_umem_release(umem);
>
> err_free_vport:
> - mana_ib_uncfg_vport(mdev, pd, port - 1);
> + mana_ib_uncfg_vport(mib_dev, pd, port - 1);
>
> return err;
> }
> @@ -435,9 +435,9 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp
> *qp,
> struct ib_rwq_ind_table *ind_tbl,
> struct ib_udata *udata)
> {
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> struct net_device *ndev;
> @@ -452,7 +452,7 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp
> *qp,
> for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
> ibwq = ind_tbl->ind_tbl[i];
> wq = container_of(ibwq, struct mana_ib_wq, ibwq);
> - ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
> + ibdev_dbg(&mib_dev->ib_dev, "destroying wq->rx_object
> %llu\n",
> wq->rx_object);
> mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
> }
> @@ -462,9 +462,9 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp
> *qp,
>
> static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata
> *udata) {
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct ib_pd *ibpd = qp->ibqp.pd;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> @@ -479,11 +479,11 @@ static int mana_ib_destroy_qp_raw(struct
> mana_ib_qp *qp, struct ib_udata *udata)
> mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
>
> if (qp->sq_umem) {
> - mana_ib_gd_destroy_dma_region(mdev, qp-
> >sq_gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, qp-
> >sq_gdma_region);
> ib_umem_release(qp->sq_umem);
> }
>
> - mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
> + mana_ib_uncfg_vport(mib_dev, pd, qp->port - 1);
>
> return 0;
> }
> diff --git a/drivers/infiniband/hw/mana/wq.c
> b/drivers/infiniband/hw/mana/wq.c index 372d361510e0..56bc2b8b6690
> 100644
> --- a/drivers/infiniband/hw/mana/wq.c
> +++ b/drivers/infiniband/hw/mana/wq.c
> @@ -9,7 +9,7 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
> struct ib_wq_init_attr *init_attr,
> struct ib_udata *udata)
> {
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(pd->device, struct mana_ib_dev, ib_dev);
> struct mana_ib_create_wq ucmd = {};
> struct mana_ib_wq *wq;
> @@ -21,7 +21,7 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
>
> err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to copy from udata for create wq, %d\n", err);
> return ERR_PTR(err);
> }
> @@ -30,13 +30,14 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
> if (!wq)
> return ERR_PTR(-ENOMEM);
>
> - ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n",
> ucmd.wq_buf_addr);
> + ibdev_dbg(&mib_dev->ib_dev, "ucmd wq_buf_addr 0x%llx\n",
> + ucmd.wq_buf_addr);
>
> umem = ib_umem_get(pd->device, ucmd.wq_buf_addr,
> ucmd.wq_buf_size,
> IB_ACCESS_LOCAL_WRITE);
> if (IS_ERR(umem)) {
> err = PTR_ERR(umem);
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to get umem for create wq, err %d\n", err);
> goto err_free_wq;
> }
> @@ -46,15 +47,15 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
> wq->wq_buf_size = ucmd.wq_buf_size;
> wq->rx_object = INVALID_MANA_HANDLE;
>
> - err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq-
> >gdma_region);
> + err = mana_ib_gd_create_dma_region(mib_dev, wq->umem,
> +&wq->gdma_region);
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create dma region for create wq, %d\n",
> err);
> goto err_release_umem;
> }
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "mana_ib_gd_create_dma_region ret %d gdma_region
> 0x%llx\n",
> err, wq->gdma_region);
>
> @@ -82,11 +83,11 @@ int mana_ib_destroy_wq(struct ib_wq *ibwq, struct
> ib_udata *udata) {
> struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq,
> ibwq);
> struct ib_device *ib_dev = ibwq->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
>
> - mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
>
> - mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, wq->gdma_region);
> ib_umem_release(wq->umem);
>
> kfree(wq);
> --
> 2.25.1
^ permalink raw reply
* RE: [EXTERNAL] [Patch v3 3/4] RDMA/mana_ib : Create adapter and Add error eq
From: Ajay Sharma @ 2023-07-28 3:02 UTC (permalink / raw)
To: Long Li, sharmaajay@linuxonhyperv.com, Jason Gunthorpe,
Leon Romanovsky, Dexuan Cui, Wei Liu, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Ajay Sharma
In-Reply-To: <1690402104-29518-4-git-send-email-sharmaajay@linuxonhyperv.com>
+Long
> -----Original Message-----
> From: sharmaajay@linuxonhyperv.com <sharmaajay@linuxonhyperv.com>
> Sent: Wednesday, July 26, 2023 3:08 PM
> To: Jason Gunthorpe <jgg@ziepe.ca>; Leon Romanovsky <leon@kernel.org>;
> Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S.
> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>
> Cc: linux-rdma@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>
> Subject: [EXTERNAL] [Patch v3 3/4] RDMA/mana_ib : Create adapter and Add
> error eq
>
> From: Ajay Sharma <sharmaajay@microsoft.com>
>
> Create an adapter object as a container for VF resources.
> Add the error EQ, which is needed for adapter creation and is later used for
> notifications from the management software. The management software uses
> this channel to send messages or error notifications back to the client.
>
> Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> ---
> drivers/infiniband/hw/mana/device.c | 22 ++-
> drivers/infiniband/hw/mana/main.c | 95 ++++++++++++
> drivers/infiniband/hw/mana/mana_ib.h | 33 ++++
> .../net/ethernet/microsoft/mana/gdma_main.c | 146 ++++++++++--------
> drivers/net/ethernet/microsoft/mana/mana_en.c | 3 +
> include/net/mana/gdma.h | 13 +-
> 6 files changed, 242 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mana/device.c
> b/drivers/infiniband/hw/mana/device.c
> index ea4c8c8fc10d..4077e440657a 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -68,7 +68,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
> ibdev_dbg(&mib_dev->ib_dev, "mdev=%p id=%d num_ports=%d\n",
> mdev,
> mdev->dev_id.as_uint32, mib_dev->ib_dev.phys_port_cnt);
>
> - mib_dev->gdma_dev = mdev;
> + mib_dev->gc = mdev->gdma_context;
> mib_dev->ib_dev.node_type = RDMA_NODE_IB_CA;
>
> /*
> @@ -85,15 +85,31 @@ static int mana_ib_probe(struct auxiliary_device
> *adev,
> goto free_ib_device;
> }
>
> + ret = mana_ib_create_error_eq(mib_dev);
> + if (ret) {
> + ibdev_err(&mib_dev->ib_dev, "Failed to allocate err eq");
> + goto deregister_device;
> + }
> +
> + ret = mana_ib_create_adapter(mib_dev);
> + if (ret) {
> + ibdev_err(&mib_dev->ib_dev, "Failed to create adapter");
> + goto free_error_eq;
> + }
> +
> ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
> mdev->gdma_context->dev);
> if (ret)
> - goto deregister_device;
> + goto destroy_adapter;
>
> dev_set_drvdata(&adev->dev, mib_dev);
>
> return 0;
>
> +destroy_adapter:
> + mana_ib_destroy_adapter(mib_dev);
> +free_error_eq:
> + mana_gd_destroy_queue(mib_dev->gc, mib_dev->fatal_err_eq);
> deregister_device:
> mana_gd_deregister_device(&mib_dev->gc->mana_ib);
> free_ib_device:
> @@ -105,6 +121,8 @@ static void mana_ib_remove(struct auxiliary_device
> *adev) {
> struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
>
> + mana_gd_destroy_queue(mib_dev->gc, mib_dev->fatal_err_eq);
> + mana_ib_destroy_adapter(mib_dev);
> mana_gd_deregister_device(&mib_dev->gc->mana_ib);
> ib_unregister_device(&mib_dev->ib_dev);
> ib_dealloc_device(&mib_dev->ib_dev);
> diff --git a/drivers/infiniband/hw/mana/main.c
> b/drivers/infiniband/hw/mana/main.c
> index 2c4e3c496644..1b1a8670d0fa 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -504,3 +504,98 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32
> port, int index, void mana_ib_disassociate_ucontext(struct ib_ucontext
> *ibcontext) { }
> +
> +int mana_ib_destroy_adapter(struct mana_ib_dev *mib_dev) {
> + struct mana_ib_destroy_adapter_resp resp = {};
> + struct mana_ib_destroy_adapter_req req = {};
> + struct gdma_context *gc;
> + int err;
> +
> + gc = mib_dev->gc;
> +
> + mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER,
> sizeof(req),
> + sizeof(resp));
> + req.adapter = mib_dev->adapter_handle;
> + req.hdr.dev_id = gc->mana_ib.dev_id;
> +
> + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp),
> +&resp);
> +
> + if (err) {
> + ibdev_err(&mib_dev->ib_dev, "Failed to destroy adapter err
> %d", err);
> + return err;
> + }
> +
> + return 0;
> +}
> +
> +int mana_ib_create_adapter(struct mana_ib_dev *mib_dev) {
> + struct mana_ib_create_adapter_resp resp = {};
> + struct mana_ib_create_adapter_req req = {};
> + struct gdma_context *gc;
> + int err;
> +
> + gc = mib_dev->gc;
> +
> + mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER,
> sizeof(req),
> + sizeof(resp));
> + req.notify_eq_id = mib_dev->fatal_err_eq->id;
> + req.hdr.dev_id = gc->mana_ib.dev_id;
> +
> + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp),
> +&resp);
> +
> + if (err) {
> + ibdev_err(&mib_dev->ib_dev, "Failed to create adapter err
> %d",
> + err);
> + return err;
> + }
> +
> + mib_dev->adapter_handle = resp.adapter;
> +
> + return 0;
> +}
> +
> +static void mana_ib_soc_event_handler(void *ctx, struct gdma_queue
> *queue,
> + struct gdma_event *event)
> +{
> + struct mana_ib_dev *mib_dev = (struct mana_ib_dev *)ctx;
> +
> + switch (event->type) {
> + case GDMA_EQE_SOC_EVENT_NOTIFICATION:
> + ibdev_info(&mib_dev->ib_dev, "Received SOC Notification");
> + break;
> + case GDMA_EQE_SOC_EVENT_TEST:
> + ibdev_info(&mib_dev->ib_dev, "Received SoC Test");
> + break;
> + default:
> + ibdev_dbg(&mib_dev->ib_dev, "Received unsolicited evt %d",
> + event->type);
> + }
> +}
> +
> +int mana_ib_create_error_eq(struct mana_ib_dev *mib_dev) {
> + struct gdma_queue_spec spec = {};
> + int err;
> +
> + spec.type = GDMA_EQ;
> + spec.monitor_avl_buf = false;
> + spec.queue_size = EQ_SIZE;
> + spec.eq.callback = mana_ib_soc_event_handler;
> + spec.eq.context = mib_dev;
> + spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
> + spec.eq.msix_allocated = true;
> + spec.eq.msix_index = 0;
> + spec.doorbell = mib_dev->gc->mana_ib.doorbell;
> + spec.pdid = mib_dev->gc->mana_ib.pdid;
> +
> + err = mana_gd_create_mana_eq(&mib_dev->gc->mana_ib, &spec,
> + &mib_dev->fatal_err_eq);
> + if (err)
> + return err;
> +
> + mib_dev->fatal_err_eq->eq.disable_needed = true;
> +
> + return 0;
> +}
> diff --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index 3a2ba6b96f15..8a652bccd978 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -31,6 +31,8 @@ struct mana_ib_dev {
> struct ib_device ib_dev;
> struct gdma_dev *gdma_dev;
> struct gdma_context *gc;
> + struct gdma_queue *fatal_err_eq;
> + mana_handle_t adapter_handle;
> };
>
> struct mana_ib_wq {
> @@ -93,6 +95,31 @@ struct mana_ib_rwq_ind_table {
> struct ib_rwq_ind_table ib_ind_table;
> };
>
> +enum mana_ib_command_code {
> + MANA_IB_CREATE_ADAPTER = 0x30002,
> + MANA_IB_DESTROY_ADAPTER = 0x30003,
> +};
> +
> +struct mana_ib_create_adapter_req {
> + struct gdma_req_hdr hdr;
> + u32 notify_eq_id;
> + u32 reserved;
> +}; /*HW Data */
> +
> +struct mana_ib_create_adapter_resp {
> + struct gdma_resp_hdr hdr;
> + mana_handle_t adapter;
> +}; /* HW Data */
> +
> +struct mana_ib_destroy_adapter_req {
> + struct gdma_req_hdr hdr;
> + mana_handle_t adapter;
> +}; /*HW Data */
> +
> +struct mana_ib_destroy_adapter_resp {
> + struct gdma_resp_hdr hdr;
> +}; /* HW Data */
> +
> int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
> struct ib_umem *umem,
> mana_handle_t *gdma_region);
> @@ -161,4 +188,10 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32
> port, int index,
>
> void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
>
> +int mana_ib_create_error_eq(struct mana_ib_dev *mib_dev);
> +
> +int mana_ib_create_adapter(struct mana_ib_dev *mib_dev);
> +
> +int mana_ib_destroy_adapter(struct mana_ib_dev *mib_dev);
> +
> #endif
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 9fa7a2d6c2b2..55e194c9d84e 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -185,7 +185,8 @@ void mana_gd_free_memory(struct gdma_mem_info
> *gmi) }
>
> static int mana_gd_create_hw_eq(struct gdma_context *gc,
> - struct gdma_queue *queue)
> + struct gdma_queue *queue,
> + u32 doorbell, u32 pdid)
> {
> struct gdma_create_queue_resp resp = {};
> struct gdma_create_queue_req req = {}; @@ -199,8 +200,8 @@ static
> int mana_gd_create_hw_eq(struct gdma_context *gc,
>
> req.hdr.dev_id = queue->gdma_dev->dev_id;
> req.type = queue->type;
> - req.pdid = queue->gdma_dev->pdid;
> - req.doolbell_id = queue->gdma_dev->doorbell;
> + req.pdid = pdid;
> + req.doolbell_id = doorbell;
> req.gdma_region = queue->mem_info.dma_region_handle;
> req.queue_size = queue->queue_size;
> req.log2_throttle_limit = queue->eq.log2_throttle_limit; @@ -371,53
> +372,51 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
> }
> }
>
> -static void mana_gd_process_eq_events(void *arg)
> +static void mana_gd_process_eq_events(struct list_head *eq_list)
> {
> u32 owner_bits, new_bits, old_bits;
> union gdma_eqe_info eqe_info;
> struct gdma_eqe *eq_eqe_ptr;
> - struct gdma_queue *eq = arg;
> + struct gdma_queue *eq;
> struct gdma_context *gc;
> struct gdma_eqe *eqe;
> u32 head, num_eqe;
> int i;
>
> - gc = eq->gdma_dev->gdma_context;
> -
> - num_eqe = eq->queue_size / GDMA_EQE_SIZE;
> - eq_eqe_ptr = eq->queue_mem_ptr;
> -
> - /* Process up to 5 EQEs at a time, and update the HW head. */
> - for (i = 0; i < 5; i++) {
> - eqe = &eq_eqe_ptr[eq->head % num_eqe];
> - eqe_info.as_uint32 = eqe->eqe_info;
> - owner_bits = eqe_info.owner_bits;
> -
> - old_bits = (eq->head / num_eqe - 1) &
> GDMA_EQE_OWNER_MASK;
> - /* No more entries */
> - if (owner_bits == old_bits)
> - break;
> -
> - new_bits = (eq->head / num_eqe) &
> GDMA_EQE_OWNER_MASK;
> - if (owner_bits != new_bits) {
> - dev_err(gc->dev, "EQ %d: overflow detected\n", eq-
> >id);
> - break;
> + list_for_each_entry_rcu(eq, eq_list, entry) {
> + gc = eq->gdma_dev->gdma_context;
> +
> + num_eqe = eq->queue_size / GDMA_EQE_SIZE;
> + eq_eqe_ptr = eq->queue_mem_ptr;
> + /* Process up to 5 EQEs at a time, and update the HW head. */
> + for (i = 0; i < 5; i++) {
> + eqe = &eq_eqe_ptr[eq->head % num_eqe];
> + eqe_info.as_uint32 = eqe->eqe_info;
> + owner_bits = eqe_info.owner_bits;
> +
> + old_bits = (eq->head / num_eqe - 1) &
> GDMA_EQE_OWNER_MASK;
> + /* No more entries */
> + if (owner_bits == old_bits)
> + break;
> +
> + new_bits = (eq->head / num_eqe) &
> GDMA_EQE_OWNER_MASK;
> + if (owner_bits != new_bits) {
> + dev_err(gc->dev, "EQ %d: overflow
> detected\n",
> + eq->id);
> + break;
> + }
> + /* Per GDMA spec, rmb is necessary after checking
> owner_bits, before
> + * reading eqe.
> + */
> + rmb();
> + mana_gd_process_eqe(eq);
> + eq->head++;
> }
>
> - /* Per GDMA spec, rmb is necessary after checking owner_bits,
> before
> - * reading eqe.
> - */
> - rmb();
> -
> - mana_gd_process_eqe(eq);
> -
> - eq->head++;
> + head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
> + mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq-
> >type,
> + eq->id, head, SET_ARM_BIT);
> }
> -
> - head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
> -
> - mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq-
> >id,
> - head, SET_ARM_BIT);
> }
>
> static int mana_gd_register_irq(struct gdma_queue *queue, @@ -435,44
> +434,47 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
> gc = gd->gdma_context;
> r = &gc->msix_resource;
> dev = gc->dev;
> + msi_index = spec->eq.msix_index;
>
> spin_lock_irqsave(&r->lock, flags);
>
> - msi_index = find_first_zero_bit(r->map, r->size);
> - if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
> - err = -ENOSPC;
> - } else {
> - bitmap_set(r->map, msi_index, 1);
> - queue->eq.msix_index = msi_index;
> - }
> -
> - spin_unlock_irqrestore(&r->lock, flags);
> + if (!spec->eq.msix_allocated) {
> + msi_index = find_first_zero_bit(r->map, r->size);
>
> - if (err) {
> - dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
> - err, msi_index, r->size, gc->num_msix_usable);
> + if (msi_index >= r->size ||
> + msi_index >= gc->num_msix_usable)
> + err = -ENOSPC;
> + else
> + bitmap_set(r->map, msi_index, 1);
>
> - return err;
> + if (err) {
> + dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u,
> nMSI:%u",
> + err, msi_index, r->size, gc->num_msix_usable);
> + goto out;
> + }
> }
>
> + queue->eq.msix_index = msi_index;
> gic = &gc->irq_contexts[msi_index];
>
> - WARN_ON(gic->handler || gic->arg);
> -
> - gic->arg = queue;
> + list_add_rcu(&queue->entry, &gic->eq_list);
>
> gic->handler = mana_gd_process_eq_events;
>
> - return 0;
> +out:
> + spin_unlock_irqrestore(&r->lock, flags);
> + return err;
> }
>
> -static void mana_gd_deregiser_irq(struct gdma_queue *queue)
> +static void mana_gd_deregister_irq(struct gdma_queue *queue)
> {
> struct gdma_dev *gd = queue->gdma_dev;
> struct gdma_irq_context *gic;
> struct gdma_context *gc;
> struct gdma_resource *r;
> unsigned int msix_index;
> + struct list_head *p, *n;
> + struct gdma_queue *eq;
> unsigned long flags;
>
> gc = gd->gdma_context;
> @@ -483,14 +485,23 @@ static void mana_gd_deregiser_irq(struct
> gdma_queue *queue)
> if (WARN_ON(msix_index >= gc->num_msix_usable))
> return;
>
> + spin_lock_irqsave(&r->lock, flags);
> +
> gic = &gc->irq_contexts[msix_index];
> - gic->handler = NULL;
> - gic->arg = NULL;
> + list_for_each_safe(p, n, &gic->eq_list) {
> + eq = list_entry(p, struct gdma_queue, entry);
> + if (queue == eq) {
> + list_del(&eq->entry);
> + break;
> + }
> + }
>
> - spin_lock_irqsave(&r->lock, flags);
> - bitmap_clear(r->map, msix_index, 1);
> - spin_unlock_irqrestore(&r->lock, flags);
> + if (list_empty(&gic->eq_list)) {
> + gic->handler = NULL;
> + bitmap_clear(r->map, msix_index, 1);
> + }
>
> + spin_unlock_irqrestore(&r->lock, flags);
> queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; }
>
> @@ -553,7 +564,7 @@ static void mana_gd_destroy_eq(struct gdma_context
> *gc, bool flush_evenets,
> dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
> }
>
> - mana_gd_deregiser_irq(queue);
> + mana_gd_deregister_irq(queue);
>
> if (queue->eq.disable_needed)
> mana_gd_disable_queue(queue);
> @@ -568,7 +579,7 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
> u32 log2_num_entries;
> int err;
>
> - queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
> + queue->eq.msix_index = spec->eq.msix_index;
>
> log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE);
>
> @@ -590,7 +601,8 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
> queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1;
>
> if (create_hwq) {
> - err = mana_gd_create_hw_eq(gc, queue);
> + err = mana_gd_create_hw_eq(gc, queue,
> + spec->doorbell, spec->pdid);
> if (err)
> goto out;
>
> @@ -800,6 +812,7 @@ int mana_gd_create_mana_eq(struct gdma_dev *gd,
> kfree(queue);
> return err;
> }
> +EXPORT_SYMBOL(mana_gd_create_mana_eq);
>
> int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
> const struct gdma_queue_spec *spec, @@ -876,6
> +889,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct
> gdma_queue *queue)
> mana_gd_free_memory(gmi);
> kfree(queue);
> }
> +EXPORT_SYMBOL(mana_gd_destroy_queue);
>
> int mana_gd_verify_vf_version(struct pci_dev *pdev) { @@ -1193,7 +1207,7
> @@ static irqreturn_t mana_gd_intr(int irq, void *arg)
> struct gdma_irq_context *gic = arg;
>
> if (gic->handler)
> - gic->handler(gic->arg);
> + gic->handler(&gic->eq_list);
>
> return IRQ_HANDLED;
> }
> @@ -1246,7 +1260,7 @@ static int mana_gd_setup_irqs(struct pci_dev
> *pdev)
> for (i = 0; i < nvec; i++) {
> gic = &gc->irq_contexts[i];
> gic->handler = NULL;
> - gic->arg = NULL;
> + INIT_LIST_HEAD(&gic->eq_list);
>
> if (!i)
> snprintf(gic->name, MANA_IRQ_NAME_SZ,
> "mana_hwc@pci:%s", diff --git
> a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index a499e460594b..d2ba7de8b512 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1167,6 +1167,9 @@ static int mana_create_eq(struct mana_context
> *ac)
> spec.eq.callback = NULL;
> spec.eq.context = ac->eqs;
> spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
> + spec.eq.msix_allocated = false;
> + spec.doorbell = gd->doorbell;
> + spec.pdid = gd->pdid;
>
> for (i = 0; i < gc->max_num_queues; i++) {
> err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
> diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index
> e2b212dd722b..aee8e8fa1ea6 100644
> --- a/include/net/mana/gdma.h
> +++ b/include/net/mana/gdma.h
> @@ -57,6 +57,10 @@ enum gdma_eqe_type {
> GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
> GDMA_EQE_HWC_INIT_DATA = 130,
> GDMA_EQE_HWC_INIT_DONE = 131,
> +
> + /* IB NiC Events start at 176*/
> + GDMA_EQE_SOC_EVENT_NOTIFICATION = 176,
> + GDMA_EQE_SOC_EVENT_TEST,
> };
>
> enum {
> @@ -291,6 +295,7 @@ struct gdma_queue {
>
> u32 head;
> u32 tail;
> + struct list_head entry;
>
> /* Extra fields specific to EQ/CQ. */
> union {
> @@ -318,6 +323,8 @@ struct gdma_queue_spec {
> enum gdma_queue_type type;
> bool monitor_avl_buf;
> unsigned int queue_size;
> + u32 doorbell;
> + u32 pdid;
>
> /* Extra fields specific to EQ/CQ. */
> union {
> @@ -326,6 +333,8 @@ struct gdma_queue_spec {
> void *context;
>
> unsigned long log2_throttle_limit;
> + bool msix_allocated;
> + unsigned int msix_index;
> } eq;
>
> struct {
> @@ -341,8 +350,8 @@ struct gdma_queue_spec { #define
> MANA_IRQ_NAME_SZ 32
>
> struct gdma_irq_context {
> - void (*handler)(void *arg);
> - void *arg;
> + void (*handler)(struct list_head *arg);
> + struct list_head eq_list;
> char name[MANA_IRQ_NAME_SZ];
> };
>
> --
> 2.25.1
^ permalink raw reply
* RE: [EXTERNAL] [Patch v3 4/4] RDMA/mana_ib : Query adapter capabilities
From: Ajay Sharma @ 2023-07-28 3:02 UTC (permalink / raw)
To: Long Li, sharmaajay@linuxonhyperv.com, Jason Gunthorpe,
Leon Romanovsky, Dexuan Cui, Wei Liu, David S. Miller,
Eric Dumazet, Jakub Kicinski, Paolo Abeni
Cc: linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Ajay Sharma
In-Reply-To: <1690402104-29518-5-git-send-email-sharmaajay@linuxonhyperv.com>
> -----Original Message-----
> From: sharmaajay@linuxonhyperv.com <sharmaajay@linuxonhyperv.com>
> Sent: Wednesday, July 26, 2023 3:08 PM
> To: Jason Gunthorpe <jgg@ziepe.ca>; Leon Romanovsky <leon@kernel.org>;
> Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S.
> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>
> Cc: linux-rdma@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>
> Subject: [EXTERNAL] [Patch v3 4/4] RDMA/mana_ib : Query adapter
> capabilities
>
> From: Ajay Sharma <sharmaajay@microsoft.com>
>
> Query the adapter capabilities to expose them to other clients and the VF.
> This checks against the user-supplied values and protects against overflows.
>
> Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> ---
> drivers/infiniband/hw/mana/device.c | 4 ++
> drivers/infiniband/hw/mana/main.c | 66 +++++++++++++++++++++++++---
> drivers/infiniband/hw/mana/mana_ib.h | 53 +++++++++++++++++++++-
> 3 files changed, 115 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mana/device.c
> b/drivers/infiniband/hw/mana/device.c
> index 4077e440657a..e15da43c73a0 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -97,6 +97,10 @@ static int mana_ib_probe(struct auxiliary_device *adev,
> goto free_error_eq;
> }
>
> + ret = mana_ib_query_adapter_caps(mib_dev);
> + if (ret)
> + ibdev_dbg(&mib_dev->ib_dev, "Failed to get caps, use
> defaults");
> +
> ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
> mdev->gdma_context->dev);
> if (ret)
> diff --git a/drivers/infiniband/hw/mana/main.c
> b/drivers/infiniband/hw/mana/main.c
> index 1b1a8670d0fa..512815e1e64d 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -469,21 +469,27 @@ int mana_ib_get_port_immutable(struct ib_device
> *ibdev, u32 port_num, int mana_ib_query_device(struct ib_device *ibdev,
> struct ib_device_attr *props,
> struct ib_udata *uhw)
> {
> + struct mana_ib_dev *mib_dev = container_of(ibdev,
> + struct mana_ib_dev, ib_dev);
> +
> props->max_qp = MANA_MAX_NUM_QUEUES;
> props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE;
> -
> - /*
> - * max_cqe could be potentially much bigger.
> - * As this version of driver only support RAW QP, set it to the same
> - * value as max_qp_wr
> - */
> props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE;
> -
> props->max_mr_size = MANA_IB_MAX_MR_SIZE;
> props->max_mr = MANA_IB_MAX_MR;
> props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES;
> props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES;
>
> + /* If the Management SW is updated and supports adapter creation */
> + if (mib_dev->adapter_handle) {
> + props->max_qp = mib_dev->adapter_caps.max_qp_count;
> + props->max_qp_wr = mib_dev-
> >adapter_caps.max_requester_sq_size;
> + props->max_cqe = mib_dev-
> >adapter_caps.max_requester_sq_size;
> + props->max_mr = mib_dev->adapter_caps.max_mr_count;
> + props->max_send_sge = mib_dev-
> >adapter_caps.max_send_wqe_size;
> + props->max_recv_sge = mib_dev-
> >adapter_caps.max_recv_wqe_size;
> + }
> +
> return 0;
> }
>
> @@ -599,3 +605,49 @@ int mana_ib_create_error_eq(struct mana_ib_dev
> *mib_dev)
>
> return 0;
> }
> +
> +static void assign_caps(struct mana_ib_adapter_caps *caps,
> + struct mana_ib_query_adapter_caps_resp *resp) {
> + caps->max_sq_id = resp->max_sq_id;
> + caps->max_rq_id = resp->max_rq_id;
> + caps->max_cq_id = resp->max_cq_id;
> + caps->max_qp_count = resp->max_qp_count;
> + caps->max_cq_count = resp->max_cq_count;
> + caps->max_mr_count = resp->max_mr_count;
> + caps->max_pd_count = resp->max_pd_count;
> + caps->max_inbound_read_limit = resp->max_inbound_read_limit;
> + caps->max_outbound_read_limit = resp->max_outbound_read_limit;
> + caps->mw_count = resp->mw_count;
> + caps->max_srq_count = resp->max_srq_count;
> + caps->max_requester_sq_size = resp->max_requester_sq_size;
> + caps->max_responder_sq_size = resp->max_responder_sq_size;
> + caps->max_requester_rq_size = resp->max_requester_rq_size;
> + caps->max_responder_rq_size = resp->max_responder_rq_size;
> + caps->max_send_wqe_size = resp->max_send_wqe_size;
> + caps->max_recv_wqe_size = resp->max_recv_wqe_size;
> + caps->max_inline_data_size = resp->max_inline_data_size; }
> +
> +int mana_ib_query_adapter_caps(struct mana_ib_dev *mib_dev) {
> + struct mana_ib_query_adapter_caps_resp resp = {};
> + struct mana_ib_query_adapter_caps_req req = {};
> + int err;
> +
> + mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP,
> sizeof(req),
> + sizeof(resp));
> + req.hdr.resp.msg_version =
> MANA_IB__GET_ADAPTER_CAP_RESPONSE_V3;
> + req.hdr.dev_id = mib_dev->gc->mana_ib.dev_id;
> +
> + err = mana_gd_send_request(mib_dev->gc, sizeof(req), &req,
> + sizeof(resp), &resp);
> +
> + if (err) {
> + ibdev_err(&mib_dev->ib_dev, "Failed to query adapter caps
> err %d", err);
> + return err;
> + }
> +
> + assign_caps(&mib_dev->adapter_caps, &resp);
> + return 0;
> +}
> diff --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index 8a652bccd978..1044358230d3 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -20,19 +20,41 @@
>
> /* MANA doesn't have any limit for MR size */
> #define MANA_IB_MAX_MR_SIZE U64_MAX
> -
> +#define MANA_IB__GET_ADAPTER_CAP_RESPONSE_V3 3
> /*
> * The hardware limit of number of MRs is greater than maximum number of
> MRs
> * that can possibly represent in 24 bits
> */
> #define MANA_IB_MAX_MR 0xFFFFFFu
>
> +struct mana_ib_adapter_caps {
> + u32 max_sq_id;
> + u32 max_rq_id;
> + u32 max_cq_id;
> + u32 max_qp_count;
> + u32 max_cq_count;
> + u32 max_mr_count;
> + u32 max_pd_count;
> + u32 max_inbound_read_limit;
> + u32 max_outbound_read_limit;
> + u32 mw_count;
> + u32 max_srq_count;
> + u32 max_requester_sq_size;
> + u32 max_responder_sq_size;
> + u32 max_requester_rq_size;
> + u32 max_responder_rq_size;
> + u32 max_send_wqe_size;
> + u32 max_recv_wqe_size;
> + u32 max_inline_data_size;
> +};
> +
> struct mana_ib_dev {
> struct ib_device ib_dev;
> struct gdma_dev *gdma_dev;
> struct gdma_context *gc;
> struct gdma_queue *fatal_err_eq;
> mana_handle_t adapter_handle;
> + struct mana_ib_adapter_caps adapter_caps;
> };
>
> struct mana_ib_wq {
> @@ -96,6 +118,7 @@ struct mana_ib_rwq_ind_table { };
>
> enum mana_ib_command_code {
> + MANA_IB_GET_ADAPTER_CAP = 0x30001,
> MANA_IB_CREATE_ADAPTER = 0x30002,
> MANA_IB_DESTROY_ADAPTER = 0x30003,
> };
> @@ -120,6 +143,32 @@ struct mana_ib_destroy_adapter_resp {
> struct gdma_resp_hdr hdr;
> }; /* HW Data */
>
> +struct mana_ib_query_adapter_caps_req {
> + struct gdma_req_hdr hdr;
> +}; /*HW Data */
> +
> +struct mana_ib_query_adapter_caps_resp {
> + struct gdma_resp_hdr hdr;
> + u32 max_sq_id;
> + u32 max_rq_id;
> + u32 max_cq_id;
> + u32 max_qp_count;
> + u32 max_cq_count;
> + u32 max_mr_count;
> + u32 max_pd_count;
> + u32 max_inbound_read_limit;
> + u32 max_outbound_read_limit;
> + u32 mw_count;
> + u32 max_srq_count;
> + u32 max_requester_sq_size;
> + u32 max_responder_sq_size;
> + u32 max_requester_rq_size;
> + u32 max_responder_rq_size;
> + u32 max_send_wqe_size;
> + u32 max_recv_wqe_size;
> + u32 max_inline_data_size;
> +}; /* HW Data */
> +
> int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
> struct ib_umem *umem,
> mana_handle_t *gdma_region);
> @@ -194,4 +243,6 @@ int mana_ib_create_adapter(struct mana_ib_dev
> *mib_dev);
>
> int mana_ib_destroy_adapter(struct mana_ib_dev *mib_dev);
>
> +int mana_ib_query_adapter_caps(struct mana_ib_dev *mib_dev);
> +
> #endif
> --
> 2.25.1
^ permalink raw reply
* RE: [PATCH V2] x86/hyperv: Rename hv_isolation_type_snp/en_snp() to isol_type_snp_paravisor/enlightened()
From: Dexuan Cui @ 2023-07-28 6:02 UTC (permalink / raw)
To: Tianyu Lan, KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org,
tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
arnd@arndb.de, Michael Kelley (LINUX)
Cc: Tianyu Lan, linux-arch@vger.kernel.org,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
vkuznets@redhat.com
In-Reply-To: <20230726124900.300258-1-ltykernel@gmail.com>
> From: Tianyu Lan <ltykernel@gmail.com>
> Sent: Wednesday, July 26, 2023 5:49 AM
> Subject: [PATCH V2] x86/hyperv: Rename hv_isolation_type_snp/en_snp() to
> isol_type_snp_paravisor/enlightened()
>
> From: Tianyu Lan <tiala@microsoft.com>
>
> Rename hv_isolation_type_snp and hv_isolation_type_en_snp()
> to make them much more intuitive.
>
> Suggested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
Can we make the names a little shorter by replacing "isol_type" with "cvm"?
e.g. hv_isolation_type_en_snp --> hv_cvm_snp_enlightened,
hv_isolation_type_snp --> hv_cvm_snp_paravisor.
IMO hv_cvm_snp_enlightened is better than hv_isol_type_snp_enlightened?
BTW, I'm not sure if we really want hv_isol_type_snp_enlightened()
and hv_isol_type_snp_paravisor().
I think probably we can use
"hv_cvm_snp() && !hyperv_paravisor_present" and
"hv_cvm_snp() && hyperv_paravisor_present" instead, respectively.
A lot of usage of hv_isol_type_snp_paravisor() in drivers/hv/hv.c and
arch/x86/kernel/cpu/mshyperv.c will need to be changed to
hyperv_paravisor_present for TDX VMs with paravisor.
Some of the hv_isol_type_snp_enlightened() usage will need to be
changed for TDX VMs without paravisor.
Can we hold off the patch before the fully enlightened SNP patches
and the TDX patches are accepted? IMO it's better to have the core
logic to be accepted first and then we can do clean-up later.
I have a drafted patch for TDX HCL support here:
https://github.com/dcui/linux/commit/9893873bdef6f1e5574f784ed6e1d9d5bc54f1d8
(the patch introduces a global variable "hyperv_paravisor_present")
I'm further polishing the patches and will post soon.
Thanks,
Dexuan
^ permalink raw reply
* Re: [PATCH v3] hv/hv_kvp_daemon: Add support for keyfile config based connection profile in NM
From: Ani Sinha @ 2023-07-28 10:24 UTC (permalink / raw)
To: Shradha Gupta
Cc: Wei Liu, Olaf Hering, linux-kernel, linux-hyperv,
K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Dexuan Cui,
Long Li, Michael Kelley
In-Reply-To: <53E9AC1D-C907-4B55-97F2-FC10DCD4D470@redhat.com>
> On 12-Jul-2023, at 12:32 PM, Ani Sinha <anisinha@redhat.com> wrote:
>
>
>
>> On 23-May-2023, at 11:06 AM, Shradha Gupta <shradhagupta@linux.microsoft.com> wrote:
>>
>> On Mon, May 08, 2023 at 05:16:19PM +0000, Wei Liu wrote:
>>> On Mon, May 08, 2023 at 07:12:46PM +0200, Olaf Hering wrote:
>>>> Mon, 8 May 2023 16:47:54 +0000 Wei Liu <wei.liu@kernel.org>:
>>>>
>>>>> Olaf, is this a reviewed-by from you? :-)
>>>>
>>>> Sorry, I did not review the new functionality, just tried to make sure there will be no regression for existing consumers.
>>>
>>> Okay, this is fine, too. Thank you for looking into this.
>>>
>>>
>>>>
>>>> Olaf
>>>
>>
>> Gentle reminder.
>>
>
> I have a comment about the following change:
>
> + error = fprintf(nmfile, "\n[ipv4]\n");
> + if (error < 0)
> + goto setval_error;
> +
> + if (new_val->dhcp_enabled) {
> + error = kvp_write_file(nmfile, "method", "", "auto");
> + if (error < 0)
> + goto setval_error;
> + } else {
> + error = kvp_write_file(nmfile, "method", "", "manual");
> + if (error < 0)
> + goto setval_error;
> + }
>
> I think the method equally would apply for ipv6 as it applies for ipv4.
> We can use https://www.golinuxcloud.com/nmcli-command-examples-cheatsheet-centos-rhel/#18_Disable_IPv6_Address_for_ethernet_connection_IPV6INIT as a reference.
> So setting the method should be common to both ipv4 and ipv6.
Ping …
^ permalink raw reply
* Re: [PATCH V3 5/9] x86/hyperv: Use vmmcall to implement Hyper-V hypercall in sev-snp enlightened guest
From: Tianyu Lan @ 2023-07-28 10:45 UTC (permalink / raw)
To: Michael Kelley (LINUX), KY Srinivasan, Haiyang Zhang,
wei.liu@kernel.org, Dexuan Cui, tglx@linutronix.de,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
x86@kernel.org, hpa@zytor.com, daniel.lezcano@linaro.org,
arnd@arndb.de
Cc: Tianyu Lan, linux-arch@vger.kernel.org,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
vkuznets@redhat.com
In-Reply-To: <BYAPR21MB16880B1657BA4C907D002730D700A@BYAPR21MB1688.namprd21.prod.outlook.com>
On 7/26/2023 10:29 PM, Michael Kelley (LINUX) wrote:
>> Hi Michael:
>> Thanks for your review. The patch mentioned by Boris has not been
>> merged and so still use X86_FEATURE_SEV_ES here. We may replace the
>> feature flag with X86_FEATURE_SEV_SNP after it's upstreamed.
>>
> Just so I'm clear, is it true that in an SEV-SNP VM, the CPUID flags for
> SEV-ES *and* SEV-SNP are set? That would seem to be necessary for
> your approach to work.
Yes, SEV and SEV-ES flags are set in the SEV-SNP guest and they are
necessary.
>
> I wonder if it would be better to take the patch from Brijesh Singh
> that adds X86_FEATURE_SEV_SNP and add it to your patch set (with
> Brijesh's agreement, of course). That patch is small and straightforward.
>
I will sync with Brijesh. Thanks for the suggestion.
^ permalink raw reply
* Re: [PATCH V2] x86/hyperv: Rename hv_isolation_type_snp/en_snp() to isol_type_snp_paravisor/enlightened()
From: Tianyu Lan @ 2023-07-28 14:21 UTC (permalink / raw)
To: Michael Kelley (LINUX), KY Srinivasan, Haiyang Zhang,
wei.liu@kernel.org, Dexuan Cui, tglx@linutronix.de,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
x86@kernel.org, hpa@zytor.com, arnd@arndb.de
Cc: Tianyu Lan, linux-arch@vger.kernel.org,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org,
vkuznets@redhat.com
In-Reply-To: <BYAPR21MB168896AAD24E773B92DD2B10D706A@BYAPR21MB1688.namprd21.prod.outlook.com>
On 7/28/2023 10:53 AM, Michael Kelley (LINUX) wrote:
>> @@ -268,7 +268,7 @@ static inline void hv_sev_init_mem_and_cpu(void) {}
>> static int hv_snp_boot_ap(int cpu, unsigned long start_ip) {}
>> #endif
>>
>> -extern bool hv_isolation_type_snp(void);
>> +extern bool hv_isol_type_snp_paravisor(void);
> This declaration of hv_isolation_type_snp() also occurs twice
> in include/asm-generic/mshyperv.h. I think this one can be
> dropped entirely rather than renamed since
> include/asm-generic/mshyperv.h is #include'd at the bottom of
> this file, and there is no user in between.
>
> hv_isolation_type_snp() is used in several architecture
> independent source code files, so having it declared in
> include/asm-generic/mshyperv.h makes sense rather than
> being in an architecture-specific version of mshyperv.h.
>
Agree. Will update in the next version.
^ permalink raw reply
* [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.
From: Wei Hu @ 2023-07-28 17:07 UTC (permalink / raw)
To: netdev, linux-hyperv, linux-rdma, longli, sharmaajay, jgg, leon,
kys, haiyangz, wei.liu, decui, davem, edumazet, kuba, pabeni,
vkuznets, ssengar, shradhagupta, weh
Add EQ interrupt support for mana ib driver. Allocate EQs per ucontext
to receive interrupt. Attach EQ when CQ is created. Call CQ interrupt
handler when completion interrupt happens. EQs are destroyed when
ucontext is deallocated.
The change calls some public APIs in the mana ethernet driver to
allocate EQs and other resources. The EQ process routine is also shared
by the mana ethernet and mana ib drivers.
Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
Signed-off-by: Wei Hu <weh@microsoft.com>
---
v2: Use ibdev_dbg to print error messages and return -ENOMEM
when kzalloc fails.
v3: Check return value on mana_ib_gd_destroy_dma_region(). Remove most
debug prints.
v4: Fix a couple of nits and perform thorough testing in a production env.
drivers/infiniband/hw/mana/cq.c | 35 ++++-
drivers/infiniband/hw/mana/main.c | 84 +++++++++++
drivers/infiniband/hw/mana/mana_ib.h | 4 +
drivers/infiniband/hw/mana/qp.c | 79 ++++++++++-
.../net/ethernet/microsoft/mana/gdma_main.c | 131 ++++++++++--------
drivers/net/ethernet/microsoft/mana/mana_en.c | 1 +
include/net/mana/gdma.h | 9 +-
7 files changed, 278 insertions(+), 65 deletions(-)
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index d141cab8a1e6..6865dab66d48 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ struct gdma_dev *gd;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gd = mdev->gdma_dev;
+ gc = gd->gdma_context;
if (udata->inlen < sizeof(ucmd))
return -EINVAL;
+ cq->comp_vector = attr->comp_vector > gc->max_num_queues ?
+ 0 : attr->comp_vector;
+
err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
if (err) {
ibdev_dbg(ibdev,
@@ -69,11 +76,35 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
struct ib_device *ibdev = ibcq->device;
struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ struct gdma_dev *gd;
+ int err;
+
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gd = mdev->gdma_dev;
+ gc = gd->gdma_context;
+
- mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
- ib_umem_release(cq->umem);
+
+ if (atomic_read(&ibcq->usecnt) == 0) {
+ err = mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to destroy dma region, %d\n", err);
+ return err;
+ }
+ kfree(gc->cq_table[cq->id]);
+ gc->cq_table[cq->id] = NULL;
+ ib_umem_release(cq->umem);
+ }
return 0;
}
+
+void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
+{
+ struct mana_ib_cq *cq = ctx;
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 7be4c3adb4e2..b20a6c6c1de1 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -143,6 +143,78 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return err;
}
+static void mana_ib_destroy_eq(struct mana_ib_ucontext *ucontext,
+ struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev->gdma_dev->gdma_context;
+ struct gdma_queue *eq;
+ int i;
+
+ if (!ucontext->eqs)
+ return;
+
+ for (i = 0; i < gc->max_num_queues; i++) {
+ eq = ucontext->eqs[i].eq;
+ if (!eq)
+ continue;
+
+ mana_gd_destroy_queue(gc, eq);
+ }
+
+ kfree(ucontext->eqs);
+ ucontext->eqs = NULL;
+}
+
+static int mana_ib_create_eq(struct mana_ib_ucontext *ucontext,
+ struct mana_ib_dev *mdev)
+{
+ struct gdma_queue_spec spec = {};
+ struct gdma_queue *queue;
+ struct gdma_context *gc;
+ struct ib_device *ibdev;
+ struct gdma_dev *gd;
+ int err;
+ int i;
+
+ if (!ucontext || !mdev)
+ return -EINVAL;
+
+ ibdev = ucontext->ibucontext.device;
+ gd = mdev->gdma_dev;
+
+ gc = gd->gdma_context;
+
+ ucontext->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
+ GFP_KERNEL);
+ if (!ucontext->eqs)
+ return -ENOMEM;
+
+ spec.type = GDMA_EQ;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = EQ_SIZE;
+ spec.eq.callback = NULL;
+ spec.eq.context = ucontext->eqs;
+ spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+ spec.eq.msix_allocated = true;
+
+ for (i = 0; i < gc->max_num_queues; i++) {
+ spec.eq.msix_index = i;
+ err = mana_gd_create_mana_eq(gd, &spec, &queue);
+ if (err)
+ goto out;
+
+ queue->eq.disable_needed = true;
+ ucontext->eqs[i].eq = queue;
+ }
+
+ return 0;
+
+out:
+ ibdev_dbg(ibdev, "Failed to allocated eq err %d\n", err);
+ mana_ib_destroy_eq(ucontext, mdev);
+ return err;
+}
+
static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
int doorbell_page)
{
@@ -225,7 +297,17 @@ int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
ucontext->doorbell = doorbell_page;
+ ret = mana_ib_create_eq(ucontext, mdev);
+ if (ret) {
+ ibdev_dbg(ibdev, "Failed to create eq's , ret %d\n", ret);
+ goto err;
+ }
+
return 0;
+
+err:
+ mana_gd_destroy_doorbell_page(gc, doorbell_page);
+ return ret;
}
void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
@@ -240,6 +322,8 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
gc = mdev->gdma_dev->gdma_context;
+ mana_ib_destroy_eq(mana_ucontext, mdev);
+
ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
if (ret)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 502cc8672eef..9672fa1670a5 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -67,6 +67,7 @@ struct mana_ib_cq {
int cqe;
u64 gdma_region;
u64 id;
+ u32 comp_vector;
};
struct mana_ib_qp {
@@ -86,6 +87,7 @@ struct mana_ib_qp {
struct mana_ib_ucontext {
struct ib_ucontext ibucontext;
u32 doorbell;
+ struct mana_eq *eqs;
};
struct mana_ib_rwq_ind_table {
@@ -159,4 +161,6 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
+void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq);
+
#endif
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 54b61930a7fd..b8fcb7a8eae0 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -96,16 +96,20 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
struct mana_ib_dev *mdev =
container_of(pd->device, struct mana_ib_dev, ib_dev);
+ struct ib_ucontext *ib_ucontext = pd->uobject->context;
struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
struct mana_ib_create_qp_rss_resp resp = {};
struct mana_ib_create_qp_rss ucmd = {};
+ struct mana_ib_ucontext *mana_ucontext;
struct gdma_dev *gd = mdev->gdma_dev;
mana_handle_t *mana_ind_table;
struct mana_port_context *mpc;
+ struct gdma_queue *gdma_cq;
struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_cq *cq;
struct mana_ib_wq *wq;
+ struct mana_eq *eq;
unsigned int ind_tbl_size;
struct ib_cq *ibcq;
struct ib_wq *ibwq;
@@ -114,6 +118,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
int ret;
mc = gd->driver_data;
+ mana_ucontext =
+ container_of(ib_ucontext, struct mana_ib_ucontext, ibucontext);
if (!udata || udata->inlen < sizeof(ucmd))
return -EINVAL;
@@ -180,6 +186,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
for (i = 0; i < ind_tbl_size; i++) {
struct mana_obj_spec wq_spec = {};
struct mana_obj_spec cq_spec = {};
+ unsigned int max_num_queues = gd->gdma_context->max_num_queues;
ibwq = ind_tbl->ind_tbl[i];
wq = container_of(ibwq, struct mana_ib_wq, ibwq);
@@ -193,7 +200,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
cq_spec.gdma_region = cq->gdma_region;
cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
- cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+ eq = &mana_ucontext->eqs[cq->comp_vector % max_num_queues];
+ cq_spec.attached_eq = eq->eq->id;
ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
&wq_spec, &cq_spec, &wq->rx_object);
@@ -215,6 +223,22 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
resp.entries[i].wqid = wq->id;
mana_ind_table[i] = wq->rx_object;
+
+ if (gd->gdma_context->cq_table[cq->id] == NULL) {
+
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (!gdma_cq) {
+ ret = -ENOMEM;
+ goto free_cq;
+ }
+
+ gdma_cq->cq.context = cq;
+ gdma_cq->type = GDMA_CQ;
+ gdma_cq->cq.callback = mana_ib_cq_handler;
+ gdma_cq->id = cq->id;
+ gd->gdma_context->cq_table[cq->id] = gdma_cq;
+ }
+
}
resp.num_entries = i;
@@ -224,7 +248,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
ucmd.rx_hash_key_len,
ucmd.rx_hash_key);
if (ret)
- goto fail;
+ goto free_cq;
ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (ret) {
@@ -238,6 +262,23 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
return 0;
+free_cq:
+ {
+ int j = i;
+ u64 cqid;
+
+ while (j-- > 0) {
+ cqid = resp.entries[j].cqid;
+ gdma_cq = gd->gdma_context->cq_table[cqid];
+ cq = gdma_cq->cq.context;
+ if (atomic_read(&cq->ibcq.usecnt) == 0) {
+ kfree(gd->gdma_context->cq_table[cqid]);
+ gd->gdma_context->cq_table[cqid] = NULL;
+ }
+ }
+
+ }
+
fail:
while (i-- > 0) {
ibwq = ind_tbl->ind_tbl[i];
@@ -269,10 +310,12 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
struct mana_obj_spec wq_spec = {};
struct mana_obj_spec cq_spec = {};
struct mana_port_context *mpc;
+ struct gdma_queue *gdma_cq;
struct mana_context *mc;
struct net_device *ndev;
struct ib_umem *umem;
- int err;
+ struct mana_eq *eq;
+ int err, eq_vec;
u32 port;
mc = gd->driver_data;
@@ -350,7 +393,9 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
cq_spec.gdma_region = send_cq->gdma_region;
cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
- cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+ eq_vec = send_cq->comp_vector % gd->gdma_context->max_num_queues;
+ eq = &mana_ucontext->eqs[eq_vec];
+ cq_spec.attached_eq = eq->eq->id;
err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
&cq_spec, &qp->tx_object);
@@ -368,6 +413,23 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
qp->sq_id = wq_spec.queue_index;
send_cq->id = cq_spec.queue_index;
+ if (gd->gdma_context->cq_table[send_cq->id] == NULL) {
+
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (!gdma_cq) {
+ err = -ENOMEM;
+ goto err_destroy_wqobj_and_cq;
+ }
+
+ gdma_cq->cq.context = send_cq;
+ gdma_cq->type = GDMA_CQ;
+ gdma_cq->cq.callback = mana_ib_cq_handler;
+ gdma_cq->id = send_cq->id;
+ gd->gdma_context->cq_table[send_cq->id] = gdma_cq;
+ } else {
+ gdma_cq = gd->gdma_context->cq_table[send_cq->id];
+ }
+
ibdev_dbg(&mdev->ib_dev,
"ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
qp->tx_object, qp->sq_id, send_cq->id);
@@ -381,12 +443,17 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
ibdev_dbg(&mdev->ib_dev,
"Failed copy udata for create qp-raw, %d\n",
err);
- goto err_destroy_wq_obj;
+ goto err_destroy_wqobj_and_cq;
}
return 0;
-err_destroy_wq_obj:
+err_destroy_wqobj_and_cq:
+ if (atomic_read(&send_cq->ibcq.usecnt) == 0) {
+ kfree(gdma_cq);
+ gd->gdma_context->cq_table[send_cq->id] = NULL;
+ }
+
mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
err_destroy_dma_region:
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 8f3f78b68592..16e4b049a6c8 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -368,53 +368,57 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
}
}
-static void mana_gd_process_eq_events(void *arg)
+static void mana_gd_process_eq_events(struct list_head *eq_list)
{
u32 owner_bits, new_bits, old_bits;
union gdma_eqe_info eqe_info;
struct gdma_eqe *eq_eqe_ptr;
- struct gdma_queue *eq = arg;
struct gdma_context *gc;
+ struct gdma_queue *eq;
struct gdma_eqe *eqe;
u32 head, num_eqe;
int i;
- gc = eq->gdma_dev->gdma_context;
+ list_for_each_entry_rcu(eq, eq_list, entry) {
+ gc = eq->gdma_dev->gdma_context;
- num_eqe = eq->queue_size / GDMA_EQE_SIZE;
- eq_eqe_ptr = eq->queue_mem_ptr;
+ num_eqe = eq->queue_size / GDMA_EQE_SIZE;
+ eq_eqe_ptr = eq->queue_mem_ptr;
- /* Process up to 5 EQEs at a time, and update the HW head. */
- for (i = 0; i < 5; i++) {
- eqe = &eq_eqe_ptr[eq->head % num_eqe];
- eqe_info.as_uint32 = eqe->eqe_info;
- owner_bits = eqe_info.owner_bits;
+ /* Process up to 5 EQEs at a time, and update the HW head. */
+ for (i = 0; i < 5; i++) {
+ eqe = &eq_eqe_ptr[eq->head % num_eqe];
+ eqe_info.as_uint32 = eqe->eqe_info;
+ owner_bits = eqe_info.owner_bits;
- old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
- /* No more entries */
- if (owner_bits == old_bits)
- break;
+ old_bits =
+ (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
+ /* No more entries */
+ if (owner_bits == old_bits)
+ break;
- new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
- if (owner_bits != new_bits) {
- dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id);
- break;
- }
+ new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
+ if (owner_bits != new_bits) {
+ dev_err(gc->dev, "EQ %d: overflow detected\n",
+ eq->id);
+ break;
+ }
- /* Per GDMA spec, rmb is necessary after checking owner_bits, before
- * reading eqe.
- */
- rmb();
+ /* Per GDMA spec, rmb is necessary after checking
+ * owner_bits, before reading eqe.
+ */
+ rmb();
- mana_gd_process_eqe(eq);
+ mana_gd_process_eqe(eq);
- eq->head++;
- }
+ eq->head++;
+ }
- head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
+ head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
- mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
- head, SET_ARM_BIT);
+ mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type,
+ eq->id, head, SET_ARM_BIT);
+ }
}
static int mana_gd_register_irq(struct gdma_queue *queue,
@@ -432,44 +436,47 @@ static int mana_gd_register_irq(struct gdma_queue *queue,
gc = gd->gdma_context;
r = &gc->msix_resource;
dev = gc->dev;
+ msi_index = spec->eq.msix_index;
spin_lock_irqsave(&r->lock, flags);
- msi_index = find_first_zero_bit(r->map, r->size);
- if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
- err = -ENOSPC;
- } else {
- bitmap_set(r->map, msi_index, 1);
- queue->eq.msix_index = msi_index;
- }
-
- spin_unlock_irqrestore(&r->lock, flags);
+ if (!spec->eq.msix_allocated) {
+ msi_index = find_first_zero_bit(r->map, r->size);
+ if (msi_index >= r->size || msi_index >= gc->num_msix_usable)
+ err = -ENOSPC;
+ else
+ bitmap_set(r->map, msi_index, 1);
- if (err) {
- dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
- err, msi_index, r->size, gc->num_msix_usable);
+ if (err) {
+ dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
+ err, msi_index, r->size, gc->num_msix_usable);
- return err;
+ goto out;
+ }
}
+ queue->eq.msix_index = msi_index;
gic = &gc->irq_contexts[msi_index];
- WARN_ON(gic->handler || gic->arg);
-
- gic->arg = queue;
+ list_add_rcu(&queue->entry, &gic->eq_list);
gic->handler = mana_gd_process_eq_events;
- return 0;
+out:
+ spin_unlock_irqrestore(&r->lock, flags);
+
+ return err;
}
-static void mana_gd_deregiser_irq(struct gdma_queue *queue)
+static void mana_gd_deregister_irq(struct gdma_queue *queue)
{
struct gdma_dev *gd = queue->gdma_dev;
struct gdma_irq_context *gic;
struct gdma_context *gc;
struct gdma_resource *r;
unsigned int msix_index;
+ struct list_head *p, *n;
+ struct gdma_queue *eq;
unsigned long flags;
gc = gd->gdma_context;
@@ -480,13 +487,25 @@ static void mana_gd_deregiser_irq(struct gdma_queue *queue)
if (WARN_ON(msix_index >= gc->num_msix_usable))
return;
+ spin_lock_irqsave(&r->lock, flags);
+
gic = &gc->irq_contexts[msix_index];
- gic->handler = NULL;
- gic->arg = NULL;
- spin_lock_irqsave(&r->lock, flags);
- bitmap_clear(r->map, msix_index, 1);
+ list_for_each_safe(p, n, &gic->eq_list) {
+ eq = list_entry(p, struct gdma_queue, entry);
+ if (queue == eq) {
+ list_del_rcu(&eq->entry);
+ break;
+ }
+ }
+
+ if (list_empty(&gic->eq_list)) {
+ gic->handler = NULL;
+ bitmap_clear(r->map, msix_index, 1);
+ }
+
spin_unlock_irqrestore(&r->lock, flags);
+ synchronize_rcu();
queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
}
@@ -550,7 +569,7 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
}
- mana_gd_deregiser_irq(queue);
+ mana_gd_deregister_irq(queue);
if (queue->eq.disable_needed)
mana_gd_disable_queue(queue);
@@ -565,7 +584,7 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
u32 log2_num_entries;
int err;
- queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+ queue->eq.msix_index = spec->eq.msix_index;
log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE);
@@ -602,6 +621,7 @@ static int mana_gd_create_eq(struct gdma_dev *gd,
mana_gd_destroy_eq(gc, false, queue);
return err;
}
+EXPORT_SYMBOL(mana_gd_create_mana_eq);
static void mana_gd_create_cq(const struct gdma_queue_spec *spec,
struct gdma_queue *queue)
@@ -873,6 +893,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
mana_gd_free_memory(gmi);
kfree(queue);
}
+EXPORT_SYMBOL(mana_gd_destroy_queue);
int mana_gd_verify_vf_version(struct pci_dev *pdev)
{
@@ -1188,7 +1209,7 @@ static irqreturn_t mana_gd_intr(int irq, void *arg)
struct gdma_irq_context *gic = arg;
if (gic->handler)
- gic->handler(gic->arg);
+ gic->handler(&gic->eq_list);
return IRQ_HANDLED;
}
@@ -1241,7 +1262,7 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev)
for (i = 0; i < nvec; i++) {
gic = &gc->irq_contexts[i];
gic->handler = NULL;
- gic->arg = NULL;
+ INIT_LIST_HEAD(&gic->eq_list);
if (!i)
snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s",
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 06d6292e09b3..85345225813f 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1156,6 +1156,7 @@ static int mana_create_eq(struct mana_context *ac)
spec.eq.callback = NULL;
spec.eq.context = ac->eqs;
spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+ spec.eq.msix_allocated = false;
for (i = 0; i < gc->max_num_queues; i++) {
err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 96c120160f15..cc728fc42043 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -6,6 +6,7 @@
#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
+#include <linux/list.h>
#include "shm_channel.h"
@@ -291,6 +292,8 @@ struct gdma_queue {
u32 head;
u32 tail;
+ struct list_head entry;
+
/* Extra fields specific to EQ/CQ. */
union {
struct {
@@ -325,6 +328,8 @@ struct gdma_queue_spec {
void *context;
unsigned long log2_throttle_limit;
+ bool msix_allocated;
+ unsigned int msix_index;
} eq;
struct {
@@ -340,8 +345,8 @@ struct gdma_queue_spec {
#define MANA_IRQ_NAME_SZ 32
struct gdma_irq_context {
- void (*handler)(void *arg);
- void *arg;
+ void (*handler)(struct list_head *arg);
+ struct list_head eq_list;
char name[MANA_IRQ_NAME_SZ];
};
--
2.25.1
^ permalink raw reply related
* Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.
From: Jason Gunthorpe @ 2023-07-28 17:43 UTC (permalink / raw)
To: Wei Hu
Cc: netdev, linux-hyperv, linux-rdma, longli, sharmaajay, leon, kys,
haiyangz, wei.liu, decui, davem, edumazet, kuba, pabeni, vkuznets,
ssengar, shradhagupta
In-Reply-To: <20230728170749.1888588-1-weh@microsoft.com>
On Fri, Jul 28, 2023 at 05:07:49PM +0000, Wei Hu wrote:
> Add EQ interrupt support for mana ib driver. Allocate EQs per ucontext
> to receive interrupt. Attach EQ when CQ is created. Call CQ interrupt
> handler when completion interrupt happens. EQs are destroyed when
> ucontext is deallocated.
It seems strange that interrupts would be somehow linked to a
ucontext? Interrupts are highly limited; you can DOS the entire system
if someone abuses this.
Generally I expect a properly functioning driver to use one interrupt
per CPU core.
You should tie the CQ to a shared EQ belonging to the core that the CQ
wants to have affinity to.
Jason
^ permalink raw reply
* RE: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.
From: Long Li @ 2023-07-28 17:51 UTC (permalink / raw)
To: Jason Gunthorpe, Wei Hu
Cc: netdev@vger.kernel.org, linux-hyperv@vger.kernel.org,
linux-rdma@vger.kernel.org, Ajay Sharma, leon@kernel.org,
KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org, Dexuan Cui,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, vkuznets@redhat.com,
ssengar@linux.microsoft.com, shradhagupta@linux.microsoft.com
In-Reply-To: <ZMP+MH7f/Vk9/J0b@ziepe.ca>
> Subject: Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib
> driver.
>
> On Fri, Jul 28, 2023 at 05:07:49PM +0000, Wei Hu wrote:
> > Add EQ interrupt support for mana ib driver. Allocate EQs per ucontext
> > to receive interrupt. Attach EQ when CQ is created. Call CQ interrupt
> > handler when completion interrupt happens. EQs are destroyed when
> > ucontext is deallocated.
>
> It seems strange that interrupts would be somehow linked to a ucontext?
> interrupts are highly limited, you can DOS the entire system if someone abuses
> this.
>
> Generally I expect a properly functioning driver to use one interrupt per CPU core.
Yes, MANA uses one interrupt per CPU. One interrupt is shared among multiple
EQs.
>
> You should tie the CQ to a shared EQ belong to the core that the CQ wants to have
> affinity to.
The reason for using a separate EQ for a ucontext is to prevent DOS. If we use
a shared EQ, a single ucontext can storm this shared EQ, affecting other users.
If one ucontext decides to abuse its own EQ, the hardware won't be able to generate
enough IRQs to storm the whole system.
Long
^ permalink raw reply
* Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.
From: Jason Gunthorpe @ 2023-07-28 18:02 UTC (permalink / raw)
To: Long Li
Cc: Wei Hu, netdev@vger.kernel.org, linux-hyperv@vger.kernel.org,
linux-rdma@vger.kernel.org, Ajay Sharma, leon@kernel.org,
KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org, Dexuan Cui,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, vkuznets@redhat.com,
ssengar@linux.microsoft.com, shradhagupta@linux.microsoft.com
In-Reply-To: <PH7PR21MB3263C134979B17F1C53D3E8DCE06A@PH7PR21MB3263.namprd21.prod.outlook.com>
On Fri, Jul 28, 2023 at 05:51:46PM +0000, Long Li wrote:
> > Subject: Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib
> > driver.
> >
> > On Fri, Jul 28, 2023 at 05:07:49PM +0000, Wei Hu wrote:
> > > Add EQ interrupt support for mana ib driver. Allocate EQs per ucontext
> > > to receive interrupt. Attach EQ when CQ is created. Call CQ interrupt
> > > handler when completion interrupt happens. EQs are destroyed when
> > > ucontext is deallocated.
> >
> > It seems strange that interrupts would be somehow linked to a ucontext?
> > interrupts are highly limited, you can DOS the entire system if someone abuses
> > this.
> >
> > Generally I expect a properly functioning driver to use one interrupt per CPU core.
>
> Yes, MANA uses one interrupt per CPU. One interrupt is shared among multiple
> EQs.
So you have another multiplexing layer between the interrupt and the
EQ? That is a lot of multiplexing layers.
> > You should tie the CQ to a shared EQ belong to the core that the CQ wants to have
> > affinity to.
>
> The reason for using a separate EQ for a ucontext, is for preventing DOS. If we use
> a shared EQ, a single ucontext can storm this shared EQ affecting
> other users.
With a proper design it should not be possible. The CQ adds an entry
to the EQ and that should be rate limited by the ability of userspace
to schedule to re-arm the CQ.
Jason
^ permalink raw reply
* RE: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.
From: Long Li @ 2023-07-28 18:22 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Wei Hu, netdev@vger.kernel.org, linux-hyperv@vger.kernel.org,
linux-rdma@vger.kernel.org, Ajay Sharma, leon@kernel.org,
KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org, Dexuan Cui,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, vkuznets@redhat.com,
ssengar@linux.microsoft.com, shradhagupta@linux.microsoft.com
In-Reply-To: <ZMQCuQU+b/Ai9HcU@ziepe.ca>
> Subject: Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib
> driver.
>
> On Fri, Jul 28, 2023 at 05:51:46PM +0000, Long Li wrote:
> > > Subject: Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support
> > > to mana ib driver.
> > >
> > > On Fri, Jul 28, 2023 at 05:07:49PM +0000, Wei Hu wrote:
> > > > Add EQ interrupt support for mana ib driver. Allocate EQs per
> > > > ucontext to receive interrupt. Attach EQ when CQ is created. Call
> > > > CQ interrupt handler when completion interrupt happens. EQs are
> > > > destroyed when ucontext is deallocated.
> > >
> > > It seems strange that interrupts would be somehow linked to a ucontext?
> > > interrupts are highly limited, you can DOS the entire system if
> > > someone abuses this.
> > >
> > > Generally I expect a properly functioning driver to use one interrupt per CPU
> core.
> >
> > Yes, MANA uses one interrupt per CPU. One interrupt is shared among
> > multiple EQs.
>
> So you have another multiplexing layer between the interrupt and the EQ? That is
> alot of multiplexing layers..
>
> > > You should tie the CQ to a shared EQ belong to the core that the CQ
> > > wants to have affinity to.
> >
> > The reason for using a separate EQ for a ucontext, is for preventing
> > DOS. If we use a shared EQ, a single ucontext can storm this shared EQ
> > affecting other users.
>
> With a proper design it should not be possible. The CQ adds an entry to the EQ
> and that should be rate limited by the ability of userspace to schedule to re-arm
> the CQ.
I think DPDK user space can sometimes storm the EQ by arming the CQ from user-mode.
Please see the following code on arming the CQ from MLX4:
https://github.com/DPDK/dpdk/blob/12fcafcd62286933e6b167b14856d21f642efa5f/drivers/net/mlx4/mlx4_intr.c#L229
With a malicious DPDK user, this code can be abused to arm the CQ at an extremely high rate.
Long
^ permalink raw reply
* Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.
From: Jason Gunthorpe @ 2023-07-28 18:39 UTC (permalink / raw)
To: Long Li
Cc: Wei Hu, netdev@vger.kernel.org, linux-hyperv@vger.kernel.org,
linux-rdma@vger.kernel.org, Ajay Sharma, leon@kernel.org,
KY Srinivasan, Haiyang Zhang, wei.liu@kernel.org, Dexuan Cui,
davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
pabeni@redhat.com, vkuznets@redhat.com,
ssengar@linux.microsoft.com, shradhagupta@linux.microsoft.com
In-Reply-To: <PH7PR21MB326396D1782613FE406F616ACE06A@PH7PR21MB3263.namprd21.prod.outlook.com>
On Fri, Jul 28, 2023 at 06:22:53PM +0000, Long Li wrote:
> > Subject: Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib
> > driver.
> >
> > On Fri, Jul 28, 2023 at 05:51:46PM +0000, Long Li wrote:
> > > > Subject: Re: [PATCH v4 1/1] RDMA/mana_ib: Add EQ interrupt support
> > > > to mana ib driver.
> > > >
> > > > On Fri, Jul 28, 2023 at 05:07:49PM +0000, Wei Hu wrote:
> > > > > Add EQ interrupt support for mana ib driver. Allocate EQs per
> > > > > ucontext to receive interrupt. Attach EQ when CQ is created. Call
> > > > > CQ interrupt handler when completion interrupt happens. EQs are
> > > > > destroyed when ucontext is deallocated.
> > > >
> > > > It seems strange that interrupts would be somehow linked to a ucontext?
> > > > interrupts are highly limited, you can DOS the entire system if
> > > > someone abuses this.
> > > >
> > > > Generally I expect a properly functioning driver to use one interrupt per CPU
> > core.
> > >
> > > Yes, MANA uses one interrupt per CPU. One interrupt is shared among
> > > multiple EQs.
> >
> > So you have another multiplexing layer between the interrupt and the EQ? That is
> > alot of multiplexing layers..
> >
> > > > You should tie the CQ to a shared EQ belong to the core that the CQ
> > > > wants to have affinity to.
> > >
> > > The reason for using a separate EQ for a ucontext, is for preventing
> > > DOS. If we use a shared EQ, a single ucontext can storm this shared EQ
> > > affecting other users.
> >
> > With a proper design it should not be possible. The CQ adds an entry to the EQ
> > and that should be rate limited by the ability of userspace to schedule to re-arm
> > the CQ.
>
> I think DPDK user space can sometimes storm the EQ by arming the CQ
> from user-mode.
Maybe maliciously you can do a blind re-arm, but nothing sane should
do that.
> With a malicious DPDK user, this code can be abused to arm the CQ at
> extremely high rate.
Again, the rate of CQ re-arm is limited by the ability of userspace to
schedule, I'm reluctant to consider that a DOS vector. Doesn't your HW
have EQ overflow recovery?
Frankly, stacking more layers of IRQ multiplexing doesn't seem like it
should solve any problems; you are just shifting where the DOS can
occur. Allowing userspace to create EQs is its own DOS direction:
either you exhaust and DOS the number of EQs or you DOS the
multiplexing layer between the interrupt and the EQ.
Jason
^ permalink raw reply
* RE: [Patch v3 1/4] RDMA/mana_ib : Rename all mana_ib_dev type variables to mib_dev
From: Long Li @ 2023-07-28 21:23 UTC (permalink / raw)
To: sharmaajay@linuxonhyperv.com, Jason Gunthorpe, Leon Romanovsky,
Dexuan Cui, Wei Liu, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni
Cc: linux-rdma@vger.kernel.org, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Ajay Sharma
In-Reply-To: <1690402104-29518-2-git-send-email-sharmaajay@linuxonhyperv.com>
> -----Original Message-----
> From: sharmaajay@linuxonhyperv.com <sharmaajay@linuxonhyperv.com>
> Sent: Wednesday, July 26, 2023 1:08 PM
> To: Jason Gunthorpe <jgg@ziepe.ca>; Leon Romanovsky <leon@kernel.org>;
> Dexuan Cui <decui@microsoft.com>; Wei Liu <wei.liu@kernel.org>; David S.
> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>
> Cc: linux-rdma@vger.kernel.org; linux-hyperv@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>
> Subject: [Patch v3 1/4] RDMA/mana_ib : Rename all mana_ib_dev type
> variables to mib_dev
>
> From: Ajay Sharma <sharmaajay@microsoft.com>
>
> This patch does not introduce any functional changes. It establishes a naming
> convention to distinguish the two kinds of device variables, especially when
> both are used in the same function. Rename all mana_ib_dev type variables to
> mib_dev to have a clean separation between eth dev and ibdev variables.
>
> Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> ---
> drivers/infiniband/hw/mana/cq.c | 12 ++--
> drivers/infiniband/hw/mana/device.c | 34 +++++------
> drivers/infiniband/hw/mana/main.c | 87 ++++++++++++++--------------
> drivers/infiniband/hw/mana/mana_ib.h | 9 +--
> drivers/infiniband/hw/mana/mr.c | 29 +++++-----
> drivers/infiniband/hw/mana/qp.c | 82 +++++++++++++-------------
> drivers/infiniband/hw/mana/wq.c | 21 +++----
> 7 files changed, 140 insertions(+), 134 deletions(-)
>
> diff --git a/drivers/infiniband/hw/mana/cq.c
> b/drivers/infiniband/hw/mana/cq.c index d141cab8a1e6..1aed4e6360ba
> 100644
> --- a/drivers/infiniband/hw/mana/cq.c
> +++ b/drivers/infiniband/hw/mana/cq.c
> @@ -11,10 +11,10 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const
> struct ib_cq_init_attr *attr,
> struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> struct ib_device *ibdev = ibcq->device;
> struct mana_ib_create_cq ucmd = {};
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> int err;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> if (udata->inlen < sizeof(ucmd))
> return -EINVAL;
> @@ -41,7 +41,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> ib_cq_init_attr *attr,
> return err;
> }
>
> - err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq-
> >gdma_region);
> + err = mana_ib_gd_create_dma_region(mib_dev, cq->umem,
> +&cq->gdma_region);
> if (err) {
> ibdev_dbg(ibdev,
> "Failed to create dma region for create cq, %d\n",
> @@ -68,11 +68,11 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct
> ib_udata *udata) {
> struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> struct ib_device *ibdev = ibcq->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> - mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, cq->gdma_region);
> ib_umem_release(cq->umem);
>
> return 0;
> diff --git a/drivers/infiniband/hw/mana/device.c
> b/drivers/infiniband/hw/mana/device.c
> index d4541b8707e4..083f27246ba8 100644
> --- a/drivers/infiniband/hw/mana/device.c
> +++ b/drivers/infiniband/hw/mana/device.c
> @@ -51,51 +51,51 @@ static int mana_ib_probe(struct auxiliary_device
> *adev, {
> struct mana_adev *madev = container_of(adev, struct mana_adev,
> adev);
> struct gdma_dev *mdev = madev->mdev;
> + struct mana_ib_dev *mib_dev;
> struct mana_context *mc;
> - struct mana_ib_dev *dev;
> int ret;
>
> mc = mdev->driver_data;
>
> - dev = ib_alloc_device(mana_ib_dev, ib_dev);
> - if (!dev)
> + mib_dev = ib_alloc_device(mana_ib_dev, ib_dev);
> + if (!mib_dev)
> return -ENOMEM;
>
> - ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
> + ib_set_device_ops(&mib_dev->ib_dev, &mana_ib_dev_ops);
>
> - dev->ib_dev.phys_port_cnt = mc->num_ports;
> + mib_dev->ib_dev.phys_port_cnt = mc->num_ports;
>
> - ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n",
> mdev,
> - mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
> + ibdev_dbg(&mib_dev->ib_dev, "mdev=%p id=%d num_ports=%d\n",
> mdev,
> + mdev->dev_id.as_uint32, mib_dev->ib_dev.phys_port_cnt);
>
> - dev->gdma_dev = mdev;
> - dev->ib_dev.node_type = RDMA_NODE_IB_CA;
> + mib_dev->gdma_dev = mdev;
> + mib_dev->ib_dev.node_type = RDMA_NODE_IB_CA;
>
> /*
> * num_comp_vectors needs to set to the max MSIX index
> * when interrupts and event queues are implemented
> */
> - dev->ib_dev.num_comp_vectors = 1;
> - dev->ib_dev.dev.parent = mdev->gdma_context->dev;
> + mib_dev->ib_dev.num_comp_vectors = 1;
> + mib_dev->ib_dev.dev.parent = mdev->gdma_context->dev;
>
> - ret = ib_register_device(&dev->ib_dev, "mana_%d",
> + ret = ib_register_device(&mib_dev->ib_dev, "mana_%d",
> mdev->gdma_context->dev);
> if (ret) {
> - ib_dealloc_device(&dev->ib_dev);
> + ib_dealloc_device(&mib_dev->ib_dev);
> return ret;
> }
>
> - dev_set_drvdata(&adev->dev, dev);
> + dev_set_drvdata(&adev->dev, mib_dev);
>
> return 0;
> }
>
> static void mana_ib_remove(struct auxiliary_device *adev) {
> - struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
> + struct mana_ib_dev *mib_dev = dev_get_drvdata(&adev->dev);
>
> - ib_unregister_device(&dev->ib_dev);
> - ib_dealloc_device(&dev->ib_dev);
> + ib_unregister_device(&mib_dev->ib_dev);
> + ib_dealloc_device(&mib_dev->ib_dev);
> }
>
> static const struct auxiliary_device_id mana_id_table[] = { diff --git
> a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
> index 7be4c3adb4e2..189e774cdab6 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -5,10 +5,10 @@
>
> #include "mana_ib.h"
>
> -void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd
> *pd,
> +void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct
> mana_ib_pd
> +*pd,
> u32 port)
> {
> - struct gdma_dev *gd = dev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct mana_port_context *mpc;
> struct net_device *ndev;
> struct mana_context *mc;
> @@ -28,10 +28,11 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev,
> struct mana_ib_pd *pd,
> mutex_unlock(&pd->vport_mutex);
> }
>
> -int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct
> mana_ib_pd *pd,
> +int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev, u32 port,
> + struct mana_ib_pd *pd,
> u32 doorbell_id)
> {
> - struct gdma_dev *mdev = dev->gdma_dev;
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> struct net_device *ndev;
> @@ -45,7 +46,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32
> port, struct mana_ib_pd *pd,
>
> pd->vport_use_count++;
> if (pd->vport_use_count > 1) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Skip as this PD is already configured vport\n");
> mutex_unlock(&pd->vport_mutex);
> return 0;
> @@ -56,7 +57,8 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32
> port, struct mana_ib_pd *pd,
> pd->vport_use_count--;
> mutex_unlock(&pd->vport_mutex);
>
> - ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n",
> err);
> + ibdev_dbg(&mib_dev->ib_dev, "Failed to configure
> vPort %d\n",
> + err);
> return err;
> }
>
> @@ -65,7 +67,7 @@ int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32
> port, struct mana_ib_pd *pd,
> pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
> pd->tx_vp_offset = mpc->tx_vp_offset;
>
> - ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x
> doorbell_id %x\n",
> + ibdev_dbg(&mib_dev->ib_dev, "vport handle %llx pdid %x doorbell_id
> +%x\n",
> mpc->port_handle, pd->pdn, doorbell_id);
>
> return 0;
> @@ -78,12 +80,12 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> struct gdma_create_pd_resp resp = {};
> struct gdma_create_pd_req req = {};
> enum gdma_pd_flags flags = 0;
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_dev *mdev;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - mdev = dev->gdma_dev;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mdev = mib_dev->gdma_dev;
>
> mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
> sizeof(resp));
> @@ -93,7 +95,7 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> sizeof(resp), &resp);
>
> if (err || resp.hdr.status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to get pd_id err %d status %u\n", err,
> resp.hdr.status);
> if (!err)
> @@ -104,7 +106,7 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
>
> pd->pd_handle = resp.pd_handle;
> pd->pdn = resp.pd_id;
> - ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
> + ibdev_dbg(&mib_dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
> pd->pd_handle, pd->pdn);
>
> mutex_init(&pd->vport_mutex);
> @@ -118,12 +120,12 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> struct ib_device *ibdev = ibpd->device;
> struct gdma_destory_pd_resp resp = {};
> struct gdma_destroy_pd_req req = {};
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_dev *mdev;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - mdev = dev->gdma_dev;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mdev = mib_dev->gdma_dev;
>
> mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
> sizeof(resp));
> @@ -133,7 +135,7 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
> sizeof(resp), &resp);
>
> if (err || resp.hdr.status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to destroy pd_handle 0x%llx err %d
> status %u",
> pd->pd_handle, err, resp.hdr.status);
> if (!err)
> @@ -204,14 +206,14 @@ int mana_ib_alloc_ucontext(struct ib_ucontext
> *ibcontext,
> struct mana_ib_ucontext *ucontext =
> container_of(ibcontext, struct mana_ib_ucontext,
> ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_context *gc;
> struct gdma_dev *dev;
> int doorbell_page;
> int ret;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - dev = mdev->gdma_dev;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + dev = mib_dev->gdma_dev;
> gc = dev->gdma_context;
>
> /* Allocate a doorbell page index */
> @@ -233,12 +235,12 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext
> *ibcontext)
> struct mana_ib_ucontext *mana_ucontext =
> container_of(ibcontext, struct mana_ib_ucontext,
> ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_context *gc;
> int ret;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - gc = mdev->gdma_dev->gdma_context;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + gc = mib_dev->gdma_dev->gdma_context;
>
> ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext-
> >doorbell);
> if (ret)
> @@ -246,7 +248,7 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext
> *ibcontext) }
>
> static int
> -mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
> +mana_ib_gd_first_dma_region(struct mana_ib_dev *mib_dev,
> struct gdma_context *gc,
> struct gdma_create_dma_region_req *create_req,
> size_t num_pages, mana_handle_t *gdma_region,
> @@ -263,7 +265,7 @@ mana_ib_gd_first_dma_region(struct mana_ib_dev
> *dev,
> err = mana_gd_send_request(gc, create_req_msg_size, create_req,
> sizeof(create_resp), &create_resp);
> if (err || create_resp.hdr.status != expected_status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create DMA region: %d, 0x%x\n",
> err, create_resp.hdr.status);
> if (!err)
> @@ -273,14 +275,14 @@ mana_ib_gd_first_dma_region(struct
> mana_ib_dev *dev,
> }
>
> *gdma_region = create_resp.dma_region_handle;
> - ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
> + ibdev_dbg(&mib_dev->ib_dev, "Created DMA region handle
> 0x%llx\n",
> *gdma_region);
>
> return 0;
> }
>
> static int
> -mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct
> gdma_context *gc,
> +mana_ib_gd_add_dma_region(struct mana_ib_dev *mib_dev, struct
> +gdma_context *gc,
> struct gdma_dma_region_add_pages_req *add_req,
> unsigned int num_pages, u32 expected_status)
> { @@ -296,7 +298,7 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev
> *dev, struct gdma_context *gc,
> err = mana_gd_send_request(gc, add_req_msg_size, add_req,
> sizeof(add_resp), &add_resp);
> if (err || add_resp.hdr.status != expected_status) {
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create DMA region: %d, 0x%x\n",
> err, add_resp.hdr.status);
>
> @@ -309,7 +311,8 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev
> *dev, struct gdma_context *gc,
> return 0;
> }
>
> -int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct
> ib_umem *umem,
> +int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
> + struct ib_umem *umem,
> mana_handle_t *gdma_region)
> {
> struct gdma_dma_region_add_pages_req *add_req = NULL; @@ -
> 329,14 +332,14 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev
> *dev, struct ib_umem *umem,
> void *request_buf;
> int err;
>
> - mdev = dev->gdma_dev;
> + mdev = mib_dev->gdma_dev;
> gc = mdev->gdma_context;
> hwc = gc->hwc.driver_data;
>
> /* Hardware requires dma region to align to chosen page size */
> page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
> if (!page_sz) {
> - ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
> + ibdev_dbg(&mib_dev->ib_dev, "failed to find page size.\n");
> return -ENOMEM;
> }
> num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
> @@ -362,13 +365,13 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *dev, struct ib_umem *umem,
> create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
> create_req->page_count = num_pages_total;
>
> - ibdev_dbg(&dev->ib_dev, "size_dma_region %lu
> num_pages_total %lu\n",
> + ibdev_dbg(&mib_dev->ib_dev, "size_dma_region %lu
> num_pages_total
> +%lu\n",
> umem->length, num_pages_total);
>
> - ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
> + ibdev_dbg(&mib_dev->ib_dev, "page_sz %lu offset_in_page %u\n",
> page_sz, create_req->offset_in_page);
>
> - ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu,
> gdma_page_type %u",
> + ibdev_dbg(&mib_dev->ib_dev, "num_pages_to_handle %lu,
> gdma_page_type
> +%u",
> num_pages_to_handle, create_req->gdma_page_type);
>
> page_addr_list = create_req->page_addr_list; @@ -385,7 +388,7 @@
> int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct
> ib_umem *umem,
>
> if (!num_pages_processed) {
> /* First create message */
> - err = mana_ib_gd_first_dma_region(dev, gc,
> create_req,
> + err = mana_ib_gd_first_dma_region(mib_dev, gc,
> create_req,
> tail, gdma_region,
> expected_status);
> if (err)
> @@ -400,7 +403,7 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *dev, struct ib_umem *umem,
> page_addr_list = add_req->page_addr_list;
> } else {
> /* Subsequent create messages */
> - err = mana_ib_gd_add_dma_region(dev, gc, add_req,
> tail,
> + err = mana_ib_gd_add_dma_region(mib_dev, gc,
> add_req, tail,
> expected_status);
> if (err)
> break;
> @@ -417,20 +420,20 @@ int mana_ib_gd_create_dma_region(struct
> mana_ib_dev *dev, struct ib_umem *umem,
> }
>
> if (err)
> - mana_ib_gd_destroy_dma_region(dev, *gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, *gdma_region);
>
> out:
> kfree(request_buf);
> return err;
> }
>
> -int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64
> gdma_region)
> +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev, u64
> +gdma_region)
> {
> - struct gdma_dev *mdev = dev->gdma_dev;
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_context *gc;
>
> gc = mdev->gdma_context;
> - ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n",
> gdma_region);
> + ibdev_dbg(&mib_dev->ib_dev, "destroy dma region 0x%llx\n",
> +gdma_region);
>
> return mana_gd_destroy_dma_region(gc, gdma_region); } @@ -
> 440,14 +443,14 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext,
> struct vm_area_struct *vma)
> struct mana_ib_ucontext *mana_ucontext =
> container_of(ibcontext, struct mana_ib_ucontext,
> ibucontext);
> struct ib_device *ibdev = ibcontext->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
> struct gdma_context *gc;
> phys_addr_t pfn;
> pgprot_t prot;
> int ret;
>
> - mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> - gc = mdev->gdma_dev->gdma_context;
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + gc = mib_dev->gdma_dev->gdma_context;
>
> if (vma->vm_pgoff != 0) {
> ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma-
> >vm_pgoff); diff --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index 502cc8672eef..ee4efd0af278 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -92,10 +92,11 @@ struct mana_ib_rwq_ind_table {
> struct ib_rwq_ind_table ib_ind_table;
> };
>
> -int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct
> ib_umem *umem,
> +int mana_ib_gd_create_dma_region(struct mana_ib_dev *mib_dev,
> + struct ib_umem *umem,
> mana_handle_t *gdma_region);
>
> -int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
> +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *mib_dev,
> mana_handle_t gdma_region);
>
> struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, @@ -129,9 +130,9 @@
> int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
>
> int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
>
> -int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
> +int mana_ib_cfg_vport(struct mana_ib_dev *mib_dev, u32 port_id,
> struct mana_ib_pd *pd, u32 doorbell_id); -void
> mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
> +void mana_ib_uncfg_vport(struct mana_ib_dev *mib_dev, struct
> mana_ib_pd
> +*pd,
> u32 port);
>
> int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> diff --git a/drivers/infiniband/hw/mana/mr.c
> b/drivers/infiniband/hw/mana/mr.c index 351207c60eb6..f6a53906204d
> 100644
> --- a/drivers/infiniband/hw/mana/mr.c
> +++ b/drivers/infiniband/hw/mana/mr.c
> @@ -25,12 +25,13 @@ mana_ib_verbs_to_gdma_access_flags(int
> access_flags)
> return flags;
> }
>
> -static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct
> mana_ib_mr *mr,
> +static int mana_ib_gd_create_mr(struct mana_ib_dev *mib_dev,
> + struct mana_ib_mr *mr,
> struct gdma_create_mr_params *mr_params)
> {
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_create_mr_response resp = {};
> struct gdma_create_mr_request req = {};
> - struct gdma_dev *mdev = dev->gdma_dev;
> struct gdma_context *gc;
> int err;
>
> @@ -49,7 +50,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *dev, struct mana_ib_mr *mr,
> break;
>
> default:
> - ibdev_dbg(&dev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "invalid param (GDMA_MR_TYPE) passed,
> type %d\n",
> req.mr_type);
> return -EINVAL;
> @@ -58,7 +59,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *dev, struct mana_ib_mr *mr,
> err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp),
> &resp);
>
> if (err || resp.hdr.status) {
> - ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err,
> + ibdev_dbg(&mib_dev->ib_dev, "Failed to create mr %d, %u",
> err,
> resp.hdr.status);
> if (!err)
> err = -EPROTO;
> @@ -73,11 +74,11 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev
> *dev, struct mana_ib_mr *mr,
> return 0;
> }
>
> -static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
> +static int mana_ib_gd_destroy_mr(struct mana_ib_dev *mib_dev, u64
> +mr_handle)
> {
> struct gdma_destroy_mr_response resp = {};
> + struct gdma_dev *mdev = mib_dev->gdma_dev;
> struct gdma_destroy_mr_request req = {};
> - struct gdma_dev *mdev = dev->gdma_dev;
> struct gdma_context *gc;
> int err;
>
> @@ -107,12 +108,12 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd,
> ibpd);
> struct gdma_create_mr_params mr_params = {};
> struct ib_device *ibdev = ibpd->device;
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> struct mana_ib_mr *mr;
> u64 dma_region_handle;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> ibdev_dbg(ibdev,
> "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
> @@ -133,7 +134,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> goto err_free;
> }
>
> - err = mana_ib_gd_create_dma_region(dev, mr->umem,
> &dma_region_handle);
> + err = mana_ib_gd_create_dma_region(mib_dev, mr->umem,
> +&dma_region_handle);
> if (err) {
> ibdev_dbg(ibdev, "Failed create dma region for user-
> mr, %d\n",
> err);
> @@ -151,7 +152,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> mr_params.gva.access_flags =
> mana_ib_verbs_to_gdma_access_flags(access_flags);
>
> - err = mana_ib_gd_create_mr(dev, mr, &mr_params);
> + err = mana_ib_gd_create_mr(mib_dev, mr, &mr_params);
> if (err)
> goto err_dma_region;
>
> @@ -164,7 +165,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd
> *ibpd, u64 start, u64 length,
> return &mr->ibmr;
>
> err_dma_region:
> - mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
> + mana_gd_destroy_dma_region(mib_dev->gdma_dev-
> >gdma_context,
> dma_region_handle);
>
> err_umem:
> @@ -179,12 +180,12 @@ int mana_ib_dereg_mr(struct ib_mr *ibmr, struct
> ib_udata *udata) {
> struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr,
> ibmr);
> struct ib_device *ibdev = ibmr->device;
> - struct mana_ib_dev *dev;
> + struct mana_ib_dev *mib_dev;
> int err;
>
> - dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>
> - err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
> + err = mana_ib_gd_destroy_mr(mib_dev, mr->mr_handle);
> if (err)
> return err;
>
> diff --git a/drivers/infiniband/hw/mana/qp.c
> b/drivers/infiniband/hw/mana/qp.c index 4b3b5b274e84..2e3a57123ed7
> 100644
> --- a/drivers/infiniband/hw/mana/qp.c
> +++ b/drivers/infiniband/hw/mana/qp.c
> @@ -5,7 +5,7 @@
>
> #include "mana_ib.h"
>
> -static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
> +static int mana_ib_cfg_vport_steering(struct mana_ib_dev *mib_dev,
> struct net_device *ndev,
> mana_handle_t default_rxobj,
> mana_handle_t ind_table[],
> @@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct
> mana_ib_dev *dev,
> u32 req_buf_size;
> int i, err;
>
> - mdev = dev->gdma_dev;
> + mdev = mib_dev->gdma_dev;
> gc = mdev->gdma_context;
>
> req_buf_size =
> @@ -55,10 +55,10 @@ static int mana_ib_cfg_vport_steering(struct
> mana_ib_dev *dev,
> * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
> * ind_table to MANA_INDIRECT_TABLE_SIZE if required
> */
> - ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 <<
> log_ind_tbl_size);
> + ibdev_dbg(&mib_dev->ib_dev, "ind table size %u\n", 1 <<
> +log_ind_tbl_size);
> for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
> req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
> - ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
> + ibdev_dbg(&mib_dev->ib_dev, "index %u handle 0x%llx\n", i,
> req_indir_tab[i]);
> }
>
> @@ -68,7 +68,7 @@ static int mana_ib_cfg_vport_steering(struct
> mana_ib_dev *dev,
> else
> netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
>
> - ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
> + ibdev_dbg(&mib_dev->ib_dev, "vport handle %llu default_rxobj
> +0x%llx\n",
> req->vport, default_rxobj);
>
> err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp),
> &resp); @@ -97,12 +97,12 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
> struct ib_udata *udata)
> {
> struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp,
> ibqp);
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(pd->device, struct mana_ib_dev, ib_dev);
> struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
> struct mana_ib_create_qp_rss_resp resp = {};
> struct mana_ib_create_qp_rss ucmd = {};
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
This needs to follow the "reverse Christmas tree" style (local variable declarations ordered from longest line to shortest), consistent with the rest of the driver.
> mana_handle_t *mana_ind_table;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> @@ -123,21 +123,21 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
>
> ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
> if (ret) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed copy from udata for create rss-qp, err %d\n",
> ret);
> return ret;
> }
>
> if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_recv_wr %d exceeding limit\n",
> attr->cap.max_recv_wr);
> return -EINVAL;
> }
>
> if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_recv_sge %d exceeding limit\n",
> attr->cap.max_recv_sge);
> return -EINVAL;
> @@ -145,14 +145,14 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
>
> ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
> if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Indirect table size %d exceeding limit\n",
> ind_tbl_size);
> return -EINVAL;
> }
>
> if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "RX Hash function is not supported, %d\n",
> ucmd.rx_hash_function);
> return -EINVAL;
> @@ -161,14 +161,14 @@ static int mana_ib_create_qp_rss(struct ib_qp
> *ibqp, struct ib_pd *pd,
> /* IB ports start with 1, MANA start with 0 */
> port = ucmd.port;
> if (port < 1 || port > mc->num_ports) {
> - ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating
> qp\n",
> + ibdev_dbg(&mib_dev->ib_dev, "Invalid port %u in creating
> qp\n",
> port);
> return -EINVAL;
> }
> ndev = mc->ports[port - 1];
> mpc = netdev_priv(ndev);
>
> - ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
> + ibdev_dbg(&mib_dev->ib_dev, "rx_hash_function %d port %d\n",
> ucmd.rx_hash_function, port);
>
> mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t), @@ -
> 210,7 +210,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct
> ib_pd *pd,
> wq->id = wq_spec.queue_index;
> cq->id = cq_spec.queue_index;
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
> ret, wq->rx_object, wq->id, cq->id);
>
> @@ -221,7 +221,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
> }
> resp.num_entries = i;
>
> - ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
> + ret = mana_ib_cfg_vport_steering(mib_dev, ndev, wq->rx_object,
> mana_ind_table,
> ind_tbl->log_ind_tbl_size,
> ucmd.rx_hash_key_len,
> @@ -231,7 +231,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
>
> ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
> if (ret) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to copy to udata create rss-qp, %d\n",
> ret);
> goto fail;
> @@ -259,7 +259,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd, {
> struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd,
> ibpd);
> struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp,
> ibqp);
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(ibpd->device, struct mana_ib_dev, ib_dev);
> struct mana_ib_cq *send_cq =
> container_of(attr->send_cq, struct mana_ib_cq, ibcq); @@ -
> 267,7 +267,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
> rdma_udata_to_drv_context(udata, struct
> mana_ib_ucontext,
> ibucontext);
> struct mana_ib_create_qp_resp resp = {};
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct mana_ib_create_qp ucmd = {};
> struct mana_obj_spec wq_spec = {};
> struct mana_obj_spec cq_spec = {};
> @@ -285,7 +285,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
>
> err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to copy from udata create qp-raw, %d\n",
> err);
> return err;
> }
> @@ -296,14 +296,14 @@ static int mana_ib_create_qp_raw(struct ib_qp
> *ibqp, struct ib_pd *ibpd,
> return -EINVAL;
>
> if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_send_wr %d exceeding limit\n",
> attr->cap.max_send_wr);
> return -EINVAL;
> }
>
> if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Requested max_send_sge %d exceeding limit\n",
> attr->cap.max_send_sge);
> return -EINVAL;
> @@ -311,38 +311,38 @@ static int mana_ib_create_qp_raw(struct ib_qp
> *ibqp, struct ib_pd *ibpd,
>
> ndev = mc->ports[port - 1];
> mpc = netdev_priv(ndev);
> - ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port,
> ndev, mpc);
> + ibdev_dbg(&mib_dev->ib_dev, "port %u ndev %p mpc %p\n", port,
> ndev,
> +mpc);
>
> - err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext-
> >doorbell);
> + err = mana_ib_cfg_vport(mib_dev, port - 1, pd,
> +mana_ucontext->doorbell);
> if (err)
> return -ENODEV;
>
> qp->port = port;
>
> - ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
> + ibdev_dbg(&mib_dev->ib_dev, "ucmd sq_buf_addr 0x%llx
> port %u\n",
> ucmd.sq_buf_addr, ucmd.port);
>
> umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr,
> ucmd.sq_buf_size,
> IB_ACCESS_LOCAL_WRITE);
> if (IS_ERR(umem)) {
> err = PTR_ERR(umem);
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to get umem for create qp-raw, err %d\n",
> err);
> goto err_free_vport;
> }
> qp->sq_umem = umem;
>
> - err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
> + err = mana_ib_gd_create_dma_region(mib_dev, qp->sq_umem,
> &qp->sq_gdma_region);
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create dma region for create qp-
> raw, %d\n",
> err);
> goto err_release_umem;
> }
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "mana_ib_gd_create_dma_region ret %d gdma_region
> 0x%llx\n",
> err, qp->sq_gdma_region);
>
> @@ -358,7 +358,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
> err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ,
> &wq_spec,
> &cq_spec, &qp->tx_object);
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create wq for create raw-qp, err %d\n",
> err);
> goto err_destroy_dma_region;
> @@ -371,7 +371,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
> qp->sq_id = wq_spec.queue_index;
> send_cq->id = cq_spec.queue_index;
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
> qp->tx_object, qp->sq_id, send_cq->id);
>
> @@ -381,7 +381,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
>
> err = ib_copy_to_udata(udata, &resp, sizeof(resp));
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed copy udata for create qp-raw, %d\n",
> err);
> goto err_destroy_wq_obj;
> @@ -393,13 +393,13 @@ static int mana_ib_create_qp_raw(struct ib_qp
> *ibqp, struct ib_pd *ibpd,
> mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
>
> err_destroy_dma_region:
> - mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, qp->sq_gdma_region);
>
> err_release_umem:
> ib_umem_release(umem);
>
> err_free_vport:
> - mana_ib_uncfg_vport(mdev, pd, port - 1);
> + mana_ib_uncfg_vport(mib_dev, pd, port - 1);
>
> return err;
> }
> @@ -435,9 +435,9 @@ static int mana_ib_destroy_qp_rss(struct
> mana_ib_qp *qp,
> struct ib_rwq_ind_table *ind_tbl,
> struct ib_udata *udata)
> {
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> struct net_device *ndev;
> @@ -452,7 +452,7 @@ static int mana_ib_destroy_qp_rss(struct
> mana_ib_qp *qp,
> for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
> ibwq = ind_tbl->ind_tbl[i];
> wq = container_of(ibwq, struct mana_ib_wq, ibwq);
> - ibdev_dbg(&mdev->ib_dev, "destroying wq-
> >rx_object %llu\n",
> + ibdev_dbg(&mib_dev->ib_dev, "destroying wq-
> >rx_object %llu\n",
> wq->rx_object);
> mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
> }
> @@ -462,9 +462,9 @@ static int mana_ib_destroy_qp_rss(struct
> mana_ib_qp *qp,
>
> static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata
> *udata) {
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
> - struct gdma_dev *gd = mdev->gdma_dev;
> + struct gdma_dev *gd = mib_dev->gdma_dev;
> struct ib_pd *ibpd = qp->ibqp.pd;
> struct mana_port_context *mpc;
> struct mana_context *mc;
> @@ -479,11 +479,11 @@ static int mana_ib_destroy_qp_raw(struct
> mana_ib_qp *qp, struct ib_udata *udata)
> mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
>
> if (qp->sq_umem) {
> - mana_ib_gd_destroy_dma_region(mdev, qp-
> >sq_gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, qp-
> >sq_gdma_region);
> ib_umem_release(qp->sq_umem);
> }
>
> - mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
> + mana_ib_uncfg_vport(mib_dev, pd, qp->port - 1);
>
> return 0;
> }
> diff --git a/drivers/infiniband/hw/mana/wq.c
> b/drivers/infiniband/hw/mana/wq.c index 372d361510e0..56bc2b8b6690
> 100644
> --- a/drivers/infiniband/hw/mana/wq.c
> +++ b/drivers/infiniband/hw/mana/wq.c
> @@ -9,7 +9,7 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
> struct ib_wq_init_attr *init_attr,
> struct ib_udata *udata)
> {
> - struct mana_ib_dev *mdev =
> + struct mana_ib_dev *mib_dev =
> container_of(pd->device, struct mana_ib_dev, ib_dev);
> struct mana_ib_create_wq ucmd = {};
> struct mana_ib_wq *wq;
> @@ -21,7 +21,7 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
>
> err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to copy from udata for create wq, %d\n", err);
> return ERR_PTR(err);
> }
> @@ -30,13 +30,14 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
> if (!wq)
> return ERR_PTR(-ENOMEM);
>
> - ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n",
> ucmd.wq_buf_addr);
> + ibdev_dbg(&mib_dev->ib_dev, "ucmd wq_buf_addr 0x%llx\n",
> + ucmd.wq_buf_addr);
>
> umem = ib_umem_get(pd->device, ucmd.wq_buf_addr,
> ucmd.wq_buf_size,
> IB_ACCESS_LOCAL_WRITE);
> if (IS_ERR(umem)) {
> err = PTR_ERR(umem);
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to get umem for create wq, err %d\n", err);
> goto err_free_wq;
> }
> @@ -46,15 +47,15 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
> wq->wq_buf_size = ucmd.wq_buf_size;
> wq->rx_object = INVALID_MANA_HANDLE;
>
> - err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq-
> >gdma_region);
> + err = mana_ib_gd_create_dma_region(mib_dev, wq->umem,
> +&wq->gdma_region);
> if (err) {
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "Failed to create dma region for create wq, %d\n",
> err);
> goto err_release_umem;
> }
>
> - ibdev_dbg(&mdev->ib_dev,
> + ibdev_dbg(&mib_dev->ib_dev,
> "mana_ib_gd_create_dma_region ret %d gdma_region
> 0x%llx\n",
> err, wq->gdma_region);
>
> @@ -82,11 +83,11 @@ int mana_ib_destroy_wq(struct ib_wq *ibwq, struct
> ib_udata *udata) {
> struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq,
> ibwq);
> struct ib_device *ib_dev = ibwq->device;
> - struct mana_ib_dev *mdev;
> + struct mana_ib_dev *mib_dev;
>
> - mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
> + mib_dev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
>
> - mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
> + mana_ib_gd_destroy_dma_region(mib_dev, wq->gdma_region);
> ib_umem_release(wq->umem);
>
> kfree(wq);
> --
> 2.25.1
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox