- * [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-06-07  6:41   ` Christoph Hellwig
  2021-06-09 12:38   ` Joerg Roedel
  2021-05-30 15:06 ` [RFC PATCH V3 02/11] x86/HV: Initialize shared memory boundary in the " Tianyu Lan
                   ` (9 subsequent siblings)
  10 siblings, 2 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Hyper-V exposes GHCB page via SEV ES GHCB MSR for SNP guest
to communicate with hypervisor. Map GHCB page for all
cpus to read/write MSR register and submit hvcall request
via GHCB.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 arch/x86/hyperv/hv_init.c       | 60 ++++++++++++++++++++++++++++++---
 arch/x86/include/asm/mshyperv.h |  2 ++
 include/asm-generic/mshyperv.h  |  2 ++
 3 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index bb0ae4b5c00f..dc74d01cb859 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -60,6 +60,9 @@ static int hv_cpu_init(unsigned int cpu)
 	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
 	void **input_arg;
 	struct page *pg;
+	u64 ghcb_gpa;
+	void *ghcb_va;
+	void **ghcb_base;
 
 	/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
 	pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0);
@@ -106,6 +109,17 @@ static int hv_cpu_init(unsigned int cpu)
 		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
 	}
 
+	if (ms_hyperv.ghcb_base) {
+		rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
+
+		ghcb_va = ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
+		if (!ghcb_va)
+			return -ENOMEM;
+
+		ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+		*ghcb_base = ghcb_va;
+	}
+
 	return 0;
 }
 
@@ -201,6 +215,7 @@ static int hv_cpu_die(unsigned int cpu)
 	unsigned long flags;
 	void **input_arg;
 	void *pg;
+	void **ghcb_va = NULL;
 
 	local_irq_save(flags);
 	input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
@@ -214,6 +229,13 @@ static int hv_cpu_die(unsigned int cpu)
 		*output_arg = NULL;
 	}
 
+	if (ms_hyperv.ghcb_base) {
+		ghcb_va = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+		if (*ghcb_va)
+			iounmap(*ghcb_va);
+		*ghcb_va = NULL;
+	}
+
 	local_irq_restore(flags);
 
 	free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);
@@ -369,6 +391,9 @@ void __init hyperv_init(void)
 	u64 guest_id, required_msrs;
 	union hv_x64_msr_hypercall_contents hypercall_msr;
 	int cpuhp, i;
+	u64 ghcb_gpa;
+	void *ghcb_va;
+	void **ghcb_base;
 
 	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
 		return;
@@ -429,9 +454,24 @@ void __init hyperv_init(void)
 			VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
 			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
 			__builtin_return_address(0));
-	if (hv_hypercall_pg == NULL) {
-		wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
-		goto remove_cpuhp_state;
+	if (hv_hypercall_pg == NULL)
+		goto clean_guest_os_id;
+
+	if (hv_isolation_type_snp()) {
+		ms_hyperv.ghcb_base = alloc_percpu(void *);
+		if (!ms_hyperv.ghcb_base)
+			goto clean_guest_os_id;
+
+		rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
+		ghcb_va = ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
+		if (!ghcb_va) {
+			free_percpu(ms_hyperv.ghcb_base);
+			ms_hyperv.ghcb_base = NULL;
+			goto clean_guest_os_id;
+		}
+
+		ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+		*ghcb_base = ghcb_va;
 	}
 
 	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -502,7 +542,8 @@ void __init hyperv_init(void)
 	hv_query_ext_cap(0);
 	return;
 
-remove_cpuhp_state:
+clean_guest_os_id:
+	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
 	cpuhp_remove_state(cpuhp);
 free_vp_assist_page:
 	kfree(hv_vp_assist_page);
@@ -531,6 +572,9 @@ void hyperv_cleanup(void)
 	 */
 	hv_hypercall_pg = NULL;
 
+	if (ms_hyperv.ghcb_base)
+		free_percpu(ms_hyperv.ghcb_base);
+
 	/* Reset the hypercall page */
 	hypercall_msr.as_uint64 = 0;
 	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -615,6 +659,14 @@ bool hv_is_isolation_supported(void)
 }
 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
 
+DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
+
+bool hv_isolation_type_snp(void)
+{
+	return static_branch_unlikely(&isolation_type_snp);
+}
+EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
+
 /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
 bool hv_query_ext_cap(u64 cap_query)
 {
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 67ff0d637e55..aeacca7c4da8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -11,6 +11,8 @@
 #include <asm/paravirt.h>
 #include <asm/mshyperv.h>
 
+DECLARE_STATIC_KEY_FALSE(isolation_type_snp);
+
 typedef int (*hyperv_fill_flush_list_func)(
 		struct hv_guest_mapping_flush_list *flush,
 		void *data);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 9a000ba2bb75..3ae56a29594f 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -35,6 +35,7 @@ struct ms_hyperv_info {
 	u32 max_lp_index;
 	u32 isolation_config_a;
 	u32 isolation_config_b;
+	void  __percpu **ghcb_base;
 };
 extern struct ms_hyperv_info ms_hyperv;
 
@@ -224,6 +225,7 @@ bool hv_is_hyperv_initialized(void);
 bool hv_is_hibernation_supported(void);
 enum hv_isolation_type hv_get_isolation_type(void);
 bool hv_is_isolation_supported(void);
+bool hv_isolation_type_snp(void);
 void hyperv_cleanup(void);
 bool hv_query_ext_cap(u64 cap_query);
 #else /* CONFIG_HYPERV */
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM
  2021-05-30 15:06 ` [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM Tianyu Lan
@ 2021-06-07  6:41   ` Christoph Hellwig
  2021-06-07  8:14     ` Tianyu Lan
  2021-06-09 12:38   ` Joerg Roedel
  1 sibling, 1 reply; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-07  6:41 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen, iommu, linux-arch, linux-hyperv, linux-kernel,
	linux-scsi, netdev, vkuznets, thomas.lendacky, brijesh.singh,
	sunilmut
On Sun, May 30, 2021 at 11:06:18AM -0400, Tianyu Lan wrote:
> +	if (ms_hyperv.ghcb_base) {
> +		rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
> +
> +		ghcb_va = ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
> +		if (!ghcb_va)
> +			return -ENOMEM;
Can you explain this a bit more?  We've very much deprecated
ioremap_cache in favor of memremap.  Why yo you need a __iomem address
here?  Why do we need the remap here at all?
Does the data structure at this address not have any types that we
could use a struct for?
> +
> +		rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
> +		ghcb_va = ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
> +		if (!ghcb_va) {
This seems to duplicate the above code.
> +bool hv_isolation_type_snp(void)
> +{
> +	return static_branch_unlikely(&isolation_type_snp);
> +}
> +EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
This probably wants a kerneldoc explaining when it should be used.
^ permalink raw reply	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM
  2021-06-07  6:41   ` Christoph Hellwig
@ 2021-06-07  8:14     ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-07  8:14 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
Hi Christoph:
	Thanks for your review.
On 6/7/2021 2:41 PM, Christoph Hellwig wrote:
> On Sun, May 30, 2021 at 11:06:18AM -0400, Tianyu Lan wrote:
>> +	if (ms_hyperv.ghcb_base) {
>> +		rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
>> +
>> +		ghcb_va = ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
>> +		if (!ghcb_va)
>> +			return -ENOMEM;
> 
> Can you explain this a bit more?  We've very much deprecated
> ioremap_cache in favor of memremap.  Why yo you need a __iomem address
> here?  Why do we need the remap here at all? >
GHCB physical address is an address in extra address space which is 
above shared gpa boundary reported by Hyper-V CPUID. The addresses below
shared gpa boundary treated as encrypted and the one above is treated as 
decrypted. System memory is remapped in the extra address space and it 
starts from the boundary. The shared memory with host needs to use 
address in the extra address(pa + shared_gpa_boundary) in Linux guest.
Here is to map ghcb page for the communication operations with 
Hypervisor(e.g, hypercall and read/write MSR) via GHCB page.
memremap() will go through iomem_resource list and the address in extra 
address space will not be in the list. So I used ioremap_cache(). I will
memremap() instead of ioremap() here.
> Does the data structure at this address not have any types that we
> could use a struct for?
The struct will be added in the following patch. I will refresh the 
following patch and use the struct hv_ghcb for the mapped point.
> 
>> +
>> +		rdmsrl(MSR_AMD64_SEV_ES_GHCB, ghcb_gpa);
>> +		ghcb_va = ioremap_cache(ghcb_gpa, HV_HYP_PAGE_SIZE);
>> +		if (!ghcb_va) {
> 
> This seems to duplicate the above code.
The above is to map ghcb for BSP and here does the same thing for APs
Will add a new function to avoid the duplication.
> 
>> +bool hv_isolation_type_snp(void)
>> +{
>> +	return static_branch_unlikely(&isolation_type_snp);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
> 
> This probably wants a kerneldoc explaining when it should be used. >
OK. I will add.
^ permalink raw reply	[flat|nested] 49+ messages in thread
 
- * Re: [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM
  2021-05-30 15:06 ` [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM Tianyu Lan
  2021-06-07  6:41   ` Christoph Hellwig
@ 2021-06-09 12:38   ` Joerg Roedel
  2021-06-10 14:13     ` Tianyu Lan
  1 sibling, 1 reply; 49+ messages in thread
From: Joerg Roedel @ 2021-06-09 12:38 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, will, xen-devel, davem, kuba, jejb, martin.petersen,
	iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On Sun, May 30, 2021 at 11:06:18AM -0400, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> Hyper-V exposes GHCB page via SEV ES GHCB MSR for SNP guest
> to communicate with hypervisor. Map GHCB page for all
> cpus to read/write MSR register and submit hvcall request
> via GHCB.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  arch/x86/hyperv/hv_init.c       | 60 ++++++++++++++++++++++++++++++---
>  arch/x86/include/asm/mshyperv.h |  2 ++
>  include/asm-generic/mshyperv.h  |  2 ++
>  3 files changed, 60 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index bb0ae4b5c00f..dc74d01cb859 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -60,6 +60,9 @@ static int hv_cpu_init(unsigned int cpu)
>  	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
>  	void **input_arg;
>  	struct page *pg;
> +	u64 ghcb_gpa;
> +	void *ghcb_va;
> +	void **ghcb_base;
Any reason you can't reuse the SEV-ES support code in the Linux kernel?
It already has code to setup GHCBs for all vCPUs.
I see that you don't need #VC handling in your SNP VMs because of the
paravisor running underneath it, but just re-using the GHCB setup code
shouldn't be too hard.
Regards,
	Joerg
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM
  2021-06-09 12:38   ` Joerg Roedel
@ 2021-06-10 14:13     ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-10 14:13 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, will, xen-devel, davem, kuba, jejb, martin.petersen,
	iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
Hi Joerg:
	Thanks for your review.
On 6/9/2021 8:38 PM, Joerg Roedel wrote:
> On Sun, May 30, 2021 at 11:06:18AM -0400, Tianyu Lan wrote:
>> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>
>> Hyper-V exposes GHCB page via SEV ES GHCB MSR for SNP guest
>> to communicate with hypervisor. Map GHCB page for all
>> cpus to read/write MSR register and submit hvcall request
>> via GHCB.
>>
>> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
>> ---
>>   arch/x86/hyperv/hv_init.c       | 60 ++++++++++++++++++++++++++++++---
>>   arch/x86/include/asm/mshyperv.h |  2 ++
>>   include/asm-generic/mshyperv.h  |  2 ++
>>   3 files changed, 60 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
>> index bb0ae4b5c00f..dc74d01cb859 100644
>> --- a/arch/x86/hyperv/hv_init.c
>> +++ b/arch/x86/hyperv/hv_init.c
>> @@ -60,6 +60,9 @@ static int hv_cpu_init(unsigned int cpu)
>>   	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
>>   	void **input_arg;
>>   	struct page *pg;
>> +	u64 ghcb_gpa;
>> +	void *ghcb_va;
>> +	void **ghcb_base;
> 
> Any reason you can't reuse the SEV-ES support code in the Linux kernel?
> It already has code to setup GHCBs for all vCPUs.
> 
> I see that you don't need #VC handling in your SNP VMs because of the
> paravisor running underneath it, but just re-using the GHCB setup code
> shouldn't be too hard.
> 
Thanks for your suggestion. I will have a try to use SEV-ES code.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
 
 
- * [RFC PATCH V3 02/11] x86/HV: Initialize shared memory boundary in the Isolation VM.
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
  2021-05-30 15:06 ` [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-05-30 15:06 ` [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support Tianyu Lan
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Hyper-V also exposes shared memory boundary via cpuid
HYPERV_CPUID_ISOLATION_CONFIG and store it in the
shared_gpa_boundary of ms_hyperv struct. This prepares
to share memory with host for SNP guest.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 arch/x86/kernel/cpu/mshyperv.c |  2 ++
 include/asm-generic/mshyperv.h | 12 +++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 22f13343b5da..d1ca36224657 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -320,6 +320,8 @@ static void __init ms_hyperv_init_platform(void)
 	if (ms_hyperv.priv_high & HV_ISOLATION) {
 		ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
 		ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
+		ms_hyperv.shared_gpa_boundary =
+			(u64)1 << ms_hyperv.shared_gpa_boundary_bits;
 
 		pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
 			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 3ae56a29594f..2914e27b0429 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -34,8 +34,18 @@ struct ms_hyperv_info {
 	u32 max_vp_index;
 	u32 max_lp_index;
 	u32 isolation_config_a;
-	u32 isolation_config_b;
+	union {
+		u32 isolation_config_b;
+		struct {
+			u32 cvm_type : 4;
+			u32 Reserved11 : 1;
+			u32 shared_gpa_boundary_active : 1;
+			u32 shared_gpa_boundary_bits : 6;
+			u32 Reserved12 : 20;
+		};
+	};
 	void  __percpu **ghcb_base;
+	u64 shared_gpa_boundary;
 };
 extern struct ms_hyperv_info ms_hyperv;
 
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
  2021-05-30 15:06 ` [RFC PATCH V3 01/11] x86/HV: Initialize GHCB page in Isolation VM Tianyu Lan
  2021-05-30 15:06 ` [RFC PATCH V3 02/11] x86/HV: Initialize shared memory boundary in the " Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-05-30 18:25   ` Borislav Petkov
  2021-06-10  9:47   ` Vitaly Kuznetsov
  2021-05-30 15:06 ` [RFC PATCH V3 04/11] HV: Add Write/Read MSR registers via ghcb Tianyu Lan
                   ` (7 subsequent siblings)
  10 siblings, 2 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Add new hvcall guest address host visibility support. Mark vmbus
ring buffer visible to host when create gpadl buffer and mark back
to not visible when tear down gpadl buffer.
Co-developed-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 arch/x86/hyperv/Makefile           |   2 +-
 arch/x86/hyperv/ivm.c              | 106 +++++++++++++++++++++++++++++
 arch/x86/include/asm/hyperv-tlfs.h |  24 +++++++
 arch/x86/include/asm/mshyperv.h    |   4 +-
 arch/x86/mm/pat/set_memory.c       |  10 ++-
 drivers/hv/channel.c               |  38 ++++++++++-
 include/asm-generic/hyperv-tlfs.h  |   1 +
 include/linux/hyperv.h             |  10 +++
 8 files changed, 190 insertions(+), 5 deletions(-)
 create mode 100644 arch/x86/hyperv/ivm.c
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 48e2c51464e8..5d2de10809ae 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y			:= hv_init.o mmu.o nested.o irqdomain.o
+obj-y			:= hv_init.o mmu.o nested.o irqdomain.o ivm.o
 obj-$(CONFIG_X86_64)	+= hv_apic.o hv_proc.o
 
 ifdef CONFIG_X86_64
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
new file mode 100644
index 000000000000..fad1d3024056
--- /dev/null
+++ b/arch/x86/hyperv/ivm.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V Isolation VM interface with paravisor and hypervisor
+ *
+ * Author:
+ *  Tianyu Lan <Tianyu.Lan@microsoft.com>
+ */
+
+#include <linux/hyperv.h>
+#include <linux/types.h>
+#include <linux/bitfield.h>
+#include <asm/io.h>
+#include <asm/mshyperv.h>
+
+/*
+ * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
+ *
+ * In Isolation VM, all guest memory is encripted from host and guest
+ * needs to set memory visible to host via hvcall before sharing memory
+ * with host.
+ */
+int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
+{
+	struct hv_gpa_range_for_visibility **input_pcpu, *input;
+	u16 pages_processed;
+	u64 hv_status;
+	unsigned long flags;
+
+	/* no-op if partition isolation is not enabled */
+	if (!hv_is_isolation_supported())
+		return 0;
+
+	if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
+		pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
+			HV_MAX_MODIFY_GPA_REP_COUNT);
+		return -EINVAL;
+	}
+
+	local_irq_save(flags);
+	input_pcpu = (struct hv_gpa_range_for_visibility **)
+			this_cpu_ptr(hyperv_pcpu_input_arg);
+	input = *input_pcpu;
+	if (unlikely(!input)) {
+		local_irq_restore(flags);
+		return -EINVAL;
+	}
+
+	input->partition_id = HV_PARTITION_ID_SELF;
+	input->host_visibility = visibility;
+	input->reserved0 = 0;
+	input->reserved1 = 0;
+	memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
+	hv_status = hv_do_rep_hypercall(
+			HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
+			0, input, &pages_processed);
+	local_irq_restore(flags);
+
+	if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
+		return 0;
+
+	return hv_status & HV_HYPERCALL_RESULT_MASK;
+}
+EXPORT_SYMBOL(hv_mark_gpa_visibility);
+
+/*
+ * hv_set_mem_host_visibility - Set specified memory visible to host.
+ *
+ * In Isolation VM, all guest memory is encrypted from host and guest
+ * needs to set memory visible to host via hvcall before sharing memory
+ * with host. This function works as wrap of hv_mark_gpa_visibility()
+ * with memory base and size.
+ */
+int hv_set_mem_host_visibility(void *kbuffer, size_t size,
+			       enum vmbus_page_visibility visibility)
+{
+	int pagecount = size >> HV_HYP_PAGE_SHIFT;
+	u64 *pfn_array;
+	int ret = 0;
+	int i, pfn;
+
+	if (!hv_is_isolation_supported())
+		return 0;
+
+	pfn_array = vzalloc(HV_HYP_PAGE_SIZE);
+	if (!pfn_array)
+		return -ENOMEM;
+
+	for (i = 0, pfn = 0; i < pagecount; i++) {
+		pfn_array[pfn] = virt_to_hvpfn(kbuffer + i * HV_HYP_PAGE_SIZE);
+		pfn++;
+
+		if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
+			ret |= hv_mark_gpa_visibility(pfn, pfn_array, visibility);
+			pfn = 0;
+
+			if (ret)
+				goto err_free_pfn_array;
+		}
+	}
+
+ err_free_pfn_array:
+	vfree(pfn_array);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hv_set_mem_host_visibility);
+
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 606f5cc579b2..632281b91b44 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -262,6 +262,17 @@ enum hv_isolation_type {
 #define HV_X64_MSR_TIME_REF_COUNT	HV_REGISTER_TIME_REF_COUNT
 #define HV_X64_MSR_REFERENCE_TSC	HV_REGISTER_REFERENCE_TSC
 
+/* Hyper-V GPA map flags */
+#define HV_MAP_GPA_PERMISSIONS_NONE            0x0
+#define HV_MAP_GPA_READABLE                    0x1
+#define HV_MAP_GPA_WRITABLE                    0x2
+
+enum vmbus_page_visibility {
+	VMBUS_PAGE_NOT_VISIBLE = 0,
+	VMBUS_PAGE_VISIBLE_READ_ONLY = 1,
+	VMBUS_PAGE_VISIBLE_READ_WRITE = 3
+};
+
 /*
  * Declare the MSR used to setup pages used to communicate with the hypervisor.
  */
@@ -561,4 +572,17 @@ enum hv_interrupt_type {
 
 #include <asm-generic/hyperv-tlfs.h>
 
+/* All input parameters should be in single page. */
+#define HV_MAX_MODIFY_GPA_REP_COUNT		\
+	((PAGE_SIZE / sizeof(u64)) - 2)
+
+/* HvCallModifySparseGpaPageHostVisibility hypercall */
+struct hv_gpa_range_for_visibility {
+	u64 partition_id;
+	u32 host_visibility:2;
+	u32 reserved0:30;
+	u32 reserved1;
+	u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT];
+} __packed;
+
 #endif
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index aeacca7c4da8..6af9d55ffe3b 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -194,7 +194,9 @@ struct irq_domain *hv_create_pci_msi_domain(void);
 int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
 		struct hv_interrupt_entry *entry);
 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
-
+int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
+int hv_set_mem_host_visibility(void *kbuffer, size_t size,
+			       enum vmbus_page_visibility visibility);
 #else /* CONFIG_HYPERV */
 static inline void hyperv_init(void) {}
 static inline void hyperv_setup_mmu_ops(void) {}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 156cd235659f..a82975600107 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -29,6 +29,8 @@
 #include <asm/proto.h>
 #include <asm/memtype.h>
 #include <asm/set_memory.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
 
 #include "../mm_internal.h"
 
@@ -1986,8 +1988,14 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 	int ret;
 
 	/* Nothing to do if memory encryption is not active */
-	if (!mem_encrypt_active())
+	if (hv_is_isolation_supported()) {
+		return hv_set_mem_host_visibility((void *)addr,
+				numpages * HV_HYP_PAGE_SIZE,
+				enc ? VMBUS_PAGE_NOT_VISIBLE
+				: VMBUS_PAGE_VISIBLE_READ_WRITE);
+	} else if (!mem_encrypt_active()) {
 		return 0;
+	}
 
 	/* Should not be working on unaligned addresses */
 	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index f3761c73b074..01048bb07082 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -17,6 +17,7 @@
 #include <linux/hyperv.h>
 #include <linux/uio.h>
 #include <linux/interrupt.h>
+#include <linux/set_memory.h>
 #include <asm/page.h>
 #include <asm/mshyperv.h>
 
@@ -465,7 +466,7 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
 	struct list_head *curr;
 	u32 next_gpadl_handle;
 	unsigned long flags;
-	int ret = 0;
+	int ret = 0, index;
 
 	next_gpadl_handle =
 		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
@@ -474,6 +475,13 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
 	if (ret)
 		return ret;
 
+	ret = set_memory_decrypted((unsigned long)kbuffer,
+				   HVPFN_UP(size));
+	if (ret) {
+		pr_warn("Failed to set host visibility.\n");
+		return ret;
+	}
+
 	init_completion(&msginfo->waitevent);
 	msginfo->waiting_channel = channel;
 
@@ -539,6 +547,15 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
 	/* At this point, we received the gpadl created msg */
 	*gpadl_handle = gpadlmsg->gpadl;
 
+	if (type == HV_GPADL_BUFFER)
+		index = 0;
+	else
+		index = channel->gpadl_range[1].gpadlhandle ? 2 : 1;
+
+	channel->gpadl_range[index].size = size;
+	channel->gpadl_range[index].buffer = kbuffer;
+	channel->gpadl_range[index].gpadlhandle = *gpadl_handle;
+
 cleanup:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&msginfo->msglistentry);
@@ -549,6 +566,11 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
 	}
 
 	kfree(msginfo);
+
+	if (ret)
+		set_memory_encrypted((unsigned long)kbuffer,
+				     HVPFN_UP(size));
+
 	return ret;
 }
 
@@ -811,7 +833,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 	struct vmbus_channel_gpadl_teardown *msg;
 	struct vmbus_channel_msginfo *info;
 	unsigned long flags;
-	int ret;
+	int ret, i;
 
 	info = kzalloc(sizeof(*info) +
 		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
@@ -859,6 +881,18 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 
 	kfree(info);
+
+	/* Find gpadl buffer virtual address and size. */
+	for (i = 0; i < VMBUS_GPADL_RANGE_COUNT; i++)
+		if (channel->gpadl_range[i].gpadlhandle == gpadl_handle)
+			break;
+
+	if (set_memory_encrypted((unsigned long)channel->gpadl_range[i].buffer,
+			HVPFN_UP(channel->gpadl_range[i].size)))
+		pr_warn("Fail to set mem host visibility.\n");
+
+	channel->gpadl_range[i].gpadlhandle = 0;
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 515c3fb06ab3..8a0219255545 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -158,6 +158,7 @@ struct ms_hyperv_tsc_page {
 #define HVCALL_RETARGET_INTERRUPT		0x007e
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
 
 /* Extended hypercalls */
 #define HV_EXT_CALL_QUERY_CAPABILITIES		0x8001
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2e859d2f9609..06eccaba10c5 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -809,6 +809,14 @@ struct vmbus_device {
 
 #define VMBUS_DEFAULT_MAX_PKT_SIZE 4096
 
+struct vmbus_gpadl_range {
+	u32 gpadlhandle;
+	u32 size;
+	void *buffer;
+};
+
+#define VMBUS_GPADL_RANGE_COUNT		3
+
 struct vmbus_channel {
 	struct list_head listentry;
 
@@ -829,6 +837,8 @@ struct vmbus_channel {
 	struct completion rescind_event;
 
 	u32 ringbuffer_gpadlhandle;
+	/* GPADL_RING and Send/Receive GPADL_BUFFER. */
+	struct vmbus_gpadl_range gpadl_range[VMBUS_GPADL_RANGE_COUNT];
 
 	/* Allocated memory for ring buffer */
 	struct page *ringbuffer_page;
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support
  2021-05-30 15:06 ` [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support Tianyu Lan
@ 2021-05-30 18:25   ` Borislav Petkov
  2021-05-31  4:08     ` Tianyu Lan
  2021-06-10  9:47   ` Vitaly Kuznetsov
  1 sibling, 1 reply; 49+ messages in thread
From: Borislav Petkov @ 2021-05-30 18:25 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, x86, hpa,
	arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen, iommu, linux-arch, linux-hyperv, linux-kernel,
	linux-scsi, netdev, vkuznets, thomas.lendacky, brijesh.singh,
	sunilmut
On Sun, May 30, 2021 at 11:06:20AM -0400, Tianyu Lan wrote:
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 156cd235659f..a82975600107 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -29,6 +29,8 @@
>  #include <asm/proto.h>
>  #include <asm/memtype.h>
>  #include <asm/set_memory.h>
> +#include <asm/hyperv-tlfs.h>
> +#include <asm/mshyperv.h>
>  
>  #include "../mm_internal.h"
>  
> @@ -1986,8 +1988,14 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
>  	int ret;
>  
>  	/* Nothing to do if memory encryption is not active */
> -	if (!mem_encrypt_active())
> +	if (hv_is_isolation_supported()) {
> +		return hv_set_mem_host_visibility((void *)addr,
> +				numpages * HV_HYP_PAGE_SIZE,
> +				enc ? VMBUS_PAGE_NOT_VISIBLE
> +				: VMBUS_PAGE_VISIBLE_READ_WRITE);
Put all this gunk in a hv-specific function somewhere in hv-land which
you only call from here. This way you probably won't even need to export
hv_set_mem_host_visibility() and so on...
Thx.
-- 
Regards/Gruss,
    Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support
  2021-05-30 18:25   ` Borislav Petkov
@ 2021-05-31  4:08     ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-05-31  4:08 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, x86, hpa,
	arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen, iommu, linux-arch, linux-hyperv, linux-kernel,
	linux-scsi, netdev, vkuznets, thomas.lendacky, brijesh.singh,
	sunilmut
Hi Borislav:
	Thanks for your review.
On 5/31/2021 2:25 AM, Borislav Petkov wrote:
> On Sun, May 30, 2021 at 11:06:20AM -0400, Tianyu Lan wrote:
>> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
>> index 156cd235659f..a82975600107 100644
>> --- a/arch/x86/mm/pat/set_memory.c
>> +++ b/arch/x86/mm/pat/set_memory.c
>> @@ -29,6 +29,8 @@
>>   #include <asm/proto.h>
>>   #include <asm/memtype.h>
>>   #include <asm/set_memory.h>
>> +#include <asm/hyperv-tlfs.h>
>> +#include <asm/mshyperv.h>
>>   
>>   #include "../mm_internal.h"
>>   
>> @@ -1986,8 +1988,14 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
>>   	int ret;
>>   
>>   	/* Nothing to do if memory encryption is not active */
>> -	if (!mem_encrypt_active())
>> +	if (hv_is_isolation_supported()) {
>> +		return hv_set_mem_host_visibility((void *)addr,
>> +				numpages * HV_HYP_PAGE_SIZE,
>> +				enc ? VMBUS_PAGE_NOT_VISIBLE
>> +				: VMBUS_PAGE_VISIBLE_READ_WRITE);
> 
> Put all this gunk in a hv-specific function somewhere in hv-land which
> you only call from here. This way you probably won't even need to export
> hv_set_mem_host_visibility() and so on...
> 
Good idea. Will update. Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread
 
- * Re: [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support
  2021-05-30 15:06 ` [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support Tianyu Lan
  2021-05-30 18:25   ` Borislav Petkov
@ 2021-06-10  9:47   ` Vitaly Kuznetsov
  2021-06-10 14:18     ` Tianyu Lan
  1 sibling, 1 reply; 49+ messages in thread
From: Vitaly Kuznetsov @ 2021-06-10  9:47 UTC (permalink / raw)
  To: Tianyu Lan, kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo,
	bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy,
	boris.ostrovsky, jgross, sstabellini, joro, will, xen-devel,
	davem, kuba, jejb, martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	thomas.lendacky, brijesh.singh, sunilmut
Tianyu Lan <ltykernel@gmail.com> writes:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>
> Add new hvcall guest address host visibility support. Mark vmbus
> ring buffer visible to host when create gpadl buffer and mark back
> to not visible when tear down gpadl buffer.
>
> Co-developed-by: Sunil Muthuswamy <sunilmut@microsoft.com>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  arch/x86/hyperv/Makefile           |   2 +-
>  arch/x86/hyperv/ivm.c              | 106 +++++++++++++++++++++++++++++
>  arch/x86/include/asm/hyperv-tlfs.h |  24 +++++++
>  arch/x86/include/asm/mshyperv.h    |   4 +-
>  arch/x86/mm/pat/set_memory.c       |  10 ++-
>  drivers/hv/channel.c               |  38 ++++++++++-
>  include/asm-generic/hyperv-tlfs.h  |   1 +
>  include/linux/hyperv.h             |  10 +++
>  8 files changed, 190 insertions(+), 5 deletions(-)
>  create mode 100644 arch/x86/hyperv/ivm.c
>
> diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
> index 48e2c51464e8..5d2de10809ae 100644
> --- a/arch/x86/hyperv/Makefile
> +++ b/arch/x86/hyperv/Makefile
> @@ -1,5 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0-only
> -obj-y			:= hv_init.o mmu.o nested.o irqdomain.o
> +obj-y			:= hv_init.o mmu.o nested.o irqdomain.o ivm.o
>  obj-$(CONFIG_X86_64)	+= hv_apic.o hv_proc.o
>  
>  ifdef CONFIG_X86_64
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> new file mode 100644
> index 000000000000..fad1d3024056
> --- /dev/null
> +++ b/arch/x86/hyperv/ivm.c
> @@ -0,0 +1,106 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Hyper-V Isolation VM interface with paravisor and hypervisor
> + *
> + * Author:
> + *  Tianyu Lan <Tianyu.Lan@microsoft.com>
> + */
> +
> +#include <linux/hyperv.h>
> +#include <linux/types.h>
> +#include <linux/bitfield.h>
> +#include <asm/io.h>
> +#include <asm/mshyperv.h>
> +
> +/*
> + * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
> + *
> + * In Isolation VM, all guest memory is encripted from host and guest
> + * needs to set memory visible to host via hvcall before sharing memory
> + * with host.
> + */
> +int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
> +{
> +	struct hv_gpa_range_for_visibility **input_pcpu, *input;
> +	u16 pages_processed;
> +	u64 hv_status;
> +	unsigned long flags;
> +
> +	/* no-op if partition isolation is not enabled */
> +	if (!hv_is_isolation_supported())
> +		return 0;
> +
> +	if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
> +		pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
> +			HV_MAX_MODIFY_GPA_REP_COUNT);
> +		return -EINVAL;
> +	}
> +
> +	local_irq_save(flags);
> +	input_pcpu = (struct hv_gpa_range_for_visibility **)
> +			this_cpu_ptr(hyperv_pcpu_input_arg);
> +	input = *input_pcpu;
> +	if (unlikely(!input)) {
> +		local_irq_restore(flags);
> +		return -EINVAL;
> +	}
> +
> +	input->partition_id = HV_PARTITION_ID_SELF;
> +	input->host_visibility = visibility;
> +	input->reserved0 = 0;
> +	input->reserved1 = 0;
> +	memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
> +	hv_status = hv_do_rep_hypercall(
> +			HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
> +			0, input, &pages_processed);
> +	local_irq_restore(flags);
> +
> +	if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
> +		return 0;
> +
> +	return hv_status & HV_HYPERCALL_RESULT_MASK;
> +}
> +EXPORT_SYMBOL(hv_mark_gpa_visibility);
> +
> +/*
> + * hv_set_mem_host_visibility - Set specified memory visible to host.
> + *
> + * In Isolation VM, all guest memory is encrypted from host and guest
> + * needs to set memory visible to host via hvcall before sharing memory
> + * with host. This function works as wrap of hv_mark_gpa_visibility()
> + * with memory base and size.
> + */
> +int hv_set_mem_host_visibility(void *kbuffer, size_t size,
> +			       enum vmbus_page_visibility visibility)
> +{
> +	int pagecount = size >> HV_HYP_PAGE_SHIFT;
> +	u64 *pfn_array;
> +	int ret = 0;
> +	int i, pfn;
> +
> +	if (!hv_is_isolation_supported())
> +		return 0;
> +
> +	pfn_array = vzalloc(HV_HYP_PAGE_SIZE);
> +	if (!pfn_array)
> +		return -ENOMEM;
> +
> +	for (i = 0, pfn = 0; i < pagecount; i++) {
> +		pfn_array[pfn] = virt_to_hvpfn(kbuffer + i * HV_HYP_PAGE_SIZE);
> +		pfn++;
> +
> +		if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
> +			ret |= hv_mark_gpa_visibility(pfn, pfn_array, visibility);
> +			pfn = 0;
> +
> +			if (ret)
> +				goto err_free_pfn_array;
> +		}
> +	}
> +
> + err_free_pfn_array:
> +	vfree(pfn_array);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(hv_set_mem_host_visibility);
> +
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 606f5cc579b2..632281b91b44 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -262,6 +262,17 @@ enum hv_isolation_type {
>  #define HV_X64_MSR_TIME_REF_COUNT	HV_REGISTER_TIME_REF_COUNT
>  #define HV_X64_MSR_REFERENCE_TSC	HV_REGISTER_REFERENCE_TSC
>  
> +/* Hyper-V GPA map flags */
> +#define HV_MAP_GPA_PERMISSIONS_NONE            0x0
> +#define HV_MAP_GPA_READABLE                    0x1
> +#define HV_MAP_GPA_WRITABLE                    0x2
> +
> +enum vmbus_page_visibility {
> +	VMBUS_PAGE_NOT_VISIBLE = 0,
> +	VMBUS_PAGE_VISIBLE_READ_ONLY = 1,
> +	VMBUS_PAGE_VISIBLE_READ_WRITE = 3
> +};
> +
Why do we need both flags and the enum? I don't see HV_MAP_GPA_* being
used anywhere and VMBUS_PAGE_VISIBLE_READ_WRITE looks like
HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE.
As this is used to communicate with the host, I'd suggest to avoid using
enum and just use flags everywhere.
>  /*
>   * Declare the MSR used to setup pages used to communicate with the hypervisor.
>   */
> @@ -561,4 +572,17 @@ enum hv_interrupt_type {
>  
>  #include <asm-generic/hyperv-tlfs.h>
>  
> +/* All input parameters should be in single page. */
> +#define HV_MAX_MODIFY_GPA_REP_COUNT		\
> +	((PAGE_SIZE / sizeof(u64)) - 2)
> +
> +/* HvCallModifySparseGpaPageHostVisibility hypercall */
> +struct hv_gpa_range_for_visibility {
> +	u64 partition_id;
> +	u32 host_visibility:2;
> +	u32 reserved0:30;
> +	u32 reserved1;
> +	u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT];
> +} __packed;
> +
>  #endif
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index aeacca7c4da8..6af9d55ffe3b 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -194,7 +194,9 @@ struct irq_domain *hv_create_pci_msi_domain(void);
>  int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
>  		struct hv_interrupt_entry *entry);
>  int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
> -
> +int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
> +int hv_set_mem_host_visibility(void *kbuffer, size_t size,
> +			       enum vmbus_page_visibility visibility);
>  #else /* CONFIG_HYPERV */
>  static inline void hyperv_init(void) {}
>  static inline void hyperv_setup_mmu_ops(void) {}
> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 156cd235659f..a82975600107 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -29,6 +29,8 @@
>  #include <asm/proto.h>
>  #include <asm/memtype.h>
>  #include <asm/set_memory.h>
> +#include <asm/hyperv-tlfs.h>
> +#include <asm/mshyperv.h>
>  
>  #include "../mm_internal.h"
>  
> @@ -1986,8 +1988,14 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
>  	int ret;
>  
>  	/* Nothing to do if memory encryption is not active */
> -	if (!mem_encrypt_active())
> +	if (hv_is_isolation_supported()) {
> +		return hv_set_mem_host_visibility((void *)addr,
> +				numpages * HV_HYP_PAGE_SIZE,
> +				enc ? VMBUS_PAGE_NOT_VISIBLE
> +				: VMBUS_PAGE_VISIBLE_READ_WRITE);
> +	} else if (!mem_encrypt_active()) {
>  		return 0;
> +	}
>  
>  	/* Should not be working on unaligned addresses */
>  	if (WARN_ONCE(addr & ~PAGE_MASK, "misaligned address: %#lx\n", addr))
> diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
> index f3761c73b074..01048bb07082 100644
> --- a/drivers/hv/channel.c
> +++ b/drivers/hv/channel.c
> @@ -17,6 +17,7 @@
>  #include <linux/hyperv.h>
>  #include <linux/uio.h>
>  #include <linux/interrupt.h>
> +#include <linux/set_memory.h>
>  #include <asm/page.h>
>  #include <asm/mshyperv.h>
>  
> @@ -465,7 +466,7 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
>  	struct list_head *curr;
>  	u32 next_gpadl_handle;
>  	unsigned long flags;
> -	int ret = 0;
> +	int ret = 0, index;
>  
>  	next_gpadl_handle =
>  		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
> @@ -474,6 +475,13 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
>  	if (ret)
>  		return ret;
>  
> +	ret = set_memory_decrypted((unsigned long)kbuffer,
> +				   HVPFN_UP(size));
> +	if (ret) {
> +		pr_warn("Failed to set host visibility.\n");
> +		return ret;
> +	}
> +
>  	init_completion(&msginfo->waitevent);
>  	msginfo->waiting_channel = channel;
>  
> @@ -539,6 +547,15 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
>  	/* At this point, we received the gpadl created msg */
>  	*gpadl_handle = gpadlmsg->gpadl;
>  
> +	if (type == HV_GPADL_BUFFER)
> +		index = 0;
> +	else
> +		index = channel->gpadl_range[1].gpadlhandle ? 2 : 1;
> +
> +	channel->gpadl_range[index].size = size;
> +	channel->gpadl_range[index].buffer = kbuffer;
> +	channel->gpadl_range[index].gpadlhandle = *gpadl_handle;
> +
>  cleanup:
>  	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
>  	list_del(&msginfo->msglistentry);
> @@ -549,6 +566,11 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
>  	}
>  
>  	kfree(msginfo);
> +
> +	if (ret)
> +		set_memory_encrypted((unsigned long)kbuffer,
> +				     HVPFN_UP(size));
> +
>  	return ret;
>  }
>  
> @@ -811,7 +833,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
>  	struct vmbus_channel_gpadl_teardown *msg;
>  	struct vmbus_channel_msginfo *info;
>  	unsigned long flags;
> -	int ret;
> +	int ret, i;
>  
>  	info = kzalloc(sizeof(*info) +
>  		       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
> @@ -859,6 +881,18 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
>  	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
>  
>  	kfree(info);
> +
> +	/* Find gpadl buffer virtual address and size. */
> +	for (i = 0; i < VMBUS_GPADL_RANGE_COUNT; i++)
> +		if (channel->gpadl_range[i].gpadlhandle == gpadl_handle)
> +			break;
> +
> +	if (set_memory_encrypted((unsigned long)channel->gpadl_range[i].buffer,
> +			HVPFN_UP(channel->gpadl_range[i].size)))
> +		pr_warn("Fail to set mem host visibility.\n");
> +
> +	channel->gpadl_range[i].gpadlhandle = 0;
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index 515c3fb06ab3..8a0219255545 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -158,6 +158,7 @@ struct ms_hyperv_tsc_page {
>  #define HVCALL_RETARGET_INTERRUPT		0x007e
>  #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
>  #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
> +#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
>  
>  /* Extended hypercalls */
>  #define HV_EXT_CALL_QUERY_CAPABILITIES		0x8001
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 2e859d2f9609..06eccaba10c5 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -809,6 +809,14 @@ struct vmbus_device {
>  
>  #define VMBUS_DEFAULT_MAX_PKT_SIZE 4096
>  
> +struct vmbus_gpadl_range {
> +	u32 gpadlhandle;
> +	u32 size;
> +	void *buffer;
> +};
> +
> +#define VMBUS_GPADL_RANGE_COUNT		3
> +
>  struct vmbus_channel {
>  	struct list_head listentry;
>  
> @@ -829,6 +837,8 @@ struct vmbus_channel {
>  	struct completion rescind_event;
>  
>  	u32 ringbuffer_gpadlhandle;
> +	/* GPADL_RING and Send/Receive GPADL_BUFFER. */
> +	struct vmbus_gpadl_range gpadl_range[VMBUS_GPADL_RANGE_COUNT];
>  
>  	/* Allocated memory for ring buffer */
>  	struct page *ringbuffer_page;
-- 
Vitaly
^ permalink raw reply	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support
  2021-06-10  9:47   ` Vitaly Kuznetsov
@ 2021-06-10 14:18     ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-10 14:18 UTC (permalink / raw)
  To: Vitaly Kuznetsov, kys, haiyangz, sthemmin, wei.liu, decui, tglx,
	mingo, bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy,
	boris.ostrovsky, jgross, sstabellini, joro, will, xen-devel,
	davem, kuba, jejb, martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	thomas.lendacky, brijesh.singh, sunilmut
Hi Vitaly:
	Thanks for your review.
On 6/10/2021 5:47 PM, Vitaly Kuznetsov wrote:
>> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
>> index 606f5cc579b2..632281b91b44 100644
>> --- a/arch/x86/include/asm/hyperv-tlfs.h
>> +++ b/arch/x86/include/asm/hyperv-tlfs.h
>> @@ -262,6 +262,17 @@ enum hv_isolation_type {
>>   #define HV_X64_MSR_TIME_REF_COUNT	HV_REGISTER_TIME_REF_COUNT
>>   #define HV_X64_MSR_REFERENCE_TSC	HV_REGISTER_REFERENCE_TSC
>>   
>> +/* Hyper-V GPA map flags */
>> +#define HV_MAP_GPA_PERMISSIONS_NONE            0x0
>> +#define HV_MAP_GPA_READABLE                    0x1
>> +#define HV_MAP_GPA_WRITABLE                    0x2
>> +
>> +enum vmbus_page_visibility {
>> +	VMBUS_PAGE_NOT_VISIBLE = 0,
>> +	VMBUS_PAGE_VISIBLE_READ_ONLY = 1,
>> +	VMBUS_PAGE_VISIBLE_READ_WRITE = 3
>> +};
>> +
> Why do we need both flags and the enum? I don't see HV_MAP_GPA_* being
> used anywhere and VMBUS_PAGE_VISIBLE_READ_WRITE looks like
> HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE.
> 
> As this is used to communicate with the host, I'd suggest to avoid using
> enum and just use flags everywhere.
> 
Nice catch. Will update in the next version.
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread
 
 
- * [RFC PATCH V3 04/11] HV: Add Write/Read MSR registers via ghcb
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (2 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 03/11] x86/Hyper-V: Add new hvcall guest address host visibility support Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-06-09 12:46   ` Joerg Roedel
  2021-05-30 15:06 ` [RFC PATCH V3 05/11] HV: Add ghcb hvcall support for SNP VM Tianyu Lan
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Hyper-V provides GHCB protocol to write Synthetic Interrupt
Controller MSR registers and these registers are emulated by
Hypervisor rather than paravisor in VMPL0.
Hyper-V requests to write SINTx MSR registers twice(once via
GHCB and once via wrmsr instruction emulated by paravisor including
the proxy bit 21). Guest OS ID MSR also needs to be set via GHCB.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 arch/x86/hyperv/hv_init.c       |  26 +------
 arch/x86/hyperv/ivm.c           | 125 ++++++++++++++++++++++++++++++++
 arch/x86/include/asm/mshyperv.h |  78 +++++++++++++++++++-
 arch/x86/kernel/cpu/mshyperv.c  |   3 +
 drivers/hv/hv.c                 | 114 +++++++++++++++++++----------
 include/asm-generic/mshyperv.h  |   4 +-
 6 files changed, 287 insertions(+), 63 deletions(-)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index dc74d01cb859..e35a3c1556b8 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -472,6 +472,9 @@ void __init hyperv_init(void)
 
 		ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
 		*ghcb_base = ghcb_va;
+
+		/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
+		hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
 	}
 
 	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -564,6 +567,7 @@ void hyperv_cleanup(void)
 
 	/* Reset our OS id */
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+	hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
 
 	/*
 	 * Reset hypercall page reference before reset the page,
@@ -645,28 +649,6 @@ bool hv_is_hibernation_supported(void)
 }
 EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
 
-enum hv_isolation_type hv_get_isolation_type(void)
-{
-	if (!(ms_hyperv.priv_high & HV_ISOLATION))
-		return HV_ISOLATION_TYPE_NONE;
-	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
-}
-EXPORT_SYMBOL_GPL(hv_get_isolation_type);
-
-bool hv_is_isolation_supported(void)
-{
-	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
-}
-EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
-
-DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
-
-bool hv_isolation_type_snp(void)
-{
-	return static_branch_unlikely(&isolation_type_snp);
-}
-EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
-
 /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
 bool hv_query_ext_cap(u64 cap_query)
 {
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index fad1d3024056..fd6dd804beef 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -6,12 +6,137 @@
  *  Tianyu Lan <Tianyu.Lan@microsoft.com>
  */
 
+#include <linux/types.h>
+#include <linux/bitfield.h>
 #include <linux/hyperv.h>
 #include <linux/types.h>
 #include <linux/bitfield.h>
 #include <asm/io.h>
+#include <asm/svm.h>
+#include <asm/sev-es.h>
 #include <asm/mshyperv.h>
 
+union hv_ghcb {
+	struct ghcb ghcb;
+} __packed __aligned(PAGE_SIZE);
+
+void hv_ghcb_msr_write(u64 msr, u64 value)
+{
+	union hv_ghcb *hv_ghcb;
+	void **ghcb_base;
+	unsigned long flags;
+
+	if (!ms_hyperv.ghcb_base)
+		return;
+
+	local_irq_save(flags);
+	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+	hv_ghcb = (union hv_ghcb *)*ghcb_base;
+	if (!hv_ghcb) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
+
+	hv_ghcb->ghcb.protocol_version = 1;
+	hv_ghcb->ghcb.ghcb_usage = 0;
+
+	ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
+	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
+	ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
+	ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32);
+	ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 1);
+	ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
+
+	VMGEXIT();
+
+	if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
+		pr_warn("Fail to write msr via ghcb %llx.\n", msr);
+
+	local_irq_restore(flags);
+}
+
+void hv_ghcb_msr_read(u64 msr, u64 *value)
+{
+	union hv_ghcb *hv_ghcb;
+	void **ghcb_base;
+	unsigned long flags;
+
+	if (!ms_hyperv.ghcb_base)
+		return;
+
+	local_irq_save(flags);
+	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+	hv_ghcb = (union hv_ghcb *)*ghcb_base;
+	if (!hv_ghcb) {
+		local_irq_restore(flags);
+		return;
+	}
+
+	memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
+	hv_ghcb->ghcb.protocol_version = 1;
+	hv_ghcb->ghcb.ghcb_usage = 0;
+
+	ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
+	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
+	ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 0);
+	ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
+
+	VMGEXIT();
+
+	if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
+		pr_warn("Fail to read msr via ghcb %llx.\n", msr);
+	else
+		*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
+			| ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
+	local_irq_restore(flags);
+}
+
+void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value)
+{
+	hv_ghcb_msr_read(msr, value);
+}
+EXPORT_SYMBOL_GPL(hv_sint_rdmsrl_ghcb);
+
+void hv_sint_wrmsrl_ghcb(u64 msr, u64 value)
+{
+	hv_ghcb_msr_write(msr, value);
+
+	/* Write proxy bit vua wrmsrl instruction. */
+	if (msr >= HV_X64_MSR_SINT0 && msr <= HV_X64_MSR_SINT15)
+		wrmsrl(msr, value | 1 << 20);
+}
+EXPORT_SYMBOL_GPL(hv_sint_wrmsrl_ghcb);
+
+void hv_signal_eom_ghcb(void)
+{
+	hv_sint_wrmsrl_ghcb(HV_X64_MSR_EOM, 0);
+}
+EXPORT_SYMBOL_GPL(hv_signal_eom_ghcb);
+
+enum hv_isolation_type hv_get_isolation_type(void)
+{
+	if (!(ms_hyperv.priv_high & HV_ISOLATION))
+		return HV_ISOLATION_TYPE_NONE;
+	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
+}
+EXPORT_SYMBOL_GPL(hv_get_isolation_type);
+
+bool hv_is_isolation_supported(void)
+{
+	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
+}
+EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
+
+DEFINE_STATIC_KEY_FALSE(isolation_type_snp);
+
+bool hv_isolation_type_snp(void)
+{
+	return static_branch_unlikely(&isolation_type_snp);
+}
+EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
+
 /*
  * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
  *
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 6af9d55ffe3b..eec7f3357d51 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -30,6 +30,63 @@ static inline u64 hv_get_register(unsigned int reg)
 	return value;
 }
 
+#define hv_get_sint_reg(val, reg) {		\
+	if (hv_isolation_type_snp())		\
+		hv_get_##reg##_ghcb(&val);	\
+	else					\
+		rdmsrl(HV_X64_MSR_##reg, val);	\
+	}
+
+#define hv_set_sint_reg(val, reg) {		\
+	if (hv_isolation_type_snp())		\
+		hv_set_##reg##_ghcb(val);	\
+	else					\
+		wrmsrl(HV_X64_MSR_##reg, val);	\
+	}
+
+
+#define hv_get_simp(val) hv_get_sint_reg(val, SIMP)
+#define hv_get_siefp(val) hv_get_sint_reg(val, SIEFP)
+
+#define hv_set_simp(val) hv_set_sint_reg(val, SIMP)
+#define hv_set_siefp(val) hv_set_sint_reg(val, SIEFP)
+
+#define hv_get_synic_state(val) {			\
+	if (hv_isolation_type_snp())			\
+		hv_get_synic_state_ghcb(&val);		\
+	else						\
+		rdmsrl(HV_X64_MSR_SCONTROL, val);	\
+	}
+#define hv_set_synic_state(val) {			\
+	if (hv_isolation_type_snp())			\
+		hv_set_synic_state_ghcb(val);		\
+	else						\
+		wrmsrl(HV_X64_MSR_SCONTROL, val);	\
+	}
+
+#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index)
+
+#define hv_signal_eom() {			 \
+	if (hv_isolation_type_snp() &&		 \
+	    old_msg_type != HVMSG_TIMER_EXPIRED) \
+		hv_signal_eom_ghcb();		 \
+	else					 \
+		wrmsrl(HV_X64_MSR_EOM, 0);	 \
+	}
+
+#define hv_get_synint_state(int_num, val) {		\
+	if (hv_isolation_type_snp())			\
+		hv_get_synint_state_ghcb(int_num, &val);\
+	else						\
+		rdmsrl(HV_X64_MSR_SINT0 + int_num, val);\
+	}
+#define hv_set_synint_state(int_num, val) {		\
+	if (hv_isolation_type_snp())			\
+		hv_set_synint_state_ghcb(int_num, val);	\
+	else						\
+		wrmsrl(HV_X64_MSR_SINT0 + int_num, val);\
+	}
+
 #define hv_get_raw_timer() rdtsc_ordered()
 
 void hyperv_vector_handler(struct pt_regs *regs);
@@ -197,6 +254,25 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
 int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
 int hv_set_mem_host_visibility(void *kbuffer, size_t size,
 			       enum vmbus_page_visibility visibility);
+void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
+void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
+void hv_signal_eom_ghcb(void);
+void hv_ghcb_msr_write(u64 msr, u64 value);
+void hv_ghcb_msr_read(u64 msr, u64 *value);
+
+#define hv_get_synint_state_ghcb(int_num, val)			\
+	hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
+#define hv_set_synint_state_ghcb(int_num, val) \
+	hv_sint_wrmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
+
+#define hv_get_SIMP_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIMP, val)
+#define hv_set_SIMP_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIMP, val)
+
+#define hv_get_SIEFP_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIEFP, val)
+#define hv_set_SIEFP_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIEFP, val)
+
+#define hv_get_synic_state_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
+#define hv_set_synic_state_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
 #else /* CONFIG_HYPERV */
 static inline void hyperv_init(void) {}
 static inline void hyperv_setup_mmu_ops(void) {}
@@ -213,9 +289,9 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
 {
 	return -1;
 }
+static inline void hv_signal_eom_ghcb(void) { };
 #endif /* CONFIG_HYPERV */
 
-
 #include <asm-generic/mshyperv.h>
 
 #endif
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index d1ca36224657..c19e14ec8eec 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -325,6 +325,9 @@ static void __init ms_hyperv_init_platform(void)
 
 		pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
 			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
+
+		if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)
+			static_branch_enable(&isolation_type_snp);
 	}
 
 	if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index e83507f49676..28faa8364952 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -136,17 +136,24 @@ int hv_synic_alloc(void)
 		tasklet_init(&hv_cpu->msg_dpc,
 			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);
 
-		hv_cpu->synic_message_page =
-			(void *)get_zeroed_page(GFP_ATOMIC);
-		if (hv_cpu->synic_message_page == NULL) {
-			pr_err("Unable to allocate SYNIC message page\n");
-			goto err;
-		}
+		/*
+		 * Synic message and event pages are allocated by paravisor.
+		 * Skip these pages allocation here.
+		 */
+		if (!hv_isolation_type_snp()) {
+			hv_cpu->synic_message_page =
+				(void *)get_zeroed_page(GFP_ATOMIC);
+			if (hv_cpu->synic_message_page == NULL) {
+				pr_err("Unable to allocate SYNIC message page\n");
+				goto err;
+			}
 
-		hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
-		if (hv_cpu->synic_event_page == NULL) {
-			pr_err("Unable to allocate SYNIC event page\n");
-			goto err;
+			hv_cpu->synic_event_page =
+				(void *)get_zeroed_page(GFP_ATOMIC);
+			if (hv_cpu->synic_event_page == NULL) {
+				pr_err("Unable to allocate SYNIC event page\n");
+				goto err;
+			}
 		}
 
 		hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -173,10 +180,17 @@ void hv_synic_free(void)
 	for_each_present_cpu(cpu) {
 		struct hv_per_cpu_context *hv_cpu
 			= per_cpu_ptr(hv_context.cpu_context, cpu);
+		free_page((unsigned long)hv_cpu->post_msg_page);
+
+		/*
+		 * Synic message and event pages are allocated by paravisor.
+		 * Skip free these pages here.
+		 */
+		if (hv_isolation_type_snp())
+			continue;
 
 		free_page((unsigned long)hv_cpu->synic_event_page);
 		free_page((unsigned long)hv_cpu->synic_message_page);
-		free_page((unsigned long)hv_cpu->post_msg_page);
 	}
 
 	kfree(hv_context.hv_numa_map);
@@ -199,26 +213,43 @@ void hv_synic_enable_regs(unsigned int cpu)
 	union hv_synic_scontrol sctrl;
 
 	/* Setup the Synic's message page */
-	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+	hv_get_simp(simp.as_uint64);
 	simp.simp_enabled = 1;
-	simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
-		>> HV_HYP_PAGE_SHIFT;
 
-	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+	if (hv_isolation_type_snp()) {
+		hv_cpu->synic_message_page
+			= ioremap_cache(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
+					HV_HYP_PAGE_SIZE);
+		if (!hv_cpu->synic_message_page)
+			pr_err("Fail to map syinc message page.\n");
+	} else {
+		simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
+			>> HV_HYP_PAGE_SHIFT;
+	}
+
+	hv_set_simp(simp.as_uint64);
 
 	/* Setup the Synic's event page */
-	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+	hv_get_siefp(siefp.as_uint64);
 	siefp.siefp_enabled = 1;
-	siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
-		>> HV_HYP_PAGE_SHIFT;
 
-	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+	if (hv_isolation_type_snp()) {
+		hv_cpu->synic_event_page =
+			ioremap_cache(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
+				      HV_HYP_PAGE_SIZE);
+
+		if (!hv_cpu->synic_event_page)
+			pr_err("Fail to map syinc event page.\n");
+	} else {
+		siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
+			>> HV_HYP_PAGE_SHIFT;
+	}
+	hv_set_siefp(siefp.as_uint64);
 
 	/* Setup the shared SINT. */
 	if (vmbus_irq != -1)
 		enable_percpu_irq(vmbus_irq, 0);
-	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
-					VMBUS_MESSAGE_SINT);
+	hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 
 	shared_sint.vector = vmbus_interrupt;
 	shared_sint.masked = false;
@@ -233,14 +264,12 @@ void hv_synic_enable_regs(unsigned int cpu)
 #else
 	shared_sint.auto_eoi = 0;
 #endif
-	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
-				shared_sint.as_uint64);
+	hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 
 	/* Enable the global synic bit */
-	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+	hv_get_synic_state(sctrl.as_uint64);
 	sctrl.enable = 1;
-
-	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+	hv_set_synic_state(sctrl.as_uint64);
 }
 
 int hv_synic_init(unsigned int cpu)
@@ -262,32 +291,39 @@ void hv_synic_disable_regs(unsigned int cpu)
 	union hv_synic_siefp siefp;
 	union hv_synic_scontrol sctrl;
 
-	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
-					VMBUS_MESSAGE_SINT);
-
+	hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
 	shared_sint.masked = 1;
+	hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
 
 	/* Need to correctly cleanup in the case of SMP!!! */
 	/* Disable the interrupt */
-	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
-				shared_sint.as_uint64);
+	hv_get_simp(simp.as_uint64);
 
-	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+	/*
+	 * In Isolation VM, sim and sief pages are allocated by
+	 * paravisor. These pages also will be used by kdump
+	 * kernel. So just reset enable bit here and keep page
+	 * addresses.
+	 */
 	simp.simp_enabled = 0;
-	simp.base_simp_gpa = 0;
+	if (!hv_isolation_type_snp())
+		simp.base_simp_gpa = 0;
 
-	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+	hv_set_simp(simp.as_uint64);
 
-	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+	hv_get_siefp(siefp.as_uint64);
 	siefp.siefp_enabled = 0;
-	siefp.base_siefp_gpa = 0;
 
-	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+	if (!hv_isolation_type_snp())
+		siefp.base_siefp_gpa = 0;
+
+	hv_set_siefp(siefp.as_uint64);
 
 	/* Disable the global synic bit */
-	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+	hv_get_synic_state(sctrl.as_uint64);
 	sctrl.enable = 0;
-	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+	hv_set_synic_state(sctrl.as_uint64);
 
 	if (vmbus_irq != -1)
 		disable_percpu_irq(vmbus_irq);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 2914e27b0429..63e0de2a7c54 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -23,6 +23,7 @@
 #include <linux/bitops.h>
 #include <linux/cpumask.h>
 #include <asm/ptrace.h>
+#include <asm/mshyperv.h>
 #include <asm/hyperv-tlfs.h>
 
 struct ms_hyperv_info {
@@ -51,6 +52,7 @@ extern struct ms_hyperv_info ms_hyperv;
 
 extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
 extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
+extern bool hv_isolation_type_snp(void);
 
 /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
 static inline int hv_result(u64 status)
@@ -145,7 +147,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
 		 * possibly deliver another msg from the
 		 * hypervisor
 		 */
-		hv_set_register(HV_REGISTER_EOM, 0);
+		hv_signal_eom();
 	}
 }
 
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 04/11] HV: Add Write/Read MSR registers via ghcb
  2021-05-30 15:06 ` [RFC PATCH V3 04/11] HV: Add Write/Read MSR registers via ghcb Tianyu Lan
@ 2021-06-09 12:46   ` Joerg Roedel
  2021-06-10 14:15     ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Joerg Roedel @ 2021-06-09 12:46 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, will, xen-devel, davem, kuba, jejb, martin.petersen,
	iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On Sun, May 30, 2021 at 11:06:21AM -0400, Tianyu Lan wrote:
> +void hv_ghcb_msr_write(u64 msr, u64 value)
> +{
> +	union hv_ghcb *hv_ghcb;
> +	void **ghcb_base;
> +	unsigned long flags;
> +
> +	if (!ms_hyperv.ghcb_base)
> +		return;
> +
> +	local_irq_save(flags);
> +	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> +	hv_ghcb = (union hv_ghcb *)*ghcb_base;
> +	if (!hv_ghcb) {
> +		local_irq_restore(flags);
> +		return;
> +	}
> +
> +	memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
> +
> +	hv_ghcb->ghcb.protocol_version = 1;
> +	hv_ghcb->ghcb.ghcb_usage = 0;
> +
> +	ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
> +	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
> +	ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
> +	ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32);
> +	ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 1);
> +	ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
> +
> +	VMGEXIT();
This is not safe to use from NMI context. You need at least some
checking or WARN_ON/assertion/whatever to catch cases where this is
violated. Otherwise it will result in some hard to debug bug reports.
Regards,
	Joerg
^ permalink raw reply	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 04/11] HV: Add Write/Read MSR registers via ghcb
  2021-06-09 12:46   ` Joerg Roedel
@ 2021-06-10 14:15     ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-10 14:15 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, will, xen-devel, davem, kuba, jejb, martin.petersen,
	iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/9/2021 8:46 PM, Joerg Roedel wrote:
> On Sun, May 30, 2021 at 11:06:21AM -0400, Tianyu Lan wrote:
>> +void hv_ghcb_msr_write(u64 msr, u64 value)
>> +{
>> +	union hv_ghcb *hv_ghcb;
>> +	void **ghcb_base;
>> +	unsigned long flags;
>> +
>> +	if (!ms_hyperv.ghcb_base)
>> +		return;
>> +
>> +	local_irq_save(flags);
>> +	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
>> +	hv_ghcb = (union hv_ghcb *)*ghcb_base;
>> +	if (!hv_ghcb) {
>> +		local_irq_restore(flags);
>> +		return;
>> +	}
>> +
>> +	memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
>> +
>> +	hv_ghcb->ghcb.protocol_version = 1;
>> +	hv_ghcb->ghcb.ghcb_usage = 0;
>> +
>> +	ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
>> +	ghcb_set_rcx(&hv_ghcb->ghcb, msr);
>> +	ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
>> +	ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32);
>> +	ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 1);
>> +	ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
>> +
>> +	VMGEXIT();
> 
> This is not safe to use from NMI context. You need at least some
> checking or WARN_ON/assertion/whatever to catch cases where this is
> violated. Otherwise it will result in some hard to debug bug reports.
> 
Nice catch. Will update in the next version.
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread
 
 
- * [RFC PATCH V3 05/11] HV: Add ghcb hvcall support for SNP VM
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (3 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 04/11] HV: Add Write/Read MSR registers via ghcb Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-06-09 12:49   ` Joerg Roedel
  2021-05-30 15:06 ` [RFC PATCH V3 06/11] HV/Vmbus: Add SNP support for VMbus channel initiate message Tianyu Lan
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Hyper-V provides ghcb hvcall to handle VMBus
HVCALL_SIGNAL_EVENT and HVCALL_POST_MESSAGE
msg in SNP Isolation VM. Add such support.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 arch/x86/hyperv/ivm.c           | 69 +++++++++++++++++++++++++++++++++
 arch/x86/include/asm/mshyperv.h |  1 +
 drivers/hv/connection.c         |  6 ++-
 drivers/hv/hv.c                 |  8 +++-
 4 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index fd6dd804beef..e687fca68ba3 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -18,8 +18,77 @@
 
 union hv_ghcb {
 	struct ghcb ghcb;
+	struct {
+		u64 hypercalldata[509];
+		u64 outputgpa;
+		union {
+			union {
+				struct {
+					u32 callcode        : 16;
+					u32 isfast          : 1;
+					u32 reserved1       : 14;
+					u32 isnested        : 1;
+					u32 countofelements : 12;
+					u32 reserved2       : 4;
+					u32 repstartindex   : 12;
+					u32 reserved3       : 4;
+				};
+				u64 asuint64;
+			} hypercallinput;
+			union {
+				struct {
+					u16 callstatus;
+					u16 reserved1;
+					u32 elementsprocessed : 12;
+					u32 reserved2         : 20;
+				};
+				u64 asunit64;
+			} hypercalloutput;
+		};
+		u64 reserved2;
+	} hypercall;
 } __packed __aligned(PAGE_SIZE);
 
+u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
+{
+	union hv_ghcb *hv_ghcb;
+	void **ghcb_base;
+	unsigned long flags;
+
+	if (!ms_hyperv.ghcb_base)
+		return -EFAULT;
+
+	local_irq_save(flags);
+	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+	hv_ghcb = (union hv_ghcb *)*ghcb_base;
+	if (!hv_ghcb) {
+		local_irq_restore(flags);
+		return -EFAULT;
+	}
+
+	memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
+	hv_ghcb->ghcb.protocol_version = 1;
+	hv_ghcb->ghcb.ghcb_usage = 1;
+
+	hv_ghcb->hypercall.outputgpa = (u64)output;
+	hv_ghcb->hypercall.hypercallinput.asuint64 = 0;
+	hv_ghcb->hypercall.hypercallinput.callcode = control;
+
+	if (input_size)
+		memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size);
+
+	VMGEXIT();
+
+	hv_ghcb->ghcb.ghcb_usage = 0xffffffff;
+	memset(hv_ghcb->ghcb.save.valid_bitmap, 0,
+	       sizeof(hv_ghcb->ghcb.save.valid_bitmap));
+
+	local_irq_restore(flags);
+
+	return hv_ghcb->hypercall.hypercalloutput.callstatus;
+}
+EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
+
 void hv_ghcb_msr_write(u64 msr, u64 value)
 {
 	union hv_ghcb *hv_ghcb;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index eec7f3357d51..51dfbd040930 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -259,6 +259,7 @@ void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
 void hv_signal_eom_ghcb(void);
 void hv_ghcb_msr_write(u64 msr, u64 value);
 void hv_ghcb_msr_read(u64 msr, u64 *value);
+u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
 
 #define hv_get_synint_state_ghcb(int_num, val)			\
 	hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 311cd005b3be..186fd4c8acd4 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -445,6 +445,10 @@ void vmbus_set_event(struct vmbus_channel *channel)
 
 	++channel->sig_events;
 
-	hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
+	if (hv_isolation_type_snp())
+		hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
+				NULL, sizeof(u64));
+	else
+		hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
 }
 EXPORT_SYMBOL_GPL(vmbus_set_event);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 28faa8364952..03bcc831b034 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -97,7 +97,13 @@ int hv_post_message(union hv_connection_id connection_id,
 	aligned_msg->payload_size = payload_size;
 	memcpy((void *)aligned_msg->payload, payload, payload_size);
 
-	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
+	if (hv_isolation_type_snp())
+		status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
+				(void *)aligned_msg, NULL,
+				sizeof(struct hv_input_post_message));
+	else
+		status = hv_do_hypercall(HVCALL_POST_MESSAGE,
+				aligned_msg, NULL);
 
 	/* Preemption must remain disabled until after the hypercall
 	 * so some other thread can't get scheduled onto this cpu and
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 05/11] HV: Add ghcb hvcall support for SNP VM
  2021-05-30 15:06 ` [RFC PATCH V3 05/11] HV: Add ghcb hvcall support for SNP VM Tianyu Lan
@ 2021-06-09 12:49   ` Joerg Roedel
  0 siblings, 0 replies; 49+ messages in thread
From: Joerg Roedel @ 2021-06-09 12:49 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, will, xen-devel, davem, kuba, jejb, martin.petersen,
	iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On Sun, May 30, 2021 at 11:06:22AM -0400, Tianyu Lan wrote:
> +u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
> +{
> +	union hv_ghcb *hv_ghcb;
> +	void **ghcb_base;
> +	unsigned long flags;
> +
> +	if (!ms_hyperv.ghcb_base)
> +		return -EFAULT;
> +
> +	local_irq_save(flags);
> +	ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> +	hv_ghcb = (union hv_ghcb *)*ghcb_base;
> +	if (!hv_ghcb) {
> +		local_irq_restore(flags);
> +		return -EFAULT;
> +	}
> +
> +	memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
> +	hv_ghcb->ghcb.protocol_version = 1;
> +	hv_ghcb->ghcb.ghcb_usage = 1;
> +
> +	hv_ghcb->hypercall.outputgpa = (u64)output;
> +	hv_ghcb->hypercall.hypercallinput.asuint64 = 0;
> +	hv_ghcb->hypercall.hypercallinput.callcode = control;
> +
> +	if (input_size)
> +		memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size);
> +
> +	VMGEXIT();
Also not NMI-safe. When you re-use the existing GHCB setup code from
from SEV-ES code, you can also use sev_es_get/put_ghcb() which takes
care of re-using a GHCB already in use.
Regards,
	Joerg
^ permalink raw reply	[flat|nested] 49+ messages in thread
 
- * [RFC PATCH V3 06/11] HV/Vmbus: Add SNP support for VMbus channel initiate message
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (4 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 05/11] HV: Add ghcb hvcall support for SNP VM Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-05-30 15:06 ` [RFC PATCH V3 07/11] HV/Vmbus: Initialize VMbus ring buffer for Isolation VM Tianyu Lan
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
The monitor pages in the CHANNELMSG_INITIATE_CONTACT are shared
with host and so it's necessary to use hvcall to set them visible
to host. In Isolation VM with AMD SEV SNP, the access address
should be in the extra space which is above shared gpa boundary.
So remap these pages into the extra address(pa + shared_gpa_boundary).
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 drivers/hv/connection.c   | 62 +++++++++++++++++++++++++++++++++++++++
 drivers/hv/hyperv_vmbus.h |  1 +
 2 files changed, 63 insertions(+)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 186fd4c8acd4..389adc92f958 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -104,6 +104,12 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
 
 	msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
 	msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
+
+	if (hv_is_isolation_supported()) {
+		msg->monitor_page1 += ms_hyperv.shared_gpa_boundary;
+		msg->monitor_page2 += ms_hyperv.shared_gpa_boundary;
+	}
+
 	msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);
 
 	/*
@@ -148,6 +154,29 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
 		return -ECONNREFUSED;
 	}
 
+	if (hv_is_isolation_supported()) {
+		vmbus_connection.monitor_pages_va[0]
+			= vmbus_connection.monitor_pages[0];
+		vmbus_connection.monitor_pages[0]
+			= ioremap_cache(msg->monitor_page1, HV_HYP_PAGE_SIZE);
+		if (!vmbus_connection.monitor_pages[0])
+			return -ENOMEM;
+
+		vmbus_connection.monitor_pages_va[1]
+			= vmbus_connection.monitor_pages[1];
+		vmbus_connection.monitor_pages[1]
+			= ioremap_cache(msg->monitor_page2, HV_HYP_PAGE_SIZE);
+		if (!vmbus_connection.monitor_pages[1]) {
+			vunmap(vmbus_connection.monitor_pages[0]);
+			return -ENOMEM;
+		}
+
+		memset(vmbus_connection.monitor_pages[0], 0x00,
+		       HV_HYP_PAGE_SIZE);
+		memset(vmbus_connection.monitor_pages[1], 0x00,
+		       HV_HYP_PAGE_SIZE);
+	}
+
 	return ret;
 }
 
@@ -159,6 +188,7 @@ int vmbus_connect(void)
 	struct vmbus_channel_msginfo *msginfo = NULL;
 	int i, ret = 0;
 	__u32 version;
+	u64 pfn[2];
 
 	/* Initialize the vmbus connection */
 	vmbus_connection.conn_state = CONNECTING;
@@ -216,6 +246,16 @@ int vmbus_connect(void)
 		goto cleanup;
 	}
 
+	if (hv_is_isolation_supported()) {
+		pfn[0] = virt_to_hvpfn(vmbus_connection.monitor_pages[0]);
+		pfn[1] = virt_to_hvpfn(vmbus_connection.monitor_pages[1]);
+		if (hv_mark_gpa_visibility(2, pfn,
+				VMBUS_PAGE_VISIBLE_READ_WRITE)) {
+			ret = -EFAULT;
+			goto cleanup;
+		}
+	}
+
 	msginfo = kzalloc(sizeof(*msginfo) +
 			  sizeof(struct vmbus_channel_initiate_contact),
 			  GFP_KERNEL);
@@ -282,6 +322,8 @@ int vmbus_connect(void)
 
 void vmbus_disconnect(void)
 {
+	u64 pfn[2];
+
 	/*
 	 * First send the unload request to the host.
 	 */
@@ -301,6 +343,26 @@ void vmbus_disconnect(void)
 		vmbus_connection.int_page = NULL;
 	}
 
+	if (hv_is_isolation_supported()) {
+		if (vmbus_connection.monitor_pages_va[0]) {
+			vunmap(vmbus_connection.monitor_pages[0]);
+			vmbus_connection.monitor_pages[0]
+				= vmbus_connection.monitor_pages_va[0];
+			vmbus_connection.monitor_pages_va[0] = NULL;
+		}
+
+		if (vmbus_connection.monitor_pages_va[1]) {
+			vunmap(vmbus_connection.monitor_pages[1]);
+			vmbus_connection.monitor_pages[1]
+				= vmbus_connection.monitor_pages_va[1];
+			vmbus_connection.monitor_pages_va[1] = NULL;
+		}
+
+		pfn[0] = virt_to_hvpfn(vmbus_connection.monitor_pages[0]);
+		pfn[1] = virt_to_hvpfn(vmbus_connection.monitor_pages[1]);
+		hv_mark_gpa_visibility(2, pfn, VMBUS_PAGE_NOT_VISIBLE);
+	}
+
 	hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
 	hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
 	vmbus_connection.monitor_pages[0] = NULL;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 42f3d9d123a1..40bc0eff6665 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -240,6 +240,7 @@ struct vmbus_connection {
 	 * is child->parent notification
 	 */
 	struct hv_monitor_page *monitor_pages[2];
+	void *monitor_pages_va[2];
 	struct list_head chn_msg_list;
 	spinlock_t channelmsg_lock;
 
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * [RFC PATCH V3 07/11] HV/Vmbus: Initialize VMbus ring buffer for Isolation VM
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (5 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 06/11] HV/Vmbus: Add SNP support for VMbus channel initiate message Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-05-30 15:06 ` [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function Tianyu Lan
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
VMbus ring buffer are shared with host and it's need to
be accessed via extra address space of Isolation VM with
SNP support. This patch is to map the ring buffer
address in extra address space via ioremap(). HV host
visibility hvcall smears data in the ring buffer and
so reset the ring buffer memory to zero after calling
visibility hvcall.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 drivers/hv/Kconfig        |  1 +
 drivers/hv/channel.c      | 10 +++++
 drivers/hv/hyperv_vmbus.h |  2 +
 drivers/hv/ring_buffer.c  | 84 ++++++++++++++++++++++++++++++---------
 4 files changed, 79 insertions(+), 18 deletions(-)
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 66c794d92391..a8386998be40 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -7,6 +7,7 @@ config HYPERV
 	depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST
 	select PARAVIRT
 	select X86_HV_CALLBACK_VECTOR
+	select VMAP_PFN
 	help
 	  Select this option to run Linux as a Hyper-V client operating
 	  system.
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 01048bb07082..7350da9dbe97 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -707,6 +707,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
 	if (err)
 		goto error_clean_ring;
 
+	err = hv_ringbuffer_post_init(&newchannel->outbound,
+				      page, send_pages);
+	if (err)
+		goto error_free_gpadl;
+
+	err = hv_ringbuffer_post_init(&newchannel->inbound,
+				      &page[send_pages], recv_pages);
+	if (err)
+		goto error_free_gpadl;
+
 	/* Create and init the channel open message */
 	open_info = kzalloc(sizeof(*open_info) +
 			   sizeof(struct vmbus_channel_open_channel),
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 40bc0eff6665..15cd23a561f3 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -172,6 +172,8 @@ extern int hv_synic_cleanup(unsigned int cpu);
 /* Interface */
 
 void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
+int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info,
+		struct page *pages, u32 page_cnt);
 
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 		       struct page *pages, u32 pagecnt, u32 max_pkt_size);
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 2aee356840a2..d4f93fca1108 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -17,6 +17,8 @@
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/prefetch.h>
+#include <linux/io.h>
+#include <asm/mshyperv.h>
 
 #include "hyperv_vmbus.h"
 
@@ -179,43 +181,89 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
 	mutex_init(&channel->outbound.ring_buffer_mutex);
 }
 
-/* Initialize the ring buffer. */
-int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-		       struct page *pages, u32 page_cnt, u32 max_pkt_size)
+int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info,
+		       struct page *pages, u32 page_cnt)
 {
+	u64 physic_addr = page_to_pfn(pages) << PAGE_SHIFT;
+	unsigned long *pfns_wraparound;
+	void *vaddr;
 	int i;
-	struct page **pages_wraparound;
 
-	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
+	if (!hv_isolation_type_snp())
+		return 0;
+
+	physic_addr += ms_hyperv.shared_gpa_boundary;
 
 	/*
 	 * First page holds struct hv_ring_buffer, do wraparound mapping for
 	 * the rest.
 	 */
-	pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
+	pfns_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(unsigned long),
 				   GFP_KERNEL);
-	if (!pages_wraparound)
+	if (!pfns_wraparound)
 		return -ENOMEM;
 
-	pages_wraparound[0] = pages;
+	pfns_wraparound[0] = physic_addr >> PAGE_SHIFT;
 	for (i = 0; i < 2 * (page_cnt - 1); i++)
-		pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
-
-	ring_info->ring_buffer = (struct hv_ring_buffer *)
-		vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
-
-	kfree(pages_wraparound);
+		pfns_wraparound[i + 1] = (physic_addr >> PAGE_SHIFT) +
+			i % (page_cnt - 1) + 1;
 
-
-	if (!ring_info->ring_buffer)
+	vaddr = vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, PAGE_KERNEL_IO);
+	kfree(pfns_wraparound);
+	if (!vaddr)
 		return -ENOMEM;
 
-	ring_info->ring_buffer->read_index =
-		ring_info->ring_buffer->write_index = 0;
+	/* Clean memory after setting host visibility. */
+	memset((void *)vaddr, 0x00, page_cnt * PAGE_SIZE);
+
+	ring_info->ring_buffer = (struct hv_ring_buffer *)vaddr;
+	ring_info->ring_buffer->read_index = 0;
+	ring_info->ring_buffer->write_index = 0;
 
 	/* Set the feature bit for enabling flow control. */
 	ring_info->ring_buffer->feature_bits.value = 1;
 
+	return 0;
+}
+
+/* Initialize the ring buffer. */
+int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+		       struct page *pages, u32 page_cnt, u32 max_pkt_size)
+{
+	int i;
+	struct page **pages_wraparound;
+
+	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
+
+	if (!hv_isolation_type_snp()) {
+		/*
+		 * First page holds struct hv_ring_buffer, do wraparound mapping for
+		 * the rest.
+		 */
+		pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
+					   GFP_KERNEL);
+		if (!pages_wraparound)
+			return -ENOMEM;
+
+		pages_wraparound[0] = pages;
+		for (i = 0; i < 2 * (page_cnt - 1); i++)
+			pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+
+		ring_info->ring_buffer = (struct hv_ring_buffer *)
+			vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+
+		kfree(pages_wraparound);
+
+		if (!ring_info->ring_buffer)
+			return -ENOMEM;
+
+		ring_info->ring_buffer->read_index =
+			ring_info->ring_buffer->write_index = 0;
+
+		/* Set the feature bit for enabling flow control. */
+		ring_info->ring_buffer->feature_bits.value = 1;
+	}
+
 	ring_info->ring_size = page_cnt << PAGE_SHIFT;
 	ring_info->ring_size_div10_reciprocal =
 		reciprocal_value(ring_info->ring_size / 10);
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (6 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 07/11] HV/Vmbus: Initialize VMbus ring buffer for Isolation VM Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-06-07  6:43   ` Christoph Hellwig
  2021-05-30 15:06 ` [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM Tianyu Lan
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
For Hyper-V isolation VM with AMD SEV SNP, the bounce buffer(shared memory)
needs to be accessed via extra address space(e.g address above bit39).
Hyper-V code may remap extra address space outside of swiotlb. swiotlb_
bounce() needs to use remap virtual address to copy data from/to bounce
buffer. Add new interface swiotlb_set_bounce_remap() to do that.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 include/linux/swiotlb.h |  5 +++++
 kernel/dma/swiotlb.c    | 14 +++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 216854a5e513..43f53cf52f48 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -113,8 +113,13 @@ unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(void);
 void __init swiotlb_adjust_size(unsigned long size);
+void swiotlb_set_bounce_remap(unsigned char *vaddr);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
+static inline void swiotlb_set_bounce_remap(unsigned char *vaddr)
+{
+}
+
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return false;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8ca7d505d61c..fbc827ab5fb4 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -70,6 +70,7 @@ struct io_tlb_mem *io_tlb_default_mem;
  * not be bounced (unless SWIOTLB_FORCE is set).
  */
 static unsigned int max_segment;
+static unsigned char *swiotlb_bounce_remap_addr;
 
 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
 
@@ -334,6 +335,11 @@ void __init swiotlb_exit(void)
 	io_tlb_default_mem = NULL;
 }
 
+void swiotlb_set_bounce_remap(unsigned char *vaddr)
+{
+	swiotlb_bounce_remap_addr = vaddr;
+}
+
 /*
  * Bounce: copy the swiotlb buffer from or back to the original dma location
  */
@@ -345,7 +351,13 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 	phys_addr_t orig_addr = mem->slots[index].orig_addr;
 	size_t alloc_size = mem->slots[index].alloc_size;
 	unsigned long pfn = PFN_DOWN(orig_addr);
-	unsigned char *vaddr = phys_to_virt(tlb_addr);
+	unsigned char *vaddr;
+
+	if (swiotlb_bounce_remap_addr)
+		vaddr = swiotlb_bounce_remap_addr + tlb_addr -
+			io_tlb_default_mem->start;
+	else
+		vaddr = phys_to_virt(tlb_addr);
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-05-30 15:06 ` [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function Tianyu Lan
@ 2021-06-07  6:43   ` Christoph Hellwig
  2021-06-07 14:56     ` Tianyu Lan
  2021-06-14 13:49     ` Robin Murphy
  0 siblings, 2 replies; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-07  6:43 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen, iommu, linux-arch, linux-hyperv, linux-kernel,
	linux-scsi, netdev, vkuznets, thomas.lendacky, brijesh.singh,
	sunilmut
On Sun, May 30, 2021 at 11:06:25AM -0400, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> For Hyper-V isolation VM with AMD SEV SNP, the bounce buffer(shared memory)
> needs to be accessed via extra address space(e.g address above bit39).
> Hyper-V code may remap extra address space outside of swiotlb. swiotlb_
> bounce() needs to use remap virtual address to copy data from/to bounce
> buffer. Add new interface swiotlb_set_bounce_remap() to do that.
Why can't you use the bus_dma_region ranges to remap to your preferred
address?
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-07  6:43   ` Christoph Hellwig
@ 2021-06-07 14:56     ` Tianyu Lan
  2021-06-10 14:25       ` Tianyu Lan
  2021-06-14  7:12       ` Christoph Hellwig
  2021-06-14 13:49     ` Robin Murphy
  1 sibling, 2 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-07 14:56 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/7/2021 2:43 PM, Christoph Hellwig wrote:
> On Sun, May 30, 2021 at 11:06:25AM -0400, Tianyu Lan wrote:
>> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>
>> For Hyper-V isolation VM with AMD SEV SNP, the bounce buffer(shared memory)
>> needs to be accessed via extra address space(e.g address above bit39).
>> Hyper-V code may remap extra address space outside of swiotlb. swiotlb_
>> bounce() needs to use remap virtual address to copy data from/to bounce
>> buffer. Add new interface swiotlb_set_bounce_remap() to do that.
> 
> Why can't you use the bus_dma_region ranges to remap to your preferred
> address?
> 
Thanks for your suggestion.
These addresses in extra address space works as system memory mirror. 
The shared memory with host in Isolation VM needs to be accessed via 
extra address space which is above shared gpa boundary. During 
initializing swiotlb bounce buffer pool, only address bellow shared gpa 
boundary can be accepted by swiotlb API because it is treated as system 
memory and managed by memory management. This is why Hyper-V swiotlb 
bounce buffer pool needs to be allocated in Hyper-V code and map
associated physical address in extra address space. The patch target is
to add the new interface to set start virtual address of bounce buffer
pool and let swiotlb boucne buffer copy function to use right virtual 
address for extra address space.
bus_dma_region is to translate cpu physical address to dma address.
It can't modify the virtual address of bounce buffer pool and let
swiotlb code to copy data with right address. If some thing missed,
please correct me.
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-07 14:56     ` Tianyu Lan
@ 2021-06-10 14:25       ` Tianyu Lan
  2021-06-14  7:12       ` Christoph Hellwig
  1 sibling, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-10 14:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/7/2021 10:56 PM, Tianyu Lan wrote:
> 
> On 6/7/2021 2:43 PM, Christoph Hellwig wrote:
>> On Sun, May 30, 2021 at 11:06:25AM -0400, Tianyu Lan wrote:
>>> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>>
>>> For Hyper-V isolation VM with AMD SEV SNP, the bounce buffer(shared 
>>> memory)
>>> needs to be accessed via extra address space(e.g address above bit39).
>>> Hyper-V code may remap extra address space outside of swiotlb. swiotlb_
>>> bounce() needs to use remap virtual address to copy data from/to bounce
>>> buffer. Add new interface swiotlb_set_bounce_remap() to do that.
>>
>> Why can't you use the bus_dma_region ranges to remap to your preferred
>> address?
>>
> 
> Thanks for your suggestion.
> 
> These addresses in extra address space works as system memory mirror. 
> The shared memory with host in Isolation VM needs to be accessed via 
> extra address space which is above shared gpa boundary. During 
> initializing swiotlb bounce buffer pool, only address bellow shared gpa 
> boundary can be accepted by swiotlb API because it is treated as system 
> memory and managed by memory management. This is why Hyper-V swiotlb 
> bounce buffer pool needs to be allocated in Hyper-V code and map
> associated physical address in extra address space. The patch target is
> to add the new interface to set start virtual address of bounce buffer
> pool and let swiotlb boucne buffer copy function to use right virtual 
> address for extra address space.
> 
> bus_dma_region is to translate cpu physical address to dma address.
> It can't modify the virtual address of bounce buffer pool and let
> swiotlb code to copy data with right address. If some thing missed,
> please correct me.
> 
Hi Christoph:
	Sorry to bother you. Could you have a look at my previous reply?
I try figuring out the right way.
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-07 14:56     ` Tianyu Lan
  2021-06-10 14:25       ` Tianyu Lan
@ 2021-06-14  7:12       ` Christoph Hellwig
  2021-06-14 13:29         ` Tom Lendacky
  2021-06-14 13:37         ` Tianyu Lan
  1 sibling, 2 replies; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-14  7:12 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: Christoph Hellwig, kys, haiyangz, sthemmin, wei.liu, decui, tglx,
	mingo, bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, m.szyprowski, robin.murphy,
	boris.ostrovsky, jgross, sstabellini, joro, will, xen-devel,
	davem, kuba, jejb, martin.petersen, iommu, linux-arch,
	linux-hyperv, linux-kernel, linux-scsi, netdev, vkuznets,
	thomas.lendacky, brijesh.singh, sunilmut
On Mon, Jun 07, 2021 at 10:56:47PM +0800, Tianyu Lan wrote:
> These addresses in extra address space works as system memory mirror. The 
> shared memory with host in Isolation VM needs to be accessed via extra 
> address space which is above shared gpa boundary.
Why?
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-14  7:12       ` Christoph Hellwig
@ 2021-06-14 13:29         ` Tom Lendacky
  2021-06-14 13:37         ` Tianyu Lan
  1 sibling, 0 replies; 49+ messages in thread
From: Tom Lendacky @ 2021-06-14 13:29 UTC (permalink / raw)
  To: Christoph Hellwig, Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, brijesh.singh, sunilmut
On 6/14/21 2:12 AM, Christoph Hellwig wrote:
> On Mon, Jun 07, 2021 at 10:56:47PM +0800, Tianyu Lan wrote:
>> These addresses in extra address space works as system memory mirror. The 
>> shared memory with host in Isolation VM needs to be accessed via extra 
>> address space which is above shared gpa boundary.
> 
> Why?
> 
IIUC, this is using the vTOM feature of SEV-SNP. When this feature is
enabled for a VMPL level, any physical memory addresses below vTOM are
considered private/encrypted and any physical memory addresses above vTOM
are considered shared/unencrypted. With this option, you don't need a
fully enlightened guest that sets and clears page table encryption bits.
You just need the DMA buffers to be allocated in the proper range above vTOM.
See the section on "Virtual Machine Privilege Levels" in
https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf.
Thanks,
Tom
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-14  7:12       ` Christoph Hellwig
  2021-06-14 13:29         ` Tom Lendacky
@ 2021-06-14 13:37         ` Tianyu Lan
  2021-06-14 13:42           ` Tianyu Lan
  1 sibling, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-06-14 13:37 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/14/2021 3:12 PM, Christoph Hellwig wrote:
> On Mon, Jun 07, 2021 at 10:56:47PM +0800, Tianyu Lan wrote:
>> These addresses in extra address space works as system memory mirror. The
>> shared memory with host in Isolation VM needs to be accessed via extra
>> address space which is above shared gpa boundary.
> 
> Why?
> 
The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
memory(vTOM). Memory addresses below vTOM are automatically treated as
private while memory above vTOM is treated as shared. Using vTOM to
separate memory in this way avoids the need to augment the standard x86
page tables with C-bit markings, simplifying guest OS software.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-14 13:37         ` Tianyu Lan
@ 2021-06-14 13:42           ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-14 13:42 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/14/2021 9:37 PM, Tianyu Lan wrote:
> 
> 
> On 6/14/2021 3:12 PM, Christoph Hellwig wrote:
>> On Mon, Jun 07, 2021 at 10:56:47PM +0800, Tianyu Lan wrote:
>>> These addresses in extra address space works as system memory mirror. 
>>> The
>>> shared memory with host in Isolation VM needs to be accessed via extra
>>> address space which is above shared gpa boundary.
>>
>> Why?
>>
> 
> The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
> memory(vTOM). Memory addresses below vTOM are automatically treated as
> private while memory above vTOM is treated as shared. Using vTOM to
> separate memory in this way avoids the need to augment the standard x86
> page tables with C-bit markings, simplifying guest OS software.
Here is the spec link and vTOM description is in the page 14.
https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
 
 
 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-07  6:43   ` Christoph Hellwig
  2021-06-07 14:56     ` Tianyu Lan
@ 2021-06-14 13:49     ` Robin Murphy
  2021-06-14 15:32       ` Christoph Hellwig
  1 sibling, 1 reply; 49+ messages in thread
From: Robin Murphy @ 2021-06-14 13:49 UTC (permalink / raw)
  To: Christoph Hellwig, Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, boris.ostrovsky, jgross, sstabellini, joro, will,
	xen-devel, davem, kuba, jejb, martin.petersen, iommu, linux-arch,
	linux-hyperv, linux-kernel, linux-scsi, netdev, vkuznets,
	thomas.lendacky, brijesh.singh, sunilmut
On 2021-06-07 07:43, Christoph Hellwig wrote:
> On Sun, May 30, 2021 at 11:06:25AM -0400, Tianyu Lan wrote:
>> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>
>> For Hyper-V isolation VM with AMD SEV SNP, the bounce buffer(shared memory)
>> needs to be accessed via extra address space(e.g address above bit39).
>> Hyper-V code may remap extra address space outside of swiotlb. swiotlb_
>> bounce() needs to use remap virtual address to copy data from/to bounce
>> buffer. Add new interface swiotlb_set_bounce_remap() to do that.
> 
> Why can't you use the bus_dma_region ranges to remap to your preferred
> address?
FWIW, I think a better generalisation for this would be allowing 
set_memory_decrypted() to return an address rather than implicitly 
operating in-place, and hide all the various hypervisor hooks behind that.
Robin.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-14 13:49     ` Robin Murphy
@ 2021-06-14 15:32       ` Christoph Hellwig
  2021-06-15 15:24         ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-14 15:32 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Christoph Hellwig, Tianyu Lan, kys, haiyangz, sthemmin, wei.liu,
	decui, tglx, mingo, bp, x86, hpa, arnd, dave.hansen, luto, peterz,
	akpm, kirill.shutemov, rppt, hannes, cai, krish.sadhukhan,
	saravanand, Tianyu.Lan, konrad.wilk, m.szyprowski,
	boris.ostrovsky, jgross, sstabellini, joro, will, xen-devel,
	davem, kuba, jejb, martin.petersen, iommu, linux-arch,
	linux-hyperv, linux-kernel, linux-scsi, netdev, vkuznets,
	thomas.lendacky, brijesh.singh, sunilmut
On Mon, Jun 14, 2021 at 02:49:51PM +0100, Robin Murphy wrote:
> FWIW, I think a better generalisation for this would be allowing 
> set_memory_decrypted() to return an address rather than implicitly 
> operating in-place, and hide all the various hypervisor hooks behind that.
Yes, something like that would be a good idea.  As-is
set_memory_decrypted is a pretty horribly API anyway due to passing
the address as void, and taking a size parameter while it works in units
of pages.  So I'd very much welcome a major overhaul of this API.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-14 15:32       ` Christoph Hellwig
@ 2021-06-15 15:24         ` Tianyu Lan
  2021-07-12  9:40           ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-06-15 15:24 UTC (permalink / raw)
  To: Christoph Hellwig, Robin Murphy
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, boris.ostrovsky, jgross, sstabellini, joro, will,
	xen-devel, davem, kuba, jejb, martin.petersen, iommu, linux-arch,
	linux-hyperv, linux-kernel, linux-scsi, netdev, vkuznets,
	thomas.lendacky, brijesh.singh, sunilmut
On 6/14/2021 11:32 PM, Christoph Hellwig wrote:
> On Mon, Jun 14, 2021 at 02:49:51PM +0100, Robin Murphy wrote:
>> FWIW, I think a better generalisation for this would be allowing
>> set_memory_decrypted() to return an address rather than implicitly
>> operating in-place, and hide all the various hypervisor hooks behind that.
> 
> Yes, something like that would be a good idea.  As-is
> set_memory_decrypted is a pretty horribly API anyway due to passing
> the address as void, and taking a size parameter while it works in units
> of pages.  So I'd very much welcome a major overhaul of this API.
> 
Hi Christoph and Robin:
	Thanks for your suggestion. I will try this idea in the next version. 
Besides make the address translation into set_memory_
decrypted() and return address, do you want to make other changes to the 
API in order to make it more reasonable(e.g size parameter)?
Thanks
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function
  2021-06-15 15:24         ` Tianyu Lan
@ 2021-07-12  9:40           ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-07-12  9:40 UTC (permalink / raw)
  To: Christoph Hellwig, Robin Murphy
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, boris.ostrovsky, jgross, sstabellini, joro, will,
	xen-devel, davem, kuba, jejb, martin.petersen, iommu, linux-arch,
	linux-hyperv, linux-kernel, linux-scsi, netdev, vkuznets,
	thomas.lendacky, brijesh.singh, sunilmut
Hi Christoph and Robin:
      I introduced new interface set_memory_decrypted_map() to hide all
the hypervisor code behind it in the latest version. In current generic
code, only swiotlb bounce buffer needs to be decrypted and remapped in 
the same time and so keep set_memory_decrypted(). If there were more 
requests of set_memory_decrypted_map() for other platform, we may
replace set_memory_decrypted() step by step. Please have a look.
       https://lkml.org/lkml/2021/7/7/570
Thanks.
On 6/15/2021 11:24 PM, Tianyu Lan wrote:
> On 6/14/2021 11:32 PM, Christoph Hellwig wrote:
>> On Mon, Jun 14, 2021 at 02:49:51PM +0100, Robin Murphy wrote:
>>> FWIW, I think a better generalisation for this would be allowing
>>> set_memory_decrypted() to return an address rather than implicitly
>>> operating in-place, and hide all the various hypervisor hooks behind 
>>> that.
>>
>> Yes, something like that would be a good idea.  As-is
>> set_memory_decrypted is a pretty horribly API anyway due to passing
>> the address as void, and taking a size parameter while it works in units
>> of pages.  So I'd very much welcome a major overhaul of this API.
>>
> 
> Hi Christoph and Robin:
>      Thanks for your suggestion. I will try this idea in the next 
> version. Besides make the address translation into set_memory_
> decrypted() and return address, do you want to make other changes to the 
> API in order to make it more reasonable(e.g size parameter)?
> 
> Thanks
^ permalink raw reply	[flat|nested] 49+ messages in thread
 
 
 
 
 
- * [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (7 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 08/11] swiotlb: Add bounce buffer remap address setting function Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-06-02  1:16   ` Boris Ostrovsky
  2021-05-30 15:06 ` [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver Tianyu Lan
  2021-05-30 15:06 ` [RFC PATCH V3 11/11] HV/Storvsc: Add Isolation VM support for storvsc driver Tianyu Lan
  10 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Hyper-V Isolation VM requires bounce buffer support to copy
data from/to encrypted memory and so enable swiotlb force
mode to use swiotlb bounce buffer for DMA transaction.
In Isolation VM with AMD SEV, the bounce buffer needs to be
accessed via extra address space which is above shared_gpa_boundary
(E.G 39 bit address line) reported by Hyper-V CPUID ISOLATION_CONFIG.
The access physical address will be original physical address +
shared_gpa_boundary. The shared_gpa_boundary in the AMD SEV SNP
spec is called virtual top of memory(vTOM). Memory addresses below
vTOM are automatically treated as private while memory above
vTOM is treated as shared.
ioremap_cache() can't use in the hyperv_iommu_swiotlb_init() which
is too early place and remap bounce buffer in the hyperv_iommu_swiotlb_
later_init().
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 arch/x86/xen/pci-swiotlb-xen.c |  3 +-
 drivers/hv/vmbus_drv.c         |  3 ++
 drivers/iommu/hyperv-iommu.c   | 81 ++++++++++++++++++++++++++++++++++
 include/linux/hyperv.h         |  1 +
 4 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 54f9aa7e8457..43bd031aa332 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -4,6 +4,7 @@
 
 #include <linux/dma-map-ops.h>
 #include <linux/pci.h>
+#include <linux/hyperv.h>
 #include <xen/swiotlb-xen.h>
 
 #include <asm/xen/hypervisor.h>
@@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
 EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
 
 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
-		  NULL,
+		  hyperv_swiotlb_detect,
 		  pci_xen_swiotlb_init,
 		  NULL);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 92cb3f7d21d9..5e3bb76d4dee 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -23,6 +23,7 @@
 #include <linux/cpu.h>
 #include <linux/sched/task_stack.h>
 
+#include <linux/dma-map-ops.h>
 #include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
@@ -2080,6 +2081,7 @@ struct hv_device *vmbus_device_create(const guid_t *type,
 	return child_device_obj;
 }
 
+static u64 vmbus_dma_mask = DMA_BIT_MASK(64);
 /*
  * vmbus_device_register - Register the child device
  */
@@ -2120,6 +2122,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
 	}
 	hv_debug_add_dev_dir(child_device_obj);
 
+	child_device_obj->device.dma_mask = &vmbus_dma_mask;
 	return 0;
 
 err_kset_unregister:
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index e285a220c913..2604619c6fa3 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -13,14 +13,22 @@
 #include <linux/irq.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
+#include <linux/hyperv.h>
+#include <linux/io.h>
 
 #include <asm/apic.h>
 #include <asm/cpu.h>
 #include <asm/hw_irq.h>
 #include <asm/io_apic.h>
+#include <asm/iommu.h>
+#include <asm/iommu_table.h>
 #include <asm/irq_remapping.h>
 #include <asm/hypervisor.h>
 #include <asm/mshyperv.h>
+#include <asm/swiotlb.h>
+#include <linux/dma-map-ops.h>
+#include <linux/dma-direct.h>
+#include <linux/set_memory.h>
 
 #include "irq_remapping.h"
 
@@ -36,6 +44,8 @@
 static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE };
 static struct irq_domain *ioapic_ir_domain;
 
+static unsigned long hyperv_io_tlb_start, hyperv_io_tlb_size;
+
 static int hyperv_ir_set_affinity(struct irq_data *data,
 		const struct cpumask *mask, bool force)
 {
@@ -337,4 +347,75 @@ static const struct irq_domain_ops hyperv_root_ir_domain_ops = {
 	.free = hyperv_root_irq_remapping_free,
 };
 
+void __init hyperv_iommu_swiotlb_init(void)
+{
+	unsigned long bytes, io_tlb_nslabs;
+	void *vstart;
+
+	/* Allocate Hyper-V swiotlb  */
+	bytes = 200 * 1024 * 1024;
+	vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
+	io_tlb_nslabs = bytes >> IO_TLB_SHIFT;
+	hyperv_io_tlb_size = bytes;
+
+	if (!vstart) {
+		pr_warn("Fail to allocate swiotlb.\n");
+		return;
+	}
+
+	hyperv_io_tlb_start = virt_to_phys(vstart);
+	if (!hyperv_io_tlb_start)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx.\n",
+		      __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
+
+	if (swiotlb_init_with_tbl(vstart, io_tlb_nslabs, 1))
+		panic("%s: Cannot allocate SWIOTLB buffer.\n", __func__);
+
+	swiotlb_set_max_segment(HV_HYP_PAGE_SIZE);
+}
+
+int __init hyperv_swiotlb_detect(void)
+{
+	if (hypervisor_is_type(X86_HYPER_MS_HYPERV)
+	    && hv_is_isolation_supported()) {
+		/*
+		 * Enable swiotlb force mode in Isolation VM to
+		 * use swiotlb bounce buffer for dma transaction.
+		 */
+		swiotlb_force = SWIOTLB_FORCE;
+		return 1;
+	}
+
+	return 0;
+}
+
+void __init hyperv_iommu_swiotlb_later_init(void)
+{
+	void *hyperv_io_tlb_remap;
+	int ret;
+
+	/* Mask bounce buffer visible to host and remap extra address. */
+	if (hv_isolation_type_snp()) {
+		ret = set_memory_decrypted((unsigned long)
+				phys_to_virt(hyperv_io_tlb_start),
+				HVPFN_UP(hyperv_io_tlb_size));
+		if (ret)
+			panic("%s: Fail to mark Hyper-v swiotlb buffer visible to host. err=%d\n",
+			      __func__, ret);
+
+		hyperv_io_tlb_remap = ioremap_cache(hyperv_io_tlb_start
+				+ ms_hyperv.shared_gpa_boundary,
+				hyperv_io_tlb_size);
+		if (!hyperv_io_tlb_remap)
+			panic("Fail to remap io tlb.\n");
+
+		memset(hyperv_io_tlb_remap, 0x00, hyperv_io_tlb_size);
+		swiotlb_set_bounce_remap(hyperv_io_tlb_remap);
+	}
+}
+
+IOMMU_INIT_FINISH(hyperv_swiotlb_detect,
+		  NULL, hyperv_iommu_swiotlb_init,
+		  hyperv_iommu_swiotlb_later_init);
+
 #endif
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 06eccaba10c5..babbe19f57e2 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1759,6 +1759,7 @@ int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
 int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
 				void (*block_invalidate)(void *context,
 							 u64 block_mask));
+int __init hyperv_swiotlb_detect(void);
 
 struct hyperv_pci_block_ops {
 	int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM
  2021-05-30 15:06 ` [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM Tianyu Lan
@ 2021-06-02  1:16   ` Boris Ostrovsky
  2021-06-02 15:01     ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Boris Ostrovsky @ 2021-06-02  1:16 UTC (permalink / raw)
  To: Tianyu Lan, kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo,
	bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 5/30/21 11:06 AM, Tianyu Lan wrote:
> @@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
>  EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
>  
>  IOMMU_INIT_FINISH(2,
> -		  NULL,
> +		  hyperv_swiotlb_detect,
>  		  pci_xen_swiotlb_init,
>  		  NULL);
Could you explain this change?
-boris
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM
  2021-06-02  1:16   ` Boris Ostrovsky
@ 2021-06-02 15:01     ` Tianyu Lan
  2021-06-02 16:02       ` Boris Ostrovsky
  0 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-06-02 15:01 UTC (permalink / raw)
  To: Boris Ostrovsky, kys, haiyangz, sthemmin, wei.liu, decui, tglx,
	mingo, bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
Hi Boris:
	Thanks for your review.
On 6/2/2021 9:16 AM, Boris Ostrovsky wrote:
> 
> On 5/30/21 11:06 AM, Tianyu Lan wrote:
>> @@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
>>   EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
>>   
>>   IOMMU_INIT_FINISH(2,
>> -		  NULL,
>> +		  hyperv_swiotlb_detect,
>>   		  pci_xen_swiotlb_init,
>>   		  NULL);
> 
> 
> Could you explain this change?
Hyper-V allocates its own swiotlb bounce buffer and the default
swiotlb buffer should not be allocated. swiotlb_init() in 
pci_swiotlb_init() is to allocate default swiotlb buffer.
To achieve this, put hyperv_swiotlb_detect() as the first entry in the 
iommu_table_entry list. The detect loop in the pci_iommu_alloc() will 
exit once hyperv_swiotlb_detect() is called in Hyper-V VM and other 
iommu_table_entry callback will not be called.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM
  2021-06-02 15:01     ` Tianyu Lan
@ 2021-06-02 16:02       ` Boris Ostrovsky
  2021-06-03 15:37         ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Boris Ostrovsky @ 2021-06-02 16:02 UTC (permalink / raw)
  To: Tianyu Lan, kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo,
	bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/2/21 11:01 AM, Tianyu Lan wrote:
> Hi Boris:
>     Thanks for your review.
>
> On 6/2/2021 9:16 AM, Boris Ostrovsky wrote:
>>
>> On 5/30/21 11:06 AM, Tianyu Lan wrote:
>>> @@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
>>>   EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
>>>     IOMMU_INIT_FINISH(2,
>>> -          NULL,
>>> +          hyperv_swiotlb_detect,
>>>             pci_xen_swiotlb_init,
>>>             NULL);
>>
>>
>> Could you explain this change?
>
> Hyper-V allocates its own swiotlb bounce buffer and the default
> swiotlb buffer should not be allocated. swiotlb_init() in pci_swiotlb_init() is to allocate default swiotlb buffer.
> To achieve this, put hyperv_swiotlb_detect() as the first entry in the iommu_table_entry list. The detect loop in the pci_iommu_alloc() will exit once hyperv_swiotlb_detect() is called in Hyper-V VM and other iommu_table_entry callback will not be called.
Right. But pci_xen_swiotlb_detect() will only do something for Xen PV guests, and those guests don't run on hyperV. It's either xen_pv_domain() (i.e. hypervisor_is_type(X86_HYPER_XEN_PV)) or hypervisor_is_type(X86_HYPER_MS_HYPERV) but never both. So I don't think there needs to be a dependency between the two callbacks.
-boris
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM
  2021-06-02 16:02       ` Boris Ostrovsky
@ 2021-06-03 15:37         ` Tianyu Lan
  2021-06-03 17:04           ` Boris Ostrovsky
  0 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-06-03 15:37 UTC (permalink / raw)
  To: Boris Ostrovsky, kys, haiyangz, sthemmin, wei.liu, decui, tglx,
	mingo, bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/3/2021 12:02 AM, Boris Ostrovsky wrote:
> 
> On 6/2/21 11:01 AM, Tianyu Lan wrote:
>> Hi Boris:
>>      Thanks for your review.
>>
>> On 6/2/2021 9:16 AM, Boris Ostrovsky wrote:
>>>
>>> On 5/30/21 11:06 AM, Tianyu Lan wrote:
>>>> @@ -91,6 +92,6 @@ int pci_xen_swiotlb_init_late(void)
>>>>    EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
>>>>      IOMMU_INIT_FINISH(2,
>>>> -          NULL,
>>>> +          hyperv_swiotlb_detect,
>>>>              pci_xen_swiotlb_init,
>>>>              NULL);
>>>
>>>
>>> Could you explain this change?
>>
>> Hyper-V allocates its own swiotlb bounce buffer and the default
>> swiotlb buffer should not be allocated. swiotlb_init() in pci_swiotlb_init() is to allocate default swiotlb buffer.
>> To achieve this, put hyperv_swiotlb_detect() as the first entry in the iommu_table_entry list. The detect loop in the pci_iommu_alloc() will exit once hyperv_swiotlb_detect() is called in Hyper-V VM and other iommu_table_entry callback will not be called.
> 
> 
> 
> Right. But pci_xen_swiotlb_detect() will only do something for Xen PV guests, and those guests don't run on hyperV. It's either xen_pv_domain() (i.e. hypervisor_is_type(X86_HYPER_XEN_PV)) or hypervisor_is_type(X86_HYPER_MS_HYPERV) but never both. So I don't think there needs to be a dependency between the two callbacks.
Yes, the dependency is between hyperv_swiotlb_detect() and
pci_swiotlb_detect_override()/pci_swiotlb_detect_4gb(). Now
pci_swiotlb_detect_override() and pci_swiotlb_detect_4gb() depends on
pci_xen_swiotlb_detect(). To keep dependency between
hyperv_swiotlb_detect() and pci_swiotlb_detect_override/4gb(), make 
pci_xen_swiotlb_detect() depends on hyperv_swiotlb_detect() and just to
keep order in the IOMMU table. Current iommu_table_entry only has one
depend callback and this is why I put xen depends on hyperv detect function.
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM
  2021-06-03 15:37         ` Tianyu Lan
@ 2021-06-03 17:04           ` Boris Ostrovsky
  2021-06-07  6:44             ` Christoph Hellwig
  0 siblings, 1 reply; 49+ messages in thread
From: Boris Ostrovsky @ 2021-06-03 17:04 UTC (permalink / raw)
  To: Tianyu Lan, kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo,
	bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/3/21 11:37 AM, Tianyu Lan wrote:
>
> Yes, the dependency is between hyperv_swiotlb_detect() and
> pci_swiotlb_detect_override()/pci_swiotlb_detect_4gb(). Now
> pci_swiotlb_detect_override() and pci_swiotlb_detect_4gb() depends on
> pci_xen_swiotlb_detect(). To keep dependency between
> hyperv_swiotlb_detect() and pci_swiotlb_detect_override/4gb(), make pci_xen_swiotlb_detect() depends on hyperv_swiotlb_detect() and just to
> keep order in the IOMMU table. Current iommu_table_entry only has one
> depend callback and this is why I put xen depends on hyperv detect function.
>
Ah, ok. Thanks.
-boris
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM
  2021-06-03 17:04           ` Boris Ostrovsky
@ 2021-06-07  6:44             ` Christoph Hellwig
  0 siblings, 0 replies; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-07  6:44 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: Tianyu Lan, kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo,
	bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, hch, m.szyprowski, robin.murphy, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen, iommu, linux-arch, linux-hyperv, linux-kernel,
	linux-scsi, netdev, vkuznets, thomas.lendacky, brijesh.singh,
	sunilmut
Honestly, we really need to do away with the concept of hypervisor-
specific swiotlb allocations and just add a hypervisor hook to remap the
"main" buffer. That should remove a lot of code and confusion not just
for Xen but also any future addition like hyperv.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
 
 
 
 
 
 
- * [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (8 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 09/11] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-06-07  6:50   ` Christoph Hellwig
  2021-06-10  9:52   ` Vitaly Kuznetsov
  2021-05-30 15:06 ` [RFC PATCH V3 11/11] HV/Storvsc: Add Isolation VM support for storvsc driver Tianyu Lan
  10 siblings, 2 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
In Isolation VM, all shared memory with host needs to mark visible
to host via hvcall. vmbus_establish_gpadl() has already done it for
netvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
pagebuffer() still need to handle. Use DMA API to map/umap these
memory during sending/receiving packet and Hyper-V DMA ops callback
will use swiotlb function to allocate bounce buffer and copy data
from/to bounce buffer.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   |   6 ++
 drivers/net/hyperv/netvsc.c       | 125 ++++++++++++++++++++++++++++--
 drivers/net/hyperv/rndis_filter.c |   3 +
 include/linux/hyperv.h            |   5 ++
 4 files changed, 133 insertions(+), 6 deletions(-)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index b11aa68b44ec..c2fbb9d4df2c 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -164,6 +164,7 @@ struct hv_netvsc_packet {
 	u32 total_bytes;
 	u32 send_buf_index;
 	u32 total_data_buflen;
+	struct hv_dma_range *dma_range;
 };
 
 #define NETVSC_HASH_KEYLEN 40
@@ -1074,6 +1075,7 @@ struct netvsc_device {
 
 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
+	void *recv_original_buf;
 	u32 recv_buf_size; /* allocated bytes */
 	u32 recv_buf_gpadl_handle;
 	u32 recv_section_cnt;
@@ -1082,6 +1084,8 @@ struct netvsc_device {
 
 	/* Send buffer allocated by us */
 	void *send_buf;
+	void *send_original_buf;
+	u32 send_buf_size;
 	u32 send_buf_gpadl_handle;
 	u32 send_section_cnt;
 	u32 send_section_size;
@@ -1729,4 +1733,6 @@ struct rndis_message {
 #define RETRY_US_HI	10000
 #define RETRY_MAX	2000	/* >10 sec */
 
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+		      struct hv_netvsc_packet *packet);
 #endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 7bd935412853..a01740c6c6b8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -153,8 +153,21 @@ static void free_netvsc_device(struct rcu_head *head)
 	int i;
 
 	kfree(nvdev->extension);
-	vfree(nvdev->recv_buf);
-	vfree(nvdev->send_buf);
+
+	if (nvdev->recv_original_buf) {
+		vunmap(nvdev->recv_buf);
+		vfree(nvdev->recv_original_buf);
+	} else {
+		vfree(nvdev->recv_buf);
+	}
+
+	if (nvdev->send_original_buf) {
+		vunmap(nvdev->send_buf);
+		vfree(nvdev->send_original_buf);
+	} else {
+		vfree(nvdev->send_buf);
+	}
+
 	kfree(nvdev->send_section_map);
 
 	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
@@ -338,8 +351,10 @@ static int netvsc_init_buf(struct hv_device *device,
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct nvsp_message *init_packet;
 	unsigned int buf_size;
+	unsigned long *pfns;
 	size_t map_words;
 	int i, ret = 0;
+	void *vaddr;
 
 	/* Get receive buffer area. */
 	buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -375,6 +390,21 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}
 
+	if (hv_isolation_type_snp()) {
+		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
+			       GFP_KERNEL);
+		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
+			pfns[i] = virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE) +
+				(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
+
+		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
+		kfree(pfns);
+		if (!vaddr)
+			goto cleanup;
+		net_device->recv_original_buf = net_device->recv_buf;
+		net_device->recv_buf = vaddr;
+	}
+
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -477,6 +507,23 @@ static int netvsc_init_buf(struct hv_device *device,
 		goto cleanup;
 	}
 
+	if (hv_isolation_type_snp()) {
+		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
+			       GFP_KERNEL);
+
+		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
+			pfns[i] = virt_to_hvpfn(net_device->send_buf + i * HV_HYP_PAGE_SIZE)
+				+ (ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
+
+		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
+		kfree(pfns);
+		if (!vaddr)
+			goto cleanup;
+
+		net_device->send_original_buf = net_device->send_buf;
+		net_device->send_buf = vaddr;
+	}
+
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 	memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -767,7 +814,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 
 	/* Notify the layer above us */
 	if (likely(skb)) {
-		const struct hv_netvsc_packet *packet
+		struct hv_netvsc_packet *packet
 			= (struct hv_netvsc_packet *)skb->cb;
 		u32 send_index = packet->send_buf_index;
 		struct netvsc_stats *tx_stats;
@@ -783,6 +830,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 		tx_stats->bytes += packet->total_bytes;
 		u64_stats_update_end(&tx_stats->syncp);
 
+		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
 		napi_consume_skb(skb, budget);
 	}
 
@@ -947,6 +995,63 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
 		memset(dest, 0, padding);
 }
 
+void netvsc_dma_unmap(struct hv_device *hv_dev,
+		      struct hv_netvsc_packet *packet)
+{
+	u32 page_count = packet->cp_partial ?
+		packet->page_buf_cnt - packet->rmsg_pgcnt :
+		packet->page_buf_cnt;
+	int i;
+
+	if (!packet->dma_range)
+		return;
+
+	for (i = 0; i < page_count; i++)
+		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
+				 packet->dma_range[i].mapping_size,
+				 DMA_TO_DEVICE);
+
+	kfree(packet->dma_range);
+}
+
+int netvsc_dma_map(struct hv_device *hv_dev,
+		   struct hv_netvsc_packet *packet,
+		   struct hv_page_buffer *pb)
+{
+	u32 page_count =  packet->cp_partial ?
+		packet->page_buf_cnt - packet->rmsg_pgcnt :
+		packet->page_buf_cnt;
+	dma_addr_t dma;
+	int i;
+
+	packet->dma_range = kcalloc(page_count,
+				    sizeof(*packet->dma_range),
+				    GFP_KERNEL);
+	if (!packet->dma_range)
+		return -ENOMEM;
+
+	for (i = 0; i < page_count; i++) {
+		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
+					 + pb[i].offset);
+		u32 len = pb[i].len;
+
+		dma = dma_map_single(&hv_dev->device, src, len,
+				     DMA_TO_DEVICE);
+		if (dma_mapping_error(&hv_dev->device, dma)) {
+			kfree(packet->dma_range);
+			return -ENOMEM;
+		}
+
+		packet->dma_range[i].dma = dma;
+		packet->dma_range[i].mapping_size = len;
+		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
+		pb[i].offset = offset_in_hvpage(dma);
+		pb[i].len = len;
+	}
+
+	return 0;
+}
+
 static inline int netvsc_send_pkt(
 	struct hv_device *device,
 	struct hv_netvsc_packet *packet,
@@ -987,14 +1092,22 @@ static inline int netvsc_send_pkt(
 
 	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
 
+	packet->dma_range = NULL;
 	if (packet->page_buf_cnt) {
 		if (packet->cp_partial)
 			pb += packet->rmsg_pgcnt;
 
+		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
+		if (ret)
+			return ret;
+
 		ret = vmbus_sendpacket_pagebuffer(out_channel,
-						  pb, packet->page_buf_cnt,
-						  &nvmsg, sizeof(nvmsg),
-						  req_id);
+					  pb, packet->page_buf_cnt,
+					  &nvmsg, sizeof(nvmsg),
+					  req_id);
+
+		if (ret)
+			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
 	} else {
 		ret = vmbus_sendpacket(out_channel,
 				       &nvmsg, sizeof(nvmsg),
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 983bf362466a..448c1ee39246 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -293,6 +293,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
 	u32 *req_id = &resp->msg.init_complete.req_id;
 	struct rndis_device *dev = nvdev->extension;
 	struct rndis_request *request = NULL;
+	struct hv_device *hv_dev = ((struct net_device_context *)
+			netdev_priv(ndev))->device_ctx;
 	bool found = false;
 	unsigned long flags;
 
@@ -361,6 +363,7 @@ static void rndis_filter_receive_response(struct net_device *ndev,
 			}
 		}
 
+		netvsc_dma_unmap(hv_dev, &request->pkt);
 		complete(&request->wait_event);
 	} else {
 		netdev_err(ndev,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index babbe19f57e2..90abff664495 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1616,6 +1616,11 @@ struct hyperv_service_callback {
 	void (*callback)(void *context);
 };
 
+struct hv_dma_range {
+	dma_addr_t dma;
+	u32 mapping_size;
+};
+
 #define MAX_SRV_VER	0x7ffffff
 extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
 				const int *fw_version, int fw_vercnt,
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-05-30 15:06 ` [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver Tianyu Lan
@ 2021-06-07  6:50   ` Christoph Hellwig
  2021-06-07 15:21     ` Tianyu Lan
  2021-06-10  9:52   ` Vitaly Kuznetsov
  1 sibling, 1 reply; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-07  6:50 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen, iommu, linux-arch, linux-hyperv, linux-kernel,
	linux-scsi, netdev, vkuznets, thomas.lendacky, brijesh.singh,
	sunilmut
On Sun, May 30, 2021 at 11:06:27AM -0400, Tianyu Lan wrote:
> +	if (hv_isolation_type_snp()) {
> +		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
> +			       GFP_KERNEL);
> +		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
> +			pfns[i] = virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE) +
> +				(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
> +
> +		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
> +		kfree(pfns);
> +		if (!vaddr)
> +			goto cleanup;
> +		net_device->recv_original_buf = net_device->recv_buf;
> +		net_device->recv_buf = vaddr;
> +	}
This probably wnats a helper to make the thing more readable.  But who
came up with this fucked up communication protocol where the host needs
to map random pfns into a contigous range?  Sometime I really have to
wonder what crack the hyper-v people take when comparing this to the
relatively sane approach others take.
> +	for (i = 0; i < page_count; i++)
> +		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
> +				 packet->dma_range[i].mapping_size,
> +				 DMA_TO_DEVICE);
> +
> +	kfree(packet->dma_range);
Any reason this isn't simply using a struct scatterlist?
> +	for (i = 0; i < page_count; i++) {
> +		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
> +					 + pb[i].offset);
> +		u32 len = pb[i].len;
> +
> +		dma = dma_map_single(&hv_dev->device, src, len,
> +				     DMA_TO_DEVICE);
dma_map_single can only be used on page baked memory, and if this is
using page backed memory you wouldn't need to do thee phys_to_virt
tricks.  Can someone explain the mess here in more detail?
>  	struct rndis_device *dev = nvdev->extension;
>  	struct rndis_request *request = NULL;
> +	struct hv_device *hv_dev = ((struct net_device_context *)
> +			netdev_priv(ndev))->device_ctx;
Why not use a net_device_context local variable instead of this cast
galore?
^ permalink raw reply	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-06-07  6:50   ` Christoph Hellwig
@ 2021-06-07 15:21     ` Tianyu Lan
  2021-06-14  7:09       ` Christoph Hellwig
  0 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-06-07 15:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/7/2021 2:50 PM, Christoph Hellwig wrote:
> On Sun, May 30, 2021 at 11:06:27AM -0400, Tianyu Lan wrote:
>> +	if (hv_isolation_type_snp()) {
>> +		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
>> +			       GFP_KERNEL);
>> +		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
>> +			pfns[i] = virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE) +
>> +				(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
>> +
>> +		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
>> +		kfree(pfns);
>> +		if (!vaddr)
>> +			goto cleanup;
>> +		net_device->recv_original_buf = net_device->recv_buf;
>> +		net_device->recv_buf = vaddr;
>> +	}
> 
> This probably wnats a helper to make the thing more readable.  But who
> came up with this fucked up communication protocol where the host needs
> to map random pfns into a contigous range?  Sometime I really have to
> wonder what crack the hyper-v people take when comparing this to the
> relatively sane approach others take.
Agree. Will add a helper function.
> 
>> +	for (i = 0; i < page_count; i++)
>> +		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
>> +				 packet->dma_range[i].mapping_size,
>> +				 DMA_TO_DEVICE);
>> +
>> +	kfree(packet->dma_range);
> 
> Any reason this isn't simply using a struct scatterlist?
I will have a look. Thanks to reminder scatterlist.
> 
>> +	for (i = 0; i < page_count; i++) {
>> +		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
>> +					 + pb[i].offset);
>> +		u32 len = pb[i].len;
>> +
>> +		dma = dma_map_single(&hv_dev->device, src, len,
>> +				     DMA_TO_DEVICE);
> 
> dma_map_single can only be used on page baked memory, and if this is
> using page backed memory you wouldn't need to do thee phys_to_virt
> tricks.  Can someone explain the mess here in more detail?
Sorry. Could you elaborate the issue? These pages in the pb array are 
not allocated by DMA API and using dma_map_single() here is to map these 
pages' address to bounce buffer physical address.
> 
>>   	struct rndis_device *dev = nvdev->extension;
>>   	struct rndis_request *request = NULL;
>> +	struct hv_device *hv_dev = ((struct net_device_context *)
>> +			netdev_priv(ndev))->device_ctx;
> 
> Why not use a net_device_context local variable instead of this cast
> galore?
> 
OK. I will update.
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-06-07 15:21     ` Tianyu Lan
@ 2021-06-14  7:09       ` Christoph Hellwig
  2021-06-14 14:04         ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-14  7:09 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: Christoph Hellwig, kys, haiyangz, sthemmin, wei.liu, decui, tglx,
	mingo, bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, m.szyprowski, robin.murphy,
	boris.ostrovsky, jgross, sstabellini, joro, will, xen-devel,
	davem, kuba, jejb, martin.petersen, iommu, linux-arch,
	linux-hyperv, linux-kernel, linux-scsi, netdev, vkuznets,
	thomas.lendacky, brijesh.singh, sunilmut
On Mon, Jun 07, 2021 at 11:21:20PM +0800, Tianyu Lan wrote:
>> dma_map_single can only be used on page baked memory, and if this is
>> using page backed memory you wouldn't need to do thee phys_to_virt
>> tricks.  Can someone explain the mess here in more detail?
>
> Sorry. Could you elaborate the issue? These pages in the pb array are not 
> allocated by DMA API and using dma_map_single() here is to map these pages' 
> address to bounce buffer physical address.
dma_map_single just calls dma_map_page using virt_to_page.  So this
can't work on addresses not in the kernel linear mapping.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-06-14  7:09       ` Christoph Hellwig
@ 2021-06-14 14:04         ` Tianyu Lan
  2021-06-14 15:33           ` Christoph Hellwig
  0 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-06-14 14:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/14/2021 3:09 PM, Christoph Hellwig wrote:
> On Mon, Jun 07, 2021 at 11:21:20PM +0800, Tianyu Lan wrote:
>>> dma_map_single can only be used on page baked memory, and if this is
>>> using page backed memory you wouldn't need to do thee phys_to_virt
>>> tricks.  Can someone explain the mess here in more detail?
>>
>> Sorry. Could you elaborate the issue? These pages in the pb array are not
>> allocated by DMA API and using dma_map_single() here is to map these pages'
>> address to bounce buffer physical address.
> 
> dma_map_single just calls dma_map_page using virt_to_page.  So this
> can't work on addresses not in the kernel linear mapping.
> 
The pages in the hv_page_buffer array here are in the kernel linear 
mapping. The packet sent to host will contain an array which contains 
transaction data. In the isolation VM, data in the these pages needs to 
be copied to bounce buffer and so call dma_map_single() here to map 
these data pages with bounce buffer. The vmbus has ring buffer where the 
send/receive packets are copied to/from. The ring buffer has been 
remapped to the extra space above shared gpa boundary/vTom during 
probing Netvsc driver and so not call dma map function for vmbus ring
buffer.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-06-14 14:04         ` Tianyu Lan
@ 2021-06-14 15:33           ` Christoph Hellwig
  2021-06-15 14:31             ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-14 15:33 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: Christoph Hellwig, kys, haiyangz, sthemmin, wei.liu, decui, tglx,
	mingo, bp, x86, hpa, arnd, dave.hansen, luto, peterz, akpm,
	kirill.shutemov, rppt, hannes, cai, krish.sadhukhan, saravanand,
	Tianyu.Lan, konrad.wilk, m.szyprowski, robin.murphy,
	boris.ostrovsky, jgross, sstabellini, joro, will, xen-devel,
	davem, kuba, jejb, martin.petersen, iommu, linux-arch,
	linux-hyperv, linux-kernel, linux-scsi, netdev, vkuznets,
	thomas.lendacky, brijesh.singh, sunilmut
On Mon, Jun 14, 2021 at 10:04:06PM +0800, Tianyu Lan wrote:
> The pages in the hv_page_buffer array here are in the kernel linear 
> mapping. The packet sent to host will contain an array which contains 
> transaction data. In the isolation VM, data in the these pages needs to be 
> copied to bounce buffer and so call dma_map_single() here to map these data 
> pages with bounce buffer. The vmbus has ring buffer where the send/receive 
> packets are copied to/from. The ring buffer has been remapped to the extra 
> space above shared gpa boundary/vTom during probing Netvsc driver and so 
> not call dma map function for vmbus ring
> buffer.
So why do we have all that PFN magic instead of using struct page or
the usual kernel I/O buffers that contain a page pointer?
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-06-14 15:33           ` Christoph Hellwig
@ 2021-06-15 14:31             ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-15 14:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/14/2021 11:33 PM, Christoph Hellwig wrote:
> On Mon, Jun 14, 2021 at 10:04:06PM +0800, Tianyu Lan wrote:
>> The pages in the hv_page_buffer array here are in the kernel linear
>> mapping. The packet sent to host will contain an array which contains
>> transaction data. In the isolation VM, data in the these pages needs to be
>> copied to bounce buffer and so call dma_map_single() here to map these data
>> pages with bounce buffer. The vmbus has ring buffer where the send/receive
>> packets are copied to/from. The ring buffer has been remapped to the extra
>> space above shared gpa boundary/vTom during probing Netvsc driver and so
>> not call dma map function for vmbus ring
>> buffer.
> 
> So why do we have all that PFN magic instead of using struct page or
> the usual kernel I/O buffers that contain a page pointer?
> 
These PFNs originally is part of Hyper-V protocol data and will be sent
to host. Host accepts these GFN and copy data from/to guest memory. The 
translation from va to pa is done by caller that populates the 
hv_page_buffer array. I will try calling dma map function before 
populating struct hv_page_buffer and this can avoid redundant 
translation between PA and VA.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
 
 
 
 
 
- * Re: [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver
  2021-05-30 15:06 ` [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver Tianyu Lan
  2021-06-07  6:50   ` Christoph Hellwig
@ 2021-06-10  9:52   ` Vitaly Kuznetsov
  1 sibling, 0 replies; 49+ messages in thread
From: Vitaly Kuznetsov @ 2021-06-10  9:52 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	thomas.lendacky, brijesh.singh, sunilmut, kys, haiyangz, sthemmin,
	wei.liu, decui, tglx, mingo, bp, x86, hpa, arnd, dave.hansen,
	luto, peterz, akpm, kirill.shutemov, rppt, hannes, cai,
	krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk, hch,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen
Tianyu Lan <ltykernel@gmail.com> writes:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>
> In Isolation VM, all shared memory with host needs to mark visible
> to host via hvcall. vmbus_establish_gpadl() has already done it for
> netvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
> pagebuffer() still need to handle. Use DMA API to map/umap these
> memory during sending/receiving packet and Hyper-V DMA ops callback
> will use swiotlb function to allocate bounce buffer and copy data
> from/to bounce buffer.
>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  drivers/net/hyperv/hyperv_net.h   |   6 ++
>  drivers/net/hyperv/netvsc.c       | 125 ++++++++++++++++++++++++++++--
>  drivers/net/hyperv/rndis_filter.c |   3 +
>  include/linux/hyperv.h            |   5 ++
>  4 files changed, 133 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index b11aa68b44ec..c2fbb9d4df2c 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -164,6 +164,7 @@ struct hv_netvsc_packet {
>  	u32 total_bytes;
>  	u32 send_buf_index;
>  	u32 total_data_buflen;
> +	struct hv_dma_range *dma_range;
>  };
>  
>  #define NETVSC_HASH_KEYLEN 40
> @@ -1074,6 +1075,7 @@ struct netvsc_device {
>  
>  	/* Receive buffer allocated by us but manages by NetVSP */
>  	void *recv_buf;
> +	void *recv_original_buf;
>  	u32 recv_buf_size; /* allocated bytes */
>  	u32 recv_buf_gpadl_handle;
>  	u32 recv_section_cnt;
> @@ -1082,6 +1084,8 @@ struct netvsc_device {
>  
>  	/* Send buffer allocated by us */
>  	void *send_buf;
> +	void *send_original_buf;
> +	u32 send_buf_size;
>  	u32 send_buf_gpadl_handle;
>  	u32 send_section_cnt;
>  	u32 send_section_size;
> @@ -1729,4 +1733,6 @@ struct rndis_message {
>  #define RETRY_US_HI	10000
>  #define RETRY_MAX	2000	/* >10 sec */
>  
> +void netvsc_dma_unmap(struct hv_device *hv_dev,
> +		      struct hv_netvsc_packet *packet);
>  #endif /* _HYPERV_NET_H */
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index 7bd935412853..a01740c6c6b8 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -153,8 +153,21 @@ static void free_netvsc_device(struct rcu_head *head)
>  	int i;
>  
>  	kfree(nvdev->extension);
> -	vfree(nvdev->recv_buf);
> -	vfree(nvdev->send_buf);
> +
> +	if (nvdev->recv_original_buf) {
> +		vunmap(nvdev->recv_buf);
> +		vfree(nvdev->recv_original_buf);
> +	} else {
> +		vfree(nvdev->recv_buf);
> +	}
> +
> +	if (nvdev->send_original_buf) {
> +		vunmap(nvdev->send_buf);
> +		vfree(nvdev->send_original_buf);
> +	} else {
> +		vfree(nvdev->send_buf);
> +	}
> +
>  	kfree(nvdev->send_section_map);
>  
>  	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
> @@ -338,8 +351,10 @@ static int netvsc_init_buf(struct hv_device *device,
>  	struct net_device *ndev = hv_get_drvdata(device);
>  	struct nvsp_message *init_packet;
>  	unsigned int buf_size;
> +	unsigned long *pfns;
>  	size_t map_words;
>  	int i, ret = 0;
> +	void *vaddr;
>  
>  	/* Get receive buffer area. */
>  	buf_size = device_info->recv_sections * device_info->recv_section_size;
> @@ -375,6 +390,21 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
>  
> +	if (hv_isolation_type_snp()) {
> +		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
> +			       GFP_KERNEL);
> +		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
> +			pfns[i] = virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE) +
> +				(ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
> +
> +		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
> +		kfree(pfns);
> +		if (!vaddr)
> +			goto cleanup;
> +		net_device->recv_original_buf = net_device->recv_buf;
> +		net_device->recv_buf = vaddr;
> +	}
> +
>  	/* Notify the NetVsp of the gpadl handle */
>  	init_packet = &net_device->channel_init_pkt;
>  	memset(init_packet, 0, sizeof(struct nvsp_message));
> @@ -477,6 +507,23 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
>  
> +	if (hv_isolation_type_snp()) {
> +		pfns = kcalloc(buf_size / HV_HYP_PAGE_SIZE, sizeof(unsigned long),
> +			       GFP_KERNEL);
> +
> +		for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++)
> +			pfns[i] = virt_to_hvpfn(net_device->send_buf + i * HV_HYP_PAGE_SIZE)
> +				+ (ms_hyperv.shared_gpa_boundary >> HV_HYP_PAGE_SHIFT);
> +
> +		vaddr = vmap_pfn(pfns, buf_size / HV_HYP_PAGE_SIZE, PAGE_KERNEL_IO);
> +		kfree(pfns);
> +		if (!vaddr)
> +			goto cleanup;
> +
> +		net_device->send_original_buf = net_device->send_buf;
> +		net_device->send_buf = vaddr;
> +	}
> +
>  	/* Notify the NetVsp of the gpadl handle */
>  	init_packet = &net_device->channel_init_pkt;
>  	memset(init_packet, 0, sizeof(struct nvsp_message));
> @@ -767,7 +814,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
>  
>  	/* Notify the layer above us */
>  	if (likely(skb)) {
> -		const struct hv_netvsc_packet *packet
> +		struct hv_netvsc_packet *packet
>  			= (struct hv_netvsc_packet *)skb->cb;
>  		u32 send_index = packet->send_buf_index;
>  		struct netvsc_stats *tx_stats;
> @@ -783,6 +830,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
>  		tx_stats->bytes += packet->total_bytes;
>  		u64_stats_update_end(&tx_stats->syncp);
>  
> +		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
>  		napi_consume_skb(skb, budget);
>  	}
>  
> @@ -947,6 +995,63 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
>  		memset(dest, 0, padding);
>  }
>  
> +void netvsc_dma_unmap(struct hv_device *hv_dev,
> +		      struct hv_netvsc_packet *packet)
> +{
> +	u32 page_count = packet->cp_partial ?
> +		packet->page_buf_cnt - packet->rmsg_pgcnt :
> +		packet->page_buf_cnt;
> +	int i;
> +
> +	if (!packet->dma_range)
> +		return;
> +
> +	for (i = 0; i < page_count; i++)
> +		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
> +				 packet->dma_range[i].mapping_size,
> +				 DMA_TO_DEVICE);
> +
> +	kfree(packet->dma_range);
> +}
> +
> +int netvsc_dma_map(struct hv_device *hv_dev,
> +		   struct hv_netvsc_packet *packet,
> +		   struct hv_page_buffer *pb)
> +{
> +	u32 page_count =  packet->cp_partial ?
> +		packet->page_buf_cnt - packet->rmsg_pgcnt :
> +		packet->page_buf_cnt;
> +	dma_addr_t dma;
> +	int i;
> +
> +	packet->dma_range = kcalloc(page_count,
> +				    sizeof(*packet->dma_range),
> +				    GFP_KERNEL);
> +	if (!packet->dma_range)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < page_count; i++) {
> +		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
> +					 + pb[i].offset);
> +		u32 len = pb[i].len;
> +
> +		dma = dma_map_single(&hv_dev->device, src, len,
> +				     DMA_TO_DEVICE);
> +		if (dma_mapping_error(&hv_dev->device, dma)) {
> +			kfree(packet->dma_range);
> +			return -ENOMEM;
> +		}
> +
> +		packet->dma_range[i].dma = dma;
> +		packet->dma_range[i].mapping_size = len;
> +		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
> +		pb[i].offset = offset_in_hvpage(dma);
> +		pb[i].len = len;
> +	}
> +
> +	return 0;
> +}
> +
>  static inline int netvsc_send_pkt(
>  	struct hv_device *device,
>  	struct hv_netvsc_packet *packet,
> @@ -987,14 +1092,22 @@ static inline int netvsc_send_pkt(
>  
>  	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
>  
> +	packet->dma_range = NULL;
>  	if (packet->page_buf_cnt) {
>  		if (packet->cp_partial)
>  			pb += packet->rmsg_pgcnt;
>  
> +		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
> +		if (ret)
> +			return ret;
> +
>  		ret = vmbus_sendpacket_pagebuffer(out_channel,
> -						  pb, packet->page_buf_cnt,
> -						  &nvmsg, sizeof(nvmsg),
> -						  req_id);
> +					  pb, packet->page_buf_cnt,
> +					  &nvmsg, sizeof(nvmsg),
> +					  req_id);
Nitpick: stray change? 
> +
> +		if (ret)
> +			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
>  	} else {
>  		ret = vmbus_sendpacket(out_channel,
>  				       &nvmsg, sizeof(nvmsg),
> diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
> index 983bf362466a..448c1ee39246 100644
> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -293,6 +293,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
>  	u32 *req_id = &resp->msg.init_complete.req_id;
>  	struct rndis_device *dev = nvdev->extension;
>  	struct rndis_request *request = NULL;
> +	struct hv_device *hv_dev = ((struct net_device_context *)
> +			netdev_priv(ndev))->device_ctx;
>  	bool found = false;
>  	unsigned long flags;
>  
> @@ -361,6 +363,7 @@ static void rndis_filter_receive_response(struct net_device *ndev,
>  			}
>  		}
>  
> +		netvsc_dma_unmap(hv_dev, &request->pkt);
>  		complete(&request->wait_event);
>  	} else {
>  		netdev_err(ndev,
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index babbe19f57e2..90abff664495 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -1616,6 +1616,11 @@ struct hyperv_service_callback {
>  	void (*callback)(void *context);
>  };
>  
> +struct hv_dma_range {
> +	dma_addr_t dma;
> +	u32 mapping_size;
> +};
> +
>  #define MAX_SRV_VER	0x7ffffff
>  extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
>  				const int *fw_version, int fw_vercnt,
-- 
Vitaly
^ permalink raw reply	[flat|nested] 49+ messages in thread
 
- * [RFC PATCH V3 11/11] HV/Storvsc: Add Isolation VM support for storvsc driver
  2021-05-30 15:06 [RFC PATCH V3 00/11] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
                   ` (9 preceding siblings ...)
  2021-05-30 15:06 ` [RFC PATCH V3 10/11] HV/Netvsc: Add Isolation VM support for netvsc driver Tianyu Lan
@ 2021-05-30 15:06 ` Tianyu Lan
  2021-06-07  6:46   ` Christoph Hellwig
  10 siblings, 1 reply; 49+ messages in thread
From: Tianyu Lan @ 2021-05-30 15:06 UTC (permalink / raw)
  To: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen
  Cc: iommu, linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
In Isolation VM, all shared memory with host needs to mark visible
to host via hvcall. vmbus_establish_gpadl() has already done it for
storvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
mpb_desc() still need to handle. Use DMA API to map/umap these
memory during sending/receiving packet and Hyper-V DMA ops callback
will use swiotlb function to allocate bounce buffer and copy data
from/to bounce buffer.
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 drivers/scsi/storvsc_drv.c | 63 +++++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 403753929320..32da419c134e 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -21,6 +21,8 @@
 #include <linux/device.h>
 #include <linux/hyperv.h>
 #include <linux/blkdev.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_host.h>
@@ -427,6 +429,8 @@ struct storvsc_cmd_request {
 	u32 payload_sz;
 
 	struct vstor_packet vstor_packet;
+	u32 hvpg_count;
+	struct hv_dma_range *dma_range;
 };
 
 
@@ -1267,6 +1271,7 @@ static void storvsc_on_channel_callback(void *context)
 	struct hv_device *device;
 	struct storvsc_device *stor_device;
 	struct Scsi_Host *shost;
+	int i;
 
 	if (channel->primary_channel != NULL)
 		device = channel->primary_channel->device_obj;
@@ -1321,6 +1326,17 @@ static void storvsc_on_channel_callback(void *context)
 				request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd);
 			}
 
+			if (request->dma_range) {
+				for (i = 0; i < request->hvpg_count; i++)
+					dma_unmap_page(&device->device,
+							request->dma_range[i].dma,
+							request->dma_range[i].mapping_size,
+							request->vstor_packet.vm_srb.data_in
+							     == READ_TYPE ?
+							DMA_FROM_DEVICE : DMA_TO_DEVICE);
+				kfree(request->dma_range);
+			}
+
 			storvsc_on_receive(stor_device, packet, request);
 			continue;
 		}
@@ -1817,7 +1833,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 		unsigned int hvpgoff, hvpfns_to_add;
 		unsigned long offset_in_hvpg = offset_in_hvpage(sgl->offset);
 		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
+		dma_addr_t dma;
 		u64 hvpfn;
+		u32 size;
 
 		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
 
@@ -1831,6 +1849,13 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 		payload->range.len = length;
 		payload->range.offset = offset_in_hvpg;
 
+		cmd_request->dma_range = kcalloc(hvpg_count,
+				 sizeof(*cmd_request->dma_range),
+				 GFP_ATOMIC);
+		if (!cmd_request->dma_range) {
+			ret = -ENOMEM;
+			goto free_payload;
+		}
 
 		for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {
 			/*
@@ -1854,9 +1879,30 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 			 * last sgl should be reached at the same time that
 			 * the PFN array is filled.
 			 */
-			while (hvpfns_to_add--)
-				payload->range.pfn_array[i++] =	hvpfn++;
+			while (hvpfns_to_add--) {
+				size = min(HV_HYP_PAGE_SIZE - offset_in_hvpg,
+					   (unsigned long)length);
+				dma = dma_map_page(&dev->device,
+							 pfn_to_page(hvpfn++),
+							 offset_in_hvpg, size,
+							 scmnd->sc_data_direction);
+				if (dma_mapping_error(&dev->device, dma)) {
+					ret = -ENOMEM;
+					goto free_dma_range;
+				}
+
+				if (offset_in_hvpg) {
+					payload->range.offset = dma & ~HV_HYP_PAGE_MASK;
+					offset_in_hvpg = 0;
+				}
+
+				cmd_request->dma_range[i].dma = dma;
+				cmd_request->dma_range[i].mapping_size = size;
+				payload->range.pfn_array[i++] = dma >> HV_HYP_PAGE_SHIFT;
+				length -= size;
+			}
 		}
+		cmd_request->hvpg_count = hvpg_count;
 	}
 
 	cmd_request->payload = payload;
@@ -1867,13 +1913,20 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 	put_cpu();
 
 	if (ret == -EAGAIN) {
-		if (payload_sz > sizeof(cmd_request->mpb))
-			kfree(payload);
 		/* no more space */
-		return SCSI_MLQUEUE_DEVICE_BUSY;
+		ret = SCSI_MLQUEUE_DEVICE_BUSY;
+		goto free_dma_range;
 	}
 
 	return 0;
+
+free_dma_range:
+	kfree(cmd_request->dma_range);
+
+free_payload:
+	if (payload_sz > sizeof(cmd_request->mpb))
+		kfree(payload);
+	return ret;
 }
 
 static struct scsi_host_template scsi_driver = {
-- 
2.25.1
^ permalink raw reply related	[flat|nested] 49+ messages in thread
- * Re: [RFC PATCH V3 11/11] HV/Storvsc: Add Isolation VM support for storvsc driver
  2021-05-30 15:06 ` [RFC PATCH V3 11/11] HV/Storvsc: Add Isolation VM support for storvsc driver Tianyu Lan
@ 2021-06-07  6:46   ` Christoph Hellwig
  2021-06-07 14:59     ` Tianyu Lan
  0 siblings, 1 reply; 49+ messages in thread
From: Christoph Hellwig @ 2021-06-07  6:46 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	hch, m.szyprowski, robin.murphy, boris.ostrovsky, jgross,
	sstabellini, joro, will, xen-devel, davem, kuba, jejb,
	martin.petersen, iommu, linux-arch, linux-hyperv, linux-kernel,
	linux-scsi, netdev, vkuznets, thomas.lendacky, brijesh.singh,
	sunilmut
On Sun, May 30, 2021 at 11:06:28AM -0400, Tianyu Lan wrote:
> +				for (i = 0; i < request->hvpg_count; i++)
> +					dma_unmap_page(&device->device,
> +							request->dma_range[i].dma,
> +							request->dma_range[i].mapping_size,
> +							request->vstor_packet.vm_srb.data_in
> +							     == READ_TYPE ?
> +							DMA_FROM_DEVICE : DMA_TO_DEVICE);
> +				kfree(request->dma_range);
Unreadably long lines.  You probably want to factor the quoted code into
a little readable helper and do the same for the map side.
^ permalink raw reply	[flat|nested] 49+ messages in thread 
- * Re: [RFC PATCH V3 11/11] HV/Storvsc: Add Isolation VM support for storvsc driver
  2021-06-07  6:46   ` Christoph Hellwig
@ 2021-06-07 14:59     ` Tianyu Lan
  0 siblings, 0 replies; 49+ messages in thread
From: Tianyu Lan @ 2021-06-07 14:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kys, haiyangz, sthemmin, wei.liu, decui, tglx, mingo, bp, x86,
	hpa, arnd, dave.hansen, luto, peterz, akpm, kirill.shutemov, rppt,
	hannes, cai, krish.sadhukhan, saravanand, Tianyu.Lan, konrad.wilk,
	m.szyprowski, robin.murphy, boris.ostrovsky, jgross, sstabellini,
	joro, will, xen-devel, davem, kuba, jejb, martin.petersen, iommu,
	linux-arch, linux-hyperv, linux-kernel, linux-scsi, netdev,
	vkuznets, thomas.lendacky, brijesh.singh, sunilmut
On 6/7/2021 2:46 PM, Christoph Hellwig wrote:
> On Sun, May 30, 2021 at 11:06:28AM -0400, Tianyu Lan wrote:
>> +				for (i = 0; i < request->hvpg_count; i++)
>> +					dma_unmap_page(&device->device,
>> +							request->dma_range[i].dma,
>> +							request->dma_range[i].mapping_size,
>> +							request->vstor_packet.vm_srb.data_in
>> +							     == READ_TYPE ?
>> +							DMA_FROM_DEVICE : DMA_TO_DEVICE);
>> +				kfree(request->dma_range);
> 
> Unreadably long lines.  You probably want to factor the quoted code into
> a little readable helper and do the same for the map side.
Good suggestion. Will update.
Thanks.
^ permalink raw reply	[flat|nested] 49+ messages in thread