LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH -V2 11/14] kvm: powerpc: book3s: Support building HV and PR KVM as module
From: Aneesh Kumar K.V @ 2013-10-07 16:47 UTC (permalink / raw)
  To: agraf, benh, paulus; +Cc: linuxppc-dev, kvm, kvm-ppc, Aneesh Kumar K.V
In-Reply-To: <1381164482-31001-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/Kconfig          |  6 +++---
 arch/powerpc/kvm/Makefile         | 11 ++++++++---
 arch/powerpc/kvm/book3s.c         | 12 +++++++++++-
 arch/powerpc/kvm/book3s_emulate.c |  2 +-
 arch/powerpc/kvm/book3s_hv.c      |  2 ++
 arch/powerpc/kvm/book3s_pr.c      |  5 ++++-
 arch/powerpc/kvm/book3s_rtas.c    |  1 +
 arch/powerpc/kvm/emulate.c        |  1 +
 arch/powerpc/kvm/powerpc.c        | 10 ++++++++++
 virt/kvm/kvm_main.c               |  4 ++++
 10 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index a96d7c3..8aeeda1 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -73,7 +73,7 @@ config KVM_BOOK3S_64
 	  If unsure, say N.
 
 config KVM_BOOK3S_64_HV
-	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+	tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
 	select KVM_BOOK3S_HV_POSSIBLE
 	select MMU_NOTIFIER
@@ -94,8 +94,8 @@ config KVM_BOOK3S_64_HV
 	  If unsure, say N.
 
 config KVM_BOOK3S_64_PR
-	bool "KVM support without using hypervisor mode in host"
-	depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
+	tristate "KVM support without using hypervisor mode in host"
+	depends on KVM_BOOK3S_64
 	select KVM_BOOK3S_PR_POSSIBLE
 	---help---
 	  Support running guest kernels in virtual machines on processors
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index fa17b33..ce569b6 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -56,7 +56,7 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
 	book3s_64_vio_hv.o
 
-kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
+kvm-pr-y := \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
@@ -76,7 +76,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_rmhandlers.o
 endif
 
-kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV)  += \
+kvm-hv-y += \
 	book3s_hv.o \
 	book3s_hv_interrupts.o \
 	book3s_64_mmu_hv.o
@@ -84,13 +84,15 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV)  += \
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
 	book3s_hv_rm_xics.o
 
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) += \
+ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_hv_ras.o \
 	book3s_hv_builtin.o \
 	book3s_hv_cma.o \
 	$(kvm-book3s_64-builtin-xics-objs-y)
+endif
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
@@ -131,4 +133,7 @@ obj-$(CONFIG_KVM_E500MC) += kvm.o
 obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o
 obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o
 
+obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o
+obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o
+
 obj-y += $(kvm-book3s_64-builtin-objs-y)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 493aff7..39d2994 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -170,28 +170,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
 	printk(KERN_INFO "Queueing interrupt %x\n", vec);
 #endif
 }
-
+EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
 
 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 {
 	/* might as well deliver this straight away */
 	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
 
 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 {
 	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec);
 
 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
 {
 	return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec);
 
 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
 {
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
 
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
@@ -329,6 +333,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
 
 pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
 			bool *writable)
@@ -354,6 +359,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing,
 
 	return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
 }
+EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn);
 
 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
 			bool iswrite, struct kvmppc_pte *pte)
@@ -421,6 +427,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 
 	return EMULATE_DONE;
 }
+EXPORT_SYMBOL_GPL(kvmppc_st);
 
 int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 		      bool data)
@@ -451,6 +458,7 @@ nopte:
 mmio:
 	return EMULATE_DO_MMIO;
 }
+EXPORT_SYMBOL_GPL(kvmppc_ld);
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
@@ -706,6 +714,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
 	kvmppc_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvmppc_set_msr);
 
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
@@ -787,6 +796,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	return kvmppc_ops->unmap_hva(kvm, hva);
 }
+EXPORT_SYMBOL_GPL(kvm_unmap_hva);
 
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index b9841ad..99d40f8 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -172,7 +172,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			vcpu->arch.mmu.tlbie(vcpu, addr, large);
 			break;
 		}
-#ifdef CONFIG_KVM_BOOK3S_64_PR
+#ifdef CONFIG_PPC_BOOK3S_64
 		case OP_31_XOP_FAKE_SC1:
 		{
 			/* SC 1 papr hypercalls */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 540438d..47fca8c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -52,6 +52,7 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
+#include <linux/module.h>
 
 #include "book3s.h"
 
@@ -2215,3 +2216,4 @@ static void kvmppc_book3s_exit_hv(void)
 
 module_init(kvmppc_book3s_init_hv);
 module_exit(kvmppc_book3s_exit_hv);
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ca6c73d..7f583a4 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -40,6 +40,7 @@
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
+#include <linux/module.h>
 
 #include "book3s.h"
 
@@ -918,7 +919,7 @@ program_interrupt:
 			ulong cmd = kvmppc_get_gpr(vcpu, 3);
 			int i;
 
-#ifdef CONFIG_KVM_BOOK3S_64_PR
+#ifdef CONFIG_PPC_BOOK3S_64
 			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
 				r = RESUME_GUEST;
 				break;
@@ -1582,3 +1583,5 @@ static void kvmppc_book3s_exit_pr(void)
 
 module_init(kvmppc_book3s_init_pr);
 module_exit(kvmppc_book3s_exit_pr);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 3219ba8..cf95cde 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -260,6 +260,7 @@ fail:
 	 */
 	return rc;
 }
+EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall);
 
 void kvmppc_rtas_tokens_free(struct kvm *kvm)
 {
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index fe3e0c5..230b4e9 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -526,3 +526,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	return emulated;
 }
+EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4481417..1258c1f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -126,6 +126,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
 
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 {
@@ -179,6 +180,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvmppc_kvm_pv);
 
 int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 {
@@ -207,6 +209,7 @@ out:
 	vcpu->arch.sane = r;
 	return r ? 0 : -EINVAL;
 }
+EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
 
 int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
@@ -241,6 +244,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio);
 
 int kvm_arch_hardware_enable(void *garbage)
 {
@@ -665,6 +669,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	return EMULATE_DO_MMIO;
 }
+EXPORT_SYMBOL_GPL(kvmppc_handle_load);
 
 /* Same as above, but sign extends */
 int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -726,6 +731,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	return EMULATE_DO_MMIO;
 }
+EXPORT_SYMBOL_GPL(kvmppc_handle_store);
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
@@ -1074,22 +1080,26 @@ long kvmppc_alloc_lpid(void)
 
 	return lpid;
 }
+EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
 
 void kvmppc_claim_lpid(long lpid)
 {
 	set_bit(lpid, lpid_inuse);
 }
+EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
 
 void kvmppc_free_lpid(long lpid)
 {
 	clear_bit(lpid, lpid_inuse);
 }
+EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
 
 void kvmppc_init_lpid(unsigned long nr_lpids_param)
 {
 	nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
 	memset(lpid_inuse, 0, sizeof(lpid_inuse));
 }
+EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
 
 int kvm_arch_init(void *opaque)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bf040c4..482b9c5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,6 +186,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
+EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
@@ -964,6 +965,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
 out:
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
 
 bool kvm_largepages_enabled(void)
 {
@@ -1648,6 +1650,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 	memslot = gfn_to_memslot(kvm, gfn);
 	mark_page_dirty_in_slot(kvm, memslot, gfn);
 }
+EXPORT_SYMBOL_GPL(mark_page_dirty);
 
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
@@ -1673,6 +1676,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 
 	finish_wait(&vcpu->wq, &wait);
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 #ifndef CONFIG_S390
 /*
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH -V2 12/14] kvm: Add struct kvm arg to memslot APIs
From: Aneesh Kumar K.V @ 2013-10-07 16:48 UTC (permalink / raw)
  To: agraf, benh, paulus; +Cc: linuxppc-dev, kvm, kvm-ppc, Aneesh Kumar K.V
In-Reply-To: <1381164482-31001-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We will use this argument in a later patch to find the kvm ops handler.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/arm/kvm/arm.c                 |  5 +++--
 arch/ia64/kvm/kvm-ia64.c           |  5 +++--
 arch/mips/kvm/kvm_mips.c           |  5 +++--
 arch/powerpc/include/asm/kvm_ppc.h |  6 ++++--
 arch/powerpc/kvm/book3s.c          |  4 ++--
 arch/powerpc/kvm/booke.c           |  4 ++--
 arch/powerpc/kvm/powerpc.c         |  9 +++++----
 arch/s390/kvm/kvm-s390.c           |  5 +++--
 arch/x86/kvm/x86.c                 |  5 +++--
 include/linux/kvm_host.h           |  5 +++--
 virt/kvm/kvm_main.c                | 12 ++++++------
 11 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9c697db..e96c48f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bdfd878..985bf80 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index a7b0445..73b3482 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 	return -ENOIOCTLCMD;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c13f15d..20f4616 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -134,9 +134,11 @@ extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+				     struct kvm_memory_slot *free,
 				     struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 39d2994..130fe1d 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -761,13 +761,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return kvmppc_ops->get_dirty_log(kvm, log);
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 	kvmppc_ops->free_memslot(free, dont);
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return kvmppc_ops->create_memslot(slot, npages);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1769354..cb2d986 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1662,12 +1662,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return -ENOTSUPP;
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1258c1f..db48a7f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -417,15 +417,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
-	kvmppc_core_free_memslot(free, dont);
+	kvmppc_core_free_memslot(kvm, free, dont);
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
-	return kvmppc_core_create_memslot(slot, npages);
+	return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
 void kvm_arch_memslots_updated(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ac8e667..09ceea0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1053,12 +1053,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a..73b23a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7065,7 +7065,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 	int i;
@@ -7086,7 +7086,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 	}
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	int i;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ca645a0..28bfed9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -508,9 +508,10 @@ int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages);
 void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482b9c5..42cba68 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -541,13 +541,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
-	kvm_arch_free_memslot(free, dont);
+	kvm_arch_free_memslot(kvm, free, dont);
 
 	free->npages = 0;
 }
@@ -558,7 +558,7 @@ void kvm_free_physmem(struct kvm *kvm)
 	struct kvm_memory_slot *memslot;
 
 	kvm_for_each_memslot(memslot, slots)
-		kvm_free_physmem_slot(memslot, NULL);
+		kvm_free_physmem_slot(kvm, memslot, NULL);
 
 	kfree(kvm->memslots);
 }
@@ -822,7 +822,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (change == KVM_MR_CREATE) {
 		new.userspace_addr = mem->userspace_addr;
 
-		if (kvm_arch_create_memslot(&new, npages))
+		if (kvm_arch_create_memslot(kvm, &new, npages))
 			goto out_free;
 	}
 
@@ -898,7 +898,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
-	kvm_free_physmem_slot(&old, &new);
+	kvm_free_physmem_slot(kvm, &old, &new);
 	kfree(old_memslots);
 
 	return 0;
@@ -906,7 +906,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 out_slots:
 	kfree(slots);
 out_free:
-	kvm_free_physmem_slot(&new, &old);
+	kvm_free_physmem_slot(kvm, &new, &old);
 out:
 	return r;
 }
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH -V2 13/14] kvm: powerpc: book3s: Allow the HV and PR selection per virtual machine
From: Aneesh Kumar K.V @ 2013-10-07 16:48 UTC (permalink / raw)
  To: agraf, benh, paulus; +Cc: linuxppc-dev, kvm, kvm-ppc, Aneesh Kumar K.V
In-Reply-To: <1381164482-31001-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This moves the kvmppc_ops callbacks to be a per-VM entity. This
enables us to select HV or PR mode when creating a VM. We also
allow both the kvm-hv and kvm-pr kernel modules to be loaded. To
achieve this, we move /dev/kvm ownership to the kvm.ko module. Depending
on which KVM mode we select during VM creation, we take a reference
count on the respective module.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/include/asm/kvm_ppc.h  |  7 +--
 arch/powerpc/kvm/44x.c              |  7 ++-
 arch/powerpc/kvm/book3s.c           | 89 +++++++++++++++++++++++++------------
 arch/powerpc/kvm/book3s.h           |  2 +
 arch/powerpc/kvm/book3s_hv.c        | 18 ++++----
 arch/powerpc/kvm/book3s_pr.c        | 25 +++++++----
 arch/powerpc/kvm/book3s_xics.c      |  2 +-
 arch/powerpc/kvm/booke.c            | 22 ++++-----
 arch/powerpc/kvm/e500.c             |  8 +++-
 arch/powerpc/kvm/e500mc.c           |  6 ++-
 arch/powerpc/kvm/emulate.c          | 11 ++---
 arch/powerpc/kvm/powerpc.c          | 76 ++++++++++++++++++++++---------
 include/uapi/linux/kvm.h            |  4 ++
 14 files changed, 187 insertions(+), 91 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e86db97..c7a041d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -275,6 +275,7 @@ struct kvm_arch {
 #ifdef CONFIG_KVM_XICS
 	struct kvmppc_xics *xics;
 #endif
+	struct kvmppc_ops *kvm_ops;
 };
 
 /*
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 20f4616..3069cf4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -182,6 +182,7 @@ union kvmppc_one_reg {
 };
 
 struct kvmppc_ops {
+	struct module *owner;
 	bool is_hv_enabled;
 	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
@@ -217,7 +218,6 @@ struct kvmppc_ops {
 			      unsigned long npages);
 	int (*init_vm)(struct kvm *kvm);
 	void (*destroy_vm)(struct kvm *kvm);
-	int (*check_processor_compat)(void);
 	int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
 	int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			  unsigned int inst, int *advance);
@@ -229,7 +229,8 @@ struct kvmppc_ops {
 
 };
 
-extern struct kvmppc_ops *kvmppc_ops;
+extern struct kvmppc_ops *kvmppc_hv_ops;
+extern struct kvmppc_ops *kvmppc_pr_ops;
 
 /*
  * Cuts out inst bits with ordering according to spec.
@@ -326,7 +327,7 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->fast_vcpu_kick(vcpu);
+	vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu);
 }
 
 #else
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index a765bcd..93221e8 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -213,16 +213,19 @@ static int __init kvmppc_44x_init(void)
 	if (r)
 		goto err_out;
 
-	r = kvm_init(&kvm_ops_44x, sizeof(struct kvmppc_vcpu_44x),
-		     0, THIS_MODULE);
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE);
 	if (r)
 		goto err_out;
+	kvm_ops_44x.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_44x;
+
 err_out:
 	return r;
 }
 
 static void __exit kvmppc_44x_exit(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_booke_exit();
 }
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 130fe1d..ad8f6ed 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 
+#include "book3s.h"
 #include "trace.h"
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -71,7 +72,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
-	if (!kvmppc_ops->is_hv_enabled)
+	if (!vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return to_book3s(vcpu)->hior;
 	return 0;
 }
@@ -79,7 +80,7 @@ static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 			unsigned long pending_now, unsigned long old_pending)
 {
-	if (kvmppc_ops->is_hv_enabled)
+	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return;
 	if (pending_now)
 		vcpu->arch.shared->int_pending = 1;
@@ -93,7 +94,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 	ulong crit_r1;
 	bool crit;
 
-	if (kvmppc_ops->is_hv_enabled)
+	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return false;
 
 	crit_raw = vcpu->arch.shared->critical;
@@ -477,13 +478,13 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
-	return kvmppc_ops->get_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
-	return kvmppc_ops->set_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -562,7 +563,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	if (size > sizeof(val))
 		return -EINVAL;
 
-	r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val);
+	r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
 	if (r == -EINVAL) {
 		r = 0;
 		switch (reg->id) {
@@ -641,7 +642,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
 		return -EFAULT;
 
-	r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val);
+	r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
 	if (r == -EINVAL) {
 		r = 0;
 		switch (reg->id) {
@@ -702,23 +703,23 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	kvmppc_ops->vcpu_load(vcpu, cpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_put(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
 }
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
-	kvmppc_ops->set_msr(vcpu, msr);
+	vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr);
 }
 EXPORT_SYMBOL_GPL(kvmppc_set_msr);
 
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-	return kvmppc_ops->vcpu_run(kvm_run, vcpu);
+	return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
 }
 
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -743,84 +744,84 @@ void kvmppc_decrementer_func(unsigned long data)
 
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-	return kvmppc_ops->vcpu_create(kvm, id);
+	return kvm->arch.kvm_ops->vcpu_create(kvm, id);
 }
 
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_free(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
 }
 
 int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 {
-	return kvmppc_ops->check_requests(vcpu);
+	return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
 }
 
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
-	return kvmppc_ops->get_dirty_log(kvm, log);
+	return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
 }
 
 void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
-	kvmppc_ops->free_memslot(free, dont);
+	kvm->arch.kvm_ops->free_memslot(free, dont);
 }
 
 int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
-	return kvmppc_ops->create_memslot(slot, npages);
+	return kvm->arch.kvm_ops->create_memslot(slot, npages);
 }
 
 void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
-	kvmppc_ops->flush_memslot(kvm, memslot);
+	kvm->arch.kvm_ops->flush_memslot(kvm, memslot);
 }
 
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem)
 {
-	return kvmppc_ops->prepare_memory_region(kvm, memslot, mem);
+	return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem,
 				const struct kvm_memory_slot *old)
 {
-	kvmppc_ops->commit_memory_region(kvm, mem, old);
+	kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvmppc_ops->unmap_hva(kvm, hva);
+	return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
 }
 EXPORT_SYMBOL_GPL(kvm_unmap_hva);
 
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvmppc_ops->unmap_hva_range(kvm, start, end);
+	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvmppc_ops->age_hva(kvm, hva);
+	return kvm->arch.kvm_ops->age_hva(kvm, hva);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvmppc_ops->test_age_hva(kvm, hva);
+	return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
 }
 
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-	kvmppc_ops->set_spte_hva(kvm, hva, pte);
+	kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
 }
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->mmu_destroy(vcpu);
+	vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
 }
 
 int kvmppc_core_init_vm(struct kvm *kvm)
@@ -831,12 +832,12 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
-	return kvmppc_ops->init_vm(kvm);
+	return kvm->arch.kvm_ops->init_vm(kvm);
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-	kvmppc_ops->destroy_vm(kvm);
+	kvm->arch.kvm_ops->destroy_vm(kvm);
 
 #ifdef CONFIG_PPC64
 	kvmppc_rtas_tokens_free(kvm);
@@ -846,5 +847,35 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 
 int kvmppc_core_check_processor_compat(void)
 {
-	return kvmppc_ops->check_processor_compat();
+	/*
+	 * We always return 0 for book3s. We check
+	 * for compatibility while loading the HV
+	 * or PR module
+	 */
+	return 0;
+}
+
+static int kvmppc_book3s_init(void)
+{
+	int r;
+
+	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+	if (r)
+		return r;
+#ifdef CONFIG_KVM_BOOK3S_32
+	r = kvmppc_book3s_init_pr();
+#endif
+	return r;
+
+}
+
+static void kvmppc_book3s_exit(void)
+{
+#ifdef CONFIG_KVM_BOOK3S_32
+	kvmppc_book3s_exit_pr();
+#endif
+	kvm_exit();
 }
+
+module_init(kvmppc_book3s_init);
+module_exit(kvmppc_book3s_exit);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 9e5b3a3..4bf956c 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -28,5 +28,7 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
 					int sprn, ulong spr_val);
 extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
 					int sprn, ulong *spr_val);
+extern int kvmppc_book3s_init_pr(void);
+extern void kvmppc_book3s_exit_pr(void);
 
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 47fca8c..31922d5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2159,7 +2159,7 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 	return r;
 }
 
-static struct kvmppc_ops kvmppc_hv_ops = {
+static struct kvmppc_ops kvm_ops_hv = {
 	.is_hv_enabled = true,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -2186,7 +2186,6 @@ static struct kvmppc_ops kvmppc_hv_ops = {
 	.create_memslot = kvmppc_core_create_memslot_hv,
 	.init_vm =  kvmppc_core_init_vm_hv,
 	.destroy_vm = kvmppc_core_destroy_vm_hv,
-	.check_processor_compat = kvmppc_core_check_processor_compat_hv,
 	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
 	.emulate_op = kvmppc_core_emulate_op_hv,
 	.emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
@@ -2198,20 +2197,23 @@ static struct kvmppc_ops kvmppc_hv_ops = {
 static int kvmppc_book3s_init_hv(void)
 {
 	int r;
-
-	r = kvm_init(&kvmppc_hv_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-
-	if (r)
+	/*
+	 * FIXME!! Do we need to check on all cpus ?
+	 */
+	r = kvmppc_core_check_processor_compat_hv();
+	if (r < 0)
 		return r;
 
-	r = kvmppc_mmu_hv_init();
+	kvm_ops_hv.owner = THIS_MODULE;
+	kvmppc_hv_ops = &kvm_ops_hv;
 
+	r = kvmppc_mmu_hv_init();
 	return r;
 }
 
 static void kvmppc_book3s_exit_hv(void)
 {
-	kvm_exit();
+	kvmppc_hv_ops = NULL;
 }
 
 module_init(kvmppc_book3s_init_hv);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7f583a4..fbd985f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1525,7 +1525,7 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp,
 	return -ENOTTY;
 }
 
-static struct kvmppc_ops kvmppc_pr_ops = {
+static struct kvmppc_ops kvm_ops_pr = {
 	.is_hv_enabled = false,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
@@ -1552,7 +1552,6 @@ static struct kvmppc_ops kvmppc_pr_ops = {
 	.create_memslot = kvmppc_core_create_memslot_pr,
 	.init_vm = kvmppc_core_init_vm_pr,
 	.destroy_vm = kvmppc_core_destroy_vm_pr,
-	.check_processor_compat = kvmppc_core_check_processor_compat_pr,
 	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
 	.emulate_op = kvmppc_core_emulate_op_pr,
 	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
@@ -1561,27 +1560,35 @@ static struct kvmppc_ops kvmppc_pr_ops = {
 	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
 };
 
-static int kvmppc_book3s_init_pr(void)
+
+int kvmppc_book3s_init_pr(void)
 {
 	int r;
 
-	r = kvm_init(&kvmppc_pr_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
-
-	if (r)
+	r = kvmppc_core_check_processor_compat_pr();
+	if (r < 0)
 		return r;
 
-	r = kvmppc_mmu_hpte_sysinit();
+	kvm_ops_pr.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_pr;
 
+	r = kvmppc_mmu_hpte_sysinit();
 	return r;
 }
 
-static void kvmppc_book3s_exit_pr(void)
+void kvmppc_book3s_exit_pr(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_mmu_hpte_sysexit();
-	kvm_exit();
 }
 
+/*
+ * We only support separate modules for book3s 64
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+
 module_init(kvmppc_book3s_init_pr);
 module_exit(kvmppc_book3s_exit_pr);
 
 MODULE_LICENSE("GPL");
+#endif
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index cef3de9..76ef525 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 	}
 
 	/* Check for real mode returning too hard */
-	if (xics->real_mode && kvmppc_ops->is_hv_enabled)
+	if (xics->real_mode && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		return kvmppc_xics_rm_complete(vcpu, req);
 
 	switch (req) {
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index cb2d986..15d0149 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1472,7 +1472,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 	get_sregs_base(vcpu, sregs);
 	get_sregs_arch206(vcpu, sregs);
-	return kvmppc_ops->get_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
@@ -1491,7 +1491,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (ret < 0)
 		return ret;
 
-	return kvmppc_ops->set_sregs(vcpu, sregs);
+	return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
 }
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
@@ -1548,7 +1548,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		val = get_reg_val(reg->id, vcpu->arch.vrsave);
 		break;
 	default:
-		r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val);
+		r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
 		break;
 	}
 
@@ -1631,7 +1631,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		vcpu->arch.vrsave = set_reg_val(reg->id, val);
 		break;
 	default:
-		r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val);
+		r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
 		break;
 	}
 
@@ -1911,37 +1911,37 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 
 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->mmu_destroy(vcpu);
+	vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
 }
 
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
-	return kvmppc_ops->init_vm(kvm);
+	return kvm->arch.kvm_ops->init_vm(kvm);
 }
 
 struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-	return kvmppc_ops->vcpu_create(kvm, id);
+	return kvm->arch.kvm_ops->vcpu_create(kvm, id);
 }
 
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_free(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-	kvmppc_ops->destroy_vm(kvm);
+	kvm->arch.kvm_ops->destroy_vm(kvm);
 }
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	kvmppc_ops->vcpu_load(vcpu, cpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	kvmppc_ops->vcpu_put(vcpu);
+	vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
 }
 
 int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index d225d5e..497b142 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -555,13 +555,19 @@ static int __init kvmppc_e500_init(void)
 	flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
 			   ivor[max_ivor] + handler_len);
 
-	r = kvm_init(&kvm_ops_e500, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	if (r)
+		goto err_out;
+	kvm_ops_e500.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_e500;
+
 err_out:
 	return r;
 }
 
 static void __exit kvmppc_e500_exit(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_booke_exit();
 }
 
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index db6a383..4132cd2 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -373,15 +373,19 @@ static int __init kvmppc_e500mc_init(void)
 	kvmppc_init_lpid(64);
 	kvmppc_claim_lpid(0); /* host */
 
-	r = kvm_init(&kvm_ops_e500mc, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+	r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
 	if (r)
 		goto err_out;
+	kvm_ops_e500mc.owner = THIS_MODULE;
+	kvmppc_pr_ops = &kvm_ops_e500mc;
+
 err_out:
 	return r;
 }
 
 static void __exit kvmppc_e500mc_exit(void)
 {
+	kvmppc_pr_ops = NULL;
 	kvmppc_booke_exit();
 }
 
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 230b4e9..ced1810 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -173,8 +173,8 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 	case SPRN_PIR: break;
 
 	default:
-		emulated = kvmppc_ops->emulate_mtspr(vcpu, sprn,
-						     spr_val);
+		emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn,
+								  spr_val);
 		if (emulated == EMULATE_FAIL)
 			printk(KERN_INFO "mtspr: unknown spr "
 				"0x%x\n", sprn);
@@ -234,8 +234,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 		spr_val = kvmppc_get_dec(vcpu, get_tb());
 		break;
 	default:
-		emulated = kvmppc_ops->emulate_mfspr(vcpu, sprn,
-						     &spr_val);
+		emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn,
+								  &spr_val);
 		if (unlikely(emulated == EMULATE_FAIL)) {
 			printk(KERN_INFO "mfspr: unknown spr "
 				"0x%x\n", sprn);
@@ -507,7 +507,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 
 	if (emulated == EMULATE_FAIL) {
-		emulated = kvmppc_ops->emulate_op(run, vcpu, inst, &advance);
+		emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+							       &advance);
 		if (emulated == EMULATE_AGAIN) {
 			advance = 0;
 		} else if (emulated == EMULATE_FAIL) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index db48a7f..00a995a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -26,6 +26,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/file.h>
+#include <linux/module.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
@@ -39,7 +40,11 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-struct kvmppc_ops *kvmppc_ops;
+struct kvmppc_ops *kvmppc_hv_ops;
+EXPORT_SYMBOL_GPL(kvmppc_hv_ops);
+struct kvmppc_ops *kvmppc_pr_ops;
+EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
+
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
@@ -195,7 +200,7 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 		goto out;
 
 	/* HV KVM can only do PAPR mode for now */
-	if (!vcpu->arch.papr_enabled && kvmppc_ops->is_hv_enabled)
+	if (!vcpu->arch.papr_enabled && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
 		goto out;
 
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -271,10 +276,35 @@ void kvm_arch_check_processor_compat(void *rtn)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-	if (type)
-		return -EINVAL;
-
+	struct kvmppc_ops *kvm_ops = NULL;
+	/*
+	 * if we have both HV and PR enabled, default is HV
+	 */
+	if (type == 0) {
+		if (kvmppc_hv_ops)
+			kvm_ops = kvmppc_hv_ops;
+		else
+			kvm_ops = kvmppc_pr_ops;
+		if (!kvm_ops)
+			goto err_out;
+	} else	if (type == KVM_VM_PPC_HV) {
+		if (!kvmppc_hv_ops)
+			goto err_out;
+		kvm_ops = kvmppc_hv_ops;
+	} else if (type == KVM_VM_PPC_PR) {
+		if (!kvmppc_pr_ops)
+			goto err_out;
+		kvm_ops = kvmppc_pr_ops;
+	} else
+		goto err_out;
+
+	if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
+		return -ENOENT;
+
+	kvm->arch.kvm_ops = kvm_ops;
 	return kvmppc_core_init_vm(kvm);
+err_out:
+	return -EINVAL;
 }
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -294,6 +324,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvmppc_core_destroy_vm(kvm);
 
 	mutex_unlock(&kvm->lock);
+
+	/* drop the module reference */
+	module_put(kvm->arch.kvm_ops->owner);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -303,6 +336,10 @@ void kvm_arch_sync_events(struct kvm *kvm)
 int kvm_dev_ioctl_check_extension(long ext)
 {
 	int r;
+	/* FIXME!!
+	 * Should some of this be vm ioctl ? is it possible now ?
+	 */
+	int hv_enabled = kvmppc_hv_ops ? 1: 0;
 
 	switch (ext) {
 #ifdef CONFIG_BOOKE
@@ -329,7 +366,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SW_TLB:
 #endif
 		/* We support this only for PR */
-		r = !kvmppc_ops->is_hv_enabled;
+		r = !hv_enabled;
 		break;
 #ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
@@ -354,13 +391,13 @@ int kvm_dev_ioctl_check_extension(long ext)
 #endif /* CONFIG_PPC_BOOK3S_64 */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_SMT:
-		if (kvmppc_ops->is_hv_enabled)
+		if (hv_enabled)
 			r = threads_per_core;
 		else
 			r = 0;
 		break;
 	case KVM_CAP_PPC_RMA:
-		r = kvmppc_ops->is_hv_enabled;
+		r = hv_enabled;
 		/* PPC970 requires an RMA */
 		if (r && cpu_has_feature(CPU_FTR_ARCH_201))
 			r = 2;
@@ -368,7 +405,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-		if (kvmppc_ops->is_hv_enabled)
+		if (hv_enabled)
 			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
 		else
 			r = 0;
@@ -380,7 +417,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		break;
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_HTAB_FD:
-		r = kvmppc_ops->is_hv_enabled;
+		r = hv_enabled;
 		break;
 #endif
 	case KVM_CAP_NR_VCPUS:
@@ -390,7 +427,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		 * will have secondary threads "offline"), and for other KVM
 		 * implementations just count online CPUs.
 		 */
-		if (kvmppc_ops->is_hv_enabled)
+		if (hv_enabled)
 			r = num_present_cpus();
 		else
 			r = num_online_cpus();
@@ -1039,9 +1076,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 	case KVM_PPC_GET_SMMU_INFO: {
 		struct kvm_ppc_smmu_info info;
+		struct kvm *kvm = filp->private_data;
 
 		memset(&info, 0, sizeof(info));
-		r = kvmppc_ops->get_smmu_info(kvm, &info);
+		r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info);
 		if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
 			r = -EFAULT;
 		break;
@@ -1052,9 +1090,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
 		break;
 	}
-	default:
-		r = kvmppc_ops->arch_vm_ioctl(filp, ioctl, arg);
-
+	default: {
+		struct kvm *kvm = filp->private_data;
+		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
+	}
 #else /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;
@@ -1104,15 +1143,10 @@ EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
 
 int kvm_arch_init(void *opaque)
 {
-	if (kvmppc_ops) {
-		printk(KERN_ERR "kvm: already loaded the other module\n");
-		return -EEXIST;
-	}
-	kvmppc_ops = (struct kvmppc_ops *)opaque;
 	return 0;
 }
 
 void kvm_arch_exit(void)
 {
-	kvmppc_ops = NULL;
+
 }
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 99c2533..aebfc11 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info {
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
 #define KVM_VM_S390_UCONTROL	1
 
+/* on ppc, 0 indicates default, 1 should force HV and 2 PR */
+#define KVM_VM_PPC_HV 1
+#define KVM_VM_PPC_PR 2
+
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH -V2 14/14] kvm: powerpc: book3s: drop is_hv_enabled
From: Aneesh Kumar K.V @ 2013-10-07 16:48 UTC (permalink / raw)
  To: agraf, benh, paulus; +Cc: linuxppc-dev, kvm, kvm-ppc, Aneesh Kumar K.V
In-Reply-To: <1381164482-31001-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Drop is_hv_enabled, because that should not be a callback property.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_ppc.h | 6 +++++-
 arch/powerpc/kvm/book3s.c          | 6 +++---
 arch/powerpc/kvm/book3s_hv.c       | 1 -
 arch/powerpc/kvm/book3s_pr.c       | 1 -
 arch/powerpc/kvm/book3s_xics.c     | 2 +-
 arch/powerpc/kvm/powerpc.c         | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3069cf4..c8317fb 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -183,7 +183,6 @@ union kvmppc_one_reg {
 
 struct kvmppc_ops {
 	struct module *owner;
-	bool is_hv_enabled;
 	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
@@ -232,6 +231,11 @@ struct kvmppc_ops {
 extern struct kvmppc_ops *kvmppc_hv_ops;
 extern struct kvmppc_ops *kvmppc_pr_ops;
 
+static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
+{
+	return kvm->arch.kvm_ops == kvmppc_hv_ops;
+}
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ad8f6ed..8912608 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -72,7 +72,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (!is_kvmppc_hv_enabled(vcpu->kvm))
 		return to_book3s(vcpu)->hior;
 	return 0;
 }
@@ -80,7 +80,7 @@ static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 			unsigned long pending_now, unsigned long old_pending)
 {
-	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (is_kvmppc_hv_enabled(vcpu->kvm))
 		return;
 	if (pending_now)
 		vcpu->arch.shared->int_pending = 1;
@@ -94,7 +94,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 	ulong crit_r1;
 	bool crit;
 
-	if (vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (is_kvmppc_hv_enabled(vcpu->kvm))
 		return false;
 
 	crit_raw = vcpu->arch.shared->critical;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 31922d5..b5229eb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2160,7 +2160,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 }
 
 static struct kvmppc_ops kvm_ops_hv = {
-	.is_hv_enabled = true,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
 	.get_one_reg = kvmppc_get_one_reg_hv,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index fbd985f..df36cf2 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1526,7 +1526,6 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp,
 }
 
 static struct kvmppc_ops kvm_ops_pr = {
-	.is_hv_enabled = false,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
 	.get_one_reg = kvmppc_get_one_reg_pr,
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 76ef525..20d56ec 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 	}
 
 	/* Check for real mode returning too hard */
-	if (xics->real_mode && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
 		return kvmppc_xics_rm_complete(vcpu, req);
 
 	switch (req) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 00a995a..058f9d6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -200,7 +200,7 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 		goto out;
 
 	/* HV KVM can only do PAPR mode for now */
-	if (!vcpu->arch.papr_enabled && vcpu->kvm->arch.kvm_ops->is_hv_enabled)
+	if (!vcpu->arch.papr_enabled && is_kvmppc_hv_enabled(vcpu->kvm))
 		goto out;
 
 #ifdef CONFIG_KVM_BOOKE_HV
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH -V2 09/14] kvm: powerpc: book3s: pr: move PR related tracepoints to a separate header
From: Aneesh Kumar K.V @ 2013-10-07 16:47 UTC (permalink / raw)
  To: agraf, benh, paulus; +Cc: linuxppc-dev, kvm, kvm-ppc, Aneesh Kumar K.V
In-Reply-To: <1381164482-31001-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This patch moves PR related tracepoints to a separate header. This
enables converting PR to a kernel module, which will be done in
later patches.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_host.c |   2 +-
 arch/powerpc/kvm/book3s_mmu_hpte.c    |   2 +-
 arch/powerpc/kvm/book3s_pr.c          |   4 +-
 arch/powerpc/kvm/trace.h              | 234 +--------------------------
 arch/powerpc/kvm/trace_pr.h           | 297 ++++++++++++++++++++++++++++++++++
 5 files changed, 309 insertions(+), 230 deletions(-)
 create mode 100644 arch/powerpc/kvm/trace_pr.h

diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 819672c..0d513af 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -27,7 +27,7 @@
 #include <asm/machdep.h>
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
-#include "trace.h"
+#include "trace_pr.h"
 
 #define PTE_SIZE 12
 
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 6b79bfc..5a1ab12 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -28,7 +28,7 @@
 #include <asm/mmu_context.h>
 #include <asm/hw_irq.h>
 
-#include "trace.h"
+#include "trace_pr.h"
 
 #define PTE_SIZE	12
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b6a525d..ca6c73d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -42,7 +42,9 @@
 #include <linux/highmem.h>
 
 #include "book3s.h"
-#include "trace.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_pr.h"
 
 /* #define EXIT_DEBUG */
 /* #define DEBUG_EXT */
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 9e8368e..80f252a 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -85,6 +85,12 @@ TRACE_EVENT(kvm_ppc_instr,
 	{41, "HV_PRIV"}
 #endif
 
+#ifndef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+/*
+ * For PR we define this in trace_pr.h, since PR can be built as
+ * a module
+ */
+
 TRACE_EVENT(kvm_exit,
 	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
 	TP_ARGS(exit_nr, vcpu),
@@ -94,9 +100,6 @@ TRACE_EVENT(kvm_exit,
 		__field(	unsigned long,	pc		)
 		__field(	unsigned long,	msr		)
 		__field(	unsigned long,	dar		)
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-		__field(	unsigned long,	srr1		)
-#endif
 		__field(	unsigned long,	last_inst	)
 	),
 
@@ -105,9 +108,6 @@ TRACE_EVENT(kvm_exit,
 		__entry->pc		= kvmppc_get_pc(vcpu);
 		__entry->dar		= kvmppc_get_fault_dar(vcpu);
 		__entry->msr		= vcpu->arch.shared->msr;
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-		__entry->srr1		= vcpu->arch.shadow_srr1;
-#endif
 		__entry->last_inst	= vcpu->arch.last_inst;
 	),
 
@@ -115,18 +115,12 @@ TRACE_EVENT(kvm_exit,
 		" | pc=0x%lx"
 		" | msr=0x%lx"
 		" | dar=0x%lx"
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-		" | srr1=0x%lx"
-#endif
 		" | last_inst=0x%lx"
 		,
 		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
 		__entry->pc,
 		__entry->msr,
 		__entry->dar,
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-		__entry->srr1,
-#endif
 		__entry->last_inst
 		)
 );
@@ -145,6 +139,7 @@ TRACE_EVENT(kvm_unmap_hva,
 
 	TP_printk("unmap hva 0x%lx\n", __entry->hva)
 );
+#endif
 
 TRACE_EVENT(kvm_stlb_inval,
 	TP_PROTO(unsigned int stlb_index),
@@ -231,221 +226,6 @@ TRACE_EVENT(kvm_check_requests,
 		__entry->cpu_nr, __entry->requests)
 );
 
-
-/*************************************************************************
- *                         Book3S trace points                           *
- *************************************************************************/
-
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-
-TRACE_EVENT(kvm_book3s_reenter,
-	TP_PROTO(int r, struct kvm_vcpu *vcpu),
-	TP_ARGS(r, vcpu),
-
-	TP_STRUCT__entry(
-		__field(	unsigned int,	r		)
-		__field(	unsigned long,	pc		)
-	),
-
-	TP_fast_assign(
-		__entry->r		= r;
-		__entry->pc		= kvmppc_get_pc(vcpu);
-	),
-
-	TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
-);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-
-TRACE_EVENT(kvm_book3s_64_mmu_map,
-	TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
-		 struct kvmppc_pte *orig_pte),
-	TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
-
-	TP_STRUCT__entry(
-		__field(	unsigned char,		flag_w		)
-		__field(	unsigned char,		flag_x		)
-		__field(	unsigned long,		eaddr		)
-		__field(	unsigned long,		hpteg		)
-		__field(	unsigned long,		va		)
-		__field(	unsigned long long,	vpage		)
-		__field(	unsigned long,		hpaddr		)
-	),
-
-	TP_fast_assign(
-		__entry->flag_w	= ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
-		__entry->flag_x	= (rflags & HPTE_R_N) ? '-' : 'x';
-		__entry->eaddr	= orig_pte->eaddr;
-		__entry->hpteg	= hpteg;
-		__entry->va	= va;
-		__entry->vpage	= orig_pte->vpage;
-		__entry->hpaddr	= hpaddr;
-	),
-
-	TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
-		  __entry->flag_w, __entry->flag_x, __entry->eaddr,
-		  __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
-);
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-TRACE_EVENT(kvm_book3s_mmu_map,
-	TP_PROTO(struct hpte_cache *pte),
-	TP_ARGS(pte),
-
-	TP_STRUCT__entry(
-		__field(	u64,		host_vpn	)
-		__field(	u64,		pfn		)
-		__field(	ulong,		eaddr		)
-		__field(	u64,		vpage		)
-		__field(	ulong,		raddr		)
-		__field(	int,		flags		)
-	),
-
-	TP_fast_assign(
-		__entry->host_vpn	= pte->host_vpn;
-		__entry->pfn		= pte->pfn;
-		__entry->eaddr		= pte->pte.eaddr;
-		__entry->vpage		= pte->pte.vpage;
-		__entry->raddr		= pte->pte.raddr;
-		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) |
-					  (pte->pte.may_write ? 0x2 : 0) |
-					  (pte->pte.may_execute ? 0x1 : 0);
-	),
-
-	TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
-		  __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_invalidate,
-	TP_PROTO(struct hpte_cache *pte),
-	TP_ARGS(pte),
-
-	TP_STRUCT__entry(
-		__field(	u64,		host_vpn	)
-		__field(	u64,		pfn		)
-		__field(	ulong,		eaddr		)
-		__field(	u64,		vpage		)
-		__field(	ulong,		raddr		)
-		__field(	int,		flags		)
-	),
-
-	TP_fast_assign(
-		__entry->host_vpn	= pte->host_vpn;
-		__entry->pfn		= pte->pfn;
-		__entry->eaddr		= pte->pte.eaddr;
-		__entry->vpage		= pte->pte.vpage;
-		__entry->raddr		= pte->pte.raddr;
-		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) |
-					  (pte->pte.may_write ? 0x2 : 0) |
-					  (pte->pte.may_execute ? 0x1 : 0);
-	),
-
-	TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
-		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
-		  __entry->vpage, __entry->raddr, __entry->flags)
-);
-
-TRACE_EVENT(kvm_book3s_mmu_flush,
-	TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
-		 unsigned long long p2),
-	TP_ARGS(type, vcpu, p1, p2),
-
-	TP_STRUCT__entry(
-		__field(	int,			count		)
-		__field(	unsigned long long,	p1		)
-		__field(	unsigned long long,	p2		)
-		__field(	const char *,		type		)
-	),
-
-	TP_fast_assign(
-		__entry->count		= to_book3s(vcpu)->hpte_cache_count;
-		__entry->p1		= p1;
-		__entry->p2		= p2;
-		__entry->type		= type;
-	),
-
-	TP_printk("Flush %d %sPTEs: %llx - %llx",
-		  __entry->count, __entry->type, __entry->p1, __entry->p2)
-);
-
-TRACE_EVENT(kvm_book3s_slb_found,
-	TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
-	TP_ARGS(gvsid, hvsid),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long long,	gvsid		)
-		__field(	unsigned long long,	hvsid		)
-	),
-
-	TP_fast_assign(
-		__entry->gvsid		= gvsid;
-		__entry->hvsid		= hvsid;
-	),
-
-	TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_fail,
-	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
-	TP_ARGS(sid_map_mask, gvsid),
-
-	TP_STRUCT__entry(
-		__field(	unsigned short,		sid_map_mask	)
-		__field(	unsigned long long,	gvsid		)
-	),
-
-	TP_fast_assign(
-		__entry->sid_map_mask	= sid_map_mask;
-		__entry->gvsid		= gvsid;
-	),
-
-	TP_printk("%x/%x: %llx", __entry->sid_map_mask,
-		  SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
-);
-
-TRACE_EVENT(kvm_book3s_slb_map,
-	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
-		 unsigned long long hvsid),
-	TP_ARGS(sid_map_mask, gvsid, hvsid),
-
-	TP_STRUCT__entry(
-		__field(	unsigned short,		sid_map_mask	)
-		__field(	unsigned long long,	guest_vsid	)
-		__field(	unsigned long long,	host_vsid	)
-	),
-
-	TP_fast_assign(
-		__entry->sid_map_mask	= sid_map_mask;
-		__entry->guest_vsid	= gvsid;
-		__entry->host_vsid	= hvsid;
-	),
-
-	TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
-		  __entry->guest_vsid, __entry->host_vsid)
-);
-
-TRACE_EVENT(kvm_book3s_slbmte,
-	TP_PROTO(u64 slb_vsid, u64 slb_esid),
-	TP_ARGS(slb_vsid, slb_esid),
-
-	TP_STRUCT__entry(
-		__field(	u64,	slb_vsid	)
-		__field(	u64,	slb_esid	)
-	),
-
-	TP_fast_assign(
-		__entry->slb_vsid	= slb_vsid;
-		__entry->slb_esid	= slb_esid;
-	),
-
-	TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
-);
-
-#endif /* CONFIG_PPC_BOOK3S */
-
-
 /*************************************************************************
  *                         Book3E trace points                           *
  *************************************************************************/
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
new file mode 100644
index 0000000..8b22e47
--- /dev/null
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -0,0 +1,297 @@
+
+#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_PR_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_pr
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
+
+#define kvm_trace_symbol_exit \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+TRACE_EVENT(kvm_book3s_reenter,
+	TP_PROTO(int r, struct kvm_vcpu *vcpu),
+	TP_ARGS(r, vcpu),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	r		)
+		__field(	unsigned long,	pc		)
+	),
+
+	TP_fast_assign(
+		__entry->r		= r;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+	),
+
+	TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc)
+);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+TRACE_EVENT(kvm_book3s_64_mmu_map,
+	TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
+		 struct kvmppc_pte *orig_pte),
+	TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
+
+	TP_STRUCT__entry(
+		__field(	unsigned char,		flag_w		)
+		__field(	unsigned char,		flag_x		)
+		__field(	unsigned long,		eaddr		)
+		__field(	unsigned long,		hpteg		)
+		__field(	unsigned long,		va		)
+		__field(	unsigned long long,	vpage		)
+		__field(	unsigned long,		hpaddr		)
+	),
+
+	TP_fast_assign(
+		__entry->flag_w	= ((rflags & HPTE_R_PP) == 3) ? '-' : 'w';
+		__entry->flag_x	= (rflags & HPTE_R_N) ? '-' : 'x';
+		__entry->eaddr	= orig_pte->eaddr;
+		__entry->hpteg	= hpteg;
+		__entry->va	= va;
+		__entry->vpage	= orig_pte->vpage;
+		__entry->hpaddr	= hpaddr;
+	),
+
+	TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx",
+		  __entry->flag_w, __entry->flag_x, __entry->eaddr,
+		  __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr)
+);
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+TRACE_EVENT(kvm_book3s_mmu_map,
+	TP_PROTO(struct hpte_cache *pte),
+	TP_ARGS(pte),
+
+	TP_STRUCT__entry(
+		__field(	u64,		host_vpn	)
+		__field(	u64,		pfn		)
+		__field(	ulong,		eaddr		)
+		__field(	u64,		vpage		)
+		__field(	ulong,		raddr		)
+		__field(	int,		flags		)
+	),
+
+	TP_fast_assign(
+		__entry->host_vpn	= pte->host_vpn;
+		__entry->pfn		= pte->pfn;
+		__entry->eaddr		= pte->pte.eaddr;
+		__entry->vpage		= pte->pte.vpage;
+		__entry->raddr		= pte->pte.raddr;
+		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) |
+					  (pte->pte.may_write ? 0x2 : 0) |
+					  (pte->pte.may_execute ? 0x1 : 0);
+	),
+
+	TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
+		  __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_invalidate,
+	TP_PROTO(struct hpte_cache *pte),
+	TP_ARGS(pte),
+
+	TP_STRUCT__entry(
+		__field(	u64,		host_vpn	)
+		__field(	u64,		pfn		)
+		__field(	ulong,		eaddr		)
+		__field(	u64,		vpage		)
+		__field(	ulong,		raddr		)
+		__field(	int,		flags		)
+	),
+
+	TP_fast_assign(
+		__entry->host_vpn	= pte->host_vpn;
+		__entry->pfn		= pte->pfn;
+		__entry->eaddr		= pte->pte.eaddr;
+		__entry->vpage		= pte->pte.vpage;
+		__entry->raddr		= pte->pte.raddr;
+		__entry->flags		= (pte->pte.may_read ? 0x4 : 0) |
+					  (pte->pte.may_write ? 0x2 : 0) |
+					  (pte->pte.may_execute ? 0x1 : 0);
+	),
+
+	TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]",
+		  __entry->host_vpn, __entry->pfn, __entry->eaddr,
+		  __entry->vpage, __entry->raddr, __entry->flags)
+);
+
+TRACE_EVENT(kvm_book3s_mmu_flush,
+	TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1,
+		 unsigned long long p2),
+	TP_ARGS(type, vcpu, p1, p2),
+
+	TP_STRUCT__entry(
+		__field(	int,			count		)
+		__field(	unsigned long long,	p1		)
+		__field(	unsigned long long,	p2		)
+		__field(	const char *,		type		)
+	),
+
+	TP_fast_assign(
+		__entry->count		= to_book3s(vcpu)->hpte_cache_count;
+		__entry->p1		= p1;
+		__entry->p2		= p2;
+		__entry->type		= type;
+	),
+
+	TP_printk("Flush %d %sPTEs: %llx - %llx",
+		  __entry->count, __entry->type, __entry->p1, __entry->p2)
+);
+
+TRACE_EVENT(kvm_book3s_slb_found,
+	TP_PROTO(unsigned long long gvsid, unsigned long long hvsid),
+	TP_ARGS(gvsid, hvsid),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long long,	gvsid		)
+		__field(	unsigned long long,	hvsid		)
+	),
+
+	TP_fast_assign(
+		__entry->gvsid		= gvsid;
+		__entry->hvsid		= hvsid;
+	),
+
+	TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_fail,
+	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid),
+	TP_ARGS(sid_map_mask, gvsid),
+
+	TP_STRUCT__entry(
+		__field(	unsigned short,		sid_map_mask	)
+		__field(	unsigned long long,	gvsid		)
+	),
+
+	TP_fast_assign(
+		__entry->sid_map_mask	= sid_map_mask;
+		__entry->gvsid		= gvsid;
+	),
+
+	TP_printk("%x/%x: %llx", __entry->sid_map_mask,
+		  SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid)
+);
+
+TRACE_EVENT(kvm_book3s_slb_map,
+	TP_PROTO(u16 sid_map_mask, unsigned long long gvsid,
+		 unsigned long long hvsid),
+	TP_ARGS(sid_map_mask, gvsid, hvsid),
+
+	TP_STRUCT__entry(
+		__field(	unsigned short,		sid_map_mask	)
+		__field(	unsigned long long,	guest_vsid	)
+		__field(	unsigned long long,	host_vsid	)
+	),
+
+	TP_fast_assign(
+		__entry->sid_map_mask	= sid_map_mask;
+		__entry->guest_vsid	= gvsid;
+		__entry->host_vsid	= hvsid;
+	),
+
+	TP_printk("%x: %llx -> %llx", __entry->sid_map_mask,
+		  __entry->guest_vsid, __entry->host_vsid)
+);
+
+TRACE_EVENT(kvm_book3s_slbmte,
+	TP_PROTO(u64 slb_vsid, u64 slb_esid),
+	TP_ARGS(slb_vsid, slb_esid),
+
+	TP_STRUCT__entry(
+		__field(	u64,	slb_vsid	)
+		__field(	u64,	slb_esid	)
+	),
+
+	TP_fast_assign(
+		__entry->slb_vsid	= slb_vsid;
+		__entry->slb_esid	= slb_esid;
+	),
+
+	TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid)
+);
+
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+	TP_ARGS(exit_nr, vcpu),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	exit_nr		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned long,	msr		)
+		__field(	unsigned long,	dar		)
+		__field(	unsigned long,	srr1		)
+		__field(	unsigned long,	last_inst	)
+	),
+
+	TP_fast_assign(
+		__entry->exit_nr	= exit_nr;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->dar		= kvmppc_get_fault_dar(vcpu);
+		__entry->msr		= vcpu->arch.shared->msr;
+		__entry->srr1		= vcpu->arch.shadow_srr1;
+		__entry->last_inst	= vcpu->arch.last_inst;
+	),
+
+	TP_printk("exit=%s"
+		" | pc=0x%lx"
+		" | msr=0x%lx"
+		" | dar=0x%lx"
+		" | srr1=0x%lx"
+		" | last_inst=0x%lx"
+		,
+		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+		__entry->pc,
+		__entry->msr,
+		__entry->dar,
+		__entry->srr1,
+		__entry->last_inst
+		)
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
+#endif /* _TRACE_KVM_PR_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
1.8.1.2

^ permalink raw reply related

* Re: [PATCH RFC 54/77] ntb: Ensure number of MSIs on SNB is enough for the link interrupt
From: Jon Mason @ 2013-10-07 16:50 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, stable,
	linux-s390, Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar,
	linux-pci, iss_storagedev, linux-driver, Tejun Heo, Bjorn Helgaas,
	Dan Williams, Solarflare linux maintainers, netdev, linux-kernel,
	Ralf Baechle, e1000-devel, Martin Schwidefsky, linux390,
	linuxppc-dev
In-Reply-To: <20131005214303.GA21589@dhcp-26-207.brq.redhat.com>

On Sat, Oct 05, 2013 at 11:43:04PM +0200, Alexander Gordeev wrote:
> On Wed, Oct 02, 2013 at 05:48:05PM -0700, Jon Mason wrote:
> > On Wed, Oct 02, 2013 at 12:49:10PM +0200, Alexander Gordeev wrote:
> > > Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
> > > ---
> > >  drivers/ntb/ntb_hw.c |    2 +-
> > >  1 files changed, 1 insertions(+), 1 deletions(-)
> > > 
> > > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
> > > index de2062c..eccd5e5 100644
> > > --- a/drivers/ntb/ntb_hw.c
> > > +++ b/drivers/ntb/ntb_hw.c
> > > @@ -1066,7 +1066,7 @@ static int ntb_setup_msix(struct ntb_device *ndev)
> > >  		/* On SNB, the link interrupt is always tied to 4th vector.  If
> > >  		 * we can't get all 4, then we can't use MSI-X.
> > >  		 */
> > > -		if (ndev->hw_type != BWD_HW) {
> > > +		if ((rc < SNB_MSIX_CNT) && (ndev->hw_type != BWD_HW)) {
> > 
> > Nack, this check is unnecessary.
> 
> If SNB can do more than SNB_MSIX_CNT MSI-Xs then this check is needed
> to enable less than maximum MSI-Xs in case the maximum was not allocated.
> Otherwise SNB will fallback to single MSI instead of multiple MSI-Xs.

Per the comment in the code snippet above, "If we can't get all 4,
then we can't use MSI-X".  There is already a check to see if more
than 4 were acquired.  So it's not possible to hit this.  Even if it
was, don't use SNB_MSIX_CNT here (limits.msix_cnt is the preferred
variable).  Also, the "()" are unnecessary.

Thanks,
Jon

> -- 
> Regards,
> Alexander Gordeev
> agordeev@redhat.com

^ permalink raw reply

* [PATCH -V2 08/14] kvm: powerpc: book3s: Add is_hv_enabled to kvmppc_ops
From: Aneesh Kumar K.V @ 2013-10-07 16:47 UTC (permalink / raw)
  To: agraf, benh, paulus; +Cc: linuxppc-dev, kvm, kvm-ppc, Aneesh Kumar K.V
In-Reply-To: <1381164482-31001-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

This helps us to identify whether we are running with hypervisor mode KVM
enabled. The change is needed so that we can have both HV and PR kvm
enabled in the same kernel.

If both HV and PR KVM are included, interrupts come in to the HV version
of the kvmppc_interrupt code, which then jumps to the PR handler,
renamed to kvmppc_interrupt_pr, if the guest is a PR guest.

Allowing both PR and HV in the same kernel required some changes to
kvm_dev_ioctl_check_extension(), since the values returned now can't
be selected with #ifdefs as much as previously. We look at is_hv_enabled
to return the right value when checking for capabilities. For capabilities that
are only provided by HV KVM, we return the HV value only if
is_hv_enabled is true. For capabilities provided by PR KVM but not HV,
we return the PR value only if is_hv_enabled is false.

NOTE: in a later patch we replace is_hv_enabled with a static inline
function comparing kvm_ppc_ops.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s.h | 53 ----------------------------------
 arch/powerpc/include/asm/kvm_ppc.h    |  5 ++--
 arch/powerpc/kvm/book3s.c             | 44 ++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c          |  1 +
 arch/powerpc/kvm/book3s_pr.c          |  1 +
 arch/powerpc/kvm/book3s_xics.c        |  2 +-
 arch/powerpc/kvm/powerpc.c            | 54 +++++++++++++++++++----------------
 7 files changed, 79 insertions(+), 81 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 315a5d6..4a594b7 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -301,59 +301,6 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 	return vcpu->arch.fault_dar;
 }
 
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-	return to_book3s(vcpu)->hior;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-			unsigned long pending_now, unsigned long old_pending)
-{
-	if (pending_now)
-		vcpu->arch.shared->int_pending = 1;
-	else if (old_pending)
-		vcpu->arch.shared->int_pending = 0;
-}
-
-static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
-{
-	ulong crit_raw = vcpu->arch.shared->critical;
-	ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
-	bool crit;
-
-	/* Truncate crit indicators in 32 bit mode */
-	if (!(vcpu->arch.shared->msr & MSR_SF)) {
-		crit_raw &= 0xffffffff;
-		crit_r1 &= 0xffffffff;
-	}
-
-	/* Critical section when crit == r1 */
-	crit = (crit_raw == crit_r1);
-	/* ... and we're in supervisor mode */
-	crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
-
-	return crit;
-}
-#else /* CONFIG_KVM_BOOK3S_PR_POSSIBLE */
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-			unsigned long pending_now, unsigned long old_pending)
-{
-}
-
-static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
-{
-	return false;
-}
-#endif
-
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3			0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 326033c..c13f15d 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -180,6 +180,7 @@ union kvmppc_one_reg {
 };
 
 struct kvmppc_ops {
+	bool is_hv_enabled;
 	int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 	int (*get_one_reg)(struct kvm_vcpu *vcpu, u64 id,
@@ -309,10 +310,10 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 
 static inline u32 kvmppc_get_xics_latch(void)
 {
-	u32 xirr = get_paca()->kvm_hstate.saved_xirr;
+	u32 xirr;
 
+	xirr = get_paca()->kvm_hstate.saved_xirr;
 	get_paca()->kvm_hstate.saved_xirr = 0;
-
 	return xirr;
 }
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 784a1d5..493aff7 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -69,6 +69,50 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
+{
+	if (!kvmppc_ops->is_hv_enabled)
+		return to_book3s(vcpu)->hior;
+	return 0;
+}
+
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+			unsigned long pending_now, unsigned long old_pending)
+{
+	if (kvmppc_ops->is_hv_enabled)
+		return;
+	if (pending_now)
+		vcpu->arch.shared->int_pending = 1;
+	else if (old_pending)
+		vcpu->arch.shared->int_pending = 0;
+}
+
+static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
+{
+	ulong crit_raw;
+	ulong crit_r1;
+	bool crit;
+
+	if (kvmppc_ops->is_hv_enabled)
+		return false;
+
+	crit_raw = vcpu->arch.shared->critical;
+	crit_r1 = kvmppc_get_gpr(vcpu, 1);
+
+	/* Truncate crit indicators in 32 bit mode */
+	if (!(vcpu->arch.shared->msr & MSR_SF)) {
+		crit_raw &= 0xffffffff;
+		crit_r1 &= 0xffffffff;
+	}
+
+	/* Critical section when crit == r1 */
+	crit = (crit_raw == crit_r1);
+	/* ... and we're in supervisor mode */
+	crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
+
+	return crit;
+}
+
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
 	vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index eb72140..540438d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2159,6 +2159,7 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 }
 
 static struct kvmppc_ops kvmppc_hv_ops = {
+	.is_hv_enabled = true,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
 	.get_one_reg = kvmppc_get_one_reg_hv,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 05d28cf..b6a525d 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1523,6 +1523,7 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp,
 }
 
 static struct kvmppc_ops kvmppc_pr_ops = {
+	.is_hv_enabled = false,
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
 	.get_one_reg = kvmppc_get_one_reg_pr,
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 51ed1ea..cef3de9 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 	}
 
 	/* Check for real mode returning too hard */
-	if (xics->real_mode)
+	if (xics->real_mode && kvmppc_ops->is_hv_enabled)
 		return kvmppc_xics_rm_complete(vcpu, req);
 
 	switch (req) {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 5e3ab80..4481417 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -52,7 +52,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 /*
  * Common checks before entering the guest world.  Call with interrupts
  * disabled.
@@ -127,7 +126,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 
 	return r;
 }
-#endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 {
@@ -194,11 +192,9 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu)
 	if ((vcpu->arch.cpu_type != KVM_CPU_3S_64) && vcpu->arch.papr_enabled)
 		goto out;
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
 	/* HV KVM can only do PAPR mode for now */
-	if (!vcpu->arch.papr_enabled)
+	if (!vcpu->arch.papr_enabled && kvmppc_ops->is_hv_enabled)
 		goto out;
-#endif
 
 #ifdef CONFIG_KVM_BOOKE_HV
 	if (!cpu_has_feature(CPU_FTR_EMB_HV))
@@ -322,22 +318,26 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
-#ifndef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_CAP_PPC_PAIRED_SINGLES:
 	case KVM_CAP_PPC_OSI:
 	case KVM_CAP_PPC_GET_PVINFO:
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB:
 #endif
-#ifdef CONFIG_KVM_MPIC
-	case KVM_CAP_IRQ_MPIC:
-#endif
-		r = 1;
+		/* We support this only for PR */
+		r = !kvmppc_ops->is_hv_enabled;
 		break;
+#ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 #endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC:
+		r = 1;
+		break;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
@@ -348,32 +348,37 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_SMT:
-		r = threads_per_core;
+		if (kvmppc_ops->is_hv_enabled)
+			r = threads_per_core;
+		else
+			r = 0;
 		break;
 	case KVM_CAP_PPC_RMA:
-		r = 1;
+		r = kvmppc_ops->is_hv_enabled;
 		/* PPC970 requires an RMA */
-		if (cpu_has_feature(CPU_FTR_ARCH_201))
+		if (r && cpu_has_feature(CPU_FTR_ARCH_201))
 			r = 2;
 		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-		r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+		if (kvmppc_ops->is_hv_enabled)
+			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
+		else
+			r = 0;
 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 		r = 1;
 #else
 		r = 0;
-		break;
 #endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+		break;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	case KVM_CAP_PPC_HTAB_FD:
-		r = 1;
+		r = kvmppc_ops->is_hv_enabled;
 		break;
 #endif
-		break;
 	case KVM_CAP_NR_VCPUS:
 		/*
 		 * Recommending a number of CPUs is somewhat arbitrary; we
@@ -381,11 +386,10 @@ int kvm_dev_ioctl_check_extension(long ext)
 		 * will have secondary threads "offline"), and for other KVM
 		 * implementations just count online CPUs.
 		 */
-#ifdef CONFIG_KVM_BOOK3S_64_HV
-		r = num_present_cpus();
-#else
-		r = num_online_cpus();
-#endif
+		if (kvmppc_ops->is_hv_enabled)
+			r = num_present_cpus();
+		else
+			r = num_online_cpus();
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
-- 
1.8.1.2

^ permalink raw reply related

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Tejun Heo @ 2013-10-07 18:01 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, linux-s390,
	Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar, linux-pci,
	iss_storagedev, linux-driver, linux390, Bjorn Helgaas,
	Dan Williams, Jon Mason, Solarflare linux maintainers, netdev,
	linux-kernel, Ralf Baechle, e1000-devel, Martin Schwidefsky,
	Ben Hutchings, linuxppc-dev
In-Reply-To: <20131006071027.GA29143@dhcp-26-207.brq.redhat.com>

Hey, guys.

On Sun, Oct 06, 2013 at 09:10:30AM +0200, Alexander Gordeev wrote:
> On Sun, Oct 06, 2013 at 05:19:46PM +1100, Benjamin Herrenschmidt wrote:
> > On Sun, 2013-10-06 at 08:02 +0200, Alexander Gordeev wrote:
> > > In fact, in the current design to address the quota race decently the
> > > drivers would have to protect the *loop* to prevent the quota change
> > > between a pci_enable_msix() returned a positive number and the next
> > > call to pci_enable_msix() with that number. Is it doable?
> > 
> > I am not advocating for the current design, simply saying that your
> > proposal doesn't address this issue while Ben's does.

Hmmm... yeah, the race condition could be an issue as multiple msi
allocation might fail even if the driver can and explicitly handle
multiple allocation if the quota gets reduced in between.

> There is one major flaw in min-max approach - the generic MSI layer
> will have to take decisions on exact number of MSIs to request, not
> device drivers.

The min-max approach would actually be pretty nice for the users which
actually care about this.

> This will never work for all devices, because there might be specific
> requirements which are not covered by the min-max. That is what Ben
> described "...say, any even number within a certain range". Ben suggests
> to leave the existing loop scheme to cover such devices, which I think is
> not right.

if it could work.

> What about introducing pci_lock_msi() and pci_unlock_msi() and let device
> drivers care about their ranges and specifics in race-safe manner?
> I do not call to introduce it right now (since it appears pSeries has not
> been hitting the race for years) just as a possible alternative to Ben's
> proposal.

I don't think the same race condition would happen with the loop.  The
problem case is where multiple msi(x) allocation fails completely
because the global limit went down before inquiry and allocation.  In
the loop based interface, it'd retry with the lower number.

As long as the number of drivers which need this sort of adaptive
allocation isn't too high and the common cases can be made simple, I
don't think the "complex" part of interface is all that important.
Maybe we can have reserve / cancel type interface or just keep the
loop with more explicit function names (ie. try_enable or something
like that).

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH RFC 05/77] PCI/MSI: Convert pci_msix_table_size() to a public interface
From: Tejun Heo @ 2013-10-07 18:10 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, linux-s390,
	Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar, linux-pci,
	iss_storagedev, linux-driver, Bjorn Helgaas, Dan Williams,
	Jon Mason, Solarflare linux maintainers, netdev, linux-kernel,
	Ralf Baechle, e1000-devel, Martin Schwidefsky, linux390,
	linuxppc-dev
In-Reply-To: <e8b51bd48c24d0fc4ee8adea5c138c9bf84191e9.1380703262.git.agordeev@redhat.com>

Hello,

On Wed, Oct 02, 2013 at 12:48:21PM +0200, Alexander Gordeev wrote:
> Make pci_msix_table_size() to return a error code if the device
> does not support MSI-X. This update is needed to facilitate a
> forthcoming re-design MSI/MSI-X interrupts enabling pattern.
> 
> Device drivers will use this interface to obtain maximum number
> of MSI-X interrupts the device supports and use that value in
> the following call to pci_enable_msix() interface.
> 
> Signed-off-by: Alexander Gordeev <agordeev@redhat.com>

Hmmm... I probably missed something but why is this necessary?  To
discern between -EINVAL and -ENOSPC?  If so, does that really matter?

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH RFC 07/77] PCI/MSI: Re-design MSI/MSI-X interrupts enablement pattern
From: Tejun Heo @ 2013-10-07 18:17 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, linux-doc, VMware, Inc., linux-nvme, linux-ide,
	linux-s390, Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar,
	linux-pci, iss_storagedev, linux-driver, Bjorn Helgaas,
	Dan Williams, Jon Mason, Solarflare linux maintainers, netdev,
	linux-kernel, Ralf Baechle, e1000-devel, Martin Schwidefsky,
	linux390, linuxppc-dev
In-Reply-To: <d8c36203ada6efbfa9f7ce92c2f713ee3b6d6b8d.1380703262.git.agordeev@redhat.com>

Hello,

On Wed, Oct 02, 2013 at 12:48:23PM +0200, Alexander Gordeev wrote:
> +static int foo_driver_enable_msi(struct foo_adapter *adapter, int nvec)
> +{
> +	rc = pci_get_msi_cap(adapter->pdev);
> +	if (rc < 0)
> +		return rc;
> +
> +	nvec = min(nvec, rc);
> +	if (nvec < FOO_DRIVER_MINIMUM_NVEC) {
> +		return -ENOSPC;
> +
> +	rc = pci_enable_msi_block(adapter->pdev, nvec);
> +	return rc;
> +}

If there are many which duplicate the above pattern, it'd probably be
worthwhile to provide a helper?  It's usually a good idea to reduce
the amount of boilerplate code in drivers.

>  static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
>  {
> +	rc = pci_msix_table_size(adapter->pdev);
> +	if (rc < 0)
> +		return rc;
> +
> +	nvec = min(nvec, rc);
> +	if (nvec < FOO_DRIVER_MINIMUM_NVEC) {
> +		return -ENOSPC;
> +
> +	for (i = 0; i < nvec; i++)
> +		adapter->msix_entries[i].entry = i;
> +
> +	rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, nvec);
> +	return rc;
>  }

Ditto.

> @@ -975,7 +951,7 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
>  	if (nr_entries < 0)
>  		return nr_entries;
>  	if (nvec > nr_entries)
> -		return nr_entries;
> +		return -EINVAL;
>  
>  	/* Check for any invalid entries */
>  	for (i = 0; i < nvec; i++) {

If we do things this way, it breaks all drivers using this interface
until they're converted, right?  Also, it probably isn't the best idea
to flip the behavior like this as this can go completely unnoticed (no
compiler warning or anything, the same function just behaves
differently).  Maybe it'd be a better idea to introduce a simpler
interface that most can be converted to?

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Tejun Heo @ 2013-10-07 18:21 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, linux-s390,
	Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar, linux-pci,
	iss_storagedev, linux-driver, Bjorn Helgaas, Dan Williams,
	Jon Mason, Solarflare linux maintainers, netdev, linux-kernel,
	Ralf Baechle, e1000-devel, Martin Schwidefsky, linux390,
	linuxppc-dev
In-Reply-To: <cover.1380703262.git.agordeev@redhat.com>

Hello, Alexander.

On Wed, Oct 02, 2013 at 12:48:16PM +0200, Alexander Gordeev wrote:
> Alexander Gordeev (77):
>   PCI/MSI: Fix return value when populate_msi_sysfs() failed
>   PCI/MSI/PPC: Fix wrong RTAS error code reporting
>   PCI/MSI/s390: Fix single MSI only check
>   PCI/MSI/s390: Remove superfluous check of MSI type
>   PCI/MSI: Convert pci_msix_table_size() to a public interface
>   PCI/MSI: Factor out pci_get_msi_cap() interface
>   PCI/MSI: Re-design MSI/MSI-X interrupts enablement pattern
>   PCI/MSI: Get rid of pci_enable_msi_block_auto() interface
>   ahci: Update MSI/MSI-X interrupts enablement code
>   ahci: Check MRSM bit when multiple MSIs enabled
...

Whee.... that's a lot more than I expected.  I was just scanning
multiple msi users.  Maybe we can stage the work in more manageable
steps so that you don't have to go through massive conversion only to
do it all over again afterwards and likewise people don't get
bombarded on each iteration?  Maybe we can first update pci / msi code
proper, msi and then msix?

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [PATCH RFC 54/77] ntb: Ensure number of MSIs on SNB is enough for the link interrupt
From: Alexander Gordeev @ 2013-10-07 18:38 UTC (permalink / raw)
  To: Jon Mason
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, stable,
	linux-s390, Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar,
	linux-pci, iss_storagedev, linux-driver, Tejun Heo, Bjorn Helgaas,
	Dan Williams, Solarflare linux maintainers, netdev, linux-kernel,
	Ralf Baechle, e1000-devel, Martin Schwidefsky, linux390,
	linuxppc-dev
In-Reply-To: <20131007165056.GA24536@jonmason-lab>

On Mon, Oct 07, 2013 at 09:50:57AM -0700, Jon Mason wrote:
> On Sat, Oct 05, 2013 at 11:43:04PM +0200, Alexander Gordeev wrote:
> > On Wed, Oct 02, 2013 at 05:48:05PM -0700, Jon Mason wrote:
> > > On Wed, Oct 02, 2013 at 12:49:10PM +0200, Alexander Gordeev wrote:
> > > > Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
> > > > ---
> > > >  drivers/ntb/ntb_hw.c |    2 +-
> > > >  1 files changed, 1 insertions(+), 1 deletions(-)
> > > > 
> > > > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
> > > > index de2062c..eccd5e5 100644
> > > > --- a/drivers/ntb/ntb_hw.c
> > > > +++ b/drivers/ntb/ntb_hw.c
> > > > @@ -1066,7 +1066,7 @@ static int ntb_setup_msix(struct ntb_device *ndev)
> > > >  		/* On SNB, the link interrupt is always tied to 4th vector.  If
> > > >  		 * we can't get all 4, then we can't use MSI-X.
> > > >  		 */
> > > > -		if (ndev->hw_type != BWD_HW) {
> > > > +		if ((rc < SNB_MSIX_CNT) && (ndev->hw_type != BWD_HW)) {
> > > 
> > > Nack, this check is unnecessary.
> > 
> > If SNB can do more than SNB_MSIX_CNT MSI-Xs then this check is needed
> > to enable less than maximum MSI-Xs in case the maximum was not allocated.
> > Otherwise SNB will fallback to single MSI instead of multiple MSI-Xs.
> 
> Per the comment in the code snippet above, "If we can't get all 4,
> then we can't use MSI-X".  There is already a check to see if more
> than 4 were acquired.  So it's not possible to hit this.  Even if it
> was, don't use SNB_MSIX_CNT here (limits.msix_cnt is the preferred
> variable).  Also, the "()" are unnecessary.

The changelog is definitely bogus. I meant here an improvement to the
existing scheme, not a conversion to the new one:

	msix_entries = msix_table_size(val);

Getting e.g. 16 vectors here.

	if (msix_entries > ndev->limits.msix_cnt) {
		rc = -EINVAL;
		goto err;
	}

Say the upper limit check succeeds.

	[...]

	rc = pci_enable_msix(pdev, ndev->msix_entries, msix_entries);

pci_enable_msix() does not succeed and returns e.g. 8 here, so we should retry.

	if (rc < 0)
		goto err1;
	if (rc > 0) {
		/* On SNB, the link interrupt is always tied to 4th vector.  If
		 * we can't get all 4, then we can't use MSI-X.
		 */
		if (ndev->hw_type != BWD_HW) {

On SNB we bail out here, although we could have continued with 8 vectors.
We can only use SNB_MSIX_CNT here, since limits.msix_cnt is the upper limit.

			rc = -EIO;
			goto err1;
		}

		[...]
	}

-- 
Regards,
Alexander Gordeev
agordeev@redhat.com

^ permalink raw reply

* Re: [PATCH v4 net-next] fix unsafe set_memory_rw from softirq
From: David Miller @ 2013-10-07 19:17 UTC (permalink / raw)
  To: eric.dumazet
  Cc: linux-s390, netdev, heiko.carstens, edumazet, dborkman,
	linuxppc-dev, linux-arm-kernel, ast
In-Reply-To: <1381078592.12191.0.camel@edumazet-glaptop.roam.corp.google.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 06 Oct 2013 09:56:32 -0700

> On Fri, 2013-10-04 at 00:14 -0700, Alexei Starovoitov wrote:
>> on x86 system with net.core.bpf_jit_enable = 1
> 
>> cannot reuse jited filter memory, since it's readonly,
>> so use original bpf insns memory to hold work_struct
>> 
>> defer kfree of sk_filter until jit completed freeing
>> 
>> tested on x86_64 and i386
>> 
>> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
> 
> Acked-by: Eric Dumazet <edumazet@google.com>

I've decided to apply this to 'net', thanks.

^ permalink raw reply

* Re: Build regressions/improvements in v3.12-rc4
From: Geert Uytterhoeven @ 2013-10-07 19:27 UTC (permalink / raw)
  To: linux-kernel@vger.kernel.org; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <1381173828-3877-1-git-send-email-geert@linux-m68k.org>

On Mon, Oct 7, 2013 at 9:23 PM, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> JFYI, when comparing v3.12-rc4 to v3.12-rc3[3], the summaries are:
>   - build errors: +6/-10

  + /scratch/kisskb/src/arch/powerpc/xmon/xmon.c: error: implicit
declaration of function 'dump_tlb_44x'
[-Werror=implicit-function-declaration]:  => 897:4
  + /scratch/kisskb/src/drivers/tty/serial/nwpserial.c: error:
implicit declaration of function 'udelay'
[-Werror=implicit-function-declaration]:  => 53:3

powerpc-randconfig

> [1] http://kisskb.ellerman.id.au/kisskb/head/6753/ (all 120 configs)
> [3] http://kisskb.ellerman.id.au/kisskb/head/6727/ (all 120 configs)

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Benjamin Herrenschmidt @ 2013-10-07 20:10 UTC (permalink / raw)
  To: Tejun Heo
  Cc: linux-mips, VMware, Inc., linux-pci, linux-nvme, linux-ide,
	linux-s390, Andy King, linux-scsi, linux-rdma, x86,
	Alexander Gordeev, iss_storagedev, linux-driver, linux390,
	Bjorn Helgaas, Dan Williams, Jon Mason, Ingo Molnar,
	Solarflare linux maintainers, netdev, linux-kernel, Ralf Baechle,
	e1000-devel, Martin Schwidefsky, Ben Hutchings, linuxppc-dev
In-Reply-To: <20131007180111.GC2481@htj.dyndns.org>

On Mon, 2013-10-07 at 14:01 -0400, Tejun Heo wrote:
> I don't think the same race condition would happen with the loop.  The
> problem case is where multiple msi(x) allocation fails completely
> because the global limit went down before inquiry and allocation.  In
> the loop based interface, it'd retry with the lower number.
> 
> As long as the number of drivers which need this sort of adaptive
> allocation isn't too high and the common cases can be made simple, I
> don't think the "complex" part of interface is all that important.
> Maybe we can have reserve / cancel type interface or just keep the
> loop with more explicit function names (ie. try_enable or something
> like that).

I'm thinking a better API overall might just have been to request
individual MSI-X one by one :-)

We want to be able to request an MSI-X at runtime anyway ... if I want
to dynamically add a queue to my network interface, I want it to be able
to pop a new arbitrary MSI-X.

And we don't want to lock drivers into contiguous MSI-X sets either.

And for the cleanup ... well that's what the "pcim" functions are for,
we can just make MSI-X variants.

Ben.

^ permalink raw reply

* Re: [PATCH RFC 54/77] ntb: Ensure number of MSIs on SNB is enough for the link interrupt
From: Jon Mason @ 2013-10-07 20:31 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, stable,
	linux-s390, Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar,
	linux-pci, iss_storagedev, linux-driver, Tejun Heo, Bjorn Helgaas,
	Dan Williams, Solarflare linux maintainers, netdev, linux-kernel,
	Ralf Baechle, e1000-devel, Martin Schwidefsky, linux390,
	linuxppc-dev
In-Reply-To: <20131007183845.GA1834@dhcp-26-207.brq.redhat.com>

On Mon, Oct 07, 2013 at 08:38:45PM +0200, Alexander Gordeev wrote:
> On Mon, Oct 07, 2013 at 09:50:57AM -0700, Jon Mason wrote:
> > On Sat, Oct 05, 2013 at 11:43:04PM +0200, Alexander Gordeev wrote:
> > > On Wed, Oct 02, 2013 at 05:48:05PM -0700, Jon Mason wrote:
> > > > On Wed, Oct 02, 2013 at 12:49:10PM +0200, Alexander Gordeev wrote:
> > > > > Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
> > > > > ---
> > > > >  drivers/ntb/ntb_hw.c |    2 +-
> > > > >  1 files changed, 1 insertions(+), 1 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
> > > > > index de2062c..eccd5e5 100644
> > > > > --- a/drivers/ntb/ntb_hw.c
> > > > > +++ b/drivers/ntb/ntb_hw.c
> > > > > @@ -1066,7 +1066,7 @@ static int ntb_setup_msix(struct ntb_device *ndev)
> > > > >  		/* On SNB, the link interrupt is always tied to 4th vector.  If
> > > > >  		 * we can't get all 4, then we can't use MSI-X.
> > > > >  		 */
> > > > > -		if (ndev->hw_type != BWD_HW) {
> > > > > +		if ((rc < SNB_MSIX_CNT) && (ndev->hw_type != BWD_HW)) {
> > > > 
> > > > Nack, this check is unnecessary.
> > > 
> > > If SNB can do more than SNB_MSIX_CNT MSI-Xs then this check is needed
> > > to enable less than maximum MSI-Xs in case the maximum was not allocated.
> > > Otherwise SNB will fallback to single MSI instead of multiple MSI-Xs.
> > 
> > Per the comment in the code snippet above, "If we can't get all 4,
> > then we can't use MSI-X".  There is already a check to see if more
> > than 4 were acquired.  So it's not possible to hit this.  Even if it
> > was, don't use SNB_MSIX_CNT here (limits.msix_cnt is the preferred
> > variable).  Also, the "()" are unnecessary.
> 
> The changelog is definitely bogus. I meant here an improvement to the
> existing scheme, not a conversion to the new one:
> 
> 	msix_entries = msix_table_size(val);
> 
> Getting i.e. 16 vectors here.
> 
> 	if (msix_entries > ndev->limits.msix_cnt) {

On SNB HW, limits.msix_cnt is set to SNB_MSIX_CNT (4)
http://lxr.free-electrons.com/source/drivers/ntb/ntb_hw.c#L558

> 		rc = -EINVAL;
> 		goto err;
> 	}
> 
> Upper limit check i.e. succeeds.
> 
> 	[...]
> 
> 	rc = pci_enable_msix(pdev, ndev->msix_entries, msix_entries);
> 
> pci_enable_msix() does not success and returns i.e. 8 here, should retry.

Per the above, since our upper bound is 4.  We will either have this
return 0 for all 4 or a number between 1 and 3 (or an error, but
that's not relevant to this discussion).

> 	if (rc < 0)
> 		goto err1;
> 	if (rc > 0) {
> 		/* On SNB, the link interrupt is always tied to 4th vector.  If
> 		 * we can't get all 4, then we can't use MSI-X.
> 		 */
> 		if (ndev->hw_type != BWD_HW) {
> 
> On SNB bail out here, although could have continue with 8 vectors.
> Can only use SNB_MSIX_CNT here, since limits.msix_cnt is the upper limit.

Since we can guarantee that rc is between 1 and 3 at this point (on
SNB HW), we should error out.

Thanks,
Jon


> 
> 			rc = -EIO;
> 			goto err1;
> 		}
> 
> 		[...]
> 	}
> 
> -- 
> Regards,
> Alexander Gordeev
> agordeev@redhat.com

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Ben Hutchings @ 2013-10-07 20:46 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-mips, VMware, Inc., linux-pci, linux-nvme, linux-ide,
	linux-s390, Andy King, linux-scsi, linux-rdma, x86,
	Alexander Gordeev, iss_storagedev, linux-driver,
	Martin Schwidefsky, Bjorn Helgaas, Dan Williams, Jon Mason,
	Ingo Molnar, Solarflare linux maintainers, netdev, linux-kernel,
	Ralf Baechle, e1000-devel, Tejun Heo, linux390, linuxppc-dev
In-Reply-To: <1381176656.645.171.camel@pasglop>

On Tue, 2013-10-08 at 07:10 +1100, Benjamin Herrenschmidt wrote:
> On Mon, 2013-10-07 at 14:01 -0400, Tejun Heo wrote:
> > I don't think the same race condition would happen with the loop.  The
> > problem case is where multiple msi(x) allocation fails completely
> > because the global limit went down before inquiry and allocation.  In
> > the loop based interface, it'd retry with the lower number.
> > 
> > As long as the number of drivers which need this sort of adaptive
> > allocation isn't too high and the common cases can be made simple, I
> > don't think the "complex" part of interface is all that important.
> > Maybe we can have reserve / cancel type interface or just keep the
> > loop with more explicit function names (ie. try_enable or something
> > like that).
> 
> I'm thinking a better API overall might just have been to request
> individual MSI-X one by one :-)
> 
> We want to be able to request an MSI-X at runtime anyway ... if I want
> to dynamically add a queue to my network interface, I want it to be able
> to pop a new arbitrary MSI-X.

Yes, this would be very useful.

> And we don't want to lock drivers into contiguous MSI-X sets either.

I don't think there's any such limitation now.  The entries array passed
to pci_enable_msix() specifies which MSI-X vectors the driver wants to
enable.  It's usually filled with 0..nvec-1 in order, but not always.
And the IRQ numbers returned aren't usually contiguous either, on x86.

Ben.

> And for the cleanup ... well that's what the "pcim" functions are for,
> we can just make MSI-X variants.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Ben Hutchings @ 2013-10-07 20:48 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: linux-mips, VMware, Inc., linux-nvme, linux-ide, linux-s390,
	Andy King, linux-scsi, linux-rdma, x86, Ingo Molnar, linux-pci,
	iss_storagedev, linux-driver, Tejun Heo, Bjorn Helgaas,
	Dan Williams, Jon Mason, Solarflare linux maintainers, netdev,
	linux-kernel, Ralf Baechle, e1000-devel, Martin Schwidefsky,
	linux390, linuxppc-dev
In-Reply-To: <20131006071027.GA29143@dhcp-26-207.brq.redhat.com>

On Sun, 2013-10-06 at 09:10 +0200, Alexander Gordeev wrote:
> On Sun, Oct 06, 2013 at 05:19:46PM +1100, Benjamin Herrenschmidt wrote:
> > On Sun, 2013-10-06 at 08:02 +0200, Alexander Gordeev wrote:
> > > In fact, in the current design to address the quota race decently the
> > > drivers would have to protect the *loop* to prevent the quota change
> > > between a pci_enable_msix() returned a positive number and the next
> > > call to pci_enable_msix() with that number. Is it doable?
> > 
> > I am not advocating for the current design, simply saying that your
> > proposal doesn't address this issue while Ben's does.
> 
> There is one major flaw in min-max approach - the generic MSI layer
> will have to take decisions on exact number of MSIs to request, not
> device drivers.
[...]

No, the min-max functions should be implemented using the same loop that
drivers are expected to use now.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* [PATCH] powerpc/irq: Don't switch to irq stack from softirq stack
From: Benjamin Herrenschmidt @ 2013-10-07 21:08 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Cédric Le Goater, linuxppc-dev

irq_exit() is now called on the irq stack, which can trigger a switch
to the softirq stack from the irq stack. If an interrupt happens at
that point, we will not properly detect the re-entrancy and clobber
the original return context on the irq stack.

This fixes it. The side effect is to prevent all nesting from softirq
stack to irq stack even in the "safe" case but it's simpler that way
and matches what x86_64 does.

Reported-by: Cédric Le Goater <clg@fr.ibm.com>
Tested-by: Cédric Le Goater <clg@fr.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

Linus, I don't have my git repo at hand right now and this is pretty
urgent, can you apply it directly please ?

Cheers,
Ben.

 arch/powerpc/kernel/irq.c |    5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 57d286a..c7cb8c2 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -495,14 +495,15 @@ void __do_irq(struct pt_regs *regs)
 void do_IRQ(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	struct thread_info *curtp, *irqtp;
+	struct thread_info *curtp, *irqtp, *sirqtp;

 	/* Switch to the irq stack to handle this */
 	curtp = current_thread_info();
 	irqtp = hardirq_ctx[raw_smp_processor_id()];
+	sirqtp = softirq_ctx[raw_smp_processor_id()];

 	/* Already there ? */
-	if (unlikely(curtp == irqtp)) {
+	if (unlikely(curtp == irqtp || curtp == sirqtp)) {
 		__do_irq(regs);
 		set_irq_regs(old_regs);
 		return;
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH] powerpc/irq: Don't switch to irq stack from softirq stack
From: Linus Torvalds @ 2013-10-07 21:20 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Cédric Le Goater, linuxppc-dev
In-Reply-To: <1381180104.645.176.camel@pasglop>

On Mon, Oct 7, 2013 at 2:08 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
>
> Linus, I don't have my git repo at hand right now and this is pretty
> urgent, can you apply it directly please ?

Done.

              Linus

^ permalink raw reply

* Re: [E1000-devel] [PATCH RFC 00/77] Re-design MSI/MSI-X interrupts enablement pattern
From: Waskiewicz Jr, Peter P @ 2013-10-07 22:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-mips@linux-mips.org, VMware, Inc.,
	linux-pci@vger.kernel.org, linux-nvme@lists.infradead.org,
	linux-ide@vger.kernel.org, Martin Schwidefsky,
	linux-s390@vger.kernel.org, King, linux-scsi@vger.kernel.org,
	linux-rdma@vger.kernel.org, x86@kernel.org, Molnar,
	iss_storagedev@hp.com, linuxppc-dev@lists.ozlabs.org, Tejun Heo,
	Ben Hutchings, Williams, Dan J, Mason, Jon,
	Solarflare linux maintainers, Alexander Gordeev,
	linux-kernel@vger.kernel.org, Ralf Baechle,
	e1000-devel@lists.sourceforge.net, netdev@vger.kernel.org,
	linux-driver@qlogic.com, linux390@de.ibm.com, Bjorn Helgaas
In-Reply-To: <1381176656.645.171.camel@pasglop>

On Tue, 2013-10-08 at 07:10 +1100, Benjamin Herrenschmidt wrote:
> On Mon, 2013-10-07 at 14:01 -0400, Tejun Heo wrote:
> > I don't think the same race condition would happen with the loop.  The
> > problem case is where multiple msi(x) allocation fails completely
> > because the global limit went down before inquiry and allocation.  In
> > the loop based interface, it'd retry with the lower number.
> > 
> > As long as the number of drivers which need this sort of adaptive
> > allocation isn't too high and the common cases can be made simple, I
> > don't think the "complex" part of interface is all that important.
> > Maybe we can have reserve / cancel type interface or just keep the
> > loop with more explicit function names (ie. try_enable or something
> > like that).
> 
> We want to be able to request an MSI-X at runtime anyway ... if I want
> to dynamically add a queue to my network interface, I want it to be able
> to pop a new arbitrary MSI-X.

If you want to dynamically allocate another queue, you'd either need to
have them all pre-allocated at alloc_etherdev_mqs(), or add a new API to
netdev that allows adding new queues on the fly.

How things are done today, the Tx queues are all tacked onto the end of
the netdev struct.  That would have to change to probably a linked list
of queues that could be grown or shrunk on the fly.
netif_alloc_netdev_queues() would need to change the kzalloc() to a list
allocation.

Cheers,
-PJ

^ permalink raw reply

* Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
From: Alex Williamson @ 2013-10-08  3:13 UTC (permalink / raw)
  To: Bhushan Bharat-R65777
  Cc: agraf@suse.de, Wood Scott-B07421, linux-pci@vger.kernel.org,
	joro@8bytes.org, linux-kernel@vger.kernel.org,
	iommu@lists.linux-foundation.org, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <6A3DF150A5B70D4F9B66A25E3F7C888D0719385B@039-SN2MPN1-011.039d.mgd.msft.net>

On Mon, 2013-10-07 at 05:46 +0000, Bhushan Bharat-R65777 wrote:
> 
> > -----Original Message-----
> > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > Sent: Friday, October 04, 2013 11:42 PM
> > To: Bhushan Bharat-R65777
> > Cc: joro@8bytes.org; benh@kernel.crashing.org; galak@kernel.crashing.org; linux-
> > kernel@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
> > pci@vger.kernel.org; agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > foundation.org
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> > 
> > On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> > >
> > > > -----Original Message-----
> > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > Sent: Friday, October 04, 2013 10:43 PM
> > > > To: Bhushan Bharat-R65777
> > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > device
> > > >
> > > > On Fri, 2013-10-04 at 16:47 +0000, Bhushan Bharat-R65777 wrote:
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Alex Williamson [mailto:alex.williamson@redhat.com]
> > > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > > To: Bhushan Bharat-R65777
> > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > foundation.org
> > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > > device
> > > > > >
> > > > > > On Fri, 2013-10-04 at 09:54 +0000, Bhushan Bharat-R65777 wrote:
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: linux-pci-owner@vger.kernel.org
> > > > > > > > [mailto:linux-pci-owner@vger.kernel.org]
> > > > > > > > On Behalf Of Alex Williamson
> > > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > > To: Bhushan Bharat-R65777
> > > > > > > > Cc: joro@8bytes.org; benh@kernel.crashing.org;
> > > > > > > > galak@kernel.crashing.org; linux- kernel@vger.kernel.org;
> > > > > > > > linuxppc-dev@lists.ozlabs.org; linux- pci@vger.kernel.org;
> > > > > > > > agraf@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > > foundation.org; Bhushan Bharat-R65777
> > > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > > of a device
> > > > > > > >
> > > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > > > > This api returns the iommu domain to which the device is attached.
> > > > > > > > > The iommu_domain is required for making API calls related to
> > iommu.
> > > > > > > > > > Follow-up patches use this API to learn the iommu mapping.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > > <bharat.bhushan@freescale.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/iommu/iommu.c |   10 ++++++++++
> > > > > > > > >  include/linux/iommu.h |    7 +++++++
> > > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > > > > index
> > > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > > >
> > > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > > > > +	struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > > > > +
> > > > > > > > > +	if (unlikely(ops == NULL || ops->get_dev_iommu_domain ==
> > NULL))
> > > > > > > > > +		return NULL;
> > > > > > > > > +
> > > > > > > > > +	return ops->get_dev_iommu_domain(dev); }
> > > > > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > > > > >
> > > > > > > > What prevents this from racing iommu_domain_free()?  There's
> > > > > > > > no references acquired, so there's no reason for the caller
> > > > > > > > to assume the
> > > > > > pointer is valid.
> > > > > > >
> > > > > > > Sorry for late query, somehow this email went into a folder
> > > > > > > and escaped;
> > > > > > >
> > > > > > > Just to be sure, there is not lock at generic "struct
> > > > > > > iommu_domain", but IP
> > > > > > specific structure (link FSL domain) linked in
> > > > > > iommu_domain->priv have a lock, so we need to ensure this race
> > > > > > in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?
> > > > > >
> > > > > > No, it's not sufficient to make sure that your use of the
> > > > > > interface is race free.  The interface itself needs to be
> > > > > > designed so that it's difficult to use incorrectly.
> > > > >
> > > > > So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
> > > > > iommu_get_dev_domain() will return domain with the lock held, and
> > > > > iommu_put_dev_domain() will release the lock? And
> > > > > iommu_get_dev_domain() must always be followed by
> > > > > > iommu_put_dev_domain().
> > > >
> > > > What lock?  get/put are generally used for reference counting, not
> > > > locking in the kernel.
> > > >
> > > > > > That's not the case here.  This is a backdoor to get the iommu
> > > > > > domain from the iommu driver regardless of who is using it or how.
> > > > > > The iommu domain is created and managed by vfio, so shouldn't we
> > > > > > be looking at how to do this through vfio?
> > > > >
> > > > > Let me first describe what we are doing here:
> > > > > During initialization:-
> > > > >  - vfio talks to MSI system to know the MSI-page and size
> > > > >  - vfio then interacts with iommu to map the MSI-page in iommu
> > > > > (IOVA is decided by userspace and physical address is the
> > > > > MSI-page)
> > > > >  - So the IOVA subwindow mapping is created in iommu and yes VFIO
> > > > > know about
> > > > this mapping.
> > > > >
> > > > > Now do SET_IRQ(MSI/MSIX) ioctl:
> > > > >  - calls pci_enable_msix()/pci_enable_msi_block(): which is
> > > > > supposed to set
> > > > MSI address/data in device.
> > > > >  - So in current implementation (this patchset) msi-subsystem gets
> > > > > the IOVA
> > > > from iommu via this defined interface.
> > > > >  - Are you saying that rather than getting this from iommu, we
> > > > > should get this
> > > > from vfio? What difference does this make?
> > > >
> > > > Yes, you just said above that vfio knows the msi to iova mapping, so
> > > > why go outside of vfio to find it later?  The difference is one case
> > > > you can have a proper reference to data structures to make sure the
> > > > pointer you get back actually has meaning at the time you're using
> > > > it vs the code here where you're defining an API that returns a
> > > > meaningless value
> > >
> > > > With FSL-PAMU we will always get consistent data from iommu or vfio-data
> > structure.
> > 
> > Great, but you're trying to add a generic API to the IOMMU subsystem that's
> > difficult to use correctly.  The fact that you use it correctly does not justify
> > the API.
> > 
> > > > because you can't check or
> > > > enforce that an arbitrary caller is using it correctly.
> > >
> > > I am not sure what is arbitrary caller? pdev is known to vfio, so vfio
> > > will only make pci_enable_msix()/pci_enable_msi_block() for this pdev.
> > > > If any other code makes these calls, then something unexpected is
> > > > happening in the system, no?
> > 
> > What's proposed here is a generic IOMMU API.  Anybody can call this.
> > > What if the host SCSI driver decides to go get the iommu domain for its device
> > (or any other device)?  Does that fit your usage model?
> > 
> > > >  It's not maintainable.
> > > > Thanks,
> > >
> > > I do not have any issue with this as well, can you also describe the
> > > type of API you are envisioning; I can think of defining some function
> > > in vfio.c/vfio_iommu*.c, make them global and declare then in
> > > include/Linux/vfio.h And include <Linux/vfio.h> in caller file
> > > (arch/powerpc/kernel/msi.c)
> > 
> > Do you really want module dependencies between vfio and your core kernel MSI
> > setup?  Look at the vfio external user interface that we've already defined.
> > That allows other components of the kernel to get a proper reference to a vfio
> > group.  From there you can work out how to get what you want.  Another
> > alternative is that vfio could register an MSI to IOVA mapping with architecture
> > code when the mapping is created.  The MSI setup path could then do a lookup in
> > architecture code for the mapping.  You could even store the MSI to IOVA mapping
> > in VFIO and create an interface where SET_IRQ passes that mapping into setup
> > code.
> 
> Ok, What I want is to get IOVA associated with a physical address
> (physical address of MSI-bank).
> And currently I do not see a way to know IOVA of a physical address
> and doing all this domain get and then search through all of
> iommu-windows of that domain.
> 
> What if we add an iommu-API which can return the IOVA mapping of a
> physical address. Current use case is setting up MSI's for aperture
> type of IOMMU also getting a phys_to_iova() mapping is independent of
> VFIO, your thought?

A physical address can be mapped to multiple IOVAs, so the interface
seems flawed by design.  It also has the same problem as above, it's a
backdoor that can be called asynchronous to the owner of the domain, so
what reason is there to believe the result?  It just replaces an
iommu_domain pointer with an IOVA.  VFIO knows this mapping, so why are
we trying to go behind its back and ask the IOMMU?  Thanks,

Alex

^ permalink raw reply

* Re: [PATCH -V2 1/2] powerpc: Use HPTE constants when updating hpte bits
From: Aneesh Kumar K.V @ 2013-10-08  3:15 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev
In-Reply-To: <1380810140-11015-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com>

"Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> writes:

> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>
> Even though we have the same value for linux PTE bits and hash PTE bits,
> use the hash PTE bits when updating the hash PTE

...
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index 02d6e21..78f2c59 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -146,8 +146,9 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
>  	flags = 0;
>
>  	/* Make pHyp happy */
> -	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
> -		hpte_r &= ~_PAGE_COHERENT;
> +	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
> +		hpte_r &= ~HPTE_R_M;
> +

-	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
+	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))

The `if` condition part of the change is already done upstream. Since V2
was generated after moving the V1 patch to a different tree (kvm tree),
I ended up with that additional change. Let me know if you want me to
respin the patch.

>  	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
>  		flags |= H_COALESCE_CAND;
>

-aneesh

^ permalink raw reply

* Re: [PATCH v11 2/3] DMA: Freescale: Add new 8-channel DMA engine device tree nodes
From: Hongbo Zhang @ 2013-10-08  3:22 UTC (permalink / raw)
  To: hongbo.zhang
  Cc: mark.rutland, devicetree, ian.campbell, pawel.moll, swarren,
	vinod.koul, linux-kernel, rob.herring, djbw, linuxppc-dev
In-Reply-To: <1380188023-3936-3-git-send-email-hongbo.zhang@freescale.com>

Hi Mark, Stephen and other DT maintainers?

The 1/3 had already been acked by Mark, and please have a further look 
at this patch 2/3.
The DMA maintainer Vinod  needs ack for the DT related patches so that 
he can take all this patch set.

On 09/26/2013 05:33 PM, hongbo.zhang@freescale.com wrote:
> From: Hongbo Zhang <hongbo.zhang@freescale.com>
>
> Freescale QorIQ T4 and B4 introduce new 8-channel DMA engines, this patch adds
> the device tree nodes for them.
>
> Signed-off-by: Hongbo Zhang <hongbo.zhang@freescale.com>
> ---
>   .../devicetree/bindings/powerpc/fsl/dma.txt        |   70 +++++++++++++++++
>   arch/powerpc/boot/dts/fsl/b4si-post.dtsi           |    4 +-
>   arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi          |   82 ++++++++++++++++++++
>   arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi          |   82 ++++++++++++++++++++
>   arch/powerpc/boot/dts/fsl/t4240si-post.dtsi        |    4 +-
>   5 files changed, 238 insertions(+), 4 deletions(-)
>   create mode 100644 arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
>   create mode 100644 arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
>
> diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
> index 0584168..7fc1b01 100644
> --- a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
> +++ b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
> @@ -128,6 +128,76 @@ Example:
>   		};
>   	};
>   
> +** Freescale Elo3 DMA Controller
> +   DMA controller which has same function as EloPlus except that Elo3 has 8
> +   channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx
> +   series chips, such as t1040, t4240, b4860.
> +
> +Required properties:
> +
> +- compatible        : must include "fsl,elo3-dma"
> +- reg               : contains two entries for DMA General Status Registers,
> +                      i.e. DGSR0 which includes status for channel 1~4, and
> +                      DGSR1 for channel 5~8
> +- ranges            : describes the mapping between the address space of the
> +                      DMA channels and the address space of the DMA controller
> +
> +- DMA channel nodes:
> +        - compatible        : must include "fsl,eloplus-dma-channel"
> +        - reg               : DMA channel specific registers
> +        - interrupts        : interrupt specifier for DMA channel IRQ
> +        - interrupt-parent  : optional, if needed for interrupt mapping
> +
> +Example:
> +dma@100300 {
> +	#address-cells = <1>;
> +	#size-cells = <1>;
> +	compatible = "fsl,elo3-dma";
> +	reg = <0x100300 0x4>,
> +	      <0x100600 0x4>;
> +	ranges = <0x0 0x100100 0x500>;
> +	dma-channel@0 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x0 0x80>;
> +		interrupts = <28 2 0 0>;
> +	};
> +	dma-channel@80 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x80 0x80>;
> +		interrupts = <29 2 0 0>;
> +	};
> +	dma-channel@100 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x100 0x80>;
> +		interrupts = <30 2 0 0>;
> +	};
> +	dma-channel@180 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x180 0x80>;
> +		interrupts = <31 2 0 0>;
> +	};
> +	dma-channel@300 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x300 0x80>;
> +		interrupts = <76 2 0 0>;
> +	};
> +	dma-channel@380 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x380 0x80>;
> +		interrupts = <77 2 0 0>;
> +	};
> +	dma-channel@400 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x400 0x80>;
> +		interrupts = <78 2 0 0>;
> +	};
> +	dma-channel@480 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x480 0x80>;
> +		interrupts = <79 2 0 0>;
> +	};
> +};
> +
>   Note on DMA channel compatible properties: The compatible property must say
>   "fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA
>   driver (fsldma).  Any DMA channel used by fsldma cannot be used by another
> diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
> index 7399154..ea53ea1 100644
> --- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
> +++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
> @@ -223,13 +223,13 @@
>   		reg = <0xe2000 0x1000>;
>   	};
>   
> -/include/ "qoriq-dma-0.dtsi"
> +/include/ "elo3-dma-0.dtsi"
>   	dma@100300 {
>   		fsl,iommu-parent = <&pamu0>;
>   		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
>   	};
>   
> -/include/ "qoriq-dma-1.dtsi"
> +/include/ "elo3-dma-1.dtsi"
>   	dma@101300 {
>   		fsl,iommu-parent = <&pamu0>;
>   		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
> diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
> new file mode 100644
> index 0000000..3c210e0
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
> @@ -0,0 +1,82 @@
> +/*
> + * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x100000 ]
> + *
> + * Copyright 2013 Freescale Semiconductor Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are met:
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in the
> + *       documentation and/or other materials provided with the distribution.
> + *     * Neither the name of Freescale Semiconductor nor the
> + *       names of its contributors may be used to endorse or promote products
> + *       derived from this software without specific prior written permission.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of the
> + * GNU General Public License ("GPL") as published by the Free Software
> + * Foundation, either version 2 of that License or (at your option) any
> + * later version.
> + *
> + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
> + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +dma0: dma@100300 {
> +	#address-cells = <1>;
> +	#size-cells = <1>;
> +	compatible = "fsl,elo3-dma";
> +	reg = <0x100300 0x4>,
> +	      <0x100600 0x4>;
> +	ranges = <0x0 0x100100 0x500>;
> +	dma-channel@0 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x0 0x80>;
> +		interrupts = <28 2 0 0>;
> +	};
> +	dma-channel@80 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x80 0x80>;
> +		interrupts = <29 2 0 0>;
> +	};
> +	dma-channel@100 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x100 0x80>;
> +		interrupts = <30 2 0 0>;
> +	};
> +	dma-channel@180 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x180 0x80>;
> +		interrupts = <31 2 0 0>;
> +	};
> +	dma-channel@300 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x300 0x80>;
> +		interrupts = <76 2 0 0>;
> +	};
> +	dma-channel@380 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x380 0x80>;
> +		interrupts = <77 2 0 0>;
> +	};
> +	dma-channel@400 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x400 0x80>;
> +		interrupts = <78 2 0 0>;
> +	};
> +	dma-channel@480 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x480 0x80>;
> +		interrupts = <79 2 0 0>;
> +	};
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
> new file mode 100644
> index 0000000..cccf3bb
> --- /dev/null
> +++ b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
> @@ -0,0 +1,82 @@
> +/*
> + * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x101000 ]
> + *
> + * Copyright 2013 Freescale Semiconductor Inc.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are met:
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in the
> + *       documentation and/or other materials provided with the distribution.
> + *     * Neither the name of Freescale Semiconductor nor the
> + *       names of its contributors may be used to endorse or promote products
> + *       derived from this software without specific prior written permission.
> + *
> + *
> + * ALTERNATIVELY, this software may be distributed under the terms of the
> + * GNU General Public License ("GPL") as published by the Free Software
> + * Foundation, either version 2 of that License or (at your option) any
> + * later version.
> + *
> + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
> + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
> + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
> + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
> + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +dma1: dma@101300 {
> +	#address-cells = <1>;
> +	#size-cells = <1>;
> +	compatible = "fsl,elo3-dma";
> +	reg = <0x101300 0x4>,
> +	      <0x101600 0x4>;
> +	ranges = <0x0 0x101100 0x500>;
> +	dma-channel@0 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x0 0x80>;
> +		interrupts = <32 2 0 0>;
> +	};
> +	dma-channel@80 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x80 0x80>;
> +		interrupts = <33 2 0 0>;
> +	};
> +	dma-channel@100 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x100 0x80>;
> +		interrupts = <34 2 0 0>;
> +	};
> +	dma-channel@180 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x180 0x80>;
> +		interrupts = <35 2 0 0>;
> +	};
> +	dma-channel@300 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x300 0x80>;
> +		interrupts = <80 2 0 0>;
> +	};
> +	dma-channel@380 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x380 0x80>;
> +		interrupts = <81 2 0 0>;
> +	};
> +	dma-channel@400 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x400 0x80>;
> +		interrupts = <82 2 0 0>;
> +	};
> +	dma-channel@480 {
> +		compatible = "fsl,eloplus-dma-channel";
> +		reg = <0x480 0x80>;
> +		interrupts = <83 2 0 0>;
> +	};
> +};
> diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
> index bd611a9..ec95c60 100644
> --- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
> +++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
> @@ -387,8 +387,8 @@
>   		reg	   = <0xea000 0x4000>;
>   	};
>   
> -/include/ "qoriq-dma-0.dtsi"
> -/include/ "qoriq-dma-1.dtsi"
> +/include/ "elo3-dma-0.dtsi"
> +/include/ "elo3-dma-1.dtsi"
>   
>   /include/ "qoriq-espi-0.dtsi"
>   	spi@110000 {

^ permalink raw reply

* Re: [PATCH 5/9][v5] powerpc: implement is_instr_load_store().
From: Michael Ellerman @ 2013-10-08  3:28 UTC (permalink / raw)
  To: Tom Musta
  Cc: linux-kernel, Stephane Eranian, linuxppc-dev, Paul Mackerras,
	Arnaldo Carvalho de Melo, Sukadev Bhattiprolu, Anshuman Khandual
In-Reply-To: <524DCAE6.5090601@gmail.com>

On Thu, 2013-10-03 at 14:52 -0500, Tom Musta wrote:
> On 10/3/2013 2:03 PM, Sukadev Bhattiprolu wrote:
> > Michael Ellerman [michael@ellerman.id.au] wrote:
> <snip>
> > |
> > |         if (lower == 6)
> > |             if (upper <= 1)
> > |                 return true;
> > |             return false;
> > v

> Note that this case covers the lvsl/lvsr instructions, which, despite their
> names are not actually loads.  So you could eliminate this check and do
> just a little bit better.

Yes you're right Tom, thanks for checking.

I saw "Load" in the name and that was good enough for me :)

cheers

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox