LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 05/14] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests
From: Paul Mackerras @ 2011-12-12 22:28 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <20111212222347.GA18868@bloggs.ozlabs.ibm.com>

This adds two new functions, kvmppc_pin_guest_page() and
kvmppc_unpin_guest_page(), and uses them to pin the guest pages where
the guest has registered areas of memory for the hypervisor to update,
(i.e. the per-cpu virtual processor areas, SLB shadow buffers and
dispatch trace logs) and then unpin them when they are no longer
required.

Although it is not strictly necessary to pin the pages at this point,
since all guest pages are already pinned, later commits in this series
will mean that guest pages aren't all pinned.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_book3s.h |    3 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c   |   38 ++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c          |   67 ++++++++++++++++++---------------
 3 files changed, 78 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index e8c78ac..a2a89c6 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -140,6 +140,9 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
+			unsigned long *nb_ret);
+extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index e4c6069..dcd39dc 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -184,6 +184,44 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	return -ENOENT;
 }
 
+void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
+			    unsigned long *nb_ret)
+{
+	struct kvm_memory_slot *memslot;
+	unsigned long gfn = gpa >> PAGE_SHIFT;
+	struct page *page;
+	unsigned long offset;
+	unsigned long pfn, pa;
+	unsigned long *physp;
+
+	memslot = gfn_to_memslot(kvm, gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return NULL;
+	physp = kvm->arch.slot_phys[memslot->id];
+	if (!physp)
+		return NULL;
+	physp += (gfn - memslot->base_gfn) >>
+		(kvm->arch.ram_porder - PAGE_SHIFT);
+	pa = *physp;
+	if (!pa)
+		return NULL;
+	pfn = pa >> PAGE_SHIFT;
+	page = pfn_to_page(pfn);
+	get_page(page);
+	offset = gpa & (kvm->arch.ram_psize - 1);
+	if (nb_ret)
+		*nb_ret = kvm->arch.ram_psize - offset;
+	return page_address(page) + offset;
+}
+
+void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
+{
+	struct page *page = virt_to_page(va);
+
+	page = compound_head(page);
+	put_page(page);
+}
+
 void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 86d3e4b..bd82789 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -139,12 +139,10 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 				       unsigned long vcpuid, unsigned long vpa)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long gfn, pg_index, ra, len;
-	unsigned long pg_offset;
+	unsigned long len, nb;
 	void *va;
 	struct kvm_vcpu *tvcpu;
-	struct kvm_memory_slot *memslot;
-	unsigned long *physp;
+	int err = H_PARAMETER;
 
 	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
 	if (!tvcpu)
@@ -157,51 +155,41 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 	if (flags < 4) {
 		if (vpa & 0x7f)
 			return H_PARAMETER;
+		if (flags >= 2 && !tvcpu->arch.vpa)
+			return H_RESOURCE;
 		/* registering new area; convert logical addr to real */
-		gfn = vpa >> PAGE_SHIFT;
-		memslot = gfn_to_memslot(kvm, gfn);
-		if (!memslot || !(memslot->flags & KVM_MEMSLOT_INVALID))
-			return H_PARAMETER;
-		physp = kvm->arch.slot_phys[memslot->id];
-		if (!physp)
-			return H_PARAMETER;
-		pg_index = (gfn - memslot->base_gfn) >>
-			(kvm->arch.ram_porder - PAGE_SHIFT);
-		pg_offset = vpa & (kvm->arch.ram_psize - 1);
-		ra = physp[pg_index];
-		if (!ra)
+		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
+		if (va == NULL)
 			return H_PARAMETER;
-		ra = (ra & PAGE_MASK) | pg_offset;
-		va = __va(ra);
 		if (flags <= 1)
 			len = *(unsigned short *)(va + 4);
 		else
 			len = *(unsigned int *)(va + 4);
-		if (pg_offset + len > kvm->arch.ram_psize)
-			return H_PARAMETER;
+		if (len > nb)
+			goto out_unpin;
 		switch (flags) {
 		case 1:		/* register VPA */
 			if (len < 640)
-				return H_PARAMETER;
+				goto out_unpin;
+			if (tvcpu->arch.vpa)
+				kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
 			tvcpu->arch.vpa = va;
 			init_vpa(vcpu, va);
 			break;
 		case 2:		/* register DTL */
 			if (len < 48)
-				return H_PARAMETER;
-			if (!tvcpu->arch.vpa)
-				return H_RESOURCE;
+				goto out_unpin;
 			len -= len % 48;
+			if (tvcpu->arch.dtl)
+				kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
 			tvcpu->arch.dtl = va;
 			tvcpu->arch.dtl_end = va + len;
 			break;
 		case 3:		/* register SLB shadow buffer */
-			if (len < 8)
-				return H_PARAMETER;
-			if (!tvcpu->arch.vpa)
-				return H_RESOURCE;
-			tvcpu->arch.slb_shadow = va;
-			len = (len - 16) / 16;
+			if (len < 16)
+				goto out_unpin;
+			if (tvcpu->arch.slb_shadow)
+				kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
 			tvcpu->arch.slb_shadow = va;
 			break;
 		}
@@ -210,17 +198,30 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 		case 5:		/* unregister VPA */
 			if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
 				return H_RESOURCE;
+			if (!tvcpu->arch.vpa)
+				break;
+			kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
 			tvcpu->arch.vpa = NULL;
 			break;
 		case 6:		/* unregister DTL */
+			if (!tvcpu->arch.dtl)
+				break;
+			kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
 			tvcpu->arch.dtl = NULL;
 			break;
 		case 7:		/* unregister SLB shadow buffer */
+			if (!tvcpu->arch.slb_shadow)
+				break;
+			kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
 			tvcpu->arch.slb_shadow = NULL;
 			break;
 		}
 	}
 	return H_SUCCESS;
+
+ out_unpin:
+	kvmppc_unpin_guest_page(kvm, va);
+	return err;
 }
 
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
@@ -503,6 +504,12 @@ out:
 
 void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	if (vcpu->arch.dtl)
+		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
+	if (vcpu->arch.slb_shadow)
+		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
+	if (vcpu->arch.vpa)
+		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
 	kvm_vcpu_uninit(vcpu);
 	kfree(vcpu);
 }
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH v3 04/14] KVM: PPC: Keep page physical addresses in per-slot arrays
From: Paul Mackerras @ 2011-12-12 22:28 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <20111212222347.GA18868@bloggs.ozlabs.ibm.com>

This allocates an array for each memory slot that is added to store
the physical addresses of the pages in the slot.  This array is
vmalloc'd and accessed in kvmppc_h_enter using real_vmalloc_addr().
This allows us to remove the ram_pginfo field from the kvm_arch
struct, and removes the 64GB guest RAM limit that we had.

We use the low-order bits of the array entries to store a flag
indicating that we have done get_page on the corresponding page,
and therefore need to call put_page when we are finished with the
page.  Currently this is set for all pages except those in our
special RMO regions.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_host.h |    9 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |   18 +++---
 arch/powerpc/kvm/book3s_hv.c        |  114 +++++++++++++++++------------------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |   41 +++++++++++-
 4 files changed, 107 insertions(+), 75 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 629df2e..7a17ab5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,6 +38,7 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
 #ifdef CONFIG_KVM_MMIO
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -175,25 +176,27 @@ struct revmap_entry {
 	unsigned long guest_rpte;
 };
 
+/* Low-order bits in kvm->arch.slot_phys[][] */
+#define KVMPPC_GOT_PAGE		0x80
+
 struct kvm_arch {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	unsigned long hpt_virt;
 	struct revmap_entry *revmap;
-	unsigned long ram_npages;
 	unsigned long ram_psize;
 	unsigned long ram_porder;
-	struct kvmppc_pginfo *ram_pginfo;
 	unsigned int lpid;
 	unsigned int host_lpid;
 	unsigned long host_lpcr;
 	unsigned long sdr1;
 	unsigned long host_sdr1;
 	int tlbie_lock;
-	int n_rma_pages;
 	unsigned long lpcr;
 	unsigned long rmor;
 	struct kvmppc_rma_info *rma;
 	struct list_head spapr_tce_tables;
+	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
+	int slot_npages[KVM_MEM_SLOTS_NUM];
 	unsigned short last_vcpu[NR_CPUS];
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 80ece8d..e4c6069 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -98,16 +98,16 @@ void kvmppc_free_hpt(struct kvm *kvm)
 void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 {
 	unsigned long i;
-	unsigned long npages = kvm->arch.ram_npages;
-	unsigned long pfn;
+	unsigned long npages;
+	unsigned long pa;
 	unsigned long *hpte;
 	unsigned long hash;
 	unsigned long porder = kvm->arch.ram_porder;
 	struct revmap_entry *rev;
-	struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
+	unsigned long *physp;
 
-	if (!pginfo)
-		return;
+	physp = kvm->arch.slot_phys[mem->slot];
+	npages = kvm->arch.slot_npages[mem->slot];
 
 	/* VRMA can't be > 1TB */
 	if (npages > 1ul << (40 - porder))
@@ -117,9 +117,10 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 		npages = HPT_NPTEG;
 
 	for (i = 0; i < npages; ++i) {
-		pfn = pginfo[i].pfn;
-		if (!pfn)
+		pa = physp[i];
+		if (!pa)
 			break;
+		pa &= PAGE_MASK;
 		/* can't use hpt_hash since va > 64 bits */
 		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
 		/*
@@ -131,8 +132,7 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 		hash = (hash << 3) + 7;
 		hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4));
 		/* HPTE low word - RPN, protection, etc. */
-		hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
-			HPTE_R_M | PP_RWXX;
+		hpte[1] = pa | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
 		smp_wmb();
 		hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
 			(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index da7db14..86d3e4b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -50,14 +50,6 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 
-/*
- * For now, limit memory to 64GB and require it to be large pages.
- * This value is chosen because it makes the ram_pginfo array be
- * 64kB in size, which is about as large as we want to be trying
- * to allocate with kmalloc.
- */
-#define MAX_MEM_ORDER		36
-
 #define LARGE_PAGE_ORDER	24	/* 16MB pages */
 
 /* #define EXIT_DEBUG */
@@ -147,10 +139,12 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 				       unsigned long vcpuid, unsigned long vpa)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long pg_index, ra, len;
+	unsigned long gfn, pg_index, ra, len;
 	unsigned long pg_offset;
 	void *va;
 	struct kvm_vcpu *tvcpu;
+	struct kvm_memory_slot *memslot;
+	unsigned long *physp;
 
 	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
 	if (!tvcpu)
@@ -164,14 +158,20 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 		if (vpa & 0x7f)
 			return H_PARAMETER;
 		/* registering new area; convert logical addr to real */
-		pg_index = vpa >> kvm->arch.ram_porder;
-		pg_offset = vpa & (kvm->arch.ram_psize - 1);
-		if (pg_index >= kvm->arch.ram_npages)
+		gfn = vpa >> PAGE_SHIFT;
+		memslot = gfn_to_memslot(kvm, gfn);
+		if (!memslot || !(memslot->flags & KVM_MEMSLOT_INVALID))
+			return H_PARAMETER;
+		physp = kvm->arch.slot_phys[memslot->id];
+		if (!physp)
 			return H_PARAMETER;
-		if (kvm->arch.ram_pginfo[pg_index].pfn == 0)
+		pg_index = (gfn - memslot->base_gfn) >>
+			(kvm->arch.ram_porder - PAGE_SHIFT);
+		pg_offset = vpa & (kvm->arch.ram_psize - 1);
+		ra = physp[pg_index];
+		if (!ra)
 			return H_PARAMETER;
-		ra = kvm->arch.ram_pginfo[pg_index].pfn << PAGE_SHIFT;
-		ra |= pg_offset;
+		ra = (ra & PAGE_MASK) | pg_offset;
 		va = __va(ra);
 		if (flags <= 1)
 			len = *(unsigned short *)(va + 4);
@@ -1108,12 +1108,11 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem)
 {
 	unsigned long psize, porder;
-	unsigned long i, npages, totalpages;
-	unsigned long pg_ix;
-	struct kvmppc_pginfo *pginfo;
+	unsigned long i, npages;
 	unsigned long hva;
 	struct kvmppc_rma_info *ri = NULL;
 	struct page *page;
+	unsigned long *phys;
 
 	/* For now, only allow 16MB pages */
 	porder = LARGE_PAGE_ORDER;
@@ -1125,20 +1124,21 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 		return -EINVAL;
 	}
 
+	/* Allocate a slot_phys array */
 	npages = mem->memory_size >> porder;
-	totalpages = (mem->guest_phys_addr + mem->memory_size) >> porder;
-
-	/* More memory than we have space to track? */
-	if (totalpages > (1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER)))
-		return -EINVAL;
+	phys = kvm->arch.slot_phys[mem->slot];
+	if (!phys) {
+		phys = vzalloc(npages * sizeof(unsigned long));
+		if (!phys)
+			return -ENOMEM;
+		kvm->arch.slot_phys[mem->slot] = phys;
+		kvm->arch.slot_npages[mem->slot] = npages;
+	}
 
 	/* Do we already have an RMA registered? */
 	if (mem->guest_phys_addr == 0 && kvm->arch.rma)
 		return -EINVAL;
 
-	if (totalpages > kvm->arch.ram_npages)
-		kvm->arch.ram_npages = totalpages;
-
 	/* Is this one of our preallocated RMAs? */
 	if (mem->guest_phys_addr == 0) {
 		struct vm_area_struct *vma;
@@ -1171,7 +1171,6 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 		}
 		atomic_inc(&ri->use_count);
 		kvm->arch.rma = ri;
-		kvm->arch.n_rma_pages = rma_size >> porder;
 
 		/* Update LPCR and RMOR */
 		lpcr = kvm->arch.lpcr;
@@ -1195,12 +1194,9 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 	}
 
-	pg_ix = mem->guest_phys_addr >> porder;
-	pginfo = kvm->arch.ram_pginfo + pg_ix;
-	for (i = 0; i < npages; ++i, ++pg_ix) {
-		if (ri && pg_ix < kvm->arch.n_rma_pages) {
-			pginfo[i].pfn = ri->base_pfn +
-				(pg_ix << (porder - PAGE_SHIFT));
+	for (i = 0; i < npages; ++i) {
+		if (ri && i < ri->npages) {
+			phys[i] = (ri->base_pfn << PAGE_SHIFT) + (i << porder);
 			continue;
 		}
 		hva = mem->userspace_addr + (i << porder);
@@ -1216,7 +1212,7 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 			       hva, compound_order(page));
 			goto err;
 		}
-		pginfo[i].pfn = page_to_pfn(page);
+		phys[i] = (page_to_pfn(page) << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
 	}
 
 	return 0;
@@ -1225,6 +1221,28 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 	return -EINVAL;
 }
 
+static void unpin_slot(struct kvm *kvm, int slot_id)
+{
+	unsigned long *physp;
+	unsigned long j, npages, pfn;
+	struct page *page;
+
+	physp = kvm->arch.slot_phys[slot_id];
+	npages = kvm->arch.slot_npages[slot_id];
+	if (physp) {
+		for (j = 0; j < npages; j++) {
+			if (!(physp[j] & KVMPPC_GOT_PAGE))
+				continue;
+			pfn = physp[j] >> PAGE_SHIFT;
+			page = pfn_to_page(pfn);
+			SetPageDirty(page);
+			put_page(page);
+		}
+		vfree(physp);
+		kvm->arch.slot_phys[slot_id] = NULL;
+	}
+}
+
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem)
 {
@@ -1236,8 +1254,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 int kvmppc_core_init_vm(struct kvm *kvm)
 {
 	long r;
-	unsigned long npages = 1ul << (MAX_MEM_ORDER - LARGE_PAGE_ORDER);
-	long err = -ENOMEM;
 	unsigned long lpcr;
 
 	/* Allocate hashed page table */
@@ -1247,19 +1263,9 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
 
-	kvm->arch.ram_pginfo = kzalloc(npages * sizeof(struct kvmppc_pginfo),
-				       GFP_KERNEL);
-	if (!kvm->arch.ram_pginfo) {
-		pr_err("kvmppc_core_init_vm: couldn't alloc %lu bytes\n",
-		       npages * sizeof(struct kvmppc_pginfo));
-		goto out_free;
-	}
-
-	kvm->arch.ram_npages = 0;
 	kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
 	kvm->arch.ram_porder = LARGE_PAGE_ORDER;
 	kvm->arch.rma = NULL;
-	kvm->arch.n_rma_pages = 0;
 
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
 
@@ -1282,25 +1288,15 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	kvm->arch.lpcr = lpcr;
 
 	return 0;
-
- out_free:
-	kvmppc_free_hpt(kvm);
-	return err;
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-	struct kvmppc_pginfo *pginfo;
 	unsigned long i;
 
-	if (kvm->arch.ram_pginfo) {
-		pginfo = kvm->arch.ram_pginfo;
-		kvm->arch.ram_pginfo = NULL;
-		for (i = kvm->arch.n_rma_pages; i < kvm->arch.ram_npages; ++i)
-			if (pginfo[i].pfn)
-				put_page(pfn_to_page(pginfo[i].pfn));
-		kfree(pginfo);
-	}
+	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+		unpin_slot(kvm, i);
+
 	if (kvm->arch.rma) {
 		kvm_release_rma(kvm->arch.rma);
 		kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 6148493..84dae82 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -20,6 +20,25 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 
+/*
+ * Since this file is built in even if KVM is a module, we need
+ * a local copy of this function for the case where kvm_main.c is
+ * modular.
+ */
+static struct kvm_memory_slot *builtin_gfn_to_memslot(struct kvm *kvm,
+						gfn_t gfn)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		if (gfn >= memslot->base_gfn &&
+		      gfn < memslot->base_gfn + memslot->npages)
+			return memslot;
+	return NULL;
+}
+
 /* Translate address of a vmalloc'd thing to a linear map address */
 static void *real_vmalloc_addr(void *x)
 {
@@ -59,10 +78,12 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 {
 	unsigned long porder;
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long i, lpn, pa;
+	unsigned long i, gfn, lpn, pa;
 	unsigned long *hpte;
 	struct revmap_entry *rev;
 	unsigned long g_ptel = ptel;
+	struct kvm_memory_slot *memslot;
+	unsigned long *physp;
 
 	/* only handle 4k, 64k and 16M pages for now */
 	porder = 12;
@@ -80,12 +101,24 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		} else
 			return H_PARAMETER;
 	}
-	lpn = (ptel & HPTE_R_RPN) >> kvm->arch.ram_porder;
-	if (lpn >= kvm->arch.ram_npages || porder > kvm->arch.ram_porder)
+	if (porder > kvm->arch.ram_porder)
 		return H_PARAMETER;
-	pa = kvm->arch.ram_pginfo[lpn].pfn << PAGE_SHIFT;
+
+	gfn = ((ptel & HPTE_R_RPN) & ~((1ul << porder) - 1)) >> PAGE_SHIFT;
+	memslot = builtin_gfn_to_memslot(kvm, gfn);
+	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
+		return H_PARAMETER;
+	physp = kvm->arch.slot_phys[memslot->id];
+	if (!physp)
+		return H_PARAMETER;
+
+	lpn = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
+	physp = real_vmalloc_addr(physp + lpn);
+	pa = *physp;
 	if (!pa)
 		return H_PARAMETER;
+	pa &= PAGE_MASK;
+
 	/* Check WIMG */
 	if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
 	    (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH v3 03/14] KVM: PPC: Keep a record of HV guest view of hashed page table entries
From: Paul Mackerras @ 2011-12-12 22:27 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <20111212222347.GA18868@bloggs.ozlabs.ibm.com>

This adds an array that parallels the guest hashed page table (HPT),
that is, it has one entry per HPTE, used to store the guest's view
of the second doubleword of the corresponding HPTE.  The first
doubleword in the HPTE is the same as the guest's idea of it, so we
don't need to store a copy, but the second doubleword in the HPTE has
the real page number rather than the guest's logical page number.
This allows us to remove the back_translate() and reverse_xlate()
functions.

This "reverse mapping" array is vmalloc'd, meaning that to access it
in real mode we have to walk the kernel's page tables explicitly.
That is done by the new real_vmalloc_addr() function.  (In fact this
returns an address in the linear mapping, so the result is usable
both in real mode and in virtual mode.)

There are also some minor cleanups here: moving the definitions of
HPT_ORDER etc. to a header file and defining HPT_NPTE for HPT_NPTEG << 3.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |    8 +++
 arch/powerpc/include/asm/kvm_host.h      |   10 ++++
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |   44 +++++++++++----
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   87 ++++++++++++++++++------------
 4 files changed, 103 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 2054e47..fa3dc79 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -35,6 +35,14 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #define SPAPR_TCE_SHIFT		12
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+/* For now use fixed-size 16MB page table */
+#define HPT_ORDER	24
+#define HPT_NPTEG	(1ul << (HPT_ORDER - 7))	/* 128B per pteg */
+#define HPT_NPTE	(HPT_NPTEG << 3)		/* 8 PTEs per PTEG */
+#define HPT_HASH_MASK	(HPT_NPTEG - 1)
+#endif
+
 static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
 					     unsigned long pte_index)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 66c75cd..629df2e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -166,9 +166,19 @@ struct kvmppc_rma_info {
 	atomic_t 	 use_count;
 };
 
+/*
+ * The reverse mapping array has one entry for each HPTE,
+ * which stores the guest's view of the second word of the HPTE
+ * (including the guest physical address of the mapping).
+ */
+struct revmap_entry {
+	unsigned long guest_rpte;
+};
+
 struct kvm_arch {
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	unsigned long hpt_virt;
+	struct revmap_entry *revmap;
 	unsigned long ram_npages;
 	unsigned long ram_psize;
 	unsigned long ram_porder;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index bc3a2ea..80ece8d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -23,6 +23,7 @@
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
+#include <linux/vmalloc.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -33,11 +34,6 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER	24
-#define HPT_NPTEG	(1ul << (HPT_ORDER - 7))	/* 128B per pteg */
-#define HPT_HASH_MASK	(HPT_NPTEG - 1)
-
 /* Pages in the VRMA are 16MB pages */
 #define VRMA_PAGE_ORDER	24
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
@@ -51,7 +47,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
 {
 	unsigned long hpt;
 	unsigned long lpid;
+	struct revmap_entry *rev;
 
+	/* Allocate guest's hashed page table */
 	hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|__GFP_NOWARN,
 			       HPT_ORDER - PAGE_SHIFT);
 	if (!hpt) {
@@ -60,12 +58,20 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
 	}
 	kvm->arch.hpt_virt = hpt;
 
+	/* Allocate reverse map array */
+	rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+	if (!rev) {
+		pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
+		goto out_freehpt;
+	}
+	kvm->arch.revmap = rev;
+
+	/* Allocate the guest's logical partition ID */
 	do {
 		lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS);
 		if (lpid >= NR_LPIDS) {
 			pr_err("kvm_alloc_hpt: No LPIDs free\n");
-			free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
-			return -ENOMEM;
+			goto out_freeboth;
 		}
 	} while (test_and_set_bit(lpid, lpid_inuse));
 
@@ -74,11 +80,18 @@ long kvmppc_alloc_hpt(struct kvm *kvm)
 
 	pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
 	return 0;
+
+ out_freeboth:
+	vfree(rev);
+ out_freehpt:
+	free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+	return -ENOMEM;
 }
 
 void kvmppc_free_hpt(struct kvm *kvm)
 {
 	clear_bit(kvm->arch.lpid, lpid_inuse);
+	vfree(kvm->arch.revmap);
 	free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
 }
 
@@ -89,14 +102,16 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 	unsigned long pfn;
 	unsigned long *hpte;
 	unsigned long hash;
+	unsigned long porder = kvm->arch.ram_porder;
+	struct revmap_entry *rev;
 	struct kvmppc_pginfo *pginfo = kvm->arch.ram_pginfo;
 
 	if (!pginfo)
 		return;
 
 	/* VRMA can't be > 1TB */
-	if (npages > 1ul << (40 - kvm->arch.ram_porder))
-		npages = 1ul << (40 - kvm->arch.ram_porder);
+	if (npages > 1ul << (40 - porder))
+		npages = 1ul << (40 - porder);
 	/* Can't use more than 1 HPTE per HPTEG */
 	if (npages > HPT_NPTEG)
 		npages = HPT_NPTEG;
@@ -113,15 +128,20 @@ void kvmppc_map_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem)
 		 * at most one HPTE per HPTEG, we just assume entry 7
 		 * is available and use it.
 		 */
-		hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 7));
-		hpte += 7 * 2;
+		hash = (hash << 3) + 7;
+		hpte = (unsigned long *) (kvm->arch.hpt_virt + (hash << 4));
 		/* HPTE low word - RPN, protection, etc. */
 		hpte[1] = (pfn << PAGE_SHIFT) | HPTE_R_R | HPTE_R_C |
 			HPTE_R_M | PP_RWXX;
-		wmb();
+		smp_wmb();
 		hpte[0] = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
 			(i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
 			HPTE_V_LARGE | HPTE_V_VALID;
+
+		/* Reverse map info */
+		rev = &kvm->arch.revmap[hash];
+		rev->guest_rpte = (i << porder) | HPTE_R_R | HPTE_R_C |
+			HPTE_R_M | PP_RWXX;
 	}
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index bacb0cf..6148493 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -20,10 +20,19 @@
 #include <asm/synch.h>
 #include <asm/ppc-opcode.h>
 
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER	24
-#define HPT_NPTEG	(1ul << (HPT_ORDER - 7))	/* 128B per pteg */
-#define HPT_HASH_MASK	(HPT_NPTEG - 1)
+/* Translate address of a vmalloc'd thing to a linear map address */
+static void *real_vmalloc_addr(void *x)
+{
+	unsigned long addr = (unsigned long) x;
+	pte_t *p;
+
+	p = find_linux_pte(swapper_pg_dir, addr);
+	if (!p || !pte_present(*p))
+		return NULL;
+	/* assume we don't have huge pages in vmalloc space... */
+	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
+	return __va(addr);
+}
 
 #define HPTE_V_HVLOCK	0x40UL
 
@@ -52,6 +61,8 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long i, lpn, pa;
 	unsigned long *hpte;
+	struct revmap_entry *rev;
+	unsigned long g_ptel = ptel;
 
 	/* only handle 4k, 64k and 16M pages for now */
 	porder = 12;
@@ -82,7 +93,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	pteh &= ~0x60UL;
 	ptel &= ~(HPTE_R_PP0 - kvm->arch.ram_psize);
 	ptel |= pa;
-	if (pte_index >= (HPT_NPTEG << 3))
+	if (pte_index >= HPT_NPTE)
 		return H_PARAMETER;
 	if (likely((flags & H_EXACT) == 0)) {
 		pte_index &= ~7UL;
@@ -95,18 +106,22 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 				break;
 			hpte += 2;
 		}
+		pte_index += i;
 	} else {
-		i = 0;
 		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 		if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID))
 			return H_PTEG_FULL;
 	}
+
+	/* Save away the guest's idea of the second HPTE dword */
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	if (rev)
+		rev->guest_rpte = g_ptel;
 	hpte[1] = ptel;
 	eieio();
 	hpte[0] = pteh;
 	asm volatile("ptesync" : : : "memory");
-	atomic_inc(&kvm->arch.ram_pginfo[lpn].refcnt);
-	vcpu->arch.gpr[4] = pte_index + i;
+	vcpu->arch.gpr[4] = pte_index;
 	return H_SUCCESS;
 }
 
@@ -138,7 +153,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned long *hpte;
 	unsigned long v, r, rb;
 
-	if (pte_index >= (HPT_NPTEG << 3))
+	if (pte_index >= HPT_NPTE)
 		return H_PARAMETER;
 	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 	while (!lock_hpte(hpte, HPTE_V_HVLOCK))
@@ -193,7 +208,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 		if (req == 3)
 			break;
 		if (req != 1 || flags == 3 ||
-		    pte_index >= (HPT_NPTEG << 3)) {
+		    pte_index >= HPT_NPTE) {
 			/* parameter error */
 			args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
 			ret = H_PARAMETER;
@@ -256,9 +271,10 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 {
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *hpte;
-	unsigned long v, r, rb;
+	struct revmap_entry *rev;
+	unsigned long v, r, rb, mask, bits;
 
-	if (pte_index >= (HPT_NPTEG << 3))
+	if (pte_index >= HPT_NPTE)
 		return H_PARAMETER;
 	hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 	while (!lock_hpte(hpte, HPTE_V_HVLOCK))
@@ -271,11 +287,21 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (atomic_read(&kvm->online_vcpus) == 1)
 		flags |= H_LOCAL;
 	v = hpte[0];
-	r = hpte[1] & ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
-			HPTE_R_KEY_HI | HPTE_R_KEY_LO);
-	r |= (flags << 55) & HPTE_R_PP0;
-	r |= (flags << 48) & HPTE_R_KEY_HI;
-	r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+	bits = (flags << 55) & HPTE_R_PP0;
+	bits |= (flags << 48) & HPTE_R_KEY_HI;
+	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
+
+	/* Update guest view of 2nd HPTE dword */
+	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
+		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	if (rev) {
+		r = (rev->guest_rpte & ~mask) | bits;
+		rev->guest_rpte = r;
+	}
+	r = (hpte[1] & ~mask) | bits;
+
+	/* Update HPTE */
 	rb = compute_tlbie_rb(v, r, pte_index);
 	hpte[0] = v & ~HPTE_V_VALID;
 	if (!(flags & H_LOCAL)) {
@@ -298,38 +324,31 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	return H_SUCCESS;
 }
 
-static unsigned long reverse_xlate(struct kvm *kvm, unsigned long realaddr)
-{
-	long int i;
-	unsigned long offset, rpn;
-
-	offset = realaddr & (kvm->arch.ram_psize - 1);
-	rpn = (realaddr - offset) >> PAGE_SHIFT;
-	for (i = 0; i < kvm->arch.ram_npages; ++i)
-		if (rpn == kvm->arch.ram_pginfo[i].pfn)
-			return (i << PAGE_SHIFT) + offset;
-	return HPTE_R_RPN;	/* all 1s in the RPN field */
-}
-
 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 		   unsigned long pte_index)
 {
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *hpte, r;
 	int i, n = 1;
+	struct revmap_entry *rev = NULL;
 
-	if (pte_index >= (HPT_NPTEG << 3))
+	if (pte_index >= HPT_NPTE)
 		return H_PARAMETER;
 	if (flags & H_READ_4) {
 		pte_index &= ~3;
 		n = 4;
 	}
+	if (flags & H_R_XLATE)
+		rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
 	for (i = 0; i < n; ++i, ++pte_index) {
 		hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
 		r = hpte[1];
-		if ((flags & H_R_XLATE) && (hpte[0] & HPTE_V_VALID))
-			r = reverse_xlate(kvm, r & HPTE_R_RPN) |
-				(r & ~HPTE_R_RPN);
+		if (hpte[0] & HPTE_V_VALID) {
+			if (rev)
+				r = rev[i].guest_rpte;
+			else
+				r = hpte[1] | HPTE_R_RPN;
+		}
 		vcpu->arch.gpr[4 + i * 2] = hpte[0];
 		vcpu->arch.gpr[5 + i * 2] = r;
 	}
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH v3 02/14] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code
From: Paul Mackerras @ 2011-12-12 22:26 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <20111212222347.GA18868@bloggs.ozlabs.ibm.com>

This moves the get/set_one_reg implementation down from powerpc.c into
booke.c, book3s_pr.c and book3s_hv.c.  This avoids #ifdefs in C code,
but more importantly, it fixes a bug on Book3s HV where we were
accessing beyond the end of the kvm_vcpu struct (via the to_book3s()
macro) and corrupting memory, causing random crashes and file corruption.

On Book3s HV we only accept setting the HIOR to zero, since the guest
runs in supervisor mode and its vectors are never offset from zero.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_ppc.h |    3 ++
 arch/powerpc/kvm/book3s_hv.c       |   33 ++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_pr.c       |   33 ++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke.c           |   10 +++++++++
 arch/powerpc/kvm/powerpc.c         |   39 ------------------------------------
 5 files changed, 79 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5192c2e..fc2d696 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -176,6 +176,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b1e3b9c..da7db14 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -392,6 +392,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_ONE_REG_PPC_HIOR:
+		reg->u.reg64 = 0;
+		r = 0;
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_ONE_REG_PPC_HIOR:
+		/* Only allow this to be set to zero */
+		if (reg->u.reg64 == 0)
+			r = 0;
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
 int kvmppc_core_check_processor_compat(void)
 {
 	if (cpu_has_feature(CPU_FTR_HVMODE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ae6a034..ddd92a5 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -863,6 +863,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_ONE_REG_PPC_HIOR:
+		reg->u.reg64 = to_book3s(vcpu)->hior;
+		r = 0;
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_ONE_REG_PPC_HIOR:
+		to_book3s(vcpu)->hior = reg->u.reg64;
+		to_book3s(vcpu)->hior_explicit = true;
+		r = 0;
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
 int kvmppc_core_check_processor_compat(void)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 9e41f45..ee9e1ee 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -887,6 +887,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return kvmppc_core_set_sregs(vcpu, sregs);
 }
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	return -EINVAL;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	return -EINVAL;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -ENOTSUPP;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b939b8a..69367ac 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -624,45 +624,6 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	return r;
 }
 
-static int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
-				      struct kvm_one_reg *reg)
-{
-	int r = -EINVAL;
-
-	switch (reg->id) {
-#ifdef CONFIG_PPC_BOOK3S
-	case KVM_ONE_REG_PPC_HIOR:
-		reg->u.reg64 = to_book3s(vcpu)->hior;
-		r = 0;
-		break;
-#endif
-	default:
-		break;
-	}
-
-	return r;
-}
-
-static int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
-				      struct kvm_one_reg *reg)
-{
-	int r = -EINVAL;
-
-	switch (reg->id) {
-#ifdef CONFIG_PPC_BOOK3S
-	case KVM_ONE_REG_PPC_HIOR:
-		to_book3s(vcpu)->hior = reg->u.reg64;
-		to_book3s(vcpu)->hior_explicit = true;
-		r = 0;
-		break;
-#endif
-	default:
-		break;
-	}
-
-	return r;
-}
-
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                     struct kvm_mp_state *mp_state)
 {
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH v3 01/14] KVM: PPC: Make wakeups work again for Book3S HV guests
From: Paul Mackerras @ 2011-12-12 22:24 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, kvm, kvm-ppc
In-Reply-To: <20111212222347.GA18868@bloggs.ozlabs.ibm.com>

When commit f43fdc15fa ("KVM: PPC: booke: Improve timer register
emulation") factored out some code in arch/powerpc/kvm/powerpc.c
into a new helper function, kvm_vcpu_kick(), an error crept in
which causes Book3s HV guest vcpus to stall.  This fixes it.
On POWER7 machines, guest vcpus are grouped together into virtual
CPU cores that share a single waitqueue, so it's important to use
vcpu->arch.wqp rather than &vcpu->wq.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kvm/powerpc.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ef8c990..b939b8a 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -561,7 +561,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
         int cpu = vcpu->cpu;

         me = get_cpu();
-	if (waitqueue_active(&vcpu->wq)) {
+	if (waitqueue_active(vcpu->arch.wqp)) {
 		wake_up_interruptible(vcpu->arch.wqp);
 		vcpu->stat.halt_wakeup++;
 	} else if (cpu != me && cpu != -1) {
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH v3 00/14] KVM: PPC: Update Book3S HV memory handling
From: Paul Mackerras @ 2011-12-12 22:23 UTC (permalink / raw)
  To: Alexander Graf; +Cc: linuxppc-dev, kvm, kvm-ppc

This series of patches updates the Book3S-HV KVM code that manages the
guest hashed page table (HPT) to enable several things:

* MMIO emulation and MMIO pass-through

* Use of small pages (4kB or 64kB, depending on config) to back the
  guest memory

* Pageable guest memory - i.e. backing pages can be removed from the
  guest and reinstated on demand, using the MMU notifier mechanism

* Guests can be given read-only access to pages even though they think
  they have mapped them read/write.  When they try to write to them
  their access is upgraded to read/write.  This allows KSM to share
  pages between guests.

On PPC970 we have no way to get DSIs and ISIs to come to the
hypervisor, so we can't do MMIO emulation or pageable guest memory.
On POWER7 we set the VPM1 bit in the LPCR to make all DSIs and ISIs
come to the hypervisor (host) as HDSIs or HISIs.

This code is working well in my tests.  The sporadic crashes that I
was seeing earlier are fixed by the second patch in the series.
Somewhat to my surprise, when I implemented the last patch in the
series I started to see KSM coalescing pages without any further
effort on my part -- my tests were on a machine with Fedora 16
installed, and it has ksmtuned running by default.

This series is on top of Alex Graf's kvm-ppc-next branch.  The first
patch in my series fixes a bug in one of the patches in that branch
("KVM: PPC: booke: Improve timer register emulation").

These patches only touch arch/powerpc except for patch 12, which adds
a couple of barriers to allow mmu_notifier_retry() to be used outside
of the kvm->mmu_lock.

Paul.

^ permalink raw reply

* [PATCH v3] ipc: provide generic compat versions of IPC syscalls
From: Chris Metcalf @ 2011-12-09 15:29 UTC (permalink / raw)
  To: Arnd Bergmann, Ralf Baechle, Benjamin Herrenschmidt,
	Paul Mackerras, Martin Schwidefsky, Heiko Carstens, linux390,
	David S. Miller, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
	x86, Eric W. Biederman, Christoph Hellwig, Lucas De Marchi,
	Dmitry Torokhov, Andrew Morton, J. Bruce Fields, NeilBrown,
	linux-mips, linux-kernel, linuxppc-dev, linux-s390, sparclinux
In-Reply-To: <1690400.7yOAjHVqTH@wuerfel>

When using the "compat" APIs, architectures will generally want to
be able to make direct syscalls to msgsnd(), shmctl(), etc., and
in the kernel we would want them to be handled directly by
compat_sys_xxx() functions, as is true for other compat syscalls.

However, for historical reasons, several of the existing compat IPC
syscalls do not do this.  semctl() expects a pointer to the fourth
argument, instead of the fourth argument itself.  msgsnd(), msgrcv()
and shmat() expect arguments in different order.

This change adds an ARCH_WANT_OLD_COMPAT_IPC config option that can be
set to preserve this behavior for ports that use it (x86, sparc, powerpc,
s390, and mips).  No actual semantics are changed for those architectures,
and there is only a minimal amount of code refactoring in ipc/compat.c.

Newer architectures like tile (and perhaps future architectures such
as arm64 and unicore64) should not select this option, and thus can
avoid having any IPC-specific code at all in their architecture-specific
compat layer.  In the same vein, if this option is not selected, IPC_64
mode is assumed, since that's what the <asm-generic> headers expect.

The workaround code in "tile" for msgsnd() and msgrcv() is removed
with this change; it also fixes the bug that shmat() and semctl() were
not being properly handled.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/Kconfig                   |    3 ++
 arch/mips/Kconfig              |    1 +
 arch/powerpc/Kconfig           |    1 +
 arch/s390/Kconfig              |    1 +
 arch/sparc/Kconfig             |    1 +
 arch/tile/include/asm/compat.h |   11 ------
 arch/tile/kernel/compat.c      |   43 ------------------------
 arch/x86/Kconfig               |    1 +
 include/linux/compat.h         |   12 ++++++-
 ipc/compat.c                   |   70 ++++++++++++++++++++++++++++++++++++---
 10 files changed, 83 insertions(+), 61 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 4b0669c..dfb1e07 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -181,4 +181,7 @@ config HAVE_RCU_TABLE_FREE
 config ARCH_HAVE_NMI_SAFE_CMPXCHG
 	bool
 
+config ARCH_WANT_OLD_COMPAT_IPC
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d46f1da..ad2af82 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2420,6 +2420,7 @@ config MIPS32_COMPAT
 config COMPAT
 	bool
 	depends on MIPS32_COMPAT
+	select ARCH_WANT_OLD_COMPAT_IPC
 	default y
 
 config SYSVIPC_COMPAT
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951e18f..e2be710 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -146,6 +146,7 @@ config COMPAT
 	bool
 	default y if PPC64
 	select COMPAT_BINFMT_ELF
+	select ARCH_WANT_OLD_COMPAT_IPC
 
 config SYSVIPC_COMPAT
 	bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 373679b..2fc3bca 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -221,6 +221,7 @@ config COMPAT
 	prompt "Kernel support for 31 bit emulation"
 	depends on 64BIT
 	select COMPAT_BINFMT_ELF
+	select ARCH_WANT_OLD_COMPAT_IPC
 	help
 	  Select this option if you want to enable your system kernel to
 	  handle system-calls from ELF binaries for 31 bit ESA.  This option
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index f92602e..846cb5c 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -577,6 +577,7 @@ config COMPAT
 	depends on SPARC64
 	default y
 	select COMPAT_BINFMT_ELF
+	select ARCH_WANT_OLD_COMPAT_IPC
 
 config SYSVIPC_COMPAT
 	bool
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index bf95f55..4b4b289 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -242,17 +242,6 @@ long compat_sys_fallocate(int fd, int mode,
 long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 				      struct compat_timespec __user *interval);
 
-/* Versions of compat functions that differ from generic Linux. */
-struct compat_msgbuf;
-long tile_compat_sys_msgsnd(int msqid,
-			    struct compat_msgbuf __user *msgp,
-			    size_t msgsz, int msgflg);
-long tile_compat_sys_msgrcv(int msqid,
-			    struct compat_msgbuf __user *msgp,
-			    size_t msgsz, long msgtyp, int msgflg);
-long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid,
-			    compat_long_t addr, compat_long_t data);
-
 /* Tilera Linux syscalls that don't have "compat" versions. */
 #define compat_sys_flush_cache sys_flush_cache
 
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index bf5e9d7..d67459b 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -16,7 +16,6 @@
 #define __SYSCALL_COMPAT
 
 #include <linux/compat.h>
-#include <linux/msg.h>
 #include <linux/syscalls.h>
 #include <linux/kdev_t.h>
 #include <linux/fs.h>
@@ -95,52 +94,10 @@ long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 	return ret;
 }
 
-/*
- * The usual compat_sys_msgsnd() and _msgrcv() seem to be assuming
- * some different calling convention than our normal 32-bit tile code.
- */
-
-/* Already defined in ipc/compat.c, but we need it here. */
-struct compat_msgbuf {
-	compat_long_t mtype;
-	char mtext[1];
-};
-
-long tile_compat_sys_msgsnd(int msqid,
-			    struct compat_msgbuf __user *msgp,
-			    size_t msgsz, int msgflg)
-{
-	compat_long_t mtype;
-
-	if (get_user(mtype, &msgp->mtype))
-		return -EFAULT;
-	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
-}
-
-long tile_compat_sys_msgrcv(int msqid,
-			    struct compat_msgbuf __user *msgp,
-			    size_t msgsz, long msgtyp, int msgflg)
-{
-	long err, mtype;
-
-	err =  do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
-	if (err < 0)
-		goto out;
-
-	if (put_user(mtype, &msgp->mtype))
-		err = -EFAULT;
- out:
-	return err;
-}
-
 /* Provide the compat syscall number to call mapping. */
 #undef __SYSCALL
 #define __SYSCALL(nr, call) [nr] = (call),
 
-/* The generic versions of these don't work for Tile. */
-#define compat_sys_msgrcv tile_compat_sys_msgrcv
-#define compat_sys_msgsnd tile_compat_sys_msgsnd
-
 /* See comments in sys.c */
 #define compat_sys_fadvise64_64 sys32_fadvise64_64
 #define compat_sys_readahead sys32_readahead
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb9a104..0e1f474 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2131,6 +2131,7 @@ config IA32_AOUT
 config COMPAT
 	def_bool y
 	depends on IA32_EMULATION
+	select ARCH_WANT_OLD_COMPAT_IPC
 
 config COMPAT_FOR_U64_ALIGNMENT
 	def_bool COMPAT
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 66ed067..f295dae 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -224,6 +224,7 @@ struct compat_sysinfo;
 struct compat_sysctl_args;
 struct compat_kexec_segment;
 struct compat_mq_attr;
+struct compat_msgbuf;
 
 extern void compat_exit_robust_list(struct task_struct *curr);
 
@@ -234,13 +235,22 @@ asmlinkage long
 compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 			   compat_size_t __user *len_ptr);
 
+#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
 long compat_sys_semctl(int first, int second, int third, void __user *uptr);
 long compat_sys_msgsnd(int first, int second, int third, void __user *uptr);
 long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 		int version, void __user *uptr);
-long compat_sys_msgctl(int first, int second, void __user *uptr);
 long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
 		void __user *uptr);
+#else
+long compat_sys_semctl(int semid, int semnum, int cmd, int arg);
+long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp,
+		size_t msgsz, int msgflg);
+long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp,
+		size_t msgsz, long msgtyp, int msgflg);
+long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg);
+#endif
+long compat_sys_msgctl(int first, int second, void __user *uptr);
 long compat_sys_shmctl(int first, int second, void __user *uptr);
 long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
 		unsigned nsems, const struct compat_timespec __user *timeout);
diff --git a/ipc/compat.c b/ipc/compat.c
index 845a287..a6df704 100644
--- a/ipc/compat.c
+++ b/ipc/compat.c
@@ -27,6 +27,7 @@
 #include <linux/msg.h>
 #include <linux/shm.h>
 #include <linux/syscalls.h>
+#include <linux/ptrace.h>
 
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
@@ -117,6 +118,7 @@ extern int sem_ctls[];
 
 static inline int compat_ipc_parse_version(int *cmd)
 {
+#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
 	int version = *cmd & IPC_64;
 
 	/* this is tricky: architectures that have support for the old
@@ -128,6 +130,10 @@ static inline int compat_ipc_parse_version(int *cmd)
 	*cmd &= ~IPC_64;
 #endif
 	return version;
+#else
+	/* With the asm-generic APIs, we always use the 64-bit versions. */
+	return IPC_64;
+#endif
 }
 
 static inline int __get_compat_ipc64_perm(struct ipc64_perm *p64,
@@ -232,10 +238,9 @@ static inline int put_compat_semid_ds(struct semid64_ds *s,
 	return err;
 }
 
-long compat_sys_semctl(int first, int second, int third, void __user *uptr)
+static long do_compat_semctl(int first, int second, int third, u32 pad)
 {
 	union semun fourth;
-	u32 pad;
 	int err, err2;
 	struct semid64_ds s64;
 	struct semid64_ds __user *up64;
@@ -243,10 +248,6 @@ long compat_sys_semctl(int first, int second, int third, void __user *uptr)
 
 	memset(&s64, 0, sizeof(s64));
 
-	if (!uptr)
-		return -EINVAL;
-	if (get_user(pad, (u32 __user *) uptr))
-		return -EFAULT;
 	if ((third & (~IPC_64)) == SETVAL)
 		fourth.val = (int) pad;
 	else
@@ -305,6 +306,18 @@ long compat_sys_semctl(int first, int second, int third, void __user *uptr)
 	return err;
 }
 
+#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
+long compat_sys_semctl(int first, int second, int third, void __user *uptr)
+{
+	u32 pad;
+
+	if (!uptr)
+		return -EINVAL;
+	if (get_user(pad, (u32 __user *) uptr))
+		return -EFAULT;
+	return do_compat_semctl(first, second, third, pad);
+}
+
 long compat_sys_msgsnd(int first, int second, int third, void __user *uptr)
 {
 	struct compat_msgbuf __user *up = uptr;
@@ -353,6 +366,37 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third,
 out:
 	return err;
 }
+#else
+long compat_sys_semctl(int semid, int semnum, int cmd, int arg)
+{
+	return do_compat_semctl(semid, semnum, cmd, arg);
+}
+
+long compat_sys_msgsnd(int msqid, struct compat_msgbuf __user *msgp,
+		       size_t msgsz, int msgflg)
+{
+	compat_long_t mtype;
+
+	if (get_user(mtype, &msgp->mtype))
+		return -EFAULT;
+	return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg);
+}
+
+long compat_sys_msgrcv(int msqid, struct compat_msgbuf __user *msgp,
+		       size_t msgsz, long msgtyp, int msgflg)
+{
+	long err, mtype;
+
+	err =  do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg);
+	if (err < 0)
+		goto out;
+
+	if (put_user(mtype, &msgp->mtype))
+		err = -EFAULT;
+ out:
+	return err;
+}
+#endif
 
 static inline int get_compat_msqid64(struct msqid64_ds *m64,
 				     struct compat_msqid64_ds __user *up64)
@@ -470,6 +514,7 @@ long compat_sys_msgctl(int first, int second, void __user *uptr)
 	return err;
 }
 
+#ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
 long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
 			void __user *uptr)
 {
@@ -485,6 +530,19 @@ long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
 	uaddr = compat_ptr(third);
 	return put_user(raddr, uaddr);
 }
+#else
+long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg)
+{
+	unsigned long ret;
+	long err;
+
+	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret);
+	if (err)
+		return err;
+	force_successful_syscall_return();
+	return (long)ret;
+}
+#endif
 
 static inline int get_compat_shmid64_ds(struct shmid64_ds *s64,
 					struct compat_shmid64_ds __user *up64)
-- 
1.6.5.2

^ permalink raw reply related

* Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
From: Scott Wood @ 2011-12-12 21:30 UTC (permalink / raw)
  To: dedekind1
  Cc: Artem.Bityutskiy, linuxppc-dev, linux-kernel, shuo.liu, linux-mtd,
	akpm, dwmw2
In-Reply-To: <1323724784.2297.20.camel@koala>

On 12/12/2011 03:19 PM, Artem Bityutskiy wrote:
> On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote:
>> NAND chips come from the factory with bad blocks marked at a certain
>> offset into each page.  This offset is normally in the OOB area, but
>> since we change the layout from "4k data, 128 byte oob" to "2k data, 64
>> byte oob, 2k data, 64 byte oob" the marker is no longer in the oob.  On
>> first use we need to migrate the markers so that they are still in the oob.
> 
> Ah, I see, thanks. Are you planning to implement in-kernel migration or
> use a user-space tool?

That's the kind of answer I was hoping to get from Shuo. :-)

Most likely is a firmware-based tool, but I'd like there to be some way
for the tool to mark that this has happened, so that the Linux driver
can refuse to do non-raw accesses to a chip that isn't marked as having
been migrated (or at least yell loudly in the log).

Speaking of raw accesses, these are currently broken in the eLBC
driver... we need some way for the generic layer to tell us what kind of
access it is before the transaction starts, not once it wants to read
out the buffer (unless we add more hacks to delay the start of a read
transaction until first buffer access...).  We'd be better off with a
high-level "read page/write page" function that does the whole thing
(not just buffer access, but command issuance as well).

-Scott

^ permalink raw reply

* Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
From: Artem Bityutskiy @ 2011-12-12 21:19 UTC (permalink / raw)
  To: Scott Wood
  Cc: Artem.Bityutskiy, linuxppc-dev, linux-kernel, shuo.liu, linux-mtd,
	akpm, dwmw2
In-Reply-To: <4EE66EFE.1050608@freescale.com>

On Mon, 2011-12-12 at 15:15 -0600, Scott Wood wrote:
> NAND chips come from the factory with bad blocks marked at a certain
> offset into each page.  This offset is normally in the OOB area, but
> since we change the layout from "4k data, 128 byte oob" to "2k data, 64
> byte oob, 2k data, 64 byte oob" the marker is no longer in the oob.  On
> first use we need to migrate the markers so that they are still in the oob.

Ah, I see, thanks. Are you planning to implement in-kernel migration or
use a user-space tool?

Artem.

^ permalink raw reply

* Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
From: Scott Wood @ 2011-12-12 21:15 UTC (permalink / raw)
  To: dedekind1
  Cc: Artem.Bityutskiy, linuxppc-dev, linux-kernel, shuo.liu, linux-mtd,
	akpm, dwmw2
In-Reply-To: <1323724195.2297.11.camel@koala>

On 12/12/2011 03:09 PM, Artem Bityutskiy wrote:
> On Tue, 2011-12-06 at 18:09 -0600, Scott Wood wrote:
>> On 12/03/2011 10:31 PM, shuo.liu@freescale.com wrote:
>>> From: Liu Shuo <shuo.liu@freescale.com>
>>>
>>> Freescale FCM controller has a 2K size limitation of buffer RAM. In order
>>> to support the Nand flash chip whose page size is larger than 2K bytes,
>>> we read/write 2k data repeatedly by issuing FIR_OP_RB/FIR_OP_WB and save
>>> them to a large buffer.
>>>
>>> Signed-off-by: Liu Shuo <shuo.liu@freescale.com>
>>> ---
>>> v3:
>>>     -remove page_size of struct fsl_elbc_mtd.
>>>     -do a oob write by NAND_CMD_RNDIN. 
>>>
>>>  drivers/mtd/nand/fsl_elbc_nand.c |  243 ++++++++++++++++++++++++++++++++++----
>>>  1 files changed, 218 insertions(+), 25 deletions(-)
>>
>> What is the plan for bad block marker migration?
> 
> Why it should be migrated? I thought that you support 2KiB pages, and
> this adds 4 and 8 KiB pages support, which you never supported before.
> What is the migration you guys are talking about?

NAND chips come from the factory with bad blocks marked at a certain
offset into each page.  This offset is normally in the OOB area, but
since we change the layout from "4k data, 128 byte oob" to "2k data, 64
byte oob, 2k data, 64 byte oob" the marker is no longer in the oob.  On
first use we need to migrate the markers so that they are still in the oob.

-Scott

^ permalink raw reply

* Re: [PATCH 3/3] mtd/nand : workaround for Freescale FCM to support large-page Nand chip
From: Artem Bityutskiy @ 2011-12-12 21:09 UTC (permalink / raw)
  To: Scott Wood
  Cc: Artem.Bityutskiy, linuxppc-dev, linux-kernel, shuo.liu, linux-mtd,
	akpm, dwmw2
In-Reply-To: <4EDEAEB9.6020703@freescale.com>

On Tue, 2011-12-06 at 18:09 -0600, Scott Wood wrote:
> On 12/03/2011 10:31 PM, shuo.liu@freescale.com wrote:
> > From: Liu Shuo <shuo.liu@freescale.com>
> > 
> > Freescale FCM controller has a 2K size limitation of buffer RAM. In order
> > to support the Nand flash chip whose page size is larger than 2K bytes,
> > we read/write 2k data repeatedly by issuing FIR_OP_RB/FIR_OP_WB and save
> > them to a large buffer.
> > 
> > Signed-off-by: Liu Shuo <shuo.liu@freescale.com>
> > ---
> > v3:
> >     -remove page_size of struct fsl_elbc_mtd.
> >     -do a oob write by NAND_CMD_RNDIN. 
> > 
> >  drivers/mtd/nand/fsl_elbc_nand.c |  243 ++++++++++++++++++++++++++++++++++----
> >  1 files changed, 218 insertions(+), 25 deletions(-)
> 
> What is the plan for bad block marker migration?

Why it should be migrated? I thought that you support 2KiB pages, and
this adds 4 and 8 KiB pages support, which you never supported before.
What is the migration you guys are talking about?

Artem.

^ permalink raw reply

* Re: [PATCH 1/2] mtd/nand : set Nand flash page address to FBAR and FPAR correctly
From: Artem Bityutskiy @ 2011-12-12 21:04 UTC (permalink / raw)
  To: shuo.liu
  Cc: Artem.Bityutskiy, linuxppc-dev, linux-kernel, linux-mtd,
	scottwood, akpm, dwmw2
In-Reply-To: <1323423775-26951-1-git-send-email-shuo.liu@freescale.com>

On Fri, 2011-12-09 at 17:42 +0800, shuo.liu@freescale.com wrote:
> From: Liu Shuo <b35362@freescale.com>
> 
> If we use the Nand flash chip whose number of pages in a block is greater
> than 64(for large page), we must treat the low bit of FBAR as being the
> high bit of the page address due to the limitation of FCM, it simply uses
> the low 6-bits (for large page) of the combined block/page address as the
> FPAR component, rather than considering the actual block size.

Pushed this one to l2-mtd-2.6.git, thanks!

Artem.

^ permalink raw reply

* [PATCH 4/4] of: Add device tree selftests
From: Grant Likely @ 2011-12-12 20:14 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, devicetree-discuss; +Cc: microblaze-uclinux
In-Reply-To: <1323720852-26311-1-git-send-email-grant.likely@secretlab.ca>

Add some runtime test cases for the library of device tree parsing functions.

v2: - Add testcase for phandle with 0 args
    - Don't run testcases if testcase data isn't present in device tree

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 arch/arm/boot/dts/testcases/tests-phandle.dtsi |   37 +++++++
 arch/arm/boot/dts/testcases/tests.dtsi         |    1 +
 arch/arm/boot/dts/versatile-pb.dts             |    2 +
 drivers/of/Kconfig                             |    9 ++
 drivers/of/Makefile                            |    1 +
 drivers/of/selftest.c                          |  139 ++++++++++++++++++++++++
 6 files changed, 189 insertions(+), 0 deletions(-)
 create mode 100644 arch/arm/boot/dts/testcases/tests-phandle.dtsi
 create mode 100644 arch/arm/boot/dts/testcases/tests.dtsi
 create mode 100644 drivers/of/selftest.c

diff --git a/arch/arm/boot/dts/testcases/tests-phandle.dtsi b/arch/arm/boot/dts/testcases/tests-phandle.dtsi
new file mode 100644
index 0000000..ec0c4e6
--- /dev/null
+++ b/arch/arm/boot/dts/testcases/tests-phandle.dtsi
@@ -0,0 +1,37 @@
+
+/ {
+	testcase-data {
+		phandle-tests {
+			provider0: provider0 {
+				#phandle-cells = <0>;
+			};
+
+			provider1: provider1 {
+				#phandle-cells = <1>;
+			};
+
+			provider2: provider2 {
+				#phandle-cells = <2>;
+			};
+
+			provider3: provider3 {
+				#phandle-cells = <3>;
+			};
+
+			consumer-a {
+				phandle-list =	<&provider1 1>,
+						<&provider2 2 0>,
+						<0>,
+						<&provider3 4 4 3>,
+						<&provider2 5 100>,
+						<&provider0>,
+						<&provider1 7>;
+				phandle-list-names = "first", "second", "third";
+
+				phandle-list-bad-phandle = <12345678 0 0>;
+				phandle-list-bad-args = <&provider2 1 0>,
+							<&provider3 0>;
+			};
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/testcases/tests.dtsi b/arch/arm/boot/dts/testcases/tests.dtsi
new file mode 100644
index 0000000..a7c5067
--- /dev/null
+++ b/arch/arm/boot/dts/testcases/tests.dtsi
@@ -0,0 +1 @@
+/include/ "tests-phandle.dtsi"
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index 8a614e3..1664610 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -46,3 +46,5 @@
 		};
 	};
 };
+
+/include/ "testcases/tests.dtsi"
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index cac63c9..268163d 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -15,6 +15,15 @@ config PROC_DEVICETREE
 	  an image of the device tree that the kernel copies from Open
 	  Firmware or other boot firmware. If unsure, say Y here.
 
+config OF_SELFTEST
+	bool "Device Tree Runtime self tests"
+	help
+	  This option builds in test cases for the device tree infrastructure
+	  that are executed one at boot time, and the results dumped to the
+	  console.
+
+	  If unsure, say N here, but this option is safe to enable.
+
 config OF_FLATTREE
 	bool
 	select DTC
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index dccb117..a73f5a5 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_OF_GPIO)   += gpio.o
 obj-$(CONFIG_OF_I2C)	+= of_i2c.o
 obj-$(CONFIG_OF_NET)	+= of_net.o
 obj-$(CONFIG_OF_SPI)	+= of_spi.o
+obj-$(CONFIG_OF_SELFTEST) += selftest.o
 obj-$(CONFIG_OF_MDIO)	+= of_mdio.o
 obj-$(CONFIG_OF_PCI)	+= of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
new file mode 100644
index 0000000..9d2b480
--- /dev/null
+++ b/drivers/of/selftest.c
@@ -0,0 +1,139 @@
+/*
+ * Self tests for device tree subsystem
+ */
+
+#define pr_fmt(fmt) "### %s(): " fmt, __func__
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+static bool selftest_passed = true;
+#define selftest(result, fmt, ...) { \
+	selftest_passed &= (result); \
+	if (!(result)) \
+		pr_err("FAIL %s:%i " fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+}
+
+static void __init of_selftest_parse_phandle_with_args(void)
+{
+	struct device_node *np;
+	struct of_phandle_args args;
+	int rc, i;
+	bool passed_all = true;
+
+	pr_info("start\n");
+	np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a");
+	if (!np) {
+		pr_err("missing testcase data\n");
+		return;
+	}
+
+	for (i = 0; i < 7; i++) {
+		bool passed = true;
+		rc = of_parse_phandle_with_args(np, "phandle-list",
+						"#phandle-cells", i, &args);
+
+		/* Test the values from tests-phandle.dtsi */
+		switch (i) {
+		case 0:
+			passed &= !rc;
+			passed &= (args.args_count == 1);
+			passed &= (args.args[0] == (i + 1));
+			break;
+		case 1:
+			passed &= !rc;
+			passed &= (args.args_count == 2);
+			passed &= (args.args[0] == (i + 1));
+			passed &= (args.args[1] == 0);
+			break;
+		case 2:
+			passed &= (rc == -ENOENT);
+			break;
+		case 3:
+			passed &= !rc;
+			passed &= (args.args_count == 3);
+			passed &= (args.args[0] == (i + 1));
+			passed &= (args.args[1] == 4);
+			passed &= (args.args[2] == 3);
+			break;
+		case 4:
+			passed &= !rc;
+			passed &= (args.args_count == 2);
+			passed &= (args.args[0] == (i + 1));
+			passed &= (args.args[1] == 100);
+			break;
+		case 5:
+			passed &= !rc;
+			passed &= (args.args_count == 0);
+			break;
+		case 6:
+			passed &= !rc;
+			passed &= (args.args_count == 1);
+			passed &= (args.args[0] == (i + 1));
+			break;
+		case 7:
+			passed &= (rc == -EINVAL);
+			break;
+		default:
+			passed = false;
+		}
+
+		if (!passed) {
+			int j;
+			pr_err("index %i - data error on node %s rc=%i regs=[",
+				i, args.np->full_name, rc);
+			for (j = 0; j < args.args_count; j++)
+				printk(" %i", args.args[j]);
+			printk(" ]\n");
+
+			passed_all = false;
+		}
+	}
+
+	/* Check for missing list property */
+	rc = of_parse_phandle_with_args(np, "phandle-list-missing",
+					"#phandle-cells", 0, &args);
+	passed_all &= (rc == -EINVAL);
+
+	/* Check for missing cells property */
+	rc = of_parse_phandle_with_args(np, "phandle-list",
+					"#phandle-cells-missing", 0, &args);
+	passed_all &= (rc == -EINVAL);
+
+	/* Check for bad phandle in list */
+	rc = of_parse_phandle_with_args(np, "phandle-list-bad-phandle",
+					"#phandle-cells", 0, &args);
+	passed_all &= (rc == -EINVAL);
+
+	/* Check for incorrectly formed argument list */
+	rc = of_parse_phandle_with_args(np, "phandle-list-bad-args",
+					"#phandle-cells", 1, &args);
+	passed_all &= (rc == -EINVAL);
+
+	pr_info("end - %s\n", passed_all ? "PASS" : "FAIL");
+}
+
+static int __init of_selftest(void)
+{
+	struct device_node *np;
+
+	np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a");
+	if (!np) {
+		pr_info("No testcase data in device tree; not running tests\n");
+		return 0;
+	}
+	of_node_put(np);
+
+	pr_info("start of selftest - you will see error messages\n");
+	of_selftest_parse_phandle_with_args();
+	pr_info("end of selftest - %s\n", selftest_passed ? "PASS" : "FAIL");
+	return 0;
+}
+late_initcall(of_selftest);
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 1/4] gpio/microblaze: Eliminate duplication of of_get_named_gpio_flags()
From: Grant Likely @ 2011-12-12 20:14 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, devicetree-discuss
  Cc: microblaze-uclinux, Michal Simek
In-Reply-To: <1323720852-26311-1-git-send-email-grant.likely@secretlab.ca>

of_reset_gpio_handle() is largely a cut-and-paste copy of
of_get_named_gpio_flags(). There really isn't any reason for the
split, so this patch deletes the duplicate function

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Michal Simek <monstr@monstr.eu>
---
 arch/microblaze/kernel/reset.c |   43 +--------------------------------------
 1 files changed, 2 insertions(+), 41 deletions(-)

diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c
index bd8ccab..88a0163 100644
--- a/arch/microblaze/kernel/reset.c
+++ b/arch/microblaze/kernel/reset.c
@@ -19,50 +19,11 @@
 static int handle; /* reset pin handle */
 static unsigned int reset_val;
 
-static int of_reset_gpio_handle(void)
-{
-	int ret; /* variable which stored handle reset gpio pin */
-	struct device_node *root; /* root node */
-	struct device_node *gpio; /* gpio node */
-	struct gpio_chip *gc;
-	u32 flags;
-	const void *gpio_spec;
-
-	/* find out root node */
-	root = of_find_node_by_path("/");
-
-	/* give me handle for gpio node to be possible allocate pin */
-	ret = of_parse_phandles_with_args(root, "hard-reset-gpios",
-				"#gpio-cells", 0, &gpio, &gpio_spec);
-	if (ret) {
-		pr_debug("%s: can't parse gpios property\n", __func__);
-		goto err0;
-	}
-
-	gc = of_node_to_gpiochip(gpio);
-	if (!gc) {
-		pr_debug("%s: gpio controller %s isn't registered\n",
-			 root->full_name, gpio->full_name);
-		ret = -ENODEV;
-		goto err1;
-	}
-
-	ret = gc->of_xlate(gc, root, gpio_spec, &flags);
-	if (ret < 0)
-		goto err1;
-
-	ret += gc->base;
-err1:
-	of_node_put(gpio);
-err0:
-	pr_debug("%s exited with status %d\n", __func__, ret);
-	return ret;
-}
-
 void of_platform_reset_gpio_probe(void)
 {
 	int ret;
-	handle = of_reset_gpio_handle();
+	handle = of_get_named_gpio(of_find_node_by_path("/"),
+				   "hard-reset-gpios", 0);
 
 	if (!gpio_is_valid(handle)) {
 		printk(KERN_INFO "Skipping unavailable RESET gpio %d (%s)\n",
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 3/4] of: create of_phandle_args to simplify return of phandle parsing data
From: Grant Likely @ 2011-12-12 20:14 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, devicetree-discuss; +Cc: microblaze-uclinux
In-Reply-To: <1323720852-26311-1-git-send-email-grant.likely@secretlab.ca>

of_parse_phandle_with_args() needs to return quite a bit of data.  Rather
than making each datum a separate **out_ argument, this patch creates
struct of_phandle_args to contain all the returned data and reworks the
user of the function.  This patch also enables of_parse_phandle_with_args()
to return the device node pointer for the phandle node.

This patch also ends up being fairly major surgery to
of_parse_handle_with_args().  The existing structure didn't work well
when extending to use of_phandle_args, and I discovered bugs during testing.
I also took the opportunity to rename the function to be like the
existing of_parse_phandle().

v2: - moved declaration of of_phandle_args to fix compile on non-DT builds
    - fixed incorrect index in example usage
    - fixed incorrect return code handling for empty entries

Reviewed-by: Shawn Guo <shawn.guo@freescale.com>
Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
---
 drivers/of/base.c          |  146 ++++++++++++++++++++++---------------------
 drivers/of/gpio.c          |   43 ++++++-------
 include/asm-generic/gpio.h |    5 +-
 include/linux/of.h         |   11 +++-
 include/linux/of_gpio.h    |   10 ++-
 5 files changed, 112 insertions(+), 103 deletions(-)

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 9b6588e..c6db9ab 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -824,17 +824,19 @@ of_parse_phandle(struct device_node *np, const char *phandle_name, int index)
 EXPORT_SYMBOL(of_parse_phandle);
 
 /**
- * of_parse_phandles_with_args - Find a node pointed by phandle in a list
+ * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
  * @np:		pointer to a device tree node containing a list
  * @list_name:	property name that contains a list
  * @cells_name:	property name that specifies phandles' arguments count
  * @index:	index of a phandle to parse out
- * @out_node:	optional pointer to device_node struct pointer (will be filled)
- * @out_args:	optional pointer to arguments pointer (will be filled)
+ * @out_args:	optional pointer to output arguments structure (will be filled)
  *
  * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_node and out_args, on error returns
- * appropriate errno value.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->node
+ * pointer.
  *
  * Example:
  *
@@ -851,94 +853,96 @@ EXPORT_SYMBOL(of_parse_phandle);
  * }
  *
  * To get a device_node of the `node2' node you may call this:
- * of_parse_phandles_with_args(node3, "list", "#list-cells", 2, &node2, &args);
+ * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
  */
-int of_parse_phandles_with_args(struct device_node *np, const char *list_name,
+int of_parse_phandle_with_args(struct device_node *np, const char *list_name,
 				const char *cells_name, int index,
-				struct device_node **out_node,
-				const void **out_args)
+				struct of_phandle_args *out_args)
 {
-	int ret = -EINVAL;
-	const __be32 *list;
-	const __be32 *list_end;
-	int size;
-	int cur_index = 0;
+	const __be32 *list, *list_end;
+	int size, cur_index = 0;
+	uint32_t count = 0;
 	struct device_node *node = NULL;
-	const void *args = NULL;
+	phandle phandle;
 
+	/* Retrieve the phandle list property */
 	list = of_get_property(np, list_name, &size);
-	if (!list) {
-		ret = -ENOENT;
-		goto err0;
-	}
+	if (!list)
+		return -EINVAL;
 	list_end = list + size / sizeof(*list);
 
+	/* Loop over the phandles until all the requested entry is found */
 	while (list < list_end) {
-		const __be32 *cells;
-		phandle phandle;
+		count = 0;
 
+		/*
+		 * If phandle is 0, then it is an empty entry with no
+		 * arguments.  Skip forward to the next entry.
+		 */
 		phandle = be32_to_cpup(list++);
-		args = list;
-
-		/* one cell hole in the list = <>; */
-		if (!phandle)
-			goto next;
-
-		node = of_find_node_by_phandle(phandle);
-		if (!node) {
-			pr_debug("%s: could not find phandle\n",
-				 np->full_name);
-			goto err0;
-		}
+		if (phandle) {
+			/*
+			 * Find the provider node and parse the #*-cells
+			 * property to determine the argument length
+			 */
+			node = of_find_node_by_phandle(phandle);
+			if (!node) {
+				pr_err("%s: could not find phandle\n",
+					 np->full_name);
+				break;
+			}
+			if (of_property_read_u32(node, cells_name, &count)) {
+				pr_err("%s: could not get %s for %s\n",
+					 np->full_name, cells_name,
+					 node->full_name);
+				break;
+			}
 
-		cells = of_get_property(node, cells_name, &size);
-		if (!cells || size != sizeof(*cells)) {
-			pr_debug("%s: could not get %s for %s\n",
-				 np->full_name, cells_name, node->full_name);
-			goto err1;
+			/*
+			 * Make sure that the arguments actually fit in the
+			 * remaining property data length
+			 */
+			if (list + count > list_end) {
+				pr_err("%s: arguments longer than property\n",
+					 np->full_name);
+				break;
+			}
 		}
 
-		list += be32_to_cpup(cells);
-		if (list > list_end) {
-			pr_debug("%s: insufficient arguments length\n",
-				 np->full_name);
-			goto err1;
+		/*
+		 * All of the error cases above bail out of the loop, so at
+		 * this point, the parsing is successful. If the requested
+		 * index matches, then fill the out_args structure and return,
+		 * or return -ENOENT for an empty entry.
+		 */
+		if (cur_index == index) {
+			if (!phandle)
+				return -ENOENT;
+
+			if (out_args) {
+				int i;
+				if (WARN_ON(count > MAX_PHANDLE_ARGS))
+					count = MAX_PHANDLE_ARGS;
+				out_args->np = node;
+				out_args->args_count = count;
+				for (i = 0; i < count; i++)
+					out_args->args[i] = be32_to_cpup(list++);
+			}
+			return 0;
 		}
-next:
-		if (cur_index == index)
-			break;
 
 		of_node_put(node);
 		node = NULL;
-		args = NULL;
+		list += count;
 		cur_index++;
 	}
 
-	if (!node) {
-		/*
-		 * args w/o node indicates that the loop above has stopped at
-		 * the 'hole' cell. Report this differently.
-		 */
-		if (args)
-			ret = -EEXIST;
-		else
-			ret = -ENOENT;
-		goto err0;
-	}
-
-	if (out_node)
-		*out_node = node;
-	if (out_args)
-		*out_args = args;
-
-	return 0;
-err1:
-	of_node_put(node);
-err0:
-	pr_debug("%s failed with status %d\n", __func__, ret);
-	return ret;
+	/* Loop exited without finding a valid entry; return an error */
+	if (node)
+		of_node_put(node);
+	return -EINVAL;
 }
-EXPORT_SYMBOL(of_parse_phandles_with_args);
+EXPORT_SYMBOL(of_parse_phandle_with_args);
 
 /**
  * prom_add_property - Add a property to a node
diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c
index ef0105f..244a2b0 100644
--- a/drivers/of/gpio.c
+++ b/drivers/of/gpio.c
@@ -35,32 +35,27 @@ int of_get_named_gpio_flags(struct device_node *np, const char *propname,
                            int index, enum of_gpio_flags *flags)
 {
 	int ret;
-	struct device_node *gpio_np;
 	struct gpio_chip *gc;
-	int size;
-	const void *gpio_spec;
-	const __be32 *gpio_cells;
+	struct of_phandle_args gpiospec;
 
-	ret = of_parse_phandles_with_args(np, propname, "#gpio-cells", index,
-					  &gpio_np, &gpio_spec);
+	ret = of_parse_phandle_with_args(np, propname, "#gpio-cells", index,
+					 &gpiospec);
 	if (ret) {
 		pr_debug("%s: can't parse gpios property\n", __func__);
 		goto err0;
 	}
 
-	gc = of_node_to_gpiochip(gpio_np);
+	gc = of_node_to_gpiochip(gpiospec.np);
 	if (!gc) {
 		pr_debug("%s: gpio controller %s isn't registered\n",
-			 np->full_name, gpio_np->full_name);
+			 np->full_name, gpiospec.np->full_name);
 		ret = -ENODEV;
 		goto err1;
 	}
 
-	gpio_cells = of_get_property(gpio_np, "#gpio-cells", &size);
-	if (!gpio_cells || size != sizeof(*gpio_cells) ||
-			be32_to_cpup(gpio_cells) != gc->of_gpio_n_cells) {
+	if (gpiospec.args_count != gc->of_gpio_n_cells) {
 		pr_debug("%s: wrong #gpio-cells for %s\n",
-			 np->full_name, gpio_np->full_name);
+			 np->full_name, gpiospec.np->full_name);
 		ret = -EINVAL;
 		goto err1;
 	}
@@ -69,13 +64,13 @@ int of_get_named_gpio_flags(struct device_node *np, const char *propname,
 	if (flags)
 		*flags = 0;
 
-	ret = gc->of_xlate(gc, np, gpio_spec, flags);
+	ret = gc->of_xlate(gc, &gpiospec, flags);
 	if (ret < 0)
 		goto err1;
 
 	ret += gc->base;
 err1:
-	of_node_put(gpio_np);
+	of_node_put(gpiospec.np);
 err0:
 	pr_debug("%s exited with status %d\n", __func__, ret);
 	return ret;
@@ -105,8 +100,8 @@ unsigned int of_gpio_count(struct device_node *np)
 	do {
 		int ret;
 
-		ret = of_parse_phandles_with_args(np, "gpios", "#gpio-cells",
-						  cnt, NULL, NULL);
+		ret = of_parse_phandle_with_args(np, "gpios", "#gpio-cells",
+						 cnt, NULL);
 		/* A hole in the gpios = <> counts anyway. */
 		if (ret < 0 && ret != -EEXIST)
 			break;
@@ -127,12 +122,9 @@ EXPORT_SYMBOL(of_gpio_count);
  * gpio chips. This function performs only one sanity check: whether gpio
  * is less than ngpios (that is specified in the gpio_chip).
  */
-int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
-			 const void *gpio_spec, u32 *flags)
+int of_gpio_simple_xlate(struct gpio_chip *gc,
+			 const struct of_phandle_args *gpiospec, u32 *flags)
 {
-	const __be32 *gpio = gpio_spec;
-	const u32 n = be32_to_cpup(gpio);
-
 	/*
 	 * We're discouraging gpio_cells < 2, since that way you'll have to
 	 * write your own xlate function (that will have to retrive the GPIO
@@ -144,13 +136,16 @@ int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
 		return -EINVAL;
 	}
 
-	if (n > gc->ngpio)
+	if (WARN_ON(gpiospec->args_count < gc->of_gpio_n_cells))
+		return -EINVAL;
+
+	if (gpiospec->args[0] > gc->ngpio)
 		return -EINVAL;
 
 	if (flags)
-		*flags = be32_to_cpu(gpio[1]);
+		*flags = gpiospec->args[1];
 
-	return n;
+	return gpiospec->args[0];
 }
 EXPORT_SYMBOL(of_gpio_simple_xlate);
 
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 6b10bdc..d466c8d 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/errno.h>
+#include <linux/of.h>
 
 #ifdef CONFIG_GPIOLIB
 
@@ -128,8 +129,8 @@ struct gpio_chip {
 	 */
 	struct device_node *of_node;
 	int of_gpio_n_cells;
-	int (*of_xlate)(struct gpio_chip *gc, struct device_node *np,
-		        const void *gpio_spec, u32 *flags);
+	int (*of_xlate)(struct gpio_chip *gc,
+		        const struct of_phandle_args *gpiospec, u32 *flags);
 #endif
 };
 
diff --git a/include/linux/of.h b/include/linux/of.h
index 4948552..ea44fd7 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -65,6 +65,13 @@ struct device_node {
 #endif
 };
 
+#define MAX_PHANDLE_ARGS 8
+struct of_phandle_args {
+	struct device_node *np;
+	int args_count;
+	uint32_t args[MAX_PHANDLE_ARGS];
+};
+
 #ifdef CONFIG_OF
 
 /* Pointer for first entry in chain of all nodes. */
@@ -230,9 +237,9 @@ extern int of_modalias_node(struct device_node *node, char *modalias, int len);
 extern struct device_node *of_parse_phandle(struct device_node *np,
 					    const char *phandle_name,
 					    int index);
-extern int of_parse_phandles_with_args(struct device_node *np,
+extern int of_parse_phandle_with_args(struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
-	struct device_node **out_node, const void **out_args);
+	struct of_phandle_args *out_args);
 
 extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
 extern int of_alias_get_id(struct device_node *np, const char *stem);
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index 52280a2..b254052 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
+#include <linux/of.h>
 
 struct device_node;
 
@@ -57,8 +58,9 @@ extern int of_mm_gpiochip_add(struct device_node *np,
 extern void of_gpiochip_add(struct gpio_chip *gc);
 extern void of_gpiochip_remove(struct gpio_chip *gc);
 extern struct gpio_chip *of_node_to_gpiochip(struct device_node *np);
-extern int of_gpio_simple_xlate(struct gpio_chip *gc, struct device_node *np,
-				const void *gpio_spec, u32 *flags);
+extern int of_gpio_simple_xlate(struct gpio_chip *gc,
+				const struct of_phandle_args *gpiospec,
+				u32 *flags);
 
 #else /* CONFIG_OF_GPIO */
 
@@ -75,8 +77,8 @@ static inline unsigned int of_gpio_count(struct device_node *np)
 }
 
 static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
-				       struct device_node *np,
-				       const void *gpio_spec, u32 *flags)
+				       const struct of_phandle_args *gpiospec,
+				       u32 *flags)
 {
 	return -ENOSYS;
 }
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 2/4] gpio/powerpc: Eliminate duplication of of_get_named_gpio_flags()
From: Grant Likely @ 2011-12-12 20:14 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, devicetree-discuss; +Cc: microblaze-uclinux
In-Reply-To: <1323720852-26311-1-git-send-email-grant.likely@secretlab.ca>

A large chunk of qe_pin_request() is unnecessarily cut-and-paste
directly from of_get_named_gpio_flags().  This patch cuts out the
duplicate code and replaces it with a call to of_get_gpio().

v2: fixed compile error due to missing gpio_to_chip()

Signed-off-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@kernel.crashing.org>
---
 arch/powerpc/sysdev/qe_lib/gpio.c |   42 +++++++-----------------------------
 drivers/gpio/gpiolib.c            |    2 +-
 include/asm-generic/gpio.h        |    1 +
 3 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c
index e23f23c..521e67a 100644
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ b/arch/powerpc/sysdev/qe_lib/gpio.c
@@ -139,14 +139,10 @@ struct qe_pin {
 struct qe_pin *qe_pin_request(struct device_node *np, int index)
 {
 	struct qe_pin *qe_pin;
-	struct device_node *gpio_np;
 	struct gpio_chip *gc;
 	struct of_mm_gpio_chip *mm_gc;
 	struct qe_gpio_chip *qe_gc;
 	int err;
-	int size;
-	const void *gpio_spec;
-	const u32 *gpio_cells;
 	unsigned long flags;
 
 	qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL);
@@ -155,45 +151,25 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	err = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", index,
-					  &gpio_np, &gpio_spec);
-	if (err) {
-		pr_debug("%s: can't parse gpios property\n", __func__);
+	err = of_get_gpio(np, index);
+	if (err < 0)
+		goto err0;
+	gc = gpio_to_chip(err);
+	if (WARN_ON(!gc))
 		goto err0;
-	}
 
-	if (!of_device_is_compatible(gpio_np, "fsl,mpc8323-qe-pario-bank")) {
+	if (!of_device_is_compatible(gc->of_node, "fsl,mpc8323-qe-pario-bank")) {
 		pr_debug("%s: tried to get a non-qe pin\n", __func__);
 		err = -EINVAL;
-		goto err1;
-	}
-
-	gc = of_node_to_gpiochip(gpio_np);
-	if (!gc) {
-		pr_debug("%s: gpio controller %s isn't registered\n",
-			 np->full_name, gpio_np->full_name);
-		err = -ENODEV;
-		goto err1;
-	}
-
-	gpio_cells = of_get_property(gpio_np, "#gpio-cells", &size);
-	if (!gpio_cells || size != sizeof(*gpio_cells) ||
-			*gpio_cells != gc->of_gpio_n_cells) {
-		pr_debug("%s: wrong #gpio-cells for %s\n",
-			 np->full_name, gpio_np->full_name);
-		err = -EINVAL;
-		goto err1;
+		goto err0;
 	}
 
-	err = gc->of_xlate(gc, np, gpio_spec, NULL);
-	if (err < 0)
-		goto err1;
-
 	mm_gc = to_of_mm_gpio_chip(gc);
 	qe_gc = to_qe_gpio_chip(mm_gc);
 
 	spin_lock_irqsave(&qe_gc->lock, flags);
 
+	err -= gc->base;
 	if (test_and_set_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[err]) == 0) {
 		qe_pin->controller = qe_gc;
 		qe_pin->num = err;
@@ -206,8 +182,6 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index)
 
 	if (!err)
 		return qe_pin;
-err1:
-	of_node_put(gpio_np);
 err0:
 	kfree(qe_pin);
 	pr_debug("%s failed with status %d\n", __func__, err);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a971e3d..dc315e9 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -114,7 +114,7 @@ static int gpio_ensure_requested(struct gpio_desc *desc, unsigned offset)
 }
 
 /* caller holds gpio_lock *OR* gpio is marked as requested */
-static inline struct gpio_chip *gpio_to_chip(unsigned gpio)
+struct gpio_chip *gpio_to_chip(unsigned gpio)
 {
 	return gpio_desc[gpio].chip;
 }
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index 8c86210..6b10bdc 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -135,6 +135,7 @@ struct gpio_chip {
 
 extern const char *gpiochip_is_requested(struct gpio_chip *chip,
 			unsigned offset);
+extern struct gpio_chip *gpio_to_chip(unsigned gpio);
 extern int __must_check gpiochip_reserve(int start, int ngpio);
 
 /* add/remove chips */
-- 
1.7.5.4

^ permalink raw reply related

* Rework gpio phandle parsing
From: Grant Likely @ 2011-12-12 20:14 UTC (permalink / raw)
  To: linux-kernel, linuxppc-dev, devicetree-discuss; +Cc: microblaze-uclinux

I originally posted this as part of the DT clock bindings.  I'm reposting
now since I've fixed up some bugs and I'm planning to put them into
linux-next.

The DT clock binding patches will be posted separately.

Cheers,
g.


 arch/arm/boot/dts/testcases/tests-phandle.dtsi |   37 ++++++
 arch/arm/boot/dts/testcases/tests.dtsi         |    1 +
 arch/arm/boot/dts/versatile-pb.dts             |    2 +
 arch/microblaze/kernel/reset.c                 |   43 +-------
 arch/powerpc/sysdev/qe_lib/gpio.c              |   42 ++------
 drivers/gpio/gpiolib.c                         |    2 +-
 drivers/of/Kconfig                             |    9 ++
 drivers/of/Makefile                            |    1 +
 drivers/of/base.c                              |  146 ++++++++++++------------
 drivers/of/gpio.c                              |   43 +++----
 drivers/of/selftest.c                          |  139 ++++++++++++++++++++++
 include/asm-generic/gpio.h                     |    6 +-
 include/linux/of.h                             |   11 ++-
 include/linux/of_gpio.h                        |   10 +-
 14 files changed, 313 insertions(+), 179 deletions(-)

^ permalink raw reply

* Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq
From: Benjamin Herrenschmidt @ 2011-12-12 20:06 UTC (permalink / raw)
  To: Finn Thain; +Cc: linuxppc-dev, linux-m68k, Geert Uytterhoeven, linux-serial
In-Reply-To: <alpine.LNX.2.00.1112130020000.2550@nippy.intranet>

On Tue, 2011-12-13 at 00:34 +1100, Finn Thain wrote:
> On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote:
> 
> > Any chance you can test this patch ? I would not be surprised if it 
> > broke m68k since I had to do some of the changes in there "blind", so 
> > let me know... with this, I can again suspend/resume properly on a Pismo 
> > while using the internal modem among other things.
> 
> The patch works on a PowerBook 520 given a few changes (below). This 
> PowerBook only has one serial port that I can test (the internal modem is 
> not supported on 68k Macs).

Interesting. The modem is a soft-modem "geoport" or a hw serial modem ?
In the later case it's probably just a matter of finding the right GPIO
bit in Apple ASIC to turn the power on :-)

>  Can you test a machine with two ports? The 
> rest of my Mac hardware is in storage since I moved house last week.

I tried on 2 port powermacs, but I only have one adapter, so I've
basically been running with one serial port open and shooting irda frame
on the other (with nothing to check wether I got the frames on the other
hand), oh well ...

I'll apply your patch and commit via my tree.

Cheers,
Ben.

> Finn
> 
> 
> Index: linux-git/drivers/tty/serial/pmac_zilog.c
> ===================================================================
> --- linux-git.orig/drivers/tty/serial/pmac_zilog.c	2011-12-13 00:18:02.000000000 +1100
> +++ linux-git/drivers/tty/serial/pmac_zilog.c	2011-12-13 00:23:55.000000000 +1100
> @@ -1705,8 +1705,8 @@ static int __init pmz_init_port(struct u
>  	struct resource *r_ports;
>  	int irq;
>  
> -	r_ports = platform_get_resource(uap->node, IORESOURCE_MEM, 0);
> -	irq = platform_get_irq(uap->node, 0);
> +	r_ports = platform_get_resource(uap->pdev, IORESOURCE_MEM, 0);
> +	irq = platform_get_irq(uap->pdev, 0);
>  	if (!r_ports || !irq)
>  		return -ENODEV;
>  
> @@ -1763,8 +1763,10 @@ static void pmz_dispose_port(struct uart
>  
>  static int __init pmz_attach(struct platform_device *pdev)
>  {
> +	struct uart_pmac_port *uap;
>  	int i;
>  
> +	/* Iterate the pmz_ports array to find a matching entry */
>  	for (i = 0; i < pmz_ports_count; i++)
>  		if (pmz_ports[i].pdev == pdev)
>  			break;
> @@ -1773,15 +1775,23 @@ static int __init pmz_attach(struct plat
>  
>  	uap = &pmz_ports[i];
>  	uap->port.dev = &pdev->dev;
> -	dev_set_drvdata(&mdev->ofdev.dev, uap);
> +	platform_set_drvdata(pdev, uap);
>  
> -			return uart_add_one_port(&pmz_uart_reg,
> -						 &pmz_ports[i]->port);
> +	return uart_add_one_port(&pmz_uart_reg, &uap->port);
>  }
>  
>  static int __exit pmz_detach(struct platform_device *pdev)
>  {
> +	struct uart_pmac_port *uap = platform_get_drvdata(pdev);
> +
> +	if (!uap)
> +		return -ENODEV;
> +
>  	uart_remove_one_port(&pmz_uart_reg, &uap->port);
> +
> +	platform_set_drvdata(pdev, NULL);
> +	uap->port.dev = NULL;
> +
>  	return 0;
>  }
>  
> @@ -1918,8 +1928,13 @@ static void __exit exit_pmz(void)
>  
>  	for (i = 0; i < pmz_ports_count; i++) {
>  		struct uart_pmac_port *uport = &pmz_ports[i];
> +#ifdef CONFIG_PPC_PMAC
>  		if (uport->node != NULL)
>  			pmz_dispose_port(uport);
> +#else
> +		if (uport->pdev != NULL)
> +			pmz_dispose_port(uport);
> +#endif
>  	}
>  	/* Unregister UART driver */
>  	uart_unregister_driver(&pmz_uart_reg);
> @@ -1993,6 +2008,9 @@ static int __init pmz_console_setup(stru
>  #ifdef CONFIG_PPC_PMAC
>  	if (uap->node == NULL)
>  		return -ENODEV;
> +#else
> +	if (uap->pdev == NULL)
> +		return -ENODEV;
>  #endif
>  	port = &uap->port;
>  
> Index: linux-git/drivers/tty/serial/pmac_zilog.h
> ===================================================================
> --- linux-git.orig/drivers/tty/serial/pmac_zilog.h	2011-12-13 00:18:02.000000000 +1100
> +++ linux-git/drivers/tty/serial/pmac_zilog.h	2011-12-13 00:23:55.000000000 +1100
> @@ -1,18 +1,9 @@
>  #ifndef __PMAC_ZILOG_H__
>  #define __PMAC_ZILOG_H__
>  
> -#ifdef CONFIG_PPC_PMAC
> -/* We cannot use dev_* because this can be called early, way before
> - * we are matched with a device (when using it as a kernel console)
> - */
>  #define pmz_debug(fmt, arg...)	pr_debug("ttyPZ%d: " fmt, uap->port.line, ## arg)
>  #define pmz_error(fmt, arg...)	pr_err("ttyPZ%d: " fmt, uap->port.line, ## arg)
>  #define pmz_info(fmt, arg...)	pr_info("ttyPZ%d: " fmt, uap->port.line, ## arg)
> -#else
> -#define pmz_debug(fmt, arg...)	dev_dbg(&uap->node->dev, fmt, ## arg)
> -#define pmz_error(fmt, arg...)	dev_err(&uap->node->dev, fmt, ## arg)
> -#define pmz_info(fmt, arg...)	dev_info(&uap->node->dev, fmt, ## arg)
> -#endif
>  
>  /*
>   * At most 2 ESCCs with 2 ports each

^ permalink raw reply

* Re: [PATCH v3 2/3] hvc_init(): Enforce one-time initialization.
From: Amit Shah @ 2011-12-12 19:25 UTC (permalink / raw)
  To: Miche Baker-Harvey
  Cc: Stephen Rothwell, xen-devel, Konrad Rzeszutek Wilk, Rusty Russell,
	linux-kernel, virtualization, Anton Blanchard, Mike Waychison,
	ppc-dev, Greg Kroah-Hartman, Eric Northrup
In-Reply-To: <CAB8RdapDDV0kYW_uASpBoJ3hbx6zScKnG+LWWcjYtqMnqXN4Xw@mail.gmail.com>

On (Mon) 12 Dec 2011 [11:11:55], Miche Baker-Harvey wrote:
> So on a CONSOLE_PORT_ADD message, we would take the
> (existing)ports_device::ports_lock, and for other control messages we
> would justtake the (new) port::port_lock?  You are concerned that just
> takingthe ports_lock for all control messages could be too
> restrictive?  Iwouldn't have expected these messages to be frequent
> occurrences, butI'll defer to your experience here.

No, I mean we'll have to take the new port_lock() everywhere we
currently take the port lock, plus in a few more places.  I only
suggest using port_lock() helper since we'll need a dependency on the
portdev lock as well.

> The CONSOLE_CONSOLE_PORT message calls hvc_alloc, which also
> needsserialization.  That's in another one of these three patches; are
> youthinking we could leave that patch be, or that we would we use
> theport_lock for CONSOLE_CONSOLE_PORT?  Using the port_lock
> wouldprovide the HVC serialization "for free" but it would be cleaner
> if weput HVC related synchronization in hvc_console.c.

Yes, definitely, since other users of hvc_console may get bitten in
similar ways.  However, I'm not too familiar with the hvc code, the
people at linux-ppc can be of help.

> On Thu, Dec 8, 2011 at 4:08 AM, Amit Shah <amit.shah@redhat.com> wrote:
> > On (Tue) 06 Dec 2011 [09:05:38], Miche Baker-Harvey wrote:
> >> Amit,
> >>
> >> Ah, indeed.  I am not using MSI-X, so virtio_pci::vp_try_to_find_vqs()
> >> calls vp_request_intx() and sets up an interrupt callback.  From
> >> there, when an interrupt occurs, the stack looks something like this:
> >>
> >> virtio_pci::vp_interrupt()
> >>   virtio_pci::vp_vring_interrupt()
> >>     virtio_ring::vring_interrupt()
> >>       vq->vq.callback()  <-- in this case, that's virtio_console::control_intr()
> >>         workqueue::schedule_work()
> >>           workqueue::queue_work()
> >>             queue_work_on(get_cpu())  <-- queues the work on the current CPU.
> >>
> >> I'm not doing anything to keep multiple control message from being
> >> sent concurrently to the guest, and we will take those interrupts on
> >> any CPU. I've confirmed that the two instances of
> >> handle_control_message() are occurring on different CPUs.
> >
> > So let's have a new helper, port_lock() that takes the port-specific
> > spinlock.  There has to be a new helper, since the port lock should
> > depend on the portdev lock being taken too.  For the port addition
> > case, just the portdev lock should be taken.  For any other
> > operations, the port lock should be taken.
> >
> > My assumption was that we would be able to serialise the work items,
> > but that will be too restrictive.  Taking port locks sounds like a
> > better idea.
> >
> > We'd definitely need the port lock in the control work handler.  We
> > might need it in a few more places (like module removal), but we'll
> > worry about that later.
> >
> > Does this sound fine?
> >
> >                Amit

		Amit

^ permalink raw reply

* Re: [PATCH v3 2/3] hvc_init(): Enforce one-time initialization.
From: Miche Baker-Harvey @ 2011-12-12 19:11 UTC (permalink / raw)
  To: Amit Shah
  Cc: Stephen Rothwell, xen-devel, Konrad Rzeszutek Wilk, Rusty Russell,
	linux-kernel, virtualization, Anton Blanchard, Mike Waychison,
	ppc-dev, Greg Kroah-Hartman, Eric Northrup
In-Reply-To: <20111208120804.GF23531@amit-x200.redhat.com>

So on a CONSOLE_PORT_ADD message, we would take the
(existing)ports_device::ports_lock, and for other control messages we
would justtake the (new) port::port_lock? =A0You are concerned that just
takingthe ports_lock for all control messages could be too
restrictive? =A0Iwouldn't have expected these messages to be frequent
occurrences, butI'll defer to your experience here.
The CONSOLE_CONSOLE_PORT message calls hvc_alloc, which also
needsserialization. =A0That's in another one of these three patches; are
youthinking we could leave that patch be, or that we would we use
theport_lock for CONSOLE_CONSOLE_PORT? =A0Using the port_lock
wouldprovide the HVC serialization "for free" but it would be cleaner
if weput HVC related synchronization in hvc_console.c.
On Thu, Dec 8, 2011 at 4:08 AM, Amit Shah <amit.shah@redhat.com> wrote:
> On (Tue) 06 Dec 2011 [09:05:38], Miche Baker-Harvey wrote:
>> Amit,
>>
>> Ah, indeed. =A0I am not using MSI-X, so virtio_pci::vp_try_to_find_vqs()
>> calls vp_request_intx() and sets up an interrupt callback. =A0From
>> there, when an interrupt occurs, the stack looks something like this:
>>
>> virtio_pci::vp_interrupt()
>> =A0 virtio_pci::vp_vring_interrupt()
>> =A0 =A0 virtio_ring::vring_interrupt()
>> =A0 =A0 =A0 vq->vq.callback() =A0<-- in this case, that's virtio_console=
::control_intr()
>> =A0 =A0 =A0 =A0 workqueue::schedule_work()
>> =A0 =A0 =A0 =A0 =A0 workqueue::queue_work()
>> =A0 =A0 =A0 =A0 =A0 =A0 queue_work_on(get_cpu()) =A0<-- queues the work =
on the current CPU.
>>
>> I'm not doing anything to keep multiple control message from being
>> sent concurrently to the guest, and we will take those interrupts on
>> any CPU. I've confirmed that the two instances of
>> handle_control_message() are occurring on different CPUs.
>
> So let's have a new helper, port_lock() that takes the port-specific
> spinlock. =A0There has to be a new helper, since the port lock should
> depend on the portdev lock being taken too. =A0For the port addition
> case, just the portdev lock should be taken. =A0For any other
> operations, the port lock should be taken.
>
> My assumption was that we would be able to serialise the work items,
> but that will be too restrictive. =A0Taking port locks sounds like a
> better idea.
>
> We'd definitely need the port lock in the control work handler. =A0We
> might need it in a few more places (like module removal), but we'll
> worry about that later.
>
> Does this sound fine?
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0Amit

^ permalink raw reply

* Re: [PATCH 01/16 v3] pmac_zilog: fix unexpected irq
From: Finn Thain @ 2011-12-12 13:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linuxppc-dev, linux-m68k, Geert Uytterhoeven, linux-serial
In-Reply-To: <1323647315.19891.10.camel@pasglop>


On Mon, 12 Dec 2011, Benjamin Herrenschmidt wrote:

> Any chance you can test this patch ? I would not be surprised if it 
> broke m68k since I had to do some of the changes in there "blind", so 
> let me know... with this, I can again suspend/resume properly on a Pismo 
> while using the internal modem among other things.

The patch works on a PowerBook 520 given a few changes (below). This 
PowerBook only has one serial port that I can test (the internal modem is 
not supported on 68k Macs). Can you test a machine with two ports? The 
rest of my Mac hardware is in storage since I moved house last week.

Finn


Index: linux-git/drivers/tty/serial/pmac_zilog.c
===================================================================
--- linux-git.orig/drivers/tty/serial/pmac_zilog.c	2011-12-13 00:18:02.000000000 +1100
+++ linux-git/drivers/tty/serial/pmac_zilog.c	2011-12-13 00:23:55.000000000 +1100
@@ -1705,8 +1705,8 @@ static int __init pmz_init_port(struct u
 	struct resource *r_ports;
 	int irq;
 
-	r_ports = platform_get_resource(uap->node, IORESOURCE_MEM, 0);
-	irq = platform_get_irq(uap->node, 0);
+	r_ports = platform_get_resource(uap->pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_irq(uap->pdev, 0);
 	if (!r_ports || !irq)
 		return -ENODEV;
 
@@ -1763,8 +1763,10 @@ static void pmz_dispose_port(struct uart
 
 static int __init pmz_attach(struct platform_device *pdev)
 {
+	struct uart_pmac_port *uap;
 	int i;
 
+	/* Iterate the pmz_ports array to find a matching entry */
 	for (i = 0; i < pmz_ports_count; i++)
 		if (pmz_ports[i].pdev == pdev)
 			break;
@@ -1773,15 +1775,23 @@ static int __init pmz_attach(struct plat
 
 	uap = &pmz_ports[i];
 	uap->port.dev = &pdev->dev;
-	dev_set_drvdata(&mdev->ofdev.dev, uap);
+	platform_set_drvdata(pdev, uap);
 
-			return uart_add_one_port(&pmz_uart_reg,
-						 &pmz_ports[i]->port);
+	return uart_add_one_port(&pmz_uart_reg, &uap->port);
 }
 
 static int __exit pmz_detach(struct platform_device *pdev)
 {
+	struct uart_pmac_port *uap = platform_get_drvdata(pdev);
+
+	if (!uap)
+		return -ENODEV;
+
 	uart_remove_one_port(&pmz_uart_reg, &uap->port);
+
+	platform_set_drvdata(pdev, NULL);
+	uap->port.dev = NULL;
+
 	return 0;
 }
 
@@ -1918,8 +1928,13 @@ static void __exit exit_pmz(void)
 
 	for (i = 0; i < pmz_ports_count; i++) {
 		struct uart_pmac_port *uport = &pmz_ports[i];
+#ifdef CONFIG_PPC_PMAC
 		if (uport->node != NULL)
 			pmz_dispose_port(uport);
+#else
+		if (uport->pdev != NULL)
+			pmz_dispose_port(uport);
+#endif
 	}
 	/* Unregister UART driver */
 	uart_unregister_driver(&pmz_uart_reg);
@@ -1993,6 +2008,9 @@ static int __init pmz_console_setup(stru
 #ifdef CONFIG_PPC_PMAC
 	if (uap->node == NULL)
 		return -ENODEV;
+#else
+	if (uap->pdev == NULL)
+		return -ENODEV;
 #endif
 	port = &uap->port;
 
Index: linux-git/drivers/tty/serial/pmac_zilog.h
===================================================================
--- linux-git.orig/drivers/tty/serial/pmac_zilog.h	2011-12-13 00:18:02.000000000 +1100
+++ linux-git/drivers/tty/serial/pmac_zilog.h	2011-12-13 00:23:55.000000000 +1100
@@ -1,18 +1,9 @@
 #ifndef __PMAC_ZILOG_H__
 #define __PMAC_ZILOG_H__
 
-#ifdef CONFIG_PPC_PMAC
-/* We cannot use dev_* because this can be called early, way before
- * we are matched with a device (when using it as a kernel console)
- */
 #define pmz_debug(fmt, arg...)	pr_debug("ttyPZ%d: " fmt, uap->port.line, ## arg)
 #define pmz_error(fmt, arg...)	pr_err("ttyPZ%d: " fmt, uap->port.line, ## arg)
 #define pmz_info(fmt, arg...)	pr_info("ttyPZ%d: " fmt, uap->port.line, ## arg)
-#else
-#define pmz_debug(fmt, arg...)	dev_dbg(&uap->node->dev, fmt, ## arg)
-#define pmz_error(fmt, arg...)	dev_err(&uap->node->dev, fmt, ## arg)
-#define pmz_info(fmt, arg...)	dev_info(&uap->node->dev, fmt, ## arg)
-#endif
 
 /*
  * At most 2 ESCCs with 2 ports each

^ permalink raw reply

* Re: [PATCH] block/swim3: Locking fixes
From: Jens Axboe @ 2011-12-12 11:42 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
In-Reply-To: <1323665859.19891.16.camel@pasglop>

On 2011-12-12 05:57, Benjamin Herrenschmidt wrote:
> The old PowerMac swim3 driver has some "interesting" locking issues,
> using a private lock and failing to lock the queue before completing
> requests, which triggered WARN_ONs among others.
> 
> This rips out the private lock, makes everything operate under the
> block queue lock, and generally makes things simpler.
> 
> We used to also share a queue between the two possible instances which
> was problematic since we might pick the wrong controller in some cases,
> so make the queue and the current request per-instance and use
> queuedata to point to our private data which is a lot cleaner.
> 
> We still share the queue lock but then, it's nearly impossible to actually
> use 2 swim3's simultaneously: one would need to have a Wallstreet
> PowerBook, the only machine afaik with two of these on the motherboard,
> and populate both hotswap bays with a floppy drive (the machine ships
> only with one), so nobody cares...
> 
> While at it, add a little fix to clear up stale interrupts when loading
> the driver or plugging a floppy drive in a bay.

Applied for current for-linus branch.

-- 
Jens Axboe

^ permalink raw reply

* [PATCH 2/2] mtd/nand: Add ONFI support for FSL NAND controller
From: Shengzhou Liu @ 2011-12-12  9:40 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: scottwood, dwmw2, kumar.gala, linux-mtd, Shengzhou Liu
In-Reply-To: <1323682853-10750-1-git-send-email-Shengzhou.Liu@freescale.com>

- fix NAND_CMD_READID command for ONFI detect.
- add NAND_CMD_PARAM command to read the ONFI parameter page.

Signed-off-by: Shengzhou Liu <Shengzhou.Liu@freescale.com>
---
v3: unify the bytes of fbcr to 256.
v2: no changes

 drivers/mtd/nand/fsl_elbc_nand.c |   18 ++++++++++--------
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 4f405a0..320584a 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -349,20 +349,22 @@ static void fsl_elbc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 		fsl_elbc_run_command(mtd);
 		return;
 
-	/* READID must read all 5 possible bytes while CEB is active */
 	case NAND_CMD_READID:
-		dev_vdbg(priv->dev, "fsl_elbc_cmdfunc: NAND_CMD_READID.\n");
+	case NAND_CMD_PARAM:
+		dev_vdbg(priv->dev, "fsl_elbc_cmdfunc: NAND_CMD %x\n", command);
 
 		out_be32(&lbc->fir, (FIR_OP_CM0 << FIR_OP0_SHIFT) |
 		                    (FIR_OP_UA  << FIR_OP1_SHIFT) |
 		                    (FIR_OP_RBW << FIR_OP2_SHIFT));
-		out_be32(&lbc->fcr, NAND_CMD_READID << FCR_CMD0_SHIFT);
-		/* nand_get_flash_type() reads 8 bytes of entire ID string */
-		out_be32(&lbc->fbcr, 8);
-		elbc_fcm_ctrl->read_bytes = 8;
+		out_be32(&lbc->fcr, command << FCR_CMD0_SHIFT);
+		/*
+		 * although currently it's 8 bytes for READID, we always read
+		 * the maximum 256 bytes(for PARAM)
+		 */
+		out_be32(&lbc->fbcr, 256);
+		elbc_fcm_ctrl->read_bytes = 256;
 		elbc_fcm_ctrl->use_mdr = 1;
-		elbc_fcm_ctrl->mdr = 0;
-
+		elbc_fcm_ctrl->mdr = column;
 		set_addr(mtd, 0, 0, 0);
 		fsl_elbc_run_command(mtd);
 		return;
-- 
1.6.4

^ permalink raw reply related

* [PATCH 1/2] mtd/nand: fixup for fmr initialization of Freescale NAND controller
From: Shengzhou Liu @ 2011-12-12  9:40 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: scottwood, dwmw2, kumar.gala, linux-mtd, Shengzhou Liu

There was a bug for fmr initialization, which lead to  fmr was always 0x100
in fsl_elbc_chip_init() and caused FCM command timeout before calling
fsl_elbc_chip_init_tail(), now we initialize CWTO to maximum timeout value
and not relying on the setting of bootloader.

Signed-off-by: Shengzhou Liu <Shengzhou.Liu@freescale.com>
---
v3: add more descriptions.
v2: make fmr not relying on the setting of bootloader.

 drivers/mtd/nand/fsl_elbc_nand.c |   10 +++++-----
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index eedd8ee..4f405a0 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -659,9 +659,7 @@ static int fsl_elbc_chip_init_tail(struct mtd_info *mtd)
 	if (chip->pagemask & 0xff000000)
 		al++;
 
-	/* add to ECCM mode set in fsl_elbc_init */
-	priv->fmr |= (12 << FMR_CWTO_SHIFT) |  /* Timeout > 12 ms */
-	             (al << FMR_AL_SHIFT);
+	priv->fmr |= al << FMR_AL_SHIFT;
 
 	dev_dbg(priv->dev, "fsl_elbc_init: nand->numchips = %d\n",
 	        chip->numchips);
@@ -764,8 +762,10 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	priv->mtd.priv = chip;
 	priv->mtd.owner = THIS_MODULE;
 
-	/* Set the ECCM according to the settings in bootloader.*/
-	priv->fmr = in_be32(&lbc->fmr) & FMR_ECCM;
+	/* set timeout to maximum */
+	priv->fmr = 15 << FMR_CWTO_SHIFT;
+	if (in_be32(&lbc->bank[priv->bank].or) & OR_FCM_PGS)
+		priv->fmr |= FMR_ECCM;
 
 	/* fill in nand_chip structure */
 	/* set up function call table */
-- 
1.6.4

^ permalink raw reply related

* [PATCH 1/1] ppc64: fix missing to check all bits of _TIF_USER_WORK_MASK in preempt
From: Tiejun Chen @ 2011-12-12  9:10 UTC (permalink / raw)
  To: benh, linuxppc-dev

In entry_64.S version of ret_from_except_lite, you'll notice that
in the !preempt case, after we've checked MSR_PR we test for any
TIF flag in _TIF_USER_WORK_MASK to decide whether to go to do_work
or not. However, in the preempt case, we do a convoluted trick to
test SIGPENDING only if PR was set and always test NEED_RESCHED ...
but we forget to test any other bit of _TIF_USER_WORK_MASK !!! So
that means that with preempt, we completely fail to test for things
like single step, syscall tracing, etc...

This should be fixed as the following path:

 - Test PR. If set, go to test_work_user, else continue.

 - In test_work_user, always test for _TIF_USER_WORK_MASK to decide to
go to do_work, maybe call it do_user_work

 - In test_work_kernel, test for _TIF_KERNEL_WORK_MASK which is set to
our new flag along with NEED_RESCHED if preempt is enabled and branch to
do_kernel_work.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 arch/powerpc/kernel/entry_64.S |   33 +++++++++++++++------------------
 1 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d834425..9e70b9a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -571,27 +571,26 @@ _GLOBAL(ret_from_except_lite)
 	mtmsrd	r9,1		/* Update machine state */
 #endif /* CONFIG_PPC_BOOK3E */
 
-#ifdef CONFIG_PREEMPT
-	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */
-	li	r0,_TIF_NEED_RESCHED	/* bits to check */
-	ld	r3,_MSR(r1)
-	ld	r4,TI_FLAGS(r9)
-	/* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
-	rlwimi	r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
-	and.	r0,r4,r0	/* check NEED_RESCHED and maybe SIGPENDING */
-	bne	do_work
-
-#else /* !CONFIG_PREEMPT */
 	ld	r3,_MSR(r1)	/* Returning to user mode? */
 	andi.	r3,r3,MSR_PR
-	beq	restore		/* if not, just restore regs and return */
+	bne	test_work_user
 
+	clrrdi	r9,r1,THREAD_SHIFT	/* current_thread_info() */
+	li	r0,_TIF_USER_WORK_MASK
+#ifdef CONFIG_PREEMPT
+	ori	r0,r0,_TIF_NEED_RESCHED
+#endif
+	ld	r4,TI_FLAGS(r9)
+	and.	r0,r4,r0	/* check NEED_RESCHED and maybe _TIF_USER_WORK_MASK */
+	bne	do_kernel_work
+	b	restore		/* if so, just restore regs and return */
+
+test_work_user:
 	/* Check current_thread_info()->flags */
 	clrrdi	r9,r1,THREAD_SHIFT
 	ld	r4,TI_FLAGS(r9)
 	andi.	r0,r4,_TIF_USER_WORK_MASK
-	bne	do_work
-#endif
+	bne	do_user_work
 
 restore:
 BEGIN_FW_FTR_SECTION
@@ -693,10 +692,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	b	.ret_from_except_lite		/* loop back and handle more */
 #endif
 
-do_work:
+do_kernel_work:
 #ifdef CONFIG_PREEMPT
-	andi.	r0,r3,MSR_PR	/* Returning to user mode? */
-	bne	user_work
 	/* Check that preempt_count() == 0 and interrupts are enabled */
 	lwz	r8,TI_PREEMPT(r9)
 	cmpwi	cr1,r8,0
@@ -738,9 +735,9 @@ do_work:
 	bne	1b
 	b	restore
 
-user_work:
 #endif /* CONFIG_PREEMPT */
 
+do_user_work:
 	/* Enable interrupts */
 #ifdef CONFIG_PPC_BOOK3E
 	wrteei	1
-- 
1.5.6

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox