LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 8/8] mm: remove free_area_cache
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

Since all architectures have been converted to use vm_unmapped_area(),
there is no remaining use for the free_area_cache.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/arm/mm/mmap.c               |    2 --
 arch/arm64/mm/mmap.c             |    2 --
 arch/mips/mm/mmap.c              |    2 --
 arch/powerpc/mm/mmap_64.c        |    2 --
 arch/s390/mm/mmap.c              |    4 ----
 arch/sparc/kernel/sys_sparc_64.c |    2 --
 arch/tile/mm/mmap.c              |    2 --
 arch/x86/ia32/ia32_aout.c        |    2 --
 arch/x86/mm/mmap.c               |    2 --
 fs/binfmt_aout.c                 |    2 --
 fs/binfmt_elf.c                  |    2 --
 include/linux/mm_types.h         |    3 ---
 include/linux/sched.h            |    2 --
 kernel/fork.c                    |    4 ----
 mm/mmap.c                        |   28 ----------------------------
 mm/nommu.c                       |    4 ----
 mm/util.c                        |    1 -
 17 files changed, 0 insertions(+), 66 deletions(-)

diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 10062ceadd1c..0c6356255fe3 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -181,11 +181,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
 
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 7c7be7855638..8ed6cb1a900f 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -90,11 +90,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base();
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
 EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index d9be7540a6be..f4e63c29d044 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -158,11 +158,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
 
diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c
index 67a42ed0d2fc..cb8bdbe4972f 100644
--- a/arch/powerpc/mm/mmap_64.c
+++ b/arch/powerpc/mm/mmap_64.c
@@ -92,10 +92,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base();
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index c59a5efa58b1..f2a462625c9e 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base();
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
 
@@ -173,11 +171,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (mmap_is_legacy()) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = s390_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base();
 		mm->get_unmapped_area = s390_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
 
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 708bc29d36a8..f3c169f9d3a1 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -290,7 +290,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	    sysctl_legacy_va_layout) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		/* We know it's 32-bit */
 		unsigned long task_size = STACK_TOP32;
@@ -302,7 +301,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 
 		mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
 
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index f96f4cec602a..d67d91ebf63e 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -66,10 +66,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base(mm);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a703af19c281..3b3558577642 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -309,8 +309,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
 		(current->mm->start_data = N_DATADDR(ex));
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
-	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
-	current->mm->cached_hole_size = 0;
 
 	retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
 	if (retval < 0) {
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 845df6835f9f..62c29a5bfe26 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -115,10 +115,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mmap_legacy_base();
 		mm->get_unmapped_area = arch_get_unmapped_area;
-		mm->unmap_area = arch_unmap_area;
 	} else {
 		mm->mmap_base = mmap_base();
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
-		mm->unmap_area = arch_unmap_area_topdown;
 	}
 }
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 6043567b95c2..692e75ca6415 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -256,8 +256,6 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		(current->mm->start_data = N_DATADDR(ex));
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
-	current->mm->free_area_cache = current->mm->mmap_base;
-	current->mm->cached_hole_size = 0;
 
 	retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
 	if (retval < 0) {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0c42cdbabecf..e2087dea9c1e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -730,8 +730,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
-	current->mm->free_area_cache = current->mm->mmap_base;
-	current->mm->cached_hole_size = 0;
 	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
 				 executable_stack);
 	if (retval < 0) {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f8f5162a3571..e50eb047ea8a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -329,12 +329,9 @@ struct mm_struct {
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
-	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
 #endif
 	unsigned long mmap_base;		/* base of mmap area */
 	unsigned long task_size;		/* size of task vm space */
-	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
-	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
 	unsigned long highest_vm_end;		/* highest vma end address */
 	pgd_t * pgd;
 	atomic_t mm_users;			/* How many users with user space? */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..fa7e0a60ebe9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -366,8 +366,6 @@ extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
 			  unsigned long flags);
-extern void arch_unmap_area(struct mm_struct *, unsigned long);
-extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 #else
 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index a31b823b3c2d..bdf61755ef4a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -364,8 +364,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	mm->locked_vm = 0;
 	mm->mmap = NULL;
 	mm->mmap_cache = NULL;
-	mm->free_area_cache = oldmm->mmap_base;
-	mm->cached_hole_size = ~0UL;
 	mm->map_count = 0;
 	cpumask_clear(mm_cpumask(mm));
 	mm->mm_rb = RB_ROOT;
@@ -539,8 +537,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	mm->nr_ptes = 0;
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
-	mm->free_area_cache = TASK_UNMAPPED_BASE;
-	mm->cached_hole_size = ~0UL;
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index f54b235f29a9..532f447879d4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1800,15 +1800,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 }
 #endif	
 
-void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
-{
-	/*
-	 * Is this a new hole at the lowest possible address?
-	 */
-	if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
-		mm->free_area_cache = addr;
-}
-
 /*
  * This mmap-allocator allocates new areas top-down from below the
  * stack's low limit (the base):
@@ -1865,19 +1856,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 }
 #endif
 
-void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
-{
-	/*
-	 * Is this a new hole at the highest possible address?
-	 */
-	if (addr > mm->free_area_cache)
-		mm->free_area_cache = addr;
-
-	/* dont allow allocations above current base */
-	if (mm->free_area_cache > mm->mmap_base)
-		mm->free_area_cache = mm->mmap_base;
-}
-
 unsigned long
 get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
@@ -2276,7 +2254,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct vm_area_struct **insertion_point;
 	struct vm_area_struct *tail_vma = NULL;
-	unsigned long addr;
 
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	vma->vm_prev = NULL;
@@ -2293,11 +2270,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	} else
 		mm->highest_vm_end = prev ? prev->vm_end : 0;
 	tail_vma->vm_next = NULL;
-	if (mm->unmap_area == arch_unmap_area)
-		addr = prev ? prev->vm_end : mm->mmap_base;
-	else
-		addr = vma ?  vma->vm_start : mm->mmap_base;
-	mm->unmap_area(mm, addr);
 	mm->mmap_cache = NULL;		/* Kill the cache. */
 }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 79c3cac87afa..b5535ff2f9d1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1852,10 +1852,6 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
 	return -ENOMEM;
 }
 
-void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
-{
-}
-
 void unmap_mapping_range(struct address_space *mapping,
 			 loff_t const holebegin, loff_t const holelen,
 			 int even_cows)
diff --git a/mm/util.c b/mm/util.c
index c55e26b17d93..4c19aa6a1b43 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -293,7 +293,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 {
 	mm->mmap_base = TASK_UNMAPPED_BASE;
 	mm->get_unmapped_area = arch_get_unmapped_area;
-	mm->unmap_area = arch_unmap_area;
 }
 #endif
 
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 7/8] mm: use vm_unmapped_area() on powerpc architecture
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

Update the powerpc slice_get_unmapped_area function to make use of
vm_unmapped_area() instead of implementing a brute force search.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/powerpc/mm/slice.c |  128 +++++++++++++++++++++++++++++-----------------
 1 files changed, 81 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 999a74f25ebe..048346b7eed5 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -242,31 +242,51 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 					      struct slice_mask available,
 					      int psize)
 {
-	struct vm_area_struct *vma;
-	unsigned long addr;
-	struct slice_mask mask;
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+	unsigned long addr, found, slice;
+	struct vm_unmapped_area_info info;
 
-	addr = TASK_UNMAPPED_BASE;
+	info.flags = 0;
+	info.length = len;
+	info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+	info.align_offset = 0;
 
-	for (;;) {
-		addr = _ALIGN_UP(addr, 1ul << pshift);
-		if ((TASK_SIZE - len) < addr)
-			break;
-		vma = find_vma(mm, addr);
-		BUG_ON(vma && (addr >= vma->vm_end));
+	addr = TASK_UNMAPPED_BASE;
+	while (addr < TASK_SIZE) {
+		info.low_limit = addr;
+		if (addr < SLICE_LOW_TOP) {
+			slice = GET_LOW_SLICE_INDEX(addr);
+			addr = (slice + 1) << SLICE_LOW_SHIFT;
+			if (!(available.low_slices & (1u << slice)))
+				continue;
+		} else {
+			slice = GET_HIGH_SLICE_INDEX(addr);
+			addr = (slice + 1) << SLICE_HIGH_SHIFT;
+			if (!(available.high_slices & (1u << slice)))
+				continue;
+		}
 
-		mask = slice_range_to_mask(addr, len);
-		if (!slice_check_fit(mask, available)) {
-			if (addr < SLICE_LOW_TOP)
-				addr = _ALIGN_UP(addr + 1,  1ul << SLICE_LOW_SHIFT);
-			else
-				addr = _ALIGN_UP(addr + 1,  1ul << SLICE_HIGH_SHIFT);
-			continue;
+ next_slice:
+		if (addr >= TASK_SIZE)
+			addr = TASK_SIZE;
+		else if (addr < SLICE_LOW_TOP) {
+			slice = GET_LOW_SLICE_INDEX(addr);
+			if (available.low_slices & (1u << slice)) {
+				addr = (slice + 1) << SLICE_LOW_SHIFT;
+				goto next_slice;
+			}
+		} else {
+			slice = GET_HIGH_SLICE_INDEX(addr);
+			if (available.high_slices & (1u << slice)) {
+				addr = (slice + 1) << SLICE_HIGH_SHIFT;
+				goto next_slice;
+			}
 		}
-		if (!vma || addr + len <= vma->vm_start)
-			return addr;
-		addr = vma->vm_end;
+		info.high_limit = addr;
+
+		found = vm_unmapped_area(&info);
+		if (!(found & ~PAGE_MASK))
+			return found;
 	}
 
 	return -ENOMEM;
@@ -277,39 +297,53 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 					     struct slice_mask available,
 					     int psize)
 {
-	struct vm_area_struct *vma;
-	unsigned long addr;
-	struct slice_mask mask;
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+	unsigned long addr, found, slice;
+	struct vm_unmapped_area_info info;
 
-	addr = mm->mmap_base;
-	while (addr > len) {
-		/* Go down by chunk size */
-		addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+	info.align_offset = 0;
 
-		/* Check for hit with different page size */
-		mask = slice_range_to_mask(addr, len);
-		if (!slice_check_fit(mask, available)) {
-			if (addr < SLICE_LOW_TOP)
-				addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
-			else if (addr < (1ul << SLICE_HIGH_SHIFT))
-				addr = SLICE_LOW_TOP;
-			else
-				addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
-			continue;
+	addr = mm->mmap_base;
+	while (addr > PAGE_SIZE) {
+		info.high_limit = addr;
+                if (addr < SLICE_LOW_TOP) {
+			slice = GET_LOW_SLICE_INDEX(addr - 1);
+			addr = slice << SLICE_LOW_SHIFT;
+			if (!(available.low_slices & (1u << slice)))
+				continue;
+		} else {
+			slice = GET_HIGH_SLICE_INDEX(addr - 1);
+			addr = slice ? (slice << SLICE_HIGH_SHIFT) :
+								SLICE_LOW_TOP;
+			if (!(available.high_slices & (1u << slice)))
+				continue;
 		}
 
-		/*
-		 * Lookup failure means no vma is above this address,
-		 * else if new region fits below vma->vm_start,
-		 * return with success:
-		 */
-		vma = find_vma(mm, addr);
-		if (!vma || (addr + len) <= vma->vm_start)
-			return addr;
+ next_slice:
+		if (addr < PAGE_SIZE)
+			addr = PAGE_SIZE;
+		else if (addr < SLICE_LOW_TOP) {
+			slice = GET_LOW_SLICE_INDEX(addr - 1);
+			if (available.low_slices & (1u << slice)) {
+				addr = slice << SLICE_LOW_SHIFT;
+				goto next_slice;
+			}
+		} else {
+			slice = GET_HIGH_SLICE_INDEX(addr - 1);
+			if (available.high_slices & (1u << slice)) {
+				addr = slice ? (slice << SLICE_HIGH_SHIFT) :
+								SLICE_LOW_TOP;
+				goto next_slice;
+			}
+		}
+		info.low_limit = addr;
 
-		/* try just below the current vma->vm_start */
-		addr = vma->vm_start;
+		found = vm_unmapped_area(&info);
+		if (!(found & ~PAGE_MASK))
+			return found;
 	}
 
 	/*
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 6/8] mm: remove free_area_cache use in powerpc architecture
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

As all other architectures have been converted to use vm_unmapped_area(),
we are about to retire the free_area_cache.

This change simply removes the use of that cache in
slice_get_unmapped_area(), which will most certainly have a
performance cost. The next patch in the series (7/8) will convert that
function to use the vm_unmapped_area() infrastructure and regain the
performance.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/powerpc/include/asm/page_64.h       |    3 +-
 arch/powerpc/mm/hugetlbpage.c            |    2 +-
 arch/powerpc/mm/slice.c                  |  108 +++++------------------------
 arch/powerpc/platforms/cell/spufs/file.c |    2 +-
 4 files changed, 22 insertions(+), 93 deletions(-)

diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index cd915d6b093d..88693cef4f3d 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -99,8 +99,7 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr,
 					     unsigned long len,
 					     unsigned long flags,
 					     unsigned int psize,
-					     int topdown,
-					     int use_cache);
+					     int topdown);
 
 extern unsigned int get_slice_psize(struct mm_struct *mm,
 				    unsigned long addr);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a7d8eb..5dc52d803ed8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -742,7 +742,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
-	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
+	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
 }
 #endif
 
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index cf9dada734b6..999a74f25ebe 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -240,23 +240,15 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 					      unsigned long len,
 					      struct slice_mask available,
-					      int psize, int use_cache)
+					      int psize)
 {
 	struct vm_area_struct *vma;
-	unsigned long start_addr, addr;
+	unsigned long addr;
 	struct slice_mask mask;
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 
-	if (use_cache) {
-		if (len <= mm->cached_hole_size) {
-			start_addr = addr = TASK_UNMAPPED_BASE;
-			mm->cached_hole_size = 0;
-		} else
-			start_addr = addr = mm->free_area_cache;
-	} else
-		start_addr = addr = TASK_UNMAPPED_BASE;
+	addr = TASK_UNMAPPED_BASE;
 
-full_search:
 	for (;;) {
 		addr = _ALIGN_UP(addr, 1ul << pshift);
 		if ((TASK_SIZE - len) < addr)
@@ -272,63 +264,24 @@ full_search:
 				addr = _ALIGN_UP(addr + 1,  1ul << SLICE_HIGH_SHIFT);
 			continue;
 		}
-		if (!vma || addr + len <= vma->vm_start) {
-			/*
-			 * Remember the place where we stopped the search:
-			 */
-			if (use_cache)
-				mm->free_area_cache = addr + len;
+		if (!vma || addr + len <= vma->vm_start)
 			return addr;
-		}
-		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
 		addr = vma->vm_end;
 	}
 
-	/* Make sure we didn't miss any holes */
-	if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
-		start_addr = addr = TASK_UNMAPPED_BASE;
-		mm->cached_hole_size = 0;
-		goto full_search;
-	}
 	return -ENOMEM;
 }
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 					     unsigned long len,
 					     struct slice_mask available,
-					     int psize, int use_cache)
+					     int psize)
 {
 	struct vm_area_struct *vma;
 	unsigned long addr;
 	struct slice_mask mask;
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 
-	/* check if free_area_cache is useful for us */
-	if (use_cache) {
-		if (len <= mm->cached_hole_size) {
-			mm->cached_hole_size = 0;
-			mm->free_area_cache = mm->mmap_base;
-		}
-
-		/* either no address requested or can't fit in requested
-		 * address hole
-		 */
-		addr = mm->free_area_cache;
-
-		/* make sure it can fit in the remaining address space */
-		if (addr > len) {
-			addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
-			mask = slice_range_to_mask(addr, len);
-			if (slice_check_fit(mask, available) &&
-			    slice_area_is_free(mm, addr, len))
-					/* remember the address as a hint for
-					 * next time
-					 */
-					return (mm->free_area_cache = addr);
-		}
-	}
-
 	addr = mm->mmap_base;
 	while (addr > len) {
 		/* Go down by chunk size */
@@ -352,16 +305,8 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 		 * return with success:
 		 */
 		vma = find_vma(mm, addr);
-		if (!vma || (addr + len) <= vma->vm_start) {
-			/* remember the address as a hint for next time */
-			if (use_cache)
-				mm->free_area_cache = addr;
+		if (!vma || (addr + len) <= vma->vm_start)
 			return addr;
-		}
-
-		/* remember the largest hole we saw so far */
-		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
 
 		/* try just below the current vma->vm_start */
 		addr = vma->vm_start;
@@ -373,28 +318,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	addr = slice_find_area_bottomup(mm, len, available, psize, 0);
-
-	/*
-	 * Restore the topdown base:
-	 */
-	if (use_cache) {
-		mm->free_area_cache = mm->mmap_base;
-		mm->cached_hole_size = ~0UL;
-	}
-
-	return addr;
+	return slice_find_area_bottomup(mm, len, available, psize);
 }
 
 
 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 				     struct slice_mask mask, int psize,
-				     int topdown, int use_cache)
+				     int topdown)
 {
 	if (topdown)
-		return slice_find_area_topdown(mm, len, mask, psize, use_cache);
+		return slice_find_area_topdown(mm, len, mask, psize);
 	else
-		return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
+		return slice_find_area_bottomup(mm, len, mask, psize);
 }
 
 #define or_mask(dst, src)	do {			\
@@ -415,7 +350,7 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 
 unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 				      unsigned long flags, unsigned int psize,
-				      int topdown, int use_cache)
+				      int topdown)
 {
 	struct slice_mask mask = {0, 0};
 	struct slice_mask good_mask;
@@ -430,8 +365,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	BUG_ON(mm->task_size == 0);
 
 	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
-	slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
-		  addr, len, flags, topdown, use_cache);
+	slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
+		  addr, len, flags, topdown);
 
 	if (len > mm->task_size)
 		return -ENOMEM;
@@ -503,8 +438,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		/* Now let's see if we can find something in the existing
 		 * slices for that size
 		 */
-		newaddr = slice_find_area(mm, len, good_mask, psize, topdown,
-					  use_cache);
+		newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
 		if (newaddr != -ENOMEM) {
 			/* Found within the good mask, we don't have to setup,
 			 * we thus return directly
@@ -536,8 +470,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * anywhere in the good area.
 	 */
 	if (addr) {
-		addr = slice_find_area(mm, len, good_mask, psize, topdown,
-				       use_cache);
+		addr = slice_find_area(mm, len, good_mask, psize, topdown);
 		if (addr != -ENOMEM) {
 			slice_dbg(" found area at 0x%lx\n", addr);
 			return addr;
@@ -547,15 +480,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* Now let's see if we can find something in the existing slices
 	 * for that size plus free slices
 	 */
-	addr = slice_find_area(mm, len, potential_mask, psize, topdown,
-			       use_cache);
+	addr = slice_find_area(mm, len, potential_mask, psize, topdown);
 
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
 		or_mask(potential_mask, compat_mask);
 		addr = slice_find_area(mm, len, potential_mask, psize,
-				       topdown, use_cache);
+				       topdown);
 	}
 #endif
 
@@ -586,8 +518,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
 				     unsigned long flags)
 {
 	return slice_get_unmapped_area(addr, len, flags,
-				       current->mm->context.user_psize,
-				       0, 1);
+				       current->mm->context.user_psize, 0);
 }
 
 unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -597,8 +528,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 					     const unsigned long flags)
 {
 	return slice_get_unmapped_area(addr0, len, flags,
-				       current->mm->context.user_psize,
-				       1, 1);
+				       current->mm->context.user_psize, 1);
 }
 
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 0cfece4cf6ef..2eb4df2a9388 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -352,7 +352,7 @@ static unsigned long spufs_get_unmapped_area(struct file *file,
 
 	/* Else, try to obtain a 64K pages slice */
 	return slice_get_unmapped_area(addr, len, flags,
-				       MMU_PAGE_64K, 1, 0);
+				       MMU_PAGE_64K, 1);
 }
 #endif /* CONFIG_SPU_FS_64K_LS */
 
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 5/8] mm: use vm_unmapped_area() in hugetlbfs on ia64 architecture
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

Update the ia64 hugetlb_get_unmapped_area function to make use of
vm_unmapped_area() instead of implementing a brute force search.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/ia64/mm/hugetlbpage.c |   20 +++++++++-----------
 1 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 5ca674b74737..76069c18ee42 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -148,7 +148,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
-	struct vm_area_struct *vmm;
+	struct vm_unmapped_area_info info;
 
 	if (len > RGN_MAP_LIMIT)
 		return -ENOMEM;
@@ -165,16 +165,14 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u
 	/* This code assumes that RGN_HPAGE != 0. */
 	if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1)))
 		addr = HPAGE_REGION_BASE;
-	else
-		addr = ALIGN(addr, HPAGE_SIZE);
-	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-		/* At this point:  (!vmm || addr < vmm->vm_end). */
-		if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
-			return -ENOMEM;
-		if (!vmm || (addr + len) <= vmm->vm_start)
-			return addr;
-		addr = ALIGN(vmm->vm_end, HPAGE_SIZE);
-	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT;
+	info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1);
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
 }
 
 static int __init hugetlb_setup_sz(char *str)
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 4/8] mm: use vm_unmapped_area() on ia64 architecture
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

Update the ia64 arch_get_unmapped_area function to make use of
vm_unmapped_area() instead of implementing a brute force search.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/ia64/kernel/sys_ia64.c |   37 ++++++++++++-------------------------
 1 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index d9439ef2f661..41e33f84c185 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -25,9 +25,9 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 			unsigned long pgoff, unsigned long flags)
 {
 	long map_shared = (flags & MAP_SHARED);
-	unsigned long start_addr, align_mask = PAGE_SIZE - 1;
+	unsigned long align_mask = 0;
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_unmapped_area_info info;
 
 	if (len > RGN_MAP_LIMIT)
 		return -ENOMEM;
@@ -44,7 +44,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 		addr = 0;
 #endif
 	if (!addr)
-		addr = mm->free_area_cache;
+		addr = TASK_UNMAPPED_BASE;
 
 	if (map_shared && (TASK_SIZE > 0xfffffffful))
 		/*
@@ -53,28 +53,15 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 		 * tasks, we prefer to avoid exhausting the address space too quickly by
 		 * limiting alignment to a single page.
 		 */
-		align_mask = SHMLBA - 1;
-
-  full_search:
-	start_addr = addr = (addr + align_mask) & ~align_mask;
-
-	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
-			if (start_addr != TASK_UNMAPPED_BASE) {
-				/* Start a new search --- just in case we missed some holes.  */
-				addr = TASK_UNMAPPED_BASE;
-				goto full_search;
-			}
-			return -ENOMEM;
-		}
-		if (!vma || addr + len <= vma->vm_start) {
-			/* Remember the address where we stopped this search:  */
-			mm->free_area_cache = addr + len;
-			return addr;
-		}
-		addr = (vma->vm_end + align_mask) & ~align_mask;
-	}
+		align_mask = PAGE_MASK & (SHMLBA - 1);
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = align_mask;
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
 }
 
 asmlinkage long
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 3/8] mm: use vm_unmapped_area() on frv architecture
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

Update the frv arch_get_unmapped_area function to make use of
vm_unmapped_area() instead of implementing a brute force search.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/frv/mm/elf-fdpic.c |   49 ++++++++++++++++------------------------------
 1 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c
index 385fd30b142f..836f14707a62 100644
--- a/arch/frv/mm/elf-fdpic.c
+++ b/arch/frv/mm/elf-fdpic.c
@@ -60,7 +60,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 				     unsigned long pgoff, unsigned long flags)
 {
 	struct vm_area_struct *vma;
-	unsigned long limit;
+	struct vm_unmapped_area_info info;
 
 	if (len > TASK_SIZE)
 		return -ENOMEM;
@@ -79,39 +79,24 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
 	}
 
 	/* search between the bottom of user VM and the stack grow area */
-	addr = PAGE_SIZE;
-	limit = (current->mm->start_stack - 0x00200000);
-	if (addr + len <= limit) {
-		limit -= len;
-
-		if (addr <= limit) {
-			vma = find_vma(current->mm, PAGE_SIZE);
-			for (; vma; vma = vma->vm_next) {
-				if (addr > limit)
-					break;
-				if (addr + len <= vma->vm_start)
-					goto success;
-				addr = vma->vm_end;
-			}
-		}
-	}
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = (current->mm->start_stack - 0x00200000);
+	info.align_mask = 0;
+	info.align_offset = 0;
+	addr = vm_unmapped_area(&info);
+	if (!(addr & ~PAGE_MASK))
+		goto success;
+	VM_BUG_ON(addr != -ENOMEM);
 
 	/* search from just above the WorkRAM area to the top of memory */
-	addr = PAGE_ALIGN(0x80000000);
-	limit = TASK_SIZE - len;
-	if (addr <= limit) {
-		vma = find_vma(current->mm, addr);
-		for (; vma; vma = vma->vm_next) {
-			if (addr > limit)
-				break;
-			if (addr + len <= vma->vm_start)
-				goto success;
-			addr = vma->vm_end;
-		}
-
-		if (!vma && addr <= limit)
-			goto success;
-	}
+	info.low_limit = PAGE_ALIGN(0x80000000);
+	info.high_limit = TASK_SIZE;
+	addr = vm_unmapped_area(&info);
+	if (!(addr & ~PAGE_MASK))
+		goto success;
+	VM_BUG_ON(addr != -ENOMEM);
 
 #if 0
 	printk("[area] l=%lx (ENOMEM) f='%s'\n",
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 2/8] mm: use vm_unmapped_area() on alpha architecture
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

Update the alpha arch_get_unmapped_area function to make use of
vm_unmapped_area() instead of implementing a brute force search.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/alpha/kernel/osf_sys.c |   20 +++++++++-----------
 1 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 14db93e4c8a8..ba707e23ef37 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1298,17 +1298,15 @@ static unsigned long
 arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
 		         unsigned long limit)
 {
-	struct vm_area_struct *vma = find_vma(current->mm, addr);
-
-	while (1) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (limit - len < addr)
-			return -ENOMEM;
-		if (!vma || addr + len <= vma->vm_start)
-			return addr;
-		addr = vma->vm_end;
-		vma = vma->vm_next;
-	}
+	struct vm_unmapped_area_info info;
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = limit;
+	info.align_mask = 0;
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
 }
 
 unsigned long
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 1/8] mm: use vm_unmapped_area() on parisc architecture
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev
In-Reply-To: <1357694895-520-1-git-send-email-walken@google.com>

Update the parisc arch_get_unmapped_area function to make use of
vm_unmapped_area() instead of implementing a brute force search.

Signed-off-by: Michel Lespinasse <walken@google.com>

---
 arch/parisc/kernel/sys_parisc.c |   46 ++++++++++++++------------------------
 1 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index f76c10863c62..6ab138088076 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -35,18 +35,15 @@
 
 static unsigned long get_unshared_area(unsigned long addr, unsigned long len)
 {
-	struct vm_area_struct *vma;
+	struct vm_unmapped_area_info info;
 
-	addr = PAGE_ALIGN(addr);
-
-	for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (TASK_SIZE - len < addr)
-			return -ENOMEM;
-		if (!vma || addr + len <= vma->vm_start)
-			return addr;
-		addr = vma->vm_end;
-	}
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = PAGE_ALIGN(addr);
+	info.high_limit = TASK_SIZE;
+	info.align_mask = 0;
+	info.align_offset = 0;
+	return vm_unmapped_area(&info);
 }
 
 #define DCACHE_ALIGN(addr) (((addr) + (SHMLBA - 1)) &~ (SHMLBA - 1))
@@ -63,30 +60,21 @@ static unsigned long get_unshared_area(unsigned long addr, unsigned long len)
  */
 static int get_offset(struct address_space *mapping)
 {
-	int offset = (unsigned long) mapping << (PAGE_SHIFT - 8);
-	return offset & 0x3FF000;
+	return (unsigned long) mapping >> 8;
 }
 
 static unsigned long get_shared_area(struct address_space *mapping,
 		unsigned long addr, unsigned long len, unsigned long pgoff)
 {
-	struct vm_area_struct *vma;
-	int offset = mapping ? get_offset(mapping) : 0;
-
-	offset = (offset + (pgoff << PAGE_SHIFT)) & 0x3FF000;
+	struct vm_unmapped_area_info info;
 
-	addr = DCACHE_ALIGN(addr - offset) + offset;
-
-	for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
-		/* At this point:  (!vma || addr < vma->vm_end). */
-		if (TASK_SIZE - len < addr)
-			return -ENOMEM;
-		if (!vma || addr + len <= vma->vm_start)
-			return addr;
-		addr = DCACHE_ALIGN(vma->vm_end - offset) + offset;
-		if (addr < vma->vm_end) /* handle wraparound */
-			return -ENOMEM;
-	}
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = PAGE_ALIGN(addr);
+	info.high_limit = TASK_SIZE;
+	info.align_mask = PAGE_MASK & (SHMLBA - 1);
+	info.align_offset = (get_offset(mapping) + pgoff) << PAGE_SHIFT;
+	return vm_unmapped_area(&info);
 }
 
 unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
-- 
1.7.7.3

^ permalink raw reply related

* [PATCH 0/8] vm_unmapped_area: finish the mission
From: Michel Lespinasse @ 2013-01-09  1:28 UTC (permalink / raw)
  To: Rik van Riel, Benjamin Herrenschmidt, James E.J. Bottomley,
	Matt Turner, David Howells, Tony Luck
  Cc: linux-ia64, linux-parisc, linux-kernel, linux-mm, linux-alpha,
	Andrew Morton, linuxppc-dev

These patches, which apply on top of v3.8-rc kernels, are to complete the
VMA gap finding code I introduced (following Rik's initial proposal) in
v3.8-rc1.

The first 5 patches introduce the use of vm_unmapped_area() to replace
brute force searches on the parisc, alpha, frv and ia64 architectures
(all relatively trivial uses of the vm_unmapped_area() infrastructure).

Next 2 patches do the same as above for the powerpc architecture. This
change is not as trivial as for the other architectures, because we
need to account for each address space slice potentially having a
different page size.

The last patch removes the free_area_cache, which was used by all the
brute force searches before they got converted to the
vm_unmapped_area() infrastructure.

I did some basic testing on x86 and powerpc; however the first 5 (simpler)
patches for parisc, alpha, frv and ia64 architectures are untested.

Michel Lespinasse (8):
  mm: use vm_unmapped_area() on parisc architecture
  mm: use vm_unmapped_area() on alpha architecture
  mm: use vm_unmapped_area() on frv architecture
  mm: use vm_unmapped_area() on ia64 architecture
  mm: use vm_unmapped_area() in hugetlbfs on ia64 architecture
  mm: remove free_area_cache use in powerpc architecture
  mm: use vm_unmapped_area() on powerpc architecture
  mm: remove free_area_cache

 arch/alpha/kernel/osf_sys.c              |   20 ++--
 arch/arm/mm/mmap.c                       |    2 -
 arch/arm64/mm/mmap.c                     |    2 -
 arch/frv/mm/elf-fdpic.c                  |   49 +++----
 arch/ia64/kernel/sys_ia64.c              |   37 ++----
 arch/ia64/mm/hugetlbpage.c               |   20 ++--
 arch/mips/mm/mmap.c                      |    2 -
 arch/parisc/kernel/sys_parisc.c          |   46 +++----
 arch/powerpc/include/asm/page_64.h       |    3 +-
 arch/powerpc/mm/hugetlbpage.c            |    2 +-
 arch/powerpc/mm/mmap_64.c                |    2 -
 arch/powerpc/mm/slice.c                  |  228 +++++++++++++-----------------
 arch/powerpc/platforms/cell/spufs/file.c |    2 +-
 arch/s390/mm/mmap.c                      |    4 -
 arch/sparc/kernel/sys_sparc_64.c         |    2 -
 arch/tile/mm/mmap.c                      |    2 -
 arch/x86/ia32/ia32_aout.c                |    2 -
 arch/x86/mm/mmap.c                       |    2 -
 fs/binfmt_aout.c                         |    2 -
 fs/binfmt_elf.c                          |    2 -
 include/linux/mm_types.h                 |    3 -
 include/linux/sched.h                    |    2 -
 kernel/fork.c                            |    4 -
 mm/mmap.c                                |   28 ----
 mm/nommu.c                               |    4 -
 mm/util.c                                |    1 -
 26 files changed, 163 insertions(+), 310 deletions(-)

-- 
1.7.7.3

^ permalink raw reply

* [PATCH 4/4] powerpc: Optimise 64bit syscall auditing exit path
From: Anton Blanchard @ 2013-01-08 23:48 UTC (permalink / raw)
  To: eparis, viro, benh, paulus; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20130109104617.74e995a5@kryten>


Add an assembly fast path for the syscall audit exit path on
64bit. Some distros enable auditing by default which forces us
through the syscall auditing path even if there are no rules.

With syscall auditing enabled we currently disable interrupts,
check the threadinfo flags then immediately re-enable interrupts
and call audit_syscall_exit. This patch splits the threadinfo
flag check into two so we can avoid the disable/reenable of
interrupts when handling trace flags. We must do the user work
flag check with interrupts off to avoid returning to userspace
without handling them.

The other big gain is that we don't have to save and restore
the non volatile registers or exit via the slow ret_from_except
path.

I wrote some test cases to validate the patch:

http://ozlabs.org/~anton/junkcode/audit_tests.tar.gz

And to test the performance I ran a simple null syscall
microbenchmark on a POWER7 box:

http://ozlabs.org/~anton/junkcode/null_syscall.c

Baseline: 920.6 cycles
Patched:  719.6 cycles

An improvement of 22%.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: b/arch/powerpc/kernel/entry_64.S
===================================================================
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -195,6 +195,19 @@ syscall_exit:
 	andi.	r10,r8,MSR_RI
 	beq-	unrecov_restore
 #endif
+
+	/* We can handle some thread info flags with interrupts on */
+	ld	r9,TI_FLAGS(r12)
+	li	r11,-_LAST_ERRNO
+	andi.   r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_PERSYSCALL_MASK)
+	bne	syscall_exit_work
+
+	cmpld	r3,r11
+	ld	r5,_CCR(r1)
+	bge-	syscall_error
+
+.Lsyscall_exit_work_cont:
+
 	/*
 	 * Disable interrupts so current_thread_info()->flags can't change,
 	 * and so that we don't get interrupted after loading SRR0/1.
@@ -208,21 +221,19 @@ syscall_exit:
 	 * clear EE. We only need to clear RI just before we restore r13
 	 * below, but batching it with EE saves us one expensive mtmsrd call.
 	 * We have to be careful to restore RI if we branch anywhere from
-	 * here (eg syscall_exit_work).
+	 * here (eg syscall_exit_user_work).
 	 */
 	li	r9,MSR_RI
 	andc	r11,r10,r9
 	mtmsrd	r11,1
 #endif /* CONFIG_PPC_BOOK3E */
 
+	/* Recheck thread info flags with interrupts off */
 	ld	r9,TI_FLAGS(r12)
-	li	r11,-_LAST_ERRNO
-	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
-	bne-	syscall_exit_work
-	cmpld	r3,r11
-	ld	r5,_CCR(r1)
-	bge-	syscall_error
-.Lsyscall_error_cont:
+
+	andi.   r0,r9,_TIF_USER_WORK_MASK
+	bne-	syscall_exit_user_work
+
 	ld	r7,_NIP(r1)
 BEGIN_FTR_SECTION
 	stdcx.	r0,0,r1			/* to clear the reservation */
@@ -246,7 +257,7 @@ syscall_error:
 	oris	r5,r5,0x1000	/* Set SO bit in CR */
 	neg	r3,r3
 	std	r5,_CCR(r1)
-	b	.Lsyscall_error_cont
+	b	.Lsyscall_exit_work_cont
 	
 /* Traced system call support */
 syscall_dotrace:
@@ -306,58 +317,79 @@ audit_entry:
 syscall_enosys:
 	li	r3,-ENOSYS
 	b	syscall_exit
-	
+
 syscall_exit_work:
-#ifdef CONFIG_PPC_BOOK3S
-	mtmsrd	r10,1		/* Restore RI */
-#endif
-	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
-	 If TIF_NOERROR is set, just save r3 as it is. */
+	li	r6,1		/* r6 contains syscall success */
+	mr	r7,r3
+	ld	r5,_CCR(r1)
 
+	/*
+	 * If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
+	 * If TIF_NOERROR is set, just save r3 as it is.
+	 */
 	andi.	r0,r9,_TIF_RESTOREALL
 	beq+	0f
 	REST_NVGPRS(r1)
 	b	2f
-0:	cmpld	r3,r11		/* r10 is -LAST_ERRNO */
+0:	cmpld	r3,r11		/* r11 is -LAST_ERRNO */
 	blt+	1f
 	andi.	r0,r9,_TIF_NOERROR
 	bne-	1f
-	ld	r5,_CCR(r1)
+	li	r6,0		/* syscall failed */
 	neg	r3,r3
 	oris	r5,r5,0x1000	/* Set SO bit in CR */
 	std	r5,_CCR(r1)
 1:	std	r3,GPR3(r1)
-2:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
+
+2:	andi.	r0,r9,_TIF_SYSCALL_AUDIT
 	beq	4f
 
-	/* Clear per-syscall TIF flags if any are set.  */
+	mr	r3,r6
+	mr	r4,r7
+	bl	.__audit_syscall_exit
+	CURRENT_THREAD_INFO(r12, r1)
+	ld	r9,TI_FLAGS(r12)
+	ld	r3,GPR3(r1)
+	ld	r5,_CCR(r1)
+	ld	r8,_MSR(r1)
+
+4:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
+	beq	6f
 
+	/* Clear per-syscall TIF flags if any are set.  */
 	li	r11,_TIF_PERSYSCALL_MASK
 	addi	r12,r12,TI_FLAGS
-3:	ldarx	r10,0,r12
+5:	ldarx	r10,0,r12
 	andc	r10,r10,r11
 	stdcx.	r10,0,r12
-	bne-	3b
+	bne-	5b
 	subi	r12,r12,TI_FLAGS
 
-4:	/* Anything else left to do? */
-	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
-	beq	.ret_from_except_lite
+	/*
+	 * We can use the fast path if no other trace flags are on and
+	 * _TIF_RESTOREALL wasn't set.
+	 */
+6:      andi.   r0,r9,((_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_RESTOREALL) & ~_TIF_SYSCALL_AUDIT)
+	mr	r9,r10
+	beq	.Lsyscall_exit_work_cont
 
-	/* Re-enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
-	wrteei	1
-#else
-	ld	r10,PACAKMSR(r13)
-	ori	r10,r10,MSR_EE
-	mtmsrd	r10,1
-#endif /* CONFIG_PPC_BOOK3E */
+	andi.	r0,r9,((_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) & ~_TIF_SYSCALL_AUDIT)
+	beq	7f
 
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.do_syscall_trace_leave
 	b	.ret_from_except
 
+7:	b	.ret_from_except_lite
+
+syscall_exit_user_work:
+#ifdef CONFIG_PPC_BOOK3S
+	mtmsrd	r10,1		/* Restore RI */
+#endif
+	std	r3,GPR3(r1)
+	b	.ret_from_except_lite
+
 /* Save non-volatile GPRs, if not already saved. */
 _GLOBAL(save_nvgprs)
 	ld	r11,_TRAP(r1)
Index: b/arch/powerpc/kernel/ptrace.c
===================================================================
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1781,7 +1781,9 @@ void do_syscall_trace_leave(struct pt_re
 {
 	int step;
 
+#ifdef CONFIG_PPC32
 	audit_syscall_exit(regs);
+#endif
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_exit(regs, regs->result);

^ permalink raw reply

* [PATCH 3/4] powerpc: Optimise 64bit syscall auditing entry path
From: Anton Blanchard @ 2013-01-08 23:48 UTC (permalink / raw)
  To: eparis, viro, benh, paulus; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20130109104617.74e995a5@kryten>


Add an assembly fast path for the syscall audit entry path on
64bit. Some distros enable auditing by default which forces us
through the syscall auditing path even if there are no rules.

I wrote some test cases to validate the patch:

http://ozlabs.org/~anton/junkcode/audit_tests.tar.gz

And to test the performance I ran a simple null syscall
microbenchmark on a POWER7 box:

http://ozlabs.org/~anton/junkcode/null_syscall.c

Baseline: 949.2 cycles
Patched:  920.6 cycles

An improvement of 3%. Most of the potential gains are masked by
the syscall audit exit path which will be fixed in a
subsequent patch.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: b/arch/powerpc/kernel/entry_64.S
===================================================================
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -34,6 +34,12 @@
 #include <asm/ftrace.h>
 #include <asm/hw_irq.h>
 
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_PPC		(EM_PPC)
+#define AUDIT_ARCH_PPC64	(EM_PPC64|__AUDIT_ARCH_64BIT)
+#define __AUDIT_ARCH_64BIT 0x80000000
+
 /*
  * System calls.
  */
@@ -244,6 +250,10 @@ syscall_error:
 	
 /* Traced system call support */
 syscall_dotrace:
+#ifdef CONFIG_AUDITSYSCALL
+	andi.	r11,r10,(_TIF_SYSCALL_T_OR_A & ~_TIF_SYSCALL_AUDIT)
+	beq	audit_entry
+#endif
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.do_syscall_trace_enter
@@ -253,6 +263,7 @@ syscall_dotrace:
 	 * for the call number to look up in the table (r0).
 	 */
 	mr	r0,r3
+.Laudit_entry_return:
 	ld	r3,GPR3(r1)
 	ld	r4,GPR4(r1)
 	ld	r5,GPR5(r1)
@@ -264,6 +275,34 @@ syscall_dotrace:
 	ld	r10,TI_FLAGS(r10)
 	b	.Lsyscall_dotrace_cont
 
+#ifdef CONFIG_AUDITSYSCALL
+audit_entry:
+	ld	r4,GPR0(r1)
+	ld	r5,GPR3(r1)
+	ld	r6,GPR4(r1)
+	ld	r7,GPR5(r1)
+	ld	r8,GPR6(r1)
+
+	andi.	r11,r10,_TIF_32BIT
+	beq	1f
+
+	lis	r3,AUDIT_ARCH_PPC@h
+	ori	r3,r3,AUDIT_ARCH_PPC@l
+	clrldi	r5,r5,32
+	clrldi	r6,r6,32
+	clrldi	r7,r7,32
+	clrldi	r8,r8,32
+	bl	.__audit_syscall_entry
+	ld	r0,GPR0(r1)
+	b	.Laudit_entry_return
+
+1:	lis	r3,AUDIT_ARCH_PPC64@h
+	ori	r3,r3,AUDIT_ARCH_PPC64@l
+	bl	.__audit_syscall_entry
+	ld	r0,GPR0(r1)
+	b	.Laudit_entry_return
+#endif
+
 syscall_enosys:
 	li	r3,-ENOSYS
 	b	syscall_exit

^ permalink raw reply

* [PATCH 2/4] powerpc: Remove static branch prediction in 64bit traced syscall path
From: Anton Blanchard @ 2013-01-08 23:47 UTC (permalink / raw)
  To: eparis, viro, benh, paulus; +Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20130109104617.74e995a5@kryten>


Some distros enable auditing by default which forces us through the
syscall trace path. Remove the static branch prediction in our 64bit
syscall handler and let the hardware do the prediction.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: b/arch/powerpc/kernel/entry_64.S
===================================================================
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -149,7 +149,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLP
 	CURRENT_THREAD_INFO(r11, r1)
 	ld	r10,TI_FLAGS(r11)
 	andi.	r11,r10,_TIF_SYSCALL_T_OR_A
-	bne-	syscall_dotrace
+	bne	syscall_dotrace
 .Lsyscall_dotrace_cont:
 	cmpldi	0,r0,NR_syscalls
 	bge-	syscall_enosys

^ permalink raw reply

* [PATCH 1/4] audit: Syscall rules are not applied to existing processes on non-x86
From: Anton Blanchard @ 2013-01-08 23:46 UTC (permalink / raw)
  To: eparis, viro, benh, paulus; +Cc: linuxppc-dev, linux-kernel


Commit b05d8447e782 (audit: inline audit_syscall_entry to reduce
burden on archs) changed audit_syscall_entry to check for a dummy
context before calling __audit_syscall_entry. Unfortunately the dummy
context state is maintained in __audit_syscall_entry so once set it
never gets cleared, even if the audit rules change.

As a result, if there are no auditing rules when a process starts
then it will never be subject to any rules added later. x86 doesn't
see this because it has an assembly fast path that calls directly into
__audit_syscall_entry.

I noticed this issue when working on audit performance optimisations.
I wrote a set of simple test cases available at:

http://ozlabs.org/~anton/junkcode/audit_tests.tar.gz

02_new_rule.py fails without the patch and passes with it. The
test case clears all rules, starts a process, adds a rule then
verifies the process produces a syscall audit record.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: <stable@kernel.org> # 3.3+
---

Index: b/include/linux/audit.h
===================================================================
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -119,7 +119,7 @@ static inline void audit_syscall_entry(i
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
 {
-	if (unlikely(!audit_dummy_context()))
+	if (unlikely(current->audit_context))
 		__audit_syscall_entry(arch, major, a0, a1, a2, a3);
 }
 static inline void audit_syscall_exit(void *pt_regs)

^ permalink raw reply

* Re: [PATCH 5/5] kfifo: log based kfifo API
From: Andy Walls @ 2013-01-08 21:10 UTC (permalink / raw)
  To: Dmitry Torokhov, Yuanhan Liu
  Cc: linux-iio, linux-mmc, platform-driver-x86, linux-mm, linux-sctp,
	linux-mtd, devel, linux-scsi, libertas-dev, linux-rdma,
	Stefani Seibold, linux-serial, linux-pci, open-iscsi, linux-media,
	linux-input, linux-omap, netdev, linux-usb, linux-wireless,
	linux-kernel, dccp, Andrew Morton, linuxppc-dev
In-Reply-To: <20130108181645.GA7972@core.coreip.homeip.net>

Dmitry Torokhov <dmitry.torokhov@gmail.com> wrote:

>Hi Yuanhan,
>
>On Tue, Jan 08, 2013 at 10:57:53PM +0800, Yuanhan Liu wrote:
>> The current kfifo API take the kfifo size as input, while it rounds
>>  _down_ the size to power of 2 at __kfifo_alloc. This may introduce
>> potential issue.
>> 
>> Take the code at drivers/hid/hid-logitech-dj.c as example:
>> 
>> 	if (kfifo_alloc(&djrcv_dev->notif_fifo,
>>                        DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct
>dj_report),
>>                        GFP_KERNEL)) {
>> 
>> Where, DJ_MAX_NUMBER_NOTIFICATIONS is 8, and sizeo of(struct
>dj_report)
>> is 15.
>> 
>> Which means it wants to allocate a kfifo buffer which can store 8
>> dj_report entries at once. The expected kfifo buffer size would be
>> 8 * 15 = 120 then. While, in the end, __kfifo_alloc will turn the
>> size to rounddown_power_of_2(120) =  64, and then allocate a buf
>> with 64 bytes, which I don't think this is the original author want.
>> 
>> With the new log API, we can do like following:
>> 
>> 	int kfifo_size_order = order_base_2(DJ_MAX_NUMBER_NOTIFICATIONS *
>> 					    sizeof(struct dj_report));
>> 
>> 	if (kfifo_alloc(&djrcv_dev->notif_fifo, kfifo_size_order,
>GFP_KERNEL)) {
>> 
>> This make sure we will allocate enough kfifo buffer for holding
>> DJ_MAX_NUMBER_NOTIFICATIONS dj_report entries.
>
>Why don't you simply change __kfifo_alloc to round the allocation up
>instead of down?
>
>Thanks.
>
>-- 
>Dmitry
>--
>To unsubscribe from this list: send the line "unsubscribe linux-media"
>in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

Hi Dmitry,

I agree. I don't see the benefit in pushing a change for a kfifo-internal decision/problem out to many different places in the kernel.

Regards,
Andy

 

^ permalink raw reply

* Re: [PATCH 5/5] kfifo: log based kfifo API
From: Dmitry Torokhov @ 2013-01-08 18:16 UTC (permalink / raw)
  To: Yuanhan Liu
  Cc: linux-iio, linux-mmc, platform-driver-x86, linux-mm, linux-sctp,
	linux-mtd, devel, linux-scsi, libertas-dev, linux-rdma,
	Stefani Seibold, linux-serial, linux-pci, open-iscsi, linux-media,
	linux-input, linux-omap, netdev, linux-usb, linux-wireless,
	linux-kernel, dccp, Andrew Morton, linuxppc-dev
In-Reply-To: <1357657073-27352-6-git-send-email-yuanhan.liu@linux.intel.com>

Hi Yuanhan,

On Tue, Jan 08, 2013 at 10:57:53PM +0800, Yuanhan Liu wrote:
> The current kfifo API take the kfifo size as input, while it rounds
>  _down_ the size to power of 2 at __kfifo_alloc. This may introduce
> potential issue.
> 
> Take the code at drivers/hid/hid-logitech-dj.c as example:
> 
> 	if (kfifo_alloc(&djrcv_dev->notif_fifo,
>                        DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct dj_report),
>                        GFP_KERNEL)) {
> 
> Where, DJ_MAX_NUMBER_NOTIFICATIONS is 8, and sizeo of(struct dj_report)
> is 15.
> 
> Which means it wants to allocate a kfifo buffer which can store 8
> dj_report entries at once. The expected kfifo buffer size would be
> 8 * 15 = 120 then. While, in the end, __kfifo_alloc will turn the
> size to rounddown_power_of_2(120) =  64, and then allocate a buf
> with 64 bytes, which I don't think this is the original author want.
> 
> With the new log API, we can do like following:
> 
> 	int kfifo_size_order = order_base_2(DJ_MAX_NUMBER_NOTIFICATIONS *
> 					    sizeof(struct dj_report));
> 
> 	if (kfifo_alloc(&djrcv_dev->notif_fifo, kfifo_size_order, GFP_KERNEL)) {
> 
> This make sure we will allocate enough kfifo buffer for holding
> DJ_MAX_NUMBER_NOTIFICATIONS dj_report entries.

Why don't you simply change __kfifo_alloc to round the allocation up
instead of down?

Thanks.

-- 
Dmitry

^ permalink raw reply

* [PATCH 5/5] kfifo: log based kfifo API
From: Yuanhan Liu @ 2013-01-08 14:57 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-iio, linux-mmc, platform-driver-x86, linux-mm, linux-sctp,
	linux-mtd, devel, linux-scsi, libertas-dev, linux-rdma,
	Stefani Seibold, linux-serial, linux-pci, open-iscsi, linux-media,
	Yuanhan Liu, linux-input, linux-omap, netdev, linux-usb,
	linux-wireless, dccp, Andrew Morton, linuxppc-dev
In-Reply-To: <1357657073-27352-1-git-send-email-yuanhan.liu@linux.intel.com>

The current kfifo API takes the kfifo size as input, but rounds the
size _down_ to a power of 2 in __kfifo_alloc. This may introduce
potential issues.

Take the code at drivers/hid/hid-logitech-dj.c as example:

	if (kfifo_alloc(&djrcv_dev->notif_fifo,
                       DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct dj_report),
                       GFP_KERNEL)) {

Where DJ_MAX_NUMBER_NOTIFICATIONS is 8, and sizeof(struct dj_report)
is 15.

This means it wants to allocate a kfifo buffer that can store 8
dj_report entries at once, so the expected kfifo buffer size would be
8 * 15 = 120 bytes. In the end, however, __kfifo_alloc will turn the
size into rounddown_power_of_2(120) = 64 and then allocate a buffer
of 64 bytes, which I don't think is what the original author wanted.

With the new log-based API, we can do the following:

	int kfifo_size_order = order_base_2(DJ_MAX_NUMBER_NOTIFICATIONS *
					    sizeof(struct dj_report));

	if (kfifo_alloc(&djrcv_dev->notif_fifo, kfifo_size_order, GFP_KERNEL)) {

This makes sure we allocate a kfifo buffer large enough to hold
DJ_MAX_NUMBER_NOTIFICATIONS dj_report entries.

Cc: Stefani Seibold <stefani@seibold.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-omap@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-input@vger.kernel.org
Cc: linux-iio@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: linux-media@vger.kernel.org
Cc: linux-mmc@vger.kernel.org
Cc: linux-mtd@lists.infradead.org
Cc: libertas-dev@lists.infradead.org
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Cc: open-iscsi@googlegroups.com
Cc: linux-scsi@vger.kernel.org
Cc: devel@driverdev.osuosl.org
Cc: linux-serial@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: dccp@vger.kernel.org
Cc: linux-sctp@vger.kernel.org
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 arch/arm/plat-omap/Kconfig                  |    2 +-
 arch/arm/plat-omap/mailbox.c                |    6 +++-
 arch/powerpc/sysdev/fsl_rmu.c               |    2 +-
 drivers/char/sonypi.c                       |    9 ++++---
 drivers/hid/hid-logitech-dj.c               |    7 +++--
 drivers/iio/industrialio-event.c            |    2 +-
 drivers/iio/kfifo_buf.c                     |    3 +-
 drivers/infiniband/hw/cxgb3/cxio_resource.c |    8 ++++--
 drivers/media/i2c/cx25840/cx25840-ir.c      |    9 +++++--
 drivers/media/pci/cx23885/cx23888-ir.c      |    9 +++++--
 drivers/media/pci/meye/meye.c               |    6 +---
 drivers/media/pci/meye/meye.h               |    2 +
 drivers/media/rc/ir-raw.c                   |    7 +++--
 drivers/memstick/host/r592.h                |    2 +-
 drivers/mmc/card/sdio_uart.c                |    4 ++-
 drivers/mtd/sm_ftl.c                        |    5 +++-
 drivers/net/wireless/libertas/main.c        |    4 ++-
 drivers/net/wireless/rt2x00/rt2x00dev.c     |    5 +--
 drivers/pci/pcie/aer/aerdrv_core.c          |    3 +-
 drivers/platform/x86/fujitsu-laptop.c       |    5 ++-
 drivers/platform/x86/sony-laptop.c          |    6 ++--
 drivers/rapidio/devices/tsi721.c            |    5 ++-
 drivers/scsi/libiscsi_tcp.c                 |    6 +++-
 drivers/staging/omapdrm/omap_plane.c        |    5 +++-
 drivers/tty/n_gsm.c                         |    4 ++-
 drivers/tty/nozomi.c                        |    5 +--
 drivers/tty/serial/ifx6x60.c                |    2 +-
 drivers/tty/serial/ifx6x60.h                |    3 +-
 drivers/tty/serial/kgdb_nmi.c               |    7 +++--
 drivers/usb/host/fhci.h                     |    4 ++-
 drivers/usb/serial/cypress_m8.c             |    4 +-
 drivers/usb/serial/io_ti.c                  |    4 +-
 drivers/usb/serial/ti_usb_3410_5052.c       |    7 +++--
 drivers/usb/serial/usb-serial.c             |    2 +-
 include/linux/kfifo.h                       |   31 +++++++++++++--------------
 include/linux/rio.h                         |    1 +
 include/media/lirc_dev.h                    |    4 ++-
 kernel/kfifo.c                              |    9 +------
 mm/memory-failure.c                         |    3 +-
 net/dccp/probe.c                            |    6 +++-
 net/sctp/probe.c                            |    6 +++-
 samples/kfifo/bytestream-example.c          |    8 +++---
 samples/kfifo/dma-example.c                 |    5 ++-
 samples/kfifo/inttype-example.c             |    7 +++--
 samples/kfifo/record-example.c              |    6 ++--
 45 files changed, 142 insertions(+), 108 deletions(-)

diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index 665870d..7eda02c 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -124,7 +124,7 @@ config OMAP_MBOX_FWK
 	  DSP, IVA1.0 and IVA2 in OMAP1/2/3.
 
 config OMAP_MBOX_KFIFO_SIZE
-	int "Mailbox kfifo default buffer size (bytes)"
+	int "Mailbox kfifo default buffer size (bytes, should be power of 2. If not, will roundup to power of 2)"
 	depends on OMAP_MBOX_FWK
 	default 256
 	help
diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 42377ef..848fa0b 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -30,6 +30,7 @@
 #include <linux/err.h>
 #include <linux/notifier.h>
 #include <linux/module.h>
+#include <linux/log2.h>
 
 #include <plat/mailbox.h>
 
@@ -40,7 +41,7 @@ static DEFINE_MUTEX(mbox_configured_lock);
 
 static unsigned int mbox_kfifo_size = CONFIG_OMAP_MBOX_KFIFO_SIZE;
 module_param(mbox_kfifo_size, uint, S_IRUGO);
-MODULE_PARM_DESC(mbox_kfifo_size, "Size of omap's mailbox kfifo (bytes)");
+MODULE_PARM_DESC(mbox_kfifo_size, "Size of omap's mailbox kfifo (bytes, should be power of 2. If not, will roundup to power of 2)");
 
 /* Mailbox FIFO handle functions */
 static inline mbox_msg_t mbox_fifo_read(struct omap_mbox *mbox)
@@ -218,6 +219,7 @@ static struct omap_mbox_queue *mbox_queue_alloc(struct omap_mbox *mbox,
 					void (*tasklet)(unsigned long))
 {
 	struct omap_mbox_queue *mq;
+	int mbox_kfifo_size_order = order_base_2(mbox_kfifo_size);
 
 	mq = kzalloc(sizeof(struct omap_mbox_queue), GFP_KERNEL);
 	if (!mq)
@@ -225,7 +227,7 @@ static struct omap_mbox_queue *mbox_queue_alloc(struct omap_mbox *mbox,
 
 	spin_lock_init(&mq->lock);
 
-	if (kfifo_alloc(&mq->fifo, mbox_kfifo_size, GFP_KERNEL))
+	if (kfifo_alloc(&mq->fifo, mbox_kfifo_size_order, GFP_KERNEL))
 		goto error;
 
 	if (work)
diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c
index 14bd522..84d2b8c 100644
--- a/arch/powerpc/sysdev/fsl_rmu.c
+++ b/arch/powerpc/sysdev/fsl_rmu.c
@@ -587,7 +587,7 @@ int fsl_rio_port_write_init(struct fsl_rio_pw *pw)
 
 	INIT_WORK(&pw->pw_work, fsl_pw_dpc);
 	spin_lock_init(&pw->pw_fifo_lock);
-	if (kfifo_alloc(&pw->pw_fifo, RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
+	if (kfifo_alloc(&pw->pw_fifo, RIO_KFIFO_SIZE_ORDER, GFP_KERNEL)) {
 		pr_err("FIFO allocation failed\n");
 		rc = -ENOMEM;
 		goto err_out_irq;
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index d780295..39d8dd7 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -429,7 +429,7 @@ static struct sonypi_eventtypes {
 	{ 0 }
 };
 
-#define SONYPI_BUF_SIZE	128
+#define SONYPI_KFIFO_SIZE_ORDER		7
 
 /* Correspondance table between sonypi events and input layer events */
 static struct {
@@ -1316,7 +1316,8 @@ static int sonypi_probe(struct platform_device *dev)
 			"http://www.linux.it/~malattia/wiki/index.php/Sony_drivers\n");
 
 	spin_lock_init(&sonypi_device.fifo_lock);
-	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_BUF_SIZE, GFP_KERNEL);
+	error = kfifo_alloc(&sonypi_device.fifo, SONYPI_KFIFO_SIZE_ORDER,
+			GFP_KERNEL);
 	if (error) {
 		printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
 		return error;
@@ -1395,8 +1396,8 @@ static int sonypi_probe(struct platform_device *dev)
 		}
 
 		spin_lock_init(&sonypi_device.input_fifo_lock);
-		error = kfifo_alloc(&sonypi_device.input_fifo, SONYPI_BUF_SIZE,
-				GFP_KERNEL);
+		error = kfifo_alloc(&sonypi_device.input_fifo,
+				SONYPI_KFIFO_SIZE_ORDER, GFP_KERNEL);
 		if (error) {
 			printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
 			goto err_inpdev_unregister;
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 9500f2f..031be77 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -26,6 +26,7 @@
 #include <linux/hid.h>
 #include <linux/module.h>
 #include <linux/usb.h>
+#include <linux/log2.h>
 #include <asm/unaligned.h>
 #include "usbhid/usbhid.h"
 #include "hid-ids.h"
@@ -730,6 +731,8 @@ static int logi_dj_probe(struct hid_device *hdev,
 	struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
 	struct dj_receiver_dev *djrcv_dev;
 	int retval;
+	int kfifo_size_order = order_base_2(DJ_MAX_NUMBER_NOTIFICATIONS *
+					    sizeof(struct dj_report));
 
 	if (is_dj_device((struct dj_device *)hdev->driver_data))
 		return -ENODEV;
@@ -757,9 +760,7 @@ static int logi_dj_probe(struct hid_device *hdev,
 	djrcv_dev->hdev = hdev;
 	INIT_WORK(&djrcv_dev->work, delayedwork_callback);
 	spin_lock_init(&djrcv_dev->lock);
-	if (kfifo_alloc(&djrcv_dev->notif_fifo,
-			DJ_MAX_NUMBER_NOTIFICATIONS * sizeof(struct dj_report),
-			GFP_KERNEL)) {
+	if (kfifo_alloc(&djrcv_dev->notif_fifo, kfifo_size_order, GFP_KERNEL)) {
 		dev_err(&hdev->dev,
 			"%s:failed allocating notif_fifo\n", __func__);
 		kfree(djrcv_dev);
diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index 261cae0..9b73680 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -35,7 +35,7 @@
  */
 struct iio_event_interface {
 	wait_queue_head_t	wait;
-	DECLARE_KFIFO(det_events, struct iio_event_data, 16);
+	DECLARE_KFIFO(det_events, struct iio_event_data, 4);
 
 	struct list_head	dev_attr_list;
 	unsigned long		flags;
diff --git a/drivers/iio/kfifo_buf.c b/drivers/iio/kfifo_buf.c
index 5bc5c86..d8ba52ff 100644
--- a/drivers/iio/kfifo_buf.c
+++ b/drivers/iio/kfifo_buf.c
@@ -7,6 +7,7 @@
 #include <linux/mutex.h>
 #include <linux/iio/kfifo_buf.h>
 #include <linux/sched.h>
+#include <linux/log2.h>
 
 struct iio_kfifo {
 	struct iio_buffer buffer;
@@ -23,7 +24,7 @@ static inline int __iio_allocate_kfifo(struct iio_kfifo *buf,
 		return -EINVAL;
 
 	__iio_update_buffer(&buf->buffer, bytes_per_datum, length);
-	return __kfifo_alloc((struct __kfifo *)&buf->kf, length,
+	return __kfifo_alloc((struct __kfifo *)&buf->kf, order_base_2(length),
 			     bytes_per_datum, GFP_KERNEL);
 }
 
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 31f9201..186d05e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -36,6 +36,7 @@
 #include <linux/kfifo.h>
 #include <linux/spinlock.h>
 #include <linux/errno.h>
+#include <linux/log2.h>
 #include "cxio_resource.h"
 #include "cxio_hal.h"
 
@@ -54,8 +55,9 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
 	u32 random_bytes;
 	u32 rarray[16];
 	spin_lock_init(fifo_lock);
+	int kfifo_size_order = order_base_2(nr * sizeof(u32));
 
-	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
+	if (kfifo_alloc(fifo, kfifo_size_order, GFP_KERNEL))
 		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
@@ -111,11 +113,11 @@ static int cxio_init_resource_fifo_random(struct kfifo *fifo,
 static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 {
 	u32 i;
+	int kfifo_size_order = order_base_2(T3_MAX_NUM_QP * sizeof(u32));
 
 	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
 
-	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
-					      GFP_KERNEL))
+	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, kfifo_size_order, GFP_KERNEL))
 		return -ENOMEM;
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
diff --git a/drivers/media/i2c/cx25840/cx25840-ir.c b/drivers/media/i2c/cx25840/cx25840-ir.c
index 38ce76e..1da0b6c 100644
--- a/drivers/media/i2c/cx25840/cx25840-ir.c
+++ b/drivers/media/i2c/cx25840/cx25840-ir.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/kfifo.h>
 #include <linux/module.h>
+#include <linux/log2.h>
 #include <media/cx25840.h>
 #include <media/rc-core.h>
 
@@ -106,8 +107,10 @@ union cx25840_ir_fifo_rec {
 	struct ir_raw_event ir_core_data;
 };
 
-#define CX25840_IR_RX_KFIFO_SIZE    (256 * sizeof(union cx25840_ir_fifo_rec))
-#define CX25840_IR_TX_KFIFO_SIZE    (256 * sizeof(union cx25840_ir_fifo_rec))
+#define CX25840_IR_RX_KFIFO_SIZE_ORDER	(order_base_2(256 * sizeof(union cx25840_ir_fifo_rec)))
+#define CX25840_IR_RX_KFIFO_SIZE    	(1<<CX25840_IR_RX_KFIFO_SIZE_ORDER)
+#define CX25840_IR_TX_KFIFO_SIZE_ORDER	(order_base_2(256 * sizeof(union cx25840_ir_fifo_rec)))
+#define CX25840_IR_TX_KFIFO_SIZE    	(1<<CX25840_IR_TX_KFIFO_SIZE_ORDER)
 
 struct cx25840_ir_state {
 	struct i2c_client *c;
@@ -1236,7 +1239,7 @@ int cx25840_ir_probe(struct v4l2_subdev *sd)
 
 	spin_lock_init(&ir_state->rx_kfifo_lock);
 	if (kfifo_alloc(&ir_state->rx_kfifo,
-			CX25840_IR_RX_KFIFO_SIZE, GFP_KERNEL)) {
+			CX25840_IR_RX_KFIFO_SIZE_ORDER, GFP_KERNEL)) {
 		kfree(ir_state);
 		return -ENOMEM;
 	}
diff --git a/drivers/media/pci/cx23885/cx23888-ir.c b/drivers/media/pci/cx23885/cx23888-ir.c
index c4bd1e9..4c6e24b 100644
--- a/drivers/media/pci/cx23885/cx23888-ir.c
+++ b/drivers/media/pci/cx23885/cx23888-ir.c
@@ -23,6 +23,7 @@
 
 #include <linux/kfifo.h>
 #include <linux/slab.h>
+#include <linux/log2.h>
 
 #include <media/v4l2-device.h>
 #include <media/v4l2-chip-ident.h>
@@ -125,8 +126,10 @@ union cx23888_ir_fifo_rec {
 	struct ir_raw_event ir_core_data;
 };
 
-#define CX23888_IR_RX_KFIFO_SIZE    (256 * sizeof(union cx23888_ir_fifo_rec))
-#define CX23888_IR_TX_KFIFO_SIZE    (256 * sizeof(union cx23888_ir_fifo_rec))
+#define CX23888_IR_RX_KFIFO_SIZE_ORDER	(order_base_2(256 * sizeof(union cx23888_ir_fifo_rec)))
+#define CX23888_IR_RX_KFIFO_SIZE    	(1<<CX23888_IR_RX_KFIFO_SIZE_ORDER)
+#define CX23888_IR_TX_KFIFO_SIZE_ORDER	(order_base_2(256 * sizeof(union cx23888_ir_fifo_rec)))
+#define CX23888_IR_TX_KFIFO_SIZE    	(1<<CX23888_IR_TX_KFIFO_SIZE_ORDER)
 
 struct cx23888_ir_state {
 	struct v4l2_subdev sd;
@@ -1213,7 +1216,7 @@ int cx23888_ir_probe(struct cx23885_dev *dev)
 		return -ENOMEM;
 
 	spin_lock_init(&state->rx_kfifo_lock);
-	if (kfifo_alloc(&state->rx_kfifo, CX23888_IR_RX_KFIFO_SIZE, GFP_KERNEL))
+	if (kfifo_alloc(&state->rx_kfifo, CX23888_IR_RX_KFIFO_SIZE_ORDER, GFP_KERNEL))
 		return -ENOMEM;
 
 	state->dev = dev;
diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c
index 049e186..3bcde0c 100644
--- a/drivers/media/pci/meye/meye.c
+++ b/drivers/media/pci/meye/meye.c
@@ -1759,14 +1759,12 @@ static int meye_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
 	}
 
 	spin_lock_init(&meye.grabq_lock);
-	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS,
-				GFP_KERNEL)) {
+	if (kfifo_alloc(&meye.grabq, MEYE_KFIFO_SIZE_ORDER, GFP_KERNEL)) {
 		v4l2_err(v4l2_dev, "fifo allocation failed\n");
 		goto outkfifoalloc1;
 	}
 	spin_lock_init(&meye.doneq_lock);
-	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS,
-				GFP_KERNEL)) {
+	if (kfifo_alloc(&meye.doneq, MEYE_KFIFO_SIZE_ORDER, GFP_KERNEL)) {
 		v4l2_err(v4l2_dev, "fifo allocation failed\n");
 		goto outkfifoalloc2;
 	}
diff --git a/drivers/media/pci/meye/meye.h b/drivers/media/pci/meye/meye.h
index 4bdeb03..5d3ab4f 100644
--- a/drivers/media/pci/meye/meye.h
+++ b/drivers/media/pci/meye/meye.h
@@ -260,6 +260,7 @@
 /* private API definitions */
 #include <linux/meye.h>
 #include <linux/mutex.h>
+#include <linux/log2.h>
 
 
 /* Enable jpg software correction */
@@ -270,6 +271,7 @@
 
 /* Maximum number of buffers */
 #define MEYE_MAX_BUFNBRS	32
+#define MEYE_KFIFO_SIZE_ORDER	(order_base_2(MEYE_MAX_BUFNBRS * sizeof(int)))
 
 /* State of a buffer */
 #define MEYE_BUF_UNUSED	0	/* not used */
diff --git a/drivers/media/rc/ir-raw.c b/drivers/media/rc/ir-raw.c
index 97dc8d1..e4d1ec8 100644
--- a/drivers/media/rc/ir-raw.c
+++ b/drivers/media/rc/ir-raw.c
@@ -18,6 +18,7 @@
 #include <linux/kmod.h>
 #include <linux/sched.h>
 #include <linux/freezer.h>
+#include <linux/log2.h>
 #include "rc-core-priv.h"
 
 /* Define the max number of pulse/space transitions to buffer */
@@ -252,6 +253,8 @@ int ir_raw_event_register(struct rc_dev *dev)
 {
 	int rc;
 	struct ir_raw_handler *handler;
+	int kfifo_size_order = order_base_2(sizeof(struct ir_raw_event) *
+					    MAX_IR_EVENT_SIZE);
 
 	if (!dev)
 		return -EINVAL;
@@ -262,9 +265,7 @@ int ir_raw_event_register(struct rc_dev *dev)
 
 	dev->raw->dev = dev;
 	dev->raw->enabled_protocols = ~0;
-	rc = kfifo_alloc(&dev->raw->kfifo,
-			 sizeof(struct ir_raw_event) * MAX_IR_EVENT_SIZE,
-			 GFP_KERNEL);
+	rc = kfifo_alloc(&dev->raw->kfifo, kfifo_size_order, GFP_KERNEL);
 	if (rc < 0)
 		goto out;
 
diff --git a/drivers/memstick/host/r592.h b/drivers/memstick/host/r592.h
index c5726c1..6fc19f4 100644
--- a/drivers/memstick/host/r592.h
+++ b/drivers/memstick/host/r592.h
@@ -143,7 +143,7 @@ struct r592_device {
 	struct task_struct *io_thread;
 	bool parallel_mode;
 
-	DECLARE_KFIFO(pio_fifo, u8, sizeof(u32));
+	DECLARE_KFIFO(pio_fifo, u8, 2);
 
 	/* DMA area */
 	int dma_capable;
diff --git a/drivers/mmc/card/sdio_uart.c b/drivers/mmc/card/sdio_uart.c
index bd57a11..c54a7c5 100644
--- a/drivers/mmc/card/sdio_uart.c
+++ b/drivers/mmc/card/sdio_uart.c
@@ -43,12 +43,14 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/sdio_func.h>
 #include <linux/mmc/sdio_ids.h>
+#include <linux/log2.h>
 
 
 #define UART_NR		8	/* Number of UARTs this driver can handle */
 
 
 #define FIFO_SIZE	PAGE_SIZE
+#define FIFO_SIZE_ORDER	PAGE_SHIFT
 #define WAKEUP_CHARS	256
 
 struct uart_icount {
@@ -93,7 +95,7 @@ static int sdio_uart_add_port(struct sdio_uart_port *port)
 
 	mutex_init(&port->func_lock);
 	spin_lock_init(&port->write_lock);
-	if (kfifo_alloc(&port->xmit_fifo, FIFO_SIZE, GFP_KERNEL))
+	if (kfifo_alloc(&port->xmit_fifo, FIFO_SIZE_ORDER, GFP_KERNEL))
 		return -ENOMEM;
 
 	spin_lock(&sdio_uart_table_lock);
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 8dd6ba5..672ef47 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -17,6 +17,7 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/mtd/nand_ecc.h>
+#include <linux/log2.h>
 #include "nand/sm_common.h"
 #include "sm_ftl.h"
 
@@ -766,6 +767,7 @@ static int sm_init_zone(struct sm_ftl *ftl, int zone_num)
 	int lba;
 	int i = 0;
 	int len;
+	int kfifo_size_order;
 
 	dbg("initializing zone %d", zone_num);
 
@@ -778,7 +780,8 @@ static int sm_init_zone(struct sm_ftl *ftl, int zone_num)
 
 
 	/* Allocate memory for free sectors FIFO */
-	if (kfifo_alloc(&zone->free_sectors, ftl->zone_size * 2, GFP_KERNEL)) {
+	kfifo_size_order = order_base_2(ftl->zone_size * 2);
+	if (kfifo_alloc(&zone->free_sectors, kfifo_size_order, GFP_KERNEL)) {
 		kfree(zone->lba_to_phys_table);
 		return -ENOMEM;
 	}
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 0c02f04..ea5ddf4 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -25,6 +25,8 @@
 #include "cmd.h"
 #include "mesh.h"
 
+#define KFIFO_SIZE_ORDER	6
+
 #define DRIVER_RELEASE_VERSION "323.p0"
 const char lbs_driver_version[] = "COMM-USB8388-" DRIVER_RELEASE_VERSION
 #ifdef  DEBUG
@@ -914,7 +916,7 @@ static int lbs_init_adapter(struct lbs_private *priv)
 	priv->resp_len[0] = priv->resp_len[1] = 0;
 
 	/* Create the event FIFO */
-	ret = kfifo_alloc(&priv->event_fifo, sizeof(u32) * 16, GFP_KERNEL);
+	ret = kfifo_alloc(&priv->event_fifo, KFIFO_SIZE_ORDER, GFP_KERNEL);
 	if (ret) {
 		pr_err("Out of memory allocating event FIFO buffer\n");
 		goto out;
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index 44f8b3f..c8f68485 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -979,12 +979,11 @@ static int rt2x00lib_probe_hw(struct rt2x00_dev *rt2x00dev)
 		 * tx_queues * entry_num and round up to the nearest
 		 * power of 2.
 		 */
-		int kfifo_size =
-			roundup_pow_of_two(rt2x00dev->ops->tx_queues *
+		int kfifo_size_order = order_base_2(rt2x00dev->ops->tx_queues *
 					   rt2x00dev->ops->tx->entry_num *
 					   sizeof(u32));
 
-		status = kfifo_alloc(&rt2x00dev->txstatus_fifo, kfifo_size,
+		status = kfifo_alloc(&rt2x00dev->txstatus_fifo, kfifo_size_order,
 				     GFP_KERNEL);
 		if (status)
 			return status;
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 421bbc5..ec9284a 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -574,7 +574,6 @@ static void handle_error_source(struct pcie_device *aerdev,
 static void aer_recover_work_func(struct work_struct *work);
 
 #define AER_RECOVER_RING_ORDER		4
-#define AER_RECOVER_RING_SIZE		(1 << AER_RECOVER_RING_ORDER)
 
 struct aer_recover_entry
 {
@@ -585,7 +584,7 @@ struct aer_recover_entry
 };
 
 static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
-		    AER_RECOVER_RING_SIZE);
+		    AER_RECOVER_RING_ORDER);
 /*
  * Mutual exclusion for writers of aer_recover_ring, reader side don't
  * need lock, because there is only one reader and lock is not needed
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index c4c1a54..185bd55 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -66,6 +66,7 @@
 #include <linux/backlight.h>
 #include <linux/input.h>
 #include <linux/kfifo.h>
+#include <linux/log2.h>
 #include <linux/video_output.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -116,6 +117,7 @@
 
 #define MAX_HOTKEY_RINGBUFFER_SIZE 100
 #define RINGBUFFERSIZE 40
+#define KFIFO_SIZE_ORDER	(order_base_2(RINGBUFFERSIZE * sizeof(int)))
 
 /* Debugging */
 #define FUJLAPTOP_LOG	   ACPI_FUJITSU_HID ": "
@@ -825,8 +827,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 
 	/* kfifo */
 	spin_lock_init(&fujitsu_hotkey->fifo_lock);
-	error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int),
-			GFP_KERNEL);
+	error = kfifo_alloc(&fujitsu_hotkey->fifo, KFIFO_SIZE_ORDER, GFP_KERNEL);
 	if (error) {
 		pr_err("kfifo_alloc failed\n");
 		goto err_stop;
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index daaddec..ee57eac 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -183,7 +183,7 @@ static void sony_nc_rfkill_update(void);
 
 /*********** Input Devices ***********/
 
-#define SONY_LAPTOP_BUF_SIZE	128
+#define SONY_LAPTOP_KFIFO_SIZE_ORDER	7
 struct sony_laptop_input_s {
 	atomic_t		users;
 	struct input_dev	*jog_dev;
@@ -447,7 +447,7 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device)
 	/* kfifo */
 	spin_lock_init(&sony_laptop_input.fifo_lock);
 	error = kfifo_alloc(&sony_laptop_input.fifo,
-			    SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
+			    SONY_LAPTOP_KFIFO_SIZE_ORDER, GFP_KERNEL);
 	if (error) {
 		pr_err("kfifo_alloc failed\n");
 		goto err_dec_users;
@@ -3752,7 +3752,7 @@ static int sonypi_compat_init(void)
 
 	spin_lock_init(&sonypi_compat.fifo_lock);
 	error =
-	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL);
+	 kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_KFIFO_SIZE_ORDER, GFP_KERNEL);
 	if (error) {
 		pr_err("kfifo_alloc failed\n");
 		return error;
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index 6faba40..a731e87 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -32,6 +32,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/kfifo.h>
+#include <linux/log2.h>
 #include <linux/delay.h>
 
 #include "tsi721.h"
@@ -970,11 +971,11 @@ static void tsi721_init_sr2pc_mapping(struct tsi721_device *priv)
  */
 static int tsi721_port_write_init(struct tsi721_device *priv)
 {
+	int kfifo_size_order = order_base_2(TSI721_RIO_PW_MSG_SIZE * 32);
 	priv->pw_discard_count = 0;
 	INIT_WORK(&priv->pw_work, tsi721_pw_dpc);
 	spin_lock_init(&priv->pw_fifo_lock);
-	if (kfifo_alloc(&priv->pw_fifo,
-			TSI721_RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
+	if (kfifo_alloc(&priv->pw_fifo, kfifo_size_order, GFP_KERNEL)) {
 		dev_err(&priv->pdev->dev, "PW FIFO allocation failed\n");
 		return -ENOMEM;
 	}
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 552e8a2..bdb09bf 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -35,6 +35,7 @@
 #include <linux/crypto.h>
 #include <linux/delay.h>
 #include <linux/kfifo.h>
+#include <linux/log2.h>
 #include <linux/scatterlist.h>
 #include <linux/module.h>
 #include <net/tcp.h>
@@ -1113,6 +1114,7 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
 {
 	int i;
 	int cmd_i;
+	int kfifo_size_order;
 
 	/*
 	 * initialize per-task: R2T pool and xmit queue
@@ -1135,8 +1137,8 @@ int iscsi_tcp_r2tpool_alloc(struct iscsi_session *session)
 		}
 
 		/* R2T xmit queue */
-		if (kfifo_alloc(&tcp_task->r2tqueue,
-		      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL)) {
+		kfifo_size_order = order_base_2(session->max_r2t * 4 * sizeof(void *));
+		if (kfifo_alloc(&tcp_task->r2tqueue, kfifo_size_order, GFP_KERNEL)) {
 			iscsi_pool_free(&tcp_task->r2tpool);
 			goto r2t_alloc_fail;
 		}
diff --git a/drivers/staging/omapdrm/omap_plane.c b/drivers/staging/omapdrm/omap_plane.c
index 2a8e5ba..40f057f 100644
--- a/drivers/staging/omapdrm/omap_plane.c
+++ b/drivers/staging/omapdrm/omap_plane.c
@@ -28,6 +28,8 @@
  */
 #define omap_plane _omap_plane
 
+#define OMAP_KFIFO_SIZE_ORDER	4
+
 /*
  * plane funcs
  */
@@ -508,7 +510,8 @@ struct drm_plane *omap_plane_init(struct drm_device *dev,
 
 	mutex_init(&omap_plane->unpin_mutex);
 
-	ret = kfifo_alloc(&omap_plane->unpin_fifo, 16, GFP_KERNEL);
+	ret = kfifo_alloc(&omap_plane->unpin_fifo, OMAP_KFIFO_SIZE_ORDER,
+			  GFP_KERNEL);
 	if (ret) {
 		dev_err(dev->dev, "could not allocate unpin FIFO\n");
 		goto fail;
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index dcc0430..b3b1b1c 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -66,6 +66,8 @@
 static int debug;
 module_param(debug, int, 0600);
 
+#define KFIFO_SIZE_ORDER	12
+
 /* Defaults: these are from the specification */
 
 #define T1	10		/* 100mS */
@@ -1636,7 +1638,7 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
 	spin_lock_init(&dlci->lock);
 	mutex_init(&dlci->mutex);
 	dlci->fifo = &dlci->_fifo;
-	if (kfifo_alloc(&dlci->_fifo, 4096, GFP_KERNEL) < 0) {
+	if (kfifo_alloc(&dlci->_fifo, KFIFO_SIZE_ORDER, GFP_KERNEL) < 0) {
 		kfree(dlci);
 		return NULL;
 	}
diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c
index a0c69ab..8b54da3 100644
--- a/drivers/tty/nozomi.c
+++ b/drivers/tty/nozomi.c
@@ -128,8 +128,7 @@ static int debug;
 #define NTTY_TTY_MAXMINORS	256
 #define NTTY_FIFO_BUFFER_SIZE	8192
 
-/* Must be power of 2 */
-#define FIFO_BUFFER_SIZE_UL	8192
+#define FIFO_BUFFER_SIZE_ORDER	13
 
 /* Size of tmp send buffer to card */
 #define SEND_BUF_MAX		1024
@@ -1428,7 +1427,7 @@ static int nozomi_card_init(struct pci_dev *pdev,
 	}
 
 	for (i = PORT_MDM; i < MAX_PORT; i++) {
-		if (kfifo_alloc(&dc->port[i].fifo_ul, FIFO_BUFFER_SIZE_UL,
+		if (kfifo_alloc(&dc->port[i].fifo_ul, FIFO_BUFFER_SIZE_ORDER,
 					GFP_KERNEL)) {
 			dev_err(&pdev->dev,
 					"Could not allocate kfifo buffer\n");
diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c
index 675d94a..f80dc2c 100644
--- a/drivers/tty/serial/ifx6x60.c
+++ b/drivers/tty/serial/ifx6x60.c
@@ -880,7 +880,7 @@ static int ifx_spi_create_port(struct ifx_spi_device *ifx_dev)
 	lockdep_set_class_and_subclass(&ifx_dev->fifo_lock,
 		&ifx_spi_key, 0);
 
-	if (kfifo_alloc(&ifx_dev->tx_fifo, IFX_SPI_FIFO_SIZE, GFP_KERNEL)) {
+	if (kfifo_alloc(&ifx_dev->tx_fifo, IFX_SPI_FIFO_SIZE_ORDER, GFP_KERNEL)) {
 		ret = -ENOMEM;
 		goto error_ret;
 	}
diff --git a/drivers/tty/serial/ifx6x60.h b/drivers/tty/serial/ifx6x60.h
index 4fbddc2..da4fd1c 100644
--- a/drivers/tty/serial/ifx6x60.h
+++ b/drivers/tty/serial/ifx6x60.h
@@ -31,7 +31,8 @@
 
 #define IFX_SPI_MAX_MINORS		1
 #define IFX_SPI_TRANSFER_SIZE		2048
-#define IFX_SPI_FIFO_SIZE		4096
+#define IFX_SPI_FIFO_SIZE_ORDER		12
+#define IFX_SPI_FIFO_SIZE		(1 << IFX_SPI_FIFO_SIZE_ORDER)
 
 #define IFX_SPI_HEADER_OVERHEAD		4
 #define IFX_RESET_TIMEOUT		msecs_to_jiffies(50)
diff --git a/drivers/tty/serial/kgdb_nmi.c b/drivers/tty/serial/kgdb_nmi.c
index 6ac2b79..947dd72 100644
--- a/drivers/tty/serial/kgdb_nmi.c
+++ b/drivers/tty/serial/kgdb_nmi.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
+#include <linux/log2.h>
 #include <linux/kfifo.h>
 #include <linux/kgdb.h>
 #include <linux/kdb.h>
@@ -75,13 +76,13 @@ static struct console kgdb_nmi_console = {
  * This is usually the maximum rate on debug ports. We make fifo large enough
  * to make copy-pasting to the terminal usable.
  */
-#define KGDB_NMI_BAUD		115200
-#define KGDB_NMI_FIFO_SIZE	roundup_pow_of_two(KGDB_NMI_BAUD / 8 / HZ)
+#define KGDB_NMI_BAUD			115200
+#define KGDB_NMI_FIFO_SIZE_ORDER	order_base_2(KGDB_NMI_BAUD / 8 / HZ)
 
 struct kgdb_nmi_tty_priv {
 	struct tty_port port;
 	struct tasklet_struct tlet;
-	STRUCT_KFIFO(char, KGDB_NMI_FIFO_SIZE) fifo;
+	STRUCT_KFIFO(char, KGDB_NMI_FIFO_SIZE_ORDER) fifo;
 };
 
 static struct kgdb_nmi_tty_priv *kgdb_nmi_port_to_priv(struct tty_port *port)
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 7cc1c32..e4a0ac6 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -24,6 +24,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/kfifo.h>
+#include <linux/log2.h>
 #include <linux/io.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
@@ -478,7 +479,8 @@ static inline struct usb_hcd *fhci_to_hcd(struct fhci_hcd *fhci)
 /* fifo of pointers */
 static inline int cq_new(struct kfifo *fifo, int size)
 {
-	return kfifo_alloc(fifo, size * sizeof(void *), GFP_KERNEL);
+	int kfifo_size_order = order_base_2(size * sizeof(void *));
+	return kfifo_alloc(fifo, kfifo_size_order, GFP_KERNEL);
 }
 
 static inline void cq_delete(struct kfifo *kfifo)
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index fd8c35f..a4d7cd1 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -54,7 +54,7 @@ static bool unstable_bauds;
 #define DRIVER_DESC "Cypress USB to Serial Driver"
 
 /* write buffer size defines */
-#define CYPRESS_BUF_SIZE	1024
+#define CYPRESS_KFIFO_SIZE_ORDER	10
 
 static const struct usb_device_id id_table_earthmate[] = {
 	{ USB_DEVICE(VENDOR_ID_DELORME, PRODUCT_ID_EARTHMATEUSB) },
@@ -445,7 +445,7 @@ static int cypress_generic_port_probe(struct usb_serial_port *port)
 
 	priv->comm_is_ok = !0;
 	spin_lock_init(&priv->lock);
-	if (kfifo_alloc(&priv->write_fifo, CYPRESS_BUF_SIZE, GFP_KERNEL)) {
+	if (kfifo_alloc(&priv->write_fifo, CYPRESS_KFIFO_SIZE_ORDER, GFP_KERNEL)) {
 		kfree(priv);
 		return -ENOMEM;
 	}
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 58184f3..a19018b 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -64,7 +64,7 @@
 
 #define EDGE_CLOSING_WAIT	4000	/* in .01 sec */
 
-#define EDGE_OUT_BUF_SIZE	1024
+#define EDGE_KFIFO_SIZE_ORDER	10
 
 
 /* Product information read from the Edgeport */
@@ -2567,7 +2567,7 @@ static int edge_port_probe(struct usb_serial_port *port)
 	if (!edge_port)
 		return -ENOMEM;
 
-	ret = kfifo_alloc(&edge_port->write_fifo, EDGE_OUT_BUF_SIZE,
+	ret = kfifo_alloc(&edge_port->write_fifo, EDGE_KFIFO_SIZE_ORDER,
 								GFP_KERNEL);
 	if (ret) {
 		kfree(edge_port);
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index f2530d2..777e90a 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -45,7 +45,8 @@
 
 #define TI_FIRMWARE_BUF_SIZE	16284
 
-#define TI_WRITE_BUF_SIZE	1024
+#define TI_KFIFO_SIZE_ORDER	10
+#define TI_KFIFO_SIZE		(1 << TI_KFIFO_SIZE_ORDER)
 
 #define TI_TRANSFER_TIMEOUT	2
 
@@ -434,7 +435,7 @@ static int ti_port_probe(struct usb_serial_port *port)
 	tport->tp_closing_wait = closing_wait;
 	init_waitqueue_head(&tport->tp_msr_wait);
 	init_waitqueue_head(&tport->tp_write_wait);
-	if (kfifo_alloc(&tport->write_fifo, TI_WRITE_BUF_SIZE, GFP_KERNEL)) {
+	if (kfifo_alloc(&tport->write_fifo, TI_KFIFO_SIZE_ORDER, GFP_KERNEL)) {
 		kfree(tport);
 		return -ENOMEM;
 	}
@@ -1355,7 +1356,7 @@ static int ti_get_serial_info(struct ti_port *tport,
 	ret_serial.line = port->serial->minor;
 	ret_serial.port = port->number - port->serial->minor;
 	ret_serial.flags = tport->tp_flags;
-	ret_serial.xmit_fifo_size = TI_WRITE_BUF_SIZE;
+	ret_serial.xmit_fifo_size = TI_KFIFO_SIZE;
 	ret_serial.baud_base = tport->tp_tdev->td_is_3410 ? 921600 : 460800;
 	ret_serial.closing_wait = tport->tp_closing_wait;
 
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 64bda13..11ca271 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -934,7 +934,7 @@ static int usb_serial_probe(struct usb_interface *interface,
 	for (i = 0; i < num_bulk_out; ++i) {
 		endpoint = bulk_out_endpoint[i];
 		port = serial->port[i];
-		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
+		if (kfifo_alloc(&port->write_fifo, PAGE_SHIFT, GFP_KERNEL))
 			goto probe_error;
 		buffer_size = serial->type->bulk_out_size;
 		if (!buffer_size)
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index 4bf984e..28dfe98 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -76,8 +76,8 @@ struct __kfifo {
 	type		buf[((size < 2) || (size & (size - 1))) ? -1 : size]; \
 }
 
-#define STRUCT_KFIFO(type, size) \
-	struct __STRUCT_KFIFO(type, size, 0)
+#define STRUCT_KFIFO(type, size_order) \
+	struct __STRUCT_KFIFO(type, (1<<(size_order)), 0)
 
 #define __STRUCT_KFIFO_PTR(type, recsize) \
 { \
@@ -93,11 +93,11 @@ struct __kfifo {
  */
 struct kfifo __STRUCT_KFIFO_PTR(unsigned char, 0);
 
-#define STRUCT_KFIFO_REC_1(size) \
-	struct __STRUCT_KFIFO(unsigned char, size, 1)
+#define STRUCT_KFIFO_REC_1(size_order) \
+	struct __STRUCT_KFIFO(unsigned char, (1<<(size_order)), 1)
 
-#define STRUCT_KFIFO_REC_2(size) \
-	struct __STRUCT_KFIFO(unsigned char, size, 2)
+#define STRUCT_KFIFO_REC_2(size_order) \
+	struct __STRUCT_KFIFO(unsigned char, (1<<(size_order)), 2)
 
 /*
  * define kfifo_rec types
@@ -123,9 +123,9 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2);
  * DECLARE_KFIFO - macro to declare a fifo object
  * @fifo: name of the declared fifo
  * @type: type of the fifo elements
- * @size: the number of elements in the fifo, this must be a power of 2
+ * @size_order: request 2^size_order fifo elements
  */
-#define DECLARE_KFIFO(fifo, type, size)	STRUCT_KFIFO(type, size) fifo
+#define DECLARE_KFIFO(fifo, type, size_order)	STRUCT_KFIFO(type, size_order) fifo
 
 /**
  * INIT_KFIFO - Initialize a fifo declared by DECLARE_KFIFO
@@ -146,12 +146,12 @@ struct kfifo_rec_ptr_2 __STRUCT_KFIFO_PTR(unsigned char, 2);
  * DEFINE_KFIFO - macro to define and initialize a fifo
  * @fifo: name of the declared fifo datatype
  * @type: type of the fifo elements
- * @size: the number of elements in the fifo, this must be a power of 2
+ * @size_order: request 2^size_order fifo elements
  *
  * Note: the macro can be used for global and local fifo data type variables.
  */
-#define DEFINE_KFIFO(fifo, type, size) \
-	DECLARE_KFIFO(fifo, type, size) = \
+#define DEFINE_KFIFO(fifo, type, size_order) \
+	DECLARE_KFIFO(fifo, type, size_order) = \
 	(typeof(fifo)) { \
 		{ \
 			{ \
@@ -317,22 +317,21 @@ __kfifo_uint_must_check_helper( \
 /**
  * kfifo_alloc - dynamically allocates a new fifo buffer
  * @fifo: pointer to the fifo
- * @size: the number of elements in the fifo, this must be a power of 2
+ * @size_order: request 2^size_order fifo elements
  * @gfp_mask: get_free_pages mask, passed to kmalloc()
  *
  * This macro dynamically allocates a new fifo buffer.
  *
- * The numer of elements will be rounded-up to a power of 2.
  * The fifo will be release with kfifo_free().
  * Return 0 if no error, otherwise an error code.
  */
-#define kfifo_alloc(fifo, size, gfp_mask) \
+#define kfifo_alloc(fifo, size_order, gfp_mask) \
 __kfifo_int_must_check_helper( \
 ({ \
 	typeof((fifo) + 1) __tmp = (fifo); \
 	struct __kfifo *__kfifo = &__tmp->kfifo; \
 	__is_kfifo_ptr(__tmp) ? \
-	__kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \
+	__kfifo_alloc(__kfifo, size_order, sizeof(*__tmp->type), gfp_mask) : \
 	-EINVAL; \
 }) \
 )
@@ -745,7 +744,7 @@ __kfifo_uint_must_check_helper( \
 }) \
 )
 
-extern int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
+extern int __kfifo_alloc(struct __kfifo *fifo, int size_order,
 	size_t esize, gfp_t gfp_mask);
 
 extern void __kfifo_free(struct __kfifo *fifo);
diff --git a/include/linux/rio.h b/include/linux/rio.h
index a3e7842..05ff6bb 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -70,6 +70,7 @@
 #define RIO_OUTB_MBOX_RESOURCE	2
 
 #define RIO_PW_MSG_SIZE		64
+#define RIO_KFIFO_SIZE_ORDER	11	/* 64 * 32 */
 
 /*
  * A component tag value (stored in the component tag CSR) is used as device's
diff --git a/include/media/lirc_dev.h b/include/media/lirc_dev.h
index 168dd0b..7816d39 100644
--- a/include/media/lirc_dev.h
+++ b/include/media/lirc_dev.h
@@ -19,6 +19,7 @@
 #include <linux/ioctl.h>
 #include <linux/poll.h>
 #include <linux/kfifo.h>
+#include <linux/log2.h>
 #include <media/lirc.h>
 
 struct lirc_buffer {
@@ -50,12 +51,13 @@ static inline int lirc_buffer_init(struct lirc_buffer *buf,
 				    unsigned int size)
 {
 	int ret;
+	int kfifo_size_order = order_base_2(size * chunk_size);
 
 	init_waitqueue_head(&buf->wait_poll);
 	spin_lock_init(&buf->fifo_lock);
 	buf->chunk_size = chunk_size;
 	buf->size = size;
-	ret = kfifo_alloc(&buf->fifo, size * chunk_size, GFP_KERNEL);
+	ret = kfifo_alloc(&buf->fifo, kfifo_size_order, GFP_KERNEL);
 	if (ret == 0)
 		buf->fifo_initialized = 1;
 
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index d07f480..be1c2a0 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -35,15 +35,10 @@ static inline unsigned int kfifo_unused(struct __kfifo *fifo)
 	return (fifo->mask + 1) - (fifo->in - fifo->out);
 }
 
-int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
+int __kfifo_alloc(struct __kfifo *fifo, int size_order,
 		size_t esize, gfp_t gfp_mask)
 {
-	/*
-	 * round down to the next power of 2, since our 'let the indices
-	 * wrap' technique works only in this case.
-	 */
-	if (!is_power_of_2(size))
-		size = rounddown_pow_of_two(size);
+	unsigned int size = 1 << size_order;
 
 	fifo->in = 0;
 	fifo->out = 0;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index c6e4dd3..827bbf3 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1189,7 +1189,6 @@ out:
 EXPORT_SYMBOL_GPL(memory_failure);
 
 #define MEMORY_FAILURE_FIFO_ORDER	4
-#define MEMORY_FAILURE_FIFO_SIZE	(1 << MEMORY_FAILURE_FIFO_ORDER)
 
 struct memory_failure_entry {
 	unsigned long pfn;
@@ -1199,7 +1198,7 @@ struct memory_failure_entry {
 
 struct memory_failure_cpu {
 	DECLARE_KFIFO(fifo, struct memory_failure_entry,
-		      MEMORY_FAILURE_FIFO_SIZE);
+		      MEMORY_FAILURE_FIFO_ORDER);
 	spinlock_t lock;
 	struct work_struct work;
 };
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 0a8d6eb..0a12fd5 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -31,6 +31,7 @@
 #include <linux/kfifo.h>
 #include <linux/vmalloc.h>
 #include <linux/gfp.h>
+#include <linux/log2.h>
 #include <net/net_namespace.h>
 
 #include "dccp.h"
@@ -166,10 +167,11 @@ static __init int setup_jprobe(void)
 static __init int dccpprobe_init(void)
 {
 	int ret = -ENOMEM;
+	int kfifo_size_order = order_base_2(bufsize);
 
 	init_waitqueue_head(&dccpw.wait);
 	spin_lock_init(&dccpw.lock);
-	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
+	if (kfifo_alloc(&dccpw.fifo, kfifo_size_order, GFP_KERNEL))
 		return ret;
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
 		goto err0;
@@ -200,7 +202,7 @@ module_exit(dccpprobe_exit);
 MODULE_PARM_DESC(port, "Port to match (0=all)");
 module_param(port, int, 0);
 
-MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k , should be power of 2. If not, will roundup to power of 2)");
 module_param(bufsize, int, 0);
 
 MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 5f7518d..1736ef4 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/kfifo.h>
 #include <linux/time.h>
+#include <linux/log2.h>
 #include <net/net_namespace.h>
 
 #include <net/sctp/sctp.h>
@@ -47,7 +48,7 @@ MODULE_PARM_DESC(port, "Port to match (0=all)");
 module_param(port, int, 0);
 
 static int bufsize __read_mostly = 64 * 1024;
-MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k, should be power of 2. If not, will roundup to power of 2)");
 module_param(bufsize, int, 0);
 
 static int full __read_mostly = 1;
@@ -182,10 +183,11 @@ static struct jprobe sctp_recv_probe = {
 static __init int sctpprobe_init(void)
 {
 	int ret = -ENOMEM;
+	int kfifo_size_order = order_base_2(bufsize);
 
 	init_waitqueue_head(&sctpw.wait);
 	spin_lock_init(&sctpw.lock);
-	if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
+	if (kfifo_alloc(&sctpw.fifo, kfifo_size_order, GFP_KERNEL))
 		return ret;
 
 	if (!proc_net_fops_create(&init_net, procname, S_IRUSR,
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
index cfe40ad..eb3a46e 100644
--- a/samples/kfifo/bytestream-example.c
+++ b/samples/kfifo/bytestream-example.c
@@ -18,7 +18,7 @@
  */
 
 /* fifo size in elements (bytes) */
-#define FIFO_SIZE	32
+#define FIFO_SIZE_ORDER	5
 
 /* name of the proc entry */
 #define	PROC_FIFO	"bytestream-fifo"
@@ -41,10 +41,10 @@ static DEFINE_MUTEX(write_lock);
 #ifdef DYNAMIC
 static struct kfifo test;
 #else
-static DECLARE_KFIFO(test, unsigned char, FIFO_SIZE);
+static DECLARE_KFIFO(test, unsigned char, FIFO_SIZE_ORDER);
 #endif
 
-static const unsigned char expected_result[FIFO_SIZE] = {
+static const unsigned char expected_result[1<<FIFO_SIZE_ORDER] = {
 	 3,  4,  5,  6,  7,  8,  9,  0,
 	 1, 20, 21, 22, 23, 24, 25, 26,
 	27, 28, 29, 30, 31, 32, 33, 34,
@@ -156,7 +156,7 @@ static int __init example_init(void)
 #ifdef DYNAMIC
 	int ret;
 
-	ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL);
+	ret = kfifo_alloc(&test, FIFO_SIZE_ORDER, GFP_KERNEL);
 	if (ret) {
 		printk(KERN_ERR "error kfifo_alloc\n");
 		return ret;
diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index 0647379..bbc0787 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c
@@ -16,7 +16,8 @@
  */
 
 /* fifo size in elements (bytes) */
-#define FIFO_SIZE	32
+#define FIFO_SIZE_ORDER	5
+#define FIFO_SIZE	(1<< FIFO_SIZE_ORDER)
 
 static struct kfifo fifo;
 
@@ -29,7 +30,7 @@ static int __init example_init(void)
 
 	printk(KERN_INFO "DMA fifo test start\n");
 
-	if (kfifo_alloc(&fifo, FIFO_SIZE, GFP_KERNEL)) {
+	if (kfifo_alloc(&fifo, FIFO_SIZE_ORDER, GFP_KERNEL)) {
 		printk(KERN_WARNING "error kfifo_alloc\n");
 		return -ENOMEM;
 	}
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
index 6f8e79e..bed3229 100644
--- a/samples/kfifo/inttype-example.c
+++ b/samples/kfifo/inttype-example.c
@@ -18,7 +18,8 @@
  */
 
 /* fifo size in elements (ints) */
-#define FIFO_SIZE	32
+#define FIFO_SIZE_ORDER	5
+#define FIFO_SIZE	(1<< FIFO_SIZE_ORDER)
 
 /* name of the proc entry */
 #define	PROC_FIFO	"int-fifo"
@@ -41,7 +42,7 @@ static DEFINE_MUTEX(write_lock);
 #ifdef DYNAMIC
 static DECLARE_KFIFO_PTR(test, int);
 #else
-static DEFINE_KFIFO(test, int, FIFO_SIZE);
+static DEFINE_KFIFO(test, int, FIFO_SIZE_ORDER);
 #endif
 
 static const int expected_result[FIFO_SIZE] = {
@@ -149,7 +150,7 @@ static int __init example_init(void)
 #ifdef DYNAMIC
 	int ret;
 
-	ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL);
+	ret = kfifo_alloc(&test, FIFO_SIZE_ORDER, GFP_KERNEL);
 	if (ret) {
 		printk(KERN_ERR "error kfifo_alloc\n");
 		return ret;
diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
index 2d7529e..2902eae 100644
--- a/samples/kfifo/record-example.c
+++ b/samples/kfifo/record-example.c
@@ -18,7 +18,7 @@
  */
 
 /* fifo size in elements (bytes) */
-#define FIFO_SIZE	128
+#define FIFO_SIZE_ORDER	7
 
 /* name of the proc entry */
 #define	PROC_FIFO	"record-fifo"
@@ -50,7 +50,7 @@ static DEFINE_MUTEX(write_lock);
 struct kfifo_rec_ptr_1 test;
 
 #else
-typedef STRUCT_KFIFO_REC_1(FIFO_SIZE) mytest;
+typedef STRUCT_KFIFO_REC_1(FIFO_SIZE_ORDER) mytest;
 
 static mytest test;
 #endif
@@ -163,7 +163,7 @@ static int __init example_init(void)
 #ifdef DYNAMIC
 	int ret;
 
-	ret = kfifo_alloc(&test, FIFO_SIZE, GFP_KERNEL);
+	ret = kfifo_alloc(&test, FIFO_SIZE_ORDER, GFP_KERNEL);
 	if (ret) {
 		printk(KERN_ERR "error kfifo_alloc\n");
 		return ret;
-- 
1.7.7.6

^ permalink raw reply related

* Re: [PATCH 1/2] cpuhotplug/nohz: Remove offline cpus from nohz-idle state
From: Srivatsa S. Bhat @ 2013-01-08  6:55 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: linux-mips, linux-sh, Peter Zijlstra, Srivatsa Vaddagiri, mhocko,
	H. Peter Anvin, sparclinux, linux-s390, x86, Ingo Molnar,
	Paul E. McKenney, Mike Frysinger, Nikunj A Dadhania,
	linux-arm-msm, rusty@rustcorp.com.au, Thomas Gleixner,
	linux-arm-kernel, Stephen Boyd, linux-kernel, Ralf Baechle,
	Paul Mundt, Martin Schwidefsky, uclinux-dist-devel, linuxppc-dev,
	David S. Miller
In-Reply-To: <20130105103627.GU2631@n2100.arm.linux.org.uk>

On 01/05/2013 04:06 PM, Russell King - ARM Linux wrote:
> On Thu, Jan 03, 2013 at 06:58:38PM -0800, Srivatsa Vaddagiri wrote:
>> I also think that the
>> wait_for_completion() based wait in ARM's __cpu_die() can be replaced with a
>> busy-loop based one, as the wait there in general should be terminated within
>> few cycles.
> 
> Why open-code this stuff when we have infrastructure already in the kernel
> for waiting for stuff to happen?  I chose to use the standard infrastructure
> because its better tested, and avoids having to think about whether we need
> CPU barriers and such like to ensure that updates are seen in a timely
> manner.
> 
> My stance on a lot of this idle/cpu dying code is that much of it can
> probably be cleaned up and merged into a single common implementation -
> in which case the use of standard infrastructure for things like waiting
> for other CPUs do stuff is even more justified.

On similar lines, Nikunj (in CC) and I had posted a patchset sometime ago to
consolidate some of the CPU hotplug related code in the various architectures
into a common standard implementation [1].

However, we ended up hitting a problem with Xen, because its existing code
was unlike the other arch/ pieces [2]. At that time, we decided that we will
first make the CPU online and offline paths symmetric in the generic code and
then provide a common implementation of the duplicated bits in arch/, for the
new CPU hotplug model [3].

I guess we should probably revisit it sometime, consolidating the code in
incremental steps if not all at once...

--
[1]. http://lwn.net/Articles/500185/
[2]. http://thread.gmane.org/gmane.linux.kernel.cross-arch/14342/focus=14430
[3]. http://thread.gmane.org/gmane.linux.kernel.cross-arch/14342/focus=15567

Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH 1/2] cpuhotplug/nohz: Remove offline cpus from nohz-idle state
From: Srivatsa Vaddagiri @ 2013-01-08  4:27 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: linux-mips, linux-sh, mhocko, H. Peter Anvin, sparclinux,
	linux-s390, x86, Ingo Molnar, Paul E. McKenney, Mike Frysinger,
	linux-arm-msm, Thomas Gleixner, linux-arm-kernel, Stephen Boyd,
	linux-kernel, Ralf Baechle, Paul Mundt, srivatsa.bhat,
	Martin Schwidefsky, uclinux-dist-devel, linuxppc-dev,
	David S. Miller
In-Reply-To: <20130105103627.GU2631@n2100.arm.linux.org.uk>

* Russell King - ARM Linux <linux@arm.linux.org.uk> [2013-01-05 10:36:27]:

> On Thu, Jan 03, 2013 at 06:58:38PM -0800, Srivatsa Vaddagiri wrote:
> > I also think that the
> > wait_for_completion() based wait in ARM's __cpu_die() can be replaced with a
> > busy-loop based one, as the wait there in general should be terminated within
> > few cycles.
> 
> Why open-code this stuff when we have infrastructure already in the kernel
> for waiting for stuff to happen?  I chose to use the standard infrastructure
> because its better tested, and avoids having to think about whether we need
> CPU barriers and such like to ensure that updates are seen in a timely
> manner.

I was primarily thinking of calling as few generic functions as possible on
a dead cpu. I recall several "am I running on a dead cpu?" checks
(cpu_is_offline(this_cpu)) that were put in generic routines during early
versions of cpu hotplug [1] to educate code running on dead cpu, the need for
which went away though with introduction of atomic/stop-machine variant. The
need to add a RCU_NONIDLE() wrapper around ARM's cpu_die() [2] is perhaps a more
recent example of educating code running on dead cpu. The sooner we die
after the idle thread of the dying cpu gains control, the better!

1. http://lwn.net/Articles/69040/
2. http://lists.infradead.org/pipermail/linux-arm-kernel/2012-July/107971.html

- vatsa
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation

^ permalink raw reply

* Re: [PATCH] lsprop: Fixes to work correctly when built little endian
From: Nathan Fontenot @ 2013-01-08  3:55 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
In-Reply-To: <1357611149.16285.54.camel@pasglop>

On 01/07/2013 08:12 PM, Benjamin Herrenschmidt wrote:
> On Mon, 2013-01-07 at 15:23 +1100, Michael Ellerman wrote:
>> Add and use dt_swap_int() to byte swap on little endian.
>>
>> Also declare buf as unsigned char, so that we don't sign extend when
>> printing values from it.
>>
>> Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
>> ---
>>
>> Ben, based on your patch, can you add your s-o-b? :
>>   https://lists.ozlabs.org/pipermail/linuxppc-dev/2008-May/056088.html
> 
> I didn't know powerpc-utils required sob's :-)

Not technically, it's more a CYA thing. It (hopefully) keeps big blue legal
happy, which keeps me happy.

-Nathan

> 
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> 
>> ---
>>  src/lsprop.c |   17 ++++++++++++++---
>>  1 file changed, 14 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/lsprop.c b/src/lsprop.c
>> index 5969a97..38a8fa5 100644
>> --- a/src/lsprop.c
>> +++ b/src/lsprop.c
>> @@ -13,11 +13,22 @@
>>  #include <sys/stat.h>
>>  #include <sys/types.h>
>>  #include <dirent.h>
>> +#include <endian.h>
>> +#include <byteswap.h>
>> +
>> +static inline unsigned int dt_swap_int(unsigned int data)
>> +{
>> +#if __BYTE_ORDER == __LITTLE_ENDIAN
>> +	return bswap_32(data);
>> +#else
>> +	return data;
>> +#endif
>> +}
>>  
>>  int recurse;
>>  int maxbytes = 128;
>>  int words_per_line = 0;
>> -char *buf;
>> +unsigned char *buf;
>>  
>>  void lsprop(FILE *f, char *name);
>>  void lsdir(char *name);
>> @@ -183,7 +194,7 @@ void lsprop(FILE *f, char *name)
>>      } else if ((n & 3) == 0) {
>>  	nw = n >> 2;
>>  	if (nw == 1) {
>> -	    i = *(int *)buf;
>> +	    i = dt_swap_int(*(int *)buf);
>>  	    printf(" %.8x", i);
>>  	    if (i > -0x10000 && !(i >= 0 && i <= 9))
>>  		printf(" (%d)", i);
>> @@ -201,7 +212,7 @@ void lsprop(FILE *f, char *name)
>>  		if (i != 0)
>>  		    printf("\n\t\t");
>>  		for (j = 0; j < npl && i + j < nw; ++j)
>> -		    printf(" %.8x", ((unsigned int *)buf)[i+j]);
>> +		    printf(" %.8x", dt_swap_int(((unsigned int *)buf)[i+j]));
>>  	    }
>>  	}
>>      } else {
> 
> 


-- 
-Nathan

^ permalink raw reply

* Re: [PATCH] lsprop: Fixes to work correctly when built little endian
From: Benjamin Herrenschmidt @ 2013-01-08  2:12 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: nfont, linuxppc-dev
In-Reply-To: <1357532610-19416-1-git-send-email-michael@ellerman.id.au>

On Mon, 2013-01-07 at 15:23 +1100, Michael Ellerman wrote:
> Add and use dt_swap_int() to byte swap on little endian.
> 
> Also declare buf as unsigned char, so that we don't sign extend when
> printing values from it.
> 
> Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
> ---
> 
> Ben, based on your patch, can you add your s-o-b? :
>   https://lists.ozlabs.org/pipermail/linuxppc-dev/2008-May/056088.html

I didn't know powerpc-utils required sob's :-)

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

> ---
>  src/lsprop.c |   17 ++++++++++++++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/src/lsprop.c b/src/lsprop.c
> index 5969a97..38a8fa5 100644
> --- a/src/lsprop.c
> +++ b/src/lsprop.c
> @@ -13,11 +13,22 @@
>  #include <sys/stat.h>
>  #include <sys/types.h>
>  #include <dirent.h>
> +#include <endian.h>
> +#include <byteswap.h>
> +
> +static inline unsigned int dt_swap_int(unsigned int data)
> +{
> +#if __BYTE_ORDER == __LITTLE_ENDIAN
> +	return bswap_32(data);
> +#else
> +	return data;
> +#endif
> +}
>  
>  int recurse;
>  int maxbytes = 128;
>  int words_per_line = 0;
> -char *buf;
> +unsigned char *buf;
>  
>  void lsprop(FILE *f, char *name);
>  void lsdir(char *name);
> @@ -183,7 +194,7 @@ void lsprop(FILE *f, char *name)
>      } else if ((n & 3) == 0) {
>  	nw = n >> 2;
>  	if (nw == 1) {
> -	    i = *(int *)buf;
> +	    i = dt_swap_int(*(int *)buf);
>  	    printf(" %.8x", i);
>  	    if (i > -0x10000 && !(i >= 0 && i <= 9))
>  		printf(" (%d)", i);
> @@ -201,7 +212,7 @@ void lsprop(FILE *f, char *name)
>  		if (i != 0)
>  		    printf("\n\t\t");
>  		for (j = 0; j < npl && i + j < nw; ++j)
> -		    printf(" %.8x", ((unsigned int *)buf)[i+j]);
> +		    printf(" %.8x", dt_swap_int(((unsigned int *)buf)[i+j]));
>  	    }
>  	}
>      } else {

^ permalink raw reply

* Re: [PATCH] powerpc/mm: eliminate unneeded for_each_memblock
From: Kumar Gala @ 2013-01-07 16:42 UTC (permalink / raw)
  To: Cody P Schafer; +Cc: linuxppc-dev, LKML, Paul Mackerras
In-Reply-To: <1357322760-12197-1-git-send-email-cody@linux.vnet.ibm.com>


On Jan 4, 2013, at 12:06 PM, Cody P Schafer wrote:

> The only persistent change made by this loop is calling
> memblock_set_node() once for each memblock, which is not useful (and has
> no effect) as memblock_set_node() is not called with any
> memblock-specific parameters.
> 
> Substitute a single memblock_set_node().
> ---
> arch/powerpc/mm/mem.c | 9 ++-------
> 1 file changed, 2 insertions(+), 7 deletions(-)

Missing a signed-off-by

- k

^ permalink raw reply

* Re: [PATCH v2 1/4] kprobes/powerpc: Do not disable External interrupts during single step
From: Sebastian Andrzej Siewior @ 2013-01-07 12:03 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: srikar, peterz, linux-kernel, oleg, linuxppc-dev,
	Suzuki K. Poulose, anton, mingo
In-Reply-To: <1357274575.2500.23.camel@pasglop>

On 01/04/2013 05:42 AM, Benjamin Herrenschmidt wrote:
> On Tue, 2012-12-11 at 11:18 +0530, Suzuki K. Poulose wrote:
>> On 12/03/2012 08:37 PM, Suzuki K. Poulose wrote:
>>> From: Suzuki K. Poulose<suzuki@in.ibm.com>
>>>
>>> External/Decrement exceptions have lower priority than the Debug Exception.
>>> So, we don't have to disable the External interrupts before a single step.
>>> However, on BookE, Critical Input Exception(CE) has higher priority than a
>>> Debug Exception. Hence we mask them.
>
> I'm not sure about that one ...
>
>> From memory, 4xx has that interesting issue which is that if you have
> single step enabled and an interrupt (of *any kind* occurs), the
> processor *will* step into the first instruction of the interrupt
> handler. (In fact, some silicons have a bug where it can even be the
> *second* instruction of the handler, which can be problematic when the
> first one is a branch).
>
> This is why you may notice that whole business we have in the handling
> of debug/crit interrupts where we try to figure out if that happened,
> and return with DE off if it did.
>
> Now, the above mentioned workaround means we might not need to disable
> EE indeed.
>
> However, in any case, I don't see what your patch fixes or improves, nor
> do I understand what you mean by "it is possible we'd get the single
> step reported for CE". Please explain in more details and describe the
> problematic scenario.

This change is probably my fault to some degree so let me explain. I've
been looking over the patch in first place and noticed that Suzuki
disables EE while enabling single stepping. After looking into the
manual I did not find a reason why this is done.

_If_ an external interrupt is pending and we enable EE and DE at the 
same time (via rfi) then we should never land in the external interrupt 
handler but always in the debug exception handler (and EE is disabled on 
all interrupts by the CPU). So why disable EE here?

_If_ the instruction in problem state triggers a DTLB exception then
we land in the TLB exception handler with DE bit set in MSR. I would say 
that this isn't uncommon (same goes probably for the syscall
opcode). After executing the first instruction in the kernel the CPU
should disable the DE (and CE) bit in the MSR and invoke the critical
exception handler. The critical debug exception handler seems to handle
this case. So disable DE, let the previous handler continue and exit to
problem state with DE enabled. From the uprobe point of view, we won't
stop over kernel code but only know once a problem state instruction is
over.

Based on this I did not see a reason why we should disable EE (or CE)
upfront. And for CE, it should be harmless if the code notices that we
debug problem state and continue the non-critical exception with
DE-disabled.

Now, if you come along with some CPU errata on the 4xx CPUs where we
have to disable CE/EE because the CPU doesn't do what is expected then
I think that this should be explained in the comment :)

> Cheers,
> Ben.

Sebastian

^ permalink raw reply

* [PATCH] uprobes/powerpc: Add dependency on single step emulation
From: Suzuki K. Poulose @ 2013-01-07 10:26 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev, linux-kernel, stable

From: Suzuki K. Poulose <suzuki@in.ibm.com>

Uprobes uses emulate_step in sstep.c, but we haven't explicitly specified
the dependency. On pseries HAVE_HW_BREAKPOINT protects us, but 44x has no
such luxury.

Consolidate other users that depend on sstep and create a new config option.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc: linuxppc-dev@ozlabs.org
Cc: stable@vger.kernel.org
---
 arch/powerpc/Kconfig      |    4 ++++
 arch/powerpc/lib/Makefile |    4 +---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 17903f1..dabe429 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -275,6 +275,10 @@ config PPC_ADV_DEBUG_DAC_RANGE
 	depends on PPC_ADV_DEBUG_REGS && 44x
 	default y
 
+config PPC_EMULATE_SSTEP
+	bool
+	default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 746e0c8..35baad9 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   checksum_wrappers_64.o hweight_64.o \
 			   copyuser_power7.o string_64.o copypage_power7.o \
 			   memcpy_power7.o
-obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
-obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
+obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o

^ permalink raw reply related

* Re: [PATCH v5 14/14] memory-hotplug: free node_data when a node is offlined
From: Kamezawa Hiroyuki @ 2013-01-07  5:30 UTC (permalink / raw)
  To: Wen Congyang
  Cc: linux-ia64, linux-sh, Tang Chen, linux-mm, paulus, hpa,
	sparclinux, cl, linux-s390, x86, linux-acpi, isimatu.yasuaki,
	linfeng, mgorman, kosaki.motohiro, rientjes, liuj97, len.brown,
	cmetcalf, wujianguo, yinghai, laijs, linux-kernel, minchan.kim,
	akpm, linuxppc-dev
In-Reply-To: <50DFD8F4.7040301@cn.fujitsu.com>

(2012/12/30 15:02), Wen Congyang wrote:
> At 12/28/2012 08:28 AM, Kamezawa Hiroyuki Wrote:
>> (2012/12/27 21:16), Wen Congyang wrote:
>>> At 12/26/2012 11:55 AM, Kamezawa Hiroyuki Wrote:
>>>> (2012/12/24 21:09), Tang Chen wrote:
>>>>> From: Wen Congyang <wency@cn.fujitsu.com>
>>>>>
>>>>> We call hotadd_new_pgdat() to allocate memory to store node_data. So we
>>>>> should free it when removing a node.
>>>>>
>>>>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>>>>
>>>> I'm sorry but is it safe to remove pgdat ? All zone cache and zonelists are
>>>> properly cleared/rebuilt in synchronous way ? and No threads are visiting
>>>> zone in vmscan.c ?
>>>
>>> We have rebuilt zonelists when a zone has no memory after offlining some pages.
>>>
>>
>> How do you guarantee that the address of pgdat/zone is not on stack of any kernel
>> threads or other kernel objects without reference counting or other syncing method ?
> 
> No way to guarantee this. But, the kernel should not use the address of pgdat/zone when
> it is offlined.
> 
> Hmm, what about this: reuse the memory when the node is onlined again?
> 

That's the only way which we can go now. Please don't free it.

Thanks,
-Kame

^ permalink raw reply

* [PATCH] lsprop: Fixes to work correctly when built little endian
From: Michael Ellerman @ 2013-01-07  4:23 UTC (permalink / raw)
  To: nfont; +Cc: linuxppc-dev

Add and use dt_swap_int() to byte swap on little endian.

Also declare buf as unsigned char, so that we don't sign extend when
printing values from it.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---

Ben, based on your patch, can you add your s-o-b? :
  https://lists.ozlabs.org/pipermail/linuxppc-dev/2008-May/056088.html
---
 src/lsprop.c |   17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/lsprop.c b/src/lsprop.c
index 5969a97..38a8fa5 100644
--- a/src/lsprop.c
+++ b/src/lsprop.c
@@ -13,11 +13,22 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <dirent.h>
+#include <endian.h>
+#include <byteswap.h>
+
+static inline unsigned int dt_swap_int(unsigned int data)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	return bswap_32(data);
+#else
+	return data;
+#endif
+}
 
 int recurse;
 int maxbytes = 128;
 int words_per_line = 0;
-char *buf;
+unsigned char *buf;
 
 void lsprop(FILE *f, char *name);
 void lsdir(char *name);
@@ -183,7 +194,7 @@ void lsprop(FILE *f, char *name)
     } else if ((n & 3) == 0) {
 	nw = n >> 2;
 	if (nw == 1) {
-	    i = *(int *)buf;
+	    i = dt_swap_int(*(int *)buf);
 	    printf(" %.8x", i);
 	    if (i > -0x10000 && !(i >= 0 && i <= 9))
 		printf(" (%d)", i);
@@ -201,7 +212,7 @@ void lsprop(FILE *f, char *name)
 		if (i != 0)
 		    printf("\n\t\t");
 		for (j = 0; j < npl && i + j < nw; ++j)
-		    printf(" %.8x", ((unsigned int *)buf)[i+j]);
+		    printf(" %.8x", dt_swap_int(((unsigned int *)buf)[i+j]));
 	    }
 	}
     } else {
-- 
1.7.10.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox