[patch 1/13] Qsort

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [patch 1/13] Qsort
  2005-01-22 20:34 [patch 0/13] NFSACL protocol extension for NFSv3 Andreas Gruenbacher
@ 2005-01-22 20:34 ` Andreas Gruenbacher
  2005-01-22 21:00   ` vlobanov
                     ` (3 more replies)
  0 siblings, 4 replies; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-22 20:34 UTC (permalink / raw)
  To: linux-kernel, Neil Brown, Trond Myklebust
  Cc: Olaf Kirch, Andries E. Brouwer, Buck Huppmann, Andrew Morton

[-- Attachment #1: patches.suse/qsort --]
[-- Type: text/plain, Size: 18585 bytes --]

Add a quicksort from glibc as a kernel library function, and switch
xfs over to using it. The implementations are equivalent. The nfsacl
protocol also requires a sort function, so it makes more sense in
the common code.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Acked-by: Olaf Kirch <okir@suse.de>

Index: linux-2.6.11-rc2/include/linux/kernel.h
===================================================================
--- linux-2.6.11-rc2.orig/include/linux/kernel.h
+++ linux-2.6.11-rc2/include/linux/kernel.h
@@ -93,6 +93,8 @@ extern int sscanf(const char *, const ch
 	__attribute__ ((format (scanf,2,3)));
 extern int vsscanf(const char *, const char *, va_list);
 
+extern void qsort(void *, size_t, size_t, int (*)(const void *,const void *));
+
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
 extern unsigned long long memparse(char *ptr, char **retptr);
Index: linux-2.6.11-rc2/lib/Kconfig
===================================================================
--- linux-2.6.11-rc2.orig/lib/Kconfig
+++ linux-2.6.11-rc2/lib/Kconfig
@@ -30,6 +30,9 @@ config LIBCRC32C
 	  require M here.  See Castagnoli93.
 	  Module will be libcrc32c.
 
+config QSORT
+	bool "Quick Sort"
+
 #
 # compression support is select'ed if needed
 #
Index: linux-2.6.11-rc2/lib/Makefile
===================================================================
--- linux-2.6.11-rc2.orig/lib/Makefile
+++ linux-2.6.11-rc2/lib/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
 obj-$(CONFIG_CRC32)	+= crc32.o
 obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
 obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_QSORT)	+= qsort.o
 
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
Index: linux-2.6.11-rc2/lib/qsort.c
===================================================================
--- /dev/null
+++ linux-2.6.11-rc2/lib/qsort.c
@@ -0,0 +1,249 @@
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* If you consider tuning this algorithm, you should consult first:
+   Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
+   Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */
+
+# include <linux/module.h>
+# include <linux/slab.h>
+# include <linux/string.h>
+
+MODULE_LICENSE("GPL");
+
+/* Byte-wise swap two items of size SIZE. */
+#define SWAP(a, b, size)						      \
+  do									      \
+    {									      \
+      register size_t __size = (size);					      \
+      register char *__a = (a), *__b = (b);				      \
+      do								      \
+	{								      \
+	  char __tmp = *__a;						      \
+	  *__a++ = *__b;						      \
+	  *__b++ = __tmp;						      \
+	} while (--__size > 0);						      \
+    } while (0)
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+   This particular magic number was chosen to work best on a Sun 4/260. */
+#define MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations. */
+typedef struct
+  {
+    char *lo;
+    char *hi;
+  } stack_node;
+
+/* The next 5 #defines implement a very fast in-line stack abstraction. */
+/* The stack needs log (total_elements) entries (we could even subtract
+   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
+   upper bound for log (total_elements):
+   bits per byte (CHAR_BIT) * sizeof(size_t).  */
+#define CHAR_BIT 8
+#define STACK_SIZE	(CHAR_BIT * sizeof(size_t))
+#define PUSH(low, high)	((void) ((top->lo = (low)), (top->hi = (high)), ++top))
+#define	POP(low, high)	((void) (--top, (low = top->lo), (high = top->hi)))
+#define	STACK_NOT_EMPTY	(stack < top)
+
+
+/* Order size using quicksort.  This implementation incorporates
+   four optimizations discussed in Sedgewick:
+
+   1. Non-recursive, using an explicit stack of pointer that store the
+      next array partition to sort.  To save time, this maximum amount
+      of space required to store an array of SIZE_MAX is allocated on the
+      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
+      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
+      Pretty cheap, actually.
+
+   2. Chose the pivot element using a median-of-three decision tree.
+      This reduces the probability of selecting a bad pivot value and
+      eliminates certain extraneous comparisons.
+
+   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+      insertion sort to order the MAX_THRESH items within each partition.
+      This is a big win, since insertion sort is faster for small, mostly
+      sorted array segments.
+
+   4. The larger of the two sub-partitions is always pushed onto the
+      stack first, with the algorithm then concentrating on the
+      smaller partition.  This *guarantees* no more than log (total_elems)
+      stack size is needed (actually O(1) in this case)!  */
+
+void
+qsort(void *const pbase, size_t total_elems, size_t size,
+      int(*cmp)(const void *,const void *))
+{
+  register char *base_ptr = (char *) pbase;
+
+  const size_t max_thresh = MAX_THRESH * size;
+
+  if (total_elems == 0)
+    /* Avoid lossage with unsigned arithmetic below.  */
+    return;
+
+  if (total_elems > MAX_THRESH)
+    {
+      char *lo = base_ptr;
+      char *hi = &lo[size * (total_elems - 1)];
+      stack_node stack[STACK_SIZE];
+      stack_node *top = stack + 1;
+
+      while (STACK_NOT_EMPTY)
+        {
+          char *left_ptr;
+          char *right_ptr;
+
+	  /* Select median value from among LO, MID, and HI. Rearrange
+	     LO and HI so the three values are sorted. This lowers the
+	     probability of picking a pathological pivot value and
+	     skips a comparison for both the LEFT_PTR and RIGHT_PTR in
+	     the while loops. */
+
+	  char *mid = lo + size * ((hi - lo) / size >> 1);
+
+	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
+	    SWAP (mid, lo, size);
+	  if ((*cmp) ((void *) hi, (void *) mid) < 0)
+	    SWAP (mid, hi, size);
+	  else
+	    goto jump_over;
+	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
+	    SWAP (mid, lo, size);
+	jump_over:;
+
+	  left_ptr  = lo + size;
+	  right_ptr = hi - size;
+
+	  /* Here's the famous ``collapse the walls'' section of quicksort.
+	     Gotta like those tight inner loops!  They are the main reason
+	     that this algorithm runs much faster than others. */
+	  do
+	    {
+	      while ((*cmp) ((void *) left_ptr, (void *) mid) < 0)
+		left_ptr += size;
+
+	      while ((*cmp) ((void *) mid, (void *) right_ptr) < 0)
+		right_ptr -= size;
+
+	      if (left_ptr < right_ptr)
+		{
+		  SWAP (left_ptr, right_ptr, size);
+		  if (mid == left_ptr)
+		    mid = right_ptr;
+		  else if (mid == right_ptr)
+		    mid = left_ptr;
+		  left_ptr += size;
+		  right_ptr -= size;
+		}
+	      else if (left_ptr == right_ptr)
+		{
+		  left_ptr += size;
+		  right_ptr -= size;
+		  break;
+		}
+	    }
+	  while (left_ptr <= right_ptr);
+
+          /* Set up pointers for next iteration.  First determine whether
+             left and right partitions are below the threshold size.  If so,
+             ignore one or both.  Otherwise, push the larger partition's
+             bounds on the stack and continue sorting the smaller one. */
+
+          if ((size_t) (right_ptr - lo) <= max_thresh)
+            {
+              if ((size_t) (hi - left_ptr) <= max_thresh)
+		/* Ignore both small partitions. */
+                POP (lo, hi);
+              else
+		/* Ignore small left partition. */
+                lo = left_ptr;
+            }
+          else if ((size_t) (hi - left_ptr) <= max_thresh)
+	    /* Ignore small right partition. */
+            hi = right_ptr;
+          else if ((right_ptr - lo) > (hi - left_ptr))
+            {
+	      /* Push larger left partition indices. */
+              PUSH (lo, right_ptr);
+              lo = left_ptr;
+            }
+          else
+            {
+	      /* Push larger right partition indices. */
+              PUSH (left_ptr, hi);
+              hi = right_ptr;
+            }
+        }
+    }
+
+  /* Once the BASE_PTR array is partially sorted by quicksort the rest
+     is completely sorted using insertion sort, since this is efficient
+     for partitions below MAX_THRESH size. BASE_PTR points to the beginning
+     of the array to sort, and END_PTR points at the very last element in
+     the array (*not* one beyond it!). */
+
+  {
+    char *end_ptr = &base_ptr[size * (total_elems - 1)];
+    char *tmp_ptr = base_ptr;
+    char *thresh = min(end_ptr, base_ptr + max_thresh);
+    register char *run_ptr;
+
+    /* Find smallest element in first threshold and place it at the
+       array's beginning.  This is the smallest array element,
+       and the operation speeds up insertion sort's inner loop. */
+
+    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
+        tmp_ptr = run_ptr;
+
+    if (tmp_ptr != base_ptr)
+      SWAP (tmp_ptr, base_ptr, size);
+
+    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
+
+    run_ptr = base_ptr + size;
+    while ((run_ptr += size) <= end_ptr)
+      {
+	tmp_ptr = run_ptr - size;
+	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
+	  tmp_ptr -= size;
+
+	tmp_ptr += size;
+        if (tmp_ptr != run_ptr)
+          {
+            char *trav;
+
+	    trav = run_ptr + size;
+	    while (--trav >= run_ptr)
+              {
+                char c = *trav;
+                char *hi, *lo;
+
+                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
+                  *hi = *lo;
+                *hi = c;
+              }
+          }
+      }
+  }
+}
+EXPORT_SYMBOL(qsort);
Index: linux-2.6.11-rc2/fs/xfs/support/qsort.c
===================================================================
--- linux-2.6.11-rc2.orig/fs/xfs/support/qsort.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 1992, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-
-/*
- * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
- */
-#define swapcode(TYPE, parmi, parmj, n) { 		\
-	long i = (n) / sizeof (TYPE); 			\
-	register TYPE *pi = (TYPE *) (parmi); 		\
-	register TYPE *pj = (TYPE *) (parmj); 		\
-	do { 						\
-		register TYPE	t = *pi;		\
-		*pi++ = *pj;				\
-		*pj++ = t;				\
-        } while (--i > 0);				\
-}
-
-#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
-	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
-
-static __inline void
-swapfunc(char *a, char *b, int n, int swaptype)
-{
-	if (swaptype <= 1) 
-		swapcode(long, a, b, n)
-	else
-		swapcode(char, a, b, n)
-}
-
-#define swap(a, b)					\
-	if (swaptype == 0) {				\
-		long t = *(long *)(a);			\
-		*(long *)(a) = *(long *)(b);		\
-		*(long *)(b) = t;			\
-	} else						\
-		swapfunc(a, b, es, swaptype)
-
-#define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)
-
-static __inline char *
-med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
-{
-	return cmp(a, b) < 0 ?
-	       (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
-              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
-}
-
-void
-qsort(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
-{
-	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
-	int d, r, swaptype, swap_cnt;
-	register char *a = aa;
-
-loop:	SWAPINIT(a, es);
-	swap_cnt = 0;
-	if (n < 7) {
-		for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-	pm = (char *)a + (n / 2) * es;
-	if (n > 7) {
-		pl = (char *)a;
-		pn = (char *)a + (n - 1) * es;
-		if (n > 40) {
-			d = (n / 8) * es;
-			pl = med3(pl, pl + d, pl + 2 * d, cmp);
-			pm = med3(pm - d, pm, pm + d, cmp);
-			pn = med3(pn - 2 * d, pn - d, pn, cmp);
-		}
-		pm = med3(pl, pm, pn, cmp);
-	}
-	swap(a, pm);
-	pa = pb = (char *)a + es;
-
-	pc = pd = (char *)a + (n - 1) * es;
-	for (;;) {
-		while (pb <= pc && (r = cmp(pb, a)) <= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pa, pb);
-				pa += es;
-			}
-			pb += es;
-		}
-		while (pb <= pc && (r = cmp(pc, a)) >= 0) {
-			if (r == 0) {
-				swap_cnt = 1;
-				swap(pc, pd);
-				pd -= es;
-			}
-			pc -= es;
-		}
-		if (pb > pc)
-			break;
-		swap(pb, pc);
-		swap_cnt = 1;
-		pb += es;
-		pc -= es;
-	}
-	if (swap_cnt == 0) {  /* Switch to insertion sort */
-		for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
-			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0; 
-			     pl -= es)
-				swap(pl, pl - es);
-		return;
-	}
-
-	pn = (char *)a + n * es;
-	r = min(pa - (char *)a, pb - pa);
-	vecswap(a, pb - r, r);
-	r = min((long)(pd - pc), (long)(pn - pd - es));
-	vecswap(pb, pn - r, r);
-	if ((r = pb - pa) > es)
-		qsort(a, r / es, es, cmp);
-	if ((r = pd - pc) > es) { 
-		/* Iterate rather than recurse to save stack space */
-		a = pn - r;
-		n = r / es;
-		goto loop;
-	}
-/*		qsort(pn - r, r / es, es, cmp);*/
-}
Index: linux-2.6.11-rc2/fs/xfs/Makefile
===================================================================
--- linux-2.6.11-rc2.orig/fs/xfs/Makefile
+++ linux-2.6.11-rc2/fs/xfs/Makefile
@@ -142,7 +142,6 @@ xfs-y				+= $(addprefix linux-2.6/, \
 xfs-y				+= $(addprefix support/, \
 				   debug.o \
 				   move.o \
-				   qsort.o \
 				   uuid.o)
 
 xfs-$(CONFIG_XFS_TRACE)		+= support/ktrace.o
Index: linux-2.6.11-rc2/fs/xfs/support/qsort.h
===================================================================
--- linux-2.6.11-rc2.orig/fs/xfs/support/qsort.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- * Further, this software is distributed without any warranty that it is
- * free of the rightful claim of any third person regarding infringement
- * or the like.  Any license provided herein, whether implied or
- * otherwise, applies only to this software file.  Patent licenses, if
- * any, provided herein do not apply to combinations of this program with
- * other software, or any other product whatsoever.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
- * Mountain View, CA  94043, or:
- *
- * http://www.sgi.com
- *
- * For further information regarding this notice, see:
- *
- * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
- */
-
-#ifndef QSORT_H
-#define QSORT_H
-
-extern void qsort (void *const pbase,
-		    size_t total_elems,
-		    size_t size,
-		    int (*cmp)(const void *, const void *));
-
-#endif
Index: linux-2.6.11-rc2/fs/xfs/linux-2.6/xfs_linux.h
===================================================================
--- linux-2.6.11-rc2.orig/fs/xfs/linux-2.6/xfs_linux.h
+++ linux-2.6.11-rc2/fs/xfs/linux-2.6/xfs_linux.h
@@ -64,7 +64,6 @@
 #include <sema.h>
 #include <time.h>
 
-#include <support/qsort.h>
 #include <support/ktrace.h>
 #include <support/debug.h>
 #include <support/move.h>
Index: linux-2.6.11-rc2/fs/Kconfig
===================================================================
--- linux-2.6.11-rc2.orig/fs/Kconfig
+++ linux-2.6.11-rc2/fs/Kconfig
@@ -306,6 +306,7 @@ config FS_POSIX_ACL
 
 config XFS_FS
 	tristate "XFS filesystem support"
+	select QSORT
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform.  It is completely multi-threaded, can

--
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-22 20:34 ` [patch 1/13] Qsort Andreas Gruenbacher
@ 2005-01-22 21:00   ` vlobanov
  2005-01-23  2:03     ` Felipe Alfaro Solana
  2005-01-23 21:24     ` Richard Henderson
       [not found]   ` <1106431568.4153.154.camel@laptopd505.fenrus.org>
                     ` (2 subsequent siblings)
  3 siblings, 2 replies; 47+ messages in thread
From: vlobanov @ 2005-01-22 21:00 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-kernel, Neil Brown, Trond Myklebust, Olaf Kirch,
	Andries E. Brouwer, Buck Huppmann, Andrew Morton

Hi,

I was just reading over the patch, and had a quick question/comment upon
the SWAP macro defined below. I think it's possible to do a tiny bit
better (better, of course, being subjective), as follows:

#define SWAP(a, b, size)			\
    do {					\
	register size_t __size = (size);	\
	register char * __a = (a), * __b = (b);	\
	do {					\
	    *__a ^= *__b;			\
	    *__b ^= *__a;			\
	    *__a ^= *__b;			\
	    __a++;				\
	    __b++;				\
	} while ((--__size) > 0);		\
    } while (0)

What do you think? :)

-Vadim Lobanov

On Sat, 22 Jan 2005, Andreas Gruenbacher wrote:

> Add a quicksort from glibc as a kernel library function, and switch
> xfs over to using it. The implementations are equivalent. The nfsacl
> protocol also requires a sort function, so it makes more sense in
> the common code.
>
> Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
> Acked-by: Olaf Kirch <okir@suse.de>
>
> Index: linux-2.6.11-rc2/include/linux/kernel.h
> ===================================================================
> --- linux-2.6.11-rc2.orig/include/linux/kernel.h
> +++ linux-2.6.11-rc2/include/linux/kernel.h
> @@ -93,6 +93,8 @@ extern int sscanf(const char *, const ch
>  	__attribute__ ((format (scanf,2,3)));
>  extern int vsscanf(const char *, const char *, va_list);
>
> +extern void qsort(void *, size_t, size_t, int (*)(const void *,const void *));
> +
>  extern int get_option(char **str, int *pint);
>  extern char *get_options(const char *str, int nints, int *ints);
>  extern unsigned long long memparse(char *ptr, char **retptr);
> Index: linux-2.6.11-rc2/lib/Kconfig
> ===================================================================
> --- linux-2.6.11-rc2.orig/lib/Kconfig
> +++ linux-2.6.11-rc2/lib/Kconfig
> @@ -30,6 +30,9 @@ config LIBCRC32C
>  	  require M here.  See Castagnoli93.
>  	  Module will be libcrc32c.
>
> +config QSORT
> +	bool "Quick Sort"
> +
>  #
>  # compression support is select'ed if needed
>  #
> Index: linux-2.6.11-rc2/lib/Makefile
> ===================================================================
> --- linux-2.6.11-rc2.orig/lib/Makefile
> +++ linux-2.6.11-rc2/lib/Makefile
> @@ -25,6 +25,7 @@ obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
>  obj-$(CONFIG_CRC32)	+= crc32.o
>  obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
>  obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
> +obj-$(CONFIG_QSORT)	+= qsort.o
>
>  obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
>  obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
> Index: linux-2.6.11-rc2/lib/qsort.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6.11-rc2/lib/qsort.c
> @@ -0,0 +1,249 @@
> +/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, write to the Free
> +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> +   02111-1307 USA.  */
> +
> +/* If you consider tuning this algorithm, you should consult first:
> +   Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
> +   Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */
> +
> +# include <linux/module.h>
> +# include <linux/slab.h>
> +# include <linux/string.h>
> +
> +MODULE_LICENSE("GPL");
> +
> +/* Byte-wise swap two items of size SIZE. */
> +#define SWAP(a, b, size)						      \
> +  do									      \
> +    {									      \
> +      register size_t __size = (size);					      \
> +      register char *__a = (a), *__b = (b);				      \
> +      do								      \
> +	{								      \
> +	  char __tmp = *__a;						      \
> +	  *__a++ = *__b;						      \
> +	  *__b++ = __tmp;						      \
> +	} while (--__size > 0);						      \
> +    } while (0)
> +
> +/* Discontinue quicksort algorithm when partition gets below this size.
> +   This particular magic number was chosen to work best on a Sun 4/260. */
> +#define MAX_THRESH 4
> +
> +/* Stack node declarations used to store unfulfilled partition obligations. */
> +typedef struct
> +  {
> +    char *lo;
> +    char *hi;
> +  } stack_node;
> +
> +/* The next 5 #defines implement a very fast in-line stack abstraction. */
> +/* The stack needs log (total_elements) entries (we could even subtract
> +   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
> +   upper bound for log (total_elements):
> +   bits per byte (CHAR_BIT) * sizeof(size_t).  */
> +#define CHAR_BIT 8
> +#define STACK_SIZE	(CHAR_BIT * sizeof(size_t))
> +#define PUSH(low, high)	((void) ((top->lo = (low)), (top->hi = (high)), ++top))
> +#define	POP(low, high)	((void) (--top, (low = top->lo), (high = top->hi)))
> +#define	STACK_NOT_EMPTY	(stack < top)
> +
> +
> +/* Order size using quicksort.  This implementation incorporates
> +   four optimizations discussed in Sedgewick:
> +
> +   1. Non-recursive, using an explicit stack of pointer that store the
> +      next array partition to sort.  To save time, this maximum amount
> +      of space required to store an array of SIZE_MAX is allocated on the
> +      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
> +      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
> +      Pretty cheap, actually.
> +
> +   2. Chose the pivot element using a median-of-three decision tree.
> +      This reduces the probability of selecting a bad pivot value and
> +      eliminates certain extraneous comparisons.
> +
> +   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
> +      insertion sort to order the MAX_THRESH items within each partition.
> +      This is a big win, since insertion sort is faster for small, mostly
> +      sorted array segments.
> +
> +   4. The larger of the two sub-partitions is always pushed onto the
> +      stack first, with the algorithm then concentrating on the
> +      smaller partition.  This *guarantees* no more than log (total_elems)
> +      stack size is needed (actually O(1) in this case)!  */
> +
> +void
> +qsort(void *const pbase, size_t total_elems, size_t size,
> +      int(*cmp)(const void *,const void *))
> +{
> +  register char *base_ptr = (char *) pbase;
> +
> +  const size_t max_thresh = MAX_THRESH * size;
> +
> +  if (total_elems == 0)
> +    /* Avoid lossage with unsigned arithmetic below.  */
> +    return;
> +
> +  if (total_elems > MAX_THRESH)
> +    {
> +      char *lo = base_ptr;
> +      char *hi = &lo[size * (total_elems - 1)];
> +      stack_node stack[STACK_SIZE];
> +      stack_node *top = stack + 1;
> +
> +      while (STACK_NOT_EMPTY)
> +        {
> +          char *left_ptr;
> +          char *right_ptr;
> +
> +	  /* Select median value from among LO, MID, and HI. Rearrange
> +	     LO and HI so the three values are sorted. This lowers the
> +	     probability of picking a pathological pivot value and
> +	     skips a comparison for both the LEFT_PTR and RIGHT_PTR in
> +	     the while loops. */
> +
> +	  char *mid = lo + size * ((hi - lo) / size >> 1);
> +
> +	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
> +	    SWAP (mid, lo, size);
> +	  if ((*cmp) ((void *) hi, (void *) mid) < 0)
> +	    SWAP (mid, hi, size);
> +	  else
> +	    goto jump_over;
> +	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
> +	    SWAP (mid, lo, size);
> +	jump_over:;
> +
> +	  left_ptr  = lo + size;
> +	  right_ptr = hi - size;
> +
> +	  /* Here's the famous ``collapse the walls'' section of quicksort.
> +	     Gotta like those tight inner loops!  They are the main reason
> +	     that this algorithm runs much faster than others. */
> +	  do
> +	    {
> +	      while ((*cmp) ((void *) left_ptr, (void *) mid) < 0)
> +		left_ptr += size;
> +
> +	      while ((*cmp) ((void *) mid, (void *) right_ptr) < 0)
> +		right_ptr -= size;
> +
> +	      if (left_ptr < right_ptr)
> +		{
> +		  SWAP (left_ptr, right_ptr, size);
> +		  if (mid == left_ptr)
> +		    mid = right_ptr;
> +		  else if (mid == right_ptr)
> +		    mid = left_ptr;
> +		  left_ptr += size;
> +		  right_ptr -= size;
> +		}
> +	      else if (left_ptr == right_ptr)
> +		{
> +		  left_ptr += size;
> +		  right_ptr -= size;
> +		  break;
> +		}
> +	    }
> +	  while (left_ptr <= right_ptr);
> +
> +          /* Set up pointers for next iteration.  First determine whether
> +             left and right partitions are below the threshold size.  If so,
> +             ignore one or both.  Otherwise, push the larger partition's
> +             bounds on the stack and continue sorting the smaller one. */
> +
> +          if ((size_t) (right_ptr - lo) <= max_thresh)
> +            {
> +              if ((size_t) (hi - left_ptr) <= max_thresh)
> +		/* Ignore both small partitions. */
> +                POP (lo, hi);
> +              else
> +		/* Ignore small left partition. */
> +                lo = left_ptr;
> +            }
> +          else if ((size_t) (hi - left_ptr) <= max_thresh)
> +	    /* Ignore small right partition. */
> +            hi = right_ptr;
> +          else if ((right_ptr - lo) > (hi - left_ptr))
> +            {
> +	      /* Push larger left partition indices. */
> +              PUSH (lo, right_ptr);
> +              lo = left_ptr;
> +            }
> +          else
> +            {
> +	      /* Push larger right partition indices. */
> +              PUSH (left_ptr, hi);
> +              hi = right_ptr;
> +            }
> +        }
> +    }
> +
> +  /* Once the BASE_PTR array is partially sorted by quicksort the rest
> +     is completely sorted using insertion sort, since this is efficient
> +     for partitions below MAX_THRESH size. BASE_PTR points to the beginning
> +     of the array to sort, and END_PTR points at the very last element in
> +     the array (*not* one beyond it!). */
> +
> +  {
> +    char *end_ptr = &base_ptr[size * (total_elems - 1)];
> +    char *tmp_ptr = base_ptr;
> +    char *thresh = min(end_ptr, base_ptr + max_thresh);
> +    register char *run_ptr;
> +
> +    /* Find smallest element in first threshold and place it at the
> +       array's beginning.  This is the smallest array element,
> +       and the operation speeds up insertion sort's inner loop. */
> +
> +    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
> +      if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
> +        tmp_ptr = run_ptr;
> +
> +    if (tmp_ptr != base_ptr)
> +      SWAP (tmp_ptr, base_ptr, size);
> +
> +    /* Insertion sort, running from left-hand-side up to right-hand-side.  */
> +
> +    run_ptr = base_ptr + size;
> +    while ((run_ptr += size) <= end_ptr)
> +      {
> +	tmp_ptr = run_ptr - size;
> +	while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
> +	  tmp_ptr -= size;
> +
> +	tmp_ptr += size;
> +        if (tmp_ptr != run_ptr)
> +          {
> +            char *trav;
> +
> +	    trav = run_ptr + size;
> +	    while (--trav >= run_ptr)
> +              {
> +                char c = *trav;
> +                char *hi, *lo;
> +
> +                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
> +                  *hi = *lo;
> +                *hi = c;
> +              }
> +          }
> +      }
> +  }
> +}
> +EXPORT_SYMBOL(qsort);
> Index: linux-2.6.11-rc2/fs/xfs/support/qsort.c
> ===================================================================
> --- linux-2.6.11-rc2.orig/fs/xfs/support/qsort.c
> +++ /dev/null
> @@ -1,155 +0,0 @@
> -/*
> - * Copyright (c) 1992, 1993
> - *	The Regents of the University of California.  All rights reserved.
> - *
> - * Redistribution and use in source and binary forms, with or without
> - * modification, are permitted provided that the following conditions
> - * are met:
> - * 1. Redistributions of source code must retain the above copyright
> - *    notice, this list of conditions and the following disclaimer.
> - * 2. Redistributions in binary form must reproduce the above copyright
> - *    notice, this list of conditions and the following disclaimer in the
> - *    documentation and/or other materials provided with the distribution.
> - * 3. Neither the name of the University nor the names of its contributors
> - *    may be used to endorse or promote products derived from this software
> - *    without specific prior written permission.
> - *
> - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
> - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> - * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
> - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> - * SUCH DAMAGE.
> - */
> -
> -#include <linux/kernel.h>
> -#include <linux/string.h>
> -
> -/*
> - * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
> - */
> -#define swapcode(TYPE, parmi, parmj, n) { 		\
> -	long i = (n) / sizeof (TYPE); 			\
> -	register TYPE *pi = (TYPE *) (parmi); 		\
> -	register TYPE *pj = (TYPE *) (parmj); 		\
> -	do { 						\
> -		register TYPE	t = *pi;		\
> -		*pi++ = *pj;				\
> -		*pj++ = t;				\
> -        } while (--i > 0);				\
> -}
> -
> -#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
> -	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;
> -
> -static __inline void
> -swapfunc(char *a, char *b, int n, int swaptype)
> -{
> -	if (swaptype <= 1)
> -		swapcode(long, a, b, n)
> -	else
> -		swapcode(char, a, b, n)
> -}
> -
> -#define swap(a, b)					\
> -	if (swaptype == 0) {				\
> -		long t = *(long *)(a);			\
> -		*(long *)(a) = *(long *)(b);		\
> -		*(long *)(b) = t;			\
> -	} else						\
> -		swapfunc(a, b, es, swaptype)
> -
> -#define vecswap(a, b, n) 	if ((n) > 0) swapfunc(a, b, n, swaptype)
> -
> -static __inline char *
> -med3(char *a, char *b, char *c, int (*cmp)(const void *, const void *))
> -{
> -	return cmp(a, b) < 0 ?
> -	       (cmp(b, c) < 0 ? b : (cmp(a, c) < 0 ? c : a ))
> -              :(cmp(b, c) > 0 ? b : (cmp(a, c) < 0 ? a : c ));
> -}
> -
> -void
> -qsort(void *aa, size_t n, size_t es, int (*cmp)(const void *, const void *))
> -{
> -	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
> -	int d, r, swaptype, swap_cnt;
> -	register char *a = aa;
> -
> -loop:	SWAPINIT(a, es);
> -	swap_cnt = 0;
> -	if (n < 7) {
> -		for (pm = (char *)a + es; pm < (char *) a + n * es; pm += es)
> -			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
> -			     pl -= es)
> -				swap(pl, pl - es);
> -		return;
> -	}
> -	pm = (char *)a + (n / 2) * es;
> -	if (n > 7) {
> -		pl = (char *)a;
> -		pn = (char *)a + (n - 1) * es;
> -		if (n > 40) {
> -			d = (n / 8) * es;
> -			pl = med3(pl, pl + d, pl + 2 * d, cmp);
> -			pm = med3(pm - d, pm, pm + d, cmp);
> -			pn = med3(pn - 2 * d, pn - d, pn, cmp);
> -		}
> -		pm = med3(pl, pm, pn, cmp);
> -	}
> -	swap(a, pm);
> -	pa = pb = (char *)a + es;
> -
> -	pc = pd = (char *)a + (n - 1) * es;
> -	for (;;) {
> -		while (pb <= pc && (r = cmp(pb, a)) <= 0) {
> -			if (r == 0) {
> -				swap_cnt = 1;
> -				swap(pa, pb);
> -				pa += es;
> -			}
> -			pb += es;
> -		}
> -		while (pb <= pc && (r = cmp(pc, a)) >= 0) {
> -			if (r == 0) {
> -				swap_cnt = 1;
> -				swap(pc, pd);
> -				pd -= es;
> -			}
> -			pc -= es;
> -		}
> -		if (pb > pc)
> -			break;
> -		swap(pb, pc);
> -		swap_cnt = 1;
> -		pb += es;
> -		pc -= es;
> -	}
> -	if (swap_cnt == 0) {  /* Switch to insertion sort */
> -		for (pm = (char *) a + es; pm < (char *) a + n * es; pm += es)
> -			for (pl = pm; pl > (char *) a && cmp(pl - es, pl) > 0;
> -			     pl -= es)
> -				swap(pl, pl - es);
> -		return;
> -	}
> -
> -	pn = (char *)a + n * es;
> -	r = min(pa - (char *)a, pb - pa);
> -	vecswap(a, pb - r, r);
> -	r = min((long)(pd - pc), (long)(pn - pd - es));
> -	vecswap(pb, pn - r, r);
> -	if ((r = pb - pa) > es)
> -		qsort(a, r / es, es, cmp);
> -	if ((r = pd - pc) > es) {
> -		/* Iterate rather than recurse to save stack space */
> -		a = pn - r;
> -		n = r / es;
> -		goto loop;
> -	}
> -/*		qsort(pn - r, r / es, es, cmp);*/
> -}
> Index: linux-2.6.11-rc2/fs/xfs/Makefile
> ===================================================================
> --- linux-2.6.11-rc2.orig/fs/xfs/Makefile
> +++ linux-2.6.11-rc2/fs/xfs/Makefile
> @@ -142,7 +142,6 @@ xfs-y				+= $(addprefix linux-2.6/, \
>  xfs-y				+= $(addprefix support/, \
>  				   debug.o \
>  				   move.o \
> -				   qsort.o \
>  				   uuid.o)
>
>  xfs-$(CONFIG_XFS_TRACE)		+= support/ktrace.o
> Index: linux-2.6.11-rc2/fs/xfs/support/qsort.h
> ===================================================================
> --- linux-2.6.11-rc2.orig/fs/xfs/support/qsort.h
> +++ /dev/null
> @@ -1,41 +0,0 @@
> -/*
> - * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
> - *
> - * This program is free software; you can redistribute it and/or modify it
> - * under the terms of version 2 of the GNU General Public License as
> - * published by the Free Software Foundation.
> - *
> - * This program is distributed in the hope that it would be useful, but
> - * WITHOUT ANY WARRANTY; without even the implied warranty of
> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
> - *
> - * Further, this software is distributed without any warranty that it is
> - * free of the rightful claim of any third person regarding infringement
> - * or the like.  Any license provided herein, whether implied or
> - * otherwise, applies only to this software file.  Patent licenses, if
> - * any, provided herein do not apply to combinations of this program with
> - * other software, or any other product whatsoever.
> - *
> - * You should have received a copy of the GNU General Public License along
> - * with this program; if not, write the Free Software Foundation, Inc., 59
> - * Temple Place - Suite 330, Boston MA 02111-1307, USA.
> - *
> - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
> - * Mountain View, CA  94043, or:
> - *
> - * http://www.sgi.com
> - *
> - * For further information regarding this notice, see:
> - *
> - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
> - */
> -
> -#ifndef QSORT_H
> -#define QSORT_H
> -
> -extern void qsort (void *const pbase,
> -		    size_t total_elems,
> -		    size_t size,
> -		    int (*cmp)(const void *, const void *));
> -
> -#endif
> Index: linux-2.6.11-rc2/fs/xfs/linux-2.6/xfs_linux.h
> ===================================================================
> --- linux-2.6.11-rc2.orig/fs/xfs/linux-2.6/xfs_linux.h
> +++ linux-2.6.11-rc2/fs/xfs/linux-2.6/xfs_linux.h
> @@ -64,7 +64,6 @@
>  #include <sema.h>
>  #include <time.h>
>
> -#include <support/qsort.h>
>  #include <support/ktrace.h>
>  #include <support/debug.h>
>  #include <support/move.h>
> Index: linux-2.6.11-rc2/fs/Kconfig
> ===================================================================
> --- linux-2.6.11-rc2.orig/fs/Kconfig
> +++ linux-2.6.11-rc2/fs/Kconfig
> @@ -306,6 +306,7 @@ config FS_POSIX_ACL
>
>  config XFS_FS
>  	tristate "XFS filesystem support"
> +	select QSORT
>  	help
>  	  XFS is a high performance journaling filesystem which originated
>  	  on the SGI IRIX platform.  It is completely multi-threaded, can
>
> --
> Andreas Gruenbacher <agruen@suse.de>
> SUSE Labs, SUSE LINUX PRODUCTS GMBH
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
       [not found]   ` <1106431568.4153.154.camel@laptopd505.fenrus.org>
@ 2005-01-22 22:10     ` Andreas Gruenbacher
  0 siblings, 0 replies; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-22 22:10 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: linux-kernel@vger.kernel.org

On Sat, 2005-01-22 at 23:06, Arjan van de Ven wrote:
> since you took the glibc one.. the glibc authors have repeatedly asked
> if glibc code that goes into the kernel will be export_symbol_gpl only
> due to their view of the gpl and lgpl

Sure, no big deal. We could equally well take the xfs one instead.

Cheers,
-- 
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX GMBH


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-22 20:34 ` [patch 1/13] Qsort Andreas Gruenbacher
  2005-01-22 21:00   ` vlobanov
       [not found]   ` <1106431568.4153.154.camel@laptopd505.fenrus.org>
@ 2005-01-22 23:28   ` Matt Mackall
  2005-01-23  0:21     ` Matt Mackall
  2005-01-23  5:08     ` Andreas Gruenbacher
  2005-01-24  3:48   ` Horst von Brand
  3 siblings, 2 replies; 47+ messages in thread
From: Matt Mackall @ 2005-01-22 23:28 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-kernel, Neil Brown, Trond Myklebust, Olaf Kirch,
	Andries E. Brouwer, Buck Huppmann, Andrew Morton

On Sat, Jan 22, 2005 at 09:34:01PM +0100, Andreas Gruenbacher wrote:
> Add a quicksort from glibc as a kernel library function, and switch
> xfs over to using it. The implementations are equivalent. The nfsacl
> protocol also requires a sort function, so it makes more sense in
> the common code.

Please update this to kernel formatting standards and try to modernize
it a bit.

> +/* Byte-wise swap two items of size SIZE. */
> +#define SWAP(a, b, size)						      \
> +  do									      \
> +    {									      \
> +      register size_t __size = (size);					      \
> +      register char *__a = (a), *__b = (b);				      \
> +      do								      \
> +	{								      \
> +	  char __tmp = *__a;						      \
> +	  *__a++ = *__b;						      \
> +	  *__b++ = __tmp;						      \
> +	} while (--__size > 0);						      \
> +    } while (0)

Inline, please? Register keyword?!

> +typedef struct
> +  {
> +    char *lo;
> +    char *hi;
> +  } stack_node;

void *, please

> +
> +/* The next 5 #defines implement a very fast in-line stack abstraction. */
> +/* The stack needs log (total_elements) entries (we could even subtract
> +   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
> +   upper bound for log (total_elements):
> +   bits per byte (CHAR_BIT) * sizeof(size_t).  */
> +#define CHAR_BIT 8
> +#define STACK_SIZE	(CHAR_BIT * sizeof(size_t))

So the stack is going to be either 256 or 1024 bytes. Seems like we
ought to kmalloc it.

> +#define PUSH(low, high)	((void) ((top->lo = (low)), (top->hi = (high)), ++top))
> +#define	POP(low, high)	((void) (--top, (low = top->lo), (high = top->hi)))
> +#define	STACK_NOT_EMPTY	(stack < top)

There's only one usage of POP, one of STACK_NOT_EMPTY and two of PUSH
that can trivially be made one. Please kill these macros.

> +   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
> +      insertion sort to order the MAX_THRESH items within each partition.
> +      This is a big win, since insertion sort is faster for small, mostly
> +      sorted array segments.

This observation may be dated, instruction cache issues may dominate now.

> +	  char *mid = lo + size * ((hi - lo) / size >> 1);

Get rid of all this char* stuff, please. It makes for lots of ugly and
unnecessary casting.

> +	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
> +	    SWAP (mid, lo, size);

cmp(mid, lo)

> +	  if ((*cmp) ((void *) hi, (void *) mid) < 0)
> +	    SWAP (mid, hi, size);
> +	  else
> +	    goto jump_over;
> +	  if ((*cmp) ((void *) mid, (void *) lo) < 0)
> +	    SWAP (mid, lo, size);
> +	jump_over:;

?!

> +  /* Once the BASE_PTR array is partially sorted by quicksort the rest
> +     is completely sorted using insertion sort, since this is efficient
> +     for partitions below MAX_THRESH size. BASE_PTR points to the beginning
> +     of the array to sort, and END_PTR points at the very last element in
> +     the array (*not* one beyond it!). */
> +
> +  {
> +    char *end_ptr = &base_ptr[size * (total_elems - 1)];
> +    char *tmp_ptr = base_ptr;
> +    char *thresh = min(end_ptr, base_ptr + max_thresh);
> +    register char *run_ptr;

Move these vars to the top or better yet, split this into two functions.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-22 23:28   ` Matt Mackall
@ 2005-01-23  0:21     ` Matt Mackall
  2005-01-23  5:08     ` Andreas Gruenbacher
  1 sibling, 0 replies; 47+ messages in thread
From: Matt Mackall @ 2005-01-23  0:21 UTC (permalink / raw)
  To: Andreas Gruenbacher; +Cc: linux-kernel

On Sat, Jan 22, 2005 at 03:28:14PM -0800, Matt Mackall wrote:
> On Sat, Jan 22, 2005 at 09:34:01PM +0100, Andreas Gruenbacher wrote:
> > Add a quicksort from glibc as a kernel library function, and switch
> > xfs over to using it. The implementations are equivalent. The nfsacl
> > protocol also requires a sort function, so it makes more sense in
> > the common code.
> 
> Please update this to kernel formatting standards and try to modernize
> it a bit.

I started working on this with an eye to doing some performance
testing of the insertion sort threshold in userspace, but I'm about to
head out for the day. Here's what I've got so far, compiles but
untested. Note the insertion sort at the end really ought to be using
memmove as well.

/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* If you consider tuning this algorithm, you should consult first:
   Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
   Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993.  */

#include <unistd.h>
#include <stdlib.h>

#define min(x,y) ({ \
	typeof(x) _x = (x);	\
	typeof(y) _y = (y);	\
	(void) (&_x == &_y);		\
	_x < _y ? _x : _y; })

/* Byte-wise swap two items of size SIZE. */
#define SWAP(a, b, size)						      \
  do									      \
    {									      \
      size_t __size = (size);					      \
      char *__a = (a), *__b = (b);				      \
      do								      \
	{								      \
	  char __tmp = *__a;						      \
	  *__a++ = *__b;						      \
	  *__b++ = __tmp;						      \
	} while (--__size > 0);						      \
    } while (0)

/* Discontinue quicksort algorithm when partition gets below this size.
   This particular magic number was chosen to work best on a Sun 4/260. */
#define MAX_THRESH 4

/* Stack node declarations used to store unfulfilled partition obligations. */
typedef struct {
	void *lo;
	void *hi;
} stack_node;

/* Order size using quicksort.  This implementation incorporates
   four optimizations discussed in Sedgewick:

   1. Non-recursive, using an explicit stack of pointer that store the
      next array partition to sort.  To save time, this maximum amount
      of space required to store an array of SIZE_MAX is allocated on the
      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
      Pretty cheap, actually.

   2. Chose the pivot element using a median-of-three decision tree.
      This reduces the probability of selecting a bad pivot value and
      eliminates certain extraneous comparisons.

   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
      insertion sort to order the MAX_THRESH items within each partition.
      This is a big win, since insertion sort is faster for small, mostly
      sorted array segments.

   4. The larger of the two sub-partitions is always pushed onto the
      stack first, with the algorithm then concentrating on the
      smaller partition.  This *guarantees* no more than log (total_elems)
      stack size is needed (actually O(1) in this case)!  */

void qsort(void *base, size_t num, size_t size,
	   int (*cmp) (const void *, const void *))
{
	const size_t max_thresh = MAX_THRESH * size;
	void *hi, *lo, *mid, *left, *right;
	void *end = base + (size * (num - 1));
	void *tmp = base;
	void *thresh = min(end, base + max_thresh);
	void *run, *trav;
	stack_node *stack, *top;

	if (num == 0)
		return;

	lo = base;
	hi = lo + size * (num - 1);
	if (num > MAX_THRESH) {
		stack = malloc(8 * sizeof(size_t) * sizeof(stack_node));
		top = stack + 1;

		while (stack < top) {
			/* Select median value from among LO, MID, and
			   HI. Rearrange LO and HI so the three values
			   are sorted. This lowers the probability of
			   picking a pathological pivot value and
			   skips a comparison for both the LEFT
			   and RIGHT in the while loops. */

			mid = lo + size * ((hi - lo) / size >> 1);

			if (cmp(mid, lo) < 0)
				SWAP(mid, lo, size);
			if (cmp(hi, mid) < 0) {
				SWAP(mid, hi, size);
				if (cmp(mid, lo) < 0)
					SWAP(mid, lo, size);
			}

			left = lo + size;
			right = hi - size;

			/* Here's the famous ``collapse the walls''
			   section of quicksort. Gotta like those
			   tight inner loops! They are the main reason
			   that this algorithm runs much faster than
			   others. */

			do {
				while (cmp(left, mid) < 0)
					left += size;
				while (cmp(mid, right) < 0)
					right -= size;

				if (left < right) {
					SWAP(left, right, size);
					if (mid == left)
						mid = right;
					else if (mid == right)
						mid = left;
					left += size;
					right -= size;
				} else if (left == right) {
					left += size;
					right -= size;
					break;
				}
			}
			while (left <= right);

			/* Set up pointers for next iteration. First
			   determine whether left and right partitions
			   are below the threshold size. If so, ignore
			   one or both. Otherwise, push the larger
			   partition's bounds on the stack and
			   continue sorting the smaller one. */

			if ((right - lo) <= max_thresh) {
				if ((hi - left) <= max_thresh) {
					/* Ignore both small partitions. */
					--top;
					lo = top->lo;
					hi = top->hi;
				} else
					/* Ignore small left partition. */
					lo = left;
			} else if ((hi - left) <= max_thresh)
				/* Ignore small right partition. */
				hi = right;
			else if ((right - lo) > (hi - left)) {
				/* Push larger left partition indices. */
				top->lo = lo;
				top->hi = right;
				top++;
				lo = left;
			} else {
				/* Push larger right partition indices. */
				top->lo = left;
				top->hi = hi;
				top++;
				hi = right;
			}
		}

		free(stack);
	}

	/* Once the BASE array is partially sorted by quicksort
	   the rest is completely sorted using insertion sort, since
	   this is efficient for partitions below MAX_THRESH size.
	   BASE points to the beginning of the array to sort, and
	   END points at the very last element in the array (*not*
	   one beyond it!). */

	/* Find smallest element in first threshold and place it at
	   the array's beginning. This is the smallest array element,
	   and the operation speeds up insertion sort's inner loop. */

	for (run = tmp + size; run <= thresh; run += size) {
		if (cmp(run, tmp) < 0)
			tmp = run;

		if (tmp != base)
			SWAP(tmp, base, size);

		/* Insertion sort, running from left-hand-side up to
		 * right-hand-side.  */

		run = base + size;
		while ((run += size) <= end) {
			tmp = run - size;
			while (cmp(run, tmp) < 0)
				tmp -= size;

			tmp += size;
			if (tmp != run) {
				trav = run + size;
				while (--trav >= run) {
					char c = *(char *)trav;
					for (hi = lo = trav;
					     (lo -= size) >= tmp; hi = lo)
						*(char *)hi = *(char *)lo;
					*(char *)hi = c;
				}
			}
		}
	}
}



-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-22 21:00   ` vlobanov
@ 2005-01-23  2:03     ` Felipe Alfaro Solana
  2005-01-23  2:39       ` Andi Kleen
                         ` (2 more replies)
  2005-01-23 21:24     ` Richard Henderson
  1 sibling, 3 replies; 47+ messages in thread
From: Felipe Alfaro Solana @ 2005-01-23  2:03 UTC (permalink / raw)
  To: vlobanov
  Cc: Trond Myklebust, linux-kernel, Buck Huppmann, Neil Brown,
	Andreas Gruenbacher, Andries E. Brouwer, Andrew Morton,
	Olaf Kirch

On 22 Jan 2005, at 22:00, vlobanov wrote:

> Hi,
>
> I was just reading over the patch, and had a quick question/comment 
> upon
> the SWAP macro defined below. I think it's possible to do a tiny bit
> better (better, of course, being subjective), as follows:
>
> #define SWAP(a, b, size)			\
>     do {					\
> 	register size_t __size = (size);	\
> 	register char * __a = (a), * __b = (b);	\
> 	do {					\
> 	    *__a ^= *__b;			\
> 	    *__b ^= *__a;			\
> 	    *__a ^= *__b;			\
> 	    __a++;				\
> 	    __b++;				\
> 	} while ((--__size) > 0);		\
>     } while (0)
>
> What do you think? :)

AFAIK, XOR is quite expensive on IA32 when compared to simple MOV 
operatings. Also, since the original patch uses 3 MOVs to perform the 
swapping, and your version uses 3 XOR operations, I don't see any 
gains.

Am I missing something?


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  2:03     ` Felipe Alfaro Solana
@ 2005-01-23  2:39       ` Andi Kleen
  2005-01-23  3:02         ` Jesper Juhl
                           ` (2 more replies)
  2005-01-23  4:22       ` Matt Mackall
  2005-01-23  5:44       ` Willy Tarreau
  2 siblings, 3 replies; 47+ messages in thread
From: Andi Kleen @ 2005-01-23  2:39 UTC (permalink / raw)
  To: Felipe Alfaro Solana
  Cc: Trond Myklebust, linux-kernel, Buck Huppmann, Neil Brown,
	Andreas Gruenbacher, Andries E. Brouwer, Andrew Morton,
	Olaf Kirch

Felipe Alfaro Solana <lkml@mac.com> writes:
>
> AFAIK, XOR is quite expensive on IA32 when compared to simple MOV
> operatings. Also, since the original patch uses 3 MOVs to perform the
> swapping, and your version uses 3 XOR operations, I don't see any
> gains.

Both are one cycle latency for register<->register on all x86 cores
I've looked at. What makes you think differently?

-Andi (who thinks the glibc qsort is vast overkill for kernel purposes
where there are only small data sets and it would be better to use a 
simpler one optimized for code size)


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  2:39       ` Andi Kleen
@ 2005-01-23  3:02         ` Jesper Juhl
  2005-01-23  4:46           ` Andi Kleen
  2005-01-23  4:29         ` Matt Mackall
  2005-01-23  4:58         ` Felipe Alfaro Solana
  2 siblings, 1 reply; 47+ messages in thread
From: Jesper Juhl @ 2005-01-23  3:02 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

On Sun, 23 Jan 2005, Andi Kleen wrote:

> Felipe Alfaro Solana <lkml@mac.com> writes:
> >
> > AFAIK, XOR is quite expensive on IA32 when compared to simple MOV
> > operatings. Also, since the original patch uses 3 MOVs to perform the
> > swapping, and your version uses 3 XOR operations, I don't see any
> > gains.
> 
> Both are one cycle latency for register<->register on all x86 cores
> I've looked at. What makes you think differently?
> 
> -Andi (who thinks the glibc qsort is vast overkill for kernel purposes
> where there are only small data sets and it would be better to use a 
> simpler one optimized for code size)
> 
How about a shell sort?  if the data is mostly sorted shell sort beats 
qsort lots of times, and since the data sets are often small in-kernel, 
shell sorts O(n^2) behaviour won't harm it too much, shell sort is also 
faster if the data is already completely sorted. Shell sort is certainly 
not the simplest algorithm around, but I think (without having done any 
tests) that it would probably do pretty well for in-kernel use... Then 
again, I've known to be wrong :)


-- 
Jesper Juhl


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  2:03     ` Felipe Alfaro Solana
  2005-01-23  2:39       ` Andi Kleen
@ 2005-01-23  4:22       ` Matt Mackall
  2005-01-23  5:44       ` Willy Tarreau
  2 siblings, 0 replies; 47+ messages in thread
From: Matt Mackall @ 2005-01-23  4:22 UTC (permalink / raw)
  To: Felipe Alfaro Solana
  Cc: vlobanov, Trond Myklebust, linux-kernel, Buck Huppmann,
	Neil Brown, Andreas Gruenbacher, Andries E. Brouwer,
	Andrew Morton, Olaf Kirch

On Sun, Jan 23, 2005 at 03:03:32AM +0100, Felipe Alfaro Solana wrote:
> On 22 Jan 2005, at 22:00, vlobanov wrote:
> 
> >Hi,
> >
> >I was just reading over the patch, and had a quick question/comment 
> >upon
> >the SWAP macro defined below. I think it's possible to do a tiny bit
> >better (better, of course, being subjective), as follows:
> >
> >#define SWAP(a, b, size)			\
> >    do {					\
> >	register size_t __size = (size);	\
> >	register char * __a = (a), * __b = (b);	\
> >	do {					\
> >	    *__a ^= *__b;			\
> >	    *__b ^= *__a;			\
> >	    *__a ^= *__b;			\
> >	    __a++;				\
> >	    __b++;				\
> >	} while ((--__size) > 0);		\
> >    } while (0)
> >
> >What do you think? :)
> 
> AFAIK, XOR is quite expensive on IA32 when compared to simple MOV 
> operatings. Also, since the original patch uses 3 MOVs to perform the 
> swapping, and your version uses 3 XOR operations, I don't see any 
> gains.
> 
> Am I missing something?

No temporary variable needed in the xor version. mov and xor are
roughly the same speed, but xor modifies flags.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  2:39       ` Andi Kleen
  2005-01-23  3:02         ` Jesper Juhl
@ 2005-01-23  4:29         ` Matt Mackall
  2005-01-24  0:21           ` Nathan Scott
  2005-01-24  4:02           ` Horst von Brand
  2005-01-23  4:58         ` Felipe Alfaro Solana
  2 siblings, 2 replies; 47+ messages in thread
From: Matt Mackall @ 2005-01-23  4:29 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

On Sun, Jan 23, 2005 at 03:39:34AM +0100, Andi Kleen wrote:
> Felipe Alfaro Solana <lkml@mac.com> writes:
> >
> > AFAIK, XOR is quite expensive on IA32 when compared to simple MOV
> > operatings. Also, since the original patch uses 3 MOVs to perform the
> > swapping, and your version uses 3 XOR operations, I don't see any
> > gains.
> 
> Both are one cycle latency for register<->register on all x86 cores
> I've looked at. What makes you think differently?
> 
> -Andi (who thinks the glibc qsort is vast overkill for kernel purposes
> where there are only small data sets and it would be better to use a 
> simpler one optimized for code size)

Mostly agreed. Except:

a) the glibc version is not actually all that optimized
b) it's nice that it's not recursive
c) the three-way median selection does help avoid worst-case O(n^2)
behavior, which might potentially be triggerable by users in places
like XFS where this is used

I'll probably whip up a simpler version tomorrow or Monday and do some
size/space benchmarking. I've been meaning to contribute a qsort for
doubly-linked lists I've got lying around as well.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  3:02         ` Jesper Juhl
@ 2005-01-23  4:46           ` Andi Kleen
  2005-01-23  5:05             ` Jesper Juhl
  2005-01-24 22:04             ` Mike Waychison
  0 siblings, 2 replies; 47+ messages in thread
From: Andi Kleen @ 2005-01-23  4:46 UTC (permalink / raw)
  To: Jesper Juhl
  Cc: Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

> How about a shell sort?  if the data is mostly sorted shell sort beats 
> qsort lots of times, and since the data sets are often small in-kernel, 
> shell sorts O(n^2) behaviour won't harm it too much, shell sort is also 
> faster if the data is already completely sorted. Shell sort is certainly 
> not the simplest algorithm around, but I think (without having done any 
> tests) that it would probably do pretty well for in-kernel use... Then 
> again, I've known to be wrong :)

I like shell sort for small data sets too. And I agree it would be 
appropiate for the kernel.

-Andi

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  2:39       ` Andi Kleen
  2005-01-23  3:02         ` Jesper Juhl
  2005-01-23  4:29         ` Matt Mackall
@ 2005-01-23  4:58         ` Felipe Alfaro Solana
  2005-01-24 21:20           ` Matt Mackall
  2 siblings, 1 reply; 47+ messages in thread
From: Felipe Alfaro Solana @ 2005-01-23  4:58 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Neil Brown, linux-kernel, Buck Huppmann, Trond Myklebust,
	Andreas Gruenbacher, Andries E. Brouwer, Andrew Morton,
	Olaf Kirch

On 23 Jan 2005, at 03:39, Andi Kleen wrote:

> Felipe Alfaro Solana <lkml@mac.com> writes:
>>
>> AFAIK, XOR is quite expensive on IA32 when compared to simple MOV
>> operatings. Also, since the original patch uses 3 MOVs to perform the
>> swapping, and your version uses 3 XOR operations, I don't see any
>> gains.
>
> Both are one cycle latency for register<->register on all x86 cores
> I've looked at. What makes you think differently?

I thought XOR was more expensie. Anyways, I still don't see any 
advantage in replacing 3 MOVs with 3 XORs.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  4:46           ` Andi Kleen
@ 2005-01-23  5:05             ` Jesper Juhl
  2005-01-23 10:37               ` Rafael J. Wysocki
                                 ` (2 more replies)
  2005-01-24 22:04             ` Mike Waychison
  1 sibling, 3 replies; 47+ messages in thread
From: Jesper Juhl @ 2005-01-23  5:05 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jesper Juhl, Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

On Sun, 23 Jan 2005, Andi Kleen wrote:

> > How about a shell sort?  if the data is mostly sorted shell sort beats 
> > qsort lots of times, and since the data sets are often small in-kernel, 
> > shell sorts O(n^2) behaviour won't harm it too much, shell sort is also 
> > faster if the data is already completely sorted. Shell sort is certainly 
> > not the simplest algorithm around, but I think (without having done any 
> > tests) that it would probably do pretty well for in-kernel use... Then 
> > again, I've known to be wrong :)
> 
> I like shell sort for small data sets too. And I agree it would be 
> appropiate for the kernel.
> 
Even with large data sets that are mostly unsorted shell sorts performance 
is close to qsort, and there's an optimization that gives it O(n^(3/2)) 
runtime (IIRC), and another nice property is that it's iterative so it 
doesn't eat up stack space (as oposed to qsort which is recursive and eats 
stack like ****)...
Yeah, I think shell sort would be good for the kernel.


-- 
Jesper Juhl




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-22 23:28   ` Matt Mackall
  2005-01-23  0:21     ` Matt Mackall
@ 2005-01-23  5:08     ` Andreas Gruenbacher
  2005-01-23  5:32       ` Matt Mackall
  1 sibling, 1 reply; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-23  5:08 UTC (permalink / raw)
  To: Matt Mackall
  Cc: linux-kernel, Neil Brown, Trond Myklebust, Olaf Kirch,
	Andries E. Brouwer, Buck Huppmann, Andrew Morton

On Sunday 23 January 2005 00:28, Matt Mackall wrote:
> So the stack is going to be either 256 or 1024 bytes. Seems like we
> ought to kmalloc it.

This will do. I didn't check if the +1 is strictly needed.

-      stack_node stack[STACK_SIZE];
+      stack_node stack[fls(size) - fls(MAX_THRESH) + 1];

-- 
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  5:08     ` Andreas Gruenbacher
@ 2005-01-23  5:32       ` Matt Mackall
  2005-01-23 12:22         ` Andreas Gruenbacher
  0 siblings, 1 reply; 47+ messages in thread
From: Matt Mackall @ 2005-01-23  5:32 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-kernel, Neil Brown, Trond Myklebust, Olaf Kirch,
	Andries E. Brouwer, Buck Huppmann, Andrew Morton

On Sun, Jan 23, 2005 at 06:08:36AM +0100, Andreas Gruenbacher wrote:
> On Sunday 23 January 2005 00:28, Matt Mackall wrote:
> > So the stack is going to be either 256 or 1024 bytes. Seems like we
> > ought to kmalloc it.
> 
> This will do. I didn't check if the +1 is strictly needed.
> 
> -      stack_node stack[STACK_SIZE];
> +      stack_node stack[fls(size) - fls(MAX_THRESH) + 1];

Yes, indeed. Though I think even here, we'd prefer to use kmalloc
because gcc generates suboptimal code for variable-sized stack vars.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  2:03     ` Felipe Alfaro Solana
  2005-01-23  2:39       ` Andi Kleen
  2005-01-23  4:22       ` Matt Mackall
@ 2005-01-23  5:44       ` Willy Tarreau
  2 siblings, 0 replies; 47+ messages in thread
From: Willy Tarreau @ 2005-01-23  5:44 UTC (permalink / raw)
  To: Felipe Alfaro Solana
  Cc: vlobanov, Trond Myklebust, linux-kernel, Buck Huppmann,
	Neil Brown, Andreas Gruenbacher, Andries E. Brouwer,
	Andrew Morton, Olaf Kirch

Hi,

On Sun, Jan 23, 2005 at 03:03:32AM +0100, Felipe Alfaro Solana wrote:
> On 22 Jan 2005, at 22:00, vlobanov wrote:
> >#define SWAP(a, b, size)			\
> >    do {					\
> >	register size_t __size = (size);	\
> >	register char * __a = (a), * __b = (b);	\
> >	do {					\
> >	    *__a ^= *__b;			\
> >	    *__b ^= *__a;			\
> >	    *__a ^= *__b;			\
> >	    __a++;				\
> >	    __b++;				\
> >	} while ((--__size) > 0);		\
> >    } while (0)
> >
> >What do you think? :)
> 
> AFAIK, XOR is quite expensive on IA32 when compared to simple MOV 
> operatings. Also, since the original patch uses 3 MOVs to perform the 
> swapping, and your version uses 3 XOR operations, I don't see any 
> gains.

It will even be worse because we are accessing memory, and most architectures
will not be able to use a memory reference for both operands of the XOR.
Basically, what will be generated will look like this :

  tmp = *b
  *a ^= tmp
  tmp ^= *a
  *b = tmp
  *a ^= tmp

which is 5 cycles, or 4 if the two last instructions get merged. And there's
3 memory reads + 3 memory writes (assuming that the CPU will be smart enough
to reuse *a without accessing memory at instruction 3).

The move is quite faster :

   tmp1 = *a
   tmp2 = *b
   *a = tmp2
   *b = tmp1

This is 4 cycles on simple CPUs, or even 2 cycles on most of todays CPUs
which can do the first two fetches at once, and the last two writes at once.
And there are only two reads and two writes.

Clearly this one is better.

Regards,
Willy

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  5:05             ` Jesper Juhl
@ 2005-01-23 10:37               ` Rafael J. Wysocki
  2005-01-24  4:29                 ` Horst von Brand
  2005-01-24 15:45               ` Alan Cox
  2005-01-24 17:10               ` H. Peter Anvin
  2 siblings, 1 reply; 47+ messages in thread
From: Rafael J. Wysocki @ 2005-01-23 10:37 UTC (permalink / raw)
  To: Jesper Juhl
  Cc: Andi Kleen, Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

On Sunday, 23 of January 2005 06:05, Jesper Juhl wrote:
> On Sun, 23 Jan 2005, Andi Kleen wrote:
> 
> > > How about a shell sort?  if the data is mostly sorted shell sort beats 
> > > qsort lots of times, and since the data sets are often small in-kernel, 
> > > shell sorts O(n^2) behaviour won't harm it too much, shell sort is also 
> > > faster if the data is already completely sorted. Shell sort is certainly 
> > > not the simplest algorithm around, but I think (without having done any 
> > > tests) that it would probably do pretty well for in-kernel use... Then 
> > > again, I've known to be wrong :)
> > 
> > I like shell sort for small data sets too. And I agree it would be 
> > appropiate for the kernel.
> > 
> Even with large data sets that are mostly unsorted shell sorts performance 
> is close to qsort, and there's an optimization that gives it O(n^(3/2)) 
> runtime (IIRC),

Yes, there is.

> and another nice property is that it's iterative so it  
> doesn't eat up stack space (as oposed to qsort which is recursive and eats 
> stack like ****)...

To be precise, one needs ~(log N) of stack space for qsort, and frankly, one
should use something like the shell (or should I say Shell?) sort for sorting
small sets of elements in qsort as well.

> Yeah, I think shell sort would be good for the kernel.

I agree.

Greets,
RJW


-- 
- Would you tell me, please, which way I ought to go from here?
- That depends a good deal on where you want to get to.
		-- Lewis Carroll "Alice's Adventures in Wonderland"

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  5:32       ` Matt Mackall
@ 2005-01-23 12:22         ` Andreas Gruenbacher
  2005-01-23 16:49           ` Matt Mackall
  0 siblings, 1 reply; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-23 12:22 UTC (permalink / raw)
  To: Matt Mackall
  Cc: linux-kernel, Neil Brown, Trond Myklebust, Olaf Kirch,
	Andries E. Brouwer, Andrew Morton

On Sunday 23 January 2005 06:32, Matt Mackall wrote:
> Yes, indeed. Though I think even here, we'd prefer to use kmalloc
> because gcc generates suboptimal code for variable-sized stack vars.

That's ridiculous. kmalloc isn't even close to whatever suboptimal code gcc 
might produce here. Also I'm not convinced that gcc generates bad code in the 
first place. The code I get makes perfect sense.

-- Andreas.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23 12:22         ` Andreas Gruenbacher
@ 2005-01-23 16:49           ` Matt Mackall
  0 siblings, 0 replies; 47+ messages in thread
From: Matt Mackall @ 2005-01-23 16:49 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-kernel, Neil Brown, Trond Myklebust, Olaf Kirch,
	Andries E. Brouwer, Andrew Morton

On Sun, Jan 23, 2005 at 01:22:13PM +0100, Andreas Gruenbacher wrote:
> On Sunday 23 January 2005 06:32, Matt Mackall wrote:
> > Yes, indeed. Though I think even here, we'd prefer to use kmalloc
> > because gcc generates suboptimal code for variable-sized stack vars.
> 
> That's ridiculous. kmalloc isn't even close to whatever suboptimal
> code gcc might produce here. Also I'm not convinced that gcc
> generates bad code in the first place. The code I get makes perfect
> sense.

Fixed-sized slab-based kmalloc is O(1) (and pretty darn fast). If we
take a constant overhead for every local variable lookup in qsort,
that's O(n log n). Putting the stack vars last might fix that, but I
think it needs testing. I'll try it.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-22 21:00   ` vlobanov
  2005-01-23  2:03     ` Felipe Alfaro Solana
@ 2005-01-23 21:24     ` Richard Henderson
  1 sibling, 0 replies; 47+ messages in thread
From: Richard Henderson @ 2005-01-23 21:24 UTC (permalink / raw)
  To: vlobanov
  Cc: Andreas Gruenbacher, linux-kernel, Neil Brown, Trond Myklebust,
	Olaf Kirch, Andries E. Brouwer, Buck Huppmann, Andrew Morton

On Sat, Jan 22, 2005 at 01:00:24PM -0800, vlobanov wrote:
> #define SWAP(a, b, size)			\
>     do {					\
> 	register size_t __size = (size);	\
> 	register char * __a = (a), * __b = (b);	\
> 	do {					\
> 	    *__a ^= *__b;			\
> 	    *__b ^= *__a;			\
> 	    *__a ^= *__b;			\
> 	    __a++;				\
> 	    __b++;				\
> 	} while ((--__size) > 0);		\
>     } while (0)
> 
> What do you think? :)

I think you'll confuse the compiler and get worse results.


r~

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  4:29         ` Matt Mackall
@ 2005-01-24  0:21           ` Nathan Scott
  2005-01-24  2:57             ` Matt Mackall
  2005-01-24  4:02           ` Horst von Brand
  1 sibling, 1 reply; 47+ messages in thread
From: Nathan Scott @ 2005-01-24  0:21 UTC (permalink / raw)
  To: Matt Mackall, Andreas Gruenbacher
  Cc: Andi Kleen, Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andries E. Brouwer, Andrew Morton,
	Olaf Kirch

On Sat, Jan 22, 2005 at 08:29:30PM -0800, Matt Mackall wrote:
> On Sun, Jan 23, 2005 at 03:39:34AM +0100, Andi Kleen wrote:
> 
> c) the three-way median selection does help avoid worst-case O(n^2)
> behavior, which might potentially be triggerable by users in places
> like XFS where this is used

XFS's needs are simple - we're just sorting dirents within a
single directory block or smaller, and sorting EA lists/ACLs -
all of which are small arrays, so a qsort optimised for small
arrays suits XFS well.  Take care not to put any arrays on the
stack though, else the CONFIG_4KSTACKS punters won't be happy.

cheers.

-- 
Nathan

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
@ 2005-01-24  0:28 James Lamanna
  0 siblings, 0 replies; 47+ messages in thread
From: James Lamanna @ 2005-01-24  0:28 UTC (permalink / raw)
  To: linux-kernel

> On Sunday, January 23, 2005, Rafael J. Wysocki wrote:
> > On Sunday, 23 of January 2005 06:05, Jesper Juhl wrote:
> > > On Sun, 23 Jan 2005, Andi Kleen wrote:
> > Even with large data sets that are mostly unsorted shell sorts performance 
> > is close to qsort, and there's an optimization that gives it O(n^(3/2)) 
> > runtime (IIRC),
>
> Yes, there is.

After doing a small amount of research into this, according to the abstract at
http://www.cs.princeton.edu/~rs/shell/paperF.pdf you can get O(n^(4/3)) 
with different increment sequences. (1, 8, 23, 77, 281 ...)

So I guess the sort function could look something like this for XFS use
(for reference only!):

void shellsort(void *array, size_t total_elems, size_t size, 
    int (*cmp)(const void *, const void *))
{
    size_t i, j;
    int k, h;
    register char *a = array;  
    const int incs[3] = {23, 8, 1};
    
    for (k = 0; k < 3; k++) {
        for (h = incs[k], i = h; i < total_elems; i++) {
            j = i;
            while (j >= h && cmp(a + (j-h) * size, a + j * size) > 0) {
                swap(a + j * size, a + (j-h) * size);
                j -= h;
            }
        }
    }
}

-- James Lamanna

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-24  0:21           ` Nathan Scott
@ 2005-01-24  2:57             ` Matt Mackall
  0 siblings, 0 replies; 47+ messages in thread
From: Matt Mackall @ 2005-01-24  2:57 UTC (permalink / raw)
  To: Nathan Scott
  Cc: Andreas Gruenbacher, Andi Kleen, Felipe Alfaro Solana,
	Trond Myklebust, linux-kernel, Buck Huppmann, Neil Brown,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

On Mon, Jan 24, 2005 at 11:21:29AM +1100, Nathan Scott wrote:
> On Sat, Jan 22, 2005 at 08:29:30PM -0800, Matt Mackall wrote:
> > On Sun, Jan 23, 2005 at 03:39:34AM +0100, Andi Kleen wrote:
> > 
> > c) the three-way median selection does help avoid worst-case O(n^2)
> > behavior, which might potentially be triggerable by users in places
> > like XFS where this is used
> 
> XFS's needs are simple - we're just sorting dirents within a
> single directory block or smaller, and sorting EA lists/ACLs -
> all of which are small arrays, so a qsort optimised for small
> arrays suits XFS well. 

Ok, I've worked up a much smaller, cleaner version that wins on lists
of 10000 entries or less and is still within 5% at 1M entries (ie well
past what any kernel code has any business doing). More after I've
fiddled around a bit more with the benchmarks.

> Take care not to put any arrays on the
> stack though, else the CONFIG_4KSTACKS punters won't be happy.

I'm afraid I'm one of those punters - 4k stacks were getting cleaned up and
tested in my -tiny tree long before mainline.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
@ 2005-01-24  3:44 Charles R Harris
  0 siblings, 0 replies; 47+ messages in thread
From: Charles R Harris @ 2005-01-24  3:44 UTC (permalink / raw)
  To: linux-kernel

Here are semi-templated versions of quicksort and heapsort, just for
completeness. The quicksort uses the median of three.

Chuck

void  quicksort0<typename>(<typename> *pl, <typename> *pr)
{
	<typename> vp, SWAP_temp;
	<typename> *stack[100], **sptr = stack, *pm, *pi, *pj, *pt;
        
	for(;;) {
		while ((pr - pl) > 15) {
			/* quicksort partition */
			pm = pl + ((pr - pl) >> 1);
			if (<lessthan>(*pm,*pl)) SWAP(*pm,*pl);
			if (<lessthan>(*pr,*pm)) SWAP(*pr,*pm);
			if (<lessthan>(*pm,*pl)) SWAP(*pm,*pl);
			vp = *pm;
			pi = pl;
			pj = pr - 1;
			SWAP(*pm,*pj);
			for(;;) {
				do ++pi; while (<lessthan>(*pi,vp));
				do --pj; while (<lessthan>(vp,*pj));
				if (pi >= pj)  break;
				SWAP(*pi,*pj);
			}
			SWAP(*pi,*(pr-1));
			/* push largest partition on stack */
			if (pi - pl < pr - pi) {
				*sptr++ = pi + 1;
				*sptr++ = pr;
				pr = pi - 1;
			}else{
				*sptr++ = pl;
				*sptr++ = pi - 1;
				pl = pi + 1;
			}
		}
		/* insertion sort */
		for(pi = pl + 1; pi <= pr; ++pi) {
			vp = *pi;
			for(pj = pi, pt = pi - 1; pj > pl && <lessthan>(vp, *pt);) {
				*pj-- = *pt--;
			}
			*pj = vp;
		}
		if (sptr == stack) break;
		pr = *(--sptr);
		pl = *(--sptr);
	}
}

void heapsort0<typename>(<typename> *a, long n)
{
	<typename> tmp;
	long i,j,l;

	/* The array needs to be offset by one for heapsort indexing */
        a -= 1;
        
	for (l = n>>1; l > 0; --l) {
		tmp = a[l];
		for (i = l, j = l<<1; j <= n;) {
			if (j < n && <lessthan>(a[j], a[j+1])) 
				j += 1;
			if (<lessthan>(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	} 

	for (; n > 1;) {
		tmp = a[n];
		a[n] = a[1];
		n -= 1;
		for (i = 1, j = 2; j <= n;) {
			if (j < n && <lessthan>(a[j], a[j+1]))
				j++;
			if (<lessthan>(tmp, a[j])) {
				a[i] = a[j];
				i = j;
				j += j;
			}else
				break;
		}
		a[i] = tmp;
	}
}




^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-22 20:34 ` [patch 1/13] Qsort Andreas Gruenbacher
                     ` (2 preceding siblings ...)
  2005-01-22 23:28   ` Matt Mackall
@ 2005-01-24  3:48   ` Horst von Brand
  3 siblings, 0 replies; 47+ messages in thread
From: Horst von Brand @ 2005-01-24  3:48 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: linux-kernel, Neil Brown, Trond Myklebust, Olaf Kirch,
	Andries E. Brouwer, Buck Huppmann, Andrew Morton

Andreas Gruenbacher <agruen@suse.de> said:
> Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
> Acked-by: Olaf Kirch <okir@suse.de>

[...]

> +/* Order size using quicksort.  This implementation incorporates
> +   four optimizations discussed in Sedgewick:
> +
> +   1. Non-recursive, using an explicit stack of pointer that store the
> +      next array partition to sort.  To save time, this maximum amount
> +      of space required to store an array of SIZE_MAX is allocated on the
> +      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
> +      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
> +      Pretty cheap, actually.

Not really, given the strict size restrictions in-kernel.

Has there been any comparison between the original and this one? Code size,
stack use, speed, ...?
-- 
Dr. Horst H. von Brand                   User #22616 counter.li.org
Departamento de Informatica                     Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria              +56 32 654239
Casilla 110-V, Valparaiso, Chile                Fax:  +56 32 797513

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  4:29         ` Matt Mackall
  2005-01-24  0:21           ` Nathan Scott
@ 2005-01-24  4:02           ` Horst von Brand
  2005-01-24 21:57             ` Matt Mackall
  1 sibling, 1 reply; 47+ messages in thread
From: Horst von Brand @ 2005-01-24  4:02 UTC (permalink / raw)
  To: Matt Mackall
  Cc: Andi Kleen, Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

Matt Mackall <mpm@selenic.com> said:
> On Sun, Jan 23, 2005 at 03:39:34AM +0100, Andi Kleen wrote:

[...]

> > -Andi (who thinks the glibc qsort is vast overkill for kernel purposes
> > where there are only small data sets and it would be better to use a 
> > simpler one optimized for code size)

> Mostly agreed. Except:
> 
> a) the glibc version is not actually all that optimized
> b) it's nice that it's not recursive
> c) the three-way median selection does help avoid worst-case O(n^2)
> behavior, which might potentially be triggerable by users in places
> like XFS where this is used

Shellsort is much simpler, and not much slower for small datasets. Plus no
extra space for stacks.

> I'll probably whip up a simpler version tomorrow or Monday and do some
> size/space benchmarking. I've been meaning to contribute a qsort for
> doubly-linked lists I've got lying around as well.

Qsort is OK as long as you have direct access to each element. In case of
lists, it is better to just use mergesort.
-- 
Dr. Horst H. von Brand                   User #22616 counter.li.org
Departamento de Informatica                     Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria              +56 32 654239
Casilla 110-V, Valparaiso, Chile                Fax:  +56 32 797513

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23 10:37               ` Rafael J. Wysocki
@ 2005-01-24  4:29                 ` Horst von Brand
  0 siblings, 0 replies; 47+ messages in thread
From: Horst von Brand @ 2005-01-24  4:29 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Jesper Juhl, Andi Kleen, Felipe Alfaro Solana, Trond Myklebust,
	linux-kernel, Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

"Rafael J. Wysocki" <rjw@sisk.pl> said:

[...]

> To be precise, one needs ~(log N) of stack space for qsort, and frankly, one
> should use something like the shell (or should I say Shell?)

Shell. It is named for a person.

>                                                              sort for sorting
> small sets of elements in qsort as well.

It makes no sense for smallish sets, insertion sort is better.
-- 
Dr. Horst H. von Brand                   User #22616 counter.li.org
Departamento de Informatica                     Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria              +56 32 654239
Casilla 110-V, Valparaiso, Chile                Fax:  +56 32 797513

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  5:05             ` Jesper Juhl
  2005-01-23 10:37               ` Rafael J. Wysocki
@ 2005-01-24 15:45               ` Alan Cox
  2005-01-24 17:10               ` H. Peter Anvin
  2 siblings, 0 replies; 47+ messages in thread
From: Alan Cox @ 2005-01-24 15:45 UTC (permalink / raw)
  To: Jesper Juhl
  Cc: Andi Kleen, Felipe Alfaro Solana, Trond Myklebust,
	Linux Kernel Mailing List, Buck Huppmann, Neil Brown,
	Andreas Gruenbacher, Andries E. Brouwer, Andrew Morton,
	Olaf Kirch

On Sul, 2005-01-23 at 05:05, Jesper Juhl wrote:
> On Sun, 23 Jan 2005, Andi Kleen wrote:
> Even with large data sets that are mostly unsorted shell sorts performance 
> is close to qsort, and there's an optimization that gives it O(n^(3/2)) 
> runtime (IIRC), and another nice property is that it's iterative so it 
> doesn't eat up stack space (as oposed to qsort which is recursive and eats 
> stack like ****)...

qsort also has bad worst case performance which matters if you are
sorting data provided by a hostile source.


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  5:05             ` Jesper Juhl
  2005-01-23 10:37               ` Rafael J. Wysocki
  2005-01-24 15:45               ` Alan Cox
@ 2005-01-24 17:10               ` H. Peter Anvin
  2005-01-25  0:43                 ` Horst von Brand
  2 siblings, 1 reply; 47+ messages in thread
From: H. Peter Anvin @ 2005-01-24 17:10 UTC (permalink / raw)
  To: linux-kernel

Followup to:  <Pine.LNX.4.61.0501230600070.2748@dragon.hygekrogen.localhost>
By author:    Jesper Juhl <juhl-lkml@dif.dk>
In newsgroup: linux.dev.kernel
>
> On Sun, 23 Jan 2005, Andi Kleen wrote:
> 
> > > How about a shell sort?  if the data is mostly sorted shell sort beats 
> > > qsort lots of times, and since the data sets are often small in-kernel, 
> > > shell sorts O(n^2) behaviour won't harm it too much, shell sort is also 
> > > faster if the data is already completely sorted. Shell sort is certainly 
> > > not the simplest algorithm around, but I think (without having done any 
> > > tests) that it would probably do pretty well for in-kernel use... Then 
> > > again, I've known to be wrong :)
> > 
> > I like shell sort for small data sets too. And I agree it would be 
> > appropiate for the kernel.
> > 
> Even with large data sets that are mostly unsorted shell sorts performance 
> is close to qsort, and there's an optimization that gives it O(n^(3/2)) 
> runtime (IIRC), and another nice property is that it's iterative so it 
> doesn't eat up stack space (as oposed to qsort which is recursive and eats 
> stack like ****)...
> Yeah, I think shell sort would be good for the kernel.
> 

In klibc, I use combsort:

/*
 * qsort.c
 *
 * This is actually combsort.  It's an O(n log n) algorithm with
 * simplicity/small code size being its main virtue.
 */

#include <stddef.h>
#include <string.h>

static inline size_t newgap(size_t gap)
{
  gap = (gap*10)/13;
  if ( gap == 9 || gap == 10 )
    gap = 11;

  if ( gap < 1 )
    gap = 1;
  return gap;
}

void qsort(void *base, size_t nmemb, size_t size,
           int (*compar)(const void *, const void *))
{
  size_t gap = nmemb;
  size_t i, j;
  char *p1, *p2;
  int swapped;

  do {
    gap = newgap(gap);
    swapped = 0;

    for ( i = 0, p1 = base ; i < nmemb-gap ; i++, p1 += size ) {
      j = i+gap;
      if ( compar(p1, p2 = (char *)base+j*size) > 0 ) {
        memswap(p1, p2, size);
        swapped = 1;
      }
    }
  } while ( gap > 1 || swapped );
}

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  4:58         ` Felipe Alfaro Solana
@ 2005-01-24 21:20           ` Matt Mackall
  2005-01-24 21:50             ` vlobanov
  0 siblings, 1 reply; 47+ messages in thread
From: Matt Mackall @ 2005-01-24 21:20 UTC (permalink / raw)
  To: Felipe Alfaro Solana
  Cc: Andi Kleen, Neil Brown, linux-kernel, Buck Huppmann,
	Trond Myklebust, Andreas Gruenbacher, Andries E. Brouwer,
	Andrew Morton, Olaf Kirch

On Sun, Jan 23, 2005 at 05:58:00AM +0100, Felipe Alfaro Solana wrote:
> On 23 Jan 2005, at 03:39, Andi Kleen wrote:
> 
> >Felipe Alfaro Solana <lkml@mac.com> writes:
> >>
> >>AFAIK, XOR is quite expensive on IA32 when compared to simple MOV
> >>operatings. Also, since the original patch uses 3 MOVs to perform the
> >>swapping, and your version uses 3 XOR operations, I don't see any
> >>gains.
> >
> >Both are one cycle latency for register<->register on all x86 cores
> >I've looked at. What makes you think differently?
> 
> I thought XOR was more expensie. Anyways, I still don't see any 
> advantage in replacing 3 MOVs with 3 XORs.

Again, no temporaries needed.

But I benched it and it was quite a bit slower.

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-24 21:20           ` Matt Mackall
@ 2005-01-24 21:50             ` vlobanov
  0 siblings, 0 replies; 47+ messages in thread
From: vlobanov @ 2005-01-24 21:50 UTC (permalink / raw)
  To: Matt Mackall
  Cc: Felipe Alfaro Solana, Andi Kleen, Neil Brown, linux-kernel,
	Buck Huppmann, Trond Myklebust, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

On Mon, 24 Jan 2005, Matt Mackall wrote:

> On Sun, Jan 23, 2005 at 05:58:00AM +0100, Felipe Alfaro Solana wrote:
> > On 23 Jan 2005, at 03:39, Andi Kleen wrote:
> >
> > >Felipe Alfaro Solana <lkml@mac.com> writes:
> > >>
> > >>AFAIK, XOR is quite expensive on IA32 when compared to simple MOV
> > >>operatings. Also, since the original patch uses 3 MOVs to perform the
> > >>swapping, and your version uses 3 XOR operations, I don't see any
> > >>gains.
> > >
> > >Both are one cycle latency for register<->register on all x86 cores
> > >I've looked at. What makes you think differently?
> >
> > I thought XOR was more expensie. Anyways, I still don't see any
> > advantage in replacing 3 MOVs with 3 XORs.
>
> Again, no temporaries needed.
>
> But I benched it and it was quite a bit slower.
>
> --
> Mathematics is the supreme nostalgia of our time.

Yep, it's a difference of four instructions (when using one or two
temporary variables and swapping using assignments) versus six
instructions (when using xors, since IA32 can't do an xor with both
arguments in memory).

I originally pitched this idea out to the list just for discussion
purposes. Most considered it, and said that the advantages don't
outweigh the disadvantages. And that's fine -- it means that the chosen
way is that much better considered. Always a good thing. :)

-Vadim Lobanov

> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-24  4:02           ` Horst von Brand
@ 2005-01-24 21:57             ` Matt Mackall
  0 siblings, 0 replies; 47+ messages in thread
From: Matt Mackall @ 2005-01-24 21:57 UTC (permalink / raw)
  To: Horst von Brand
  Cc: Andi Kleen, Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch

On Mon, Jan 24, 2005 at 01:02:44AM -0300, Horst von Brand wrote:
> Matt Mackall <mpm@selenic.com> said:
> > On Sun, Jan 23, 2005 at 03:39:34AM +0100, Andi Kleen wrote:
> 
> [...]
> 
> > > -Andi (who thinks the glibc qsort is vast overkill for kernel purposes
> > > where there are only small data sets and it would be better to use a 
> > > simpler one optimized for code size)
> 
> > Mostly agreed. Except:
> > 
> > a) the glibc version is not actually all that optimized
> > b) it's nice that it's not recursive
> > c) the three-way median selection does help avoid worst-case O(n^2)
> > behavior, which might potentially be triggerable by users in places
> > like XFS where this is used
> 
> Shellsort is much simpler, and not much slower for small datasets. Plus no
> extra space for stacks.
> 
> > I'll probably whip up a simpler version tomorrow or Monday and do some
> > size/space benchmarking. I've been meaning to contribute a qsort for
> > doubly-linked lists I've got lying around as well.
> 
> Qsort is OK as long as you have direct access to each element. In case of
> lists, it is better to just use mergesort.

Qsort does not need to do random access. I posted an efficient
doubly-linked list version here four years ago:

template<class T>
void list<T>::qsort(iter l, iter r, cmpfunc *cmp, void *data)
{
        if(l==r) return;

        iter i(l), p(l);

        for(i++; i!=r; i++)
                if(cmp(*i, *l, data)<0)
                        i.swap(++p);

        l.swap(p);
        qsort(l, p, cmp, data);
        qsort(++p, r, cmp, data);
}

-- 
Mathematics is the supreme nostalgia of our time.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-23  4:46           ` Andi Kleen
  2005-01-23  5:05             ` Jesper Juhl
@ 2005-01-24 22:04             ` Mike Waychison
  2005-01-25  6:51               ` Andi Kleen
  1 sibling, 1 reply; 47+ messages in thread
From: Mike Waychison @ 2005-01-24 22:04 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Jesper Juhl, Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch, Tim Hockin

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Andi Kleen wrote:
>>How about a shell sort?  if the data is mostly sorted shell sort beats 
>>qsort lots of times, and since the data sets are often small in-kernel, 
>>shell sorts O(n^2) behaviour won't harm it too much, shell sort is also 
>>faster if the data is already completely sorted. Shell sort is certainly 
>>not the simplest algorithm around, but I think (without having done any 
>>tests) that it would probably do pretty well for in-kernel use... Then 
>>again, I've known to be wrong :)
> 
> 
> I like shell sort for small data sets too. And I agree it would be 
> appropiate for the kernel.
> 

FWIW, we already have a Shell sort for the ngroups stuff in
kernel/sys.c:groups_sort() that could be made generic.

- --
Mike Waychison
Sun Microsystems, Inc.
1 (650) 352-5299 voice
1 (416) 202-8336 voice

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NOTICE:  The opinions expressed in this email are held by me,
and may not represent the views of Sun Microsystems, Inc.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.5 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFB9XDzdQs4kOxk3/MRAs2ZAJ4if1XRFAiWsgb1wvTInFLUVGHesgCfWxCJ
Efyrr4PkG/KrqefAVAQjt+c=
=/OPh
-----END PGP SIGNATURE-----

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-24 17:10               ` H. Peter Anvin
@ 2005-01-25  0:43                 ` Horst von Brand
  2005-01-25  4:06                   ` Eric St-Laurent
  0 siblings, 1 reply; 47+ messages in thread
From: Horst von Brand @ 2005-01-25  0:43 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1331 bytes --]

hpa@zytor.com (H. Peter Anvin) said:

[...]

> In klibc, I use combsort:
> 
> /*
>  * qsort.c
>  *
>  * This is actually combsort.  It's an O(n log n) algorithm with
>  * simplicity/small code size being its main virtue.
>  */
> 
> #include <stddef.h>
> #include <string.h>
> 
> static inline size_t newgap(size_t gap)
> {
>   gap = (gap*10)/13;
>   if ( gap == 9 || gap == 10 )
>     gap = 11;
> 
>   if ( gap < 1 )
>     gap = 1;
>   return gap;
> }
> 
> void qsort(void *base, size_t nmemb, size_t size,
>            int (*compar)(const void *, const void *))
> {
>   size_t gap = nmemb;
>   size_t i, j;
>   char *p1, *p2;
>   int swapped;
> 
>   do {
>     gap = newgap(gap);
>     swapped = 0;
> 
>     for ( i = 0, p1 = base ; i < nmemb-gap ; i++, p1 += size ) {
>       j = i+gap;
>       if ( compar(p1, p2 = (char *)base+j*size) > 0 ) {
>         memswap(p1, p2, size);
>         swapped = 1;
>       }
>     }
>   } while ( gap > 1 || swapped );
> }

AFAICS, this is just a badly implemented Shellsort (the 10/13 increment
sequence starting with the number of elements is probably not very good,
besides swapping stuff is inefficient (just juggling like Shellsort does
gives you almost a third less copies)).

Have you found a proof for the O(n log n) claim?

I'd write as attached (careful, a local element on stack!)

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: Shellsort --]
[-- Type: text/x-c, Size: 2186 bytes --]

/*
 * shellsort.c: Shell sort
 *
 * Copyright (c) 2005, Horst H. von Brand <vonbrand@inf.utfsm.cl>
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *     * Neither the name of Horst H. von Brand nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>

void qsort(void *base, size_t nmemb, size_t size, 
           int (*compar)(const void *, const void *))

{
  int i, j, h;
  char tmp[size];

  for(h = 1; h < nmemb; h = 3 * h + 1)
    ;

  do {
    h /= 3;
    for(i = h; i < nmemb; i++) {
      memcpy(tmp, base + i * size, size);
      for(j = i - h; j >= 0 && compar(tmp, base + j * size); j -= h)
	memcpy(base + (j + h) * size, base + j * size, size);
      memcpy(base + (j + h) * size, tmp, size);
    }
  } while(h > 1);
}

[-- Attachment #3: Type: text/plain, Size: 276 bytes --]

-- 
Dr. Horst H. von Brand                   User #22616 counter.li.org
Departamento de Informatica                     Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria              +56 32 654239
Casilla 110-V, Valparaiso, Chile                Fax:  +56 32 797513

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25  0:43                 ` Horst von Brand
@ 2005-01-25  4:06                   ` Eric St-Laurent
  0 siblings, 0 replies; 47+ messages in thread
From: Eric St-Laurent @ 2005-01-25  4:06 UTC (permalink / raw)
  To: Horst von Brand; +Cc: H. Peter Anvin, linux-kernel

On Mon, 2005-01-24 at 21:43 -0300, Horst von Brand wrote:
> AFAICS, this is just a badly implemented Shellsort (the 10/13 increment
> sequence starting with the number of elements is probably not very good,
> besides swapping stuff is inefficient (just juggling like Shellsort does
> gives you almost a third less copies)).
> 
> Have you found a proof for the O(n log n) claim?

"Why a Comb Sort is NOT a Shell Sort

A shell sort completely sorts the data for each gap size. A comb sort
takes a more optimistic approach and doesn't require data be completely
sorted at a gap size. The comb sort assumes that out-of-order data will
be cleaned-up by smaller gap sizes as the sort proceeds. "

Reference:

http://world.std.com/~jdveale/combsort.htm

Another good reference:

http://yagni.com/combsort/index.php

Personally, i've used it in the past because of it's small size.  With
C++ templates you can have a copy of the routine generated for a
specific datatype, thus skipping the costly function call used for each
compare.  With some C macro magic, i presume something similar can be
done, for time-critical applications.

Best regards,

Eric St-Laurent

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-24 22:04             ` Mike Waychison
@ 2005-01-25  6:51               ` Andi Kleen
  2005-01-25 10:12                 ` Andreas Gruenbacher
  0 siblings, 1 reply; 47+ messages in thread
From: Andi Kleen @ 2005-01-25  6:51 UTC (permalink / raw)
  To: Mike Waychison
  Cc: Jesper Juhl, Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andreas Gruenbacher,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch, Tim Hockin

> FWIW, we already have a Shell sort for the ngroups stuff in
> kernel/sys.c:groups_sort() that could be made generic.

Sounds like a good plan. Any takers?

-Andi

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25  6:51               ` Andi Kleen
@ 2005-01-25 10:12                 ` Andreas Gruenbacher
  2005-01-25 12:00                   ` Andi Kleen
  0 siblings, 1 reply; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-25 10:12 UTC (permalink / raw)
  To: Andi Kleen, Nathan Scott
  Cc: Mike Waychison, Jesper Juhl, Felipe Alfaro Solana,
	Trond Myklebust, linux-kernel, Buck Huppmann, Neil Brown,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch, Tim Hockin

On Tuesday 25 January 2005 07:51, Andi Kleen wrote:
> > FWIW, we already have a Shell sort for the ngroups stuff in
> > kernel/sys.c:groups_sort() that could be made generic.
>
> Sounds like a good plan. Any takers?

It would slow down the groups case (unless we leave the specialized version 
in). Gcc doesn't inline a cmp function pointer, and a C preprocessor 
templatized version would be really ugly. A variant with of this routine with 
qsort like interface should be good enough for nfsacl and xfs though.

Nevertheless, xfs and nfsacl have very similar requirements:

nfsacl: at most 1024 elements; 8-byte elements (16 on 64-bit archs)

xfs (from Nathan): at most 1024 elements (with 64K blocksize); 8-byte or 
larger elements

Cheers.
-- 
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX PRODUCTS GMBH

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 10:12                 ` Andreas Gruenbacher
@ 2005-01-25 12:00                   ` Andi Kleen
  2005-01-25 12:05                     ` Olaf Kirch
  0 siblings, 1 reply; 47+ messages in thread
From: Andi Kleen @ 2005-01-25 12:00 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: Nathan Scott, Mike Waychison, Jesper Juhl, Felipe Alfaro Solana,
	Trond Myklebust, linux-kernel, Buck Huppmann, Neil Brown,
	Andries E. Brouwer, Andrew Morton, Olaf Kirch, Tim Hockin

> It would slow down the groups case (unless we leave the specialized version 
> in). Gcc doesn't inline a cmp function pointer, and a C preprocessor 
> templatized version would be really ugly. A variant with of this routine with 
> qsort like interface should be good enough for nfsacl and xfs though.

group initialization is not time critical, it typically only happens
at login.  Also it's doubleful you'll even be able to measure the difference.

-Andi

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 12:00                   ` Andi Kleen
@ 2005-01-25 12:05                     ` Olaf Kirch
  2005-01-25 16:52                       ` Trond Myklebust
  0 siblings, 1 reply; 47+ messages in thread
From: Olaf Kirch @ 2005-01-25 12:05 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Andreas Gruenbacher, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, Trond Myklebust, linux-kernel,
	Buck Huppmann, Neil Brown, Andries E. Brouwer, Andrew Morton,
	Tim Hockin

On Tue, Jan 25, 2005 at 01:00:23PM +0100, Andi Kleen wrote:
> group initialization is not time critical, it typically only happens
> at login.  Also it's doubleful you'll even be able to measure the difference.

nfsd updates its group list for every request it processes, so you don't want
to make that too slow.

Olaf
-- 
Olaf Kirch     | Things that make Monday morning interesting, #2:
okir@suse.de   |        "We have 8,000 NFS mount points, why do we keep
---------------+ 	 running out of privileged ports?"

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 12:05                     ` Olaf Kirch
@ 2005-01-25 16:52                       ` Trond Myklebust
  2005-01-25 16:53                         ` Andreas Gruenbacher
  0 siblings, 1 reply; 47+ messages in thread
From: Trond Myklebust @ 2005-01-25 16:52 UTC (permalink / raw)
  To: Olaf Kirch
  Cc: Andi Kleen, Andreas Gruenbacher, Nathan Scott, Mike Waychison,
	Jesper Juhl, Felipe Alfaro Solana, linux-kernel, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

ty den 25.01.2005 Klokka 13:05 (+0100) skreiv Olaf Kirch:
> On Tue, Jan 25, 2005 at 01:00:23PM +0100, Andi Kleen wrote:
> > group initialization is not time critical, it typically only happens
> > at login.  Also it's doubleful you'll even be able to measure the difference.
> 
> nfsd updates its group list for every request it processes, so you don't want
> to make that too slow.

So here's an iconoclastic question or two:

  Why can't clients sort the list in userland, before they call down to
the kernel?

  If clients are sorting their lists, why would we need to sort the same
list on the server side. Detecting out-of-order list entries is much
less of a hassle than actually sorting, so if the protocol calls for
sorted elements, you can return an EINVAL or something in the case where
some client sends an unsorted list.

Cheers,
  Trond

-- 
Trond Myklebust <trond.myklebust@fys.uio.no>


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 16:52                       ` Trond Myklebust
@ 2005-01-25 16:53                         ` Andreas Gruenbacher
  2005-01-25 17:03                           ` Trond Myklebust
  0 siblings, 1 reply; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-25 16:53 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Olaf Kirch, Andi Kleen, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, linux-kernel@vger.kernel.org, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

On Tue, 2005-01-25 at 17:52, Trond Myklebust wrote:
> So here's an iconoclastic question or two:
> 
>   Why can't clients sort the list in userland, before they call down to
> the kernel?

Tell that to Sun Microsystems.

Regards,
-- 
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX GMBH


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 16:53                         ` Andreas Gruenbacher
@ 2005-01-25 17:03                           ` Trond Myklebust
  2005-01-25 17:16                             ` Andreas Gruenbacher
  0 siblings, 1 reply; 47+ messages in thread
From: Trond Myklebust @ 2005-01-25 17:03 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: Olaf Kirch, Andi Kleen, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, linux-kernel@vger.kernel.org, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

ty den 25.01.2005 Klokka 17:53 (+0100) skreiv Andreas Gruenbacher:
> On Tue, 2005-01-25 at 17:52, Trond Myklebust wrote:
> > So here's an iconoclastic question or two:
> > 
> >   Why can't clients sort the list in userland, before they call down to
> > the kernel?
> 
> Tell that to Sun Microsystems.

Whatever Sun chooses to do or not do changes nothing to the question of
why our client would want to do a quicksort in the kernel.

Cheers,
  Trond
-- 
Trond Myklebust <trond.myklebust@fys.uio.no>


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 17:03                           ` Trond Myklebust
@ 2005-01-25 17:16                             ` Andreas Gruenbacher
  2005-01-25 17:37                               ` Trond Myklebust
  0 siblings, 1 reply; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-25 17:16 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Olaf Kirch, Andi Kleen, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, linux-kernel@vger.kernel.org, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

On Tue, 2005-01-25 at 18:03, Trond Myklebust wrote:
> ty den 25.01.2005 Klokka 17:53 (+0100) skreiv Andreas Gruenbacher:
> > On Tue, 2005-01-25 at 17:52, Trond Myklebust wrote:
> > > So here's an iconoclastic question or two:
> > > 
> > >   Why can't clients sort the list in userland, before they call down to
> > > the kernel?
> > 
> > Tell that to Sun Microsystems.
> 
> Whatever Sun chooses to do or not do changes nothing to the question of
> why our client would want to do a quicksort in the kernel.

Well, it determines what we must accept, both on the server side and the
client side.

Cheers,
-- 
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX GMBH


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 17:16                             ` Andreas Gruenbacher
@ 2005-01-25 17:37                               ` Trond Myklebust
  2005-01-25 18:12                                 ` Andreas Gruenbacher
  0 siblings, 1 reply; 47+ messages in thread
From: Trond Myklebust @ 2005-01-25 17:37 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: Olaf Kirch, Andi Kleen, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, linux-kernel@vger.kernel.org, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

ty den 25.01.2005 Klokka 18:16 (+0100) skreiv Andreas Gruenbacher:

> > Whatever Sun chooses to do or not do changes nothing to the question of
> > why our client would want to do a quicksort in the kernel.
> 
> Well, it determines what we must accept, both on the server side and the
> client side.

I can see why you might want it on the server side, but I repeat: why
does the client need to do this in the kernel? The client code should
not be overriding the server when it comes to what is acceptable or not
acceptable. That's just wrong...

I can also see that if the server _must_ have a sorted list, then doing
a sort on the client is a good thing since it will cut down on the work
that said server will need to do, and so it will scale better with the
number of clients (though note that, conversely, this server will scale
poorly with the Sun clients or others if they do not sort the lists).

I'm asking 'cos if the client doesn't need this code, then it seems to
me you can move helper routines like the quicksort and posix checking
routines into the nfsd module rather than having to keeping it in the
VFS (unless you foresee that other modules will want to use the same
routines???).

Cheers,
 Trond
-- 
Trond Myklebust <trond.myklebust@fys.uio.no>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 17:37                               ` Trond Myklebust
@ 2005-01-25 18:12                                 ` Andreas Gruenbacher
  2005-01-25 19:33                                   ` Trond Myklebust
  0 siblings, 1 reply; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-25 18:12 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Olaf Kirch, Andi Kleen, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, linux-kernel@vger.kernel.org, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

On Tue, 2005-01-25 at 18:37, Trond Myklebust wrote:
> ty den 25.01.2005 Klokka 18:16 (+0100) skreiv Andreas Gruenbacher:
> 
> > > Whatever Sun chooses to do or not do changes nothing to the question of
> > > why our client would want to do a quicksort in the kernel.
> > 
> > Well, it determines what we must accept, both on the server side and the
> > client side.
> 
> I can see why you might want it on the server side, but I repeat: why
> does the client need to do this in the kernel? The client code should
> not be overriding the server when it comes to what is acceptable or not
> acceptable. That's just wrong...

Ah, I see now what you mean. The setxattr syscall only accepts
well-formed acls (that is, sorted plus a few other restrictions), and
user-space is expected to take care of that. In turn, getxattr returns
only well-formed acls. We could lift that guarantee specifically for
nfs, but I don't think it would be a good idea. Entry order in POSIX
acls doesn't convey a meaning by the way, and the nfs client never
rejects what the server sends.

> I can also see that if the server _must_ have a sorted list, then doing
> a sort on the client is a good thing since it will cut down on the work
> that said server will need to do, and so it will scale better with the
> number of clients (though note that, conversely, this server will scale
> poorly with the Sun clients or others if they do not sort the lists).

The server must have sorted lists. Linux clients send well-formed acls
except when they fake up a mask entry; they insert the mask entry at the
end instead of in the right position (this is the three-entry acl
problem I described in [patch 0/13]). We could insert the mask in the
right position, but the protocol doesn't require it. We must sort on the
server anyway, and the server can as easily swap the two entries.

> I'm asking 'cos if the client doesn't need this code, then it seems to
> me you can move helper routines like the quicksort and posix checking
> routines into the nfsd module rather than having to keeping it in the
> VFS (unless you foresee that other modules will want to use the same
> routines???).

That would cause getxattr to return an "invalid" result. libacl doesn't
care, but other users might exist that rely on the current format. In
addition, comparing acls becomes non-trivial: currently xattr values are
equal iff acls are equal.

Cheers,
-- 
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX GMBH

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 18:12                                 ` Andreas Gruenbacher
@ 2005-01-25 19:33                                   ` Trond Myklebust
  2005-01-25 19:49                                     ` Andreas Gruenbacher
  0 siblings, 1 reply; 47+ messages in thread
From: Trond Myklebust @ 2005-01-25 19:33 UTC (permalink / raw)
  To: Andreas Gruenbacher
  Cc: Olaf Kirch, Andi Kleen, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, linux-kernel@vger.kernel.org, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

ty den 25.01.2005 Klokka 19:12 (+0100) skreiv Andreas Gruenbacher:

> Ah, I see now what you mean. The setxattr syscall only accepts
> well-formed acls (that is, sorted plus a few other restrictions), and
> user-space is expected to take care of that. In turn, getxattr returns
> only well-formed acls. We could lift that guarantee specifically for
> nfs, but I don't think it would be a good idea.

Note that if you really want to add a qsort to the kernel you might as
well drop the setxattr sorting requirement too. If the kernel can qsort
for getxattr, then might as well do it for the case of setxattr too.

> Entry order in POSIX acls doesn't convey a meaning by the way.

Precisely. Are there really any existing programs out there that are
using the raw xattr output and making assumptions about entry order?

> The server must have sorted lists.

So, I realize that the on-disk format is already defined, but looking at
routines like posix_acl_permission(), it looks like the only order the
kernel (at least) actually cares about is that of the "e_tag" field.
Unless I missed something, nothing there cares about the order of the
"e_id" fields.
Given that you only have 6 possible values there, it seems a shame in
hindsight that we didn't choose to just use a 6 bucket hashtable (the
value of e_id being the hash value), and leave the order of the e_id
fields undefined. 8-(

Cheers,
  Trond

-- 
Trond Myklebust <trond.myklebust@fys.uio.no>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [patch 1/13] Qsort
  2005-01-25 19:33                                   ` Trond Myklebust
@ 2005-01-25 19:49                                     ` Andreas Gruenbacher
  0 siblings, 0 replies; 47+ messages in thread
From: Andreas Gruenbacher @ 2005-01-25 19:49 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: Olaf Kirch, Andi Kleen, Nathan Scott, Mike Waychison, Jesper Juhl,
	Felipe Alfaro Solana, linux-kernel@vger.kernel.org, Buck Huppmann,
	Neil Brown, Andries E. Brouwer, Andrew Morton, Tim Hockin

On Tue, 2005-01-25 at 20:33, Trond Myklebust wrote:
> ty den 25.01.2005 Klokka 19:12 (+0100) skreiv Andreas Gruenbacher:
> 
> > Ah, I see now what you mean. The setxattr syscall only accepts
> > well-formed acls (that is, sorted plus a few other restrictions), and
> > user-space is expected to take care of that. In turn, getxattr returns
> > only well-formed acls. We could lift that guarantee specifically for
> > nfs, but I don't think it would be a good idea.
> 
> Note that if you really want to add a qsort to the kernel you might as
> well drop the setxattr sorting requirement too. If the kernel can qsort
> for getxattr, then might as well do it for the case of setxattr too.

There is no need to sort anything in the kernel for acls except for the
NFSACL case, so that's where we need it, and nowhere else. What would be
the point in making setxattr accept unsorted acls? It's just not
necessary; userspace can do it just as well.

> > Entry order in POSIX acls doesn't convey a meaning by the way.
> 
> Precisely. Are there really any existing programs out there that are
> using the raw xattr output and making assumptions about entry order?

I don't know. Anyway, it's a nice feature to have a unique canonical
form.

> > The server must have sorted lists.
> 
> So, I realize that the on-disk format is already defined, but looking at
> routines like posix_acl_permission(), it looks like the only order the
> kernel (at least) actually cares about is that of the "e_tag" field.
> Unless I missed something, nothing there cares about the order of the
> "e_id" fields.

Correct. But posix_acl_valid() does care about the i_id order as well.

> Given that you only have 6 possible values there, it seems a shame in
> hindsight that we didn't choose to just use a 6 bucket hashtable (the
> value of e_id being the hash value), and leave the order of the e_id
> fields undefined. 8-(

Checking for duplicate e_id fields would become expensive. I really
don't see any benefit.

Cheers,
-- 
Andreas Gruenbacher <agruen@suse.de>
SUSE Labs, SUSE LINUX GMBH


^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2005-01-25 19:57 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-01-24  0:28 [patch 1/13] Qsort James Lamanna
  -- strict thread matches above, loose matches on Subject: below --
2005-01-24  3:44 Charles R Harris
2005-01-22 20:34 [patch 0/13] NFSACL protocol extension for NFSv3 Andreas Gruenbacher
2005-01-22 20:34 ` [patch 1/13] Qsort Andreas Gruenbacher
2005-01-22 21:00   ` vlobanov
2005-01-23  2:03     ` Felipe Alfaro Solana
2005-01-23  2:39       ` Andi Kleen
2005-01-23  3:02         ` Jesper Juhl
2005-01-23  4:46           ` Andi Kleen
2005-01-23  5:05             ` Jesper Juhl
2005-01-23 10:37               ` Rafael J. Wysocki
2005-01-24  4:29                 ` Horst von Brand
2005-01-24 15:45               ` Alan Cox
2005-01-24 17:10               ` H. Peter Anvin
2005-01-25  0:43                 ` Horst von Brand
2005-01-25  4:06                   ` Eric St-Laurent
2005-01-24 22:04             ` Mike Waychison
2005-01-25  6:51               ` Andi Kleen
2005-01-25 10:12                 ` Andreas Gruenbacher
2005-01-25 12:00                   ` Andi Kleen
2005-01-25 12:05                     ` Olaf Kirch
2005-01-25 16:52                       ` Trond Myklebust
2005-01-25 16:53                         ` Andreas Gruenbacher
2005-01-25 17:03                           ` Trond Myklebust
2005-01-25 17:16                             ` Andreas Gruenbacher
2005-01-25 17:37                               ` Trond Myklebust
2005-01-25 18:12                                 ` Andreas Gruenbacher
2005-01-25 19:33                                   ` Trond Myklebust
2005-01-25 19:49                                     ` Andreas Gruenbacher
2005-01-23  4:29         ` Matt Mackall
2005-01-24  0:21           ` Nathan Scott
2005-01-24  2:57             ` Matt Mackall
2005-01-24  4:02           ` Horst von Brand
2005-01-24 21:57             ` Matt Mackall
2005-01-23  4:58         ` Felipe Alfaro Solana
2005-01-24 21:20           ` Matt Mackall
2005-01-24 21:50             ` vlobanov
2005-01-23  4:22       ` Matt Mackall
2005-01-23  5:44       ` Willy Tarreau
2005-01-23 21:24     ` Richard Henderson
     [not found]   ` <1106431568.4153.154.camel@laptopd505.fenrus.org>
2005-01-22 22:10     ` Andreas Gruenbacher
2005-01-22 23:28   ` Matt Mackall
2005-01-23  0:21     ` Matt Mackall
2005-01-23  5:08     ` Andreas Gruenbacher
2005-01-23  5:32       ` Matt Mackall
2005-01-23 12:22         ` Andreas Gruenbacher
2005-01-23 16:49           ` Matt Mackall
2005-01-24  3:48   ` Horst von Brand

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox