[PATCH 1/5] Add obstack.[ch] from EGLIBC 2.10

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/5] Add obstack.[ch] from EGLIBC 2.10
       [not found] <20100213141558.22851.13660.stgit@fredrik-laptop>
@ 2010-02-13 14:20 ` Fredrik Kuivinen
  2010-02-13 14:20 ` [PATCH 2/5] Add string search routines from GNU grep Fredrik Kuivinen
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Fredrik Kuivinen @ 2010-02-13 14:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---

 obstack.c |  441 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 obstack.h |  509 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 950 insertions(+), 0 deletions(-)
 create mode 100644 obstack.c
 create mode 100644 obstack.h

diff --git a/obstack.c b/obstack.c
new file mode 100644
index 0000000..75440d9
--- /dev/null
+++ b/obstack.c
@@ -0,0 +1,441 @@
+/* obstack.c - subroutines used implicitly by object stack macros
+   Copyright (C) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998,
+   1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#ifdef _LIBC
+# include <obstack.h>
+# include <shlib-compat.h>
+#else
+# include "obstack.h"
+#endif
+
+/* NOTE BEFORE MODIFYING THIS FILE: This version number must be
+   incremented whenever callers compiled using an old obstack.h can no
+   longer properly call the functions in this obstack.c.  */
+#define OBSTACK_INTERFACE_VERSION 1
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself, and the installed library
+   supports the same library interface we do.  This code is part of the GNU
+   C Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object
+   files, it is simpler to just do this in the source for each such file.  */
+
+#include <stdio.h>		/* Random thing to get __GNU_LIBRARY__.  */
+#if !defined _LIBC && defined __GNU_LIBRARY__ && __GNU_LIBRARY__ > 1
+# include <gnu-versions.h>
+# if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION
+#  define ELIDE_CODE
+# endif
+#endif
+
+#include <stddef.h>
+
+#ifndef ELIDE_CODE
+
+
+# if HAVE_INTTYPES_H
+#  include <inttypes.h>
+# endif
+# if HAVE_STDINT_H || defined _LIBC
+#  include <stdint.h>
+# endif
+
+/* Determine default alignment.  */
+union fooround
+{
+  uintmax_t i;
+  long double d;
+  void *p;
+};
+struct fooalign
+{
+  char c;
+  union fooround u;
+};
+/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
+   But in fact it might be less smart and round addresses to as much as
+   DEFAULT_ROUNDING.  So we prepare for it to do that.  */
+enum
+  {
+    DEFAULT_ALIGNMENT = offsetof (struct fooalign, u),
+    DEFAULT_ROUNDING = sizeof (union fooround)
+  };
+
+/* When we copy a long block of data, this is the unit to do it with.
+   On some machines, copying successive ints does not work;
+   in such a case, redefine COPYING_UNIT to `long' (if that works)
+   or `char' as a last resort.  */
+# ifndef COPYING_UNIT
+#  define COPYING_UNIT int
+# endif
+
+
+/* The functions allocating more room by calling `obstack_chunk_alloc'
+   jump to the handler pointed to by `obstack_alloc_failed_handler'.
+   This can be set to a user defined function which should either
+   abort gracefully or use longjump - but shouldn't return.  This
+   variable by default points to the internal function
+   `print_and_abort'.  */
+static void print_and_abort (void);
+void (*obstack_alloc_failed_handler) (void) = print_and_abort;
+
+/* Exit value used when `print_and_abort' is used.  */
+# include <stdlib.h>
+# ifdef _LIBC
+int obstack_exit_failure = EXIT_FAILURE;
+# else
+#  include "exitfail.h"
+#  define obstack_exit_failure exit_failure
+# endif
+
+# ifdef _LIBC
+#  if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+/* A looong time ago (before 1994, anyway; we're not sure) this global variable
+   was used by non-GNU-C macros to avoid multiple evaluation.  The GNU C
+   library still exports it because somebody might use it.  */
+struct obstack *_obstack_compat;
+compat_symbol (libc, _obstack_compat, _obstack, GLIBC_2_0);
+#  endif
+# endif
+
+/* Define a macro that either calls functions with the traditional malloc/free
+   calling interface, or calls functions with the mmalloc/mfree interface
+   (that adds an extra first argument), based on the state of use_extra_arg.
+   For free, do not use ?:, since some compilers, like the MIPS compilers,
+   do not allow (expr) ? void : void.  */
+
+# define CALL_CHUNKFUN(h, size) \
+  (((h) -> use_extra_arg) \
+   ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
+   : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size)))
+
+# define CALL_FREEFUN(h, old_chunk) \
+  do { \
+    if ((h) -> use_extra_arg) \
+      (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
+    else \
+      (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \
+  } while (0)
+
+\f
+/* Initialize an obstack H for use.  Specify chunk size SIZE (0 means default).
+   Objects start on multiples of ALIGNMENT (0 means use default).
+   CHUNKFUN is the function to use to allocate chunks,
+   and FREEFUN the function to free them.
+
+   Return nonzero if successful, calls obstack_alloc_failed_handler if
+   allocation fails.  */
+
+int
+_obstack_begin (struct obstack *h,
+		int size, int alignment,
+		void *(*chunkfun) (long),
+		void (*freefun) (void *))
+{
+  register struct _obstack_chunk *chunk; /* points to new chunk */
+
+  if (alignment == 0)
+    alignment = DEFAULT_ALIGNMENT;
+  if (size == 0)
+    /* Default size is what GNU malloc can fit in a 4096-byte block.  */
+    {
+      /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+	 Use the values for range checking, because if range checking is off,
+	 the extra bytes won't be missed terribly, but if range checking is on
+	 and we used a larger request, a whole extra 4096 bytes would be
+	 allocated.
+
+	 These number are irrelevant to the new GNU malloc.  I suspect it is
+	 less sensitive to the size of the request.  */
+      int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+		    + 4 + DEFAULT_ROUNDING - 1)
+		   & ~(DEFAULT_ROUNDING - 1));
+      size = 4096 - extra;
+    }
+
+  h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;
+  h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+  h->chunk_size = size;
+  h->alignment_mask = alignment - 1;
+  h->use_extra_arg = 0;
+
+  chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+  if (!chunk)
+    (*obstack_alloc_failed_handler) ();
+  h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents,
+					       alignment - 1);
+  h->chunk_limit = chunk->limit
+    = (char *) chunk + h->chunk_size;
+  chunk->prev = 0;
+  /* The initial chunk now contains no empty object.  */
+  h->maybe_empty_object = 0;
+  h->alloc_failed = 0;
+  return 1;
+}
+
+int
+_obstack_begin_1 (struct obstack *h, int size, int alignment,
+		  void *(*chunkfun) (void *, long),
+		  void (*freefun) (void *, void *),
+		  void *arg)
+{
+  register struct _obstack_chunk *chunk; /* points to new chunk */
+
+  if (alignment == 0)
+    alignment = DEFAULT_ALIGNMENT;
+  if (size == 0)
+    /* Default size is what GNU malloc can fit in a 4096-byte block.  */
+    {
+      /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+	 Use the values for range checking, because if range checking is off,
+	 the extra bytes won't be missed terribly, but if range checking is on
+	 and we used a larger request, a whole extra 4096 bytes would be
+	 allocated.
+
+	 These number are irrelevant to the new GNU malloc.  I suspect it is
+	 less sensitive to the size of the request.  */
+      int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+		    + 4 + DEFAULT_ROUNDING - 1)
+		   & ~(DEFAULT_ROUNDING - 1));
+      size = 4096 - extra;
+    }
+
+  h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun;
+  h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+  h->chunk_size = size;
+  h->alignment_mask = alignment - 1;
+  h->extra_arg = arg;
+  h->use_extra_arg = 1;
+
+  chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+  if (!chunk)
+    (*obstack_alloc_failed_handler) ();
+  h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents,
+					       alignment - 1);
+  h->chunk_limit = chunk->limit
+    = (char *) chunk + h->chunk_size;
+  chunk->prev = 0;
+  /* The initial chunk now contains no empty object.  */
+  h->maybe_empty_object = 0;
+  h->alloc_failed = 0;
+  return 1;
+}
+
+/* Allocate a new current chunk for the obstack *H
+   on the assumption that LENGTH bytes need to be added
+   to the current object, or a new object of length LENGTH allocated.
+   Copies any partial object from the end of the old chunk
+   to the beginning of the new one.  */
+
+void
+_obstack_newchunk (struct obstack *h, int length)
+{
+  register struct _obstack_chunk *old_chunk = h->chunk;
+  register struct _obstack_chunk *new_chunk;
+  register long	new_size;
+  register long obj_size = h->next_free - h->object_base;
+  register long i;
+  long already;
+  char *object_base;
+
+  /* Compute size for new chunk.  */
+  new_size = (obj_size + length) + (obj_size >> 3) + h->alignment_mask + 100;
+  if (new_size < h->chunk_size)
+    new_size = h->chunk_size;
+
+  /* Allocate and initialize the new chunk.  */
+  new_chunk = CALL_CHUNKFUN (h, new_size);
+  if (!new_chunk)
+    (*obstack_alloc_failed_handler) ();
+  h->chunk = new_chunk;
+  new_chunk->prev = old_chunk;
+  new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
+
+  /* Compute an aligned object_base in the new chunk */
+  object_base =
+    __PTR_ALIGN ((char *) new_chunk, new_chunk->contents, h->alignment_mask);
+
+  /* Move the existing object to the new chunk.
+     Word at a time is fast and is safe if the object
+     is sufficiently aligned.  */
+  if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
+    {
+      for (i = obj_size / sizeof (COPYING_UNIT) - 1;
+	   i >= 0; i--)
+	((COPYING_UNIT *)object_base)[i]
+	  = ((COPYING_UNIT *)h->object_base)[i];
+      /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
+	 but that can cross a page boundary on a machine
+	 which does not do strict alignment for COPYING_UNITS.  */
+      already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
+    }
+  else
+    already = 0;
+  /* Copy remaining bytes one by one.  */
+  for (i = already; i < obj_size; i++)
+    object_base[i] = h->object_base[i];
+
+  /* If the object just copied was the only data in OLD_CHUNK,
+     free that chunk and remove it from the chain.
+     But not if that chunk might contain an empty object.  */
+  if (! h->maybe_empty_object
+      && (h->object_base
+	  == __PTR_ALIGN ((char *) old_chunk, old_chunk->contents,
+			  h->alignment_mask)))
+    {
+      new_chunk->prev = old_chunk->prev;
+      CALL_FREEFUN (h, old_chunk);
+    }
+
+  h->object_base = object_base;
+  h->next_free = h->object_base + obj_size;
+  /* The new chunk certainly contains no empty object yet.  */
+  h->maybe_empty_object = 0;
+}
+# ifdef _LIBC
+libc_hidden_def (_obstack_newchunk)
+# endif
+
+/* Return nonzero if object OBJ has been allocated from obstack H.
+   This is here for debugging.
+   If you use it in a program, you are probably losing.  */
+
+/* Suppress -Wmissing-prototypes warning.  We don't want to declare this in
+   obstack.h because it is just for debugging.  */
+int _obstack_allocated_p (struct obstack *h, void *obj);
+
+int
+_obstack_allocated_p (struct obstack *h, void *obj)
+{
+  register struct _obstack_chunk *lp;	/* below addr of any objects in this chunk */
+  register struct _obstack_chunk *plp;	/* point to previous chunk if any */
+
+  lp = (h)->chunk;
+  /* We use >= rather than > since the object cannot be exactly at
+     the beginning of the chunk but might be an empty object exactly
+     at the end of an adjacent chunk.  */
+  while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj))
+    {
+      plp = lp->prev;
+      lp = plp;
+    }
+  return lp != 0;
+}
+\f
+/* Free objects in obstack H, including OBJ and everything allocate
+   more recently than OBJ.  If OBJ is zero, free everything in H.  */
+
+# undef obstack_free
+
+void
+obstack_free (struct obstack *h, void *obj)
+{
+  register struct _obstack_chunk *lp;	/* below addr of any objects in this chunk */
+  register struct _obstack_chunk *plp;	/* point to previous chunk if any */
+
+  lp = h->chunk;
+  /* We use >= because there cannot be an object at the beginning of a chunk.
+     But there can be an empty object at that address
+     at the end of another chunk.  */
+  while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj))
+    {
+      plp = lp->prev;
+      CALL_FREEFUN (h, lp);
+      lp = plp;
+      /* If we switch chunks, we can't tell whether the new current
+	 chunk contains an empty object, so assume that it may.  */
+      h->maybe_empty_object = 1;
+    }
+  if (lp)
+    {
+      h->object_base = h->next_free = (char *) (obj);
+      h->chunk_limit = lp->limit;
+      h->chunk = lp;
+    }
+  else if (obj != 0)
+    /* obj is not in any of the chunks! */
+    abort ();
+}
+
+# ifdef _LIBC
+/* Older versions of libc used a function _obstack_free intended to be
+   called by non-GCC compilers.  */
+strong_alias (obstack_free, _obstack_free)
+# endif
+\f
+int
+_obstack_memory_used (struct obstack *h)
+{
+  register struct _obstack_chunk* lp;
+  register int nbytes = 0;
+
+  for (lp = h->chunk; lp != 0; lp = lp->prev)
+    {
+      nbytes += lp->limit - (char *) lp;
+    }
+  return nbytes;
+}
+\f
+/* Define the error handler.  */
+# ifdef _LIBC
+#  include <libintl.h>
+# else
+#  include "gettext.h"
+# endif
+# ifndef _
+#  define _(msgid) gettext (msgid)
+# endif
+
+# ifdef _LIBC
+#  include <libio/iolibio.h>
+# endif
+
+# ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later.  */
+#  if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+#   define __attribute__(Spec) /* empty */
+#  endif
+# endif
+
+static void
+__attribute__ ((noreturn))
+print_and_abort (void)
+{
+  /* Don't change any of these strings.  Yes, it would be possible to add
+     the newline to the string and use fputs or so.  But this must not
+     happen because the "memory exhausted" message appears in other places
+     like this and the translation should be reused instead of creating
+     a very similar string which requires a separate translation.  */
+# ifdef _LIBC
+  (void) __fxprintf (NULL, "%s\n", _("memory exhausted"));
+# else
+  fprintf (stderr, "%s\n", _("memory exhausted"));
+# endif
+  exit (obstack_exit_failure);
+}
+
+#endif	/* !ELIDE_CODE */
diff --git a/obstack.h b/obstack.h
new file mode 100644
index 0000000..449070e
--- /dev/null
+++ b/obstack.h
@@ -0,0 +1,509 @@
+/* obstack.h - object stack macros
+   Copyright (C) 1988-1994,1996-1999,2003,2004,2005,2009
+	Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* Summary:
+
+All the apparent functions defined here are macros. The idea
+is that you would use these pre-tested macros to solve a
+very specific set of problems, and they would run fast.
+Caution: no side-effects in arguments please!! They may be
+evaluated MANY times!!
+
+These macros operate a stack of objects.  Each object starts life
+small, and may grow to maturity.  (Consider building a word syllable
+by syllable.)  An object can move while it is growing.  Once it has
+been "finished" it never changes address again.  So the "top of the
+stack" is typically an immature growing object, while the rest of the
+stack is of mature, fixed size and fixed address objects.
+
+These routines grab large chunks of memory, using a function you
+supply, called `obstack_chunk_alloc'.  On occasion, they free chunks,
+by calling `obstack_chunk_free'.  You must define them and declare
+them before using any obstack macros.
+
+Each independent stack is represented by a `struct obstack'.
+Each of the obstack macros expects a pointer to such a structure
+as the first argument.
+
+One motivation for this package is the problem of growing char strings
+in symbol tables.  Unless you are "fascist pig with a read-only mind"
+--Gosper's immortal quote from HAKMEM item 154, out of context--you
+would not like to put any arbitrary upper limit on the length of your
+symbols.
+
+In practice this often means you will build many short symbols and a
+few long symbols.  At the time you are reading a symbol you don't know
+how long it is.  One traditional method is to read a symbol into a
+buffer, realloc()ating the buffer every time you try to read a symbol
+that is longer than the buffer.  This is beaut, but you still will
+want to copy the symbol from the buffer to a more permanent
+symbol-table entry say about half the time.
+
+With obstacks, you can work differently.  Use one obstack for all symbol
+names.  As you read a symbol, grow the name in the obstack gradually.
+When the name is complete, finalize it.  Then, if the symbol exists already,
+free the newly read name.
+
+The way we do this is to take a large chunk, allocating memory from
+low addresses.  When you want to build a symbol in the chunk you just
+add chars above the current "high water mark" in the chunk.  When you
+have finished adding chars, because you got to the end of the symbol,
+you know how long the chars are, and you can create a new object.
+Mostly the chars will not burst over the highest address of the chunk,
+because you would typically expect a chunk to be (say) 100 times as
+long as an average object.
+
+In case that isn't clear, when we have enough chars to make up
+the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
+so we just point to it where it lies.  No moving of chars is
+needed and this is the second win: potentially long strings need
+never be explicitly shuffled. Once an object is formed, it does not
+change its address during its lifetime.
+
+When the chars burst over a chunk boundary, we allocate a larger
+chunk, and then copy the partly formed object from the end of the old
+chunk to the beginning of the new larger chunk.  We then carry on
+accreting characters to the end of the object as we normally would.
+
+A special macro is provided to add a single char at a time to a
+growing object.  This allows the use of register variables, which
+break the ordinary 'growth' macro.
+
+Summary:
+	We allocate large chunks.
+	We carve out one object at a time from the current chunk.
+	Once carved, an object never moves.
+	We are free to append data of any size to the currently
+	  growing object.
+	Exactly one object is growing in an obstack at any one time.
+	You can run one obstack per control block.
+	You may have as many control blocks as you dare.
+	Because of the way we do it, you can `unwind' an obstack
+	  back to a previous state. (You may remove objects much
+	  as you would with a stack.)
+*/
+
+
+/* Don't do the contents of this file more than once.  */
+
+#ifndef _OBSTACK_H
+#define _OBSTACK_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+\f
+/* We need the type of a pointer subtraction.  If __PTRDIFF_TYPE__ is
+   defined, as with GNU C, use that; that way we don't pollute the
+   namespace with <stddef.h>'s symbols.  Otherwise, include <stddef.h>
+   and use ptrdiff_t.  */
+
+#ifdef __PTRDIFF_TYPE__
+# define PTR_INT_TYPE __PTRDIFF_TYPE__
+#else
+# include <stddef.h>
+# define PTR_INT_TYPE ptrdiff_t
+#endif
+
+/* If B is the base of an object addressed by P, return the result of
+   aligning P to the next multiple of A + 1.  B and P must be of type
+   char *.  A + 1 must be a power of 2.  */
+
+#define __BPTR_ALIGN(B, P, A) ((B) + (((P) - (B) + (A)) & ~(A)))
+
+/* Similiar to _BPTR_ALIGN (B, P, A), except optimize the common case
+   where pointers can be converted to integers, aligned as integers,
+   and converted back again.  If PTR_INT_TYPE is narrower than a
+   pointer (e.g., the AS/400), play it safe and compute the alignment
+   relative to B.  Otherwise, use the faster strategy of computing the
+   alignment relative to 0.  */
+
+#define __PTR_ALIGN(B, P, A)						    \
+  __BPTR_ALIGN (sizeof (PTR_INT_TYPE) < sizeof (void *) ? (B) : (char *) 0, \
+		P, A)
+
+#include <string.h>
+
+struct _obstack_chunk		/* Lives at front of each chunk. */
+{
+  char  *limit;			/* 1 past end of this chunk */
+  struct _obstack_chunk *prev;	/* address of prior chunk or NULL */
+  char	contents[4];		/* objects begin here */
+};
+
+struct obstack		/* control current object in current chunk */
+{
+  long	chunk_size;		/* preferred size to allocate chunks in */
+  struct _obstack_chunk *chunk;	/* address of current struct obstack_chunk */
+  char	*object_base;		/* address of object we are building */
+  char	*next_free;		/* where to add next char to current object */
+  char	*chunk_limit;		/* address of char after current chunk */
+  union
+  {
+    PTR_INT_TYPE tempint;
+    void *tempptr;
+  } temp;			/* Temporary for some macros.  */
+  int   alignment_mask;		/* Mask of alignment for each object. */
+  /* These prototypes vary based on `use_extra_arg', and we use
+     casts to the prototypeless function type in all assignments,
+     but having prototypes here quiets -Wstrict-prototypes.  */
+  struct _obstack_chunk *(*chunkfun) (void *, long);
+  void (*freefun) (void *, struct _obstack_chunk *);
+  void *extra_arg;		/* first arg for chunk alloc/dealloc funcs */
+  unsigned use_extra_arg:1;	/* chunk alloc/dealloc funcs take extra arg */
+  unsigned maybe_empty_object:1;/* There is a possibility that the current
+				   chunk contains a zero-length object.  This
+				   prevents freeing the chunk if we allocate
+				   a bigger chunk to replace it. */
+  unsigned alloc_failed:1;	/* No longer used, as we now call the failed
+				   handler on error, but retained for binary
+				   compatibility.  */
+};
+
+/* Declare the external functions we use; they are in obstack.c.  */
+
+extern void _obstack_newchunk (struct obstack *, int);
+extern int _obstack_begin (struct obstack *, int, int,
+			    void *(*) (long), void (*) (void *));
+extern int _obstack_begin_1 (struct obstack *, int, int,
+			     void *(*) (void *, long),
+			     void (*) (void *, void *), void *);
+extern int _obstack_memory_used (struct obstack *);
+
+void obstack_free (struct obstack *__obstack, void *__block);
+
+\f
+/* Error handler called when `obstack_chunk_alloc' failed to allocate
+   more memory.  This can be set to a user defined function which
+   should either abort gracefully or use longjump - but shouldn't
+   return.  The default action is to print a message and abort.  */
+extern void (*obstack_alloc_failed_handler) (void);
+
+/* Exit value used when `print_and_abort' is used.  */
+extern int obstack_exit_failure;
+\f
+/* Pointer to beginning of object being allocated or to be allocated next.
+   Note that this might not be the final address of the object
+   because a new chunk might be needed to hold the final size.  */
+
+#define obstack_base(h) ((void *) (h)->object_base)
+
+/* Size for allocating ordinary chunks.  */
+
+#define obstack_chunk_size(h) ((h)->chunk_size)
+
+/* Pointer to next byte not yet allocated in current chunk.  */
+
+#define obstack_next_free(h)	((h)->next_free)
+
+/* Mask specifying low bits that should be clear in address of an object.  */
+
+#define obstack_alignment_mask(h) ((h)->alignment_mask)
+
+/* To prevent prototype warnings provide complete argument list.  */
+#define obstack_init(h)						\
+  _obstack_begin ((h), 0, 0,					\
+		  (void *(*) (long)) obstack_chunk_alloc,	\
+		  (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_begin(h, size)					\
+  _obstack_begin ((h), (size), 0,				\
+		  (void *(*) (long)) obstack_chunk_alloc,	\
+		  (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun)  \
+  _obstack_begin ((h), (size), (alignment),				   \
+		  (void *(*) (long)) (chunkfun),			   \
+		  (void (*) (void *)) (freefun))
+
+#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
+  _obstack_begin_1 ((h), (size), (alignment),				\
+		    (void *(*) (void *, long)) (chunkfun),		\
+		    (void (*) (void *, void *)) (freefun), (arg))
+
+#define obstack_chunkfun(h, newchunkfun) \
+  ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun))
+
+#define obstack_freefun(h, newfreefun) \
+  ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun))
+
+#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = (achar))
+
+#define obstack_blank_fast(h,n) ((h)->next_free += (n))
+
+#define obstack_memory_used(h) _obstack_memory_used (h)
+\f
+#if defined __GNUC__ && defined __STDC__ && __STDC__
+/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and
+   does not implement __extension__.  But that compiler doesn't define
+   __GNUC_MINOR__.  */
+# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__)
+#  define __extension__
+# endif
+
+/* For GNU C, if not -traditional,
+   we can define these macros to compute all args only once
+   without using a global variable.
+   Also, we can avoid using the `temp' slot, to make faster code.  */
+
+# define obstack_object_size(OBSTACK)					\
+  __extension__								\
+  ({ struct obstack const *__o = (OBSTACK);				\
+     (unsigned) (__o->next_free - __o->object_base); })
+
+# define obstack_room(OBSTACK)						\
+  __extension__								\
+  ({ struct obstack const *__o = (OBSTACK);				\
+     (unsigned) (__o->chunk_limit - __o->next_free); })
+
+# define obstack_make_room(OBSTACK,length)				\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   int __len = (length);						\
+   if (__o->chunk_limit - __o->next_free < __len)			\
+     _obstack_newchunk (__o, __len);					\
+   (void) 0; })
+
+# define obstack_empty_p(OBSTACK)					\
+  __extension__								\
+  ({ struct obstack const *__o = (OBSTACK);				\
+     (__o->chunk->prev == 0						\
+      && __o->next_free == __PTR_ALIGN ((char *) __o->chunk,		\
+					__o->chunk->contents,		\
+					__o->alignment_mask)); })
+
+# define obstack_grow(OBSTACK,where,length)				\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   int __len = (length);						\
+   if (__o->next_free + __len > __o->chunk_limit)			\
+     _obstack_newchunk (__o, __len);					\
+   memcpy (__o->next_free, where, __len);				\
+   __o->next_free += __len;						\
+   (void) 0; })
+
+# define obstack_grow0(OBSTACK,where,length)				\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   int __len = (length);						\
+   if (__o->next_free + __len + 1 > __o->chunk_limit)			\
+     _obstack_newchunk (__o, __len + 1);				\
+   memcpy (__o->next_free, where, __len);				\
+   __o->next_free += __len;						\
+   *(__o->next_free)++ = 0;						\
+   (void) 0; })
+
+# define obstack_1grow(OBSTACK,datum)					\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   if (__o->next_free + 1 > __o->chunk_limit)				\
+     _obstack_newchunk (__o, 1);					\
+   obstack_1grow_fast (__o, datum);					\
+   (void) 0; })
+
+/* These assume that the obstack alignment is good enough for pointers
+   or ints, and that the data added so far to the current object
+   shares that much alignment.  */
+
+# define obstack_ptr_grow(OBSTACK,datum)				\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   if (__o->next_free + sizeof (void *) > __o->chunk_limit)		\
+     _obstack_newchunk (__o, sizeof (void *));				\
+   obstack_ptr_grow_fast (__o, datum); })				\
+
+# define obstack_int_grow(OBSTACK,datum)				\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   if (__o->next_free + sizeof (int) > __o->chunk_limit)		\
+     _obstack_newchunk (__o, sizeof (int));				\
+   obstack_int_grow_fast (__o, datum); })
+
+# define obstack_ptr_grow_fast(OBSTACK,aptr)				\
+__extension__								\
+({ struct obstack *__o1 = (OBSTACK);					\
+   *(const void **) __o1->next_free = (aptr);				\
+   __o1->next_free += sizeof (const void *);				\
+   (void) 0; })
+
+# define obstack_int_grow_fast(OBSTACK,aint)				\
+__extension__								\
+({ struct obstack *__o1 = (OBSTACK);					\
+   *(int *) __o1->next_free = (aint);					\
+   __o1->next_free += sizeof (int);					\
+   (void) 0; })
+
+# define obstack_blank(OBSTACK,length)					\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   int __len = (length);						\
+   if (__o->chunk_limit - __o->next_free < __len)			\
+     _obstack_newchunk (__o, __len);					\
+   obstack_blank_fast (__o, __len);					\
+   (void) 0; })
+
+# define obstack_alloc(OBSTACK,length)					\
+__extension__								\
+({ struct obstack *__h = (OBSTACK);					\
+   obstack_blank (__h, (length));					\
+   obstack_finish (__h); })
+
+# define obstack_copy(OBSTACK,where,length)				\
+__extension__								\
+({ struct obstack *__h = (OBSTACK);					\
+   obstack_grow (__h, (where), (length));				\
+   obstack_finish (__h); })
+
+# define obstack_copy0(OBSTACK,where,length)				\
+__extension__								\
+({ struct obstack *__h = (OBSTACK);					\
+   obstack_grow0 (__h, (where), (length));				\
+   obstack_finish (__h); })
+
+/* The local variable is named __o1 to avoid a name conflict
+   when obstack_blank is called.  */
+# define obstack_finish(OBSTACK)					\
+__extension__								\
+({ struct obstack *__o1 = (OBSTACK);					\
+   void *__value = (void *) __o1->object_base;				\
+   if (__o1->next_free == __value)					\
+     __o1->maybe_empty_object = 1;					\
+   __o1->next_free							\
+     = __PTR_ALIGN (__o1->object_base, __o1->next_free,			\
+		    __o1->alignment_mask);				\
+   if (__o1->next_free - (char *)__o1->chunk				\
+       > __o1->chunk_limit - (char *)__o1->chunk)			\
+     __o1->next_free = __o1->chunk_limit;				\
+   __o1->object_base = __o1->next_free;					\
+   __value; })
+
+# define obstack_free(OBSTACK, OBJ)					\
+__extension__								\
+({ struct obstack *__o = (OBSTACK);					\
+   void *__obj = (OBJ);							\
+   if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit)  \
+     __o->next_free = __o->object_base = (char *)__obj;			\
+   else (obstack_free) (__o, __obj); })
+\f
+#else /* not __GNUC__ or not __STDC__ */
+
+# define obstack_object_size(h) \
+ (unsigned) ((h)->next_free - (h)->object_base)
+
+# define obstack_room(h)		\
+ (unsigned) ((h)->chunk_limit - (h)->next_free)
+
+# define obstack_empty_p(h) \
+ ((h)->chunk->prev == 0							\
+  && (h)->next_free == __PTR_ALIGN ((char *) (h)->chunk,		\
+				    (h)->chunk->contents,		\
+				    (h)->alignment_mask))
+
+/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
+   so that we can avoid having void expressions
+   in the arms of the conditional expression.
+   Casting the third operand to void was tried before,
+   but some compilers won't accept it.  */
+
+# define obstack_make_room(h,length)					\
+( (h)->temp.tempint = (length),						\
+  (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit)		\
+   ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0))
+
+# define obstack_grow(h,where,length)					\
+( (h)->temp.tempint = (length),						\
+  (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit)		\
+   ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0),		\
+  memcpy ((h)->next_free, where, (h)->temp.tempint),			\
+  (h)->next_free += (h)->temp.tempint)
+
+# define obstack_grow0(h,where,length)					\
+( (h)->temp.tempint = (length),						\
+  (((h)->next_free + (h)->temp.tempint + 1 > (h)->chunk_limit)		\
+   ? (_obstack_newchunk ((h), (h)->temp.tempint + 1), 0) : 0),		\
+  memcpy ((h)->next_free, where, (h)->temp.tempint),			\
+  (h)->next_free += (h)->temp.tempint,					\
+  *((h)->next_free)++ = 0)
+
+# define obstack_1grow(h,datum)						\
+( (((h)->next_free + 1 > (h)->chunk_limit)				\
+   ? (_obstack_newchunk ((h), 1), 0) : 0),				\
+  obstack_1grow_fast (h, datum))
+
+# define obstack_ptr_grow(h,datum)					\
+( (((h)->next_free + sizeof (char *) > (h)->chunk_limit)		\
+   ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0),		\
+  obstack_ptr_grow_fast (h, datum))
+
+# define obstack_int_grow(h,datum)					\
+( (((h)->next_free + sizeof (int) > (h)->chunk_limit)			\
+   ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0),			\
+  obstack_int_grow_fast (h, datum))
+
+# define obstack_ptr_grow_fast(h,aptr)					\
+  (((const void **) ((h)->next_free += sizeof (void *)))[-1] = (aptr))
+
+# define obstack_int_grow_fast(h,aint)					\
+  (((int *) ((h)->next_free += sizeof (int)))[-1] = (aint))
+
+# define obstack_blank(h,length)					\
+( (h)->temp.tempint = (length),						\
+  (((h)->chunk_limit - (h)->next_free < (h)->temp.tempint)		\
+   ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0),		\
+  obstack_blank_fast (h, (h)->temp.tempint))
+
+# define obstack_alloc(h,length)					\
+ (obstack_blank ((h), (length)), obstack_finish ((h)))
+
+# define obstack_copy(h,where,length)					\
+ (obstack_grow ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_copy0(h,where,length)					\
+ (obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_finish(h)						\
+( ((h)->next_free == (h)->object_base					\
+   ? (((h)->maybe_empty_object = 1), 0)					\
+   : 0),								\
+  (h)->temp.tempptr = (h)->object_base,					\
+  (h)->next_free							\
+    = __PTR_ALIGN ((h)->object_base, (h)->next_free,			\
+		   (h)->alignment_mask),				\
+  (((h)->next_free - (char *) (h)->chunk				\
+    > (h)->chunk_limit - (char *) (h)->chunk)				\
+   ? ((h)->next_free = (h)->chunk_limit) : 0),				\
+  (h)->object_base = (h)->next_free,					\
+  (h)->temp.tempptr)
+
+# define obstack_free(h,obj)						\
+( (h)->temp.tempint = (char *) (obj) - (char *) (h)->chunk,		\
+  ((((h)->temp.tempint > 0						\
+    && (h)->temp.tempint < (h)->chunk_limit - (char *) (h)->chunk))	\
+   ? (int) ((h)->next_free = (h)->object_base				\
+	    = (h)->temp.tempint + (char *) (h)->chunk)			\
+   : (((obstack_free) ((h), (h)->temp.tempint + (char *) (h)->chunk), 0), 0)))
+
+#endif /* not __GNUC__ or not __STDC__ */
+
+#ifdef __cplusplus
+}	/* C++ */
+#endif
+
+#endif /* obstack.h */

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/5] Add string search routines from GNU grep
       [not found] <20100213141558.22851.13660.stgit@fredrik-laptop>
  2010-02-13 14:20 ` [PATCH 1/5] Add obstack.[ch] from EGLIBC 2.10 Fredrik Kuivinen
@ 2010-02-13 14:20 ` Fredrik Kuivinen
  2010-02-13 15:49   ` Dmitry Potapov
  2010-02-13 15:56   ` Paolo Bonzini
  2010-02-13 14:20 ` [PATCH 3/5] Adapt the kwset code to Git Fredrik Kuivinen
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 11+ messages in thread
From: Fredrik Kuivinen @ 2010-02-13 14:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

kwset.c and kwset.h have been copied unmodified from GNU grep 2.5.4.

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---

 kwset.c |  779 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 kwset.h |   57 +++++
 2 files changed, 836 insertions(+), 0 deletions(-)
 create mode 100644 kwset.c
 create mode 100644 kwset.h

diff --git a/kwset.c b/kwset.c
new file mode 100644
index 0000000..0957f66
--- /dev/null
+++ b/kwset.c
@@ -0,0 +1,779 @@
+/* kwset.c - search for any of a set of keywords.
+   Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009
+   Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* Written August 1989 by Mike Haertel.
+   The author may be reached (Email) at the address mike@ai.mit.edu,
+   or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+/* The algorithm implemented by these routines bears a startling resemblence
+   to one discovered by Beate Commentz-Walter, although it is not identical.
+   See "A String Matching Algorithm Fast on the Average," Technical Report,
+   IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
+   Heidelberg, Germany.  See also Aho, A.V., and M. Corasick, "Efficient
+   String Matching:  An Aid to Bibliographic Search," CACM June 1975,
+   Vol. 18, No. 6, which describes the failure function used below. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <sys/types.h>
+#include "system.h"
+#include "kwset.h"
+#include "obstack.h"
+
+#ifdef GREP
+# include "xalloc.h"
+# undef malloc
+# define malloc xmalloc
+#endif
+
+#define NCHAR (UCHAR_MAX + 1)
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+
+#define U(c) ((unsigned char) (c))
+
+/* Balanced tree of edges and labels leaving a given trie node. */
+struct tree
+{
+  struct tree *llink;		/* Left link; MUST be first field. */
+  struct tree *rlink;		/* Right link (to larger labels). */
+  struct trie *trie;		/* Trie node pointed to by this edge. */
+  unsigned char label;		/* Label on this edge. */
+  char balance;			/* Difference in depths of subtrees. */
+};
+
+/* Node of a trie representing a set of reversed keywords. */
+struct trie
+{
+  unsigned int accepting;	/* Word index of accepted word, or zero. */
+  struct tree *links;		/* Tree of edges leaving this node. */
+  struct trie *parent;		/* Parent of this node. */
+  struct trie *next;		/* List of all trie nodes in level order. */
+  struct trie *fail;		/* Aho-Corasick failure function. */
+  int depth;			/* Depth of this node from the root. */
+  int shift;			/* Shift function for search failures. */
+  int maxshift;			/* Max shift of self and descendents. */
+};
+
+/* Structure returned opaquely to the caller, containing everything. */
+struct kwset
+{
+  struct obstack obstack;	/* Obstack for node allocation. */
+  int words;			/* Number of words in the trie. */
+  struct trie *trie;		/* The trie itself. */
+  int mind;			/* Minimum depth of an accepting node. */
+  int maxd;			/* Maximum depth of any node. */
+  unsigned char delta[NCHAR];	/* Delta table for rapid search. */
+  struct trie *next[NCHAR];	/* Table of children of the root. */
+  char *target;			/* Target string if there's only one. */
+  int mind2;			/* Used in Boyer-Moore search for one string. */
+  char const *trans;		/* Character translation table. */
+};
+
+/* Allocate and initialize a keyword set object, returning an opaque
+   pointer to it.  Return NULL if memory is not available. */
+kwset_t
+kwsalloc (char const *trans)
+{
+  struct kwset *kwset;
+
+  kwset = (struct kwset *) malloc(sizeof (struct kwset));
+  if (!kwset)
+    return NULL;
+
+  obstack_init(&kwset->obstack);
+  kwset->words = 0;
+  kwset->trie
+    = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
+  if (!kwset->trie)
+    {
+      kwsfree((kwset_t) kwset);
+      return NULL;
+    }
+  kwset->trie->accepting = 0;
+  kwset->trie->links = NULL;
+  kwset->trie->parent = NULL;
+  kwset->trie->next = NULL;
+  kwset->trie->fail = NULL;
+  kwset->trie->depth = 0;
+  kwset->trie->shift = 0;
+  kwset->mind = INT_MAX;
+  kwset->maxd = -1;
+  kwset->target = NULL;
+  kwset->trans = trans;
+
+  return (kwset_t) kwset;
+}
+
+/* This upper bound is valid for CHAR_BIT >= 4 and
+   exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
+#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2)
+
+/* Add the given string to the contents of the keyword set.  Return NULL
+   for success, an error message otherwise. */
+const char *
+kwsincr (kwset_t kws, char const *text, size_t len)
+{
+  struct kwset *kwset;
+  register struct trie *trie;
+  register unsigned char label;
+  register struct tree *link;
+  register int depth;
+  struct tree *links[DEPTH_SIZE];
+  enum { L, R } dirs[DEPTH_SIZE];
+  struct tree *t, *r, *l, *rl, *lr;
+
+  kwset = (struct kwset *) kws;
+  trie = kwset->trie;
+  text += len;
+
+  /* Descend the trie (built of reversed keywords) character-by-character,
+     installing new nodes when necessary. */
+  while (len--)
+    {
+      label = kwset->trans ? kwset->trans[U(*--text)] : *--text;
+
+      /* Descend the tree of outgoing links for this trie node,
+	 looking for the current character and keeping track
+	 of the path followed. */
+      link = trie->links;
+      links[0] = (struct tree *) &trie->links;
+      dirs[0] = L;
+      depth = 1;
+
+      while (link && label != link->label)
+	{
+	  links[depth] = link;
+	  if (label < link->label)
+	    dirs[depth++] = L, link = link->llink;
+	  else
+	    dirs[depth++] = R, link = link->rlink;
+	}
+
+      /* The current character doesn't have an outgoing link at
+	 this trie node, so build a new trie node and install
+	 a link in the current trie node's tree. */
+      if (!link)
+	{
+	  link = (struct tree *) obstack_alloc(&kwset->obstack,
+					       sizeof (struct tree));
+	  if (!link)
+	    return _("memory exhausted");
+	  link->llink = NULL;
+	  link->rlink = NULL;
+	  link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
+						     sizeof (struct trie));
+	  if (!link->trie)
+	    {
+	      obstack_free(&kwset->obstack, link);
+	      return _("memory exhausted");
+	    }
+	  link->trie->accepting = 0;
+	  link->trie->links = NULL;
+	  link->trie->parent = trie;
+	  link->trie->next = NULL;
+	  link->trie->fail = NULL;
+	  link->trie->depth = trie->depth + 1;
+	  link->trie->shift = 0;
+	  link->label = label;
+	  link->balance = 0;
+
+	  /* Install the new tree node in its parent. */
+	  if (dirs[--depth] == L)
+	    links[depth]->llink = link;
+	  else
+	    links[depth]->rlink = link;
+
+	  /* Back up the tree fixing the balance flags. */
+	  while (depth && !links[depth]->balance)
+	    {
+	      if (dirs[depth] == L)
+		--links[depth]->balance;
+	      else
+		++links[depth]->balance;
+	      --depth;
+	    }
+
+	  /* Rebalance the tree by pointer rotations if necessary. */
+	  if (depth && ((dirs[depth] == L && --links[depth]->balance)
+			|| (dirs[depth] == R && ++links[depth]->balance)))
+	    {
+	      switch (links[depth]->balance)
+		{
+		case (char) -2:
+		  switch (dirs[depth + 1])
+		    {
+		    case L:
+		      r = links[depth], t = r->llink, rl = t->rlink;
+		      t->rlink = r, r->llink = rl;
+		      t->balance = r->balance = 0;
+		      break;
+		    case R:
+		      r = links[depth], l = r->llink, t = l->rlink;
+		      rl = t->rlink, lr = t->llink;
+		      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
+		      l->balance = t->balance != 1 ? 0 : -1;
+		      r->balance = t->balance != (char) -1 ? 0 : 1;
+		      t->balance = 0;
+		      break;
+		    default:
+		      abort ();
+		    }
+		  break;
+		case 2:
+		  switch (dirs[depth + 1])
+		    {
+		    case R:
+		      l = links[depth], t = l->rlink, lr = t->llink;
+		      t->llink = l, l->rlink = lr;
+		      t->balance = l->balance = 0;
+		      break;
+		    case L:
+		      l = links[depth], r = l->rlink, t = r->llink;
+		      lr = t->llink, rl = t->rlink;
+		      t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
+		      l->balance = t->balance != 1 ? 0 : -1;
+		      r->balance = t->balance != (char) -1 ? 0 : 1;
+		      t->balance = 0;
+		      break;
+		    default:
+		      abort ();
+		    }
+		  break;
+		default:
+		  abort ();
+		}
+
+	      if (dirs[depth - 1] == L)
+		links[depth - 1]->llink = t;
+	      else
+		links[depth - 1]->rlink = t;
+	    }
+	}
+
+      trie = link->trie;
+    }
+
+  /* Mark the node we finally reached as accepting, encoding the
+     index number of this word in the keyword set so far. */
+  if (!trie->accepting)
+    trie->accepting = 1 + 2 * kwset->words;
+  ++kwset->words;
+
+  /* Keep track of the longest and shortest string of the keyword set. */
+  if (trie->depth < kwset->mind)
+    kwset->mind = trie->depth;
+  if (trie->depth > kwset->maxd)
+    kwset->maxd = trie->depth;
+
+  return NULL;
+}
+
+/* Enqueue the trie nodes referenced from the given tree in the
+   given queue. */
+static void
+enqueue (struct tree *tree, struct trie **last)
+{
+  if (!tree)
+    return;
+  enqueue(tree->llink, last);
+  enqueue(tree->rlink, last);
+  (*last) = (*last)->next = tree->trie;
+}
+
+/* Compute the Aho-Corasick failure function for the trie nodes referenced
+   from the given tree, given the failure function for their parent as
+   well as a last resort failure node. */
+static void
+treefails (register struct tree const *tree, struct trie const *fail,
+	   struct trie *recourse)
+{
+  register struct tree *link;
+
+  if (!tree)
+    return;
+
+  treefails(tree->llink, fail, recourse);
+  treefails(tree->rlink, fail, recourse);
+
+  /* Find, in the chain of fails going back to the root, the first
+     node that has a descendent on the current label. */
+  while (fail)
+    {
+      link = fail->links;
+      while (link && tree->label != link->label)
+	if (tree->label < link->label)
+	  link = link->llink;
+	else
+	  link = link->rlink;
+      if (link)
+	{
+	  tree->trie->fail = link->trie;
+	  return;
+	}
+      fail = fail->fail;
+    }
+
+  tree->trie->fail = recourse;
+}
+
+/* Set delta entries for the links of the given tree such that
+   the preexisting delta value is larger than the current depth. */
+static void
+treedelta (register struct tree const *tree,
+	   register unsigned int depth,
+	   unsigned char delta[])
+{
+  if (!tree)
+    return;
+  treedelta(tree->llink, depth, delta);
+  treedelta(tree->rlink, depth, delta);
+  if (depth < delta[tree->label])
+    delta[tree->label] = depth;
+}
+
+/* Return true if A has every label in B. */
+static int
+hasevery (register struct tree const *a, register struct tree const *b)
+{
+  if (!b)
+    return 1;
+  if (!hasevery(a, b->llink))
+    return 0;
+  if (!hasevery(a, b->rlink))
+    return 0;
+  while (a && b->label != a->label)
+    if (b->label < a->label)
+      a = a->llink;
+    else
+      a = a->rlink;
+  return !!a;
+}
+
+/* Compute a vector, indexed by character code, of the trie nodes
+   referenced from the given tree. */
+static void
+treenext (struct tree const *tree, struct trie *next[])
+{
+  if (!tree)
+    return;
+  treenext(tree->llink, next);
+  treenext(tree->rlink, next);
+  next[tree->label] = tree->trie;
+}
+
+/* Compute the shift for each trie node, as well as the delta
+   table and next cache for the given keyword set. */
+const char *
+kwsprep (kwset_t kws)
+{
+  register struct kwset *kwset;
+  register int i;
+  register struct trie *curr;
+  register char const *trans;
+  unsigned char delta[NCHAR];
+
+  kwset = (struct kwset *) kws;
+
+  /* Initial values for the delta table; will be changed later.  The
+     delta entry for a given character is the smallest depth of any
+     node at which an outgoing edge is labeled by that character. */
+  memset(delta, kwset->mind < UCHAR_MAX ? kwset->mind : UCHAR_MAX, NCHAR);
+
+  /* Check if we can use the simple boyer-moore algorithm, instead
+     of the hairy commentz-walter algorithm. */
+  if (kwset->words == 1 && kwset->trans == NULL)
+    {
+      char c;
+
+      /* Looking for just one string.  Extract it from the trie. */
+      kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
+      if (!kwset->target)
+	return _("memory exhausted");
+      for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
+	{
+	  kwset->target[i] = curr->links->label;
+	  curr = curr->links->trie;
+	}
+      /* Build the Boyer Moore delta.  Boy that's easy compared to CW. */
+      for (i = 0; i < kwset->mind; ++i)
+	delta[U(kwset->target[i])] = kwset->mind - (i + 1);
+      /* Find the minimal delta2 shift that we might make after
+	 a backwards match has failed. */
+      c = kwset->target[kwset->mind - 1];
+      for (i = kwset->mind - 2; i >= 0; --i)
+	if (kwset->target[i] == c)
+	  break;
+      kwset->mind2 = kwset->mind - (i + 1);
+    }
+  else
+    {
+      register struct trie *fail;
+      struct trie *last, *next[NCHAR];
+
+      /* Traverse the nodes of the trie in level order, simultaneously
+	 computing the delta table, failure function, and shift function. */
+      for (curr = last = kwset->trie; curr; curr = curr->next)
+	{
+	  /* Enqueue the immediate descendents in the level order queue. */
+	  enqueue(curr->links, &last);
+
+	  curr->shift = kwset->mind;
+	  curr->maxshift = kwset->mind;
+
+	  /* Update the delta table for the descendents of this node. */
+	  treedelta(curr->links, curr->depth, delta);
+
+	  /* Compute the failure function for the decendents of this node. */
+	  treefails(curr->links, curr->fail, kwset->trie);
+
+	  /* Update the shifts at each node in the current node's chain
+	     of fails back to the root. */
+	  for (fail = curr->fail; fail; fail = fail->fail)
+	    {
+	      /* If the current node has some outgoing edge that the fail
+		 doesn't, then the shift at the fail should be no larger
+		 than the difference of their depths. */
+	      if (!hasevery(fail->links, curr->links))
+		if (curr->depth - fail->depth < fail->shift)
+		  fail->shift = curr->depth - fail->depth;
+
+	      /* If the current node is accepting then the shift at the
+		 fail and its descendents should be no larger than the
+		 difference of their depths. */
+	      if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
+		fail->maxshift = curr->depth - fail->depth;
+	    }
+	}
+
+      /* Traverse the trie in level order again, fixing up all nodes whose
+	 shift exceeds their inherited maxshift. */
+      for (curr = kwset->trie->next; curr; curr = curr->next)
+	{
+	  if (curr->maxshift > curr->parent->maxshift)
+	    curr->maxshift = curr->parent->maxshift;
+	  if (curr->shift > curr->maxshift)
+	    curr->shift = curr->maxshift;
+	}
+
+      /* Create a vector, indexed by character code, of the outgoing links
+	 from the root node. */
+      for (i = 0; i < NCHAR; ++i)
+	next[i] = NULL;
+      treenext(kwset->trie->links, next);
+
+      if ((trans = kwset->trans) != NULL)
+	for (i = 0; i < NCHAR; ++i)
+	  kwset->next[i] = next[U(trans[i])];
+      else
+	memcpy(kwset->next, next, NCHAR * sizeof(struct trie *));
+    }
+
+  /* Fix things up for any translation table. */
+  if ((trans = kwset->trans) != NULL)
+    for (i = 0; i < NCHAR; ++i)
+      kwset->delta[i] = delta[U(trans[i])];
+  else
+    memcpy(kwset->delta, delta, NCHAR);
+
+  return NULL;
+}
+
+/* Fast boyer-moore search. */
+static size_t
+bmexec (kwset_t kws, char const *text, size_t size)
+{
+  struct kwset const *kwset;
+  register unsigned char const *d1;
+  register char const *ep, *sp, *tp;
+  register int d, gc, i, len, md2;
+
+  kwset = (struct kwset const *) kws;
+  len = kwset->mind;
+
+  if (len == 0)
+    return 0;
+  if (len > size)
+    return -1;
+  if (len == 1)
+    {
+      tp = memchr (text, kwset->target[0], size);
+      return tp ? tp - text : -1;
+    }
+
+  d1 = kwset->delta;
+  sp = kwset->target + len;
+  gc = U(sp[-2]);
+  md2 = kwset->mind2;
+  tp = text + len;
+
+  /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
+  if (size > 12 * len)
+    /* 11 is not a bug, the initial offset happens only once. */
+    for (ep = text + size - 11 * len;;)
+      {
+	while (tp <= ep)
+	  {
+	    d = d1[U(tp[-1])], tp += d;
+	    d = d1[U(tp[-1])], tp += d;
+	    if (d == 0)
+	      goto found;
+	    d = d1[U(tp[-1])], tp += d;
+	    d = d1[U(tp[-1])], tp += d;
+	    d = d1[U(tp[-1])], tp += d;
+	    if (d == 0)
+	      goto found;
+	    d = d1[U(tp[-1])], tp += d;
+	    d = d1[U(tp[-1])], tp += d;
+	    d = d1[U(tp[-1])], tp += d;
+	    if (d == 0)
+	      goto found;
+	    d = d1[U(tp[-1])], tp += d;
+	    d = d1[U(tp[-1])], tp += d;
+	  }
+	break;
+      found:
+	if (U(tp[-2]) == gc)
+	  {
+	    for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
+	      ;
+	    if (i > len)
+	      return tp - len - text;
+	  }
+	tp += md2;
+      }
+
+  /* Now we have only a few characters left to search.  We
+     carefully avoid ever producing an out-of-bounds pointer. */
+  ep = text + size;
+  d = d1[U(tp[-1])];
+  while (d <= ep - tp)
+    {
+      d = d1[U((tp += d)[-1])];
+      if (d != 0)
+	continue;
+      if (U(tp[-2]) == gc)
+	{
+	  for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
+	    ;
+	  if (i > len)
+	    return tp - len - text;
+	}
+      d = md2;
+    }
+
+  return -1;
+}
+
+/* Hairy multiple string search. */
+static size_t
+cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
+{
+  struct kwset const *kwset;
+  struct trie * const *next;
+  struct trie const *trie;
+  struct trie const *accept;
+  char const *beg, *lim, *mch, *lmch;
+  register unsigned char c;
+  register unsigned char const *delta;
+  register int d;
+  register char const *end, *qlim;
+  register struct tree const *tree;
+  register char const *trans;
+
+#ifdef lint
+  accept = NULL;
+#endif
+
+  /* Initialize register copies and look for easy ways out. */
+  kwset = (struct kwset *) kws;
+  if (len < kwset->mind)
+    return -1;
+  next = kwset->next;
+  delta = kwset->delta;
+  trans = kwset->trans;
+  lim = text + len;
+  end = text;
+  if ((d = kwset->mind) != 0)
+    mch = NULL;
+  else
+    {
+      mch = text, accept = kwset->trie;
+      goto match;
+    }
+
+  if (len >= 4 * kwset->mind)
+    qlim = lim - 4 * kwset->mind;
+  else
+    qlim = NULL;
+
+  while (lim - end >= d)
+    {
+      if (qlim && end <= qlim)
+	{
+	  end += d - 1;
+	  while ((d = delta[c = *end]) && end < qlim)
+	    {
+	      end += d;
+	      end += delta[U(*end)];
+	      end += delta[U(*end)];
+	    }
+	  ++end;
+	}
+      else
+	d = delta[c = (end += d)[-1]];
+      if (d)
+	continue;
+      beg = end - 1;
+      trie = next[c];
+      if (trie->accepting)
+	{
+	  mch = beg;
+	  accept = trie;
+	}
+      d = trie->shift;
+      while (beg > text)
+	{
+	  c = trans ? trans[U(*--beg)] : *--beg;
+	  tree = trie->links;
+	  while (tree && c != tree->label)
+	    if (c < tree->label)
+	      tree = tree->llink;
+	    else
+	      tree = tree->rlink;
+	  if (tree)
+	    {
+	      trie = tree->trie;
+	      if (trie->accepting)
+		{
+		  mch = beg;
+		  accept = trie;
+		}
+	    }
+	  else
+	    break;
+	  d = trie->shift;
+	}
+      if (mch)
+	goto match;
+    }
+  return -1;
+
+ match:
+  /* Given a known match, find the longest possible match anchored
+     at or before its starting point.  This is nearly a verbatim
+     copy of the preceding main search loops. */
+  if (lim - mch > kwset->maxd)
+    lim = mch + kwset->maxd;
+  lmch = 0;
+  d = 1;
+  while (lim - end >= d)
+    {
+      if ((d = delta[c = (end += d)[-1]]) != 0)
+	continue;
+      beg = end - 1;
+      if (!(trie = next[c]))
+	{
+	  d = 1;
+	  continue;
+	}
+      if (trie->accepting && beg <= mch)
+	{
+	  lmch = beg;
+	  accept = trie;
+	}
+      d = trie->shift;
+      while (beg > text)
+	{
+	  c = trans ? trans[U(*--beg)] : *--beg;
+	  tree = trie->links;
+	  while (tree && c != tree->label)
+	    if (c < tree->label)
+	      tree = tree->llink;
+	    else
+	      tree = tree->rlink;
+	  if (tree)
+	    {
+	      trie = tree->trie;
+	      if (trie->accepting && beg <= mch)
+		{
+		  lmch = beg;
+		  accept = trie;
+		}
+	    }
+	  else
+	    break;
+	  d = trie->shift;
+	}
+      if (lmch)
+	{
+	  mch = lmch;
+	  goto match;
+	}
+      if (!d)
+	d = 1;
+    }
+
+  if (kwsmatch)
+    {
+      kwsmatch->index = accept->accepting / 2;
+      kwsmatch->offset[0] = mch - text;
+      kwsmatch->size[0] = accept->depth;
+    }
+  return mch - text;
+}
+
+/* Search through the given text for a match of any member of the
+   given keyword set.  Return a pointer to the first character of
+   the matching substring, or NULL if no match is found.  If FOUNDLEN
+   is non-NULL store in the referenced location the length of the
+   matching substring.  Similarly, if FOUNDIDX is non-NULL, store
+   in the referenced location the index number of the particular
+   keyword matched. */
+size_t
+kwsexec (kwset_t kws, char const *text, size_t size,
+	 struct kwsmatch *kwsmatch)
+{
+  struct kwset const *kwset = (struct kwset *) kws;
+  if (kwset->words == 1 && kwset->trans == NULL)
+    {
+      size_t ret = bmexec (kws, text, size);
+      if (kwsmatch != NULL && ret != (size_t) -1)
+	{
+	  kwsmatch->index = 0;
+	  kwsmatch->offset[0] = ret;
+	  kwsmatch->size[0] = kwset->mind;
+	}
+      return ret;
+    }
+  else
+    return cwexec(kws, text, size, kwsmatch);
+}
+
+/* Free the components of the given keyword set. */
+void
+kwsfree (kwset_t kws)
+{
+  struct kwset *kwset;
+
+  kwset = (struct kwset *) kws;
+  obstack_free(&kwset->obstack, NULL);
+  free(kws);
+}
diff --git a/kwset.h b/kwset.h
new file mode 100644
index 0000000..e8dec04
--- /dev/null
+++ b/kwset.h
@@ -0,0 +1,57 @@
+/* kwset.h - header declaring the keyword set library.
+   Copyright (C) 1989, 1998, 2005, 2007, 2009 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* Written August 1989 by Mike Haertel.
+   The author may be reached (Email) at the address mike@ai.mit.edu,
+   or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+struct kwsmatch
+{
+  int index;			/* Index number of matching keyword. */
+  size_t offset[1];		/* Offset of each submatch. */
+  size_t size[1];		/* Length of each submatch. */
+};
+
+typedef ptr_t kwset_t;
+
+/* Return an opaque pointer to a newly allocated keyword set, or NULL
+   if enough memory cannot be obtained.  The argument if non-NULL
+   specifies a table of character translations to be applied to all
+   pattern and search text. */
+extern kwset_t kwsalloc PARAMS((char const *));
+
+/* Incrementally extend the keyword set to include the given string.
+   Return NULL for success, or an error message.  Remember an index
+   number for each keyword included in the set. */
+extern const char *kwsincr PARAMS((kwset_t, char const *, size_t));
+
+/* When the keyword set has been completely built, prepare it for
+   use.  Return NULL for success, or an error message. */
+extern const char *kwsprep PARAMS((kwset_t));
+
+/* Search through the given buffer for a member of the keyword set.
+   Return a pointer to the leftmost longest match found, or NULL if
+   no match is found.  If foundlen is non-NULL, store the length of
+   the matching substring in the integer it points to.  Similarly,
+   if foundindex is non-NULL, store the index of the particular
+   keyword found therein. */
+extern size_t kwsexec PARAMS((kwset_t, char const *, size_t, struct kwsmatch *));
+
+/* Deallocate the given keyword set and all its associated storage. */
+extern void kwsfree PARAMS((kwset_t));
+

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/5] Adapt the kwset code to Git
       [not found] <20100213141558.22851.13660.stgit@fredrik-laptop>
  2010-02-13 14:20 ` [PATCH 1/5] Add obstack.[ch] from EGLIBC 2.10 Fredrik Kuivinen
  2010-02-13 14:20 ` [PATCH 2/5] Add string search routines from GNU grep Fredrik Kuivinen
@ 2010-02-13 14:20 ` Fredrik Kuivinen
  2010-02-13 14:21 ` [PATCH 4/5] Use kwset in pickaxe Fredrik Kuivinen
  2010-02-13 14:21 ` [PATCH 5/5] Use kwset in grep Fredrik Kuivinen
  4 siblings, 0 replies; 11+ messages in thread
From: Fredrik Kuivinen @ 2010-02-13 14:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---

 kwset.c |   24 ++++++++++--------------
 kwset.h |   17 +++++++++++------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/kwset.c b/kwset.c
index 0957f66..2853859 100644
--- a/kwset.c
+++ b/kwset.c
@@ -1,3 +1,7 @@
+/* This file has been copied from GNU grep 2.5.4. A few small changes
+ * have been made to adapt the code to Git.
+ */
+
 /* kwset.c - search for any of a set of keywords.
    Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009
    Free Software Foundation, Inc.
@@ -29,22 +33,14 @@
    String Matching:  An Aid to Bibliographic Search," CACM June 1975,
    Vol. 18, No. 6, which describes the failure function used below. */
 
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
+#include "cache.h"
+
 #include <sys/types.h>
-#include "system.h"
 #include "kwset.h"
 #include "obstack.h"
 
-#ifdef GREP
-# include "xalloc.h"
-# undef malloc
-# define malloc xmalloc
-#endif
-
 #define NCHAR (UCHAR_MAX + 1)
-#define obstack_chunk_alloc malloc
+#define obstack_chunk_alloc xmalloc
 #define obstack_chunk_free free
 
 #define U(c) ((unsigned char) (c))
@@ -175,7 +171,7 @@ kwsincr (kwset_t kws, char const *text, size_t len)
 	  link = (struct tree *) obstack_alloc(&kwset->obstack,
 					       sizeof (struct tree));
 	  if (!link)
-	    return _("memory exhausted");
+	    return "memory exhausted";
 	  link->llink = NULL;
 	  link->rlink = NULL;
 	  link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
@@ -183,7 +179,7 @@ kwsincr (kwset_t kws, char const *text, size_t len)
 	  if (!link->trie)
 	    {
 	      obstack_free(&kwset->obstack, link);
-	      return _("memory exhausted");
+	      return "memory exhausted";
 	    }
 	  link->trie->accepting = 0;
 	  link->trie->links = NULL;
@@ -406,7 +402,7 @@ kwsprep (kwset_t kws)
       /* Looking for just one string.  Extract it from the trie. */
       kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
       if (!kwset->target)
-	return _("memory exhausted");
+	return "memory exhausted";
       for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
 	{
 	  kwset->target[i] = curr->links->label;
diff --git a/kwset.h b/kwset.h
index e8dec04..48c2ff1 100644
--- a/kwset.h
+++ b/kwset.h
@@ -1,3 +1,7 @@
+/* This file has been copied from GNU grep 2.5.4. A few small changes
+ * have been made to adapt the code to Git.
+ */
+
 /* kwset.h - header declaring the keyword set library.
    Copyright (C) 1989, 1998, 2005, 2007, 2009 Free Software Foundation, Inc.
 
@@ -27,22 +31,23 @@ struct kwsmatch
   size_t size[1];		/* Length of each submatch. */
 };
 
-typedef ptr_t kwset_t;
+struct kwset_t;
+typedef struct kwset_t* kwset_t;
 
 /* Return an opaque pointer to a newly allocated keyword set, or NULL
    if enough memory cannot be obtained.  The argument if non-NULL
    specifies a table of character translations to be applied to all
    pattern and search text. */
-extern kwset_t kwsalloc PARAMS((char const *));
+extern kwset_t kwsalloc(char const *);
 
 /* Incrementally extend the keyword set to include the given string.
    Return NULL for success, or an error message.  Remember an index
    number for each keyword included in the set. */
-extern const char *kwsincr PARAMS((kwset_t, char const *, size_t));
+extern const char *kwsincr(kwset_t, char const *, size_t);
 
 /* When the keyword set has been completely built, prepare it for
    use.  Return NULL for success, or an error message. */
-extern const char *kwsprep PARAMS((kwset_t));
+extern const char *kwsprep(kwset_t);
 
 /* Search through the given buffer for a member of the keyword set.
    Return a pointer to the leftmost longest match found, or NULL if
@@ -50,8 +55,8 @@ extern const char *kwsprep PARAMS((kwset_t));
    the matching substring in the integer it points to.  Similarly,
    if foundindex is non-NULL, store the index of the particular
    keyword found therein. */
-extern size_t kwsexec PARAMS((kwset_t, char const *, size_t, struct kwsmatch *));
+extern size_t kwsexec(kwset_t, char const *, size_t, struct kwsmatch *);
 
 /* Deallocate the given keyword set and all its associated storage. */
-extern void kwsfree PARAMS((kwset_t));
+extern void kwsfree(kwset_t);
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/5] Use kwset in pickaxe
       [not found] <20100213141558.22851.13660.stgit@fredrik-laptop>
                   ` (2 preceding siblings ...)
  2010-02-13 14:20 ` [PATCH 3/5] Adapt the kwset code to Git Fredrik Kuivinen
@ 2010-02-13 14:21 ` Fredrik Kuivinen
  2010-02-13 14:21 ` [PATCH 5/5] Use kwset in grep Fredrik Kuivinen
  4 siblings, 0 replies; 11+ messages in thread
From: Fredrik Kuivinen @ 2010-02-13 14:21 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Best of five runs in the git repository:

before:

$ time git log -Sqwerty

real	0m32.517s
user	0m32.134s
sys	0m0.388s

after:

$ time git log -Sqwerty

real	0m24.299s
user	0m23.645s
sys	0m0.652s

So the kwset code is about 25% faster.

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---

 Makefile           |    2 ++
 diffcore-pickaxe.c |   34 +++++++++++++++++++++++-----------
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index 7bf2fca..5687b99 100644
--- a/Makefile
+++ b/Makefile
@@ -479,6 +479,7 @@ LIB_H += unpack-trees.h
 LIB_H += userdiff.h
 LIB_H += utf8.h
 LIB_H += wt-status.h
+LIB_H += kwset.h
 
 LIB_OBJS += abspath.o
 LIB_OBJS += advice.o
@@ -591,6 +592,7 @@ LIB_OBJS += write_or_die.o
 LIB_OBJS += ws.o
 LIB_OBJS += wt-status.o
 LIB_OBJS += xdiff-interface.o
+LIB_OBJS += kwset.o
 
 BUILTIN_OBJS += builtin-add.o
 BUILTIN_OBJS += builtin-annotate.o
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index d0ef839..6563a4e 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -4,10 +4,11 @@
 #include "cache.h"
 #include "diff.h"
 #include "diffcore.h"
+#include "kwset.h"
 
 static unsigned int contains(struct diff_filespec *one,
 			     const char *needle, unsigned long len,
-			     regex_t *regexp)
+			     regex_t *regexp, kwset_t kws)
 {
 	unsigned int cnt;
 	unsigned long sz;
@@ -36,9 +37,12 @@ static unsigned int contains(struct diff_filespec *one,
 
 	} else { /* Classic exact string match */
 		while (sz) {
-			const char *found = memmem(data, sz, needle, len);
-			if (!found)
+			size_t offset = kwsexec(kws, data, sz, NULL);
+			const char *found;
+			if (offset == -1)
 				break;
+			else
+				found = data + offset;
 			sz -= found - data + len;
 			data = found + len;
 			cnt++;
@@ -54,6 +58,7 @@ void diffcore_pickaxe(const char *needle, int opts)
 	unsigned long len = strlen(needle);
 	int i, has_changes;
 	regex_t regex, *regexp = NULL;
+	kwset_t kws = NULL;
 	struct diff_queue_struct outq;
 	outq.queue = NULL;
 	outq.nr = outq.alloc = 0;
@@ -69,6 +74,10 @@ void diffcore_pickaxe(const char *needle, int opts)
 			die("invalid pickaxe regex: %s", errbuf);
 		}
 		regexp = &regex;
+	} else {
+		kws = kwsalloc(NULL);
+		kwsincr(kws, needle, len);
+		kwsprep(kws);
 	}
 
 	if (opts & DIFF_PICKAXE_ALL) {
@@ -79,16 +88,16 @@ void diffcore_pickaxe(const char *needle, int opts)
 				if (!DIFF_FILE_VALID(p->two))
 					continue; /* ignore unmerged */
 				/* created */
-				if (contains(p->two, needle, len, regexp))
+				if (contains(p->two, needle, len, regexp, kws))
 					has_changes++;
 			}
 			else if (!DIFF_FILE_VALID(p->two)) {
-				if (contains(p->one, needle, len, regexp))
+				if (contains(p->one, needle, len, regexp, kws))
 					has_changes++;
 			}
 			else if (!diff_unmodified_pair(p) &&
-				 contains(p->one, needle, len, regexp) !=
-				 contains(p->two, needle, len, regexp))
+				contains(p->one, needle, len, regexp, kws) !=
+				contains(p->two, needle, len, regexp, kws))
 				has_changes++;
 		}
 		if (has_changes)
@@ -111,16 +120,17 @@ void diffcore_pickaxe(const char *needle, int opts)
 				if (!DIFF_FILE_VALID(p->two))
 					; /* ignore unmerged */
 				/* created */
-				else if (contains(p->two, needle, len, regexp))
+				else if (contains(p->two, needle, len, regexp,
+						  kws))
 					has_changes = 1;
 			}
 			else if (!DIFF_FILE_VALID(p->two)) {
-				if (contains(p->one, needle, len, regexp))
+				if (contains(p->one, needle, len, regexp, kws))
 					has_changes = 1;
 			}
 			else if (!diff_unmodified_pair(p) &&
-				 contains(p->one, needle, len, regexp) !=
-				 contains(p->two, needle, len, regexp))
+				contains(p->one, needle, len, regexp, kws) !=
+				contains(p->two, needle, len, regexp, kws))
 				has_changes = 1;
 
 			if (has_changes)
@@ -131,6 +141,8 @@ void diffcore_pickaxe(const char *needle, int opts)
 
 	if (opts & DIFF_PICKAXE_REGEX) {
 		regfree(&regex);
+	} else {
+		kwsfree(kws);
 	}
 
 	free(q->queue);

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/5] Use kwset in grep
       [not found] <20100213141558.22851.13660.stgit@fredrik-laptop>
                   ` (3 preceding siblings ...)
  2010-02-13 14:21 ` [PATCH 4/5] Use kwset in pickaxe Fredrik Kuivinen
@ 2010-02-13 14:21 ` Fredrik Kuivinen
  2010-02-13 15:58   ` Paolo Bonzini
  2010-02-13 17:38   ` Paolo Bonzini
  4 siblings, 2 replies; 11+ messages in thread
From: Fredrik Kuivinen @ 2010-02-13 14:21 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Best of five runs in the linux repository:

before:

$ time git grep qwerty
drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */

real	0m1.065s
user	0m1.400s
sys	0m0.536s


after:

$ time git grep qwerty
drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */

real	0m0.621s
user	0m0.560s
sys	0m0.564s

So we gain about 40% by using the kwset code.

Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---

 grep.c |   61 +++++++++++++++++++++++++++++++++++++++++--------------------
 grep.h |    2 ++
 2 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/grep.c b/grep.c
index a0864f1..deb5f71 100644
--- a/grep.c
+++ b/grep.c
@@ -51,16 +51,38 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 	return ret;
 }
 
+static int is_fixed(const char *s)
+{
+	while (*s && !is_regex_special(*s))
+		s++;
+	return !*s;
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
 	int err;
 
 	p->word_regexp = opt->word_regexp;
 	p->ignore_case = opt->ignore_case;
-	p->fixed = opt->fixed;
-
-	if (p->fixed)
+	p->fixed = 0;
+
+	if (opt->fixed || is_fixed(p->pattern))
+		p->fixed = 1;
+
+	if (p->fixed) {
+		if (opt->regflags & REG_ICASE || p->ignore_case) {
+			static char trans[256];
+			int i;
+			for (i = 0; i < 256; i++)
+				trans[i] = tolower(i);
+			p->kws = kwsalloc(trans);
+		} else {
+			p->kws = kwsalloc(NULL);
+		}
+		kwsincr(p->kws, p->pattern, strlen(p->pattern));
+		kwsprep(p->kws);
 		return;
+	}
 
 	err = regcomp(&p->regexp, p->pattern, opt->regflags);
 	if (err) {
@@ -241,7 +263,10 @@ void free_grep_patterns(struct grep_opt *opt)
 		case GREP_PATTERN: /* atom */
 		case GREP_PATTERN_HEAD:
 		case GREP_PATTERN_BODY:
-			regfree(&p->regexp);
+			if (p->fixed)
+				kwsfree(p->kws);
+			else
+				regfree(&p->regexp);
 			break;
 		default:
 			break;
@@ -277,21 +302,17 @@ static void show_name(struct grep_opt *opt, const char *name)
 }
 
 
-static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match)
+static int fixmatch(const kwset_t kws, char *line, size_t sz,
+		    regmatch_t *match)
 {
-	char *hit;
-	if (ignore_case)
-		hit = strcasestr(line, pattern);
-	else
-		hit = strstr(line, pattern);
-
-	if (!hit) {
+	struct kwsmatch kwsm;
+	size_t offset = kwsexec(kws, line, sz, &kwsm);
+	if (offset == -1) {
 		match->rm_so = match->rm_eo = -1;
 		return REG_NOMATCH;
-	}
-	else {
-		match->rm_so = hit - line;
-		match->rm_eo = match->rm_so + strlen(pattern);
+	} else {
+		match->rm_so = offset;
+		match->rm_eo = match->rm_so + kwsm.size[0];
 		return 0;
 	}
 }
@@ -346,7 +367,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 
  again:
 	if (p->fixed)
-		hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch);
+		hit = !fixmatch(p->kws, bol, eol-bol, pmatch);
 	else
 		hit = !regexec(&p->regexp, bol, 1, pmatch, eflags);
 
@@ -670,9 +691,9 @@ static int look_ahead(struct grep_opt *opt,
 		int hit;
 		regmatch_t m;
 
-		if (p->fixed)
-			hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
-		else {
+		if (p->fixed) {
+			hit = !fixmatch(p->kws, bol, *left_p, &m);
+		} else {
 #ifdef REG_STARTEND
 			m.rm_so = 0;
 			m.rm_eo = *left_p;
diff --git a/grep.h b/grep.h
index 9703087..3c79154 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,7 @@
 #ifndef GREP_H
 #define GREP_H
 #include "color.h"
+#include "kwset.h"
 
 enum grep_pat_token {
 	GREP_PATTERN,
@@ -31,6 +32,7 @@ struct grep_pat {
 	const char *pattern;
 	enum grep_header_field field;
 	regex_t regexp;
+	kwset_t kws;
 	unsigned fixed:1;
 	unsigned ignore_case:1;
 	unsigned word_regexp:1;

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/5] Add string search routines from GNU grep
  2010-02-13 14:20 ` [PATCH 2/5] Add string search routines from GNU grep Fredrik Kuivinen
@ 2010-02-13 15:49   ` Dmitry Potapov
  2010-02-13 15:56   ` Paolo Bonzini
  1 sibling, 0 replies; 11+ messages in thread
From: Dmitry Potapov @ 2010-02-13 15:49 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Junio C Hamano

On Sat, Feb 13, 2010 at 5:20 PM, Fredrik Kuivinen <frekui@gmail.com> wrote:
> @@ -0,0 +1,779 @@
> +/* kwset.c - search for any of a set of keywords.
> +   Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009
> +   Free Software Foundation, Inc.
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.

git uses GPLv2, and GPLv3 is incompatible with it.

Probably, you can try to find an older version that was licensed under GPL v2.
I've googled for kwset and found this:
http://www.opensource.apple.com/source/grep/grep-14/grep/src/kwset.c

Dmitry

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/5] Add string search routines from GNU grep
  2010-02-13 14:20 ` [PATCH 2/5] Add string search routines from GNU grep Fredrik Kuivinen
  2010-02-13 15:49   ` Dmitry Potapov
@ 2010-02-13 15:56   ` Paolo Bonzini
  2010-02-14 16:52     ` Fredrik Kuivinen
  1 sibling, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2010-02-13 15:56 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Junio C Hamano


> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.

You need to use the last GPLv2 version (commit e7ac713d^ in the GNU grep 
git repository).  It doesn't change anything except the copyright 
header, but let's do things the right way.

Paolo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] Use kwset in grep
  2010-02-13 14:21 ` [PATCH 5/5] Use kwset in grep Fredrik Kuivinen
@ 2010-02-13 15:58   ` Paolo Bonzini
  2010-02-13 17:38   ` Paolo Bonzini
  1 sibling, 0 replies; 11+ messages in thread
From: Paolo Bonzini @ 2010-02-13 15:58 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Junio C Hamano

On 02/13/2010 03:21 PM, Fredrik Kuivinen wrote:
> Best of five runs in the linux repository:
>
> before:
>
> $ time git grep qwerty
> drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */
>
> real	0m1.065s
> user	0m1.400s
> sys	0m0.536s
>
>
> after:
>
> $ time git grep qwerty
> drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */
>
> real	0m0.621s
> user	0m0.560s
> sys	0m0.564s
>
> So we gain about 40% by using the kwset code.

I like it, thanks for doing this!

Paolo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] Use kwset in grep
  2010-02-13 14:21 ` [PATCH 5/5] Use kwset in grep Fredrik Kuivinen
  2010-02-13 15:58   ` Paolo Bonzini
@ 2010-02-13 17:38   ` Paolo Bonzini
  2010-02-14 16:51     ` Fredrik Kuivinen
  1 sibling, 1 reply; 11+ messages in thread
From: Paolo Bonzini @ 2010-02-13 17:38 UTC (permalink / raw)
  To: Fredrik Kuivinen; +Cc: git, Junio C Hamano

On 02/13/2010 03:21 PM, Fredrik Kuivinen wrote:
> Best of five runs in the linux repository:
>
> before:
>
> $ time git grep qwerty
> drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */
>
> real	0m1.065s
> user	0m1.400s
> sys	0m0.536s
>
>
> after:
>
> $ time git grep qwerty
> drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */
>
> real	0m0.621s
> user	0m0.560s
> sys	0m0.564s
>
> So we gain about 40% by using the kwset code.

Hmm, on a more accurate review for

     git grep -e foo -e bar

you're creating two kwsets, so a Boyer-Moore search be much 
simpler---the performance would be the same since that's what kwset 
degrades to for a single string, but you'd probably save around 600 
lines of code...

Paolo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 5/5] Use kwset in grep
  2010-02-13 17:38   ` Paolo Bonzini
@ 2010-02-14 16:51     ` Fredrik Kuivinen
  0 siblings, 0 replies; 11+ messages in thread
From: Fredrik Kuivinen @ 2010-02-14 16:51 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: git, Junio C Hamano

On Sat, Feb 13, 2010 at 18:38, Paolo Bonzini <bonzini@gnu.org> wrote:
> On 02/13/2010 03:21 PM, Fredrik Kuivinen wrote:
>>
>> Best of five runs in the linux repository:
>>
>> before:
>>
>> $ time git grep qwerty
>> drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"
>>      /* 0x10 - 0x1f */
>>
>> real    0m1.065s
>> user    0m1.400s
>> sys     0m0.536s
>>
>>
>> after:
>>
>> $ time git grep qwerty
>> drivers/char/keyboard.c:        "qwertyuiop[]\r\000as"
>>      /* 0x10 - 0x1f */
>>
>> real    0m0.621s
>> user    0m0.560s
>> sys     0m0.564s
>>
>> So we gain about 40% by using the kwset code.
>
> Hmm, on a more accurate review for
>
>    git grep -e foo -e bar
>
> you're creating two kwsets, so a Boyer-Moore search be much simpler---the
> performance would be the same since that's what kwset degrades to for a
> single string, but you'd probably save around 600 lines of code...

Another approach is to just create a single kwset for this case as
well. I will try that in the next iteration.

- Fredrik

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/5] Add string search routines from GNU grep
  2010-02-13 15:56   ` Paolo Bonzini
@ 2010-02-14 16:52     ` Fredrik Kuivinen
  0 siblings, 0 replies; 11+ messages in thread
From: Fredrik Kuivinen @ 2010-02-14 16:52 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: git, Junio C Hamano, Dmitry Potapov

On Sat, Feb 13, 2010 at 16:56, Paolo Bonzini <bonzini@gnu.org> wrote:
>
>> +   This program is free software; you can redistribute it and/or modify
>> +   it under the terms of the GNU General Public License as published by
>> +   the Free Software Foundation; either version 3, or (at your option)
>> +   any later version.
>
> You need to use the last GPLv2 version (commit e7ac713d^ in the GNU grep git
> repository).  It doesn't change anything except the copyright header, but
> let's do things the right way.

Thanks. I will use the GPLv2 version in the next iteration.

- Fredrik

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-02-14 16:52 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20100213141558.22851.13660.stgit@fredrik-laptop>
2010-02-13 14:20 ` [PATCH 1/5] Add obstack.[ch] from EGLIBC 2.10 Fredrik Kuivinen
2010-02-13 14:20 ` [PATCH 2/5] Add string search routines from GNU grep Fredrik Kuivinen
2010-02-13 15:49   ` Dmitry Potapov
2010-02-13 15:56   ` Paolo Bonzini
2010-02-14 16:52     ` Fredrik Kuivinen
2010-02-13 14:20 ` [PATCH 3/5] Adapt the kwset code to Git Fredrik Kuivinen
2010-02-13 14:21 ` [PATCH 4/5] Use kwset in pickaxe Fredrik Kuivinen
2010-02-13 14:21 ` [PATCH 5/5] Use kwset in grep Fredrik Kuivinen
2010-02-13 15:58   ` Paolo Bonzini
2010-02-13 17:38   ` Paolo Bonzini
2010-02-14 16:51     ` Fredrik Kuivinen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).