From: Roel Kluin <roel.kluin@gmail.com>
To: git@vger.kernel.org
Subject: [PATCH] git-cget: prints elements of C code in the git repository
Date: Tue, 24 Mar 2009 11:09:29 +0100 [thread overview]
Message-ID: <49C8B159.2040600@gmail.com> (raw)
Maybe something like this is useful?
------------------------------>8-------------8<---------------------------------
Add git-cget.sh: prints elements of C code in the git repository.
Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
---
diff --git a/Makefile b/Makefile
index 1087884..c21ba91 100644
--- a/Makefile
+++ b/Makefile
@@ -260,6 +260,7 @@ BASIC_LDFLAGS =
SCRIPT_SH += git-am.sh
SCRIPT_SH += git-bisect.sh
SCRIPT_SH += git-filter-branch.sh
+SCRIPT_SH += git-cget.sh
SCRIPT_SH += git-lost-found.sh
SCRIPT_SH += git-merge-octopus.sh
SCRIPT_SH += git-merge-one-file.sh
diff --git a/git-cget.sh b/git-cget.sh
new file mode 100755
index 0000000..08ea65f
--- /dev/null
+++ b/git-cget.sh
@@ -0,0 +1,409 @@
+#!/bin/bash
+# FIXME: make C++ style members
+
+# Building blocks for the extended regular expressions used below.  Each
+# variable holds a regex fragment that is interpolated into larger patterns.
+int="[0-9]"
+hex="[a-f0-9]"
+hEx="[A-Fa-f0-9]"
+HEX="[A-F0-9]"
+upp="[A-Z]"
+up_="[A-Z_]"
+low="[a-z0-9]" # note: also contains the digits
+lo_="[a-z0-9_]"
+alp="[A-Za-z]"
+al_="[A-Za-z_]"
+ALN="[A-Z0-9]"
+AN_="[A-Z0-9_]"
+aln="[A-Za-z0-9]"
+an_="[A-Za-z0-9_]"
+
+# A literal '!', interpolated as ${em} into the double-quoted sed scripts
+# further down (sed's "not" address modifier).
+em='!' # because of bash banging
+
+D="$int*\.?$int+x?$hex*[uUlL]{0,3}[fF]?" # a number, float or hex
+# more strict and catches it (costs one backreference for (git )grep)
+SD="($int+[uUlLfF]?|$int+[uU]?[lL][lL]?|0x$hex+|0x$HEX+|$int+[lL][lL][uU]|$int*\.$int+[fF]?)"
+
+V="$al_+$an_*" # variable/function name (or definition)
+K="$up_+$AN_*" # definition (in capitals)
+
+# to catch variables that are members or arrays:
+W="[a-zA-Z0-9_>.-]*"
+SW="$V(\[[^][]*\]|\[[^][]*\[[^][]*\][^][]*\]|\.$V|->$V)*" # more strict, 1 backref
+
+s="[[:space:]]*" # optional whitespace
+S="[[:space:]]+" # at least one whitespace character
+
+# useful to ensure the end of a variable name:
+Q="[^[:alnum:]_]"
+Q2="[^[:alnum:]_>.]" # the '>' is tricky, it's an operator as well
+
+# match comments
+# comm1: a /* ... */ block comment, comm2: a // comment
+comm1="\/\*([^*]+|\**[^*/])*\*+\/" # 1 backref
+comm2="\/\/([^\n]+|[n\\]+)*" # 1 backref
+
+# match the end of the line, including comments:
+cendl="$s($comm1|$comm2|$s)*($|\n)" # 4 backrefs
+
+# strings and characters can contain things we want to match
+# str: a double-quoted string, ch1: a plain character constant,
+# ch2: a character constant that starts with a backslash escape
+str="\"([^\\\"]+|\\\\.)*\"" # 1 backref
+ch1="'[^\\']'"
+ch2="'\\\\.[^']*'"
+ch="$ch1|$ch2"
+
+# when using grep [^\/] also cases excludes '\', so we correct for that
+# match something that is not comment, string or character (c-code): 3 backrefs
+ccode="([^\"'/]+|\/[^*\"'/]|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?\\\\|\/?$comm2)*"
+
+# TODO: use backref cheaper versions
+# Parentheses: nps matches text without any parenthesis, nstdps matches
+# parenthesised groups nested up to five levels deep, and nnps matches one
+# complete "( ... )" built from them.
+nps="[^()]*"
+nstdps="(\($nps(\($nps(\($nps(\($nps(\($nps\)$nps)*\)$nps)*\)$nps)*\)$nps)*\)$nps)*"
+npz="$nps$nstdps"
+nnps="\($npz\)"
+
+# The same construction for curly braces.
+ncs="[^}{]*"
+nstdcs="(\{$ncs(\{$ncs(\{$ncs(\{$ncs(\{$ncs\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*"
+ncz="$ncs$nstdcs"
+nncs="\{$ncz\}"
+
+# sed substitution that puts a backslash in front of every character of the
+# bracket list (regex metacharacters); used by delimit() and _grep_12().
+delimitstr="s/([][{}(|)+*?\\/.^])/\\\\\1/g"
+# delimit: filter that copies stdin to stdout after applying the
+# substitution stored in $delimitstr, i.e. with a backslash placed in
+# front of each character that substitution lists.
+delimit()
+{
+	sed -r -e "$delimitstr"
+}
+
+# unmatch: match anything except the given.
+#
+# If given is a string, then it also matches longer
+# or shorter strings.
+#
+# otherwise it delimits characters and then creates a match
+#
+unmatch()
+{
+ local sedstr=
+ # TODO: unmatch numbers
+ # if [ -n "`echo "$1" | grep -E "^$SD$"`" ]; then
+ if [ -n "`echo "$1" | grep -E "^$V$"`" ]; then
+ sedstr=":loop
+ s/^($an_+)?($an_)(\|.*)?$/\1\|\1[^\2]\3/
+ t loop
+ s/^[|](.*)$/($al_$an_{${#1}}|\1)$an_*/"
+ fi
+ # FIXME: unmatch non alnum characters:
+ # with the below "\*" results in ([^\\]|\\[^*])*,
+ # but it should be ([^\\]|\\+[^*\\])*
+ #else
+ # local a="[^][{}(|)+*?\\/.^]"
+ # local b="[][{}(|)+*?\\/.^]"
+ # sedstr="$delimitstr
+ # :loop
+ # s/^(($a|[\\]$b)+)?(($a)|[\\]($b))(\|.*)?$/\1\|\1[^\4\5]\6/
+ # t loop
+ # s/^[|](.*)$/(\1)*/"
+ #fi
+ echo $1 | sed -r "$sedstr"
+}
+
+# excl_code CHARS [EXTRA]
+# Print an extended regular expression matching a stretch of C text that
+# contains none of the characters in CHARS or EXTRA outside of comments,
+# character constants and strings; those are stepped over as a whole, so
+# their contents are never tested.
+excl_code()
+{
+	local plain="[^$1\"'/$2]*"
+	local after_slash="\/[^$1\"*'/$2]*"
+	local skipped="\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?$comm2"
+
+	printf '%s\n' "($plain|$after_slash|$skipped)*"
+}
+
+# usage: nestc "(" ")" [number [extra]]
+# only works for nesting single chars. (TODO: multiple)
+#
+# Print an extended regular expression that matches text in which the
+# opening character $1 and the closing character $2 are balanced, nested
+# at most "number" levels deep (default 5).  "extra" is handed to
+# excl_code as additional characters that may not occur in the plain
+# stretches between the delimiters.
+nestc()
+{
+	# Default the depth whenever it is missing or empty.  The previous
+	# test ($# -eq 1) left the counter empty for the documented
+	# two-argument call, which made the loop test below fail.
+	local i="${3:-5}"
+	# first and 2nd are flipped to enable matching
+	# square brackets "]["
+	local p="$(excl_code "$2$1" "$4")"
+	local ret="$p"
+	# Wrap the expression once per nesting level.
+	while [ "$i" -gt 0 ]; do
+		ret="${p}([$1]${ret}[$2]${p})*"
+		i=$((i-1))
+	done
+	echo "$ret"
+}
+
+# blank_it PATTERN
+# Print PATTERN with its literal whitespace turned into whitespace regexes:
+# a run of whitespace between two alphanumeric characters becomes
+# "[[:space:]]+", every other run becomes "[[:space:]]*".  A multi-line
+# PATTERN is joined into one line, the newlines being whitespace as well.
+blank_it()
+{
+	# The sed script is single-quoted on purpose: inside double quotes
+	# the shell expands "$!" (PID of the last background job, possibly
+	# empty), so sed never saw the "not the last line" address.
+	printf '%s\n' "$1" | sed -r '
+	:a
+	s/([[:alnum:]])[[:space:]]+([[:alnum:]])/\1[[:space:]]+\2/g
+	s/[[:space:]]+/[[:space:]]*/g
+	$!{
+		N; ba
+	}'
+}
+
+# usage: grep_12 "struct list_head {" "include/linux/slub_def.h"
+# greps included as well, when not found
+#
+# $1 is an extended regular expression, $2 the file to start from.  The name
+# of the file in which the expression was found is printed on stdout.
+# Resets the global $flist (list of files already searched) before starting.
+grep_12()
+{
+ flist=
+ _grep_12 "$1" "$2"
+}
+
+# Worker for grep_12: search file $2 for $1; when it is not found there,
+# record $2 in the global $flist and try the files it #includes.
+_grep_12()
+{
+ grep -qE "$1" "$2";
+ if [ $? -eq 0 ]; then
+ echo "$2";
+ else
+ # $flist is a '|'-separated list that is used as a regex alternation
+ # below to leave out files that were searched already.
+ [ -n "$flist" ] && flist="$flist|"
+ flist="$flist$2"
+ local b="${2%\/*}"
+ # The sed script prints one candidate path per #include line of $2:
+ # <x> becomes include/x, "x" is put below $b (the directory part of $2).
+ # NOTE(review): $delimitstr is applied to the line before the path is
+ # extracted, so the printed path may carry the backslashes it adds;
+ # confirm that [ -f "$f" ] can still succeed.
+ # NOTE(review): f is not declared local, so the recursive call reuses it.
+ for f in `sed -rn "/^$s#${s}include${s}[\"<]/{
+ $delimitstr
+ s/^$s#${s}include$s<([^>]+)>$cendl/include\/\1/p
+ s/^$s#${s}include$s\"([^\"]+)\"$cendl/${b//\//\/}\/\1/p
+ }" "$2" | grep -vE "($flist)" `; do
+ # stop at the first included file in which the expression is found
+ [ -f "$f" ] && _grep_12 "$1" "$f" && return 0;
+ # NOTE(review): the include list above was built before the loop
+ # started, so this assignment looks unused - confirm.
+ b="${f%\/*}"
+ done
+ fi
+}
+
+# get_matching_files PATTERN [FILE|DIR]...
+# Print the names of the files that contain a match for the extended
+# regular expression PATTERN.
+#
+# Without FILE arguments every tracked *.c and *.h file of the repository
+# is searched.  A FILE is searched with grep_12, which also follows its
+# #include lines; its result is printed followed by a space and no newline.
+# A DIR limits the search to the tracked *.c and *.h files below it.
+get_matching_files()
+{
+	local pattern="$1" f
+	shift
+	if [ -z "$*" ]; then
+		# Feed the names through xargs: an empty list then no longer
+		# makes grep wait on stdin, and a big repository cannot
+		# overflow the command line.
+		git ls-files -z -- "*.[ch]" |
+			xargs -0 grep -lE -e "$pattern" --
+		return
+	fi
+	for f in "$@"; do
+		if [ -f "$f" ]; then
+			echo -n "$(grep_12 "$pattern" "$f") "
+		elif [ -d "$f" ]; then
+			# Only look below the directory that was asked for; this
+			# branch used to search the whole repository instead.
+			git ls-files -z -- "${f%/}/*.[ch]" |
+				xargs -0 grep -lE -e "$pattern" --
+		fi
+	done
+}
+
+
+# _sedit STAGE1 STAGE2 STAGE3 CMD4 CMD5 FILE...
+#
+# Run one "sed -r -n" script over the FILEs.  Per cycle the script
+#  - skips to the end (printing nothing) unless the pattern space matches
+#    STAGE1;
+#  - as long as the pattern space does not match STAGE2: appends it to the
+#    hold space (H), appends the next input line (N) and starts over at the
+#    STAGE1 test;
+#  - executes the sed command(s) in CMD4;
+#  - keeps appending input lines (H; N) until the pattern space matches
+#    STAGE3;
+#  - executes the sed command(s) in CMD5.
+# The regexes and commands are pasted into the script as they are, so the
+# regexes must not contain an unescaped "/".
+_sedit()
+{
+ sed -r -n "
+ :a
+ /$1/${em}b
+ /$2/${em}{
+ H; N; ba
+ }
+ $4
+ :b
+ /$3/${em}{
+ H; N; bb
+ }
+ $5
+ " "${@:6}"
+}
+
+# sedit FLAGS STAGE1 STAGE2 STAGE3 FILE...
+#
+# Pass the three stage regexes through blank_it and run _sedit with them.
+# FLAGS is a bit mask (parseopts passes 0, 3 for -b or 4 for -n):
+#   value 4 set:   the final command is "=" (print the line number)
+#                  instead of "p" (print the collected text)
+#   value 1 set:   all FILEs go through a single _sedit call and no
+#                  per-file header is inserted
+#   value 1 clear: every FILE is processed on its own and a header line
+#                  "// vi FILE" is inserted when stage 2 is reached; while
+#                  value 2 is clear as well, the header ends in " +@" and is
+#                  followed by the current line number
+# The trailing sed joins such a header with the number after it, giving
+# "// vi FILE +LINE", or "FILE:FIRST-LAST" when FLAGS is exactly 4.
+sedit()
+{
+ local print2="p"
+ [ $(($1 & 4 )) -eq 4 ] && print2="="
+
+ # The stages are joined with "@" for a single blank_it call and split
+ # again by the unquoted $stages below, so they must not contain "@",
+ # nor any whitespace once blank_it is done with them.
+ local stages="` blank_it "$2@$3@$4" | tr "@" " "`"
+ local sed2str="/ \+@$/{N; s/@\n//}";
+ if [ $(($1 & 1 )) -eq 1 ]; then
+ # "#" starts a sed comment, i.e. nothing is done at stage 2
+ _sedit $stages "#" "$print2" "${@:5}" | sed -r "$sed2str";
+ else
+ local print1=
+ [ $(($1 & 2 )) -eq 0 ] && print1="`echo -e "+@\n="`"
+ [ $1 -eq 4 ] && sed2str="/ \+@$/{N; N; s/\/\/ vi ([^+]*) \+@\n([0-9]*)\n([0-9]*)/\1:\2-\3/}"
+
+ for f in "${@:5}"; do
+ _sedit $stages "i// vi $f $print1" "$print2" "$f"
+ done | sed -r "$sed2str";
+ fi
+}
+
+# get_func FLAGS NAME [FILE]...
+#
+# Print the definitions of functions whose name matches the regex NAME.
+# FLAGS is handed to sedit unchanged.  Without FILE arguments the tracked
+# *.c and *.h files are grepped first for lines that could start such a
+# definition, and only the files that have one are processed.
+get_func()
+{
+ # TODO: C++ member stuff
+ # fret: the words, "*" and comments in front of the name (return type)
+ local fret="( $V( |\*|$comm1)*$S( |\*|$comm1)*)+"
+ local func_args="$(nestc "(" ")" 10 "+%/?#&|<>.^-")"
+ local func_body="$(nestc "{" "}" 10)"
+
+ local fl="${@:3}"
+ if [ -z "$fl" ]; then
+ local match="^($fret)? $2 ((\($func_args)+(\) ($comm1 )*((\{$func_body)+\}?)?)?)?$";
+ match="`blank_it "$match"`";
+ fl="`git-ls-files "*.[ch]"`"
+ fl="`grep -lE "$match" $fl`";
+ [ -z "$fl" ] && return;
+ fi
+
+ # stage1: a line that may be the beginning of the definition
+ # stage2: return type, name and argument list up to the opening brace
+ # stage3: the same, up to the brace that closes the body
+ local stage1="^$fret ($2 ((\($func_args)+(\) ($comm1 )*((\{$func_body)+\}?)?)?)?)?$";
+ local stage2="^$fret $2 \($func_args\) \{";
+ local stage3="^$fret $2 \($func_args\) \{$func_body\}";
+ sedit $1 "$stage1" "$stage2" "$stage3" $fl
+}
+
+
+# get struct/enum/union
+#
+# get_elem FLAGS KEYWORD NAME [FILE]...
+#
+# KEYWORD is "struct", "enum" or "union", NAME a regex for the element's
+# name; FLAGS is handed to sedit unchanged.  Two forms are searched, one
+# after the other:
+#   1. the name in front of the body:  KEYWORD NAME { ... }
+#   2. the name behind the body:       KEYWORD { ... } NAME;
+# For each form get_matching_files selects the files to run sedit on.
+get_elem()
+{
+ local sA="($S|$comm1)"
+ local sB="($V$S|$V \(\([^()]+\)\) |$comm1 )*"
+ local struct_body="$(nestc "{" "}" 10)"
+ local stage1=;
+ local stage2=;
+ local stage3=;
+
+ # form 1: KEYWORD NAME { ... }
+ local match="^ (($sB$2)?$sA$sB)?$3($sA$sB)?((\{$struct_body)+\}?)?$cendl"
+ match="`blank_it "$match"`"
+ local fl="`get_matching_files "$match" ${@:4}`"
+ if [ -n "$fl" ]; then
+ stage1="^ ($V|$V \(\([^()]*\)\)|$sB$2($sA$sB($3($sA$sB)?((\{$struct_body)+\}?)?)?)?)$cendl"
+ stage2="^ $sB$2$sA$sB$3($sA$sB)? \{"
+ stage3="^ $sB$2$sA$sB$3($sA$sB)? \{$struct_body\}"
+ sedit $1 "$stage1" "$stage2" "$stage3" $fl
+ fi
+
+ # form 2: KEYWORD { ... } NAME;
+ match="^( (($sB$2)?($sA$sB| )\{)?($struct_body\})+)? $sB$3($sA$sB| );$cendl"
+ match="`blank_it "$match"`"
+ fl="`get_matching_files "$match" ${@:4}`"
+ if [ -n "$fl" ]; then
+ stage1="^ ($V|$V \(\([^()]*\)\)|$sB$2(($sA$sB| )((\{$struct_body)+(\} $sB($3($sA$sB| );)?)?)?)?)$cendl"
+ stage2="^ $sB$2($sA$sB| )\{"
+ stage3="^ $sB$2($sA$sB| )\{$struct_body\} $sB$3($sA$sB| );$cendl"
+ # there can be false positives due to stage1.
+ # here we remove them
+ # The filter works on the "// vi FILE +LINE" header lines that sedit
+ # writes.
+ # NOTE(review): the sed script is double-quoted, so the shell expands
+ # the "$!" in it (PID of the last background job, possibly empty)
+ # before sed gets to see it - confirm this block does what was intended.
+ sedit $1 "$stage1" "$stage2" "$stage3" $fl |
+ sed -r "/^\/\/ vi .* \+[0-9]*$/{
+ :a
+ $!{
+ d; b
+ }
+ N
+ /\/\/ vi .* \+[0-9]*\n\/\/ vi /${em}b
+ D; ba
+ }";
+
+ fi
+}
+
+# get_def FLAGS NAME [FILE]...
+#
+# Print the "#define" directives whose macro name matches the regex NAME,
+# continuation lines included.  FLAGS is handed to sedit unchanged; the
+# files to look at are chosen by get_matching_files.
+get_def()
+{
+ # the directive itself; $Q makes sure the name ends right after NAME
+ local stage1="^ # define$S$2$Q"
+ local match="`blank_it "$stage1"`";
+ # "^" matches anything, so stage 2 is passed at once
+ local stage2="^"
+ # keep adding lines until the text no longer ends in a backslash
+ local stage3="[^\\]$"
+ sedit $1 "$stage1" "$stage2" "$stage3" `get_matching_files "$match" ${@:3}`
+}
+
+get_any()
+{
+ local defm=" # define$S$2$Q"
+ local sA="($S|$comm1)"
+ local sB="($V$S|$V \(\([^()]+\)\) |$comm1 )*"
+ local body="$(nestc "{" "}" 10)"
+
+ local elemsl="(($sB(struct|enum|union))?$sA$sB)?"
+ local elemsr1="$2($sA$sB)?((\{$body)+\}?)?"
+
+ local fret="( $V( |\*|$comm1)*$S( |\*|$comm1)*)+"
+ local func_args="$(nestc "(" ")" 10 "+%/?#^&|<>.-")"
+ local funcm="($fret)? $2 ((\($func_args)+(\) ($comm1 )*((\{$body)+\}?)?)?)?";
+
+ local match="`blank_it "^($defm| $elemsl$elemsr1$| (($elemsl\{)?($body\})+)? $sB$2$sA$sB;$|$funcm$)"`";
+ defm="`blank_it "^$defm"`"
+ funcm="`blank_it "^$funcm$"`"
+ git-grep -E "$match" | while read l; do
+ f="${l%:*}"
+ l="${l#*:}"
+ if [ "`echo "$l" | grep -E "^$defm"`" ]; then
+ get_def $1 "$2" $f
+ elif [ "`echo "$l" | grep -E "^$funcm$"`" ]; then
+ get_func $1 "$2" $f
+ else
+ for e in struct enum union; do
+ z="(($sB$e)?$sA$sB)?"
+ echo "$l" | grep -Eq "`blank_it "^ ( $z$elemsr1| (($z\{)?($body\})+)? $sB$2$sA$sB;)$cendl"`"
+ [ $? -eq 0 ] && get_elem $1 "$e" "$2" $f
+ done
+ fi
+ done
+}
+
+# usage: print the command-line help on stdout.
+usage()
+{
+cat << EOF
+USAGE: git-cget [OPTION]... PATTERN [FILE]...
+print elements of C code with name PATTERN in the git repository,
+where PATTERN is an extended regular expression
+
+Options to specify which element(s) should be printed:
+ -f function
+ -s struct
+ -d definition
+ -m macro
+ -e enum
+ -u union
+
+Options to alter the output:
+ -b only print body
+ -n only print file and lineranges
+ -? print this help
+
+EOF
+}
+
+parseopts()
+{
+ local fl= # file list
+ local getflag=0
+ local printflag=0
+
+ while [ $# -ne 0 ]; do
+ while getopts "fdmseubn" optname; do
+ case "$optname" in
+ "f") getflag=$((getflag|1)) ;;
+ "d") getflag=$((getflag|2)) ;;
+ "m") getflag=$((getflag|4)) ;;
+ "s") getflag=$((getflag|8)) ;;
+ "e") getflag=$((getflag|16)) ;;
+ "u") getflag=$((getflag|32)) ;;
+ "b") printflag=3 ;;
+ "n") printflag=4 ;;
+ "?") usage; exit 0; ;;
+ esac
+ done
+ shift $((OPTIND-1))
+ [ $# -eq 0 ] && break;
+ OPTIND=0
+ if [ -f "$1" ]; then
+ fl="$fl $1";
+ else
+ name="$1";
+ fi
+ shift
+ done
+ [ -z "$name" ] && usage;
+ if [ $getflag -eq 0 ]; then
+ get_any $printflag "$name" $fl
+ return;
+ fi
+ [ $((getflag&1)) -eq 1 ] && get_func $printflag "$name" $fl
+ if [ $((getflag&6)) -ne 0 ]; then
+ if [ $((getflag&6)) -eq 2 ]; then
+ dom="($S.*)?"; # print only definitions
+ elif [ $((getflag&6)) -eq 4 ]; then
+ dom="\(.*"; # print only macros
+ else
+ dom="(\(.*|$S.*)?"; # catch definition or macro
+ fi
+ get_def $printflag "$name$dom" $fl
+ fi
+ if [ $((getflag&8)) -eq 8 ]; then
+ get_elem $printflag "struct" "$name" $fl
+ fi
+ if [ $((getflag&16)) -eq 16 ]; then
+ get_elem $printflag "enum" "$name" $fl
+ fi
+ if [ $((getflag&32)) -eq 32 ]; then
+ get_elem $printflag "union" "$name" $fl
+ fi
+}
+
+#main
+parseopts "$@"
next reply other threads:[~2009-03-24 10:11 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-03-24 10:09 Roel Kluin [this message]
2009-03-24 12:05 ` [PATCH] git-cget: prints elements of C code in the git repository Johannes Schindelin
2009-03-24 13:30 ` Roel Kluin
2009-03-24 13:52 ` Johannes Schindelin
2009-03-24 15:08 ` Ping Yin
2009-03-24 13:59 ` Jeff King
2009-03-24 14:04 ` Stefan Karpinski
2009-03-24 14:05 ` Johannes Schindelin
2009-03-24 14:38 ` Mike Ralphson
2009-03-24 14:56 ` Johannes Schindelin
2009-03-24 16:37 ` Mike Ralphson
2009-03-24 16:33 ` Steven Tweed
2009-03-25 4:23 ` Jeff King
2009-03-24 16:17 ` roel kluin
2009-03-27 9:22 ` roel kluin
2009-03-27 11:26 ` Johannes Schindelin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=49C8B159.2040600@gmail.com \
--to=roel.kluin@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.