* [PATCH] get_maintainer: Add a couple more --self-test options @ 2017-11-06 17:27 Joe Perches 2017-11-06 19:12 ` Tom Saeger 0 siblings, 1 reply; 3+ messages in thread From: Joe Perches @ 2017-11-06 17:27 UTC (permalink / raw) To: Andrew Morton; +Cc: Tom Saeger, linux-kernel Check for duplicate section headers and link reachability. Miscellanea: o Add --self-test=<foo> options (sections, patterns and scm for now) where the default without options is all tests o Rename check_maintainers_patterns to self_test o Rename self_test_pattern_info to self_test_info Signed-off-by: Joe Perches <joe@perches.com> cc: Tom Saeger <tom.saeger@oracle.com> --- scripts/get_maintainer.pl | 114 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 17 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index c68a5d1ba709..748bff0790a8 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -57,7 +57,7 @@ my $sections = 0; my $file_emails = 0; my $from_filename = 0; my $pattern_depth = 0; -my $self_test = 0; +my $self_test = undef; my $version = 0; my $help = 0; my $find_maintainer_files = 0; @@ -221,7 +221,7 @@ if (-f $ignore_file) { if ($#ARGV > 0) { foreach (@ARGV) { - if ($_ eq "-self-test" || $_ eq "--self-test") { + if ($_ =~ /^-{1,2}self-test(?:=|$)/) { die "$P: using --self-test does not allow any other option or argument\n"; } } @@ -263,7 +263,7 @@ if (!GetOptions( 'fe|file-emails!' => \$file_emails, 'f|file' => \$from_filename, 'find-maintainer-files' => \$find_maintainer_files, - 'self-test' => \$self_test, + 'self-test:s' => \$self_test, 'v|version' => \$version, 'h|help|usage' => \$help, )) { @@ -280,9 +280,9 @@ if ($version != 0) { exit 0; } -if ($self_test) { +if (defined $self_test) { read_all_maintainer_files(); - check_maintainers_patterns(); + self_test(); exit 0; } @@ -329,7 +329,7 @@ if (!top_of_kernel_tree($lk_path)) { my @typevalue = (); my %keyword_hash; my @mfiles = (); -my @self_test_pattern_info = (); +my @self_test_info = (); sub read_maintainer_file { my ($file) = @_; @@ -339,6 +339,7 @@ sub read_maintainer_file { my $i = 1; while (<$maint>) { my $line = $_; + chomp $line; if ($line =~ m/^([A-Z]):\s*(.*)/) { my $type = $1; @@ -353,17 +354,16 @@ sub read_maintainer_file { if ((-d $value)) { $value =~ s@([^/])$@$1/@; } - if ($self_test) { - push(@self_test_pattern_info, {file=>$file, line=>$line, linenr=>$i, pat=>$value}); - } } elsif ($type eq "K") { $keyword_hash{@typevalue} = $value; } push(@typevalue, "$type:$value"); } elsif (!(/^\s*$/ || /^\s*\#/)) { - $line =~ s/\n$//g; push(@typevalue, $line); } + if (defined $self_test) { + push(@self_test_info, {file=>$file, linenr=>$i, line=>$line}); + } $i++; } close($maint); @@ -614,17 +614,97 @@ if ($web) { exit($exit); -sub check_maintainers_patterns { +sub self_test { my @lsfiles = (); + my @good_links = (); + my @bad_links = (); + my @section_headers = (); @lsfiles = vcs_list_files($lk_path); - for my $x (@self_test_pattern_info) { - if (!grep(m@^$x->{pat}@, @lsfiles)) { - my $line = $x->{line}; - chomp($line); - print("$x->{file}:$x->{linenr}: warning: no matches $line\n"); - } + for my $x (@self_test_info) { + + ## Section header duplication + if (($self_test eq "" || $self_test =~ /\bsections\b/) && + $x->{line} =~ /^\S[^:]/) { + if (grep(m@^\Q$x->{line}\E@, @section_headers)) { + print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n"); + } else { + push(@section_headers, $x->{line}); + } + } + next if ($x->{line} !~ /^([A-Z]):\s*(.*)/); + + my $type = $1; + my $value = $2; + + ## Filename pattern matching + if (($type eq "F" || $type eq "X") && + ($self_test eq "" || $self_test =~ /\bpatterns\b/)) { + $value =~ s@\.@\\\.@g; ##Convert . to \. + $value =~ s/\*/\.\*/g; ##Convert * to .* + $value =~ s/\?/\./g; ##Convert ? to . + ##if pattern is a directory and it lacks a trailing slash, add one + if ((-d $value)) { + $value =~ s@([^/])$@$1/@; + } + if (!grep(m@^$value@, @lsfiles)) { + print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n"); + } + + ## Link reachability + } elsif (($type eq "W" || + $type eq "B" && $value =~ /^https?:/) && + ($self_test eq "" || $self_test =~ /\blinks\b/)) { + next if (grep(m@^\Q$value\E$@, @good_links)); + my $isbad = 0; + if (grep(m@^\Q$value\E$@, @bad_links)) { + $isbad = 1; + } else { + my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`; + if ($? == 0) { + push(@good_links, $value); + } else { + push(@bad_links, $value); + $isbad = 1; + } + } + if ($isbad) { + print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n"); + } + + ## SCM reachability + } elsif (($type eq "T" && $value =~ /^(?:git|quilt|hg)\s+\S/) && + ($self_test eq "" || $self_test =~ /\bscm\b/)) { + next if (grep(m@^\Q$value\E$@, @good_links)); + my $isbad = 0; + if (grep(m@^\Q$value\E$@, @bad_links)) { + $isbad = 1; + } else { + if ($value =~ /^git\s+(\S+)/) { + my $url = $1; + my $output = `git ls-remote --exit-code -h "$url" > /dev/null 2>&1`; + if ($? == 0) { + push(@good_links, $value); + } else { + push(@bad_links, $value); + $isbad = 1; + } + } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) { + my $url = $1; + my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`; + if ($? == 0) { + push(@good_links, $value); + } else { + push(@bad_links, $value); + $isbad = 1; + } + } + if ($isbad) { + print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n"); + } + } + } } } -- 2.15.0 ^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] get_maintainer: Add a couple more --self-test options 2017-11-06 17:27 [PATCH] get_maintainer: Add a couple more --self-test options Joe Perches @ 2017-11-06 19:12 ` Tom Saeger 2017-11-06 19:24 ` Joe Perches 0 siblings, 1 reply; 3+ messages in thread From: Tom Saeger @ 2017-11-06 19:12 UTC (permalink / raw) To: Joe Perches; +Cc: Andrew Morton, Tom Saeger, linux-kernel Hi Joe, This is good! I had something similar cooking - specifically for SCM validation. My SCM attempt caught a few more issues: - check git branch if specified - check validitiy of "T:" entry, otherwise warn of malformed entry. Example malformed (current next has two instances): 9740 T: git://git.infradead.org/nvme.git Should be: 9740 T: git git://git.infradead.org/nvme.git Also - I believe you intended on warning on all bad SCM entries, not just newly discovered ones? Your change correctly finds a previously $isbad, however the print is enclosed in an else preventing output. I was going to inline these, but in my haste to understand in incorporate changes I sanitized whitespace (BTW - I see both tabs and spaces, which is preferred in this file?) The below git branch special-casing is for these: 567:T: git git://people.freedesktop.org/~airlied/linux (part of drm maint) 3671:T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) See bottom for my suggestions. --Tom On Mon, Nov 06, 2017 at 09:27:25AM -0800, Joe Perches wrote: > Check for duplicate section headers and link reachability. > > Miscellanea: > > o Add --self-test=<foo> options (sections, patterns and scm for now) > where the default without options is all tests > o Rename check_maintainers_patterns to self_test > o Rename self_test_pattern_info to self_test_info > > Signed-off-by: Joe Perches <joe@perches.com> > cc: Tom Saeger <tom.saeger@oracle.com> > --- > scripts/get_maintainer.pl | 114 +++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 97 insertions(+), 17 deletions(-) > > diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl > index c68a5d1ba709..748bff0790a8 100755 > --- a/scripts/get_maintainer.pl > +++ b/scripts/get_maintainer.pl > @@ -57,7 +57,7 @@ my $sections = 0; > my $file_emails = 0; > my $from_filename = 0; > my $pattern_depth = 0; > -my $self_test = 0; > +my $self_test = undef; > my $version = 0; > my $help = 0; > my $find_maintainer_files = 0; > @@ -221,7 +221,7 @@ if (-f $ignore_file) { > > if ($#ARGV > 0) { > foreach (@ARGV) { > - if ($_ eq "-self-test" || $_ eq "--self-test") { > + if ($_ =~ /^-{1,2}self-test(?:=|$)/) { > die "$P: using --self-test does not allow any other option or argument\n"; > } > } > @@ -263,7 +263,7 @@ if (!GetOptions( > 'fe|file-emails!' => \$file_emails, > 'f|file' => \$from_filename, > 'find-maintainer-files' => \$find_maintainer_files, > - 'self-test' => \$self_test, > + 'self-test:s' => \$self_test, > 'v|version' => \$version, > 'h|help|usage' => \$help, > )) { > @@ -280,9 +280,9 @@ if ($version != 0) { > exit 0; > } > > -if ($self_test) { > +if (defined $self_test) { > read_all_maintainer_files(); > - check_maintainers_patterns(); > + self_test(); > exit 0; > } > > @@ -329,7 +329,7 @@ if (!top_of_kernel_tree($lk_path)) { > my @typevalue = (); > my %keyword_hash; > my @mfiles = (); > -my @self_test_pattern_info = (); > +my @self_test_info = (); > > sub read_maintainer_file { > my ($file) = @_; > @@ -339,6 +339,7 @@ sub read_maintainer_file { > my $i = 1; > while (<$maint>) { > my $line = $_; > + chomp $line; > > if ($line =~ m/^([A-Z]):\s*(.*)/) { > my $type = $1; > @@ -353,17 +354,16 @@ sub read_maintainer_file { > if ((-d $value)) { > $value =~ s@([^/])$@$1/@; > } > - if ($self_test) { > - push(@self_test_pattern_info, {file=>$file, line=>$line, linenr=>$i, pat=>$value}); > - } > } elsif ($type eq "K") { > $keyword_hash{@typevalue} = $value; > } > push(@typevalue, "$type:$value"); > } elsif (!(/^\s*$/ || /^\s*\#/)) { > - $line =~ s/\n$//g; > push(@typevalue, $line); > } > + if (defined $self_test) { > + push(@self_test_info, {file=>$file, linenr=>$i, line=>$line}); > + } > $i++; > } > close($maint); > @@ -614,17 +614,97 @@ if ($web) { > > exit($exit); > > -sub check_maintainers_patterns { > +sub self_test { > my @lsfiles = (); > + my @good_links = (); > + my @bad_links = (); > + my @section_headers = (); > > @lsfiles = vcs_list_files($lk_path); > > - for my $x (@self_test_pattern_info) { > - if (!grep(m@^$x->{pat}@, @lsfiles)) { > - my $line = $x->{line}; > - chomp($line); > - print("$x->{file}:$x->{linenr}: warning: no matches $line\n"); > - } > + for my $x (@self_test_info) { > + > + ## Section header duplication > + if (($self_test eq "" || $self_test =~ /\bsections\b/) && > + $x->{line} =~ /^\S[^:]/) { > + if (grep(m@^\Q$x->{line}\E@, @section_headers)) { > + print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n"); > + } else { > + push(@section_headers, $x->{line}); > + } > + } > + next if ($x->{line} !~ /^([A-Z]):\s*(.*)/); > + > + my $type = $1; > + my $value = $2; > + > + ## Filename pattern matching > + if (($type eq "F" || $type eq "X") && > + ($self_test eq "" || $self_test =~ /\bpatterns\b/)) { > + $value =~ s@\.@\\\.@g; ##Convert . to \. > + $value =~ s/\*/\.\*/g; ##Convert * to .* > + $value =~ s/\?/\./g; ##Convert ? to . > + ##if pattern is a directory and it lacks a trailing slash, add one > + if ((-d $value)) { > + $value =~ s@([^/])$@$1/@; > + } > + if (!grep(m@^$value@, @lsfiles)) { > + print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n"); > + } > + > + ## Link reachability > + } elsif (($type eq "W" || > + $type eq "B" && $value =~ /^https?:/) && > + ($self_test eq "" || $self_test =~ /\blinks\b/)) { > + next if (grep(m@^\Q$value\E$@, @good_links)); > + my $isbad = 0; > + if (grep(m@^\Q$value\E$@, @bad_links)) { > + $isbad = 1; > + } else { > + my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`; > + if ($? == 0) { > + push(@good_links, $value); > + } else { > + push(@bad_links, $value); > + $isbad = 1; > + } > + } > + if ($isbad) { > + print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n"); > + } > + > + ## SCM reachability > + } elsif (($type eq "T" && $value =~ /^(?:git|quilt|hg)\s+\S/) && > + ($self_test eq "" || $self_test =~ /\bscm\b/)) { > + next if (grep(m@^\Q$value\E$@, @good_links)); > + my $isbad = 0; > + if (grep(m@^\Q$value\E$@, @bad_links)) { > + $isbad = 1; > + } else { > + if ($value =~ /^git\s+(\S+)/) { > + my $url = $1; > + my $output = `git ls-remote --exit-code -h "$url" > /dev/null 2>&1`; > + if ($? == 0) { > + push(@good_links, $value); > + } else { > + push(@bad_links, $value); > + $isbad = 1; > + } > + } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) { > + my $url = $1; > + my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`; > + if ($? == 0) { > + push(@good_links, $value); > + } else { > + push(@bad_links, $value); > + $isbad = 1; > + } > + } > + if ($isbad) { > + print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n"); > + } > + } > } > } > > -- > 2.15.0 > Changed SCM portion to this, which picks up a few more warnings... Checks git branch on remote if specified. Perhaps a $ismalformed category or some other way to deal with malformed entries? Or just move up to first check of SCM? ## SCM reachability } elsif (($type eq "T") && ($self_test eq "" || $self_test =~ /\bscm\b/)) { next if (grep(m@^\Q$value\E$@, @good_links)); my $isbad = 0; if (grep(m@^\Q$value\E$@, @bad_links)) { $isbad = 1; } else { if ($value !~ /^(?:git|quilt|hg)\s+\S/) { print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n"); } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) { my $url = $1; my $branch = ""; $branch = $3 if $3; my $output = `git ls-remote --exit-code -h "$url" $branch> /dev/null 2>&1`; if ($? == 0) { push(@good_links, $value); } else { push(@bad_links, $value); $isbad = 1; } } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) { my $url = $1; my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`; if ($? == 0) { push(@good_links, $value); } else { push(@bad_links, $value); $isbad = 1; } } } if ($isbad) { print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n"); } } } } ^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] get_maintainer: Add a couple more --self-test options 2017-11-06 19:12 ` Tom Saeger @ 2017-11-06 19:24 ` Joe Perches 0 siblings, 0 replies; 3+ messages in thread From: Joe Perches @ 2017-11-06 19:24 UTC (permalink / raw) To: Tom Saeger; +Cc: Andrew Morton, linux-kernel On Mon, 2017-11-06 at 13:12 -0600, Tom Saeger wrote: > Hi Joe, > This is good! I had something similar cooking - specifically for SCM validation. > > My SCM attempt caught a few more issues: > - check git branch if specified > - check validitiy of "T:" entry, otherwise warn of malformed entry. > > Example malformed (current next has two instances): > > 9740 T: git://git.infradead.org/nvme.git > > Should be: > 9740 T: git git://git.infradead.org/nvme.git > > > Also - I believe you intended on warning on all bad SCM entries, not just newly discovered ones? > Your change correctly finds a previously $isbad, however the print is enclosed in an else preventing output. > > I was going to inline these, but in my haste to understand in incorporate changes I sanitized whitespace > (BTW - I see both tabs and spaces, which is preferred in this file?) The indent in get_maintainers is supposed to be 4. There is a mix of 8 char tabs and spaces, but there shouldn't be any spaces followed by tabs. > The below git branch special-casing is for these: > 567:T: git git://people.freedesktop.org/~airlied/linux (part of drm maint) > 3671:T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) > > See bottom for my suggestions. [] > Changed SCM portion to this, which picks up a few more warnings... > Checks git branch on remote if specified. > Perhaps a $ismalformed category or some other way to deal with malformed entries? Or just > move up to first check of SCM? > > ## SCM reachability > } elsif (($type eq "T") && ($self_test eq "" || $self_test =~ /\bscm\b/)) { > next if (grep(m@^\Q$value\E$@, @good_links)); > my $isbad = 0; > if (grep(m@^\Q$value\E$@, @bad_links)) { > $isbad = 1; > } else { > if ($value !~ /^(?:git|quilt|hg)\s+\S/) { > print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n"); > } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) { This seems OK. > my $url = $1; > my $branch = ""; > $branch = $3 if $3; > ^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-11-06 19:24 UTC | newest] Thread overview: 3+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2017-11-06 17:27 [PATCH] get_maintainer: Add a couple more --self-test options Joe Perches 2017-11-06 19:12 ` Tom Saeger 2017-11-06 19:24 ` Joe Perches
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox