* [RFC/PATCH] gitweb: Create Gitweb::Escape module
@ 2010-06-04 9:07 Pavan Kumar Sunkara
2010-06-05 9:22 ` Jakub Narebski
0 siblings, 1 reply; 2+ messages in thread
From: Pavan Kumar Sunkara @ 2010-06-04 9:07 UTC (permalink / raw)
To: git, jnareb, chriscool, pasky; +Cc: Pavan Kumar Sunkara
Create a Gitweb::Escape module in 'gitweb/lib/Gitweb/Escape.pm'
to store all the quoting/unquoting and escaping subroutines
regarding the gitweb.perl script.
Subroutines moved:
to_utf8
esc_param
esc_url
esc_html
esc_path
quot_cec
quot_upr
unquote
untabify
Subroutines yet to move: (Contains not yet packaged subs & vars)
None
Update gitweb/Makefile to install gitweb modules alongside gitwe
Signed-off-by: Pavan Kumar Sunkara <pavan.sss1991@gmail.com>
---
gitweb/Makefile | 1 +
gitweb/gitweb.perl | 157 +--------------------------------------
gitweb/lib/Gitweb/Escape.pm | 175 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 177 insertions(+), 156 deletions(-)
create mode 100644 gitweb/lib/Gitweb/Escape.pm
diff --git a/gitweb/Makefile b/gitweb/Makefile
index 15646b2..4343396 100644
--- a/gitweb/Makefile
+++ b/gitweb/Makefile
@@ -114,6 +114,7 @@ GITWEB_FILES += static/git-logo.png static/git-favicon.png
# Files: gitweb/lib/Gitweb
GITWEB_LIB_GITWEB += lib/Gitweb/Config.pm
GITWEB_LIB_GITWEB += lib/Gitweb/Request.pm
+GITWEB_LIB_GITWEB += lib/Gitweb/Escape.pm
GITWEB_REPLACE = \
-e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \
diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index b30ac48..9e54983 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -12,7 +12,6 @@ use warnings;
use CGI qw(:standard :escapeHTML -nosticky);
use CGI::Util qw(unescape);
use CGI::Carp qw(fatalsToBrowser set_message);
-use Encode;
use Fcntl ':mode';
use File::Find qw();
use File::Basename qw(basename);
@@ -27,6 +26,7 @@ use lib __DIR__ . "/lib";
use Gitweb::Config;
use Gitweb::Request;
+use Gitweb::Escape;
BEGIN {
CGI->compile() if $ENV{'MOD_PERL'};
@@ -762,161 +762,6 @@ sub validate_project {
}
}
-# decode sequences of octets in utf8 into Perl's internal form,
-# which is utf-8 with utf8 flag set if needed. gitweb writes out
-# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning
-sub to_utf8 {
- my $str = shift;
- return undef unless defined $str;
- if (utf8::valid($str)) {
- utf8::decode($str);
- return $str;
- } else {
- return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
- }
-}
-
-# quote unsafe chars, but keep the slash, even when it's not
-# correct, but quoted slashes look too horrible in bookmarks
-sub esc_param {
- my $str = shift;
- return undef unless defined $str;
- $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg;
- $str =~ s/ /\+/g;
- return $str;
-}
-
-# quote unsafe chars in whole URL, so some charactrs cannot be quoted
-sub esc_url {
- my $str = shift;
- return undef unless defined $str;
- $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&=])/sprintf("%%%02X", ord($1))/eg;
- $str =~ s/\+/%2B/g;
- $str =~ s/ /\+/g;
- return $str;
-}
-
-# replace invalid utf8 character with SUBSTITUTION sequence
-sub esc_html {
- my $str = shift;
- my %opts = @_;
-
- return undef unless defined $str;
-
- $str = to_utf8($str);
- $str = $cgi->escapeHTML($str);
- if ($opts{'-nbsp'}) {
- $str =~ s/ / /g;
- }
- $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;
- return $str;
-}
-
-# quote control characters and escape filename to HTML
-sub esc_path {
- my $str = shift;
- my %opts = @_;
-
- return undef unless defined $str;
-
- $str = to_utf8($str);
- $str = $cgi->escapeHTML($str);
- if ($opts{'-nbsp'}) {
- $str =~ s/ / /g;
- }
- $str =~ s|([[:cntrl:]])|quot_cec($1)|eg;
- return $str;
-}
-
-# Make control characters "printable", using character escape codes (CEC)
-sub quot_cec {
- my $cntrl = shift;
- my %opts = @_;
- my %es = ( # character escape codes, aka escape sequences
- "\t" => '\t', # tab (HT)
- "\n" => '\n', # line feed (LF)
- "\r" => '\r', # carrige return (CR)
- "\f" => '\f', # form feed (FF)
- "\b" => '\b', # backspace (BS)
- "\a" => '\a', # alarm (bell) (BEL)
- "\e" => '\e', # escape (ESC)
- "\013" => '\v', # vertical tab (VT)
- "\000" => '\0', # nul character (NUL)
- );
- my $chr = ( (exists $es{$cntrl})
- ? $es{$cntrl}
- : sprintf('\%2x', ord($cntrl)) );
- if ($opts{-nohtml}) {
- return $chr;
- } else {
- return "<span class=\"cntrl\">$chr</span>";
- }
-}
-
-# Alternatively use unicode control pictures codepoints,
-# Unicode "printable representation" (PR)
-sub quot_upr {
- my $cntrl = shift;
- my %opts = @_;
-
- my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl));
- if ($opts{-nohtml}) {
- return $chr;
- } else {
- return "<span class=\"cntrl\">$chr</span>";
- }
-}
-
-# git may return quoted and escaped filenames
-sub unquote {
- my $str = shift;
-
- sub unq {
- my $seq = shift;
- my %es = ( # character escape codes, aka escape sequences
- 't' => "\t", # tab (HT, TAB)
- 'n' => "\n", # newline (NL)
- 'r' => "\r", # return (CR)
- 'f' => "\f", # form feed (FF)
- 'b' => "\b", # backspace (BS)
- 'a' => "\a", # alarm (bell) (BEL)
- 'e' => "\e", # escape (ESC)
- 'v' => "\013", # vertical tab (VT)
- );
-
- if ($seq =~ m/^[0-7]{1,3}$/) {
- # octal char sequence
- return chr(oct($seq));
- } elsif (exists $es{$seq}) {
- # C escape sequence, aka character escape code
- return $es{$seq};
- }
- # quoted ordinary character
- return $seq;
- }
-
- if ($str =~ m/^"(.*)"$/) {
- # needs unquoting
- $str = $1;
- $str =~ s/\\([^0-7]|[0-7]{1,3})/unq($1)/eg;
- }
- return $str;
-}
-
-# escape tabs (convert tabs to spaces)
-sub untabify {
- my $line = shift;
-
- while ((my $pos = index($line, "\t")) != -1) {
- if (my $count = (8 - ($pos % 8))) {
- my $spaces = ' ' x $count;
- $line =~ s/\t/$spaces/;
- }
- }
-
- return $line;
-}
-
sub project_in_list {
my $project = shift;
my @list = git_get_projects_list();
diff --git a/gitweb/lib/Gitweb/Escape.pm b/gitweb/lib/Gitweb/Escape.pm
new file mode 100644
index 0000000..f6ea209
--- /dev/null
+++ b/gitweb/lib/Gitweb/Escape.pm
@@ -0,0 +1,175 @@
+#!/usr/bin/perl
+#
+# Gitweb::Escape -- gitweb's quoting/unquoting, escaping package
+#
+# This program is licensed under the GPLv2
+
+package Gitweb::Escape;
+
+use strict;
+use warnings;
+use Exporter qw(import);
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(&to_utf8 &esc_param &esc_url &esc_html &esc_path "_cec "_upr &unquote &untabify);
+
+use Encode;
+use Gitweb::Config;
+use Gitweb::Request;
+
+# decode sequences of octets in utf8 into Perl's internal form,
+# which is utf-8 with utf8 flag set if needed. gitweb writes out
+# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning
+sub to_utf8 {
+ my $str = shift;
+ return undef unless defined $str;
+ if (utf8::valid($str)) {
+ utf8::decode($str);
+ return $str;
+ } else {
+ return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
+ }
+}
+
+# quote unsafe chars, but keep the slash, even when it's not
+# correct, but quoted slashes look too horrible in bookmarks
+sub esc_param {
+ my $str = shift;
+ return undef unless defined $str;
+ $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg;
+ $str =~ s/ /\+/g;
+ return $str;
+}
+
+# quote unsafe chars in whole URL, so some charactrs cannot be quoted
+sub esc_url {
+ my $str = shift;
+ return undef unless defined $str;
+ $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&=])/sprintf("%%%02X", ord($1))/eg;
+ $str =~ s/\+/%2B/g;
+ $str =~ s/ /\+/g;
+ return $str;
+}
+
+# replace invalid utf8 character with SUBSTITUTION sequence
+sub esc_html {
+ my $str = shift;
+ my %opts = @_;
+
+ return undef unless defined $str;
+
+ $str = to_utf8($str);
+ $str = $cgi->escapeHTML($str);
+ if ($opts{'-nbsp'}) {
+ $str =~ s/ / /g;
+ }
+ $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;
+ return $str;
+}
+
+# quote control characters and escape filename to HTML
+sub esc_path {
+ my $str = shift;
+ my %opts = @_;
+
+ return undef unless defined $str;
+
+ $str = to_utf8($str);
+ $str = $cgi->escapeHTML($str);
+ if ($opts{'-nbsp'}) {
+ $str =~ s/ / /g;
+ }
+ $str =~ s|([[:cntrl:]])|quot_cec($1)|eg;
+ return $str;
+}
+
+# Make control characters "printable", using character escape codes (CEC)
+sub quot_cec {
+ my $cntrl = shift;
+ my %opts = @_;
+ my %es = ( # character escape codes, aka escape sequences
+ "\t" => '\t', # tab (HT)
+ "\n" => '\n', # line feed (LF)
+ "\r" => '\r', # carrige return (CR)
+ "\f" => '\f', # form feed (FF)
+ "\b" => '\b', # backspace (BS)
+ "\a" => '\a', # alarm (bell) (BEL)
+ "\e" => '\e', # escape (ESC)
+ "\013" => '\v', # vertical tab (VT)
+ "\000" => '\0', # nul character (NUL)
+ );
+ my $chr = ( (exists $es{$cntrl})
+ ? $es{$cntrl}
+ : sprintf('\%2x', ord($cntrl)) );
+ if ($opts{-nohtml}) {
+ return $chr;
+ } else {
+ return "<span class=\"cntrl\">$chr</span>";
+ }
+}
+
+# Alternatively use unicode control pictures codepoints,
+# Unicode "printable representation" (PR)
+sub quot_upr {
+ my $cntrl = shift;
+ my %opts = @_;
+
+ my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl));
+ if ($opts{-nohtml}) {
+ return $chr;
+ } else {
+ return "<span class=\"cntrl\">$chr</span>";
+ }
+}
+
+# git may return quoted and escaped filenames
+sub unquote {
+ my $str = shift;
+
+ sub unq {
+ my $seq = shift;
+ my %es = ( # character escape codes, aka escape sequences
+ 't' => "\t", # tab (HT, TAB)
+ 'n' => "\n", # newline (NL)
+ 'r' => "\r", # return (CR)
+ 'f' => "\f", # form feed (FF)
+ 'b' => "\b", # backspace (BS)
+ 'a' => "\a", # alarm (bell) (BEL)
+ 'e' => "\e", # escape (ESC)
+ 'v' => "\013", # vertical tab (VT)
+ );
+
+ if ($seq =~ m/^[0-7]{1,3}$/) {
+ # octal char sequence
+ return chr(oct($seq));
+ } elsif (exists $es{$seq}) {
+ # C escape sequence, aka character escape code
+ return $es{$seq};
+ }
+ # quoted ordinary character
+ return $seq;
+ }
+
+ if ($str =~ m/^"(.*)"$/) {
+ # needs unquoting
+ $str = $1;
+ $str =~ s/\\([^0-7]|[0-7]{1,3})/unq($1)/eg;
+ }
+ return $str;
+}
+
+# escape tabs (convert tabs to spaces)
+sub untabify {
+ my $line = shift;
+
+ while ((my $pos = index($line, "\t")) != -1) {
+ if (my $count = (8 - ($pos % 8))) {
+ my $spaces = ' ' x $count;
+ $line =~ s/\t/$spaces/;
+ }
+ }
+
+ return $line;
+}
+
+1;
--
1.7.1.447.g2b98e.dirty
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [RFC/PATCH] gitweb: Create Gitweb::Escape module
2010-06-04 9:07 [RFC/PATCH] gitweb: Create Gitweb::Escape module Pavan Kumar Sunkara
@ 2010-06-05 9:22 ` Jakub Narebski
0 siblings, 0 replies; 2+ messages in thread
From: Jakub Narebski @ 2010-06-05 9:22 UTC (permalink / raw)
To: Pavan Kumar Sunkara; +Cc: git, Christian Couder, Petr Baudis, Giuseppe Bilotta
On Fri, 4 June 2010, Pavan Kumar Sunkara wrote:
> Create a Gitweb::Escape module in 'gitweb/lib/Gitweb/Escape.pm'
> to store all the quoting/unquoting and escaping subroutines
> regarding the gitweb.perl script.
<del>
I would ask if it wouldn't be simpler to just have Gitweb::Util
for all auxiliary / helper subroutines...
...if not for the fact that to avoid circular dependencies you would
have, most probably, split this module further into
* Gitweb::Escape::CGI
* Gitweb::Escape::HTML
and perhaps move 'unquote' to Gitweb::Git or Gitweb::Escape::Git.
Then Gitweb::Escape could just import both.
</del>
Errr... sorry. I guess that either Gitweb::Config nor Gitweb::Request
needs to include Gitweb::Escape.
Note that 'Escape' part of name is not entirely correct, because some of
subroutines mentioned below deal with _quoting_ / _unquoting_, not with
escaping / unescaping.
>
> Subroutines moved:
> to_utf8
> esc_param
> esc_url
> esc_html
> esc_path
> quot_cec
> quot_upr
> unquote
> untabify
>
> Subroutines yet to move: (Contains not yet packaged subs & vars)
> None
>
> Update gitweb/Makefile to install gitweb modules alongside gitwe
>
> Signed-off-by: Pavan Kumar Sunkara <pavan.sss1991@gmail.com>
> ---
> gitweb/Makefile | 1 +
> gitweb/gitweb.perl | 157 +--------------------------------------
> gitweb/lib/Gitweb/Escape.pm | 175 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 177 insertions(+), 156 deletions(-)
> create mode 100644 gitweb/lib/Gitweb/Escape.pm
It's a pity that git cannot tell us here that it is straight code
movement (moving code block from one file to the other)...
> diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
> index b30ac48..9e54983 100755
> --- a/gitweb/gitweb.perl
> +++ b/gitweb/gitweb.perl
> @@ -12,7 +12,6 @@ use warnings;
> use CGI qw(:standard :escapeHTML -nosticky);
> use CGI::Util qw(unescape);
> use CGI::Carp qw(fatalsToBrowser set_message);
> -use Encode;
> use Fcntl ':mode';
> use File::Find qw();
> use File::Basename qw(basename);
> @@ -27,6 +26,7 @@ use lib __DIR__ . "/lib";
>
> use Gitweb::Config;
> use Gitweb::Request;
> +use Gitweb::Escape;
>
> BEGIN {
> CGI->compile() if $ENV{'MOD_PERL'};
> @@ -762,161 +762,6 @@ sub validate_project {
> }
> }
>
> -# decode sequences of octets in utf8 into Perl's internal form,
> -# which is utf-8 with utf8 flag set if needed. gitweb writes out
> -# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning
> -sub to_utf8 {
[cut]
> diff --git a/gitweb/lib/Gitweb/Escape.pm b/gitweb/lib/Gitweb/Escape.pm
> new file mode 100644
> index 0000000..f6ea209
> --- /dev/null
> +++ b/gitweb/lib/Gitweb/Escape.pm
> @@ -0,0 +1,175 @@
> +#!/usr/bin/perl
> +#
> +# Gitweb::Escape -- gitweb's quoting/unquoting, escaping package
> +#
> +# This program is licensed under the GPLv2
> +
> +package Gitweb::Escape;
> +
> +use strict;
> +use warnings;
> +use Exporter qw(import);
> +
> +our @ISA = qw(Exporter);
This line is not necessary if you are using 'use Exporter qw(import);'
This code:
use Exporter;
our @ISA = qw(Exporter);
is equivalent to (safer)
use Exporter;
use base qw(Exporter);
The section "Exporting without inheriting from Exporter" of
Exporter(3pm) manpage says that using
use Exporter qw(import);
allows to avoid inheriting our package from Exporter.
> +our @EXPORT = qw(&to_utf8 &esc_param &esc_url &esc_html &esc_path "_cec "_upr &unquote &untabify);
Please try to not introduce such long likes. Perhaps:
+our @EXPORT = qw(&to_utf8 &esc_param &esc_url &esc_html &esc_path
"_cec "_upr &unquote &untabify);
Also ampersands are not needed there, and I think hat they are not used
usually, so it would be:
+our @EXPORT = qw(to_utf8 esc_param esc_url esc_html esc_path
quot_cec quot_upr unquote untabify);
But note also comment below about splitting this file further.
> +
> +use Encode;
Perhaps empty line between 'use Exporter;' (using outside Perl modules)
and 'use Gitweb::Config;' would help readibility of code, hmmm...?
Also, don't you need 'use CGI;' or 'use CGI qw(:escapeHTML);' here, to
have access to CGI::escape (to be more exact CGI::Util::escape, but
imported by CGI to CGI::escape) and CGI::escapeHTML?
> +use Gitweb::Config;
I personally would say here explicitely
+use Gitweb::Config qw($fallback_encoding);
to make it clear that we need only this single one config variable.
> +use Gitweb::Request;
As it is shown below, this is not necessary.
> +
> +# decode sequences of octets in utf8 into Perl's internal form,
> +# which is utf-8 with utf8 flag set if needed. gitweb writes out
> +# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning
s/at beginning/at beginning of XXX/;
> +sub to_utf8 {
> + my $str = shift;
> + return undef unless defined $str;
> + if (utf8::valid($str)) {
> + utf8::decode($str);
> + return $str;
> + } else {
> + return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
> + }
> +}
This single one uses $fallback_encoding from Gitweb::Config.
> +
> +# quote unsafe chars, but keep the slash, even when it's not
> +# correct, but quoted slashes look too horrible in bookmarks
> +sub esc_param {
> + my $str = shift;
> + return undef unless defined $str;
> + $str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg;
> + $str =~ s/ /\+/g;
> + return $str;
> +}
> +
> +# quote unsafe chars in whole URL, so some charactrs cannot be quoted
> +sub esc_url {
> + my $str = shift;
> + return undef unless defined $str;
> + $str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&=])/sprintf("%%%02X", ord($1))/eg;
> + $str =~ s/\+/%2B/g;
> + $str =~ s/ /\+/g;
> + return $str;
> +}
Hmmm... why this difference? esc_url() should differ from esc_param()
only in that esc_url() escapes less (it should not escape query
parameter delimites). It seems like 452e225 (gitweb: fix esc_param,
2009-10-13) by Giuseppe Bilotta missed fixing esc_url(), isn't it?
Might be worth fixing upfront (in a separate commit, of course). Could
you do this, please?
> +
> +# replace invalid utf8 character with SUBSTITUTION sequence
> +sub esc_html {
> + my $str = shift;
> + my %opts = @_;
> +
> + return undef unless defined $str;
> +
> + $str = to_utf8($str);
> + $str = $cgi->escapeHTML($str);
We can use 'CGI::escapeHTML;' here, and avoid dependency on
Gitweb::Request.
This change probably should be done in a separate cleanup patch,
preceding this one.
> + if ($opts{'-nbsp'}) {
> + $str =~ s/ / /g;
> + }
> + $str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;
> + return $str;
> +}
> +
> +# quote control characters and escape filename to HTML
> +sub esc_path {
> + my $str = shift;
> + my %opts = @_;
> +
> + return undef unless defined $str;
> +
> + $str = to_utf8($str);
> + $str = $cgi->escapeHTML($str);
Same here.
> + if ($opts{'-nbsp'}) {
> + $str =~ s/ / /g;
> + }
> + $str =~ s|([[:cntrl:]])|quot_cec($1)|eg;
> + return $str;
> +}
Those two subroutines use to_utf8(), which in turn uses
$fallback_encoding.
> +
> +# Make control characters "printable", using character escape codes (CEC)
> +sub quot_cec {
[...]
> +}
> +
> +# Alternatively use unicode control pictures codepoints,
> +# Unicode "printable representation" (PR)
> +sub quot_upr {
[...]
> +}
> +# git may return quoted and escaped filenames
> +sub unquote {
[...]
> +}
Note that this subroutine is about neither CGI nor HTML, but about
unquoting and unescaping output of git commands. Just FYI.
> +
> +# escape tabs (convert tabs to spaces)
> +sub untabify {
> + my $line = shift;
> +
> + while ((my $pos = index($line, "\t")) != -1) {
> + if (my $count = (8 - ($pos % 8))) {
> + my $spaces = ' ' x $count;
> + $line =~ s/\t/$spaces/;
> + }
> + }
> +
> + return $line;
> +}
> +
> +1;
Good work!
--
Jakub Narebski
Poland
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2010-06-05 9:20 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-04 9:07 [RFC/PATCH] gitweb: Create Gitweb::Escape module Pavan Kumar Sunkara
2010-06-05 9:22 ` Jakub Narebski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).