git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] gitweb: refactor input parameters parse/validation
@ 2008-10-03 17:19 Giuseppe Bilotta
  2008-10-07 10:57 ` Jakub Narebski
  0 siblings, 1 reply; 11+ messages in thread
From: Giuseppe Bilotta @ 2008-10-03 17:19 UTC (permalink / raw)
  To: git; +Cc: Jakub Narebski, Shawn O. Pearce, Giuseppe Bilotta

Since input parameters can be obtained both from CGI parameters and
PATH_INFO, we would like most of the code to be agnostic about the way
parameters were retrieved. We thus collect all the parameters into the
new %input_params hash, delaying validation after the collection is
completed.

Although the kludge removal is minimal at the moment, it makes life much
easier for future expansions such as more extensive PATH_INFO use or
other form of input (e.g. command-line support).

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---

As recommended by Jakub, I reworked the input parameter collection refactoring
into a separate patch preluding to rather than intermixed with the PATH_INFO
enhancement work.

 gitweb/gitweb.perl |  304 +++++++++++++++++++++++++++++-----------------------
 1 files changed, 171 insertions(+), 133 deletions(-)

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 83f810a..329f789 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -29,7 +29,9 @@ our $my_uri = $cgi->url(-absolute => 1);
 
 # if we're called with PATH_INFO, we have to strip that
 # from the URL to find our real URL
-if (my $path_info = $ENV{"PATH_INFO"}) {
+# we make $path_info global because it's also used later on
+my $path_info = $ENV{"PATH_INFO"};
+if ($path_info) {
 	$my_url =~ s,\Q$path_info\E$,,;
 	$my_uri =~ s,\Q$path_info\E$,,;
 }
@@ -390,34 +392,149 @@ $projects_list ||= $projectroot;
 
 # ======================================================================
 # input validation and dispatch
-our $action = $cgi->param('a');
+
+# input parameters can be collected from a variety of sources (presently, CGI
+# and PATH_INFO), so we define an %input_params hash that collects them all
+# together during validation: this allows subsequent uses (e.g. href()) to be
+# agnostic of the parameter origin
+
+my %input_params = ();
+
+# input parameters are stored with the long parameter name as key, so we need
+# the name -> CGI key mapping here. This will also be used in the href
+# subroutine to convert parameters to their CGI equivalent.
+#
+# XXX: Warning: If you touch this, check the search form for updating,
+# too.
+
+my @cgi_param_mapping = (
+	project => "p",
+	action => "a",
+	file_name => "f",
+	file_parent => "fp",
+	hash => "h",
+	hash_parent => "hp",
+	hash_base => "hb",
+	hash_parent_base => "hpb",
+	page => "pg",
+	order => "o",
+	searchtext => "s",
+	searchtype => "st",
+	snapshot_format => "sf",
+	extra_options => "opt",
+	search_use_regexp => "sr",
+);
+my %cgi_param_mapping = @cgi_param_mapping;
+
+# we will also need to know the possible actions, for validation
+my %actions = (
+	"blame" => \&git_blame,
+	"blobdiff" => \&git_blobdiff,
+	"blobdiff_plain" => \&git_blobdiff_plain,
+	"blob" => \&git_blob,
+	"blob_plain" => \&git_blob_plain,
+	"commitdiff" => \&git_commitdiff,
+	"commitdiff_plain" => \&git_commitdiff_plain,
+	"commit" => \&git_commit,
+	"forks" => \&git_forks,
+	"heads" => \&git_heads,
+	"history" => \&git_history,
+	"log" => \&git_log,
+	"rss" => \&git_rss,
+	"atom" => \&git_atom,
+	"search" => \&git_search,
+	"search_help" => \&git_search_help,
+	"shortlog" => \&git_shortlog,
+	"summary" => \&git_summary,
+	"tag" => \&git_tag,
+	"tags" => \&git_tags,
+	"tree" => \&git_tree,
+	"snapshot" => \&git_snapshot,
+	"object" => \&git_object,
+	# those below don't need $project
+	"opml" => \&git_opml,
+	"project_list" => \&git_project_list,
+	"project_index" => \&git_project_index,
+);
+
+# fill %input_params with the CGI parameters. All values except for 'opt'
+# should be single values, but opt can be an array. We should probably
+# build an array of parameters that can be multi-valued, but since for the time
+# being it's only this one, we just single it out
+while (my ($name, $symbol) = each %cgi_param_mapping) {
+	if ($symbol eq 'opt') {
+		$input_params{$name} = [$cgi->param($symbol)];
+	} else {
+		$input_params{$name} = $cgi->param($symbol);
+	}
+}
+
+# now read PATH_INFO and update the parameter list for missing parameters
+sub evaluate_path_info {
+	return if defined $input_params{'project'};
+	return if !$path_info;
+	$path_info =~ s,^/+,,;
+	return if !$path_info;
+
+	# find which part of PATH_INFO is project
+	my $project = $path_info;
+	$project =~ s,/+$,,;
+	while ($project && !check_head_link("$projectroot/$project")) {
+		$project =~ s,/*[^/]*$,,;
+	}
+	# validate project
+	$project = validate_project($project);
+	return unless $project;
+	$input_params{'project'} = $project;
+
+	# do not change any parameters if an action is given using the query string
+	return if $input_params{'action'};
+	$path_info =~ s,^\Q$project\E/*,,;
+	my ($refname, $pathname) = split(/:/, $path_info, 2);
+	if (defined $pathname) {
+		# we got "project.git/branch:filename" or "project.git/branch:dir/"
+		# we could use git_get_type(branch:pathname), but it needs $git_dir
+		$pathname =~ s,^/+,,;
+		if (!$pathname || substr($pathname, -1) eq "/") {
+			$input_params{'action'} = "tree";
+			$pathname =~ s,/$,,;
+		} else {
+			$input_params{'action'} = "blob_plain";
+		}
+		$input_params{'hash_base'} ||= validate_refname($refname);
+		$input_params{'file_name'} ||= validate_pathname($pathname);
+	} elsif (defined $refname) {
+		# we got "project.git/branch"
+		$input_params{'action'} = "shortlog";
+		$input_params{'hash'} ||= validate_refname($refname);
+	}
+}
+evaluate_path_info();
+
+our $action = $input_params{'action'};
 if (defined $action) {
-	if ($action =~ m/[^0-9a-zA-Z\.\-_]/) {
+	if (!validate_action($action)) {
 		die_error(400, "Invalid action parameter");
 	}
 }
 
 # parameters which are pathnames
-our $project = $cgi->param('p');
+our $project = $input_params{'project'};
 if (defined $project) {
-	if (!validate_pathname($project) ||
-	    !(-d "$projectroot/$project") ||
-	    !check_head_link("$projectroot/$project") ||
-	    ($export_ok && !(-e "$projectroot/$project/$export_ok")) ||
-	    ($strict_export && !project_in_list($project))) {
+	if (!validate_project($project)) {
 		undef $project;
 		die_error(404, "No such project");
 	}
 }
 
-our $file_name = $cgi->param('f');
+our $file_name = $input_params{'file_name'};
 if (defined $file_name) {
 	if (!validate_pathname($file_name)) {
 		die_error(400, "Invalid file parameter");
 	}
 }
 
-our $file_parent = $cgi->param('fp');
+our $file_parent = $input_params{'file_parent'};
 if (defined $file_parent) {
 	if (!validate_pathname($file_parent)) {
 		die_error(400, "Invalid file parent parameter");
@@ -425,21 +542,21 @@ if (defined $file_parent) {
 }
 
 # parameters which are refnames
-our $hash = $cgi->param('h');
+our $hash = $input_params{'hash'};
 if (defined $hash) {
 	if (!validate_refname($hash)) {
 		die_error(400, "Invalid hash parameter");
 	}
 }
 
-our $hash_parent = $cgi->param('hp');
+our $hash_parent = $input_params{'hash_parent'};
 if (defined $hash_parent) {
 	if (!validate_refname($hash_parent)) {
 		die_error(400, "Invalid hash parent parameter");
 	}
 }
 
-our $hash_base = $cgi->param('hb');
+our $hash_base = $input_params{'hash_base'};
 if (defined $hash_base) {
 	if (!validate_refname($hash_base)) {
 		die_error(400, "Invalid hash base parameter");
@@ -450,19 +567,19 @@ my %allowed_options = (
 	"--no-merges" => [ qw(rss atom log shortlog history) ],
 );
 
-our @extra_options = $cgi->param('opt');
-if (defined @extra_options) {
-	foreach my $opt (@extra_options) {
-		if (not exists $allowed_options{$opt}) {
-			die_error(400, "Invalid option parameter");
-		}
-		if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
-			die_error(400, "Invalid option parameter for this action");
-		}
+our @extra_options = @{$input_params{'extra_options'}};
+# @extra_options is always defined, since it can only be (currently) set from
+# CGI, and $cgi->param() returns the empty array in array context
+foreach my $opt (@extra_options) {
+	if (not exists $allowed_options{$opt}) {
+		die_error(400, "Invalid option parameter");
+	}
+	if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
+		die_error(400, "Invalid option parameter for this action");
 	}
 }
 
-our $hash_parent_base = $cgi->param('hpb');
+our $hash_parent_base = $input_params{'hash_parent_base'};
 if (defined $hash_parent_base) {
 	if (!validate_refname($hash_parent_base)) {
 		die_error(400, "Invalid hash parent base parameter");
@@ -470,23 +587,23 @@ if (defined $hash_parent_base) {
 }
 
 # other parameters
-our $page = $cgi->param('pg');
+our $page = $input_params{'page'};
 if (defined $page) {
 	if ($page =~ m/[^0-9]/) {
 		die_error(400, "Invalid page parameter");
 	}
 }
 
-our $searchtype = $cgi->param('st');
+our $searchtype = $input_params{'searchtype'};
 if (defined $searchtype) {
 	if ($searchtype =~ m/[^a-z]/) {
 		die_error(400, "Invalid searchtype parameter");
 	}
 }
 
-our $search_use_regexp = $cgi->param('sr');
+our $search_use_regexp = $input_params{'search_use_regexp'};
 
-our $searchtext = $cgi->param('s');
+our $searchtext = $input_params{'searchtext'};
 our $search_regexp;
 if (defined $searchtext) {
 	if (length($searchtext) < 2) {
@@ -495,86 +612,11 @@ if (defined $searchtext) {
 	$search_regexp = $search_use_regexp ? $searchtext : quotemeta $searchtext;
 }
 
-# now read PATH_INFO and use it as alternative to parameters
-sub evaluate_path_info {
-	return if defined $project;
-	my $path_info = $ENV{"PATH_INFO"};
-	return if !$path_info;
-	$path_info =~ s,^/+,,;
-	return if !$path_info;
-	# find which part of PATH_INFO is project
-	$project = $path_info;
-	$project =~ s,/+$,,;
-	while ($project && !check_head_link("$projectroot/$project")) {
-		$project =~ s,/*[^/]*$,,;
-	}
-	# validate project
-	$project = validate_pathname($project);
-	if (!$project ||
-	    ($export_ok && !-e "$projectroot/$project/$export_ok") ||
-	    ($strict_export && !project_in_list($project))) {
-		undef $project;
-		return;
-	}
-	# do not change any parameters if an action is given using the query string
-	return if $action;
-	$path_info =~ s,^\Q$project\E/*,,;
-	my ($refname, $pathname) = split(/:/, $path_info, 2);
-	if (defined $pathname) {
-		# we got "project.git/branch:filename" or "project.git/branch:dir/"
-		# we could use git_get_type(branch:pathname), but it needs $git_dir
-		$pathname =~ s,^/+,,;
-		if (!$pathname || substr($pathname, -1) eq "/") {
-			$action  ||= "tree";
-			$pathname =~ s,/$,,;
-		} else {
-			$action  ||= "blob_plain";
-		}
-		$hash_base ||= validate_refname($refname);
-		$file_name ||= validate_pathname($pathname);
-	} elsif (defined $refname) {
-		# we got "project.git/branch"
-		$action ||= "shortlog";
-		$hash   ||= validate_refname($refname);
-	}
-}
-evaluate_path_info();
-
 # path to the current git repository
 our $git_dir;
 $git_dir = "$projectroot/$project" if $project;
 
 # dispatch
-my %actions = (
-	"blame" => \&git_blame,
-	"blobdiff" => \&git_blobdiff,
-	"blobdiff_plain" => \&git_blobdiff_plain,
-	"blob" => \&git_blob,
-	"blob_plain" => \&git_blob_plain,
-	"commitdiff" => \&git_commitdiff,
-	"commitdiff_plain" => \&git_commitdiff_plain,
-	"commit" => \&git_commit,
-	"forks" => \&git_forks,
-	"heads" => \&git_heads,
-	"history" => \&git_history,
-	"log" => \&git_log,
-	"rss" => \&git_rss,
-	"atom" => \&git_atom,
-	"search" => \&git_search,
-	"search_help" => \&git_search_help,
-	"shortlog" => \&git_shortlog,
-	"summary" => \&git_summary,
-	"tag" => \&git_tag,
-	"tags" => \&git_tags,
-	"tree" => \&git_tree,
-	"snapshot" => \&git_snapshot,
-	"object" => \&git_object,
-	# those below don't need $project
-	"opml" => \&git_opml,
-	"project_list" => \&git_project_list,
-	"project_index" => \&git_project_index,
-);
-
 if (!defined $action) {
 	if (defined $hash) {
 		$action = git_get_type($hash);
@@ -604,35 +646,12 @@ sub href (%) {
 	# default is to use -absolute url() i.e. $my_uri
 	my $href = $params{-full} ? $my_url : $my_uri;
 
-	# XXX: Warning: If you touch this, check the search form for updating,
-	# too.
-
-	my @mapping = (
-		project => "p",
-		action => "a",
-		file_name => "f",
-		file_parent => "fp",
-		hash => "h",
-		hash_parent => "hp",
-		hash_base => "hb",
-		hash_parent_base => "hpb",
-		page => "pg",
-		order => "o",
-		searchtext => "s",
-		searchtype => "st",
-		snapshot_format => "sf",
-		extra_options => "opt",
-		search_use_regexp => "sr",
-	);
-	my %mapping = @mapping;
-
 	$params{'project'} = $project unless exists $params{'project'};
 
 	if ($params{-replay}) {
-		while (my ($name, $symbol) = each %mapping) {
+		while (my ($name, $symbol) = each %cgi_param_mapping) {
 			if (!exists $params{$name}) {
-				# to allow for multivalued params we use arrayref form
-				$params{$name} = [ $cgi->param($symbol) ];
+				$params{$name} = $input_params{$name};
 			}
 		}
 	}
@@ -651,8 +670,8 @@ sub href (%) {
 
 	# now encode the parameters explicitly
 	my @result = ();
-	for (my $i = 0; $i < @mapping; $i += 2) {
-		my ($name, $symbol) = ($mapping[$i], $mapping[$i+1]);
+	for (my $i = 0; $i < @cgi_param_mapping; $i += 2) {
+		my ($name, $symbol) = ($cgi_param_mapping[$i], $cgi_param_mapping[$i+1]);
 		if (defined $params{$name}) {
 			if (ref($params{$name}) eq "ARRAY") {
 				foreach my $par (@{$params{$name}}) {
@@ -672,6 +691,25 @@ sub href (%) {
 ## ======================================================================
 ## validation, quoting/unquoting and escaping
 
+sub validate_action {
+	my $input = shift || return undef;
+	return undef unless exists $actions{$action};
+	return $action;
+}
+
+sub validate_project {
+	my $input = shift || return undef;
+	if (!validate_pathname($input) ||
+		!(-d "$projectroot/$input") ||
+		!check_head_link("$projectroot/$input") ||
+		($export_ok && !(-e "$projectroot/$input/$export_ok")) ||
+		($strict_export && !project_in_list($input))) {
+		return undef;
+	} else {
+		return $input;
+	}
+}
+
 sub validate_pathname {
 	my $input = shift || return undef;
 
@@ -3988,7 +4026,7 @@ sub git_search_grep_body {
 ## actions
 
 sub git_project_list {
-	my $order = $cgi->param('o');
+	my $order = $input_params{'order'};
 	if (defined $order && $order !~ m/none|project|descr|owner|age/) {
 		die_error(400, "Unknown order parameter");
 	}
@@ -4011,7 +4049,7 @@ sub git_project_list {
 }
 
 sub git_forks {
-	my $order = $cgi->param('o');
+	my $order = $input_params{'order'};
 	if (defined $order && $order !~ m/none|project|descr|owner|age/) {
 		die_error(400, "Unknown order parameter");
 	}
@@ -4545,7 +4583,7 @@ sub git_snapshot {
 	my @supported_fmts = gitweb_check_feature('snapshot');
 	@supported_fmts = filter_snapshot_fmts(@supported_fmts);
 
-	my $format = $cgi->param('sf');
+	my $format = $input_params{'snapshot_format'};
 	if (!@supported_fmts) {
 		die_error(403, "Snapshots not allowed");
 	}
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] gitweb: refactor input parameters parse/validation
  2008-10-03 17:19 [PATCH] gitweb: refactor input parameters parse/validation Giuseppe Bilotta
@ 2008-10-07 10:57 ` Jakub Narebski
  2008-10-07 12:42   ` Giuseppe Bilotta
  2008-10-08  9:26   ` [PATCHv2] " Giuseppe Bilotta
  0 siblings, 2 replies; 11+ messages in thread
From: Jakub Narebski @ 2008-10-07 10:57 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: git, Shawn O. Pearce, Matthias Lederhofer

On Fri, 3 Oct 2008, Giuseppe Bilotta wrote:

> Since input parameters can be obtained both from CGI parameters and
> PATH_INFO, we would like most of the code to be agnostic about the way
> parameters were retrieved. We thus collect all the parameters into the
> new %input_params hash, delaying validation after the collection is
> completed.
> 
> Although the kludge removal is minimal at the moment, it makes life much
> easier for future expansions such as more extensive PATH_INFO use or
> other form of input (e.g. command-line support).

Looks good, beside double validation of parameters taken from path_info
(once in evaluate_path_info(), and once when setting 'action' variables).
 
> Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
> ---
> 
> As recommended by Jakub, I reworked the input parameter collection refactoring
> into a separate patch preluding to rather than intermixed with the PATH_INFO
> enhancement work.

Thanks.
 
>  gitweb/gitweb.perl |  304 +++++++++++++++++++++++++++++-----------------------
>  1 files changed, 171 insertions(+), 133 deletions(-)
> 
> diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
> index 83f810a..329f789 100755
> --- a/gitweb/gitweb.perl
> +++ b/gitweb/gitweb.perl
> @@ -29,7 +29,9 @@ our $my_uri = $cgi->url(-absolute => 1);
>  
>  # if we're called with PATH_INFO, we have to strip that
>  # from the URL to find our real URL
> -if (my $path_info = $ENV{"PATH_INFO"}) {
> +# we make $path_info global because it's also used later on
> +my $path_info = $ENV{"PATH_INFO"};
> +if ($path_info) {
>  	$my_url =~ s,\Q$path_info\E$,,;
>  	$my_uri =~ s,\Q$path_info\E$,,;
>  }

I think we could have left it as it was, i.e. have $path_info be
a local variable.  Unless there is something I don't see.

[...]
> +# input parameters are stored with the long parameter name as key, so we need
> +# the name -> CGI key mapping here. This will also be used in the href
> +# subroutine to convert parameters to their CGI equivalent.

Actually href() subroutine would be called many times, while parsing of
input params happens only once; that is why it is longname => shortname
hash, and not the reverse (shortname => longname).  Also it does not
result in loss of performance as we parse all input params anyway.

This explanation might be obvious, or could be put in comment
(increasing comment to code count ;-))

> +my @cgi_param_mapping = (
> +my %cgi_param_mapping = @cgi_param_mapping;
> +my %actions = (

Perhaps %allowed_options should also be there, to have everything about
input params in single place... or perhaps not...

> +# fill %input_params with the CGI parameters. All values except for 'opt'
> +# should be single values, but opt can be an array. We should probably
> +# build an array of parameters that can be multi-valued, but since for the time
> +# being it's only this one, we just single it out
> +while (my ($name, $symbol) = each %cgi_param_mapping) {
> +	if ($symbol eq 'opt') {
> +		$input_params{$name} = [$cgi->param($symbol)];

I would personally use "[ $cgi->param($symbol) ]" instead for better
(IMVHO) visibility, but this is just a matter of taste.

> +	} else {
> +		$input_params{$name} = $cgi->param($symbol);
> +	}
> +}

Nice. I guess that you have checked that you caught all
$cgi->param(...) calls, and there aren't any beside those above...


Now we don't have mdiff, following code movement; tool which was at
beginnings of content movement detection in early days of git-blame.
Therefore I have moved the original code, to easier check the changes
(separating them from code movement).

> -# now read PATH_INFO and use it as alternative to parameters
> -sub evaluate_path_info {
> -	return if defined $project;
> -	my $path_info = $ENV{"PATH_INFO"};
> -	return if !$path_info;
> -	$path_info =~ s,^/+,,;
> -	return if !$path_info;
> -	# find which part of PATH_INFO is project
> -	$project = $path_info;
> -	$project =~ s,/+$,,;
> -	while ($project && !check_head_link("$projectroot/$project")) {
> -		$project =~ s,/*[^/]*$,,;
> -	}
> -	# validate project
> -	$project = validate_pathname($project);
> -	if (!$project ||
> -	    ($export_ok && !-e "$projectroot/$project/$export_ok") ||
> -	    ($strict_export && !project_in_list($project))) {
> -		undef $project;
> -		return;
> -	}
> -	# do not change any parameters if an action is given using the query string
> -	return if $action;
> -	$path_info =~ s,^\Q$project\E/*,,;
> -	my ($refname, $pathname) = split(/:/, $path_info, 2);
> -	if (defined $pathname) {
> -		# we got "project.git/branch:filename" or "project.git/branch:dir/"
> -		# we could use git_get_type(branch:pathname), but it needs $git_dir
> -		$pathname =~ s,^/+,,;
> -		if (!$pathname || substr($pathname, -1) eq "/") {
> -			$action  ||= "tree";
> -			$pathname =~ s,/$,,;
> -		} else {
> -			$action  ||= "blob_plain";
> -		}
> -		$hash_base ||= validate_refname($refname);
> -		$file_name ||= validate_pathname($pathname);
> -	} elsif (defined $refname) {
> -		# we got "project.git/branch"
> -		$action ||= "shortlog";
> -		$hash   ||= validate_refname($refname);
> -	}
> -}
> -evaluate_path_info();


> +# now read PATH_INFO and update the parameter list for missing parameters
> +sub evaluate_path_info {
> +	return if defined $input_params{'project'};

I was 'my $path_info = $ENV{"PATH_INFO"};' there, when $path_info
wasn't global variable.  Any advantages to having it global?

(This is just a minor thing, not worth resending patch over, I think).

> +	return if !$path_info;
> +	$path_info =~ s,^/+,,;
> +	return if !$path_info;
> +

Nice adding lines to separate parts of code.

> +	# find which part of PATH_INFO is project
> +	my $project = $path_info;

Hmmm... now $project is local (lexically) here.

> +	$project =~ s,/+$,,;
> +	while ($project && !check_head_link("$projectroot/$project")) {
> +		$project =~ s,/*[^/]*$,,;
> +	}
> +	# validate project
> +	$project = validate_project($project);

I'm not sure if it is worth worrying over, but I think you repeat 
check_head_link() check here.

[After examining code further].  But I think you do double validation;
once you do it here, and once you do it copying to global variables
such as $action or $project, and double checking check_head_link()
won't be easy to avoid; fortunately it is cheap filesystem-level check
(might be slow only when stat is extremely slow, and is not cached).

> +	return unless $project;
> +	$input_params{'project'} = $project;
> +
> +	# do not change any parameters if an action is given using the query string
> +	return if $input_params{'action'};
> +	$path_info =~ s,^\Q$project\E/*,,;

Hmmm... I wonder if it is good idea. It was done in commit 645927c
(gitweb: fix warnings in PATH_INFO code and add export_ok/strict_export)
by Matthias Lederhofer, but I don't see why we do not remove project
from path_info if action is set.  But this is belongs probably to
independent code cleanup (if it is needed), so don't worry about that.

Perhaps it would be good to add empty line here before beginning of
'hash' and 'hash_base:file_name' handling.

> +	my ($refname, $pathname) = split(/:/, $path_info, 2);
> +	if (defined $pathname) {
> +		# we got "project.git/branch:filename" or "project.git/branch:dir/"
> +		# we could use git_get_type(branch:pathname), but it needs $git_dir

Additionally git_get_type(<extended sha1>) is additional call to git
(additional fork) currently; that might change with gitweb caching code,
which uses permanent connection to 'git cat-file --batch/--batch-check'
for that.

> +		$pathname =~ s,^/+,,;
> +		if (!$pathname || substr($pathname, -1) eq "/") {
> +			$input_params{'action'} = "tree";
> +			$pathname =~ s,/$,,;
> +		} else {
> +			$input_params{'action'} = "blob_plain";
> +		}
> +		$input_params{'hash_base'} ||= validate_refname($refname);
> +		$input_params{'file_name'} ||= validate_pathname($pathname);
> +	} elsif (defined $refname) {
> +		# we got "project.git/branch"
> +		$input_params{'action'} = "shortlog";
> +		$input_params{'hash'} ||= validate_refname($refname);
> +	}
> +}

Nice defensive programming with '||=' here for setting %input_params.

[After examining code further].  But I think you do double validation;
see comment below.

> +evaluate_path_info();
> +
> +our $action = $input_params{'action'};
>  if (defined $action) {
> -	if ($action =~ m/[^0-9a-zA-Z\.\-_]/) {
> +	if (!validate_action($action)) {
>  		die_error(400, "Invalid action parameter");
>  	}
>  }

Hmm... don't you do double validation now? Once in evaluate_path_info,
and once copying to global variables like $action?

Very nice moving param validation for 'a'/'action' parameter to 
validate_action() subroutine.
  
>  # parameters which are pathnames
> -our $project = $cgi->param('p');
> +our $project = $input_params{'project'};
>  if (defined $project) {
> -	if (!validate_pathname($project) ||
> -	    !(-d "$projectroot/$project") ||
> -	    !check_head_link("$projectroot/$project") ||
> -	    ($export_ok && !(-e "$projectroot/$project/$export_ok")) ||
> -	    ($strict_export && !project_in_list($project))) {
> +	if (!validate_project($project)) {
>  		undef $project;
>  		die_error(404, "No such project");
>  	}
>  }

Nice using validate_project() here.

> -our @extra_options = $cgi->param('opt');
> -if (defined @extra_options) {
> -	foreach my $opt (@extra_options) {
> -		if (not exists $allowed_options{$opt}) {
> -			die_error(400, "Invalid option parameter");
> -		}
> -		if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
> -			die_error(400, "Invalid option parameter for this action");
> -		}
> +our @extra_options = @{$input_params{'extra_options'}};
> +# @extra_options is always defined, since it can only be (currently) set from
> +# CGI, and $cgi->param() returns the empty array in array context

...if param was not set.

Thanks for catching that.

> +foreach my $opt (@extra_options) {
> +	if (not exists $allowed_options{$opt}) {
> +		die_error(400, "Invalid option parameter");
> +	}
> +	if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
> +		die_error(400, "Invalid option parameter for this action");
>  	}
>  }
  
> @@ -470,23 +587,23 @@ if (defined $hash_parent_base) {
>  }
>  
>  # other parameters
> -our $page = $cgi->param('pg');
> +our $page = $input_params{'page'};
>  if (defined $page) {
>  	if ($page =~ m/[^0-9]/) {

Nice, almost trivial change.

> @@ -604,35 +646,12 @@ sub href (%) {
[...]
>  	if ($params{-replay}) {
> -		while (my ($name, $symbol) = each %mapping) {
> +		while (my ($name, $symbol) = each %cgi_param_mapping) {
>  			if (!exists $params{$name}) {
> -				# to allow for multivalued params we use arrayref form
> -				$params{$name} = [ $cgi->param($symbol) ];
> +				$params{$name} = $input_params{$name};
>  			}
>  		}
>  	}

Nice cleanup.

> @@ -672,6 +691,25 @@ sub href (%) {
>  ## ======================================================================
>  ## validation, quoting/unquoting and escaping
>  
> +sub validate_action {
> +	my $input = shift || return undef;
> +	return undef unless exists $actions{$action};
> +	return $action;
> +}
> +
> +sub validate_project {
> +	my $input = shift || return undef;
> +	if (!validate_pathname($input) ||
> +		!(-d "$projectroot/$input") ||
> +		!check_head_link("$projectroot/$input") ||
> +		($export_ok && !(-e "$projectroot/$input/$export_ok")) ||
> +		($strict_export && !project_in_list($input))) {
> +		return undef;
> +	} else {
> +		return $input;
> +	}
> +}

Nice.

-- 
Jakub Narebski
Poland

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] gitweb: refactor input parameters parse/validation
  2008-10-07 10:57 ` Jakub Narebski
@ 2008-10-07 12:42   ` Giuseppe Bilotta
  2008-10-07 14:39     ` Jakub Narebski
  2008-10-08  9:26   ` [PATCHv2] " Giuseppe Bilotta
  1 sibling, 1 reply; 11+ messages in thread
From: Giuseppe Bilotta @ 2008-10-07 12:42 UTC (permalink / raw)
  To: Jakub Narebski; +Cc: git, Shawn O. Pearce, Matthias Lederhofer

On Tue, Oct 7, 2008 at 12:57 PM, Jakub Narebski <jnareb@gmail.com> wrote:
> On Fri, 3 Oct 2008, Giuseppe Bilotta wrote:

>> +# we make $path_info global because it's also used later on
>> +my $path_info = $ENV{"PATH_INFO"};
>> +if ($path_info) {
>>       $my_url =~ s,\Q$path_info\E$,,;
>>       $my_uri =~ s,\Q$path_info\E$,,;
>>  }
>
> I think we could have left it as it was, i.e. have $path_info be
> a local variable.  Unless there is something I don't see.

Well, it's being reassigned locally in evaluate_path_info ... since
we're looking at it anyway, I thought it made sense to do it once and
for all. The var could also potentially have other uses, such as
enabling the use of path_info whenever path_info was used (something
which is currently not done), so ...

>> +# input parameters are stored with the long parameter name as key, so we need
>> +# the name -> CGI key mapping here. This will also be used in the href
>> +# subroutine to convert parameters to their CGI equivalent.
>
> Actually href() subroutine would be called many times, while parsing of
> input params happens only once; that is why it is longname => shortname
> hash, and not the reverse (shortname => longname).  Also it does not
> result in loss of performance as we parse all input params anyway.
>
> This explanation might be obvious, or could be put in comment
> (increasing comment to code count ;-))

It's actually a good point, I'll put it in (and not for the comment to
code ratio 8-D)

>> +my @cgi_param_mapping = (
>> +my %cgi_param_mapping = @cgi_param_mapping;
>> +my %actions = (
>
> Perhaps %allowed_options should also be there, to have everything about
> input params in single place... or perhaps not...

I had thought about it. It makes sense in a way to put it after
cgi_param_mapping. I didn't do it to reduce the code move.

>> +# fill %input_params with the CGI parameters. All values except for 'opt'
>> +# should be single values, but opt can be an array. We should probably
>> +# build an array of parameters that can be multi-valued, but since for the time
>> +# being it's only this one, we just single it out
>> +while (my ($name, $symbol) = each %cgi_param_mapping) {
>> +     if ($symbol eq 'opt') {
>> +             $input_params{$name} = [$cgi->param($symbol)];
>
> I would personally use "[ $cgi->param($symbol) ]" instead for better
> (IMVHO) visibility, but this is just a matter of taste.

Ah yes, much more readable. I'll do it.

>> +     } else {
>> +             $input_params{$name} = $cgi->param($symbol);
>> +     }
>> +}
>
> Nice. I guess that you have checked that you caught all
> $cgi->param(...) calls, and there aren't any beside those above...

I did a grep so I *think* I didn't miss any.


>> +# now read PATH_INFO and update the parameter list for missing parameters
>> +sub evaluate_path_info {
>> +     return if defined $input_params{'project'};
>
> I was 'my $path_info = $ENV{"PATH_INFO"};' there, when $path_info
> wasn't global variable.  Any advantages to having it global?
>
> (This is just a minor thing, not worth resending patch over, I think).

As I mentioned above, just sparing a double assignment for something
that is going to be checked anyway both at the beginning and here. And
it can also have other uses, potentially.

>> +     # find which part of PATH_INFO is project
>> +     my $project = $path_info;
>
> Hmmm... now $project is local (lexically) here.

Yes, itt's only used temporarily here, to see if  a proper $project
can be defined. It gets redefined outside. It just made sense to name
it like this 8-)

>> +     $project =~ s,/+$,,;
>> +     while ($project && !check_head_link("$projectroot/$project")) {
>> +             $project =~ s,/*[^/]*$,,;
>> +     }
>> +     # validate project
>> +     $project = validate_project($project);
>
> I'm not sure if it is worth worrying over, but I think you repeat
> check_head_link() check here.
>
> [After examining code further].  But I think you do double validation;
> once you do it here, and once you do it copying to global variables
> such as $action or $project, and double checking check_head_link()
> won't be easy to avoid; fortunately it is cheap filesystem-level check
> (might be slow only when stat is extremely slow, and is not cached).

I know. This is actually the reason why I had interleaved path_info
definition and global validation in my previous version of the patch.
The big issue here is that path_info evaluation _needs_ (partial)
validation.

A possible alternative could be to only put validated parameters into
%input_params. This would completely separate the validation for cgi
and path_info (modulo shared subs).

Of course, the check_head_link would still be repeated inside
evaluate_path_info, but the other params could skip a double
validation.

>> +     return unless $project;
>> +     $input_params{'project'} = $project;
>> +
>> +     # do not change any parameters if an action is given using the query string
>> +     return if $input_params{'action'};
>> +     $path_info =~ s,^\Q$project\E/*,,;
>
> Hmmm... I wonder if it is good idea. It was done in commit 645927c
> (gitweb: fix warnings in PATH_INFO code and add export_ok/strict_export)
> by Matthias Lederhofer, but I don't see why we do not remove project
> from path_info if action is set.  But this is belongs probably to
> independent code cleanup (if it is needed), so don't worry about that.

path_info information is simply discarded if action was already
defined, because it was assumed that path_info was only used for
default action. it's something that might be retaught with the rest of
the path_info enhancements.

> Perhaps it would be good to add empty line here before beginning of
> 'hash' and 'hash_base:file_name' handling.

Can do.

>> +     my ($refname, $pathname) = split(/:/, $path_info, 2);
>> +     if (defined $pathname) {
>> +             # we got "project.git/branch:filename" or "project.git/branch:dir/"
>> +             # we could use git_get_type(branch:pathname), but it needs $git_dir
>
> Additionally git_get_type(<extended sha1>) is additional call to git
> (additional fork) currently; that might change with gitweb caching code,
> which uses permanent connection to 'git cat-file --batch/--batch-check'
> for that.

I can add that to the comment.

>> +             $pathname =~ s,^/+,,;
>> +             if (!$pathname || substr($pathname, -1) eq "/") {
>> +                     $input_params{'action'} = "tree";
>> +                     $pathname =~ s,/$,,;
>> +             } else {
>> +                     $input_params{'action'} = "blob_plain";
>> +             }
>> +             $input_params{'hash_base'} ||= validate_refname($refname);
>> +             $input_params{'file_name'} ||= validate_pathname($pathname);
>> +     } elsif (defined $refname) {
>> +             # we got "project.git/branch"
>> +             $input_params{'action'} = "shortlog";
>> +             $input_params{'hash'} ||= validate_refname($refname);
>> +     }
>> +}
>
> Nice defensive programming with '||=' here for setting %input_params.
>
> [After examining code further].  But I think you do double validation;
> see comment below.

[see above]

>> +evaluate_path_info();
>> +
>> +our $action = $input_params{'action'};
>>  if (defined $action) {
>> -     if ($action =~ m/[^0-9a-zA-Z\.\-_]/) {
>> +     if (!validate_action($action)) {
>>               die_error(400, "Invalid action parameter");
>>       }
>>  }
>
> Hmm... don't you do double validation now? Once in evaluate_path_info,
> and once copying to global variables like $action?
>
> Very nice moving param validation for 'a'/'action' parameter to
> validate_action() subroutine.

As I mentioned, this could be the solution to the double validation,
I'll just validate the cgi params separately from the path_info ones,
and only embed validated parameters in the %input_params hash

>> +our @extra_options = @{$input_params{'extra_options'}};
>> +# @extra_options is always defined, since it can only be (currently) set from
>> +# CGI, and $cgi->param() returns the empty array in array context
>
> ...if param was not set.

Ah, yes. I'll add it.

>>       if ($params{-replay}) {
>> -             while (my ($name, $symbol) = each %mapping) {
>> +             while (my ($name, $symbol) = each %cgi_param_mapping) {
>>                       if (!exists $params{$name}) {
>> -                             # to allow for multivalued params we use arrayref form
>> -                             $params{$name} = [ $cgi->param($symbol) ];
>> +                             $params{$name} = $input_params{$name};
>>                       }
>>               }
>>       }
>
> Nice cleanup.

Well, one would expect, given that this was the 'trigger' ;-)

I'll try to whip up a revised patch ASAP.

-- 
Giuseppe "Oblomov" Bilotta

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] gitweb: refactor input parameters parse/validation
  2008-10-07 12:42   ` Giuseppe Bilotta
@ 2008-10-07 14:39     ` Jakub Narebski
  2008-10-08  9:10       ` Giuseppe Bilotta
  0 siblings, 1 reply; 11+ messages in thread
From: Jakub Narebski @ 2008-10-07 14:39 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: git, Shawn O. Pearce

Giuseppe Bilotta wrote:
> On Tue, Oct 7, 2008 at 12:57 PM, Jakub Narebski <jnareb@gmail.com> wrote:
>> On Fri, 3 Oct 2008, Giuseppe Bilotta wrote:

>>> +     # find which part of PATH_INFO is project
>>> +     my $project = $path_info;
>>
>> Hmmm... now $project is local (lexically) here.
> 
> Yes, itt's only used temporarily here, to see if  a proper $project
> can be defined. It gets redefined outside. It just made sense to name
> it like this 8-)

Well, if $project is local in evaluate_path_info(), so could be
$path_info...
 
>>> +     $project =~ s,/+$,,;
>>> +     while ($project && !check_head_link("$projectroot/$project")) {
>>> +             $project =~ s,/*[^/]*$,,;
>>> +     }
>>> +     # validate project
>>> +     $project = validate_project($project);
>>
>> I'm not sure if it is worth worrying over, but I think you repeat
>> check_head_link() check here.
>>
>> [After examining code further].  But I think you do double validation;
>> once you do it here, and once you do it copying to global variables
>> such as $action or $project, and double checking check_head_link()
>> won't be easy to avoid; fortunately it is cheap filesystem-level check
>> (might be slow only when stat is extremely slow, and is not cached).
> 
> I know. This is actually the reason why I had interleaved path_info
> definition and global validation in my previous version of the patch.
> The big issue here is that path_info evaluation _needs_ (partial)
> validation.
> 
> A possible alternative could be to only put validated parameters into
> %input_params. This would completely separate the validation for cgi
> and path_info (modulo shared subs).
> 
> Of course, the check_head_link would still be repeated inside
> evaluate_path_info, but the other params could skip a double
> validation.

Wouldn't it be simpler and as good solution to just leave validation
off evaluate_path_info() (well, of course except check_head_link() test),
and allow it to be validated when assigning global 'params' variables?
check_head_link() would be repeated for path_info links, but that
should not affect performance much.

-- 
Jakub Narebski
Poland

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] gitweb: refactor input parameters parse/validation
  2008-10-07 14:39     ` Jakub Narebski
@ 2008-10-08  9:10       ` Giuseppe Bilotta
  2008-10-08  9:45         ` Jakub Narebski
  0 siblings, 1 reply; 11+ messages in thread
From: Giuseppe Bilotta @ 2008-10-08  9:10 UTC (permalink / raw)
  To: Jakub Narebski; +Cc: git, Shawn O. Pearce

On Tue, Oct 7, 2008 at 4:39 PM, Jakub Narebski <jnareb@gmail.com> wrote:
> Giuseppe Bilotta wrote:
>> On Tue, Oct 7, 2008 at 12:57 PM, Jakub Narebski <jnareb@gmail.com> wrote:
>>> On Fri, 3 Oct 2008, Giuseppe Bilotta wrote:
>
>>>> +     # find which part of PATH_INFO is project
>>>> +     my $project = $path_info;
>>>
>>> Hmmm... now $project is local (lexically) here.
>>
>> Yes, itt's only used temporarily here, to see if  a proper $project
>> can be defined. It gets redefined outside. It just made sense to name
>> it like this 8-)
>
> Well, if $project is local in evaluate_path_info(), so could be
> $path_info...

But $path_info in evaluate_path_info() has the same purpose and the
same (initial) data as $path_info outside of evaluate_path_info(), the
same is not true for $project.

>>>> +     $project =~ s,/+$,,;
>>>> +     while ($project && !check_head_link("$projectroot/$project")) {
>>>> +             $project =~ s,/*[^/]*$,,;
>>>> +     }
>>>> +     # validate project
>>>> +     $project = validate_project($project);
>>>
>>> I'm not sure if it is worth worrying over, but I think you repeat
>>> check_head_link() check here.
>>>
>>> [After examining code further].  But I think you do double validation;
>>> once you do it here, and once you do it copying to global variables
>>> such as $action or $project, and double checking check_head_link()
>>> won't be easy to avoid; fortunately it is cheap filesystem-level check
>>> (might be slow only when stat is extremely slow, and is not cached).
>>
>> I know. This is actually the reason why I had interleaved path_info
>> definition and global validation in my previous version of the patch.
>> The big issue here is that path_info evaluation _needs_ (partial)
>> validation.
>>
>> A possible alternative could be to only put validated parameters into
>> %input_params. This would completely separate the validation for cgi
>> and path_info (modulo shared subs).
>>
>> Of course, the check_head_link would still be repeated inside
>> evaluate_path_info, but the other params could skip a double
>> validation.
>
> Wouldn't it be simpler and as good solution to just leave validation
> off evaluate_path_info() (well, of course except check_head_link() test),
> and allow it to be validated when assigning global 'params' variables?
> check_head_link() would be repeated for path_info links, but that
> should not affect performance much.

Well, it does have a performance hit in the case of invalid $project
since it spends time working on the rest of the URL before bailing
out, but it's probably the cleanest solution. I'll do it this way.

-- 
Giuseppe "Oblomov" Bilotta

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCHv2] gitweb: refactor input parameters parse/validation
  2008-10-07 10:57 ` Jakub Narebski
  2008-10-07 12:42   ` Giuseppe Bilotta
@ 2008-10-08  9:26   ` Giuseppe Bilotta
  2008-10-10  8:37     ` Jakub Narebski
  2008-10-10 15:01     ` Shawn O. Pearce
  1 sibling, 2 replies; 11+ messages in thread
From: Giuseppe Bilotta @ 2008-10-08  9:26 UTC (permalink / raw)
  To: git; +Cc: Jakub Narebski, Shawn O. Pearce, Giuseppe Bilotta

Since input parameters can be obtained both from CGI parameters and
PATH_INFO, we would like most of the code to be agnostic about the way
parameters were retrieved. We thus collect all the parameters into the
new %input_params hash, delaying validation after the collection is
completed.

Although the kludge removal is minimal at the moment, it makes life much
easier for future expansions such as more extensive PATH_INFO use or
other form of input such as command-line support.

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---

Some cleanups and clarifications suggested by Jakub were merged in this
revised version of the patch, although I'm keeping $path_info global

 gitweb/gitweb.perl |  315 +++++++++++++++++++++++++++++-----------------------
 1 files changed, 178 insertions(+), 137 deletions(-)

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 83f810a..c54d82b 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -29,7 +29,9 @@ our $my_uri = $cgi->url(-absolute => 1);
 
 # if we're called with PATH_INFO, we have to strip that
 # from the URL to find our real URL
-if (my $path_info = $ENV{"PATH_INFO"}) {
+# we make $path_info global because it's also used later on
+my $path_info = $ENV{"PATH_INFO"};
+if ($path_info) {
 	$my_url =~ s,\Q$path_info\E$,,;
 	$my_uri =~ s,\Q$path_info\E$,,;
 }
@@ -390,34 +392,155 @@ $projects_list ||= $projectroot;
 
 # ======================================================================
 # input validation and dispatch
-our $action = $cgi->param('a');
+
+# input parameters can be collected from a variety of sources (presently, CGI
+# and PATH_INFO), so we define an %input_params hash that collects them all
+# together during validation: this allows subsequent uses (e.g. href()) to be
+# agnostic of the parameter origin
+
+my %input_params = ();
+
+# input parameters are stored with the long parameter name as key. This will
+# also be used in the href subroutine to convert parameters to their CGI
+# equivalent, and since the href() usage is the most frequent one, we store
+# the name -> CGI key mapping here, instead of the reverse.
+#
+# XXX: Warning: If you touch this, check the search form for updating,
+# too.
+
+my @cgi_param_mapping = (
+	project => "p",
+	action => "a",
+	file_name => "f",
+	file_parent => "fp",
+	hash => "h",
+	hash_parent => "hp",
+	hash_base => "hb",
+	hash_parent_base => "hpb",
+	page => "pg",
+	order => "o",
+	searchtext => "s",
+	searchtype => "st",
+	snapshot_format => "sf",
+	extra_options => "opt",
+	search_use_regexp => "sr",
+);
+my %cgi_param_mapping = @cgi_param_mapping;
+
+# we will also need to know the possible actions, for validation
+my %actions = (
+	"blame" => \&git_blame,
+	"blobdiff" => \&git_blobdiff,
+	"blobdiff_plain" => \&git_blobdiff_plain,
+	"blob" => \&git_blob,
+	"blob_plain" => \&git_blob_plain,
+	"commitdiff" => \&git_commitdiff,
+	"commitdiff_plain" => \&git_commitdiff_plain,
+	"commit" => \&git_commit,
+	"forks" => \&git_forks,
+	"heads" => \&git_heads,
+	"history" => \&git_history,
+	"log" => \&git_log,
+	"rss" => \&git_rss,
+	"atom" => \&git_atom,
+	"search" => \&git_search,
+	"search_help" => \&git_search_help,
+	"shortlog" => \&git_shortlog,
+	"summary" => \&git_summary,
+	"tag" => \&git_tag,
+	"tags" => \&git_tags,
+	"tree" => \&git_tree,
+	"snapshot" => \&git_snapshot,
+	"object" => \&git_object,
+	# those below don't need $project
+	"opml" => \&git_opml,
+	"project_list" => \&git_project_list,
+	"project_index" => \&git_project_index,
+);
+
+# finally, we have the hash of allowed extra_options for the commands that
+# allow them
+my %allowed_options = (
+	"--no-merges" => [ qw(rss atom log shortlog history) ],
+);
+
+# fill %input_params with the CGI parameters. All values except for 'opt'
+# should be single values, but opt can be an array. We should probably
+# build an array of parameters that can be multi-valued, but since for the time
+# being it's only this one, we just single it out
+while (my ($name, $symbol) = each %cgi_param_mapping) {
+	if ($symbol eq 'opt') {
+		$input_params{$name} = [ $cgi->param($symbol) ];
+	} else {
+		$input_params{$name} = $cgi->param($symbol);
+	}
+}
+
+# now read PATH_INFO and update the parameter list for missing parameters
+sub evaluate_path_info {
+	return if defined $input_params{'project'};
+	return if !$path_info;
+	$path_info =~ s,^/+,,;
+	return if !$path_info;
+
+	# find which part of PATH_INFO is project
+	my $project = $path_info;
+	$project =~ s,/+$,,;
+	while ($project && !check_head_link("$projectroot/$project")) {
+		$project =~ s,/*[^/]*$,,;
+	}
+	return unless $project;
+	$input_params{'project'} = $project;
+
+	# do not change any parameters if an action is given using the query string
+	return if $input_params{'action'};
+	$path_info =~ s,^\Q$project\E/*,,;
+
+	my ($refname, $pathname) = split(/:/, $path_info, 2);
+	if (defined $pathname) {
+		# we got "project.git/branch:filename" or "project.git/branch:dir/"
+		# we could use git_get_type(branch:pathname), but it needs $git_dir
+		$pathname =~ s,^/+,,;
+		if (!$pathname || substr($pathname, -1) eq "/") {
+			$input_params{'action'} = "tree";
+			$pathname =~ s,/$,,;
+		} else {
+			$input_params{'action'} = "blob_plain";
+		}
+		$input_params{'hash_base'} ||= $refname;
+		$input_params{'file_name'} ||= $pathname;
+	} elsif (defined $refname) {
+		# we got "project.git/branch"
+		$input_params{'action'} = "shortlog";
+		$input_params{'hash'} ||= $refname;
+	}
+}
+evaluate_path_info();
+
+our $action = $input_params{'action'};
 if (defined $action) {
-	if ($action =~ m/[^0-9a-zA-Z\.\-_]/) {
+	if (!validate_action($action)) {
 		die_error(400, "Invalid action parameter");
 	}
 }
 
 # parameters which are pathnames
-our $project = $cgi->param('p');
+our $project = $input_params{'project'};
 if (defined $project) {
-	if (!validate_pathname($project) ||
-	    !(-d "$projectroot/$project") ||
-	    !check_head_link("$projectroot/$project") ||
-	    ($export_ok && !(-e "$projectroot/$project/$export_ok")) ||
-	    ($strict_export && !project_in_list($project))) {
+	if (!validate_project($project)) {
 		undef $project;
 		die_error(404, "No such project");
 	}
 }
 
-our $file_name = $cgi->param('f');
+our $file_name = $input_params{'file_name'};
 if (defined $file_name) {
 	if (!validate_pathname($file_name)) {
 		die_error(400, "Invalid file parameter");
 	}
 }
 
-our $file_parent = $cgi->param('fp');
+our $file_parent = $input_params{'file_parent'};
 if (defined $file_parent) {
 	if (!validate_pathname($file_parent)) {
 		die_error(400, "Invalid file parent parameter");
@@ -425,44 +548,41 @@ if (defined $file_parent) {
 }
 
 # parameters which are refnames
-our $hash = $cgi->param('h');
+our $hash = $input_params{'hash'};
 if (defined $hash) {
 	if (!validate_refname($hash)) {
 		die_error(400, "Invalid hash parameter");
 	}
 }
 
-our $hash_parent = $cgi->param('hp');
+our $hash_parent = $input_params{'hash_parent'};
 if (defined $hash_parent) {
 	if (!validate_refname($hash_parent)) {
 		die_error(400, "Invalid hash parent parameter");
 	}
 }
 
-our $hash_base = $cgi->param('hb');
+our $hash_base = $input_params{'hash_base'};
 if (defined $hash_base) {
 	if (!validate_refname($hash_base)) {
 		die_error(400, "Invalid hash base parameter");
 	}
 }
 
-my %allowed_options = (
-	"--no-merges" => [ qw(rss atom log shortlog history) ],
-);
-
-our @extra_options = $cgi->param('opt');
-if (defined @extra_options) {
-	foreach my $opt (@extra_options) {
-		if (not exists $allowed_options{$opt}) {
-			die_error(400, "Invalid option parameter");
-		}
-		if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
-			die_error(400, "Invalid option parameter for this action");
-		}
+our @extra_options = @{$input_params{'extra_options'}};
+# @extra_options is always defined, since it can only be (currently) set from
+# CGI, and $cgi->param() returns the empty array in array context if the param
+# is not set
+foreach my $opt (@extra_options) {
+	if (not exists $allowed_options{$opt}) {
+		die_error(400, "Invalid option parameter");
+	}
+	if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
+		die_error(400, "Invalid option parameter for this action");
 	}
 }
 
-our $hash_parent_base = $cgi->param('hpb');
+our $hash_parent_base = $input_params{'hash_parent_base'};
 if (defined $hash_parent_base) {
 	if (!validate_refname($hash_parent_base)) {
 		die_error(400, "Invalid hash parent base parameter");
@@ -470,23 +590,23 @@ if (defined $hash_parent_base) {
 }
 
 # other parameters
-our $page = $cgi->param('pg');
+our $page = $input_params{'page'};
 if (defined $page) {
 	if ($page =~ m/[^0-9]/) {
 		die_error(400, "Invalid page parameter");
 	}
 }
 
-our $searchtype = $cgi->param('st');
+our $searchtype = $input_params{'searchtype'};
 if (defined $searchtype) {
 	if ($searchtype =~ m/[^a-z]/) {
 		die_error(400, "Invalid searchtype parameter");
 	}
 }
 
-our $search_use_regexp = $cgi->param('sr');
+our $search_use_regexp = $input_params{'search_use_regexp'};
 
-our $searchtext = $cgi->param('s');
+our $searchtext = $input_params{'searchtext'};
 our $search_regexp;
 if (defined $searchtext) {
 	if (length($searchtext) < 2) {
@@ -495,86 +615,11 @@ if (defined $searchtext) {
 	$search_regexp = $search_use_regexp ? $searchtext : quotemeta $searchtext;
 }
 
-# now read PATH_INFO and use it as alternative to parameters
-sub evaluate_path_info {
-	return if defined $project;
-	my $path_info = $ENV{"PATH_INFO"};
-	return if !$path_info;
-	$path_info =~ s,^/+,,;
-	return if !$path_info;
-	# find which part of PATH_INFO is project
-	$project = $path_info;
-	$project =~ s,/+$,,;
-	while ($project && !check_head_link("$projectroot/$project")) {
-		$project =~ s,/*[^/]*$,,;
-	}
-	# validate project
-	$project = validate_pathname($project);
-	if (!$project ||
-	    ($export_ok && !-e "$projectroot/$project/$export_ok") ||
-	    ($strict_export && !project_in_list($project))) {
-		undef $project;
-		return;
-	}
-	# do not change any parameters if an action is given using the query string
-	return if $action;
-	$path_info =~ s,^\Q$project\E/*,,;
-	my ($refname, $pathname) = split(/:/, $path_info, 2);
-	if (defined $pathname) {
-		# we got "project.git/branch:filename" or "project.git/branch:dir/"
-		# we could use git_get_type(branch:pathname), but it needs $git_dir
-		$pathname =~ s,^/+,,;
-		if (!$pathname || substr($pathname, -1) eq "/") {
-			$action  ||= "tree";
-			$pathname =~ s,/$,,;
-		} else {
-			$action  ||= "blob_plain";
-		}
-		$hash_base ||= validate_refname($refname);
-		$file_name ||= validate_pathname($pathname);
-	} elsif (defined $refname) {
-		# we got "project.git/branch"
-		$action ||= "shortlog";
-		$hash   ||= validate_refname($refname);
-	}
-}
-evaluate_path_info();
-
 # path to the current git repository
 our $git_dir;
 $git_dir = "$projectroot/$project" if $project;
 
 # dispatch
-my %actions = (
-	"blame" => \&git_blame,
-	"blobdiff" => \&git_blobdiff,
-	"blobdiff_plain" => \&git_blobdiff_plain,
-	"blob" => \&git_blob,
-	"blob_plain" => \&git_blob_plain,
-	"commitdiff" => \&git_commitdiff,
-	"commitdiff_plain" => \&git_commitdiff_plain,
-	"commit" => \&git_commit,
-	"forks" => \&git_forks,
-	"heads" => \&git_heads,
-	"history" => \&git_history,
-	"log" => \&git_log,
-	"rss" => \&git_rss,
-	"atom" => \&git_atom,
-	"search" => \&git_search,
-	"search_help" => \&git_search_help,
-	"shortlog" => \&git_shortlog,
-	"summary" => \&git_summary,
-	"tag" => \&git_tag,
-	"tags" => \&git_tags,
-	"tree" => \&git_tree,
-	"snapshot" => \&git_snapshot,
-	"object" => \&git_object,
-	# those below don't need $project
-	"opml" => \&git_opml,
-	"project_list" => \&git_project_list,
-	"project_index" => \&git_project_index,
-);
-
 if (!defined $action) {
 	if (defined $hash) {
 		$action = git_get_type($hash);
@@ -604,35 +649,12 @@ sub href (%) {
 	# default is to use -absolute url() i.e. $my_uri
 	my $href = $params{-full} ? $my_url : $my_uri;
 
-	# XXX: Warning: If you touch this, check the search form for updating,
-	# too.
-
-	my @mapping = (
-		project => "p",
-		action => "a",
-		file_name => "f",
-		file_parent => "fp",
-		hash => "h",
-		hash_parent => "hp",
-		hash_base => "hb",
-		hash_parent_base => "hpb",
-		page => "pg",
-		order => "o",
-		searchtext => "s",
-		searchtype => "st",
-		snapshot_format => "sf",
-		extra_options => "opt",
-		search_use_regexp => "sr",
-	);
-	my %mapping = @mapping;
-
 	$params{'project'} = $project unless exists $params{'project'};
 
 	if ($params{-replay}) {
-		while (my ($name, $symbol) = each %mapping) {
+		while (my ($name, $symbol) = each %cgi_param_mapping) {
 			if (!exists $params{$name}) {
-				# to allow for multivalued params we use arrayref form
-				$params{$name} = [ $cgi->param($symbol) ];
+				$params{$name} = $input_params{$name};
 			}
 		}
 	}
@@ -651,8 +673,8 @@ sub href (%) {
 
 	# now encode the parameters explicitly
 	my @result = ();
-	for (my $i = 0; $i < @mapping; $i += 2) {
-		my ($name, $symbol) = ($mapping[$i], $mapping[$i+1]);
+	for (my $i = 0; $i < @cgi_param_mapping; $i += 2) {
+		my ($name, $symbol) = ($cgi_param_mapping[$i], $cgi_param_mapping[$i+1]);
 		if (defined $params{$name}) {
 			if (ref($params{$name}) eq "ARRAY") {
 				foreach my $par (@{$params{$name}}) {
@@ -672,6 +694,25 @@ sub href (%) {
 ## ======================================================================
 ## validation, quoting/unquoting and escaping
 
+sub validate_action {
+	my $input = shift || return undef;
+	return undef unless exists $actions{$action};
+	return $action;
+}
+
+sub validate_project {
+	my $input = shift || return undef;
+	if (!validate_pathname($input) ||
+		!(-d "$projectroot/$input") ||
+		!check_head_link("$projectroot/$input") ||
+		($export_ok && !(-e "$projectroot/$input/$export_ok")) ||
+		($strict_export && !project_in_list($input))) {
+		return undef;
+	} else {
+		return $input;
+	}
+}
+
 sub validate_pathname {
 	my $input = shift || return undef;
 
@@ -3988,7 +4029,7 @@ sub git_search_grep_body {
 ## actions
 
 sub git_project_list {
-	my $order = $cgi->param('o');
+	my $order = $input_params{'order'};
 	if (defined $order && $order !~ m/none|project|descr|owner|age/) {
 		die_error(400, "Unknown order parameter");
 	}
@@ -4011,7 +4052,7 @@ sub git_project_list {
 }
 
 sub git_forks {
-	my $order = $cgi->param('o');
+	my $order = $input_params{'order'};
 	if (defined $order && $order !~ m/none|project|descr|owner|age/) {
 		die_error(400, "Unknown order parameter");
 	}
@@ -4545,7 +4586,7 @@ sub git_snapshot {
 	my @supported_fmts = gitweb_check_feature('snapshot');
 	@supported_fmts = filter_snapshot_fmts(@supported_fmts);
 
-	my $format = $cgi->param('sf');
+	my $format = $input_params{'snapshot_format'};
 	if (!@supported_fmts) {
 		die_error(403, "Snapshots not allowed");
 	}
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] gitweb: refactor input parameters parse/validation
  2008-10-08  9:10       ` Giuseppe Bilotta
@ 2008-10-08  9:45         ` Jakub Narebski
  0 siblings, 0 replies; 11+ messages in thread
From: Jakub Narebski @ 2008-10-08  9:45 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: git, Shawn O. Pearce

Giuseppe Bilotta wrote:
> On Tue, Oct 7, 2008 at 4:39 PM, Jakub Narebski <jnareb@gmail.com> wrote:
> > Giuseppe Bilotta wrote:

> > Wouldn't it be simpler and as good solution to just leave validation
> > off evaluate_path_info() (well, of course except check_head_link() test),
> > and allow it to be validated when assigning global 'params' variables?
> > check_head_link() would be repeated for path_info links, but that
> > should not affect performance much.
> 
> Well, it does have a performance hit in the case of invalid $project
> since it spends time working on the rest of the URL before bailing
> out, but it's probably the cleanest solution. I'll do it this way.

I have forgot about this fact, that parameter validation serves also
as early escape. But I don't think it is much performance hit in
practice; it is performance (optimization) vs. maintability tradeoff.

-- 
Jakub Narebski
Poland

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCHv2] gitweb: refactor input parameters parse/validation
  2008-10-08  9:26   ` [PATCHv2] " Giuseppe Bilotta
@ 2008-10-10  8:37     ` Jakub Narebski
  2008-10-10 15:01     ` Shawn O. Pearce
  1 sibling, 0 replies; 11+ messages in thread
From: Jakub Narebski @ 2008-10-10  8:37 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: git, Shawn O. Pearce, Petr Baudis

On Wed, 8 Oct 2008, Giuseppe Bilotta wrote:

> Since input parameters can be obtained both from CGI parameters and
> PATH_INFO, we would like most of the code to be agnostic about the way
> parameters were retrieved. We thus collect all the parameters into the
> new %input_params hash, delaying validation after the collection is
> completed.
> 
> Although the kludge removal is minimal at the moment, it makes life much
> easier for future expansions such as more extensive PATH_INFO use or
> other form of input such as command-line support.
> 
> Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>

Very nice.

Acked-by: Jakub Narebski <jnareb@gmail.com>

-- 
Jakub Narebski
Poland

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCHv2] gitweb: refactor input parameters parse/validation
  2008-10-08  9:26   ` [PATCHv2] " Giuseppe Bilotta
  2008-10-10  8:37     ` Jakub Narebski
@ 2008-10-10 15:01     ` Shawn O. Pearce
  2008-10-10 17:33       ` Giuseppe Bilotta
  2008-10-10 18:42       ` [PATCHv3] " Giuseppe Bilotta
  1 sibling, 2 replies; 11+ messages in thread
From: Shawn O. Pearce @ 2008-10-10 15:01 UTC (permalink / raw)
  To: Giuseppe Bilotta; +Cc: git, Jakub Narebski

Giuseppe Bilotta <giuseppe.bilotta@gmail.com> wrote:
> Since input parameters can be obtained both from CGI parameters and
> PATH_INFO, we would like most of the code to be agnostic about the way
> parameters were retrieved. We thus collect all the parameters into the
> new %input_params hash, delaying validation after the collection is
> completed.

> @@ -672,6 +694,25 @@ sub href (%) {
>  ## ======================================================================
>  ## validation, quoting/unquoting and escaping
>  
> +sub validate_action {
> +	my $input = shift || return undef;
> +	return undef unless exists $actions{$action};
> +	return $action;
> +}

Shouldn't $action here be $input?

-- 
Shawn.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCHv2] gitweb: refactor input parameters parse/validation
  2008-10-10 15:01     ` Shawn O. Pearce
@ 2008-10-10 17:33       ` Giuseppe Bilotta
  2008-10-10 18:42       ` [PATCHv3] " Giuseppe Bilotta
  1 sibling, 0 replies; 11+ messages in thread
From: Giuseppe Bilotta @ 2008-10-10 17:33 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git, Jakub Narebski

On Fri, Oct 10, 2008 at 5:01 PM, Shawn O. Pearce <spearce@spearce.org> wrote:
> Giuseppe Bilotta <giuseppe.bilotta@gmail.com> wrote:
>> +sub validate_action {
>> +     my $input = shift || return undef;
>> +     return undef unless exists $actions{$action};
>> +     return $action;
>> +}
>
> Shouldn't $action here be $input?

Urgh. Yes, yes it should. Good catch. Should I resend?


-- 
Giuseppe "Oblomov" Bilotta

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCHv3] gitweb: refactor input parameters parse/validation
  2008-10-10 15:01     ` Shawn O. Pearce
  2008-10-10 17:33       ` Giuseppe Bilotta
@ 2008-10-10 18:42       ` Giuseppe Bilotta
  1 sibling, 0 replies; 11+ messages in thread
From: Giuseppe Bilotta @ 2008-10-10 18:42 UTC (permalink / raw)
  To: git; +Cc: Jakub Narebski, Shawn O. Pearce, Giuseppe Bilotta

Since input parameters can be obtained both from CGI parameters and
PATH_INFO, we would like most of the code to be agnostic about the way
parameters were retrieved. We thus collect all the parameters into the
new %input_params hash, delaying validation after the collection is
completed.

Although the kludge removal is minimal at the moment, it makes life much
easier for future expansions such as more extensive PATH_INFO use or
other form of input such as command-line support.

Signed-off-by: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>
---
 gitweb/gitweb.perl |  315 +++++++++++++++++++++++++++++-----------------------
 1 files changed, 178 insertions(+), 137 deletions(-)

This resend fixes a bug spotted by Shwan Pearce, where the global $action
instead of the local $input was used for action validation. Although the bug
itself didn't have any effect because validate_action was called on $action
anyway, it's still a logical error that needed squashing.

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index 1116800..c5254af 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -29,7 +29,9 @@ our $my_uri = $cgi->url(-absolute => 1);
 
 # if we're called with PATH_INFO, we have to strip that
 # from the URL to find our real URL
-if (my $path_info = $ENV{"PATH_INFO"}) {
+# we make $path_info global because it's also used later on
+my $path_info = $ENV{"PATH_INFO"};
+if ($path_info) {
 	$my_url =~ s,\Q$path_info\E$,,;
 	$my_uri =~ s,\Q$path_info\E$,,;
 }
@@ -428,34 +430,155 @@ $projects_list ||= $projectroot;
 
 # ======================================================================
 # input validation and dispatch
-our $action = $cgi->param('a');
+
+# input parameters can be collected from a variety of sources (presently, CGI
+# and PATH_INFO), so we define an %input_params hash that collects them all
+# together during validation: this allows subsequent uses (e.g. href()) to be
+# agnostic of the parameter origin
+
+my %input_params = ();
+
+# input parameters are stored with the long parameter name as key. This will
+# also be used in the href subroutine to convert parameters to their CGI
+# equivalent, and since the href() usage is the most frequent one, we store
+# the name -> CGI key mapping here, instead of the reverse.
+#
+# XXX: Warning: If you touch this, check the search form for updating,
+# too.
+
+my @cgi_param_mapping = (
+	project => "p",
+	action => "a",
+	file_name => "f",
+	file_parent => "fp",
+	hash => "h",
+	hash_parent => "hp",
+	hash_base => "hb",
+	hash_parent_base => "hpb",
+	page => "pg",
+	order => "o",
+	searchtext => "s",
+	searchtype => "st",
+	snapshot_format => "sf",
+	extra_options => "opt",
+	search_use_regexp => "sr",
+);
+my %cgi_param_mapping = @cgi_param_mapping;
+
+# we will also need to know the possible actions, for validation
+my %actions = (
+	"blame" => \&git_blame,
+	"blobdiff" => \&git_blobdiff,
+	"blobdiff_plain" => \&git_blobdiff_plain,
+	"blob" => \&git_blob,
+	"blob_plain" => \&git_blob_plain,
+	"commitdiff" => \&git_commitdiff,
+	"commitdiff_plain" => \&git_commitdiff_plain,
+	"commit" => \&git_commit,
+	"forks" => \&git_forks,
+	"heads" => \&git_heads,
+	"history" => \&git_history,
+	"log" => \&git_log,
+	"rss" => \&git_rss,
+	"atom" => \&git_atom,
+	"search" => \&git_search,
+	"search_help" => \&git_search_help,
+	"shortlog" => \&git_shortlog,
+	"summary" => \&git_summary,
+	"tag" => \&git_tag,
+	"tags" => \&git_tags,
+	"tree" => \&git_tree,
+	"snapshot" => \&git_snapshot,
+	"object" => \&git_object,
+	# those below don't need $project
+	"opml" => \&git_opml,
+	"project_list" => \&git_project_list,
+	"project_index" => \&git_project_index,
+);
+
+# finally, we have the hash of allowed extra_options for the commands that
+# allow them
+my %allowed_options = (
+	"--no-merges" => [ qw(rss atom log shortlog history) ],
+);
+
+# fill %input_params with the CGI parameters. All values except for 'opt'
+# should be single values, but opt can be an array. We should probably
+# build an array of parameters that can be multi-valued, but since for the time
+# being it's only this one, we just single it out
+while (my ($name, $symbol) = each %cgi_param_mapping) {
+	if ($symbol eq 'opt') {
+		$input_params{$name} = [ $cgi->param($symbol) ];
+	} else {
+		$input_params{$name} = $cgi->param($symbol);
+	}
+}
+
+# now read PATH_INFO and update the parameter list for missing parameters
+sub evaluate_path_info {
+	return if defined $input_params{'project'};
+	return if !$path_info;
+	$path_info =~ s,^/+,,;
+	return if !$path_info;
+
+	# find which part of PATH_INFO is project
+	my $project = $path_info;
+	$project =~ s,/+$,,;
+	while ($project && !check_head_link("$projectroot/$project")) {
+		$project =~ s,/*[^/]*$,,;
+	}
+	return unless $project;
+	$input_params{'project'} = $project;
+
+	# do not change any parameters if an action is given using the query string
+	return if $input_params{'action'};
+	$path_info =~ s,^\Q$project\E/*,,;
+
+	my ($refname, $pathname) = split(/:/, $path_info, 2);
+	if (defined $pathname) {
+		# we got "project.git/branch:filename" or "project.git/branch:dir/"
+		# we could use git_get_type(branch:pathname), but it needs $git_dir
+		$pathname =~ s,^/+,,;
+		if (!$pathname || substr($pathname, -1) eq "/") {
+			$input_params{'action'} = "tree";
+			$pathname =~ s,/$,,;
+		} else {
+			$input_params{'action'} = "blob_plain";
+		}
+		$input_params{'hash_base'} ||= $refname;
+		$input_params{'file_name'} ||= $pathname;
+	} elsif (defined $refname) {
+		# we got "project.git/branch"
+		$input_params{'action'} = "shortlog";
+		$input_params{'hash'} ||= $refname;
+	}
+}
+evaluate_path_info();
+
+our $action = $input_params{'action'};
 if (defined $action) {
-	if ($action =~ m/[^0-9a-zA-Z\.\-_]/) {
+	if (!validate_action($action)) {
 		die_error(400, "Invalid action parameter");
 	}
 }
 
 # parameters which are pathnames
-our $project = $cgi->param('p');
+our $project = $input_params{'project'};
 if (defined $project) {
-	if (!validate_pathname($project) ||
-	    !(-d "$projectroot/$project") ||
-	    !check_head_link("$projectroot/$project") ||
-	    ($export_ok && !(-e "$projectroot/$project/$export_ok")) ||
-	    ($strict_export && !project_in_list($project))) {
+	if (!validate_project($project)) {
 		undef $project;
 		die_error(404, "No such project");
 	}
 }
 
-our $file_name = $cgi->param('f');
+our $file_name = $input_params{'file_name'};
 if (defined $file_name) {
 	if (!validate_pathname($file_name)) {
 		die_error(400, "Invalid file parameter");
 	}
 }
 
-our $file_parent = $cgi->param('fp');
+our $file_parent = $input_params{'file_parent'};
 if (defined $file_parent) {
 	if (!validate_pathname($file_parent)) {
 		die_error(400, "Invalid file parent parameter");
@@ -463,44 +586,41 @@ if (defined $file_parent) {
 }
 
 # parameters which are refnames
-our $hash = $cgi->param('h');
+our $hash = $input_params{'hash'};
 if (defined $hash) {
 	if (!validate_refname($hash)) {
 		die_error(400, "Invalid hash parameter");
 	}
 }
 
-our $hash_parent = $cgi->param('hp');
+our $hash_parent = $input_params{'hash_parent'};
 if (defined $hash_parent) {
 	if (!validate_refname($hash_parent)) {
 		die_error(400, "Invalid hash parent parameter");
 	}
 }
 
-our $hash_base = $cgi->param('hb');
+our $hash_base = $input_params{'hash_base'};
 if (defined $hash_base) {
 	if (!validate_refname($hash_base)) {
 		die_error(400, "Invalid hash base parameter");
 	}
 }
 
-my %allowed_options = (
-	"--no-merges" => [ qw(rss atom log shortlog history) ],
-);
-
-our @extra_options = $cgi->param('opt');
-if (defined @extra_options) {
-	foreach my $opt (@extra_options) {
-		if (not exists $allowed_options{$opt}) {
-			die_error(400, "Invalid option parameter");
-		}
-		if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
-			die_error(400, "Invalid option parameter for this action");
-		}
+our @extra_options = @{$input_params{'extra_options'}};
+# @extra_options is always defined, since it can only be (currently) set from
+# CGI, and $cgi->param() returns the empty array in array context if the param
+# is not set
+foreach my $opt (@extra_options) {
+	if (not exists $allowed_options{$opt}) {
+		die_error(400, "Invalid option parameter");
+	}
+	if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
+		die_error(400, "Invalid option parameter for this action");
 	}
 }
 
-our $hash_parent_base = $cgi->param('hpb');
+our $hash_parent_base = $input_params{'hash_parent_base'};
 if (defined $hash_parent_base) {
 	if (!validate_refname($hash_parent_base)) {
 		die_error(400, "Invalid hash parent base parameter");
@@ -508,23 +628,23 @@ if (defined $hash_parent_base) {
 }
 
 # other parameters
-our $page = $cgi->param('pg');
+our $page = $input_params{'page'};
 if (defined $page) {
 	if ($page =~ m/[^0-9]/) {
 		die_error(400, "Invalid page parameter");
 	}
 }
 
-our $searchtype = $cgi->param('st');
+our $searchtype = $input_params{'searchtype'};
 if (defined $searchtype) {
 	if ($searchtype =~ m/[^a-z]/) {
 		die_error(400, "Invalid searchtype parameter");
 	}
 }
 
-our $search_use_regexp = $cgi->param('sr');
+our $search_use_regexp = $input_params{'search_use_regexp'};
 
-our $searchtext = $cgi->param('s');
+our $searchtext = $input_params{'searchtext'};
 our $search_regexp;
 if (defined $searchtext) {
 	if (length($searchtext) < 2) {
@@ -533,86 +653,11 @@ if (defined $searchtext) {
 	$search_regexp = $search_use_regexp ? $searchtext : quotemeta $searchtext;
 }
 
-# now read PATH_INFO and use it as alternative to parameters
-sub evaluate_path_info {
-	return if defined $project;
-	my $path_info = $ENV{"PATH_INFO"};
-	return if !$path_info;
-	$path_info =~ s,^/+,,;
-	return if !$path_info;
-	# find which part of PATH_INFO is project
-	$project = $path_info;
-	$project =~ s,/+$,,;
-	while ($project && !check_head_link("$projectroot/$project")) {
-		$project =~ s,/*[^/]*$,,;
-	}
-	# validate project
-	$project = validate_pathname($project);
-	if (!$project ||
-	    ($export_ok && !-e "$projectroot/$project/$export_ok") ||
-	    ($strict_export && !project_in_list($project))) {
-		undef $project;
-		return;
-	}
-	# do not change any parameters if an action is given using the query string
-	return if $action;
-	$path_info =~ s,^\Q$project\E/*,,;
-	my ($refname, $pathname) = split(/:/, $path_info, 2);
-	if (defined $pathname) {
-		# we got "project.git/branch:filename" or "project.git/branch:dir/"
-		# we could use git_get_type(branch:pathname), but it needs $git_dir
-		$pathname =~ s,^/+,,;
-		if (!$pathname || substr($pathname, -1) eq "/") {
-			$action  ||= "tree";
-			$pathname =~ s,/$,,;
-		} else {
-			$action  ||= "blob_plain";
-		}
-		$hash_base ||= validate_refname($refname);
-		$file_name ||= validate_pathname($pathname);
-	} elsif (defined $refname) {
-		# we got "project.git/branch"
-		$action ||= "shortlog";
-		$hash   ||= validate_refname($refname);
-	}
-}
-evaluate_path_info();
-
 # path to the current git repository
 our $git_dir;
 $git_dir = "$projectroot/$project" if $project;
 
 # dispatch
-my %actions = (
-	"blame" => \&git_blame,
-	"blobdiff" => \&git_blobdiff,
-	"blobdiff_plain" => \&git_blobdiff_plain,
-	"blob" => \&git_blob,
-	"blob_plain" => \&git_blob_plain,
-	"commitdiff" => \&git_commitdiff,
-	"commitdiff_plain" => \&git_commitdiff_plain,
-	"commit" => \&git_commit,
-	"forks" => \&git_forks,
-	"heads" => \&git_heads,
-	"history" => \&git_history,
-	"log" => \&git_log,
-	"rss" => \&git_rss,
-	"atom" => \&git_atom,
-	"search" => \&git_search,
-	"search_help" => \&git_search_help,
-	"shortlog" => \&git_shortlog,
-	"summary" => \&git_summary,
-	"tag" => \&git_tag,
-	"tags" => \&git_tags,
-	"tree" => \&git_tree,
-	"snapshot" => \&git_snapshot,
-	"object" => \&git_object,
-	# those below don't need $project
-	"opml" => \&git_opml,
-	"project_list" => \&git_project_list,
-	"project_index" => \&git_project_index,
-);
-
 if (!defined $action) {
 	if (defined $hash) {
 		$action = git_get_type($hash);
@@ -642,35 +687,12 @@ sub href (%) {
 	# default is to use -absolute url() i.e. $my_uri
 	my $href = $params{-full} ? $my_url : $my_uri;
 
-	# XXX: Warning: If you touch this, check the search form for updating,
-	# too.
-
-	my @mapping = (
-		project => "p",
-		action => "a",
-		file_name => "f",
-		file_parent => "fp",
-		hash => "h",
-		hash_parent => "hp",
-		hash_base => "hb",
-		hash_parent_base => "hpb",
-		page => "pg",
-		order => "o",
-		searchtext => "s",
-		searchtype => "st",
-		snapshot_format => "sf",
-		extra_options => "opt",
-		search_use_regexp => "sr",
-	);
-	my %mapping = @mapping;
-
 	$params{'project'} = $project unless exists $params{'project'};
 
 	if ($params{-replay}) {
-		while (my ($name, $symbol) = each %mapping) {
+		while (my ($name, $symbol) = each %cgi_param_mapping) {
 			if (!exists $params{$name}) {
-				# to allow for multivalued params we use arrayref form
-				$params{$name} = [ $cgi->param($symbol) ];
+				$params{$name} = $input_params{$name};
 			}
 		}
 	}
@@ -689,8 +711,8 @@ sub href (%) {
 
 	# now encode the parameters explicitly
 	my @result = ();
-	for (my $i = 0; $i < @mapping; $i += 2) {
-		my ($name, $symbol) = ($mapping[$i], $mapping[$i+1]);
+	for (my $i = 0; $i < @cgi_param_mapping; $i += 2) {
+		my ($name, $symbol) = ($cgi_param_mapping[$i], $cgi_param_mapping[$i+1]);
 		if (defined $params{$name}) {
 			if (ref($params{$name}) eq "ARRAY") {
 				foreach my $par (@{$params{$name}}) {
@@ -710,6 +732,25 @@ sub href (%) {
 ## ======================================================================
 ## validation, quoting/unquoting and escaping
 
+sub validate_action {
+	my $input = shift || return undef;
+	return undef unless exists $actions{$input};
+	return $input;
+}
+
+sub validate_project {
+	my $input = shift || return undef;
+	if (!validate_pathname($input) ||
+		!(-d "$projectroot/$input") ||
+		!check_head_link("$projectroot/$input") ||
+		($export_ok && !(-e "$projectroot/$input/$export_ok")) ||
+		($strict_export && !project_in_list($input))) {
+		return undef;
+	} else {
+		return $input;
+	}
+}
+
 sub validate_pathname {
 	my $input = shift || return undef;
 
@@ -4121,7 +4162,7 @@ sub git_search_grep_body {
 ## actions
 
 sub git_project_list {
-	my $order = $cgi->param('o');
+	my $order = $input_params{'order'};
 	if (defined $order && $order !~ m/none|project|descr|owner|age/) {
 		die_error(400, "Unknown order parameter");
 	}
@@ -4149,7 +4190,7 @@ sub git_project_list {
 }
 
 sub git_forks {
-	my $order = $cgi->param('o');
+	my $order = $input_params{'order'};
 	if (defined $order && $order !~ m/none|project|descr|owner|age/) {
 		die_error(400, "Unknown order parameter");
 	}
@@ -4697,7 +4738,7 @@ sub git_snapshot {
 	my @supported_fmts = gitweb_check_feature('snapshot');
 	@supported_fmts = filter_snapshot_fmts(@supported_fmts);
 
-	my $format = $cgi->param('sf');
+	my $format = $input_params{'snapshot_format'};
 	if (!@supported_fmts) {
 		die_error(403, "Snapshots not allowed");
 	}
-- 
1.5.6.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-10-10 18:43 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-03 17:19 [PATCH] gitweb: refactor input parameters parse/validation Giuseppe Bilotta
2008-10-07 10:57 ` Jakub Narebski
2008-10-07 12:42   ` Giuseppe Bilotta
2008-10-07 14:39     ` Jakub Narebski
2008-10-08  9:10       ` Giuseppe Bilotta
2008-10-08  9:45         ` Jakub Narebski
2008-10-08  9:26   ` [PATCHv2] " Giuseppe Bilotta
2008-10-10  8:37     ` Jakub Narebski
2008-10-10 15:01     ` Shawn O. Pearce
2008-10-10 17:33       ` Giuseppe Bilotta
2008-10-10 18:42       ` [PATCHv3] " Giuseppe Bilotta

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).