git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] gitweb: handle non UTF-8 text
@ 2007-05-28 20:47 Martin Koegler
  2007-05-28 23:21 ` Petr Baudis
  0 siblings, 1 reply; 9+ messages in thread
From: Martin Koegler @ 2007-05-28 20:47 UTC (permalink / raw)
  To: Jakub Narebski; +Cc: git, Martin Koegler

gitweb assumes that everything is in UTF-8. If a text contains invalid
UTF-8 character sequences, the text must be in a different encoding.

This patch interprets such a text as latin1.

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
For correct UTF-8, the patch does not change anything.

If a commit/blob/... is not in UTF-8, the text is very likely
to be displayed correctly.

As git itself is not aware of any encoding, I know of no better
way to handle non-UTF-8 text in gitweb.

 gitweb/gitweb.perl |   27 +++++++++++++++++----------
 1 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index c3921cb..dfd564d 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -621,12 +621,19 @@ sub esc_url {
 	return $str;
 }
 
+# decode $str as strict UTF-8; if it is not valid UTF-8, decode it as latin1
+sub my_decode_utf8 {
+	my $str = shift;  # LEAVE_SRC below keeps decode_utf8 from emptying this copy
+	my $res = eval { decode_utf8($str, Encode::FB_CROAK() | Encode::LEAVE_SRC()) };
+	return defined $res ? $res : decode('latin1', $str);  # "0" and "" are valid
+}
+
 # replace invalid utf8 character with SUBSTITUTION sequence
 sub esc_html ($;%) {
 	my $str = shift;
 	my %opts = @_;
 
-	$str = decode_utf8($str);
+	$str = my_decode_utf8($str);
 	$str = $cgi->escapeHTML($str);
 	if ($opts{'-nbsp'}) {
 		$str =~ s/ /&nbsp;/g;
@@ -640,7 +647,7 @@ sub esc_path {
 	my $str = shift;
 	my %opts = @_;
 
-	$str = decode_utf8($str);
+	$str = my_decode_utf8($str);
 	$str = $cgi->escapeHTML($str);
 	if ($opts{'-nbsp'}) {
 		$str =~ s/ /&nbsp;/g;
@@ -925,7 +932,7 @@ sub format_subject_html {
 
 	if (length($short) < length($long)) {
 		return $cgi->a({-href => $href, -class => "list subject",
-		                -title => decode_utf8($long)},
+		                -title => my_decode_utf8($long)},
 		       esc_html($short) . $extra);
 	} else {
 		return $cgi->a({-href => $href, -class => "list subject"},
@@ -1239,7 +1246,7 @@ sub git_get_projects_list {
 			if (check_export_ok("$projectroot/$path")) {
 				my $pr = {
 					path => $path,
-					owner => decode_utf8($owner),
+					owner => my_decode_utf8($owner),
 				};
 				push @list, $pr;
 				(my $forks_path = $path) =~ s/\.git$//;
@@ -1269,7 +1276,7 @@ sub git_get_project_owner {
 			$pr = unescape($pr);
 			$ow = unescape($ow);
 			if ($pr eq $project) {
-				$owner = decode_utf8($ow);
+				$owner = my_decode_utf8($ow);
 				last;
 			}
 		}
@@ -1759,7 +1766,7 @@ sub get_file_owner {
 	}
 	my $owner = $gcos;
 	$owner =~ s/[,;].*$//;
-	return decode_utf8($owner);
+	return my_decode_utf8($owner);
 }
 
 ## ......................................................................
@@ -1842,7 +1849,7 @@ sub git_header_html {
 
 	my $title = "$site_name";
 	if (defined $project) {
-		$title .= " - " . decode_utf8($project);
+		$title .= " - " . my_decode_utf8($project);
 		if (defined $action) {
 			$title .= "/$action";
 			if (defined $file_name) {
@@ -2116,7 +2123,7 @@ sub git_print_page_path {
 
 	print "<div class=\"page_path\">";
 	print $cgi->a({-href => href(action=>"tree", hash_base=>$hb),
-	              -title => 'tree root'}, decode_utf8("[$project]"));
+	              -title => 'tree root'}, my_decode_utf8("[$project]"));
 	print " / ";
 	if (defined $name) {
 		my @dirname = split '/', $name;
@@ -2936,7 +2943,7 @@ sub git_project_list_body {
 		($pr->{'age'}, $pr->{'age_string'}) = @aa;
 		if (!defined $pr->{'descr'}) {
 			my $descr = git_get_project_description($pr->{'path'}) || "";
-			$pr->{'descr_long'} = decode_utf8($descr);
+			$pr->{'descr_long'} = my_decode_utf8($descr);
 			$pr->{'descr'} = chop_str($descr, 25, 5);
 		}
 		if (!defined $pr->{'owner'}) {
@@ -3981,7 +3988,7 @@ sub git_snapshot {
 	my $git = git_cmd_str();
 	my $name = $project;
 	$name =~ s/\047/\047\\\047\047/g;
-	my $filename = decode_utf8(basename($project));
+	my $filename = my_decode_utf8(basename($project));
 	my $cmd;
 	if ($suffix eq 'zip') {
 		$filename .= "-$hash.$suffix";
-- 
1.5.2.846.g9a144

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2007-06-03 18:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-05-28 20:47 [PATCH] gitweb: handle non UTF-8 text Martin Koegler
2007-05-28 23:21 ` Petr Baudis
2007-05-29  9:21   ` Jakub Narebski
2007-05-29 21:55     ` Martin Koegler
2007-05-30 20:18       ` Robin Rosenberg
2007-06-01 21:05       ` Jakub Narebski
2007-06-02 22:15         ` Junio C Hamano
2007-06-03 15:42           ` Jakub Narebski
2007-06-03 18:41             ` Alexandre Julliard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).