From: Martin Koegler <mkoegler@auto.tuwien.ac.at>
To: Jakub Narebski <jnareb@gmail.com>
Cc: git@vger.kernel.org, Martin Koegler <mkoegler@auto.tuwien.ac.at>
Subject: [PATCH] gitweb: handle non UTF-8 text
Date: Mon, 28 May 2007 22:47:34 +0200 [thread overview]
Message-ID: <1180385254576-git-send-email-mkoegler@auto.tuwien.ac.at> (raw)
gitweb assumes that everything is in UTF-8. If a text contains invalid
UTF-8 character sequences, the text must be in a different encoding.
This patch interprets such text as latin1.
Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
For correct UTF-8, the patch does not change anything.
If a commit/blob/... is not in UTF-8, the text will very
probably still be displayed correctly.
As git itself is not aware of any encoding, I know of no better
way to handle non-UTF-8 text in gitweb.
gitweb/gitweb.perl | 27 +++++++++++++++++----------
1 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
index c3921cb..dfd564d 100755
--- a/gitweb/gitweb.perl
+++ b/gitweb/gitweb.perl
@@ -621,12 +621,19 @@ sub esc_url {
return $str;
}
+sub my_decode_utf8 { # decode octets as strict UTF-8, else fall back to latin1
+ my $str = shift;
+ my $res; # stays undef when the strict decode below dies on invalid UTF-8
+ eval { $res = decode_utf8 ($str, 1); }; # CHECK=1 croaks on malformed input; on success $str is consumed
+ return defined $res ? $res : decode('latin1', $str); # test definedness so valid "0" and "" are kept
+}
+
# replace invalid utf8 character with SUBSTITUTION sequence
sub esc_html ($;%) {
my $str = shift;
my %opts = @_;
- $str = decode_utf8($str);
+ $str = my_decode_utf8($str);
$str = $cgi->escapeHTML($str);
if ($opts{'-nbsp'}) {
$str =~ s/ / /g;
@@ -640,7 +647,7 @@ sub esc_path {
my $str = shift;
my %opts = @_;
- $str = decode_utf8($str);
+ $str = my_decode_utf8($str);
$str = $cgi->escapeHTML($str);
if ($opts{'-nbsp'}) {
$str =~ s/ / /g;
@@ -925,7 +932,7 @@ sub format_subject_html {
if (length($short) < length($long)) {
return $cgi->a({-href => $href, -class => "list subject",
- -title => decode_utf8($long)},
+ -title => my_decode_utf8($long)},
esc_html($short) . $extra);
} else {
return $cgi->a({-href => $href, -class => "list subject"},
@@ -1239,7 +1246,7 @@ sub git_get_projects_list {
if (check_export_ok("$projectroot/$path")) {
my $pr = {
path => $path,
- owner => decode_utf8($owner),
+ owner => my_decode_utf8($owner),
};
push @list, $pr;
(my $forks_path = $path) =~ s/\.git$//;
@@ -1269,7 +1276,7 @@ sub git_get_project_owner {
$pr = unescape($pr);
$ow = unescape($ow);
if ($pr eq $project) {
- $owner = decode_utf8($ow);
+ $owner = my_decode_utf8($ow);
last;
}
}
@@ -1759,7 +1766,7 @@ sub get_file_owner {
}
my $owner = $gcos;
$owner =~ s/[,;].*$//;
- return decode_utf8($owner);
+ return my_decode_utf8($owner);
}
## ......................................................................
@@ -1842,7 +1849,7 @@ sub git_header_html {
my $title = "$site_name";
if (defined $project) {
- $title .= " - " . decode_utf8($project);
+ $title .= " - " . my_decode_utf8($project);
if (defined $action) {
$title .= "/$action";
if (defined $file_name) {
@@ -2116,7 +2123,7 @@ sub git_print_page_path {
print "<div class=\"page_path\">";
print $cgi->a({-href => href(action=>"tree", hash_base=>$hb),
- -title => 'tree root'}, decode_utf8("[$project]"));
+ -title => 'tree root'}, my_decode_utf8("[$project]"));
print " / ";
if (defined $name) {
my @dirname = split '/', $name;
@@ -2936,7 +2943,7 @@ sub git_project_list_body {
($pr->{'age'}, $pr->{'age_string'}) = @aa;
if (!defined $pr->{'descr'}) {
my $descr = git_get_project_description($pr->{'path'}) || "";
- $pr->{'descr_long'} = decode_utf8($descr);
+ $pr->{'descr_long'} = my_decode_utf8($descr);
$pr->{'descr'} = chop_str($descr, 25, 5);
}
if (!defined $pr->{'owner'}) {
@@ -3981,7 +3988,7 @@ sub git_snapshot {
my $git = git_cmd_str();
my $name = $project;
$name =~ s/\047/\047\\\047\047/g;
- my $filename = decode_utf8(basename($project));
+ my $filename = my_decode_utf8(basename($project));
my $cmd;
if ($suffix eq 'zip') {
$filename .= "-$hash.$suffix";
--
1.5.2.846.g9a144
next reply other threads:[~2007-05-28 20:47 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-05-28 20:47 Martin Koegler [this message]
2007-05-28 23:21 ` [PATCH] gitweb: handle non UTF-8 text Petr Baudis
2007-05-29 9:21 ` Jakub Narebski
2007-05-29 21:55 ` Martin Koegler
2007-05-30 20:18 ` Robin Rosenberg
2007-06-01 21:05 ` Jakub Narebski
2007-06-02 22:15 ` Junio C Hamano
2007-06-03 15:42 ` Jakub Narebski
2007-06-03 18:41 ` Alexandre Julliard
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1180385254576-git-send-email-mkoegler@auto.tuwien.ac.at \
--to=mkoegler@auto.tuwien.ac.at \
--cc=git@vger.kernel.org \
--cc=jnareb@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).