Git development
 help / color / mirror / Atom feed
From: Luben Tuikov <ltuikov@yahoo.com>
To: git@vger.kernel.org
Subject: [PATCH] gitweb.cgi: Use File::MMagic; "a=blob" action knows the blob/file type
Date: Fri, 7 Jul 2006 21:10:21 -0700 (PDT)	[thread overview]
Message-ID: <20060708041021.24704.qmail@web31804.mail.mud.yahoo.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 775 bytes --]

Use File::MMagic to determine the MIME type of a blob/file.
The variable magic_mime_file holds the location of the
"magic.mime" file, usually "/usr/share/file/magic.mime".
If not defined, the magic numbers internally stored in the
File::MMagic module are used.

The "blob" action now knows the file type: if the type is
not "text/*", then "blob" falls back to "blob_plain", i.e.
the file is sent raw for the browser to interpret.
If the file type is "text/*", then "blob" defaults to the
current "cat -n"-like output, from which you can click
"plain" to get the "blob_plain" output.

Signed-off-by: Luben Tuikov <ltuikov@yahoo.com>
---
 gitweb/gitweb.cgi |  140 +++++++++++++++++++++--------------------------------
 1 files changed, 56 insertions(+), 84 deletions(-)

[-- Attachment #2: pat785262450 --]
[-- Type: text/plain, Size: 5173 bytes --]

diff --git a/gitweb/gitweb.cgi b/gitweb/gitweb.cgi
index cce0753..6798990 100755
--- a/gitweb/gitweb.cgi
+++ b/gitweb/gitweb.cgi
@@ -14,6 +14,8 @@ use CGI::Util qw(unescape);
 use CGI::Carp qw(fatalsToBrowser);
 use Encode;
 use Fcntl ':mode';
+use File::MMagic;
+use FileHandle;
 binmode STDOUT, ':utf8';
 
 our $cgi = new CGI;
@@ -54,9 +56,15 @@ #our $projects_list = $projectroot;
 our $projects_list = "index/index.aux";
 
 # default blob_plain mimetype and default charset for text/plain blob
-our $default_blob_plain_mimetype = 'text/plain';
+our $default_blob_mimetype = 'text/plain';
 our $default_text_plain_charset  = undef;
 
+# magic_mime_file: if defined this file will be used by File::MMagic
+# to guess the file type, else the magic numbers stored internally
+# in File::MMagic will be used.  Either relative or absolute name
+# can be given.  E.g. "/usr/share/file/magic.mime".
+our $magic_mime_file = undef;
+
 # file to use for guessing MIME types before trying /etc/mime.types
 # (relative to the current git repository)
 our $mimetypes_file = undef;
@@ -1455,11 +1463,58 @@ sub git_get_hash_by_path {
 	}
 }
 
+#
+# File::MMagic 1.27 has a bug: reading from a piped filehandle (e.g.
+# STDIN, or "-|") always yields 'text/plain', whereas checking a file
+# on the file system (as checktype_filename() does) determines the
+# type properly.  Hence the blob is first written to a temporary file
+# under $git_temp and checked by name.
+#
+sub get_blob_mimetype {
+	my $blob_file = "$git_temp/blob-$hash";
+	if (! -r $blob_file) {
+	    open my $fd_in, "-|", "$gitbin/git-cat-file blob $hash" or return $default_blob_mimetype;
+	    open my $fd_out, "> $blob_file";
+	    my @file = <$fd_in>;
+	    print $fd_out @file;
+	    close $fd_out;
+	    close $fd_in;
+	}
+	my $mm = $magic_mime_file ? File::MMagic->new($magic_mime_file) : new File::MMagic;
+	my $mime = $mm->checktype_filename($blob_file);
+	return $mime;
+}
+
+sub git_blob_plain {
+	open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or return;
+	my $mimetype = get_blob_mimetype();
+
+	# save as filename, even when no $file_name is given
+	my $save_as = "$hash";
+	if (defined $file_name) {
+		$save_as = $file_name;
+	} elsif ($mimetype =~ m/^text\//) {
+		$save_as .= '.txt';
+	}
+
+	print $cgi->header(-type => "$mimetype", '-content-disposition' => "inline; filename=\"$save_as\"");
+	undef $/;
+	binmode STDOUT, ':raw';
+	print <$fd>;
+	binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
+	$/ = "\n";
+	close $fd;
+}
+
 sub git_blob {
 	if (!defined $hash && defined $file_name) {
 		my $base = $hash_base || git_read_head($project);
 		$hash = git_get_hash_by_path($base, $file_name, "blob") || die_error(undef, "Error lookup file.");
 	}
+	my $mimetype = get_blob_mimetype();
+	if ($mimetype !~ m/^text\//) {
+		return git_blob_plain();
+	}
 	my $have_blame = git_get_project_config_bool ('blame');
 	open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or die_error(undef, "Open failed.");
 	git_header_html();
@@ -1510,89 +1565,6 @@ sub git_blob {
 	git_footer_html();
 }
 
-sub mimetype_guess_file {
-	my $filename = shift;
-	my $mimemap = shift;
-	-r $mimemap or return undef;
-
-	my %mimemap;
-	open(MIME, $mimemap) or return undef;
-	while (<MIME>) {
-		my ($mime, $exts) = split(/\t+/);
-		my @exts = split(/\s+/, $exts);
-		foreach my $ext (@exts) {
-			$mimemap{$ext} = $mime;
-		}
-	}
-	close(MIME);
-
-	$filename =~ /\.(.*?)$/;
-	return $mimemap{$1};
-}
-
-sub mimetype_guess {
-	my $filename = shift;
-	my $mime;
-	$filename =~ /\./ or return undef;
-
-	if ($mimetypes_file) {
-		my $file = $mimetypes_file;
-		#$file =~ m#^/# or $file = "$projectroot/$path/$file";
-		$mime = mimetype_guess_file($filename, $file);
-	}
-	$mime ||= mimetype_guess_file($filename, '/etc/mime.types');
-	return $mime;
-}
-
-sub git_blob_plain_mimetype {
-	my $fd = shift;
-	my $filename = shift;
-
-	# just in case
-	return $default_blob_plain_mimetype unless $fd;
-
-	if ($filename) {
-		my $mime = mimetype_guess($filename);
-		$mime and return $mime;
-	}
-
-	if (-T $fd) {
-		return 'text/plain' .
-		       ($default_text_plain_charset ? '; charset='.$default_text_plain_charset : '');
-	} elsif (! $filename) {
-		return 'application/octet-stream';
-	} elsif ($filename =~ m/\.png$/i) {
-		return 'image/png';
-	} elsif ($filename =~ m/\.gif$/i) {
-		return 'image/gif';
-	} elsif ($filename =~ m/\.jpe?g$/i) {
-		return 'image/jpeg';
-	} else {
-		return 'application/octet-stream';
-	}
-}
-
-sub git_blob_plain {
-	open my $fd, "-|", "$gitbin/git-cat-file blob $hash" or return;
-	my $type = git_blob_plain_mimetype($fd, $file_name);
-
-	# save as filename, even when no $file_name is given
-	my $save_as = "$hash";
-	if (defined $file_name) {
-		$save_as = $file_name;
-	} elsif ($type =~ m/^text\//) {
-		$save_as .= '.txt';
-	}
-
-	print $cgi->header(-type => "$type", '-content-disposition' => "inline; filename=\"$save_as\"");
-	undef $/;
-	binmode STDOUT, ':raw';
-	print <$fd>;
-	binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
-	$/ = "\n";
-	close $fd;
-}
-
 sub git_tree {
 	if (!defined $hash) {
 		$hash = git_read_head($project);
-- 
1.4.1.g2f3c


             reply	other threads:[~2006-07-08  4:10 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-08  4:10 Luben Tuikov [this message]
2006-07-08  6:18 ` [PATCH] gitweb.cgi: Use File::MMagic; "a=blob" action knows the blob/file type Junio C Hamano
2006-07-09  1:17   ` Luben Tuikov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060708041021.24704.qmail@web31804.mail.mud.yahoo.com \
    --to=ltuikov@yahoo.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.