#!/usr/bin/env ruby
# Calculates sizes of repository at different commits in git
#
# 20060819 Initial release
# 20060820 Pass arguments to git-rev-list
#
# (c)2006 R. Nijlunsing
# License: LGPLv2

require 'set'
require 'enumerator'

if ARGV.empty?
  puts "Calculates sizes of repository at different commits"
  puts
  puts "Usage: #{$0} <arguments for git-rev-list>"
  puts "Example: #{$0} HEAD"
  exit 1
end

# Accumulated counters for a blob, a tree, or a whole commit:
# number of tree objects, number of blob objects, and total bytes as
# reported by `git cat-file -s` for the blobs.
class Sizes
  attr_reader :trees, :blobs, :bytes

  def initialize(trees, blobs, bytes)
    @trees = trees
    @blobs = blobs
    @bytes = bytes
  end

  # Adds the counters of +o+ (another Sizes) to this object in place.
  def add(o)
    @trees += o.trees
    @blobs += o.blobs
    @bytes += o.bytes
  end
end

# Returns the Sizes of the tree object +tree+ (a SHA1 string), recursing
# into sub-trees. Results for trees and blobs are memoized in $sha2size,
# so objects shared between commits are only inspected once. An object
# that occurs several times inside one tree is counted once per occurrence.
#
# Raises RuntimeError when a tree listing contains a line that cannot be
# parsed or an entry type other than tree, blob or commit.
def tree_size(tree)
  return $sha2size[tree] if $sha2size.include?(tree)

  size = Sizes.new(1, 0, 0)
  blobs = [] # Blobs with unknown sizes

  # The array form of popen runs git directly, without a shell in between.
  IO.popen(["git", "cat-file", "-p", tree], "r") do |io|
    io.each_line do |line|
      entry = line.match(%r{^[0-9]{6} ([a-z]+) ([0-9a-f]+)})
      raise "cannot parse entry of tree #{tree}: #{line.inspect}" unless entry

      type = entry[1]
      sha1 = entry[2]
      if $sha2size.include?(sha1)
        size.add($sha2size[sha1])
      elsif type == "tree"
        size.add(tree_size(sha1))
      elsif type == "blob"
        blobs << sha1
      elsif type == "commit"
        # Gitlink (submodule): the commit lives in another repository, so
        # there is nothing to measure in this one.
        next
      else
        raise "unknown entry type #{type.inspect} in tree #{tree}"
      end
    end
  end

  # git cat-file -s takes a single object, so blobs are queried one by one.
  # A blob listed more than once is only queried the first time.
  blobs.each do |blob|
    blob_size = $sha2size[blob] ||= IO.popen(["git", "cat-file", "-s", blob], "r") do |io|
      # A missing answer (git failed) counts as 0 bytes.
      Sizes.new(0, 1, io.gets.to_i)
    end
    size.add(blob_size)
  end

  $sha2size[tree] = size
end

$sha2size = {} # SHA1 -> Sizes

# Print one line per commit selected by git rev-list:
# "<commit> <trees> <blobs> <bytes>".
IO.popen(["git", "rev-list", *ARGV], "r") do |cio|
  cio.each_line do |commit|
    commit = commit.chomp

    tree = nil # Root tree of this commit
    IO.popen(["git", "cat-file", "-p", commit], "r") do |io|
      io.each_line do |line|
        tree = line[%r{^tree ([a-f0-9]+)}, 1]
        break if tree
      end
    end
    next unless tree

    sizes = tree_size(tree)
    puts "#{commit} #{sizes.trees} #{sizes.blobs} #{sizes.bytes}"
  end
end