Skip to content
This repository was archived by the owner on Apr 20, 2022. It is now read-only.

Commit 0a18af8

Browse files
committed
Initial public release.
1 parent 2d9fd34 commit 0a18af8

17 files changed

+313
-0
lines changed

.gitignore

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
*.gem
2+
*.rbc
3+
.bundle
4+
.config
5+
.yardoc
6+
_yardoc
7+
coverage
8+
doc/
9+
InstalledFiles
10+
lib/bundler/man
11+
pkg
12+
rdoc
13+
spec/reports
14+
test/tmp
15+
test/version_tmp
16+
tmp

.ruby-gemset

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
git_commit_size_scraper

.ruby-version

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ruby-2.0.0-p247

.travis.yml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
rvm:
2+
- 1.9.3
3+
- 2.0.0
4+
branches:
5+
only:
6+
- master

AUTHORS

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LICENSE

COPYING

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LICENSE

Gemfile

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
source 'https://rubygems.org'
2+
gemspec

Gemfile.lock

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
PATH
2+
remote: .
3+
specs:
4+
git_commit_size_scraper (1.0.0)
5+
statistical_array (~> 1.0)
6+
7+
GEM
8+
remote: https://rubygems.org/
9+
specs:
10+
rake (10.1.0)
11+
statistical_array (1.0.0)
12+
13+
PLATFORMS
14+
ruby
15+
16+
DEPENDENCIES
17+
bundler (~> 1.3)
18+
git_commit_size_scraper!
19+
rake

LICENSE

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Copyright (c) 2012, 2013 Todd A. Jacobs
2+
3+
This program is free software: you can redistribute it and/or modify it under
4+
the terms of the GNU General Public License as published by the Free Software
5+
Foundation, either version 3 of the License, or (at your option) any later
6+
version.
7+
8+
This program is distributed in the hope that it will be useful, but WITHOUT ANY
9+
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
10+
PARTICULAR PURPOSE. See the GNU General Public License for more details.
11+
12+
You should have received a copy of the GNU General Public License along with
13+
this program. If not, see <http://www.gnu.org/licenses/>.

README.md

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Git Commit Size Scraper
2+
3+
## Copyright and Licensing
4+
5+
### Copyright Notice
6+
7+
The copyright for the software, documentation, and associated files is
8+
held by the author.
9+
10+
Copyright 2012, 2013 Todd A. Jacobs
11+
All rights reserved.
12+
13+
The AUTHORS file is also included in the source tree.
14+
15+
### Software License
16+
17+
![GPLv3 Logo](http://www.gnu.org/graphics/gplv3-88x31.png)
18+
19+
The software is licensed under the
20+
[GPLv3](http://www.gnu.org/copyleft/gpl.html). The LICENSE file is
21+
included in the source tree.
22+
23+
### README License
24+
25+
![Creative Commons BY-NC-SA
26+
Logo](http://i.creativecommons.org/l/by-nc-sa/3.0/us/88x31.png)
27+
28+
This README is licensed under the [Creative Commons
29+
Attribution-NonCommercial-ShareAlike 3.0 United States
30+
License](http://creativecommons.org/licenses/by-nc-sa/3.0/us/).
31+
32+
## Purpose
33+
34+
Git doesn't provide an easy way to determine the actual size of a given
35+
commit. However, it is possible to walk Git history to estimate the
36+
commit size in bytes by summing the objects associated with the commit.
37+
38+
## Caveats
39+
40+
- Currently measures blobs only; other object types are considered
41+
"overhead."
42+
- Overhead from tree objects and commit objects is not currently
43+
included in the per-commit totals.
44+
- Reports actual object size in bytes; does not take space savings from
45+
deltification or the packfile format into account.
46+
47+
## Installation and Setup
48+
49+
gem install git_commit_size_scraper
50+
51+
## Usage
52+
53+
# Accepts most arguments understood by git-rev-list(1).
54+
git_commit_size_scraper [args]
55+
56+
## Examples
57+
58+
No screenshots here, just samples of what you can expect to see on
59+
standard output when you run the program.
60+
61+
### Command Line
62+
63+
$> git_commit_size_scraper --max-count=10
64+
Walking 10 commits ...
65+
{"f60d13b456162bb3b2364336e517f3f16c900693"=>28942,
66+
"c1194d24f71f9f8ff9aa1fd83337a70d7c233481"=>28813,
67+
"0142283e2b014f5206bba70a7d6e695096b4da1c"=>28789,
68+
"891773ec2a448992e19cb1d5b3d894abee88ce23"=>28563,
69+
"9e5c5dcd87b542444b17388ea86cb34bcdca29ba"=>28520,
70+
"b2f0faf4fca0ccbf26388b87cd9aa49e6e6149a4"=>28344,
71+
"0880b036e42c2e8bbf4ddbf8c540390c12cc83cb"=>28211,
72+
"f2160f0a5f48424eb1b5d7a8292f814b293191ec"=>28157,
73+
"01256264e64949b2ba1aa61a513d633c9bee7157"=>27757,
74+
"5d52891ab5ac19efaf1f8f1a0f596e954d0e9dd8"=>26647}
75+
76+
Average: 28,274.3 bytes
77+
Median : 28,432.0 bytes
78+
Std Dev: 693.07 bytes
79+
Total : 282,743 bytes
80+
81+
### REPL
82+
83+
require 'git_commit_size_scraper/scraper'
84+
include GitCommitSizeScraper
85+
86+
git = Scraper.new '-n10'
87+
git.walk
88+
p "Total bytes committed: %d" % git.list.values.compact.reduce(:+)
89+
# => "Total bytes committed: 282743"
90+
91+
----
92+
93+
[Project Home Page](https://github.com/CodeGnome/git_commit_size_scraper)

Rakefile

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
require "bundler/gem_tasks"

bin/git_commit_size_scraper

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/usr/bin/env ruby
2+
3+
require_relative File.join '..', 'lib', 'git_commit_size_scraper'
4+
5+
include GitCommitSizeScraper
6+
include StatisticalArray
7+
8+
# Format a number for display with comma thousands separators.
#
# The value is rounded to two decimal places first. Separators are inserted
# only into the integer part; fractional digits are left untouched.
#
# @param num [Numeric] value to format
# @return [String] for example, numsep(28274.3) returns "28,274.3"
def numsep(num)
  # Zero-width match at every position that has a digit behind it and a whole
  # number of three-digit groups ahead of it, optionally followed by a
  # fractional part, before the end of the string.
  thousands_boundary = /(?<=\d)(?=(?:\d{3})+(?:\.\d+)?\z)/
  num.round(2).to_s.gsub(thousands_boundary, ',')
end
12+
13+
# Print summary statistics for a list of commit sizes to standard output.
#
# Emits a blank line followed by the average, median, standard deviation and
# total, each formatted with numsep.
#
# @param values [Array<Integer>] per-commit sizes in bytes
# @return [nil]
def report(values)
  stats = StatsArray.new values

  puts
  # Each label is paired with the statistic it names: the mean goes under
  # "Average" and the middle value under "Median".
  puts "Average: %s bytes" % numsep(stats.avg)
  puts "Median : %s bytes" % numsep(stats.median)
  puts "Std Dev: %s bytes" % numsep(stats.std_dev.round(2))
  puts "Total : %s bytes" % numsep(stats.sum)
end
22+
23+
git = Scraper.new ARGV
24+
count = git.commits.count
25+
26+
puts "Walking #{count} commits ..."
27+
git.walk
28+
pp git.list
29+
report git.list.values.map(&:to_i)

git_commit_size_scraper.gemspec

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# coding: utf-8
2+
lib = File.expand_path('../lib', __FILE__)
3+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4+
require 'git_commit_size_scraper/version'
5+
6+
# Gem metadata and packaging manifest for git_commit_size_scraper.
Gem::Specification.new do |spec|
  spec.name          = "git_commit_size_scraper"
  spec.version       = GitCommitSizeScraper::VERSION
  spec.authors       = ["Todd A. Jacobs"]
  # NOTE(review): this address looks redacted in this copy of the file;
  # confirm the real contact address before publishing.
  spec.email         = ["[email protected]"]
  spec.description   = "Walk Git history to calculate commit size in bytes."
  # RubyGems warns when the summary is empty, so provide a short one that
  # differs from the description.
  spec.summary       = "Estimate Git commit sizes in bytes."
  spec.homepage      = "https://github.com/CodeGnome/git_commit_size_scraper"
  # SPDX identifier matching the "version 3 ... or any later version" grant
  # in the LICENSE file.
  spec.license       = "GPL-3.0-or-later"

  # Package exactly what Git tracks; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_dependency "statistical_array", "~> 1.0"

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end

lib/git_commit_size_scraper.foo

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/usr/bin/env ruby
2+
3+
# Walks Git history and estimates the size of each commit in bytes by summing
# the sizes of the blobs reported for that commit by git-diff-tree(1).
class GitCommitSize
  # commits: commit ids returned by git-rev-list(1), in the order Git printed them.
  # list:    commit id => total blob bytes (nil when no blobs were reported);
  #          empty until #walk has been called.
  attr_reader :commits, :list

  # @param args [String, Array<String>, nil] extra options appended to
  #   `git rev-list --all`
  def initialize(args = nil)
    # Array() accepts nil, a single option string, or a list of options.
    @commits = `git rev-list --all #{Array(args).join(' ')}`.split
    @list = {}
  end

  # Object ids touched by a commit.
  #
  # Takes the fourth whitespace-separated field of every raw diff line.
  # NOTE(review): for an ordinary two-way diff line that field should be the
  # post-image blob id; combined (merge) diff lines have a different layout.
  # Confirm against the git-diff-tree raw output format.
  #
  # @param treeish [String] commit id
  # @return [Array<String>]
  def blobs_in_commit(treeish)
    `git diff-tree -r -c -M -C --no-commit-id #{treeish}`.
      split("\n").
      map { |line| line.split[3] }
  end

  # Size of a single Git object as reported by `git cat-file -s`.
  #
  # @param obj [String] object id
  # @return [Integer] size in bytes; 0 for an all-zero id
  def size_in_bytes(obj)
    return 0 if obj =~ /\A0+\z/
    `git cat-file -s #{obj}`.to_i
  end

  # Sum of the sizes of all objects reported for a commit.
  #
  # @param treeish [String] commit id
  # @return [Integer, nil] total bytes, or nil when no objects were reported
  def bytes_in_commit(treeish)
    blobs_in_commit(treeish).map { |blob| size_in_bytes blob }.reduce(:+)
  end

  # Measure every commit and record the result in #list.
  #
  # @return [Array<Integer, nil>] the per-commit sizes, in #commits order
  def walk
    @commits.map { |c| @list[c] = bytes_in_commit c }
  end

  # Measured commits ordered from smallest to largest, skipping commits
  # with no recorded size.
  #
  # @return [Array<Array(String, Integer)>] [commit id, bytes] pairs
  def sorted
    @list.reject { |_, bytes| bytes.nil? }.sort_by { |_, bytes| bytes }
  end

  # Mean commit size, rounded to the nearest byte. Commits without a
  # recorded size count as zero because the divisor is the commit count.
  #
  # @return [Integer] 0 when there are no commits
  def avg
    return 0 if @commits.empty?
    (@list.values.compact.reduce(0, :+).to_f / @commits.count).round
  end

  # Median commit size; commits without a recorded size count as zero.
  #
  # @return [Integer, Float, nil] the middle value for an odd count, the mean
  #   of the two middle values for an even count, nil when nothing has been
  #   measured
  def median
    sizes = @list.values.map(&:to_i).sort
    return nil if sizes.empty?

    mid = sizes.count / 2
    sizes.count.odd? ? sizes[mid] : (sizes[mid - 1] + sizes[mid]) / 2.0
  end
end
50+
51+
if __FILE__ == $0
52+
git = GitCommitSize.new
53+
count = git.commits.count
54+
puts "Walking #{count} commits."
55+
git.walk
56+
result = git.sorted
57+
numsep = /(?<=\d)(?=(?:\d{3})+(\.\d+)?\z)/
58+
pp result
59+
puts "Average: %s bytes" % git.avg.to_s.gsub(numsep, ?,)
60+
end

lib/git_commit_size_scraper.rb

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env ruby
2+
3+
require 'pp'
4+
require 'bundler/setup'
5+
require 'statistical_array'
6+
require 'git_commit_size_scraper/version'
7+
require 'git_commit_size_scraper/scraper'
8+
9+
# When this file is executed directly rather than required, hand off to the
# command-line entry point in bin/.
if __FILE__ == $0
  # Resolve the path from __FILE__ rather than __dir__: __dir__ exists only on
  # Ruby 2.0 and later, and the project's CI matrix also lists 1.9.3.
  load File.expand_path(File.join('..', '..', 'bin', 'git_commit_size_scraper'), __FILE__)
end
+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
module GitCommitSizeScraper
  # Walks Git history and estimates the size of each commit in bytes by
  # summing the sizes of the objects git-diff-tree(1) reports for it.
  class Scraper
    # commits: commit ids returned by git-rev-list(1), in the order Git printed them.
    # list:    commit id => total bytes (nil when no objects were reported);
    #          empty until #walk has been called.
    attr_reader :commits, :list

    # @param args [Array<String, Array<String>>] extra git-rev-list(1)
    #   arguments. Each element is handed to git verbatim as one argument, so
    #   values containing spaces or shell metacharacters are safe; pass
    #   separate options as separate elements.
    def initialize(*args)
      extra = args.flatten.compact.map(&:to_s).reject(&:empty?)
      @commits = git('rev-list', '--all', *extra).split
      @list = {}
    end

    # Object ids touched by a commit.
    #
    # Takes the fourth whitespace-separated field of every raw diff line.
    # NOTE(review): for an ordinary two-way diff line that field should be the
    # post-image blob id; combined (merge) diff lines have a different layout.
    # Confirm against the git-diff-tree raw output format.
    #
    # @param treeish [String] commit id
    # @return [Array<String>]
    def blobs_in_commit(treeish)
      git('diff-tree', '-r', '-c', '-M', '-C', '--no-commit-id', treeish.to_s)
        .split("\n")
        .map { |line| line.split[3] }
    end

    # Size of a single Git object as reported by `git cat-file -s`.
    #
    # @param obj [String, nil] object id
    # @return [Integer] size in bytes; 0 for a missing or all-zero id
    def size_in_bytes(obj)
      return 0 if obj.nil? || obj =~ /\A0+\z/
      git('cat-file', '-s', obj.to_s).to_i
    end

    # Sum of the sizes of all objects reported for a commit.
    #
    # @param treeish [String] commit id
    # @return [Integer, nil] total bytes, or nil when no objects were reported
    def bytes_in_commit(treeish)
      blobs_in_commit(treeish).map { |blob| size_in_bytes blob }.reduce(:+)
    end

    # Measure every commit and record the result in #list.
    #
    # @return [Array<Integer, nil>] the per-commit sizes, in #commits order
    def walk
      @commits.map { |c| @list[c] = bytes_in_commit c }
    end

    private

    # Run git with the given argument vector and return its standard output.
    # The array form of IO.popen executes git directly without a shell, so
    # arguments are never re-split or interpreted.
    def git(*argv)
      IO.popen(['git', *argv], &:read)
    end
  end
end
+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
module GitCommitSizeScraper
  # Release version of the gem. Frozen so the shared string cannot be
  # mutated in place by code that reads it.
  VERSION = "1.0.0".freeze
end

0 commit comments

Comments
 (0)