256 lines
6.5 KiB
Ruby
Executable File
256 lines
6.5 KiB
Ruby
Executable File
#!/usr/bin/env ruby
|
|
#
|
|
# the_long_tail
|
|
#
|
|
# A histogram view on contributor stats
|
|
#
|
|
# notes
|
|
#
|
|
# Since this script does not track file-renames in the git history, the
|
|
# dependence of Casks upon occasional contributors/non-maintainers can
|
|
# only be expressed as a range or lower bound.
|
|
#
|
|
|
|
###
|
|
### dependencies
|
|
###
|
|
|
|
require 'open3'
|
|
require 'set'
|
|
|
|
###
|
|
### configurable constants
|
|
###
|
|
|
|
# Histogram bin boundaries, in ascending order. Each bin is keyed by its
# lower bound: one bin per commit count from 1 through 9, then coarser bins
# starting at 10, 100 and 1000. Frozen so the shared table cannot be mutated
# by accident.
BINS = [*(1..10), 100, 1000].freeze

# Contributors with at most this many commits are reported as "occasional".
OCCASIONAL_CUTOFF = 5

# Directory, relative to the repository root, that holds the Cask files.
CASK_PATH = 'Casks'.freeze
|
|
|
|
# All maintainers, past and present, identified by commit author email.
# Some people appear under more than one address. Frozen so the list cannot
# be mutated by accident.
MAINTAINERS = %w[
  paul.t.hinze@gmail.com
  fanquake@users.noreply.github.com
  fanquake@gmail.com
  kevin@suttle.io
  leoj3n@gmail.com
  nano@fdp.io
  nanoid.xd@gmail.com
  me@passcod.name
  walker@pobox.com
  info@vitorgalvao.com
  calebcenter@live.com
  ndr@qef.io
  josh@joshbutts.com
  goxberry@gmail.com
  radek.simko@gmail.com
  federicobond@gmail.com
  claui@users.noreply.github.com
  amorymeltzer@gmail.com
  hagins.josh@gmail.com
  dragon.vctr@gmail.com
  mail@sebastianroeder.de
  github@adityadalal.com
  adityadalal924@users.noreply.github.com
].freeze
|
|
|
|
###
|
|
### git methods
|
|
###
|
|
|
|
# Changes the working directory to the top level of the git repository that
# contains this script, and returns that path. The path is looked up once and
# cached in @git_root.
#
# Raises RuntimeError with a descriptive message when git prints no top-level
# directory (for example when the script does not live inside a work tree).
# Previously that failure was swallowed and resurfaced as an opaque TypeError
# from Dir.chdir(nil).
def cd_to_project_root
  # Start from the script's own directory so the lookup does not depend on
  # where the script was invoked from.
  Dir.chdir File.dirname(File.expand_path(__FILE__))

  @git_root ||= Open3.popen3('git', 'rev-parse', '--show-toplevel') do |_stdin, stdout, _stderr|
    first_line = stdout.gets
    first_line && first_line.chomp
  end

  if @git_root.nil? || @git_root.empty?
    @git_root = nil
    raise "could not determine the git repository root from #{Dir.pwd}"
  end

  Dir.chdir @git_root
  @git_root
end
|
|
|
|
# Returns a Hash mapping each commit author email to the number of non-merge
# commits carrying that email, as listed by `git log` in the current
# directory. The result is cached in @authors.
def authors
  @authors ||= Open3.popen3('git', 'log', '--no-merges', '--format=%ae', '--') do |_stdin, stdout, _stderr|
    stdout.each_line.each_with_object({}) do |raw, commit_counts|
      email = raw.chomp
      commit_counts[email] = commit_counts.fetch(email, 0) + 1
    end
  end
end
|
|
|
|
# Returns a Hash mapping each commit author email to the Set of paths under
# CASK_PATH touched by that author's non-merge commits. The result is cached
# in @casks_by_author.
#
# The `git log --name-only --format=%ae` output is read line by line: a line
# beginning with "<CASK_PATH>/" is a file belonging to the most recently seen
# author line, any other non-blank line starts a new commit and names its
# author. The previous implementation split the output on blank lines and
# took the last line of each file paragraph as the owner of those files; with
# git's terminator-style `--format` output that last line is the *next*
# commit's author (or, in the final paragraph, a file name), so files were
# credited to the wrong person.
def casks_by_author
  @casks_by_author ||= Open3.popen3(
    'git', 'log', '--no-merges', '--name-only', '--format=%ae', '--', CASK_PATH
  ) do |_stdin, stdout, _stderr|
    cask_prefix = "#{CASK_PATH}/"
    current_email = nil
    stdout.each_line.each_with_object({}) do |raw, by_author|
      line = raw.chomp
      next if line.empty?

      if line.start_with?(cask_prefix)
        # A file listed before any author line cannot be attributed; skip it.
        (by_author[current_email] ||= Set.new) << line if current_email
      else
        current_email = line
      end
    end
  end
end
|
|
|
|
###
|
|
### filesystem methods
|
|
###
|
|
|
|
# Returns an Array with the path of every regular `*.rb` file that
# /usr/bin/find reports under CASK_PATH. The result is cached in @all_casks.
def all_casks
  @all_casks ||= begin
    # No shell is involved, so the '*.rb' pattern reaches find unexpanded.
    find_command = ['/usr/bin/find', CASK_PATH, '-type', 'f', '-name', '*.rb']
    Open3.popen3(*find_command) do |_stdin, stdout, _stderr|
      stdout.each_line.map { |path| path.chomp }
    end
  end
end
|
|
|
|
###
|
|
### analysis and report methods
|
|
###
|
|
|
|
# Returns a Hash mapping each entry of BINS to the number of authors whose
# commit count falls into that bin. A bin covers the counts from its own
# value up to (but excluding) the next entry of BINS; the last bin is
# open-ended. The result is cached in @histogram.
#
# Previously authors with BINS.last or more commits were never counted, so
# the final row of the table was always zero.
def histogram
  return @histogram unless @histogram.nil?

  counts = Hash[BINS.map { |bin| [bin, 0] }]
  authors.each_value do |num_commits|
    # The matching bin is the largest lower bound not exceeding the count.
    bin = BINS.select { |lower_bound| lower_bound <= num_commits }.max
    counts[bin] += 1 unless bin.nil?
  end
  @histogram = counts
end
|
|
|
|
# Returns the Set of every Cask path ever touched by an "occasional" author,
# i.e. one with at most OCCASIONAL_CUTOFF commits. Paths come from the git
# history, so they may include files that no longer exist.
#
# The result is now cached in @historic_occasional_cask_set; before, the
# instance variable was assigned on every call but never reused.
def historic_occasional_cask_set
  @historic_occasional_cask_set ||= authors.each_with_object(Set.new) do |(email, num_commits), casks|
    next if num_commits > OCCASIONAL_CUTOFF

    # nil.to_a is [], which covers authors with no recorded Cask files.
    casks.merge(casks_by_author[email].to_a)
  end
end
|
|
|
|
# Number of Cask files that still exist on disk and were touched by an
# occasional contributor at some point in the history.
def extant_occasional_cask_count
  # Intersecting with the files on disk drops paths that were renamed or
  # removed, so a renamed Cask is not counted twice.
  still_present = historic_occasional_cask_set & all_casks
  still_present.size
end
|
|
|
|
# Returns the Set of every Cask path ever touched by an author whose email is
# not listed in MAINTAINERS. Paths come from the git history, so they may
# include files that no longer exist.
#
# The result is now cached in @historic_nonmaintainer_cask_set; before, the
# instance variable was assigned on every call but never reused.
def historic_nonmaintainer_cask_set
  @historic_nonmaintainer_cask_set ||= authors.each_key.each_with_object(Set.new) do |email, casks|
    next if MAINTAINERS.include?(email)

    # nil.to_a is [], which covers authors with no recorded Cask files.
    casks.merge(casks_by_author[email].to_a)
  end
end
|
|
|
|
# Number of Cask files that still exist on disk and were touched by a
# non-maintainer at some point in the history.
def extant_nonmaintainer_cask_count
  # Intersecting with the files on disk drops paths that were renamed or
  # removed, so a renamed Cask is not counted twice.
  still_present = historic_nonmaintainer_cask_set & all_casks
  still_present.size
end
|
|
|
|
# Whole-number percentage (rounded down) of the Cask files on disk that were
# touched by an occasional contributor. Cached after the first call.
# Reports 0 instead of raising ZeroDivisionError when no Cask files exist.
def extant_occasional_cask_percentage
  @extant_occasional_cask_percentage ||= begin
    total = all_casks.count
    total.zero? ? 0 : 100 * extant_occasional_cask_count / total
  end
end
|
|
|
|
# Whole-number percentage (rounded down) obtained by dividing the number of
# Cask paths ever touched by an occasional contributor by the number of Cask
# files on disk. Cached after the first call.
# Reports 0 instead of raising ZeroDivisionError when no Cask files exist.
def historic_occasional_cask_percentage
  @historic_occasional_cask_percentage ||= begin
    total = all_casks.count
    total.zero? ? 0 : 100 * historic_occasional_cask_set.count / total
  end
end
|
|
|
|
# Whole-number percentage (rounded down) of the Cask files on disk that were
# touched by a non-maintainer. Cached after the first call.
# Reports 0 instead of raising ZeroDivisionError when no Cask files exist.
def extant_nonmaintainer_cask_percentage
  @extant_nonmaintainer_cask_percentage ||= begin
    total = all_casks.count
    total.zero? ? 0 : 100 * extant_nonmaintainer_cask_count / total
  end
end
|
|
|
|
# Whole-number percentage (rounded down) obtained by dividing the number of
# Cask paths ever touched by a non-maintainer by the number of Cask files on
# disk. Cached after the first call.
#
# The historic set can hold more paths than exist on disk, so the figure is
# capped at 100. Reports 0 instead of raising ZeroDivisionError when no Cask
# files exist.
def historic_nonmaintainer_cask_percentage
  @historic_nonmaintainer_cask_percentage ||= begin
    total = all_casks.count
    raw = total.zero? ? 0 : 100 * historic_nonmaintainer_cask_set.count / total
    [100, raw].min
  end
end
|
|
|
|
# Whole-number percentage (rounded down) of authors that fall into the
# one-commit bin of the histogram. Cached after the first call.
def onetime_author_percentage
  @onetime_author_percentage ||= begin
    single_commit_authors = histogram[1]
    scaled = 100 * single_commit_authors
    (scaled / authors.length).to_i
  end
end
|
|
|
|
# Whole-number percentage (rounded down) of authors whose commit count lies
# in the histogram bins 1 through OCCASIONAL_CUTOFF. Cached after the first
# call.
def occasional_author_percentage
  @occasional_author_percentage ||= begin
    # values_at picks the per-bin counts for 1..OCCASIONAL_CUTOFF in one go.
    per_bin = histogram.values_at(*(1..OCCASIONAL_CUTOFF))
    occasional_authors = per_bin.reduce(:+)
    (100 * occasional_authors / authors.length).to_i
  end
end
|
|
|
|
# Number of columns available for the histogram bars. The last field of
# `/bin/stty size` is used as the terminal width, falling back to 80 when
# that yields nothing positive; 20 columns are then subtracted from any width
# above 20. Cached after the first call.
def graph_width
  @graph_width ||= begin
    columns = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i
    columns = 80 unless columns > 0
    columns > 20 ? columns - 20 : columns
  end
end
|
|
|
|
# The largest bin count in the histogram, as a Float; bar lengths are scaled
# relative to it. Cached after the first call.
def graph_normalization
  @graph_normalization ||= begin
    tallest = histogram.values.max
    tallest.to_f
  end
end
|
|
|
|
# Prints the two column titles of the report followed by a dashed rule.
def print_header
  puts "Commits\tContributors", "---------------------"
end
|
|
|
|
# Prints one row per entry of BINS: the bin label, the number of authors in
# that bin and a bar of dots whose length is proportional to that number
# (the fullest bin spans graph_width columns).
#
# Bins that are multiples of 10 get a "'s" suffix ("10's", "100's", ...).
# When every bin is empty the normalization is 0.0; the bars are then drawn
# with zero length instead of raising FloatDomainError on a NaN length.
def print_table
  BINS.each do |bin|
    count = histogram[bin]
    suffix = (bin % 10).zero? ? "'s" : ''
    bar_length =
      if graph_normalization.zero?
        0
      else
        # Truncate toward zero, as String#* does for a Float argument.
        (count / graph_normalization * graph_width).to_i
      end
    puts "#{bin}#{suffix}\t#{count}\t#{'.' * bar_length}"
  end
end
|
|
|
|
# Prints the four summary sentences of the report, each preceded by a blank
# line, and finishes with one more blank line.
def print_footer
  puts '', %Q[#{occasional_author_percentage}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)]
  puts '', "#{onetime_author_percentage}% of contributors commit only once"
  puts '', "#{extant_occasional_cask_percentage}% - #{historic_occasional_cask_percentage}% of Casks depend on an occasional contributor"
  puts '', "#{extant_nonmaintainer_cask_percentage}% - #{historic_nonmaintainer_cask_percentage}% of Casks depend on a contributor who is not a maintainer"
  puts "\n"
end
|
|
|
|
# Prints the full report: header, histogram table, then footer, in that
# order. Returns whatever print_footer returns.
def generate_report
  [:print_header, :print_table, :print_footer].map { |section| send(section) }.last
end
|
|
|
|
###
|
|
### main
|
|
###
|
|
|
|
# Run the report only when this file is executed as a script, so that loading
# it from elsewhere defines the methods without changing directory or
# printing anything.
if __FILE__ == $PROGRAM_NAME
  # Must come first: every git/find invocation in the report assumes the
  # working directory is the repository root.
  cd_to_project_root
  generate_report
end
|