#!/usr/bin/env ruby
#
# the_long_tail
#
# A histogram view on contributor stats
#
# notes
#
# Since this script does not track file-renames in the git history, the
# dependence of Casks upon occasional contributors/non-maintainers can
# only be expressed as a range or lower bound.
#
|
||
|
|
||
|
###
|
||
|
### dependencies
|
||
|
###
|
||
|
|
||
|
require "open3"
|
||
|
require "set"
|
||
|
|
||
|
###
|
||
|
### configurable constants
|
||
|
###
|
||
|
|
||
|
# Histogram bins, in ascending order. Each value is the lower bound of a bin
# and doubles as its key in the histogram: one bin per commit count from 1 to
# 10, then coarser bins starting at 100 and 1000.
BINS = [*1..10, 100, 1000].freeze

# Authors with at most this many commits are reported as "occasional".
OCCASIONAL_CUTOFF = 5

# Directory, relative to the project root, that holds the Cask files.
CASK_PATH = "Casks".freeze

# all maintainers, past and present
MAINTAINERS = %w[
  paul.t.hinze@gmail.com
  fanquake@users.noreply.github.com
  fanquake@gmail.com
  kevin@suttle.io
  leoj3n@gmail.com
  nano@fdp.io
  nanoid.xd@gmail.com
  me@passcod.name
  walker@pobox.com
  info@vitorgalvao.com
  calebcenter@live.com
  ndr@qef.io
  josh@joshbutts.com
  goxberry@gmail.com
  radek.simko@gmail.com
  federicobond@gmail.com
  claui@users.noreply.github.com
  amorymeltzer@gmail.com
  hagins.josh@gmail.com
  dragon.vctr@gmail.com
  mail@sebastianroeder.de
  github@adityadalal.com
  adityadalal924@users.noreply.github.com
].freeze
|
||
|
|
||
|
###
|
||
|
### git methods
|
||
|
###
|
||
|
|
||
|
# Changes the working directory to the root of the git checkout that contains
# this script and returns that root path.
#
# The root is looked up with `git rev-parse --show-toplevel`, run from the
# script's own directory, and memoized in @git_root.
#
# Raises RuntimeError when git reports no toplevel (for example when the
# script is not inside a git checkout). Previously that failure was swallowed
# and surfaced later as a TypeError from `Dir.chdir nil`.
def cd_to_project_root
  Dir.chdir File.dirname(File.expand_path(__FILE__))
  @git_root ||= begin
    toplevel = Open3.popen3("git", "rev-parse", "--show-toplevel") do |_stdin, stdout, _stderr|
      # `gets` returns nil when git printed nothing; normalize that to "".
      stdout.gets.to_s.chomp
    end
    raise "unable to determine the git project root from #{Dir.pwd}" if toplevel.empty?

    toplevel
  end
  Dir.chdir @git_root
  @git_root
end
|
||
|
|
||
|
# Returns a Hash mapping each author email to its number of non-merge commits,
# as listed by `git log --no-merges --format=%ae`. The result is memoized in
# @authors.
def authors
  return @authors if @authors

  git_log = %w[git log --no-merges --format=%ae --]
  @authors = Open3.popen3(*git_log) do |_stdin, stdout, _stderr|
    # One output line per commit, so counting lines counts commits.
    stdout.each_line.each_with_object({}) do |raw, commit_counts|
      email = raw.chomp
      commit_counts[email] = commit_counts.fetch(email, 0) + 1
    end
  end
end
|
||
|
|
||
|
# Returns a Hash mapping each author email to the Set of Cask paths touched
# by that author's non-merge commits under CASK_PATH. Memoized in
# @casks_by_author.
#
# `git log --name-only --format=%ae` prints, for every commit, the author
# email, then a blank line, then the changed paths, with the next commit's
# email following directly. The output is therefore read line by line: an
# email line sets the current author and every following path line is
# credited to that author. The earlier paragraph-based parse credited each
# commit's files to the *next* commit's author.
def casks_by_author
  @casks_by_author ||= Open3.popen3(
    "git", "log", "--no-merges", "--name-only", "--format=%ae", "--", CASK_PATH
  ) do |_stdin, stdout, _stderr|
    cask_prefix = "#{CASK_PATH}/"
    email = nil
    stdout.each_line.each_with_object({}) do |raw, casks|
      line = raw.chomp
      next if line.empty?

      if line.include?(cask_prefix)
        # A path line: belongs to the most recently seen author.
        (casks[email] ||= Set.new) << line if email
      else
        # Anything else is the author line that opens a new commit.
        email = line
      end
    end
  end
end
|
||
|
|
||
|
###
|
||
|
### filesystem methods
|
||
|
###
|
||
|
|
||
|
# Returns the paths of all regular `*.rb` files found below CASK_PATH by
# /usr/bin/find, one Array entry per file. Memoized in @all_casks.
def all_casks
  return @all_casks if @all_casks

  # No shell is involved, so "*.rb" reaches find as a literal pattern.
  find_command = ["/usr/bin/find", CASK_PATH, "-type", "f", "-name", "*.rb"]
  @all_casks = Open3.popen2(*find_command) do |_stdin, stdout|
    stdout.readlines.map(&:chomp)
  end
end
|
||
|
|
||
|
###
|
||
|
### analysis and report methods
|
||
|
###
|
||
|
|
||
|
# Returns a Hash mapping each bin in BINS to the number of authors whose
# commit count falls into it. Memoized in @histogram.
#
# Every value in BINS is the lower bound of a bin, so an author is filed under
# the largest bin that does not exceed their commit count (BINS is expected
# to be in ascending order). The last bin is open-ended: previously authors
# with BINS.last or more commits were dropped and that bin stayed at 0.
def histogram
  @histogram ||= begin
    counts = BINS.each_with_object({}) { |bin, tally| tally[bin] = 0 }
    authors.each_value do |num_commits|
      bin = BINS.reverse_each.find { |lower_bound| num_commits >= lower_bound }
      # bin is nil only for counts below the first bin; those are not graphed.
      counts[bin] += 1 if bin
    end
    counts
  end
end
|
||
|
|
||
|
# Returns the Set of every Cask path ever touched by an occasional author,
# i.e. one with at most OCCASIONAL_CUTOFF commits. Authors without an entry
# in casks_by_author contribute nothing.
#
# Memoized in @historic_occasional_cask_set; the earlier plain assignment
# rebuilt the set on every call.
def historic_occasional_cask_set
  @historic_occasional_cask_set ||=
    authors.each_with_object(Set.new) do |(name, num_commits), casks|
      next if num_commits > OCCASIONAL_CUTOFF

      casks.merge(casks_by_author.fetch(name, []))
    end
end
|
||
|
|
||
|
# Returns how many Casks touched by occasional authors still exist on disk.
def extant_occasional_cask_count
  # Intersecting with the extant Casks avoids double-counting renames.
  surviving = historic_occasional_cask_set & all_casks
  surviving.size
end
|
||
|
|
||
|
# Returns the Set of every Cask path ever touched by an author whose email is
# not listed in MAINTAINERS. Authors without an entry in casks_by_author
# contribute nothing.
#
# Memoized in @historic_nonmaintainer_cask_set; the earlier plain assignment
# rebuilt the set on every call.
def historic_nonmaintainer_cask_set
  @historic_nonmaintainer_cask_set ||=
    (authors.keys - MAINTAINERS).each_with_object(Set.new) do |name, casks|
      casks.merge(casks_by_author.fetch(name, []))
    end
end
|
||
|
|
||
|
# Returns how many Casks touched by non-maintainers still exist on disk.
def extant_nonmaintainer_cask_count
  # Intersecting with the extant Casks avoids double-counting renames.
  surviving = historic_nonmaintainer_cask_set & all_casks
  surviving.size
end
|
||
|
|
||
|
# Returns the whole-number percentage of existing Casks that were touched by
# an occasional author (integer division, rounded down). Memoized.
#
# Returns 0 instead of raising ZeroDivisionError when no Casks were found.
def extant_occasional_cask_percentage
  @extant_occasional_cask_percentage ||= begin
    total = all_casks.count
    total.zero? ? 0 : 100 * extant_occasional_cask_count / total
  end
end
|
||
|
|
||
|
# Returns the size of the historic occasional-author Cask set as a
# whole-number percentage of the Casks that exist today (integer division,
# rounded down). Memoized.
#
# Returns 0 instead of raising ZeroDivisionError when no Casks were found.
def historic_occasional_cask_percentage
  @historic_occasional_cask_percentage ||= begin
    total = all_casks.count
    total.zero? ? 0 : 100 * historic_occasional_cask_set.count / total
  end
end
|
||
|
|
||
|
# Returns the whole-number percentage of existing Casks that were touched by
# a non-maintainer (integer division, rounded down). Memoized.
#
# Returns 0 instead of raising ZeroDivisionError when no Casks were found.
def extant_nonmaintainer_cask_percentage
  @extant_nonmaintainer_cask_percentage ||= begin
    total = all_casks.count
    total.zero? ? 0 : 100 * extant_nonmaintainer_cask_count / total
  end
end
|
||
|
|
||
|
# Returns the size of the historic non-maintainer Cask set as a whole-number
# percentage of the Casks that exist today, capped at 100. Memoized.
#
# Returns 0 instead of raising ZeroDivisionError when no Casks were found.
def historic_nonmaintainer_cask_percentage
  @historic_nonmaintainer_cask_percentage ||= begin
    total = all_casks.count
    if total.zero?
      0
    else
      # this is so large, it might cross 100%
      [100, 100 * historic_nonmaintainer_cask_set.count / total].min
    end
  end
end
|
||
|
|
||
|
# Returns the whole-number percentage of authors with exactly one commit
# (the histogram's bin 1), using integer division. Memoized.
#
# Returns 0 instead of raising ZeroDivisionError when there are no authors.
def onetime_author_percentage
  @onetime_author_percentage ||= begin
    total = authors.length
    total.zero? ? 0 : 100 * histogram[1] / total
  end
end
|
||
|
|
||
|
# Returns the whole-number percentage of authors with at most
# OCCASIONAL_CUTOFF commits, summing histogram bins 1..OCCASIONAL_CUTOFF and
# using integer division. Memoized.
#
# Returns 0 instead of raising ZeroDivisionError when there are no authors;
# bins missing from the histogram count as 0.
def occasional_author_percentage
  @occasional_author_percentage ||= begin
    total = authors.length
    if total.zero?
      0
    else
      # values_at slices the hash; compact drops bins it does not contain.
      occasional = histogram.values_at(*(1..OCCASIONAL_CUTOFF)).compact.reduce(0, :+)
      100 * occasional / total
    end
  end
end
|
||
|
|
||
|
# Returns the number of columns available for the histogram bars. Memoized.
#
# The terminal width is read from `/bin/stty size`; when that yields nothing
# usable, 80 is assumed. Widths above 20 are reduced by 20.
def graph_width
  @graph_width ||= begin
    # stty prints "<rows> <columns>"; an empty result converts to 0.
    columns = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i
    columns = 80 unless columns > 0
    columns > 20 ? columns - 20 : columns
  end
end
|
||
|
|
||
|
# Returns the largest histogram bin count as a Float; the bars are scaled
# relative to it. Memoized in @graph_normalization.
def graph_normalization
  return @graph_normalization if @graph_normalization

  tallest_bin = histogram.values.max
  @graph_normalization = tallest_bin.to_f
end
|
||
|
|
||
|
# Prints the two-line table header: the column titles and a rule.
def print_header
  titles = "Commits\tContributors"
  rule = "---------------------"
  puts titles, rule
end
|
||
|
|
||
|
# Prints one tab-separated row per bin: the bin label, the number of authors
# in it, and a bar of dots scaled so that the fullest bin spans graph_width.
#
# Bins that are multiples of 10 are labelled with a trailing "'s". When the
# normalization is zero (no authors at all) the bars are left empty instead
# of failing on a NaN bar length.
def print_table
  scale = graph_normalization
  BINS.each do |bin|
    count = histogram[bin]
    label = (bin % 10).zero? ? "#{bin}'s" : bin.to_s
    bar_length = scale.zero? ? 0 : (count / scale * graph_width).to_i
    puts "#{label}\t#{count}\t#{'.' * bar_length}"
  end
end
|
||
|
|
||
|
# Prints the summary sentences below the table, each preceded by a blank
# line: the share of occasional and one-time authors, then the extant-to-
# historic ranges of Casks that depend on occasional authors and on
# non-maintainers.
def print_footer
  occasional_authors = %Q{\n#{occasional_author_percentage}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)}
  puts occasional_authors

  onetime_authors = "\n#{onetime_author_percentage}% of contributors commit only once"
  puts onetime_authors

  occasional_casks = "\n#{extant_occasional_cask_percentage}% - #{historic_occasional_cask_percentage}% of Casks depend on an occasional contributor"
  puts occasional_casks

  nonmaintainer_casks = "\n#{extant_nonmaintainer_cask_percentage}% - #{historic_nonmaintainer_cask_percentage}% of Casks depend on a contributor who is not a maintainer"
  puts nonmaintainer_casks

  puts "\n"
end
|
||
|
|
||
|
# Prints the complete report: header, histogram table, then summary footer.
def generate_report
  %i[print_header print_table print_footer].each { |section| send(section) }
end
|
||
|
|
||
|
###
|
||
|
### main
|
||
|
###
|
||
|
|
||
|
# Work from the repository root so that CASK_PATH and the git commands are
# resolved against the whole project, then print the report.
cd_to_project_root
generate_report
|