homebrew-cask/developer/bin/the_long_tail

249 lines
6.2 KiB
Ruby
Executable File

#!/usr/bin/env ruby
#
# the_long_tail
#
# A histogram view on contributor stats
#
# notes
#
# Since this script does not track file-renames in the git history, the
# dependence of Casks upon occasional contributors/non-maintainers can
# only be expressed as a range or lower bound.
#
###
### dependencies
###
require 'open3'
require 'set'
###
### configurable constants
###
# Lower bounds of the histogram bins, in ascending order. Each bin starts at
# its entry and runs up to (but does not include) the next entry.
BINS = [
  (1..10).to_a,
  100,
  1000,
].flatten.freeze
# Contributors with at most this many commits are reported as "occasional".
OCCASIONAL_CUTOFF = 5
# Directory holding the Cask files, relative to the repository root.
CASK_PATH = 'Casks'.freeze
# all maintainers, past and present, identified by the email addresses that
# appear as the author of their commits
MAINTAINERS = %w[
  paul.t.hinze@gmail.com
  fanquake@users.noreply.github.com
  fanquake@gmail.com
  kevin@suttle.io
  leoj3n@gmail.com
  nano@fdp.io
  nanoid.xd@gmail.com
  me@passcod.name
  walker@pobox.com
  info@vitorgalvao.com
  calebcenter@live.com
  ndr@qef.io
  josh@joshbutts.com
  goxberry@gmail.com
  radek.simko@gmail.com
  federicobond@gmail.com
].freeze
###
### git methods
###
# Change the working directory to the top level of the git repository that
# contains this script, and return that path.
#
# The script's own directory is entered first so that git resolves the
# enclosing repository no matter where the script was started from. The
# root is looked up once and cached in @git_root.
#
# Raises RuntimeError when git does not report a repository root, so that a
# nil path never reaches Dir.chdir.
def cd_to_project_root
  Dir.chdir File.dirname(File.expand_path(__FILE__))
  @git_root ||= begin
    # stderr is left attached to the terminal so git's own diagnostic is shown
    output, status = Open3.capture2('git', 'rev-parse', '--show-toplevel')
    root = output.lines.first.to_s.chomp
    unless status.success? && !root.empty?
      raise "could not determine the git project root from #{Dir.pwd}"
    end
    root
  end
  Dir.chdir @git_root
  @git_root
end
# Count the non-merge commits in the git log per author email.
#
# Returns a Hash of author email (String) => number of commits (Integer),
# keyed in the order the emails first appear in the log. The result is
# computed on first use and cached in @authors.
def authors
  return @authors if @authors
  git_log = %w[git log --no-merges --format=%ae --]
  @authors = Open3.popen3(*git_log) do |_stdin, stdout, _stderr|
    stdout.each_line.each_with_object({}) do |raw, tally|
      email = raw.chomp
      tally[email] = tally.fetch(email, 0) + 1
    end
  end
end
# Map each author email to the set of Cask paths that author's non-merge
# commits have touched.
#
# git prints every commit as an author-email line, a blank line, and then
# the paths changed under CASK_PATH. The output is read line by line and
# each path is credited to the most recent email line above it; crediting
# it to the email that follows would assign it to a different commit.
#
# Returns a Hash of email (String) => Set of paths (String), cached in
# @casks_by_author.
def casks_by_author
  @casks_by_author ||= Open3.popen3(
    *%w[git log --no-merges --name-only --format=%ae --],
    CASK_PATH
  ) do |_stdin, stdout, _stderr|
    cask_prefix = "#{CASK_PATH}/"
    email = nil
    stdout.each_line.each_with_object({}) do |raw, by_author|
      line = raw.chomp
      next if line.empty?
      if line.include?(cask_prefix)
        # a path before any email line has no author to credit
        next if email.nil?
        (by_author[email] ||= Set.new) << line
      else
        email = line
      end
    end
  end
end
###
### filesystem methods
###
# List every regular file named *.rb found under CASK_PATH by /usr/bin/find.
#
# Returns an Array of paths (String) exactly as find prints them, with the
# trailing newline removed. Cached in @all_casks after the first call.
def all_casks
  return @all_casks if @all_casks
  find_command = ['/usr/bin/find', CASK_PATH, '-type', 'f', '-name', '*.rb']
  @all_casks = Open3.popen3(*find_command) do |_stdin, stdout, _stderr|
    stdout.each_line.map { |path| path.chomp }
  end
end
###
### analysis and report methods
###
# Build the contributor histogram: for each entry of BINS, the number of
# authors whose commit count falls into that bin.
#
# An author is counted in the largest bin whose lower bound does not exceed
# their commit count, so the final bin is open-ended and also holds everyone
# at or above it. A count below the smallest bin is not counted at all.
#
# Returns a Hash of bin lower bound (Integer) => number of authors
# (Integer), with one key per BINS entry. Cached in @histogram.
def histogram
  @histogram ||= begin
    counts = Hash[BINS.map { |bin| [bin, 0] }]
    authors.each_value do |num_commits|
      bin = BINS.select { |lower| lower <= num_commits }.max
      counts[bin] += 1 if bin
    end
    counts
  end
end
# Collect every Cask path recorded in casks_by_author for an occasional
# contributor, i.e. an author with at most OCCASIONAL_CUTOFF commits.
# Authors with no entry in casks_by_author contribute nothing.
#
# Returns a Set of paths (String). The set is built once and cached in
# @historic_occasional_cask_set.
def historic_occasional_cask_set
  @historic_occasional_cask_set ||=
    authors.each_with_object(Set.new) do |(email, num_commits), casks|
      next if num_commits > OCCASIONAL_CUTOFF
      casks.merge(casks_by_author.fetch(email, []))
    end
end
# Number of Casks currently on disk that an occasional contributor has
# touched.
def extant_occasional_cask_count
  # keep only paths that still exist, so a renamed Cask is not counted twice
  still_present = historic_occasional_cask_set & all_casks
  still_present.size
end
# Collect every Cask path recorded in casks_by_author for an author whose
# email is not listed in MAINTAINERS. Authors with no entry in
# casks_by_author contribute nothing.
#
# Returns a Set of paths (String). The set is built once and cached in
# @historic_nonmaintainer_cask_set.
def historic_nonmaintainer_cask_set
  @historic_nonmaintainer_cask_set ||= begin
    outsiders = authors.keys.reject { |email| MAINTAINERS.include?(email) }
    outsiders.each_with_object(Set.new) do |email, casks|
      casks.merge(casks_by_author.fetch(email, []))
    end
  end
end
# Number of Casks currently on disk that a non-maintainer has touched.
def extant_nonmaintainer_cask_count
  # keep only paths that still exist, so a renamed Cask is not counted twice
  still_present = historic_nonmaintainer_cask_set & all_casks
  still_present.size
end
# Whole-number percentage (rounded down) of the Casks on disk that an
# occasional contributor has touched. Cached after the first call.
def extant_occasional_cask_percentage
  return @extant_occasional_cask_percentage if @extant_occasional_cask_percentage
  touched = extant_occasional_cask_count
  total = all_casks.count
  @extant_occasional_cask_percentage = (100 * touched / total).to_i
end
# Whole-number percentage (rounded down) obtained by dividing the size of
# historic_occasional_cask_set by the number of Casks on disk. Cached after
# the first call.
def historic_occasional_cask_percentage
  return @historic_occasional_cask_percentage if @historic_occasional_cask_percentage
  touched = historic_occasional_cask_set.count
  total = all_casks.count
  @historic_occasional_cask_percentage = (100 * touched / total).to_i
end
# Whole-number percentage (rounded down) of the Casks on disk that a
# non-maintainer has touched. Cached after the first call.
def extant_nonmaintainer_cask_percentage
  return @extant_nonmaintainer_cask_percentage if @extant_nonmaintainer_cask_percentage
  touched = extant_nonmaintainer_cask_count
  total = all_casks.count
  @extant_nonmaintainer_cask_percentage = (100 * touched / total).to_i
end
# Whole-number percentage (rounded down) obtained by dividing the size of
# historic_nonmaintainer_cask_set by the number of Casks on disk, capped at
# 100. Cached after the first call.
def historic_nonmaintainer_cask_percentage
  return @historic_nonmaintainer_cask_percentage if @historic_nonmaintainer_cask_percentage
  touched = historic_nonmaintainer_cask_set.count
  total = all_casks.count
  raw = (100 * touched / total).to_i
  # the historic set can outnumber the Casks on disk, so cap the figure
  @historic_nonmaintainer_cask_percentage = raw > 100 ? 100 : raw
end
# Whole-number percentage (rounded down) of authors counted in histogram
# bin 1. Cached after the first call.
def onetime_author_percentage
  return @onetime_author_percentage if @onetime_author_percentage
  single_commit_authors = histogram[1]
  @onetime_author_percentage = (100 * single_commit_authors / authors.length).to_i
end
# Whole-number percentage (rounded down) of authors who are "occasional",
# i.e. have at most OCCASIONAL_CUTOFF commits. Cached after the first call.
#
# The authors are counted directly instead of summing histogram bins, so
# the figure stays correct for any cutoff and does not depend on how BINS
# is laid out.
def occasional_author_percentage
  @occasional_author_percentage ||= begin
    occasional = authors.values.count { |num_commits| num_commits <= OCCASIONAL_CUTOFF }
    (100 * occasional / authors.length).to_i
  end
end
# Number of columns available for the histogram bars.
#
# Takes the last field of `stty size`, falls back to 80 when no positive
# number is obtained, and gives up 20 columns whenever more than 20 are
# available. Cached in @graph_width.
def graph_width
  return @graph_width unless @graph_width.nil?
  columns = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i
  columns = 80 unless columns > 0
  columns -= 20 if columns > 20
  @graph_width = columns
end
# Largest histogram count, as a Float; print_table divides each count by
# it to scale the bars. Cached after the first call.
def graph_normalization
  return @graph_normalization if @graph_normalization
  tallest = histogram.values.max
  @graph_normalization = tallest.to_f
end
# Print the two-line table header: the column titles and a dashed rule.
def print_header
  heading = "Commits\tContributors"
  rule = "---------------------"
  puts heading, rule
end
# Print one tab-separated row per BINS entry: the bin label, the number of
# contributors in it, and a bar of dots.
#
# Bins divisible by 10 get an "'s" suffix on their label. The bar length is
# the bin's share of graph_normalization scaled to graph_width; the
# fractional part is dropped when the dots are repeated.
def print_table
  BINS.each do |bin|
    suffix = (bin % 10).zero? ? "'s" : ''
    bar_length = (histogram[bin] / graph_normalization) * graph_width
    bar = '.' * bar_length
    puts "#{bin}#{suffix}\t#{histogram[bin]}\t#{bar}"
  end
end
# Print the summary sentences below the table, each preceded by a blank
# line, followed by one final blank line.
def print_footer
  occasional = occasional_author_percentage
  puts %Q[\n#{occasional}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)]

  onetime = onetime_author_percentage
  puts "\n#{onetime}% of contributors commit only once"

  # the Cask figures are ranges: extant value first, historic value second
  occasional_range = "#{extant_occasional_cask_percentage}% - #{historic_occasional_cask_percentage}%"
  puts "\n#{occasional_range} of Casks depend on an occasional contributor"

  nonmaintainer_range = "#{extant_nonmaintainer_cask_percentage}% - #{historic_nonmaintainer_cask_percentage}%"
  puts "\n#{nonmaintainer_range} of Casks depend on a contributor who is not a maintainer"

  puts "\n"
end
# Print the whole report: header, then the histogram table, then the
# footer. Returns whatever print_footer returns.
def generate_report
  sections = [:print_header, :print_table, :print_footer]
  sections.map { |section| send(section) }.last
end
###
### main
###
# Move to the repository root first: CASK_PATH and the git/find commands
# used by the report are run from the current working directory.
cd_to_project_root
# Then print the header, histogram table and footer to standard output.
generate_report