#!/usr/bin/env ruby
#
# the_long_tail
#
# A histogram view on contributor stats
#
# notes
#
# Since this script does not track file renames in the git history, the
# dependence of Casks upon occasional contributors/non-maintainers can
# only be expressed as a range or lower bound.
#

###
### dependencies
###

require "open3"
require "set"

###
### configurable constants
###

# Lower bounds of the histogram bins. An author with N commits is counted in
# the largest bin whose lower bound is <= N, so the last bin is open-ended.
BINS = [
  (1..10).to_a,
  100,
  1000,
].flatten.freeze

# Authors with at most this many commits are considered "occasional".
OCCASIONAL_CUTOFF = 5

# Directory (relative to the repository root) that holds the Cask files.
CASK_PATH = "Casks".freeze

# all maintainers, past and present
MAINTAINERS = %w[
  paul.t.hinze@gmail.com
  fanquake@users.noreply.github.com
  fanquake@gmail.com
  kevin@suttle.io
  leoj3n@gmail.com
  nano@fdp.io
  nanoid.xd@gmail.com
  me@passcod.name
  walker@pobox.com
  info@vitorgalvao.com
  calebcenter@live.com
  ndr@qef.io
  josh@joshbutts.com
  goxberry@gmail.com
  radek.simko@gmail.com
  federicobond@gmail.com
  claui@users.noreply.github.com
  amorymeltzer@gmail.com
  hagins.josh@gmail.com
  dragon.vctr@gmail.com
  mail@sebastianroeder.de
  github@adityadalal.com
  adityadalal924@users.noreply.github.com
].freeze

###
### git methods
###

# Changes the working directory to the root of the git repository that
# contains this script and returns that root path.
#
# Aborts with a message on stderr when git cannot report a top-level
# directory (for example when the script is not inside a working tree),
# instead of failing later with an unrelated error from Dir.chdir.
def cd_to_project_root
  # start from the script's own directory so git resolves the right repository
  Dir.chdir File.dirname(File.expand_path(__FILE__))
  @git_root ||= begin
    stdout, _stderr, status = Open3.capture3("git", "rev-parse", "--show-toplevel")
    root = stdout.lines.first.to_s.chomp
    abort "the_long_tail: unable to determine the git project root" unless status.success? && !root.empty?
    root
  end
  Dir.chdir @git_root
  @git_root
end

# Returns a Hash mapping each author email to its number of non-merge
# commits, as reported by `git log`. Memoized.
def authors
  @authors ||= Open3.popen3(*%w[git log --no-merges --format=%ae --]) do |_stdin, stdout, _stderr|
    stdout.each_line.each_with_object({}) do |line, counts|
      email = line.chomp
      counts[email] = counts.fetch(email, 0) + 1
    end
  end
end

# Returns a Hash mapping author email to the Set of paths under CASK_PATH
# touched by that author's non-merge commits. Authors who never touched a
# Cask have no key. Memoized.
def casks_by_author
  @casks_by_author ||= Open3.popen3(*%w[git log --no-merges --name-only --format=%ae --], CASK_PATH) do |_stdin, stdout, _stderr|
    parse_casks_by_author(stdout.each_line)
  end
end

# Parses `git log --name-only --format=%ae` output given as an enumerable of
# lines. Blank lines are ignored; a line beginning with "#{CASK_PATH}/" is a
# file belonging to the most recently seen email; any other line is the email
# of the next commit. Working line by line credits each file list to the
# commit that precedes it, and does not depend on where git places the blank
# separator lines.
def parse_casks_by_author(lines)
  cask_prefix = "#{CASK_PATH}/"
  email = nil
  lines.each_with_object({}) do |raw_line, by_author|
    line = raw_line.chomp
    next if line.empty?

    if line.start_with?(cask_prefix)
      # a path seen before any email has no owner and is dropped
      (by_author[email] ||= Set.new) << line if email
    else
      email = line
    end
  end
end

###
### filesystem methods
###

# Returns the paths of all *.rb files currently under CASK_PATH, as printed
# by find(1). Memoized.
def all_casks
  @all_casks ||= Open3.popen2("/usr/bin/find", CASK_PATH, *%w[-type f -name *.rb]) do |_stdin, stdout|
    stdout.each_line.map(&:chomp)
  end
end

###
### analysis and report methods
###

# Returns a Hash mapping each bin in BINS to the number of authors whose
# commit count falls into it. Memoized.
def histogram
  @histogram ||= begin
    counts = Hash[BINS.map { |bin| [bin, 0] }]
    authors.each_value do |num_commits|
      # largest lower bound not exceeding the count; this also fills the last bin
      bin = BINS.select { |lower_bound| lower_bound <= num_commits }.max
      counts[bin] += 1 if bin
    end
    counts
  end
end

# Integer percentage of part in whole, truncated; 0 when whole is zero so an
# empty repository produces a report instead of a ZeroDivisionError.
def percentage(part, whole)
  return 0 if whole.zero?

  (100 * part / whole).to_i
end

# Set of every Cask path ever touched by an author with at most
# OCCASIONAL_CUTOFF commits. Memoized.
def historic_occasional_cask_set
  @historic_occasional_cask_set ||= authors.each_with_object(Set.new) do |(name, num_commits), casks|
    next if num_commits > OCCASIONAL_CUTOFF

    casks.merge(casks_by_author.fetch(name, []))
  end
end

def extant_occasional_cask_count
  # avoid double-counting renames by intersecting with extant Casks
  historic_occasional_cask_set.intersection(all_casks).count
end

# Set of every Cask path ever touched by an author not listed in
# MAINTAINERS. Memoized.
def historic_nonmaintainer_cask_set
  @historic_nonmaintainer_cask_set ||= authors.keys.each_with_object(Set.new) do |name, casks|
    next if MAINTAINERS.include?(name)

    casks.merge(casks_by_author.fetch(name, []))
  end
end

def extant_nonmaintainer_cask_count
  # avoid double-counting renames by intersecting with extant Casks
  historic_nonmaintainer_cask_set.intersection(all_casks).count
end

def extant_occasional_cask_percentage
  @extant_occasional_cask_percentage ||= percentage(extant_occasional_cask_count, all_casks.count)
end

def historic_occasional_cask_percentage
  @historic_occasional_cask_percentage ||= percentage(historic_occasional_cask_set.count, all_casks.count)
end

def extant_nonmaintainer_cask_percentage
  @extant_nonmaintainer_cask_percentage ||= percentage(extant_nonmaintainer_cask_count, all_casks.count)
end

def historic_nonmaintainer_cask_percentage
  # this is so large, it might cross 100%
  @historic_nonmaintainer_cask_percentage ||=
    [100, percentage(historic_nonmaintainer_cask_set.count, all_casks.count)].min
end

def onetime_author_percentage
  @onetime_author_percentage ||= percentage(histogram.fetch(1, 0), authors.length)
end

def occasional_author_percentage
  @occasional_author_percentage ||= begin
    # bins absent from BINS contribute nothing
    occasional = (1..OCCASIONAL_CUTOFF).map { |bin| histogram.fetch(bin, 0) }.reduce(0, :+)
    percentage(occasional, authors.length)
  end
end

# Number of columns available for the bar graph: the terminal width reported
# by stty (80 when unavailable), minus 20 when the width exceeds 20.
def graph_width
  @graph_width ||= begin
    width = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i
    width = 80 if width <= 0
    width -= 20 if width > 20
    width
  end
end

# Largest bin count as a Float; bars are scaled relative to it.
def graph_normalization
  @graph_normalization ||= histogram.values.max.to_f
end

# Bar of dots for one bin, scaled so the fullest bin spans graph_width.
# Returns an empty bar when every bin is empty, avoiding a NaN length.
def graph_bar(count)
  return "" if graph_normalization.zero?

  "." * (count / graph_normalization * graph_width).to_i
end

def print_header
  puts "Commits\tContributors"
  puts "---------------------"
end

def print_table
  BINS.each do |bin|
    plural = (bin % 10).zero? ? "'s" : ""
    puts "#{bin}#{plural}\t#{histogram[bin]}\t#{graph_bar(histogram[bin])}"
  end
end

def print_footer
  puts %Q{\n#{occasional_author_percentage}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)}
  puts "\n#{onetime_author_percentage}% of contributors commit only once"
  puts "\n#{extant_occasional_cask_percentage}% - #{historic_occasional_cask_percentage}% of Casks depend on an occasional contributor"
  puts "\n#{extant_nonmaintainer_cask_percentage}% - #{historic_nonmaintainer_cask_percentage}% of Casks depend on a contributor who is not a maintainer"
  puts "\n"
end

def generate_report
  print_header
  print_table
  print_footer
end

###
### main
###

cd_to_project_root
generate_report