3 files changed, 173 insertions, 1 deletions
diff --git a/.mailmap b/.mailmap
new file mode 100644
index 0000000..bb72068
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,3 @@
+Ben Sima <ben@bsima.me>
+Nick Sima <nicksima@gmail.com> <nick@simatime.com>
+David Johnson <djohnson.m@gmail.com>
diff --git a/Biz/lint.py b/Biz/lint.py
index 88720b3..4d760d2 100755
--- a/Biz/lint.py
+++ b/Biz/lint.py
@@ -35,7 +35,8 @@ def run(cmd, file):
         msg = ret.stdout.decode("utf-8").strip()
         print(Color.WARN + f"lint error: {cmd}: {file}" + Color.END)
         if msg:
-            print(msg)
+            for line in msg.split("\n"):
+                print("  " + line)
 
 
 def changed_files():
diff --git a/Devalloc/main.py b/Devalloc/main.py
new file mode 100755
index 0000000..1010966
--- /dev/null
+++ b/Devalloc/main.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+"""
+Analyze developer allocation across a codebase.
+"""
+
+import argparse
+import logging
+import os
+import re
+import subprocess
+import sys
+
+
+def extract_email(line):
+    """Pull 'ben@bsima.me' out of a line like 'Ben Sima <ben@bsima.me>'."""
+    return re.compile(r"<(\S*)>").search(line).group(1)
+
+
+def authors_for(path, active_users):
+    """Return a dictionary of {author email: commits} for the given path. Emails
+    not in 'active_users' are filtered out; commits is git's count as a string."""
+    # Pass HEAD explicitly: without a revision, git shortlog reads the log from
+    # stdin whenever stdin is not a terminal (cron, CI, pipes) instead of the repo.
+    cmd = ["git", "shortlog", "--numbered", "--summary", "--email", "HEAD"]
+    raw = subprocess.check_output(cmd + ["--", path]).decode("utf-8")
+    data = {}
+    for line in filter(None, raw.split("\n")):
+        # each summary line is "<count>\t<name> <email>"
+        commits, identity = line.strip().split("\t", 1)
+        author = extract_email(identity)
+        if author in active_users:
+            data[author] = commits
+    return data
+
+
+MAX_SCORE = 10
+
+
+def score(blackhole, liability, good, total):
+    """Calculate the score, out of MAX_SCORE, from per-category file counts.
+
+    Good files count fully, liabilities 0.7 and blackholes 0.5; the weighted sum
+    is divided by 'total' and scaled by MAX_SCORE. Returns 0.0 when 'total' is 0.
+    """
+    if not total:
+        # nothing tracked (or everything ignored): avoid a ZeroDivisionError
+        return 0.0
+    weights = {
+        "blackhole": 0.5,
+        "liability": 0.7,
+    }
+    weighted = blackhole * weights["blackhole"] + liability * weights["liability"]
+    return MAX_SCORE * (weighted + good) / total
+
+
+def get_args():
+    """Parse CLI arguments; REPO is optional and defaults to the current directory."""
+    cli = argparse.ArgumentParser(description=__doc__)
+    # nargs="?" makes the positional optional; without it default="." never applies
+    cli.add_argument(
+        "repo", nargs="?", default=".", help="the git repo to run on", metavar="REPO"
+    )
+    cli.add_argument(
+        "-b",
+        "--blackholes",
+        action="store_true",
+        help="print the blackholes (files with one or zero active contributors)",
+    )
+    cli.add_argument(
+        "-l",
+        "--liabilities",
+        action="store_true",
+        help="print the liabilities (files with < 3 active contributors)",
+    )
+    cli.add_argument(
+        "-i", "--ignored", nargs="+", default=[], help="patterns to ignore in paths",
+    )
+    cli.add_argument(
+        "--active-users", nargs="+", default=[], help="list of active user emails",
+    )
+    cli.add_argument(
+        "-v", "--verbosity", choices=["debug", "warning", "error"], default="error",
+        help="set the log level verbosity",
+    )
+    return cli.parse_args()
+
+
+def guard_git(repo):
+    """Exit with an error message unless the working directory is in a git repo.
+
+    'git rev-parse' runs in the current directory; 'repo' only labels the message.
+    """
+    # capture both streams so git's own complaint is not shown to the user
+    pipes = {"stdout": subprocess.PIPE, "stderr": subprocess.PIPE}
+    status = subprocess.run(["git", "rev-parse"], check=False, **pipes).returncode
+    if status:
+        sys.exit(f"error: not a git repository: {repo}")
+
+
+class Repo:
+    """Per-file authorship stats for the git repo in the current directory.
+
+    paths: tracked files whose path contains none of the ignored patterns.
+    stats: {path: {author email: commits}}, limited to the active users.
+    blackholes: paths with one or zero active contributors.
+    liabilities: {path: [author emails]} for paths with exactly two.
+    """
+
+    def __init__(self, ignored_paths, active_users):
+        # -z lists names NUL-separated and unquoted, so paths containing spaces
+        # or non-ASCII characters stay intact; whitespace splitting mangled them.
+        listing = subprocess.check_output(["git", "ls-files", "-z"]).decode("utf-8")
+        self.paths = [
+            p
+            for p in listing.split("\0")
+            if p and not any(i in p for i in ignored_paths)
+        ]
+        logging.debug("collecting stats")
+        self.stats = {path: authors_for(path, active_users) for path in self.paths}
+        # One active contributor still counts as a blackhole, as the --blackholes
+        # help text says; such files must not fall through to the "good" bucket.
+        self.blackholes = [p for p, who in self.stats.items() if len(who) <= 1]
+        self.liabilities = {
+            p: list(who) for p, who in self.stats.items() if len(who) == 2
+        }
+
+    def print_blackholes(self, full):
+        "Print the blackhole count, then each blackhole path when 'full' is set."
+        # note: file renames may result in false positives
+        print(f"Blackholes: {len(self.blackholes)}")
+        if full:
+            for path in self.blackholes:
+                print(f"  {path}")
+
+    def print_liabilities(self, full):
+        "Print the liability count, then paths with their authors if 'full' is set."
+        print(f"Liabilities: {len(self.liabilities)}")
+        if full:
+            for path, authors in self.liabilities.items():
+                print(f"  {path}  ({', '.join(authors)})")
+
+    def print_score(self):
+        "Print the number of files analyzed and the overall score out of MAX_SCORE."
+        n_total = len(self.stats)
+        n_blackhole = len(self.blackholes)
+        n_liabilities = len(self.liabilities)
+        n_good = n_total - n_blackhole - n_liabilities
+        print("Total:", n_total)
+        value = score(n_blackhole, n_liabilities, n_good, n_total)
+        print("Score: {:.2f}/{}".format(value, MAX_SCORE))
+
+
+if __name__ == "__main__":
+    ARGS = get_args()
+    logging.basicConfig(stream=sys.stderr, level=ARGS.verbosity.upper())
+
+    logging.debug("starting")
+    # every git command below runs in the process cwd, so enter the repo first
+    os.chdir(os.path.abspath(ARGS.repo))
+    # the check itself uses the cwd; ARGS.repo is only echoed in the error message
+    guard_git(ARGS.repo)
+
+    # collect data (one `git shortlog` subprocess per tracked file)
+    REPO = Repo(ARGS.ignored, ARGS.active_users)
+
+    # print data: counts always, the path listings only when -b / -l was given
+    REPO.print_score()
+    REPO.print_blackholes(ARGS.blackholes)
+    REPO.print_liabilities(ARGS.liabilities)