summaryrefslogtreecommitdiff
path: root/Biz/Dragons
diff options
context:
space:
mode:
Diffstat (limited to 'Biz/Dragons')
-rw-r--r--Biz/Dragons/Analysis.hs252
-rwxr-xr-xBiz/Dragons/get-examples.sh13
-rwxr-xr-xBiz/Dragons/main.py221
-rw-r--r--Biz/Dragons/pitch.md40
4 files changed, 526 insertions, 0 deletions
diff --git a/Biz/Dragons/Analysis.hs b/Biz/Dragons/Analysis.hs
new file mode 100644
index 0000000..4a1421c
--- /dev/null
+++ b/Biz/Dragons/Analysis.hs
@@ -0,0 +1,252 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- : out dragons-analyze
+module Biz.Dragons.Analysis
+ ( Analysis (..),
+ Commit (..),
+ run,
+ main,
+ test,
+ git,
+ )
+where
+
+import Alpha
+import qualified Biz.Cli as Cli
+import Biz.Test ((@=?))
+import qualified Biz.Test as Test
+import qualified Control.Concurrent.Async as Async
+import qualified Data.Aeson as Aeson
+import Data.Data (Data)
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.String as String
+import qualified Data.Text as Text
+import qualified Data.Time.Clock as Time
+import qualified Data.Time.Format as Time
+import qualified System.Directory as Directory
+import qualified System.Process as Process
+
+-- | Program entry point: bundles the usage text, the CLI action, the test tree
+-- and the cleanup step into a 'Cli.Plan' and hands it to 'Cli.main'.
+main :: IO ()
+main = Cli.Plan help move test tidy |> Cli.main
+
+-- | The CLI action: resolve the git dir argument to an absolute path, run the
+-- analysis for the requested authors, and print the result encoded as JSON.
+move :: Cli.Arguments -> IO ()
+move args = do
+  repoPath <- resolveGitDir
+  analysis <- run activeEmails repoPath
+  putStrLn (Aeson.encode analysis)
+  where
+    -- Uses ".git" when the "git-dir" argument lookup yields nothing.
+    resolveGitDir =
+      Directory.makeAbsolute
+        (Cli.getArgWithDefault args ".git" (Cli.argument "git-dir"))
+    -- NOTE(review): the original author suspected this lookup is not working
+    -- and wondered whether optparse-applicative is needed; not verified here.
+    activeEmails =
+      map Text.pack (Cli.getAllArgs args (Cli.shortOption 'a'))
+
+-- | Cleanup step of the CLI plan. Ignores its argument and does nothing.
+tidy :: cfg -> IO ()
+tidy = \_ -> pure ()
+
+-- | All tests of this module, grouped under the module name.
+test :: Test.Tree
+test =
+  Test.group "Biz.Dragons.Analysis" suites
+  where
+    suites = [test_calculateScore]
+
+-- | Docopt usage text for the @dragons-analyze@ executable.
+--
+-- The option description declares the @<email>@ argument on both the short
+-- and the long spelling so that it agrees with the usage pattern above it,
+-- which writes the option as @--author=<email>@.
+help :: Cli.Docopt
+help =
+  [Cli.docopt|
+dragons-analyze
+
+Usage:
+  dragons-analyze test
+  dragons-analyze [--author=<email>]... <git-dir>
+
+Options:
+  -a <email>, --author=<email>  Email of an active author; may be repeated.
+|]
+
+-- | A commit identifier held as text. 'run' fills it with the trimmed output
+-- of @git rev-parse HEAD@.
+newtype Commit = Sha Text
+ deriving (Eq, Data, Typeable, Ord, Generic, Show)
+
+-- No methods are given, so the class defaults provide the JSON encoding.
+instance Aeson.ToJSON Commit
+
+-- | The result of analyzing a git repo, as produced by 'run'.
+data Analysis = Analysis
+ { -- | Path of the git dir that was analyzed.
+ gitDir :: FilePath,
+ -- | Paths with no active contributors.
+ blackholes :: [Text],
+ -- | Paths with fewer than 3 active contributors.
+ liabilities :: [Text],
+ -- | Map of path to number of commits, for detecting paths that continually
+ -- get rewritten.
+ hotspotMap :: Map FilePath Integer,
+ -- | Paths last touched more than 180 days ago (roughly 6 months), mapped to
+ -- their age in days.
+ stale :: Map FilePath Integer,
+ -- | Total score for the repo, as computed by 'calculateScore'.
+ score :: Integer,
+ -- | Total number of files
+ totalFiles :: Integer,
+ -- | The total number of commits
+ totalCommits :: Integer,
+ -- | List of all the active users we care about, as passed to 'run'.
+ activeAuthors :: [Text],
+ -- | Which commit this analysis was run against.
+ commit :: Commit
+ }
+ deriving (Eq, Ord, Generic, Show, Data, Typeable)
+
+-- No methods are given, so the class defaults provide the JSON encoding.
+instance Aeson.ToJSON Analysis
+
+-- | Analyze the repo behind the given git dir as of @HEAD@.
+--
+-- The first argument is the list of active authors' emails; it is compared
+-- against the email column reported by 'authorsFor'. The second argument is
+-- the path handed to @git --git-dir@.
+--
+-- For every path listed by @git ls-tree -r HEAD@ this collects its authors,
+-- its age in days and its commit count, then classifies the path:
+--
+-- * blackhole: no active author has touched it;
+-- * liability: exactly 1 or 2 active authors have touched it.
+--
+-- The two classes are disjoint. Previously every blackhole was also listed as
+-- a liability (the test was "fewer than 3"), so 'calculateScore' subtracted
+-- those files twice and a repo made only of blackholes scored 20 instead of
+-- the intended 50.
+run :: [Text] -> FilePath -> IO Analysis
+run activeAuthors bareRepo = do
+  commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha
+  tree <-
+    git
+      bareRepo
+      [ "ls-tree",
+        "--full-tree",
+        "--name-only",
+        "-r", -- recurse into subtrees
+        "HEAD"
+      ]
+      /> String.lines
+  authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
+  -- Number of distinct active authors for each path.
+  let activeCounts = zip tree (map countActive authors) :: [(FilePath, Int)]
+  stalenessMap <- traverse (lastTouched bareRepo) tree
+  let blackholes =
+        [Text.pack path | (path, n) <- activeCounts, n == 0]
+  let liabilities =
+        [Text.pack path | (path, n) <- activeCounts, n `elem` [1, 2]]
+  let numBlackholes = realToFrac <| length blackholes
+  let numLiabilities = realToFrac <| length liabilities
+  let numTotal = realToFrac <| length tree
+  -- One @git rev-list@ call per path, all run through 'Async.mapConcurrently'.
+  hotspotMap <-
+    Map.fromList </ Async.mapConcurrently getChangeCount tree
+  -- Output that does not parse as a number is counted as 0 commits.
+  totalCommits <-
+    git bareRepo ["rev-list", "--count", "HEAD"]
+      /> filter (/= '\n')
+      /> readMaybe
+      /> fromMaybe 0
+  pure
+    <| Analysis
+      { gitDir = bareRepo,
+        -- Paths whose last-touched date could not be parsed are left out.
+        stale =
+          Map.fromList
+            <| [ (path, days)
+                 | (path, Just days) <- stalenessMap,
+                   days > 180
+               ],
+        score = calculateScore numTotal numBlackholes numLiabilities,
+        totalFiles = toInteger <| length tree,
+        ..
+      }
+  where
+    third :: (a, b, c) -> c
+    third (_, _, a) = a
+    -- Count the distinct author emails that are in the active list. The
+    -- emails are de-duplicated first because the same address can show up
+    -- on several shortlog lines (for example under different author names).
+    countActive :: [(Text, Text, Text)] -> Int
+    countActive authors_ =
+      length (List.nub (map third authors_) `List.intersect` activeAuthors)
+    -- Number of commits on HEAD that touch the given path (0 if unparsable).
+    getChangeCount :: FilePath -> IO (FilePath, Integer)
+    getChangeCount path =
+      git bareRepo ["rev-list", "--count", "HEAD", "--", path]
+        /> filter (/= '\n')
+        /> readMaybe
+        /> fromMaybe 0
+        /> (path,)
+
+-- | List the authors of a path, as reported by @git shortlog@ on @HEAD@.
+--
+-- The first argument is the git dir, the second a path inside the repo. Each
+-- shortlog line is split at its first tab and then at the first @<@.
+authorsFor ::
+  FilePath ->
+  FilePath ->
+  -- | returns (number of commits, author name, author email)
+  IO [(Text, Text, Text)]
+authorsFor gitDir path =
+  git
+    gitDir
+    ["shortlog", "--numbered", "--summary", "--email", "HEAD", "--", path]
+    /> Text.pack
+    /> Text.lines
+    /> map parseLine
+  where
+    -- Everything before the tab is the commit count; the remainder is the
+    -- author name followed by the email in angle brackets.
+    parseLine line =
+      let (count, identity) = Text.break (== '\t') line
+          (name, bracketed) = Text.break (== '<') identity
+       in ( Text.strip count,
+            Text.strip name,
+            Text.strip (Text.dropAround isBracket bracketed)
+          )
+    isBracket c = c == '<' || c == '>'
+
+-- | Run @git --git-dir <repo> <args...>@ with empty stdin and return what it
+-- wrote to stdout.
+git ::
+  -- | path to the git dir (bare repo)
+  String ->
+  -- | args to `git`
+  [String] ->
+  IO String
+git bareRepo args =
+  Process.readProcess "git" ("--git-dir" : bareRepo : args) ""
+
+-- | How many days ago the given path was last touched, judged by the author
+-- date of the newest commit that @git log -n1@ reports for it.
+--
+-- Returns the path paired with 'Nothing' when the timestamp printed by git
+-- does not parse.
+lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer)
+lastTouched bareRepo path = do
+  now <- Time.getCurrentTime
+  raw <- git bareRepo ["log", "-n1", "--pretty=%aI", "--", path]
+  let authored = parseStamp (filter (/= '\n') raw)
+  pure (path, daysSince now </ authored)
+  where
+    parseStamp stamp =
+      Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z" stamp
+    -- Elapsed time divided by the length of a nominal day, rounded.
+    daysSince now authored =
+      round <| Time.diffUTCTime now authored / Time.nominalDay
+
+-- | Aggregate score for a repo, from the total number of files, the number of
+-- blackholes and the number of liabilities (in that order).
+--
+-- A blackhole counts as half a good file and a liability as 7/10 of one; the
+-- weighted share of the total is scaled to 100, rounded, and floored at 0.
+-- A total of zero, or any negative input, yields 0.
+calculateScore :: Double -> Double -> Double -> Integer
+calculateScore numTotal numBlackholes numLiabilities
+  | numTotal == 0 = 0
+  | numBlackholes == 0 && numLiabilities == 0 && numTotal > 0 = 100
+  | numTotal < 0 || numBlackholes < 0 || numLiabilities < 0 = 0
+  | otherwise = max 0 (round (maxScore * weightedSum / numTotal))
+  where
+    maxScore = 100.0
+    weightedSum = weightedBlackholes + weightedLiabilities + numGood
+    weightedBlackholes = numBlackholes * (5 / 10)
+    weightedLiabilities = numLiabilities * (7 / 10)
+    numGood = numTotal - numBlackholes - numLiabilities
+
+-- | Unit and property tests for 'calculateScore': three fixed examples plus
+-- the bounds 0 and 100 checked on arbitrary inputs.
+test_calculateScore :: Test.Tree
+test_calculateScore =
+  Test.group "calculateScore" (examples ++ bounds)
+  where
+    examples =
+      [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0,
+        Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0,
+        Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100
+      ]
+    bounds =
+      [ Test.prop "never > 100" atMostHundred,
+        Test.prop "never < 0" atLeastZero
+      ]
+    atMostHundred t b l = calculateScore t b l <= 100
+    atLeastZero t b l = calculateScore t b l >= 0
diff --git a/Biz/Dragons/get-examples.sh b/Biz/Dragons/get-examples.sh
new file mode 100755
index 0000000..a35a282
--- /dev/null
+++ b/Biz/Dragons/get-examples.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+if [ "$#" == "0" ]
+then
+ echo "usage: $(basename $0) <cookie>"
+ echo "copy the cookie from the browser dev console"
+ exit 1
+fi
+cookie="$1"
+curl 'https://dragons.dev/analysis?user=github&repo=training-kit' \
+ -X POST \
+ -H 'Content-Type: application/x-www-form-urlencoded' \
+ -H "Cookie: JWT-Cookie=$cookie" \
+ --compressed --insecure
diff --git a/Biz/Dragons/main.py b/Biz/Dragons/main.py
new file mode 100755
index 0000000..bb10441
--- /dev/null
+++ b/Biz/Dragons/main.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python
+"""
+Analyze developer allocation across a codebase.
+"""
+
+import argparse
+import datetime
+import logging
+import os
+import re
+import subprocess
+import sys
+
+
+def find_user(line):
+ """Given 'Ben Sima <ben@bsima.me>', finds `Ben Sima'. Returns the first
+ matching string."""
+ return re.findall(r"^[^<]*", line)[0].strip()
+
+
+def authors_for(path, active_users):
+ """Return a dictionary of {author: commits} for given path. Usernames not in
+ the 'active_users' list will be filtered out."""
+ raw = subprocess.check_output(
+ ["git", "shortlog", "--numbered", "--summary", "--email", "--", path]
+ ).decode("utf-8")
+ lines = [s for s in raw.split("\n") if s]
+ data = {}
+ for line in lines:
+ parts = line.strip().split("\t")
+ author = find_user(parts[1])
+ commits = parts[0]
+ if author in active_users:
+ data[author] = commits
+ return data
+
+
+def mailmap_users():
+ """Returns users from the .mailmap file."""
+ users = []
+ with open(".mailmap") as file:
+ lines = file.readlines()
+ for line in lines:
+ users.append(find_user(line))
+ return users
+
+
+MAX_SCORE = 10
+
+
+def score(blackhole, liability, good, total):
+ "Calculate the score."
+ weights = {
+ "blackhole": 0.5,
+ "liability": 0.7,
+ }
+ return (
+ MAX_SCORE
+ * (
+ (blackhole * weights["blackhole"])
+ + (liability * weights["liability"])
+ + good
+ )
+ / total
+ )
+
+
+def get_args():
+ "Parse CLI arguments."
+ cli = argparse.ArgumentParser(description=__doc__)
+ cli.add_argument("repo", default=".", help="the git repo to run on", metavar="REPO")
+ cli.add_argument(
+ "-b",
+ "--blackholes",
+ action="store_true",
+ help="print the blackholes (files with one or zero active contributors)",
+ )
+ cli.add_argument(
+ "-l",
+ "--liabilities",
+ action="store_true",
+ help="print the liabilities (files with < 3 active contributors)",
+ )
+ cli.add_argument(
+ "-s",
+ "--stale",
+ action="store_true",
+ help="print stale files (haven't been touched in 6 months)",
+ )
+ cli.add_argument(
+ "-i", "--ignored", nargs="+", default=[], help="patterns to ignore in paths",
+ )
+ cli.add_argument(
+ "--active-users",
+ nargs="+",
+ default=[],
+ help="list of active user emails. if not provided, this is loaded from .mailmap",
+ )
+ cli.add_argument(
+ "-v",
+ "--verbosity",
+ help="set the log level verbosity",
+ choices=["debug", "warning", "error"],
+ default="error",
+ )
+ return cli.parse_args()
+
+
+def guard_git(repo):
+ "Guard against non-git repos."
+ is_git = subprocess.run(
+ ["git", "rev-parse"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ check=False,
+ ).returncode
+ if is_git != 0:
+ sys.exit(f"error: not a git repository: {repo}")
+
+
+def staleness(path, now):
+ "How long has it been since this file was touched?"
+ timestamp = datetime.datetime.strptime(
+ subprocess.check_output(["git", "log", "-n1", "--pretty=%aI", path])
+ .decode("utf-8")
+ .strip(),
+ "%Y-%m-%dT%H:%M:%S%z",
+ )
+ delta = now - timestamp
+ return delta.days
+
+
+class Repo:
+ "Represents a repo and stats for the repo."
+
+ def __init__(self, ignored_paths, active_users):
+ self.paths = [
+ p
+ for p in subprocess.check_output(["git", "ls-files", "--no-deleted"])
+ .decode("utf-8")
+ .split()
+ if not any(i in p for i in ignored_paths)
+ ]
+ logging.debug("collecting stats")
+ self.stats = {}
+ for path in self.paths:
+ self.stats[path] = authors_for(path, active_users)
+ self.blackholes = [path for path, authors in self.stats.items() if not authors]
+ self.liabilities = {
+ path: list(authors)
+ for path, authors in self.stats.items()
+ if 1 <= len(authors) < 3
+ }
+ now = datetime.datetime.utcnow().astimezone()
+ self.stale = {}
+ for path, _ in self.stats.items():
+ _staleness = staleness(path, now)
+ if _staleness > 180:
+ self.stale[path] = _staleness
+
+ def print_blackholes(self, full):
+ "Print number of blackholes, or list of all blackholes."
+ # note: file renames may result in false positives
+ n_blackhole = len(self.blackholes)
+ print(f"Blackholes: {n_blackhole}")
+ if full:
+ for path in self.blackholes:
+ print(f" {path}")
+
+ def print_liabilities(self, full):
+ "Print number of liabilities, or list of all liabilities."
+ n_liabilities = len(self.liabilities)
+ print(f"Liabilities: {n_liabilities}")
+ if full:
+ for path, authors in self.liabilities.items():
+ print(f" {path} ({', '.join(authors)})")
+
+ def print_score(self):
+ "Print the overall score."
+ n_total = len(self.stats.keys())
+ n_blackhole = len(self.blackholes)
+ n_liabilities = len(self.liabilities)
+ n_good = n_total - n_blackhole - n_liabilities
+ print("Total:", n_total)
+ print(
+ "Score: {:.2f}/{}".format(
+ score(n_blackhole, n_liabilities, n_good, n_total), MAX_SCORE
+ )
+ )
+
+ def print_stale(self, full):
+ "Print stale files"
+ n_stale = len(self.stale)
+ print(f"Stale files: {n_stale}")
+ if full:
+ for path, days in self.stale.items():
+ print(f" {path} ({days} days)")
+
+
+if __name__ == "__main__":
+ ARGS = get_args()
+ logging.basicConfig(stream=sys.stderr, level=ARGS.verbosity.upper())
+
+ logging.debug("starting")
+ os.chdir(os.path.abspath(ARGS.repo))
+
+ guard_git(ARGS.repo)
+
+ # if no active users provided, load from .mailmap
+ if ARGS.active_users == []:
+ if os.path.exists(".mailmap"):
+ ARGS.active_users = mailmap_users()
+
+ # collect data
+ REPO = Repo(ARGS.ignored, ARGS.active_users)
+
+ # print data
+ REPO.print_score()
+ REPO.print_blackholes(ARGS.blackholes)
+ REPO.print_liabilities(ARGS.liabilities)
+ REPO.print_stale(ARGS.stale)
diff --git a/Biz/Dragons/pitch.md b/Biz/Dragons/pitch.md
new file mode 100644
index 0000000..a4d4ffa
--- /dev/null
+++ b/Biz/Dragons/pitch.md
@@ -0,0 +1,40 @@
+# Dragons
+
+Dragons analyzes your codebase trends, finds patterns in how your developers
+work, and protects against tech debt.
+
+Just hook it up to your CI system - it will warn you when it finds a problem.
+
+## Identify blackholes in your codebase
+
+What if none of your active employees have touched some part of the codebase?
+This happens too often with legacy code, and then it turns into a huge source of
+tech debt. Dragons finds these "blackholes" and warns you about them so you
+can be proactive in eliminating tech debt.
+
+## Protect against lost knowledge
+
+Not everyone can know every part of a codebase. By finding pieces of code
+that only 1 or 2 people have touched, Dragons identifies siloed knowledge. This
+allows you to protect against the risk of this knowledge leaving the company if
+an employee leaves.
+
+## Don't just measure "code coverage" - also know your "dev coverage"
+
+No matter how smart your employees are, if you are under- or over-utilizing your
+developers then you will never get optimal performance from your team.
+
+- Find developer "hot spots" in your code: which pieces of code get continually
+ rewritten, taking up valuable dev time?
+- Know how your devs work best: which ones have depth of knowledge, and which
+ ones have breadth?
+
+(Paid only)
+
+## See how your teams *actually* organize themselves with cluster analysis
+
+Does your team feel splintered or not cohesive? Which developers work best
+together? Dragons analyzes the collaboration patterns between devs and helps
+you form optimal pairings and teams based on shared code and mindspace.
+
+(Paid only)