diff options
Diffstat (limited to 'Biz/Dragons')
-rw-r--r-- | Biz/Dragons/Analysis.hs | 252 | ||||
-rwxr-xr-x | Biz/Dragons/get-examples.sh | 13 | ||||
-rwxr-xr-x | Biz/Dragons/main.py | 221 | ||||
-rw-r--r-- | Biz/Dragons/pitch.md | 40 |
4 files changed, 526 insertions, 0 deletions
diff --git a/Biz/Dragons/Analysis.hs b/Biz/Dragons/Analysis.hs new file mode 100644 index 0000000..4a1421c --- /dev/null +++ b/Biz/Dragons/Analysis.hs @@ -0,0 +1,252 @@
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE NoImplicitPrelude #-}

-- : out dragons-analyze
module Biz.Dragons.Analysis
  ( Analysis (..),
    Commit (..),
    run,
    main,
    test,
    git,
  )
where

import Alpha
import qualified Biz.Cli as Cli
import Biz.Test ((@=?))
import qualified Biz.Test as Test
import qualified Control.Concurrent.Async as Async
import qualified Data.Aeson as Aeson
import Data.Data (Data)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.String as String
import qualified Data.Text as Text
import qualified Data.Time.Clock as Time
import qualified Data.Time.Format as Time
import qualified System.Directory as Directory
import qualified System.Process as Process

-- | Program entry point: hands the usage text, the action, the test tree and
-- the cleanup hook to 'Cli.main'.
main :: IO ()
main = Cli.main <| Cli.Plan help move test tidy

-- | The command-line action: analyze the repo named by @<git-dir>@ for the
-- authors given with @-a@/@--author@, and print the JSON-encoded 'Analysis'.
move :: Cli.Arguments -> IO ()
move args = gitDir +> run authors /> Aeson.encode +> putStrLn
  where
    -- NOTE(review): the usage pattern in 'help' makes <git-dir> mandatory, so
    -- the ".git" default looks unreachable -- confirm against Biz.Cli.
    gitDir =
      Cli.argument "git-dir"
        |> Cli.getArgWithDefault args ".git"
        |> Directory.makeAbsolute
    authors =
      -- i think this is not working? do i need optparse-applicative?
      --
      -- NOTE(review): the lookup goes through the short form, which only
      -- resolves to the usage pattern's --author=<email> if the Options
      -- section of 'help' declares both spellings with the same argument.
      Cli.shortOption 'a'
        |> Cli.getAllArgs args
        |> map Text.pack

-- | Cleanup hook for 'Cli.Plan'; there is nothing to clean up.
tidy :: cfg -> IO ()
tidy _ = pure ()

-- | All tests for this module.
test :: Test.Tree
test = Test.group "Biz.Dragons.Analysis" [test_calculateScore]

-- | Usage text, parsed by the docopt quasi-quoter.
--
-- The Options entry has to spell the option the same way the usage pattern
-- does (@--author=<email>@); declaring it without the argument describes a
-- different, argument-less flag.
help :: Cli.Docopt
help =
  [Cli.docopt|
dragons-analyze

Usage:
  dragons-analyze test
  dragons-analyze [--author=<email>]... <git-dir>

Options:
  -a, --author=<email>  List of active authors' emails.
|]

-- | A git commit, identified by its SHA.
newtype Commit = Sha Text
  deriving (Eq, Data, Typeable, Ord, Generic, Show)

instance Aeson.ToJSON Commit

-- | The result of analyzing a git repo.
data Analysis = Analysis
  { -- | Where the repo is stored on the local disk.
    gitDir :: FilePath,
    -- | Paths with no active contributors.
    blackholes :: [Text],
    -- | Paths with 1 or 2 active contributors. Paths with none are reported
    -- as 'blackholes' only, so the two lists never overlap.
    liabilities :: [Text],
    -- | Map of path to number of commits, for detecting paths that continually
    -- get rewritten.
    hotspotMap :: Map FilePath Integer,
    -- | Files that have not been touched in 6 months (more than 180 days),
    -- mapped to their age in days.
    stale :: Map FilePath Integer,
    -- | Total score for the repo
    score :: Integer,
    -- | Total number of files
    totalFiles :: Integer,
    -- | The total number of commits
    totalCommits :: Integer,
    -- | List of all the active users we care about
    activeAuthors :: [Text],
    -- | Which commit this analysis was run against.
    commit :: Commit
  }
  deriving (Eq, Ord, Generic, Show, Data, Typeable)

instance Aeson.ToJSON Analysis

-- | Analyze the @HEAD@ of a repo.
--
-- Takes the emails of the active authors and the path to the git dir, and
-- shells out to @git@ once or more per tracked file to collect authorship,
-- age and change counts.
run :: [Text] -> FilePath -> IO Analysis
run activeAuthors bareRepo = do
  commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha
  tree <-
    git
      bareRepo
      [ "ls-tree",
        "--full-tree",
        "--name-only",
        "-r", -- recurse into subtrees
        "HEAD"
      ]
      /> String.lines
  authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
  let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])]
  stalenessMap <- traverse (lastTouched bareRepo) tree
  -- Count the active contributors of every path once, then classify.
  let activeCounts =
        [ (Text.pack path, activeCount authors_)
          | (path, authors_) <- authorMap
        ]
  let blackholes = [path | (path, n) <- activeCounts, n == 0]
  -- A blackhole must not also be a liability: 'calculateScore' subtracts both
  -- counts from the total, so an overlap would be subtracted twice.
  let liabilities = [path | (path, n) <- activeCounts, n > 0, n < 3]
  let numBlackholes = realToFrac <| length blackholes
  let numLiabilities = realToFrac <| length liabilities
  let numTotal = realToFrac <| length tree
  hotspotMap <-
    Map.fromList </ Async.mapConcurrently getChangeCount tree
  totalCommits <-
    git bareRepo ["rev-list", "--count", "HEAD"]
      /> filter (/= '\n')
      /> readMaybe
      /> fromMaybe 0
  pure
    <| Analysis
      { gitDir = bareRepo,
        stale =
          Map.fromList
            <| [ (path, days)
                 | (path, Just days) <- stalenessMap,
                   days > 180
               ],
        score = calculateScore numTotal numBlackholes numLiabilities,
        totalFiles = toInteger <| length tree,
        ..
      }
  where
    third :: (a, b, c) -> c
    third (_, _, a) = a
    -- Number of distinct author emails on a path that are in 'activeAuthors'.
    -- The emails are de-duplicated first so that one person committing under
    -- two names is not counted as two contributors.
    activeCount authors_ =
      length (List.nub (map third authors_) `List.intersect` activeAuthors)
    -- Number of commits reachable from HEAD that touch the path; 0 when the
    -- output of git cannot be read as a number.
    getChangeCount :: FilePath -> IO (FilePath, Integer)
    getChangeCount path =
      git bareRepo ["rev-list", "--count", "HEAD", "--", path]
        /> filter (/= '\n')
        /> readMaybe
        /> fromMaybe 0
        /> (path,)

-- | Given a git dir and a path inside the git repo, get information about the
-- authors.
authorsFor ::
  FilePath ->
  FilePath ->
  -- | returns (number of commits, author name, author email)
  IO [(Text, Text, Text)]
authorsFor gitDir path =
  git
    gitDir
    [ "shortlog",
      "--numbered",
      "--summary",
      "--email",
      "HEAD",
      "--",
      path
    ]
    /> Text.pack
    /> Text.lines
    /> map (Text.break (== '\t'))
    /> map parseAuthor
  where
    -- Each line is split at its first tab: the commit count comes before it,
    -- "Name <email>" after it.
    parseAuthor (commits, author) =
      ( Text.strip commits,
        Text.strip <| Text.takeWhile (/= '<') author,
        Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author
      )

-- | Run a git command on a repo and return its standard output.
git ::
  -- | path to the git dir (bare repo)
  String ->
  -- | args to `git`
  [String] ->
  IO String
git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) ""

-- | Age in days of the most recent commit touching a path, or 'Nothing' when
-- the author date printed by git cannot be parsed (for instance when git
-- prints nothing for the path).
lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer)
lastTouched bareRepo path = do
  now <- Time.getCurrentTime
  timestamp <-
    git
      bareRepo
      [ "log",
        "-n1",
        "--pretty=%aI",
        "--",
        path
      ]
      /> filter (/= '\n')
      /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z"
  pure (path, calculateAge now </ timestamp)
  where
    calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay

-- | Does the aggregate score calculation given number of files found to be
blackholes, liabilities, etc. +calculateScore :: Double -> Double -> Double -> Integer +calculateScore 0 _ _ = 0 +calculateScore a 0 0 | a > 0 = 100 +calculateScore a b c | a < 0 || b < 0 || c < 0 = 0 +calculateScore numTotal numBlackholes numLiabilities = + max 0 <. round + <| maxScore + * (weightedBlackholes + weightedLiabilities + numGood) + / numTotal + where + weightedBlackholes = numBlackholes * (5 / 10) + weightedLiabilities = numLiabilities * (7 / 10) + numGood = numTotal - numBlackholes - numLiabilities + maxScore = 100.0 + +test_calculateScore :: Test.Tree +test_calculateScore = + Test.group + "calculateScore" + [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0, + Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0, + Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100, + Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100, + Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0 + ] diff --git a/Biz/Dragons/get-examples.sh b/Biz/Dragons/get-examples.sh new file mode 100755 index 0000000..a35a282 --- /dev/null +++ b/Biz/Dragons/get-examples.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +if [ "$#" == "0" ] +then + echo "usage: $(basename $0) <cookie>" + echo "copy the cookie from the browser dev console" + exit 1 +fi +cookie="$1" +curl 'https://dragons.dev/analysis?user=github&repo=training-kit' \ + -X POST \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -H "Cookie: JWT-Cookie=$cookie" \ + --compressed --insecure diff --git a/Biz/Dragons/main.py b/Biz/Dragons/main.py new file mode 100755 index 0000000..bb10441 --- /dev/null +++ b/Biz/Dragons/main.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +""" +Analyze developer allocation across a codebase. +""" + +import argparse +import datetime +import logging +import os +import re +import subprocess +import sys + + +def find_user(line): + """Given 'Ben Sima <ben@bsima.me>', finds `Ben Sima'. 
Returns the first + matching string.""" + return re.findall(r"^[^<]*", line)[0].strip() + + +def authors_for(path, active_users): + """Return a dictionary of {author: commits} for given path. Usernames not in + the 'active_users' list will be filtered out.""" + raw = subprocess.check_output( + ["git", "shortlog", "--numbered", "--summary", "--email", "--", path] + ).decode("utf-8") + lines = [s for s in raw.split("\n") if s] + data = {} + for line in lines: + parts = line.strip().split("\t") + author = find_user(parts[1]) + commits = parts[0] + if author in active_users: + data[author] = commits + return data + + +def mailmap_users(): + """Returns users from the .mailmap file.""" + users = [] + with open(".mailmap") as file: + lines = file.readlines() + for line in lines: + users.append(find_user(line)) + return users + + +MAX_SCORE = 10 + + +def score(blackhole, liability, good, total): + "Calculate the score." + weights = { + "blackhole": 0.5, + "liability": 0.7, + } + return ( + MAX_SCORE + * ( + (blackhole * weights["blackhole"]) + + (liability * weights["liability"]) + + good + ) + / total + ) + + +def get_args(): + "Parse CLI arguments." + cli = argparse.ArgumentParser(description=__doc__) + cli.add_argument("repo", default=".", help="the git repo to run on", metavar="REPO") + cli.add_argument( + "-b", + "--blackholes", + action="store_true", + help="print the blackholes (files with one or zero active contributors)", + ) + cli.add_argument( + "-l", + "--liabilities", + action="store_true", + help="print the liabilities (files with < 3 active contributors)", + ) + cli.add_argument( + "-s", + "--stale", + action="store_true", + help="print stale files (haven't been touched in 6 months)", + ) + cli.add_argument( + "-i", "--ignored", nargs="+", default=[], help="patterns to ignore in paths", + ) + cli.add_argument( + "--active-users", + nargs="+", + default=[], + help="list of active user emails. 
if not provided, this is loaded from .mailmap", + ) + cli.add_argument( + "-v", + "--verbosity", + help="set the log level verbosity", + choices=["debug", "warning", "error"], + default="error", + ) + return cli.parse_args() + + +def guard_git(repo): + "Guard against non-git repos." + is_git = subprocess.run( + ["git", "rev-parse"], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + check=False, + ).returncode + if is_git != 0: + sys.exit(f"error: not a git repository: {repo}") + + +def staleness(path, now): + "How long has it been since this file was touched?" + timestamp = datetime.datetime.strptime( + subprocess.check_output(["git", "log", "-n1", "--pretty=%aI", path]) + .decode("utf-8") + .strip(), + "%Y-%m-%dT%H:%M:%S%z", + ) + delta = now - timestamp + return delta.days + + +class Repo: + "Represents a repo and stats for the repo." + + def __init__(self, ignored_paths, active_users): + self.paths = [ + p + for p in subprocess.check_output(["git", "ls-files", "--no-deleted"]) + .decode("utf-8") + .split() + if not any(i in p for i in ignored_paths) + ] + logging.debug("collecting stats") + self.stats = {} + for path in self.paths: + self.stats[path] = authors_for(path, active_users) + self.blackholes = [path for path, authors in self.stats.items() if not authors] + self.liabilities = { + path: list(authors) + for path, authors in self.stats.items() + if 1 <= len(authors) < 3 + } + now = datetime.datetime.utcnow().astimezone() + self.stale = {} + for path, _ in self.stats.items(): + _staleness = staleness(path, now) + if _staleness > 180: + self.stale[path] = _staleness + + def print_blackholes(self, full): + "Print number of blackholes, or list of all blackholes." + # note: file renames may result in false positives + n_blackhole = len(self.blackholes) + print(f"Blackholes: {n_blackhole}") + if full: + for path in self.blackholes: + print(f" {path}") + + def print_liabilities(self, full): + "Print number of liabilities, or list of all liabilities." 
+ n_liabilities = len(self.liabilities) + print(f"Liabilities: {n_liabilities}") + if full: + for path, authors in self.liabilities.items(): + print(f" {path} ({', '.join(authors)})") + + def print_score(self): + "Print the overall score." + n_total = len(self.stats.keys()) + n_blackhole = len(self.blackholes) + n_liabilities = len(self.liabilities) + n_good = n_total - n_blackhole - n_liabilities + print("Total:", n_total) + print( + "Score: {:.2f}/{}".format( + score(n_blackhole, n_liabilities, n_good, n_total), MAX_SCORE + ) + ) + + def print_stale(self, full): + "Print stale files" + n_stale = len(self.stale) + print(f"Stale files: {n_stale}") + if full: + for path, days in self.stale.items(): + print(f" {path} ({days} days)") + + +if __name__ == "__main__": + ARGS = get_args() + logging.basicConfig(stream=sys.stderr, level=ARGS.verbosity.upper()) + + logging.debug("starting") + os.chdir(os.path.abspath(ARGS.repo)) + + guard_git(ARGS.repo) + + # if no active users provided, load from .mailmap + if ARGS.active_users == []: + if os.path.exists(".mailmap"): + ARGS.active_users = mailmap_users() + + # collect data + REPO = Repo(ARGS.ignored, ARGS.active_users) + + # print data + REPO.print_score() + REPO.print_blackholes(ARGS.blackholes) + REPO.print_liabilities(ARGS.liabilities) + REPO.print_stale(ARGS.stale) diff --git a/Biz/Dragons/pitch.md b/Biz/Dragons/pitch.md new file mode 100644 index 0000000..a4d4ffa --- /dev/null +++ b/Biz/Dragons/pitch.md @@ -0,0 +1,40 @@ +# Dragons + +Dragons analyzes your codebase trends, finds patterns in how your developers +work, and protects against tech debt. + +Just hook it up to your CI system - it will warn you when it finds a problem. + +## Identify blackholes in your codebase + +What if none of your active employees have touched some part of the codebase? +This happens too often with legacy code, and then it turns into a huge source of +tech debt. 
Dragons finds these "blackholes" and warns you about them so you +can be proactive in eliminating tech debt. + +## Protect against lost knowledge + +Not everyone can know every part of a codebase. By finding pieces of code +that only 1 or 2 people have touched, Dragons identifies siloed knowledge. This +allows you to protect against the risk of this knowledge leaving the company if +an employee leaves. + +## Don't just measure "code coverage" - also know your "dev coverage" + +No matter how smart your employees are, if you are under- or over-utilizing your +developers then you will never get optimal performance from your team. + +- Find developer "hot spots" in your code: which pieces of code get continually + rewritten, taking up valuable dev time? +- Know how your devs work best: which ones have depth of knowledge, and which + ones have breadth? + +(Paid only) + +## See how your teams *actually* organize themselves with cluster analysis + +Does your team feel splintered or not cohesive? Which developers work best +together? Dragons analyzes the collaboration patterns between devs and helps +you form optimal pairings and teams based on shared code and mindspace. + +(Paid only) |