diff options
Diffstat (limited to 'Biz/Devalloc')
-rw-r--r-- | Biz/Devalloc/Analysis.hs | 252 | ||||
-rwxr-xr-x | Biz/Devalloc/get-examples.sh | 13 | ||||
-rwxr-xr-x | Biz/Devalloc/main.py | 221 | ||||
-rw-r--r-- | Biz/Devalloc/pitch.md | 40 |
4 files changed, 0 insertions, 526 deletions
diff --git a/Biz/Devalloc/Analysis.hs b/Biz/Devalloc/Analysis.hs deleted file mode 100644 index 4b1f297..0000000 --- a/Biz/Devalloc/Analysis.hs +++ /dev/null @@ -1,252 +0,0 @@ -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE DeriveGeneric #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE QuasiQuotes #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE NoImplicitPrelude #-} - --- : out devalloc-analyze -module Biz.Devalloc.Analysis - ( Analysis (..), - Commit (..), - run, - main, - test, - git, - ) -where - -import Alpha -import qualified Biz.Cli as Cli -import Biz.Test ((@=?)) -import qualified Biz.Test as Test -import qualified Control.Concurrent.Async as Async -import qualified Data.Aeson as Aeson -import Data.Data (Data) -import qualified Data.List as List -import qualified Data.Map as Map -import qualified Data.String as String -import qualified Data.Text as Text -import qualified Data.Time.Clock as Time -import qualified Data.Time.Format as Time -import qualified System.Directory as Directory -import qualified System.Process as Process - -main :: IO () -main = Cli.main <| Cli.Plan help move test tidy - -move :: Cli.Arguments -> IO () -move args = gitDir +> run authors /> Aeson.encode +> putStrLn - where - gitDir = - Cli.argument "git-dir" - |> Cli.getArgWithDefault args ".git" - |> Directory.makeAbsolute - authors = - -- i think this is not working? do i need optparse-applicative? - Cli.shortOption 'a' - |> Cli.getAllArgs args - |> map Text.pack - -tidy :: cfg -> IO () -tidy _ = pure () - -test :: Test.Tree -test = Test.group "Biz.Devalloc.Analysis" [test_calculateScore] - -help :: Cli.Docopt -help = - [Cli.docopt| -devalloc-analyze - -Usage: - devalloc-analyze test - devalloc-analyze [--author=<email>]... <git-dir> - -Options: - -a, --author List of active authors' emails. -|] - -newtype Commit = Sha Text - deriving (Eq, Data, Typeable, Ord, Generic, Show) - -instance Aeson.ToJSON Commit - --- | The result of analyzing a git repo. -data Analysis = Analysis - { -- | Where the repo is stored on the local disk. - gitDir :: FilePath, - -- | A path with no active contributors - blackholes :: [Text], - -- | A path with < 3 active contributors - liabilities :: [Text], - -- | Map of path to number of commits, for detecting paths that continually - -- get rewritten. - hotspotMap :: Map FilePath Integer, - -- | Files that have not been touched in 6 months - stale :: Map FilePath Integer, - -- | Total score for the repo - score :: Integer, - -- | Total number of files - totalFiles :: Integer, - -- | The total number of commits - totalCommits :: Integer, - -- | List of all the active users we care about - activeAuthors :: [Text], - -- | Which commit this analysis was run against. - commit :: Commit - } - deriving (Eq, Ord, Generic, Show, Data, Typeable) - -instance Aeson.ToJSON Analysis - -run :: [Text] -> FilePath -> IO Analysis -run activeAuthors bareRepo = do - commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha - tree <- - git - bareRepo - [ "ls-tree", - "--full-tree", - "--name-only", - "-r", -- recurse into subtrees - "HEAD" - ] - /> String.lines - authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] - let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])] - stalenessMap <- traverse (lastTouched bareRepo) tree - let blackholes = - [ Text.pack path - | (path, authors_) <- authorMap, - null (map third authors_ `List.intersect` activeAuthors) - ] - let liabilities = - [ Text.pack path - | (path, authors_) <- authorMap, - length (map third authors_ `List.intersect` activeAuthors) < 3 - ] - let numBlackholes = realToFrac <| length blackholes - let numLiabilities = realToFrac <| length liabilities - let numTotal = realToFrac <| length tree - hotspotMap <- - Map.fromList </ Async.mapConcurrently getChangeCount tree - totalCommits <- - git bareRepo ["rev-list", "--count", "HEAD"] - /> filter (/= '\n') - /> readMaybe - /> fromMaybe 0 - pure - <| Analysis - { gitDir = bareRepo, - stale = - Map.fromList - <| [ (path, days) - | (path, Just days) <- stalenessMap, - days > 180 - ], - score = calculateScore numTotal numBlackholes numLiabilities, - totalFiles = toInteger <| length tree, - .. - } - where - third :: (a, b, c) -> c - third (_, _, a) = a - getChangeCount :: FilePath -> IO (FilePath, Integer) - getChangeCount path = - git bareRepo ["rev-list", "--count", "HEAD", "--", path] - /> filter (/= '\n') - /> readMaybe - /> fromMaybe 0 - /> (path,) - --- | Given a git dir and a path inside the git repo, get information about the --- authors. -authorsFor :: - FilePath -> - FilePath -> - -- | returns (number of commits, author name, author email) - IO [(Text, Text, Text)] -authorsFor gitDir path = - Process.readProcess - "git" - [ "--git-dir", - gitDir, - "shortlog", - "--numbered", - "--summary", - "--email", - "HEAD", - "--", - path - ] - "" - /> Text.pack - /> Text.lines - /> map (Text.break (== '\t')) - /> map parseAuthor - where - parseAuthor (commits, author) = - ( Text.strip commits, - Text.strip <| Text.takeWhile (/= '<') author, - Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author - ) - --- | Run a git command on a repo -git :: - -- | path to the git dir (bare repo) - String -> - -- | args to `git` - [String] -> - IO String -git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) "" - -lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer) -lastTouched bareRepo path = do - now <- Time.getCurrentTime - timestamp <- - Process.readProcess - "git" - [ "--git-dir", - bareRepo, - "log", - "-n1", - "--pretty=%aI", - "--", - path - ] - "" - /> filter (/= '\n') - /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z" - pure (path, calculateAge now </ timestamp) - where - calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay - --- | Does the aggregate score calculation given number of files found to be --- blackholes, liabilities, etc. -calculateScore :: Double -> Double -> Double -> Integer -calculateScore 0 _ _ = 0 -calculateScore a 0 0 | a > 0 = 100 -calculateScore a b c | a < 0 || b < 0 || c < 0 = 0 -calculateScore numTotal numBlackholes numLiabilities = - max 0 <. round - <| maxScore - * (weightedBlackholes + weightedLiabilities + numGood) - / numTotal - where - weightedBlackholes = numBlackholes * (5 / 10) - weightedLiabilities = numLiabilities * (7 / 10) - numGood = numTotal - numBlackholes - numLiabilities - maxScore = 100.0 - -test_calculateScore :: Test.Tree -test_calculateScore = - Test.group - "calculateScore" - [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0, - Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0, - Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100, - Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100, - Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0 - ] diff --git a/Biz/Devalloc/get-examples.sh b/Biz/Devalloc/get-examples.sh deleted file mode 100755 index 2e0647b..0000000 --- a/Biz/Devalloc/get-examples.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash -if [ "$#" == "0" ] -then - echo "usage: $(basename $0) <cookie>" - echo "copy the cookie from the browser dev console" - exit 1 -fi -cookie="$1" -curl 'https://devalloc.io/analysis?user=github&repo=training-kit' \ - -X POST \ - -H 'Content-Type: application/x-www-form-urlencoded' \ - -H "Cookie: JWT-Cookie=$cookie" \ - --compressed --insecure diff --git a/Biz/Devalloc/main.py b/Biz/Devalloc/main.py deleted file mode 100755 index bb10441..0000000 --- a/Biz/Devalloc/main.py +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env python -""" -Analyze developer allocation across a codebase. -""" - -import argparse -import datetime -import logging -import os -import re -import subprocess -import sys - - -def find_user(line): - """Given 'Ben Sima <ben@bsima.me>', finds `Ben Sima'. Returns the first - matching string.""" - return re.findall(r"^[^<]*", line)[0].strip() - - -def authors_for(path, active_users): - """Return a dictionary of {author: commits} for given path. Usernames not in - the 'active_users' list will be filtered out.""" - raw = subprocess.check_output( - ["git", "shortlog", "--numbered", "--summary", "--email", "--", path] - ).decode("utf-8") - lines = [s for s in raw.split("\n") if s] - data = {} - for line in lines: - parts = line.strip().split("\t") - author = find_user(parts[1]) - commits = parts[0] - if author in active_users: - data[author] = commits - return data - - -def mailmap_users(): - """Returns users from the .mailmap file.""" - users = [] - with open(".mailmap") as file: - lines = file.readlines() - for line in lines: - users.append(find_user(line)) - return users - - -MAX_SCORE = 10 - - -def score(blackhole, liability, good, total): - "Calculate the score." - weights = { - "blackhole": 0.5, - "liability": 0.7, - } - return ( - MAX_SCORE - * ( - (blackhole * weights["blackhole"]) - + (liability * weights["liability"]) - + good - ) - / total - ) - - -def get_args(): - "Parse CLI arguments." - cli = argparse.ArgumentParser(description=__doc__) - cli.add_argument("repo", default=".", help="the git repo to run on", metavar="REPO") - cli.add_argument( - "-b", - "--blackholes", - action="store_true", - help="print the blackholes (files with one or zero active contributors)", - ) - cli.add_argument( - "-l", - "--liabilities", - action="store_true", - help="print the liabilities (files with < 3 active contributors)", - ) - cli.add_argument( - "-s", - "--stale", - action="store_true", - help="print stale files (haven't been touched in 6 months)", - ) - cli.add_argument( - "-i", "--ignored", nargs="+", default=[], help="patterns to ignore in paths", - ) - cli.add_argument( - "--active-users", - nargs="+", - default=[], - help="list of active user emails. if not provided, this is loaded from .mailmap", - ) - cli.add_argument( - "-v", - "--verbosity", - help="set the log level verbosity", - choices=["debug", "warning", "error"], - default="error", - ) - return cli.parse_args() - - -def guard_git(repo): - "Guard against non-git repos." - is_git = subprocess.run( - ["git", "rev-parse"], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - check=False, - ).returncode - if is_git != 0: - sys.exit(f"error: not a git repository: {repo}") - - -def staleness(path, now): - "How long has it been since this file was touched?" - timestamp = datetime.datetime.strptime( - subprocess.check_output(["git", "log", "-n1", "--pretty=%aI", path]) - .decode("utf-8") - .strip(), - "%Y-%m-%dT%H:%M:%S%z", - ) - delta = now - timestamp - return delta.days - - -class Repo: - "Represents a repo and stats for the repo." - - def __init__(self, ignored_paths, active_users): - self.paths = [ - p - for p in subprocess.check_output(["git", "ls-files", "--no-deleted"]) - .decode("utf-8") - .split() - if not any(i in p for i in ignored_paths) - ] - logging.debug("collecting stats") - self.stats = {} - for path in self.paths: - self.stats[path] = authors_for(path, active_users) - self.blackholes = [path for path, authors in self.stats.items() if not authors] - self.liabilities = { - path: list(authors) - for path, authors in self.stats.items() - if 1 <= len(authors) < 3 - } - now = datetime.datetime.utcnow().astimezone() - self.stale = {} - for path, _ in self.stats.items(): - _staleness = staleness(path, now) - if _staleness > 180: - self.stale[path] = _staleness - - def print_blackholes(self, full): - "Print number of blackholes, or list of all blackholes." - # note: file renames may result in false positives - n_blackhole = len(self.blackholes) - print(f"Blackholes: {n_blackhole}") - if full: - for path in self.blackholes: - print(f" {path}") - - def print_liabilities(self, full): - "Print number of liabilities, or list of all liabilities." - n_liabilities = len(self.liabilities) - print(f"Liabilities: {n_liabilities}") - if full: - for path, authors in self.liabilities.items(): - print(f" {path} ({', '.join(authors)})") - - def print_score(self): - "Print the overall score." - n_total = len(self.stats.keys()) - n_blackhole = len(self.blackholes) - n_liabilities = len(self.liabilities) - n_good = n_total - n_blackhole - n_liabilities - print("Total:", n_total) - print( - "Score: {:.2f}/{}".format( - score(n_blackhole, n_liabilities, n_good, n_total), MAX_SCORE - ) - ) - - def print_stale(self, full): - "Print stale files" - n_stale = len(self.stale) - print(f"Stale files: {n_stale}") - if full: - for path, days in self.stale.items(): - print(f" {path} ({days} days)") - - -if __name__ == "__main__": - ARGS = get_args() - logging.basicConfig(stream=sys.stderr, level=ARGS.verbosity.upper()) - - logging.debug("starting") - os.chdir(os.path.abspath(ARGS.repo)) - - guard_git(ARGS.repo) - - # if no active users provided, load from .mailmap - if ARGS.active_users == []: - if os.path.exists(".mailmap"): - ARGS.active_users = mailmap_users() - - # collect data - REPO = Repo(ARGS.ignored, ARGS.active_users) - - # print data - REPO.print_score() - REPO.print_blackholes(ARGS.blackholes) - REPO.print_liabilities(ARGS.liabilities) - REPO.print_stale(ARGS.stale) diff --git a/Biz/Devalloc/pitch.md b/Biz/Devalloc/pitch.md deleted file mode 100644 index cfc0b23..0000000 --- a/Biz/Devalloc/pitch.md +++ /dev/null @@ -1,40 +0,0 @@ -# Devalloc - -Devalloc analyzes your codebase trends, finds patterns in how your developers -work, and protects against tech debt. - -Just hook it up to your CI system - it will warn you when it finds a problem. - -## Identify blackholes in your codebase - -What if none of your active employees have touched some part of the codebase? -This happens too often with legacy code, and then it turns into a huge source of -tech debt. Devalloc finds these "blackholes" and warns you about them so you -can be proactive in eliminating tech debt. - -## Protect against lost knowledge - -Not everyone can know every part of a codebase. By finding pieces of code -that only 1 or 2 people have touched, devalloc identifes siloed knowledge. This -allows you to protect against the risk of this knowledge leaving the company if -an employee leaves. - -## Don't just measure "code coverage" - also know your "dev coverage" - -No matter how smart your employees are, if you are under- or over-utilizing your -developers then you will never get optimal performance from your team. - -- Find developer "hot spots" in your code: which pieces of code get continually - rewritten, taking up valuable dev time? -- Know how your devs work best: which ones have depth of knowledge, and which - ones have breadth? - -(Paid only) - -## See how your teams *actually* organize themselves with cluster analysis - -Does your team feel splintered or not cohesive? Which developers work best -together? Devalloc analyzes the collaboration patterns between devs and helps -you form optimal pairings and teams based on shared code and mindspace. - -(Paid only) |