summaryrefslogtreecommitdiff
path: root/Biz/Devalloc
diff options
context:
space:
mode:
authorBen Sima <ben@bsima.me>2021-08-18 13:25:31 -0400
committerBen Sima <ben@bsima.me>2021-11-26 13:47:37 -0500
commit1176a24a1f76f551ec32eda731e8d5cdf93ad085 (patch)
tree69d6ead8e57e54f2886808769a134d57b2e0893d /Biz/Devalloc
parent2462d2c1377b645a99cba38875628b18d7da5ac8 (diff)
Rename Devalloc to Dragons
Diffstat (limited to 'Biz/Devalloc')
-rw-r--r--Biz/Devalloc/Analysis.hs252
-rwxr-xr-xBiz/Devalloc/get-examples.sh13
-rwxr-xr-xBiz/Devalloc/main.py221
-rw-r--r--Biz/Devalloc/pitch.md40
4 files changed, 0 insertions, 526 deletions
diff --git a/Biz/Devalloc/Analysis.hs b/Biz/Devalloc/Analysis.hs
deleted file mode 100644
index 4b1f297..0000000
--- a/Biz/Devalloc/Analysis.hs
+++ /dev/null
@@ -1,252 +0,0 @@
-{-# LANGUAGE DeriveDataTypeable #-}
-{-# LANGUAGE DeriveGeneric #-}
-{-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE QuasiQuotes #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE NoImplicitPrelude #-}
-
--- : out devalloc-analyze
-module Biz.Devalloc.Analysis
- ( Analysis (..),
- Commit (..),
- run,
- main,
- test,
- git,
- )
-where
-
-import Alpha
-import qualified Biz.Cli as Cli
-import Biz.Test ((@=?))
-import qualified Biz.Test as Test
-import qualified Control.Concurrent.Async as Async
-import qualified Data.Aeson as Aeson
-import Data.Data (Data)
-import qualified Data.List as List
-import qualified Data.Map as Map
-import qualified Data.String as String
-import qualified Data.Text as Text
-import qualified Data.Time.Clock as Time
-import qualified Data.Time.Format as Time
-import qualified System.Directory as Directory
-import qualified System.Process as Process
-
--- | Command-line entry point: bundles 'help', 'move', 'test' and 'tidy' into a
--- 'Cli.Plan' and hands it to 'Cli.main'.
-main :: IO ()
-main = Cli.main <| Cli.Plan help move test tidy
-
--- | The action behind the CLI: reads the @git-dir@ argument (falling back to
--- ".git"), makes it absolute, passes it to 'run' together with the authors
--- collected from the @-a@ option, then encodes the result with 'Aeson.encode'
--- and prints it.
-move :: Cli.Arguments -> IO ()
-move args = gitDir +> run authors /> Aeson.encode +> putStrLn
- where
- gitDir =
- Cli.argument "git-dir"
- |> Cli.getArgWithDefault args ".git"
- |> Directory.makeAbsolute
- authors =
- -- i think this is not working? do i need optparse-applicative?
- Cli.shortOption 'a'
- |> Cli.getAllArgs args
- |> map Text.pack
-
--- | Cleanup step handed to 'Cli.Plan'; ignores its argument and does nothing.
-tidy :: cfg -> IO ()
-tidy _ = pure ()
-
--- | Test tree for this module; currently holds only 'test_calculateScore'.
-test :: Test.Tree
-test = Test.group "Biz.Devalloc.Analysis" [test_calculateScore]
-
--- | Docopt usage text for the @devalloc-analyze@ command.
---
--- NOTE(review): the usage line gives @--author@ a value (@--author=<email>@)
--- while the Options section lists @-a, --author@ without one. This may be
--- related to the "not working?" remark in 'move' -- confirm against the docopt
--- rules for option descriptions.
-help :: Cli.Docopt
-help =
- [Cli.docopt|
-devalloc-analyze
-
-Usage:
- devalloc-analyze test
- devalloc-analyze [--author=<email>]... <git-dir>
-
-Options:
- -a, --author List of active authors' emails.
-|]
-
--- | Identifier of a git commit. 'run' fills it with the output of
--- @git rev-parse HEAD@.
-newtype Commit = Sha Text
- deriving (Eq, Data, Typeable, Ord, Generic, Show)
-
--- No methods are given here, so the class defaults apply.
-instance Aeson.ToJSON Commit
-
--- | The result of analyzing a git repo, as produced by 'run'.
-data Analysis = Analysis
- { -- | Where the repo is stored on the local disk.
- gitDir :: FilePath,
- -- | Paths with no active contributors
- blackholes :: [Text],
- -- | Paths with < 3 active contributors
- liabilities :: [Text],
- -- | Map of path to number of commits, for detecting paths that continually
- -- get rewritten.
- hotspotMap :: Map FilePath Integer,
- -- | Files that have not been touched in 6 months: map of path to age in
- -- days. 'run' keeps only entries older than 180 days.
- stale :: Map FilePath Integer,
- -- | Total score for the repo, see 'calculateScore'
- score :: Integer,
- -- | Total number of files
- totalFiles :: Integer,
- -- | The total number of commits
- totalCommits :: Integer,
- -- | List of all the active users we care about; 'run' compares these
- -- against author emails.
- activeAuthors :: [Text],
- -- | Which commit this analysis was run against.
- commit :: Commit
- }
- deriving (Eq, Ord, Generic, Show, Data, Typeable)
-
--- No methods are given here, so the class defaults apply.
-instance Aeson.ToJSON Analysis
-
--- | Analyze the repo whose git dir is @bareRepo@ at @HEAD@.
---
--- @activeAuthors@ is the list of author emails that count as active. Every
--- path in the tree of @HEAD@ is classified by how many of its authors are
--- active:
---
--- * no active author: the path is a blackhole;
--- * one or two active authors: the path is a liability.
---
--- The two lists are disjoint. 'calculateScore' relies on that, because it
--- derives the number of good paths as total minus blackholes minus
--- liabilities; counting a blackhole as a liability too would subtract it
--- twice.
-run :: [Text] -> FilePath -> IO Analysis
-run activeAuthors bareRepo = do
-  commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha
-  tree <-
-    git
-      bareRepo
-      [ "ls-tree",
-        "--full-tree",
-        "--name-only",
-        "-r", -- recurse into subtrees
-        "HEAD"
-      ]
-      /> String.lines
-  authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
-  let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])]
-  stalenessMap <- traverse (lastTouched bareRepo) tree
-  let blackholes =
-        [ Text.pack path
-          | (path, authors_) <- authorMap,
-            null (activeEmails authors_)
-        ]
-  let liabilities =
-        [ Text.pack path
-          | (path, authors_) <- authorMap,
-            let numActive = length (activeEmails authors_),
-            -- zero active authors is a blackhole, not a liability
-            numActive > 0,
-            numActive < 3
-        ]
-  let numBlackholes = realToFrac <| length blackholes
-  let numLiabilities = realToFrac <| length liabilities
-  let numTotal = realToFrac <| length tree
-  hotspotMap <-
-    Map.fromList </ Async.mapConcurrently getChangeCount tree
-  totalCommits <-
-    git bareRepo ["rev-list", "--count", "HEAD"]
-      /> filter (/= '\n')
-      /> readMaybe
-      /> fromMaybe 0
-  pure
-    <| Analysis
-      { gitDir = bareRepo,
-        -- keep only paths untouched for more than 180 days
-        stale =
-          Map.fromList
-            <| [ (path, days)
-                 | (path, Just days) <- stalenessMap,
-                   days > 180
-               ],
-        score = calculateScore numTotal numBlackholes numLiabilities,
-        totalFiles = toInteger <| length tree,
-        -- remaining fields come from the same-named bindings above
-        ..
-      }
-  where
-    third :: (a, b, c) -> c
-    third (_, _, a) = a
-    -- Emails (third tuple component) of a path's authors that are active.
-    activeEmails :: [(Text, Text, Text)] -> [Text]
-    activeEmails authors_ = map third authors_ `List.intersect` activeAuthors
-    -- Number of commits reachable from HEAD that touch the path; 0 when
-    -- git's output does not parse as a number.
-    getChangeCount :: FilePath -> IO (FilePath, Integer)
-    getChangeCount path =
-      git bareRepo ["rev-list", "--count", "HEAD", "--", path]
-        /> filter (/= '\n')
-        /> readMaybe
-        /> fromMaybe 0
-        /> (path,)
-
--- | Given a git dir and a path inside the git repo, get information about the
--- authors, by running @git shortlog --numbered --summary --email HEAD@ on
--- that path and parsing one tuple per output line.
-authorsFor ::
- FilePath ->
- FilePath ->
- -- | returns (number of commits, author name, author email)
- IO [(Text, Text, Text)]
-authorsFor gitDir path =
- Process.readProcess
- "git"
- [ "--git-dir",
- gitDir,
- "shortlog",
- "--numbered",
- "--summary",
- "--email",
- "HEAD",
- "--",
- path
- ]
- ""
- /> Text.pack
- /> Text.lines
- /> map (Text.break (== '\t'))
- /> map parseAuthor
- where
- -- Each line was split at its first tab: the part before it is the commit
- -- count, the rest (still starting with the tab, removed by 'Text.strip')
- -- holds the name followed by the email in angle brackets.
- parseAuthor (commits, author) =
- ( Text.strip commits,
- Text.strip <| Text.takeWhile (/= '<') author,
- Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author
- )
-
--- | Run a git command on a repo: calls @git --git-dir <bareRepo> <args>@ via
--- 'Process.readProcess' with empty stdin and returns what that call yields.
-git ::
- -- | path to the git dir (bare repo)
- String ->
- -- | args to `git`
- [String] ->
- IO String
-git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) ""
-
--- | Pairs @path@ with the age, in days rounded to the nearest whole day, of
--- the most recent commit touching it. The age is measured from the author
--- date printed by @git log -n1 --pretty=%aI@ to the current time. The age is
--- 'Nothing' when git's output does not parse as a timestamp.
-lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer)
-lastTouched bareRepo path = do
- now <- Time.getCurrentTime
- timestamp <-
- Process.readProcess
- "git"
- [ "--git-dir",
- bareRepo,
- "log",
- "-n1",
- "--pretty=%aI",
- "--",
- path
- ]
- ""
- /> filter (/= '\n')
- /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z"
- pure (path, calculateAge now </ timestamp)
- where
- -- elapsed time expressed in units of 'Time.nominalDay'
- calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay
-
--- | Does the aggregate score calculation given number of files found to be
--- blackholes, liabilities, etc.
---
--- Arguments are, in order: total number of files, number of blackholes,
--- number of liabilities. Each blackhole counts for 5/10 of a good file and
--- each liability for 7/10; the weighted sum is scaled so that a repo with only
--- good files scores 100. The result is never below 0.
-calculateScore :: Double -> Double -> Double -> Integer
--- no files at all: score 0 (also avoids dividing by zero below)
-calculateScore 0 _ _ = 0
--- nothing flagged in a non-empty repo: perfect score
-calculateScore a 0 0 | a > 0 = 100
--- any negative count: score 0
-calculateScore a b c | a < 0 || b < 0 || c < 0 = 0
-calculateScore numTotal numBlackholes numLiabilities =
- max 0 <. round
- <| maxScore
- * (weightedBlackholes + weightedLiabilities + numGood)
- / numTotal
- where
- weightedBlackholes = numBlackholes * (5 / 10)
- weightedLiabilities = numLiabilities * (7 / 10)
- -- files that are neither blackholes nor liabilities
- numGood = numTotal - numBlackholes - numLiabilities
- maxScore = 100.0
-
--- | Tests for 'calculateScore': three fixed examples (nothing flagged, every
--- file a blackhole, every file a liability) and two properties bounding the
--- result to the range 0..100 for generated inputs.
-test_calculateScore :: Test.Tree
-test_calculateScore =
- Test.group
- "calculateScore"
- [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0,
- Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0,
- Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100,
- Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100,
- Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0
- ]
- ]
diff --git a/Biz/Devalloc/get-examples.sh b/Biz/Devalloc/get-examples.sh
deleted file mode 100755
index 2e0647b..0000000
--- a/Biz/Devalloc/get-examples.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/env bash
-if [ "$#" == "0" ]
-then
- echo "usage: $(basename $0) <cookie>"
- echo "copy the cookie from the browser dev console"
- exit 1
-fi
-cookie="$1"
-curl 'https://devalloc.io/analysis?user=github&repo=training-kit' \
- -X POST \
- -H 'Content-Type: application/x-www-form-urlencoded' \
- -H "Cookie: JWT-Cookie=$cookie" \
- --compressed --insecure
diff --git a/Biz/Devalloc/main.py b/Biz/Devalloc/main.py
deleted file mode 100755
index bb10441..0000000
--- a/Biz/Devalloc/main.py
+++ /dev/null
@@ -1,221 +0,0 @@
-#!/usr/bin/env python
-"""
-Analyze developer allocation across a codebase.
-"""
-
-import argparse
-import datetime
-import logging
-import os
-import re
-import subprocess
-import sys
-
-
-def find_user(line):
- """Given 'Ben Sima <ben@bsima.me>', finds `Ben Sima'. Returns the first
- matching string."""
- return re.findall(r"^[^<]*", line)[0].strip()
-
-
-def authors_for(path, active_users):
- """Return a dictionary of {author: commits} for given path. Usernames not in
- the 'active_users' list will be filtered out."""
- raw = subprocess.check_output(
- ["git", "shortlog", "--numbered", "--summary", "--email", "--", path]
- ).decode("utf-8")
- lines = [s for s in raw.split("\n") if s]
- data = {}
- for line in lines:
- parts = line.strip().split("\t")
- author = find_user(parts[1])
- commits = parts[0]
- if author in active_users:
- data[author] = commits
- return data
-
-
-def mailmap_users():
- """Returns users from the .mailmap file."""
- users = []
- with open(".mailmap") as file:
- lines = file.readlines()
- for line in lines:
- users.append(find_user(line))
- return users
-
-
-MAX_SCORE = 10
-
-
-def score(blackhole, liability, good, total):
- "Calculate the score."
- weights = {
- "blackhole": 0.5,
- "liability": 0.7,
- }
- return (
- MAX_SCORE
- * (
- (blackhole * weights["blackhole"])
- + (liability * weights["liability"])
- + good
- )
- / total
- )
-
-
-def get_args():
- "Parse CLI arguments."
- cli = argparse.ArgumentParser(description=__doc__)
- cli.add_argument("repo", default=".", help="the git repo to run on", metavar="REPO")
- cli.add_argument(
- "-b",
- "--blackholes",
- action="store_true",
- help="print the blackholes (files with one or zero active contributors)",
- )
- cli.add_argument(
- "-l",
- "--liabilities",
- action="store_true",
- help="print the liabilities (files with < 3 active contributors)",
- )
- cli.add_argument(
- "-s",
- "--stale",
- action="store_true",
- help="print stale files (haven't been touched in 6 months)",
- )
- cli.add_argument(
- "-i", "--ignored", nargs="+", default=[], help="patterns to ignore in paths",
- )
- cli.add_argument(
- "--active-users",
- nargs="+",
- default=[],
- help="list of active user emails. if not provided, this is loaded from .mailmap",
- )
- cli.add_argument(
- "-v",
- "--verbosity",
- help="set the log level verbosity",
- choices=["debug", "warning", "error"],
- default="error",
- )
- return cli.parse_args()
-
-
-def guard_git(repo):
- "Guard against non-git repos."
- is_git = subprocess.run(
- ["git", "rev-parse"],
- stderr=subprocess.PIPE,
- stdout=subprocess.PIPE,
- check=False,
- ).returncode
- if is_git != 0:
- sys.exit(f"error: not a git repository: {repo}")
-
-
-def staleness(path, now):
- "How long has it been since this file was touched?"
- timestamp = datetime.datetime.strptime(
- subprocess.check_output(["git", "log", "-n1", "--pretty=%aI", path])
- .decode("utf-8")
- .strip(),
- "%Y-%m-%dT%H:%M:%S%z",
- )
- delta = now - timestamp
- return delta.days
-
-
-class Repo:
- "Represents a repo and stats for the repo."
-
- def __init__(self, ignored_paths, active_users):
- self.paths = [
- p
- for p in subprocess.check_output(["git", "ls-files", "--no-deleted"])
- .decode("utf-8")
- .split()
- if not any(i in p for i in ignored_paths)
- ]
- logging.debug("collecting stats")
- self.stats = {}
- for path in self.paths:
- self.stats[path] = authors_for(path, active_users)
- self.blackholes = [path for path, authors in self.stats.items() if not authors]
- self.liabilities = {
- path: list(authors)
- for path, authors in self.stats.items()
- if 1 <= len(authors) < 3
- }
- now = datetime.datetime.utcnow().astimezone()
- self.stale = {}
- for path, _ in self.stats.items():
- _staleness = staleness(path, now)
- if _staleness > 180:
- self.stale[path] = _staleness
-
- def print_blackholes(self, full):
- "Print number of blackholes, or list of all blackholes."
- # note: file renames may result in false positives
- n_blackhole = len(self.blackholes)
- print(f"Blackholes: {n_blackhole}")
- if full:
- for path in self.blackholes:
- print(f" {path}")
-
- def print_liabilities(self, full):
- "Print number of liabilities, or list of all liabilities."
- n_liabilities = len(self.liabilities)
- print(f"Liabilities: {n_liabilities}")
- if full:
- for path, authors in self.liabilities.items():
- print(f" {path} ({', '.join(authors)})")
-
- def print_score(self):
- "Print the overall score."
- n_total = len(self.stats.keys())
- n_blackhole = len(self.blackholes)
- n_liabilities = len(self.liabilities)
- n_good = n_total - n_blackhole - n_liabilities
- print("Total:", n_total)
- print(
- "Score: {:.2f}/{}".format(
- score(n_blackhole, n_liabilities, n_good, n_total), MAX_SCORE
- )
- )
-
- def print_stale(self, full):
- "Print stale files"
- n_stale = len(self.stale)
- print(f"Stale files: {n_stale}")
- if full:
- for path, days in self.stale.items():
- print(f" {path} ({days} days)")
-
-
-if __name__ == "__main__":
- ARGS = get_args()
- logging.basicConfig(stream=sys.stderr, level=ARGS.verbosity.upper())
-
- logging.debug("starting")
- os.chdir(os.path.abspath(ARGS.repo))
-
- guard_git(ARGS.repo)
-
- # if no active users provided, load from .mailmap
- if ARGS.active_users == []:
- if os.path.exists(".mailmap"):
- ARGS.active_users = mailmap_users()
-
- # collect data
- REPO = Repo(ARGS.ignored, ARGS.active_users)
-
- # print data
- REPO.print_score()
- REPO.print_blackholes(ARGS.blackholes)
- REPO.print_liabilities(ARGS.liabilities)
- REPO.print_stale(ARGS.stale)
diff --git a/Biz/Devalloc/pitch.md b/Biz/Devalloc/pitch.md
deleted file mode 100644
index cfc0b23..0000000
--- a/Biz/Devalloc/pitch.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Devalloc
-
-Devalloc analyzes your codebase trends, finds patterns in how your developers
-work, and protects against tech debt.
-
-Just hook it up to your CI system - it will warn you when it finds a problem.
-
-## Identify blackholes in your codebase
-
-What if none of your active employees have touched some part of the codebase?
-This happens too often with legacy code, and then it turns into a huge source of
-tech debt. Devalloc finds these "blackholes" and warns you about them so you
-can be proactive in eliminating tech debt.
-
-## Protect against lost knowledge
-
-Not everyone can know every part of a codebase. By finding pieces of code
-that only 1 or 2 people have touched, devalloc identifies siloed knowledge. This
-allows you to protect against the risk of this knowledge leaving the company if
-an employee leaves.
-
-## Don't just measure "code coverage" - also know your "dev coverage"
-
-No matter how smart your employees are, if you are under- or over-utilizing your
-developers then you will never get optimal performance from your team.
-
-- Find developer "hot spots" in your code: which pieces of code get continually
- rewritten, taking up valuable dev time?
-- Know how your devs work best: which ones have depth of knowledge, and which
- ones have breadth?
-
-(Paid only)
-
-## See how your teams *actually* organize themselves with cluster analysis
-
-Does your team feel splintered or not cohesive? Which developers work best
-together? Devalloc analyzes the collaboration patterns between devs and helps
-you form optimal pairings and teams based on shared code and mindspace.
-
-(Paid only)