summaryrefslogtreecommitdiff
path: root/Biz/Devalloc
diff options
context:
space:
mode:
authorBen Sima <ben@bsima.me>2021-07-07 18:57:04 -0400
committerBen Sima <ben@bsima.me>2021-11-26 13:47:36 -0500
commit1934482e8a5beb628b1e6e9db11f9fd274f174a5 (patch)
tree4047135e1adc49986197b59ccb02d374d48f801d /Biz/Devalloc
parent75a804d52461aade2e4431a2148eabcd99a9e26e (diff)
Factor Analysis into own module
Diffstat (limited to 'Biz/Devalloc')
-rw-r--r--Biz/Devalloc/Analysis.hs252
1 files changed, 252 insertions, 0 deletions
diff --git a/Biz/Devalloc/Analysis.hs b/Biz/Devalloc/Analysis.hs
new file mode 100644
index 0000000..4b1f297
--- /dev/null
+++ b/Biz/Devalloc/Analysis.hs
@@ -0,0 +1,252 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- : out devalloc-analyze
+module Biz.Devalloc.Analysis
+ ( Analysis (..),
+ Commit (..),
+ run,
+ main,
+ test,
+ git,
+ )
+where
+
+import Alpha
+import qualified Biz.Cli as Cli
+import Biz.Test ((@=?))
+import qualified Biz.Test as Test
+import qualified Control.Concurrent.Async as Async
+import qualified Data.Aeson as Aeson
+import Data.Data (Data)
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.String as String
+import qualified Data.Text as Text
+import qualified Data.Time.Clock as Time
+import qualified Data.Time.Format as Time
+import qualified System.Directory as Directory
+import qualified System.Process as Process
+
+main :: IO ()
+main = Cli.main <| Cli.Plan help move test tidy
+
+move :: Cli.Arguments -> IO ()
+move args = gitDir +> run authors /> Aeson.encode +> putStrLn
+ where
+ gitDir =
+ Cli.argument "git-dir"
+ |> Cli.getArgWithDefault args ".git"
+ |> Directory.makeAbsolute
+ authors =
+ -- i think this is not working? do i need optparse-applicative?
+ Cli.shortOption 'a'
+ |> Cli.getAllArgs args
+ |> map Text.pack
+
+tidy :: cfg -> IO ()
+tidy _ = pure ()
+
+test :: Test.Tree
+test = Test.group "Biz.Devalloc.Analysis" [test_calculateScore]
+
+help :: Cli.Docopt
+help =
+ [Cli.docopt|
+devalloc-analyze
+
+Usage:
+ devalloc-analyze test
+ devalloc-analyze [--author=<email>]... <git-dir>
+
+Options:
+ -a, --author List of active authors' emails.
+|]
+
+newtype Commit = Sha Text
+ deriving (Eq, Data, Typeable, Ord, Generic, Show)
+
+instance Aeson.ToJSON Commit
+
+-- | The result of analyzing a git repo.
+data Analysis = Analysis
+ { -- | Where the repo is stored on the local disk.
+ gitDir :: FilePath,
+ -- | A path with no active contributors
+ blackholes :: [Text],
+ -- | A path with < 3 active contributors
+ liabilities :: [Text],
+ -- | Map of path to number of commits, for detecting paths that continually
+ -- get rewritten.
+ hotspotMap :: Map FilePath Integer,
+ -- | Files that have not been touched in 6 months
+ stale :: Map FilePath Integer,
+ -- | Total score for the repo
+ score :: Integer,
+ -- | Total number of files
+ totalFiles :: Integer,
+ -- | The total number of commits
+ totalCommits :: Integer,
+ -- | List of all the active users we care about
+ activeAuthors :: [Text],
+ -- | Which commit this analysis was run against.
+ commit :: Commit
+ }
+ deriving (Eq, Ord, Generic, Show, Data, Typeable)
+
+instance Aeson.ToJSON Analysis
+
+run :: [Text] -> FilePath -> IO Analysis
+run activeAuthors bareRepo = do
+ commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha
+ tree <-
+ git
+ bareRepo
+ [ "ls-tree",
+ "--full-tree",
+ "--name-only",
+ "-r", -- recurse into subtrees
+ "HEAD"
+ ]
+ /> String.lines
+ authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
+ let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])]
+ stalenessMap <- traverse (lastTouched bareRepo) tree
+ let blackholes =
+ [ Text.pack path
+ | (path, authors_) <- authorMap,
+ null (map third authors_ `List.intersect` activeAuthors)
+ ]
+ let liabilities =
+ [ Text.pack path
+ | (path, authors_) <- authorMap,
+ length (map third authors_ `List.intersect` activeAuthors) < 3
+ ]
+ let numBlackholes = realToFrac <| length blackholes
+ let numLiabilities = realToFrac <| length liabilities
+ let numTotal = realToFrac <| length tree
+ hotspotMap <-
+ Map.fromList </ Async.mapConcurrently getChangeCount tree
+ totalCommits <-
+ git bareRepo ["rev-list", "--count", "HEAD"]
+ /> filter (/= '\n')
+ /> readMaybe
+ /> fromMaybe 0
+ pure
+ <| Analysis
+ { gitDir = bareRepo,
+ stale =
+ Map.fromList
+ <| [ (path, days)
+ | (path, Just days) <- stalenessMap,
+ days > 180
+ ],
+ score = calculateScore numTotal numBlackholes numLiabilities,
+ totalFiles = toInteger <| length tree,
+ ..
+ }
+ where
+ third :: (a, b, c) -> c
+ third (_, _, a) = a
+ getChangeCount :: FilePath -> IO (FilePath, Integer)
+ getChangeCount path =
+ git bareRepo ["rev-list", "--count", "HEAD", "--", path]
+ /> filter (/= '\n')
+ /> readMaybe
+ /> fromMaybe 0
+ /> (path,)
+
+-- | Given a git dir and a path inside the git repo, get information about the
+-- authors.
+authorsFor ::
+ FilePath ->
+ FilePath ->
+ -- | returns (number of commits, author name, author email)
+ IO [(Text, Text, Text)]
+authorsFor gitDir path =
+ Process.readProcess
+ "git"
+ [ "--git-dir",
+ gitDir,
+ "shortlog",
+ "--numbered",
+ "--summary",
+ "--email",
+ "HEAD",
+ "--",
+ path
+ ]
+ ""
+ /> Text.pack
+ /> Text.lines
+ /> map (Text.break (== '\t'))
+ /> map parseAuthor
+ where
+ parseAuthor (commits, author) =
+ ( Text.strip commits,
+ Text.strip <| Text.takeWhile (/= '<') author,
+ Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author
+ )
+
+-- | Run a git command on a repo
+git ::
+ -- | path to the git dir (bare repo)
+ String ->
+ -- | args to `git`
+ [String] ->
+ IO String
+git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) ""
+
+lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer)
+lastTouched bareRepo path = do
+ now <- Time.getCurrentTime
+ timestamp <-
+ Process.readProcess
+ "git"
+ [ "--git-dir",
+ bareRepo,
+ "log",
+ "-n1",
+ "--pretty=%aI",
+ "--",
+ path
+ ]
+ ""
+ /> filter (/= '\n')
+ /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z"
+ pure (path, calculateAge now </ timestamp)
+ where
+ calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay
+
+-- | Does the aggregate score calculation given number of files found to be
+-- blackholes, liabilities, etc.
+calculateScore :: Double -> Double -> Double -> Integer
+calculateScore 0 _ _ = 0
+calculateScore a 0 0 | a > 0 = 100
+calculateScore a b c | a < 0 || b < 0 || c < 0 = 0
+calculateScore numTotal numBlackholes numLiabilities =
+ max 0 <. round
+ <| maxScore
+ * (weightedBlackholes + weightedLiabilities + numGood)
+ / numTotal
+ where
+ weightedBlackholes = numBlackholes * (5 / 10)
+ weightedLiabilities = numLiabilities * (7 / 10)
+ numGood = numTotal - numBlackholes - numLiabilities
+ maxScore = 100.0
+
+test_calculateScore :: Test.Tree
+test_calculateScore =
+ Test.group
+ "calculateScore"
+ [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0,
+ Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0,
+ Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100,
+ Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100,
+ Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0
+ ]