diff options
author | Ben Sima <ben@bsima.me> | 2021-07-07 18:57:04 -0400 |
---|---|---|
committer | Ben Sima <ben@bsima.me> | 2021-11-26 13:47:36 -0500 |
commit | 1934482e8a5beb628b1e6e9db11f9fd274f174a5 (patch) | |
tree | 4047135e1adc49986197b59ccb02d374d48f801d /Biz/Devalloc/Analysis.hs | |
parent | 75a804d52461aade2e4431a2148eabcd99a9e26e (diff) |
Factor Analysis into own module
Diffstat (limited to 'Biz/Devalloc/Analysis.hs')
-rw-r--r-- | Biz/Devalloc/Analysis.hs | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/Biz/Devalloc/Analysis.hs b/Biz/Devalloc/Analysis.hs new file mode 100644 index 0000000..4b1f297 --- /dev/null +++ b/Biz/Devalloc/Analysis.hs @@ -0,0 +1,252 @@ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- : out devalloc-analyze +module Biz.Devalloc.Analysis + ( Analysis (..), + Commit (..), + run, + main, + test, + git, + ) +where + +import Alpha +import qualified Biz.Cli as Cli +import Biz.Test ((@=?)) +import qualified Biz.Test as Test +import qualified Control.Concurrent.Async as Async +import qualified Data.Aeson as Aeson +import Data.Data (Data) +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.String as String +import qualified Data.Text as Text +import qualified Data.Time.Clock as Time +import qualified Data.Time.Format as Time +import qualified System.Directory as Directory +import qualified System.Process as Process + +main :: IO () +main = Cli.main <| Cli.Plan help move test tidy + +move :: Cli.Arguments -> IO () +move args = gitDir +> run authors /> Aeson.encode +> putStrLn + where + gitDir = + Cli.argument "git-dir" + |> Cli.getArgWithDefault args ".git" + |> Directory.makeAbsolute + authors = + -- i think this is not working? do i need optparse-applicative? + Cli.shortOption 'a' + |> Cli.getAllArgs args + |> map Text.pack + +tidy :: cfg -> IO () +tidy _ = pure () + +test :: Test.Tree +test = Test.group "Biz.Devalloc.Analysis" [test_calculateScore] + +help :: Cli.Docopt +help = + [Cli.docopt| +devalloc-analyze + +Usage: + devalloc-analyze test + devalloc-analyze [--author=<email>]... <git-dir> + +Options: + -a, --author List of active authors' emails. +|] + +newtype Commit = Sha Text + deriving (Eq, Data, Typeable, Ord, Generic, Show) + +instance Aeson.ToJSON Commit + +-- | The result of analyzing a git repo. +data Analysis = Analysis + { -- | Where the repo is stored on the local disk. + gitDir :: FilePath, + -- | A path with no active contributors + blackholes :: [Text], + -- | A path with < 3 active contributors + liabilities :: [Text], + -- | Map of path to number of commits, for detecting paths that continually + -- get rewritten. + hotspotMap :: Map FilePath Integer, + -- | Files that have not been touched in 6 months + stale :: Map FilePath Integer, + -- | Total score for the repo + score :: Integer, + -- | Total number of files + totalFiles :: Integer, + -- | The total number of commits + totalCommits :: Integer, + -- | List of all the active users we care about + activeAuthors :: [Text], + -- | Which commit this analysis was run against. + commit :: Commit + } + deriving (Eq, Ord, Generic, Show, Data, Typeable) + +instance Aeson.ToJSON Analysis + +run :: [Text] -> FilePath -> IO Analysis +run activeAuthors bareRepo = do + commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha + tree <- + git + bareRepo + [ "ls-tree", + "--full-tree", + "--name-only", + "-r", -- recurse into subtrees + "HEAD" + ] + /> String.lines + authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] + let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])] + stalenessMap <- traverse (lastTouched bareRepo) tree + let blackholes = + [ Text.pack path + | (path, authors_) <- authorMap, + null (map third authors_ `List.intersect` activeAuthors) + ] + let liabilities = + [ Text.pack path + | (path, authors_) <- authorMap, + length (map third authors_ `List.intersect` activeAuthors) < 3 + ] + let numBlackholes = realToFrac <| length blackholes + let numLiabilities = realToFrac <| length liabilities + let numTotal = realToFrac <| length tree + hotspotMap <- + Map.fromList </ Async.mapConcurrently getChangeCount tree + totalCommits <- + git bareRepo ["rev-list", "--count", "HEAD"] + /> filter (/= '\n') + /> readMaybe + /> fromMaybe 0 + pure + <| Analysis + { gitDir = bareRepo, + stale = + Map.fromList + <| [ (path, days) + | (path, Just days) <- stalenessMap, + days > 180 + ], + score = calculateScore numTotal numBlackholes numLiabilities, + totalFiles = toInteger <| length tree, + .. + } + where + third :: (a, b, c) -> c + third (_, _, a) = a + getChangeCount :: FilePath -> IO (FilePath, Integer) + getChangeCount path = + git bareRepo ["rev-list", "--count", "HEAD", "--", path] + /> filter (/= '\n') + /> readMaybe + /> fromMaybe 0 + /> (path,) + +-- | Given a git dir and a path inside the git repo, get information about the +-- authors. +authorsFor :: + FilePath -> + FilePath -> + -- | returns (number of commits, author name, author email) + IO [(Text, Text, Text)] +authorsFor gitDir path = + Process.readProcess + "git" + [ "--git-dir", + gitDir, + "shortlog", + "--numbered", + "--summary", + "--email", + "HEAD", + "--", + path + ] + "" + /> Text.pack + /> Text.lines + /> map (Text.break (== '\t')) + /> map parseAuthor + where + parseAuthor (commits, author) = + ( Text.strip commits, + Text.strip <| Text.takeWhile (/= '<') author, + Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author + ) + +-- | Run a git command on a repo +git :: + -- | path to the git dir (bare repo) + String -> + -- | args to `git` + [String] -> + IO String +git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) "" + +lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer) +lastTouched bareRepo path = do + now <- Time.getCurrentTime + timestamp <- + Process.readProcess + "git" + [ "--git-dir", + bareRepo, + "log", + "-n1", + "--pretty=%aI", + "--", + path + ] + "" + /> filter (/= '\n') + /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z" + pure (path, calculateAge now </ timestamp) + where + calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay + +-- | Does the aggregate score calculation given number of files found to be +-- blackholes, liabilities, etc. +calculateScore :: Double -> Double -> Double -> Integer +calculateScore 0 _ _ = 0 +calculateScore a 0 0 | a > 0 = 100 +calculateScore a b c | a < 0 || b < 0 || c < 0 = 0 +calculateScore numTotal numBlackholes numLiabilities = + max 0 <. round + <| maxScore + * (weightedBlackholes + weightedLiabilities + numGood) + / numTotal + where + weightedBlackholes = numBlackholes * (5 / 10) + weightedLiabilities = numLiabilities * (7 / 10) + numGood = numTotal - numBlackholes - numLiabilities + maxScore = 100.0 + +test_calculateScore :: Test.Tree +test_calculateScore = + Test.group + "calculateScore" + [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0, + Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0, + Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100, + Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100, + Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0 + ] |