diff options
author | Ben Sima <ben@bsima.me> | 2021-07-07 18:57:04 -0400 |
---|---|---|
committer | Ben Sima <ben@bsima.me> | 2021-11-26 13:47:36 -0500 |
commit | 1934482e8a5beb628b1e6e9db11f9fd274f174a5 (patch) | |
tree | 4047135e1adc49986197b59ccb02d374d48f801d | |
parent | 75a804d52461aade2e4431a2148eabcd99a9e26e (diff) |
Factor Analysis into own module
-rw-r--r-- | Alpha.hs | 5 | ||||
-rw-r--r-- | Biz/Cli.hs | 2 | ||||
-rw-r--r-- | Biz/Devalloc.hs | 341 | ||||
-rw-r--r-- | Biz/Devalloc/Analysis.hs | 252 |
4 files changed, 338 insertions, 262 deletions
@@ -66,6 +66,7 @@ module Alpha -- * String capitalize, lowercase, + strip, -- * Lists list, @@ -184,6 +185,10 @@ _ ?+ [] = panic "wutlus: empty cond list" (?<) :: Bool -> (Bool -> Text -> a) -> Text -> a a ?< f = if not a then f a else panic "wutgal failed" +-- | Removes newlinse from a string. +strip :: String -> String +strip = filter (/= '\n') + -- | Removes newlines from text. chomp :: Text -> Text chomp = Text.filter (/= '\n') @@ -11,7 +11,9 @@ module Biz.Cli Docopt.docopt, Docopt.getAllArgs, Docopt.getArg, + Docopt.getArgWithDefault, Docopt.longOption, + Docopt.shortOption, Docopt.command, has, ) diff --git a/Biz/Devalloc.hs b/Biz/Devalloc.hs index d9cf840..9eea33d 100644 --- a/Biz/Devalloc.hs +++ b/Biz/Devalloc.hs @@ -61,6 +61,8 @@ import Alpha hiding (rem, (<.>)) import qualified Biz.App as App import qualified Biz.Auth as Auth import qualified Biz.Cli as Cli +import Biz.Devalloc.Analysis (Analysis (..), Commit (..)) +import qualified Biz.Devalloc.Analysis as Analysis import qualified Biz.Id as Id import qualified Biz.Log as Log import qualified Biz.Look @@ -89,12 +91,10 @@ import qualified Data.Map as Map import Data.SafeCopy (base, deriveSafeCopy, extension) import qualified Data.SafeCopy as SafeCopy import qualified Data.Set as Set -import qualified Data.String as String import qualified Data.Text as Text import qualified Data.Text.Encoding as Encoding import qualified Data.Time.Calendar as Time import qualified Data.Time.Clock as Time -import qualified Data.Time.Format as Time import Data.Vector (Vector) import qualified Data.Vector as Vector -- import qualified Data.Vector.Algorithms.Intro as Vector @@ -201,6 +201,7 @@ data User0 = User0 } deriving (Eq, Data, Typeable, Ord, Generic, Show) +$(deriveSafeCopy 0 'base ''Id.Id) $(deriveSafeCopy 0 'base ''User0) -- | The main representation of a user. @@ -242,9 +243,6 @@ instance Indexable User where ixFun <| \User {..} -> [userSubscription] ] -newtype Commit = Sha Text - deriving (Eq, Data, Typeable, Ord, Generic, Show) - instance Lucid.ToHtml Commit where toHtmlRaw = Lucid.toHtml toHtml (Sha txt) = Lucid.toHtml txt @@ -269,84 +267,13 @@ data Visibility = Public | Private $(deriveSafeCopy 0 'base ''Visibility) -data Analysis0 = Analysis0 - { analysisId :: Id.Id Analysis0, - url :: URL, - bareRepo :: FilePath, - repoVisibility :: Visibility, - blackholes :: [Text], - liabilities :: [Text], - stale :: [(FilePath, Int)], - score :: Integer, - totalFiles :: Integer, - activeAuthors :: [Text], - commit :: Commit, - askedBy :: Id.Id User - } - deriving (Eq, Ord, Generic, Show, Data, Typeable) - -$(deriveSafeCopy 0 'base ''Analysis0) - --- | The result of analyzing a git repo. -data Analysis = Analysis - { -- | Monotonic incrementing integer id - analysisId :: Id.Id Analysis, - -- | Canonical URL for the repo. I wish this was structured data instead of - -- just Text. - url :: URL, - -- | Where the repo is stored on the local disk. - bareRepo :: FilePath, - -- | If the repo is OSS or not - repoVisibility :: Visibility, - -- | A path with no active contributors - blackholes :: [Text], - -- | A path with < 3 active contributors - liabilities :: [Text], - -- | Map of path to number of commits, for detecting paths that continually - -- get rewritten. - hotspotMap :: Map FilePath Integer, - -- | Files that have not been touched in 6 months - stale :: [(FilePath, Integer)], - -- | Total score for the repo - score :: Integer, - -- | Total number of files - totalFiles :: Integer, - -- | The total number of commits - totalCommits :: Integer, - -- | List of all the active users we care about - activeAuthors :: [Text], - -- | Which commit this analysis was run against. - commit :: Commit, - -- | Who asked for this analysis - askedBy :: Id.Id User - } - deriving (Eq, Ord, Generic, Show, Data, Typeable) - -instance SafeCopy.Migrate Analysis where - type MigrateFrom Analysis = Analysis0 - migrate Analysis0 {..} = - Analysis - { analysisId = Id.mk (Proxy :: Proxy Analysis) <| Id.untag analysisId, - hotspotMap = mempty, - totalCommits = 0, - stale = map (second toInteger) stale, - .. - } - -$(deriveSafeCopy 0 'base ''Id.Id) -$(deriveSafeCopy 0 'base ''Analysis) - instance Indexable Analysis where empty = ixSet - [ ixFun <| \Analysis {..} -> [analysisId], - ixFun <| \Analysis {..} -> [askedBy], - ixFun <| \Analysis {..} -> [url], - ixFun <| \Analysis {..} -> [commit], - ixFun <| \Analysis {..} -> [repoVisibility] + [ ixFun <| \Analysis {..} -> [commit] ] -instance App.HasCss Analysis where +instance App.HasCss AnalysisAction where cssFor _ = do "#analysis > *" ? do Biz.Look.marginAll (rem 2) @@ -399,14 +326,13 @@ instance App.HasCss Analysis where ".bar" ? do Clay.height (px 800) -instance Lucid.ToHtml Analysis where +instance Lucid.ToHtml AnalysisAction where toHtmlRaw = Lucid.toHtml - toHtml Analysis {..} = + toHtml AnalysisAction {..} = Lucid.div_ [Lucid.id_ "analysis"] <| do + let Analysis {..} = analysis Lucid.p_ [Lucid.class_ "analysisFor"] <| do - "Analysis for " - Lucid.a_ [Lucid.href_ <| (\(URL txt) -> txt) <| url] <| do - Lucid.toHtml url + "Analysis for " <> Lucid.toHtml gitDir score_ <| do title_ "Total Score" @@ -488,8 +414,8 @@ instance Lucid.ToHtml Analysis where score_ <| do title_ "Stale files" - quantity_ <| Lucid.toHtml <| slen stale - previewChart <| simpleBar (len stale) totalFiles + quantity_ <| Lucid.toHtml <| tshow <| Map.size stale + previewChart <| simpleBar (Map.size stale) totalFiles Lucid.details_ <| do Lucid.summary_ "Details" desc @@ -505,7 +431,8 @@ instance Lucid.ToHtml Analysis where documentation practices along the way. |] Lucid.ul_ <| do - forM_ stale <| \(path, days) -> + -- probably Map.mapWithKey is better? + forM_ (Map.toList stale) <| \(path, days) -> Lucid.li_ <| Lucid.toHtml <| path <> " (" <> show days <> " days)" Lucid.with score_ [Lucid.id_ "hotspots"] <| do @@ -540,7 +467,7 @@ instance Lucid.ToHtml Analysis where <| Lucid.toHtml <| path <> ": " <> show n <> " commits" where - simpleBar :: Monad m => Integer -> Integer -> Lucid.HtmlT m () + simpleBar :: (Show i, Monad m, Num i) => i -> Integer -> Lucid.HtmlT m () simpleBar n total = do Lucid.table_ [Lucid.class_ "charts-css bar stacked multiple"] <| do Lucid.tr_ <| do @@ -560,12 +487,41 @@ instance Lucid.ToHtml Analysis where desc :: Monad m => Text -> Lucid.HtmlT m () desc = Lucid.p_ <. Cmark.renderNode [] <. Cmark.commonmarkToNode [] +-- | Captures an 'Analysis' with metadata used in the webapp to track who asked +-- it and so on. +data AnalysisAction = AnalysisAction + { -- | Monotonic incrementing integer id + analysisId :: Id.Id AnalysisAction, + -- | Who asked for this analysis + askedBy :: Id.Id User, + -- | Where is this coming from? + url :: URL, + -- | Is the URL publically visible? + repoVisibility :: Visibility, + -- | The actual analaysis + analysis :: Analysis + } + deriving (Eq, Ord, Generic, Show, Data, Typeable) + +$(deriveSafeCopy 0 'base ''Analysis) +$(deriveSafeCopy 0 'base ''AnalysisAction) + +instance Indexable AnalysisAction where + empty = + ixSet + [ ixFun <| \AnalysisAction {..} -> [analysisId], + ixFun <| \AnalysisAction {..} -> [askedBy], + ixFun <| \AnalysisAction {..} -> [url], + ixFun <| \AnalysisAction {..} -> [repoVisibility], + ixFun <| \AnalysisAction {..} -> [commit analysis] + ] + -- | The database. data Keep = Keep { users :: IxSet User, nextUserId :: Id.Id User, - analyses :: IxSet Analysis, - nextAnalysisId :: Id.Id Analysis + analyses :: IxSet AnalysisAction, + nextAnalysisId :: Id.Id AnalysisAction } deriving (Data, Typeable) @@ -603,10 +559,10 @@ getUsers = do Keep {..} <- ask pure <| IxSet.toList users -createAnalysis :: Analysis -> Acid.Update Keep Analysis +createAnalysis :: AnalysisAction -> Acid.Update Keep AnalysisAction createAnalysis a = do keep@Keep {..} <- get - let newAnalysis = a {analysisId = nextAnalysisId} :: Analysis + let newAnalysis = a {analysisId = nextAnalysisId} :: AnalysisAction put <| keep { analyses = IxSet.insert newAnalysis analyses, @@ -614,27 +570,27 @@ createAnalysis a = do } pure newAnalysis -getAnalysisById :: Id.Id Analysis -> Acid.Query Keep (Maybe Analysis) +getAnalysisById :: Id.Id AnalysisAction -> Acid.Query Keep (Maybe AnalysisAction) getAnalysisById id = do Keep {..} <- ask pure <| IxSet.getOne <| analyses @= id -getAllAnalyses :: Acid.Query Keep [Analysis] +getAllAnalyses :: Acid.Query Keep [AnalysisAction] getAllAnalyses = do Keep {..} <- ask pure <| IxSet.toList analyses -getAnalysesByAsker :: User -> Acid.Query Keep [Analysis] +getAnalysesByAsker :: User -> Acid.Query Keep [AnalysisAction] getAnalysesByAsker User {..} = do Keep {..} <- ask pure <| IxSet.toList <| analyses @= userId -getAnalysesByUrl :: URL -> Acid.Query Keep [Analysis] +getAnalysesByUrl :: URL -> Acid.Query Keep [AnalysisAction] getAnalysesByUrl url = do Keep {..} <- ask pure <| IxSet.toList <| analyses @= url -getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe Analysis) +getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe AnalysisAction) getAnalysisByUrlAndCommit url sha = do Keep {..} <- ask pure <| IxSet.getOne <| analyses @= url &&& analyses @= sha @@ -727,7 +683,7 @@ mock_ghUser = init :: Keep init = Keep - { nextAnalysisId = Id.mk (Proxy :: Proxy Analysis) 1, + { nextAnalysisId = Id.mk (Proxy :: Proxy AnalysisAction) 1, nextUserId = Id.mk (Proxy :: Proxy User) 1, users = IxSet.empty, analyses = IxSet.empty @@ -817,8 +773,7 @@ test :: Test.Tree test = Test.group "Biz.Devalloc" - [ test_calculateScore, - test_spliceCreds, + [ test_spliceCreds, Test.with (startup True) (\t@(c, _, _) -> shutdown t >> tidy c) @@ -851,6 +806,7 @@ data Config = Config -- and set in .envrc.local -- https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token tokn :: Text, + -- | The example shown on the homepage homeExample :: URL } deriving (Generic, Show) @@ -950,7 +906,7 @@ data Paths path = Paths path :- Auth.Auth '[Auth.Cookie] User :> "analysis" - :> Capture "analysisId" (Id.Id Analysis) + :> Capture "analysisId" (Id.Id AnalysisAction) :> Get '[Lucid.HTML] (App.Html AnalysisDisplay), postAnalysis :: path @@ -1256,7 +1212,7 @@ instance Lucid.ToHtml AdminDashboard where -- | The front page pitch. Eventually I'd like to load the content from markdown -- files or some other store of data so I can A/B test. -data Home = Home Auth.OAuthArgs (Maybe Analysis) +data Home = Home Auth.OAuthArgs (Maybe AnalysisAction) instance App.HasCss Home where cssFor (Home _ mAnalysis) = do @@ -1356,7 +1312,7 @@ instance Lucid.ToHtml Home where h2 = Lucid.h2_ <. markdown exampleWrapper = Lucid.div_ [Lucid.class_ "example"] -data Analyses = Analyses User [Analysis] +data Analyses = Analyses User [AnalysisAction] instance App.HasCss Analyses where cssFor _ = mempty @@ -1373,7 +1329,7 @@ instance Lucid.ToHtml Analyses where [Lucid.linkHref_ "/" <| fieldLink selectRepo] "Analyze one of your repos" Lucid.div_ <| do - forM_ analyses <| \Analysis {..} -> + forM_ analyses <| \AnalysisAction {..} -> Lucid.a_ [ href analysisId, css <| Biz.Look.marginAll (em 1) @@ -1382,7 +1338,7 @@ instance Lucid.ToHtml Analyses where <| do Lucid.div_ <| Lucid.toHtml url Lucid.div_ [css <| Clay.fontSizeCustom Clay.Font.small] - <| Lucid.toHtml commit + <| Lucid.toHtml (commit analysis) footer where href aid = Lucid.linkHref_ "/" <| fieldLink getAnalysis aid @@ -1555,7 +1511,7 @@ instance Web.FromForm SubmitAnalysis -- | I need more information than just 'Analysis' has to render a full, useful -- web page, hence this type. -data AnalysisDisplay = AnalysisDisplay User Analysis +data AnalysisDisplay = AnalysisDisplay User AnalysisAction instance App.HasCss AnalysisDisplay where cssFor (AnalysisDisplay _ analysis) = App.cssFor analysis @@ -1570,164 +1526,24 @@ instance Lucid.ToHtml AnalysisDisplay where footer -- | Run a full analysis on a git repo -analyze :: Acid.AcidState Keep -> Id.Id User -> [Text] -> URL -> FilePath -> Bool -> IO Analysis +analyze :: Acid.AcidState Keep -> Id.Id User -> [Text] -> URL -> FilePath -> Bool -> IO AnalysisAction analyze keep askedBy activeAuthors url bareRepo repoPrivate = do - commit <- Sha </ Text.strip </ Text.pack </ git bareRepo ["log", "-n1", "--format=%H"] + commit <- Sha </ Text.strip </ Text.pack </ Analysis.git bareRepo ["log", "-n1", "--format=%H"] Acid.query keep (GetAnalysisByUrlAndCommit url commit) +> \case Just analysis -> pure analysis Nothing -> - runAnalysis bareRepo activeAuthors url askedBy repoPrivate commit + Analysis.run activeAuthors bareRepo + /> ( \a -> + AnalysisAction + { analysisId = mempty, + analysis = a, + repoVisibility = repoPrivate ?: (Private, Public), + .. + } + ) /> CreateAnalysis +> Acid.update keep -runAnalysis :: String -> [Text] -> URL -> Id.Id User -> Bool -> Commit -> IO Analysis -runAnalysis bareRepo activeAuthors url askedBy repoPrivate commit = do - tree <- - git - bareRepo - [ "ls-tree", - "--full-tree", - "--name-only", - "-r", -- recurse into subtrees - "HEAD" - ] - /> String.lines - authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] - let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])] - stalenessMap <- traverse (lastTouched bareRepo) tree - let blackholes = - [ Text.pack path - | (path, authors_) <- authorMap, - null (map third authors_ `List.intersect` activeAuthors) - ] - let liabilities = - [ Text.pack path - | (path, authors_) <- authorMap, - length (map third authors_ `List.intersect` activeAuthors) < 3 - ] - let numBlackholes = realToFrac <| length blackholes - let numLiabilities = realToFrac <| length liabilities - let numTotal = realToFrac <| length tree - hotspotMap <- - Map.fromList </ Async.mapConcurrently getChangeCount tree - totalCommits <- - git bareRepo ["rev-list", "--count", "HEAD"] - /> filter (/= '\n') - /> readMaybe - /> fromMaybe 0 - pure - <| Analysis - { analysisId = mempty, - stale = - [ (path, days) - | (path, Just days) <- stalenessMap, - days > 180 - ], - score = calculateScore numTotal numBlackholes numLiabilities, - totalFiles = toInteger <| length tree, - repoVisibility = repoPrivate ?: (Private, Public), - .. - } - where - third :: (a, b, c) -> c - third (_, _, a) = a - getChangeCount :: FilePath -> IO (FilePath, Integer) - getChangeCount path = - git bareRepo ["rev-list", "--count", "HEAD", "--", path] - /> filter (/= '\n') - /> readMaybe - /> fromMaybe 0 - /> (path,) - --- | Run a git command on a repo -git :: - -- | path to the git dir (bare repo) - String -> - -- | args to `git` - [String] -> IO String -git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) "" - --- | Does the aggregate score calculation given number of files found to be --- blackholes, liabilities, etc. -calculateScore :: Double -> Double -> Double -> Integer -calculateScore 0 _ _ = 0 -calculateScore a 0 0 | a > 0 = 100 -calculateScore a b c | a < 0 || b < 0 || c < 0 = 0 -calculateScore numTotal numBlackholes numLiabilities = - max 0 <. round - <| maxScore - * (weightedBlackholes + weightedLiabilities + numGood) - / numTotal - where - weightedBlackholes = numBlackholes * (5 / 10) - weightedLiabilities = numLiabilities * (7 / 10) - numGood = numTotal - numBlackholes - numLiabilities - maxScore = 100.0 - -test_calculateScore :: Test.Tree -test_calculateScore = - Test.group - "calculateScore" - [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0, - Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0, - Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100, - Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100, - Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0 - ] - -lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer) -lastTouched bareRepo path = do - now <- Time.getCurrentTime - timestamp <- - Process.readProcess - "git" - [ "--git-dir", - bareRepo, - "log", - "-n1", - "--pretty=%aI", - "--", - path - ] - "" - /> filter (/= '\n') - /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z" - pure (path, calculateAge now </ timestamp) - where - calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay - --- | Given a git dir and a path inside the git repo, get information about the --- authors. -authorsFor :: - FilePath -> - FilePath -> - -- | returns (number of commits, author name, author email) - IO [(Text, Text, Text)] -authorsFor gitDir path = - Process.readProcess - "git" - [ "--git-dir", - gitDir, - "shortlog", - "--numbered", - "--summary", - "--email", - "HEAD", - "--", - path - ] - "" - /> Text.pack - /> Text.lines - /> map (Text.break (== '\t')) - /> map parseAuthor - where - parseAuthor (commits, author) = - ( Text.strip commits, - Text.strip <| Text.takeWhile (/= '<') author, - Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author - ) - spliceCreds :: User -> Text -> Text spliceCreds User {..} url = scheme <> "//" <> unGitHubHandle userGitHubHandle <> ":" <> userGitHubToken <> "@" <> Text.drop 2 rest @@ -1767,7 +1583,7 @@ analyzeGitHub :: Text -> -- | GitHub repo Text -> - IO Analysis + IO AnalysisAction analyzeGitHub keep user@User {userId} ghAuth depo o r = do activeAuthors <- getPeople @@ -1823,7 +1639,7 @@ test_analyzeGitHub load = [ Test.unit "can analyze a public repo (octocat/hello-world)" <| do (c, _, k) <- load let user@User {userGitHubToken} = mock_user c - Analysis {..} <- + AnalysisAction {..} <- analyzeGitHub k user @@ -1832,19 +1648,20 @@ test_analyzeGitHub load = "octocat" "hello-world" url @?= URL "https://github.com/octocat/Hello-World" - bareRepo @?= depo c <> "/github.com/octocat/Hello-World.git" + -- bareRepo @?= depo c <> "/github.com/octocat/Hello-World.git" + let Analysis {..} = analysis length activeAuthors @?= 2 activeAuthors @?= ["hire@spacegho.st", "octocat@github.com"] blackholes @?= ["README"] liabilities @?= ["README"] - fst </ headMay stale @?= Just "README" + Map.member "README" stale @?= True score @?= 20 totalFiles @?= 1 commit @?= Sha "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d", Test.unit "can analyze a private repo (bsima/biz)" <| do (c, _, k) <- load let user@User {userGitHubToken} = mock_user c - Analysis {..} <- + AnalysisAction {..} <- analyzeGitHub k user @@ -1853,7 +1670,7 @@ test_analyzeGitHub load = "bsima" "biz" url @?= URL "https://github.com/bsima/biz" - bareRepo @?= depo c <> "/github.com/bsima/biz.git" + -- bareRepo @?= depo c <> "/github.com/bsima/biz.git" ] where mock_user c = diff --git a/Biz/Devalloc/Analysis.hs b/Biz/Devalloc/Analysis.hs new file mode 100644 index 0000000..4b1f297 --- /dev/null +++ b/Biz/Devalloc/Analysis.hs @@ -0,0 +1,252 @@ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- : out devalloc-analyze +module Biz.Devalloc.Analysis + ( Analysis (..), + Commit (..), + run, + main, + test, + git, + ) +where + +import Alpha +import qualified Biz.Cli as Cli +import Biz.Test ((@=?)) +import qualified Biz.Test as Test +import qualified Control.Concurrent.Async as Async +import qualified Data.Aeson as Aeson +import Data.Data (Data) +import qualified Data.List as List +import qualified Data.Map as Map +import qualified Data.String as String +import qualified Data.Text as Text +import qualified Data.Time.Clock as Time +import qualified Data.Time.Format as Time +import qualified System.Directory as Directory +import qualified System.Process as Process + +main :: IO () +main = Cli.main <| Cli.Plan help move test tidy + +move :: Cli.Arguments -> IO () +move args = gitDir +> run authors /> Aeson.encode +> putStrLn + where + gitDir = + Cli.argument "git-dir" + |> Cli.getArgWithDefault args ".git" + |> Directory.makeAbsolute + authors = + -- i think this is not working? do i need optparse-applicative? + Cli.shortOption 'a' + |> Cli.getAllArgs args + |> map Text.pack + +tidy :: cfg -> IO () +tidy _ = pure () + +test :: Test.Tree +test = Test.group "Biz.Devalloc.Analysis" [test_calculateScore] + +help :: Cli.Docopt +help = + [Cli.docopt| +devalloc-analyze + +Usage: + devalloc-analyze test + devalloc-analyze [--author=<email>]... <git-dir> + +Options: + -a, --author List of active authors' emails. +|] + +newtype Commit = Sha Text + deriving (Eq, Data, Typeable, Ord, Generic, Show) + +instance Aeson.ToJSON Commit + +-- | The result of analyzing a git repo. +data Analysis = Analysis + { -- | Where the repo is stored on the local disk. + gitDir :: FilePath, + -- | A path with no active contributors + blackholes :: [Text], + -- | A path with < 3 active contributors + liabilities :: [Text], + -- | Map of path to number of commits, for detecting paths that continually + -- get rewritten. + hotspotMap :: Map FilePath Integer, + -- | Files that have not been touched in 6 months + stale :: Map FilePath Integer, + -- | Total score for the repo + score :: Integer, + -- | Total number of files + totalFiles :: Integer, + -- | The total number of commits + totalCommits :: Integer, + -- | List of all the active users we care about + activeAuthors :: [Text], + -- | Which commit this analysis was run against. + commit :: Commit + } + deriving (Eq, Ord, Generic, Show, Data, Typeable) + +instance Aeson.ToJSON Analysis + +run :: [Text] -> FilePath -> IO Analysis +run activeAuthors bareRepo = do + commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha + tree <- + git + bareRepo + [ "ls-tree", + "--full-tree", + "--name-only", + "-r", -- recurse into subtrees + "HEAD" + ] + /> String.lines + authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] + let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])] + stalenessMap <- traverse (lastTouched bareRepo) tree + let blackholes = + [ Text.pack path + | (path, authors_) <- authorMap, + null (map third authors_ `List.intersect` activeAuthors) + ] + let liabilities = + [ Text.pack path + | (path, authors_) <- authorMap, + length (map third authors_ `List.intersect` activeAuthors) < 3 + ] + let numBlackholes = realToFrac <| length blackholes + let numLiabilities = realToFrac <| length liabilities + let numTotal = realToFrac <| length tree + hotspotMap <- + Map.fromList </ Async.mapConcurrently getChangeCount tree + totalCommits <- + git bareRepo ["rev-list", "--count", "HEAD"] + /> filter (/= '\n') + /> readMaybe + /> fromMaybe 0 + pure + <| Analysis + { gitDir = bareRepo, + stale = + Map.fromList + <| [ (path, days) + | (path, Just days) <- stalenessMap, + days > 180 + ], + score = calculateScore numTotal numBlackholes numLiabilities, + totalFiles = toInteger <| length tree, + .. + } + where + third :: (a, b, c) -> c + third (_, _, a) = a + getChangeCount :: FilePath -> IO (FilePath, Integer) + getChangeCount path = + git bareRepo ["rev-list", "--count", "HEAD", "--", path] + /> filter (/= '\n') + /> readMaybe + /> fromMaybe 0 + /> (path,) + +-- | Given a git dir and a path inside the git repo, get information about the +-- authors. +authorsFor :: + FilePath -> + FilePath -> + -- | returns (number of commits, author name, author email) + IO [(Text, Text, Text)] +authorsFor gitDir path = + Process.readProcess + "git" + [ "--git-dir", + gitDir, + "shortlog", + "--numbered", + "--summary", + "--email", + "HEAD", + "--", + path + ] + "" + /> Text.pack + /> Text.lines + /> map (Text.break (== '\t')) + /> map parseAuthor + where + parseAuthor (commits, author) = + ( Text.strip commits, + Text.strip <| Text.takeWhile (/= '<') author, + Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author + ) + +-- | Run a git command on a repo +git :: + -- | path to the git dir (bare repo) + String -> + -- | args to `git` + [String] -> + IO String +git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) "" + +lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer) +lastTouched bareRepo path = do + now <- Time.getCurrentTime + timestamp <- + Process.readProcess + "git" + [ "--git-dir", + bareRepo, + "log", + "-n1", + "--pretty=%aI", + "--", + path + ] + "" + /> filter (/= '\n') + /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z" + pure (path, calculateAge now </ timestamp) + where + calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay + +-- | Does the aggregate score calculation given number of files found to be +-- blackholes, liabilities, etc. +calculateScore :: Double -> Double -> Double -> Integer +calculateScore 0 _ _ = 0 +calculateScore a 0 0 | a > 0 = 100 +calculateScore a b c | a < 0 || b < 0 || c < 0 = 0 +calculateScore numTotal numBlackholes numLiabilities = + max 0 <. round + <| maxScore + * (weightedBlackholes + weightedLiabilities + numGood) + / numTotal + where + weightedBlackholes = numBlackholes * (5 / 10) + weightedLiabilities = numLiabilities * (7 / 10) + numGood = numTotal - numBlackholes - numLiabilities + maxScore = 100.0 + +test_calculateScore :: Test.Tree +test_calculateScore = + Test.group + "calculateScore" + [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0, + Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0, + Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100, + Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100, + Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0 + ] |