summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Sima <ben@bsima.me>2021-07-07 18:57:04 -0400
committerBen Sima <ben@bsima.me>2021-11-26 13:47:36 -0500
commit1934482e8a5beb628b1e6e9db11f9fd274f174a5 (patch)
tree4047135e1adc49986197b59ccb02d374d48f801d
parent75a804d52461aade2e4431a2148eabcd99a9e26e (diff)
Factor Analysis into own module
-rw-r--r--Alpha.hs5
-rw-r--r--Biz/Cli.hs2
-rw-r--r--Biz/Devalloc.hs341
-rw-r--r--Biz/Devalloc/Analysis.hs252
4 files changed, 338 insertions, 262 deletions
diff --git a/Alpha.hs b/Alpha.hs
index 6245614..1da1f1a 100644
--- a/Alpha.hs
+++ b/Alpha.hs
@@ -66,6 +66,7 @@ module Alpha
-- * String
capitalize,
lowercase,
+ strip,
-- * Lists
list,
@@ -184,6 +185,10 @@ _ ?+ [] = panic "wutlus: empty cond list"
(?<) :: Bool -> (Bool -> Text -> a) -> Text -> a
a ?< f = if not a then f a else panic "wutgal failed"
+-- | Removes newlinse from a string.
+strip :: String -> String
+strip = filter (/= '\n')
+
-- | Removes newlines from text.
chomp :: Text -> Text
chomp = Text.filter (/= '\n')
diff --git a/Biz/Cli.hs b/Biz/Cli.hs
index 05d81ac..0054e26 100644
--- a/Biz/Cli.hs
+++ b/Biz/Cli.hs
@@ -11,7 +11,9 @@ module Biz.Cli
Docopt.docopt,
Docopt.getAllArgs,
Docopt.getArg,
+ Docopt.getArgWithDefault,
Docopt.longOption,
+ Docopt.shortOption,
Docopt.command,
has,
)
diff --git a/Biz/Devalloc.hs b/Biz/Devalloc.hs
index d9cf840..9eea33d 100644
--- a/Biz/Devalloc.hs
+++ b/Biz/Devalloc.hs
@@ -61,6 +61,8 @@ import Alpha hiding (rem, (<.>))
import qualified Biz.App as App
import qualified Biz.Auth as Auth
import qualified Biz.Cli as Cli
+import Biz.Devalloc.Analysis (Analysis (..), Commit (..))
+import qualified Biz.Devalloc.Analysis as Analysis
import qualified Biz.Id as Id
import qualified Biz.Log as Log
import qualified Biz.Look
@@ -89,12 +91,10 @@ import qualified Data.Map as Map
import Data.SafeCopy (base, deriveSafeCopy, extension)
import qualified Data.SafeCopy as SafeCopy
import qualified Data.Set as Set
-import qualified Data.String as String
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Encoding
import qualified Data.Time.Calendar as Time
import qualified Data.Time.Clock as Time
-import qualified Data.Time.Format as Time
import Data.Vector (Vector)
import qualified Data.Vector as Vector
-- import qualified Data.Vector.Algorithms.Intro as Vector
@@ -201,6 +201,7 @@ data User0 = User0
}
deriving (Eq, Data, Typeable, Ord, Generic, Show)
+$(deriveSafeCopy 0 'base ''Id.Id)
$(deriveSafeCopy 0 'base ''User0)
-- | The main representation of a user.
@@ -242,9 +243,6 @@ instance Indexable User where
ixFun <| \User {..} -> [userSubscription]
]
-newtype Commit = Sha Text
- deriving (Eq, Data, Typeable, Ord, Generic, Show)
-
instance Lucid.ToHtml Commit where
toHtmlRaw = Lucid.toHtml
toHtml (Sha txt) = Lucid.toHtml txt
@@ -269,84 +267,13 @@ data Visibility = Public | Private
$(deriveSafeCopy 0 'base ''Visibility)
-data Analysis0 = Analysis0
- { analysisId :: Id.Id Analysis0,
- url :: URL,
- bareRepo :: FilePath,
- repoVisibility :: Visibility,
- blackholes :: [Text],
- liabilities :: [Text],
- stale :: [(FilePath, Int)],
- score :: Integer,
- totalFiles :: Integer,
- activeAuthors :: [Text],
- commit :: Commit,
- askedBy :: Id.Id User
- }
- deriving (Eq, Ord, Generic, Show, Data, Typeable)
-
-$(deriveSafeCopy 0 'base ''Analysis0)
-
--- | The result of analyzing a git repo.
-data Analysis = Analysis
- { -- | Monotonic incrementing integer id
- analysisId :: Id.Id Analysis,
- -- | Canonical URL for the repo. I wish this was structured data instead of
- -- just Text.
- url :: URL,
- -- | Where the repo is stored on the local disk.
- bareRepo :: FilePath,
- -- | If the repo is OSS or not
- repoVisibility :: Visibility,
- -- | A path with no active contributors
- blackholes :: [Text],
- -- | A path with < 3 active contributors
- liabilities :: [Text],
- -- | Map of path to number of commits, for detecting paths that continually
- -- get rewritten.
- hotspotMap :: Map FilePath Integer,
- -- | Files that have not been touched in 6 months
- stale :: [(FilePath, Integer)],
- -- | Total score for the repo
- score :: Integer,
- -- | Total number of files
- totalFiles :: Integer,
- -- | The total number of commits
- totalCommits :: Integer,
- -- | List of all the active users we care about
- activeAuthors :: [Text],
- -- | Which commit this analysis was run against.
- commit :: Commit,
- -- | Who asked for this analysis
- askedBy :: Id.Id User
- }
- deriving (Eq, Ord, Generic, Show, Data, Typeable)
-
-instance SafeCopy.Migrate Analysis where
- type MigrateFrom Analysis = Analysis0
- migrate Analysis0 {..} =
- Analysis
- { analysisId = Id.mk (Proxy :: Proxy Analysis) <| Id.untag analysisId,
- hotspotMap = mempty,
- totalCommits = 0,
- stale = map (second toInteger) stale,
- ..
- }
-
-$(deriveSafeCopy 0 'base ''Id.Id)
-$(deriveSafeCopy 0 'base ''Analysis)
-
instance Indexable Analysis where
empty =
ixSet
- [ ixFun <| \Analysis {..} -> [analysisId],
- ixFun <| \Analysis {..} -> [askedBy],
- ixFun <| \Analysis {..} -> [url],
- ixFun <| \Analysis {..} -> [commit],
- ixFun <| \Analysis {..} -> [repoVisibility]
+ [ ixFun <| \Analysis {..} -> [commit]
]
-instance App.HasCss Analysis where
+instance App.HasCss AnalysisAction where
cssFor _ = do
"#analysis > *" ? do
Biz.Look.marginAll (rem 2)
@@ -399,14 +326,13 @@ instance App.HasCss Analysis where
".bar" ? do
Clay.height (px 800)
-instance Lucid.ToHtml Analysis where
+instance Lucid.ToHtml AnalysisAction where
toHtmlRaw = Lucid.toHtml
- toHtml Analysis {..} =
+ toHtml AnalysisAction {..} =
Lucid.div_ [Lucid.id_ "analysis"] <| do
+ let Analysis {..} = analysis
Lucid.p_ [Lucid.class_ "analysisFor"] <| do
- "Analysis for "
- Lucid.a_ [Lucid.href_ <| (\(URL txt) -> txt) <| url] <| do
- Lucid.toHtml url
+ "Analysis for " <> Lucid.toHtml gitDir
score_ <| do
title_ "Total Score"
@@ -488,8 +414,8 @@ instance Lucid.ToHtml Analysis where
score_ <| do
title_ "Stale files"
- quantity_ <| Lucid.toHtml <| slen stale
- previewChart <| simpleBar (len stale) totalFiles
+ quantity_ <| Lucid.toHtml <| tshow <| Map.size stale
+ previewChart <| simpleBar (Map.size stale) totalFiles
Lucid.details_ <| do
Lucid.summary_ "Details"
desc
@@ -505,7 +431,8 @@ instance Lucid.ToHtml Analysis where
documentation practices along the way.
|]
Lucid.ul_ <| do
- forM_ stale <| \(path, days) ->
+ -- probably Map.mapWithKey is better?
+ forM_ (Map.toList stale) <| \(path, days) ->
Lucid.li_ <| Lucid.toHtml <| path <> " (" <> show days <> " days)"
Lucid.with score_ [Lucid.id_ "hotspots"] <| do
@@ -540,7 +467,7 @@ instance Lucid.ToHtml Analysis where
<| Lucid.toHtml
<| path <> ": " <> show n <> " commits"
where
- simpleBar :: Monad m => Integer -> Integer -> Lucid.HtmlT m ()
+ simpleBar :: (Show i, Monad m, Num i) => i -> Integer -> Lucid.HtmlT m ()
simpleBar n total = do
Lucid.table_ [Lucid.class_ "charts-css bar stacked multiple"] <| do
Lucid.tr_ <| do
@@ -560,12 +487,41 @@ instance Lucid.ToHtml Analysis where
desc :: Monad m => Text -> Lucid.HtmlT m ()
desc = Lucid.p_ <. Cmark.renderNode [] <. Cmark.commonmarkToNode []
+-- | Captures an 'Analysis' with metadata used in the webapp to track who asked
+-- it and so on.
+data AnalysisAction = AnalysisAction
+ { -- | Monotonic incrementing integer id
+ analysisId :: Id.Id AnalysisAction,
+ -- | Who asked for this analysis
+ askedBy :: Id.Id User,
+ -- | Where is this coming from?
+ url :: URL,
+ -- | Is the URL publically visible?
+ repoVisibility :: Visibility,
+ -- | The actual analaysis
+ analysis :: Analysis
+ }
+ deriving (Eq, Ord, Generic, Show, Data, Typeable)
+
+$(deriveSafeCopy 0 'base ''Analysis)
+$(deriveSafeCopy 0 'base ''AnalysisAction)
+
+instance Indexable AnalysisAction where
+ empty =
+ ixSet
+ [ ixFun <| \AnalysisAction {..} -> [analysisId],
+ ixFun <| \AnalysisAction {..} -> [askedBy],
+ ixFun <| \AnalysisAction {..} -> [url],
+ ixFun <| \AnalysisAction {..} -> [repoVisibility],
+ ixFun <| \AnalysisAction {..} -> [commit analysis]
+ ]
+
-- | The database.
data Keep = Keep
{ users :: IxSet User,
nextUserId :: Id.Id User,
- analyses :: IxSet Analysis,
- nextAnalysisId :: Id.Id Analysis
+ analyses :: IxSet AnalysisAction,
+ nextAnalysisId :: Id.Id AnalysisAction
}
deriving (Data, Typeable)
@@ -603,10 +559,10 @@ getUsers = do
Keep {..} <- ask
pure <| IxSet.toList users
-createAnalysis :: Analysis -> Acid.Update Keep Analysis
+createAnalysis :: AnalysisAction -> Acid.Update Keep AnalysisAction
createAnalysis a = do
keep@Keep {..} <- get
- let newAnalysis = a {analysisId = nextAnalysisId} :: Analysis
+ let newAnalysis = a {analysisId = nextAnalysisId} :: AnalysisAction
put
<| keep
{ analyses = IxSet.insert newAnalysis analyses,
@@ -614,27 +570,27 @@ createAnalysis a = do
}
pure newAnalysis
-getAnalysisById :: Id.Id Analysis -> Acid.Query Keep (Maybe Analysis)
+getAnalysisById :: Id.Id AnalysisAction -> Acid.Query Keep (Maybe AnalysisAction)
getAnalysisById id = do
Keep {..} <- ask
pure <| IxSet.getOne <| analyses @= id
-getAllAnalyses :: Acid.Query Keep [Analysis]
+getAllAnalyses :: Acid.Query Keep [AnalysisAction]
getAllAnalyses = do
Keep {..} <- ask
pure <| IxSet.toList analyses
-getAnalysesByAsker :: User -> Acid.Query Keep [Analysis]
+getAnalysesByAsker :: User -> Acid.Query Keep [AnalysisAction]
getAnalysesByAsker User {..} = do
Keep {..} <- ask
pure <| IxSet.toList <| analyses @= userId
-getAnalysesByUrl :: URL -> Acid.Query Keep [Analysis]
+getAnalysesByUrl :: URL -> Acid.Query Keep [AnalysisAction]
getAnalysesByUrl url = do
Keep {..} <- ask
pure <| IxSet.toList <| analyses @= url
-getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe Analysis)
+getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe AnalysisAction)
getAnalysisByUrlAndCommit url sha = do
Keep {..} <- ask
pure <| IxSet.getOne <| analyses @= url &&& analyses @= sha
@@ -727,7 +683,7 @@ mock_ghUser =
init :: Keep
init =
Keep
- { nextAnalysisId = Id.mk (Proxy :: Proxy Analysis) 1,
+ { nextAnalysisId = Id.mk (Proxy :: Proxy AnalysisAction) 1,
nextUserId = Id.mk (Proxy :: Proxy User) 1,
users = IxSet.empty,
analyses = IxSet.empty
@@ -817,8 +773,7 @@ test :: Test.Tree
test =
Test.group
"Biz.Devalloc"
- [ test_calculateScore,
- test_spliceCreds,
+ [ test_spliceCreds,
Test.with
(startup True)
(\t@(c, _, _) -> shutdown t >> tidy c)
@@ -851,6 +806,7 @@ data Config = Config
-- and set in .envrc.local
-- https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token
tokn :: Text,
+ -- | The example shown on the homepage
homeExample :: URL
}
deriving (Generic, Show)
@@ -950,7 +906,7 @@ data Paths path = Paths
path
:- Auth.Auth '[Auth.Cookie] User
:> "analysis"
- :> Capture "analysisId" (Id.Id Analysis)
+ :> Capture "analysisId" (Id.Id AnalysisAction)
:> Get '[Lucid.HTML] (App.Html AnalysisDisplay),
postAnalysis ::
path
@@ -1256,7 +1212,7 @@ instance Lucid.ToHtml AdminDashboard where
-- | The front page pitch. Eventually I'd like to load the content from markdown
-- files or some other store of data so I can A/B test.
-data Home = Home Auth.OAuthArgs (Maybe Analysis)
+data Home = Home Auth.OAuthArgs (Maybe AnalysisAction)
instance App.HasCss Home where
cssFor (Home _ mAnalysis) = do
@@ -1356,7 +1312,7 @@ instance Lucid.ToHtml Home where
h2 = Lucid.h2_ <. markdown
exampleWrapper = Lucid.div_ [Lucid.class_ "example"]
-data Analyses = Analyses User [Analysis]
+data Analyses = Analyses User [AnalysisAction]
instance App.HasCss Analyses where
cssFor _ = mempty
@@ -1373,7 +1329,7 @@ instance Lucid.ToHtml Analyses where
[Lucid.linkHref_ "/" <| fieldLink selectRepo]
"Analyze one of your repos"
Lucid.div_ <| do
- forM_ analyses <| \Analysis {..} ->
+ forM_ analyses <| \AnalysisAction {..} ->
Lucid.a_
[ href analysisId,
css <| Biz.Look.marginAll (em 1)
@@ -1382,7 +1338,7 @@ instance Lucid.ToHtml Analyses where
<| do
Lucid.div_ <| Lucid.toHtml url
Lucid.div_ [css <| Clay.fontSizeCustom Clay.Font.small]
- <| Lucid.toHtml commit
+ <| Lucid.toHtml (commit analysis)
footer
where
href aid = Lucid.linkHref_ "/" <| fieldLink getAnalysis aid
@@ -1555,7 +1511,7 @@ instance Web.FromForm SubmitAnalysis
-- | I need more information than just 'Analysis' has to render a full, useful
-- web page, hence this type.
-data AnalysisDisplay = AnalysisDisplay User Analysis
+data AnalysisDisplay = AnalysisDisplay User AnalysisAction
instance App.HasCss AnalysisDisplay where
cssFor (AnalysisDisplay _ analysis) = App.cssFor analysis
@@ -1570,164 +1526,24 @@ instance Lucid.ToHtml AnalysisDisplay where
footer
-- | Run a full analysis on a git repo
-analyze :: Acid.AcidState Keep -> Id.Id User -> [Text] -> URL -> FilePath -> Bool -> IO Analysis
+analyze :: Acid.AcidState Keep -> Id.Id User -> [Text] -> URL -> FilePath -> Bool -> IO AnalysisAction
analyze keep askedBy activeAuthors url bareRepo repoPrivate = do
- commit <- Sha </ Text.strip </ Text.pack </ git bareRepo ["log", "-n1", "--format=%H"]
+ commit <- Sha </ Text.strip </ Text.pack </ Analysis.git bareRepo ["log", "-n1", "--format=%H"]
Acid.query keep (GetAnalysisByUrlAndCommit url commit) +> \case
Just analysis -> pure analysis
Nothing ->
- runAnalysis bareRepo activeAuthors url askedBy repoPrivate commit
+ Analysis.run activeAuthors bareRepo
+ /> ( \a ->
+ AnalysisAction
+ { analysisId = mempty,
+ analysis = a,
+ repoVisibility = repoPrivate ?: (Private, Public),
+ ..
+ }
+ )
/> CreateAnalysis
+> Acid.update keep
-runAnalysis :: String -> [Text] -> URL -> Id.Id User -> Bool -> Commit -> IO Analysis
-runAnalysis bareRepo activeAuthors url askedBy repoPrivate commit = do
- tree <-
- git
- bareRepo
- [ "ls-tree",
- "--full-tree",
- "--name-only",
- "-r", -- recurse into subtrees
- "HEAD"
- ]
- /> String.lines
- authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
- let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])]
- stalenessMap <- traverse (lastTouched bareRepo) tree
- let blackholes =
- [ Text.pack path
- | (path, authors_) <- authorMap,
- null (map third authors_ `List.intersect` activeAuthors)
- ]
- let liabilities =
- [ Text.pack path
- | (path, authors_) <- authorMap,
- length (map third authors_ `List.intersect` activeAuthors) < 3
- ]
- let numBlackholes = realToFrac <| length blackholes
- let numLiabilities = realToFrac <| length liabilities
- let numTotal = realToFrac <| length tree
- hotspotMap <-
- Map.fromList </ Async.mapConcurrently getChangeCount tree
- totalCommits <-
- git bareRepo ["rev-list", "--count", "HEAD"]
- /> filter (/= '\n')
- /> readMaybe
- /> fromMaybe 0
- pure
- <| Analysis
- { analysisId = mempty,
- stale =
- [ (path, days)
- | (path, Just days) <- stalenessMap,
- days > 180
- ],
- score = calculateScore numTotal numBlackholes numLiabilities,
- totalFiles = toInteger <| length tree,
- repoVisibility = repoPrivate ?: (Private, Public),
- ..
- }
- where
- third :: (a, b, c) -> c
- third (_, _, a) = a
- getChangeCount :: FilePath -> IO (FilePath, Integer)
- getChangeCount path =
- git bareRepo ["rev-list", "--count", "HEAD", "--", path]
- /> filter (/= '\n')
- /> readMaybe
- /> fromMaybe 0
- /> (path,)
-
--- | Run a git command on a repo
-git ::
- -- | path to the git dir (bare repo)
- String ->
- -- | args to `git`
- [String] -> IO String
-git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) ""
-
--- | Does the aggregate score calculation given number of files found to be
--- blackholes, liabilities, etc.
-calculateScore :: Double -> Double -> Double -> Integer
-calculateScore 0 _ _ = 0
-calculateScore a 0 0 | a > 0 = 100
-calculateScore a b c | a < 0 || b < 0 || c < 0 = 0
-calculateScore numTotal numBlackholes numLiabilities =
- max 0 <. round
- <| maxScore
- * (weightedBlackholes + weightedLiabilities + numGood)
- / numTotal
- where
- weightedBlackholes = numBlackholes * (5 / 10)
- weightedLiabilities = numLiabilities * (7 / 10)
- numGood = numTotal - numBlackholes - numLiabilities
- maxScore = 100.0
-
-test_calculateScore :: Test.Tree
-test_calculateScore =
- Test.group
- "calculateScore"
- [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0,
- Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0,
- Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100,
- Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100,
- Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0
- ]
-
-lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer)
-lastTouched bareRepo path = do
- now <- Time.getCurrentTime
- timestamp <-
- Process.readProcess
- "git"
- [ "--git-dir",
- bareRepo,
- "log",
- "-n1",
- "--pretty=%aI",
- "--",
- path
- ]
- ""
- /> filter (/= '\n')
- /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z"
- pure (path, calculateAge now </ timestamp)
- where
- calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay
-
--- | Given a git dir and a path inside the git repo, get information about the
--- authors.
-authorsFor ::
- FilePath ->
- FilePath ->
- -- | returns (number of commits, author name, author email)
- IO [(Text, Text, Text)]
-authorsFor gitDir path =
- Process.readProcess
- "git"
- [ "--git-dir",
- gitDir,
- "shortlog",
- "--numbered",
- "--summary",
- "--email",
- "HEAD",
- "--",
- path
- ]
- ""
- /> Text.pack
- /> Text.lines
- /> map (Text.break (== '\t'))
- /> map parseAuthor
- where
- parseAuthor (commits, author) =
- ( Text.strip commits,
- Text.strip <| Text.takeWhile (/= '<') author,
- Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author
- )
-
spliceCreds :: User -> Text -> Text
spliceCreds User {..} url =
scheme <> "//" <> unGitHubHandle userGitHubHandle <> ":" <> userGitHubToken <> "@" <> Text.drop 2 rest
@@ -1767,7 +1583,7 @@ analyzeGitHub ::
Text ->
-- | GitHub repo
Text ->
- IO Analysis
+ IO AnalysisAction
analyzeGitHub keep user@User {userId} ghAuth depo o r = do
activeAuthors <-
getPeople
@@ -1823,7 +1639,7 @@ test_analyzeGitHub load =
[ Test.unit "can analyze a public repo (octocat/hello-world)" <| do
(c, _, k) <- load
let user@User {userGitHubToken} = mock_user c
- Analysis {..} <-
+ AnalysisAction {..} <-
analyzeGitHub
k
user
@@ -1832,19 +1648,20 @@ test_analyzeGitHub load =
"octocat"
"hello-world"
url @?= URL "https://github.com/octocat/Hello-World"
- bareRepo @?= depo c <> "/github.com/octocat/Hello-World.git"
+ -- bareRepo @?= depo c <> "/github.com/octocat/Hello-World.git"
+ let Analysis {..} = analysis
length activeAuthors @?= 2
activeAuthors @?= ["hire@spacegho.st", "octocat@github.com"]
blackholes @?= ["README"]
liabilities @?= ["README"]
- fst </ headMay stale @?= Just "README"
+ Map.member "README" stale @?= True
score @?= 20
totalFiles @?= 1
commit @?= Sha "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d",
Test.unit "can analyze a private repo (bsima/biz)" <| do
(c, _, k) <- load
let user@User {userGitHubToken} = mock_user c
- Analysis {..} <-
+ AnalysisAction {..} <-
analyzeGitHub
k
user
@@ -1853,7 +1670,7 @@ test_analyzeGitHub load =
"bsima"
"biz"
url @?= URL "https://github.com/bsima/biz"
- bareRepo @?= depo c <> "/github.com/bsima/biz.git"
+ -- bareRepo @?= depo c <> "/github.com/bsima/biz.git"
]
where
mock_user c =
diff --git a/Biz/Devalloc/Analysis.hs b/Biz/Devalloc/Analysis.hs
new file mode 100644
index 0000000..4b1f297
--- /dev/null
+++ b/Biz/Devalloc/Analysis.hs
@@ -0,0 +1,252 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- : out devalloc-analyze
+module Biz.Devalloc.Analysis
+ ( Analysis (..),
+ Commit (..),
+ run,
+ main,
+ test,
+ git,
+ )
+where
+
+import Alpha
+import qualified Biz.Cli as Cli
+import Biz.Test ((@=?))
+import qualified Biz.Test as Test
+import qualified Control.Concurrent.Async as Async
+import qualified Data.Aeson as Aeson
+import Data.Data (Data)
+import qualified Data.List as List
+import qualified Data.Map as Map
+import qualified Data.String as String
+import qualified Data.Text as Text
+import qualified Data.Time.Clock as Time
+import qualified Data.Time.Format as Time
+import qualified System.Directory as Directory
+import qualified System.Process as Process
+
+main :: IO ()
+main = Cli.main <| Cli.Plan help move test tidy
+
+move :: Cli.Arguments -> IO ()
+move args = gitDir +> run authors /> Aeson.encode +> putStrLn
+ where
+ gitDir =
+ Cli.argument "git-dir"
+ |> Cli.getArgWithDefault args ".git"
+ |> Directory.makeAbsolute
+ authors =
+ -- i think this is not working? do i need optparse-applicative?
+ Cli.shortOption 'a'
+ |> Cli.getAllArgs args
+ |> map Text.pack
+
+tidy :: cfg -> IO ()
+tidy _ = pure ()
+
+test :: Test.Tree
+test = Test.group "Biz.Devalloc.Analysis" [test_calculateScore]
+
+help :: Cli.Docopt
+help =
+ [Cli.docopt|
+devalloc-analyze
+
+Usage:
+ devalloc-analyze test
+ devalloc-analyze [--author=<email>]... <git-dir>
+
+Options:
+ -a, --author List of active authors' emails.
+|]
+
+newtype Commit = Sha Text
+ deriving (Eq, Data, Typeable, Ord, Generic, Show)
+
+instance Aeson.ToJSON Commit
+
+-- | The result of analyzing a git repo.
+data Analysis = Analysis
+ { -- | Where the repo is stored on the local disk.
+ gitDir :: FilePath,
+ -- | A path with no active contributors
+ blackholes :: [Text],
+ -- | A path with < 3 active contributors
+ liabilities :: [Text],
+ -- | Map of path to number of commits, for detecting paths that continually
+ -- get rewritten.
+ hotspotMap :: Map FilePath Integer,
+ -- | Files that have not been touched in 6 months
+ stale :: Map FilePath Integer,
+ -- | Total score for the repo
+ score :: Integer,
+ -- | Total number of files
+ totalFiles :: Integer,
+ -- | The total number of commits
+ totalCommits :: Integer,
+ -- | List of all the active users we care about
+ activeAuthors :: [Text],
+ -- | Which commit this analysis was run against.
+ commit :: Commit
+ }
+ deriving (Eq, Ord, Generic, Show, Data, Typeable)
+
+instance Aeson.ToJSON Analysis
+
+run :: [Text] -> FilePath -> IO Analysis
+run activeAuthors bareRepo = do
+ commit <- git bareRepo ["rev-parse", "HEAD"] /> Text.pack /> chomp /> Sha
+ tree <-
+ git
+ bareRepo
+ [ "ls-tree",
+ "--full-tree",
+ "--name-only",
+ "-r", -- recurse into subtrees
+ "HEAD"
+ ]
+ /> String.lines
+ authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
+ let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])]
+ stalenessMap <- traverse (lastTouched bareRepo) tree
+ let blackholes =
+ [ Text.pack path
+ | (path, authors_) <- authorMap,
+ null (map third authors_ `List.intersect` activeAuthors)
+ ]
+ let liabilities =
+ [ Text.pack path
+ | (path, authors_) <- authorMap,
+ length (map third authors_ `List.intersect` activeAuthors) < 3
+ ]
+ let numBlackholes = realToFrac <| length blackholes
+ let numLiabilities = realToFrac <| length liabilities
+ let numTotal = realToFrac <| length tree
+ hotspotMap <-
+ Map.fromList </ Async.mapConcurrently getChangeCount tree
+ totalCommits <-
+ git bareRepo ["rev-list", "--count", "HEAD"]
+ /> filter (/= '\n')
+ /> readMaybe
+ /> fromMaybe 0
+ pure
+ <| Analysis
+ { gitDir = bareRepo,
+ stale =
+ Map.fromList
+ <| [ (path, days)
+ | (path, Just days) <- stalenessMap,
+ days > 180
+ ],
+ score = calculateScore numTotal numBlackholes numLiabilities,
+ totalFiles = toInteger <| length tree,
+ ..
+ }
+ where
+ third :: (a, b, c) -> c
+ third (_, _, a) = a
+ getChangeCount :: FilePath -> IO (FilePath, Integer)
+ getChangeCount path =
+ git bareRepo ["rev-list", "--count", "HEAD", "--", path]
+ /> filter (/= '\n')
+ /> readMaybe
+ /> fromMaybe 0
+ /> (path,)
+
+-- | Given a git dir and a path inside the git repo, get information about the
+-- authors.
+authorsFor ::
+ FilePath ->
+ FilePath ->
+ -- | returns (number of commits, author name, author email)
+ IO [(Text, Text, Text)]
+authorsFor gitDir path =
+ Process.readProcess
+ "git"
+ [ "--git-dir",
+ gitDir,
+ "shortlog",
+ "--numbered",
+ "--summary",
+ "--email",
+ "HEAD",
+ "--",
+ path
+ ]
+ ""
+ /> Text.pack
+ /> Text.lines
+ /> map (Text.break (== '\t'))
+ /> map parseAuthor
+ where
+ parseAuthor (commits, author) =
+ ( Text.strip commits,
+ Text.strip <| Text.takeWhile (/= '<') author,
+ Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author
+ )
+
+-- | Run a git command on a repo
+git ::
+ -- | path to the git dir (bare repo)
+ String ->
+ -- | args to `git`
+ [String] ->
+ IO String
+git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) ""
+
+lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer)
+lastTouched bareRepo path = do
+ now <- Time.getCurrentTime
+ timestamp <-
+ Process.readProcess
+ "git"
+ [ "--git-dir",
+ bareRepo,
+ "log",
+ "-n1",
+ "--pretty=%aI",
+ "--",
+ path
+ ]
+ ""
+ /> filter (/= '\n')
+ /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z"
+ pure (path, calculateAge now </ timestamp)
+ where
+ calculateAge now n = round <| Time.diffUTCTime now n / Time.nominalDay
+
+-- | Does the aggregate score calculation given number of files found to be
+-- blackholes, liabilities, etc.
+calculateScore :: Double -> Double -> Double -> Integer
+calculateScore 0 _ _ = 0
+calculateScore a 0 0 | a > 0 = 100
+calculateScore a b c | a < 0 || b < 0 || c < 0 = 0
+calculateScore numTotal numBlackholes numLiabilities =
+ max 0 <. round
+ <| maxScore
+ * (weightedBlackholes + weightedLiabilities + numGood)
+ / numTotal
+ where
+ weightedBlackholes = numBlackholes * (5 / 10)
+ weightedLiabilities = numLiabilities * (7 / 10)
+ numGood = numTotal - numBlackholes - numLiabilities
+ maxScore = 100.0
+
+test_calculateScore :: Test.Tree
+test_calculateScore =
+ Test.group
+ "calculateScore"
+ [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0,
+ Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0,
+ Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100,
+ Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100,
+ Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0
+ ]