summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Sima <ben@bsima.me>2021-01-19 18:52:41 -0500
committerBen Sima <ben@bsima.me>2021-01-22 08:00:27 -0500
commit92d3ab89ffd6e65d5a79de0cf5907aad3786ce44 (patch)
tree29fdaa8a40f6a31f6298f15673a60e9e356c0239
parente9578a7bb7c116cf71aea0313e64e64abb650f9e (diff)
Persist Analysis runs
Keep Analysis runs, with a unique id, and index them based on ID, which user asked for the analysis, and commit sha.
-rw-r--r--Biz/Devalloc.hs255
-rw-r--r--Biz/Id.hs42
2 files changed, 207 insertions, 90 deletions
diff --git a/Biz/Devalloc.hs b/Biz/Devalloc.hs
index a33b167..4645a2a 100644
--- a/Biz/Devalloc.hs
+++ b/Biz/Devalloc.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveDataTypeable #-}
@@ -10,6 +11,7 @@
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE NoImplicitPrelude #-}
@@ -50,6 +52,7 @@ where
import Alpha hiding (rem, (<.>))
import Biz.App (CSS (..), HtmlApp (..))
import qualified Biz.Cli as Cli
+import qualified Biz.Id as Id
import qualified Biz.Look
import Biz.Test ((@=?))
import qualified Biz.Test as Test
@@ -64,7 +67,7 @@ import qualified Data.Acid as Acid
import qualified Data.Aeson as Aeson
import qualified Data.ByteString.Lazy as LBS
import Data.Data (Data, Typeable)
-import Data.IxSet (Indexable (..), IxSet, ixFun, ixSet, (@=))
+import Data.IxSet (Indexable (..), IxSet, ixFun, ixSet, (&&&), (@=))
import qualified Data.IxSet as IxSet
import qualified Data.List as List
import Data.SafeCopy (base, deriveSafeCopy)
@@ -154,20 +157,68 @@ instance Indexable User where
ixFun <| \u -> [userGitHubId u]
]
--- | The database.
-newtype Keep = Keep {users :: IxSet User}
- deriving (Data, Typeable)
+newtype Commit = Sha Text
+ deriving (Eq, Data, Typeable, Ord, Generic, Show)
-instance Semigroup Keep where
- a <> b = Keep <| users a <> users b
+$(deriveSafeCopy 0 'base ''Commit)
+
+newtype URL = URL Text
+ deriving (Eq, Data, Typeable, Ord, Generic, Show)
-instance Monoid Keep where
- mempty = Keep <| mempty []
+$(deriveSafeCopy 0 'base ''URL)
+
+-- | The result of analyzing a git repo.
+data Analysis = Analysis
+ { -- | Monotonic incrementing integer id
+ analysisId :: Id.Id Analysis,
+ -- | Canonical URL for the repo. I wish this was structured data instead of
+ -- just Text.
+ url :: URL,
+ -- | Where the repo is stored on the local disk.
+ bareRepo :: FilePath,
+ -- | A path with no active contributors
+ blackholes :: [Text],
+ -- | A path with < 3 active contributors
+ liabilities :: [Text],
+ -- | Files that have not been touched in 6 months
+ stale :: [(FilePath, Int)],
+ -- | Total score for the repo
+ score :: Integer,
+ -- | Total number of files
+ totalFiles :: Integer,
+ -- | List of all the active users we care about
+ activeAuthors :: [Text],
+ -- | Which commit this analysis was run against.
+ commit :: Commit,
+ -- | Who asked for this analysis
+ askedBy :: User
+ }
+ deriving (Eq, Ord, Generic, Show, Data, Typeable)
+
+$(deriveSafeCopy 0 'base ''Id.Id)
+$(deriveSafeCopy 0 'base ''Analysis)
+
+instance Indexable Analysis where
+ empty =
+ ixSet
+ [ ixFun <| \a -> [analysisId a],
+ ixFun <| \a -> [askedBy a],
+ ixFun <| \a -> [url a],
+ ixFun <| \a -> [commit a]
+ ]
+
+-- | The database.
+data Keep = Keep
+ { users :: IxSet User,
+ analyses :: IxSet Analysis,
+ nextAnalysisId :: Id.Id Analysis
+ }
+ deriving (Data, Typeable)
$(deriveSafeCopy 0 'base ''Keep)
-newUser :: User -> Acid.Update Keep User
-newUser u = do
+createUser :: User -> Acid.Update Keep User
+createUser u = do
keep <- get
put <| keep {users = IxSet.insert u (users keep)}
return u
@@ -183,7 +234,38 @@ getUserByEmail email = do
Keep {..} <- ask
return <| IxSet.getOne <| users @= email
-$(makeAcidic ''Keep ['newUser, 'updateUser, 'getUserByEmail])
+createAnalysis :: Analysis -> Acid.Update Keep Analysis
+createAnalysis a = do
+ keep@Keep {..} <- get
+ put
+ <| keep
+ { analyses = IxSet.insert a analyses,
+ nextAnalysisId =
+ succ
+ nextAnalysisId
+ }
+ return a
+
+getAnalysesByAsker :: User -> Acid.Query Keep [Analysis]
+getAnalysesByAsker user = do
+ Keep {..} <- ask
+ return <| IxSet.toList <| analyses @= user
+
+getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe Analysis)
+getAnalysisByUrlAndCommit url sha = do
+ Keep {..} <- ask
+ return <| IxSet.getOne <| (analyses @= url &&& analyses @= sha)
+
+$( makeAcidic
+ ''Keep
+ [ 'createUser,
+ 'updateUser,
+ 'getUserByEmail,
+ 'createAnalysis,
+ 'getAnalysesByAsker,
+ 'getAnalysisByUrlAndCommit
+ ]
+ )
upsertGitHubUser :: Acid.AcidState Keep -> ByteString -> GitHub.User -> IO User
upsertGitHubUser keep tok ghUser = case GitHub.userEmail ghUser of
@@ -196,13 +278,21 @@ upsertGitHubUser keep tok ghUser = case GitHub.userEmail ghUser of
Acid.update keep <| UpdateUser user {userGitHubToken = Encoding.decodeUtf8 tok}
Nothing ->
Acid.update keep
- <| NewUser
+ <| CreateUser
User
{ userEmail = UserEmail email,
userGitHubId = GitHubId <. GitHub.untagId <| GitHub.userId ghUser,
userGitHubToken = Encoding.decodeUtf8 tok
}
+init :: Keep
+init =
+ Keep
+ { nextAnalysisId = Id.mk (Proxy :: Proxy Analysis) 1,
+ users = IxSet.empty,
+ analyses = IxSet.empty
+ }
+
-- * main and test
main :: IO ()
@@ -224,7 +314,7 @@ move _ = Exception.bracket startup shutdown run
startup = do
cfg <- Envy.decodeWithDefaults Envy.defConfig
oAuthArgs <- Envy.decodeWithDefaults Envy.defConfig
- kp <- Acid.openLocalStateFrom (keep cfg) mempty :: IO (Acid.AcidState Keep)
+ kp <- Acid.openLocalStateFrom (keep cfg) init :: IO (Acid.AcidState Keep)
jwk <- Auth.generateKey
putText "@"
putText "devalloc"
@@ -442,7 +532,8 @@ htmlApp cooks kp cfg jwk oAuthArgs =
Auth.Authenticated user -> \owner repo ->
liftIO
<| analyzeGitHub
- (GitHub.OAuth <. Encoding.encodeUtf8 <| userGitHubToken user)
+ kp
+ user
cfg
owner
repo
@@ -762,25 +853,6 @@ instance Lucid.ToHtml AnalysisDisplay where
Lucid.main_ <| Lucid.toHtml anal
footer
--- | The result of analyzing a git repo.
-data Analysis = Analysis
- { -- | Where the repo is stored on the local disk.
- bareRepo :: FilePath,
- -- | A path with no active contributors
- blackholes :: [Text],
- -- | A path with < 3 active contributors
- liabilities :: [Text],
- -- | Files that have not been touched in 6 months
- stale :: [(FilePath, Int)],
- -- | Total score for the repo
- score :: Integer,
- -- | Total number of files
- totalFiles :: Integer,
- -- | List of all the active users we care about
- activeAuthors :: [Text]
- }
- deriving (Show)
-
instance Lucid.ToHtml Analysis where
toHtmlRaw = Lucid.toHtml
toHtml = render .> Lucid.toHtml
@@ -806,58 +878,61 @@ instance Lucid.ToHtml Analysis where
-- | Takes a list of active authors and a path to a bare git repo and runs a
-- regular analysis
-analyze :: [Text] -> FilePath -> IO Analysis
-analyze activeAuthors bareRepo = do
- tree <-
- Process.readProcess
- "git"
- [ "--git-dir",
- bareRepo,
- "ls-tree",
- "--full-tree",
- "--name-only",
- "-r", -- recurse into subtrees
- "HEAD"
- ]
- ""
- /> String.lines
- authors <- mapM (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
- let authorMap =
- zipWith
- ( \path authors_ ->
- (path, authors_)
- )
- tree
- authors ::
- [(FilePath, [(Text, Text, Text)])]
- stalenessMap <- mapM (lastTouched bareRepo) tree
- let blackholes =
- [ Text.pack path
- | (path, authors_) <- authorMap,
- null (map third authors_ `List.intersect` activeAuthors)
- ]
- let liabilities =
- [ Text.pack path
- | (path, authors_) <- authorMap,
- length (map third authors_ `List.intersect` activeAuthors) < 3
- ]
- let numBlackholes = realToFrac <| length blackholes
- let numLiabilities = realToFrac <| length liabilities
- let numTotal = realToFrac <| length tree
- return
- Analysis
- { stale =
- [ (path, days)
- | (path, days) <- stalenessMap,
- days > 180
- ],
- score = calculateScore numTotal numBlackholes numLiabilities,
- totalFiles = toInteger <| length tree,
- ..
- }
+analyze :: Acid.AcidState Keep -> User -> [Text] -> URL -> FilePath -> IO Analysis
+analyze keep askedBy activeAuthors url bareRepo = do
+ commit <- Sha </ Text.strip </ Text.pack </ git ["log", "-n1", "--format=%H"]
+ Acid.query keep (GetAnalysisByUrlAndCommit url commit) >>= \case
+ Just analysis -> return analysis
+ Nothing -> do
+ tree <-
+ git
+ [ "ls-tree",
+ "--full-tree",
+ "--name-only",
+ "-r", -- recurse into subtrees
+ "HEAD"
+ ]
+ /> String.lines
+ authors <- mapM (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]]
+ let authorMap =
+ zipWith
+ ( \path authors_ ->
+ (path, authors_)
+ )
+ tree
+ authors ::
+ [(FilePath, [(Text, Text, Text)])]
+ stalenessMap <- mapM (lastTouched bareRepo) tree
+ let blackholes =
+ [ Text.pack path
+ | (path, authors_) <- authorMap,
+ null (map third authors_ `List.intersect` activeAuthors)
+ ]
+ let liabilities =
+ [ Text.pack path
+ | (path, authors_) <- authorMap,
+ length (map third authors_ `List.intersect` activeAuthors) < 3
+ ]
+ let numBlackholes = realToFrac <| length blackholes
+ let numLiabilities = realToFrac <| length liabilities
+ let numTotal = realToFrac <| length tree
+ Acid.update keep
+ <| CreateAnalysis
+ <| Analysis
+ { analysisId = Id.Id 0, -- make this mempty?
+ stale =
+ [ (path, days)
+ | (path, days) <- stalenessMap,
+ days > 180
+ ],
+ score = calculateScore numTotal numBlackholes numLiabilities,
+ totalFiles = toInteger <| length tree,
+ ..
+ }
where
third :: (a, b, c) -> c
third (_, _, a) = a
+ git args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) ""
calculateScore :: Double -> Double -> Double -> Integer
calculateScore numTotal numBlackholes numLiabilities =
@@ -909,8 +984,6 @@ authorsFor ::
-- | Returns (number of commits, author name, author email)
IO [(Text, Text, Text)]
authorsFor gitDir path =
- -- git shortlog writes to stderr for some reason, so we can't just use
- -- Process.readProcess
Process.readProcess
"git"
[ "--git-dir",
@@ -938,15 +1011,16 @@ authorsFor gitDir path =
-- | Clones a repo from GitHub and does the analysis.
-- TODO: break this up into fetchGitHub and analyze functions.
analyzeGitHub ::
- GitHub.AuthMethod authMethod =>
- authMethod ->
+ Acid.AcidState Keep ->
+ User ->
Config ->
-- | GitHub owner
Text ->
-- | GitHub repo
Text ->
IO Analysis
-analyzeGitHub githubAuth cfg o r = do
+analyzeGitHub keep user cfg o r = do
+ let githubAuth = GitHub.OAuth <. Encoding.encodeUtf8 <| userGitHubToken user
-- I currently have no way of getting active users... getting a list of
-- collaborators on a repo requires authentication for some reason.
--
@@ -960,7 +1034,7 @@ analyzeGitHub githubAuth cfg o r = do
-- is the only regular contributor, at least for now.
--
-- Right activeUsers <- GitHub.github () (GitHub.collaboratorsOnR ghOwner ghRepo GitHub.FetchAll)
- Right user <-
+ Right ghuser <-
GitHub.github
githubAuth
( GitHub.userInfoForR
@@ -968,13 +1042,14 @@ analyzeGitHub githubAuth cfg o r = do
)
-- assume the only active author is the owner, for now
-- TODO: should be userEmail but that requires authentication?
- let activeAuthors = [require "user email" <| GitHub.userName user]
+ let activeAuthors = [require "user email" <| GitHub.userName ghuser]
eRepo <- GitHub.github githubAuth (GitHub.repositoryR ghOwner ghRepo)
case eRepo of
Left err -> throwIO <| toException err
Right repo -> do
+ let GitHub.URL url = GitHub.repoUrl repo
bareRepo <- fetchBareRepo cfg <. GitHub.getUrl <| GitHub.repoHtmlUrl repo
- analyze activeAuthors bareRepo
+ analyze keep user activeAuthors (URL url) bareRepo
where
ghOwner = GitHub.mkName (Proxy :: Proxy GitHub.Owner) o
ghRepo = GitHub.mkName (Proxy :: Proxy GitHub.Repo) r
diff --git a/Biz/Id.hs b/Biz/Id.hs
new file mode 100644
index 0000000..89c74bf
--- /dev/null
+++ b/Biz/Id.hs
@@ -0,0 +1,42 @@
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- Integer-based identifier.
+module Biz.Id
+ ( Id (..),
+ mk,
+ untag,
+ )
+where
+
+import Alpha
+import Data.Aeson (FromJSON (..), ToJSON (..))
+import Data.Binary (Binary)
+import Data.Data (Data)
+
+newtype Id entity = Id Int
+ deriving (Eq, Ord, Show, Generic, Typeable, Data)
+
+mk :: proxy entity -> Int -> Id entity
+mk _ = Id
+
+untag :: Id entity -> Int
+untag (Id i) = i
+
+instance Hashable (Id entity)
+
+instance Binary (Id entity)
+
+instance Enum (Id entity) where
+ toEnum n = mk (Proxy :: Proxy entity) n
+ fromEnum = untag
+
+instance NFData (Id entity) where
+ rnf (Id s) = rnf s
+
+instance FromJSON (Id entity) where
+ parseJSON = fmap Id <. parseJSON
+
+instance ToJSON (Id entity) where
+ toJSON = toJSON <. untag