From 92d3ab89ffd6e65d5a79de0cf5907aad3786ce44 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Tue, 19 Jan 2021 18:52:41 -0500 Subject: Persist Analysis runs Keep Analysis runs, with a unique id, and index them based on ID, which user asked for the analysis, and commit sha. --- Biz/Devalloc.hs | 255 ++++++++++++++++++++++++++++++++++++-------------------- Biz/Id.hs | 42 ++++++++++ 2 files changed, 207 insertions(+), 90 deletions(-) create mode 100644 Biz/Id.hs (limited to 'Biz') diff --git a/Biz/Devalloc.hs b/Biz/Devalloc.hs index a33b167..4645a2a 100644 --- a/Biz/Devalloc.hs +++ b/Biz/Devalloc.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveDataTypeable #-} @@ -10,6 +11,7 @@ {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE NoImplicitPrelude #-} @@ -50,6 +52,7 @@ where import Alpha hiding (rem, (<.>)) import Biz.App (CSS (..), HtmlApp (..)) import qualified Biz.Cli as Cli +import qualified Biz.Id as Id import qualified Biz.Look import Biz.Test ((@=?)) import qualified Biz.Test as Test @@ -64,7 +67,7 @@ import qualified Data.Acid as Acid import qualified Data.Aeson as Aeson import qualified Data.ByteString.Lazy as LBS import Data.Data (Data, Typeable) -import Data.IxSet (Indexable (..), IxSet, ixFun, ixSet, (@=)) +import Data.IxSet (Indexable (..), IxSet, ixFun, ixSet, (&&&), (@=)) import qualified Data.IxSet as IxSet import qualified Data.List as List import Data.SafeCopy (base, deriveSafeCopy) @@ -154,20 +157,68 @@ instance Indexable User where ixFun <| \u -> [userGitHubId u] ] --- | The database. -newtype Keep = Keep {users :: IxSet User} - deriving (Data, Typeable) +newtype Commit = Sha Text + deriving (Eq, Data, Typeable, Ord, Generic, Show) -instance Semigroup Keep where - a <> b = Keep <| users a <> users b +$(deriveSafeCopy 0 'base ''Commit) + +newtype URL = URL Text + deriving (Eq, Data, Typeable, Ord, Generic, Show) -instance Monoid Keep where - mempty = Keep <| mempty [] +$(deriveSafeCopy 0 'base ''URL) + +-- | The result of analyzing a git repo. +data Analysis = Analysis + { -- | Monotonic incrementing integer id + analysisId :: Id.Id Analysis, + -- | Canonical URL for the repo. I wish this was structured data instead of + -- just Text. + url :: URL, + -- | Where the repo is stored on the local disk. + bareRepo :: FilePath, + -- | A path with no active contributors + blackholes :: [Text], + -- | A path with < 3 active contributors + liabilities :: [Text], + -- | Files that have not been touched in 6 months + stale :: [(FilePath, Int)], + -- | Total score for the repo + score :: Integer, + -- | Total number of files + totalFiles :: Integer, + -- | List of all the active users we care about + activeAuthors :: [Text], + -- | Which commit this analysis was run against. + commit :: Commit, + -- | Who asked for this analysis + askedBy :: User + } + deriving (Eq, Ord, Generic, Show, Data, Typeable) + +$(deriveSafeCopy 0 'base ''Id.Id) +$(deriveSafeCopy 0 'base ''Analysis) + +instance Indexable Analysis where + empty = + ixSet + [ ixFun <| \a -> [analysisId a], + ixFun <| \a -> [askedBy a], + ixFun <| \a -> [url a], + ixFun <| \a -> [commit a] + ] + +-- | The database. +data Keep = Keep + { users :: IxSet User, + analyses :: IxSet Analysis, + nextAnalysisId :: Id.Id Analysis + } + deriving (Data, Typeable) $(deriveSafeCopy 0 'base ''Keep) -newUser :: User -> Acid.Update Keep User -newUser u = do +createUser :: User -> Acid.Update Keep User +createUser u = do keep <- get put <| keep {users = IxSet.insert u (users keep)} return u @@ -183,7 +234,38 @@ getUserByEmail email = do Keep {..} <- ask return <| IxSet.getOne <| users @= email -$(makeAcidic ''Keep ['newUser, 'updateUser, 'getUserByEmail]) +createAnalysis :: Analysis -> Acid.Update Keep Analysis +createAnalysis a = do + keep@Keep {..} <- get + put + <| keep + { analyses = IxSet.insert a analyses, + nextAnalysisId = + succ + nextAnalysisId + } + return a + +getAnalysesByAsker :: User -> Acid.Query Keep [Analysis] +getAnalysesByAsker user = do + Keep {..} <- ask + return <| IxSet.toList <| analyses @= user + +getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe Analysis) +getAnalysisByUrlAndCommit url sha = do + Keep {..} <- ask + return <| IxSet.getOne <| (analyses @= url &&& analyses @= sha) + +$( makeAcidic + ''Keep + [ 'createUser, + 'updateUser, + 'getUserByEmail, + 'createAnalysis, + 'getAnalysesByAsker, + 'getAnalysisByUrlAndCommit + ] + ) upsertGitHubUser :: Acid.AcidState Keep -> ByteString -> GitHub.User -> IO User upsertGitHubUser keep tok ghUser = case GitHub.userEmail ghUser of @@ -196,13 +278,21 @@ upsertGitHubUser keep tok ghUser = case GitHub.userEmail ghUser of Acid.update keep <| UpdateUser user {userGitHubToken = Encoding.decodeUtf8 tok} Nothing -> Acid.update keep - <| NewUser + <| CreateUser User { userEmail = UserEmail email, userGitHubId = GitHubId <. GitHub.untagId <| GitHub.userId ghUser, userGitHubToken = Encoding.decodeUtf8 tok } +init :: Keep +init = + Keep + { nextAnalysisId = Id.mk (Proxy :: Proxy Analysis) 1, + users = IxSet.empty, + analyses = IxSet.empty + } + -- * main and test main :: IO () @@ -224,7 +314,7 @@ move _ = Exception.bracket startup shutdown run startup = do cfg <- Envy.decodeWithDefaults Envy.defConfig oAuthArgs <- Envy.decodeWithDefaults Envy.defConfig - kp <- Acid.openLocalStateFrom (keep cfg) mempty :: IO (Acid.AcidState Keep) + kp <- Acid.openLocalStateFrom (keep cfg) init :: IO (Acid.AcidState Keep) jwk <- Auth.generateKey putText "@" putText "devalloc" @@ -442,7 +532,8 @@ htmlApp cooks kp cfg jwk oAuthArgs = Auth.Authenticated user -> \owner repo -> liftIO <| analyzeGitHub - (GitHub.OAuth <. Encoding.encodeUtf8 <| userGitHubToken user) + kp + user cfg owner repo @@ -762,25 +853,6 @@ instance Lucid.ToHtml AnalysisDisplay where Lucid.main_ <| Lucid.toHtml anal footer --- | The result of analyzing a git repo. -data Analysis = Analysis - { -- | Where the repo is stored on the local disk. - bareRepo :: FilePath, - -- | A path with no active contributors - blackholes :: [Text], - -- | A path with < 3 active contributors - liabilities :: [Text], - -- | Files that have not been touched in 6 months - stale :: [(FilePath, Int)], - -- | Total score for the repo - score :: Integer, - -- | Total number of files - totalFiles :: Integer, - -- | List of all the active users we care about - activeAuthors :: [Text] - } - deriving (Show) - instance Lucid.ToHtml Analysis where toHtmlRaw = Lucid.toHtml toHtml = render .> Lucid.toHtml @@ -806,58 +878,61 @@ instance Lucid.ToHtml Analysis where -- | Takes a list of active authors and a path to a bare git repo and runs a -- regular analysis -analyze :: [Text] -> FilePath -> IO Analysis -analyze activeAuthors bareRepo = do - tree <- - Process.readProcess - "git" - [ "--git-dir", - bareRepo, - "ls-tree", - "--full-tree", - "--name-only", - "-r", -- recurse into subtrees - "HEAD" - ] - "" - /> String.lines - authors <- mapM (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] - let authorMap = - zipWith - ( \path authors_ -> - (path, authors_) - ) - tree - authors :: - [(FilePath, [(Text, Text, Text)])] - stalenessMap <- mapM (lastTouched bareRepo) tree - let blackholes = - [ Text.pack path - | (path, authors_) <- authorMap, - null (map third authors_ `List.intersect` activeAuthors) - ] - let liabilities = - [ Text.pack path - | (path, authors_) <- authorMap, - length (map third authors_ `List.intersect` activeAuthors) < 3 - ] - let numBlackholes = realToFrac <| length blackholes - let numLiabilities = realToFrac <| length liabilities - let numTotal = realToFrac <| length tree - return - Analysis - { stale = - [ (path, days) - | (path, days) <- stalenessMap, - days > 180 - ], - score = calculateScore numTotal numBlackholes numLiabilities, - totalFiles = toInteger <| length tree, - .. - } +analyze :: Acid.AcidState Keep -> User -> [Text] -> URL -> FilePath -> IO Analysis +analyze keep askedBy activeAuthors url bareRepo = do + commit <- Sha >= \case + Just analysis -> return analysis + Nothing -> do + tree <- + git + [ "ls-tree", + "--full-tree", + "--name-only", + "-r", -- recurse into subtrees + "HEAD" + ] + /> String.lines + authors <- mapM (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] + let authorMap = + zipWith + ( \path authors_ -> + (path, authors_) + ) + tree + authors :: + [(FilePath, [(Text, Text, Text)])] + stalenessMap <- mapM (lastTouched bareRepo) tree + let blackholes = + [ Text.pack path + | (path, authors_) <- authorMap, + null (map third authors_ `List.intersect` activeAuthors) + ] + let liabilities = + [ Text.pack path + | (path, authors_) <- authorMap, + length (map third authors_ `List.intersect` activeAuthors) < 3 + ] + let numBlackholes = realToFrac <| length blackholes + let numLiabilities = realToFrac <| length liabilities + let numTotal = realToFrac <| length tree + Acid.update keep + <| CreateAnalysis + <| Analysis + { analysisId = Id.Id 0, -- make this mempty? + stale = + [ (path, days) + | (path, days) <- stalenessMap, + days > 180 + ], + score = calculateScore numTotal numBlackholes numLiabilities, + totalFiles = toInteger <| length tree, + .. + } where third :: (a, b, c) -> c third (_, _, a) = a + git args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) "" calculateScore :: Double -> Double -> Double -> Integer calculateScore numTotal numBlackholes numLiabilities = @@ -909,8 +984,6 @@ authorsFor :: -- | Returns (number of commits, author name, author email) IO [(Text, Text, Text)] authorsFor gitDir path = - -- git shortlog writes to stderr for some reason, so we can't just use - -- Process.readProcess Process.readProcess "git" [ "--git-dir", @@ -938,15 +1011,16 @@ authorsFor gitDir path = -- | Clones a repo from GitHub and does the analysis. -- TODO: break this up into fetchGitHub and analyze functions. analyzeGitHub :: - GitHub.AuthMethod authMethod => - authMethod -> + Acid.AcidState Keep -> + User -> Config -> -- | GitHub owner Text -> -- | GitHub repo Text -> IO Analysis -analyzeGitHub githubAuth cfg o r = do +analyzeGitHub keep user cfg o r = do + let githubAuth = GitHub.OAuth <. Encoding.encodeUtf8 <| userGitHubToken user -- I currently have no way of getting active users... getting a list of -- collaborators on a repo requires authentication for some reason. -- @@ -960,7 +1034,7 @@ analyzeGitHub githubAuth cfg o r = do -- is the only regular contributor, at least for now. -- -- Right activeUsers <- GitHub.github () (GitHub.collaboratorsOnR ghOwner ghRepo GitHub.FetchAll) - Right user <- + Right ghuser <- GitHub.github githubAuth ( GitHub.userInfoForR @@ -968,13 +1042,14 @@ analyzeGitHub githubAuth cfg o r = do ) -- assume the only active author is the owner, for now -- TODO: should be userEmail but that requires authentication? - let activeAuthors = [require "user email" <| GitHub.userName user] + let activeAuthors = [require "user email" <| GitHub.userName ghuser] eRepo <- GitHub.github githubAuth (GitHub.repositoryR ghOwner ghRepo) case eRepo of Left err -> throwIO <| toException err Right repo -> do + let GitHub.URL url = GitHub.repoUrl repo bareRepo <- fetchBareRepo cfg <. GitHub.getUrl <| GitHub.repoHtmlUrl repo - analyze activeAuthors bareRepo + analyze keep user activeAuthors (URL url) bareRepo where ghOwner = GitHub.mkName (Proxy :: Proxy GitHub.Owner) o ghRepo = GitHub.mkName (Proxy :: Proxy GitHub.Repo) r diff --git a/Biz/Id.hs b/Biz/Id.hs new file mode 100644 index 0000000..89c74bf --- /dev/null +++ b/Biz/Id.hs @@ -0,0 +1,42 @@ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- Integer-based identifier. +module Biz.Id + ( Id (..), + mk, + untag, + ) +where + +import Alpha +import Data.Aeson (FromJSON (..), ToJSON (..)) +import Data.Binary (Binary) +import Data.Data (Data) + +newtype Id entity = Id Int + deriving (Eq, Ord, Show, Generic, Typeable, Data) + +mk :: proxy entity -> Int -> Id entity +mk _ = Id + +untag :: Id entity -> Int +untag (Id i) = i + +instance Hashable (Id entity) + +instance Binary (Id entity) + +instance Enum (Id entity) where + toEnum n = mk (Proxy :: Proxy entity) n + fromEnum = untag + +instance NFData (Id entity) where + rnf (Id s) = rnf s + +instance FromJSON (Id entity) where + parseJSON = fmap Id <. parseJSON + +instance ToJSON (Id entity) where + toJSON = toJSON <. untag -- cgit v1.2.3