From 1934482e8a5beb628b1e6e9db11f9fd274f174a5 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Wed, 7 Jul 2021 18:57:04 -0400 Subject: Factor Analysis into own module --- Biz/Devalloc.hs | 341 +++++++++++++------------------------------------------- 1 file changed, 79 insertions(+), 262 deletions(-) (limited to 'Biz/Devalloc.hs') diff --git a/Biz/Devalloc.hs b/Biz/Devalloc.hs index d9cf840..9eea33d 100644 --- a/Biz/Devalloc.hs +++ b/Biz/Devalloc.hs @@ -61,6 +61,8 @@ import Alpha hiding (rem, (<.>)) import qualified Biz.App as App import qualified Biz.Auth as Auth import qualified Biz.Cli as Cli +import Biz.Devalloc.Analysis (Analysis (..), Commit (..)) +import qualified Biz.Devalloc.Analysis as Analysis import qualified Biz.Id as Id import qualified Biz.Log as Log import qualified Biz.Look @@ -89,12 +91,10 @@ import qualified Data.Map as Map import Data.SafeCopy (base, deriveSafeCopy, extension) import qualified Data.SafeCopy as SafeCopy import qualified Data.Set as Set -import qualified Data.String as String import qualified Data.Text as Text import qualified Data.Text.Encoding as Encoding import qualified Data.Time.Calendar as Time import qualified Data.Time.Clock as Time -import qualified Data.Time.Format as Time import Data.Vector (Vector) import qualified Data.Vector as Vector -- import qualified Data.Vector.Algorithms.Intro as Vector @@ -201,6 +201,7 @@ data User0 = User0 } deriving (Eq, Data, Typeable, Ord, Generic, Show) +$(deriveSafeCopy 0 'base ''Id.Id) $(deriveSafeCopy 0 'base ''User0) -- | The main representation of a user. @@ -242,9 +243,6 @@ instance Indexable User where ixFun <| \User {..} -> [userSubscription] ] -newtype Commit = Sha Text - deriving (Eq, Data, Typeable, Ord, Generic, Show) - instance Lucid.ToHtml Commit where toHtmlRaw = Lucid.toHtml toHtml (Sha txt) = Lucid.toHtml txt @@ -269,84 +267,13 @@ data Visibility = Public | Private $(deriveSafeCopy 0 'base ''Visibility) -data Analysis0 = Analysis0 - { analysisId :: Id.Id Analysis0, - url :: URL, - bareRepo :: FilePath, - repoVisibility :: Visibility, - blackholes :: [Text], - liabilities :: [Text], - stale :: [(FilePath, Int)], - score :: Integer, - totalFiles :: Integer, - activeAuthors :: [Text], - commit :: Commit, - askedBy :: Id.Id User - } - deriving (Eq, Ord, Generic, Show, Data, Typeable) - -$(deriveSafeCopy 0 'base ''Analysis0) - --- | The result of analyzing a git repo. -data Analysis = Analysis - { -- | Monotonic incrementing integer id - analysisId :: Id.Id Analysis, - -- | Canonical URL for the repo. I wish this was structured data instead of - -- just Text. - url :: URL, - -- | Where the repo is stored on the local disk. - bareRepo :: FilePath, - -- | If the repo is OSS or not - repoVisibility :: Visibility, - -- | A path with no active contributors - blackholes :: [Text], - -- | A path with < 3 active contributors - liabilities :: [Text], - -- | Map of path to number of commits, for detecting paths that continually - -- get rewritten. - hotspotMap :: Map FilePath Integer, - -- | Files that have not been touched in 6 months - stale :: [(FilePath, Integer)], - -- | Total score for the repo - score :: Integer, - -- | Total number of files - totalFiles :: Integer, - -- | The total number of commits - totalCommits :: Integer, - -- | List of all the active users we care about - activeAuthors :: [Text], - -- | Which commit this analysis was run against. - commit :: Commit, - -- | Who asked for this analysis - askedBy :: Id.Id User - } - deriving (Eq, Ord, Generic, Show, Data, Typeable) - -instance SafeCopy.Migrate Analysis where - type MigrateFrom Analysis = Analysis0 - migrate Analysis0 {..} = - Analysis - { analysisId = Id.mk (Proxy :: Proxy Analysis) <| Id.untag analysisId, - hotspotMap = mempty, - totalCommits = 0, - stale = map (second toInteger) stale, - .. - } - -$(deriveSafeCopy 0 'base ''Id.Id) -$(deriveSafeCopy 0 'base ''Analysis) - instance Indexable Analysis where empty = ixSet - [ ixFun <| \Analysis {..} -> [analysisId], - ixFun <| \Analysis {..} -> [askedBy], - ixFun <| \Analysis {..} -> [url], - ixFun <| \Analysis {..} -> [commit], - ixFun <| \Analysis {..} -> [repoVisibility] + [ ixFun <| \Analysis {..} -> [commit] ] -instance App.HasCss Analysis where +instance App.HasCss AnalysisAction where cssFor _ = do "#analysis > *" ? do Biz.Look.marginAll (rem 2) @@ -399,14 +326,13 @@ instance App.HasCss Analysis where ".bar" ? do Clay.height (px 800) -instance Lucid.ToHtml Analysis where +instance Lucid.ToHtml AnalysisAction where toHtmlRaw = Lucid.toHtml - toHtml Analysis {..} = + toHtml AnalysisAction {..} = Lucid.div_ [Lucid.id_ "analysis"] <| do + let Analysis {..} = analysis Lucid.p_ [Lucid.class_ "analysisFor"] <| do - "Analysis for " - Lucid.a_ [Lucid.href_ <| (\(URL txt) -> txt) <| url] <| do - Lucid.toHtml url + "Analysis for " <> Lucid.toHtml gitDir score_ <| do title_ "Total Score" @@ -488,8 +414,8 @@ instance Lucid.ToHtml Analysis where score_ <| do title_ "Stale files" - quantity_ <| Lucid.toHtml <| slen stale - previewChart <| simpleBar (len stale) totalFiles + quantity_ <| Lucid.toHtml <| tshow <| Map.size stale + previewChart <| simpleBar (Map.size stale) totalFiles Lucid.details_ <| do Lucid.summary_ "Details" desc @@ -505,7 +431,8 @@ instance Lucid.ToHtml Analysis where documentation practices along the way. |] Lucid.ul_ <| do - forM_ stale <| \(path, days) -> + -- probably Map.mapWithKey is better? + forM_ (Map.toList stale) <| \(path, days) -> Lucid.li_ <| Lucid.toHtml <| path <> " (" <> show days <> " days)" Lucid.with score_ [Lucid.id_ "hotspots"] <| do @@ -540,7 +467,7 @@ instance Lucid.ToHtml Analysis where <| Lucid.toHtml <| path <> ": " <> show n <> " commits" where - simpleBar :: Monad m => Integer -> Integer -> Lucid.HtmlT m () + simpleBar :: (Show i, Monad m, Num i) => i -> Integer -> Lucid.HtmlT m () simpleBar n total = do Lucid.table_ [Lucid.class_ "charts-css bar stacked multiple"] <| do Lucid.tr_ <| do @@ -560,12 +487,41 @@ instance Lucid.ToHtml Analysis where desc :: Monad m => Text -> Lucid.HtmlT m () desc = Lucid.p_ <. Cmark.renderNode [] <. Cmark.commonmarkToNode [] +-- | Captures an 'Analysis' with metadata used in the webapp to track who asked +-- it and so on. +data AnalysisAction = AnalysisAction + { -- | Monotonic incrementing integer id + analysisId :: Id.Id AnalysisAction, + -- | Who asked for this analysis + askedBy :: Id.Id User, + -- | Where is this coming from? + url :: URL, + -- | Is the URL publically visible? + repoVisibility :: Visibility, + -- | The actual analaysis + analysis :: Analysis + } + deriving (Eq, Ord, Generic, Show, Data, Typeable) + +$(deriveSafeCopy 0 'base ''Analysis) +$(deriveSafeCopy 0 'base ''AnalysisAction) + +instance Indexable AnalysisAction where + empty = + ixSet + [ ixFun <| \AnalysisAction {..} -> [analysisId], + ixFun <| \AnalysisAction {..} -> [askedBy], + ixFun <| \AnalysisAction {..} -> [url], + ixFun <| \AnalysisAction {..} -> [repoVisibility], + ixFun <| \AnalysisAction {..} -> [commit analysis] + ] + -- | The database. data Keep = Keep { users :: IxSet User, nextUserId :: Id.Id User, - analyses :: IxSet Analysis, - nextAnalysisId :: Id.Id Analysis + analyses :: IxSet AnalysisAction, + nextAnalysisId :: Id.Id AnalysisAction } deriving (Data, Typeable) @@ -603,10 +559,10 @@ getUsers = do Keep {..} <- ask pure <| IxSet.toList users -createAnalysis :: Analysis -> Acid.Update Keep Analysis +createAnalysis :: AnalysisAction -> Acid.Update Keep AnalysisAction createAnalysis a = do keep@Keep {..} <- get - let newAnalysis = a {analysisId = nextAnalysisId} :: Analysis + let newAnalysis = a {analysisId = nextAnalysisId} :: AnalysisAction put <| keep { analyses = IxSet.insert newAnalysis analyses, @@ -614,27 +570,27 @@ createAnalysis a = do } pure newAnalysis -getAnalysisById :: Id.Id Analysis -> Acid.Query Keep (Maybe Analysis) +getAnalysisById :: Id.Id AnalysisAction -> Acid.Query Keep (Maybe AnalysisAction) getAnalysisById id = do Keep {..} <- ask pure <| IxSet.getOne <| analyses @= id -getAllAnalyses :: Acid.Query Keep [Analysis] +getAllAnalyses :: Acid.Query Keep [AnalysisAction] getAllAnalyses = do Keep {..} <- ask pure <| IxSet.toList analyses -getAnalysesByAsker :: User -> Acid.Query Keep [Analysis] +getAnalysesByAsker :: User -> Acid.Query Keep [AnalysisAction] getAnalysesByAsker User {..} = do Keep {..} <- ask pure <| IxSet.toList <| analyses @= userId -getAnalysesByUrl :: URL -> Acid.Query Keep [Analysis] +getAnalysesByUrl :: URL -> Acid.Query Keep [AnalysisAction] getAnalysesByUrl url = do Keep {..} <- ask pure <| IxSet.toList <| analyses @= url -getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe Analysis) +getAnalysisByUrlAndCommit :: URL -> Commit -> Acid.Query Keep (Maybe AnalysisAction) getAnalysisByUrlAndCommit url sha = do Keep {..} <- ask pure <| IxSet.getOne <| analyses @= url &&& analyses @= sha @@ -727,7 +683,7 @@ mock_ghUser = init :: Keep init = Keep - { nextAnalysisId = Id.mk (Proxy :: Proxy Analysis) 1, + { nextAnalysisId = Id.mk (Proxy :: Proxy AnalysisAction) 1, nextUserId = Id.mk (Proxy :: Proxy User) 1, users = IxSet.empty, analyses = IxSet.empty @@ -817,8 +773,7 @@ test :: Test.Tree test = Test.group "Biz.Devalloc" - [ test_calculateScore, - test_spliceCreds, + [ test_spliceCreds, Test.with (startup True) (\t@(c, _, _) -> shutdown t >> tidy c) @@ -851,6 +806,7 @@ data Config = Config -- and set in .envrc.local -- https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token tokn :: Text, + -- | The example shown on the homepage homeExample :: URL } deriving (Generic, Show) @@ -950,7 +906,7 @@ data Paths path = Paths path :- Auth.Auth '[Auth.Cookie] User :> "analysis" - :> Capture "analysisId" (Id.Id Analysis) + :> Capture "analysisId" (Id.Id AnalysisAction) :> Get '[Lucid.HTML] (App.Html AnalysisDisplay), postAnalysis :: path @@ -1256,7 +1212,7 @@ instance Lucid.ToHtml AdminDashboard where -- | The front page pitch. Eventually I'd like to load the content from markdown -- files or some other store of data so I can A/B test. -data Home = Home Auth.OAuthArgs (Maybe Analysis) +data Home = Home Auth.OAuthArgs (Maybe AnalysisAction) instance App.HasCss Home where cssFor (Home _ mAnalysis) = do @@ -1356,7 +1312,7 @@ instance Lucid.ToHtml Home where h2 = Lucid.h2_ <. markdown exampleWrapper = Lucid.div_ [Lucid.class_ "example"] -data Analyses = Analyses User [Analysis] +data Analyses = Analyses User [AnalysisAction] instance App.HasCss Analyses where cssFor _ = mempty @@ -1373,7 +1329,7 @@ instance Lucid.ToHtml Analyses where [Lucid.linkHref_ "/" <| fieldLink selectRepo] "Analyze one of your repos" Lucid.div_ <| do - forM_ analyses <| \Analysis {..} -> + forM_ analyses <| \AnalysisAction {..} -> Lucid.a_ [ href analysisId, css <| Biz.Look.marginAll (em 1) @@ -1382,7 +1338,7 @@ instance Lucid.ToHtml Analyses where <| do Lucid.div_ <| Lucid.toHtml url Lucid.div_ [css <| Clay.fontSizeCustom Clay.Font.small] - <| Lucid.toHtml commit + <| Lucid.toHtml (commit analysis) footer where href aid = Lucid.linkHref_ "/" <| fieldLink getAnalysis aid @@ -1555,7 +1511,7 @@ instance Web.FromForm SubmitAnalysis -- | I need more information than just 'Analysis' has to render a full, useful -- web page, hence this type. -data AnalysisDisplay = AnalysisDisplay User Analysis +data AnalysisDisplay = AnalysisDisplay User AnalysisAction instance App.HasCss AnalysisDisplay where cssFor (AnalysisDisplay _ analysis) = App.cssFor analysis @@ -1570,164 +1526,24 @@ instance Lucid.ToHtml AnalysisDisplay where footer -- | Run a full analysis on a git repo -analyze :: Acid.AcidState Keep -> Id.Id User -> [Text] -> URL -> FilePath -> Bool -> IO Analysis +analyze :: Acid.AcidState Keep -> Id.Id User -> [Text] -> URL -> FilePath -> Bool -> IO AnalysisAction analyze keep askedBy activeAuthors url bareRepo repoPrivate = do - commit <- Sha \case Just analysis -> pure analysis Nothing -> - runAnalysis bareRepo activeAuthors url askedBy repoPrivate commit + Analysis.run activeAuthors bareRepo + /> ( \a -> + AnalysisAction + { analysisId = mempty, + analysis = a, + repoVisibility = repoPrivate ?: (Private, Public), + .. + } + ) /> CreateAnalysis +> Acid.update keep -runAnalysis :: String -> [Text] -> URL -> Id.Id User -> Bool -> Commit -> IO Analysis -runAnalysis bareRepo activeAuthors url askedBy repoPrivate commit = do - tree <- - git - bareRepo - [ "ls-tree", - "--full-tree", - "--name-only", - "-r", -- recurse into subtrees - "HEAD" - ] - /> String.lines - authors <- traverse (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] - let authorMap = zip tree authors :: [(FilePath, [(Text, Text, Text)])] - stalenessMap <- traverse (lastTouched bareRepo) tree - let blackholes = - [ Text.pack path - | (path, authors_) <- authorMap, - null (map third authors_ `List.intersect` activeAuthors) - ] - let liabilities = - [ Text.pack path - | (path, authors_) <- authorMap, - length (map third authors_ `List.intersect` activeAuthors) < 3 - ] - let numBlackholes = realToFrac <| length blackholes - let numLiabilities = realToFrac <| length liabilities - let numTotal = realToFrac <| length tree - hotspotMap <- - Map.fromList filter (/= '\n') - /> readMaybe - /> fromMaybe 0 - pure - <| Analysis - { analysisId = mempty, - stale = - [ (path, days) - | (path, Just days) <- stalenessMap, - days > 180 - ], - score = calculateScore numTotal numBlackholes numLiabilities, - totalFiles = toInteger <| length tree, - repoVisibility = repoPrivate ?: (Private, Public), - .. - } - where - third :: (a, b, c) -> c - third (_, _, a) = a - getChangeCount :: FilePath -> IO (FilePath, Integer) - getChangeCount path = - git bareRepo ["rev-list", "--count", "HEAD", "--", path] - /> filter (/= '\n') - /> readMaybe - /> fromMaybe 0 - /> (path,) - --- | Run a git command on a repo -git :: - -- | path to the git dir (bare repo) - String -> - -- | args to `git` - [String] -> IO String -git bareRepo args = Process.readProcess "git" (["--git-dir", bareRepo] ++ args) "" - --- | Does the aggregate score calculation given number of files found to be --- blackholes, liabilities, etc. -calculateScore :: Double -> Double -> Double -> Integer -calculateScore 0 _ _ = 0 -calculateScore a 0 0 | a > 0 = 100 -calculateScore a b c | a < 0 || b < 0 || c < 0 = 0 -calculateScore numTotal numBlackholes numLiabilities = - max 0 <. round - <| maxScore - * (weightedBlackholes + weightedLiabilities + numGood) - / numTotal - where - weightedBlackholes = numBlackholes * (5 / 10) - weightedLiabilities = numLiabilities * (7 / 10) - numGood = numTotal - numBlackholes - numLiabilities - maxScore = 100.0 - -test_calculateScore :: Test.Tree -test_calculateScore = - Test.group - "calculateScore" - [ Test.unit "perfect score" <| 100 @=? calculateScore 100 0 0, - Test.unit "all blackholes" <| 50 @=? calculateScore 100 100 0, - Test.unit "all liabilities" <| 70 @=? calculateScore 100 0 100, - Test.prop "never > 100" <| \t b l -> calculateScore t b l <= 100, - Test.prop "never < 0" <| \t b l -> calculateScore t b l >= 0 - ] - -lastTouched :: FilePath -> FilePath -> IO (FilePath, Maybe Integer) -lastTouched bareRepo path = do - now <- Time.getCurrentTime - timestamp <- - Process.readProcess - "git" - [ "--git-dir", - bareRepo, - "log", - "-n1", - "--pretty=%aI", - "--", - path - ] - "" - /> filter (/= '\n') - /> Time.parseTimeM True Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%z" - pure (path, calculateAge now - FilePath -> - -- | returns (number of commits, author name, author email) - IO [(Text, Text, Text)] -authorsFor gitDir path = - Process.readProcess - "git" - [ "--git-dir", - gitDir, - "shortlog", - "--numbered", - "--summary", - "--email", - "HEAD", - "--", - path - ] - "" - /> Text.pack - /> Text.lines - /> map (Text.break (== '\t')) - /> map parseAuthor - where - parseAuthor (commits, author) = - ( Text.strip commits, - Text.strip <| Text.takeWhile (/= '<') author, - Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author - ) - spliceCreds :: User -> Text -> Text spliceCreds User {..} url = scheme <> "//" <> unGitHubHandle userGitHubHandle <> ":" <> userGitHubToken <> "@" <> Text.drop 2 rest @@ -1767,7 +1583,7 @@ analyzeGitHub :: Text -> -- | GitHub repo Text -> - IO Analysis + IO AnalysisAction analyzeGitHub keep user@User {userId} ghAuth depo o r = do activeAuthors <- getPeople @@ -1823,7 +1639,7 @@ test_analyzeGitHub load = [ Test.unit "can analyze a public repo (octocat/hello-world)" <| do (c, _, k) <- load let user@User {userGitHubToken} = mock_user c - Analysis {..} <- + AnalysisAction {..} <- analyzeGitHub k user @@ -1832,19 +1648,20 @@ test_analyzeGitHub load = "octocat" "hello-world" url @?= URL "https://github.com/octocat/Hello-World" - bareRepo @?= depo c <> "/github.com/octocat/Hello-World.git" + -- bareRepo @?= depo c <> "/github.com/octocat/Hello-World.git" + let Analysis {..} = analysis length activeAuthors @?= 2 activeAuthors @?= ["hire@spacegho.st", "octocat@github.com"] blackholes @?= ["README"] liabilities @?= ["README"] - fst "/github.com/bsima/biz.git" + -- bareRepo @?= depo c <> "/github.com/bsima/biz.git" ] where mock_user c = -- cgit v1.2.3