From 9f2d8dbf6e0b5ee9153524601b4e7dec49f308df Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Wed, 23 Dec 2020 23:47:12 -0500 Subject: devalloc: implement github blackhole list --- Biz/Devalloc.hs | 227 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 190 insertions(+), 37 deletions(-) (limited to 'Biz') diff --git a/Biz/Devalloc.hs b/Biz/Devalloc.hs index 7cf906a..ec8f870 100644 --- a/Biz/Devalloc.hs +++ b/Biz/Devalloc.hs @@ -29,16 +29,19 @@ -- : dep warp module Biz.Devalloc ( main, + test, ) where -import Alpha +import Alpha hiding ((<.>)) import Biz.App (CSS (..), HtmlApp (..)) import qualified Biz.Look import qualified Clay import qualified Control.Exception as Exception import qualified Data.Aeson as Aeson import qualified Data.ByteString.Lazy as LBS +import qualified Data.List as List +import qualified Data.String as String import qualified Data.Text as Text import qualified Data.Text.Encoding as Encoding import Data.Vector (Vector) @@ -50,12 +53,14 @@ import qualified Lucid.Servant as Lucid import Network.HTTP.Req ((/:), (=:)) import qualified Network.HTTP.Req as Req import qualified Network.Wai as Wai -import Network.Wai.Application.Static (defaultWebAppSettings) import qualified Network.Wai.Handler.Warp as Warp import Network.Wai.Middleware.RequestLogger (logStdout) import Servant import Servant.HTML.Lucid +import qualified System.Directory as Directory import qualified System.Envy as Envy +import System.FilePath ((<.>), ()) +import qualified System.Process as Process import qualified Web.FormUrlEncoded main :: IO () @@ -67,15 +72,20 @@ main = Exception.bracket startup shutdown run putText "@" putText "devalloc" putText <| "port: " <> (show <| port cfg) + putText <| "depo: " <> (Text.pack <| depo cfg) return (cfg, serve (Proxy @AllPaths) <| paths cfg oAuthArgs) shutdown :: (Config, Application) -> IO () shutdown _ = pure () run :: (Config, Wai.Application) -> IO () run (cfg, app) = Warp.run (port cfg) (logStdout app) +test :: IO () +test = test_analyzeGitHub >> pure () + data Config = Config { port :: Warp.Port, - assets :: FilePath + -- | The repo depo! Depository of repositories! + depo :: FilePath } deriving (Generic, Show) @@ -83,7 +93,7 @@ instance Envy.DefConfig Config where defConfig = Config { port = 8005, - assets = "_/bild/dev/static/devalloc.js" + depo = "_/var/devalloc/depo" } instance Envy.FromEnv Config @@ -136,32 +146,30 @@ instance Lucid.ToHtml a => Lucid.ToHtml (HtmlApp a) where -- * paths and pages type AllPaths = - Get '[HTML] (HtmlApp Page) + Get '[HTML] (HtmlApp Home) :<|> "auth" :> "github" :> "callback" :> QueryParam "code" Text - :> Get '[HTML] (HtmlApp Page) - :<|> ShowAnalysis - :<|> ("static" :> Raw) + :> Get '[HTML] (HtmlApp SelectRepo) + :<|> GitHubAnalysis :<|> "css" :> "main.css" :> Get '[CSS] Text allPaths :: Proxy AllPaths allPaths = Proxy :: Proxy AllPaths -type ShowAnalysis = "analysis" :> QueryParam "id" Int :> Get '[HTML] (HtmlApp Analysis) +type GitHubAnalysis = + "analysis" :> "github" + :> Capture "user" Text + :> Capture "repo" Text + :> Get '[HTML] (HtmlApp Analysis) paths :: Config -> OAuthArgs -> Server AllPaths -paths Config {assets} oAuthArgs = +paths cfg oAuthArgs = page (Home oAuthArgs) :<|> auth oAuthArgs - :<|> analyze - :<|> static + :<|> (\user repo -> liftIO <| analyzeGitHub cfg user repo >>= pure . HtmlApp) :<|> look where page = pure . HtmlApp - analyze Nothing = panic "could not analyze this repo" - analyze (Just id) = - pure . HtmlApp <| Analysis {targetRepo = GitHub.mkId (Proxy :: Proxy GitHub.Repo) id} - static = serveDirectoryWith <| defaultWebAppSettings assets look = return . toStrict . Clay.render <| do Biz.Look.fuckingStyle @@ -176,18 +184,12 @@ linkTo :: MkLink path Lucid.Attribute linkTo = Lucid.safeHref_ "/" allPaths -data Page - = Home OAuthArgs - | GitHubRepos (Vector GitHub.Repo) +newtype Home = Home OAuthArgs -instance Lucid.ToHtml Page where +instance Lucid.ToHtml Home where toHtmlRaw = Lucid.toHtml - toHtml page = - Lucid.toHtml <| case page of - Home authArgs -> pitch authArgs - GitHubRepos repos -> do - Lucid.h1_ "Select a repo to analyze" - selectRepo repos + toHtml (Home oAuthArgs) = + Lucid.toHtml <| pitch oAuthArgs data OAuthResponse = OAuthResponse { access_token :: Text, @@ -196,26 +198,34 @@ data OAuthResponse = OAuthResponse } deriving (Generic, Aeson.FromJSON) -auth :: OAuthArgs -> Maybe Text -> Handler (HtmlApp Page) +newtype SelectRepo = SelectRepo (Vector GitHub.Repo) + +instance Lucid.ToHtml SelectRepo where + toHtmlRaw = Lucid.toHtml + toHtml (SelectRepo repos) = + Lucid.toHtml <| do + Lucid.h1_ "Select a repo to analyze" + selectRepo repos + +auth :: OAuthArgs -> Maybe Text -> Handler (HtmlApp SelectRepo) auth _ Nothing = panic "no code from github api" auth OAuthArgs {..} (Just code) = liftIO <| getAccessToken >>= getRepos >>= \case Left err -> panic <| show err - Right repos -> - pure <| HtmlApp <| GitHubRepos repos + Right repos -> pure . HtmlApp <| SelectRepo repos where getRepos oAuthToken = GitHub.github (GitHub.OAuth <| Encoding.encodeUtf8 oAuthToken) (GitHub.currentUserReposR GitHub.RepoPublicityAll GitHub.FetchAll) getAccessToken = - Req.runReq Req.defaultHttpConfig - <| accessTokenRequest + accessTokenRequest >>= Req.responseBody /> access_token /> return + |> Req.runReq Req.defaultHttpConfig accessTokenRequest = Req.req Req.POST @@ -228,7 +238,15 @@ auth OAuthArgs {..} (Just code) = <> "state" =: githubState data Analysis = Analysis - { targetRepo :: GitHub.Id GitHub.Repo + { bareRepo :: FilePath, + -- | A path with no active contributors + blackholes :: [Text], + -- | A path with < 3 active contributors + liabilities :: [Text], + -- | Files that have not been touched in 6 months + stale :: [Text], + -- | Total score for the repo + score :: Int } instance Lucid.ToHtml Analysis where @@ -236,10 +254,145 @@ instance Lucid.ToHtml Analysis where toHtml = Lucid.toHtml . render where render :: Analysis -> Lucid.Html () - render analysis = + render Analysis {..} = Lucid.div_ <| do Lucid.h1_ "Analysis Results" - Lucid.p_ (Lucid.toHtml <| Text.pack <| show <| targetRepo analysis) + Lucid.h3_ "blackholes:" + Lucid.ul_ <| do + mapM_ (Lucid.li_ . Lucid.toHtml) blackholes + +-- | Takes a list of active authors and a path to a bare git repo and runs a +-- regular analysis +analyze :: [Text] -> FilePath -> IO Analysis +analyze activeAuthors bareRepo = do + tree <- + Process.readProcess + "git" + [ "--git-dir", + bareRepo, + "ls-tree", + "--full-tree", + "--name-only", + "-r", -- recurse into subtrees + "HEAD" + ] + "" + /> String.lines + authors <- mapM (authorsFor bareRepo) tree :: IO [[(Text, Text, Text)]] + let authorMap = + zipWith + ( \path authors_ -> + (path, authors_) + ) + tree + authors :: + [(FilePath, [(Text, Text, Text)])] + + return + Analysis + { blackholes = + [ Text.pack path + | (path, authors_) <- authorMap, + length (List.intersect (map third authors_) activeAuthors) < 1 + ], + liabilities = [], + stale = [], -- actually a map of path->staleness + score = 10, + .. + } + +third :: (a, b, c) -> c +third (_, _, a) = a + +-- | Given a git dir and a path inside the git repo, return a list of tuples +-- with number of commits and author. +authorsFor :: + FilePath -> + FilePath -> + -- | Returns (number of commits, author name, author email) + IO [(Text, Text, Text)] +authorsFor gitDir path = do + -- git shortlog writes to stderr for some reason, so we can't just use + -- Process.readProcess + Process.readProcess + "git" + [ "--git-dir", + gitDir, + "shortlog", + "--numbered", + "--summary", + "--email", + "HEAD", + "--", + path + ] + "" + /> Text.pack + /> Text.lines + /> map (Text.break (== '\t')) + /> map + ( \(commits, author) -> + ( Text.strip commits, + Text.strip <| Text.takeWhile (/= '<') author, + Text.strip <| Text.dropAround (`elem` ['<', '>']) <| Text.dropWhile (/= '<') author + ) + ) + +-- | Clones a repo from GitHub and does the analysis. +analyzeGitHub :: + Config -> + -- | GitHub owner + Text -> + -- | GitHub repo + Text -> + IO Analysis +analyzeGitHub cfg o r = do + -- I currently have no way of getting active users... getting a list of + -- collaborators on a repo requires authentication for some reason. + -- + -- If the owner is an organization, then we can just use org members, which is + -- public too. And if the auth'ed user is a member of the org, then it returns + -- all of the members, not just public ones, so that will work just fine. + -- + -- In the meantime, what do? Maybe get the number of commits, and consider + -- "active users" as the top 10% in terms of number of commits? Or ask for a + -- list explicitly? If it is a personal repo, then I can assume that the owner + -- is the only regular contributor, at least for now. + -- + -- Right activeUsers <- GitHub.github () (GitHub.collaboratorsOnR ghOwner ghRepo GitHub.FetchAll) + Right user <- + GitHub.github + () + ( GitHub.userInfoForR + <| GitHub.mkName (Proxy :: Proxy GitHub.User) o + ) + -- assume the only active author is the owner, for now + let activeAuthors = [require "user email" <| GitHub.userName user] + Right repo <- GitHub.github () (GitHub.repositoryR ghOwner ghRepo) + bareRepo <- gitBareClone cfg . GitHub.getUrl <| GitHub.repoHtmlUrl repo + analyze activeAuthors bareRepo + where + ghOwner = GitHub.mkName (Proxy :: Proxy GitHub.Owner) o + ghRepo = GitHub.mkName (Proxy :: Proxy GitHub.Repo) r + +test_analyzeGitHub :: IO Analysis +test_analyzeGitHub = analyzeGitHub Envy.defConfig "bsima" "bin" + +-- | Clone the repo to /var/devalloc/repos/, return the full path to the +-- local repo. +gitBareClone :: Config -> Text -> IO FilePath +gitBareClone Config {depo} url = do + worktreeExists <- Directory.doesPathExist worktree + let args = + if worktreeExists + then ["--git-dir", worktree, "fetch", "origin"] + else ["clone", "--bare", "--", Text.unpack url, worktree] + Process.callProcess "git" args + return worktree + where + removeScheme :: Text -> FilePath + removeScheme u = Text.unpack <. Text.dropWhile (== '/') <. snd <| Text.breakOn "//" u + worktree = depo removeScheme url <.> "git" -- * parts @@ -257,8 +410,9 @@ selectRepo = Lucid.ul_ . mapM_ render . Vector.toList Lucid.li_ . Lucid.a_ [ linkTo - (Proxy :: Proxy ShowAnalysis) - (Just <| GitHub.untagId <| GitHub.repoId repo) + (Proxy :: Proxy GitHubAnalysis) + (GitHub.untagName <| GitHub.simpleOwnerLogin <| GitHub.repoOwner repo) + (GitHub.untagName <| GitHub.repoName repo) ] . Lucid.toHtml . GitHub.untagName @@ -272,7 +426,6 @@ loginButton OAuthArgs {..} = <> encodeParams [ ("client_id", githubClientId), ("state", githubState) - -- ("redirect_uri", "https://devalloc.io") ] ] "Get Started with GitHub" -- cgit v1.2.3