From edb55ff54b9f81c0942f4120eedd72357d7b3d7c Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Mon, 7 Aug 2023 14:03:07 -0400 Subject: Working nixified python build This represents quite a few evenings of hacking. It doesn't build all of my Python code, because my Python code is not up to snuff, but it builds the examples and pulls in third party dependencies. Some design points: - I'm using buildPythonApplication in Builder.nix because it was getting way too annoying to wrap the Python script and set PYTHONPATH myself. Easier and more robust to just use the upstream nix builder - Because of this, I had to generate a setup.py. Maybe switch to pyproject.toml in the future, whatever. - Also because of this, Target.wrapper is becoming redundant. I'll just remove it when I get Guile built in nix. - Biz/Bild.nix is getting messy and could use a refactor. - In Builder.nix, I worked around the empty directories bug by just finding and deleting empty directories after unpacking. If its stupid but works it ain't stupid! - I had to touch __init__.py files in all directories before building. Annoying! - `repl` just works, which is awesome - To ensure good Python code, I moved lints and added type checking to the build. So I can't build anything unless it passes those checks. This seems restrictive, but if I want to run some non-passing code, I can still use `repl`, so it's actually not inhibitory. --- Biz/Bild.hs | 103 ++++++++++++++++++++++++++++++++++++++++----------- Biz/Bild.nix | 14 ++----- Biz/Bild/Builder.nix | 93 ++++++++++++++++++++++++++++++++++++---------- Biz/Bild/Example.py | 30 +++++++++++++++ Biz/Ide/repl | 6 ++- Biz/Lint.hs | 18 +-------- Biz/Namespace.hs | 14 ++++--- ava.py | 31 ++++++++++------ 8 files changed, 223 insertions(+), 86 deletions(-) create mode 100644 Biz/Bild/Example.py diff --git a/Biz/Bild.hs b/Biz/Bild.hs index 7bfa788..19c8827 100644 --- a/Biz/Bild.hs +++ b/Biz/Bild.hs @@ -120,6 +120,7 @@ import qualified Biz.Cli as Cli import qualified Biz.Log as Log import Biz.Namespace (Namespace (..)) import qualified Biz.Namespace as Namespace +import Biz.Test ((@=?)) import qualified Biz.Test as Test import qualified Conduit import qualified Control.Concurrent.Async as Async @@ -152,21 +153,41 @@ main = Cli.main <| Cli.Plan help move test_ pure test_ = Test.group "Biz.Bild" - [ Test.unit "can bild bild" <| do - root <- Env.getEnv "BIZ_ROOT" - path <- Dir.makeAbsolute "Biz/Bild.hs" - case Namespace.fromPath root path of - Nothing -> Test.assertFailure "can't find ns for bild" - Just ns -> - analyze mempty ns - +> build False False - +> \case - [Exit.ExitFailure _] -> - Test.assertFailure "can't bild bild" - _ -> - pure () + [ test_bildBild, + test_bildExamples ] +test_bildBild :: Test.Tree +test_bildBild = + Test.unit "can bild bild" <| do + root <- Env.getEnv "BIZ_ROOT" + path <- Dir.makeAbsolute "Biz/Bild.hs" + case Namespace.fromPath root path of + Nothing -> Test.assertFailure "can't find ns for bild" + Just ns -> + analyze mempty ns + +> build False False + +> \case + [Exit.ExitFailure _] -> + Test.assertFailure "can't bild bild" + _ -> + pure () + +test_bildExamples :: Test.Tree +test_bildExamples = + Test.unit "can bild examples" <| do + Env.getEnv "BIZ_ROOT" +> \root -> + ["c", "hs", "lisp", "rs"] + |> map ("Biz/Bild/Example." <>) + |> traverse Dir.makeAbsolute + /> map (Namespace.fromPath root) + /> catMaybes + +> foldM analyze mempty + +> build False False + +> \case + [] -> Test.assertFailure "asdf" + xs -> all (== Exit.ExitSuccess) xs @=? True + move :: Cli.Arguments -> IO () move args = do root <- Env.getEnv "BIZ_ROOT" @@ -223,6 +244,7 @@ exitSummary exits = data Compiler = Copy + | CPython | Gcc | Ghc | Guile @@ -234,6 +256,7 @@ data Compiler compilerExe :: IsString a => Compiler -> a compilerExe = \case Copy -> "cp" + CPython -> "python" Gcc -> "gcc" Ghc -> "ghc" Guile -> "guild" @@ -321,7 +344,7 @@ isBuildableNs = \case (Namespace _ Namespace.Lisp) -> True (Namespace _ Namespace.Md) -> False (Namespace _ Namespace.None) -> False - (Namespace _ Namespace.Py) -> False + (Namespace _ Namespace.Py) -> True (Namespace _ Namespace.Sh) -> False (Namespace _ Namespace.Scm) -> True (Namespace _ Namespace.Rs) -> True @@ -412,7 +435,33 @@ analyze hmap ns = case Map.lookup ns hmap of Namespace.Keys -> pure Nothing Namespace.Md -> pure Nothing Namespace.None -> pure Nothing - Namespace.Py -> pure Nothing + Namespace.Py -> + Meta.detectAll "#" contentLines |> \Meta.Parsed {..} -> + Target + { builder = Local user host, + wrapper = Nothing, + compiler = CPython, + compilerFlags = + -- This doesn't really make sense for python, but I'll leave + -- it here for eventual --dev builds + [ "-c", + "\"import py_compile;import os;" + <> "py_compile.compile(file='" + <> str path + <> "', cfile=os.getenv('BIZ_ROOT')+'/_/int/" + <> str path + <> "', doraise=True)\"" + ], + sysdeps = psys, + langdeps = pdep, + outPath = outToPath pout, + out = pout, + srcs = Set.singleton path, + packageSet = "pythonWith", + .. + } + |> Just + |> pure Namespace.Sh -> pure Nothing Namespace.C -> Meta.detectAll "//" contentLines |> \Meta.Parsed {..} -> do @@ -557,6 +606,8 @@ analyze hmap ns = case Map.lookup ns hmap of outPath = outToPath pout, out = pout, srcs = Set.singleton absPath, + -- TODO: wrapper should just be removed, instead rely on + -- upstream nixpkgs builders to make wrappers wrapper = (pout == Meta.None) ?: ( Nothing, @@ -685,6 +736,9 @@ build andTest loud analysis = Env.getEnv "BIZ_ROOT" +> \root -> forM (Map.elems analysis) <| \target@Target {..} -> fst + Log.info ["bild", "nix", "python", nschunk namespace] + >> nixBuild loud target Gcc -> Log.info ["bild", label, "gcc", nschunk namespace] >> proc loud namespace (toNixFlag compiler) compilerFlags @@ -812,7 +866,7 @@ isModuleChar :: Char -> Bool isModuleChar c = elem c <| concat [['A' .. 'Z'], ['a' .. 'z'], ['.', '_'], ['0' .. '9']] --- Matches on `(require :package)` forms and returns `package`. The `require` +-- | Matches on `(require :package)` forms and returns `package`. The `require` -- function is technically deprecated in Common Lisp, but no new spec has been -- published with a replacement, and I don't wanna use asdf, so this is what we -- use for Lisp imports. @@ -852,14 +906,15 @@ nixBuild loud Target {..} = -- is tightly coupled with the code in the nix builder and there's no -- way around that, methinks. args = - [ argstr "srcs" <| unwords <| map str <| (root path) : Set.toList srcs, + [ argstr "srcs" <| unwords <| map str <| Set.toList srcs <> [root path], + argstr "main" <| str <| Namespace.toModule namespace, argstr "root" <| str root, argstr "packageSet" packageSet, - (argstr "langDeps" <| unwords <| map str <| Set.toList langdeps) <|> mempty, + (langdeps == mempty) ?: (mempty, argstr "langdeps" <| unwords <| map str <| Set.toList langdeps), + (sysdeps == mempty) ?: (mempty, argstr "sysdeps" <| unwords <| map str <| Set.toList sysdeps), argstr "name" <| str <| outname out, - argstr "buildPhase" <| unwords - <| (Text.pack <| toNixFlag compiler) : - compilerFlags, + argstr "compileLine" <| unwords <| (Text.pack <| toNixFlag compiler) : compilerFlags, + ["--attr", selectBuilder namespace], [str <| root "Biz/Bild/Builder.nix"] ] |> mconcat @@ -891,3 +946,9 @@ nixBuild loud Target {..} = onFailure = Log.fail ["bild", "symlink", nschunk namespace] >> Log.br, onSuccess = pure () } + +selectBuilder :: Namespace -> Text +selectBuilder = \case + Namespace _ Namespace.Hs -> "base" + Namespace _ Namespace.Py -> "python" + _ -> panic "no builder for this namespace" diff --git a/Biz/Bild.nix b/Biz/Bild.nix index 415931b..ca9223f 100644 --- a/Biz/Bild.nix +++ b/Biz/Bild.nix @@ -26,6 +26,8 @@ rec { sbclWith = nixpkgs.lispPackages_new.sbclWithPackages; + pythonWith = nixpkgs.python3.withPackages; + ghcPackageSetFull = private.ghcWith private.haskellDeps; ghcPackageSetBild = private.ghcWith (hpkgs: with hpkgs; [ aeson async base bytestring conduit conduit-extra containers directory @@ -112,6 +114,8 @@ rec { (lib.trivial.importJSON (runBildAnalyze main + "/analysis.json")); + buildPythonApplication = nixpkgs.python310.pkgs.buildPythonApplication; + # build a ghc executable ghc = main: let @@ -153,16 +157,6 @@ rec { lolcat #nixops # fails to build ormolu - (private.nixpkgs.python3.withPackages(p: with p; [ - transformers - pytorch - private.nixpkgs.python3Packages.bitsandbytes - private.nixpkgs.python3Packages.accelerate - # lint tools: - black - pylint - ])) - shellcheck wemux ]; diff --git a/Biz/Bild/Builder.nix b/Biz/Bild/Builder.nix index 04002d0..214c110 100644 --- a/Biz/Bild/Builder.nix +++ b/Biz/Bild/Builder.nix @@ -1,33 +1,88 @@ -{ srcs # list of files +/* +This is the library of nix builders. Some rules to follow: +- Keep this code as minimal as possible. I'd rather write Haskell than Nix, + wouldn't you? +- Try to reuse as much upstream Nix as possible. +- Path-like args such as 'srcs' should always be absolute paths. +*/ +{ srcs ? "" # list of all source files, as a space-separated string +, main # the entrypoint or main module (not a path) , root # path to git root , packageSet # name mapped to private.${packageSet}, e.g. 'ghcWith' -, langDeps ? null # list of deps (as a string), split and passed to packageSet +, langdeps ? null # list of deps (as a string), split and passed to packageSet +, sysdeps ? null , name # exe name -, buildPhase +, compileLine ? "" # Target.compiler <> Target.compilerFlags }: with import (/. + root + "/Biz/Bild.nix") {}; with builtins; let - srcs_ = lib.strings.splitString " " srcs; + srcs_ = (lib.strings.splitString " " srcs) ++ [main]; + skip = ["_" ".direnv"]; filter = file: type: if elem (baseNameOf file) skip then false - # TODO: this means any new directory will cause a rebuild. this bad. - # i should recurse into the directory and match against the srcsr + # TODO: this means any new directory will cause a rebuild. this bad. i + # should recurse into the directory and match against the srcs. for now I + # just use postUnpack to delete empty dirs else if type == "directory" then true - else if type == "regular" then builtins.elem file srcs_ + else if type == "regular" then (builtins.elem file srcs_) else false; - deps = pkgset: - if langDeps != null then - private.selectAttrs (lib.strings.splitString " " langDeps) pkgset - else - []; -in stdenv.mkDerivation rec { - inherit name buildPhase; + + # clean up empty dirs + postUnpack = "find . -type d -empty -delete"; + src = lib.sources.cleanSourceWith {inherit filter; src = lib.sources.cleanSource root;}; - BIZ_ROOT = src; - buildInputs = [ (private.${packageSet} deps) ]; - installPhase = '' - mkdir -p $out/bin && cp ${name} $out/bin - ''; + + langdeps_ = pkgset: + if langdeps == null || langdeps == [] then + [] + else + private.selectAttrs (lib.strings.splitString " " langdeps) pkgset; + sysdeps_ = + if sysdeps == null || sysdeps == [] then + [] + else + private.selectAttrs (lib.strings.splitString " " sysdeps) private.nixpkgs.pkgs; + BIZ_ROOT = "."; +in { + base = stdenv.mkDerivation rec { + inherit name src BIZ_ROOT postUnpack; + buildInputs = [ (private.${packageSet} langdeps_) ] ++ sysdeps_; + installPhase = "install -D ${name} $out/bin/${name}"; + buildPhase = compileLine; + }; + + python = buildPythonApplication rec { + inherit name src BIZ_ROOT postUnpack; + propagatedBuildInputs = [ (private.${packageSet} langdeps_) ] ++ sysdeps_; + buildInputs = sysdeps_; + checkInputs = [(private.pythonWith (p: with p; [black mypy pylint]))]; + checkPhase = '' + black --quiet --exclude 'setup\.py$' --check . + pylint --errors-only . + mypy --strict --no-error-summary --exclude 'setup\.py$' . + python -m ${main} test + ''; + preBuild = '' + # initialize possibly-empty subdirectories as python modules + find . -type d -exec touch {}/__init__.py \; + # generate a minimal setup.py + cat > setup.py << EOF + from setuptools import setup, find_packages + setup( + name='${name}', + entry_points={'console_scripts':['${name} = ${main}:main']}, + version='0.0.0', + url='git://simatime.com/biz.git', + author='dev', + author_email='dev@simatime.com', + description='nil', + packages=find_packages(), + install_requires=[], + ) + EOF + ''; + pythonImportsCheck = [main]; # sanity check + }; } diff --git a/Biz/Bild/Example.py b/Biz/Bild/Example.py new file mode 100644 index 0000000..78a8a6a --- /dev/null +++ b/Biz/Bild/Example.py @@ -0,0 +1,30 @@ +# : out example +# : dep cryptography +import sys +from typing import List + +from cryptography.fernet import Fernet + + +def cryptic_hello(name: str) -> str: + "Example taken from `cryptography` docs." + key = Fernet.generate_key() + f = Fernet(key) + token = f.encrypt(hello(name).encode("utf-8")) + ret = f.decrypt(token).decode("utf-8") + assert ret == hello(name) + return ret + + +def hello(name: str) -> str: + return f"Hello {name}" + + +def main() -> None: + if "test" in sys.argv: + print("testing success") + print(cryptic_hello("world")) + + +if __name__ == "__main__": + main() diff --git a/Biz/Ide/repl b/Biz/Ide/repl index 0d0a08d..e62c7b2 100755 --- a/Biz/Ide/repl +++ b/Biz/Ide/repl @@ -29,7 +29,7 @@ fi langdeps=$(jq --raw-output '.[].langdeps | join(" ")' <<< $json) sysdeps=$(jq --raw-output '.[].sysdeps | join(" ")' <<< $json) exts=$(jq --raw-output '.[].namespace.ext' <<< $json | sort | uniq) - packageSet=$(jp --raw-output '.[].packageSet' <<< $json) + packageSet=$(jq --raw-output '.[].packageSet' <<< $json) BILD="(import ${BIZ_ROOT:?}/Biz/Bild.nix {})" for lib in ${sysdeps[@]}; do flags+=(--packages "$BILD.private.nixpkgs.${lib}") @@ -62,6 +62,10 @@ fi flags+=(--packages "$BILD.private.nixpkgs.rustc") command=bash ;; + Py) + flags+=(--packages "$BILD.private.$packageSet (p: with p; [$langdeps])") + command=${CMD:-"python -i $targets"} + ;; *) echo "unsupported targets: ${targets[@]}" exit 1 diff --git a/Biz/Lint.hs b/Biz/Lint.hs index d44e465..d7db9d6 100644 --- a/Biz/Lint.hs +++ b/Biz/Lint.hs @@ -123,22 +123,6 @@ hlint = fixArgs = Nothing } -pylint :: Linter -pylint = - Linter - { exe = "pylint", - checkArgs = ["--disable=invalid-name"], - fixArgs = Nothing - } - -black :: Linter -black = - Linter - { exe = "black", - checkArgs = ["--check"], - fixArgs = Just [] - } - deadnix :: Linter deadnix = Linter @@ -180,7 +164,7 @@ runOne mode root cwd path_ = results +> traverse_ printResult >> results [ lint mode ormolu path_, lint mode hlint path_ ] - Just (Namespace _ Py) -> [lint mode pylint path_, lint mode black path_] + Just (Namespace _ Py) -> [] -- python linters are run in Builder.nix checkPhase Just (Namespace _ Sh) -> [lint mode shellcheck path_] Just (Namespace _ Nix) -> [lint mode deadnix path_] Just (Namespace _ Scm) -> [pure <| NoOp path_] diff --git a/Biz/Namespace.hs b/Biz/Namespace.hs index 72e3fa1..f0e3b32 100644 --- a/Biz/Namespace.hs +++ b/Biz/Namespace.hs @@ -9,6 +9,7 @@ module Biz.Namespace Ext (..), fromPath, toPath, + toModule, fromHaskellContent, fromHaskellModule, toHaskellModule, @@ -85,18 +86,19 @@ fromHaskellContent c = case Regex.findFirstInfix haskellModule c of Regex.many (name <|> dot)) <*> pure Hs +toModule (Namespace parts Hs) = joinWith "." parts +toModule (Namespace parts Py) = joinWith "." parts +toModule (Namespace parts Scm) = "(" ++ joinWith " " parts ++ ")" +toModule (Namespace {..}) = panic <| "toModule not implemented for " <> show ext + toHaskellModule :: Namespace -> String -toHaskellModule (Namespace parts Hs) = joinWith "." parts -toHaskellModule (Namespace {..}) = - panic <| "can't convert " <> show ext <> " to a Haskell module" +toHaskellModule = toModule fromHaskellModule :: String -> Namespace fromHaskellModule s = Namespace (List.splitOn "." s) Hs toSchemeModule :: Namespace -> String -toSchemeModule (Namespace parts Scm) = "(" ++ joinWith " " parts ++ ")" -toSchemeModule (Namespace {..}) = - panic <| "can't convert " <> show ext <> " to a Scheme module" +toSchemeModule = toModule dot :: Regex.RE Char String dot = Regex.some <| Regex.sym '.' diff --git a/ava.py b/ava.py index 1f08241..c364f6d 100755 --- a/ava.py +++ b/ava.py @@ -1,42 +1,49 @@ #!/usr/bin/env python +# : out ava +# : dep transformers +# : dep torch import transformers import torch import sys + # import sleekxmpp -#model_name = "EleutherAI/gpt-neox-20b" +# model_name = "EleutherAI/gpt-neox-20b" model_name = "EleutherAI/gpt-j-6B" if torch.cuda.is_available(): device = "cuda:0" else: raise ValueError("no cuda") + sys.exit(1) tokenizer = transformers.AutoTokenizer.from_pretrained(model_name) model = transformers.AutoModelForCausalLM.from_pretrained( - model_name, - device_map="auto", - load_in_8bit=True, - pad_token_id=tokenizer.eos_token_id, - revision="float16", - torch_dtype=torch.float16, - low_cpu_mem_usage=True, + model_name, + device_map="auto", + load_in_8bit=True, + pad_token_id=tokenizer.eos_token_id, + revision="float16", + torch_dtype=torch.float16, + low_cpu_mem_usage=True, ) # set attention_mask and pad_token_id + def gen(txt): - input_ids = tokenizer(txt, return_tensors="pt").input_ids.to('cuda') + input_ids = tokenizer(txt, return_tensors="pt").input_ids.to("cuda") outputs = model.generate( - input_ids=input_ids, - max_length=1024, - temperature=0.7, + input_ids=input_ids, + max_length=1024, + temperature=0.7, ) result = tokenizer.batch_decode(outputs, skip_special_tokens=True) result = "".join(result) return result + # Get user input and generate a response while True: user_input = input("ben: ") -- cgit v1.2.3