diff options
author | Ben Sima <ben@bsima.me> | 2023-08-07 14:03:07 -0400 |
---|---|---|
committer | Ben Sima <ben@bsima.me> | 2023-08-07 14:03:36 -0400 |
commit | edb55ff54b9f81c0942f4120eedd72357d7b3d7c (patch) | |
tree | 84c7db39e718c1a301815f0baa50e0900b7d4942 | |
parent | beb505a7125e61c4fedd02d77e37545b61b99f73 (diff) |
Working nixified python build
This represents quite a few evenings of hacking. It doesn't build all of my
Python code, because my Python code is not up to snuff, but it builds the
examples and pulls in third party dependencies.
Some design points:
- I'm using buildPythonApplication in Builder.nix because it was getting way too
annoying to wrap the Python script and set PYTHONPATH myself. Easier and more
robust to just use the upstream nix builder
- Because of this, I had to generate a setup.py. Maybe switch to pyproject.toml
in the future, whatever.
- Also because of this, Target.wrapper is becoming redundant. I'll just remove
it when I get Guile built in nix.
- Biz/Bild.nix is getting messy and could use a refactor.
- In Builder.nix, I worked around the empty directories bug by just finding and
deleting empty directories after unpacking. If its stupid but works it ain't
stupid!
- I had to touch __init__.py files in all directories before building. Annoying!
- `repl` just works, which is awesome
- To ensure good Python code, I moved lints and added type checking to the
build. So I can't build anything unless it passes those checks. This seems
restrictive, but if I want to run some non-passing code, I can still use
`repl`, so it's actually not inhibitory.
-rw-r--r-- | Biz/Bild.hs | 103 | ||||
-rw-r--r-- | Biz/Bild.nix | 14 | ||||
-rw-r--r-- | Biz/Bild/Builder.nix | 93 | ||||
-rw-r--r-- | Biz/Bild/Example.py | 30 | ||||
-rwxr-xr-x | Biz/Ide/repl | 6 | ||||
-rw-r--r-- | Biz/Lint.hs | 18 | ||||
-rw-r--r-- | Biz/Namespace.hs | 14 | ||||
-rwxr-xr-x | ava.py | 31 |
8 files changed, 223 insertions, 86 deletions
diff --git a/Biz/Bild.hs b/Biz/Bild.hs index 7bfa788..19c8827 100644 --- a/Biz/Bild.hs +++ b/Biz/Bild.hs @@ -120,6 +120,7 @@ import qualified Biz.Cli as Cli import qualified Biz.Log as Log import Biz.Namespace (Namespace (..)) import qualified Biz.Namespace as Namespace +import Biz.Test ((@=?)) import qualified Biz.Test as Test import qualified Conduit import qualified Control.Concurrent.Async as Async @@ -152,21 +153,41 @@ main = Cli.main <| Cli.Plan help move test_ pure test_ = Test.group "Biz.Bild" - [ Test.unit "can bild bild" <| do - root <- Env.getEnv "BIZ_ROOT" - path <- Dir.makeAbsolute "Biz/Bild.hs" - case Namespace.fromPath root path of - Nothing -> Test.assertFailure "can't find ns for bild" - Just ns -> - analyze mempty ns - +> build False False - +> \case - [Exit.ExitFailure _] -> - Test.assertFailure "can't bild bild" - _ -> - pure () + [ test_bildBild, + test_bildExamples ] +test_bildBild :: Test.Tree +test_bildBild = + Test.unit "can bild bild" <| do + root <- Env.getEnv "BIZ_ROOT" + path <- Dir.makeAbsolute "Biz/Bild.hs" + case Namespace.fromPath root path of + Nothing -> Test.assertFailure "can't find ns for bild" + Just ns -> + analyze mempty ns + +> build False False + +> \case + [Exit.ExitFailure _] -> + Test.assertFailure "can't bild bild" + _ -> + pure () + +test_bildExamples :: Test.Tree +test_bildExamples = + Test.unit "can bild examples" <| do + Env.getEnv "BIZ_ROOT" +> \root -> + ["c", "hs", "lisp", "rs"] + |> map ("Biz/Bild/Example." <>) + |> traverse Dir.makeAbsolute + /> map (Namespace.fromPath root) + /> catMaybes + +> foldM analyze mempty + +> build False False + +> \case + [] -> Test.assertFailure "asdf" + xs -> all (== Exit.ExitSuccess) xs @=? True + move :: Cli.Arguments -> IO () move args = do root <- Env.getEnv "BIZ_ROOT" @@ -223,6 +244,7 @@ exitSummary exits = data Compiler = Copy + | CPython | Gcc | Ghc | Guile @@ -234,6 +256,7 @@ data Compiler compilerExe :: IsString a => Compiler -> a compilerExe = \case Copy -> "cp" + CPython -> "python" Gcc -> "gcc" Ghc -> "ghc" Guile -> "guild" @@ -321,7 +344,7 @@ isBuildableNs = \case (Namespace _ Namespace.Lisp) -> True (Namespace _ Namespace.Md) -> False (Namespace _ Namespace.None) -> False - (Namespace _ Namespace.Py) -> False + (Namespace _ Namespace.Py) -> True (Namespace _ Namespace.Sh) -> False (Namespace _ Namespace.Scm) -> True (Namespace _ Namespace.Rs) -> True @@ -412,7 +435,33 @@ analyze hmap ns = case Map.lookup ns hmap of Namespace.Keys -> pure Nothing Namespace.Md -> pure Nothing Namespace.None -> pure Nothing - Namespace.Py -> pure Nothing + Namespace.Py -> + Meta.detectAll "#" contentLines |> \Meta.Parsed {..} -> + Target + { builder = Local user host, + wrapper = Nothing, + compiler = CPython, + compilerFlags = + -- This doesn't really make sense for python, but I'll leave + -- it here for eventual --dev builds + [ "-c", + "\"import py_compile;import os;" + <> "py_compile.compile(file='" + <> str path + <> "', cfile=os.getenv('BIZ_ROOT')+'/_/int/" + <> str path + <> "', doraise=True)\"" + ], + sysdeps = psys, + langdeps = pdep, + outPath = outToPath pout, + out = pout, + srcs = Set.singleton path, + packageSet = "pythonWith", + .. + } + |> Just + |> pure Namespace.Sh -> pure Nothing Namespace.C -> Meta.detectAll "//" contentLines |> \Meta.Parsed {..} -> do @@ -557,6 +606,8 @@ analyze hmap ns = case Map.lookup ns hmap of outPath = outToPath pout, out = pout, srcs = Set.singleton absPath, + -- TODO: wrapper should just be removed, instead rely on + -- upstream nixpkgs builders to make wrappers wrapper = (pout == Meta.None) ?: ( Nothing, @@ -685,6 +736,9 @@ build andTest loud analysis = Env.getEnv "BIZ_ROOT" +> \root -> forM (Map.elems analysis) <| \target@Target {..} -> fst </ case compiler of + CPython -> + Log.info ["bild", "nix", "python", nschunk namespace] + >> nixBuild loud target Gcc -> Log.info ["bild", label, "gcc", nschunk namespace] >> proc loud namespace (toNixFlag compiler) compilerFlags @@ -812,7 +866,7 @@ isModuleChar :: Char -> Bool isModuleChar c = elem c <| concat [['A' .. 'Z'], ['a' .. 'z'], ['.', '_'], ['0' .. '9']] --- Matches on `(require :package)` forms and returns `package`. The `require` +-- | Matches on `(require :package)` forms and returns `package`. The `require` -- function is technically deprecated in Common Lisp, but no new spec has been -- published with a replacement, and I don't wanna use asdf, so this is what we -- use for Lisp imports. @@ -852,14 +906,15 @@ nixBuild loud Target {..} = -- is tightly coupled with the code in the nix builder and there's no -- way around that, methinks. args = - [ argstr "srcs" <| unwords <| map str <| (root </> path) : Set.toList srcs, + [ argstr "srcs" <| unwords <| map str <| Set.toList srcs <> [root </> path], + argstr "main" <| str <| Namespace.toModule namespace, argstr "root" <| str root, argstr "packageSet" packageSet, - (argstr "langDeps" <| unwords <| map str <| Set.toList langdeps) <|> mempty, + (langdeps == mempty) ?: (mempty, argstr "langdeps" <| unwords <| map str <| Set.toList langdeps), + (sysdeps == mempty) ?: (mempty, argstr "sysdeps" <| unwords <| map str <| Set.toList sysdeps), argstr "name" <| str <| outname out, - argstr "buildPhase" <| unwords - <| (Text.pack <| toNixFlag compiler) : - compilerFlags, + argstr "compileLine" <| unwords <| (Text.pack <| toNixFlag compiler) : compilerFlags, + ["--attr", selectBuilder namespace], [str <| root </> "Biz/Bild/Builder.nix"] ] |> mconcat @@ -891,3 +946,9 @@ nixBuild loud Target {..} = onFailure = Log.fail ["bild", "symlink", nschunk namespace] >> Log.br, onSuccess = pure () } + +selectBuilder :: Namespace -> Text +selectBuilder = \case + Namespace _ Namespace.Hs -> "base" + Namespace _ Namespace.Py -> "python" + _ -> panic "no builder for this namespace" diff --git a/Biz/Bild.nix b/Biz/Bild.nix index 415931b..ca9223f 100644 --- a/Biz/Bild.nix +++ b/Biz/Bild.nix @@ -26,6 +26,8 @@ rec { sbclWith = nixpkgs.lispPackages_new.sbclWithPackages; + pythonWith = nixpkgs.python3.withPackages; + ghcPackageSetFull = private.ghcWith private.haskellDeps; ghcPackageSetBild = private.ghcWith (hpkgs: with hpkgs; [ aeson async base bytestring conduit conduit-extra containers directory @@ -112,6 +114,8 @@ rec { (lib.trivial.importJSON (runBildAnalyze main + "/analysis.json")); + buildPythonApplication = nixpkgs.python310.pkgs.buildPythonApplication; + # build a ghc executable ghc = main: let @@ -153,16 +157,6 @@ rec { lolcat #nixops # fails to build ormolu - (private.nixpkgs.python3.withPackages(p: with p; [ - transformers - pytorch - private.nixpkgs.python3Packages.bitsandbytes - private.nixpkgs.python3Packages.accelerate - # lint tools: - black - pylint - ])) - shellcheck wemux ]; diff --git a/Biz/Bild/Builder.nix b/Biz/Bild/Builder.nix index 04002d0..214c110 100644 --- a/Biz/Bild/Builder.nix +++ b/Biz/Bild/Builder.nix @@ -1,33 +1,88 @@ -{ srcs # list of files +/* +This is the library of nix builders. Some rules to follow: +- Keep this code as minimal as possible. I'd rather write Haskell than Nix, + wouldn't you? +- Try to reuse as much upstream Nix as possible. +- Path-like args such as 'srcs' should always be absolute paths. +*/ +{ srcs ? "" # list of all source files, as a space-separated string +, main # the entrypoint or main module (not a path) , root # path to git root , packageSet # name mapped to private.${packageSet}, e.g. 'ghcWith' -, langDeps ? null # list of deps (as a string), split and passed to packageSet +, langdeps ? null # list of deps (as a string), split and passed to packageSet +, sysdeps ? null , name # exe name -, buildPhase +, compileLine ? "" # Target.compiler <> Target.compilerFlags }: with import (/. + root + "/Biz/Bild.nix") {}; with builtins; let - srcs_ = lib.strings.splitString " " srcs; + srcs_ = (lib.strings.splitString " " srcs) ++ [main]; + skip = ["_" ".direnv"]; filter = file: type: if elem (baseNameOf file) skip then false - # TODO: this means any new directory will cause a rebuild. this bad. - # i should recurse into the directory and match against the srcsr + # TODO: this means any new directory will cause a rebuild. this bad. i + # should recurse into the directory and match against the srcs. for now I + # just use postUnpack to delete empty dirs else if type == "directory" then true - else if type == "regular" then builtins.elem file srcs_ + else if type == "regular" then (builtins.elem file srcs_) else false; - deps = pkgset: - if langDeps != null then - private.selectAttrs (lib.strings.splitString " " langDeps) pkgset - else - []; -in stdenv.mkDerivation rec { - inherit name buildPhase; + + # clean up empty dirs + postUnpack = "find . -type d -empty -delete"; + src = lib.sources.cleanSourceWith {inherit filter; src = lib.sources.cleanSource root;}; - BIZ_ROOT = src; - buildInputs = [ (private.${packageSet} deps) ]; - installPhase = '' - mkdir -p $out/bin && cp ${name} $out/bin - ''; + + langdeps_ = pkgset: + if langdeps == null || langdeps == [] then + [] + else + private.selectAttrs (lib.strings.splitString " " langdeps) pkgset; + sysdeps_ = + if sysdeps == null || sysdeps == [] then + [] + else + private.selectAttrs (lib.strings.splitString " " sysdeps) private.nixpkgs.pkgs; + BIZ_ROOT = "."; +in { + base = stdenv.mkDerivation rec { + inherit name src BIZ_ROOT postUnpack; + buildInputs = [ (private.${packageSet} langdeps_) ] ++ sysdeps_; + installPhase = "install -D ${name} $out/bin/${name}"; + buildPhase = compileLine; + }; + + python = buildPythonApplication rec { + inherit name src BIZ_ROOT postUnpack; + propagatedBuildInputs = [ (private.${packageSet} langdeps_) ] ++ sysdeps_; + buildInputs = sysdeps_; + checkInputs = [(private.pythonWith (p: with p; [black mypy pylint]))]; + checkPhase = '' + black --quiet --exclude 'setup\.py$' --check . + pylint --errors-only . + mypy --strict --no-error-summary --exclude 'setup\.py$' . + python -m ${main} test + ''; + preBuild = '' + # initialize possibly-empty subdirectories as python modules + find . -type d -exec touch {}/__init__.py \; + # generate a minimal setup.py + cat > setup.py << EOF + from setuptools import setup, find_packages + setup( + name='${name}', + entry_points={'console_scripts':['${name} = ${main}:main']}, + version='0.0.0', + url='git://simatime.com/biz.git', + author='dev', + author_email='dev@simatime.com', + description='nil', + packages=find_packages(), + install_requires=[], + ) + EOF + ''; + pythonImportsCheck = [main]; # sanity check + }; } diff --git a/Biz/Bild/Example.py b/Biz/Bild/Example.py new file mode 100644 index 0000000..78a8a6a --- /dev/null +++ b/Biz/Bild/Example.py @@ -0,0 +1,30 @@ +# : out example +# : dep cryptography +import sys +from typing import List + +from cryptography.fernet import Fernet + + +def cryptic_hello(name: str) -> str: + "Example taken from `cryptography` docs." + key = Fernet.generate_key() + f = Fernet(key) + token = f.encrypt(hello(name).encode("utf-8")) + ret = f.decrypt(token).decode("utf-8") + assert ret == hello(name) + return ret + + +def hello(name: str) -> str: + return f"Hello {name}" + + +def main() -> None: + if "test" in sys.argv: + print("testing success") + print(cryptic_hello("world")) + + +if __name__ == "__main__": + main() diff --git a/Biz/Ide/repl b/Biz/Ide/repl index 0d0a08d..e62c7b2 100755 --- a/Biz/Ide/repl +++ b/Biz/Ide/repl @@ -29,7 +29,7 @@ fi langdeps=$(jq --raw-output '.[].langdeps | join(" ")' <<< $json) sysdeps=$(jq --raw-output '.[].sysdeps | join(" ")' <<< $json) exts=$(jq --raw-output '.[].namespace.ext' <<< $json | sort | uniq) - packageSet=$(jp --raw-output '.[].packageSet' <<< $json) + packageSet=$(jq --raw-output '.[].packageSet' <<< $json) BILD="(import ${BIZ_ROOT:?}/Biz/Bild.nix {})" for lib in ${sysdeps[@]}; do flags+=(--packages "$BILD.private.nixpkgs.${lib}") @@ -62,6 +62,10 @@ fi flags+=(--packages "$BILD.private.nixpkgs.rustc") command=bash ;; + Py) + flags+=(--packages "$BILD.private.$packageSet (p: with p; [$langdeps])") + command=${CMD:-"python -i $targets"} + ;; *) echo "unsupported targets: ${targets[@]}" exit 1 diff --git a/Biz/Lint.hs b/Biz/Lint.hs index d44e465..d7db9d6 100644 --- a/Biz/Lint.hs +++ b/Biz/Lint.hs @@ -123,22 +123,6 @@ hlint = fixArgs = Nothing } -pylint :: Linter -pylint = - Linter - { exe = "pylint", - checkArgs = ["--disable=invalid-name"], - fixArgs = Nothing - } - -black :: Linter -black = - Linter - { exe = "black", - checkArgs = ["--check"], - fixArgs = Just [] - } - deadnix :: Linter deadnix = Linter @@ -180,7 +164,7 @@ runOne mode root cwd path_ = results +> traverse_ printResult >> results [ lint mode ormolu path_, lint mode hlint path_ ] - Just (Namespace _ Py) -> [lint mode pylint path_, lint mode black path_] + Just (Namespace _ Py) -> [] -- python linters are run in Builder.nix checkPhase Just (Namespace _ Sh) -> [lint mode shellcheck path_] Just (Namespace _ Nix) -> [lint mode deadnix path_] Just (Namespace _ Scm) -> [pure <| NoOp path_] diff --git a/Biz/Namespace.hs b/Biz/Namespace.hs index 72e3fa1..f0e3b32 100644 --- a/Biz/Namespace.hs +++ b/Biz/Namespace.hs @@ -9,6 +9,7 @@ module Biz.Namespace Ext (..), fromPath, toPath, + toModule, fromHaskellContent, fromHaskellModule, toHaskellModule, @@ -85,18 +86,19 @@ fromHaskellContent c = case Regex.findFirstInfix haskellModule c of </ (Regex.string "\nmodule " *> Regex.many (name <|> dot)) <*> pure Hs +toModule (Namespace parts Hs) = joinWith "." parts +toModule (Namespace parts Py) = joinWith "." parts +toModule (Namespace parts Scm) = "(" ++ joinWith " " parts ++ ")" +toModule (Namespace {..}) = panic <| "toModule not implemented for " <> show ext + toHaskellModule :: Namespace -> String -toHaskellModule (Namespace parts Hs) = joinWith "." parts -toHaskellModule (Namespace {..}) = - panic <| "can't convert " <> show ext <> " to a Haskell module" +toHaskellModule = toModule fromHaskellModule :: String -> Namespace fromHaskellModule s = Namespace (List.splitOn "." s) Hs toSchemeModule :: Namespace -> String -toSchemeModule (Namespace parts Scm) = "(" ++ joinWith " " parts ++ ")" -toSchemeModule (Namespace {..}) = - panic <| "can't convert " <> show ext <> " to a Scheme module" +toSchemeModule = toModule dot :: Regex.RE Char String dot = Regex.some <| Regex.sym '.' @@ -1,42 +1,49 @@ #!/usr/bin/env python +# : out ava +# : dep transformers +# : dep torch import transformers import torch import sys + # import sleekxmpp -#model_name = "EleutherAI/gpt-neox-20b" +# model_name = "EleutherAI/gpt-neox-20b" model_name = "EleutherAI/gpt-j-6B" if torch.cuda.is_available(): device = "cuda:0" else: raise ValueError("no cuda") + sys.exit(1) tokenizer = transformers.AutoTokenizer.from_pretrained(model_name) model = transformers.AutoModelForCausalLM.from_pretrained( - model_name, - device_map="auto", - load_in_8bit=True, - pad_token_id=tokenizer.eos_token_id, - revision="float16", - torch_dtype=torch.float16, - low_cpu_mem_usage=True, + model_name, + device_map="auto", + load_in_8bit=True, + pad_token_id=tokenizer.eos_token_id, + revision="float16", + torch_dtype=torch.float16, + low_cpu_mem_usage=True, ) # set attention_mask and pad_token_id + def gen(txt): - input_ids = tokenizer(txt, return_tensors="pt").input_ids.to('cuda') + input_ids = tokenizer(txt, return_tensors="pt").input_ids.to("cuda") outputs = model.generate( - input_ids=input_ids, - max_length=1024, - temperature=0.7, + input_ids=input_ids, + max_length=1024, + temperature=0.7, ) result = tokenizer.batch_decode(outputs, skip_special_tokens=True) result = "".join(result) return result + # Get user input and generate a response while True: user_input = input("ben: ") |