10 files changed, 175 insertions, 23 deletions
diff --git a/Biz/Bild.nix b/Biz/Bild.nix
index 47c0ecb..8fe20db 100644
--- a/Biz/Bild.nix
+++ b/Biz/Bild.nix
@@ -146,9 +146,15 @@ rec {
       haskell.packages.${constants.ghcCompiler}.fast-tags
       hlint
       lolcat
-      nixops
+      #nixops # fails to build
       ormolu
-      (python3.withPackages(p: with p; [transformers pytorch]))
+      (private.nixpkgs.python3.withPackages(p:
+      with p; [
+        transformers
+        pytorch
+        private.nixpkgs.python3Packages.bitsandbytes
+        private.nixpkgs.python3Packages.accelerate
+      ]))
       shellcheck
       wemux
     ];
diff --git a/Biz/Bild/Deps.nix b/Biz/Bild/Deps.nix
index a6e31a4..952a294 100644
--- a/Biz/Bild/Deps.nix
+++ b/Biz/Bild/Deps.nix
@@ -31,6 +31,13 @@ in rec
     };
   };
 
+  python3Packages = let
+    callPackage = super.python3Packages.callPackage;
+  in super.python3Packages // rec {
+    accelerate = callPackage ./Deps/accelerate.nix {};
+    bitsandbytes = callPackage ./Deps/bitsandbytes.nix {};
+  };
+
   autogen = super.autogen.overrideAttrs (old: old // {
     buildInputs = super.lib.lists.remove super.guile old.buildInputs ++ [super.guile_2_0];
   });
diff --git a/Biz/Bild/Deps/accelerate.nix b/Biz/Bild/Deps/accelerate.nix
new file mode 100644
index 0000000..5d00f14
--- /dev/null
+++ b/Biz/Bild/Deps/accelerate.nix
@@ -0,0 +1,24 @@
+{ fetchFromGitHub
+, buildPythonPackage
+, numpy
+, packaging
+, psutil
+, pyyaml
+, torch
+}:
+
+buildPythonPackage rec {
+  name = "accelerate";
+  version = "0.15.0";
+  propagatedBuildInputs = [
+    numpy packaging psutil pyyaml torch
+  ];
+  doCheck = false;
+  src = fetchFromGitHub {
+    owner = "huggingface";
+    repo = "accelerate";
+    rev = "v${version}";
+    sha256 = "sha256-agfbOaa+Nm10HZkd2Y7zR3R37n+vLNsxCyxZax6O3Lo=";
+  };
+}
+
diff --git a/Biz/Bild/Deps/bitsandbytes.nix b/Biz/Bild/Deps/bitsandbytes.nix
new file mode 100644
index 0000000..8d4fa53
--- /dev/null
+++ b/Biz/Bild/Deps/bitsandbytes.nix
@@ -0,0 +1,94 @@
+{ lib
+, fetchgit
+, buildPythonPackage
+, pytorch
+, setuptools
+, typer
+, linuxPackages
+, pytest
+
+# CUDA
+, cudaSupport ? true
+, cudaPackages ? {}
+, addOpenGLRunpath ? null
+, gcc
+}:
+
+let
+  inherit (linuxPackages) nvidia_x11;
+  inherit (cudaPackages) cudatoolkit;
+  cudaVersion =
+    lib.strings.removeSuffix "0"
+      (lib.strings.concatStrings
+        (lib.strings.splitString "." cudatoolkit.version));
+  libraryPath = lib.strings.makeLibraryPath [
+      cudatoolkit
+      cudatoolkit.lib
+      nvidia_x11
+  ];
+in buildPythonPackage rec {
+  pname = "bitsandbytes";
+  version = "unstable-2022-12-21";
+
+  src = fetchgit {
+    url = "https://simatime.com/git/ben/bitsandbytes.git";
+    rev = "31ef751bea48eeee2e0e95aca79df8e59b4c25c4";
+    sha256 = "sha256-/a2NFVuMSvSDELeXyfDdU9FZEJcDxCLa4VbMWBUCRI0=";
+  };
+
+  # any exe must be wrapped with this; it doesn't affect the build but it does
+  # show up in the logs, so keep it here for documentation purposes
+  LD_LIBRARY_PATH = libraryPath;
+
+  # this allows the build system to find nvcc
+  CUDA_HOME = "${cudatoolkit}";
+
+  preBuild = ''
+    make cuda11x CUDA_VERSION=${cudaVersion} GPP=${gcc}/bin/g++ -j3
+    ## this python doesn't know where to get libcuda, so explicitly tell it
+    ## this is probably really bad practice, fix this
+    substituteInPlace bitsandbytes/cuda_setup/main.py \
+      --replace "libcuda.so" "${nvidia_x11}/lib/libcuda.so"
+  '';
+
+  propagatedBuildInputs = [
+    (pytorch.override({ inherit cudaSupport;}))
+    setuptools
+  ] ++ lib.optionals cudaSupport [
+    typer
+    cudatoolkit
+    cudatoolkit.lib
+    nvidia_x11
+  ];
+
+  nativeBuildInputs = lib.optionals cudaSupport [
+    gcc
+    addOpenGLRunpath
+  ];
+
+  preFixup = lib.optionalString cudaSupport ''
+    find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib
+    do
+      addOpenGLRunpath "$lib"
+      patchelf \
+        --set-rpath "${libraryPath}" \
+        "$lib"
+    done
+  '';
+
+  checkInputs = [
+    pytest
+  ];
+
+  # disabled because the test suite cannot find any GPUs in the nix sandbox
+  doCheck = false;
+
+  pythonImportsCheck = [
+    "torch"
+    # this tries to load and verify the cuda device on import; since this runs
+    # in the nix sandbox it will fail with a bunch of errors, but curiously the
+    # build still succeeds...
+    "bitsandbytes"
+  ];
+}
+
diff --git a/Biz/Bild/Nixpkgs.nix b/Biz/Bild/Nixpkgs.nix
index 514a39d..e6d7d01 100644
--- a/Biz/Bild/Nixpkgs.nix
+++ b/Biz/Bild/Nixpkgs.nix
@@ -33,8 +33,5 @@ import sources.nixpkgs {
     allowBroken = true;
     allowUnfree = true;
     cudaSupport = true;
-    permittedInsecurePackages = [
-      "python2.7-pyjwt-1.7.1"
-    ];
   };
 }
diff --git a/Biz/Cloud/Web.nix b/Biz/Cloud/Web.nix
index cd2e745..d65ab00 100644
--- a/Biz/Cloud/Web.nix
+++ b/Biz/Cloud/Web.nix
@@ -1,4 +1,4 @@
-{ config, pkgs, ... }:
+{ config, ... }:
 
 let
   rootDomain = config.networking.domain;
diff --git a/Biz/Dev/Dns.nix b/Biz/Dev/Dns.nix
index e64e114..4720b39 100644
--- a/Biz/Dev/Dns.nix
+++ b/Biz/Dev/Dns.nix
@@ -1,4 +1,4 @@
-{ config, lib, pkgs, ... }:
+{ ... }:
 
 
 
diff --git a/Biz/Dev/Networking.nix b/Biz/Dev/Networking.nix
index efe42f1..c45cecb 100644
--- a/Biz/Dev/Networking.nix
+++ b/Biz/Dev/Networking.nix
@@ -1,4 +1,4 @@
-{ lib, ... }:
+{ ... }:
 
 let
   ports = import ../Cloud/Ports.nix;
diff --git a/ava.py b/ava.py
index 6ca3a3e..1f08241 100755
--- a/ava.py
+++ b/ava.py
@@ -1,25 +1,48 @@
 #!/usr/bin/env python
-import transformers import AutoModelWithLMHead, AutoTokenizer, TextGenerator
+import transformers
+import torch
+import sys
 # import sleekxmpp
 
-model_name = "gpt-neox-20b"
 
-model = AutoModelWithLMHead.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-generator = TextGenerator(model=model, tokenizer=tokenizer)
+#model_name = "EleutherAI/gpt-neox-20b"
+model_name = "EleutherAI/gpt-j-6B"
 
-def generate_response(input_text):
-  response = model.generate(
-    input_ids=input_text,
-    max_length=1024,
-    temperature=0.7,
-  )
-  return response
+if torch.cuda.is_available():
+    device = "cuda:0"
+else:
+    raise ValueError("no cuda")
+
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+model = transformers.AutoModelForCausalLM.from_pretrained(
+  model_name,
+  device_map="auto",
+  load_in_8bit=True,
+  pad_token_id=tokenizer.eos_token_id,
+  revision="float16",
+  torch_dtype=torch.float16,
+  low_cpu_mem_usage=True,
+)
+
+# NOTE(review): generate() below is not given an attention_mask; pad_token_id is set above
+
+def gen(txt):
+    input_ids = tokenizer(txt, return_tensors="pt").input_ids.to('cuda')
+    outputs = model.generate(
+      input_ids=input_ids,
+      max_length=1024,
+      temperature=0.7,
+    )
+    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    result = "".join(result)
+    return result
 
 # Get user input and generate a response
-user_input = input("User: ")
-response = generate_response(user_input)
-print("Bot: ", response)
+while True:
+    user_input = input("ben: ")
+    response = gen(user_input)
+    print("bot: ", response)
+
 
 """
 # Set up the XMPP client
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..d88873c
--- /dev/null
+++ b/default.nix
@@ -0,0 +1 @@
+import ./Biz/Bild.nix {}