authorBen Sima <ben@bsima.me>2022-12-28 19:53:55 -0500
committerBen Sima <ben@bsima.me>2022-12-28 19:53:55 -0500
commitc3b955145998d39df39370671585a271ca6f80f0 (patch)
tree33614e03c966d205e2eadaf4dd183f52618afebc
parent11e480c4b13808f12bc3f5db2765cebebf1aaf46 (diff)
Get ava GPT chatbot prototype working
Mostly this required packaging up some deps, but also had to recompile stuff with CUDA support.
-rw-r--r--  Biz/Bild.nix                    10
-rw-r--r--  Biz/Bild/Deps.nix                7
-rw-r--r--  Biz/Bild/Deps/accelerate.nix    24
-rw-r--r--  Biz/Bild/Deps/bitsandbytes.nix  94
-rw-r--r--  Biz/Bild/Nixpkgs.nix             3
-rw-r--r--  Biz/Cloud/Web.nix                2
-rw-r--r--  Biz/Dev/Dns.nix                  2
-rw-r--r--  Biz/Dev/Networking.nix           2
-rwxr-xr-x  ava.py                          53
-rw-r--r--  default.nix                      1
10 files changed, 175 insertions, 23 deletions
diff --git a/Biz/Bild.nix b/Biz/Bild.nix
index 47c0ecb..8fe20db 100644
--- a/Biz/Bild.nix
+++ b/Biz/Bild.nix
@@ -146,9 +146,15 @@ rec {
haskell.packages.${constants.ghcCompiler}.fast-tags
hlint
lolcat
- nixops
+ #nixops # fails to build
ormolu
- (python3.withPackages(p: with p; [transformers pytorch]))
+ (private.nixpkgs.python3.withPackages(p:
+ with p; [
+ transformers
+ pytorch
+ private.nixpkgs.python3Packages.bitsandbytes
+ private.nixpkgs.python3Packages.accelerate
+ ]))
shellcheck
wemux
];
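As a quick sanity check of the package list above (a hedged sketch, not part of the commit), the resulting dev shell should be able to import all four Python modules against the CUDA-enabled interpreter:

    # hypothetical check, run inside the bild dev shell
    import importlib
    for mod in ("transformers", "torch", "bitsandbytes", "accelerate"):
        importlib.import_module(mod)
        print(mod, "imports ok")
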
diff --git a/Biz/Bild/Deps.nix b/Biz/Bild/Deps.nix
index a6e31a4..952a294 100644
--- a/Biz/Bild/Deps.nix
+++ b/Biz/Bild/Deps.nix
@@ -31,6 +31,13 @@ in rec
};
};
+ python3Packages = let
+ callPackage = super.python3Packages.callPackage;
+ in super.python3Packages // rec {
+ accelerate = callPackage ./Deps/accelerate.nix {};
+ bitsandbytes = callPackage ./Deps/bitsandbytes.nix {};
+ };
+
autogen = super.autogen.overrideAttrs (old: old // {
buildInputs = super.lib.lists.remove super.guile old.buildInputs ++ [super.guile_2_0];
});
diff --git a/Biz/Bild/Deps/accelerate.nix b/Biz/Bild/Deps/accelerate.nix
new file mode 100644
index 0000000..5d00f14
--- /dev/null
+++ b/Biz/Bild/Deps/accelerate.nix
@@ -0,0 +1,24 @@
+{ fetchFromGitHub
+, buildPythonPackage
+, numpy
+, packaging
+, psutil
+, pyyaml
+, torch
+}:
+
+buildPythonPackage rec {
+ name = "accelerate";
+ version = "0.15.0";
+ propagatedBuildInputs = [
+ numpy packaging psutil pyyaml torch
+ ];
+ doCheck = false;
+ src = fetchFromGitHub {
+ owner = "huggingface";
+ repo = "accelerate";
+ rev = "v${version}";
+ sha256 = "sha256-agfbOaa+Nm10HZkd2Y7zR3R37n+vLNsxCyxZax6O3Lo=";
+ };
+}
+
diff --git a/Biz/Bild/Deps/bitsandbytes.nix b/Biz/Bild/Deps/bitsandbytes.nix
new file mode 100644
index 0000000..8d4fa53
--- /dev/null
+++ b/Biz/Bild/Deps/bitsandbytes.nix
@@ -0,0 +1,94 @@
+{ lib
+, fetchgit
+, buildPythonPackage
+, pytorch
+, setuptools
+, typer
+, linuxPackages
+, pytest
+
+# CUDA
+, cudaSupport ? true
+, cudaPackages ? {}
+, addOpenGLRunpath ? null
+, gcc
+}:
+
+let
+ inherit (linuxPackages) nvidia_x11;
+ inherit (cudaPackages) cudatoolkit;
+ cudaVersion =
+ lib.strings.removeSuffix "0"
+ (lib.strings.concatStrings
+ (lib.strings.splitString "." cudatoolkit.version));
+ libraryPath = lib.strings.makeLibraryPath [
+ cudatoolkit
+ cudatoolkit.lib
+ nvidia_x11
+ ];
+in buildPythonPackage rec {
+ pname = "bitsandbytes";
+ version = "unstable-2022-12-21";
+
+ src = fetchgit {
+ url = "https://simatime.com/git/ben/bitsandbytes.git";
+ rev = "31ef751bea48eeee2e0e95aca79df8e59b4c25c4";
+ sha256 = "sha256-/a2NFVuMSvSDELeXyfDdU9FZEJcDxCLa4VbMWBUCRI0=";
+ };
+
+ # any exe must be wrapped with this; it doesn't affect the build, but it does
+ # show up in the logs, so keep it here for documentation purposes
+ LD_LIBRARY_PATH = libraryPath;
+
+ # this allows the build system to find nvcc
+ CUDA_HOME = "${cudatoolkit}";
+
+ preBuild = ''
+ make cuda11x CUDA_VERSION=${cudaVersion} GPP=${gcc}/bin/g++ -j3
+ ## this python doesn't know where to get libcuda, so explicitly tell it
+ ## this is probably really bad practice, fix this
+ substituteInPlace bitsandbytes/cuda_setup/main.py \
+ --replace "libcuda.so" "${nvidia_x11}/lib/libcuda.so"
+ '';
+
+ propagatedBuildInputs = [
+ (pytorch.override({ inherit cudaSupport;}))
+ setuptools
+ ] ++ lib.optionals cudaSupport [
+ typer
+ cudatoolkit
+ cudatoolkit.lib
+ nvidia_x11
+ ];
+
+ nativeBuildInputs = lib.optionals cudaSupport [
+ gcc
+ addOpenGLRunpath
+ ];
+
+ preFixup = lib.optionalString cudaSupport ''
+ find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib
+ do
+ addOpenGLRunpath "$lib"
+ patchelf \
+ --set-rpath "${libraryPath}" \
+ "$lib"
+ done
+ '';
+
+ checkInputs = [
+ pytest
+ ];
+
+ # disabled because the test suite cannot find any GPUs in the nix sandbox
+ doCheck = false;
+
+ pythonImportsCheck = [
+ "torch"
+ # this tries to load and verify the cuda device on import; since this runs
+ # in the nix sandbox it will fail with a bunch of errors, but curiously the
+ # build still succeeds...
+ "bitsandbytes"
+ ];
+}
+
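The substituteInPlace step above hard-codes the nvidia_x11 store path so that bitsandbytes' CUDA setup can load libcuda directly instead of searching the ambient library path. A rough illustration of what that setup does at import time (a sketch only; the literal path here is hypothetical, the patched file uses the nvidia_x11 store path):

    # sketch: roughly what bitsandbytes/cuda_setup/main.py does on import
    import ctypes
    libcuda = ctypes.CDLL("/run/opengl-driver/lib/libcuda.so")  # hypothetical path
    count = ctypes.c_int()
    libcuda.cuInit(0)
    libcuda.cuDeviceGetCount(ctypes.byref(count))
    print("visible CUDA devices:", count.value)

In the GPU-less nix sandbox this probe fails, which is why doCheck is disabled and why pythonImportsCheck logs errors even though the build succeeds.
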
diff --git a/Biz/Bild/Nixpkgs.nix b/Biz/Bild/Nixpkgs.nix
index 514a39d..e6d7d01 100644
--- a/Biz/Bild/Nixpkgs.nix
+++ b/Biz/Bild/Nixpkgs.nix
@@ -33,8 +33,5 @@ import sources.nixpkgs {
allowBroken = true;
allowUnfree = true;
cudaSupport = true;
- permittedInsecurePackages = [
- "python2.7-pyjwt-1.7.1"
- ];
};
}
diff --git a/Biz/Cloud/Web.nix b/Biz/Cloud/Web.nix
index cd2e745..d65ab00 100644
--- a/Biz/Cloud/Web.nix
+++ b/Biz/Cloud/Web.nix
@@ -1,4 +1,4 @@
-{ config, pkgs, ... }:
+{ config, ... }:
let
rootDomain = config.networking.domain;
diff --git a/Biz/Dev/Dns.nix b/Biz/Dev/Dns.nix
index e64e114..4720b39 100644
--- a/Biz/Dev/Dns.nix
+++ b/Biz/Dev/Dns.nix
@@ -1,4 +1,4 @@
-{ config, lib, pkgs, ... }:
+{ ... }:
diff --git a/Biz/Dev/Networking.nix b/Biz/Dev/Networking.nix
index efe42f1..c45cecb 100644
--- a/Biz/Dev/Networking.nix
+++ b/Biz/Dev/Networking.nix
@@ -1,4 +1,4 @@
-{ lib, ... }:
+{ ... }:
let
ports = import ../Cloud/Ports.nix;
diff --git a/ava.py b/ava.py
index 6ca3a3e..1f08241 100755
--- a/ava.py
+++ b/ava.py
@@ -1,25 +1,48 @@
#!/usr/bin/env python
-import transformers import AutoModelWithLMHead, AutoTokenizer, TextGenerator
+import transformers
+import torch
+import sys
# import sleekxmpp
-model_name = "gpt-neox-20b"
-model = AutoModelWithLMHead.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-generator = TextGenerator(model=model, tokenizer=tokenizer)
+#model_name = "EleutherAI/gpt-neox-20b"
+model_name = "EleutherAI/gpt-j-6B"
-def generate_response(input_text):
- response = model.generate(
- input_ids=input_text,
- max_length=1024,
- temperature=0.7,
- )
- return response
+if torch.cuda.is_available():
+ device = "cuda:0"
+else:
+ raise ValueError("no cuda")
+
+tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
+model = transformers.AutoModelForCausalLM.from_pretrained(
+ model_name,
+ device_map="auto",
+ load_in_8bit=True,
+ pad_token_id=tokenizer.eos_token_id,
+ revision="float16",
+ torch_dtype=torch.float16,
+ low_cpu_mem_usage=True,
+)
+
+# set attention_mask and pad_token_id
+
+def gen(txt):
+ input_ids = tokenizer(txt, return_tensors="pt").input_ids.to('cuda')
+ outputs = model.generate(
+ input_ids=input_ids,
+ max_length=1024,
+ temperature=0.7,
+ )
+ result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+ result = "".join(result)
+ return result
# Get user input and generate a response
-user_input = input("User: ")
-response = generate_response(user_input)
-print("Bot: ", response)
+while True:
+ user_input = input("ben: ")
+ response = gen(user_input)
+ print("bot: ", response)
+
"""
# Set up the XMPP client
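The "# set attention_mask and pad_token_id" comment left in ava.py marks a known gap: generate() is called without an explicit attention mask, which transformers warns about. One possible follow-up (a hedged sketch, not part of this commit, reusing the tokenizer and model defined in ava.py) is to forward the full tokenizer output:

    # hypothetical revision of gen() that forwards the attention mask
    def gen(txt):
        enc = tokenizer(txt, return_tensors="pt").to("cuda")
        outputs = model.generate(
            input_ids=enc.input_ids,
            attention_mask=enc.attention_mask,
            pad_token_id=tokenizer.eos_token_id,
            max_length=1024,
            temperature=0.7,
        )
        return "".join(tokenizer.batch_decode(outputs, skip_special_tokens=True))
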
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..d88873c
--- /dev/null
+++ b/default.nix
@@ -0,0 +1 @@
+import ./Biz/Bild.nix {}