path: root/Biz/Bild/Deps/exllama.nix
Diffstat (limited to 'Biz/Bild/Deps/exllama.nix')
-rw-r--r--  Biz/Bild/Deps/exllama.nix  64
1 file changed, 64 insertions(+), 0 deletions(-)
diff --git a/Biz/Bild/Deps/exllama.nix b/Biz/Bild/Deps/exllama.nix
new file mode 100644
index 0000000..54d6df1
--- /dev/null
+++ b/Biz/Bild/Deps/exllama.nix
@@ -0,0 +1,64 @@
+{ lib
+, sources
+, buildPythonPackage
+, pythonOlder
+, fetchFromGitHub
+, torch # tested on 2.0.1 and 2.1.0 (nightly) with cu118
+, safetensors
+, sentencepiece
+, ninja
+, cudaPackages
+, addOpenGLRunpath
+, which
+, gcc11 # cuda 11.7 requires g++ <12
+}:
+
+buildPythonPackage rec {
+ pname = "exllama";
+ version = sources.exllama.rev;
+ format = "setuptools";
+ disabled = pythonOlder "3.9";
+
+ src = sources.exllama;
+
+ # I only care about compiling for the Ampere architecture, which is what
+ # my RTX 3090 Ti is. For some reason (the nix sandbox, perhaps?) the
+ # torch extension builder cannot autodetect the arch, so it has to be
+ # set explicitly here.
+ TORCH_CUDA_ARCH_LIST = "8.0;8.6+PTX";
+
+ CUDA_HOME = "${cudaPackages.cuda_nvcc}";
+
+ nativeBuildInputs = [
+ gcc11
+ which
+ addOpenGLRunpath
+ cudaPackages.cuda_nvcc
+ cudaPackages.cuda_cudart
+ ];
+
+ propagatedBuildInputs = [
+ torch safetensors sentencepiece ninja
+ cudaPackages.cudatoolkit
+ ];
+
+ doCheck = false; # no tests currently
+ pythonImportsCheck = [
+ "exllama"
+ "exllama.cuda_ext"
+ "exllama.generator"
+ "exllama.lora"
+ "exllama.model"
+ "exllama.tokenizer"
+ ];
+
+ meta = with lib; {
+ description = ''
+ A more memory-efficient rewrite of the HF transformers implementation of
+ Llama for use with quantized weights.
+ '';
+ homepage = "https://github.com/jllllll/exllama";
+ license = licenses.mit;
+ maintainers = with maintainers; [ bsima ];
+ };
+}
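
For reference, a minimal sketch of how a derivation like this is typically wired into a package set. The names below (`pkgs`, a niv/npins-style `sources` attrset) are assumptions for illustration and are not part of this commit:

  # Hypothetical call site; `pkgs` is a nixpkgs instance whose torch is built
  # with CUDA support, and `sources` pins the exllama repo (niv/npins style).
  { pkgs, sources }:
  pkgs.python3Packages.callPackage ./Biz/Bild/Deps/exllama.nix {
    inherit sources;
  }

callPackage fills the remaining arguments (buildPythonPackage, torch, safetensors, sentencepiece, ninja, cudaPackages, gcc11, etc.) from the python package set, falling back to pkgs.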
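The TORCH_CUDA_ARCH_LIST above is hard-coded for Ampere (compute capability 8.0/8.6). A consumer with a different GPU could override it instead of editing the file; a hedged sketch, where the `exllama` attribute and the "8.9+PTX" target (Ada Lovelace, e.g. an RTX 4090) are illustrative assumptions:

  # Hypothetical override; pick the arch list matching the GPU actually present.
  exllama.overridePythonAttrs (old: {
    TORCH_CUDA_ARCH_LIST = "8.9+PTX";
  })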