diff options
Diffstat (limited to 'Biz/Bild/Deps/exllama.nix')
-rw-r--r-- | Biz/Bild/Deps/exllama.nix | 64 |
1 file changed, 64 insertions, 0 deletions
# Nix derivation for the exllama Python package: a memory-efficient rewrite
# of the HF transformers Llama implementation for use with quantized weights.
# Built from a pinned source set (`sources.exllama`) rather than a fetcher.
{ lib
, sources # pinned sources; provides sources.exllama (src + rev)
, buildPythonPackage
, pythonOlder
, torch # tested on 2.0.1 and 2.1.0 (nightly) with cu118
, safetensors
, sentencepiece
, ninja
, cudaPackages
, addOpenGLRunpath
, which
, gcc11 # cuda 11.7 requires g++ <12
}:

buildPythonPackage rec {
  pname = "exllama";
  # Version tracks the pinned git revision directly.
  version = sources.exllama.rev;
  format = "setuptools";
  disabled = pythonOlder "3.9";

  src = sources.exllama;

  # I only care about compiling for the Ampere architecture, which is what my
  # RTX 3090 TI is, and for some reason (nix sandbox?) the torch extension
  # builder cannot autodetect the arch.
  TORCH_CUDA_ARCH_LIST = "8.0;8.6+PTX";

  # Point the torch extension builder at nvcc.
  CUDA_HOME = "${cudaPackages.cuda_nvcc}";

  nativeBuildInputs = [
    gcc11
    which
    addOpenGLRunpath
    cudaPackages.cuda_nvcc
    cudaPackages.cuda_cudart
  ];

  # ninja and the toolkit are propagated because the torch extension is
  # (re)built at import time, not only at package build time — TODO confirm.
  propagatedBuildInputs = [
    torch
    safetensors
    sentencepiece
    ninja
    cudaPackages.cudatoolkit
  ];

  doCheck = false; # no tests currently
  pythonImportsCheck = [
    "exllama"
    "exllama.cuda_ext"
    "exllama.generator"
    "exllama.lora"
    "exllama.model"
    "exllama.tokenizer"
  ];

  meta = with lib; {
    # nixpkgs convention: description is a single short line.
    description =
      "Memory-efficient rewrite of the HF transformers Llama implementation for quantized weights";
    homepage = "https://github.com/jllllll/exllama";
    license = licenses.mit;
    maintainers = with maintainers; [ bsima ];
  };
}