From 247678afc7c74c98f64e8d19f67355d128946974 Mon Sep 17 00:00:00 2001
From: Ben Sima
Date: Thu, 10 Aug 2023 21:11:23 -0400
Subject: Add llama-cpp and exllama

---
 Biz/Bild/Deps/exllama.nix | 71 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 Biz/Bild/Deps/exllama.nix

(limited to 'Biz/Bild/Deps/exllama.nix')

diff --git a/Biz/Bild/Deps/exllama.nix b/Biz/Bild/Deps/exllama.nix
new file mode 100644
index 0000000..54d6df1
--- /dev/null
+++ b/Biz/Bild/Deps/exllama.nix
@@ -0,0 +1,71 @@
+# Nix derivation for the exllama Python package. The build is configured for
+# CUDA (nvcc) and targets the Ampere architecture; see the notes below.
+{ lib
+, sources
+, buildPythonPackage
+, pythonOlder
+, fetchFromGitHub # unused below; kept so callers that pass it keep working
+, torch # tested on 2.0.1 and 2.1.0 (nightly) with cu118
+, safetensors
+, sentencepiece
+, ninja
+, cudaPackages
+, addOpenGLRunpath
+, which
+, gcc11 # cuda 11.7 requires g++ <12
+}:
+
+buildPythonPackage rec {
+  pname = "exllama";
+  # The pinned source revision doubles as the version string.
+  version = sources.exllama.rev;
+  format = "setuptools";
+  disabled = pythonOlder "3.9";
+
+  src = sources.exllama;
+
+  # I only care about compiling for the Ampere architecture, which is what my
+  # RTX 3090 TI is, and for some reason (nix sandbox?) the torch extension
+  # builder cannot autodetect the arch
+  TORCH_CUDA_ARCH_LIST = "8.0;8.6+PTX";
+
+  # Set to the nvcc package's store path (presumably read by the torch
+  # extension builder -- confirm).
+  CUDA_HOME = "${cudaPackages.cuda_nvcc}";
+
+  # NOTE(review): addOpenGLRunpath is listed here but no phase in this file
+  # invokes it -- confirm whether a fixup step is still needed.
+  nativeBuildInputs = [
+    gcc11
+    which
+    addOpenGLRunpath
+    cudaPackages.cuda_nvcc
+    cudaPackages.cuda_cudart
+  ];
+
+  propagatedBuildInputs = [
+    torch safetensors sentencepiece ninja
+    cudaPackages.cudatoolkit
+  ];
+
+  doCheck = false; # no tests currently
+  pythonImportsCheck = [
+    "exllama"
+    "exllama.cuda_ext"
+    "exllama.generator"
+    "exllama.lora"
+    "exllama.model"
+    "exllama.tokenizer"
+  ];
+
+  meta = with lib; {
+    # Short one-line summary; the full text lives in longDescription.
+    description = "Memory-efficient Llama implementation for use with quantized weights";
+    longDescription = ''
+      A more memory-efficient rewrite of the HF transformers implementation of
+      Llama for use with quantized weights.
+    '';
+    homepage = "https://github.com/jllllll/exllama";
+    license = licenses.mit;
+    maintainers = with maintainers; [ bsima ];
+  };
+}
-- 
cgit v1.2.3