diff options
author | Ben Sima <ben@bsima.me> | 2023-08-10 21:11:23 -0400 |
---|---|---|
committer | Ben Sima <ben@bsima.me> | 2023-08-16 14:29:43 -0400 |
commit | 247678afc7c74c98f64e8d19f67355d128946974 (patch) | |
tree | 6bde2696aab9029f67ff6eb136f26b81bcd5a4c4 /Biz/Bild/Deps | |
parent | 4e67ef22a7508150798413081bf8a5bb4adab6e5 (diff) |
Add llama-cpp and exllama
Diffstat (limited to 'Biz/Bild/Deps')
-rw-r--r-- | Biz/Bild/Deps/exllama.nix | 64 | ||||
-rw-r--r-- | Biz/Bild/Deps/llama-cpp.nix | 41 |
2 files changed, 105 insertions, 0 deletions
diff --git a/Biz/Bild/Deps/exllama.nix b/Biz/Bild/Deps/exllama.nix
new file mode 100644
index 0000000..54d6df1
--- /dev/null
+++ b/Biz/Bild/Deps/exllama.nix
@@ -0,0 +1,64 @@
+# exllama Python package (with its CUDA extension), built from `sources.exllama`.
+{ lib
+, sources
+, buildPythonPackage
+, pythonOlder
+, fetchFromGitHub # NOTE(review): unused in this file; src comes from `sources`
+, torch # tested on 2.0.1 and 2.1.0 (nightly) with cu118
+, safetensors
+, sentencepiece
+, ninja
+, cudaPackages
+, addOpenGLRunpath
+, which
+, gcc11 # cuda 11.7 requires g++ <12
+}:
+
+buildPythonPackage {
+  pname = "exllama";
+  version = sources.exllama.rev; # the source's `rev` doubles as the version
+  format = "setuptools";
+  disabled = pythonOlder "3.9";
+
+  src = sources.exllama;
+
+  # I only care about compiling for the Ampere architecture, which is what my
+  # RTX 3090 TI is, and for some reason (nix sandbox?) the torch extension
+  # builder cannot autodetect the arch.
+  TORCH_CUDA_ARCH_LIST = "8.0;8.6+PTX";
+
+  CUDA_HOME = "${cudaPackages.cuda_nvcc}"; # store path of the nvcc package
+
+  nativeBuildInputs = [
+    gcc11
+    which
+    addOpenGLRunpath
+    cudaPackages.cuda_nvcc
+    cudaPackages.cuda_cudart # NOTE(review): a library; presumably belongs in buildInputs — confirm
+  ];
+
+  propagatedBuildInputs = [
+    torch safetensors sentencepiece ninja
+    cudaPackages.cudatoolkit
+  ];
+
+  doCheck = false; # no tests currently
+  pythonImportsCheck = [
+    "exllama"
+    "exllama.cuda_ext"
+    "exllama.generator"
+    "exllama.lora"
+    "exllama.model"
+    "exllama.tokenizer"
+  ];
+
+  meta = with lib; {
+    description = ''
+      A more memory-efficient rewrite of the HF transformers implementation of
+      Llama for use with quantized weights.
+    '';
+    homepage = "https://github.com/jllllll/exllama";
+    license = licenses.mit;
+    maintainers = with maintainers; [ bsima ];
+  };
+}
diff --git a/Biz/Bild/Deps/llama-cpp.nix b/Biz/Bild/Deps/llama-cpp.nix
new file mode 100644
index 0000000..85bd778
--- /dev/null
+++ b/Biz/Bild/Deps/llama-cpp.nix
@@ -0,0 +1,41 @@
+# llama.cpp built with cuBLAS, MPI and the server; installs `llama` and `llama-server`.
+{ stdenv
+, pkgs # NOTE(review): unused in this file
+, sources
+, python3
+, cmake
+, pkgconfig # NOTE(review): alias of pkg-config; newer nixpkgs drop it — confirm against the pin
+, openmpi
+, cudaPackages
+}:
+let # python env whose interpreter replaces `/usr/bin/env python` in ./*.py (see postPatch)
+  llama-python = python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
+in stdenv.mkDerivation {
+  pname = "llama.cpp"; # pname (not name) so that `version` ends up in the derivation name
+  version = sources.llama-cpp.rev;
+  src = sources.llama-cpp;
+
+  postPatch = ''
+    substituteInPlace ./ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+    substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
+  '';
+
+  nativeBuildInputs = [ cmake pkgconfig ];
+  buildInputs = [ openmpi cudaPackages.cudatoolkit ];
+
+  cmakeFlags = [
+    "-DLLAMA_BUILD_SERVER=ON"
+    "-DLLAMA_MPI=ON"
+    "-DBUILD_SHARED_LIBS=ON"
+    "-DCMAKE_SKIP_BUILD_RPATH=ON"
+    "-DLLAMA_CUBLAS=ON"
+  ];
+
+  postInstall = ''
+    mv $out/bin/main $out/bin/llama
+    mv $out/bin/server $out/bin/llama-server
+  ''; # the generic main/server binaries get llama-specific names
+
+  meta.mainProgram = "llama";
+}