From 18956baad9cde85bc55bd0113092e51786d9e1ee Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Mon, 2 Dec 2024 07:52:56 -0500 Subject: Delete Biz/Mynion.py and exllama Mynion was a prototype and while it was cool and worked well, it is unused and causing magma to build, which takes forever. I have settled on using ollama for local inference and a hosted inference API for production. --- Biz/Bild/Deps/exllama.nix | 54 ----------------------------------------------- Biz/Bild/Python.nix | 2 -- Biz/Bild/Sources.json | 12 ----------- 3 files changed, 68 deletions(-) delete mode 100644 Biz/Bild/Deps/exllama.nix (limited to 'Biz/Bild') diff --git a/Biz/Bild/Deps/exllama.nix b/Biz/Bild/Deps/exllama.nix deleted file mode 100644 index 434e9a9..0000000 --- a/Biz/Bild/Deps/exllama.nix +++ /dev/null @@ -1,54 +0,0 @@ -{ lib, sources, buildPythonPackage, pythonOlder -, torch # tested on 2.0.1 and 2.1.0 (nightly) with cu118 -, safetensors, sentencepiece, ninja, cudaPackages, addOpenGLRunpath, which -, libGL, gcc11 # cuda 11.7 requires g++ <12 -}: - -buildPythonPackage rec { - pname = "exllama"; - version = sources.exllama.rev; - format = "setuptools"; - disabled = pythonOlder "3.9"; - - src = sources.exllama; - - # I only care about compiling for the Ampere architecture, which is what my - # RTX 3090 TI is, and for some reason (nix sandbox?) 
the torch extension - # builder - # cannot autodetect the arch - TORCH_CUDA_ARCH_LIST = "8.0;8.6+PTX"; - - CUDA_HOME = "${cudaPackages.cuda_nvcc}"; - - nativeBuildInputs = [ - gcc11 - which - libGL - addOpenGLRunpath - cudaPackages.cuda_nvcc - cudaPackages.cuda_cudart - ]; - - propagatedBuildInputs = - [ torch safetensors sentencepiece ninja cudaPackages.cudatoolkit ]; - - doCheck = false; # no tests currently - pythonImportsCheck = [ - "exllama" - "exllama.cuda_ext" - "exllama.generator" - "exllama.lora" - "exllama.model" - "exllama.tokenizer" - ]; - - meta = with lib; { - description = '' - A more memory-efficient rewrite of the HF transformers implementation of - Llama for use with quantized weights. - ''; - homepage = "https://github.com/jllllll/exllama"; - license = licenses.mit; - maintainers = with maintainers; [ bsima ]; - }; -} diff --git a/Biz/Bild/Python.nix b/Biz/Bild/Python.nix index 2385987..50a1779 100644 --- a/Biz/Bild/Python.nix +++ b/Biz/Bild/Python.nix @@ -4,8 +4,6 @@ _self: super: { with pysuper.pkgs.python3Packages; let dontCheck = p: p.overridePythonAttrs (_: { doCheck = false; }); in { - exllama = callPackage ./Deps/exllama.nix { }; - exllamav2 = callPackage ./Deps/exllamav2.nix { }; interegular = callPackage ./Deps/interegular.nix { }; llm-ollama = callPackage ./Deps/llm-ollama.nix { }; mypy = dontCheck pysuper.mypy; diff --git a/Biz/Bild/Sources.json b/Biz/Bild/Sources.json index 3b1e4fd..c12b6ce 100644 --- a/Biz/Bild/Sources.json +++ b/Biz/Bild/Sources.json @@ -25,18 +25,6 @@ "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz", "version": "0.7.0.8" }, - "exllama": { - "branch": "master", - "description": "A more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights.", - "homepage": null, - "owner": "jllllll", - "repo": "exllama", - "rev": "3ddf3bd39bdff330623f3740cda4ae1537ef86d9", - "sha256": "0g87xm71jmw5bl4ya5dbk72fghhhwvrjqspaayq7zass16jixr1d", - "type": "tarball", - "url": 
"https://github.com/jllllll/exllama/archive/3ddf3bd39bdff330623f3740cda4ae1537ef86d9.tar.gz", - "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz" - }, "ghc-exactprint": { "branch": "master", "description": "GHC version of haskell-src-exts exactPrint", -- cgit v1.2.3