author    Ben Sima <ben@bsima.me>  2024-12-02 07:52:56 -0500
committer Ben Sima <ben@bsima.me>  2024-12-20 21:09:24 -0500
commit    18956baad9cde85bc55bd0113092e51786d9e1ee (patch)
tree      07e987380a05db04c4814f4ebbc1f5780817899c /Biz/Bild
parent    bc78e72960dee2721c3648e8061cb543f775710b (diff)
Delete Biz/Mynion.py and exllama
Mynion was a prototype; while it was cool and worked well, it is now unused, and it drags magma into the build, which takes forever. I have settled on ollama for local inference and a hosted inference API for production.
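For the record, local inference through ollama needs nothing beyond its HTTP API. A minimal sketch in Python (assumptions, not part of this repo: the ollama daemon is running on its default port 11434, and a model named "llama3" has already been pulled):

    import json
    import urllib.request

    # Ask the local ollama server for a non-streaming completion.
    # "llama3" is a placeholder model name; use whatever `ollama pull` fetched.
    req = urllib.request.Request(
        "http://localhost:11434/api/generate",
        data=json.dumps({
            "model": "llama3",
            "prompt": "Say hello in one word.",
            "stream": False,
        }).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        print(json.loads(resp.read())["response"])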
Diffstat (limited to 'Biz/Bild')
-rw-r--r--  Biz/Bild/Deps/exllama.nix  54
-rw-r--r--  Biz/Bild/Python.nix         2
-rw-r--r--  Biz/Bild/Sources.json      12
3 files changed, 0 insertions(+), 68 deletions(-)
diff --git a/Biz/Bild/Deps/exllama.nix b/Biz/Bild/Deps/exllama.nix
deleted file mode 100644
index 434e9a9..0000000
--- a/Biz/Bild/Deps/exllama.nix
+++ /dev/null
@@ -1,54 +0,0 @@
-{ lib, sources, buildPythonPackage, pythonOlder
-, torch # tested on 2.0.1 and 2.1.0 (nightly) with cu118
-, safetensors, sentencepiece, ninja, cudaPackages, addOpenGLRunpath, which
-, libGL, gcc11 # cuda 11.7 requires g++ <12
-}:
-
-buildPythonPackage rec {
-  pname = "exllama";
-  version = sources.exllama.rev;
-  format = "setuptools";
-  disabled = pythonOlder "3.9";
-
-  src = sources.exllama;
-
-  # I only care about compiling for the Ampere architecture, which is what my
-  # RTX 3090 TI is, and for some reason (nix sandbox?) the torch extension
-  # builder
-  # cannot autodetect the arch
-  TORCH_CUDA_ARCH_LIST = "8.0;8.6+PTX";
-
-  CUDA_HOME = "${cudaPackages.cuda_nvcc}";
-
-  nativeBuildInputs = [
-    gcc11
-    which
-    libGL
-    addOpenGLRunpath
-    cudaPackages.cuda_nvcc
-    cudaPackages.cuda_cudart
-  ];
-
-  propagatedBuildInputs =
-    [ torch safetensors sentencepiece ninja cudaPackages.cudatoolkit ];
-
-  doCheck = false; # no tests currently
-  pythonImportsCheck = [
-    "exllama"
-    "exllama.cuda_ext"
-    "exllama.generator"
-    "exllama.lora"
-    "exllama.model"
-    "exllama.tokenizer"
-  ];
-
-  meta = with lib; {
-    description = ''
-      A more memory-efficient rewrite of the HF transformers implementation of
-      Llama for use with quantized weights.
-    '';
-    homepage = "https://github.com/jllllll/exllama";
-    license = licenses.mit;
-    maintainers = with maintainers; [ bsima ];
-  };
-}
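(An aside on the TORCH_CUDA_ARCH_LIST workaround deleted above: torch's extension builder honors that environment variable instead of probing the GPU, which is exactly what a sandboxed build needs. A minimal sketch of the mechanism, assuming a CUDA-enabled torch install; the extension name and source files are hypothetical placeholders:)

    import os

    # Pin target architectures up front: inside a sandbox no GPU is visible,
    # so torch's autodetection would fail without this.
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6+PTX"  # Ampere + PTX fallback

    from torch.utils.cpp_extension import load

    # load() compiles and imports a CUDA extension at runtime; the file names
    # here are placeholders, not files from this repo.
    ext = load(name="my_ext", sources=["my_ext.cpp", "my_ext_kernel.cu"])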
diff --git a/Biz/Bild/Python.nix b/Biz/Bild/Python.nix
index 2385987..50a1779 100644
--- a/Biz/Bild/Python.nix
+++ b/Biz/Bild/Python.nix
@@ -4,8 +4,6 @@ _self: super: {
   with pysuper.pkgs.python3Packages;
   let dontCheck = p: p.overridePythonAttrs (_: { doCheck = false; });
   in {
-    exllama = callPackage ./Deps/exllama.nix { };
-    exllamav2 = callPackage ./Deps/exllamav2.nix { };
     interegular = callPackage ./Deps/interegular.nix { };
     llm-ollama = callPackage ./Deps/llm-ollama.nix { };
     mypy = dontCheck pysuper.mypy;
diff --git a/Biz/Bild/Sources.json b/Biz/Bild/Sources.json
index 3b1e4fd..c12b6ce 100644
--- a/Biz/Bild/Sources.json
+++ b/Biz/Bild/Sources.json
@@ -25,18 +25,6 @@
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz",
"version": "0.7.0.8"
},
- "exllama": {
- "branch": "master",
- "description": "A more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights.",
- "homepage": null,
- "owner": "jllllll",
- "repo": "exllama",
- "rev": "3ddf3bd39bdff330623f3740cda4ae1537ef86d9",
- "sha256": "0g87xm71jmw5bl4ya5dbk72fghhhwvrjqspaayq7zass16jixr1d",
- "type": "tarball",
- "url": "https://github.com/jllllll/exllama/archive/3ddf3bd39bdff330623f3740cda4ae1537ef86d9.tar.gz",
- "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
- },
"ghc-exactprint": {
"branch": "master",
"description": "GHC version of haskell-src-exts exactPrint",