author    Ben Sima <ben@bsima.me>  2024-05-11 14:28:09 -0400
committer Ben Sima <ben@bsima.me>  2024-05-20 22:15:49 -0400
commit    cceefa62d147594d43478e398bbaa9c630670935 (patch)
tree      458b468f271c156a7e91944e58d2861afd5e1dab /Biz
parent    dd0bc9610cf0e6842f5d5ac67a73f2fd6f4eba2f (diff)
Set up ollama API service
This supersedes exllama and tabbyAPI, which I could never get working fully. Unfortunately I had to switch to NixOS unstable to get all the Go builder stuff to work, so this introduces yet another source of version drift, but I guess that's inevitable and I should just learn to mitigate it with my nixpkgs shenanigans.
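For reference, the unstable channel ends up pinned right next to the existing 23.11 pin. The actual pin lives elsewhere in the repo, so the URLs and attribute names below are only placeholders, but the shape is roughly:

    # Hypothetical sketch of how nixos-unstable-small could sit next to the stable pin.
    # The real pin is managed elsewhere in this repo; the URLs here are placeholders.
    {
      nixos-23_11 = import (builtins.fetchTarball {
        url = "https://github.com/NixOS/nixpkgs/archive/nixos-23.11.tar.gz";
      }) { };
      nixos-unstable-small = import (builtins.fetchTarball {
        url = "https://github.com/NixOS/nixpkgs/archive/nixos-unstable-small.tar.gz";
      }) { };
    }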
Diffstat (limited to 'Biz')
-rw-r--r--  Biz/Bild.nix                   5
-rw-r--r--  Biz/Bild/Deps.nix              2
-rw-r--r--  Biz/Bild/Deps/ollama.nix     193
-rw-r--r--  Biz/Bild/Python.nix            3
-rw-r--r--  Biz/Dev/Beryllium.nix          3
-rw-r--r--  Biz/Dev/Beryllium/Ollama.nix  47
6 files changed, 249 insertions(+), 4 deletions(-)
diff --git a/Biz/Bild.nix b/Biz/Bild.nix
index 859d070..ec049d8 100644
--- a/Biz/Bild.nix
+++ b/Biz/Bild.nix
@@ -11,6 +11,9 @@ let
stdenv = nixpkgs.nixos-23_11.ccacheStdenv;
};
+ # just needed for running ollama
+ unstable = nixpkgs.nixos-unstable-small;
+
# this is the main library definitions, recursive references can be made with
# `self.thing`, like in Python objects
self = {
@@ -79,6 +82,7 @@ let
inherit bat bc cmark ctags deadnix fd figlet fzf git git-branchless
gitlint guile hlint indent jq lolcat mypy nixfmt ormolu pkg-config
ripgrep rustc tree wemux;
+ ollama = nixpkgs.nixos-unstable-small.ollama;
ruff = nixpkgs.nixos-unstable-small.ruff;
shellcheck = nixpkgs.nixos-unstable-small.shellcheck;
};
@@ -195,6 +199,7 @@ let
# build an operating system. 'cfg' is the NixOS config
os = cfg: (stable.nixos (_args: cfg)).toplevel;
+ os-unstable = cfg: (unstable.nixos (_args: cfg)).toplevel;
# build a docker image
image = stable.dockerTools.buildImage;
diff --git a/Biz/Bild/Deps.nix b/Biz/Bild/Deps.nix
index 8f44dde..8bf2272 100644
--- a/Biz/Bild/Deps.nix
+++ b/Biz/Bild/Deps.nix
@@ -31,4 +31,6 @@ _self: super:
};
nostr-rs-relay = super.callPackage ./Deps/nostr-rs-relay.nix { };
+
+ ollama = super.callPackage ./Deps/ollama.nix { acceleration = "cuda"; };
}
diff --git a/Biz/Bild/Deps/ollama.nix b/Biz/Bild/Deps/ollama.nix
new file mode 100644
index 0000000..1fd1f20
--- /dev/null
+++ b/Biz/Bild/Deps/ollama.nix
@@ -0,0 +1,193 @@
+# This was cribbed from an open pull request: https://github.com/NixOS/nixpkgs/pull/309330
+{ lib, buildGo122Module, fetchFromGitHub, fetchpatch, buildEnv, linkFarm
+, overrideCC, makeWrapper, stdenv, nixosTests
+
+, pkgs, cmake, gcc12, clblast, libdrm, rocmPackages, cudaPackages, linuxPackages
+, darwin
+
+, testers, ollama
+
+, config
+# one of `[ null false "rocm" "cuda" ]`
+, acceleration ? null }:
+
+let
+ pname = "ollama";
+ # don't forget to invalidate all hashes each update
+ version = "0.1.33";
+
+ src = fetchFromGitHub {
+ owner = "jmorganca";
+ repo = "ollama";
+ rev = "v${version}";
+ hash = "sha256-+iZIuHr90d5OijrXl6kzPycsHmx5XnKAKKOtppycjsk=";
+ fetchSubmodules = true;
+ };
+ vendorHash = "sha256-7x/n60WiKmwHFFuN0GfzkibUREvxAXNHcD3fHmihZvs=";
+ # ollama's patches of llama.cpp's example server
+ # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
+ # each update, these patches should be synchronized with the contents of `ollama/llm/patches/`
+ llamacppPatches = [
+ (preparePatch "02-clip-log.diff"
+ "sha256-rMWbl3QgrPlhisTeHwD7EnGRJyOhLB4UeS7rqa0tdXM=")
+ (preparePatch "03-load_exception.diff"
+ "sha256-1DfNahFYYxqlx4E4pwMKQpL+XR0bibYnDFGt6dCL4TM=")
+ (preparePatch "04-metal.diff"
+ "sha256-Ne8J9R8NndUosSK0qoMvFfKNwqV5xhhce1nSoYrZo7Y=")
+ ];
+
+ preparePatch = patch: hash:
+ fetchpatch {
+ url = "file://${src}/llm/patches/${patch}";
+ inherit hash;
+ stripLen = 1;
+ extraPrefix = "llm/llama.cpp/";
+ };
+
+ accelIsValid = builtins.elem acceleration [ null false "rocm" "cuda" ];
+ validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport)
+ (lib.concatStrings [
+ "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
+ "but they are mutually exclusive; falling back to cpu"
+ ]) (!(config.rocmSupport && config.cudaSupport));
+ validateLinux = api:
+ (lib.warnIfNot stdenv.isLinux
+ "building ollama with `${api}` is only supported on linux; falling back to cpu"
+ stdenv.isLinux);
+ shouldEnable = assert accelIsValid;
+ mode: fallback:
+ ((acceleration == mode)
+ || (fallback && acceleration == null && validateFallback))
+ && (validateLinux mode);
+
+ enableRocm = shouldEnable "rocm" config.rocmSupport;
+ enableCuda = shouldEnable "cuda" config.cudaSupport;
+
+ rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
+ rocmPath = buildEnv {
+ name = "rocm-path";
+ paths = [
+ rocmPackages.clr
+ rocmPackages.hipblas
+ rocmPackages.rocblas
+ rocmPackages.rocsolver
+ rocmPackages.rocsparse
+ rocmPackages.rocm-device-libs
+ rocmClang
+ ];
+ };
+
+ cudaToolkit = buildEnv {
+ name = "cuda-toolkit";
+ ignoreCollisions =
+ true; # FIXME: find a cleaner way to do this without ignoring collisions
+ paths = [
+ cudaPackages.cudatoolkit
+ cudaPackages.cuda_cudart
+ cudaPackages.cuda_cudart.static
+ ];
+ };
+
+ runtimeLibs = lib.optionals enableRocm [ rocmPackages.rocm-smi ]
+ ++ lib.optionals enableCuda [ linuxPackages.nvidia_x11 ];
+
+ appleFrameworks = darwin.apple_sdk_11_0.frameworks;
+ metalFrameworks = [
+ appleFrameworks.Accelerate
+ appleFrameworks.Metal
+ appleFrameworks.MetalKit
+ appleFrameworks.MetalPerformanceShaders
+ ];
+
+ goBuild = if enableCuda then
+ buildGo122Module.override { stdenv = overrideCC stdenv gcc12; }
+ else
+ buildGo122Module;
+ inherit (lib) licenses platforms maintainers;
+in goBuild ((lib.optionalAttrs enableRocm {
+ ROCM_PATH = rocmPath;
+ CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
+}) // (lib.optionalAttrs enableCuda {
+ CUDA_LIB_DIR = "${cudaToolkit}/lib";
+ CUDACXX = "${cudaToolkit}/bin/nvcc";
+ CUDAToolkit_ROOT = cudaToolkit;
+}) // {
+ inherit pname version src vendorHash;
+
+ nativeBuildInputs = [ cmake ]
+ ++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ]
+ ++ lib.optionals (enableRocm || enableCuda) [ makeWrapper ]
+ ++ lib.optionals stdenv.isDarwin metalFrameworks;
+
+ buildInputs = lib.optionals enableRocm [
+ rocmPackages.clr
+ rocmPackages.hipblas
+ rocmPackages.rocblas
+ rocmPackages.rocsolver
+ rocmPackages.rocsparse
+ libdrm
+ ] ++ lib.optionals enableCuda [ cudaPackages.cuda_cudart ]
+ ++ lib.optionals stdenv.isDarwin metalFrameworks;
+
+ patches = [
+ # disable uses of `git` in the `go generate` script
+ # ollama's build script assumes the source is a git repo, but nix removes the git directory
+ # this also disables necessary patches contained in `ollama/llm/patches/`
+ # those patches are added to `llamacppPatches`, and reapplied here in the patch phase
+ (fetchpatch {
+ url =
+ "https://raw.githubusercontent.com/NixOS/nixpkgs/master/pkgs/tools/misc/ollama/disable-git.patch";
+ hash = "sha256-7DthNfvk4VACdJb5L+4n9JmFzQn9qqDP2N4Hwuuhtgg=";
+ })
+ ] ++ llamacppPatches;
+ postPatch = ''
+ # replace inaccurate version number with actual release version
+ substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
+ '';
+ preBuild = ''
+ # disable uses of `git`, since nix removes the git directory
+ export OLLAMA_SKIP_PATCHING=true
+ # build llama.cpp libraries for ollama
+ go generate ./...
+ '';
+ postFixup = ''
+ # the app doesn't appear functional at the moment, so hide it
+ mv "$out/bin/app" "$out/bin/.ollama-app"
+ '' + lib.optionalString (enableRocm || enableCuda) ''
+ # expose runtime libraries necessary to use the gpu
+ mv "$out/bin/ollama" "$out/bin/.ollama-unwrapped"
+ makeWrapper "$out/bin/.ollama-unwrapped" "$out/bin/ollama" ${
+ lib.optionalString enableRocm "--set-default HIP_PATH '${rocmPath}' "
+ } \
+ --suffix LD_LIBRARY_PATH : '/run/opengl-driver/lib:${
+ lib.makeLibraryPath runtimeLibs
+ }'
+ '';
+
+ ldflags = [
+ "-s"
+ "-w"
+ "-X=github.com/jmorganca/ollama/version.Version=${version}"
+ "-X=github.com/jmorganca/ollama/server.mode=release"
+ ];
+
+ passthru.tests = {
+ service = nixosTests.ollama;
+ rocm = pkgs.ollama.override { acceleration = "rocm"; };
+ cuda = pkgs.ollama.override { acceleration = "cuda"; };
+ version = testers.testVersion {
+ inherit version;
+ package = ollama;
+ };
+ };
+
+ meta = {
+ description = "Get up and running with large language models locally";
+ homepage = "https://github.com/ollama/ollama";
+ changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
+ license = licenses.mit;
+ platforms = platforms.unix;
+ mainProgram = "ollama";
+ maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
+ };
+})
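A side note on the acceleration plumbing in the file above: with `acceleration = "cuda"` passed in from Deps.nix, the selection boils down to a small predicate. A stripped-down sketch, with the fallback and Linux validation elided and this repo's settings assumed:

    # Standalone sketch of the shouldEnable predicate, using acceleration = "cuda"
    # as set in Biz/Bild/Deps.nix; config.cudaSupport/rocmSupport assumed unset.
    let
      acceleration = "cuda";
      shouldEnable = mode: fallback:
        (acceleration == mode) || (fallback && acceleration == null);
    in {
      enableCuda = shouldEnable "cuda" false; # true
      enableRocm = shouldEnable "rocm" false; # false
    }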
diff --git a/Biz/Bild/Python.nix b/Biz/Bild/Python.nix
index 5569352..c559e42 100644
--- a/Biz/Bild/Python.nix
+++ b/Biz/Bild/Python.nix
@@ -10,9 +10,6 @@ _self: super: {
mypy = dontCheck pysuper.mypy;
outlines = callPackage ./Deps/outlines.nix { };
perscache = callPackage ./Deps/perscache.nix { };
- safetensors = super.nixos-23_11.python310Packages.safetensors.override {
- buildPythonPackage = pysuper.pkgs.python3Packages.buildPythonPackage;
- };
tokenizers = dontCheck pysuper.tokenizers;
};
};
diff --git a/Biz/Dev/Beryllium.nix b/Biz/Dev/Beryllium.nix
index 607e5c1..cca0997 100644
--- a/Biz/Dev/Beryllium.nix
+++ b/Biz/Dev/Beryllium.nix
@@ -1,11 +1,12 @@
{ bild }:
-bild.os {
+bild.os-unstable {
imports = [
../OsBase.nix
../Packages.nix
../Users.nix
./Beryllium/Configuration.nix
./Beryllium/Hardware.nix
+ ./Beryllium/Ollama.nix
./Vpn.nix
];
networking.hostName = "beryllium";
diff --git a/Biz/Dev/Beryllium/Ollama.nix b/Biz/Dev/Beryllium/Ollama.nix
new file mode 100644
index 0000000..00aa327
--- /dev/null
+++ b/Biz/Dev/Beryllium/Ollama.nix
@@ -0,0 +1,47 @@
+{ pkgs, ... }:
+/* Ollama API service
+
+ Don't put too much work into this, there's a much better and more complete
+ ollama service (with webui!) being built here:
+ https://github.com/NixOS/nixpkgs/pull/275448
+
+ If you want to spend time on it, spend time over there.
+*/
+{
+
+ systemd.services.ollama = {
+ description = "ollama";
+ after = [ "network.target" ];
+ wantedBy = [ "multi-user.target" ];
+
+ environment = {
+ OLLAMA_HOST = "localhost:11434";
+ # Where to store LLM model files.
+ HOME = "%S/ollama";
+ OLLAMA_MODELS = "%S/ollama/models";
+ OLLAMA_DEBUG = "1";
+ };
+
+ serviceConfig = {
+ ExecStart = "${pkgs.ollama}/bin/ollama serve";
+ User = "ollama";
+ Group = "ollama";
+ Type = "simple";
+ Restart = "on-failure";
+ RestartSec = 3;
+ # Persistent storage for model files, i.e. /var/lib/<StateDirectory>
+ StateDirectory = [ "ollama" ];
+ };
+ };
+
+ # for administration, make this available to users' PATH
+ environment.systemPackages = [ pkgs.ollama ];
+
+ users.groups.ollama = { };
+
+ users.users.ollama = {
+ group = "ollama";
+ isSystemUser = true;
+ extraGroups = [ "render" "video" ];
+ };
+}
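Since Biz/Bild/Deps.nix hard-codes `acceleration = "cuda"`, a ROCm build stays one override away, mirroring the `passthru.tests.rocm` override in Biz/Bild/Deps/ollama.nix. A minimal sketch; the `ollama-rocm` attribute name is made up and not part of this commit:

    # Hypothetical extra overlay entry: same derivation, ROCm instead of CUDA.
    _self: super: {
      ollama-rocm = super.callPackage ./Deps/ollama.nix { acceleration = "rocm"; };
    }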