author    Ben Sima <ben@bsima.me>  2024-05-11 14:28:09 -0400
committer Ben Sima <ben@bsima.me>  2024-05-20 22:15:49 -0400
commit    cceefa62d147594d43478e398bbaa9c630670935 (patch)
tree      458b468f271c156a7e91944e58d2861afd5e1dab /Biz
parent    dd0bc9610cf0e6842f5d5ac67a73f2fd6f4eba2f (diff)
Set up ollama API service
This supersedes exllama and tabbyAPI, which I could never get working fully. Unfortunately I had to switch to NixOS unstable to get all the Go builder stuff to work, so this introduces yet another source of version drift, but I guess that's inevitable and I should just learn to mitigate it with my nixpkgs shenanigans.
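For reference, the unstable channel ends up pinned right next to the existing 23.11 pin. The actual pin lives elsewhere in the repo, so the URLs and attribute names below are only placeholders, but the shape is roughly:

    # Hypothetical sketch of how nixos-unstable-small could sit next to the stable pin.
    # The real pin is managed elsewhere in this repo; the URLs here are placeholders.
    {
      nixos-23_11 = import (builtins.fetchTarball {
        url = "https://github.com/NixOS/nixpkgs/archive/nixos-23.11.tar.gz";
      }) { };
      nixos-unstable-small = import (builtins.fetchTarball {
        url = "https://github.com/NixOS/nixpkgs/archive/nixos-unstable-small.tar.gz";
      }) { };
    }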
Diffstat (limited to 'Biz')
-rw-r--r--  Biz/Bild.nix                   5
-rw-r--r--  Biz/Bild/Deps.nix              2
-rw-r--r--  Biz/Bild/Deps/ollama.nix     193
-rw-r--r--  Biz/Bild/Python.nix            3
-rw-r--r--  Biz/Dev/Beryllium.nix          3
-rw-r--r--  Biz/Dev/Beryllium/Ollama.nix  47
6 files changed, 249 insertions(+), 4 deletions(-)
diff --git a/Biz/Bild.nix b/Biz/Bild.nix
index 859d070..ec049d8 100644
--- a/Biz/Bild.nix
+++ b/Biz/Bild.nix
@@ -11,6 +11,9 @@ let
stdenv = nixpkgs.nixos-23_11.ccacheStdenv;
};
+ # just needed for running ollama
+ unstable = nixpkgs.nixos-unstable-small;
+
# this is the main library definitions, recursive references can be made with
# `self.thing`, like in Python objects
self = {
@@ -79,6 +82,7 @@ let
inherit bat bc cmark ctags deadnix fd figlet fzf git git-branchless
gitlint guile hlint indent jq lolcat mypy nixfmt ormolu pkg-config
ripgrep rustc tree wemux;
+ ollama = nixpkgs.nixos-unstable-small.ollama;
ruff = nixpkgs.nixos-unstable-small.ruff;
shellcheck = nixpkgs.nixos-unstable-small.shellcheck;
};
@@ -195,6 +199,7 @@ let
# build an operating system. 'cfg' is the NixOS config
os = cfg: (stable.nixos (_args: cfg)).toplevel;
+ os-unstable = cfg: (unstable.nixos (_args: cfg)).toplevel;
# build a docker image
image = stable.dockerTools.buildImage;
diff --git a/Biz/Bild/Deps.nix b/Biz/Bild/Deps.nix
index 8f44dde..8bf2272 100644
--- a/Biz/Bild/Deps.nix
+++ b/Biz/Bild/Deps.nix
@@ -31,4 +31,6 @@ _self: super:
};
nostr-rs-relay = super.callPackage ./Deps/nostr-rs-relay.nix { };
+
+ ollama = super.callPackage ./Deps/ollama.nix { acceleration = "cuda"; };
}
diff --git a/Biz/Bild/Deps/ollama.nix b/Biz/Bild/Deps/ollama.nix
new file mode 100644
index 0000000..1fd1f20
--- /dev/null
+++ b/Biz/Bild/Deps/ollama.nix
@@ -0,0 +1,193 @@
+# This was cribbed from an open pull request: https://github.com/NixOS/nixpkgs/pull/309330
+{ lib, buildGo122Module, fetchFromGitHub, fetchpatch, buildEnv, linkFarm
+, overrideCC, makeWrapper, stdenv, nixosTests
+
+, pkgs, cmake, gcc12, clblast, libdrm, rocmPackages, cudaPackages, linuxPackages
+, darwin
+
+, testers, ollama
+
+, config
+# one of `[ null false "rocm" "cuda" ]`
+, acceleration ? null }:
+
+let
+ pname = "ollama";
+ # don't forget to invalidate all hashes each update
+ version = "0.1.33";
+
+ src = fetchFromGitHub {
+ owner = "jmorganca";
+ repo = "ollama";
+ rev = "v${version}";
+ hash = "sha256-+iZIuHr90d5OijrXl6kzPycsHmx5XnKAKKOtppycjsk=";
+ fetchSubmodules = true;
+ };
+ vendorHash = "sha256-7x/n60WiKmwHFFuN0GfzkibUREvxAXNHcD3fHmihZvs=";
+ # ollama's patches of llama.cpp's example server
+ # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
+ # each update, these patches should be synchronized with the contents of `ollama/llm/patches/`
+ llamacppPatches = [
+ (preparePatch "02-clip-log.diff"
+ "sha256-rMWbl3QgrPlhisTeHwD7EnGRJyOhLB4UeS7rqa0tdXM=")
+ (preparePatch "03-load_exception.diff"
+ "sha256-1DfNahFYYxqlx4E4pwMKQpL+XR0bibYnDFGt6dCL4TM=")
+ (preparePatch "04-metal.diff"
+ "sha256-Ne8J9R8NndUosSK0qoMvFfKNwqV5xhhce1nSoYrZo7Y=")
+ ];
+
+ preparePatch = patch: hash:
+ fetchpatch {
+ url = "file://${src}/llm/patches/${patch}";
+ inherit hash;
+ stripLen = 1;
+ extraPrefix = "llm/llama.cpp/";
+ };
+
+ accelIsValid = builtins.elem acceleration [ null false "rocm" "cuda" ];
+ validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport)
+ (lib.concatStrings [
+ "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
+ "but they are mutually exclusive; falling back to cpu"
+ ]) (!(config.rocmSupport && config.cudaSupport));
+ validateLinux = api:
+ (lib.warnIfNot stdenv.isLinux
+ "building ollama with `${api}` is only supported on linux; falling back to cpu"
+ stdenv.isLinux);
+ shouldEnable = assert accelIsValid;
+ mode: fallback:
+ ((acceleration == mode)
+ || (fallback && acceleration == null && validateFallback))
+ && (validateLinux mode);
+
+ enableRocm = shouldEnable "rocm" config.rocmSupport;
+ enableCuda = shouldEnable "cuda" config.cudaSupport;
+
+ rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
+ rocmPath = buildEnv {
+ name = "rocm-path";
+ paths = [
+ rocmPackages.clr
+ rocmPackages.hipblas
+ rocmPackages.rocblas
+ rocmPackages.rocsolver
+ rocmPackages.rocsparse
+ rocmPackages.rocm-device-libs
+ rocmClang
+ ];
+ };
+
+ cudaToolkit = buildEnv {
+ name = "cuda-toolkit";
+ ignoreCollisions =
+ true; # FIXME: find a cleaner way to do this without ignoring collisions
+ paths = [
+ cudaPackages.cudatoolkit
+ cudaPackages.cuda_cudart
+ cudaPackages.cuda_cudart.static
+ ];
+ };
+
+ runtimeLibs = lib.optionals enableRocm [ rocmPackages.rocm-smi ]
+ ++ lib.optionals enableCuda [ linuxPackages.nvidia_x11 ];
+
+ appleFrameworks = darwin.apple_sdk_11_0.frameworks;
+ metalFrameworks = [
+ appleFrameworks.Accelerate
+ appleFrameworks.Metal
+ appleFrameworks.MetalKit
+ appleFrameworks.MetalPerformanceShaders
+ ];
+
+ goBuild = if enableCuda then
+ buildGo122Module.override { stdenv = overrideCC stdenv gcc12; }
+ else
+ buildGo122Module;
+ inherit (lib) licenses platforms maintainers;
+in goBuild ((lib.optionalAttrs enableRocm {
+ ROCM_PATH = rocmPath;
+ CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
+}) // (lib.optionalAttrs enableCuda {
+ CUDA_LIB_DIR = "${cudaToolkit}/lib";
+ CUDACXX = "${cudaToolkit}/bin/nvcc";
+ CUDAToolkit_ROOT = cudaToolkit;
+}) // {
+ inherit pname version src vendorHash;
+
+ nativeBuildInputs = [ cmake ]
+ ++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ]
+ ++ lib.optionals (enableRocm || enableCuda) [ makeWrapper ]
+ ++ lib.optionals stdenv.isDarwin metalFrameworks;
+
+ buildInputs = lib.optionals enableRocm [
+ rocmPackages.clr
+ rocmPackages.hipblas
+ rocmPackages.rocblas
+ rocmPackages.rocsolver
+ rocmPackages.rocsparse
+ libdrm
+ ] ++ lib.optionals enableCuda [ cudaPackages.cuda_cudart ]
+ ++ lib.optionals stdenv.isDarwin metalFrameworks;
+
+ patches = [
+ # disable uses of `git` in the `go generate` script
+ # ollama's build script assumes the source is a git repo, but nix removes the git directory
+ # this also disables necessary patches contained in `ollama/llm/patches/`
+ # those patches are added to `llamacppPatches`, and reapplied here in the patch phase
+ (fetchpatch {
+ url =
+ "https://raw.githubusercontent.com/NixOS/nixpkgs/master/pkgs/tools/misc/ollama/disable-git.patch";
+ hash = "sha256-7DthNfvk4VACdJb5L+4n9JmFzQn9qqDP2N4Hwuuhtgg=";
+ })
+ ] ++ llamacppPatches;
+ postPatch = ''
+ # replace inaccurate version number with actual release version
+ substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
+ '';
+ preBuild = ''
+ # disable uses of `git`, since nix removes the git directory
+ export OLLAMA_SKIP_PATCHING=true
+ # build llama.cpp libraries for ollama
+ go generate ./...
+ '';
+ postFixup = ''
+ # the app doesn't appear functional at the moment, so hide it
+ mv "$out/bin/app" "$out/bin/.ollama-app"
+ '' + lib.optionalString (enableRocm || enableCuda) ''
+ # expose runtime libraries necessary to use the gpu
+ mv "$out/bin/ollama" "$out/bin/.ollama-unwrapped"
+ makeWrapper "$out/bin/.ollama-unwrapped" "$out/bin/ollama" ${
+ lib.optionalString enableRocm "--set-default HIP_PATH '${rocmPath}' "
+ } \
+ --suffix LD_LIBRARY_PATH : '/run/opengl-driver/lib:${
+ lib.makeLibraryPath runtimeLibs
+ }'
+ '';
+
+ ldflags = [
+ "-s"
+ "-w"
+ "-X=github.com/jmorganca/ollama/version.Version=${version}"
+ "-X=github.com/jmorganca/ollama/server.mode=release"
+ ];
+
+ passthru.tests = {
+ service = nixosTests.ollama;
+ rocm = pkgs.ollama.override { acceleration = "rocm"; };
+ cuda = pkgs.ollama.override { acceleration = "cuda"; };
+ version = testers.testVersion {
+ inherit version;
+ package = ollama;
+ };
+ };
+
+ meta = {
+ description = "Get up and running with large language models locally";
+ homepage = "https://github.com/ollama/ollama";
+ changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
+ license = licenses.mit;
+ platforms = platforms.unix;
+ mainProgram = "ollama";
+ maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
+ };
+})
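A side note on the acceleration plumbing in the file above: with `acceleration = "cuda"` passed in from Deps.nix, the selection boils down to a small predicate. A stripped-down sketch, with the fallback and Linux validation elided and this repo's settings assumed:

    # Standalone sketch of the shouldEnable predicate, using acceleration = "cuda"
    # as set in Biz/Bild/Deps.nix; config.cudaSupport/rocmSupport assumed unset.
    let
      acceleration = "cuda";
      shouldEnable = mode: fallback:
        (acceleration == mode) || (fallback && acceleration == null);
    in {
      enableCuda = shouldEnable "cuda" false; # true
      enableRocm = shouldEnable "rocm" false; # false
    }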
diff --git a/Biz/Bild/Python.nix b/Biz/Bild/Python.nix
index 5569352..c559e42 100644
--- a/Biz/Bild/Python.nix
+++ b/Biz/Bild/Python.nix
@@ -10,9 +10,6 @@ _self: super: {
mypy = dontCheck pysuper.mypy;
outlines = callPackage ./Deps/outlines.nix { };
perscache = callPackage ./Deps/perscache.nix { };
- safetensors = super.nixos-23_11.python310Packages.safetensors.override {
- buildPythonPackage = pysuper.pkgs.python3Packages.buildPythonPackage;
- };
tokenizers = dontCheck pysuper.tokenizers;
};
};
diff --git a/Biz/Dev/Beryllium.nix b/Biz/Dev/Beryllium.nix
index 607e5c1..cca0997 100644
--- a/Biz/Dev/Beryllium.nix
+++ b/Biz/Dev/Beryllium.nix
@@ -1,11 +1,12 @@
{ bild }:
-bild.os {
+bild.os-unstable {
imports = [
../OsBase.nix
../Packages.nix
../Users.nix
./Beryllium/Configuration.nix
./Beryllium/Hardware.nix
+ ./Beryllium/Ollama.nix
./Vpn.nix
];
networking.hostName = "beryllium";
diff --git a/Biz/Dev/Beryllium/Ollama.nix b/Biz/Dev/Beryllium/Ollama.nix
new file mode 100644
index 0000000..00aa327
--- /dev/null
+++ b/Biz/Dev/Beryllium/Ollama.nix
@@ -0,0 +1,47 @@
+{ pkgs, ... }:
+/* Ollama API service
+
+ Don't put too much work into this, there's a much better and more complete
+ ollama service (with webui!) being built here:
+ https://github.com/NixOS/nixpkgs/pull/275448
+
+ If you want to spend time on it, spend time over there.
+*/
+{
+
+ systemd.services.ollama = {
+ description = "ollama";
+ after = [ "network.target" ];
+ wantedBy = [ "multi-user.target" ];
+
+ environment = {
+ OLLAMA_HOST = "localhost:11434";
+ # Where to store LLM model files.
+ HOME = "%S/ollama";
+ OLLAMA_MODELS = "%S/ollama/models";
+ OLLAMA_DEBUG = "1";
+ };
+
+ serviceConfig = {
+ ExecStart = "${pkgs.ollama}/bin/ollama serve";
+ User = "ollama";
+ Group = "ollama";
+ Type = "simple";
+ Restart = "on-failure";
+ RestartSec = 3;
+ # Persistent storage for model files, i.e. /var/lib/<StateDirectory>
+ StateDirectory = [ "ollama" ];
+ };
+ };
+
+ # for administration, make this available to users' PATH
+ environment.systemPackages = [ pkgs.ollama ];
+
+ users.groups.ollama = { };
+
+ users.users.ollama = {
+ group = "ollama";
+ isSystemUser = true;
+ extraGroups = [ "render" "video" ];
+ };
+}
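Since Biz/Bild/Deps.nix hard-codes `acceleration = "cuda"`, a ROCm build stays one override away, mirroring the `passthru.tests.rocm` override in Biz/Bild/Deps/ollama.nix. A minimal sketch; the `ollama-rocm` attribute name is made up and not part of this commit:

    # Hypothetical extra overlay entry: same derivation, ROCm instead of CUDA.
    _self: super: {
      ollama-rocm = super.callPackage ./Deps/ollama.nix { acceleration = "rocm"; };
    }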