From cceefa62d147594d43478e398bbaa9c630670935 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Sat, 11 May 2024 14:28:09 -0400 Subject: Setup ollama API service This superceedes exllama and tabbyAPI which I could never get working fully. Unfortunately I had to switch to NixOS unstable to get all the Go builder stuff to work, so this is a cause of yet another version drift, but I guess it's inevitable and I should just learn to mitigate it with my nixpkgs shenanigans. --- Biz/Bild/Deps.nix | 2 + Biz/Bild/Deps/ollama.nix | 193 +++++++++++++++++++++++++++++++++++++++++++++++ Biz/Bild/Python.nix | 3 - 3 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 Biz/Bild/Deps/ollama.nix (limited to 'Biz/Bild') diff --git a/Biz/Bild/Deps.nix b/Biz/Bild/Deps.nix index 8f44dde..8bf2272 100644 --- a/Biz/Bild/Deps.nix +++ b/Biz/Bild/Deps.nix @@ -31,4 +31,6 @@ _self: super: }; nostr-rs-relay = super.callPackage ./Deps/nostr-rs-relay.nix { }; + + ollama = super.callPackage ./Deps/ollama.nix { acceleration = "cuda"; }; } diff --git a/Biz/Bild/Deps/ollama.nix b/Biz/Bild/Deps/ollama.nix new file mode 100644 index 0000000..1fd1f20 --- /dev/null +++ b/Biz/Bild/Deps/ollama.nix @@ -0,0 +1,193 @@ +# This was cribbed from an open pull request: https://github.com/NixOS/nixpkgs/pull/309330 +{ lib, buildGo122Module, fetchFromGitHub, fetchpatch, buildEnv, linkFarm +, overrideCC, makeWrapper, stdenv, nixosTests + +, pkgs, cmake, gcc12, clblast, libdrm, rocmPackages, cudaPackages, linuxPackages +, darwin + +, testers, ollama + +, config +# one of `[ null false "rocm" "cuda" ]` +, acceleration ? null }: + +let + pname = "ollama"; + # don't forget to invalidate all hashes each update + version = "0.1.33"; + + src = fetchFromGitHub { + owner = "jmorganca"; + repo = "ollama"; + rev = "v${version}"; + hash = "sha256-+iZIuHr90d5OijrXl6kzPycsHmx5XnKAKKOtppycjsk="; + fetchSubmodules = true; + }; + vendorHash = "sha256-7x/n60WiKmwHFFuN0GfzkibUREvxAXNHcD3fHmihZvs="; + # ollama's patches of llama.cpp's example server + # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream" + # each update, these patches should be synchronized with the contents of `ollama/llm/patches/` + llamacppPatches = [ + (preparePatch "02-clip-log.diff" + "sha256-rMWbl3QgrPlhisTeHwD7EnGRJyOhLB4UeS7rqa0tdXM=") + (preparePatch "03-load_exception.diff" + "sha256-1DfNahFYYxqlx4E4pwMKQpL+XR0bibYnDFGt6dCL4TM=") + (preparePatch "04-metal.diff" + "sha256-Ne8J9R8NndUosSK0qoMvFfKNwqV5xhhce1nSoYrZo7Y=") + ]; + + preparePatch = patch: hash: + fetchpatch { + url = "file://${src}/llm/patches/${patch}"; + inherit hash; + stripLen = 1; + extraPrefix = "llm/llama.cpp/"; + }; + + accelIsValid = builtins.elem acceleration [ null false "rocm" "cuda" ]; + validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport) + (lib.concatStrings [ + "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, " + "but they are mutually exclusive; falling back to cpu" + ]) (!(config.rocmSupport && config.cudaSupport)); + validateLinux = api: + (lib.warnIfNot stdenv.isLinux + "building ollama with `${api}` is only supported on linux; falling back to cpu" + stdenv.isLinux); + shouldEnable = assert accelIsValid; + mode: fallback: + ((acceleration == mode) + || (fallback && acceleration == null && validateFallback)) + && (validateLinux mode); + + enableRocm = shouldEnable "rocm" config.rocmSupport; + enableCuda = shouldEnable "cuda" config.cudaSupport; + + rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; }; + rocmPath = buildEnv { + name = "rocm-path"; + paths = [ + rocmPackages.clr + rocmPackages.hipblas + rocmPackages.rocblas + rocmPackages.rocsolver + rocmPackages.rocsparse + rocmPackages.rocm-device-libs + rocmClang + ]; + }; + + cudaToolkit = buildEnv { + name = "cuda-toolkit"; + ignoreCollisions = + true; # FIXME: find a cleaner way to do this without ignoring collisions + paths = [ + cudaPackages.cudatoolkit + cudaPackages.cuda_cudart + cudaPackages.cuda_cudart.static + ]; + }; + + runtimeLibs = lib.optionals enableRocm [ rocmPackages.rocm-smi ] + ++ lib.optionals enableCuda [ linuxPackages.nvidia_x11 ]; + + appleFrameworks = darwin.apple_sdk_11_0.frameworks; + metalFrameworks = [ + appleFrameworks.Accelerate + appleFrameworks.Metal + appleFrameworks.MetalKit + appleFrameworks.MetalPerformanceShaders + ]; + + goBuild = if enableCuda then + buildGo122Module.override { stdenv = overrideCC stdenv gcc12; } + else + buildGo122Module; + inherit (lib) licenses platforms maintainers; +in goBuild ((lib.optionalAttrs enableRocm { + ROCM_PATH = rocmPath; + CLBlast_DIR = "${clblast}/lib/cmake/CLBlast"; +}) // (lib.optionalAttrs enableCuda { + CUDA_LIB_DIR = "${cudaToolkit}/lib"; + CUDACXX = "${cudaToolkit}/bin/nvcc"; + CUDAToolkit_ROOT = cudaToolkit; +}) // { + inherit pname version src vendorHash; + + nativeBuildInputs = [ cmake ] + ++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ] + ++ lib.optionals (enableRocm || enableCuda) [ makeWrapper ] + ++ lib.optionals stdenv.isDarwin metalFrameworks; + + buildInputs = lib.optionals enableRocm [ + rocmPackages.clr + rocmPackages.hipblas + rocmPackages.rocblas + rocmPackages.rocsolver + rocmPackages.rocsparse + libdrm + ] ++ lib.optionals enableCuda [ cudaPackages.cuda_cudart ] + ++ lib.optionals stdenv.isDarwin metalFrameworks; + + patches = [ + # disable uses of `git` in the `go generate` script + # ollama's build script assumes the source is a git repo, but nix removes the git directory + # this also disables necessary patches contained in `ollama/llm/patches/` + # those patches are added to `llamacppPatches`, and reapplied here in the patch phase + (fetchpatch { + url = + "https://raw.githubusercontent.com/NixOS/nixpkgs/master/pkgs/tools/misc/ollama/disable-git.patch"; + hash = "sha256-7DthNfvk4VACdJb5L+4n9JmFzQn9qqDP2N4Hwuuhtgg="; + }) + ] ++ llamacppPatches; + postPatch = '' + # replace inaccurate version number with actual release version + substituteInPlace version/version.go --replace-fail 0.0.0 '${version}' + ''; + preBuild = '' + # disable uses of `git`, since nix removes the git directory + export OLLAMA_SKIP_PATCHING=true + # build llama.cpp libraries for ollama + go generate ./... + ''; + postFixup = '' + # the app doesn't appear functional at the moment, so hide it + mv "$out/bin/app" "$out/bin/.ollama-app" + '' + lib.optionalString (enableRocm || enableCuda) '' + # expose runtime libraries necessary to use the gpu + mv "$out/bin/ollama" "$out/bin/.ollama-unwrapped" + makeWrapper "$out/bin/.ollama-unwrapped" "$out/bin/ollama" ${ + lib.optionalString enableRocm "--set-default HIP_PATH '${rocmPath}' " + } \ + --suffix LD_LIBRARY_PATH : '/run/opengl-driver/lib:${ + lib.makeLibraryPath runtimeLibs + }' + ''; + + ldflags = [ + "-s" + "-w" + "-X=github.com/jmorganca/ollama/version.Version=${version}" + "-X=github.com/jmorganca/ollama/server.mode=release" + ]; + + passthru.tests = { + service = nixosTests.ollama; + rocm = pkgs.ollama.override { acceleration = "rocm"; }; + cuda = pkgs.ollama.override { acceleration = "cuda"; }; + version = testers.testVersion { + inherit version; + package = ollama; + }; + }; + + meta = { + description = "Get up and running with large language models locally"; + homepage = "https://github.com/ollama/ollama"; + changelog = "https://github.com/ollama/ollama/releases/tag/v${version}"; + license = licenses.mit; + platforms = platforms.unix; + mainProgram = "ollama"; + maintainers = with maintainers; [ abysssol dit7ya elohmeier ]; + }; +}) diff --git a/Biz/Bild/Python.nix b/Biz/Bild/Python.nix index 5569352..c559e42 100644 --- a/Biz/Bild/Python.nix +++ b/Biz/Bild/Python.nix @@ -10,9 +10,6 @@ _self: super: { mypy = dontCheck pysuper.mypy; outlines = callPackage ./Deps/outlines.nix { }; perscache = callPackage ./Deps/perscache.nix { }; - safetensors = super.nixos-23_11.python310Packages.safetensors.override { - buildPythonPackage = pysuper.pkgs.python3Packages.buildPythonPackage; - }; tokenizers = dontCheck pysuper.tokenizers; }; }; -- cgit v1.2.3