{ stdenv, sources, python3, cmake, pkgconfig, openmpi, cudaPackages }:

let
  # Python with the packages required by the conversion scripts shipped with llama.cpp.
  llama-python = python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
in
stdenv.mkDerivation {
  name = "llama.cpp";
  version = sources.llama-cpp.rev;
  src = sources.llama-cpp;

  postPatch = ''
    # Point the Metal backend at the installed shader instead of the app bundle.
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    # Run the bundled scripts with the wrapped Python above.
    substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
  '';

  nativeBuildInputs = [ cmake pkgconfig ];
  buildInputs = [ openmpi cudaPackages.cudatoolkit ];

  cmakeFlags = [
    "-DLLAMA_BUILD_SERVER=ON"
    "-DLLAMA_MPI=ON"
    "-DBUILD_SHARED_LIBS=ON"
    "-DCMAKE_SKIP_BUILD_RPATH=ON"
    "-DLLAMA_CUBLAS=ON"
  ];

  postInstall = ''
    # Rename the generic binaries so they are less likely to clash on PATH.
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
  '';

  meta.mainProgram = "llama";
}
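
# A minimal sketch of how this derivation might be consumed, assuming it is
# saved as ./llama-cpp.nix and that `sources` is a niv-style pin imported from
# ./nix/sources.nix (both paths are assumptions, not taken from this file):
#
#   let
#     pkgs = import <nixpkgs> { };
#     sources = import ./nix/sources.nix;
#   in
#     pkgs.callPackage ./llama-cpp.nix { inherit sources; }
#
# callPackage fills in the remaining arguments (stdenv, cmake, cudaPackages, ...)
# from nixpkgs, and `meta.mainProgram = "llama"` is what lets `lib.getExe` and
# `nix run` pick the renamed binary.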