From 44b1f8d5bc2ffcb00f56b0096476501e7e0cd458 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Mon, 7 Aug 2023 17:57:42 -0400 Subject: Get ava.py running with nixified builds I had to redo some of my python overrides and crib the bitsandbytes.nix from upstream. Ava is failing because: ValueError: Tokenizer class LlamaTokenizer does not exist or is not currently imported. I think this means I need to update my nixpkgs pin, so I'm gonna snapshot my work in git, do the update, and that might obviate the local bitsandbytes.nix anyway. --- Biz/Bild/Deps.nix | 13 ++-- Biz/Bild/Deps/bitsandbytes.nix | 144 +++++++++++++++++++++-------------------- Biz/Bild/Deps/lion-pytorch.nix | 32 +++++++++ ava.py | 5 +- 4 files changed, 117 insertions(+), 77 deletions(-) create mode 100644 Biz/Bild/Deps/lion-pytorch.nix diff --git a/Biz/Bild/Deps.nix b/Biz/Bild/Deps.nix index 34be480..20a5e34 100644 --- a/Biz/Bild/Deps.nix +++ b/Biz/Bild/Deps.nix @@ -31,13 +31,16 @@ in rec }; }; - python3Packages = let - callPackage = super.python3Packages.callPackage; - in super.python3Packages // rec { - accelerate = callPackage ./Deps/accelerate.nix {}; - bitsandbytes = callPackage ./Deps/bitsandbytes.nix {}; + python3 = super.python3.override { + packageOverrides = _: pysuper: with pysuper.pkgs.python3Packages; { + accelerate = callPackage ./Deps/accelerate.nix {}; + bitsandbytes = callPackage ./Deps/bitsandbytes.nix {}; + lion-pytorch = callPackage ./Deps/lion-pytorch.nix {}; + }; }; + python3Packages = python3.pkgs; + autogen = super.callPackage ./Deps/autogen.nix {}; gnutls = super.gnutls.override { guileBindings = true; }; diff --git a/Biz/Bild/Deps/bitsandbytes.nix b/Biz/Bild/Deps/bitsandbytes.nix index 8d4fa53..b87c0c4 100644 --- a/Biz/Bild/Deps/bitsandbytes.nix +++ b/Biz/Bild/Deps/bitsandbytes.nix @@ -1,94 +1,98 @@ { lib -, fetchgit , buildPythonPackage -, pytorch +, fetchFromGitHub +, python +, pythonOlder +, pytestCheckHook , setuptools -, typer -, linuxPackages -, pytest - -# CUDA -, cudaSupport ? true -, cudaPackages ? {} -, addOpenGLRunpath ? null -, gcc +, torch +, einops +, lion-pytorch +, scipy +, symlinkJoin }: let - inherit (linuxPackages) nvidia_x11; - inherit (cudaPackages) cudatoolkit; - cudaVersion = - lib.strings.removeSuffix "0" - (lib.strings.concatStrings - (lib.strings.splitString "." cudatoolkit.version)); - libraryPath = lib.strings.makeLibraryPath [ - cudatoolkit - cudatoolkit.lib - nvidia_x11 - ]; -in buildPythonPackage rec { pname = "bitsandbytes"; - version = "unstable-2022-12-21"; + version = "0.38.0"; + + inherit (torch) cudaPackages cudaSupport; + inherit (cudaPackages) cudaVersion; + + # NOTE: torchvision doesn't use cudnn; torch does! + # For this reason it is not included. + cuda-common-redist = with cudaPackages; [ + cuda_cccl # + libcublas # cublas_v2.h + libcurand + libcusolver # cusolverDn.h + libcusparse # cusparse.h + ]; - src = fetchgit { - url = "https://simatime.com/git/ben/bitsandbytes.git"; - rev = "31ef751bea48eeee2e0e95aca79df8e59b4c25c4"; - sha256 = "sha256-/a2NFVuMSvSDELeXyfDdU9FZEJcDxCLa4VbMWBUCRI0="; + cuda-native-redist = symlinkJoin { + name = "cuda-native-redist-${cudaVersion}"; + paths = with cudaPackages; [ + cuda_cudart # cuda_runtime.h cuda_runtime_api.h + cuda_nvcc + ] ++ cuda-common-redist; }; - # any exe must be wrapped with this, it doesn't effect the build but it does - # show up on the logs so keep it here for documentation purposes - LD_LIBRARY_PATH = libraryPath; + cuda-redist = symlinkJoin { + name = "cuda-redist-${cudaVersion}"; + paths = cuda-common-redist; + }; + +in +buildPythonPackage { + inherit pname version; + format = "pyproject"; - # this allows the build system to find nvcc - CUDA_HOME = "${cudatoolkit}"; + disabled = pythonOlder "3.7"; + + src = fetchFromGitHub { + owner = "TimDettmers"; + repo = pname; + rev = "refs/tags/${version}"; + hash = "sha256-gGlbzTDvZNo4MhcYzLvWuB2ec7q+Qt5/LtTbJ0Rc+Kk="; + }; - preBuild = '' - make cuda11x CUDA_VERSION=${cudaVersion} GPP=${gcc}/bin/g++ -j3 - ## this python doesn't know where to get libcuda, so explicitly tell it - ## this is probably really bad practice, fix this - substituteInPlace bitsandbytes/cuda_setup/main.py \ - --replace "libcuda.so" "${nvidia_x11}/lib/libcuda.so" + postPatch = '' + substituteInPlace Makefile --replace "/usr/bin/g++" "g++" --replace "lib64" "lib" + substituteInPlace bitsandbytes/cuda_setup/main.py \ + --replace "binary_path = package_dir / binary_name" \ + "binary_path = Path('$out/${python.sitePackages}/${pname}')/binary_name" + '' + lib.optionalString torch.cudaSupport '' + substituteInPlace bitsandbytes/cuda_setup/main.py \ + --replace "/usr/local/cuda/lib64" "${cuda-native-redist}/lib" ''; - propagatedBuildInputs = [ - (pytorch.override({ inherit cudaSupport;})) - setuptools - ] ++ lib.optionals cudaSupport [ - typer - cudatoolkit - cudatoolkit.lib - nvidia_x11 - ]; + CUDA_HOME = "${cuda-native-redist}"; - nativeBuildInputs = lib.optionals cudaSupport [ - gcc - addOpenGLRunpath - ]; + preBuild = if torch.cudaSupport then + with torch.cudaPackages; + let cudaVersion = lib.concatStrings (lib.splitVersion torch.cudaPackages.cudaMajorMinorVersion); in + ''make CUDA_VERSION=${cudaVersion} cuda${cudaMajorVersion}x'' + else + ''make CUDA_VERSION=CPU cpuonly''; - preFixup = lib.optionalString cudaSupport '' - find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib - do - addOpenGLRunpath "$lib" - patchelf \ - --set-rpath "${libraryPath}" \ - "$lib" - done - ''; + nativeBuildInputs = [ setuptools ] ++ lib.optionals torch.cudaSupport [ cuda-native-redist ]; + buildInputs = lib.optionals torch.cudaSupport [ cuda-redist ]; - checkInputs = [ - pytest + propagatedBuildInputs = [ + torch ]; - # disabled because the test suite cannot find any GPUs in the nix sandbox - doCheck = false; + doCheck = false; # tests require CUDA and also GPU access + nativeCheckInputs = [ pytestCheckHook einops lion-pytorch scipy ]; pythonImportsCheck = [ - "torch" - # this tries to load and verify the cuda device on import, since this runs - # in the nix sandbox it will fail with a bunch of errors, but curiously the - # build still succeeds... "bitsandbytes" ]; -} + meta = with lib; { + homepage = "https://github.com/TimDettmers/bitsandbytes"; + description = "8-bit CUDA functions for PyTorch"; + license = licenses.mit; + maintainers = with maintainers; [ bcdarwin ]; + }; +} diff --git a/Biz/Bild/Deps/lion-pytorch.nix b/Biz/Bild/Deps/lion-pytorch.nix new file mode 100644 index 0000000..e23011a --- /dev/null +++ b/Biz/Bild/Deps/lion-pytorch.nix @@ -0,0 +1,32 @@ +{ lib +, buildPythonPackage +, pythonOlder +, fetchFromGitHub +, torch +}: + +buildPythonPackage rec { + pname = "lion-pytorch"; + version = "0.1.2"; + format = "setuptools"; + disabled = pythonOlder "3.6"; + + src = fetchFromGitHub { + owner = "lucidrains"; + repo = "lion-pytorch"; + rev = "refs/tags/${version}"; + hash = "sha256-9hdpRJvCpv3PeC7f0IXpHt6i+e6LiT0QUl5jeDGelQE="; + }; + + propagatedBuildInputs = [ torch ]; + + pythonImportsCheck = [ "lion_pytorch" ]; + doCheck = false; # no tests currently + + meta = with lib; { + description = "Optimizer tuned by Google Brain using genetic algorithms"; + homepage = "https://github.com/lucidrains/lion-pytorch"; + license = licenses.mit; + maintainers = with maintainers; [ bcdarwin ]; + }; +} diff --git a/ava.py b/ava.py index c364f6d..c847edd 100755 --- a/ava.py +++ b/ava.py @@ -2,6 +2,8 @@ # : out ava # : dep transformers # : dep torch +# : dep accelerate +# : dep bitsandbytes import transformers import torch import sys @@ -9,8 +11,7 @@ import sys # import sleekxmpp -# model_name = "EleutherAI/gpt-neox-20b" -model_name = "EleutherAI/gpt-j-6B" +model_name = "lmsys/vicuna-33b-v1.3" if torch.cuda.is_available(): device = "cuda:0" -- cgit v1.2.3