From a371be302ddbdf3f36acef1a6fe365672099c9d9 Mon Sep 17 00:00:00 2001 From: Tim Dettmers Date: Tue, 25 Oct 2022 08:01:19 -0700 Subject: Added CUDA SETUP instruction generator. --- CHANGELOG.md | 3 ++ bitsandbytes/cextension.py | 71 +++++++++++++++++++++++++++++++++++--- bitsandbytes/cuda_setup/main.py | 2 +- tests/test_cuda_setup_evaluator.py | 10 ++---- 4 files changed, 74 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a71a0ca..6f68554 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -133,5 +133,8 @@ Bug fixes: ### 0.35.1 +Features: + - Added CUDA instruction generator to fix some installations. + Bug fixes: - Fixed a problem where warning messages would be displayed even though everything worked correctly. diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py index abb3054..4f791e2 100644 --- a/bitsandbytes/cextension.py +++ b/bitsandbytes/cextension.py @@ -1,32 +1,88 @@ import ctypes as ct +import torch + from pathlib import Path from warnings import warn - class CUDASetup(object): _instance = None def __init__(self): raise RuntimeError("Call get_instance() instead") + def generate_instructions(self): + if self.cuda is None: + self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA library was not detected.') + self.add_log_entry('CUDA SETUP: Solution 1): Your paths are probably not up-to-date. 
You can update them via: sudo ldconfig.') + self.add_log_entry('CUDA SETUP: Solution 2): If you do not have sudo rights, you can do the following:') + self.add_log_entry('CUDA SETUP: Solution 2a): Find the cuda library via: find / -name libcuda.so 2>/dev/null') + self.add_log_entry('CUDA SETUP: Solution 2b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_2a') + self.add_log_entry('CUDA SETUP: Solution 2c): For a permanent solution add the export from 2b into your .bashrc file, located at ~/.bashrc') + return + + if self.cudart_path is None: + self.add_log_entry('CUDA SETUP: Problem: The main issue seems to be that the main CUDA runtime library was not detected.') + self.add_log_entry('CUDA SETUP: Solution 1: To solve the issue the libcudart.so location needs to be added to the LD_LIBRARY_PATH variable') + self.add_log_entry('CUDA SETUP: Solution 1a): Find the cuda runtime library via: find / -name libcudart.so 2>/dev/null') + self.add_log_entry('CUDA SETUP: Solution 1b): Once the library is found add it to the LD_LIBRARY_PATH: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:FOUND_PATH_FROM_1a') + self.add_log_entry('CUDA SETUP: Solution 1c): For a permanent solution add the export from 1b into your .bashrc file, located at ~/.bashrc') + self.add_log_entry('CUDA SETUP: Solution 2: If no library was found in step 1a) you need to install CUDA.') + self.add_log_entry('CUDA SETUP: Solution 2a): Download CUDA install script: wget https://github.com/TimDettmers/bitsandbytes/blob/main/cuda_install.sh') + self.add_log_entry('CUDA SETUP: Solution 2b): Install desired CUDA version to desired location. 
The syntax is bash cuda_install.sh CUDA_VERSION PATH_TO_INSTALL_INTO.') + self.add_log_entry('CUDA SETUP: Solution 2b): For example, "bash cuda_install.sh 113 ~/local/" will download CUDA 11.3 and install into the folder ~/local') + return + + make_cmd = f'CUDA_VERSION={self.cuda_version_string}' + if len(self.cuda_version_string) < 3: + make_cmd += ' make cuda92' + elif self.cuda_version_string == '110': + make_cmd += ' make cuda110' + elif self.cuda_version_string[:2] == '11' and int(self.cuda_version_string[2]) > 0: + make_cmd += ' make cuda11x' + + has_cublaslt = self.cc in ["7.5", "8.0", "8.6"] + if not has_cublaslt: + make_cmd += '_nomatmul' + + self.add_log_entry('CUDA SETUP: Something unexpected happened. Please compile from source:') + self.add_log_entry('git clone git@github.com:TimDettmers/bitsandbytes.git') + self.add_log_entry('cd bitsandbytes') + self.add_log_entry(make_cmd) + self.add_log_entry('python setup.py install') + def initialize(self): self.cuda_setup_log = [] + self.lib = None from .cuda_setup.main import evaluate_cuda_setup - binary_name = evaluate_cuda_setup() + binary_name, cudart_path, cuda, cc, cuda_version_string = evaluate_cuda_setup() + self.cudart_path = cudart_path + self.cuda = cuda + self.cc = cc + self.cuda_version_string = cuda_version_string + package_dir = Path(__file__).parent binary_path = package_dir / binary_name try: if not binary_path.exists(): - self.add_log_entry(f"CUDA SETUP: TODO: compile library for specific version: {binary_name}") + self.add_log_entry(f"CUDA SETUP: Required library version not found: {binary_name}. Maybe you need to compile it from source?") legacy_binary_name = "libbitsandbytes.so" self.add_log_entry(f"CUDA SETUP: Defaulting to {legacy_binary_name}...") binary_path = package_dir / legacy_binary_name if not binary_path.exists(): - self.add_log_entry('CUDA SETUP: CUDA detection failed. 
Either CUDA driver not installed, CUDA not installed, or you have multiple conflicting CUDA libraries!') + self.add_log_entry('') + self.add_log_entry('='*48 + 'ERROR' + '='*37) + self.add_log_entry('CUDA SETUP: CUDA detection failed! Possible reasons:') + self.add_log_entry('1. CUDA driver not installed') + self.add_log_entry('2. CUDA not installed') + self.add_log_entry('3. You have multiple conflicting CUDA libraries') + self.add_log_entry('4. Required library not pre-compiled for this bitsandbytes release!') self.add_log_entry('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=113`.') + self.add_log_entry('='*80) + self.add_log_entry('') + self.generate_instructions() self.print_log_stack() raise Exception('CUDA SETUP: Setup Failed!') self.lib = ct.cdll.LoadLibrary(binary_path) @@ -56,6 +112,13 @@ class CUDASetup(object): lib = CUDASetup.get_instance().lib try: + if lib is None and torch.cuda.is_available(): + CUDASetup.get_instance().generate_instructions() + CUDASetup.get_instance().print_log_stack() + raise RuntimeError(''' + CUDA Setup failed despite GPU being available. Inspect the CUDA SETUP outputs to fix your environment! 
+ If you cannot find any issues and suspect a bug, please open an issue with details about your environment: + https://github.com/TimDettmers/bitsandbytes/issues''') lib.cadam32bit_g32 lib.get_context.restype = ct.c_void_p lib.get_cusparse.restype = ct.c_void_p diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py index f8f35f0..f5abda2 100644 --- a/bitsandbytes/cuda_setup/main.py +++ b/bitsandbytes/cuda_setup/main.py @@ -159,4 +159,4 @@ def evaluate_cuda_setup(): binary_name = get_binary_name() - return binary_name + return binary_name, cudart_path, cuda, cc, cuda_version_string diff --git a/tests/test_cuda_setup_evaluator.py b/tests/test_cuda_setup_evaluator.py index 6fbd29f..bf9a003 100644 --- a/tests/test_cuda_setup_evaluator.py +++ b/tests/test_cuda_setup_evaluator.py @@ -93,12 +93,8 @@ def test_full_system(): # but it does not contain the library directly, so we need to look at the a sub-folder version = "" if "CONDA_PREFIX" in os.environ: - ls_output, err = bnb.utils.execute_and_return( - f'ls -l {os.environ["CONDA_PREFIX"]}/lib/libcudart.so' - ) - major, minor, revision = ( - ls_output.split(" ")[-1].replace("libcudart.so.", "").split(".") - ) + ls_output, err = bnb.utils.execute_and_return(f'ls -l {os.environ["CONDA_PREFIX"]}/lib/libcudart.so') + major, minor, revision = (ls_output.split(" ")[-1].replace("libcudart.so.", "").split(".")) version = float(f"{major}.{minor}") if version == "" and "LD_LIBRARY_PATH" in os.environ: @@ -114,6 +110,6 @@ def test_full_system(): assert version > 0 - binary_name = evaluate_cuda_setup() + binary_name, cudart_path, cuda, cc, cuda_version_string = evaluate_cuda_setup() binary_name = binary_name.replace("libbitsandbytes_cuda", "") assert binary_name.startswith(str(version).replace(".", "")) -- cgit v1.2.3