# Provenance: bitsandbytes/cuda_setup/paths.py as introduced by commit
# 59a615b3869eb8488a748e2aa51224a5e3d366bb (Titus von Koeller,
# Tue, 2 Aug 2022 21:26:50 -0700), subject: "factored cuda_setup.main out
# into smaller modules and functions".
from pathlib import Path
from typing import Set, Union
from warnings import warn

from ..utils import print_stderr
from .env_vars import get_potentially_lib_path_containing_env_vars


CUDA_RUNTIME_LIB: str = "libcudart.so"


def purge_unwanted_semicolon(tentative_path: Path) -> Path:
    """
    Special function to handle the following exception:
    __LMOD_REF_COUNT_PATH=/sw/cuda/11.6.2/bin:2;/mmfs1/home/dettmers/git/sched/bin:1;/mmfs1/home/dettmers/data/anaconda3/bin:1;/mmfs1/home/dettmers/data/anaconda3/condabin:1;/mmfs1/home/dettmers/.local/bin:1;/mmfs1/home/dettmers/bin:1;/usr/local/bin:1;/usr/bin:1;/usr/local/sbin:1;/usr/sbin:1;/mmfs1/home/dettmers/.fzf/bin:1;/mmfs1/home/dettmers/data/local/cuda-11.4/bin:1

    NOTE: this is an unimplemented placeholder. It ignores `tentative_path`
    and implicitly returns ``None`` despite the ``Path`` annotation.
    """
    # TODO: strip the `;`-separated suffixes shown above from
    # `tentative_path`. The value may contain several `;`, so a plain
    # two-way split is not sufficient.
    pass


def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]:
    """
    Split a `:`-separated string into a set of `Path` objects.

    Empty segments (e.g. from a leading, trailing or doubled `:`) are dropped.
    """
    return {Path(ld_path) for ld_path in paths_list_candidate.split(":") if ld_path}


def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]:
    """
    Return only those `candidate_paths` that exist on the file system.

    All discarded paths are reported once via `print_stderr`. A path whose
    existence check itself raises `OSError` is discarded as well, so that a
    single malformed entry cannot abort the whole search.
    """
    existent_directories: Set[Path] = set()
    for path in candidate_paths:
        try:
            if path.exists():
                existent_directories.add(path)
        except OSError:
            # The candidates are fragments of arbitrary env var values;
            # `Path.exists()` can raise for some of them (for example when
            # a path component is too long). Such entries are unusable.
            continue

    non_existent_directories: Set[Path] = candidate_paths - existent_directories

    if non_existent_directories:
        print_stderr(
            "WARNING: The following directories listed in your path were found to "
            f"be non-existent: {non_existent_directories}"
        )

    return existent_directories


def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]:
    """
    Return the full path `<dir>/libcudart.so` for every directory in
    `candidate_paths` that directly contains such a regular file.
    """
    return {
        path / CUDA_RUNTIME_LIB
        for path in candidate_paths
        if (path / CUDA_RUNTIME_LIB).is_file()
    }


def resolve_paths_list(paths_list_candidate: str) -> Set[Path]:
    """
    Turn a `:`-separated list of paths (typically the value of an
    environmental var) into the set of those entries that actually exist.

    This does not look for the CUDA runtime library itself; see
    `find_cuda_lib_in` for that.
    """
    return remove_non_existent_dirs(extract_candidate_paths(paths_list_candidate))


def find_cuda_lib_in(paths_list_candidate: str) -> Set[Path]:
    """
    Search every existing directory named in the `:`-separated
    `paths_list_candidate` for the CUDA runtime library, i.e. `libcudart.so`,
    and return the paths of all copies found.
    """
    return get_cuda_runtime_lib_paths(
        resolve_paths_list(paths_list_candidate)
    )


def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None:
    """
    Emit a warning if more than one CUDA runtime library was found.

    Does nothing for an empty set or a single result.
    """
    if len(results_paths) > 1:
        warning_msg = (
            f"Found duplicate {CUDA_RUNTIME_LIB} files: {results_paths}.. "
            "We'll flip a coin and try one of these, in order to fail forward.\n"
            "Either way, this might cause trouble in the future:\n"
            "If you get `CUDA error: invalid device function` errors, the above "
            "might be the cause and the solution is to make sure only one "
            f"{CUDA_RUNTIME_LIB} in the paths that we search based on your env."
        )
        warn(warning_msg)


def determine_cuda_runtime_lib_path() -> Union[Path, None]:
    """
    Searches for a cuda installation, in the following order of priority:
        1. active conda env
        2. LD_LIBRARY_PATH
        3. any other env vars, while ignoring those that
            - are known to be unrelated (see `bnb.cuda_setup.env_vars.to_be_ignored`)
            - don't contain the path separator `/`

    If multiple libraries are found at any stage, we optimistically try one
    of them, while giving a warning message.

    Returns the path of the selected `libcudart.so`, or `None` if no copy
    could be found in any of the searched locations.
    """
    candidate_env_vars = get_potentially_lib_path_containing_env_vars()

    if "CONDA_PREFIX" in candidate_env_vars:
        conda_libs_path = Path(candidate_env_vars["CONDA_PREFIX"]) / "lib"

        conda_cuda_libs = find_cuda_lib_in(str(conda_libs_path))
        warn_in_case_of_duplicates(conda_cuda_libs)

        if conda_cuda_libs:
            return next(iter(conda_cuda_libs))

        warn(
            f'{candidate_env_vars["CONDA_PREFIX"]} did not contain '
            f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...'
        )

    if "LD_LIBRARY_PATH" in candidate_env_vars:
        lib_ld_cuda_libs = find_cuda_lib_in(candidate_env_vars["LD_LIBRARY_PATH"])
        # Must run before the early return below; otherwise it would only
        # ever see an empty set and could never report duplicates.
        warn_in_case_of_duplicates(lib_ld_cuda_libs)

        if lib_ld_cuda_libs:
            return next(iter(lib_ld_cuda_libs))

        warn(
            f'{candidate_env_vars["LD_LIBRARY_PATH"]} did not contain '
            f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...'
        )

    remaining_candidate_env_vars = {
        env_var: value for env_var, value in candidate_env_vars.items()
        if env_var not in {"CONDA_PREFIX", "LD_LIBRARY_PATH"}
    }

    cuda_runtime_libs: Set[Path] = set()
    # Only the values hold path lists. Iterating the dict itself would yield
    # the key strings and fail when unpacking them into two names.
    for value in remaining_candidate_env_vars.values():
        cuda_runtime_libs.update(find_cuda_lib_in(value))

    warn_in_case_of_duplicates(cuda_runtime_libs)

    return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else None