from pathlib import Path from typing import Set, Union from warnings import warn from ..utils import print_stderr from .env_vars import get_potentially_lib_path_containing_env_vars CUDA_RUNTIME_LIB: str = "libcudart.so" def purge_unwanted_semicolon(tentative_path: Path) -> Path: """ Special function to handle the following exception: __LMOD_REF_COUNT_PATH=/sw/cuda/11.6.2/bin:2;/mmfs1/home/dettmers/git/sched/bin:1;/mmfs1/home/dettmers/data/anaconda3/bin:1;/mmfs1/home/dettmers/data/anaconda3/condabin:1;/mmfs1/home/dettmers/.local/bin:1;/mmfs1/home/dettmers/bin:1;/usr/local/bin:1;/usr/bin:1;/usr/local/sbin:1;/usr/sbin:1;/mmfs1/home/dettmers/.fzf/bin:1;/mmfs1/home/dettmers/data/local/cuda-11.4/bin:1 """ # if ';' in str(tentative_path): # path_as_str, _ = str(tentative_path).split(';') pass def extract_candidate_paths(paths_list_candidate: str) -> Set[Path]: return {Path(ld_path) for ld_path in paths_list_candidate.split(":") if ld_path} def remove_non_existent_dirs(candidate_paths: Set[Path]) -> Set[Path]: non_existent_directories: Set[Path] = { path for path in candidate_paths if not path.exists() } if non_existent_directories: print_stderr( "WARNING: The following directories listed in your path were found to " f"be non-existent: {non_existent_directories}" ) return candidate_paths - non_existent_directories def get_cuda_runtime_lib_paths(candidate_paths: Set[Path]) -> Set[Path]: return { path / CUDA_RUNTIME_LIB for path in candidate_paths if (path / CUDA_RUNTIME_LIB).is_file() } def resolve_paths_list(paths_list_candidate: str) -> Set[Path]: """ Searches a given environmental var for the CUDA runtime library, i.e. `libcudart.so`. """ return remove_non_existent_dirs(extract_candidate_paths(paths_list_candidate)) def find_cuda_lib_in(paths_list_candidate: str) -> Set[Path]: return get_cuda_runtime_lib_paths( resolve_paths_list(paths_list_candidate) ) def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None: if len(results_paths) > 1: warning_msg = ( f"Found duplicate {CUDA_RUNTIME_LIB} files: {results_paths}.. " "We'll flip a coin and try one of these, in order to fail forward.\n" "Either way, this might cause trouble in the future:\n" "If you get `CUDA error: invalid device function` errors, the above " "might be the cause and the solution is to make sure only one " f"{CUDA_RUNTIME_LIB} in the paths that we search based on your env." ) warn(warning_msg) def determine_cuda_runtime_lib_path() -> Union[Path, None]: """ Searches for a cuda installations, in the following order of priority: 1. active conda env 2. LD_LIBRARY_PATH 3. any other env vars, while ignoring those that - are known to be unrelated (see `bnb.cuda_setup.env_vars.to_be_ignored`) - don't contain the path separator `/` If multiple libraries are found in part 3, we optimistically try one, while giving a warning message. """ candidate_env_vars = get_potentially_lib_path_containing_env_vars() if "CONDA_PREFIX" in candidate_env_vars: conda_libs_path = Path(candidate_env_vars["CONDA_PREFIX"]) / "lib" conda_cuda_libs = find_cuda_lib_in(str(conda_libs_path)) warn_in_case_of_duplicates(conda_cuda_libs) if conda_cuda_libs: return next(iter(conda_cuda_libs)) warn( f'{candidate_env_vars["CONDA_PREFIX"]} did not contain ' f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...' ) if "LD_LIBRARY_PATH" in candidate_env_vars: lib_ld_cuda_libs = find_cuda_lib_in(candidate_env_vars["LD_LIBRARY_PATH"]) if lib_ld_cuda_libs: return next(iter(lib_ld_cuda_libs)) warn_in_case_of_duplicates(lib_ld_cuda_libs) warn( f'{candidate_env_vars["LD_LIBRARY_PATH"]} did not contain ' f'{CUDA_RUNTIME_LIB} as expected! Searching further paths...' ) remaining_candidate_env_vars = { env_var: value for env_var, value in candidate_env_vars.items() if env_var not in {"CONDA_PREFIX", "LD_LIBRARY_PATH"} } cuda_runtime_libs = set() for env_var, value in remaining_candidate_env_vars.items(): cuda_runtime_libs.update(find_cuda_lib_in(value)) if len(cuda_runtime_libs) == 0: print('CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching /usr/local/cuda/lib64...') cuda_runtime_libs.update(find_cuda_lib_in('/usr/local/cuda/lib64')) warn_in_case_of_duplicates(cuda_runtime_libs) return next(iter(cuda_runtime_libs)) if cuda_runtime_libs else None