summaryrefslogtreecommitdiff
path: root/bitsandbytes/cuda_setup.py
blob: 8cc2c039090511f2ea3f197b2629b13074445b68 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
build is dependent on
- compute capability
    - dependent on GPU family
- CUDA version
- Software:
    - CPU-only: only CPU quantization functions (no optimizer, no matrix multiplication)
    - CuBLAS-LT: full-build 8-bit optimizer
    - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)

package all binaries

evaluation:
    - if paths faulty, return meaningful error
    - else:
        - determine CUDA version
        - determine capabilities
        - based on that set the default path
"""

import ctypes
from os import environ as env
from pathlib import Path
from typing import Set, Union

from .utils import print_err, warn_of_missing_prerequisite


def check_cuda_result(cuda, result_val):
    """Validate the status code returned by a CUDA driver API call.

    Args:
        cuda: Handle to the loaded CUDA driver library (e.g. a ``ctypes.CDLL``);
            only its ``cuGetErrorString`` entry point is used here.
        result_val: Integer status code returned by the driver call; ``0``
            means success.

    Returns:
        ``result_val`` unchanged when it signals success.

    Raises:
        Exception: When ``result_val`` is non-zero. The message includes the
            driver's error description when one could be obtained.
    """
    if result_val == 0:
        return result_val

    # The buffer must live in this scope: cuGetErrorString writes a pointer to
    # a driver-owned, NUL-terminated string into it.
    error_str = ctypes.c_char_p()
    cuda.cuGetErrorString(result_val, ctypes.byref(error_str))

    if error_str.value:
        detail = error_str.value.decode(errors="replace")
    else:
        # The lookup itself can fail (e.g. unknown code); still report the code.
        detail = f"unknown error code {result_val}"

    print("Could not initialize CUDA - failure!")
    raise Exception(f"CUDA exception! {detail}")


# taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
def get_compute_capability():
    """Query the CUDA driver for the compute capability of every visible GPU.

    Tries to load the CUDA driver library under its Linux, macOS and Windows
    names, initialises the driver and asks each device for its compute
    capability.

    Returns:
        A list of ``"major.minor"`` strings, one per device, sorted in
        ascending numeric order (so the last entry is the highest). The list
        is empty when the driver reports zero devices.

    Raises:
        OSError: If none of the candidate driver libraries could be loaded.
        Exception: Propagated from ``check_cuda_result`` when a driver call
            returns a non-zero status.
    """
    libnames = ("libcuda.so", "libcuda.dylib", "cuda.dll")
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        raise OSError("could not load any of: " + " ".join(libnames))

    nGpus = ctypes.c_int()
    cc_major = ctypes.c_int()
    cc_minor = ctypes.c_int()
    device = ctypes.c_int()

    check_cuda_result(cuda, cuda.cuInit(0))
    check_cuda_result(cuda, cuda.cuDeviceGetCount(ctypes.byref(nGpus)))

    capabilities = []
    for i in range(nGpus.value):
        check_cuda_result(cuda, cuda.cuDeviceGet(ctypes.byref(device), i))
        check_cuda_result(
            cuda,
            cuda.cuDeviceComputeCapability(
                ctypes.byref(cc_major), ctypes.byref(cc_minor), device
            ),
        )
        capabilities.append((cc_major.value, cc_minor.value))

    # Sort numerically: sorting the formatted strings would place "10.0"
    # before "7.5".
    capabilities.sort()

    # TODO: handle different compute capabilities; callers currently receive
    # the full sorted list and can take the last entry as the maximum.
    return [f"{major}.{minor}" for major, minor in capabilities]


# File name of the CUDA runtime shared library that is searched for inside
# each directory listed in LD_LIBRARY_PATH.
CUDA_RUNTIME_LIB: str = "libcudart.so"


def tokenize_paths(paths: str) -> Set[Path]:
    """Split a colon-separated path list into a set of ``Path`` objects.

    Empty segments (produced by leading, trailing or doubled colons) are
    dropped, and duplicate entries collapse into one.
    """
    unique_dirs: Set[Path] = set()
    for segment in paths.split(":"):
        if not segment:
            continue
        unique_dirs.add(Path(segment))
    return unique_dirs


def get_cuda_runtime_lib_path(
    # TODO: replace this with logic for all paths in env vars
    LD_LIBRARY_PATH: Union[str, None] = env.get("LD_LIBRARY_PATH")
) -> Union[Path, None]:
    """Locate the single CUDA runtime library reachable via ``LD_LIBRARY_PATH``.

    Args:
        LD_LIBRARY_PATH: Colon-separated list of directories to search. The
            default is read from the environment once, when this function is
            defined, not on each call.

    Returns:
        The path of the ``CUDA_RUNTIME_LIB`` file inside the one directory
        that contains it, or ``None`` (after emitting a warning) when
        ``LD_LIBRARY_PATH`` is empty or unset.

    Raises:
        FileNotFoundError: If no listed directory contains the library, or if
            more than one does.
    """
    if not LD_LIBRARY_PATH:
        warn_of_missing_prerequisite(
            "LD_LIBRARY_PATH is completely missing from environment!"
        )
        return None

    search_dirs: Set[Path] = tokenize_paths(LD_LIBRARY_PATH)

    # Report stale entries, but keep going: the remaining ones may be valid.
    missing_dirs: Set[Path] = set()
    for directory in search_dirs:
        if not directory.exists():
            missing_dirs.add(directory)

    if missing_dirs:
        print_err(
            "WARNING: The following directories listed your path were found to "
            f"be non-existent: {missing_dirs}"
        )

    candidates: Set[Path] = set()
    for directory in search_dirs:
        library = directory / CUDA_RUNTIME_LIB
        if library.is_file():
            candidates.add(library)

    # NOTE(review): candidates are library files that exist, missing_dirs are
    # paths that do not, so this difference looks like a no-op; kept as-is.
    found_libs: Set[Path] = candidates.difference(missing_dirs)

    match_count = len(found_libs)
    if match_count > 1:
        raise FileNotFoundError(
            f"Found duplicate {CUDA_RUNTIME_LIB} files: {found_libs}.."
        )
    if match_count < 1:
        raise FileNotFoundError(
            f"Did not find {CUDA_RUNTIME_LIB} files: {found_libs}.."
        )

    (only_lib,) = found_libs
    return only_lib


def evaluate_cuda_setup():
    """Choose the file name of the bitsandbytes binary matching this machine.

    Looks up the CUDA runtime library and the GPUs' compute capabilities,
    then asks ``nvcc`` for the installed CUDA version to build the name.

    Returns:
        ``"libbitsandbytes_cpu.so"`` when no GPU is reported or the CUDA
        runtime library location is unknown; otherwise
        ``"libbitsandbytes_cuda<major><minor>_<suffix>.so"`` where ``<suffix>``
        is ``"cublaslt"`` for compute capabilities 7.5, 8.0 and 8.6 and empty
        otherwise.

    Raises:
        RuntimeError: If ``nvcc`` cannot be executed or its output does not
            contain a recognisable release number.
        Exception: Whatever ``get_cuda_runtime_lib_path`` or
            ``get_compute_capability`` raise is propagated unchanged.
    """
    # Function-scope imports so the module's import block needs no changes.
    import re
    import subprocess

    cpu_binary_name = "libbitsandbytes_cpu.so"

    cuda_path = get_cuda_runtime_lib_path()
    cc = get_compute_capability()

    if not cc:
        print(
            "WARNING: No GPU detected! Check your CUDA paths. Proceeding to load CPU-only library..."
        )
        return cpu_binary_name

    if cuda_path is None:
        # Without the runtime library location there is no CUDA home to probe.
        print(
            "WARNING: Could not locate the CUDA runtime library. Proceeding to load CPU-only library..."
        )
        return cpu_binary_name

    # get_compute_capability returns a list of "major.minor" strings; pick the
    # numerically highest one (a plain string is tolerated as well).
    capabilities = [cc] if isinstance(cc, str) else list(cc)
    highest_cc = max(
        capabilities, key=lambda version: tuple(int(p) for p in version.split("."))
    )
    has_cublaslt = highest_cc in ("7.5", "8.0", "8.6")

    # TODO:
    # (1) Model missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible)
    # (2) Multiple CUDA versions installed

    # Assumes the layout <cuda_home>/<libdir>/libcudart.so with nvcc located
    # at <cuda_home>/bin/nvcc -- TODO confirm for non-standard installs.
    nvcc = Path(cuda_path).parent.parent / "bin" / "nvcc"
    try:
        completed = subprocess.run(
            [str(nvcc), "--version"], capture_output=True, text=True, check=False
        )
    except OSError as err:
        raise RuntimeError(
            f"Could not run {nvcc} to determine the CUDA version"
        ) from err

    # Match "release <major>.<minor>" anywhere in the output instead of
    # relying on the line position of the version string.
    release = re.search(r"release\s+(\d+)\.(\d+)", completed.stdout)
    if release is None:
        raise RuntimeError(
            f"Could not parse the CUDA version from `{nvcc} --version` output"
        )
    major, minor = release.groups()
    cuda_version_string = f"{major}{minor}"

    # NOTE(review): without cuBLASLt the name ends in "_.so"; verify this
    # matches the file names of the packaged binaries.
    binary_name = f'libbitsandbytes_cuda{cuda_version_string}_{("cublaslt" if has_cublaslt else "")}.so'

    return binary_name