From 7e0fb655e1e040221054886fbee9d5682aa6e4e2 Mon Sep 17 00:00:00 2001
From: Tim Dettmers
Date: Tue, 23 Aug 2022 13:59:34 -0700
Subject: Some initial code. Needs to be tested.

---
 bitsandbytes/cuda_setup/main.py |  9 +++++++--
 bitsandbytes/functional.py      |  5 ++---
 bitsandbytes/optim/__init__.py  | 15 +++++++--------
 3 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'bitsandbytes')

diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py
index 975b772..d305c64 100644
--- a/bitsandbytes/cuda_setup/main.py
+++ b/bitsandbytes/cuda_setup/main.py
@@ -17,6 +17,7 @@ evaluation:
 """
 
 import ctypes
+import torch
 
 from pathlib import Path
 from ..utils import execute_and_return
@@ -28,7 +29,7 @@ def check_cuda_result(cuda, result_val):
     if result_val != 0:
         error_str = ctypes.c_char_p()
         cuda.cuGetErrorString(result_val, ctypes.byref(error_str))
-        raise Exception(f"CUDA exception! Error code: {error_str.value.decode()}")
+        print(f"CUDA exception! Error code: {error_str.value.decode()}")
 
 def get_cuda_version(cuda, cudart_path):
     # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION
@@ -57,7 +58,7 @@ def get_cuda_lib_handle():
         cuda = ctypes.CDLL("libcuda.so")
     except OSError:
         # TODO: shouldn't we error or at least warn here?
-        raise Exception('CUDA SETUP: ERROR! libcuda.so not found! Do you have a CUDA driver installed? If you are on a cluster, make sure you are on a CUDA machine!')
+        print('CUDA SETUP: WARNING! libcuda.so not found! Do you have a CUDA driver installed? If you are on a cluster, make sure you are on a CUDA machine!')
         return None
 
     check_cuda_result(cuda, cuda.cuInit(0))
@@ -119,6 +120,10 @@ def evaluate_cuda_setup():
     print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
     print('='*80)
     binary_name = "libbitsandbytes_cpu.so"
+    #if not torch.cuda.is_available():
+        #print('No GPU detected. Loading CPU library...')
+        #return binary_name
+
     cudart_path = determine_cuda_runtime_lib_path()
     if cudart_path is None:
         print(
diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index 6637554..745e7e4 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -1686,11 +1686,10 @@ def double_quant(
 
 
 def get_special_format_str():
+    if not torch.cuda.is_available(): return 'col_turning'
     major, minor = torch.cuda.get_device_capability()
     if major < 7:
-        print(
-            f"Device with CUDA capability of {major} not supported for 8-bit matmul. Device has no tensor cores!"
-        )
+        print(f"Device with CUDA capability of {major} not supported for 8-bit matmul. Device has no tensor cores!")
         assert major >= 7
 
     if major == 7: return 'col_turing'
diff --git a/bitsandbytes/optim/__init__.py b/bitsandbytes/optim/__init__.py
index a76d717..d18f1d1 100644
--- a/bitsandbytes/optim/__init__.py
+++ b/bitsandbytes/optim/__init__.py
@@ -5,13 +5,12 @@
 
 from bitsandbytes.cextension import COMPILED_WITH_CUDA
 
-if COMPILED_WITH_CUDA:
-    from .adam import Adam, Adam8bit, Adam32bit
-    from .adamw import AdamW, AdamW8bit, AdamW32bit
-    from .sgd import SGD, SGD8bit, SGD32bit
-    from .lars import LARS, LARS8bit, LARS32bit, PytorchLARS
-    from .lamb import LAMB, LAMB8bit, LAMB32bit
-    from .rmsprop import RMSprop, RMSprop8bit, RMSprop32bit
-    from .adagrad import Adagrad, Adagrad8bit, Adagrad32bit
+from .adam import Adam, Adam8bit, Adam32bit
+from .adamw import AdamW, AdamW8bit, AdamW32bit
+from .sgd import SGD, SGD8bit, SGD32bit
+from .lars import LARS, LARS8bit, LARS32bit, PytorchLARS
+from .lamb import LAMB, LAMB8bit, LAMB32bit
+from .rmsprop import RMSprop, RMSprop8bit, RMSprop32bit
+from .adagrad import Adagrad, Adagrad8bit, Adagrad32bit
 
 from .optimizer import GlobalOptimManager
-- 
cgit v1.2.3


From ee5b947e63c2340405f25e4e83066f39292bc0ed Mon Sep 17 00:00:00 2001
From: Tim Dettmers
Date: Tue, 23 Aug 2022 16:00:26 -0700
Subject: Fixed issue where Pascal was not displaying proper error.

---
 bitsandbytes/functional.py | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

(limited to 'bitsandbytes')

diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index 745e7e4..75d083b 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -185,14 +185,9 @@ def create_dynamic_map(signed=True, n=7):
 
 
 def get_special_format_str():
+    if not torch.cuda.is_available(): return 'col_turing'
     major, minor = torch.cuda.get_device_capability()
-    if major < 7:
-        print(
-            f"Device with CUDA capability of {major} not supported for 8-bit matmul. Device has no tensor cores!"
-        )
-        assert major >= 7
-
-    if major == 7:
+    if major <= 7:
         return "col_turing"
     elif major == 8:
         return "col_ampere"
@@ -1685,20 +1680,6 @@ def double_quant(
     return out_row, out_col, row_stats, col_stats, coo_tensor
 
 
-def get_special_format_str():
-    if not torch.cuda.is_available(): return 'col_turning'
-    major, minor = torch.cuda.get_device_capability()
-    if major < 7:
-        print(f"Device with CUDA capability of {major} not supported for 8-bit matmul. Device has no tensor cores!")
-        assert major >= 7
-
-    if major == 7: return 'col_turing'
-    elif major == 8: return 'col_ampere'
-    else: return 'col_turing'
-
-
-
-
 def transform(A, to_order, from_order='row', out=None, transpose=False, state=None, ld=None):
     prev_device = pre_call(A.device)
     if state is None: state = (A.shape, from_order)
-- 
cgit v1.2.3
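
For reference, the net effect of these two commits on get_special_format_str, reassembled from the hunks above rather than copied from the repository; the final fallback after the Ampere branch lies outside the context lines shown and is an assumption:

    import torch

    def get_special_format_str():
        # No CUDA device present: return the Turing layout string instead of raising.
        if not torch.cuda.is_available(): return 'col_turing'
        major, minor = torch.cuda.get_device_capability()
        # Compute capability <= 7 (Pascal, Volta, Turing) maps to the Turing
        # tile layout; Ampere (8.x) gets its own layout string.
        if major <= 7:
            return "col_turing"
        elif major == 8:
            return "col_ampere"
        # Assumed fallback for newer architectures (not visible in the hunk).
        return "col_turing"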