From 62441815bc733c9e75d32dd65305a16aaebd317a Mon Sep 17 00:00:00 2001
From: Tim Dettmers <tim.dettmers@gmail.com>
Date: Mon, 8 Aug 2022 05:20:36 -0700
Subject: Replaced math.prod with a reduce-based prod helper for Python <= 3.7 compatibility.

---
 bitsandbytes/autograd/_functions.py | 14 ++++++++------
 setup.py                            |  2 +-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py
index 14f2660..a5446b7 100644
--- a/bitsandbytes/autograd/_functions.py
+++ b/bitsandbytes/autograd/_functions.py
@@ -1,10 +1,14 @@
-from dataclasses import dataclass
-
+import operator
 import torch
-import math
 import bitsandbytes as bnb
 import bitsandbytes.functional as F
 
+from dataclasses import dataclass
+from functools import reduce  # Required in Python 3
+
+def prod(iterable):
+    return reduce(operator.mul, iterable, 1)
+
 tensor = torch.Tensor
 
 """
@@ -12,8 +16,6 @@ tensor = torch.Tensor
     This is particularly important for small models where outlier features 
     are less systematic and occur with low frequency.
 """
-
-
 class GlobalOutlierPooler(object):
     _instance = None
 
@@ -201,7 +203,7 @@ class MatMul8bitLt(torch.autograd.Function):
     def forward(ctx, A, B, out=None, state=MatmulLtState()):
         # default to pytorch behavior if inputs are empty
         ctx.is_empty = False
-        if math.prod(A.shape) == 0:
+        if prod(A.shape) == 0:
             ctx.is_empty = True
             ctx.A = A
             ctx.B = B
diff --git a/setup.py b/setup.py
index 67b655d..c425ca7 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ def read(fname):
 
 setup(
     name=f"bitsandbytes",
-    version=f"0.31.4",
+    version=f"0.31.5",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="8-bit optimizers and matrix multiplication routines.",
-- 
cgit v1.2.3


From f9cbe2fe99c805dcca934c66677951f428d3b3e2 Mon Sep 17 00:00:00 2001
From: Tim Dettmers <tim.dettmers@gmail.com>
Date: Mon, 8 Aug 2022 09:13:22 -0700
Subject: Fixed math.prod compatibility for Python < 3.8 in functional.py.

---
 bitsandbytes/autograd/_functions.py |  1 +
 bitsandbytes/functional.py          | 12 +++++++++---
 setup.py                            |  2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py
index a5446b7..01e7073 100644
--- a/bitsandbytes/autograd/_functions.py
+++ b/bitsandbytes/autograd/_functions.py
@@ -6,6 +6,7 @@ import bitsandbytes.functional as F
 from dataclasses import dataclass
 from functools import reduce  # Required in Python 3
 
+# math.prod not compatible with python < 3.8
 def prod(iterable):
     return reduce(operator.mul, iterable, 1)
 
diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index b4409e4..1bddb52 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -3,6 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 import ctypes as ct
+import operator
 import random
 import math
 import torch
@@ -11,6 +12,11 @@ from typing import Tuple
 from torch import Tensor
 
 from .cextension import COMPILED_WITH_CUDA, lib
+from functools import reduce  # Required in Python 3
+
+# math.prod not compatible with python < 3.8
+def prod(iterable):
+    return reduce(operator.mul, iterable, 1)
 
 name2qmap = {}
 
@@ -326,8 +332,8 @@ def nvidia_transform(
         dim1 = ct.c_int32(shape[0])
         dim2 = ct.c_int32(shape[1])
     elif ld is not None:
-        n = math.prod(shape)
-        dim1 = math.prod([shape[i] for i in ld])
+        n = prod(shape)
+        dim1 = prod([shape[i] for i in ld])
         dim2 = ct.c_int32(n // dim1)
         dim1 = ct.c_int32(dim1)
     else:
@@ -1314,7 +1320,7 @@ def igemmlt(A, B, SA, SB, out=None, Sout=None, dtype=torch.int32):
         m = shapeA[0] * shapeA[1]
 
     rows = n = shapeB[0]
-    assert math.prod(list(shapeA)) > 0, f'Input tensor dimensions need to be > 0: {shapeA}'
+    assert prod(list(shapeA)) > 0, f'Input tensor dimensions need to be > 0: {shapeA}'
 
     # if the tensor is empty, return a transformed empty tensor with the right dimensions
     if shapeA[0] == 0 and dimsA == 2:
diff --git a/setup.py b/setup.py
index c425ca7..9fb1889 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ def read(fname):
 
 setup(
     name=f"bitsandbytes",
-    version=f"0.31.5",
+    version=f"0.31.7",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="8-bit optimizers and matrix multiplication routines.",
-- 
cgit v1.2.3


From 1ced47c5043ed88b78c288f55f43ec3e66a0f765 Mon Sep 17 00:00:00 2001
From: Tim Dettmers <tim.dettmers@gmail.com>
Date: Tue, 9 Aug 2022 20:02:47 -0700
Subject: Added CUDA version warning and fixed cuda_install for 9.2/10.2.

---
 bitsandbytes/cuda_setup/main.py | 8 ++++++++
 cuda_install.sh                 | 2 +-
 deploy_from_slurm.sh            | 2 +-
 setup.py                        | 2 +-
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py
index f1c845c..1f2ceb4 100644
--- a/bitsandbytes/cuda_setup/main.py
+++ b/bitsandbytes/cuda_setup/main.py
@@ -45,6 +45,9 @@ def get_cuda_version(cuda, cudart_path):
     major = version//1000
     minor = (version-(major*1000))//10
 
+    if major < 11:
+       print('CUDA SETUP: CUDA version lower than 11 are currenlty not supported!')
+
     return f'{major}{minor}'
 
 
@@ -110,6 +113,10 @@ def get_compute_capability(cuda):
 
 
 def evaluate_cuda_setup():
+    print('')
+    print('='*35 + 'BUG REPORT' + '='*35)
+    print('Welcome to bitsandbytes. For bug reports, please use this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
+    print('='*80)
     binary_name = "libbitsandbytes_cpu.so"
     cudart_path = determine_cuda_runtime_lib_path()
     if cudart_path is None:
@@ -121,6 +128,7 @@ def evaluate_cuda_setup():
     print(f"CUDA SETUP: CUDA path found: {cudart_path}")
     cuda = get_cuda_lib_handle()
     cc = get_compute_capability(cuda)
+    print(f"CUDA SETUP: Highest compute capability among GPUs detected: {cc}")
     cuda_version_string = get_cuda_version(cuda, cudart_path)
 
 
diff --git a/cuda_install.sh b/cuda_install.sh
index 856cbe5..4508f4a 100644
--- a/cuda_install.sh
+++ b/cuda_install.sh
@@ -65,7 +65,7 @@ if [[ -n "$CUDA_VERSION" ]]; then
   echo $URL
   echo $FILE
   wget $URL
-  bash $FILE --no-drm --no-man-page --override --installpath=~/local --librarypath=$BASE_PATH/lib --toolkitpath=$BASE_PATH/$FOLDER/ --toolkit --silent
+  bash $FILE --no-drm --no-man-page --override --toolkitpath=$BASE_PATH/$FOLDER/ --toolkit --silent
   echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$BASE_PATH/$FOLDER/lib64/" >> ~/.bashrc
   echo "export PATH=$PATH:$BASE_PATH/$FOLDER/bin/" >> ~/.bashrc
   source ~/.bashrc
diff --git a/deploy_from_slurm.sh b/deploy_from_slurm.sh
index c6ee84d..bc53869 100644
--- a/deploy_from_slurm.sh
+++ b/deploy_from_slurm.sh
@@ -202,4 +202,4 @@ if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so" ]; then
 fi
 
 python -m build
-python -m twine upload dist/* --verbose --repository testpypi
+python -m twine upload dist/* --verbose
diff --git a/setup.py b/setup.py
index 9fb1889..61a5d05 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@ def read(fname):
 
 setup(
     name=f"bitsandbytes",
-    version=f"0.31.7",
+    version=f"0.31.8",
     author="Tim Dettmers",
     author_email="dettmers@cs.washington.edu",
     description="8-bit optimizers and matrix multiplication routines.",
-- 
cgit v1.2.3