Diffstat (limited to 'bitsandbytes/autograd/_functions.py')
-rw-r--r--  bitsandbytes/autograd/_functions.py  |  45
1 file changed, 36 insertions(+), 9 deletions(-)
diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py
index a08b560..b56b2ee 100644
--- a/bitsandbytes/autograd/_functions.py
+++ b/bitsandbytes/autograd/_functions.py
@@ -111,7 +111,9 @@ class MatMul8bit(torch.autograd.Function):
qgrad_output, S1 = F.vectorwise_quant(
grad_output, dim=dims, quant_type=quant_type
)
- qA, S2 = F.vectorwise_quant(A, dim=dims, quant_type=quant_type)
+ qA, S2 = F.vectorwise_quant(
+ A, dim=dims, quant_type=quant_type
+ )
igrad_B = F.igemm(qA.permute(permute_dim), qgrad_output)
grad_B = F.vectorwise_mm_dequant(
igrad_B,
@@ -146,7 +148,11 @@ class MatMul8bit(torch.autograd.Function):
qB, S3 = F.vectorwise_quant(B, dim=dim_B, quant_type=quant_type)
igrad_A = F.igemm(qgrad_output, qB.permute(permute_dim))
grad_A = F.vectorwise_mm_dequant(
- igrad_A, S1, S3.permute(permute_dim), grad_output.dtype, quant_type
+ igrad_A,
+ S1,
+ S3.permute(permute_dim),
+ grad_output.dtype,
+ quant_type,
)
return grad_A, grad_B, None, None, None
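Both hunks above only re-wrap MatMul8bit's existing int8 backward pattern: quantize each operand per vector, run the integer matmul, and dequantize the int32 result with the two scale tensors. A minimal sketch of that pattern for the 2-D case (the shapes and quant_type="vector" are illustrative assumptions; igemm needs a CUDA device):

    import torch
    import bitsandbytes.functional as F

    A = torch.randn(32, 64, device="cuda", dtype=torch.float16)
    grad_output = torch.randn(32, 128, device="cuda", dtype=torch.float16)

    # quantize both operands along the batch dimension
    qgrad_output, S1 = F.vectorwise_quant(grad_output, dim=0, quant_type="vector")
    qA, S2 = F.vectorwise_quant(A, dim=0, quant_type="vector")

    # int8 matmul with int32 accumulation: grad_B = A^T @ grad_output
    igrad_B = F.igemm(qA.t(), qgrad_output)

    # rescale the int32 result back to fp16 using both quantization statistics
    grad_B = F.vectorwise_mm_dequant(igrad_B, S2.t(), S1, grad_output.dtype, "vector")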
@@ -211,7 +217,9 @@ class MatMul8bitLt(torch.autograd.Function):
# 1. Quantize A
if len(A.shape) == 3:
A = A.view(-1, A.shape[-1]).contiguous()
- CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A, threshold=state.threshold)
+ CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(
+ A, threshold=state.threshold
+ )
if state.threshold > 0.0 and coo_tensorA is not None:
if state.has_fp16_weights:
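For context, this F.double_quant call is step "# 1. Quantize A" of the forward: it returns a row-wise and a column-wise int8 copy of A together with their absmax statistics, plus a sparse COO tensor of the entries that exceeded the threshold (None if nothing did). A rough usage sketch; the shapes and the 6.0 threshold are illustrative assumptions:

    import torch
    import bitsandbytes.functional as F

    A = torch.randn(16, 4096, device="cuda", dtype=torch.float16)

    # CA/SCA: row-wise int8 values and absmax stats; CAt/SCAt: column-wise counterparts.
    # coo_tensorA holds the outlier entries of A, or is None when nothing crossed the threshold.
    CA, CAt, SCA, SCAt, coo_tensorA = F.double_quant(A, threshold=6.0)

    assert CA.dtype == torch.int8 and CA.shape == A.shape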
@@ -225,7 +233,9 @@ class MatMul8bitLt(torch.autograd.Function):
if state.CxB is None:
# B is in 8-bit row-major; we can transform it back to 16-bit to extract outlier dimensions
# we also need to convert it to the turing/ampere format
- state.CxB, state.SB = F.transform(state.CB, to_order=formatB)
+ state.CxB, state.SB = F.transform(
+ state.CB, to_order=formatB
+ )
# state.B = (state.CB.float()*(state.SCB.view(-1, 1)/127)).half()
# if state.threshold > 0.0 and coo_tensorA is not None and state.idx is None and state.CB is not None:
# # generate outlier index and subB
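The wrapped F.transform call is what converts the row-major int8 weight into the tiled layout the int8 kernels expect on the current GPU; formatB names that layout. A hedged sketch of the same conversion outside the autograd function, assuming F.get_special_format_str() is the helper this module uses to resolve formatB:

    import torch
    import bitsandbytes.functional as F

    formatB = F.get_special_format_str()  # e.g. "col_turing" or "col_ampere" (assumed helper)

    W = torch.randn(4096, 4096, device="cuda", dtype=torch.float16)
    CB, CBt, SCB, SCBt, _ = F.double_quant(W)

    # row-major int8 -> GPU-specific tiled layout consumed by the int8 matmul kernels
    CxB, SB = F.transform(CB, to_order=formatB)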
@@ -259,7 +269,13 @@ class MatMul8bitLt(torch.autograd.Function):
if (state.is_training and not has_grad) or state.CxB is None:
state.reset_grads()
- CB, state.CBt, state.SCB, state.SCBt, coo_tensorB = F.double_quant(B)
+ (
+ CB,
+ state.CBt,
+ state.SCB,
+ state.SCBt,
+ coo_tensorB,
+ ) = F.double_quant(B)
state.CxB, state.SB = F.transform(CB, to_order=formatB)
else:
has_grad = False
@@ -277,7 +293,10 @@ class MatMul8bitLt(torch.autograd.Function):
# state.idx = outlier_idx
outliers = F.extract_outliers(state.CxB, state.SB, state.idx.int())
state.subB = (
- (outliers * state.SCB.view(-1, 1) / 127.0).t().contiguous().half()
+ (outliers * state.SCB.view(-1, 1) / 127.0)
+ .t()
+ .contiguous()
+ .half()
)
CA[:, state.idx.long()] = 0
CAt[:, state.idx.long()] = 0
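The reformatted chain builds state.subB, the fp16 slice of B covering the outlier columns selected by state.idx; those columns are then zeroed in CA/CAt so they contribute only through the fp16 path. A simplified torch-only sketch of that mixed-precision decomposition (the diff assembles subB from extract_outliers and the cached int8 weight; the helper below is purely illustrative):

    import torch

    def decomposed_matmul(A, W, idx):
        # A: (batch, in) fp16 activations, W: (out, in) fp16 weights,
        # idx: indices of the outlier columns of A.
        A_main = A.clone()
        A_main[:, idx] = 0                 # these columns would go through the int8 path
        subA = A[:, idx]                   # fp16 outlier activations
        subB = W[:, idx].t().contiguous()  # fp16 weight slice, analogous to state.subB

        main = A_main @ W.t()              # stand-in for quantize -> int8 matmul -> dequantize
        return main + subA @ subB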
@@ -325,10 +344,14 @@ class MatMul8bitLt(torch.autograd.Function):
SCAt, idx = ctx.tensor_states
formatB = ctx.formatB
state = ctx.state
- assert state.has_fp16_weights, "Backprop only supported for fp16 weights."
+ assert (
+ state.has_fp16_weights
+ ), "Backprop only supported for fp16 weights."
if len(grad_output.shape) == 3:
- grad_output = grad_output.view(-1, grad_output.shape[-1]).contiguous()
+ grad_output = grad_output.view(
+ -1, grad_output.shape[-1]
+ ).contiguous()
grad_A = grad_B = None
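The reshape itself is unchanged: the int8 kernels operate on 2-D matrices, so a (batch, seq, hidden) gradient is flattened before the backward matmuls. A trivial illustration with assumed shapes:

    import torch

    grad_output = torch.randn(8, 128, 4096, dtype=torch.float16)  # (batch, seq, hidden)

    # collapse batch and sequence so the gradient is a plain 2-D matrix
    grad_2d = grad_output.view(-1, grad_output.shape[-1]).contiguous()
    assert grad_2d.shape == (8 * 128, 4096)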
@@ -359,7 +382,11 @@ matmul = MatMul8bitLt.apply
def matmul(
- A: tensor, B: tensor, out: tensor = None, state: MatmulLtState = None, threshold=0.0
+ A: tensor,
+ B: tensor,
+ out: tensor = None,
+ state: MatmulLtState = None,
+ threshold=0.0,
):
state = state or MatmulLtState()
if threshold > 0.0:
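For reference, a small end-to-end sketch of calling the re-wrapped matmul with a fresh MatmulLtState; shapes and the threshold value are illustrative, and fp16 weights on a CUDA device are assumed (the assert in backward above requires state.has_fp16_weights):

    import torch
    from bitsandbytes.autograd._functions import matmul, MatmulLtState

    A = torch.randn(16, 4096, device="cuda", dtype=torch.float16, requires_grad=True)
    B = torch.randn(11008, 4096, device="cuda", dtype=torch.float16, requires_grad=True)

    state = MatmulLtState()
    state.has_fp16_weights = True   # backward is only supported for fp16 weights

    # threshold > 0.0 turns on the int8/fp16 outlier decomposition
    out = matmul(A, B, state=state, threshold=6.0)
    out.sum().backward()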