author     justheuristic <justheuristic@gmail.com>    2022-09-18 01:24:27 +0300
committer  justheuristic <justheuristic@gmail.com>    2022-09-18 01:24:27 +0300
commit     28a9313ddcf09c40d6cea75b3fd932ef09b4c715
tree       b268f18585e4ca43cb39496fd4f7d345c49ad64f
parent     95dafc6475bc36490e213269d1028adfd4f75363
cast before allclose
 tests/test_modules.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/test_modules.py b/tests/test_modules.py
index dbadea9..bb65edb 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -541,7 +541,7 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
     mlp = MLP8bit(
         32, 64, threshold=threshold, has_fp16_weights=False, memory_efficient_backward=memory_efficient_backward
     )
-    w1, w2 = mlp.fc1.weight.clone(), mlp.fc2.weight.clone()  # note: we grad original weights before quantization,
+    w1, w2 = mlp.fc1.weight.clone().cuda(), mlp.fc2.weight.clone().cuda()  # grab weights before quantization,
     mlp = mlp.cuda().half()  # and this line triggers quantization

     for i in range(100):
@@ -567,7 +567,7 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
         mlp.zero_grad()
         (o1 * grad_proj).sum().backward()
-        grad_ref = grad_proj.flatten(2) @ w2.to(grad_proj.device) @ w1.to(grad_proj.device)
+        grad_ref = grad_proj.flatten(2) @ w2.half() @ w1.half()
         assert torch.allclose(b1.grad, grad_ref)
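
For context, a minimal sketch of the failure mode this commit addresses. It is not part of the commit: the shapes and the variable names w and grad_proj are illustrative stand-ins, and a CUDA device is assumed, as the test itself requires one. The reference weights cloned before quantization are float32, while the quantized MLP produces float16 gradients, so without a cast both the reference matmul and the final allclose see mismatched dtypes.

import torch

# Sketch, assuming a CUDA device (the test requires one). w stands in for
# w1/w2: a float32 clone of a weight grabbed before quantization;
# grad_proj is float16, like the gradient flowing out of the half() MLP.
w = torch.randn(32, 64, device="cuda")                     # float32 reference weight
grad_proj = torch.randn(16, 8, 32, device="cuda").half()  # float16 gradient

try:
    grad_proj.flatten(2) @ w  # Half @ Float matmul raises a dtype-mismatch RuntimeError
except RuntimeError as e:
    print(e)

try:
    torch.allclose(grad_proj, grad_proj.float())  # allclose likewise rejects mixed dtypes
except RuntimeError as e:
    print(e)

# Casting the float32 reference weights before the comparison, as the
# commit does, makes both the matmul and the allclose check well-typed:
grad_ref = grad_proj.flatten(2) @ w.half()  # shape (16, 8, 64), float16, on CUDA

The .clone().cuda() half of the change plays the same role for devices: moving the clones to CUDA up front replaces the per-use .to(grad_proj.device) calls and guarantees grad_ref ends up on the same device as b1.grad, which torch.allclose also requires.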