debug

author: justheuristic <justheuristic@gmail.com> 2022-09-18 01:13:58 +0300
committer: justheuristic <justheuristic@gmail.com> 2022-09-18 01:13:58 +0300
commit: d9b8789818191f9992733394d7ccfa00a63d4dba (patch)
tree: 43fbc7d352da6aa1818ff5e8974b25c23f8011db /tests
parent: 5d658171017473b54825dfeac21718f4e4be4eca (diff)
1 files changed, 8 insertions, 0 deletions
diff --git a/tests/test_modules.py b/tests/test_modules.py
index d3992a9..c6e7f85 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -545,6 +545,7 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
         .to(torch.float16)
         .to("cuda")
     )
+    w1, w2 = mlp.fc1.weight.clone(), mlp.fc2.weight.clone()
 
     for i in range(100):
         b1 = torch.randn(16, 8, 32, device="cuda").half()
@@ -567,8 +568,15 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
         assert o1.requires_grad
         grad_proj = torch.randn_like(o1)
 
+        mlp.zero_grad()
         (o1 * grad_proj).sum().backward()
 
+        grad_ref = grad_proj.flatten(2) @ w2 @ w1
+        assert torch.allclose(b1.grad, grad_ref)
+
+
+
+
author	justheuristic <justheuristic@gmail.com>	2022-09-18 01:13:58 +0300
committer	justheuristic <justheuristic@gmail.com>	2022-09-18 01:13:58 +0300
commit	d9b8789818191f9992733394d7ccfa00a63d4dba (patch)
tree	43fbc7d352da6aa1818ff5e8974b25c23f8011db /tests
parent	5d658171017473b54825dfeac21718f4e4be4eca (diff)