summary refs log tree commit diff
path: root/tests
diff options
context:
space:
mode:
author Tim Dettmers <tim.dettmers@gmail.com> 2022-11-07 18:06:18 -0800
committer Tim Dettmers <tim.dettmers@gmail.com> 2022-11-07 18:06:18 -0800
commit 08fa2e7b01dda8959a930295de9829516f8c77bc (patch)
tree 0c31b3fa012caac459bea4ceda1890c153d81110 /tests
parent 62a333ac40f157e69c4bb86f30ac06b41ca4ff34 (diff)
Fixed bug in cpu quant; faster GPU dequant.
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_functional.py 16
1 files changed, 16 insertions, 0 deletions
diff --git a/tests/test_functional.py b/tests/test_functional.py
index 4642b16..d36dfc1 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -2166,3 +2166,19 @@ def test_kbit_quantile_estimation():
val2 = F.estimate_quantiles(data, offset=0, num_quantiles=2**bits)
err = torch.abs(val1-val2).mean()
assert err < 0.035
+
+
+def test_bench_dequantization():  # Benchmark only: times 100 F.dequantize_blockwise calls on a CUDA tensor and asserts nothing.
+ a = torch.rand(1024, 1024, device='cuda').half()  # 1024x1024 half-precision input created on the GPU
+ qa, SA = F.quantize_blockwise(a)  # qa and SA are handed back to dequantize_blockwise in the timed loop
+
+ max_theoretical_mu = 1024*1024*2/1024**3/672*1000*1000  # never read below; presumably the microseconds needed to move the 2-byte-per-element tensor at 672 GB/s - TODO confirm
+ #print(max_theoretical_mu)
+
+ torch.cuda.synchronize()  # finish any pending GPU work so it is not counted in the timed region
+ t0 = time.time()  # wall-clock start, in seconds
+ for i in range(100):  # 100 repetitions of the call being benchmarked
+ F.dequantize_blockwise(qa, SA, blocksize=2048)  # NOTE(review): quantize_blockwise above was called without a blocksize - confirm 2048 matches what it used
+ torch.cuda.synchronize()  # wait for the GPU to finish before the elapsed time would be read
+ #print((time.time()-t0)/1e6)  # NOTE(review): seconds/1e6 is not microseconds per call; (elapsed/100)*1e6 would be - confirm intent
+