From 08fa2e7b01dda8959a930295de9829516f8c77bc Mon Sep 17 00:00:00 2001
From: Tim Dettmers
Date: Mon, 7 Nov 2022 18:06:18 -0800
Subject: Fixed bug in cpu quant; faster GPU dequant.

---
 tests/test_functional.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'tests/test_functional.py')

diff --git a/tests/test_functional.py b/tests/test_functional.py
index 4642b16..d36dfc1 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -2166,3 +2166,19 @@ def test_kbit_quantile_estimation():
     val2 = F.estimate_quantiles(data, offset=0, num_quantiles=2**bits)
     err = torch.abs(val1-val2).mean()
     assert err < 0.035
+
+
+def test_bench_dequantization():
+    a = torch.rand(1024, 1024, device='cuda').half()
+    qa, SA = F.quantize_blockwise(a)
+
+    max_theoretical_mu = 1024*1024*2/1024**3/672*1000*1000
+    #print(max_theoretical_mu)
+
+    torch.cuda.synchronize()
+    t0 = time.time()
+    for i in range(100):
+        F.dequantize_blockwise(qa, SA, blocksize=2048)
+    torch.cuda.synchronize()
+    #print((time.time()-t0)/1e6)
+
-- 
cgit v1.2.3