summary | refs | log | tree | commit | diff
path: root/csrc/kernels.cuh
diff options
context:
space:
mode:
author Tim Dettmers <tim.dettmers@gmail.com> 2022-11-07 18:06:18 -0800
committer Tim Dettmers <tim.dettmers@gmail.com> 2022-11-07 18:06:18 -0800
commit 08fa2e7b01dda8959a930295de9829516f8c77bc (patch)
tree 0c31b3fa012caac459bea4ceda1890c153d81110 /csrc/kernels.cuh
parent 62a333ac40f157e69c4bb86f30ac06b41ca4ff34 (diff)
Fixed bug in cpu quant; faster GPU dequant.
Diffstat (limited to 'csrc/kernels.cuh')
-rw-r--r-- csrc/kernels.cuh | 2
1 file changed, 1 insertion, 1 deletion
diff --git a/csrc/kernels.cuh b/csrc/kernels.cuh
index bdf61b2..cca983b 100644
--- a/csrc/kernels.cuh
+++ b/csrc/kernels.cuh
@@ -15,7 +15,7 @@ __global__ void kQuantize(float * code, float * __restrict__ const A, unsigned c
__global__ void kDequantize(float *code, unsigned char *A, float *out, const int n);
template<typename T, int BLOCK_SIZE, int NUM_PER_TH, int STOCHASTIC> __global__ void kQuantizeBlockwise(float * code, T * __restrict__ const A, float *absmax, unsigned char *out, float * __restrict__ const rand, const int rand_offset, const int n);
-template<typename T, int BLOCK_SIZE, int THREADS, int NUM_PER_TH> __global__ void kDequantizeBlockwise(float *code, unsigned char * __restrict__ const A, float * __restrict__ const absmax, T *out, const int n);
+template<typename T, int BLOCK_SIZE, int THREADS, int NUM_PER_TH> __global__ void kDequantizeBlockwise(float *code, unsigned char * A, float * absmax, T *out, const int n);
template<typename T, int OPTIMIZER, int BLOCK_SIZE, int NUM_VALS>
__global__ void kPreconditionOptimizer32bit2State(T* g, T* p,