summaryrefslogtreecommitdiff
path: root/csrc/ops.cuh
diff options
context:
space:
mode:
Diffstat (limited to 'csrc/ops.cuh')
-rw-r--r--csrc/ops.cuh81
1 files changed, 81 insertions, 0 deletions
diff --git a/csrc/ops.cuh b/csrc/ops.cuh
new file mode 100644
index 0000000..e6033cb
--- /dev/null
+++ b/csrc/ops.cuh
@@ -0,0 +1,81 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+//
+// This source code is licensed under the MIT license found in the
+// LICENSE file in the root directory of this source tree.
+
+
+#ifndef ops_H
+#define ops_H
+
+#include <stdio.h>
+#include <iostream>
+#include <unistd.h>
+#include <assert.h>
+
+#include <cuda_runtime_api.h>
+#include <cuda_fp16.h>
+
+#define CUDA_CHECK_RETURN(value) { /* Evaluates a CUDA runtime call once; on any status other than cudaSuccess prints the error string, line and file to stderr, then exits with status 1. */ \
+ cudaError_t _m_cudaStat = (value); /* argument is parenthesized so the complete expression is what gets evaluated and stored */ \
+ if (_m_cudaStat != cudaSuccess) { \
+ fprintf(stderr, "Error %s at line %d in file %s\n", \
+ cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); /* __LINE__ and __FILE__ expand at the place the macro is used */ \
+ exit(1); /* terminates the process immediately; nothing is released or synchronized first */ \
+ } }
+
+#define THREADS_PER_BLOCKS (512) // Compile-time constant 512. Presumably the thread count per CUDA block used by the launchers in the implementation file - TODO confirm.
+
+typedef enum Operations_t // Operation selector; this header declares a single enumerator.
+{
+ ksmul = 0, // NOTE(review): the name suggests a scalar multiply, but no use is visible in this header - confirm.
+} Operations_t;
+
+typedef enum Optimizer_t // Optimizer identifiers with explicitly assigned integer values 0 through 3.
+{
+ ADAM = 0,
+ MOMENTUM = 1,
+ RMSPROP = 2,
+ LARS = 3,
+} Optimizer_t; // NOTE(review): presumably these values are what the `int OPTIMIZER` template parameters of the optimizer* functions receive - confirm.
+
+
+template <typename T> void estimateQuantiles(T *A, float *code, float offset, int n); // Declaration only; the definition is not in this header. Presumably estimates quantiles of the n elements of A and writes them to code - TODO confirm, including the meaning of offset.
+
+void quantize(float *code, float *A, unsigned char *out, int n); // Declaration only. Presumably maps n floats from A to one-byte codes in out using the table in code - TODO confirm against the definition.
+void dequantize(float *code, unsigned char *A, float *out, int n); // Declaration only. Presumably the inverse of quantize: n one-byte codes in A become floats in out via code - TODO confirm.
+template <typename T, int STOCHASTIC> void quantizeBlockwise(float * code, T *A, float *absmax, unsigned char *out, float* rand, int rand_offset, const int n); // Declaration only. NOTE(review): takes no block_size argument, unlike dequantizeBlockwise; rand/rand_offset presumably feed the STOCHASTIC variant - confirm.
+template<typename T> void dequantizeBlockwise(float *code, unsigned char *A, float *absmax, T *out, int block_size, const int n); // Declaration only. Presumably decodes n one-byte values from A into out, scaled per block by absmax - TODO confirm.
+
+template<typename T, int OPTIMIZER> void optimizer32bit(T* g, T* p, // Declaration only. NOTE(review): g precedes p here, whereas optimizerStatic8bit and optimizerStatic8bitBlockwise take p first.
+ float* state1, float* state2, float *unorm, float max_unorm, float param_norm, // state1/state2 are float buffers in this variant (presumably optimizer state - confirm)
+ float beta1, float beta2, float eps, float weight_decay, // scalar hyperparameters, passed by value
+ int step, float lr, const float gnorm_scale, int n); // n is presumably the element count of g and p - TODO confirm
+
+template<typename T, int OPTIMIZER> void optimizerStatic8bit(T* p, T* g, unsigned char* state1, unsigned char* state2, // Declaration only; state buffers are unsigned char here, float in optimizer32bit.
+ float *unorm, float max_unorm, float param_norm,
+ float beta1, float beta2,
+ float eps, int step, float lr,
+ float* quantiles1, float* quantiles2, // presumably lookup tables used to decode state1 and state2 - TODO confirm
+ float* max1, float* max2, float* new_max1, float* new_max2, // NOTE(review): looks like current and updated scaling maxima for the two states - confirm
+ float weight_decay,
+ const float gnorm_scale, int n); // n is presumably the element count of p and g - TODO confirm
+
+template<typename T, int OPTIMIZER> void optimizerStatic8bitBlockwise(T* p, T* g, // Declaration only; p precedes g, matching optimizerStatic8bit.
+ unsigned char* state1, unsigned char* state2, float beta1, float beta2, float eps, int step, float lr, // NOTE(review): no unorm/max_unorm/param_norm arguments, unlike the other two optimizer entry points
+ float* quantiles1, float* quantiles2, float* absmax1, float* absmax2, float weight_decay, const float gnorm_scale, int n); // absmax1/absmax2 are presumably per-block scales for the two states - TODO confirm
+
+template<typename T> void percentileClipping(T * g, float *gnorm_vec, int step, const int n); // Declaration only. Presumably records a norm of the n-element gradient g into gnorm_vec for the given step - TODO confirm against the definition.
+
+void quantize_cpu(float *code, float *A, float *absmax, unsigned char *out, int n); // Declaration only. Presumably a host-side quantization of n floats in A with absmax scaling - TODO confirm; the name is the only evidence of CPU execution here.
+void dequantize_cpu(float *code, unsigned char *A, float *absmax, float *out, int n); // Declaration only. Presumably the host-side inverse of quantize_cpu: n one-byte codes in A become floats in out - TODO confirm.
+
+void histogramScatterAdd2D(float* histogram, int *index1, int *index2, float *src, int maxidx1, int n); // Declaration only. Presumably adds each of the n src values into histogram at the 2D position given by index1/index2 - TODO confirm the layout and the role of maxidx1.
+
+#endif
+
+
+
+
+
+
+