ggml : sync latest ggml repo

- new Q4 and Q8 quantization
- updated CUDA
This commit is contained in:
Georgi Gerganov
2023-05-20 18:56:30 +03:00
parent bc89f285d8
commit e410cfc3ce
5 changed files with 502 additions and 295 deletions

16
ggml.h
View File

@@ -190,7 +190,7 @@
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
#define GGML_FILE_VERSION 1
#define GGML_QNT_VERSION 1 // bump this on quantization format changes
#define GGML_QNT_VERSION 2 // bump this on quantization format changes
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
#define GGML_MAX_DIMS 4
@@ -313,6 +313,7 @@ extern "C" {
GGML_OP_ROPE,
GGML_OP_ROPE_BACK,
GGML_OP_ALIBI,
GGML_OP_CLAMP,
GGML_OP_CONV_1D_1S,
GGML_OP_CONV_1D_2S,
@@ -849,7 +850,7 @@ extern "C" {
int n_past);
// in-place, returns view(a)
GGML_API struct ggml_tensor * gml_diag_mask_zero_inplace(
GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_past);
@@ -897,7 +898,16 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_past,
int n_head);
int n_head,
float bias_max);
// clamp
// in-place, returns view(a)
struct ggml_tensor * ggml_clamp(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);
// padding = 1
// TODO: we don't support extra parameters for now