mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-02-05 04:50:18 +01:00
llava : MobileVLM support (llama/4954)
* MobileVLM native implementation * delete depthwise_conv_2d and permute_cpy relative code, replace the two by the existed functions, and opt ldp definition, support LLAMA_PERF option for CMake * move android script to example/llava directory * Fix the editor config checks --------- Co-authored-by: Chenxiaotao03 <chenxiaotao03@meituan.com>
This commit is contained in:
parent
078b8e23bf
commit
aaeaa43878
141
ggml.c
141
ggml.c
@ -1418,6 +1418,9 @@ inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) {
|
|||||||
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
|
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
|
||||||
// ReLU activation: y[k] = x[k] when x[k] > 0, otherwise 0.
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) {
    for (int k = 0; k < n; ++k) {
        const float v = x[k];
        if (v > 0.f) {
            y[k] = v;
        } else {
            y[k] = 0.f;
        }
    }
}
|
||||||
// Leaky ReLU: the positive part of x[k] passes through unchanged, the negative
// part is scaled by the slope `ns`, and the two contributions are summed.
inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) {
    for (int k = 0; k < n; ++k) {
        const float v        = x[k];
        const float positive = (v > 0.f)  ? v : 0.f;
        const float negative = (v < 0.0f) ? v : 0.f;
        y[k] = positive + ns * negative;
    }
}
|
||||||
|
// TODO: optimize performance
|
||||||
|
inline static void ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
|
||||||
|
inline static void ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); }
|
||||||
|
|
||||||
static const float GELU_COEF_A = 0.044715f;
|
static const float GELU_COEF_A = 0.044715f;
|
||||||
static const float GELU_QUICK_COEF = -1.702f;
|
static const float GELU_QUICK_COEF = -1.702f;
|
||||||
@ -1776,9 +1779,11 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|||||||
"GELU",
|
"GELU",
|
||||||
"GELU_QUICK",
|
"GELU_QUICK",
|
||||||
"SILU",
|
"SILU",
|
||||||
|
"HARDSWISH",
|
||||||
|
"HARDSIGMOID",
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GGML_UNARY_OP_COUNT == 10, "GGML_UNARY_OP_COUNT != 10");
|
static_assert(GGML_UNARY_OP_COUNT == 12, "GGML_UNARY_OP_COUNT != 12");
|
||||||
|
|
||||||
|
|
||||||
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
||||||
@ -3945,6 +3950,20 @@ struct ggml_tensor * ggml_silu_back(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ggml hardswish
|
||||||
|
struct ggml_tensor * ggml_hardswish(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a) {
|
||||||
|
return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ggml hardsigmoid
|
||||||
|
struct ggml_tensor * ggml_hardsigmoid(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a) {
|
||||||
|
return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
|
||||||
|
}
|
||||||
|
|
||||||
// ggml_norm
|
// ggml_norm
|
||||||
|
|
||||||
static struct ggml_tensor * ggml_norm_impl(
|
static struct ggml_tensor * ggml_norm_impl(
|
||||||
@ -5344,6 +5363,33 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ggml_conv_depthwise
|
||||||
|
struct ggml_tensor * ggml_conv_depthwise_2d(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
int s0,
|
||||||
|
int s1,
|
||||||
|
int p0,
|
||||||
|
int p1,
|
||||||
|
int d0,
|
||||||
|
int d1) {
|
||||||
|
|
||||||
|
struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
||||||
|
struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
|
||||||
|
ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
||||||
|
s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW]
|
||||||
|
|
||||||
|
struct ggml_tensor * result =
|
||||||
|
ggml_mul_mat(ctx,
|
||||||
|
ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1), // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
|
||||||
|
ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3])); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
||||||
|
|
||||||
|
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
// ggml_conv_2d
|
// ggml_conv_2d
|
||||||
|
|
||||||
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
||||||
@ -9338,6 +9384,87 @@ static void ggml_compute_forward_silu_back(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void ggml_compute_forward_hardswish_f32(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
const struct ggml_tensor * src0,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
assert(params->ith == 0);
|
||||||
|
assert(ggml_are_same_shape(src0, dst));
|
||||||
|
|
||||||
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int n = ggml_nrows(src0);
|
||||||
|
const int nc = src0->ne[0];
|
||||||
|
|
||||||
|
assert(dst->nb[0] == sizeof(float));
|
||||||
|
assert(src0->nb[0] == sizeof(float));
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
ggml_vec_hardswish_f32(nc,
|
||||||
|
(float *) ((char *) dst->data + i*( dst->nb[1])),
|
||||||
|
(float *) ((char *) src0->data + i*(src0->nb[1])));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static void ggml_compute_forward_hardswish(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
const struct ggml_tensor * src0,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
switch (src0->type) {
|
||||||
|
case GGML_TYPE_F32:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_hardswish_f32(params, src0, dst);
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
GGML_ASSERT(false);
|
||||||
|
} break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_compute_forward_hardsigmoid_f32(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
const struct ggml_tensor * src0,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
assert(params->ith == 0);
|
||||||
|
assert(ggml_are_same_shape(src0, dst));
|
||||||
|
|
||||||
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int n = ggml_nrows(src0);
|
||||||
|
const int nc = src0->ne[0];
|
||||||
|
|
||||||
|
assert(dst->nb[0] == sizeof(float));
|
||||||
|
assert(src0->nb[0] == sizeof(float));
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
ggml_vec_hardsigmoid_f32(nc,
|
||||||
|
(float *) ((char *) dst->data + i*( dst->nb[1])),
|
||||||
|
(float *) ((char *) src0->data + i*(src0->nb[1])));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ggml_compute_forward_hardsigmoid(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
const struct ggml_tensor * src0,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
switch (src0->type) {
|
||||||
|
case GGML_TYPE_F32:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_hardsigmoid_f32(params, src0, dst);
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
GGML_ASSERT(false);
|
||||||
|
} break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// ggml_compute_forward_norm
|
// ggml_compute_forward_norm
|
||||||
|
|
||||||
static void ggml_compute_forward_norm_f32(
|
static void ggml_compute_forward_norm_f32(
|
||||||
@ -12354,6 +12481,7 @@ static void ggml_compute_forward_im2col(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ggml_compute_forward_conv_transpose_2d
|
// ggml_compute_forward_conv_transpose_2d
|
||||||
|
|
||||||
static void ggml_compute_forward_conv_transpose_2d(
|
static void ggml_compute_forward_conv_transpose_2d(
|
||||||
@ -13922,6 +14050,14 @@ static void ggml_compute_forward_unary(
|
|||||||
{
|
{
|
||||||
ggml_compute_forward_silu(params, src0, dst);
|
ggml_compute_forward_silu(params, src0, dst);
|
||||||
} break;
|
} break;
|
||||||
|
case GGML_UNARY_OP_HARDSWISH:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_hardswish(params, src0, dst);
|
||||||
|
} break;
|
||||||
|
case GGML_UNARY_OP_HARDSIGMOID:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_hardsigmoid(params, src0, dst);
|
||||||
|
} break;
|
||||||
default:
|
default:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(false);
|
GGML_ASSERT(false);
|
||||||
@ -16335,6 +16471,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|||||||
case GGML_UNARY_OP_TANH:
|
case GGML_UNARY_OP_TANH:
|
||||||
case GGML_UNARY_OP_ELU:
|
case GGML_UNARY_OP_ELU:
|
||||||
case GGML_UNARY_OP_RELU:
|
case GGML_UNARY_OP_RELU:
|
||||||
|
case GGML_UNARY_OP_HARDSWISH: // to opt for multiple threads
|
||||||
|
case GGML_UNARY_OP_HARDSIGMOID: // to opt for multiple threads
|
||||||
{
|
{
|
||||||
n_tasks = 1;
|
n_tasks = 1;
|
||||||
} break;
|
} break;
|
||||||
@ -16567,7 +16705,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|||||||
// distribute new work or execute it direct if 1T
|
// distribute new work or execute it direct if 1T
|
||||||
while (++node_n < cgraph->n_nodes) {
|
while (++node_n < cgraph->n_nodes) {
|
||||||
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
|
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
|
||||||
|
|
||||||
struct ggml_tensor * node = cgraph->nodes[node_n];
|
struct ggml_tensor * node = cgraph->nodes[node_n];
|
||||||
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
||||||
|
|
||||||
|
24
ggml.h
24
ggml.h
@ -489,6 +489,8 @@ extern "C" {
|
|||||||
GGML_UNARY_OP_GELU,
|
GGML_UNARY_OP_GELU,
|
||||||
GGML_UNARY_OP_GELU_QUICK,
|
GGML_UNARY_OP_GELU_QUICK,
|
||||||
GGML_UNARY_OP_SILU,
|
GGML_UNARY_OP_SILU,
|
||||||
|
GGML_UNARY_OP_HARDSWISH,
|
||||||
|
GGML_UNARY_OP_HARDSIGMOID,
|
||||||
|
|
||||||
GGML_UNARY_OP_COUNT,
|
GGML_UNARY_OP_COUNT,
|
||||||
};
|
};
|
||||||
@ -1032,6 +1034,16 @@ extern "C" {
|
|||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b);
|
struct ggml_tensor * b);
|
||||||
|
|
||||||
|
// hardswish(x) = x * relu6(x + 3) / 6
|
||||||
|
GGML_API struct ggml_tensor * ggml_hardswish(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a);
|
||||||
|
|
||||||
|
// hardsigmoid(x) = relu6(x + 3) / 6
|
||||||
|
GGML_API struct ggml_tensor * ggml_hardsigmoid(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a);
|
||||||
|
|
||||||
// normalize along rows
|
// normalize along rows
|
||||||
GGML_API struct ggml_tensor * ggml_norm(
|
GGML_API struct ggml_tensor * ggml_norm(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
@ -1483,6 +1495,18 @@ extern "C" {
|
|||||||
int d1,
|
int d1,
|
||||||
bool is_2D);
|
bool is_2D);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
int s0,
|
||||||
|
int s1,
|
||||||
|
int p0,
|
||||||
|
int p1,
|
||||||
|
int d0,
|
||||||
|
int d1);
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_conv_1d(
|
GGML_API struct ggml_tensor * ggml_conv_1d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
|
Loading…
Reference in New Issue
Block a user