mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-06 17:56:55 +02:00
CANN: Support more ops (llama/12841)
* [CANN]Support Opt LOG && MEAN && PAD_REFLECT_1D * [CANN]Support COUNT_EQUAL && STEP && SGN * [CANN]codestyle adjustment * [CANN]codestyle adjustment --------- Signed-off-by: noemotiovon <noemotiovon@gmail.com>
This commit is contained in:
parent
3bf9691dfd
commit
182df69384
@ -41,6 +41,8 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
|
|||||||
return ACL_INT4;
|
return ACL_INT4;
|
||||||
case GGML_TYPE_Q8_0:
|
case GGML_TYPE_Q8_0:
|
||||||
return ACL_INT8;
|
return ACL_INT8;
|
||||||
|
case GGML_TYPE_I64:
|
||||||
|
return ACL_INT64;
|
||||||
default:
|
default:
|
||||||
return ACL_DT_UNDEFINED;
|
return ACL_DT_UNDEFINED;
|
||||||
}
|
}
|
||||||
|
@ -59,6 +59,11 @@
|
|||||||
#include <aclnnop/aclnn_div.h>
|
#include <aclnnop/aclnn_div.h>
|
||||||
#include <aclnnop/aclnn_convolution.h>
|
#include <aclnnop/aclnn_convolution.h>
|
||||||
#include <aclnnop/aclnn_elu.h>
|
#include <aclnnop/aclnn_elu.h>
|
||||||
|
#include <aclnnop/aclnn_log.h>
|
||||||
|
#include <aclnnop/aclnn_mean.h>
|
||||||
|
#include <aclnnop/aclnn_reflection_pad1d.h>
|
||||||
|
#include <aclnnop/aclnn_eq_tensor.h>
|
||||||
|
#include <aclnnop/aclnn_gt_scalar.h>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
@ -2598,6 +2603,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
|
|||||||
aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3);
|
aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3);
|
||||||
|
|
||||||
GGML_CANN_CALL_ACLNN_OP(ArgMax, acl_src, 3, false, acl_dst);
|
GGML_CANN_CALL_ACLNN_OP(ArgMax, acl_src, 3, false, acl_dst);
|
||||||
|
|
||||||
ACL_CHECK(aclDestroyTensor(acl_src));
|
ACL_CHECK(aclDestroyTensor(acl_src));
|
||||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||||
}
|
}
|
||||||
@ -2629,6 +2635,9 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
|
|||||||
|
|
||||||
ACL_CHECK(aclDestroyTensor(acl_weight));
|
ACL_CHECK(aclDestroyTensor(acl_weight));
|
||||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||||
|
ACL_CHECK(aclDestroyIntArray(stride));
|
||||||
|
ACL_CHECK(aclDestroyIntArray(padding));
|
||||||
|
ACL_CHECK(aclDestroyIntArray(dilation));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
||||||
@ -2646,4 +2655,79 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
|
|||||||
|
|
||||||
ACL_CHECK(aclDestroyTensor(acl_input));
|
ACL_CHECK(aclDestroyTensor(acl_input));
|
||||||
ACL_CHECK(aclDestroyTensor(acl_dst));
|
ACL_CHECK(aclDestroyTensor(acl_dst));
|
||||||
|
ACL_CHECK(aclDestroyScalar(alpha));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Mean reduction for GGML_OP_MEAN on the CANN backend.
 *
 * Wraps dst->src[0] and dst as ACL tensors and runs aclnn Mean over a single
 * reduction axis (index 3 of the ACL tensor), keeping the reduced dimension
 * and requesting ACL_FLOAT as the result data type.
 *
 * @param ctx CANN backend context used to launch the operator.
 * @param dst Destination tensor; its first source is the input.
 */
void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst){
    ggml_tensor * input = dst->src[0];

    aclTensor* acl_in  = ggml_cann_create_tensor(input);
    aclTensor* acl_out = ggml_cann_create_tensor(dst);

    // NOTE(review): axis 3 in ACL order presumably maps to ggml dim 0 (rows)
    // -- confirm against ggml_cann_create_tensor's dimension ordering.
    int64_t axes[] = {3};
    aclIntArray* acl_axes = aclCreateIntArray(axes, 1);

    // Third operator argument is keepDim: the reduced axis stays with size 1.
    GGML_CANN_CALL_ACLNN_OP(Mean, acl_in, acl_axes, true, ACL_FLOAT, acl_out);

    ACL_CHECK(aclDestroyTensor(acl_in));
    ACL_CHECK(aclDestroyTensor(acl_out));
    ACL_CHECK(aclDestroyIntArray(acl_axes));
}
|
||||||
|
|
||||||
|
/**
 * 1D reflect padding for GGML_OP_PAD_REFLECT_1D on the CANN backend.
 *
 * The left/right padding amounts are read from dst->op_params[0] and
 * dst->op_params[1]. The input is processed one slice of the outermost ggml
 * dimension (ne[3]) at a time: each iteration builds 3-D ACL views of the
 * source and destination slice and runs aclnn ReflectionPad1d on them.
 *
 * @param ctx CANN backend context used to launch the operator.
 * @param dst Destination tensor; its first source is the input.
 */
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
    ggml_tensor * src0 = dst->src[0];
    int32_t *opts = (int32_t *) dst->op_params;
    int64_t paddingsArray[2] = {opts[0], opts[1]};
    aclIntArray* paddings = aclCreateIntArray(paddingsArray, 2);

    for (int64_t i = 0; i < src0->ne[3]; i++) {
        // Slice offsets are in bytes, so they must use the byte stride nb[3]
        // of the respective tensor (not the element count ne[3]); source and
        // destination have different strides because dst is padded.
        aclTensor* acl_src = ggml_cann_create_tensor(
            (char*)src0->data + i * src0->nb[3],
            ggml_cann_type_mapping(src0->type), ggml_element_size(src0),
            src0->ne, src0->nb, 3);

        aclTensor* acl_dst = ggml_cann_create_tensor(
            (char*)dst->data + i * dst->nb[3],
            ggml_cann_type_mapping(dst->type), ggml_element_size(dst),
            dst->ne, dst->nb, 3);

        GGML_CANN_CALL_ACLNN_OP(ReflectionPad1d, acl_src, paddings, acl_dst);

        ACL_CHECK(aclDestroyTensor(acl_src));
        ACL_CHECK(aclDestroyTensor(acl_dst));
    }
    // The padding descriptor is shared by every slice; release it once.
    ACL_CHECK(aclDestroyIntArray(paddings));
}
|
||||||
|
|
||||||
|
/**
 * Equality count for GGML_OP_COUNT_EQUAL on the CANN backend.
 *
 * Runs aclnn InplaceEqTensor on the ACL views of dst->src[0] and dst->src[1],
 * then delegates to ggml_cann_sum(ctx, dst) to produce the result in dst.
 *
 * @param ctx CANN backend context used to launch the operators.
 * @param dst Destination tensor; its first two sources are the operands.
 */
void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst){
    ggml_tensor * lhs = dst->src[0];
    ggml_tensor * rhs = dst->src[1];

    aclTensor* acl_lhs = ggml_cann_create_tensor(lhs);
    aclTensor* acl_rhs = ggml_cann_create_tensor(rhs);

    // NOTE(review): the comparison is in-place on the view of src[0], so the
    // first operand's buffer is presumably overwritten with the equality
    // mask -- confirm callers do not reuse src[0] afterwards.
    GGML_CANN_CALL_ACLNN_OP(InplaceEqTensor, acl_lhs, acl_rhs);

    // Presumably reduces dst->src[0] (now the mask) into dst -- see ggml_cann_sum.
    ggml_cann_sum(ctx, dst);

    ACL_CHECK(aclDestroyTensor(acl_lhs));
    ACL_CHECK(aclDestroyTensor(acl_rhs));
}
|
||||||
|
|
||||||
|
/**
 * Step activation for GGML_UNARY_OP_STEP on the CANN backend.
 *
 * Compares every element of dst->src[0] against the scalar 0.0f with aclnn
 * GtScalar and writes the comparison result into dst.
 *
 * @param ctx CANN backend context used to launch the operator.
 * @param dst Destination tensor; its first source is the input.
 */
void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst){
    ggml_tensor * input = dst->src[0];

    aclTensor* acl_in  = ggml_cann_create_tensor(input);
    aclTensor* acl_out = ggml_cann_create_tensor(dst);

    // Threshold of the step function: elements strictly greater than zero pass.
    float threshold = 0.0f;
    aclScalar* acl_threshold = aclCreateScalar(&threshold, aclDataType::ACL_FLOAT);

    GGML_CANN_CALL_ACLNN_OP(GtScalar, acl_in, acl_threshold, acl_out);

    ACL_CHECK(aclDestroyTensor(acl_in));
    ACL_CHECK(aclDestroyTensor(acl_out));
    ACL_CHECK(aclDestroyScalar(acl_threshold));
}
|
}
|
||||||
|
@ -42,6 +42,8 @@
|
|||||||
#include <aclnnop/aclnn_sqrt.h>
|
#include <aclnnop/aclnn_sqrt.h>
|
||||||
#include <aclnnop/aclnn_sin.h>
|
#include <aclnnop/aclnn_sin.h>
|
||||||
#include <aclnnop/aclnn_cos.h>
|
#include <aclnnop/aclnn_cos.h>
|
||||||
|
#include <aclnnop/aclnn_log.h>
|
||||||
|
#include <aclnnop/aclnn_sign.h>
|
||||||
#include "acl_tensor.h"
|
#include "acl_tensor.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
@ -650,6 +652,67 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
|
|||||||
*/
|
*/
|
||||||
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Computes the mean of a ggml tensor along a single dimension using the CANN backend.
|
||||||
|
*
|
||||||
|
* @details This function reduces the input tensor by averaging along one dimension, keeping that dimension in the output.
|
||||||
|
* The result is written to the destination tensor `dst`.
|
||||||
|
* The mean is computed along a single reduction axis (the reduced dimension is kept), not across the entire tensor.
|
||||||
|
*
|
||||||
|
* This operation is optimized using the CANN backend for high-performance inference or training.
|
||||||
|
*
|
||||||
|
* @param ctx The CANN context used for operations.
|
||||||
|
* @param dst The destination tensor where the mean result will be stored.
|
||||||
|
* dst->op is expected to be `GGML_OP_MEAN`.
|
||||||
|
*/
|
||||||
|
void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
|
||||||
|
*
|
||||||
|
* @details This function performs 1D reflect padding on the input tensor.
|
||||||
|
* The amount of padding on each side is specified by parameters stored in `dst->op_params`.
|
||||||
|
* The operation reflects the values at the borders of the tensor to generate the padded output.
|
||||||
|
*
|
||||||
|
* This operation is optimized using the CANN backend for high-performance inference or training.
|
||||||
|
*
|
||||||
|
* @param ctx The CANN context used for operations.
|
||||||
|
* @param dst The destination tensor where the padded result will be stored.
|
||||||
|
* dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
|
||||||
|
*/
|
||||||
|
void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
|
||||||
|
*
|
||||||
|
* @details This function performs an element-wise comparison between two input tensors,
|
||||||
|
* and counts the number of positions where the elements are equal. The result is
|
||||||
|
* stored in the destination tensor `dst` as a scalar.
|
||||||
|
*
|
||||||
|
* The operation is optimized using the CANN backend, making it suitable for
|
||||||
|
* high-performance inference or training scenarios.
|
||||||
|
*
|
||||||
|
* @param ctx The CANN context used for operations.
|
||||||
|
* @param dst The destination tensor where the result will be stored.
|
||||||
|
* dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
|
||||||
|
*/
|
||||||
|
void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Applies the Step activation function to a ggml tensor using the CANN backend.
|
||||||
|
*
|
||||||
|
* @details This function applies a step function element-wise to the input tensor, where
|
||||||
|
* each element is transformed to 1.0 if it is greater than 0, and 0.0 otherwise.
|
||||||
|
* The result is stored in the destination tensor `dst`.
|
||||||
|
*
|
||||||
|
* This operation is accelerated using the CANN backend to improve runtime performance.
|
||||||
|
*
|
||||||
|
* @param ctx The CANN context used for operations.
|
||||||
|
* @param dst The destination tensor where the result will be stored.
|
||||||
|
* dst->op is expected to be `GGML_OP_STEP`.
|
||||||
|
*/
|
||||||
|
void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Applies a element-wise operation to two input tensors using the CANN
|
* @brief Applies a element-wise operation to two input tensors using the CANN
|
||||||
* backend.
|
* backend.
|
||||||
|
@ -1358,6 +1358,12 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|||||||
case GGML_UNARY_OP_ELU:
|
case GGML_UNARY_OP_ELU:
|
||||||
ggml_cann_elu(ctx, dst);
|
ggml_cann_elu(ctx, dst);
|
||||||
break;
|
break;
|
||||||
|
case GGML_UNARY_OP_SGN:
|
||||||
|
GGML_CANN_CALL_UNARY_OP(Sign);
|
||||||
|
break;
|
||||||
|
case GGML_UNARY_OP_STEP:
|
||||||
|
ggml_cann_step(ctx, dst);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1456,6 +1462,18 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
|
|||||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||||
ggml_cann_conv_transpose_1d(ctx, dst);
|
ggml_cann_conv_transpose_1d(ctx, dst);
|
||||||
break;
|
break;
|
||||||
|
case GGML_OP_LOG:
|
||||||
|
GGML_CANN_CALL_UNARY_OP(Log);
|
||||||
|
break;
|
||||||
|
case GGML_OP_MEAN:
|
||||||
|
ggml_cann_mean(ctx, dst);
|
||||||
|
break;
|
||||||
|
case GGML_OP_PAD_REFLECT_1D:
|
||||||
|
ggml_cann_pad_reflect_1d(ctx, dst);
|
||||||
|
break;
|
||||||
|
case GGML_OP_COUNT_EQUAL:
|
||||||
|
ggml_cann_count_equal(ctx, dst);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1718,6 +1736,8 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|||||||
case GGML_UNARY_OP_TANH:
|
case GGML_UNARY_OP_TANH:
|
||||||
case GGML_UNARY_OP_EXP:
|
case GGML_UNARY_OP_EXP:
|
||||||
case GGML_UNARY_OP_ELU:
|
case GGML_UNARY_OP_ELU:
|
||||||
|
case GGML_UNARY_OP_SGN:
|
||||||
|
case GGML_UNARY_OP_STEP:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
@ -1854,6 +1874,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
|||||||
case GGML_OP_COS:
|
case GGML_OP_COS:
|
||||||
case GGML_OP_SIN:
|
case GGML_OP_SIN:
|
||||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||||
|
case GGML_OP_LOG:
|
||||||
|
case GGML_OP_MEAN:
|
||||||
|
case GGML_OP_PAD_REFLECT_1D:
|
||||||
|
case GGML_OP_COUNT_EQUAL:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user