Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-01-01 03:28:57 +01:00)
ggml : fix running tasks with variable number of threads
This commit is contained in:
parent 74ffa14e1d
commit 6394c906af
ggml.c (20 lines changed)
@ -4745,7 +4745,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
||||
// TODO: do not support transposed src1
|
||||
assert(nb10/2 == sizeof(ggml_fp16_t));
|
||||
|
||||
- // parallelize by src0 rows using ggml_vec_dot_f32
+ // parallelize by src0 rows using ggml_vec_dot_f16
|
||||
|
||||
// total rows in src0
|
||||
const int nr = ne01*ne02*ne03;
|
||||
@ -4773,7 +4773,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
|
||||
const int i3 = i03;
|
||||
|
||||
ggml_fp16_t * src0_row = (ggml_fp16_t *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03));
|
||||
- ggml_fp16_t * src1_col = wdata + (i13*ne12*ne11 + i12*ne11 + 0)*ne00;
+ ggml_fp16_t * src1_col = wdata + ( 0 + i12*ne11 + i13*ne12*ne11)*ne00;
|
||||
|
||||
float * dst_col = (float *) ((char *) dst->data + (i0*nb0 + 0*nb1 + i2*nb2 + i3*nb3));
|
||||
|
||||
@ -7142,7 +7142,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
}
|
||||
|
||||
if (state->node) {
|
||||
- ggml_compute_forward(&state->params, state->node);
+ if (state->params.ith < state->params.nth) {
+ ggml_compute_forward(&state->params, state->node);
+ }
|
||||
state->node = NULL;
|
||||
} else {
|
||||
break;
|
||||
@ -7236,9 +7238,15 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
||||
} break;
|
||||
case GGML_OP_MUL_MAT:
|
||||
{
|
||||
- // TODO: use different scheduling for different matrix sizes
  node->n_tasks = n_threads;

+ // TODO: use different scheduling for different matrix sizes
+ //const int nr0 = ggml_nrows(node->src0);
+ //const int nr1 = ggml_nrows(node->src1);
+
+ //node->n_tasks = MIN(n_threads, MAX(1, nr0/128));
+ //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks = %d\n", nr0, nr1, nr0*nr1, node->n_tasks);
+
||||
size_t cur = 0;
|
||||
|
||||
// TODO: better way to determine if the matrix is transposed
|
||||
@ -7422,7 +7430,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
||||
workers[j].params = (struct ggml_compute_params) {
|
||||
.type = GGML_TASK_COMPUTE,
|
||||
.ith = j + 1,
|
||||
- .nth = n_threads,
+ .nth = node->n_tasks,
|
||||
.wsize = cgraph->work ? ggml_nbytes(cgraph->work) : 0,
|
||||
.wdata = cgraph->work ? cgraph->work->data : NULL,
|
||||
};
|
||||
@ -7477,7 +7485,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
||||
workers[j].params = (struct ggml_compute_params) {
|
||||
.type = GGML_TASK_FINALIZE,
|
||||
.ith = j + 1,
|
||||
- .nth = n_threads,
+ .nth = node->n_tasks,
|
||||
.wsize = cgraph->work ? ggml_nbytes(cgraph->work) : 0,
|
||||
.wdata = cgraph->work ? cgraph->work->data : NULL,
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user