ggml : limit n_threads to the max n_tasks (llama/5238)

This commit is contained in:
slaren 2024-01-31 13:43:03 +01:00 committed by Georgi Gerganov
parent f850a067ed
commit fc7b0e2c28
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

6
ggml.c
View File

@@ -16985,12 +16985,16 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
struct ggml_cplan cplan;
memset(&cplan, 0, sizeof(struct ggml_cplan));
int max_tasks = 1;
// thread scheduling for the different operations + work buffer size estimation
for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i];
const int n_tasks = ggml_get_n_tasks(node, n_threads);
max_tasks = MAX(max_tasks, n_tasks);
size_t cur = 0;
switch (node->op) {
@@ -17157,7 +17161,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
work_size += CACHE_LINE_SIZE*(n_threads - 1);
}
- cplan.n_threads = n_threads;
+ cplan.n_threads = MIN(max_tasks, n_threads);
cplan.work_size = work_size;
cplan.work_data = NULL;