Mirror of https://github.com/ggerganov/whisper.cpp.git (synced 2025-08-14 17:08:41 +02:00)
Merge branch 'master' into metal-and-alloc
This commit is contained in:
14
ggml.c
14
ggml.c
@ -17283,10 +17283,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|||||||
} else {
|
} else {
|
||||||
// wait for other threads to finish
|
// wait for other threads to finish
|
||||||
const int last = node_n;
|
const int last = node_n;
|
||||||
do {
|
while (true) {
|
||||||
//sched_yield();
|
// TODO: this sched_yield can have significant impact on the performance - either positive or negative
|
||||||
|
// depending on the workload and the operating system.
|
||||||
|
// since it is not clear what is the best approach, it should potentially become user-configurable
|
||||||
|
// ref: https://github.com/ggerganov/ggml/issues/291
|
||||||
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
||||||
|
sched_yield();
|
||||||
|
#endif
|
||||||
|
|
||||||
node_n = atomic_load(&state->shared->node_n);
|
node_n = atomic_load(&state->shared->node_n);
|
||||||
} while (node_n == last);
|
if (node_n != last) break;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we should stop
|
// check if we should stop
|
||||||
|
@ -5332,7 +5332,8 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
|||||||
// b: N*N*sizeof(float)
|
// b: N*N*sizeof(float)
|
||||||
// c: N*N*sizeof(float)
|
// c: N*N*sizeof(float)
|
||||||
// when F16 is used, there is an extra work buffer of size N*N*sizeof(float)
|
// when F16 is used, there is an extra work buffer of size N*N*sizeof(float)
|
||||||
std::vector<char> buf(4llu*N_max*N_max*sizeof(float) + 4*512);
|
std::vector<uint8_t> buf(3llu*N_max*N_max*sizeof(float) + 3*ggml_tensor_overhead());
|
||||||
|
std::vector<uint8_t> work;
|
||||||
|
|
||||||
// put a bunch of random data in the buffer
|
// put a bunch of random data in the buffer
|
||||||
for (size_t i = 0; i < buf.size(); i++) buf[i] = i;
|
for (size_t i = 0; i < buf.size(); i++) buf[i] = i;
|
||||||
@ -5387,12 +5388,12 @@ WHISPER_API const char * whisper_bench_ggml_mul_mat_str(int n_threads) {
|
|||||||
double tsum = 0.0;
|
double tsum = 0.0;
|
||||||
|
|
||||||
// heat-up
|
// heat-up
|
||||||
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
|
ggml_graph_compute_helper(work, &gf, n_threads);
|
||||||
|
|
||||||
for (int i = 0; i < n_max; ++i) {
|
for (int i = 0; i < n_max; ++i) {
|
||||||
const int64_t t0 = ggml_time_us();
|
const int64_t t0 = ggml_time_us();
|
||||||
|
|
||||||
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
|
ggml_graph_compute_helper(work, &gf, n_threads);
|
||||||
|
|
||||||
const int64_t t1 = ggml_time_us();
|
const int64_t t1 = ggml_time_us();
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user