mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-04-03 07:40:54 +02:00
vulkan: fix mul_mat_vec failure in backend tests (llama/12529)
The out-of-bounds (OOB) calculation could be wrong if the last iteration fell inside one of the unrolled loops. Adjust the unrolling counts to avoid this. Add a couple of new backend tests that hit this failure on NVIDIA GPUs.
This commit is contained in:
parent
388ed98220
commit
a2cc8c2666
@ -105,6 +105,16 @@ void compute_outputs(const uint32_t first_row, const uint32_t num_rows) {
|
||||
int unroll_count = 4;
|
||||
uint unrolled_iters = num_iters & ~(unroll_count - 1);
|
||||
|
||||
#if K_PER_ITER == 2
|
||||
// If the K dimension is odd, we need lastiter==true on the last iteration
|
||||
// so OOB is computed correctly. Skip some unrolling to make that happen.
|
||||
if ((p.ncols & 1) != 0 &&
|
||||
unrolled_iters == num_iters &&
|
||||
unrolled_iters > 0) {
|
||||
unrolled_iters -= unroll_count;
|
||||
}
|
||||
#endif
|
||||
|
||||
uint i = 0;
|
||||
while (i < unrolled_iters) {
|
||||
// Manually partially unroll the loop
|
||||
@ -113,8 +123,18 @@ void compute_outputs(const uint32_t first_row, const uint32_t num_rows) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
unroll_count = 2;
|
||||
unrolled_iters = num_iters & ~(unroll_count - 1);
|
||||
|
||||
#if K_PER_ITER == 2
|
||||
if ((p.ncols & 1) != 0 &&
|
||||
unrolled_iters == num_iters &&
|
||||
unrolled_iters > 0) {
|
||||
unrolled_iters -= unroll_count;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (i < unrolled_iters) {
|
||||
// Manually partially unroll the loop
|
||||
[[unroll]] for (uint k = 0; k < unroll_count; ++k) {
|
||||
|
Loading…
Reference in New Issue
Block a user