From a3899e78afc4ce790886a60d3fb4eae05b8805fe Mon Sep 17 00:00:00 2001
From: uvos
Date: Tue, 29 Jul 2025 17:43:43 +0200
Subject: [PATCH] HIP: Ignore unsupported unroll transformation in fattn-vec
 (llama/14931)

LLVM with the amdgcn target does not support unrolling loops with
conditional break statements when those statements cannot be resolved
at compile time. Similar to other places in GGML, let's simply ignore
this warning.
---
 ggml/src/ggml-cuda/fattn-vec-f16.cuh | 9 +++++++++
 ggml/src/ggml-cuda/fattn-vec-f32.cuh | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
index 10925383..afef815c 100644
--- a/ggml/src/ggml-cuda/fattn-vec-f16.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f16.cuh
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -341,6 +347,9 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
diff --git a/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/ggml/src/ggml-cuda/fattn-vec-f32.cuh
index 2cf2e408..3595e296 100644
--- a/ggml/src/ggml-cuda/fattn-vec-f32.cuh
+++ b/ggml/src/ggml-cuda/fattn-vec-f32.cuh
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
+// Currently LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -336,6 +342,9 @@ static __global__ void flash_attn_vec_ext_f32(
     NO_DEVICE_CODE;
 #endif // FLASH_ATTN_AVAILABLE
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__
 
 template
 void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
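
For reference, below is a minimal standalone sketch (not from the patched files) of the suppression pattern applied above. The kernel name, its ncols template parameter, and the lengths input are hypothetical; only the pragmas mirror the patch. On the amdgcn target, clang emits -Wpass-failed when a "#pragma unroll" request cannot be honored because the loop contains a break that depends on a runtime value; the diagnostic push/ignored/pop pair silences only that warning for the enclosed kernel.

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif // __clang__

// Hypothetical kernel: ncols is known at compile time, but the break below
// depends on lengths[blockIdx.x], which is only known at runtime, so the
// requested unroll cannot be fully applied and clang reports -Wpass-failed.
template <int ncols>
static __global__ void example_truncated_fill(const int * lengths, float * dst) {
    const int row_len = lengths[blockIdx.x]; // runtime-dependent loop limit

#pragma unroll
    for (int j = 0; j < ncols; ++j) {
        if (j >= row_len) {
            break; // break condition not resolvable at compile time
        }
        dst[blockIdx.x*ncols + j] = 1.0f;
    }
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__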