From a221288dc6f0f8642ae9138f68d2b266d78cf811 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Wed, 14 May 2025 13:15:50 +0900 Subject: [PATCH] vulkan: workaround FA compile failures on macos (llama/13517) --- ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp index e6545160..16835576 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp @@ -12,6 +12,7 @@ layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; +layout (constant_id = 0) const uint32_t WorkGroupSize = 128; layout (constant_id = 1) const uint32_t Br = 1; layout (constant_id = 2) const uint32_t Bc = 32; layout (constant_id = 3) const uint32_t D = 32; @@ -19,7 +20,7 @@ layout (constant_id = 3) const uint32_t D = 32; layout (constant_id = 5) const uint32_t D_split = 16; const uint32_t D_per_thread = D / D_split; -const uint32_t cols_per_iter = gl_WorkGroupSize.x / D_split; +const uint32_t cols_per_iter = WorkGroupSize / D_split; const uint32_t cols_per_thread = Bc / cols_per_iter; layout (push_constant) uniform parameter { @@ -134,8 +135,8 @@ ACC_TYPE perElemOpComputeSlope(const in uint32_t r, const in uint32_t c, const i return ACC_TYPE(pow(base, ACC_TYPE(exph))); } -shared FLOAT_TYPE tmpsh[gl_WorkGroupSize.x]; -shared vec4 tmpshv4[gl_WorkGroupSize.x]; +shared FLOAT_TYPE tmpsh[WorkGroupSize]; +shared vec4 tmpshv4[WorkGroupSize]; shared float masksh[Bc][Br]; shared vec4 Qf[Br][D / 4];