From 730a00be8a067ad65b73fa978314049e2a29165f Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 22 May 2025 09:24:09 +0100 Subject: [PATCH] SYCL: Avoid using with SYCL-Graph for unsupported nodes (llama/13587) Currently on a CUDA backend to SYCL when running `GGML_SYCL_DISABLE_GRAPH=0 ./bin/test-backend-ops -b SYCL0` there are two operations that throw an exception from the blocking waits during queue recording. * `-o CONCAT` : Use of blocking waits on a queue that's being recorded https://github.com/ggml-org/llama.cpp/blob/master/ggml/src/ggml-sycl/concat.cpp#L185-L187 * `-o MUL_MAT_ID`: Blocking wait on a recording queue for a copy to host memory https://github.com/ggml-org/llama.cpp/blob/master/ggml/src/ggml-sycl/ggml-sycl.cpp#L3072-L3074 We've noticed that `ggml-cuda.cu` has the [check_node_graph_compatibility_and_refresh_copy_ops](https://github.com/ggml-org/llama.cpp/blob/39e73ae0d69f882d7e29cecc6dd8f5052fca6731/ggml/src/ggml-cuda/ggml-cuda.cu#L2458-L2458) method for checking if a graph can be used, even if enabled. I've taken a similar approach in this PR by adding a method to `ggml-sycl.cpp` for checking if a graph can be used for the operations even if a user has asked for it to be enabled. --- ggml/src/ggml-sycl/ggml-sycl.cpp | 34 +++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index d0591978..c2eb618e 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3809,11 +3809,43 @@ static void ggml_backend_sycl_graph_compute_impl(ggml_backend_sycl_context * syc } } +#ifdef GGML_SYCL_GRAPH +static bool check_graph_compatibility(ggml_cgraph * cgraph) { + if (ggml_sycl_info().device_count > 1) { + // A sycl_ex::command_graph object can only be created for a single device + GGML_LOG_INFO("%s: disabling SYCL graphs due to multiple devices\n", __func__); + return false; + } + + for (int i = 0; i < cgraph->n_nodes; i++) { + const ggml_op node_op = cgraph->nodes[i]->op; + switch (node_op) { + default: + break; + case GGML_OP_CONCAT: + // ggml_sycl_op_concat() does a blocking host wait after memcpy operations, + // but wait() can't be called on the events returned by a queue recording + // to a graph. + [[fallthrough]]; + case GGML_OP_MUL_MAT_ID: + // ggml_sycl_mul_mat_id() does a blocking host wait on the sycl queue after + // submitting a memcpy operation, but wait() can't be called on a queue that + // is recording to a graph. + GGML_LOG_INFO("%s: disabling SYCL graphs due to unsupported node type %s\n", __func__, + ggml_op_name(node_op)); + return false; + } + } + return true; +} +#endif + static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { auto * sycl_ctx = static_cast(backend->context); #ifdef GGML_SYCL_GRAPH - if (!g_ggml_sycl_disable_graph) { + bool use_sycl_graph = !g_ggml_sycl_disable_graph && check_graph_compatibility(cgraph); + if (use_sycl_graph) { const bool graph_support = dpct::get_device(sycl_ctx->device).has(sycl::aspect::ext_oneapi_limited_graph); if (!graph_support) { GGML_SYCL_DEBUG("[SYCL-GRAPH] can not use graphs on device:%d\n", sycl_ctx->device);