sycl: Adding additional cpy dbg print output (llama/14034)

This commit is contained in:
Anton Mitkov 2025-06-13 08:51:39 +01:00 committed by Georgi Gerganov
parent 783cf0309f
commit 4ea599afdf
3 changed files with 33 additions and 37 deletions

View File

@ -513,9 +513,9 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
bool gpu_has_xmx(sycl::device &dev); bool gpu_has_xmx(sycl::device &dev);
template <int N, class T> void debug_print_array(const std::string & prefix, const T array[N]) { template <int N, class T> std::string debug_get_array_str(const std::string & prefix, const T array[N]) {
if (LIKELY(!g_ggml_sycl_debug)) { if (LIKELY(!g_ggml_sycl_debug)) {
return; return "";
} }
std::stringstream ss; std::stringstream ss;
ss << prefix << "=["; ss << prefix << "=[";
@ -526,29 +526,26 @@ template <int N, class T> void debug_print_array(const std::string & prefix, con
ss << array[N - 1]; ss << array[N - 1];
} }
ss << "]"; ss << "]";
GGML_SYCL_DEBUG("%s", ss.str().c_str()); return ss.str();
} }
inline void debug_print_tensor(const std::string & prefix, const ggml_tensor * tensor, inline std::string debug_get_tensor_str(const std::string &prefix,
const std::string & suffix = "") { const ggml_tensor *tensor, const std::string &suffix = "") {
if (LIKELY(!g_ggml_sycl_debug)) { std::stringstream ss;
return; if (LIKELY(!g_ggml_sycl_debug)) { return ss.str(); }
} ss << prefix.c_str() << "=";
GGML_SYCL_DEBUG("%s=", prefix.c_str());
if (tensor) { if (tensor) {
GGML_SYCL_DEBUG("'%s':type=%s", tensor->name, ggml_type_name(tensor->type)); ss << "'" << tensor->name << "':type=" << ggml_type_name(tensor->type);
debug_print_array<GGML_MAX_DIMS>(";ne", tensor->ne); ss << debug_get_array_str<GGML_MAX_DIMS>(";ne", tensor->ne);
debug_print_array<GGML_MAX_DIMS>(";nb", tensor->nb); ss << debug_get_array_str<GGML_MAX_DIMS>(";nb", tensor->nb);
if (!ggml_is_contiguous(tensor)) {
GGML_SYCL_DEBUG(";strided"); if (!ggml_is_contiguous(tensor)) { ss << ";strided"; }
} if (ggml_is_permuted(tensor)) { ss << ";permuted"; }
if (ggml_is_permuted(tensor)) {
GGML_SYCL_DEBUG(";permuted");
}
} else { } else {
GGML_SYCL_DEBUG("nullptr"); ss << "nullptr";
} }
GGML_SYCL_DEBUG("%s", suffix.c_str()); ss << suffix;
return ss.str();
} }
// Use scope_op_debug_print to log operations coming from running a model // Use scope_op_debug_print to log operations coming from running a model
@ -564,10 +561,10 @@ struct scope_op_debug_print {
return; return;
} }
GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data()); GGML_SYCL_DEBUG("[SYCL][OP] call %s%s:", func.data(), func_suffix.data());
debug_print_tensor(" dst", dst); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" dst", dst).c_str());
if (dst) { if (dst) {
for (std::size_t i = 0; i < num_src; ++i) { for (std::size_t i = 0; i < num_src; ++i) {
debug_print_tensor("\tsrc" + std::to_string(i), dst->src[i]); GGML_SYCL_DEBUG("%s", debug_get_tensor_str("\tsrc" + std::to_string(i), dst->src[i]).c_str());
} }
} }
GGML_SYCL_DEBUG("%s\n", suffix.data()); GGML_SYCL_DEBUG("%s\n", suffix.data());

View File

@ -723,8 +723,7 @@ static void ggml_cpy_q4_1_q4_1(const char * cx, char * cdst, const int ne, const
void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try { void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try {
// Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field // Unlike other operators ggml_sycl_cpy takes 2 distinct tensors instead of a dst ggml_tensor and rely on its src field
scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0, scope_op_debug_print scope_dbg_print(__func__, src1, /*num_src=*/0, debug_get_tensor_str("\tsrc0", src0));
std::string(" src0 type=") + ggml_type_name(src0->type));
const int64_t ne = ggml_nelements(src0); const int64_t ne = ggml_nelements(src0);
GGML_ASSERT(ne == ggml_nelements(src1)); GGML_ASSERT(ne == ggml_nelements(src1));

View File

@ -347,7 +347,7 @@ static enum ggml_status
ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor) try { ggml_tensor *tensor) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor, "\n"); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str());
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context; ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *)buffer->context;
if (tensor->view_src != NULL) { if (tensor->view_src != NULL) {
@ -385,7 +385,7 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
const void *data, size_t offset, const void *data, size_t offset,
size_t size) try { size_t size) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context; ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
ggml_sycl_set_device(ctx->device); ggml_sycl_set_device(ctx->device);
@ -413,7 +413,7 @@ static void ggml_backend_sycl_buffer_get_tensor(ggml_backend_buffer_t buffer,
void *data, size_t offset, void *data, size_t offset,
size_t size) try { size_t size) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context; ggml_backend_sycl_buffer_context * ctx = ( ggml_backend_sycl_buffer_context *)buffer->context;
@ -444,8 +444,8 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *dst) try { ggml_tensor *dst) try {
bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer); bool is_cpy_supported = ggml_backend_buffer_is_sycl(src->buffer);
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": dst=", dst); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str());
debug_print_tensor(" src=", src); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str());
GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported); GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
if (is_cpy_supported) { if (is_cpy_supported) {
ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context; ggml_backend_sycl_buffer_context * src_ctx = (ggml_backend_sycl_buffer_context *)src->buffer->context;
@ -525,7 +525,7 @@ catch (sycl::exception const &exc) {
static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, static void ggml_backend_sycl_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value,
size_t offset, size_t size) { size_t offset, size_t size) {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value); GGML_SYCL_DEBUG(" size=%zu offset=%zu value=%u\n", size, offset, value);
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context; ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
SYCL_CHECK(ggml_sycl_set_device(ctx->device)); SYCL_CHECK(ggml_sycl_set_device(ctx->device));
@ -805,7 +805,7 @@ static enum ggml_status
ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_backend_sycl_split_buffer_init_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor) try { ggml_tensor *tensor) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor, "\n"); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor, "\n").c_str());
GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported GGML_ASSERT(tensor->view_src == nullptr); // views of split tensors are not supported
ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context; ggml_backend_sycl_split_buffer_context * ctx = (ggml_backend_sycl_split_buffer_context *)buffer->context;
@ -891,7 +891,7 @@ ggml_backend_sycl_split_buffer_set_tensor(ggml_backend_buffer_t buffer,
ggml_tensor *tensor, const void *data, ggml_tensor *tensor, const void *data,
size_t offset, size_t size) try { size_t offset, size_t size) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
// split tensors must always be set in their entirety at once // split tensors must always be set in their entirety at once
GGML_ASSERT(offset == 0); GGML_ASSERT(offset == 0);
@ -947,7 +947,7 @@ ggml_backend_sycl_split_buffer_get_tensor(ggml_backend_buffer_t buffer,
const ggml_tensor *tensor, void *data, const ggml_tensor *tensor, void *data,
size_t offset, size_t size) try { size_t offset, size_t size) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
// split tensors must always be set in their entirety at once // split tensors must always be set in their entirety at once
GGML_ASSERT(offset == 0); GGML_ASSERT(offset == 0);
@ -3863,7 +3863,7 @@ static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
const void *data, size_t offset, const void *data, size_t offset,
size_t size) try { size_t size) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context; ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
@ -3884,7 +3884,7 @@ static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
void *data, size_t offset, void *data, size_t offset,
size_t size) try { size_t size) try {
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": tensor=", tensor); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": tensor", tensor).c_str());
GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset); GGML_SYCL_DEBUG(" size=%zu offset=%zu\n", size, offset);
ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context; ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
@ -3907,8 +3907,8 @@ static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && bool is_cpy_supported = dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) &&
ggml_backend_buffer_is_sycl(src->buffer); ggml_backend_buffer_is_sycl(src->buffer);
GGML_SYCL_DEBUG("[SYCL] call %s", __func__); GGML_SYCL_DEBUG("[SYCL] call %s", __func__);
debug_print_tensor(": dst=", dst); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(": dst", dst).c_str());
debug_print_tensor(" src=", src); GGML_SYCL_DEBUG("%s", debug_get_tensor_str(" src", src).c_str());
GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported); GGML_SYCL_DEBUG(" is_cpy_supported=%d\n", is_cpy_supported);
if (is_cpy_supported) { if (is_cpy_supported) {
/* /*