From 9d7cb80f04eea4c73b6aa6a26e919d76e4e0e3ac Mon Sep 17 00:00:00 2001
From: Acly
Date: Tue, 1 Jul 2025 09:11:00 +0200
Subject: [PATCH] ggml-cpu : "align corners" for bilinear upscale/downscale (ggml/1285)

* add "align corners" mode for bilinear upscale, and allow downscaling
* add ggml_interpolate, deprecate ggml_upscale_ext, pass in align-corners as bit-flag
* test-backend-ops: replace ggml_upscale_ext with ggml_interpolate, add test cases for downscale and align-corners
---
 ggml/include/ggml.h       | 22 ++++++++++++++++++++--
 ggml/src/ggml-cpu/ops.cpp | 19 ++++++++++++-------
 ggml/src/ggml.c           | 39 ++++++++++++++++++++++++---------------
 3 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 9c4e2402..4231a668 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1765,6 +1765,12 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST  = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
     };
 
     // interpolate
@@ -1777,14 +1783,26 @@ extern "C" {
 
     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int ne0,
             int ne1,
             int ne2,
             int ne3,
-            enum ggml_scale_mode mode);
+            enum ggml_scale_mode mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3,
+            uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
 
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
index eff4a53e..f6ebb4ea 100644
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -6608,12 +6608,13 @@ static void ggml_compute_forward_upscale_f32(
 
     GGML_TENSOR_UNARY_OP_LOCALS
 
-    const float sf0 = (float)ne0/src0->ne[0];
-    const float sf1 = (float)ne1/src0->ne[1];
-    const float sf2 = (float)ne2/src0->ne[2];
-    const float sf3 = (float)ne3/src0->ne[3];
+    float sf0 = (float)ne0/src0->ne[0];
+    float sf1 = (float)ne1/src0->ne[1];
+    float sf2 = (float)ne2/src0->ne[2];
+    float sf3 = (float)ne3/src0->ne[3];
 
-    const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+    const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
+    const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
 
     if (mode == GGML_SCALE_MODE_NEAREST) {
         for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -6634,8 +6635,12 @@ static void ggml_compute_forward_upscale_f32(
             }
         }
     } else if (mode == GGML_SCALE_MODE_BILINEAR) {
-        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
-        const float pixel_offset = 0.5f;
+        float pixel_offset = 0.5f;
+        if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+            pixel_offset = 0.0f;
+            sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
+            sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
+        }
 
         for (int64_t i3 = 0; i3 < ne3; i3++) {
             const int64_t i03 = i3 / sf3;
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index f8e7c595..354a8051 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -4247,24 +4247,21 @@ struct ggml_tensor * ggml_pool_2d_back(
     return result;
 }
 
-// ggml_upscale
+// ggml_upscale / ggml_interpolate
 
-static struct ggml_tensor * ggml_upscale_impl(
+static struct ggml_tensor * ggml_interpolate_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int ne0,
-        int ne1,
-        int ne2,
-        int ne3,
-        enum ggml_scale_mode mode) {
-    GGML_ASSERT(a->ne[0] <= ne0);
-    GGML_ASSERT(a->ne[1] <= ne1);
-    GGML_ASSERT(a->ne[2] <= ne2);
-    GGML_ASSERT(a->ne[3] <= ne3);
-
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3,
+        uint32_t mode) {
+    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
+
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
-    ggml_set_op_params_i32(result, 0, mode);
+    ggml_set_op_params_i32(result, 0, (int32_t)mode);
 
     result->op     = GGML_OP_UPSCALE;
     result->src[0] = a;
@@ -4277,7 +4274,8 @@ struct ggml_tensor * ggml_upscale(
         struct ggml_tensor * a,
         int scale_factor,
         enum ggml_scale_mode mode) {
-    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
+    GGML_ASSERT(scale_factor > 1);
+    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
 }
 
 struct ggml_tensor * ggml_upscale_ext(
@@ -4288,7 +4286,18 @@ struct ggml_tensor * ggml_upscale_ext(
         int ne2,
         int ne3,
         enum ggml_scale_mode mode) {
-    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+}
+
+struct ggml_tensor * ggml_interpolate(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int64_t ne0,
+        int64_t ne1,
+        int64_t ne2,
+        int64_t ne3,
+        uint32_t mode) {
+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
 }
 
 // ggml_pad
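
Usage sketch (not part of the patch): a minimal, hypothetical example of calling the new ggml_interpolate with the align-corners flag OR-ed into the mode word, as the header comment "ggml_scale_mode [ | ggml_scale_flag...]" describes. It assumes a CPU-only build where ggml-cpu.h provides ggml_graph_compute_with_ctx; the 16 MB context budget, tensor sizes, and thread count are arbitrary placeholders.

    #include "ggml.h"
    #include "ggml-cpu.h"   // ggml_graph_compute_with_ctx (CPU backend)

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16 * 1024 * 1024,   // placeholder budget for data + graph
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        // ggml orders dimensions as [ne0 = W, ne1 = H, ne2 = C, ne3 = N]; a 32x32 RGB image
        struct ggml_tensor * img = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 32, 32, 3, 1);
        // ... fill img->data with pixel values ...

        // Downscale to 20x20 with bilinear filtering and align-corners sampling
        struct ggml_tensor * out = ggml_interpolate(ctx, img,
                20, 20, img->ne[2], img->ne[3],
                GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, out);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 4);

        // out->data now holds the 20x20x3 result
        ggml_free(ctx);
        return 0;
    }

Per the ops.cpp change above, setting the flag switches the bilinear path to a pixel offset of 0 and effective scale factors of (ne - 1) / (src_ne - 1), matching PyTorch interpolate with align_corners=True; without the flag the half-pixel-offset behavior is unchanged.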