From 11218294db0811fd39270af627de6a8c51186467 Mon Sep 17 00:00:00 2001 From: Diego Devesa Date: Mon, 21 Apr 2025 18:13:51 +0200 Subject: [PATCH] ggml : add SSE 4.2 and x64 base variant for CPUs without AVX (llama/12871) * ggml : add SSE 4.2 variant for CPUs without AVX * ggml : add x64 base ABI variant --- ggml/CMakeLists.txt | 1 + ggml/src/CMakeLists.txt | 15 +++++++++------ ggml/src/ggml-cpu/CMakeLists.txt | 8 +++++--- ggml/src/ggml-cpu/cpu-feats-x86.cpp | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 438c2a73..61fe15a1 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -107,6 +107,7 @@ message(DEBUG "INS_ENB : ${INS_ENB}") option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF) option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON) option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF) +option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB}) option(GGML_AVX "ggml: enable AVX" ${INS_ENB}) option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF) option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB}) diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index f00700da..43d9fc4f 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -267,6 +267,7 @@ function(ggml_add_cpu_backend_variant tag_name) set(GGML_CPU_TAG_NAME ${tag_name}) # other: OPENMP LLAMAFILE CPU_HBM foreach (feat NATIVE + SSE42 AVX AVX2 BMI2 AVX_VNNI FMA F16C AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8 AMX_BF16) @@ -286,14 +287,16 @@ if (GGML_CPU_ALL_VARIANTS) if (NOT GGML_BACKEND_DL) message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL") endif() - ggml_add_cpu_backend_variant(sandybridge AVX) - ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA) - ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512) - ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) - ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI) + ggml_add_cpu_backend_variant(x64) + ggml_add_cpu_backend_variant(sse42 SSE42) + ggml_add_cpu_backend_variant(sandybridge SSE42 AVX) + ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA) + ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512) + ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI) + ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI) if (NOT MSVC) # MSVC doesn't support AMX - ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) + ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) endif() elseif (GGML_CPU) ggml_add_cpu_backend_variant_impl("") diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index e73a3b69..6a652738 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -222,7 +222,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) elseif (GGML_AVX) list(APPEND ARCH_FLAGS /arch:AVX) list(APPEND ARCH_DEFINITIONS GGML_AVX) - else () + elseif (GGML_SSE42) list(APPEND ARCH_FLAGS /arch:SSE4.2) list(APPEND ARCH_DEFINITIONS GGML_SSE42) endif() @@ -237,8 +237,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name) if (GGML_NATIVE) list(APPEND ARCH_FLAGS -march=native) else () - list(APPEND ARCH_FLAGS -msse4.2) - list(APPEND ARCH_DEFINITIONS GGML_SSE42) + if (GGML_SSE42) + list(APPEND ARCH_FLAGS -msse4.2) + list(APPEND ARCH_DEFINITIONS GGML_SSE42) + endif() if (GGML_F16C) list(APPEND ARCH_FLAGS -mf16c) list(APPEND ARCH_DEFINITIONS GGML_F16C) diff --git a/ggml/src/ggml-cpu/cpu-feats-x86.cpp b/ggml/src/ggml-cpu/cpu-feats-x86.cpp index 902ee434..d775a036 100644 --- a/ggml/src/ggml-cpu/cpu-feats-x86.cpp +++ b/ggml/src/ggml-cpu/cpu-feats-x86.cpp @@ -263,7 +263,7 @@ void test_x86_is() { static int ggml_backend_cpu_x86_score() { // FIXME: this does not check for OS support - int score = 0; + int score = 1; cpuid_x86 is; #ifdef GGML_FMA