mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-08-09 13:25:53 +02:00
add amx kernel for gemm (llama/8998)
add intel amx isa detection add vnni kernel for gemv cases add vnni and amx kernel support for block_q8_0 code cleanup fix packing B issue enable openmp fine tune amx kernel switch to aten parallel pattern add error message for nested parallelism code cleanup add f16 support in ggml-amx add amx kernels for QK_K quant formats: Q4_K, Q5_K, Q6_K and IQ4_XS update CMakeList update README fix some compilation warning fix compiler warning when amx is not enabled minor change ggml-ci move ggml_amx_init from ggml.c to ggml-amx/mmq.cpp ggml-ci update CMakeLists with -mamx-tile, -mamx-int8 and -mamx-bf16 ggml-ci add amx as an ggml-backend update header file, the old path for immintrin.h has changed to ggml-cpu-impl.h minor change update CMakeLists.txt minor change apply weight prepacking in set_tensor method in ggml-backend fix compile error ggml-ci minor change ggml-ci update CMakeLists.txt ggml-ci add march dependency minor change ggml-ci change ggml_backend_buffer_is_host to return false for amx backend ggml-ci fix supports_op use device reg for AMX backend ggml-ci minor change ggml-ci minor change fix rebase set .buffer_from_host_ptr to be false for AMX backend
This commit is contained in:
committed by
Georgi Gerganov
parent
28b044dad9
commit
e1936eb2a5
@ -267,6 +267,26 @@ if (GGML_LLAMAFILE)
|
||||
set(GGML_SOURCES_LLAMAFILE llamafile/sgemm.cpp)
|
||||
endif()
|
||||
|
||||
if (GGML_AMX)
|
||||
if (CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 11.0)
|
||||
else()
|
||||
set(GGML_AMX OFF)
|
||||
message(WARNING "AMX requires gcc version > 11.0. Turning off GGML_AMX.")
|
||||
endif()
|
||||
|
||||
if (GGML_AMX)
|
||||
message(STATUS "Using AMX")
|
||||
|
||||
list(APPEND GGML_CDEF_PUBLIC GGML_USE_AMX)
|
||||
|
||||
file(GLOB GGML_HEADERS_AMX "ggml-amx/*.h")
|
||||
list(APPEND GGML_HEADERS_AMX "../include/ggml-amx.h")
|
||||
|
||||
file(GLOB GGML_SOURCES_AMX "ggml-amx/*.cpp")
|
||||
list(APPEND GGML_SOURCES_AMX "ggml-amx.cpp")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (GGML_CUDA)
|
||||
cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
|
||||
|
||||
@ -1180,6 +1200,18 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
|
||||
endif()
|
||||
if (GGML_AMX_TILE)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_TILE__>)
|
||||
endif()
|
||||
if (GGML_AMX_INT8)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_INT8__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_INT8__>)
|
||||
endif()
|
||||
if (GGML_AMX_BF16)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_BF16__>)
|
||||
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AMX_BF16__>)
|
||||
endif()
|
||||
elseif (GGML_AVX2)
|
||||
list(APPEND ARCH_FLAGS /arch:AVX2)
|
||||
elseif (GGML_AVX)
|
||||
@ -1215,6 +1247,15 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
|
||||
if (GGML_AVX512_BF16)
|
||||
list(APPEND ARCH_FLAGS -mavx512bf16)
|
||||
endif()
|
||||
if (GGML_AMX_TILE)
|
||||
list(APPEND ARCH_FLAGS -mamx-tile)
|
||||
endif()
|
||||
if (GGML_AMX_INT8)
|
||||
list(APPEND ARCH_FLAGS -mamx-int8)
|
||||
endif()
|
||||
if (GGML_AMX_BF16)
|
||||
list(APPEND ARCH_FLAGS -mamx-bf16)
|
||||
endif()
|
||||
endif()
|
||||
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
|
||||
message(STATUS "PowerPC detected")
|
||||
@ -1340,6 +1381,7 @@ add_library(ggml
|
||||
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
|
||||
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
|
||||
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
|
||||
${GGML_SOURCES_AMX} ${GGML_HEADERS_AMX}
|
||||
${GGML_SOURCES_CANN} ${GGML_HEADERS_CANN}
|
||||
ggml-aarch64.c ggml-aarch64.h
|
||||
)
|
||||
|
@ -329,7 +329,6 @@ bool ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type
|
||||
if (backend->device) {
|
||||
return ggml_backend_dev_supports_buft(backend->device, buft);
|
||||
}
|
||||
|
||||
return backend->iface.supports_buft(backend, buft);
|
||||
}
|
||||
|
||||
@ -550,6 +549,14 @@ void * ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, const char * na
|
||||
#include "ggml-rpc.h"
|
||||
#endif
|
||||
|
||||
#ifndef __AMX_INT8__
|
||||
#undef GGML_USE_AMX
|
||||
#endif
|
||||
|
||||
#ifdef GGML_USE_AMX
|
||||
# include "ggml-amx.h"
|
||||
#endif
|
||||
|
||||
struct ggml_backend_registry {
|
||||
std::vector<ggml_backend_reg_t> backends;
|
||||
std::vector<ggml_backend_dev_t> devices;
|
||||
@ -570,6 +577,9 @@ struct ggml_backend_registry {
|
||||
#ifdef GGML_USE_RPC
|
||||
register_backend(ggml_backend_rpc_reg());
|
||||
#endif
|
||||
#ifdef GGML_USE_AMX
|
||||
register_backend(ggml_backend_amx_reg());
|
||||
#endif
|
||||
|
||||
// TODO: sycl, kompute, cann
|
||||
|
||||
|
@ -23219,6 +23219,14 @@ int ggml_cpu_has_avx512_bf16(void) {
|
||||
#endif
|
||||
}
|
||||
|
||||
int ggml_cpu_has_amx_int8(void) {
|
||||
#if defined(__AMX_INT8__)
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int ggml_cpu_has_fma(void) {
|
||||
#if defined(__FMA__)
|
||||
return 1;
|
||||
|
Reference in New Issue
Block a user