metal : Cache the Metal library at the device context level (llama/12265)

This commit is contained in:
BB-fat 2025-03-11 19:45:02 +08:00 committed by Georgi Gerganov
parent 776cdceb9e
commit 774c519433

View File

@ -46,6 +46,7 @@ static struct ggml_backend_device g_ggml_backend_metal_device;
static struct ggml_backend_metal_device_context { static struct ggml_backend_metal_device_context {
id<MTLDevice> mtl_device; id<MTLDevice> mtl_device;
int mtl_device_ref_count; int mtl_device_ref_count;
id<MTLLibrary> mtl_library;
bool has_simdgroup_reduction; bool has_simdgroup_reduction;
bool has_simdgroup_mm; bool has_simdgroup_mm;
@ -57,6 +58,7 @@ static struct ggml_backend_metal_device_context {
} g_ggml_ctx_dev_main = { } g_ggml_ctx_dev_main = {
/*.mtl_device =*/ nil, /*.mtl_device =*/ nil,
/*.mtl_device_ref_count =*/ 0, /*.mtl_device_ref_count =*/ 0,
/*.mtl_library =*/ nil,
/*.has_simdgroup_reduction =*/ false, /*.has_simdgroup_reduction =*/ false,
/*.has_simdgroup_mm =*/ false, /*.has_simdgroup_mm =*/ false,
/*.has_residency_sets =*/ false, /*.has_residency_sets =*/ false,
@ -108,6 +110,11 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
ctx->mtl_device_ref_count--; ctx->mtl_device_ref_count--;
if (ctx->mtl_device_ref_count == 0) { if (ctx->mtl_device_ref_count == 0) {
if (ctx->mtl_library) {
[ctx->mtl_library release];
ctx->mtl_library = nil;
}
if (ctx->mtl_device) { if (ctx->mtl_device) {
[ctx->mtl_device release]; [ctx->mtl_device release];
ctx->mtl_device = nil; ctx->mtl_device = nil;
@ -495,42 +502,14 @@ static void * ggml_metal_host_malloc(size_t n) {
return data; return data;
} }
static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) { // load library
GGML_LOG_INFO("%s: allocating\n", __func__); //
// - first check if the library is embedded
#if TARGET_OS_OSX && !GGML_METAL_NDEBUG // - then check if the library is in the bundle
// Show all the Metal device instances in the system // - if not found, load the source and compile it
NSArray * devices = MTLCopyAllDevices(); // - if that fails, return NULL
for (id<MTLDevice> device in devices) { static id<MTLLibrary> ggml_metal_load_library(id<MTLDevice> device, bool use_bfloat) {
GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
[devices release]; // since it was created by a *Copy* C method
#endif
// init context
struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
struct ggml_backend_metal_device_context * ctx_dev = dev->context;
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
ctx->queue = [device newCommandQueue];
if (ctx->queue == nil) {
GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
return NULL;
}
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
id<MTLLibrary> metal_library = nil; id<MTLLibrary> metal_library = nil;
// load library
//
// - first check if the library is embedded
// - then check if the library is in the bundle
// - if not found, load the source and compile it
// - if that fails, return NULL
{
NSError * error = nil; NSError * error = nil;
NSString * src = nil; NSString * src = nil;
@ -624,7 +603,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
// dictionary of preprocessor macros // dictionary of preprocessor macros
NSMutableDictionary * prep = [NSMutableDictionary dictionary]; NSMutableDictionary * prep = [NSMutableDictionary dictionary];
if (ctx_dev->use_bfloat) { if (use_bfloat) {
[prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"]; [prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
} }
@ -652,6 +631,45 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
#if GGML_METAL_EMBED_LIBRARY #if GGML_METAL_EMBED_LIBRARY
[src release]; [src release];
#endif // GGML_METAL_EMBED_LIBRARY #endif // GGML_METAL_EMBED_LIBRARY
return metal_library;
}
static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) {
GGML_LOG_INFO("%s: allocating\n", __func__);
#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
// Show all the Metal device instances in the system
NSArray * devices = MTLCopyAllDevices();
for (id<MTLDevice> device in devices) {
GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
[devices release]; // since it was created by a *Copy* C method
#endif
// init context
struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
struct ggml_backend_metal_device_context * ctx_dev = dev->context;
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
ctx->queue = [device newCommandQueue];
if (ctx->queue == nil) {
GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
return NULL;
}
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
// load library
if (ctx_dev->mtl_library == nil) {
ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
}
id<MTLLibrary> metal_library = ctx_dev->mtl_library;
if (metal_library == nil) {
GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__);
return NULL;
} }
// print MTL GPU family: // print MTL GPU family:
@ -725,7 +743,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
[metal_function release]; \ [metal_function release]; \
if (error) { \ if (error) { \
GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \ GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
[metal_library release]; \
return NULL; \ return NULL; \
} \ } \
} else { \ } else { \
@ -1044,8 +1061,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true);
} }
[metal_library release];
return ctx; return ctx;
} }