metal : Cache the Metal library at the device context level (llama/12265)

2025-06-13 21:26:41 +02:00 · 2025-03-11 19:45:02 +08:00 · 2025-03-11 19:45:02 +08:00 · 774c519433
commit 774c519433
parent 776cdceb9e
1 changed files with 147 additions and 132 deletions
--- a/ggml/src/ggml-metal/ggml-metal.m
+++ b/ggml/src/ggml-metal/ggml-metal.m
@ -46,6 +46,7 @@ static struct ggml_backend_device g_ggml_backend_metal_device;
 static struct ggml_backend_metal_device_context {
    id<MTLDevice> mtl_device;
    int           mtl_device_ref_count;
+    id<MTLLibrary> mtl_library;
    bool has_simdgroup_reduction;
    bool has_simdgroup_mm;
@ -57,6 +58,7 @@ static struct ggml_backend_metal_device_context {
 } g_ggml_ctx_dev_main = {
    /*.mtl_device              =*/ nil,
    /*.mtl_device_ref_count    =*/ 0,
+    /*.mtl_library             =*/ nil,
    /*.has_simdgroup_reduction =*/ false,
    /*.has_simdgroup_mm        =*/ false,
    /*.has_residency_sets      =*/ false,
@ -108,6 +110,11 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
    ctx->mtl_device_ref_count--;
    if (ctx->mtl_device_ref_count == 0) {
+        if (ctx->mtl_library) {
+            [ctx->mtl_library release];
+            ctx->mtl_library = nil;
+        }
        if (ctx->mtl_device) {
            [ctx->mtl_device release];
            ctx->mtl_device = nil;
@ -495,42 +502,14 @@ static void * ggml_metal_host_malloc(size_t n) {
    return data;
 }
-static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) {
+// load library
-    GGML_LOG_INFO("%s: allocating\n", __func__);
+//
-
+// - first check if the library is embedded
-#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
+// - then check if the library is in the bundle
-    // Show all the Metal device instances in the system
+// - if not found, load the source and compile it
-    NSArray * devices = MTLCopyAllDevices();
+// - if that fails, return NULL
-    for (id<MTLDevice> device in devices) {
+static id<MTLLibrary> ggml_metal_load_library(id<MTLDevice> device, bool use_bfloat) {
        GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
    }
    [devices release]; // since it was created by a *Copy* C method
 #endif
    // init context
    struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
    struct ggml_backend_metal_device_context * ctx_dev = dev->context;
    id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
    GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
    ctx->queue  = [device newCommandQueue];
    if (ctx->queue == nil) {
        GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
        return NULL;
    }
    ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
    id<MTLLibrary> metal_library = nil;
    // load library
    //
    // - first check if the library is embedded
    // - then check if the library is in the bundle
    // - if not found, load the source and compile it
    // - if that fails, return NULL
    {
    NSError * error = nil;
    NSString * src = nil;
@ -624,7 +603,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
            // dictionary of preprocessor macros
            NSMutableDictionary * prep = [NSMutableDictionary dictionary];
-                if (ctx_dev->use_bfloat) {
+            if (use_bfloat) {
                [prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
            }
@ -652,6 +631,45 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
 #if GGML_METAL_EMBED_LIBRARY
    [src release];
 #endif // GGML_METAL_EMBED_LIBRARY
    return metal_library;
 }
 static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) {
    GGML_LOG_INFO("%s: allocating\n", __func__);
 #if TARGET_OS_OSX && !GGML_METAL_NDEBUG
    // Show all the Metal device instances in the system
    NSArray * devices = MTLCopyAllDevices();
    for (id<MTLDevice> device in devices) {
        GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
    }
    [devices release]; // since it was created by a *Copy* C method
 #endif
    // init context
    struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
    struct ggml_backend_metal_device_context * ctx_dev = dev->context;
    id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
    GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
    ctx->queue  = [device newCommandQueue];
    if (ctx->queue == nil) {
        GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
        return NULL;
    }
    ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
    // load library
    if (ctx_dev->mtl_library == nil) {
        ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
    }
    id<MTLLibrary> metal_library = ctx_dev->mtl_library;
    if (metal_library == nil) {
        GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__);
        return NULL;
    }
    // print MTL GPU family:
@ -725,7 +743,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
            [metal_function release]; \
            if (error) { \
                GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
-                [metal_library release]; \
                return NULL; \
            } \
        } else { \
@ -1044,8 +1061,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32,               pool_2d_max_f32,                true);
    }
-    [metal_library release];
    return ctx;
 }