mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-06-13 21:26:41 +02:00
metal : Cache the Metal library at the device context level (llama/12265)
This commit is contained in:
parent
776cdceb9e
commit
774c519433
@ -46,6 +46,7 @@ static struct ggml_backend_device g_ggml_backend_metal_device;
|
|||||||
static struct ggml_backend_metal_device_context {
|
static struct ggml_backend_metal_device_context {
|
||||||
id<MTLDevice> mtl_device;
|
id<MTLDevice> mtl_device;
|
||||||
int mtl_device_ref_count;
|
int mtl_device_ref_count;
|
||||||
|
id<MTLLibrary> mtl_library;
|
||||||
|
|
||||||
bool has_simdgroup_reduction;
|
bool has_simdgroup_reduction;
|
||||||
bool has_simdgroup_mm;
|
bool has_simdgroup_mm;
|
||||||
@ -57,6 +58,7 @@ static struct ggml_backend_metal_device_context {
|
|||||||
} g_ggml_ctx_dev_main = {
|
} g_ggml_ctx_dev_main = {
|
||||||
/*.mtl_device =*/ nil,
|
/*.mtl_device =*/ nil,
|
||||||
/*.mtl_device_ref_count =*/ 0,
|
/*.mtl_device_ref_count =*/ 0,
|
||||||
|
/*.mtl_library =*/ nil,
|
||||||
/*.has_simdgroup_reduction =*/ false,
|
/*.has_simdgroup_reduction =*/ false,
|
||||||
/*.has_simdgroup_mm =*/ false,
|
/*.has_simdgroup_mm =*/ false,
|
||||||
/*.has_residency_sets =*/ false,
|
/*.has_residency_sets =*/ false,
|
||||||
@ -108,6 +110,11 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
|
|||||||
ctx->mtl_device_ref_count--;
|
ctx->mtl_device_ref_count--;
|
||||||
|
|
||||||
if (ctx->mtl_device_ref_count == 0) {
|
if (ctx->mtl_device_ref_count == 0) {
|
||||||
|
if (ctx->mtl_library) {
|
||||||
|
[ctx->mtl_library release];
|
||||||
|
ctx->mtl_library = nil;
|
||||||
|
}
|
||||||
|
|
||||||
if (ctx->mtl_device) {
|
if (ctx->mtl_device) {
|
||||||
[ctx->mtl_device release];
|
[ctx->mtl_device release];
|
||||||
ctx->mtl_device = nil;
|
ctx->mtl_device = nil;
|
||||||
@ -495,42 +502,14 @@ static void * ggml_metal_host_malloc(size_t n) {
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) {
|
// load library
|
||||||
GGML_LOG_INFO("%s: allocating\n", __func__);
|
//
|
||||||
|
// - first check if the library is embedded
|
||||||
#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
|
// - then check if the library is in the bundle
|
||||||
// Show all the Metal device instances in the system
|
// - if not found, load the source and compile it
|
||||||
NSArray * devices = MTLCopyAllDevices();
|
// - if that fails, return NULL
|
||||||
for (id<MTLDevice> device in devices) {
|
static id<MTLLibrary> ggml_metal_load_library(id<MTLDevice> device, bool use_bfloat) {
|
||||||
GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
|
|
||||||
}
|
|
||||||
[devices release]; // since it was created by a *Copy* C method
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// init context
|
|
||||||
struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
|
|
||||||
struct ggml_backend_metal_device_context * ctx_dev = dev->context;
|
|
||||||
|
|
||||||
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
|
|
||||||
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
|
|
||||||
|
|
||||||
ctx->queue = [device newCommandQueue];
|
|
||||||
if (ctx->queue == nil) {
|
|
||||||
GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
|
||||||
|
|
||||||
id<MTLLibrary> metal_library = nil;
|
id<MTLLibrary> metal_library = nil;
|
||||||
|
|
||||||
// load library
|
|
||||||
//
|
|
||||||
// - first check if the library is embedded
|
|
||||||
// - then check if the library is in the bundle
|
|
||||||
// - if not found, load the source and compile it
|
|
||||||
// - if that fails, return NULL
|
|
||||||
{
|
|
||||||
NSError * error = nil;
|
NSError * error = nil;
|
||||||
NSString * src = nil;
|
NSString * src = nil;
|
||||||
|
|
||||||
@ -624,7 +603,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
|||||||
// dictionary of preprocessor macros
|
// dictionary of preprocessor macros
|
||||||
NSMutableDictionary * prep = [NSMutableDictionary dictionary];
|
NSMutableDictionary * prep = [NSMutableDictionary dictionary];
|
||||||
|
|
||||||
if (ctx_dev->use_bfloat) {
|
if (use_bfloat) {
|
||||||
[prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
|
[prep setObject:@"1" forKey:@"GGML_METAL_USE_BF16"];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -652,6 +631,45 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
|||||||
#if GGML_METAL_EMBED_LIBRARY
|
#if GGML_METAL_EMBED_LIBRARY
|
||||||
[src release];
|
[src release];
|
||||||
#endif // GGML_METAL_EMBED_LIBRARY
|
#endif // GGML_METAL_EMBED_LIBRARY
|
||||||
|
|
||||||
|
return metal_library;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t dev) {
|
||||||
|
GGML_LOG_INFO("%s: allocating\n", __func__);
|
||||||
|
|
||||||
|
#if TARGET_OS_OSX && !GGML_METAL_NDEBUG
|
||||||
|
// Show all the Metal device instances in the system
|
||||||
|
NSArray * devices = MTLCopyAllDevices();
|
||||||
|
for (id<MTLDevice> device in devices) {
|
||||||
|
GGML_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
|
||||||
|
}
|
||||||
|
[devices release]; // since it was created by a *Copy* C method
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// init context
|
||||||
|
struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context));
|
||||||
|
struct ggml_backend_metal_device_context * ctx_dev = dev->context;
|
||||||
|
|
||||||
|
id<MTLDevice> device = ggml_backend_metal_device_acq(ctx_dev);
|
||||||
|
GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
|
||||||
|
|
||||||
|
ctx->queue = [device newCommandQueue];
|
||||||
|
if (ctx->queue == nil) {
|
||||||
|
GGML_LOG_ERROR("%s: error: failed to create command queue\n", __func__);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
||||||
|
|
||||||
|
// load library
|
||||||
|
if (ctx_dev->mtl_library == nil) {
|
||||||
|
ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat);
|
||||||
|
}
|
||||||
|
id<MTLLibrary> metal_library = ctx_dev->mtl_library;
|
||||||
|
if (metal_library == nil) {
|
||||||
|
GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// print MTL GPU family:
|
// print MTL GPU family:
|
||||||
@ -725,7 +743,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
|||||||
[metal_function release]; \
|
[metal_function release]; \
|
||||||
if (error) { \
|
if (error) { \
|
||||||
GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
|
GGML_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
|
||||||
[metal_library release]; \
|
|
||||||
return NULL; \
|
return NULL; \
|
||||||
} \
|
} \
|
||||||
} else { \
|
} else { \
|
||||||
@ -1044,8 +1061,6 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
|||||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true);
|
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
[metal_library release];
|
|
||||||
|
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user