diff --git a/ggml/include/ggml-cpu.h b/ggml/include/ggml-cpu.h index 3aa71bad..d23c6b26 100644 --- a/ggml/include/ggml-cpu.h +++ b/ggml/include/ggml-cpu.h @@ -8,7 +8,7 @@ extern "C" { #endif // the compute plan that needs to be prepared for ggml_graph_compute() - // since https://github.com/ggerganov/ggml/issues/287 + // since https://github.com/ggml-org/ggml/issues/287 struct ggml_cplan { size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()` uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()` diff --git a/ggml/include/ggml-metal.h b/ggml/include/ggml-metal.h index 669c1f84..a6106944 100644 --- a/ggml/include/ggml-metal.h +++ b/ggml/include/ggml-metal.h @@ -45,7 +45,7 @@ GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend); GGML_DEPRECATED( GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size), - "obsoleted by the new device interface - https://github.com/ggerganov/llama.cpp/pull/9713"); + "obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713"); GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data); diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index 0cbf8318..dbef5df2 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -1816,7 +1816,7 @@ inline static float ggml_silu_f32(float x) { #if __FINITE_MATH_ONLY__ #error "some routines in ggml.c require non-finite math arithmetics -- pass -fno-finite-math-only to the compiler to fix" -#error "ref: https://github.com/ggerganov/llama.cpp/pull/7154#issuecomment-2143844461" +#error "ref: https://github.com/ggml-org/llama.cpp/pull/7154#issuecomment-2143844461" #endif #if defined(__ARM_NEON) && defined(__aarch64__) @@ -7574,7 +7574,7 @@ UseGgmlGemm2:; int64_t nchunk1 = (nr1 + chunk_size - 1) / chunk_size; // If the chunking is poor for the number of threads on this setup, scrap the whole plan. Re-chunk it by thread. - // Also, chunking by thread was measured to have perform better on NUMA systems. See https://github.com/ggerganov/llama.cpp/pull/6915 + // Also, chunking by thread was measured to have perform better on NUMA systems. See https://github.com/ggml-org/llama.cpp/pull/6915 // In theory, chunking should be just as useful on NUMA and non NUMA systems, but testing disagreed with that. if (nchunk0 * nchunk1 < nth * 4 || ggml_is_numa()) { // distribute the thread work across the inner or outer loop based on which one is larger diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index 944d90af..0add6b51 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -1983,7 +1983,7 @@ static void ggml_metal_encode_node( const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); // TODO: add ggml_metal_kargs struct - // TODO: optimize (see https://github.com/ggerganov/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6) + // TODO: optimize (see https://github.com/ggml-org/llama.cpp/pull/10238/commits/7941b6b9ec29a2866fec6fa6c51612515ca509f6) [encoder setComputePipelineState:pipeline]; [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; if (id_src1) { diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 44f04c90..da415184 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -1058,7 +1058,7 @@ kernel void kernel_soft_max( } // This barrier fixes a failing test - // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335 + // ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335 threadgroup_barrier(mem_flags::mem_none); float sum = simd_sum(lsum); @@ -1163,7 +1163,7 @@ kernel void kernel_soft_max_4( const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3]; // This barrier fixes a failing test - // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335 + // ref: https://github.com/ggml-org/ggml/pull/621#discussion_r1425156335 threadgroup_barrier(mem_flags::mem_none); float sum = simd_sum(lsum);