From 483eecae622b4321cda3877249893552abb5524a Mon Sep 17 00:00:00 2001 From: hipudding Date: Tue, 15 Apr 2025 19:08:55 +0800 Subject: [PATCH] CANN: Add x86 build ci (llama/12950) * CANN: Add x86 build ci * CANN: fix code format --- ggml/src/ggml-cann/ggml-cann.cpp | 326 ++++++++++++++++--------------- 1 file changed, 164 insertions(+), 162 deletions(-) diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index db8ae260..08b9ca30 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -156,195 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() { * This class manages a pool of CANN buffers for a specific device. */ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { - /** - * @brief The maximum reuse margin for a buffer. - */ - static const size_t max_reuse_margin = 1ull << 22; // 4MB + /** + * @brief The maximum reuse margin for a buffer. + */ + static const size_t max_reuse_margin = 1ull << 22; // 4MB - /** - * @brief The minimum free margin for a buffer. - */ - static const size_t min_free_margin = 1ull << 20; // 1MB + /** + * @brief The minimum free margin for a buffer. + */ + static const size_t min_free_margin = 1ull << 20; // 1MB - /** - * @brief The alignment for buffer allocation. - */ - static const size_t alignment = 128; + /** + * @brief The alignment for buffer allocation. + */ + static const size_t alignment = 128; - /** - * @brief The device ID associated with this buffer pool. - */ - int device; + /** + * @brief The device ID associated with this buffer pool. + */ + int device; - /** - * @brief Whether to disable clean during buffer allocation. - */ - bool disable_clean = false; + /** + * @brief Whether to disable clean during buffer allocation. + */ + bool disable_clean = false; - /** - * @brief Structure representing a CANN buffer. - */ - struct ggml_cann_buffer { - void* ptr = nullptr; ///< Pointer to the buffer. - size_t size = 0; ///< Size of the buffer. - std::chrono::steady_clock::time_point last_used; ///< Last used time. + /** + * @brief Structure representing a CANN buffer. + */ + struct ggml_cann_buffer { + void* ptr = nullptr; ///< Pointer to the buffer. + size_t size = 0; ///< Size of the buffer. + std::chrono::steady_clock::time_point last_used; ///< Last used time. - bool operator>(const ggml_cann_buffer& other) const { - return size > other.size; - } - }; + bool operator>(const ggml_cann_buffer& other) const { + return size > other.size; + } + }; - /** - * @brief Array of CANN buffers in the pool. - */ - std::unordered_map buffer_pool; - std::priority_queue, - std::greater<>> free_buffers ; + /** + * @brief Array of CANN buffers in the pool. + */ + std::unordered_map buffer_pool; + std::priority_queue, + std::greater<>> free_buffers ; - /** - * @brief Total size of all buffers in the pool. - */ - size_t pool_size = 0; + /** + * @brief Total size of all buffers in the pool. + */ + size_t pool_size = 0; - /** - * @brief Constructor to initialize the buffer pool for a specific device. - * - * @param device The device ID to associate with this buffer pool. - */ - explicit ggml_cann_pool_buf_prio(int device) : device(device) { - disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; + /** + * @brief Constructor to initialize the buffer pool for a specific device. + * + * @param device The device ID to associate with this buffer pool. + */ + explicit ggml_cann_pool_buf_prio(int device) : device(device) { + disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; + } + + /** + * @brief Destructor to free all buffers in the pool. + */ + ~ggml_cann_pool_buf_prio() { + ggml_cann_set_device(device); + for (auto& [b_ptr, b_size] : buffer_pool) { + aclrtFree(b_ptr); + pool_size -= b_size; + } + buffer_pool.clear(); + GGML_ASSERT(pool_size == 0); + } + + /** + * @brief Allocate a buffer of the given size. + * + * @param size The size of the buffer to allocate. + * @param actual_size A pointer to a variable to receive the actual size of + * the allocated buffer. + * @return A pointer to the allocated buffer. + */ + void* alloc(size_t size, size_t* actual_size) override { + size = GGML_PAD(size, alignment); + if (size == 0) { + size = alignment; } - /** - * @brief Destructor to free all buffers in the pool. - */ - ~ggml_cann_pool_buf_prio() { - ggml_cann_set_device(device); - for (auto& [b_ptr, b_size] : buffer_pool) { - aclrtFree(b_ptr); - pool_size -= b_size; - } - buffer_pool.clear(); - GGML_ASSERT(pool_size == 0); - } + void* ptr = nullptr; + auto now = std::chrono::steady_clock::now(); - /** - * @brief Allocate a buffer of the given size. - * - * @param size The size of the buffer to allocate. - * @param actual_size A pointer to a variable to receive the actual size of - * the allocated buffer. - * @return A pointer to the allocated buffer. - */ - void* alloc(size_t size, size_t* actual_size) override { - size = GGML_PAD(size, alignment); - if (size == 0) { - size = alignment; - } + std::vector free_buffers_rest; + free_buffers_rest.reserve(free_buffers.size()); + while (!free_buffers.empty()) { + auto b = free_buffers.top(); + free_buffers.pop(); - void* ptr = nullptr; - auto now = std::chrono::steady_clock::now(); - - std::vector free_buffers_rest; - free_buffers_rest.reserve(free_buffers.size()); - while (!free_buffers.empty()) { - auto b = free_buffers.top(); - free_buffers.pop(); - - if (b.size >= size) { - // reuse the buffer if the size is enough - const size_t margin = b.size - size; - if (margin <= max_reuse_margin) { - *actual_size = b.size; - ptr = b.ptr; - #ifdef DEBUG_CANN_MALLOC - GGML_LOG_INFO( - "cann pool[%d]: reused %p, " - "pool_size = %5u MB, " - "size = %5u MB, " - "margin = %5u MB\n", - device, b.ptr, - (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), - (uint32_t)(GGML_PAD(size, 1048576) / 1048576), - (uint32_t)(GGML_PAD(margin, 1048576) / 1048576)); - #endif - break; - } - } - - bool should_clean = !disable_clean && - b.size > min_free_margin && - std::chrono::duration_cast(now - b.last_used).count() > 100; - if (should_clean) { - // free the buffer if the size is needed to be freed - ACL_CHECK(aclrtFree(b.ptr)); - pool_size -= b.size; - buffer_pool.erase(b.ptr); - #ifdef DEBUG_CANN_MALLOC + if (b.size >= size) { + // reuse the buffer if the size is enough + const size_t margin = b.size - size; + if (margin <= max_reuse_margin) { + *actual_size = b.size; + ptr = b.ptr; +#ifdef DEBUG_CANN_MALLOC GGML_LOG_INFO( - "cann pool[%d]: clean %p, " + "cann pool[%d]: reused %p, " "pool_size = %5u MB, " - "size = %5u MB\n", + "size = %5u MB, " + "margin = %5u MB\n", device, b.ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), - (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576)); - #endif - continue; + (uint32_t)(GGML_PAD(size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(margin, 1048576) / 1048576)); +#endif + break; } - free_buffers_rest.push_back(b); - } - for (ggml_cann_buffer &b : free_buffers_rest) { - free_buffers.push(std::move(b)); } - #ifdef DEBUG_CANN_MALLOC - GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); - #endif - if (ptr != nullptr) { - return ptr; + bool should_clean = !disable_clean && + b.size > min_free_margin && + std::chrono::duration_cast(now - b.last_used).count() > 100; + if (should_clean) { + // free the buffer if the size is needed to be freed + ACL_CHECK(aclrtFree(b.ptr)); + pool_size -= b.size; + buffer_pool.erase(b.ptr); +#ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: clean %p, " + "pool_size = %5u MB, " + "size = %5u MB\n", + device, b.ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576)); +#endif + continue; } + free_buffers_rest.push_back(b); + } + for (ggml_cann_buffer &b : free_buffers_rest) { + free_buffers.push(std::move(b)); + } - // allocate a new buffer if no buffer can be reused - ggml_cann_set_device(device); - ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST)); - *actual_size = size; - pool_size += size; - #ifdef DEBUG_CANN_MALLOC - GGML_LOG_INFO( - "cann pool[%d]: allocate %p, " - "pool_size = %5u MB, " - "size = %5u MB\n", - device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), - (uint32_t)(GGML_PAD(size, 1048576) / 1048576)); - #endif - buffer_pool.emplace(ptr, size); +#ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); +#endif + if (ptr != nullptr) { return ptr; } - /** - * @brief Free a buffer and return it to the pool. - * - * @param ptr Pointer to the buffer to free. - * @param size Size of the buffer to free. - */ - void free(void* ptr, size_t size) override { - auto it = buffer_pool.find(ptr); - if (it == buffer_pool.end()) { - GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr); - } + // allocate a new buffer if no buffer can be reused + ggml_cann_set_device(device); + ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST)); + *actual_size = size; + pool_size += size; +#ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: allocate %p, " + "pool_size = %5u MB, " + "size = %5u MB\n", + device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(size, 1048576) / 1048576)); +#endif + buffer_pool.emplace(ptr, size); + return ptr; + } - auto now = std::chrono::steady_clock::now(); - free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now}); - #ifdef DEBUG_CANN_MALLOC - GGML_LOG_INFO( - "cann pool[%d]: return %p, " - "pool_size = %5u MB\n", - device, ptr, - (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); - #endif + /** + * @brief Free a buffer and return it to the pool. + * + * @param ptr Pointer to the buffer to free. + * @param size Size of the buffer to free. + */ + void free(void* ptr, size_t size) override { + GGML_UNUSED(size); + auto it = buffer_pool.find(ptr); + if (it == buffer_pool.end()) { + GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr); } - }; + + auto now = std::chrono::steady_clock::now(); + free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now}); +#ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: return %p, " + "pool_size = %5u MB\n", + device, ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); +#endif + } +}; /** * @brief A pool of CANN buffers(segment buffer). @@ -531,6 +532,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool { * @param size Size of the buffer to free. */ void free(void* ptr, size_t size) override { + GGML_UNUSED(size); for (int i = 0; i < MAX_BUFFERS; ++i) { ggml_cann_buffer& b = buffer_pool[i]; if (b.ptr != ptr) {