git.djapps.eu Git - pkg/ggml/sources/whisper.cpp/commitdiff
CANN: Add x86 build ci (llama/12950)
author hipudding <redacted>
Tue, 15 Apr 2025 11:08:55 +0000 (19:08 +0800)
committer Georgi Gerganov <redacted>
Thu, 24 Apr 2025 17:39:16 +0000 (20:39 +0300)
* CANN: Add x86 build ci

* CANN: fix code format

ggml/src/ggml-cann/ggml-cann.cpp

index db8ae260af6262f2afe4289cfe7bf228a962e426..08b9ca301c6172de3b949865f0ab16aab11c24a2 100644 (file)
@@ -156,195 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() {
  * This class manages a pool of CANN buffers for a specific device.
  */
 struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
-        /**
-         * @brief The maximum reuse margin for a buffer.
-         */
-        static const size_t max_reuse_margin = 1ull << 22;  // 4MB
-
-        /**
-         * @brief The minimum free margin for a buffer.
-         */
-        static const size_t min_free_margin = 1ull << 20;   // 1MB
-
-        /**
-         * @brief The alignment for buffer allocation.
-         */
-        static const size_t alignment = 128;
-
-        /**
-         * @brief The device ID associated with this buffer pool.
-         */
-        int device;
-
-        /**
-         * @brief Whether to disable clean during buffer allocation.
-         */
-        bool disable_clean = false;
-
-        /**
-         * @brief Structure representing a CANN buffer.
-         */
-        struct ggml_cann_buffer {
-            void* ptr = nullptr;  ///< Pointer to the buffer.
-            size_t size = 0;      ///< Size of the buffer.
-            std::chrono::steady_clock::time_point last_used;  ///< Last used time.
-
-            bool operator>(const ggml_cann_buffer& other) const {
-                return size > other.size;
-            }
-        };
-
-        /**
-         * @brief Array of CANN buffers in the pool.
-         */
-        std::unordered_map<void*, size_t> buffer_pool;
-        std::priority_queue<ggml_cann_buffer,
-                            std::vector<ggml_cann_buffer>,
-                            std::greater<>> free_buffers ;
-
-        /**
-         * @brief Total size of all buffers in the pool.
-         */
-        size_t pool_size = 0;
-
-        /**
-         * @brief Constructor to initialize the buffer pool for a specific device.
-         *
-         * @param device The device ID to associate with this buffer pool.
-         */
-        explicit ggml_cann_pool_buf_prio(int device) : device(device) {
-            disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
+    /**
+     * @brief The maximum reuse margin for a buffer.
+     */
+    static const size_t max_reuse_margin = 1ull << 22;  // 4MB
+
+    /**
+     * @brief The minimum free margin for a buffer.
+     */
+    static const size_t min_free_margin = 1ull << 20;   // 1MB
+
+    /**
+     * @brief The alignment for buffer allocation.
+     */
+    static const size_t alignment = 128;
+
+    /**
+     * @brief The device ID associated with this buffer pool.
+     */
+    int device;
+
+    /**
+     * @brief Whether to disable clean during buffer allocation.
+     */
+    bool disable_clean = false;
+
+    /**
+     * @brief Structure representing a CANN buffer.
+     */
+    struct ggml_cann_buffer {
+        void* ptr = nullptr;  ///< Pointer to the buffer.
+        size_t size = 0;      ///< Size of the buffer.
+        std::chrono::steady_clock::time_point last_used;  ///< Last used time.
+
+        bool operator>(const ggml_cann_buffer& other) const {
+            return size > other.size;
         }
+    };
 
-        /**
-         * @brief Destructor to free all buffers in the pool.
-         */
-        ~ggml_cann_pool_buf_prio() {
-            ggml_cann_set_device(device);
-            for (auto& [b_ptr, b_size] : buffer_pool) {
-                aclrtFree(b_ptr);
-               pool_size -= b_size;
-            }
-            buffer_pool.clear();
-            GGML_ASSERT(pool_size == 0);
+    /**
+     * @brief Array of CANN buffers in the pool.
+     */
+    std::unordered_map<void*, size_t> buffer_pool;
+    std::priority_queue<ggml_cann_buffer,
+                        std::vector<ggml_cann_buffer>,
+                        std::greater<>> free_buffers ;
+
+    /**
+     * @brief Total size of all buffers in the pool.
+     */
+    size_t pool_size = 0;
+
+    /**
+     * @brief Constructor to initialize the buffer pool for a specific device.
+     *
+     * @param device The device ID to associate with this buffer pool.
+     */
+    explicit ggml_cann_pool_buf_prio(int device) : device(device) {
+        disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
+    }
+
+    /**
+     * @brief Destructor to free all buffers in the pool.
+     */
+    ~ggml_cann_pool_buf_prio() {
+        ggml_cann_set_device(device);
+        for (auto& [b_ptr, b_size] : buffer_pool) {
+            aclrtFree(b_ptr);
+            pool_size -= b_size;
         }
+        buffer_pool.clear();
+        GGML_ASSERT(pool_size == 0);
+    }
 
-        /**
-         * @brief Allocate a buffer of the given size.
-         *
-         * @param size The size of the buffer to allocate.
-         * @param actual_size A pointer to a variable to receive the actual size of
-         * the allocated buffer.
-         * @return A pointer to the allocated buffer.
-         */
-        void* alloc(size_t size, size_t* actual_size) override {
-            size = GGML_PAD(size, alignment);
-            if (size == 0) {
-                size = alignment;
-            }
+    /**
+     * @brief Allocate a buffer of the given size.
+     *
+     * @param size The size of the buffer to allocate.
+     * @param actual_size A pointer to a variable to receive the actual size of
+     * the allocated buffer.
+     * @return A pointer to the allocated buffer.
+     */
+    void* alloc(size_t size, size_t* actual_size) override {
+        size = GGML_PAD(size, alignment);
+        if (size == 0) {
+            size = alignment;
+        }
 
-            void* ptr = nullptr;
-            auto now = std::chrono::steady_clock::now();
-
-            std::vector<ggml_cann_buffer> free_buffers_rest;
-            free_buffers_rest.reserve(free_buffers.size());
-            while (!free_buffers.empty()) {
-                auto b = free_buffers.top();
-                free_buffers.pop();
-
-                if (b.size >= size) {
-                    // reuse the buffer if the size is enough
-                    const size_t margin = b.size - size;
-                    if (margin <= max_reuse_margin) {
-                        *actual_size = b.size;
-                        ptr = b.ptr;
-    #ifdef DEBUG_CANN_MALLOC
-                        GGML_LOG_INFO(
-                            "cann pool[%d]: reused   %p, "
-                            "pool_size = %5u MB, "
-                            "size = %5u MB, "
-                            "margin = %5u MB\n",
-                            device, b.ptr,
-                            (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
-                            (uint32_t)(GGML_PAD(size, 1048576) / 1048576),
-                            (uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
-    #endif
-                        break;
-                    }
-                }
+        void* ptr = nullptr;
+        auto now = std::chrono::steady_clock::now();
+
+        std::vector<ggml_cann_buffer> free_buffers_rest;
+        free_buffers_rest.reserve(free_buffers.size());
+        while (!free_buffers.empty()) {
+            auto b = free_buffers.top();
+            free_buffers.pop();
 
-                bool should_clean = !disable_clean &&
-                                   b.size > min_free_margin &&
-                                   std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
-                if (should_clean) {
-                    // free the buffer if the size is needed to be freed
-                    ACL_CHECK(aclrtFree(b.ptr));
-                    pool_size -= b.size;
-                    buffer_pool.erase(b.ptr);
-    #ifdef DEBUG_CANN_MALLOC
+            if (b.size >= size) {
+                // reuse the buffer if the size is enough
+                const size_t margin = b.size - size;
+                if (margin <= max_reuse_margin) {
+                    *actual_size = b.size;
+                    ptr = b.ptr;
+#ifdef DEBUG_CANN_MALLOC
                     GGML_LOG_INFO(
-                        "cann pool[%d]: clean    %p, "
+                        "cann pool[%d]: reused   %p, "
                         "pool_size = %5u MB, "
-                        "size = %5u MB\n",
+                        "size = %5u MB, "
+                        "margin = %5u MB\n",
                         device, b.ptr,
                         (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
-                        (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
-    #endif
-                    continue;
+                        (uint32_t)(GGML_PAD(size, 1048576) / 1048576),
+                        (uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
+#endif
+                    break;
                 }
-                free_buffers_rest.push_back(b);
-            }
-            for (ggml_cann_buffer &b : free_buffers_rest) {
-                free_buffers.push(std::move(b));
             }
 
-    #ifdef DEBUG_CANN_MALLOC
-            GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
-    #endif
-            if (ptr != nullptr) {
-                return ptr;
+            bool should_clean = !disable_clean &&
+                                b.size > min_free_margin &&
+                                std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
+            if (should_clean) {
+                // free the buffer if the size is needed to be freed
+                ACL_CHECK(aclrtFree(b.ptr));
+                pool_size -= b.size;
+                buffer_pool.erase(b.ptr);
+#ifdef DEBUG_CANN_MALLOC
+                GGML_LOG_INFO(
+                    "cann pool[%d]: clean    %p, "
+                    "pool_size = %5u MB, "
+                    "size = %5u MB\n",
+                    device, b.ptr,
+                    (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
+                    (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
+#endif
+                continue;
             }
+            free_buffers_rest.push_back(b);
+        }
+        for (ggml_cann_buffer &b : free_buffers_rest) {
+            free_buffers.push(std::move(b));
+        }
 
-            // allocate a new buffer if no buffer can be reused
-            ggml_cann_set_device(device);
-            ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
-            *actual_size = size;
-            pool_size += size;
-    #ifdef DEBUG_CANN_MALLOC
-            GGML_LOG_INFO(
-                "cann pool[%d]: allocate %p, "
-                "pool_size = %5u MB, "
-                "size = %5u MB\n",
-                device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
-                (uint32_t)(GGML_PAD(size, 1048576) / 1048576));
-    #endif
-            buffer_pool.emplace(ptr, size);
+#ifdef DEBUG_CANN_MALLOC
+        GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
+#endif
+        if (ptr != nullptr) {
             return ptr;
         }
 
-        /**
-         * @brief Free a buffer and return it to the pool.
-         *
-         * @param ptr Pointer to the buffer to free.
-         * @param size Size of the buffer to free.
-         */
-        void free(void* ptr, size_t size) override {
-            auto it = buffer_pool.find(ptr);
-            if (it == buffer_pool.end()) {
-                GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
-            }
+        // allocate a new buffer if no buffer can be reused
+        ggml_cann_set_device(device);
+        ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
+        *actual_size = size;
+        pool_size += size;
+#ifdef DEBUG_CANN_MALLOC
+        GGML_LOG_INFO(
+            "cann pool[%d]: allocate %p, "
+            "pool_size = %5u MB, "
+            "size = %5u MB\n",
+            device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
+            (uint32_t)(GGML_PAD(size, 1048576) / 1048576));
+#endif
+        buffer_pool.emplace(ptr, size);
+        return ptr;
+    }
 
-            auto now = std::chrono::steady_clock::now();
-            free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
-    #ifdef DEBUG_CANN_MALLOC
-            GGML_LOG_INFO(
-                "cann pool[%d]: return   %p, "
-                "pool_size = %5u MB\n",
-                device, ptr,
-                (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
-    #endif
+    /**
+     * @brief Free a buffer and return it to the pool.
+     *
+     * @param ptr Pointer to the buffer to free.
+     * @param size Size of the buffer to free.
+     */
+    void free(void* ptr, size_t size) override {
+        GGML_UNUSED(size);
+        auto it = buffer_pool.find(ptr);
+        if (it == buffer_pool.end()) {
+            GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
         }
-    };
+
+        auto now = std::chrono::steady_clock::now();
+        free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
+#ifdef DEBUG_CANN_MALLOC
+        GGML_LOG_INFO(
+            "cann pool[%d]: return   %p, "
+            "pool_size = %5u MB\n",
+            device, ptr,
+            (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
+#endif
+    }
+};
 
 /**
  * @brief A pool of CANN buffers(segment buffer).
@@ -531,6 +532,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
      * @param size Size of the buffer to free.
      */
     void free(void* ptr, size_t size) override {
+        GGML_UNUSED(size);
         for (int i = 0; i < MAX_BUFFERS; ++i) {
             ggml_cann_buffer& b = buffer_pool[i];
             if (b.ptr != ptr) {