* This class manages a pool of CANN buffers for a specific device.
*/
struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
- /**
- * @brief The maximum reuse margin for a buffer.
- */
- static const size_t max_reuse_margin = 1ull << 22; // 4MB
-
- /**
- * @brief The minimum free margin for a buffer.
- */
- static const size_t min_free_margin = 1ull << 20; // 1MB
-
- /**
- * @brief The alignment for buffer allocation.
- */
- static const size_t alignment = 128;
-
- /**
- * @brief The device ID associated with this buffer pool.
- */
- int device;
-
- /**
- * @brief Whether to disable clean during buffer allocation.
- */
- bool disable_clean = false;
-
- /**
- * @brief Structure representing a CANN buffer.
- */
- struct ggml_cann_buffer {
- void* ptr = nullptr; ///< Pointer to the buffer.
- size_t size = 0; ///< Size of the buffer.
- std::chrono::steady_clock::time_point last_used; ///< Last used time.
-
- bool operator>(const ggml_cann_buffer& other) const {
- return size > other.size;
- }
- };
-
- /**
- * @brief Array of CANN buffers in the pool.
- */
- std::unordered_map<void*, size_t> buffer_pool;
- std::priority_queue<ggml_cann_buffer,
- std::vector<ggml_cann_buffer>,
- std::greater<>> free_buffers ;
-
- /**
- * @brief Total size of all buffers in the pool.
- */
- size_t pool_size = 0;
-
- /**
- * @brief Constructor to initialize the buffer pool for a specific device.
- *
- * @param device The device ID to associate with this buffer pool.
- */
- explicit ggml_cann_pool_buf_prio(int device) : device(device) {
- disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
+ /**
+ * @brief The maximum reuse margin for a buffer: alloc() reuses a free buffer only when it exceeds the padded request by at most this amount.
+ */
+ static const size_t max_reuse_margin = 1ull << 22; // 4MB
+
+ /**
+ * @brief The minimum free margin for a buffer: the cleanup pass in alloc() only releases free buffers strictly larger than this.
+ */
+ static const size_t min_free_margin = 1ull << 20; // 1MB
+
+ /**
+ * @brief The alignment for buffer allocation: alloc() pads every request with GGML_PAD(size, alignment).
+ */
+ static const size_t alignment = 128;
+
+ /**
+ * @brief The device ID associated with this buffer pool.
+ */
+ int device;
+
+ /**
+ * @brief Whether to disable clean during buffer allocation; set by the constructor when GGML_CANN_DISABLE_BUF_POOL_CLEAN is present in the environment.
+ */
+ bool disable_clean = false;
+
+ /**
+ * @brief Structure representing a CANN buffer held in the free list (free_buffers).
+ */
+ struct ggml_cann_buffer {
+ void* ptr = nullptr; ///< Pointer to the buffer.
+ size_t size = 0; ///< Size of the buffer, as recorded in buffer_pool when it was allocated.
+ std::chrono::steady_clock::time_point last_used; ///< Time the buffer was returned via free(); alloc() compares it against a 100 ms idle threshold.
+
+ bool operator>(const ggml_cann_buffer& other) const { // compares by size only; std::greater<> uses it to keep the smallest free buffer on top
+ return size > other.size;
}
+ };
- /**
- * @brief Destructor to free all buffers in the pool.
- */
- ~ggml_cann_pool_buf_prio() {
- ggml_cann_set_device(device);
- for (auto& [b_ptr, b_size] : buffer_pool) {
- aclrtFree(b_ptr);
- pool_size -= b_size;
- }
- buffer_pool.clear();
- GGML_ASSERT(pool_size == 0);
+ /**
+ * @brief Buffers tracked by the pool: buffer_pool maps every live pointer (in use or free) to its size; free_buffers holds the ones returned via free(), smallest on top.
+ */
+ std::unordered_map<void*, size_t> buffer_pool;
+ std::priority_queue<ggml_cann_buffer,
+ std::vector<ggml_cann_buffer>,
+ std::greater<>> free_buffers ;
+
+ /**
+ * @brief Total size of all buffers in the pool (in use and free); increased on aclrtMalloc and decreased whenever the pool releases a buffer.
+ */
+ size_t pool_size = 0;
+
+ /**
+ * @brief Constructor to initialize the buffer pool for a specific device.
+ * Also reads GGML_CANN_DISABLE_BUF_POOL_CLEAN: if the variable is set (to any value), disable_clean becomes true.
+ * @param device The device ID to associate with this buffer pool.
+ */
+ explicit ggml_cann_pool_buf_prio(int device) : device(device) {
+ disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
+ }
+
+ /**
+ * @brief Destructor to free all buffers in the pool: every pointer in buffer_pool is passed to aclrtFree and pool_size is asserted to end at 0.
+ */
+ ~ggml_cann_pool_buf_prio() {
+ ggml_cann_set_device(device);
+ for (auto& [b_ptr, b_size] : buffer_pool) {
+ aclrtFree(b_ptr); // NOTE(review): result is not checked here (no ACL_CHECK, unlike the cleanup path in alloc()) - confirm this is intentional
+ pool_size -= b_size;
}
+ buffer_pool.clear();
+ GGML_ASSERT(pool_size == 0);
+ }
- /**
- * @brief Allocate a buffer of the given size.
- *
- * @param size The size of the buffer to allocate.
- * @param actual_size A pointer to a variable to receive the actual size of
- * the allocated buffer.
- * @return A pointer to the allocated buffer.
- */
- void* alloc(size_t size, size_t* actual_size) override {
- size = GGML_PAD(size, alignment);
- if (size == 0) {
- size = alignment;
- }
+ /**
+ * @brief Allocate a buffer of the given size, reusing a free buffer when a suitable one exists.
+ * Free buffers are popped smallest-first until one is reused; popped buffers that are not reused are either released (see should_clean) or put back. If none is reused, a new buffer is allocated with aclrtMalloc.
+ * @param size The size of the buffer to allocate; it is padded via GGML_PAD(size, alignment), and a padded size of 0 is replaced by alignment.
+ * @param actual_size A pointer to a variable to receive the actual size of
+ * the allocated buffer (the reused buffer's full size, or the padded size for a new one); written unconditionally.
+ * @return A pointer to the allocated buffer.
+ */
+ void* alloc(size_t size, size_t* actual_size) override {
+ size = GGML_PAD(size, alignment);
+ if (size == 0) { // a request that pads to 0 still gets one alignment unit
+ size = alignment;
+ }
- void* ptr = nullptr;
- auto now = std::chrono::steady_clock::now();
-
- std::vector<ggml_cann_buffer> free_buffers_rest;
- free_buffers_rest.reserve(free_buffers.size());
- while (!free_buffers.empty()) {
- auto b = free_buffers.top();
- free_buffers.pop();
-
- if (b.size >= size) {
- // reuse the buffer if the size is enough
- const size_t margin = b.size - size;
- if (margin <= max_reuse_margin) {
- *actual_size = b.size;
- ptr = b.ptr;
- #ifdef DEBUG_CANN_MALLOC
- GGML_LOG_INFO(
- "cann pool[%d]: reused %p, "
- "pool_size = %5u MB, "
- "size = %5u MB, "
- "margin = %5u MB\n",
- device, b.ptr,
- (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
- (uint32_t)(GGML_PAD(size, 1048576) / 1048576),
- (uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
- #endif
- break;
- }
- }
+ void* ptr = nullptr;
+ auto now = std::chrono::steady_clock::now();
+
+ std::vector<ggml_cann_buffer> free_buffers_rest; // popped buffers that are neither reused nor released
+ free_buffers_rest.reserve(free_buffers.size());
+ while (!free_buffers.empty()) { // visits free buffers in ascending size order
+ auto b = free_buffers.top();
+ free_buffers.pop();
- bool should_clean = !disable_clean &&
- b.size > min_free_margin &&
- std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
- if (should_clean) {
- // free the buffer if the size is needed to be freed
- ACL_CHECK(aclrtFree(b.ptr));
- pool_size -= b.size;
- buffer_pool.erase(b.ptr);
- #ifdef DEBUG_CANN_MALLOC
+ if (b.size >= size) {
+ // reuse the buffer if the size is enough and the unused margin does not exceed max_reuse_margin
+ const size_t margin = b.size - size;
+ if (margin <= max_reuse_margin) {
+ *actual_size = b.size;
+ ptr = b.ptr;
+#ifdef DEBUG_CANN_MALLOC
GGML_LOG_INFO(
- "cann pool[%d]: clean %p, "
+ "cann pool[%d]: reused %p, "
"pool_size = %5u MB, "
- "size = %5u MB\n",
+ "size = %5u MB, "
+ "margin = %5u MB\n",
device, b.ptr,
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
- (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
- #endif
- continue;
+ (uint32_t)(GGML_PAD(size, 1048576) / 1048576),
+ (uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
+#endif
+ break;
}
- free_buffers_rest.push_back(b);
- }
- for (ggml_cann_buffer &b : free_buffers_rest) {
- free_buffers.push(std::move(b));
}
- #ifdef DEBUG_CANN_MALLOC
- GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
- #endif
- if (ptr != nullptr) {
- return ptr;
+ bool should_clean = !disable_clean &&
+ b.size > min_free_margin &&
+ std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
+ if (should_clean) {
+ // release the buffer: it was not reused, is larger than min_free_margin, and has been idle for more than 100 ms
+ ACL_CHECK(aclrtFree(b.ptr)); // NOTE(review): issued before any ggml_cann_set_device(device) call in this function - confirm the current device is already correct
+ pool_size -= b.size;
+ buffer_pool.erase(b.ptr);
+#ifdef DEBUG_CANN_MALLOC
+ GGML_LOG_INFO(
+ "cann pool[%d]: clean %p, "
+ "pool_size = %5u MB, "
+ "size = %5u MB\n",
+ device, b.ptr,
+ (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
+ (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
+#endif
+ continue;
}
+ free_buffers_rest.push_back(b); // keep it: it goes back on the heap after the scan
+ }
+ for (ggml_cann_buffer &b : free_buffers_rest) { // restore the buffers that were popped but kept
+ free_buffers.push(std::move(b));
+ }
- // allocate a new buffer if no buffer can be reused
- ggml_cann_set_device(device);
- ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
- *actual_size = size;
- pool_size += size;
- #ifdef DEBUG_CANN_MALLOC
- GGML_LOG_INFO(
- "cann pool[%d]: allocate %p, "
- "pool_size = %5u MB, "
- "size = %5u MB\n",
- device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
- (uint32_t)(GGML_PAD(size, 1048576) / 1048576));
- #endif
- buffer_pool.emplace(ptr, size);
+#ifdef DEBUG_CANN_MALLOC
+ GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
+#endif
+ if (ptr != nullptr) { // a free buffer was selected for reuse in the loop above
return ptr;
}
- /**
- * @brief Free a buffer and return it to the pool.
- *
- * @param ptr Pointer to the buffer to free.
- * @param size Size of the buffer to free.
- */
- void free(void* ptr, size_t size) override {
- auto it = buffer_pool.find(ptr);
- if (it == buffer_pool.end()) {
- GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
- }
+ // allocate a new buffer if no buffer can be reused; the padded size is what gets allocated and reported
+ ggml_cann_set_device(device);
+ ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
+ *actual_size = size;
+ pool_size += size;
+#ifdef DEBUG_CANN_MALLOC
+ GGML_LOG_INFO(
+ "cann pool[%d]: allocate %p, "
+ "pool_size = %5u MB, "
+ "size = %5u MB\n",
+ device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
+ (uint32_t)(GGML_PAD(size, 1048576) / 1048576));
+#endif
+ buffer_pool.emplace(ptr, size); // remember the size so free() can look it up by pointer
+ return ptr;
+ }
- auto now = std::chrono::steady_clock::now();
- free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
- #ifdef DEBUG_CANN_MALLOC
- GGML_LOG_INFO(
- "cann pool[%d]: return %p, "
- "pool_size = %5u MB\n",
- device, ptr,
- (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
- #endif
+ /**
+ * @brief Free a buffer and return it to the pool.
+ * The buffer is not released here: it is pushed onto free_buffers with the current time. Aborts via GGML_ABORT if ptr is not found in buffer_pool.
+ * @param ptr Pointer to the buffer to free.
+ * @param size Size of the buffer to free (ignored; the size recorded in buffer_pool is used instead).
+ */
+ void free(void* ptr, size_t size) override {
+ GGML_UNUSED(size); // the caller's size is ignored; silences the unused-parameter warning
+ auto it = buffer_pool.find(ptr);
+ if (it == buffer_pool.end()) {
+ GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
}
- };
+
+ auto now = std::chrono::steady_clock::now();
+ free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now}); // entry stays in buffer_pool; pool_size is unchanged
+#ifdef DEBUG_CANN_MALLOC
+ GGML_LOG_INFO(
+ "cann pool[%d]: return %p, "
+ "pool_size = %5u MB\n",
+ device, ptr,
+ (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
+#endif
+ }
+};
/**
* @brief A pool of CANN buffers(segment buffer).
* @param size Size of the buffer to free.
*/
void free(void* ptr, size_t size) override {
+ GGML_UNUSED(size);
for (int i = 0; i < MAX_BUFFERS; ++i) {
ggml_cann_buffer& b = buffer_pool[i];
if (b.ptr != ptr) {