@@ -156,195 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() {
156
156
* This class manages a pool of CANN buffers for a specific device.
157
157
*/
158
158
struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
159
- /* *
160
- * @brief The maximum reuse margin for a buffer.
161
- */
162
- static const size_t max_reuse_margin = 1ull << 22 ; // 4MB
163
-
164
- /* *
165
- * @brief The minimum free margin for a buffer.
166
- */
167
- static const size_t min_free_margin = 1ull << 20 ; // 1MB
168
-
169
- /* *
170
- * @brief The alignment for buffer allocation.
171
- */
172
- static const size_t alignment = 128 ;
173
-
174
- /* *
175
- * @brief The device ID associated with this buffer pool.
176
- */
177
- int device;
178
-
179
- /* *
180
- * @brief Whether to disable clean during buffer allocation.
181
- */
182
- bool disable_clean = false ;
183
-
184
- /* *
185
- * @brief Structure representing a CANN buffer.
186
- */
187
- struct ggml_cann_buffer {
188
- void * ptr = nullptr ; // /< Pointer to the buffer.
189
- size_t size = 0 ; // /< Size of the buffer.
190
- std::chrono::steady_clock::time_point last_used; // /< Last used time.
191
-
192
- bool operator >(const ggml_cann_buffer& other) const {
193
- return size > other.size ;
194
- }
195
- };
196
-
197
- /* *
198
- * @brief Array of CANN buffers in the pool.
199
- */
200
- std::unordered_map<void *, size_t > buffer_pool;
201
- std::priority_queue<ggml_cann_buffer,
202
- std::vector<ggml_cann_buffer>,
203
- std::greater<>> free_buffers ;
204
-
205
- /* *
206
- * @brief Total size of all buffers in the pool.
207
- */
208
- size_t pool_size = 0 ;
209
-
210
- /* *
211
- * @brief Constructor to initialize the buffer pool for a specific device.
212
- *
213
- * @param device The device ID to associate with this buffer pool.
214
- */
215
- explicit ggml_cann_pool_buf_prio (int device) : device(device) {
216
- disable_clean = getenv (" GGML_CANN_DISABLE_BUF_POOL_CLEAN" ) != nullptr ;
159
+ /* *
160
+ * @brief The maximum reuse margin for a buffer.
161
+ */
162
+ static const size_t max_reuse_margin = 1ull << 22 ; // 4MB
163
+
164
+ /* *
165
+ * @brief The minimum free margin for a buffer.
166
+ */
167
+ static const size_t min_free_margin = 1ull << 20 ; // 1MB
168
+
169
+ /* *
170
+ * @brief The alignment for buffer allocation.
171
+ */
172
+ static const size_t alignment = 128 ;
173
+
174
+ /* *
175
+ * @brief The device ID associated with this buffer pool.
176
+ */
177
+ int device;
178
+
179
+ /* *
180
+ * @brief Whether to disable clean during buffer allocation.
181
+ */
182
+ bool disable_clean = false ;
183
+
184
+ /* *
185
+ * @brief Structure representing a CANN buffer.
186
+ */
187
+ struct ggml_cann_buffer {
188
+ void * ptr = nullptr ; // /< Pointer to the buffer.
189
+ size_t size = 0 ; // /< Size of the buffer.
190
+ std::chrono::steady_clock::time_point last_used; // /< Last used time.
191
+
192
+ bool operator >(const ggml_cann_buffer& other) const {
193
+ return size > other.size ;
217
194
}
195
+ };
218
196
219
- /* *
220
- * @brief Destructor to free all buffers in the pool.
221
- */
222
- ~ggml_cann_pool_buf_prio () {
223
- ggml_cann_set_device (device);
224
- for (auto & [b_ptr, b_size] : buffer_pool) {
225
- aclrtFree (b_ptr);
226
- pool_size -= b_size;
227
- }
228
- buffer_pool.clear ();
229
- GGML_ASSERT (pool_size == 0 );
197
+ /* *
198
+ * @brief Array of CANN buffers in the pool.
199
+ */
200
+ std::unordered_map<void *, size_t > buffer_pool;
201
+ std::priority_queue<ggml_cann_buffer,
202
+ std::vector<ggml_cann_buffer>,
203
+ std::greater<>> free_buffers ;
204
+
205
+ /* *
206
+ * @brief Total size of all buffers in the pool.
207
+ */
208
+ size_t pool_size = 0 ;
209
+
210
+ /* *
211
+ * @brief Constructor to initialize the buffer pool for a specific device.
212
+ *
213
+ * @param device The device ID to associate with this buffer pool.
214
+ */
215
+ explicit ggml_cann_pool_buf_prio (int device) : device(device) {
216
+ disable_clean = getenv (" GGML_CANN_DISABLE_BUF_POOL_CLEAN" ) != nullptr ;
217
+ }
218
+
219
+ /* *
220
+ * @brief Destructor to free all buffers in the pool.
221
+ */
222
+ ~ggml_cann_pool_buf_prio () {
223
+ ggml_cann_set_device (device);
224
+ for (auto & [b_ptr, b_size] : buffer_pool) {
225
+ aclrtFree (b_ptr);
226
+ pool_size -= b_size;
230
227
}
228
+ buffer_pool.clear ();
229
+ GGML_ASSERT (pool_size == 0 );
230
+ }
231
231
232
- /* *
233
- * @brief Allocate a buffer of the given size.
234
- *
235
- * @param size The size of the buffer to allocate.
236
- * @param actual_size A pointer to a variable to receive the actual size of
237
- * the allocated buffer.
238
- * @return A pointer to the allocated buffer.
239
- */
240
- void * alloc (size_t size, size_t * actual_size) override {
241
- size = GGML_PAD (size, alignment);
242
- if (size == 0 ) {
243
- size = alignment;
244
- }
232
+ /* *
233
+ * @brief Allocate a buffer of the given size.
234
+ *
235
+ * @param size The size of the buffer to allocate.
236
+ * @param actual_size A pointer to a variable to receive the actual size of
237
+ * the allocated buffer.
238
+ * @return A pointer to the allocated buffer.
239
+ */
240
+ void * alloc (size_t size, size_t * actual_size) override {
241
+ size = GGML_PAD (size, alignment);
242
+ if (size == 0 ) {
243
+ size = alignment;
244
+ }
245
245
246
- void * ptr = nullptr ;
247
- auto now = std::chrono::steady_clock::now ();
248
-
249
- std::vector<ggml_cann_buffer> free_buffers_rest;
250
- free_buffers_rest.reserve (free_buffers.size ());
251
- while (!free_buffers.empty ()) {
252
- auto b = free_buffers.top ();
253
- free_buffers.pop ();
254
-
255
- if (b.size >= size) {
256
- // reuse the buffer if the size is enough
257
- const size_t margin = b.size - size;
258
- if (margin <= max_reuse_margin) {
259
- *actual_size = b.size ;
260
- ptr = b.ptr ;
261
- #ifdef DEBUG_CANN_MALLOC
262
- GGML_LOG_INFO (
263
- " cann pool[%d]: reused %p, "
264
- " pool_size = %5u MB, "
265
- " size = %5u MB, "
266
- " margin = %5u MB\n " ,
267
- device, b.ptr ,
268
- (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
269
- (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ),
270
- (uint32_t )(GGML_PAD (margin, 1048576 ) / 1048576 ));
271
- #endif
272
- break ;
273
- }
274
- }
246
+ void * ptr = nullptr ;
247
+ auto now = std::chrono::steady_clock::now ();
248
+
249
+ std::vector<ggml_cann_buffer> free_buffers_rest;
250
+ free_buffers_rest.reserve (free_buffers.size ());
251
+ while (!free_buffers.empty ()) {
252
+ auto b = free_buffers.top ();
253
+ free_buffers.pop ();
275
254
276
- bool should_clean = !disable_clean &&
277
- b.size > min_free_margin &&
278
- std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used ).count () > 100 ;
279
- if (should_clean) {
280
- // free the buffer if the size is needed to be freed
281
- ACL_CHECK (aclrtFree (b.ptr ));
282
- pool_size -= b.size ;
283
- buffer_pool.erase (b.ptr );
284
- #ifdef DEBUG_CANN_MALLOC
255
+ if (b.size >= size) {
256
+ // reuse the buffer if the size is enough
257
+ const size_t margin = b.size - size;
258
+ if (margin <= max_reuse_margin) {
259
+ *actual_size = b.size ;
260
+ ptr = b.ptr ;
261
+ #ifdef DEBUG_CANN_MALLOC
285
262
GGML_LOG_INFO (
286
- " cann pool[%d]: clean %p, "
263
+ " cann pool[%d]: reused %p, "
287
264
" pool_size = %5u MB, "
288
- " size = %5u MB\n " ,
265
+ " size = %5u MB, "
266
+ " margin = %5u MB\n " ,
289
267
device, b.ptr ,
290
268
(uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
291
- (uint32_t )(GGML_PAD (b.size , 1048576 ) / 1048576 ));
292
- #endif
293
- continue ;
269
+ (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ),
270
+ (uint32_t )(GGML_PAD (margin, 1048576 ) / 1048576 ));
271
+ #endif
272
+ break ;
294
273
}
295
- free_buffers_rest.push_back (b);
296
- }
297
- for (ggml_cann_buffer &b : free_buffers_rest) {
298
- free_buffers.push (std::move (b));
299
274
}
300
275
301
- #ifdef DEBUG_CANN_MALLOC
302
- GGML_LOG_INFO (" cann pool[%d] free pool_size = %5u MB\n\n " , device, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
303
- #endif
304
- if (ptr != nullptr ) {
305
- return ptr;
276
+ bool should_clean = !disable_clean &&
277
+ b.size > min_free_margin &&
278
+ std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used ).count () > 100 ;
279
+ if (should_clean) {
280
+ // free the buffer if the size is needed to be freed
281
+ ACL_CHECK (aclrtFree (b.ptr ));
282
+ pool_size -= b.size ;
283
+ buffer_pool.erase (b.ptr );
284
+ #ifdef DEBUG_CANN_MALLOC
285
+ GGML_LOG_INFO (
286
+ " cann pool[%d]: clean %p, "
287
+ " pool_size = %5u MB, "
288
+ " size = %5u MB\n " ,
289
+ device, b.ptr ,
290
+ (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
291
+ (uint32_t )(GGML_PAD (b.size , 1048576 ) / 1048576 ));
292
+ #endif
293
+ continue ;
306
294
}
295
+ free_buffers_rest.push_back (b);
296
+ }
297
+ for (ggml_cann_buffer &b : free_buffers_rest) {
298
+ free_buffers.push (std::move (b));
299
+ }
307
300
308
- // allocate a new buffer if no buffer can be reused
309
- ggml_cann_set_device (device);
310
- ACL_CHECK (aclrtMalloc (&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311
- *actual_size = size;
312
- pool_size += size;
313
- #ifdef DEBUG_CANN_MALLOC
314
- GGML_LOG_INFO (
315
- " cann pool[%d]: allocate %p, "
316
- " pool_size = %5u MB, "
317
- " size = %5u MB\n " ,
318
- device, ptr, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
319
- (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ));
320
- #endif
321
- buffer_pool.emplace (ptr, size);
301
+ #ifdef DEBUG_CANN_MALLOC
302
+ GGML_LOG_INFO (" cann pool[%d] free pool_size = %5u MB\n\n " , device, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
303
+ #endif
304
+ if (ptr != nullptr ) {
322
305
return ptr;
323
306
}
324
307
325
- /* *
326
- * @brief Free a buffer and return it to the pool.
327
- *
328
- * @param ptr Pointer to the buffer to free.
329
- * @param size Size of the buffer to free.
330
- */
331
- void free (void * ptr, size_t size) override {
332
- auto it = buffer_pool.find (ptr);
333
- if (it == buffer_pool.end ()) {
334
- GGML_ABORT (" cann pool[%d]: buffer %p not found in pool\n " , device, ptr);
335
- }
308
+ // allocate a new buffer if no buffer can be reused
309
+ ggml_cann_set_device (device);
310
+ ACL_CHECK (aclrtMalloc (&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311
+ *actual_size = size;
312
+ pool_size += size;
313
+ #ifdef DEBUG_CANN_MALLOC
314
+ GGML_LOG_INFO (
315
+ " cann pool[%d]: allocate %p, "
316
+ " pool_size = %5u MB, "
317
+ " size = %5u MB\n " ,
318
+ device, ptr, (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ),
319
+ (uint32_t )(GGML_PAD (size, 1048576 ) / 1048576 ));
320
+ #endif
321
+ buffer_pool.emplace (ptr, size);
322
+ return ptr;
323
+ }
336
324
337
- auto now = std::chrono::steady_clock::now ();
338
- free_buffers.emplace (ggml_cann_buffer{ptr, it->second , now});
339
- #ifdef DEBUG_CANN_MALLOC
340
- GGML_LOG_INFO (
341
- " cann pool[%d]: return %p, "
342
- " pool_size = %5u MB\n " ,
343
- device, ptr,
344
- (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
345
- #endif
325
+ /* *
326
+ * @brief Free a buffer and return it to the pool.
327
+ *
328
+ * @param ptr Pointer to the buffer to free.
329
+ * @param size Size of the buffer to free.
330
+ */
331
+ void free (void * ptr, size_t size) override {
332
+ GGML_UNUSED (size);
333
+ auto it = buffer_pool.find (ptr);
334
+ if (it == buffer_pool.end ()) {
335
+ GGML_ABORT (" cann pool[%d]: buffer %p not found in pool\n " , device, ptr);
346
336
}
347
- };
337
+
338
+ auto now = std::chrono::steady_clock::now ();
339
+ free_buffers.emplace (ggml_cann_buffer{ptr, it->second , now});
340
+ #ifdef DEBUG_CANN_MALLOC
341
+ GGML_LOG_INFO (
342
+ " cann pool[%d]: return %p, "
343
+ " pool_size = %5u MB\n " ,
344
+ device, ptr,
345
+ (uint32_t )(GGML_PAD (pool_size, 1048576 ) / 1048576 ));
346
+ #endif
347
+ }
348
+ };
348
349
349
350
/* *
350
351
* @brief A pool of CANN buffers(segment buffer).
@@ -531,6 +532,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
531
532
* @param size Size of the buffer to free.
532
533
*/
533
534
void free (void * ptr, size_t size) override {
535
+ GGML_UNUSED (size);
534
536
for (int i = 0 ; i < MAX_BUFFERS; ++i) {
535
537
ggml_cann_buffer& b = buffer_pool[i];
536
538
if (b.ptr != ptr) {
0 commit comments