Skip to content

Commit 54a7272

Browse files
authored
CANN: Add x86 build ci (#12950)
* CANN: Add x86 build ci
* CANN: fix code format
1 parent 84778e9 commit 54a7272

File tree

2 files changed

+174
-171
lines changed

2 files changed

+174
-171
lines changed

.github/workflows/build.yml

+3 -2
Original file line number | Diff line number | Diff line change
@@ -1766,16 +1766,17 @@ jobs:
17661766
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
17671767
defaults:
17681768
run:
1769-
shell: bash -el {0}
1770-
runs-on: ubuntu-24.04-arm
1769+
shell: bash -el {0}
17711770
strategy:
17721771
matrix:
1772+
arch: [x86, aarch64]
17731773
cann:
17741774
- '8.1.RC1.alpha001-910b-openeuler22.03-py3.10'
17751775
device:
17761776
- 'ascend910b3'
17771777
build:
17781778
- 'Release'
1779+
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
17791780
container: ascendai/cann:${{ matrix.cann }}
17801781
steps:
17811782
- name: Checkout

ggml/src/ggml-cann/ggml-cann.cpp

+171 -169
Original file line number | Diff line number | Diff line change
@@ -156,195 +156,196 @@ const ggml_cann_device_info& ggml_cann_info() {
156156
* This class manages a pool of CANN buffers for a specific device.
157157
*/
158158
struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
159-
/**
160-
* @brief The maximum reuse margin for a buffer.
161-
*/
162-
static const size_t max_reuse_margin = 1ull << 22; // 4MB
163-
164-
/**
165-
* @brief The minimum free margin for a buffer.
166-
*/
167-
static const size_t min_free_margin = 1ull << 20; // 1MB
168-
169-
/**
170-
* @brief The alignment for buffer allocation.
171-
*/
172-
static const size_t alignment = 128;
173-
174-
/**
175-
* @brief The device ID associated with this buffer pool.
176-
*/
177-
int device;
178-
179-
/**
180-
* @brief Whether to disable clean during buffer allocation.
181-
*/
182-
bool disable_clean = false;
183-
184-
/**
185-
* @brief Structure representing a CANN buffer.
186-
*/
187-
struct ggml_cann_buffer {
188-
void* ptr = nullptr; ///< Pointer to the buffer.
189-
size_t size = 0; ///< Size of the buffer.
190-
std::chrono::steady_clock::time_point last_used; ///< Last used time.
191-
192-
bool operator>(const ggml_cann_buffer& other) const {
193-
return size > other.size;
194-
}
195-
};
196-
197-
/**
198-
* @brief Array of CANN buffers in the pool.
199-
*/
200-
std::unordered_map<void*, size_t> buffer_pool;
201-
std::priority_queue<ggml_cann_buffer,
202-
std::vector<ggml_cann_buffer>,
203-
std::greater<>> free_buffers ;
204-
205-
/**
206-
* @brief Total size of all buffers in the pool.
207-
*/
208-
size_t pool_size = 0;
209-
210-
/**
211-
* @brief Constructor to initialize the buffer pool for a specific device.
212-
*
213-
* @param device The device ID to associate with this buffer pool.
214-
*/
215-
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
216-
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
159+
/**
160+
* @brief The maximum reuse margin for a buffer.
161+
*/
162+
static const size_t max_reuse_margin = 1ull << 22; // 4MB
163+
164+
/**
165+
* @brief The minimum free margin for a buffer.
166+
*/
167+
static const size_t min_free_margin = 1ull << 20; // 1MB
168+
169+
/**
170+
* @brief The alignment for buffer allocation.
171+
*/
172+
static const size_t alignment = 128;
173+
174+
/**
175+
* @brief The device ID associated with this buffer pool.
176+
*/
177+
int device;
178+
179+
/**
180+
* @brief Whether to disable clean during buffer allocation.
181+
*/
182+
bool disable_clean = false;
183+
184+
/**
185+
* @brief Structure representing a CANN buffer.
186+
*/
187+
struct ggml_cann_buffer {
188+
void* ptr = nullptr; ///< Pointer to the buffer.
189+
size_t size = 0; ///< Size of the buffer.
190+
std::chrono::steady_clock::time_point last_used; ///< Last used time.
191+
192+
bool operator>(const ggml_cann_buffer& other) const {
193+
return size > other.size;
217194
}
195+
};
218196

219-
/**
220-
* @brief Destructor to free all buffers in the pool.
221-
*/
222-
~ggml_cann_pool_buf_prio() {
223-
ggml_cann_set_device(device);
224-
for (auto& [b_ptr, b_size] : buffer_pool) {
225-
aclrtFree(b_ptr);
226-
pool_size -= b_size;
227-
}
228-
buffer_pool.clear();
229-
GGML_ASSERT(pool_size == 0);
197+
/**
198+
* @brief Array of CANN buffers in the pool.
199+
*/
200+
std::unordered_map<void*, size_t> buffer_pool;
201+
std::priority_queue<ggml_cann_buffer,
202+
std::vector<ggml_cann_buffer>,
203+
std::greater<>> free_buffers ;
204+
205+
/**
206+
* @brief Total size of all buffers in the pool.
207+
*/
208+
size_t pool_size = 0;
209+
210+
/**
211+
* @brief Constructor to initialize the buffer pool for a specific device.
212+
*
213+
* @param device The device ID to associate with this buffer pool.
214+
*/
215+
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
216+
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
217+
}
218+
219+
/**
220+
* @brief Destructor to free all buffers in the pool.
221+
*/
222+
~ggml_cann_pool_buf_prio() {
223+
ggml_cann_set_device(device);
224+
for (auto& [b_ptr, b_size] : buffer_pool) {
225+
aclrtFree(b_ptr);
226+
pool_size -= b_size;
230227
}
228+
buffer_pool.clear();
229+
GGML_ASSERT(pool_size == 0);
230+
}
231231

232-
/**
233-
* @brief Allocate a buffer of the given size.
234-
*
235-
* @param size The size of the buffer to allocate.
236-
* @param actual_size A pointer to a variable to receive the actual size of
237-
* the allocated buffer.
238-
* @return A pointer to the allocated buffer.
239-
*/
240-
void* alloc(size_t size, size_t* actual_size) override {
241-
size = GGML_PAD(size, alignment);
242-
if (size == 0) {
243-
size = alignment;
244-
}
232+
/**
233+
* @brief Allocate a buffer of the given size.
234+
*
235+
* @param size The size of the buffer to allocate.
236+
* @param actual_size A pointer to a variable to receive the actual size of
237+
* the allocated buffer.
238+
* @return A pointer to the allocated buffer.
239+
*/
240+
void* alloc(size_t size, size_t* actual_size) override {
241+
size = GGML_PAD(size, alignment);
242+
if (size == 0) {
243+
size = alignment;
244+
}
245245

246-
void* ptr = nullptr;
247-
auto now = std::chrono::steady_clock::now();
248-
249-
std::vector<ggml_cann_buffer> free_buffers_rest;
250-
free_buffers_rest.reserve(free_buffers.size());
251-
while (!free_buffers.empty()) {
252-
auto b = free_buffers.top();
253-
free_buffers.pop();
254-
255-
if (b.size >= size) {
256-
// reuse the buffer if the size is enough
257-
const size_t margin = b.size - size;
258-
if (margin <= max_reuse_margin) {
259-
*actual_size = b.size;
260-
ptr = b.ptr;
261-
#ifdef DEBUG_CANN_MALLOC
262-
GGML_LOG_INFO(
263-
"cann pool[%d]: reused %p, "
264-
"pool_size = %5u MB, "
265-
"size = %5u MB, "
266-
"margin = %5u MB\n",
267-
device, b.ptr,
268-
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
269-
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
270-
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
271-
#endif
272-
break;
273-
}
274-
}
246+
void* ptr = nullptr;
247+
auto now = std::chrono::steady_clock::now();
248+
249+
std::vector<ggml_cann_buffer> free_buffers_rest;
250+
free_buffers_rest.reserve(free_buffers.size());
251+
while (!free_buffers.empty()) {
252+
auto b = free_buffers.top();
253+
free_buffers.pop();
275254

276-
bool should_clean = !disable_clean &&
277-
b.size > min_free_margin &&
278-
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
279-
if (should_clean) {
280-
// free the buffer if the size is needed to be freed
281-
ACL_CHECK(aclrtFree(b.ptr));
282-
pool_size -= b.size;
283-
buffer_pool.erase(b.ptr);
284-
#ifdef DEBUG_CANN_MALLOC
255+
if (b.size >= size) {
256+
// reuse the buffer if the size is enough
257+
const size_t margin = b.size - size;
258+
if (margin <= max_reuse_margin) {
259+
*actual_size = b.size;
260+
ptr = b.ptr;
261+
#ifdef DEBUG_CANN_MALLOC
285262
GGML_LOG_INFO(
286-
"cann pool[%d]: clean %p, "
263+
"cann pool[%d]: reused %p, "
287264
"pool_size = %5u MB, "
288-
"size = %5u MB\n",
265+
"size = %5u MB, "
266+
"margin = %5u MB\n",
289267
device, b.ptr,
290268
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
291-
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
292-
#endif
293-
continue;
269+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576),
270+
(uint32_t)(GGML_PAD(margin, 1048576) / 1048576));
271+
#endif
272+
break;
294273
}
295-
free_buffers_rest.push_back(b);
296-
}
297-
for (ggml_cann_buffer &b : free_buffers_rest) {
298-
free_buffers.push(std::move(b));
299274
}
300275

301-
#ifdef DEBUG_CANN_MALLOC
302-
GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
303-
#endif
304-
if (ptr != nullptr) {
305-
return ptr;
276+
bool should_clean = !disable_clean &&
277+
b.size > min_free_margin &&
278+
std::chrono::duration_cast<std::chrono::milliseconds>(now - b.last_used).count() > 100;
279+
if (should_clean) {
280+
// free the buffer if the size is needed to be freed
281+
ACL_CHECK(aclrtFree(b.ptr));
282+
pool_size -= b.size;
283+
buffer_pool.erase(b.ptr);
284+
#ifdef DEBUG_CANN_MALLOC
285+
GGML_LOG_INFO(
286+
"cann pool[%d]: clean %p, "
287+
"pool_size = %5u MB, "
288+
"size = %5u MB\n",
289+
device, b.ptr,
290+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
291+
(uint32_t)(GGML_PAD(b.size, 1048576) / 1048576));
292+
#endif
293+
continue;
306294
}
295+
free_buffers_rest.push_back(b);
296+
}
297+
for (ggml_cann_buffer &b : free_buffers_rest) {
298+
free_buffers.push(std::move(b));
299+
}
307300

308-
// allocate a new buffer if no buffer can be reused
309-
ggml_cann_set_device(device);
310-
ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311-
*actual_size = size;
312-
pool_size += size;
313-
#ifdef DEBUG_CANN_MALLOC
314-
GGML_LOG_INFO(
315-
"cann pool[%d]: allocate %p, "
316-
"pool_size = %5u MB, "
317-
"size = %5u MB\n",
318-
device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
319-
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
320-
#endif
321-
buffer_pool.emplace(ptr, size);
301+
#ifdef DEBUG_CANN_MALLOC
302+
GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
303+
#endif
304+
if (ptr != nullptr) {
322305
return ptr;
323306
}
324307

325-
/**
326-
* @brief Free a buffer and return it to the pool.
327-
*
328-
* @param ptr Pointer to the buffer to free.
329-
* @param size Size of the buffer to free.
330-
*/
331-
void free(void* ptr, size_t size) override {
332-
auto it = buffer_pool.find(ptr);
333-
if (it == buffer_pool.end()) {
334-
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
335-
}
308+
// allocate a new buffer if no buffer can be reused
309+
ggml_cann_set_device(device);
310+
ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
311+
*actual_size = size;
312+
pool_size += size;
313+
#ifdef DEBUG_CANN_MALLOC
314+
GGML_LOG_INFO(
315+
"cann pool[%d]: allocate %p, "
316+
"pool_size = %5u MB, "
317+
"size = %5u MB\n",
318+
device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576),
319+
(uint32_t)(GGML_PAD(size, 1048576) / 1048576));
320+
#endif
321+
buffer_pool.emplace(ptr, size);
322+
return ptr;
323+
}
336324

337-
auto now = std::chrono::steady_clock::now();
338-
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
339-
#ifdef DEBUG_CANN_MALLOC
340-
GGML_LOG_INFO(
341-
"cann pool[%d]: return %p, "
342-
"pool_size = %5u MB\n",
343-
device, ptr,
344-
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
345-
#endif
325+
/**
326+
* @brief Free a buffer and return it to the pool.
327+
*
328+
* @param ptr Pointer to the buffer to free.
329+
* @param size Size of the buffer to free.
330+
*/
331+
void free(void* ptr, size_t size) override {
332+
GGML_UNUSED(size);
333+
auto it = buffer_pool.find(ptr);
334+
if (it == buffer_pool.end()) {
335+
GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr);
346336
}
347-
};
337+
338+
auto now = std::chrono::steady_clock::now();
339+
free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now});
340+
#ifdef DEBUG_CANN_MALLOC
341+
GGML_LOG_INFO(
342+
"cann pool[%d]: return %p, "
343+
"pool_size = %5u MB\n",
344+
device, ptr,
345+
(uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576));
346+
#endif
347+
}
348+
};
348349

349350
/**
350351
* @brief A pool of CANN buffers(segment buffer).
@@ -531,6 +532,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
531532
* @param size Size of the buffer to free.
532533
*/
533534
void free(void* ptr, size_t size) override {
535+
GGML_UNUSED(size);
534536
for (int i = 0; i < MAX_BUFFERS; ++i) {
535537
ggml_cann_buffer& b = buffer_pool[i];
536538
if (b.ptr != ptr) {

0 commit comments

Comments
 (0)