
Commit eef0010

Qualcomm AI Engine Direct - Replace private variable with function (#8724)
Summary: To improve readability, replace the private variable is_bert_ with the member function is_bert().
1 parent 5da974a commit eef0010
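
For context, the change follows a common C++ refactor: a derived boolean that used to be cached in a const data member at construction time is instead computed on demand by a small private accessor. The sketch below is illustrative only; ExampleIoMgr, prepare_io, and main are hypothetical names that are not part of io_manager.cpp/.h, and only prefill_cache_len_ and is_bert() mirror identifiers from the actual diff.

#include <cstdint>
#include <iostream>

// Hypothetical manager class illustrating the pattern in this commit: the
// "is this a BERT-style model?" flag is derived from prefill_cache_len_ on
// every call instead of being stored in a separate const member (is_bert_).
class ExampleIoMgr {
 public:
  explicit ExampleIoMgr(int32_t prefill_cache_len)
      : prefill_cache_len_(prefill_cache_len) {}

  void prepare_io() const {
    if (!is_bert()) {
      // Decoder-style models need position ids and a KV cache.
      std::cout << "allocate position ids and KV cache\n";
    }
  }

 private:
  // If the cache length is zero, it indicates a BERT model, which does not
  // use position ids or KV cache inputs.
  bool is_bert() const {
    return prefill_cache_len_ == 0;
  }

  const int32_t prefill_cache_len_{0};
};

int main() {
  ExampleIoMgr llama_mgr(/*prefill_cache_len=*/512);
  ExampleIoMgr bert_mgr(/*prefill_cache_len=*/0);
  llama_mgr.prepare_io();  // prints the allocation message
  bert_mgr.prepare_io();   // prints nothing
  return 0;
}

Besides readability, the accessor form removes one member whose value would otherwise have to be kept consistent with prefill_cache_len_.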

2 files changed: +20 -16 lines changed

Diff for: examples/qualcomm/oss_scripts/llama/runner/io_manager.cpp

+10-12
@@ -81,8 +81,7 @@ ShiftPointerIoMgr::ShiftPointerIoMgr(
       eval_mode_(eval_mode),
       prefill_forward_name_(prefill_forward_name),
       kv_forward_name_(kv_forward_name),
-      use_int64_token_(use_int64_token),
-      is_bert_(prefill_cache_len_ == 0) {
+      use_int64_token_(use_int64_token) {
   if (!prefill_forward_name_.empty()) {
     input_tensors_[prefill_forward_name_] =
         std::vector<std::vector<executorch::aten::TensorImpl*>>(modules.size());
@@ -391,7 +390,7 @@ void ShiftPointerIoMgr::prepare_prefill_io(
   input_tensors_[prefill_forward_name_][0].push_back(
       prefill_attention_mask_.get());
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // [I]: prefill_input_pos
     Result<TensorInfo> prefill_input_pos =
         methods_meta[0]->input_tensor_meta(2);
@@ -544,7 +543,7 @@ void ShiftPointerIoMgr::update_prefill_to_kv_io(
   size_t copied_size = pos * sizeof(uint8_t);
   for (int i = 0; i < k_cache_in.size(); ++i) {
     uint8_t* ptr_in = k_cache_in[i]->mutable_data<uint8_t>();
-    if (is_bert_) {
+    if (is_bert()) {
       const uint8_t* ptr_out = k_cache_out[i]->data<uint8_t>();
       for (size_t j = 0, offset = kv_cache_len_; j < head_dim_;
            ++j, offset += kv_cache_len_) {
@@ -620,7 +619,7 @@ void ShiftPointerIoMgr::update_prefill_io(
   (void)cur_token;
   (void)output_tensors;
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // update v_cache
     auto& v_cache_in = v_cache_in_[prefill_forward_name_];
     auto& v_cache_out = v_cache_out_[prefill_forward_name_];
@@ -668,7 +667,7 @@ void ShiftPointerIoMgr::fill_prefill_toks(
     std::vector<uint64_t>& prompt_tokens) {
   IO* ptr = static_cast<IO*>(get_mutable_ptr());
   for (int i = 0; i < prefill_ar_len_; i++) {
-    if (!is_bert_) {
+    if (!is_bert()) {
       ptr->prefill_input_pos[i] = start_pos + i;
     }
 
@@ -735,8 +734,7 @@ SmartMaskIoMgr::SmartMaskIoMgr(
       eval_mode_(eval_mode),
       prefill_forward_name_(prefill_forward_name),
       kv_forward_name_(kv_forward_name),
-      use_int64_token_(use_int64_token),
-      is_bert_(prefill_cache_len == 0) {
+      use_int64_token_(use_int64_token) {
   if (!prefill_forward_name_.empty()) {
     input_tensors_[prefill_forward_name_] =
         std::vector<std::vector<executorch::aten::TensorImpl*>>(modules.size());
@@ -1196,7 +1194,7 @@ void SmartMaskIoMgr::prepare_prefill_io(
       executorch::aten::ScalarType::Bits16,
       prefill_attention_mask.get());
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // [I]: prefill_input_pos
     Result<TensorInfo> prefill_input_pos =
         methods_meta[0]->input_tensor_meta(2);
@@ -1319,7 +1317,7 @@ void SmartMaskIoMgr::update_prefill_to_kv_io(
     ptr->kv_attention_mask[i] = 65535;
   }
 
-  if (is_bert_) {
+  if (is_bert()) {
     // update v_cache
     auto& v_cache_in = v_cache_in_[kv_forward_name_];
     auto& v_cache_out = v_cache_out_[prefill_forward_name_];
@@ -1366,7 +1364,7 @@ void SmartMaskIoMgr::update_prefill_io(
     std::vector<std::vector<Tensor>>& output_tensors) {
   (void)output_tensors;
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // update v_cache
     auto& v_cache_in = v_cache_in_[prefill_forward_name_];
     auto& v_cache_out = v_cache_out_[prefill_forward_name_];
@@ -1400,7 +1398,7 @@ void SmartMaskIoMgr::fill_prefill_toks(
     std::vector<uint64_t>& prompt_tokens) {
   IO* ptr = static_cast<IO*>(get_mutable_ptr());
   for (int i = 0; i < prefill_ar_len_; i++) {
-    if (!is_bert_) {
+    if (!is_bert()) {
       ptr->prefill_input_pos[i] = start_pos + i;
     }
 
Diff for: examples/qualcomm/oss_scripts/llama/runner/io_manager.h

+10-4
@@ -151,6 +151,11 @@ class ShiftPointerIoMgr : public IoMgrBase {
   };
 
  private:
+  // If the cache length is zero, it indicates a BERT model, which does not use
+  // position ids or KV cache inputs.
+  bool is_bert() const {
+    return prefill_cache_len_ == 0;
+  }
   std::unique_ptr<executorch::aten::TensorImpl> kv_input_toks_;
   std::unique_ptr<executorch::aten::TensorImpl> kv_input_pos_;
   std::unique_ptr<executorch::aten::TensorImpl> kv_attention_mask_;
@@ -189,7 +194,6 @@ class ShiftPointerIoMgr : public IoMgrBase {
   std::string prefill_forward_name_;
   std::string kv_forward_name_;
   const bool use_int64_token_{false};
-  const bool is_bert_{false};
 };
 
 class SmartMaskIoMgr : public IoMgrBase {
@@ -288,6 +292,11 @@ class SmartMaskIoMgr : public IoMgrBase {
   };
 
  private:
+  // If the cache length is zero, it indicates a BERT model, which does not use
+  // position ids or KV cache inputs.
+  bool is_bert() const {
+    return prefill_cache_len_ == 0;
+  }
   std::unique_ptr<executorch::aten::TensorImpl> kv_input_toks_;
   std::unique_ptr<executorch::aten::TensorImpl> kv_input_pos_;
   std::unique_ptr<executorch::aten::TensorImpl> kv_attention_mask_;
@@ -326,9 +335,6 @@ class SmartMaskIoMgr : public IoMgrBase {
   std::string prefill_forward_name_;
   std::string kv_forward_name_;
   const bool use_int64_token_{false};
-  // If the cache length is zero, it indicates a BERT model, which does not use
-  // position ids or KV cache inputs.
-  const bool is_bert_{false};
 };
 
 } // namespace example
