@@ -81,8 +81,7 @@ ShiftPointerIoMgr::ShiftPointerIoMgr(
       eval_mode_(eval_mode),
       prefill_forward_name_(prefill_forward_name),
       kv_forward_name_(kv_forward_name),
-      use_int64_token_(use_int64_token),
-      is_bert_(prefill_cache_len_ == 0) {
+      use_int64_token_(use_int64_token) {
   if (!prefill_forward_name_.empty()) {
     input_tensors_[prefill_forward_name_] =
         std::vector<std::vector<executorch::aten::TensorImpl*>>(modules.size());
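Note: the header change that introduces the accessor is not part of these hunks. A minimal sketch of what the is_bert() helper presumably looks like, assuming it derives the flag from the prefill cache length exactly as the removed initializer did:

    // Sketch only, not shown in this diff: replaces the removed is_bert_ member.
    // Assumes the BERT-style (no KV prefill cache) case is still signalled by a
    // zero prefill cache length, as in the old initializer.
    bool is_bert() const {
      return prefill_cache_len_ == 0;
    }

Computing the flag on demand keeps it consistent if prefill_cache_len_ is set or adjusted after construction, rather than freezing the value in the initializer list.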
@@ -391,7 +390,7 @@ void ShiftPointerIoMgr::prepare_prefill_io(
   input_tensors_[prefill_forward_name_][0].push_back(
       prefill_attention_mask_.get());
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // [I]: prefill_input_pos
     Result<TensorInfo> prefill_input_pos =
         methods_meta[0]->input_tensor_meta(2);
@@ -544,7 +543,7 @@ void ShiftPointerIoMgr::update_prefill_to_kv_io(
   size_t copied_size = pos * sizeof(uint8_t);
   for (int i = 0; i < k_cache_in.size(); ++i) {
     uint8_t* ptr_in = k_cache_in[i]->mutable_data<uint8_t>();
-    if (is_bert_) {
+    if (is_bert()) {
       const uint8_t* ptr_out = k_cache_out[i]->data<uint8_t>();
       for (size_t j = 0, offset = kv_cache_len_; j < head_dim_;
            ++j, offset += kv_cache_len_) {
@@ -620,7 +619,7 @@ void ShiftPointerIoMgr::update_prefill_io(
   (void)cur_token;
   (void)output_tensors;
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // update v_cache
     auto& v_cache_in = v_cache_in_[prefill_forward_name_];
     auto& v_cache_out = v_cache_out_[prefill_forward_name_];
@@ -668,7 +667,7 @@ void ShiftPointerIoMgr::fill_prefill_toks(
     std::vector<uint64_t>& prompt_tokens) {
   IO* ptr = static_cast<IO*>(get_mutable_ptr());
   for (int i = 0; i < prefill_ar_len_; i++) {
-    if (!is_bert_) {
+    if (!is_bert()) {
       ptr->prefill_input_pos[i] = start_pos + i;
     }
 
@@ -735,8 +734,7 @@ SmartMaskIoMgr::SmartMaskIoMgr(
       eval_mode_(eval_mode),
       prefill_forward_name_(prefill_forward_name),
       kv_forward_name_(kv_forward_name),
-      use_int64_token_(use_int64_token),
-      is_bert_(prefill_cache_len == 0) {
+      use_int64_token_(use_int64_token) {
   if (!prefill_forward_name_.empty()) {
     input_tensors_[prefill_forward_name_] =
         std::vector<std::vector<executorch::aten::TensorImpl*>>(modules.size());
@@ -1196,7 +1194,7 @@ void SmartMaskIoMgr::prepare_prefill_io(
       executorch::aten::ScalarType::Bits16,
       prefill_attention_mask.get());
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // [I]: prefill_input_pos
     Result<TensorInfo> prefill_input_pos =
         methods_meta[0]->input_tensor_meta(2);
@@ -1319,7 +1317,7 @@ void SmartMaskIoMgr::update_prefill_to_kv_io(
     ptr->kv_attention_mask[i] = 65535;
   }
 
-  if (is_bert_) {
+  if (is_bert()) {
     // update v_cache
     auto& v_cache_in = v_cache_in_[kv_forward_name_];
     auto& v_cache_out = v_cache_out_[prefill_forward_name_];
@@ -1366,7 +1364,7 @@ void SmartMaskIoMgr::update_prefill_io(
     std::vector<std::vector<Tensor>>& output_tensors) {
   (void)output_tensors;
 
-  if (!is_bert_) {
+  if (!is_bert()) {
     // update v_cache
     auto& v_cache_in = v_cache_in_[prefill_forward_name_];
     auto& v_cache_out = v_cache_out_[prefill_forward_name_];
@@ -1400,7 +1398,7 @@ void SmartMaskIoMgr::fill_prefill_toks(
     std::vector<uint64_t>& prompt_tokens) {
   IO* ptr = static_cast<IO*>(get_mutable_ptr());
   for (int i = 0; i < prefill_ar_len_; i++) {
-    if (!is_bert_) {
+    if (!is_bert()) {
       ptr->prefill_input_pos[i] = start_pos + i;
     }
 