Skip to content

Commit bb255dd

Browse files
Fei Yu, facebook-github-bot
Fei Yu
authored and committed
directly pass update_util as int flag without syncing iter (#2695)
Summary: As the title says, this change eliminates the device-to-host sync for the iter buffer during each iteration, which improves performance and avoids a potential bottleneck at the sync point. This change covers all frontend usage of the remap util, which covers both ITEP & CEL. Reviewed By: dstaay-fb, sryap. Differential Revision: D68466136
1 parent 1afbf08 commit bb255dd

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

torchrec/modules/itep_embedding_modules.py

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def forward(
7575

7676
features = self._itep_module(features, self._iter.item())
7777
pooled_embeddings = self._embedding_bag_collection(features)
78+
7879
self._iter += 1
7980

8081
return pooled_embeddings

torchrec/modules/itep_modules.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -464,13 +464,13 @@ def forward(
464464
feature_offsets,
465465
) = self.get_remap_info(sparse_features)
466466

467-
update_utils: bool = (
467+
update_util: bool = (
468468
(cur_iter < 10)
469469
or (cur_iter < 100 and (cur_iter + 1) % 19 == 0)
470470
or ((cur_iter + 1) % 39 == 0)
471471
)
472472
full_values_list = None
473-
if update_utils and sparse_features.variable_stride_per_key():
473+
if update_util and sparse_features.variable_stride_per_key():
474474
if sparse_features.inverse_indices_or_none() is not None:
475475
# full util update mode require reconstructing original input indicies from VBE input
476476
full_values_list = self.get_full_values_list(sparse_features)
@@ -490,6 +490,7 @@ def forward(
490490
self.row_util,
491491
self.buffer_offsets,
492492
full_values_list=full_values_list,
493+
update_util=update_util,
493494
)
494495

495496
sparse_features._values = remapped_values

0 commit comments

Comments
 (0)