Skip to content

Commit 38c1851

Browse files
kimishpatel, facebook-github-bot
authored and committed
[Pytorch Edge] Enable kineto profiler on mobile via EdgeKinetoProfiler (pytorch#62419)
Summary: Pull Request resolved: pytorch#62419 This diff adds support for a CPU-only Kineto profiler on mobile, thus enabling Chrome trace generation on mobile. This brings the C++ API for mobile profiling on par with TorchScript. This is done via: 1. Utilizing debug handle annotations in KinetoEvent. 2. Adding post-processing capability, via callbacks, to KinetoThreadLocalState. 3. Creating a new RAII-style profiler, KinetoEdgeCPUProfiler, which can be used in the surrounding scope of model execution. This will write the Chrome trace to the location specified in the profiler constructor. Test Plan: MobileProfiler.ModuleHierarchy Imported from OSS Reviewed By: raziel Differential Revision: D29993660 fbshipit-source-id: 0b44f52f9e9c5f5aff81ebbd9273c254c3c03299
1 parent 77a6436 commit 38c1851

24 files changed

+406
-72
lines changed

.jenkins/pytorch/build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
130130
if [[ "${BUILD_ENVIRONMENT}" == *vulkan* ]]; then
131131
build_args+=("-DUSE_VULKAN=ON")
132132
fi
133+
build_args+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
133134
exec ./scripts/build_android.sh "${build_args[@]}" "$@"
134135
fi
135136

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ if(NOT DEFINED USE_VULKAN)
266266
endif()
267267

268268
option(USE_SOURCE_DEBUG_ON_MOBILE "Enable " ON)
269+
option(USE_LITE_INTERPRETER_PROFILER "Enable " ON)
269270
option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
270271
option(USE_VULKAN_RELAXED_PRECISION "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
271272
option(USE_VULKAN_SHADERC_RUNTIME "Vulkan - Use runtime shader compilation as opposed to build-time (needs libshaderc)" OFF)
@@ -687,6 +688,10 @@ if(USE_SOURCE_DEBUG_ON_MOBILE)
687688
string(APPEND CMAKE_CXX_FLAGS " -DSYMBOLICATE_MOBILE_DEBUG_HANDLE")
688689
endif()
689690

691+
if(USE_LITE_INTERPRETER_PROFILER)
692+
string(APPEND CMAKE_CXX_FLAGS " -DEDGE_PROFILER_USE_KINETO")
693+
endif()
694+
690695
# ---[ Allowlist file if allowlist is specified
691696
include(cmake/Allowlist.cmake)
692697

android/common.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ check_gradle() {
2929
}
3030

3131
parse_abis_list() {
32-
ABIS_LIST="armeabi-v7a,arm64-v8a,x86,x86_64"
32+
ABIS_LIST="x86"
3333
CUSTOM_ABIS_LIST=false
3434
if [ $# -gt 0 ]; then
3535
ABIS_LIST=$1
@@ -59,7 +59,8 @@ build_android() {
5959
ANDROID_ABI="$abi" \
6060
BUILD_ROOT="$ANDROID_BUILD_ROOT" \
6161
"$PYTORCH_DIR/scripts/build_android.sh" \
62-
-DANDROID_CCACHE="$(which ccache)"
62+
-DANDROID_CCACHE="$(which ccache)" \
63+
-DUSE_LITE_INTERPRETER_PROFILER="OFF"
6364

6465
echo "$abi build output lib,include at $ANDROID_BUILD_ROOT/install"
6566
ln -s "$ANDROID_BUILD_ROOT/install/lib" "$LIB_DIR/$abi"

android/pytorch_android/build.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ android {
1818
externalNativeBuild {
1919
cmake {
2020
if(System.env.BUILD_LITE_INTERPRETER == '0') {
21-
arguments "-DANDROID_STL=c++_shared", "-DBUILD_LITE_INTERPRETER=OFF"
21+
arguments "-DANDROID_STL=c++_shared", "-DBUILD_LITE_INTERPRETER=OFF", "-DUSE_LITE_INTERPRETER_PROFILER=OFF"
2222
} else {
23-
arguments "-DANDROID_STL=c++_shared"
23+
arguments "-DANDROID_STL=c++_shared", "-DUSE_LITE_INTERPRETER_PROFILER=OFF"
2424
}
2525
}
2626
}

aten/src/ATen/record_function.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ enum class C10_API_ENUM RecordScope : uint8_t {
2727
TORCHSCRIPT_FUNCTION,
2828
// Kernel Function dtype Tag
2929
KERNEL_FUNCTION_DTYPE,
30+
// Scope for events recorded by the lite interpreter
31+
LITE_INTERPRETER,
3032
// User defined scope (e.g. with record_function())
3133
USER_SCOPE,
3234
NUM_SCOPES, // must be the last in the list
@@ -502,11 +504,11 @@ class TORCH_API RecordFunctionCallback {
502504
} \
503505
}
504506

505-
// Helper macros to record user_scope events with debug handles
506-
#define RECORD_USER_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS( \
507-
fn, debug_handle, inputs) \
508-
RECORD_WITH_SCOPE_DEBUG_HANDLE_AND_INPUTS( \
509-
at::RecordScope::USER_SCOPE, fn, debug_handle, inputs)
507+
// Helper macros to record LITE INTERPRETER scope events with debug handles
508+
#define RECORD_EDGE_SCOPE_WITH_DEBUG_HANDLE_AND_INPUTS( \
509+
fn, debug_handle, inputs) \
510+
RECORD_WITH_SCOPE_DEBUG_HANDLE_AND_INPUTS( \
511+
at::RecordScope::LITE_INTERPRETER, fn, debug_handle, inputs)
510512

511513
// Notes:
512514
// - two types of callbacks are provided: thread local and global

caffe2/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,10 +485,17 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
485485
endif()
486486
endif()
487487

488+
list(APPEND LITE_PROFILER_SRCS "")
489+
if(USE_LITE_INTERPRETER_PROFILER)
490+
append_filelist("libtorch_edge_profiler_sources " LITE_PROFILER_SRCS)
491+
endif()
492+
488493
# Switch between the full jit interpreter and lite interpreter
489494
if(BUILD_LITE_INTERPRETER)
490495
append_filelist("libtorch_lite_cmake_sources" LIBTORCH_CMAKE_SRCS)
491496
list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_EAGER_SYMOBLICATION_SRCS})
497+
list(APPEND LIBTORCH_CMAKE_SRCS ${LITE_PROFILER_SRCS})
498+
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
492499
else()
493500
append_filelist("libtorch_cmake_sources" LIBTORCH_CMAKE_SRCS)
494501

cmake/Dependencies.cmake

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1568,6 +1568,11 @@ endif()
15681568
# --[ ATen checks
15691569
set(USE_LAPACK 0)
15701570

1571+
# we need to build all targets to be linked with PIC
1572+
if(USE_KINETO AND INTERN_BUILD_MOBILE AND USE_LITE_INTERPRETER_PROFILER)
1573+
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
1574+
endif()
1575+
15711576
if(NOT INTERN_BUILD_MOBILE)
15721577
set(TORCH_CUDA_ARCH_LIST $ENV{TORCH_CUDA_ARCH_LIST})
15731578
set(TORCH_NVCC_FLAGS $ENV{TORCH_NVCC_FLAGS})
@@ -1876,11 +1881,17 @@ list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
18761881
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
18771882

18781883
# ---[ Kineto
1879-
if(USE_KINETO AND INTERN_BUILD_MOBILE)
1884+
# edge profiler depends on KinetoProfiler but it only does cpu
1885+
# profiling. Thus we don't need USE_CUDA/USE_ROCM
1886+
if(USE_KINETO AND INTERN_BUILD_MOBILE AND NOT (BUILD_LITE_INTERPRETER AND USE_LITE_INTERPRETER_PROFILER))
18801887
message(STATUS "Not using libkineto in a mobile build.")
18811888
set(USE_KINETO OFF)
18821889
endif()
18831890

1891+
if(USE_KINETO AND INTERN_BUILD_MOBILE AND USE_LITE_INTERPRETER_PROFILER AND (USE_CUDA OR USE_ROCM))
1892+
message(FATAL_ERROR "Mobile build with profiler does not support CUDA or ROCM")
1893+
endif()
1894+
18841895
if(USE_KINETO)
18851896
if((NOT USE_CUDA) OR MSVC)
18861897
set(LIBKINETO_NOCUPTI ON CACHE STRING "" FORCE)
@@ -1956,6 +1967,7 @@ if(USE_KINETO)
19561967

19571968
if(NOT TARGET kineto)
19581969
add_subdirectory("${KINETO_SOURCE_DIR}")
1970+
set_property(TARGET kineto PROPERTY POSITION_INDEPENDENT_CODE ON)
19591971
endif()
19601972
list(APPEND Caffe2_DEPENDENCY_LIBS kineto)
19611973
string(APPEND CMAKE_CXX_FLAGS " -DUSE_KINETO")

scripts/build_ios.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ if [ "${BUILD_LITE_INTERPRETER}" == 0 ]; then
8383
else
8484
CMAKE_ARGS+=("-DBUILD_LITE_INTERPRETER=ON")
8585
fi
86+
CMAKE_ARGS+=("-DUSE_LITE_INTERPRETER_PROFILER=OFF")
8687

8788
# Don't build binaries or tests (only the library)
8889
CMAKE_ARGS+=("-DBUILD_TEST=OFF")

test/cpp/jit/test_backend.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -338,16 +338,16 @@ TEST(BackendTestDebugInfo, TestCompiler) {
338338
lm._save_for_mobile(ss, ExtraFilesMap(), true);
339339
auto mlm = _load_for_mobile(ss);
340340
std::string error_pattern = R"(
341-
Module hierarchy:top(m).aten::add
341+
Module hierarchy:top(m)::<unknown>.aten::add
342342
Traceback of TorchScript (most recent call last):
343-
File "<string>", line 5, in FunctionName_UNKNOWN
343+
File "<string>", line 5, in <unknown>
344344
typed_inputs: List[Any] = [x, h, ]
345345
if self.__backend.is_available() :
346346
_0, = self.__backend.execute(self.__handles["forward"], typed_inputs)
347347
~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
348348
assert isinstance(_0, Tensor)
349349
return _0
350-
File "<string>", line 3, in FunctionName_UNKNOWN
350+
File "<string>", line 3, in <unknown>
351351
352352
def forward(self, x, h):
353353
return x + h
@@ -392,16 +392,16 @@ TEST(BackendTestDebugInfo, TestExceptionStackForCompilerWithModuleHierarchy) {
392392
lm._save_for_mobile(ss, ExtraFilesMap(), true);
393393
auto mlm = _load_for_mobile(ss);
394394
std::string error_pattern = R"(
395-
Module hierarchy:top(C).A0(A).aten::add
395+
Module hierarchy:top(C)::<unknown>.A0(A)::forward.aten::add
396396
Traceback of TorchScript (most recent call last):
397-
File "<string>", line 5, in FunctionName_UNKNOWN
397+
File "<string>", line 5, in <unknown>
398398
typed_inputs: List[Any] = [x, y, ]
399399
if self.__backend.is_available() :
400400
_0, = self.__backend.execute(self.__handles["forward"], typed_inputs)
401401
~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
402402
assert isinstance(_0, Tensor)
403403
return _0
404-
File "<string>", line 3, in FunctionName_UNKNOWN
404+
File "<string>", line 3, in <unknown>
405405
406406
def forward(self, x, y):
407407
return self.A0.forward(x, y) + self.B0.forward(x)
@@ -485,16 +485,16 @@ TEST(
485485
*
486486
*/
487487
std::string error_pattern = R"(
488-
Module hierarchy:top(C).B0(B).A0(A).aten::add
488+
Module hierarchy:top(C)::<unknown>.B0(B)::forward.A0(A)::forward.aten::add
489489
Traceback of TorchScript (most recent call last):
490-
File "<string>", line 5, in FunctionName_UNKNOWN
490+
File "<string>", line 5, in <unknown>
491491
typed_inputs: List[Any] = [x, y, ]
492492
if self.__backend.is_available() :
493493
_0, = self.__backend.execute(self.__handles["forward"], typed_inputs)
494494
~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
495495
assert isinstance(_0, Tensor)
496496
return _0
497-
File "<string>", line 3, in FunctionName_UNKNOWN
497+
File "<string>", line 3, in <unknown>
498498
499499
def forward(self, x, y):
500500
return self.B0.forward(x, y) + 3
@@ -572,9 +572,9 @@ TEST(BackendTestDebugInfo, TestExceptionStackForCompilerWithLoweredSubModule) {
572572
c._save_for_mobile(ss, ExtraFilesMap(), true);
573573
auto c_loaded = _load_for_mobile(ss);
574574
std::string error_pattern = R"(
575-
Module hierarchy:top(C).A0(A).aten::add
575+
Module hierarchy:top(C)::<unknown>.A0(A)::forward.aten::add
576576
Traceback of TorchScript (most recent call last):
577-
File "<string>", line 3, in FunctionName_UNKNOWN
577+
File "<string>", line 3, in <unknown>
578578
579579
def forward(self, x, y):
580580
return self.A0.forward(x, y) + self.B0.forward(x)
@@ -587,7 +587,7 @@ Traceback of TorchScript (most recent call last):
587587
~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
588588
assert isinstance(_0, Tensor)
589589
return _0
590-
File "<string>", line 3, in FunctionName_UNKNOWN
590+
File "<string>", line 3, in <unknown>
591591
592592
def forward(self, x, y):
593593
return x + y
@@ -693,9 +693,9 @@ TEST(
693693
*
694694
* */
695695
std::string error_pattern = R"(
696-
Module hierarchy:top(C).A0(A).AA0(AA).aten::add
696+
Module hierarchy:top(C)::<unknown>.A0(A)::forward.AA0(AA)::forward.aten::add
697697
Traceback of TorchScript (most recent call last):
698-
File "<string>", line 3, in FunctionName_UNKNOWN
698+
File "<string>", line 3, in <unknown>
699699
700700
def forward(self, x, y):
701701
return self.A0.forward(x, y) + self.B0.forward(x)
@@ -708,7 +708,7 @@ Traceback of TorchScript (most recent call last):
708708
~~~~~~~~~~~~~~~~~~~~~~ <--- HERE
709709
assert isinstance(_0, Tensor)
710710
return _0
711-
File "<string>", line 3, in FunctionName_UNKNOWN
711+
File "<string>", line 3, in <unknown>
712712
713713
def forward(self, x, y):
714714
return self.AA0.forward(x, y) + 3

test/cpp/jit/test_lite_interpreter.cpp

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ TEST(LiteInterpreterTest, ModuleInfoBasic) {
482482
}
483483
}
484484

485-
AT_ASSERT(module_debug_info_set.count("top(M).aten::mul"));
485+
AT_ASSERT(module_debug_info_set.count("top(M)::<unknown>.aten::mul"));
486486
}
487487

488488
TEST(LiteInterpreterTest, NotSaveModuleInfo) {
@@ -542,9 +542,11 @@ TEST(LiteInterpreterTest, OneSubmoduleModuleInfo) {
542542
}
543543
}
544544

545-
AT_ASSERT(module_debug_info_set.count("top(B).aten::add"));
546-
AT_ASSERT(module_debug_info_set.count("top(B).A0(A).aten::add"));
547-
AT_ASSERT(module_debug_info_set.count("top(B).A0(A).aten::mul"));
545+
AT_ASSERT(module_debug_info_set.count("top(B)::<unknown>.aten::add"));
546+
AT_ASSERT(module_debug_info_set.count(
547+
"top(B)::<unknown>.A0(A)::forward.aten::add"));
548+
AT_ASSERT(module_debug_info_set.count(
549+
"top(B)::<unknown>.A0(A)::forward.aten::mul"));
548550
}
549551

550552
TEST(LiteInterpreterTest, TwoSubmodulesModuleInfo) {
@@ -585,9 +587,11 @@ TEST(LiteInterpreterTest, TwoSubmodulesModuleInfo) {
585587
}
586588
}
587589

588-
AT_ASSERT(module_debug_info_set.count("top(C).aten::add"));
589-
AT_ASSERT(module_debug_info_set.count("top(C).A0(A).aten::add"));
590-
AT_ASSERT(module_debug_info_set.count("top(C).B0(B).aten::add"));
590+
AT_ASSERT(module_debug_info_set.count("top(C)::<unknown>.aten::add"));
591+
AT_ASSERT(module_debug_info_set.count(
592+
"top(C)::<unknown>.A0(A)::forward.aten::add"));
593+
AT_ASSERT(module_debug_info_set.count(
594+
"top(C)::<unknown>.B0(B)::forward.aten::add"));
591595
}
592596

593597
TEST(LiteInterpreterTest, GetRuntimeByteCodeVersion) {
@@ -854,9 +858,11 @@ TEST(LiteInterpreterTest, SequentialModuleInfo) {
854858
// def forward(self, x):
855859
// return self.A0.forward(self.B0.forward(x))
856860

857-
AT_ASSERT(module_debug_info_set.count("top(C).prim::Return"));
858-
AT_ASSERT(module_debug_info_set.count("top(C).A0(A).aten::add"));
859-
AT_ASSERT(module_debug_info_set.count("top(C).B0(B).aten::add"));
861+
AT_ASSERT(module_debug_info_set.count("top(C)::<unknown>.prim::Return"));
862+
AT_ASSERT(module_debug_info_set.count(
863+
"top(C)::<unknown>.A0(A)::forward.aten::add"));
864+
AT_ASSERT(module_debug_info_set.count(
865+
"top(C)::<unknown>.B0(B)::forward.aten::add"));
860866
}
861867

862868
TEST(LiteInterpreterTest, HierarchyModuleInfo) {
@@ -901,9 +907,11 @@ TEST(LiteInterpreterTest, HierarchyModuleInfo) {
901907
// "top(C).forward": for the add operator in top.
902908
// "top(C).B0(B).forward": for the add operator in B0.
903909
// "top(C).B0(B).forward.A0(A).forward": for the add operator in A0.
904-
AT_ASSERT(module_debug_info_set.count("top(C).aten::add"));
905-
AT_ASSERT(module_debug_info_set.count("top(C).B0(B).aten::add"));
906-
AT_ASSERT(module_debug_info_set.count("top(C).B0(B).A0(A).aten::add"));
910+
AT_ASSERT(module_debug_info_set.count("top(C)::<unknown>.aten::add"));
911+
AT_ASSERT(module_debug_info_set.count(
912+
"top(C)::<unknown>.B0(B)::forward.aten::add"));
913+
AT_ASSERT(module_debug_info_set.count(
914+
"top(C)::<unknown>.B0(B)::forward.A0(A)::forward.aten::add"));
907915
}
908916

909917
TEST(LiteInterpreterTest, DuplicatedClassTypeModuleInfo) {
@@ -960,9 +968,11 @@ TEST(LiteInterpreterTest, DuplicatedClassTypeModuleInfo) {
960968
// "top(B).A0(A).forward": for the add operator in A0.
961969
// "top(B).A1(A).forward": for the add operator in A1.
962970

963-
AT_ASSERT(module_debug_info_set.count("top(B).aten::add"));
964-
AT_ASSERT(module_debug_info_set.count("top(B).A0(A).aten::add"));
965-
AT_ASSERT(module_debug_info_set.count("top(B).A1(A).aten::add"));
971+
AT_ASSERT(module_debug_info_set.count("top(B)::<unknown>.aten::add"));
972+
AT_ASSERT(module_debug_info_set.count(
973+
"top(B)::<unknown>.A0(A)::forward.aten::add"));
974+
AT_ASSERT(module_debug_info_set.count(
975+
"top(B)::<unknown>.A1(A)::forward.aten::add"));
966976
}
967977
#endif // !defined(FB_XPLAT_BUILD)
968978

@@ -1371,9 +1381,9 @@ TEST(LiteInterpreterTest, TestExceptionStackWithTwoLevelModuleHierarchy) {
13711381
c._save_for_mobile(ss, ExtraFilesMap(), true);
13721382
auto lite_m = _load_for_mobile(ss);
13731383
std::string error_pattern = R"(
1374-
Module hierarchy:top(C).B0(B).A0(A).aten::add
1384+
Module hierarchy:top(C)::<unknown>.B0(B)::foo.A0(A)::bar.aten::add
13751385
Traceback of TorchScript (most recent call last):
1376-
File "<string>", line 3, in FunctionName_UNKNOWN
1386+
File "<string>", line 3, in <unknown>
13771387
13781388
def forward(self, x, y):
13791389
return self.B0.foo(x, y) + 3

0 commit comments

Comments
 (0)