Skip to content

Commit f300df9

Browse files
authored
Merge pull request #3378 from JanVogelsang/master-timers
Add the option to measure separate timers per thread
2 parents d0cd6bf + 6050539 commit f300df9

30 files changed

+972
-535
lines changed

Diff for: CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ set( with-models OFF CACHE STRING "The models to include as a semicolon-separate
6868
set( tics_per_ms "1000.0" CACHE STRING "Specify elementary unit of time [default=1000 tics per ms]." )
6969
set( tics_per_step "100" CACHE STRING "Specify resolution [default=100 tics per step]." )
7070
set( with-detailed-timers OFF CACHE STRING "Build with detailed internal time measurements [default=OFF]. Detailed timers can affect the performance." )
71+
set( with-mpi-sync-timer OFF CACHE STRING "Build with mpi synchronization barrier and timer [default=OFF]. Can affect the performance." )
72+
set( with-threaded-timers ON CACHE STRING "Build with one internal timer per thread [default=ON]. Multi-threaded timers can affect the performance." )
7173
set( target-bits-split "standard" CACHE STRING "Split of the 64-bit target neuron identifier type [default='standard']. 'standard' is recommended for most users. If running on more than 262144 MPI processes or more than 512 threads, change to 'hpc'." )
7274

7375
# generic build configuration
@@ -143,6 +145,8 @@ nest_process_with_gsl()
143145
nest_process_with_openmp()
144146
nest_process_with_mpi()
145147
nest_process_with_detailed_timers()
148+
nest_process_with_threaded_timers()
149+
nest_process_with_mpi_sync_timer()
146150
nest_process_with_libneurosim()
147151
nest_process_with_music()
148152
nest_process_with_sionlib()

Diff for: cmake/ConfigureSummary.cmake

+14
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,20 @@ function( NEST_PRINT_CONFIG_SUMMARY )
150150
message( "Detailed timers : No" )
151151
endif ()
152152

153+
message( "" )
154+
if ( THREADED_TIMERS )
155+
message( "Threaded timers : Yes" )
156+
else ()
157+
message( "Threaded timers : No" )
158+
endif ()
159+
160+
message( "" )
161+
if ( MPI_SYNC_TIMER )
162+
message( "MPI sync timer : Yes" )
163+
else ()
164+
message( "MPI sync timer : No" )
165+
endif ()
166+
153167
message( "" )
154168
if ( HAVE_MUSIC )
155169
message( "Use MUSIC : Yes (MUSIC ${MUSIC_VERSION})" )

Diff for: cmake/ProcessOptions.cmake

+14
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,20 @@ function( NEST_PROCESS_WITH_DETAILED_TIMERS )
462462
endif ()
463463
endfunction()
464464

465+
function( NEST_PROCESS_WITH_THREADED_TIMERS )
466+
set( THREADED_TIMERS OFF PARENT_SCOPE )
467+
if ( ${with-threaded-timers} STREQUAL "ON" )
468+
set( THREADED_TIMERS ON PARENT_SCOPE )
469+
endif ()
470+
endfunction()
471+
472+
function( NEST_PROCESS_WITH_MPI_SYNC_TIMER )
473+
set( MPI_SYNC_TIMER OFF PARENT_SCOPE )
474+
if ( ${with-mpi-sync-timer} STREQUAL "ON" )
475+
set( MPI_SYNC_TIMER ON PARENT_SCOPE )
476+
endif ()
477+
endfunction()
478+
465479
function( NEST_PROCESS_WITH_LIBNEUROSIM )
466480
# Find libneurosim
467481
set( HAVE_LIBNEUROSIM OFF PARENT_SCOPE )

Diff for: doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ For example, the ``stopwatch.h`` file could look like:
556556
}
557557
558558
inline nest::Stopwatch::timestamp_t
559-
nest::Stopwatch::elapsed_timestamp() const
559+
nest::Stopwatch::elapsed_us() const
560560
{
561561
#ifndef DISABLE_TIMING
562562
if ( isRunning() )
@@ -622,7 +622,7 @@ For example, the ``stopwatch.h`` file could look like:
622622
}
623623
624624
inline nest::Stopwatch::timestamp_t
625-
nest::Stopwatch::get_timestamp()
625+
nest::Stopwatch::get_current_time()
626626
{
627627
// works with:
628628
// * hambach (Linux 2.6.32 x86_64)
@@ -637,12 +637,12 @@ For example, the ``stopwatch.h`` file could look like:
637637
} /* namespace timer */
638638
#endif /* STOPWATCH_H */
639639
640-
And the corresponding ``stopwatch.cpp``:
640+
And the corresponding ``stopwatch_impl.h``:
641641

642642
.. code:: cpp
643643
644644
/*
645-
* stopwatch.cpp
645+
* stopwatch_impl.h
646646
*
647647
* This file is part of NEST.
648648
*

Diff for: doc/htmldoc/installation/cmake_options.rst

+8-2
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ For more details, see the :ref:`Python binding <compile_with_python>` section be
102102
.. _performance_cmake:
103103

104104
Maximize performance, reduce energy consumption
105-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
105+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
106106

107107
The following options help to optimize NEST for maximal performance and thus reduced energy consumption.
108108

@@ -126,7 +126,7 @@ The following options help to optimize NEST for maximal performance and thus red
126126
in place.
127127
* Using ``-march=native`` requires that you build NEST on the same CPU architecture as you will use to run it.
128128
* For the technically minded: Even just using ``-O3`` removes some ``assert()`` statements from NEST since we
129-
have wrapped some of them in functions, which get eliminated due to interprocedural optimization.
129+
have wrapped some of them in functions, which get eliminated due to interprocedural optimization.
130130

131131

132132

@@ -197,8 +197,14 @@ NEST properties
197197
+-----------------------------------------------+----------------------------------------------------------------+
198198
| ``-Dtics_per_step=[number]`` | Specify resolution [default=100 tics per step]. |
199199
+-----------------------------------------------+----------------------------------------------------------------+
200+
| ``-Dwith-threaded-timers=[OFF|ON]`` | Build with one internal timer per thread [default=ON]. |
201+
| | Multi-threaded timers can affect the performance. |
202+
+-----------------------------------------------+----------------------------------------------------------------+
200203
| ``-Dwith-detailed-timers=[OFF|ON]`` | Build with detailed internal time measurements [default=OFF]. |
201204
| | Detailed timers can affect the performance. |
205+
+----------------------------------------------------------------------------------------------------------------+
206+
| ``-Dwith-mpi-sync-timer=[OFF|ON]`` | Build with mpi synchronization barrier and timer [default=OFF].|
207+
| | Can affect the performance. |
202208
+-----------------------------------------------+----------------------------------------------------------------+
203209
| ``-Dtarget-bits-split=['standard'|'hpc']`` | Split of the 64-bit target neuron identifier type |
204210
| | [default='standard']. 'standard' is recommended for most users.|

Diff for: doc/htmldoc/nest_behavior/built-in_timers.rst

+51-31
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@ Built-in timers
66
Basic timers
77
------------
88

9-
Basic built-in timers keep track of the time NEST spent for network
10-
construction and actual simulation (propagation of the network
11-
state). These timers are active in all simulations with NEST, and the
12-
measured times can be checked by querying the corresponding kernel
13-
attributes. For example:
9+
Basic built-in timers keep track of the time NEST spent for network construction and actual simulation (propagation of
10+
the network state). These timers are active in all simulations with NEST, and the measured times can be checked by
11+
querying the corresponding kernel attributes. For example:
1412

1513
::
1614

@@ -22,7 +20,7 @@ The following basic time measurements are available:
2220
|Name |Explanation |
2321
+=============================+==================================+
2422
|``time_construction_create`` |Cumulative time NEST spent |
25-
| |creating neurons and devices |
23+
| |creating neurons and devices |
2624
+-----------------------------+----------------------------------+
2725
|``time_construction_connect``|Cumulative time NEST spent |
2826
| |creating connections |
@@ -33,19 +31,14 @@ The following basic time measurements are available:
3331

3432
.. note ::
3533
36-
While preparing the actual simulation after network construction,
37-
NEST needs to build the pre-synaptic part of the connection
38-
infrastructure, which requires MPI communication (`Jordan et
39-
al. 2018 <https://doi.org/10.3389/fninf.2018.00002>`__). This
40-
happens only for the first call to ``Simulate()`` unless
41-
connectivity changed in the meantime, and it may cause significant
42-
overhead by adding to ``time_simulate``. Therefore, the cumulative
43-
time NEST spent for building the pre-synaptic connection
44-
infrastructure is also tracked by a basic timer and available as
45-
kernel attribute ``time_communicate_prepare``.
34+
While preparing the actual simulation after network construction, NEST needs to build the pre-synaptic part of the
35+
connection infrastructure, which requires MPI communication (`Jordan et al. 2018
36+
<https://doi.org/10.3389/fninf.2018.00002>`__). This happens only for the first call to ``Simulate()`` unless
37+
connectivity changed in the meantime, and it may cause significant overhead by adding to ``time_simulate``.
38+
Therefore, the cumulative time NEST spent for building the pre-synaptic connection infrastructure is also tracked by
39+
a basic timer and available as kernel attribute ``time_communicate_prepare``.
4640
47-
In the context of NEST performance monitoring, other useful kernel
48-
attributes are:
41+
In the context of NEST performance monitoring, other useful kernel attributes are:
4942

5043
+-----------------------+----------------------------------+
5144
|Name |Explanation |
@@ -54,39 +47,34 @@ attributes are:
5447
+-----------------------+----------------------------------+
5548
|``local_spike_counter``|Number of spikes emitted by the |
5649
| |neurons represented on this MPI |
57-
| |rank during the last |
50+
| |rank during the last |
5851
| |``Simulate()`` |
5952
+-----------------------+----------------------------------+
6053

6154
.. note ::
6255
63-
``nest.ResetKernel()`` resets all time measurements as well as
64-
``biological_time`` and ``local_spike_counter``.
56+
``nest.ResetKernel()`` resets all time measurements as well as ``biological_time`` and ``local_spike_counter``.
6557
6658
6759
Detailed timers
6860
---------------
6961

70-
Detailed built-in timers can be activated (and again deactivated)
71-
prior to compilation through the CMake flag
72-
``-Dwith-detailed-timers=ON``. They provide further insights into the
73-
time NEST spends in different phases of the simulation cycle, but they
74-
can impact the runtime. Therefore, detailed timers are by default
75-
inactive.
62+
Detailed built-in timers can be activated (and again deactivated) prior to compilation through the CMake flag
63+
``-Dwith-detailed-timers=ON``. They provide further insights into the time NEST spends in different phases of the
64+
simulation cycle, but they can impact the runtime. Therefore, detailed timers are by default inactive.
7665

77-
If detailed timers are active, the following time measurements are
78-
available as kernel attributes:
66+
If detailed timers are active, the following time measurements are available as kernel attributes:
7967

8068
+--------------------------------+----------------------------------+----------------------------------+
8169
|Name |Explanation |Part of |
8270
+================================+==================================+==================================+
8371
|``time_gather_target_data`` |Cumulative time for communicating |``time_communicate_prepare`` |
8472
| |connection information from | |
85-
| |postsynaptic to presynaptic side | |
73+
| |postsynaptic to presynaptic side | |
8674
+--------------------------------+----------------------------------+----------------------------------+
8775
|``time_communicate_target_data``|Cumulative time for core MPI |``time_gather_target_data`` |
8876
| |communication when gathering | |
89-
| |target data | |
77+
| |target data | |
9078
+--------------------------------+----------------------------------+----------------------------------+
9179
|``time_update`` |Time for neuron update |``time_simulate`` |
9280
+--------------------------------+----------------------------------+----------------------------------+
@@ -107,3 +95,35 @@ available as kernel attributes:
10795
| |buffers of the corresponding | |
10896
| |postsynaptic neurons | |
10997
+--------------------------------+----------------------------------+----------------------------------+
98+
|``time_omp_synchronization_construction`` |Synchronization time of threads during network construction. |``time_construction_create``, ``time_construction_connect``, ``time_communicate_prepare`` |
99+
+--------------------------------+----------------------------------+----------------------------------+
100+
|``time_omp_synchronization_simulation`` |Synchronization time of threads during simulation. |``time_simulate`` |
101+
+--------------------------------+----------------------------------+----------------------------------+
102+
103+
MPI synchronization timer
104+
-------------------------
105+
In order to measure synchronization time between multiple MPI processes, an additional timer can be activated on demand
106+
via the ``-Dwith-mpi-sync-timer=ON`` CMake flag. This timer measures the time between the end of a process' update phase
107+
(i.e., neuron state propagation) and start of collective communication of spikes between all MPI processes. This timer
108+
adds an additional MPI barrier right before the start of communication, which might affect performance.
109+
110+
+-----------------------------+---------------------------------------+
111+
|Name |Explanation |
112+
+=============================+=======================================+
113+
|``time_mpi_synchronization`` |Time spent waiting for other processes.|
114+
+-----------------------------+---------------------------------------+
115+
116+
Multi-threaded timers
117+
---------------------
118+
In previous NEST versions, only the master thread measured timers. Since NEST 3.9, timers which measure time spent exclusively in multi-threaded environments are recorded by each thread individually.
119+
120+
The legacy timer behavior can be restored via the ``-Dwith-threaded-timers=OFF`` CMake flag.
121+
122+
Wall-time vs. CPU-time
123+
-------------------------
124+
All timers in NEST measure the actual wall-time spent between starting and stopping the timer. In order to only measure
125+
time spent on calculations, there is an additional variant for each of the timers above, suffixed with ``_cpu``. They
126+
can be accessed in the exact same way. For example:
127+
::
128+
129+
nest.time_simulate_cpu

Diff for: libnestutil/CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ set( nestutil_sources
3131
numerics.h numerics.cpp
3232
regula_falsi.h
3333
sort.h
34-
stopwatch.h stopwatch.cpp
3534
string_utils.h
3635
vector_util.h
3736
)

Diff for: libnestutil/config.h.in

+6
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,12 @@
182182
/* Whether to enable detailed NEST internal timers */
183183
#cmakedefine TIMER_DETAILED 1
184184

185+
/* Whether to use one NEST internal timer per thread */
186+
#cmakedefine THREADED_TIMERS 1
187+
188+
/* Whether to use the mpi synchronization timer (including an additional barrier) */
189+
#cmakedefine MPI_SYNC_TIMER 1
190+
185191
/* Whether to do full logging */
186192
#cmakedefine ENABLE_FULL_LOGGING 1
187193

Diff for: libnestutil/stopwatch.cpp

-33
This file was deleted.

0 commit comments

Comments
 (0)