Skip to content

Commit 54d2329

Browse files
mo-mglover and TeranIvy authored
Re-jig get_walltime function; swap to unordered_map; sort entries in … (#32)
* Re-jig get_walltime function; swap to unordered_map; sort entries in output. * Manual timing callipers just inside profiler callipers. * Minor tweaks to comments: afterwards -> before. * Change to get_thread0_walltime; find t2 before final prof calliper; other cosmetic changes. * 32 - First set of changes in response to review. * 32 - Further change in response to review. (Comment block tweaks.) * PR #32: Update changelog for merge to main Co-authored-by: TeranIvy <[email protected]>
1 parent f28ca71 commit 54d2329

File tree

9 files changed

+84
-63
lines changed

9 files changed

+84
-63
lines changed

CHANGELOG.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
20/12/2021 PR #19 for #12: Use standard CMake project version variables for versioning. \
1515
21/12/2021 PR #18: Add GitHub runner for building and deploying Doxygen documentation. \
1616
21/12/2021 PR #22 for #13: Move install functionality in main CMakeLists.txt to cmake/Installation.cmake \
17-
21/01/2022 PR #23: Fix version number passed to Doxygen \
18-
15/03/2022 PR #5: Initial code import \
19-
08/07/2022 PR #27: Working Fortran (and C) interfaces \
20-
15/07/2022 PR #29: Unit testing for fortran interface
21-
17+
21/01/2022 PR #23: Fix version number passed to Doxygen. \
18+
15/03/2022 PR #5 towards #2: Initial code import. \
19+
08/07/2022 PR #27: Working Fortran (and C) interfaces. \
20+
15/07/2022 PR #30 for #29: Unit testing for Fortran interface. \
21+
04/08/2022 PR #32 for #31: Functionality improvements (walltime, swap to unordered_map, sort entries in output).

src/c++/hashtable.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <cassert>
1111
#include <iostream>
1212
#include <string>
13+
#include <algorithm>
1314

1415
/**
1516
* @brief Constructs a new entry in the hash table.
@@ -83,7 +84,7 @@ void HashTable::add_child_time(size_t hash, double time_delta)
8384
}
8485

8586
/**
86-
* @brief Writes all entries in the hashtable.
87+
* @brief Writes all entries in the hashtable, sorted according to self times.
8788
*
8889
*/
8990

@@ -107,9 +108,17 @@ void HashTable::write()
107108
<< std::setw(15) << "-" << " "
108109
<< std::setw(15) << "-" << "\n";
109110
std::cout << std::setfill(' ');
111+
112+
// Create a vector from the hashtable and sort the entries according to self
113+
// walltime. If optimisation of this is needed, it ought to be possible to
114+
// acquire a vector of hash-selftime pairs in the correct order, then use the
115+
// hashes to look up other information directly from the hashtable.
116+
auto hashvec = std::vector<std::pair<size_t, HashEntry>>(begin(table_), end(table_));
117+
std::sort(begin(hashvec), end(hashvec),
118+
[](auto a, auto b) { return a.second.self_walltime_ > b.second.self_walltime_;});
110119

111120
// Data entries
112-
for (auto& [hash, entry] : table_) {
121+
for (auto& [hash, entry] : hashvec) {
113122
std::cout
114123
<< std::setw(40) << std::left << entry.region_name_ << " "
115124
<< std::setw(15) << std::right << entry.self_walltime_ << " "
@@ -145,15 +154,13 @@ std::vector<size_t> HashTable::list_keys()
145154
}
146155

147156
/**
148-
* @brief Get the total wallclock time, which is the total walltime of the
149-
* first entry in the table corresponding to the top-level timing
150-
* callipers.
151-
*
157+
* @brief Get the total (inclusive) time corresponding to the input hash.
158+
*
152159
*/
153160

154-
double HashTable::get_total_wallclock_time()
161+
double HashTable::get_total_walltime(size_t const hash)
155162
{
156-
return table_.begin()->second.total_walltime_;
163+
return table_.at(hash).total_walltime_;
157164
}
158165

159166

src/c++/hashtable.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
#ifndef PROFILER_HASHTABLE_H
2525
#define PROFILER_HASHTABLE_H
2626

27-
#include <map>
27+
#include <unordered_map>
2828
#include <vector>
2929
#include <string>
3030
#include <string_view>
@@ -65,7 +65,7 @@ class HashTable{
6565

6666
// Members
6767
int tid_;
68-
std::map<size_t,HashEntry> table_;
68+
std::unordered_map<size_t,HashEntry> table_;
6969
std::hash<std::string_view> hash_function_;
7070

7171
public:
@@ -80,13 +80,10 @@ class HashTable{
8080
void write();
8181

8282
// Member functions
83-
double get_walltime(size_t const hash) const {return table_.at(hash).total_walltime_;}
84-
8583
std::vector<size_t> list_keys();
86-
8784
void add_child_time(size_t, double);
8885
void compute_self_times();
89-
double get_total_wallclock_time();
86+
double get_total_walltime(size_t const);
9087

9188
};
9289
#endif

src/c++/profiler.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,21 @@ void Profiler::write()
124124
}
125125

126126
/**
127-
* @brief Get the top-level elapsed time.
127+
* @brief Get the total (inclusive) time of everything below the specified hash.
128+
*
129+
* @param[in] hash The hash corresponding to the region of interest.
130+
*
131+
* @note This function is normally expected to be used to return the total
132+
* wallclock time for the whole run. Since this value is required only from
133+
* thread 0, the function does not take a thread ID argument and returns
134+
* the value for thread 0 only. Taking the hash argument avoids the need
135+
* to store the top-level hash inside the profiler itself.
128136
*
129137
*/
130138

131-
double Profiler::get_total_wallclock_time()
139+
double Profiler::get_thread0_walltime(size_t const hash)
132140
{
133141
auto tid = static_cast<hashtable_iterator_t_>(0);
134-
return thread_hashtables_[tid].get_total_wallclock_time();
142+
return thread_hashtables_[tid].get_total_walltime(hash);
135143
}
136144

src/c++/profiler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class Profiler
5555
size_t start(std::string_view);
5656
void stop (size_t const);
5757
void write();
58-
double get_total_wallclock_time();
58+
double get_thread0_walltime(size_t const);
5959
};
6060

6161
// Declare global profiler

src/c/profiler_c.cpp

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@
2222
#include <cstring>
2323

2424
extern "C" {
25-
void c_profiler_start(long int&, char const*);
26-
void c_profiler_stop (long int const&);
27-
void c_profiler_write();
28-
double c_get_total_wallclock_time();
25+
void c_profiler_start(long int&, char const*);
26+
void c_profiler_stop (long int const&);
27+
void c_profiler_write();
28+
double c_get_thread0_walltime(long int const&);
2929
}
3030

3131
/**
@@ -47,13 +47,13 @@ void c_profiler_start(long int& hash_out, char const* name)
4747

4848
void c_profiler_stop(long int const& hash_in)
4949
{
50-
size_t hash;
50+
size_t hash;
5151

52-
// Ensure that the source and destination have the same size.
53-
static_assert(sizeof(hash) == sizeof(hash_in), "Hash/In size mismatch.");
54-
std::memcpy(&hash, &hash_in, sizeof(hash));
52+
// Ensure that the source and destination have the same size.
53+
static_assert(sizeof(hash) == sizeof(hash_in), "Hash/In size mismatch.");
54+
std::memcpy(&hash, &hash_in, sizeof(hash));
5555

56-
prof.stop( hash );
56+
prof.stop( hash );
5757
}
5858

5959
/**
@@ -62,14 +62,21 @@ void c_profiler_stop(long int const& hash_in)
6262

6363
void c_profiler_write()
6464
{
65-
prof.write();
65+
prof.write();
6666
}
6767

6868
/**
69-
* Get the total wallclock time
69+
* Get the total wallclock time for the specified region on thread 0.
7070
*/
71-
double c_get_total_wallclock_time()
72-
{
73-
return prof.get_total_wallclock_time();
74-
}
71+
72+
double c_get_thread0_walltime(long int const& hash_in)
73+
{
74+
size_t hash;
75+
76+
// Ensure that the source and destination have the same size.
77+
static_assert(sizeof(hash) == sizeof(hash_in), "Hash/In size mismatch.");
78+
std::memcpy(&hash, &hash_in, sizeof(hash));
79+
80+
return prof.get_thread0_walltime( hash );
81+
}
7582

src/f/profiler_mod.F90

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,31 @@ module profiler_mod
2424
public :: profiler_start
2525
public :: profiler_stop
2626
public :: profiler_write
27-
public :: profiler_get_total_wallclock_time
27+
public :: profiler_get_thread0_walltime
2828

2929
interface
3030

3131
subroutine profiler_start(hash_out, name) bind(C, name='c_profiler_start')
32-
import :: c_char, c_long
33-
character(kind=c_char, len=1), intent(in) :: name
34-
integer(kind=c_long), intent(out) :: hash_out
32+
import :: c_char, c_long
33+
character(kind=c_char, len=1), intent(in) :: name
34+
integer(kind=c_long), intent(out) :: hash_out
3535
end subroutine profiler_start
3636

3737
subroutine profiler_stop(hash_in) bind(C, name='c_profiler_stop')
38-
import :: c_long
39-
integer(kind=c_long), intent(in) :: hash_in
38+
import :: c_long
39+
integer(kind=c_long), intent(in) :: hash_in
4040
end subroutine profiler_stop
4141

4242
subroutine profiler_write() bind(C, name='c_profiler_write')
4343
!No arguments to handle
4444
end subroutine profiler_write
4545

46-
real(kind=c_double) function profiler_get_total_wallclock_time() &
47-
bind(C, name='c_get_total_wallclock_time')
48-
import :: c_double
49-
end function profiler_get_total_wallclock_time
46+
function profiler_get_thread0_walltime(hash_in) result(walltime) &
47+
bind(C, name='c_get_thread0_walltime')
48+
import :: c_double, c_long
49+
integer(kind=c_long), intent(in) :: hash_in
50+
real(kind=c_double) :: walltime
51+
end function profiler_get_thread0_walltime
5052

5153
end interface
5254

tests/unit_tests/c++/test_profiler.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ TEST(SystemTests, TimingTest)
1616
{
1717

1818
// Start timing: noddy way, and using Profiler.
19-
double t1 = omp_get_wtime();
2019
auto prof_main = prof.start("MAIN");
20+
double t1 = omp_get_wtime();
2121

2222
// Time a region
2323
{
@@ -44,9 +44,9 @@ TEST(SystemTests, TimingTest)
4444
// Give the main regions some substantial execution time.
4545
sleep(2);
4646

47-
// End of profiling; record t2 immediately afterwards.
48-
prof.stop(prof_main);
47+
// End of profiling; record t2 immediately before.
4948
double t2 = omp_get_wtime();
49+
prof.stop(prof_main);
5050

5151
// Write the profile
5252
prof.write();
@@ -57,7 +57,7 @@ TEST(SystemTests, TimingTest)
5757
double const time_tolerance = 0.0001;
5858

5959
double actual_time = t2 - t1;
60-
double prof_time = prof.get_total_wallclock_time();
60+
double prof_time = prof.get_thread0_walltime(prof_main);
6161
EXPECT_NEAR(prof_time, actual_time, time_tolerance);
6262

6363
std::cout << "\n" << "Actual timing: " << actual_time << "\n";

tests/unit_tests/f/test_profiler_mod.pf

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ module test_profiler_mod
88
@suite(name='test_profiler_mod_suite')
99

1010
use profiler_mod
11-
use OMP_LIB
11+
use omp_lib
1212

1313
contains
1414

@@ -21,15 +21,15 @@ contains
2121

2222
integer, parameter :: dp = 8
2323

24-
!Timer declarations
24+
! Timer declarations
2525
real(kind=dp) :: t1, t2, actual_time
2626

2727
real(kind=prk) :: profiler_wallclock_time
2828

29-
!Handle declarations
29+
! Handle declarations
3030
integer(kind=pik) :: prof_main
3131

32-
!Start timing: noddy way, and using Profiler.
32+
! Start timing: noddy way, and using Profiler.
3333
call profiler_start(prof_main, 'FULL')
3434
t1 = omp_get_wtime()
3535

@@ -59,21 +59,21 @@ contains
5959
end block
6060
!$OMP END PARALLEL
6161

62-
!Give the main regions some substantial execution time.
62+
! Give the main regions some substantial execution time.
6363
call sleep(2);
6464

65-
! End of profiling; record t2 immediately afterwards.
66-
call profiler_stop(prof_main)
65+
! End of profiling; record t2 immediately before.
6766
t2 = omp_get_wtime();
67+
call profiler_stop(prof_main)
6868

69-
!Write the profile
69+
! Write the profile
7070
call profiler_write()
7171

7272
actual_time = t2 - t1
73-
profiler_wallclock_time = profiler_get_total_wallclock_time()
73+
profiler_wallclock_time = profiler_get_thread0_walltime(prof_main)
7474

75-
@assertEqual(actual_time, profiler_wallclock_time, tolerance=1e-4)
75+
@assertEqual(actual_time, profiler_wallclock_time, tolerance=1.0e-4_dp)
7676

7777
end subroutine test_profiler
7878

79-
end module test_profiler_mod
79+
end module test_profiler_mod

0 commit comments

Comments
 (0)