37
37
#include " util.hxx"
38
38
39
39
#define FLOAT_T double
40
+ using namespace boost ::multiprecision;
40
41
41
42
/* Note: it would be more robust to use ACCUMULATOR().operator()(a,b) instead
42
43
* of a ACC_OP b, but this doesn't work for mpfr values */
47
48
48
49
const bool is_sum = std::is_same<std::plus<FLOAT_T>, ACCUMULATOR>::value;
49
50
const bool is_prod = std::is_same<std::multiplies<FLOAT_T>, ACCUMULATOR>::value;
51
+ mpfr_float_1000 mpfr_dot (FLOAT_T *as, FLOAT_T *bs, long long len);
50
52
51
53
int main (int argc, char * argv[])
52
54
{
@@ -60,6 +62,7 @@ int main (int argc, char* argv[])
60
62
FLOAT_T starttime, endtime, ptime;
61
63
FLOAT_T (*rand_flt_a)(); // Function to generate a random float
62
64
FLOAT_T (*rand_flt_b)(); // Function to generate a random float
65
+ mpfr_float_1000 mpfr_acc;
63
66
union udouble {
64
67
double d;
65
68
unsigned long u;
@@ -114,11 +117,11 @@ int main (int argc, char* argv[])
114
117
115
118
/* Assign storage for dot product vectors
116
119
* We do extra here for simplicity and so rank 0 has enough room */
117
- a = (double *) malloc (len*sizeof (double ));
118
- b = (double *) malloc (len*sizeof (double ));
119
- as = (double *) malloc (len*sizeof (double ));
120
- bs = (double *) malloc (len*sizeof (double ));
121
- rank_sum = (double *) malloc (numtasks*sizeof (double ));
120
+ a = (FLOAT_T *) malloc (len*sizeof (FLOAT_T ));
121
+ b = (FLOAT_T *) malloc (len*sizeof (FLOAT_T ));
122
+ as = (FLOAT_T *) malloc (len*sizeof (FLOAT_T ));
123
+ bs = (FLOAT_T *) malloc (len*sizeof (FLOAT_T ));
124
+ rank_sum = (FLOAT_T *) malloc (numtasks*sizeof (FLOAT_T ));
122
125
123
126
/* Initialize dot product vectors */
124
127
chunk = len/numtasks;
@@ -167,21 +170,36 @@ int main (int argc, char* argv[])
167
170
for (i = 0 ; i < numtasks; i++) {
168
171
can_mpi_sum += rank_sum[i];
169
172
}
173
+
170
174
// Generate a random summation
171
175
rand_sum = associative_accumulate_rand<FLOAT_T>(numtasks, rank_sum, is_sum, &height);
172
176
177
+ // MPFR
178
+ mpfr_acc = mpfr_dot (as, bs, len);
179
+
173
180
// Print header then different dot products
174
- printf (" numtasks\t veclen\t topology\t reduction algorithm\t reduction order \t height\t parallel time \t FP (decimal)\t FP (%%a)\t FP (hex)\n " );
181
+ printf (" numtasks\t veclen\t topology\t distribution \ t reduction algorithm\t order \t height\t time \t FP (decimal)\t FP (%%a)\t FP (hex)\n " );
175
182
pv.d = mysum;
176
- printf (" %d\t %lld\t %s\t %s\t Left assoc\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " , numtasks, len, topo.c_str (), algo.c_str (), height, ptime, mysum, mysum, pv.u );
183
+ printf (" %d\t %lld\t %s\t %s\t %s\t Left assoc\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " ,
184
+ numtasks, len, topo.c_str (), distr.c_str (), algo.c_str (), len-1 , nan (" " ), mysum, mysum, pv.u );
177
185
pv.d = rand_sum;
178
- printf (" %d\t %lld\t %s\t %s\t Random assoc\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " , numtasks, len, topo.c_str (), algo.c_str (), height, ptime, rand_sum, rand_sum, pv.u );
186
+ printf (" %d\t %lld\t %s\t %s\t %s\t Random assoc\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " ,
187
+ numtasks, len, topo.c_str (), distr.c_str (), algo.c_str (), height, ptime, rand_sum, rand_sum, pv.u );
188
+ // TODO: Figure out the height of MPI Reduce and MPI noncommutative sum, and canonical MPI sum
189
+ // TODO: Add in timings for MPFR and serial summations.
179
190
pv.d = par_sum;
180
- printf (" %d\t %lld\t %s\t %s\t MPI Reduce\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " , numtasks, len, topo.c_str (), algo.c_str (), height, ptime, par_sum, par_sum, pv.u );
191
+ printf (" %d\t %lld\t %s\t %s\t %s\t MPI Reduce\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " ,
192
+ numtasks, len, topo.c_str (), distr.c_str (), algo.c_str (), 0LL , ptime, par_sum, par_sum, pv.u );
181
193
pv.d = nc_sum;
182
- printf (" %d\t %lld\t %s\t %s\t MPI noncomm sum\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " , numtasks, len, topo.c_str (), algo.c_str (), height, ptime, nc_sum, nc_sum, pv.u );
194
+ printf (" %d\t %lld\t %s\t %s\t %s\t MPI noncomm sum\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " ,
195
+ numtasks, len, topo.c_str (), distr.c_str (), algo.c_str (), 0LL , ptime, nc_sum, nc_sum, pv.u );
183
196
pv.d = can_mpi_sum;
184
- printf (" %d\t %lld\t %s\t %s\t Canonical MPI\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " , numtasks, len, topo.c_str (), algo.c_str (), height, ptime, can_mpi_sum, can_mpi_sum, pv.u );
197
+ printf (" %d\t %lld\t %s\t %s\t %s\t Canonical MPI\t %lld\t %f\t %.15f\t %a\t 0x%lx\n " ,
198
+ numtasks, len, topo.c_str (), distr.c_str (), algo.c_str (), (long long ) numtasks-1 , nan (" " ), can_mpi_sum, can_mpi_sum, pv.u );
199
+ mpfr_printf (" %d\t %lld\t %s\t %s\t %s\t MPFR(%d) left assoc\t %lld\t %f\t %.20RNf\t %.20RNa\t %RNa\n " ,
200
+ numtasks, len, topo.c_str (), distr.c_str (), algo.c_str (),
201
+ std::numeric_limits<mpfr_float_1000>::digits, // Precision of MPFR
202
+ len-1 , nan (" " ), mpfr_acc, mpfr_acc, mpfr_acc);
185
203
}
186
204
187
205
free (a);
@@ -195,3 +213,13 @@ int main (int argc, char* argv[])
195
213
MPI_Finalize ();
196
214
return rc;
197
215
}
216
+
217
+ mpfr_float_1000 mpfr_dot (FLOAT_T *as, FLOAT_T *bs, long long len)
218
+ {
219
+ mpfr_float_1000 acc;
220
+ acc = 0.0 ;
221
+ for (long long i = 0 ; i < len; i++) {
222
+ acc = acc + as[i] * bs[i];
223
+ }
224
+ return (acc);
225
+ }
0 commit comments