Skip to content

Commit 5617a77

Browse files
author
Sam Pollard
committed
Fixed up and added MPFR to dot product
1 parent 2abb5cb commit 5617a77

File tree

3 files changed

+81
-11
lines changed

3 files changed

+81
-11
lines changed

README.md

+5
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,8 @@ tar -a -cvf datasets.tar.bz2 README.md nekbone.tsv subn.tsv openmpi talapas simg
105105

106106
Happy reproducing
107107
-Sam
108+
109+
## Acknowledgments
110+
This work is partially funded by Sandia National Laboratories.
111+
112+
Sandia National Laboratories is a multimission laboratory managed and operated by National Technology and Engineering Solutions of Sandia, LLC, a wholly owned subsidiary of Honeywell International, Inc., for the U.S. Department of Energy's National Nuclear Security Administration under contract DE-NA-0003525.

src/dotprod_mpi.cxx

+39-11
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "util.hxx"
3838

3939
#define FLOAT_T double
40+
using namespace boost::multiprecision;
4041

4142
/* Note: it would be more robust to use ACCUMULATOR().operator()(a,b) instead
4243
* of a ACC_OP b, but this doesn't work for mpfr values */
@@ -47,6 +48,7 @@
4748

4849
const bool is_sum = std::is_same<std::plus<FLOAT_T>, ACCUMULATOR>::value;
4950
const bool is_prod = std::is_same<std::multiplies<FLOAT_T>, ACCUMULATOR>::value;
51+
mpfr_float_1000 mpfr_dot(FLOAT_T *as, FLOAT_T *bs, long long len);
5052

5153
int main (int argc, char* argv[])
5254
{
@@ -60,6 +62,7 @@ int main (int argc, char* argv[])
6062
FLOAT_T starttime, endtime, ptime;
6163
FLOAT_T (*rand_flt_a)(); // Function to generate a random float
6264
FLOAT_T (*rand_flt_b)(); // Function to generate a random float
65+
mpfr_float_1000 mpfr_acc;
6366
union udouble {
6467
double d;
6568
unsigned long u;
@@ -114,11 +117,11 @@ int main (int argc, char* argv[])
114117

115118
/* Assign storage for dot product vectors
116119
* We over-allocate here (a full len elements on every rank) for simplicity and so that rank 0 has enough room */
117-
a = (double*) malloc(len*sizeof(double));
118-
b = (double*) malloc(len*sizeof(double));
119-
as = (double*) malloc(len*sizeof(double));
120-
bs = (double*) malloc(len*sizeof(double));
121-
rank_sum = (double *) malloc (numtasks*sizeof(double));
120+
a = (FLOAT_T*) malloc(len*sizeof(FLOAT_T));
121+
b = (FLOAT_T*) malloc(len*sizeof(FLOAT_T));
122+
as = (FLOAT_T*) malloc(len*sizeof(FLOAT_T));
123+
bs = (FLOAT_T*) malloc(len*sizeof(FLOAT_T));
124+
rank_sum = (FLOAT_T *) malloc (numtasks*sizeof(FLOAT_T));
122125

123126
/* Initialize dot product vectors */
124127
chunk = len/numtasks;
@@ -167,21 +170,36 @@ int main (int argc, char* argv[])
167170
for (i = 0; i < numtasks; i++) {
168171
can_mpi_sum += rank_sum[i];
169172
}
173+
170174
// Generate a random summation
171175
rand_sum = associative_accumulate_rand<FLOAT_T>(numtasks, rank_sum, is_sum, &height);
172176

177+
// MPFR
178+
mpfr_acc = mpfr_dot(as, bs, len);
179+
173180
// Print header then different dot products
174-
printf("numtasks\tveclen\ttopology\treduction algorithm\treduction order\theight\tparallel time\tFP (decimal)\tFP (%%a)\tFP (hex)\n");
181+
printf("numtasks\tveclen\ttopology\tdistribution\treduction algorithm\torder\theight\ttime\tFP (decimal)\tFP (%%a)\tFP (hex)\n");
175182
pv.d = mysum;
176-
printf("%d\t%lld\t%s\t%s\tLeft assoc\t%lld\t%f\t%.15f\t%a\t0x%lx\n", numtasks, len, topo.c_str(), algo.c_str(), height, ptime, mysum, mysum, pv.u);
183+
printf("%d\t%lld\t%s\t%s\t%s\tLeft assoc\t%lld\t%f\t%.15f\t%a\t0x%lx\n",
184+
numtasks, len, topo.c_str(), distr.c_str(), algo.c_str(), len-1, nan(""), mysum, mysum, pv.u);
177185
pv.d = rand_sum;
178-
printf("%d\t%lld\t%s\t%s\tRandom assoc\t%lld\t%f\t%.15f\t%a\t0x%lx\n", numtasks, len, topo.c_str(), algo.c_str(), height, ptime, rand_sum, rand_sum, pv.u);
186+
printf("%d\t%lld\t%s\t%s\t%s\tRandom assoc\t%lld\t%f\t%.15f\t%a\t0x%lx\n",
187+
numtasks, len, topo.c_str(), distr.c_str(), algo.c_str(), height, ptime, rand_sum, rand_sum, pv.u);
188+
// TODO: Figure out the height of MPI Reduce and MPI noncommutative sum, and canonical MPI sum
189+
// TODO: Add in timings for MPFR and serial summations.
179190
pv.d = par_sum;
180-
printf("%d\t%lld\t%s\t%s\tMPI Reduce\t%lld\t%f\t%.15f\t%a\t0x%lx\n", numtasks, len, topo.c_str(), algo.c_str(), height, ptime, par_sum, par_sum, pv.u);
191+
printf("%d\t%lld\t%s\t%s\t%s\tMPI Reduce\t%lld\t%f\t%.15f\t%a\t0x%lx\n",
192+
numtasks, len, topo.c_str(), distr.c_str(), algo.c_str(), 0LL, ptime, par_sum, par_sum, pv.u);
181193
pv.d = nc_sum;
182-
printf("%d\t%lld\t%s\t%s\tMPI noncomm sum\t%lld\t%f\t%.15f\t%a\t0x%lx\n", numtasks, len, topo.c_str(), algo.c_str(), height, ptime, nc_sum, nc_sum, pv.u);
194+
printf("%d\t%lld\t%s\t%s\t%s\tMPI noncomm sum\t%lld\t%f\t%.15f\t%a\t0x%lx\n",
195+
numtasks, len, topo.c_str(), distr.c_str(), algo.c_str(), 0LL, ptime, nc_sum, nc_sum, pv.u);
183196
pv.d = can_mpi_sum;
184-
printf("%d\t%lld\t%s\t%s\tCanonical MPI\t%lld\t%f\t%.15f\t%a\t0x%lx\n", numtasks, len, topo.c_str(), algo.c_str(), height, ptime, can_mpi_sum, can_mpi_sum, pv.u);
197+
printf("%d\t%lld\t%s\t%s\t%s\tCanonical MPI\t%lld\t%f\t%.15f\t%a\t0x%lx\n",
198+
numtasks, len, topo.c_str(), distr.c_str(), algo.c_str(), (long long) numtasks-1, nan(""), can_mpi_sum, can_mpi_sum, pv.u);
199+
mpfr_printf("%d\t%lld\t%s\t%s\t%s\tMPFR(%d) left assoc\t%lld\t%f\t%.20RNf\t%.20RNa\t%RNa\n",
200+
numtasks, len, topo.c_str(), distr.c_str(), algo.c_str(),
201+
std::numeric_limits<mpfr_float_1000>::digits, // Precision of MPFR
202+
len-1, nan(""), mpfr_acc, mpfr_acc, mpfr_acc);
185203
}
186204

187205
free(a);
@@ -195,3 +213,13 @@ int main (int argc, char* argv[])
195213
MPI_Finalize();
196214
return rc;
197215
}
216+
217+
/**
 * Reference dot product computed in MPFR extended precision.
 *
 * Accumulates as[i] * bs[i] for i = 0 .. len-1 in left-to-right order.
 *
 * @param as   first input vector (at least len elements)
 * @param bs   second input vector (at least len elements)
 * @param len  number of elements to combine; len <= 0 yields 0
 * @return     the accumulated sum as an mpfr_float_1000
 *
 * Each product is formed in mpfr_float_1000, not in FLOAT_T. Writing
 * `as[i] * bs[i]` directly would multiply in FLOAT_T precision and round
 * every term before it reaches the accumulator, so the reference value
 * would already contain the per-term rounding error.
 */
mpfr_float_1000 mpfr_dot(FLOAT_T *as, FLOAT_T *bs, long long len)
{
  mpfr_float_1000 acc = 0.0;
  mpfr_float_1000 term;  // reused each iteration to avoid re-allocating an MPFR value
  for (long long i = 0; i < len; i++) {
    term = as[i];        // widen first ...
    term *= bs[i];       // ... so the multiply happens at MPFR precision
    acc += term;
  }
  return acc;
}

src/openmpi.mk

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Makefile for OpenMPI experiments
2+
3+
VECLEN_BIG = 72000000
4+
5+
ifeq ($(USE_MPI), 0)
6+
$(error "make clean, then rerun with USE_MPI=1 SMPICXX=mpicxx make")
7+
endif
8+
ifeq ($(MPICXX), 0)
9+
$(error "make clean, then rerun with USE_MPI=1 SMPICXX=mpicxx make")
10+
endif
11+
12+
# For running on the host MPI (i.e. not Simgrid)
13+
NUM_PROCS_LOCAL = 16
14+
15+
# MPI and MPI Modular Component Architecture commands (OpenMPI). Currently Unused
16+
VERBOSITY = coll_base_verbose 0
17+
#VERBOSITY = coll_base_verbose 40
18+
19+
# OpenMPI MPI_Reduce Algorithms
20+
OMPI_ALGOS = 0 1 2 3 4 5 6 7
21+
# 0:"ignore"
22+
# 1:"linear"
23+
# 2:"chain"
24+
# 3:"pipeline"
25+
# 4:"binary"
26+
# 5:"binomial"
27+
# 6:"in-order_binary"
28+
# 7:"rabenseifner"
29+
30+
# OpenMPI command line arguments
31+
# Target `ompi`: for every value in OMPI_ALGOS, launch the benchmarks with
# mpirun on NUM_PROCS_LOCAL processes, passing the value to OpenMPI as the
# MCA parameter coll_tuned_reduce_algorithm (plus the VERBOSITY MCA setting).
# First loop: echoes the algorithm number, then runs ./mpi_pi_reduce.
# Second loop: runs ./dotprod_mpi with arguments
#   $(VECLEN_BIG) runif[-1,1] native $(algo)
ompi :
32+
$(foreach algo,$(OMPI_ALGOS),\
33+
echo Reduction algorithm $(algo) ; \
34+
mpirun -np $(NUM_PROCS_LOCAL) --mca $(VERBOSITY) --mca coll_tuned_reduce_algorithm $(algo) ./mpi_pi_reduce;)
35+
$(foreach algo,$(OMPI_ALGOS),\
36+
mpirun -np $(NUM_PROCS_LOCAL) --mca $(VERBOSITY) --mca coll_tuned_reduce_algorithm $(algo) ./dotprod_mpi $(VECLEN_BIG) runif[-1,1] native $(algo);)
37+

0 commit comments

Comments
 (0)