Skip to content

Commit 4aefe8a

Browse files
committed
Add usage docs to rambo.py and remove naive_erf and commented-out perf-rate code; in idtr.cpp rename unpack to unpackN and unpack1 to unpack
1 parent b1d6227 commit 4aefe8a

File tree

2 files changed

+22
-39
lines changed

2 files changed

+22
-39
lines changed

examples/rambo.py

+7-25
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
"""
2+
Rambo benchmark
23
34
Examples:
4-
python rambo.py -nevts 10 -nout 10 -b sharpy -i 10000
5-
mpiexec -n 3 python rambo.py -nevts 64 -nout 64 -b sharpy -i 100
5+
6+
# run 1000 iterations of 10 events and 100 outputs on sharpy backend
7+
python rambo.py -nevts 10 -nout 100 -b sharpy -i 1000
8+
9+
# MPI parallel run
10+
mpiexec -n 3 python rambo.py -nevts 64 -nout 64 -b sharpy -i 1000
611
712
"""
813

@@ -31,27 +36,6 @@ def info(s):
3136
print(s)
3237

3338

34-
def naive_erf(x):
35-
"""
36-
Error function (erf) implementation
37-
38-
Adapted from formula 7.1.26 in
39-
Abramowitz and Stegun, "Handbook of Mathematical Functions", 1965.
40-
"""
41-
y = numpy.abs(x)
42-
43-
a1 = 0.254829592
44-
a2 = -0.284496736
45-
a3 = 1.421413741
46-
a4 = -1.453152027
47-
a5 = 1.061405429
48-
p = 0.3275911
49-
50-
t = 1.0 / (1.0 + p * y)
51-
f = (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t
52-
return numpy.sign(x) * (1.0 - f * numpy.exp(-y * y))
53-
54-
5539
def sp_rambo(sp, sp_C1, sp_F1, sp_Q1, sp_output, nevts, nout):
5640
sp_C = 2.0 * sp_C1 - 1.0
5741
sp_S = sp.sqrt(1 - sp.square(sp_C))
@@ -159,14 +143,12 @@ def eval():
159143
t_min = numpy.min(time_list)
160144
t_max = numpy.max(time_list)
161145
t_med = numpy.median(time_list)
162-
# perf_rate = nopt / t_med / 1e6 # million options per second
163146
init_overhead = t_warm - t_med
164147
if backend == "sharpy":
165148
info(f"Estimated initialization overhead: {init_overhead:.5f} s")
166149
info(f"Min. duration: {t_min:.5f} s")
167150
info(f"Max. duration: {t_max:.5f} s")
168151
info(f"Median duration: {t_med:.5f} s")
169-
# info(f"Median rate: {perf_rate:.5f} Mopts/s")
170152

171153
fini()
172154

src/idtr.cpp

+15-14
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,11 @@ void bufferize(void *cptr, SHARPY::DTypeId dtype, const int64_t *sizes,
241241
});
242242
}
243243

244-
/// copy contiguous block of data into a possibly strided array
245-
void unpack(void *in, SHARPY::DTypeId dtype, const int64_t *sizes,
246-
const int64_t *strides, const int64_t *tStarts,
247-
const int64_t *tSizes, uint64_t nd, uint64_t N, void *out) {
244+
/// copy contiguous block of data into a possibly strided array distributed to N
245+
/// ranks
246+
void unpackN(void *in, SHARPY::DTypeId dtype, const int64_t *sizes,
247+
const int64_t *strides, const int64_t *tStarts,
248+
const int64_t *tSizes, uint64_t nd, uint64_t N, void *out) {
248249
if (!in || !sizes || !strides || !tStarts || !tSizes || !out) {
249250
return;
250251
}
@@ -269,8 +270,8 @@ void unpack(void *in, SHARPY::DTypeId dtype, const int64_t *sizes,
269270
}
270271

271272
/// copy contiguous block of data into a possibly strided array
272-
void unpack1(void *in, SHARPY::DTypeId dtype, const int64_t *sizes,
273-
const int64_t *strides, uint64_t ndim, void *out) {
273+
void unpack(void *in, SHARPY::DTypeId dtype, const int64_t *sizes,
274+
const int64_t *strides, uint64_t ndim, void *out) {
274275
if (!in || !sizes || !strides || !out) {
275276
return;
276277
}
@@ -522,8 +523,8 @@ WaitHandleBase *_idtr_copy_reshape(SHARPY::DTypeId sharpytype,
522523
roffs = std::move(roffs)]() {
523524
tc->wait(hdl);
524525
if (isStrided) {
525-
unpack1(rBuff, sharpytype, oDataShapePtr, oDataStridesPtr, oNDims,
526-
oDataPtr);
526+
unpack(rBuff, sharpytype, oDataShapePtr, oDataStridesPtr, oNDims,
527+
oDataPtr);
527528
delete[](char *) rBuff;
528529
}
529530
};
@@ -931,15 +932,15 @@ void *_idtr_update_halo(SHARPY::DTypeId sharpytype, int64_t ndims,
931932
tc->wait(lwh);
932933
std::vector<int64_t> recvBufferStart(nworkers * ndims, 0);
933934
if (cache->_bufferizeLRecv) {
934-
unpack(lRecvData, sharpytype, leftHaloShape, leftHaloStride,
935-
recvBufferStart.data(), cache->_lRecvBufferSize.data(), ndims,
936-
nworkers, leftHaloData);
935+
unpackN(lRecvData, sharpytype, leftHaloShape, leftHaloStride,
936+
recvBufferStart.data(), cache->_lRecvBufferSize.data(), ndims,
937+
nworkers, leftHaloData);
937938
}
938939
tc->wait(rwh);
939940
if (cache->_bufferizeRRecv) {
940-
unpack(rRecvData, sharpytype, rightHaloShape, rightHaloStride,
941-
recvBufferStart.data(), cache->_rRecvBufferSize.data(), ndims,
942-
nworkers, rightHaloData);
941+
unpackN(rRecvData, sharpytype, rightHaloShape, rightHaloStride,
942+
recvBufferStart.data(), cache->_rRecvBufferSize.data(), ndims,
943+
nworkers, rightHaloData);
943944
}
944945
};
945946

0 commit comments

Comments
 (0)