@@ -836,7 +836,17 @@ namespace ack {
836
836
return ec_point_fp_proj (); // identity
837
837
}
838
838
839
- auto t = p.x .sqr () * 3 + this ->curve ().a * p.z .sqr ();
839
+ const auto t = []( const ec_point_fp_proj& p) { // t = 3*x^2 + a*z^2, with cheaper forms when a == 0 or a == p - 3
840
+ const auto x2 = p.x .sqr ();
841
+ if ( p.curve ().a_is_zero ) { // a == 0: the a*z^2 term vanishes
842
+ return 3 * x2;
843
+ }
844
+ if ( p.curve ().a_is_minus_3 ) { // a == p - 3, treated as a = -3: 3*x^2 - 3*z^2 factored as 3*(x^2 - z^2)
845
+ return 3 * ( x2 - p.z .sqr () );
846
+ }
847
+ return 3 * x2 + p.curve ().a * p.z .sqr (); // general case: arbitrary curve coefficient a
848
+ }( p ); // lambda is invoked immediately; t holds the returned value
849
+
840
850
const auto dy = 2 * p.y ;
841
851
const auto u = dy * p.z ;
842
852
const auto v = u * p.x * dy;
@@ -1179,16 +1189,30 @@ namespace ack {
1179
1189
return ec_point_fp_jacobi (); // identity
1180
1190
}
1181
1191
1182
- // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-1998-cmo-2
1183
- // note: faster than https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
1184
- auto y2 = p.y .sqr ();
1185
- auto z2 = p.z .sqr ();
1186
- auto S = 4 * p.x * y2;
1187
- auto M = 3 * p.x .sqr () + this ->curve ().a * z2.sqr ();
1188
- auto RX = M.sqr () - 2 * S;
1189
- auto RY = M * ( S - RX ) - 8 * y2.sqr ();
1190
- auto RZ = 2 * p.y * p.z ;
1191
- return make_point ( std::move (RX), std::move (RY), std::move (RZ) );
1192
+ // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-1986-cc
1193
+ // note: of the doubling formulas measured, this one was the most efficient.
1194
+
1195
+ const auto M = [](const auto & p) { // M = 3*x^2 + a*z^4, specialised for a == 0, a == p - 3 and z == 1
1196
+ const bool bZIsOne = p.z .is_one (); // NOTE(review): only read in the two non-zero-a branches below
1197
+ if ( p.curve ().a_is_zero ) { // a == 0: the a*z^4 term vanishes
1198
+ return 3 * p.x .sqr ();
1199
+ }
1200
+ else if ( p.curve ().a_is_minus_3 ) { // a == p - 3, treated as a = -3: 3*x^2 - 3*z^4 = 3*(x - z^2)*(x + z^2)
1201
+ const auto z2 = bZIsOne ? p.z : p.z .sqr (); // when z == 1, z^2 is z itself, so the squaring is skipped
1202
+ return 3 * ( p.x - z2 ) * ( p.x + z2 );
1203
+ }
1204
+ else { // general case: arbitrary curve coefficient a
1205
+ const auto z4 = bZIsOne ? p.z : p.z .sqr ().sqr (); // when z == 1, z^4 is z itself, so both squarings are skipped
1206
+ return 3 * p.x .sqr () + p.curve ().a * z4;
1207
+ }
1208
+ }( p ); // lambda is invoked immediately; M holds the returned value
1209
+
1210
+ const auto y2 = p.y .sqr ();
1211
+ const auto S = 4 * p.x * y2; // S = 4*x*y^2
1212
+ auto X3 = M.sqr () - 2 * S; // X3, Y3, Z3 are non-const so they can be moved into make_point below
1213
+ auto Y3 = M * ( S - X3 ) - 8 * y2.sqr (); // 8 * y2.sqr () is 8*y^4
1214
+ auto Z3 = 2 * p.y * p.z ;
1215
+ return make_point ( std::move (X3), std::move (Y3), std::move (Z3) );
1192
1216
}
1193
1217
1194
1218
/* *
@@ -1314,7 +1338,7 @@ namespace ack {
1314
1338
}
1315
1339
1316
1340
[[nodiscard]]
1317
- __attribute__ ((always_inline)) // note: forced inline produces a little more efficient computation. [[clang::always_inline]] doesn't work.
1341
+ __attribute__ ((always_inline)) // note: forced inline produces slightly more efficient computation. [[clang::always_inline]] doesn't work.
1318
1342
static ec_point_fp_jacobi addex (const ec_point_fp_jacobi& p, const ec_point_fp_jacobi& q,
1319
1343
const field_element_type& U1, const field_element_type& U2,
1320
1344
const field_element_type& S1, const field_element_type& S2)
@@ -1334,8 +1358,8 @@ namespace ack {
1334
1358
const auto H3 = H2 * H;
1335
1359
const auto V = U1 * H2;
1336
1360
1337
- const auto X3 = R.sqr () - H3 - 2 * V;
1338
- const auto Y3 = R * ( V - X3 ) - S1 * H3;
1361
+ auto X3 = R.sqr () - H3 - 2 * V;
1362
+ auto Y3 = R * ( V - X3 ) - S1 * H3;
1339
1363
auto Z3 = std::move ( H );
1340
1364
if ( !p.z .is_one () ) {
1341
1365
Z3 *= p.z ;
@@ -1506,6 +1530,8 @@ namespace ack {
1506
1530
const IntT n; // order of g
1507
1531
const uint32_t h; // cofactor, i.e.: h = #E(Fp) / n
1508
1532
// #E(Fp) - number of points on the curve
1533
+ const bool a_is_minus_3; // true if a == p - 3; computed once in the constructor and used by point doubling to pick the cheaper a = -3 formula
1534
+ const bool a_is_zero; // true if a == 0; computed once in the constructor and used by point doubling to drop the a term
1509
1535
1510
1536
/* *
1511
1537
* Creates a curve from the given parameters.
@@ -1522,7 +1548,9 @@ namespace ack {
1522
1548
b( std::move(b) ),
1523
1549
g( make_point( std::move(g.first), std::move(g.second) )),
1524
1550
n( std::move(n) ),
1525
- h( h )
1551
+ h( h ),
1552
+ a_is_minus_3( a == ( p - 3 ) ), // NOTE(review): if `a`/`p` are also constructor parameter names (as `b` appears to be above), they resolve to the parameters here - confirm those have not been moved-from by earlier initializers
1553
+ a_is_zero( a.is_zero() ) // cache of a == 0, read by the point-doubling code; same `a` lookup caveat as the line above
1526
1554
{}
1527
1555
1528
1556
/* *
0 commit comments