@@ -1147,31 +1147,19 @@ namespace ack {
1147
1147
1148
1148
// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-1998-cmo-2
1149
1149
// note: faster than https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
1150
- auto pz2 = p.z .sqr ();
1151
- auto qz2 = q.z .sqr ();
1152
- auto U1 = p.x * qz2;
1153
- auto U2 = q.x * pz2;
1154
- auto S1 = p.y * qz2 * q.z ;
1155
- auto S2 = q.y * pz2 * p.z ;
1156
-
1157
- auto H = U2 - U1;
1158
- auto R = S2 - S1;
1159
- if ( H.is_zero () ) {
1160
- if ( R.is_zero () ) {
1161
- return doubled ();
1162
- } else {
1163
- return ec_point_fp_jacobi ();
1164
- }
1165
- }
1166
-
1167
- auto H2 = H.sqr ();
1168
- auto H3 = H2 * H;
1169
- auto V = U1 * H2;
1170
1150
1171
- auto X3 = R.sqr () - H3 - 2 * V;
1172
- auto Y3 = R * ( V - X3 ) - S1 * H3;
1173
- auto Z3 = H * p.z * q.z ;
1174
- return make_point ( std::move (X3), std::move (Y3), std::move (Z3) );
1151
+ const bool bZ1IsOne = p.z .is_one ();
1152
+ const bool bZ2IsOne = q.z .is_one ();
1153
+ if ( bZ1IsOne && bZ2IsOne ) {
1154
+ return add_z_1 ( p, q );
1155
+ }
1156
+ else if ( bZ1IsOne ) {
1157
+ return add_z2_1 ( q, p );
1158
+ }
1159
+ else if ( bZ2IsOne ) {
1160
+ return add_z2_1 ( p, q );
1161
+ }
1162
+ return add_ne ( p, q );
1175
1163
}
1176
1164
1177
1165
/* *
@@ -1324,6 +1312,65 @@ namespace ack {
1324
1312
field_element_type ( std::move (z), this ->curve ().p )
1325
1313
);
1326
1314
}
1315
+
1316
+ [[nodiscard]]
1317
+ __attribute__ ((always_inline)) // note: forced inline produces a little more efficient computation. [[clang::always_inline]] doesn't work.
1318
+ static ec_point_fp_jacobi addex (const ec_point_fp_jacobi& p, const ec_point_fp_jacobi& q,
1319
+ const field_element_type& U1, const field_element_type& U2,
1320
+ const field_element_type& S1, const field_element_type& S2)
1321
+ {
1322
+ // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-1998-cmo-2
1323
+ // note: faster than https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
1324
+ const auto H = U2 - U1;
1325
+ const auto R = S2 - S1;
1326
+ if ( H.is_zero () ) {
1327
+ if ( R.is_zero () ) {
1328
+ return p.doubled ();
1329
+ } else {
1330
+ return ec_point_fp_jacobi ();
1331
+ }
1332
+ }
1333
+ const auto H2 = H.sqr ();
1334
+ const auto H3 = H2 * H;
1335
+ const auto V = U1 * H2;
1336
+
1337
+ const auto X3 = R.sqr () - H3 - 2 * V;
1338
+ const auto Y3 = R * ( V - X3 ) - S1 * H3;
1339
+ auto Z3 = std::move ( H );
1340
+ if ( !p.z .is_one () ) {
1341
+ Z3 *= p.z ;
1342
+ }
1343
+ if ( !q.z .is_one () ) {
1344
+ Z3 *= q.z ;
1345
+ }
1346
+ return ec_point_fp_jacobi ( p.curve (), std::move (X3), std::move (Y3), std::move (Z3) );
1347
+ }
1348
+
1349
+ [[nodiscard]]
1350
+ __attribute__ ((always_inline))
1351
+ static ec_point_fp_jacobi add_z_1 (const ec_point_fp_jacobi& p, const ec_point_fp_jacobi& q)
1352
+ {
1353
+ return addex ( p, q, p.x , q.x , p.y , q.y );
1354
+ }
1355
+
1356
+ [[nodiscard]]
1357
+ __attribute__ ((always_inline))
1358
+ static ec_point_fp_jacobi add_z2_1 (const ec_point_fp_jacobi& p, const ec_point_fp_jacobi& q)
1359
+ {
1360
+ const auto pz2 = p.z .sqr ();
1361
+ return addex ( p, q, p.x , q.x * pz2, p.y , q.y * pz2 * p.z );
1362
+ }
1363
+
1364
+ [[nodiscard]]
1365
+ __attribute__ ((always_inline))
1366
+ static ec_point_fp_jacobi add_ne (const ec_point_fp_jacobi& p, const ec_point_fp_jacobi& q)
1367
+ {
1368
+ // This extra function, although inlined, produces a little bit more efficient code than
1369
+ // it would if put directly into the calling scope.
1370
+ const auto pz2 = p.z .sqr ();
1371
+ const auto qz2 = q.z .sqr ();
1372
+ return addex ( p, q, p.x * qz2, q.x * pz2, p.y * qz2 * q.z , q.y * pz2 * p.z );
1373
+ }
1327
1374
};
1328
1375
1329
1376
/* *
@@ -1652,7 +1699,7 @@ namespace ack {
1652
1699
return false ;
1653
1700
}
1654
1701
1655
- // check that the discriminant is nonzero. If zero, the curve is singular.
1702
+ // check that discriminant is nonzero. If zero, the curve is singular.
1656
1703
if ( ( -16 * y2 ) == 0 ) {
1657
1704
return false ;
1658
1705
}
@@ -1744,7 +1791,7 @@ namespace ack {
1744
1791
const auto pnegqneg_inv = -pnegqneg;
1745
1792
1746
1793
// Iterate reversed NAF representations of a and b,
1747
- // optimized for cases where this .z == 1.
1794
+ // optimized for cases where p.z == 1 or q.z == 1.
1748
1795
PointT r;
1749
1796
for ( std::size_t i = 0 ; i < a_naf.size (); i++ ) {
1750
1797
r = r.doubled ();
0 commit comments