@@ -1421,7 +1421,7 @@ static void save_label_data(
1421
1421
1422
1422
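// temp_data is a scratch byte buffer used during normalization; it has room for
// total_num_cols values of up to sizeof(double) bytes each and is indexed through data().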
1423
1423
std::vector<char > temp_data;
1424
- temp_data.reserve (total_num_cols*sizeof (double ));
1424
+ temp_data.resize (total_num_cols*sizeof (double ));
1425
1425
1426
1426
std::vector<char > data;
1427
1427
data.reserve (num_mols_per_file*(total_num_cols*sizeof (double ) + (1 +2 *num_tasks)*sizeof (uint64_t )));
@@ -1517,6 +1517,7 @@ static void save_label_data(
1517
1517
const size_t in_bytes_per_float,
1518
1518
const size_t out_bytes_per_float,
1519
1519
const NormalizationMethod normalization_method,
1520
+ const bool do_clipping,
1520
1521
const double * task_stats) {
1521
1522
1522
1523
if (size_t (col_stride) == in_bytes_per_float) {
@@ -1540,8 +1541,10 @@ static void save_label_data(
1540
1541
assert (in_bytes_per_float == sizeof (uint16_t ));
1541
1542
value = c10::detail::fp16_ieee_to_fp32_value (((const uint16_t *)(temp_data.data ()))[col]);
1542
1543
}
1543
- value = std::max (value, task_stats[stat_min_offset]);
1544
- value = std::min (value, task_stats[stat_max_offset]);
1544
+ if (do_clipping) {
1545
+ value = std::max (value, task_stats[stat_min_offset]);
1546
+ value = std::min (value, task_stats[stat_max_offset]);
1547
+ }
1545
1548
if (normalization_method == NormalizationMethod::NORMAL) {
1546
1549
if (task_stats[stat_std_offset] != 0 ) {
1547
1550
value = (value - task_stats[stat_mean_offset])/task_stats[stat_std_offset];
@@ -1599,6 +1602,9 @@ static void save_label_data(
1599
1602
const size_t task_first_col = task_col_starts[task_index];
1600
1603
const size_t task_num_cols = task_col_starts[task_index+1 ] - task_first_col;
1601
1604
const NormalizationOptions& normalization = task_normalization_options[task_index];
1605
+ const bool do_clipping =
1606
+ (normalization.min_clipping > -std::numeric_limits<double >::infinity ()) &&
1607
+ (normalization.max_clipping < std::numeric_limits<double >::infinity ());
1602
1608
const double * task_stats = all_task_stats + num_stats*task_first_col;
1603
1609
1604
1610
const size_t bytes_per_float = task_bytes_per_float[task_index];
@@ -1703,36 +1709,36 @@ static void save_label_data(
1703
1709
const intptr_t offsets_stride = label_offsets_numpy_array ? PyArray_STRIDES (label_offsets_numpy_array)[0 ] : 0 ;
1704
1710
if (offsets_raw_data == nullptr ) {
1705
1711
const char * row_data = raw_data + strides[0 ]*task_mol_index;
1706
- store_single_row (row_data, task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , task_stats);
1712
+ store_single_row (row_data, task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , do_clipping, task_stats);
1707
1713
}
1708
1714
else {
1709
1715
size_t begin_offset = *reinterpret_cast <const int64_t *>(offsets_raw_data + offsets_stride*task_mol_index);
1710
1716
size_t end_offset = *reinterpret_cast <const int64_t *>(offsets_raw_data + offsets_stride*(task_mol_index+1 ));
1711
1717
const char * row_data = raw_data + strides[0 ]*begin_offset;
1712
1718
if (same_order_as_first) {
1713
1719
for (size_t row = begin_offset; row < end_offset; ++row, row_data += strides[0 ]) {
1714
- store_single_row (row_data, task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , task_stats);
1720
+ store_single_row (row_data, task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , do_clipping, task_stats);
1715
1721
}
1716
1722
}
1717
1723
else if (task_levels[task_index] == FeatureLevel::NODE) {
1718
1724
assert (end_offset - begin_offset == current_atom_order.size ());
1719
1725
for (unsigned int current_index : current_atom_order) {
1720
- store_single_row (row_data + current_index*strides[0 ], task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , task_stats);
1726
+ store_single_row (row_data + current_index*strides[0 ], task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , do_clipping, task_stats);
1721
1727
}
1722
1728
}
1723
1729
else if (task_levels[task_index] == FeatureLevel::NODEPAIR) {
1724
1730
const size_t n = current_atom_order.size ();
1725
1731
assert (end_offset - begin_offset == n*n);
1726
1732
for (unsigned int current_index0 : current_atom_order) {
1727
1733
for (unsigned int current_index1 : current_atom_order) {
1728
- store_single_row (row_data + (current_index0*n + current_index1)*strides[0 ], task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , task_stats);
1734
+ store_single_row (row_data + (current_index0*n + current_index1)*strides[0 ], task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , do_clipping, task_stats);
1729
1735
}
1730
1736
}
1731
1737
}
1732
1738
else {
1733
1739
assert (task_levels[task_index] == FeatureLevel::EDGE);
1734
1740
for (unsigned int current_index : current_bond_order) {
1735
- store_single_row (row_data + current_index*strides[0 ], task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , task_stats);
1741
+ store_single_row (row_data + current_index*strides[0 ], task_num_cols, strides[1 ], bytes_per_float, bytes_per_float, normalization.method , do_clipping, task_stats);
1736
1742
}
1737
1743
}
1738
1744
}
0 commit comments