Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cker] Apply structured binding in cker #14665

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions runtime/compute/cker/include/cker/NeonTensorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -667,9 +667,9 @@ inline void NeonSymmetricQuantizeFloats(const float *values, const int size,
float *scaling_factor)
{
// TODO(raziel): vectorize min/max calculation.
auto minmax = std::minmax_element(values, values + size);
*min = *minmax.first;
*max = *minmax.second;
auto [min_ptr, max_ptr] = std::minmax_element(values, values + size);
*min = *min_ptr;
*max = *max_ptr;
const int kScale = 127;
const float range = std::max(std::abs(*min), std::abs(*max));
if (range == 0)
Expand Down
12 changes: 6 additions & 6 deletions runtime/compute/cker/include/cker/PortableTensorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ inline void PortableSymmetricQuantizeFloats(const float *values, const int size,
int8_t *quantized_values, float *min_value,
float *max_value, float *scaling_factor)
{
auto minmax = std::minmax_element(values, values + size);
*min_value = *minmax.first;
*max_value = *minmax.second;
auto [min_ptr, max_ptr] = std::minmax_element(values, values + size);
*min_value = *min_ptr;
*max_value = *max_ptr;
const int kScale = 127;
const float range = std::max(std::abs(*min_value), std::abs(*max_value));
if (range == 0)
Expand Down Expand Up @@ -153,9 +153,9 @@ inline void PortableAsymmetricQuantizeFloats(const float *values, const int size
const int32_t kMaxScale = 127;
const double qmin_double = kMinScale;
const double qmax_double = kMaxScale;
const auto minmax = std::minmax_element(values, values + size);
const double rmin = static_cast<double>(std::min(0.0f, *minmax.first));
const double rmax = static_cast<double>(std::max(0.0f, *minmax.second));
const auto [min_ptr, max_ptr] = std::minmax_element(values, values + size);
const double rmin = static_cast<double>(std::min(0.0f, *min_ptr));
const double rmax = static_cast<double>(std::max(0.0f, *max_ptr));
if (rmin == rmax)
{
memset(quantized_values, 0, size * sizeof(int8_t));
Expand Down
140 changes: 74 additions & 66 deletions runtime/compute/cker/include/cker/operation/Pad.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,42 +64,43 @@ inline void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &inpu
case 1:
{
const int32_t in_row_len = input_shape.Dims(0);
std::fill_n(output_data, padding_list[0].first, constant_value);
std::memcpy(output_data + padding_list[0].first, input_data, in_row_len * sizeof(T));
std::fill_n(output_data + padding_list[0].first + in_row_len, padding_list[0].second,
constant_value);
[[maybe_unused]] auto [pad_before, pad_after] = padding_list[0];
std::fill_n(output_data, pad_before, constant_value);
std::memcpy(output_data + pad_before, input_data, in_row_len * sizeof(T));
std::fill_n(output_data + pad_before + in_row_len, pad_after, constant_value);
break;
}
case 2: // HW
{
const int32_t in_row_len = input_shape.Dims(1);
const int32_t out_row_size = output_shape.Dims(1);

// prepend padding rows
std::fill_n(output_data, padding_list[0].first * out_row_size, constant_value);
auto [pad_top, pad_bottom] = padding_list[0];
auto [pad_left, pad_right] = padding_list[1];

// Prepend padding rows
std::fill_n(output_data, pad_top * out_row_size, constant_value);

const auto r_h_inp_lim = input_shape.Dims(0) + padding_list[0].first;
for (auto i = padding_list[0].first, j = 0; i < r_h_inp_lim; ++i, ++j)
const auto r_h_inp_lim = input_shape.Dims(0) + pad_top;
for (auto i = pad_top, j = 0; i < r_h_inp_lim; ++i, ++j)
{
auto out_offset = i * out_row_size;
const auto in_offset = j * in_row_len;

// prepend padding values
std::fill_n(output_data + out_offset, padding_list[1].first, constant_value);
// Prepend padding values
std::fill_n(output_data + out_offset, pad_left, constant_value);
out_offset += pad_left;

out_offset += padding_list[1].first;

// copy a row of input data
// Copy a row of input data
memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));

out_offset += in_row_len;

// append padding values
std::fill_n(output_data + out_offset, padding_list[1].second, constant_value);
// Append padding values
std::fill_n(output_data + out_offset, pad_right, constant_value);
}

// append padding rows
std::fill_n(output_data + r_h_inp_lim * out_row_size, padding_list[0].second * out_row_size,
// Append padding rows
std::fill_n(output_data + r_h_inp_lim * out_row_size, pad_bottom * out_row_size,
constant_value);
break;
}
Expand All @@ -109,45 +110,47 @@ inline void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &inpu
const int32_t out_row_size = output_shape.Dims(2);
const auto plain_size = out_row_size * output_shape.Dims(1);

// prepend padding plains
std::fill_n(output_data, padding_list[0].first * plain_size, constant_value);
auto [pad_batches_before, pad_batches_after] = padding_list[0];
auto [pad_parallelepipes_before, pad_parallelepipes_after] = padding_list[1];
auto [pad_plains_before, pad_plains_after] = padding_list[2];

const auto r_h_inp_lim = input_shape.Dims(0) + padding_list[0].first;
for (auto i = padding_list[0].first, i_inp = 0; i < r_h_inp_lim; ++i, ++i_inp)
// Prepend padding plains
std::fill_n(output_data, pad_batches_before * plain_size, constant_value);

const auto r_h_inp_lim = input_shape.Dims(0) + pad_batches_before;
for (auto i = pad_batches_before, i_inp = 0; i < r_h_inp_lim; ++i, ++i_inp)
{
const auto out_w_offset = (i * output_shape.Dims(1) + 0) * output_shape.Dims(2);
const auto out_w_offset = (i * output_shape.Dims(1)) * output_shape.Dims(2);

// prepend padding rows
std::fill_n(output_data + out_w_offset, padding_list[1].first * out_row_size,
// Prepend padding rows
std::fill_n(output_data + out_w_offset, pad_parallelepipes_before * out_row_size,
constant_value);

const auto r_w_inp_lim = input_shape.Dims(1) + padding_list[1].first;
for (auto j = padding_list[1].first, j_inp = 0; j < r_w_inp_lim; ++j, ++j_inp)
const auto r_w_inp_lim = input_shape.Dims(1) + pad_parallelepipes_before;
for (auto j = pad_parallelepipes_before, j_inp = 0; j < r_w_inp_lim; ++j, ++j_inp)
{
auto out_offset = (i * output_shape.Dims(1) + j) * output_shape.Dims(2);
const auto in_offset = (i_inp * input_shape.Dims(1) + j_inp) * input_shape.Dims(2);

// prepend padding values
std::fill_n(output_data + out_offset, padding_list[2].first, constant_value);

out_offset += padding_list[2].first;
// Prepend padding values
std::fill_n(output_data + out_offset, pad_plains_before, constant_value);
out_offset += pad_plains_before;

// copy a row of input data
// Copy a row of input data
memcpy(output_data + out_offset, input_data + in_offset, in_row_len * sizeof(T));

out_offset += in_row_len;

// append padding values
std::fill_n(output_data + out_offset, padding_list[2].second, constant_value);
// Append padding values
std::fill_n(output_data + out_offset, pad_plains_after, constant_value);
}

// append padding rows
// Append padding rows
std::fill_n(output_data + out_w_offset + r_w_inp_lim * out_row_size,
padding_list[1].second * out_row_size, constant_value);
pad_parallelepipes_after * out_row_size, constant_value);
}

// append padding plains
std::fill_n(output_data + r_h_inp_lim * plain_size, padding_list[0].second * plain_size,
// Append padding plains
std::fill_n(output_data + r_h_inp_lim * plain_size, pad_batches_after * plain_size,
constant_value);
break;
}
Expand All @@ -161,57 +164,62 @@ inline void Pad(const int32_t *padding_data, int32_t pad_rank, const Shape &inpu
const auto plain_size = out_row_size * output_shape.Dims(2);
const auto parallelepiped_size = plain_size * output_shape.Dims(1);

// prepend padding parallelepipeds
std::fill_n(output_data, padding_list[0].first * parallelepiped_size, constant_value);
auto [pad_batches_before, pad_batches_after] = padding_list[0];
auto [pad_parallelepipes_before, pad_parallelepipes_after] = padding_list[1];
auto [pad_plains_before, pad_plains_after] = padding_list[2];
auto [pad_rows_before, pad_rows_after] = padding_list[3];

const auto r_b_inp_lim = input_shape.Dims(0) + padding_list[0].first;
for (auto i = padding_list[0].first, i_inp = 0; i < r_b_inp_lim; ++i, ++i_inp)
// Prepend padding parallelepipeds
std::fill_n(output_data, pad_batches_before * parallelepiped_size, constant_value);

const auto r_b_inp_lim = input_shape.Dims(0) + pad_batches_before;
for (auto i = pad_batches_before, i_inp = 0; i < r_b_inp_lim; ++i, ++i_inp)
{
const auto out_h_offset = get_offset(output_shape, i, 0, 0);
// prepend padding plains
std::fill_n(output_data + out_h_offset, padding_list[1].first * plain_size, constant_value);
// Prepend padding plains
std::fill_n(output_data + out_h_offset, pad_parallelepipes_before * plain_size,
constant_value);

const auto r_h_inp_lim = input_shape.Dims(1) + padding_list[1].first;
for (auto j = padding_list[1].first, j_inp = 0; j < r_h_inp_lim; ++j, ++j_inp)
const auto r_h_inp_lim = input_shape.Dims(1) + pad_parallelepipes_before;
for (auto j = pad_parallelepipes_before, j_inp = 0; j < r_h_inp_lim; ++j, ++j_inp)
{
const auto out_w_offset = get_offset(output_shape, i, j, 0);

// prepend padding rows
std::fill_n(output_data + out_w_offset, padding_list[2].first * out_row_size,
constant_value);
// Prepend padding rows
std::fill_n(output_data + out_w_offset, pad_plains_before * out_row_size, constant_value);

const auto r_w_inp_lim = input_shape.Dims(2) + padding_list[2].first;
for (auto k = padding_list[2].first, k_inp = 0; k < r_w_inp_lim; ++k, ++k_inp)
const auto r_w_inp_lim = input_shape.Dims(2) + pad_plains_before;
for (auto k = pad_plains_before, k_inp = 0; k < r_w_inp_lim; ++k, ++k_inp)
{
auto out_c_offset = get_offset(output_shape, i, j, k);
const auto in_offset = get_offset(input_shape, i_inp, j_inp, k_inp);

// prepend padding values
std::fill_n(output_data + out_c_offset, padding_list[3].first, constant_value);

out_c_offset += padding_list[3].first;
// Prepend padding values
std::fill_n(output_data + out_c_offset, pad_rows_before, constant_value);
out_c_offset += pad_rows_before;

// copy a row of input data
// Copy a row of input data
memcpy(output_data + out_c_offset, input_data + in_offset, in_row_len * sizeof(T));

out_c_offset += in_row_len;

// append padding values
std::fill_n(output_data + out_c_offset, padding_list[3].second, constant_value);
// Append padding values
std::fill_n(output_data + out_c_offset, pad_rows_after, constant_value);
}

// append padding rows
// Append padding rows
std::fill_n(output_data + out_w_offset + r_w_inp_lim * out_row_size,
padding_list[2].second * out_row_size, constant_value);
pad_plains_after * out_row_size, constant_value);
}

// append padding plains
// Append padding plains
std::fill_n(output_data + out_h_offset + r_h_inp_lim * plain_size,
padding_list[1].second * plain_size, constant_value);
pad_parallelepipes_after * plain_size, constant_value);
}
// append padding parallelepipeds

// Append padding parallelepipeds
std::fill_n(output_data + r_b_inp_lim * parallelepiped_size,
padding_list[0].second * parallelepiped_size, constant_value);
pad_batches_after * parallelepiped_size, constant_value);
break;
break;
}
default:
Expand Down
3 changes: 1 addition & 2 deletions runtime/compute/cker/include/cker/operation/Tile.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ std::pair<int, int> TileOneDimension(const Shape &in_dimensions, const T *in_dat
T *copy_to_data = out_data;
for (int i = 0; i < dimension_size; ++i)
{
int stride_size = 0, tiled_stride_size = 0;
std::tie(stride_size, tiled_stride_size) =
auto [stride_size, tiled_stride_size] =
TileOneDimension(in_dimensions, copy_from_data, multipliers, copy_to_data, dimension + 1);
copy_from_data += stride_size;
copy_to_data += tiled_stride_size;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -846,12 +846,11 @@ inline void BroadcastAddDispatch(const BinaryArithmeticOpParam &params, const Sh
}
else
{
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncAddFloat>(params);

auto [implFunc1, implFunc2] = getBinaryOpWithActivationImplFloat<BinaryOpFuncAddFloat>(params);
BinaryBroadcastFiveFold(
params, params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast,
input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data,
implFuncs.first, implFuncs.second);
input1_shape, input1_data, input2_shape, input2_data, output_shape, output_data, implFunc1,
implFunc2);
}
}

Expand All @@ -871,16 +870,16 @@ inline void BroadcastSubDispatch(const BinaryArithmeticOpParam &params, const Sh
{
if (params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast)
{
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncSubFloat>(params);
auto [implFunc1, implFunc2] = getBinaryOpWithActivationImplFloat<BinaryOpFuncSubFloat>(params);
BinaryBroadcastFiveFold(params, false, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, implFuncs.first, implFuncs.second);
output_shape, output_data, implFunc1, implFunc2);
}
else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast)
{
auto implFuncs =
auto [implFunc1, implFunc2] =
getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncSubFloat>>(params);
BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, implFuncs.first, implFuncs.second);
output_shape, output_data, implFunc1, implFunc2);
}
else
{
Expand Down Expand Up @@ -1210,9 +1209,9 @@ inline void BroadcastMulDispatch(const BinaryArithmeticOpParam &params, const Sh
input2_data, output_shape, output_data, fn);
return;
}
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncMulFloat>(params);
auto [implFunc1, implFunc2] = getBinaryOpWithActivationImplFloat<BinaryOpFuncMulFloat>(params);
BinaryBroadcastFiveFold(params, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, implFuncs.first, implFuncs.second);
output_shape, output_data, implFunc1, implFunc2);
}

inline void Div(const BinaryArithmeticOpParam &params, const Shape &input1_shape,
Expand All @@ -1239,16 +1238,16 @@ inline void BroadcastDivDispatch(const BinaryArithmeticOpParam &params, const Sh
#ifdef __aarch64__
if (params.broadcast_category == BroadcastableOpCategory::kFirstInputBroadcastsFast)
{
auto implFuncs = getBinaryOpWithActivationImplFloat<BinaryOpFuncDivFloat>(params);
auto [implFunc1, implFunc2] = getBinaryOpWithActivationImplFloat<BinaryOpFuncDivFloat>(params);
BinaryBroadcastFiveFold(params, false, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, implFuncs.first, implFuncs.second);
output_shape, output_data, implFunc1, implFunc2);
}
else if (params.broadcast_category == BroadcastableOpCategory::kSecondInputBroadcastsFast)
{
auto implFuncs =
auto [implFunc1, implFunc2] =
getBinaryOpWithActivationImplFloat<BinaryOpFuncSwapArgs<BinaryOpFuncDivFloat>>(params);
BinaryBroadcastFiveFold(params, true, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data, implFuncs.first, implFuncs.second);
output_shape, output_data, implFunc1, implFunc2);
}
else
#endif // __aarch64__
Expand Down
Loading