Skip to content

Commit abc11e1

Browse files
hzeller and copybara-github
authored and committed
Store sample inputs as testvector::SampleInputsProto
There are various serializations to represent sample inputs: for functions, it is args.txt, for procs it is some other ad-hoc serialization, and internal to the fuzzer datastructure it is serialized as testvector::SampleInputsProto. The tools have been prepared to take a single protobuffer. This CL writes the protobuffer (as testvector.pbtxt) but still alongside the 'old' serialization formats. Part of the refactoring to universally use testvector::SampleInputsProto (follow-up steps: use this format exclusively and remove old serialization formats). Issues: #1645 PiperOrigin-RevId: 694671946
1 parent eedb4e3 commit abc11e1

File tree

7 files changed

+107
-45
lines changed

7 files changed

+107
-45
lines changed

xls/fuzzer/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ cc_library(
6969
"//xls/common/file:get_runfile_path",
7070
"//xls/common/status:status_macros",
7171
"//xls/dslx/frontend:pos",
72+
"//xls/tests:testvector_cc_proto",
7273
"@boringssl//:crypto",
7374
"@com_google_absl//absl/flags:flag",
7475
"@com_google_absl//absl/log",
@@ -324,6 +325,7 @@ cc_library(
324325
"//xls/ir:value",
325326
"//xls/public:runtime_build_actions",
326327
"//xls/simulation:check_simulator",
328+
"//xls/tests:testvector_cc_proto",
327329
"//xls/tools:eval_utils",
328330
"@com_google_absl//absl/algorithm:container",
329331
"@com_google_absl//absl/container:btree",

xls/fuzzer/run_fuzz.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "xls/fuzzer/sample_generator.h"
5252
#include "xls/fuzzer/sample_runner.h"
5353
#include "xls/fuzzer/sample_summary.pb.h"
54+
#include "xls/tests/testvector.pb.h"
5455

5556
ABSL_DECLARE_FLAG(int32_t, v);
5657
ABSL_DECLARE_FLAG(std::string, vmodule);
@@ -205,6 +206,14 @@ absl::Status RunSample(const Sample& smp, const std::filesystem::path& run_dir,
205206
SetTextProtoFile(options_file_name, smp.options().proto()));
206207
argv.push_back("--options_file=options.pbtxt");
207208

209+
std::filesystem::path testvector_path = run_dir / "testvector.pbtxt";
210+
testvector::SampleInputsProto testvector;
211+
XLS_RETURN_IF_ERROR(smp.FillSampleInputs(&testvector));
212+
XLS_RETURN_IF_ERROR(SetTextProtoFile(testvector_path, testvector));
213+
// TODO(hzeller): This is a preparation, but testvector.pbtxt is not yet
214+
// passed to tools. This is the egg, chicken follows in next change.
215+
// argv.push_back("--testvector_textproto=testvector.pbtxt");
216+
208217
std::filesystem::path args_file_name = run_dir / "args.txt";
209218
XLS_RETURN_IF_ERROR(
210219
SetFileContents(args_file_name, ArgsBatchToText(smp.args_batch())));

xls/fuzzer/sample.cc

Lines changed: 70 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,47 @@ bool Sample::ArgsBatchEqual(const Sample& other) const {
202202
return true;
203203
}
204204

205+
// Extract args batch from SampleInputsProto. If to be interpreted as
206+
// proc_samples, also extract "ir_channel_names" (which must not be a nullptr
207+
// then).
208+
/* static */ absl::Status Sample::ExtractArgsBatch(
209+
bool is_proc_samples, const testvector::SampleInputsProto& testvector,
210+
std::vector<std::vector<InterpValue>>& args_batch,
211+
std::vector<std::string>* ir_channel_names) {
212+
// In the serialization channel inputs are grouped by channel, but the
213+
// fuzzer expects inputs to be grouped by input number.
214+
// TODO(meheff): Change the fuzzer to accept inputs grouped by channel. This
215+
// would enable a different number of inputs per channel.
216+
if (is_proc_samples) {
217+
XLS_RET_CHECK(!testvector.has_function_args()); // proc samples expected
218+
XLS_RET_CHECK(ir_channel_names != nullptr);
219+
for (const testvector::ChannelInputProto& channel_input :
220+
testvector.channel_inputs().inputs()) {
221+
ir_channel_names->push_back(channel_input.channel_name());
222+
for (int i = 0; i < channel_input.values().size(); ++i) {
223+
const std::string& value_str = channel_input.values(i);
224+
XLS_ASSIGN_OR_RETURN(Value value, Parser::ParseTypedValue(value_str));
225+
XLS_ASSIGN_OR_RETURN(InterpValue interp_value,
226+
dslx::ValueToInterpValue(value));
227+
if (args_batch.size() <= i) {
228+
args_batch.resize(i + 1);
229+
}
230+
args_batch[i].push_back(interp_value);
231+
}
232+
}
233+
return absl::OkStatus();
234+
}
235+
236+
// Otherwise just extract function information.
237+
XLS_RET_CHECK(!testvector.has_channel_inputs()); // function samples expected
238+
for (const std::string& arg : testvector.function_args().args()) {
239+
XLS_ASSIGN_OR_RETURN(std::vector<InterpValue> args, dslx::ParseArgs(arg));
240+
args_batch.push_back(args);
241+
}
242+
243+
return absl::OkStatus();
244+
}
245+
205246
/* static */ absl::StatusOr<Sample> Sample::Deserialize(std::string_view s) {
206247
bool in_config = false;
207248
std::vector<std::string_view> config_lines;
@@ -238,37 +279,39 @@ bool Sample::ArgsBatchEqual(const Sample& other) const {
238279
XLS_ASSIGN_OR_RETURN(SampleOptions options,
239280
SampleOptions::FromProto(proto.sample_options()));
240281

241-
std::string dslx_code = absl::StrJoin(dslx_lines, "\n");
282+
// Make sure we see the kind of inputs we expect.
283+
XLS_RET_CHECK_EQ(proto.inputs().has_function_args(),
284+
options.IsFunctionSample());
242285

243-
// In the serialization channel inputs are grouped by channel, but the
244-
// fuzzer expects inputs to be grouped by input number.
245-
// TODO(meheff): Change the fuzzer to accept inputs grouped by channel. This
246-
// would enable a different number of inputs per channel.
247-
std::vector<std::string> ir_channel_names;
248286
std::vector<std::vector<InterpValue>> args_batch;
249-
if (proto.sample_options().sample_type() == fuzzer::SAMPLE_TYPE_PROC) {
250-
for (const testvector::ChannelInputProto& channel_input :
251-
proto.inputs().channel_inputs().inputs()) {
252-
ir_channel_names.push_back(channel_input.channel_name());
253-
for (int i = 0; i < channel_input.values().size(); ++i) {
254-
const std::string& value_str = channel_input.values(i);
255-
XLS_ASSIGN_OR_RETURN(Value value, Parser::ParseTypedValue(value_str));
256-
XLS_ASSIGN_OR_RETURN(InterpValue interp_value,
257-
dslx::ValueToInterpValue(value));
258-
if (args_batch.size() <= i) {
259-
args_batch.resize(i + 1);
260-
}
261-
args_batch[i].push_back(interp_value);
262-
}
287+
std::vector<std::string> ir_channel_names;
288+
XLS_RETURN_IF_ERROR(ExtractArgsBatch(options.IsProcSample(), proto.inputs(),
289+
args_batch, &ir_channel_names));
290+
291+
std::string dslx_code = absl::StrJoin(dslx_lines, "\n");
292+
return Sample(dslx_code, options, args_batch, ir_channel_names);
293+
}
294+
295+
absl::Status Sample::FillSampleInputs(
296+
testvector::SampleInputsProto* proto) const {
297+
if (options().IsFunctionSample()) {
298+
testvector::FunctionArgsProto* args_proto = proto->mutable_function_args();
299+
for (const std::vector<InterpValue>& args : args_batch_) {
300+
args_proto->add_args(InterpValueListToString(args));
263301
}
264302
} else {
265-
XLS_RET_CHECK(proto.inputs().has_function_args());
266-
for (const std::string& arg : proto.inputs().function_args().args()) {
267-
XLS_ASSIGN_OR_RETURN(std::vector<InterpValue> args, dslx::ParseArgs(arg));
268-
args_batch.push_back(args);
303+
XLS_RET_CHECK(options().IsProcSample());
304+
testvector::ChannelInputsProto* inputs_proto =
305+
proto->mutable_channel_inputs();
306+
for (int64_t i = 0; i < ir_channel_names_.size(); ++i) {
307+
testvector::ChannelInputProto* input_proto = inputs_proto->add_inputs();
308+
input_proto->set_channel_name(ir_channel_names_[i]);
309+
for (const std::vector<InterpValue>& args : args_batch_) {
310+
input_proto->add_values(ToArgString(args[i]));
311+
}
269312
}
270313
}
271-
return Sample(dslx_code, options, args_batch, ir_channel_names);
314+
return absl::OkStatus();
272315
}
273316

274317
std::string Sample::Serialize(
@@ -285,24 +328,8 @@ std::string Sample::Serialize(
285328
config.set_issue(std::string("DO NOT ") +
286329
"SUBMIT Insert link to GitHub issue here.");
287330
*config.mutable_sample_options() = options().proto();
288-
if (options().IsFunctionSample()) {
289-
testvector::FunctionArgsProto* args_proto =
290-
config.mutable_inputs()->mutable_function_args();
291-
for (const std::vector<InterpValue>& args : args_batch_) {
292-
args_proto->add_args(InterpValueListToString(args));
293-
}
294-
} else {
295-
CHECK(options().IsProcSample());
296-
testvector::ChannelInputsProto* inputs_proto =
297-
config.mutable_inputs()->mutable_channel_inputs();
298-
for (int64_t i = 0; i < ir_channel_names_.size(); ++i) {
299-
testvector::ChannelInputProto* input_proto = inputs_proto->add_inputs();
300-
input_proto->set_channel_name(ir_channel_names_[i]);
301-
for (const std::vector<InterpValue>& args : args_batch_) {
302-
input_proto->add_values(ToArgString(args[i]));
303-
}
304-
}
305-
}
331+
CHECK_OK(FillSampleInputs(config.mutable_inputs()));
332+
306333
std::string config_text;
307334
CHECK(google::protobuf::TextFormat::PrintToString(config, &config_text));
308335
for (std::string_view line : absl::StrSplit(config_text, '\n')) {

xls/fuzzer/sample.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@
2424
#include <utility>
2525
#include <vector>
2626

27+
#include "absl/status/status.h"
2728
#include "absl/status/statusor.h"
2829
#include "absl/types/span.h"
2930
#include "xls/common/proto_adaptor_utils.h"
3031
#include "xls/dslx/interp_value.h"
3132
#include "xls/fuzzer/sample.pb.h"
33+
#include "xls/tests/testvector.pb.h"
3234
#include "re2/re2.h"
3335

3436
namespace xls {
@@ -213,6 +215,14 @@ class Sample {
213215
// // END_CONFIG
214216
// <code sample>
215217
static absl::StatusOr<Sample> Deserialize(std::string_view s);
218+
219+
// Utility function to convert testvector::SampleInputsProto to
220+
// args batch and channel names used in this object.
221+
static absl::Status ExtractArgsBatch(
222+
bool is_proc_samples, const testvector::SampleInputsProto& testvector,
223+
std::vector<std::vector<dslx::InterpValue>>& args_batch,
224+
std::vector<std::string>* ir_channel_names = nullptr);
225+
216226
std::string Serialize(
217227
std::optional<std::string_view> error_message = std::nullopt) const;
218228

@@ -247,6 +257,9 @@ class Sample {
247257
}
248258
bool operator!=(const Sample& other) const { return !((*this) == other); }
249259

260+
// Convert internal argument representation to a sample inputs proto.
261+
absl::Status FillSampleInputs(testvector::SampleInputsProto* proto) const;
262+
250263
private:
251264
// Returns whether the argument batch is the same as in "other".
252265
bool ArgsBatchEqual(const Sample& other) const;

xls/fuzzer/sample_cc_test.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ TEST(SampleCcTest, DeserializationCanHandleNewlinesInStringLiterals) {
7474
// BEGIN_CONFIG
7575
// # proto-message: xls.fuzzer.CrasherConfigurationProto
7676
// issue: "Foo"
77+
// sample_options {
78+
// sample_type: SAMPLE_TYPE_FUNCTION
79+
// }
7780
// inputs {
7881
// function_args {
7982
// args: "(bits[32]:0x01,

xls/fuzzer/sample_runner.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
#include "xls/ir/value.h"
7777
#include "xls/public/runtime_build_actions.h"
7878
#include "xls/simulation/check_simulator.h"
79+
#include "xls/tests/testvector.pb.h"
7980
#include "xls/tools/eval_utils.h"
8081
#include "re2/re2.h"
8182

@@ -828,12 +829,17 @@ absl::Status SampleRunner::Run(const Sample& sample) {
828829
} else {
829830
input_path /= "sample.ir";
830831
}
831-
832832
XLS_RETURN_IF_ERROR(SetFileContents(input_path, sample.input_text()));
833833

834834
std::filesystem::path options_path = run_dir_ / "options.pbtxt";
835835
XLS_RETURN_IF_ERROR(SetTextProtoFile(options_path, sample.options().proto()));
836836

837+
std::filesystem::path testvector_path = run_dir_ / "testvector.pbtxt";
838+
testvector::SampleInputsProto testvector;
839+
XLS_RETURN_IF_ERROR(sample.FillSampleInputs(&testvector));
840+
XLS_RETURN_IF_ERROR(SetTextProtoFile(testvector_path, testvector));
841+
842+
// TODO(hzeller): retire the following files and use testvector
837843
std::filesystem::path args_path = run_dir_ / "args.txt";
838844
XLS_RETURN_IF_ERROR(
839845
SetFileContents(args_path, ArgsBatchToText(sample.args_batch())));

xls/tests/testvector.proto

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package xls.testvector;
1818

1919
// TODO(google/xls#1645) As a first step, this is lifted out of
2020
// fuzzer/sample.proto without change; more refactoring steps follow here.
21+
// The channel data should possibly even be a xls::ProcChannelValuesProto
22+
// but that would require rewriting the existing crasher*.x files.
2123

2224
// Inputs fed to a single input channel of the sample proc.
2325
message ChannelInputProto {
@@ -37,7 +39,7 @@ message FunctionArgsProto {
3739
// Each entry is a semicolon-separated list of xls::Values with one Value per
3840
// function parameter. Example entry: "bits[1]:0; bits[32]:0x42"
3941
// TODO(google/xls#1645) instead of the semicolon-separation, this should be
40-
// repeated field of values.
42+
// repeated field of ValueProtos
4143
repeated string args = 1;
4244
}
4345

0 commit comments

Comments
 (0)