Skip to content

Commit c5f3947

Browse files
authored
Split support (#58)
Signed-off-by: clux <[email protected]>
1 parent cac63c2 commit c5f3947

File tree

5 files changed

+176
-22
lines changed

5 files changed

+176
-22
lines changed

Diff for: .gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
/target
2+
test/split

Diff for: lq.rs

+128-22
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,27 @@ struct Args {
6868
#[arg(short, long, default_value = "false")]
6969
in_place: bool,
7070

71+
/// Split the output by keys and serialize into a folder
72+
///
73+
/// If set, this query is executed for each document against jq
74+
/// to produce a filename to store the resulting document.
75+
///
76+
/// This option only produces a key for each filename. The key
77+
/// is evaluated against the base document and does not take the jq query
78+
/// into consideration.
79+
///
80+
/// Example: --split '"./" + (.metadata.name) + "_" + (.kind | ascii_downcase) + ".yaml"'
81+
#[arg(
82+
short,
83+
long,
84+
conflicts_with = "in_place",
85+
// we need to be able to parse these back into documents to be able to split them
86+
conflicts_with = "raw_output",
87+
conflicts_with = "compact_output",
88+
conflicts_with = "join_output"
89+
)]
90+
split: Option<String>,
91+
7192
/// Query to be sent to jq (see https://jqlang.github.io/jq/manual/)
7293
///
7394
/// Default "."
@@ -126,8 +147,14 @@ impl Args {
126147
}
127148
args
128149
}
150+
fn jq_split_args(&self) -> Option<Vec<String>> {
151+
let split_by = &self.split.as_ref()?;
152+
let mut args = vec!["-r".into()]; // we expect single unquoted keys
153+
args.push(split_by.to_string());
154+
Some(args)
155+
}
129156

130-
fn read_yaml(&mut self) -> Result<Vec<u8>> {
157+
fn read_yaml_docs(&mut self) -> Result<Vec<serde_json::Value>> {
131158
let yaml_de = if let Some(f) = &self.file {
132159
if !std::path::Path::new(&f).exists() {
133160
Self::try_parse_from(["cmd", "-h"])?;
@@ -156,6 +183,12 @@ impl Args {
156183
docs.push(json_value);
157184
}
158185
debug!("found {} documents", docs.len());
186+
Ok(docs)
187+
}
188+
189+
fn read_yaml(&mut self) -> Result<Vec<u8>> {
190+
// yaml is multidoc parsed by default, so flatten when <2 docs to conform to jq interface
191+
let docs = self.read_yaml_docs()?;
159192
// if there is 1 or 0 documents, do not return as nested documents
160193
let ser = match docs.as_slice() {
161194
[x] => serde_json::to_vec(x)?,
@@ -165,7 +198,7 @@ impl Args {
165198
Ok(ser)
166199
}
167200

168-
fn read_toml(&mut self) -> Result<Vec<u8>> {
201+
fn read_toml(&mut self) -> Result<serde_json::Value> {
169202
use toml::Table;
170203
let mut buf = String::new();
171204
let toml_str = if let Some(f) = &self.file {
@@ -184,10 +217,18 @@ impl Args {
184217
};
185218
let doc: Table = toml_str.parse()?;
186219
let doc_as: serde_json::Value = doc.try_into()?;
187-
Ok(serde_json::to_vec(&doc_as)?)
220+
Ok(doc_as)
221+
}
222+
223+
/// Multidoc equivalent reader interface for toml.
///
/// Always yields exactly one document (the whole input parsed by `read_toml`).
fn read_toml_docs(&mut self) -> Result<Vec<serde_json::Value>> {
    // TODO: need toml crate to support multidoc +++ or something
    // see https://github.com/toml-lang/toml/issues/511
    // see https://github.com/toml-lang/toml/issues/583
    // assume single document for now
    self.read_toml().map(|single_doc| vec![single_doc])
}
189230

190-
fn read_json(&mut self) -> Result<Vec<u8>> {
231+
fn read_json(&mut self) -> Result<serde_json::Value> {
191232
let json_value: serde_json::Value = if let Some(f) = &self.file {
192233
if !std::path::Path::new(&f).exists() {
193234
Self::try_parse_from(["cmd", "-h"])?;
@@ -202,26 +243,45 @@ impl Args {
202243
Self::try_parse_from(["cmd", "-h"])?;
203244
std::process::exit(2);
204245
};
205-
Ok(serde_json::to_vec(&json_value)?)
246+
Ok(json_value)
247+
}
248+
249+
// multidoc equivalent reader interface for json
250+
fn read_json_docs(&mut self) -> Result<Vec<serde_json::Value>> {
251+
let json = self.read_json()?;
252+
// outermost is array? equivalent to multidoc
253+
if let serde_json::Value::Array(ary) = json {
254+
return Ok(ary);
255+
}
256+
// otherwise, it's 1 doc
257+
Ok(vec![json])
206258
}
207259

208260
fn read_input(&mut self) -> Result<Vec<u8>> {
209261
let ser = match self.input {
210262
Input::Yaml => self.read_yaml()?,
211-
Input::Toml => self.read_toml()?,
212-
Input::Json => self.read_json()?,
263+
Input::Toml => serde_json::to_vec(&self.read_toml()?)?,
264+
Input::Json => serde_json::to_vec(&self.read_json()?)?,
213265
};
214266
debug!("input decoded as json: {}", String::from_utf8_lossy(&ser));
215267
Ok(ser)
216268
}
269+
fn read_input_multidoc(&mut self) -> Result<Vec<serde_json::Value>> {
270+
let ser = match self.input {
271+
Input::Yaml => self.read_yaml_docs()?,
272+
Input::Toml => self.read_toml_docs()?,
273+
Input::Json => self.read_json_docs()?,
274+
};
275+
//debug!("input decoded as json: {}", String::from_utf8_lossy(&ser));
276+
Ok(ser)
277+
}
217278

218279
/// Pass json encoded bytes to jq with arguments for jq
219-
fn shellout(&self, input: Vec<u8>) -> Result<Vec<u8>> {
220-
let args = self.jq_args();
280+
fn shellout(&self, input: &[u8], args: &[String]) -> Result<Vec<u8>> {
221281
debug!("jq args: {:?}", &args);
222282
// shellout jq with given args
223283
let mut child = Command::new("jq")
224-
.args(&args)
284+
.args(args)
225285
.stdin(Stdio::piped())
226286
.stdout(Stdio::piped())
227287
.stderr(Stdio::inherit())
@@ -239,7 +299,7 @@ impl Args {
239299
Ok(output.stdout)
240300
}
241301

242-
// print output either as yaml or json (as per jq output)
302+
// Convert stdout into one of the Output formats verbatim as a single string
243303
fn output(&self, stdout: Vec<u8>) -> Result<String> {
244304
match self.output {
245305
// Only jq output is guaranteed to succeed because it's not parsed as a format
@@ -269,6 +329,28 @@ impl Args {
269329
}
270330
}
271331
}
332+
// Convert stdout into one of the Output formats verbatim as multidoc strings
333+
// NB: not actually needed atm
334+
#[allow(unused)] // refactor later maybe
335+
fn output_matched(&self, stdout: Vec<u8>) -> Result<Vec<String>> {
336+
let docs = serde_json::Deserializer::from_slice(&stdout)
337+
.into_iter::<serde_json::Value>()
338+
.flatten()
339+
.collect::<Vec<_>>();
340+
debug!("parsed {} documents", docs.len());
341+
let mut res = vec![];
342+
// All formats are strictly parsed as the requested formats
343+
for x in docs.as_slice() {
344+
let str_doc: String = match self.output {
345+
// We even need jq output to be valid json in this case to allow multidoc to be matched up
346+
Output::Jq => serde_json::to_string_pretty(&x)?,
347+
Output::Yaml => serde_yaml::to_string(&x)?,
348+
Output::Toml => toml::to_string(&x)?,
349+
};
350+
res.push(str_doc.trim_end().to_string());
351+
}
352+
Ok(res)
353+
}
272354
}
273355

274356
fn init_env_tracing_stderr() -> Result<()> {
@@ -289,16 +371,40 @@ fn main() -> Result<()> {
289371
args.output = Output::Toml
290372
}
291373
debug!("args: {:?}", args);
292-
let input = args.read_input()?;
293-
let stdout = args.shellout(input)?;
294-
let output = args.output(stdout)?;
295-
if args.in_place && args.file.is_some() {
296-
let f = args.file.unwrap(); // required
297-
std::fs::write(f, output + "\n")?;
374+
let jq_args = args.jq_args();
375+
if let Some(split_args) = &args.jq_split_args() {
376+
// File splitting mode. Requiring precise multidoc parsing and evaluation
377+
let inputs = args.read_input_multidoc()?;
378+
// Evaluate each document with the split expression against jq
379+
// Later on, we match up the array of filenames with the corresponding output
380+
for json_doc in inputs {
381+
let data = serde_json::to_vec(&json_doc)?;
382+
let splitout = args.shellout(&data, split_args)?;
383+
let key = String::from_utf8_lossy(&splitout).trim_end().to_string();
384+
// TODO: assert no linebreaks in keysplit - it should be used for a path construction
385+
386+
let stdout = args.shellout(&data, &jq_args)?;
387+
let doc = args.output(stdout)?;
388+
// debug:
389+
let path = std::path::Path::new(".").join(key);
390+
debug!("Writing {}", path.display());
391+
let mut f = std::fs::File::create(path)?;
392+
f.write_all(doc.as_bytes())?;
393+
}
298394
} else {
299-
// write result to stdout ignoring SIGPIPE errors
300-
// https://github.com/rust-lang/rust/issues/46016
301-
let _ = writeln!(std::io::stdout(), "{output}");
395+
// normal, single pass mode on blob of u8 serde_json values passed to jq
396+
let input = args.read_input()?;
397+
let stdout = args.shellout(&input, &jq_args)?;
398+
// Lenient output mode (accept loose jq compact/join style output)
399+
let output = args.output(stdout)?;
400+
if args.in_place && args.file.is_some() {
401+
let f = args.file.unwrap(); // required
402+
std::fs::write(f, output + "\n")?;
403+
} else {
404+
// write result to stdout ignoring SIGPIPE errors
405+
// https://github.com/rust-lang/rust/issues/46016
406+
let _ = writeln!(std::io::stdout(), "{output}");
407+
}
302408
}
303409
Ok(())
304410
}
@@ -319,11 +425,11 @@ mod test {
319425
println!("have stdin? {}", !std::io::stdin().is_terminal());
320426
let data = args.read_input().unwrap();
321427
println!("debug args: {:?}", args);
322-
let res = args.shellout(data.clone()).unwrap();
428+
let res = args.shellout(&data, &args.jq_args()).unwrap();
323429
let out = args.output(res)?;
324430
assert_eq!(out, "{\"name\":\"controller\"}");
325431
args.output = Output::Yaml;
326-
let res2 = args.shellout(data)?;
432+
let res2 = args.shellout(&data, &args.jq_args())?;
327433
let out2 = args.output(res2)?;
328434
assert_eq!(out2, "name: controller");
329435
Ok(())

Diff for: test/multi.json

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[
2+
{ "foo": "bar" },
3+
{ "foo": "baz" }
4+
]

Diff for: test/multi.toml

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
+++
2+
[[Document]]
3+
index = "1"
4+
foo = "some-key"
5+
note = "multidoc-not-working-for-toml-not-part-of-spec"
6+
+++
7+
[[Document]]
8+
index = "2"
9+
bar = "some-other-key"
10+
note = "https://github.com/toml-lang/toml/issues/583"

Diff for: test/yq.test.bats

+33
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,36 @@
105105
run lq '.[].metadata.labels' -y test/deploy.yaml
106106
echo "$output" && echo "$output" | rg -U '\- null\n- null\n- null\n- app: controller\n- app: controller'
107107
}
108+
109+
# Splits ./test/deploy.yaml with --split and YAML output (-y), then checks that
# 5 files were written to test/split and that the controller service file has kind "Service".
@test "split-yaml-multi-to-yaml" {
110+
# start from an empty output folder so the file count below is exact
rm -f test/split/*
111+
mkdir -p test/split
112+
run lq '.' --split '"test/split/" + (.metadata.name) + "_" + (.kind | ascii_downcase) + ".yaml"' ./test/deploy.yaml -y
113+
[ "$status" -eq 0 ]
114+
files="$(find test/split -type f | wc -l)"
115+
echo $files && [[ "$files" -eq "5" ]]
116+
# read one of the split files back through lq to verify its content
run lq '.kind' test/split/controller_service.yaml -r
117+
echo "$output" && echo "$output" | grep "Service"
118+
}
119+
120+
@test "split-yaml-single-to-json" {
121+
rm -f test/split/*
122+
mkdir -p test/split
123+
run lq '.' --split '"test/split/" + (.metadata.name) + "_" + (.kind | ascii_downcase) + ".json"' ./test/grafana.yaml
124+
[ "$status" -eq 0 ]
125+
files="$(find test/split -type f | wc -l)"
126+
echo $files && [[ "$files" -eq "1" ]]
127+
run jq '.kind' test/split/promstack-grafana_deployment.json -r
128+
echo "$output" && echo "$output" | grep "Deployment"
129+
}
130+
131+
@test "split-multi-json" {
132+
rm -f test/split/*
133+
mkdir -p test/split
134+
run lq '.' --split '"test/split/" + (.foo) + ".json"' ./test/multi.json --input=json
135+
[ "$status" -eq 0 ]
136+
files="$(find test/split -type f | wc -l)"
137+
echo $files && [[ "$files" -eq "2" ]]
138+
run jq '.foo' test/split/bar.json -r
139+
echo "$output" && echo "$output" | grep "bar"
140+
}

0 commit comments

Comments
 (0)