@@ -68,6 +68,27 @@ struct Args {
6868 #[ arg( short, long, default_value = "false" ) ]
6969 in_place : bool ,
7070
71+ /// Split the output by keys and serialize into a folder
72+ ///
73+ /// If set, this query is executed for each document against jq
74+ /// to produce a filename to store the resulting document.
75+ ///
76+ /// This option only produces a key for each filename. The key
77+ /// is evaluated against the base document and does not take the jq query
78+ /// into consideration.
79+ ///
80+ /// Example: --split '"./" + (.metadata.name) + "_" + (.kind | ascii_downcase) + ".yaml"'
81+ #[ arg(
82+ short,
83+ long,
84+ conflicts_with = "in_place" ,
85+ // we need to be able to parse these back into documents to be able to split them
86+ conflicts_with = "raw_output" ,
87+ conflicts_with = "compact_output" ,
88+ conflicts_with = "join_output"
89+ ) ]
90+ split : Option < String > ,
91+
7192 /// Query to be sent to jq (see https://jqlang.github.io/jq/manual/)
7293 ///
7394 /// Default "."
@@ -126,8 +147,14 @@ impl Args {
126147 }
127148 args
128149 }
150+ fn jq_split_args ( & self ) -> Option < Vec < String > > {
151+ let split_by = & self . split . as_ref ( ) ?;
152+ let mut args = vec ! [ "-r" . into( ) ] ; // we expect single unquoted keys
153+ args. push ( split_by. to_string ( ) ) ;
154+ Some ( args)
155+ }
129156
130- fn read_yaml ( & mut self ) -> Result < Vec < u8 > > {
157+ fn read_yaml_docs ( & mut self ) -> Result < Vec < serde_json :: Value > > {
131158 let yaml_de = if let Some ( f) = & self . file {
132159 if !std:: path:: Path :: new ( & f) . exists ( ) {
133160 Self :: try_parse_from ( [ "cmd" , "-h" ] ) ?;
@@ -156,6 +183,12 @@ impl Args {
156183 docs. push ( json_value) ;
157184 }
158185 debug ! ( "found {} documents" , docs. len( ) ) ;
186+ Ok ( docs)
187+ }
188+
189+ fn read_yaml ( & mut self ) -> Result < Vec < u8 > > {
190+ // yaml is multidoc parsed by default, so flatten when <2 docs to conform to jq interface
191+ let docs = self . read_yaml_docs ( ) ?;
159192 // if there is 1 or 0 documents, do not return as nested documents
160193 let ser = match docs. as_slice ( ) {
161194 [ x] => serde_json:: to_vec ( x) ?,
@@ -165,7 +198,7 @@ impl Args {
165198 Ok ( ser)
166199 }
167200
168- fn read_toml ( & mut self ) -> Result < Vec < u8 > > {
201+ fn read_toml ( & mut self ) -> Result < serde_json :: Value > {
169202 use toml:: Table ;
170203 let mut buf = String :: new ( ) ;
171204 let toml_str = if let Some ( f) = & self . file {
@@ -184,10 +217,18 @@ impl Args {
184217 } ;
185218 let doc: Table = toml_str. parse ( ) ?;
186219 let doc_as: serde_json:: Value = doc. try_into ( ) ?;
187- Ok ( serde_json:: to_vec ( & doc_as) ?)
220+ Ok ( doc_as)
221+ }
222+
223+ fn read_toml_docs ( & mut self ) -> Result < Vec < serde_json:: Value > > {
224+ let toml = self . read_toml ( ) ?;
225+ // TODO: need toml crate to support multidoc +++ or something
226+ // see https://github.com/toml-lang/toml/issues/511
227+ // see https://github.com/toml-lang/toml/issues/583
228+ Ok ( vec ! [ toml] ) // assume single document for now
188229 }
189230
190- fn read_json ( & mut self ) -> Result < Vec < u8 > > {
231+ fn read_json ( & mut self ) -> Result < serde_json :: Value > {
191232 let json_value: serde_json:: Value = if let Some ( f) = & self . file {
192233 if !std:: path:: Path :: new ( & f) . exists ( ) {
193234 Self :: try_parse_from ( [ "cmd" , "-h" ] ) ?;
@@ -202,26 +243,45 @@ impl Args {
202243 Self :: try_parse_from ( [ "cmd" , "-h" ] ) ?;
203244 std:: process:: exit ( 2 ) ;
204245 } ;
205- Ok ( serde_json:: to_vec ( & json_value) ?)
246+ Ok ( json_value)
247+ }
248+
249+ // multidoc equivalent reader interface for json
250+ fn read_json_docs ( & mut self ) -> Result < Vec < serde_json:: Value > > {
251+ let json = self . read_json ( ) ?;
252+ // outermost is array? equivalent to multidoc
253+ if let serde_json:: Value :: Array ( ary) = json {
254+ return Ok ( ary) ;
255+ }
256+ // otherwise, it's 1 doc
257+ Ok ( vec ! [ json] )
206258 }
207259
208260 fn read_input ( & mut self ) -> Result < Vec < u8 > > {
209261 let ser = match self . input {
210262 Input :: Yaml => self . read_yaml ( ) ?,
211- Input :: Toml => self . read_toml ( ) ?,
212- Input :: Json => self . read_json ( ) ?,
263+ Input :: Toml => serde_json :: to_vec ( & self . read_toml ( ) ? ) ?,
264+ Input :: Json => serde_json :: to_vec ( & self . read_json ( ) ? ) ?,
213265 } ;
214266 debug ! ( "input decoded as json: {}" , String :: from_utf8_lossy( & ser) ) ;
215267 Ok ( ser)
216268 }
269+ fn read_input_multidoc ( & mut self ) -> Result < Vec < serde_json:: Value > > {
270+ let ser = match self . input {
271+ Input :: Yaml => self . read_yaml_docs ( ) ?,
272+ Input :: Toml => self . read_toml_docs ( ) ?,
273+ Input :: Json => self . read_json_docs ( ) ?,
274+ } ;
275+ //debug!("input decoded as json: {}", String::from_utf8_lossy(&ser));
276+ Ok ( ser)
277+ }
217278
218279 /// Pass json encoded bytes to jq with arguments for jq
219- fn shellout ( & self , input : Vec < u8 > ) -> Result < Vec < u8 > > {
220- let args = self . jq_args ( ) ;
280+ fn shellout ( & self , input : & [ u8 ] , args : & [ String ] ) -> Result < Vec < u8 > > {
221281 debug ! ( "jq args: {:?}" , & args) ;
222282 // shellout jq with given args
223283 let mut child = Command :: new ( "jq" )
224- . args ( & args)
284+ . args ( args)
225285 . stdin ( Stdio :: piped ( ) )
226286 . stdout ( Stdio :: piped ( ) )
227287 . stderr ( Stdio :: inherit ( ) )
@@ -239,7 +299,7 @@ impl Args {
239299 Ok ( output. stdout )
240300 }
241301
242- // print output either as yaml or json ( as per jq output)
302+ // Convert stdout into one of the Output formats verbatim as a single string
243303 fn output ( & self , stdout : Vec < u8 > ) -> Result < String > {
244304 match self . output {
245305 // Only jq output is guaranteed to succeed because it's not parsed as a format
@@ -269,6 +329,28 @@ impl Args {
269329 }
270330 }
271331 }
332+ // Convert stdout into one of the Output formats verbatim as multidoc strings
333+ // NB: not actually needed atm
334+ #[ allow( unused) ] // refactor later maybe
335+ fn output_matched ( & self , stdout : Vec < u8 > ) -> Result < Vec < String > > {
336+ let docs = serde_json:: Deserializer :: from_slice ( & stdout)
337+ . into_iter :: < serde_json:: Value > ( )
338+ . flatten ( )
339+ . collect :: < Vec < _ > > ( ) ;
340+ debug ! ( "parsed {} documents" , docs. len( ) ) ;
341+ let mut res = vec ! [ ] ;
342+ // All formats are strictly parsed as the requested formats
343+ for x in docs. as_slice ( ) {
344+ let str_doc: String = match self . output {
345+ // We even need jq output to be valid json in this case to allow multidoc to be matched up
346+ Output :: Jq => serde_json:: to_string_pretty ( & x) ?,
347+ Output :: Yaml => serde_yaml:: to_string ( & x) ?,
348+ Output :: Toml => toml:: to_string ( & x) ?,
349+ } ;
350+ res. push ( str_doc. trim_end ( ) . to_string ( ) ) ;
351+ }
352+ Ok ( res)
353+ }
272354}
273355
274356fn init_env_tracing_stderr ( ) -> Result < ( ) > {
@@ -289,16 +371,40 @@ fn main() -> Result<()> {
289371 args. output = Output :: Toml
290372 }
291373 debug ! ( "args: {:?}" , args) ;
292- let input = args. read_input ( ) ?;
293- let stdout = args. shellout ( input) ?;
294- let output = args. output ( stdout) ?;
295- if args. in_place && args. file . is_some ( ) {
296- let f = args. file . unwrap ( ) ; // required
297- std:: fs:: write ( f, output + "\n " ) ?;
374+ let jq_args = args. jq_args ( ) ;
375+ if let Some ( split_args) = & args. jq_split_args ( ) {
376+ // File splitting mode. Requiring precise multidoc parsing and evaluation
377+ let inputs = args. read_input_multidoc ( ) ?;
378+ // Evaluate each document with the split expression against jq
379+ // Later on, we match up the array of filenames with the corresponding output
380+ for json_doc in inputs {
381+ let data = serde_json:: to_vec ( & json_doc) ?;
382+ let splitout = args. shellout ( & data, split_args) ?;
383+ let key = String :: from_utf8_lossy ( & splitout) . trim_end ( ) . to_string ( ) ;
384+ // TODO: assert no linebreaks in keysplit - it should be used for a path construction
385+
386+ let stdout = args. shellout ( & data, & jq_args) ?;
387+ let doc = args. output ( stdout) ?;
388+ // debug:
389+ let path = std:: path:: Path :: new ( "." ) . join ( key) ;
390+ debug ! ( "Writing {}" , path. display( ) ) ;
391+ let mut f = std:: fs:: File :: create ( path) ?;
392+ f. write_all ( doc. as_bytes ( ) ) ?;
393+ }
298394 } else {
299- // write result to stdout ignoring SIGPIPE errors
300- // https://github.com/rust-lang/rust/issues/46016
301- let _ = writeln ! ( std:: io:: stdout( ) , "{output}" ) ;
395+ // normal, single pass mode on blob of u8 serde_json values passed to jq
396+ let input = args. read_input ( ) ?;
397+ let stdout = args. shellout ( & input, & jq_args) ?;
398+ // Lenient output mode (accept loose jq compact/join style output)
399+ let output = args. output ( stdout) ?;
400+ if args. in_place && args. file . is_some ( ) {
401+ let f = args. file . unwrap ( ) ; // required
402+ std:: fs:: write ( f, output + "\n " ) ?;
403+ } else {
404+ // write result to stdout ignoring SIGPIPE errors
405+ // https://github.com/rust-lang/rust/issues/46016
406+ let _ = writeln ! ( std:: io:: stdout( ) , "{output}" ) ;
407+ }
302408 }
303409 Ok ( ( ) )
304410}
@@ -319,11 +425,11 @@ mod test {
319425 println ! ( "have stdin? {}" , !std:: io:: stdin( ) . is_terminal( ) ) ;
320426 let data = args. read_input ( ) . unwrap ( ) ;
321427 println ! ( "debug args: {:?}" , args) ;
322- let res = args. shellout ( data. clone ( ) ) . unwrap ( ) ;
428+ let res = args. shellout ( & data, & args . jq_args ( ) ) . unwrap ( ) ;
323429 let out = args. output ( res) ?;
324430 assert_eq ! ( out, "{\" name\" :\" controller\" }" ) ;
325431 args. output = Output :: Yaml ;
326- let res2 = args. shellout ( data) ?;
432+ let res2 = args. shellout ( & data, & args . jq_args ( ) ) ?;
327433 let out2 = args. output ( res2) ?;
328434 assert_eq ! ( out2, "name: controller" ) ;
329435 Ok ( ( ) )
0 commit comments