@@ -27,7 +27,7 @@ use datafusion::datasource::source::DataSourceExec;
2727use datafusion:: error:: { DataFusionError , Result } ;
2828use datafusion:: physical_plan:: { displayable, ExecutionPlan } ;
2929
30- use datafusion:: datasource:: physical_plan:: { FileScanConfig , ParquetSource } ;
30+ use datafusion:: datasource:: physical_plan:: ParquetSource ;
3131use substrait:: proto:: expression:: mask_expression:: { StructItem , StructSelect } ;
3232use substrait:: proto:: expression:: MaskExpression ;
3333use substrait:: proto:: r#type:: {
@@ -52,89 +52,82 @@ pub fn to_substrait_rel(
5252 ) ,
5353) -> Result < Box < Rel > > {
5454 if let Some ( data_source_exec) = plan. as_any ( ) . downcast_ref :: < DataSourceExec > ( ) {
55- let data_source = data_source_exec. data_source ( ) ;
56- if let Some ( file_config) = data_source. as_any ( ) . downcast_ref :: < FileScanConfig > ( ) {
57- let is_parquet = file_config
58- . file_source ( )
59- . as_any ( )
60- . downcast_ref :: < ParquetSource > ( )
61- . is_some ( ) ;
62- if is_parquet {
63- let mut substrait_files = vec ! [ ] ;
64- for ( partition_index, files) in file_config. file_groups . iter ( ) . enumerate ( )
65- {
66- for file in files. iter ( ) {
67- substrait_files. push ( FileOrFiles {
68- partition_index : partition_index. try_into ( ) . unwrap ( ) ,
69- start : 0 ,
70- length : file. object_meta . size as u64 ,
71- path_type : Some ( PathType :: UriPath (
72- file. object_meta . location . as_ref ( ) . to_string ( ) ,
73- ) ) ,
74- file_format : Some ( FileFormat :: Parquet ( ParquetReadOptions { } ) ) ,
75- } ) ;
76- }
55+ if let Some ( ( file_config, _) ) =
56+ data_source_exec. downcast_to_file_source :: < ParquetSource > ( )
57+ {
58+ let mut substrait_files = vec ! [ ] ;
59+ for ( partition_index, files) in file_config. file_groups . iter ( ) . enumerate ( ) {
60+ for file in files. iter ( ) {
61+ substrait_files. push ( FileOrFiles {
62+ partition_index : partition_index. try_into ( ) . unwrap ( ) ,
63+ start : 0 ,
64+ length : file. object_meta . size as u64 ,
65+ path_type : Some ( PathType :: UriPath (
66+ file. object_meta . location . as_ref ( ) . to_string ( ) ,
67+ ) ) ,
68+ file_format : Some ( FileFormat :: Parquet ( ParquetReadOptions { } ) ) ,
69+ } ) ;
7770 }
71+ }
7872
79- let mut names = vec ! [ ] ;
80- let mut types = vec ! [ ] ;
73+ let mut names = vec ! [ ] ;
74+ let mut types = vec ! [ ] ;
8175
82- for field in file_config. file_schema . fields . iter ( ) {
83- match to_substrait_type ( field. data_type ( ) , field. is_nullable ( ) ) {
84- Ok ( t) => {
85- names. push ( field. name ( ) . clone ( ) ) ;
86- types. push ( t) ;
87- }
88- Err ( e) => return Err ( e) ,
76+ for field in file_config. file_schema . fields . iter ( ) {
77+ match to_substrait_type ( field. data_type ( ) , field. is_nullable ( ) ) {
78+ Ok ( t) => {
79+ names. push ( field. name ( ) . clone ( ) ) ;
80+ types. push ( t) ;
8981 }
82+ Err ( e) => return Err ( e) ,
9083 }
84+ }
9185
92- let type_info = Struct {
93- types,
94- // FIXME: duckdb doesn't set this field, keep it as default variant 0.
95- // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1106-L1127
96- type_variation_reference : 0 ,
97- nullability : Nullability :: Required . into ( ) ,
98- } ;
86+ let type_info = Struct {
87+ types,
88+ // FIXME: duckdb doesn't set this field, keep it as default variant 0.
89+ // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1106-L1127
90+ type_variation_reference : 0 ,
91+ nullability : Nullability :: Required . into ( ) ,
92+ } ;
9993
100- let mut select_struct = None ;
101- if let Some ( projection) = file_config. projection . as_ref ( ) {
102- let struct_items = projection
103- . iter ( )
104- . map ( |index| StructItem {
105- field : * index as i32 ,
106- // FIXME: duckdb sets this to None, but it's not clear why.
107- // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1191
108- child : None ,
109- } )
110- . collect ( ) ;
94+ let mut select_struct = None ;
95+ if let Some ( projection) = file_config. projection . as_ref ( ) {
96+ let struct_items = projection
97+ . iter ( )
98+ . map ( |index| StructItem {
99+ field : * index as i32 ,
100+ // FIXME: duckdb sets this to None, but it's not clear why.
101+ // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1191
102+ child : None ,
103+ } )
104+ . collect ( ) ;
111105
112- select_struct = Some ( StructSelect { struct_items } ) ;
113- }
106+ select_struct = Some ( StructSelect { struct_items } ) ;
107+ }
114108
115- return Ok ( Box :: new ( Rel {
116- rel_type : Some ( RelType :: Read ( Box :: new ( ReadRel {
117- common : None ,
118- base_schema : Some ( NamedStruct {
119- names,
120- r#struct : Some ( type_info) ,
121- } ) ,
122- filter : None ,
123- best_effort_filter : None ,
124- projection : Some ( MaskExpression {
125- select : select_struct,
126- // FIXME: duckdb set this to true, but it's not clear why.
127- // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1186.
128- maintain_singular_struct : true ,
129- } ) ,
109+ return Ok ( Box :: new ( Rel {
110+ rel_type : Some ( RelType :: Read ( Box :: new ( ReadRel {
111+ common : None ,
112+ base_schema : Some ( NamedStruct {
113+ names,
114+ r#struct : Some ( type_info) ,
115+ } ) ,
116+ filter : None ,
117+ best_effort_filter : None ,
118+ projection : Some ( MaskExpression {
119+ select : select_struct,
120+ // FIXME: duckdb set this to true, but it's not clear why.
121+ // https://github.com/duckdb/substrait/blob/b6f56643cb11d52de0e32c24a01dfd5947df62be/src/to_substrait.cpp#L1186.
122+ maintain_singular_struct : true ,
123+ } ) ,
124+ advanced_extension : None ,
125+ read_type : Some ( ReadType :: LocalFiles ( LocalFiles {
126+ items : substrait_files,
130127 advanced_extension : None ,
131- read_type : Some ( ReadType :: LocalFiles ( LocalFiles {
132- items : substrait_files,
133- advanced_extension : None ,
134- } ) ) ,
135- } ) ) ) ,
136- } ) ) ;
137- }
128+ } ) ) ,
129+ } ) ) ) ,
130+ } ) ) ;
138131 }
139132 }
140133 Err ( DataFusionError :: Substrait ( format ! (
0 commit comments