Skip to content

Commit 97a5b61

Browse files
authored
feat: support export data for bigquery (#1976)
1 parent 5ec953b commit 97a5b61

File tree

4 files changed

+309
-2
lines changed

4 files changed

+309
-2
lines changed

src/ast/mod.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4355,6 +4355,15 @@ pub enum Statement {
43554355
///
43564356
/// See [ReturnStatement]
43574357
Return(ReturnStatement),
4358+
/// Export data statement
4359+
///
4360+
/// Example:
4361+
/// ```sql
4362+
/// EXPORT DATA OPTIONS(uri='gs://bucket/folder/*', format='PARQUET', overwrite=true) AS
4363+
/// SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10
4364+
/// ```
4365+
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/export-statements)
4366+
ExportData(ExportData),
43584367
/// ```sql
43594368
/// CREATE [OR REPLACE] USER <user> [IF NOT EXISTS]
43604369
/// ```
@@ -6198,6 +6207,7 @@ impl fmt::Display for Statement {
61986207
Statement::Return(r) => write!(f, "{r}"),
61996208
Statement::List(command) => write!(f, "LIST {command}"),
62006209
Statement::Remove(command) => write!(f, "REMOVE {command}"),
6210+
Statement::ExportData(e) => write!(f, "{e}"),
62016211
Statement::CreateUser(s) => write!(f, "{s}"),
62026212
}
62036213
}
@@ -10144,6 +10154,34 @@ impl fmt::Display for MemberOf {
1014410154
}
1014510155
}
1014610156

10157+
/// An `EXPORT DATA` statement.
///
/// Its `Display` implementation renders it as
/// `EXPORT DATA [WITH CONNECTION <connection>] OPTIONS(<options>) AS <query>`.
///
/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/export-statements)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
10158+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
10159+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
10160+
pub struct ExportData {
10161+
/// Options listed inside `OPTIONS(...)`, rendered comma-separated.
pub options: Vec<SqlOption>,
10162+
/// The query that follows the `AS` keyword.
pub query: Box<Query>,
10163+
/// Object name given by `WITH CONNECTION <name>`; `None` when the clause is absent.
pub connection: Option<ObjectName>,
10164+
}
10165+
10166+
impl fmt::Display for ExportData {
10167+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
10168+
if let Some(connection) = &self.connection {
10169+
write!(
10170+
f,
10171+
"EXPORT DATA WITH CONNECTION {connection} OPTIONS({}) AS {}",
10172+
display_comma_separated(&self.options),
10173+
self.query
10174+
)
10175+
} else {
10176+
write!(
10177+
f,
10178+
"EXPORT DATA OPTIONS({}) AS {}",
10179+
display_comma_separated(&self.options),
10180+
self.query
10181+
)
10182+
}
10183+
}
10184+
}
1014710185
/// Creates a user
1014810186
///
1014910187
/// Syntax:

src/ast/spans.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::ast::{query::SelectItemQualifiedWildcardKind, ColumnOptions};
18+
use crate::ast::{query::SelectItemQualifiedWildcardKind, ColumnOptions, ExportData};
1919
use core::iter;
2020

2121
use crate::tokenizer::Span;
@@ -531,6 +531,17 @@ impl Spanned for Statement {
531531
Statement::Print { .. } => Span::empty(),
532532
Statement::Return { .. } => Span::empty(),
533533
Statement::List(..) | Statement::Remove(..) => Span::empty(),
534+
Statement::ExportData(ExportData {
535+
options,
536+
query,
537+
connection,
538+
}) => union_spans(
539+
options
540+
.iter()
541+
.map(|i| i.span())
542+
.chain(core::iter::once(query.span()))
543+
.chain(connection.iter().map(|i| i.span())),
544+
),
534545
Statement::CreateUser(..) => Span::empty(),
535546
}
536547
}

src/parser/mod.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,10 @@ impl<'a> Parser<'a> {
645645
Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(),
646646
Keyword::PRINT => self.parse_print(),
647647
Keyword::RETURN => self.parse_return(),
648+
Keyword::EXPORT => {
649+
self.prev_token();
650+
self.parse_export_data()
651+
}
648652
_ => self.expected("an SQL statement", next_token),
649653
},
650654
Token::LParen => {
@@ -16523,6 +16527,30 @@ impl<'a> Parser<'a> {
1652316527
}
1652416528
}
1652516529

16530+
/// /// Parse a `EXPORT DATA` statement.
16531+
///
16532+
/// See [Statement::ExportData]
16533+
fn parse_export_data(&mut self) -> Result<Statement, ParserError> {
16534+
self.expect_keywords(&[Keyword::EXPORT, Keyword::DATA])?;
16535+
16536+
let connection = if self.parse_keywords(&[Keyword::WITH, Keyword::CONNECTION]) {
16537+
Some(self.parse_object_name(false)?)
16538+
} else {
16539+
None
16540+
};
16541+
self.expect_keyword(Keyword::OPTIONS)?;
16542+
self.expect_token(&Token::LParen)?;
16543+
let options = self.parse_comma_separated(|p| p.parse_sql_option())?;
16544+
self.expect_token(&Token::RParen)?;
16545+
self.expect_keyword(Keyword::AS)?;
16546+
let query = self.parse_query()?;
16547+
Ok(Statement::ExportData(ExportData {
16548+
options,
16549+
query,
16550+
connection,
16551+
}))
16552+
}
16553+
1652616554
/// Consume the parser and return its underlying token buffer
1652716555
pub fn into_tokens(self) -> Vec<TokenWithSpan> {
1652816556
self.tokens

tests/sqlparser_bigquery.rs

Lines changed: 231 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ mod test_utils;
2020

2121
use std::ops::Deref;
2222

23+
use sqlparser::ast::helpers::attached_token::AttachedToken;
2324
use sqlparser::ast::*;
2425
use sqlparser::dialect::{BigQueryDialect, GenericDialect};
26+
use sqlparser::keywords::Keyword;
2527
use sqlparser::parser::{ParserError, ParserOptions};
26-
use sqlparser::tokenizer::{Location, Span};
28+
use sqlparser::tokenizer::{Location, Span, Token, TokenWithSpan, Word};
2729
use test_utils::*;
2830

2931
#[test]
@@ -2567,6 +2569,234 @@ fn test_struct_trailing_and_nested_bracket() {
25672569
);
25682570
}
25692571

2572+
/// Round-trips `EXPORT DATA` with and without `WITH CONNECTION`, checks the
/// resulting AST, and checks the parser errors for an empty or missing
/// `OPTIONS(...)` list.
#[test]
fn test_export_data() {
    // Expected AST pieces shared by both successful statements: the two
    // inputs differ only in the optional `WITH CONNECTION` clause.
    let string_option = |name: &str, text: &str| SqlOption::KeyValue {
        key: Ident::new(name),
        value: Expr::Value(Value::SingleQuotedString(text.to_owned()).with_empty_span()),
    };
    let expected_options = vec![
        string_option("uri", "gs://bucket/folder/*"),
        string_option("format", "PARQUET"),
        SqlOption::KeyValue {
            key: Ident::new("overwrite"),
            value: Expr::Value(Value::Boolean(true).with_empty_span()),
        },
    ];

    let column = |name: &str| Expr::Identifier(Ident::new(name));

    let select_token = AttachedToken(TokenWithSpan::new(
        Token::Word(Word {
            value: "SELECT".to_string(),
            quote_style: None,
            keyword: Keyword::SELECT,
        }),
        Span::empty(),
    ));
    let source_table = TableWithJoins {
        relation: table_from_name(ObjectName::from(vec![
            Ident::new("mydataset"),
            Ident::new("table1"),
        ])),
        joins: vec![],
    };
    let select = Select {
        select_token,
        distinct: None,
        top: None,
        top_before_distinct: false,
        projection: vec![
            SelectItem::UnnamedExpr(column("field1")),
            SelectItem::UnnamedExpr(column("field2")),
        ],
        exclude: None,
        into: None,
        from: vec![source_table],
        lateral_views: vec![],
        prewhere: None,
        selection: None,
        group_by: GroupByExpr::Expressions(vec![], vec![]),
        cluster_by: vec![],
        distribute_by: vec![],
        sort_by: vec![],
        having: None,
        named_window: vec![],
        qualify: None,
        window_before_qualify: false,
        value_table_mode: None,
        connect_by: None,
        flavor: SelectFlavor::Standard,
    };
    let order_by = OrderBy {
        kind: OrderByKind::Expressions(vec![OrderByExpr {
            expr: column("field1"),
            options: OrderByOptions {
                asc: None,
                nulls_first: None,
            },
            with_fill: None,
        }]),
        interpolate: None,
    };
    let limit_clause = LimitClause::LimitOffset {
        limit: Some(Expr::Value(number("10").with_empty_span())),
        offset: None,
        limit_by: vec![],
    };
    let expected_query = Box::new(Query {
        with: None,
        body: Box::new(SetExpr::Select(Box::new(select))),
        order_by: Some(order_by),
        limit_clause: Some(limit_clause),
        fetch: None,
        locks: vec![],
        for_clause: None,
        settings: None,
        format_clause: None,
        pipe_operators: vec![],
    });

    // Without a connection clause.
    let without_connection = bigquery().verified_stmt(concat!(
        "EXPORT DATA OPTIONS(",
        "uri = 'gs://bucket/folder/*', ",
        "format = 'PARQUET', ",
        "overwrite = true",
        ") AS ",
        "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10",
    ));
    assert_eq!(
        without_connection,
        Statement::ExportData(ExportData {
            options: expected_options.clone(),
            connection: None,
            query: expected_query.clone(),
        })
    );

    // With `WITH CONNECTION <name>` placed before OPTIONS.
    let with_connection = bigquery().verified_stmt(concat!(
        "EXPORT DATA WITH CONNECTION myconnection.myproject.us OPTIONS(",
        "uri = 'gs://bucket/folder/*', ",
        "format = 'PARQUET', ",
        "overwrite = true",
        ") AS ",
        "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10",
    ));
    assert_eq!(
        with_connection,
        Statement::ExportData(ExportData {
            options: expected_options,
            connection: Some(ObjectName::from(vec![
                Ident::new("myconnection"),
                Ident::new("myproject"),
                Ident::new("us"),
            ])),
            query: expected_query,
        })
    );

    let parse_error = |sql: &str| {
        bigquery()
            .parse_sql_statements(sql)
            .unwrap_err()
            .to_string()
    };

    // at least one option (uri) is required
    assert_eq!(
        parse_error(concat!(
            "EXPORT DATA OPTIONS() AS ",
            "SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10",
        )),
        "sql parser error: Expected: identifier, found: )"
    );

    // the OPTIONS clause itself cannot be omitted
    assert_eq!(
        parse_error(concat!(
            "EXPORT DATA AS SELECT field1, field2 FROM mydataset.table1 ORDER BY field1 LIMIT 10",
        )),
        "sql parser error: Expected: OPTIONS, found: AS"
    );
}
2799+
25702800
#[test]
25712801
fn test_begin_transaction() {
25722802
bigquery().verified_stmt("BEGIN TRANSACTION");

0 commit comments

Comments
 (0)