Skip to content

Commit 31ae446

Browse files
committed
[SPARK-29623][SQL] do not allow multiple unit TO unit statements in interval literal syntax
### What changes were proposed in this pull request? re-arrange the parser rules to make it clear that multiple unit TO unit statement like `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH` is not allowed. ### Why are the changes needed? This is definitely an accident that we support such a weird syntax in the past. It's not supported by any other DBs and I can't think of any use case of it. Also no test covers this syntax in the current codebase. ### Does this PR introduce any user-facing change? Yes, and a migration guide item is added. ### How was this patch tested? new tests. Closes apache#26285 from cloud-fan/syntax. Authored-by: Wenchen Fan <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 28ccd31 commit 31ae446

File tree

7 files changed

+406
-91
lines changed

7 files changed

+406
-91
lines changed

docs/sql-migration-guide.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ license: |
99
The ASF licenses this file to You under the Apache License, Version 2.0
1010
(the "License"); you may not use this file except in compliance with
1111
the License. You may obtain a copy of the License at
12-
12+
1313
http://www.apache.org/licenses/LICENSE-2.0
14-
14+
1515
Unless required by applicable law or agreed to in writing, software
1616
distributed under the License is distributed on an "AS IS" BASIS,
1717
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -218,6 +218,8 @@ license: |
218218

219219
- Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`.
220220

221+
- Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception.
222+
221223
## Upgrading from Spark SQL 2.4 to 2.4.1
222224

223225
- The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ singleTableSchema
8080
;
8181

8282
singleInterval
83-
: INTERVAL? (intervalValue intervalUnit)+ EOF
83+
: INTERVAL? multiUnitsInterval EOF
8484
;
8585

8686
statement
@@ -759,12 +759,24 @@ booleanValue
759759
;
760760

761761
interval
762-
: {ansi}? INTERVAL? intervalField+
763-
| {!ansi}? INTERVAL intervalField*
762+
: INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
763+
| {ansi}? (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
764764
;
765765

766-
intervalField
767-
: value=intervalValue unit=intervalUnit (TO to=intervalUnit)?
766+
errorCapturingMultiUnitsInterval
767+
: multiUnitsInterval unitToUnitInterval?
768+
;
769+
770+
multiUnitsInterval
771+
: (intervalValue intervalUnit)+
772+
;
773+
774+
errorCapturingUnitToUnitInterval
775+
: body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
776+
;
777+
778+
unitToUnitInterval
779+
: value=intervalValue from=intervalUnit TO to=intervalUnit
768780
;
769781

770782
intervalValue

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 84 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -102,20 +102,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
102102
}
103103

104104
override def visitSingleInterval(ctx: SingleIntervalContext): CalendarInterval = {
105-
withOrigin(ctx) {
106-
val units = ctx.intervalUnit().asScala.map {
107-
u => normalizeInternalUnit(u.getText.toLowerCase(Locale.ROOT))
108-
}.toArray
109-
val values = ctx.intervalValue().asScala.map(getIntervalValue).toArray
110-
try {
111-
IntervalUtils.fromUnitStrings(units, values)
112-
} catch {
113-
case i: IllegalArgumentException =>
114-
val e = new ParseException(i.getMessage, ctx)
115-
e.setStackTrace(i.getStackTrace)
116-
throw e
117-
}
118-
}
105+
withOrigin(ctx)(visitMultiUnitsInterval(ctx.multiUnitsInterval))
119106
}
120107

121108
/* ********************************************************************************************
@@ -1940,71 +1927,102 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
19401927
}
19411928

19421929
/**
1943-
* Create a [[CalendarInterval]] literal expression. An interval expression can contain multiple
1944-
* unit value pairs, for instance: interval 2 months 2 days.
1930+
* Create a [[CalendarInterval]] literal expression. Two syntaxes are supported:
1931+
* - multiple unit value pairs, for instance: interval 2 months 2 days.
1932+
* - from-to unit, for instance: interval '1-2' year to month.
19451933
*/
19461934
override def visitInterval(ctx: IntervalContext): Literal = withOrigin(ctx) {
1947-
val intervals = ctx.intervalField.asScala.map(visitIntervalField)
1948-
validate(intervals.nonEmpty, "at least one time unit should be given for interval literal", ctx)
1949-
Literal(intervals.reduce(_.add(_)))
1935+
if (ctx.errorCapturingMultiUnitsInterval != null) {
1936+
val innerCtx = ctx.errorCapturingMultiUnitsInterval
1937+
if (innerCtx.unitToUnitInterval != null) {
1938+
throw new ParseException(
1939+
"Can only have a single from-to unit in the interval literal syntax",
1940+
innerCtx.unitToUnitInterval)
1941+
}
1942+
Literal(visitMultiUnitsInterval(innerCtx.multiUnitsInterval), CalendarIntervalType)
1943+
} else if (ctx.errorCapturingUnitToUnitInterval != null) {
1944+
val innerCtx = ctx.errorCapturingUnitToUnitInterval
1945+
if (innerCtx.error1 != null || innerCtx.error2 != null) {
1946+
val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2
1947+
throw new ParseException(
1948+
"Can only have a single from-to unit in the interval literal syntax",
1949+
errorCtx)
1950+
}
1951+
Literal(visitUnitToUnitInterval(innerCtx.body), CalendarIntervalType)
1952+
} else {
1953+
throw new ParseException("at least one time unit should be given for interval literal", ctx)
1954+
}
19501955
}
19511956

19521957
/**
1953-
* Create a [[CalendarInterval]] for a unit value pair. Two unit configuration types are
1954-
* supported:
1955-
* - Single unit.
1956-
* - From-To unit ('YEAR TO MONTH', 'DAY TO HOUR', 'DAY TO MINUTE', 'DAY TO SECOND',
1957-
* 'HOUR TO MINUTE', 'HOUR TO SECOND' and 'MINUTE TO SECOND' are supported).
1958+
* Creates a [[CalendarInterval]] with multiple unit value pairs, e.g. 1 YEAR 2 DAYS.
19581959
*/
1959-
override def visitIntervalField(ctx: IntervalFieldContext): CalendarInterval = withOrigin(ctx) {
1960-
import ctx._
1961-
val s = getIntervalValue(value)
1962-
try {
1963-
val unitText = unit.getText.toLowerCase(Locale.ROOT)
1964-
val interval = (unitText, Option(to).map(_.getText.toLowerCase(Locale.ROOT))) match {
1965-
case (u, None) =>
1966-
IntervalUtils.fromUnitStrings(Array(normalizeInternalUnit(u)), Array(s))
1967-
case ("year", Some("month")) =>
1968-
IntervalUtils.fromYearMonthString(s)
1969-
case ("day", Some("hour")) =>
1970-
IntervalUtils.fromDayTimeString(s, "day", "hour")
1971-
case ("day", Some("minute")) =>
1972-
IntervalUtils.fromDayTimeString(s, "day", "minute")
1973-
case ("day", Some("second")) =>
1974-
IntervalUtils.fromDayTimeString(s, "day", "second")
1975-
case ("hour", Some("minute")) =>
1976-
IntervalUtils.fromDayTimeString(s, "hour", "minute")
1977-
case ("hour", Some("second")) =>
1978-
IntervalUtils.fromDayTimeString(s, "hour", "second")
1979-
case ("minute", Some("second")) =>
1980-
IntervalUtils.fromDayTimeString(s, "minute", "second")
1981-
case (from, Some(t)) =>
1982-
throw new ParseException(s"Intervals FROM $from TO $t are not supported.", ctx)
1960+
override def visitMultiUnitsInterval(ctx: MultiUnitsIntervalContext): CalendarInterval = {
1961+
withOrigin(ctx) {
1962+
val units = ctx.intervalUnit().asScala.map { unit =>
1963+
val u = unit.getText.toLowerCase(Locale.ROOT)
1964+
// Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/...
1965+
if (u.endsWith("s")) u.substring(0, u.length - 1) else u
1966+
}.toArray
1967+
1968+
val values = ctx.intervalValue().asScala.map { value =>
1969+
if (value.STRING() != null) {
1970+
string(value.STRING())
1971+
} else {
1972+
value.getText
1973+
}
1974+
}.toArray
1975+
1976+
try {
1977+
IntervalUtils.fromUnitStrings(units, values)
1978+
} catch {
1979+
case i: IllegalArgumentException =>
1980+
val e = new ParseException(i.getMessage, ctx)
1981+
e.setStackTrace(i.getStackTrace)
1982+
throw e
19831983
}
1984-
validate(interval != null, "No interval can be constructed", ctx)
1985-
interval
1986-
} catch {
1987-
// Handle Exceptions thrown by CalendarInterval
1988-
case e: IllegalArgumentException =>
1989-
val pe = new ParseException(e.getMessage, ctx)
1990-
pe.setStackTrace(e.getStackTrace)
1991-
throw pe
19921984
}
19931985
}
19941986

1995-
private def getIntervalValue(value: IntervalValueContext): String = {
1996-
if (value.STRING() != null) {
1997-
string(value.STRING())
1998-
} else {
1999-
value.getText
1987+
/**
1988+
* Creates a [[CalendarInterval]] with from-to unit, e.g. '2-1' YEAR TO MONTH.
1989+
*/
1990+
override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = {
1991+
withOrigin(ctx) {
1992+
val value = Option(ctx.intervalValue.STRING).map(string).getOrElse {
1993+
throw new ParseException("The value of from-to unit must be a string", ctx.intervalValue)
1994+
}
1995+
try {
1996+
val from = ctx.from.getText.toLowerCase(Locale.ROOT)
1997+
val to = ctx.to.getText.toLowerCase(Locale.ROOT)
1998+
(from, to) match {
1999+
case ("year", "month") =>
2000+
IntervalUtils.fromYearMonthString(value)
2001+
case ("day", "hour") =>
2002+
IntervalUtils.fromDayTimeString(value, "day", "hour")
2003+
case ("day", "minute") =>
2004+
IntervalUtils.fromDayTimeString(value, "day", "minute")
2005+
case ("day", "second") =>
2006+
IntervalUtils.fromDayTimeString(value, "day", "second")
2007+
case ("hour", "minute") =>
2008+
IntervalUtils.fromDayTimeString(value, "hour", "minute")
2009+
case ("hour", "second") =>
2010+
IntervalUtils.fromDayTimeString(value, "hour", "second")
2011+
case ("minute", "second") =>
2012+
IntervalUtils.fromDayTimeString(value, "minute", "second")
2013+
case _ =>
2014+
throw new ParseException(s"Intervals FROM $from TO $to are not supported.", ctx)
2015+
}
2016+
} catch {
2017+
// Handle Exceptions thrown by CalendarInterval
2018+
case e: IllegalArgumentException =>
2019+
val pe = new ParseException(e.getMessage, ctx)
2020+
pe.setStackTrace(e.getStackTrace)
2021+
throw pe
2022+
}
20002023
}
20012024
}
20022025

2003-
// Handle plural forms, e.g: yearS/monthS/weekS/dayS/hourS/minuteS/hourS/...
2004-
private def normalizeInternalUnit(s: String): String = {
2005-
if (s.endsWith("s")) s.substring(0, s.length - 1) else s
2006-
}
2007-
20082026
/* ********************************************************************************************
20092027
* DataType parsing
20102028
* ******************************************************************************************** */

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,7 @@ class ExpressionParserSuite extends AnalysisTest {
644644

645645
// Non Existing unit
646646
intercept("interval 10 nanoseconds",
647-
"no viable alternative at input 'interval 10 nanoseconds'")
647+
"no viable alternative at input '10 nanoseconds'")
648648

649649
// Year-Month intervals.
650650
val yearMonthValues = Seq("123-10", "496-0", "-2-3", "-123-0")
@@ -679,16 +679,13 @@ class ExpressionParserSuite extends AnalysisTest {
679679
}
680680

681681
// Unknown FROM TO intervals
682-
intercept("interval 10 month to second",
682+
intercept("interval '10' month to second",
683683
"Intervals FROM month TO second are not supported.")
684684

685685
// Composed intervals.
686686
checkIntervals(
687687
"3 months 4 days 22 seconds 1 millisecond",
688688
Literal(new CalendarInterval(3, 4, 22001000L)))
689-
checkIntervals(
690-
"3 years '-1-10' year to month 3 weeks '1 0:0:2' day to second",
691-
Literal(new CalendarInterval(14, 22, 2 * CalendarInterval.MICROS_PER_SECOND)))
692689
}
693690

694691
test("SPARK-23264 Interval Compatibility tests") {

sql/core/src/test/resources/sql-tests/inputs/literals.sql

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,26 @@ select interval '3 year 1 hour';
130130
select integer '7';
131131
select integer'7';
132132
select integer '2147483648';
133+
134+
-- malformed interval literal
135+
select interval;
136+
select interval 1 fake_unit;
137+
select interval 1 year to month;
138+
select interval '1' year to second;
139+
select interval '10-9' year to month '2-1' year to month;
140+
select interval '10-9' year to month '12:11:10' hour to second;
141+
select interval '1 15:11' day to minute '12:11:10' hour to second;
142+
select interval 1 year '2-1' year to month;
143+
select interval 1 year '12:11:10' hour to second;
144+
select interval '10-9' year to month '1' year;
145+
select interval '12:11:10' hour to second '1' year;
146+
-- malformed interval literal with ansi mode
147+
SET spark.sql.ansi.enabled=true;
148+
select interval;
149+
select interval 1 fake_unit;
150+
select interval 1 year to month;
151+
select 1 year to month;
152+
select interval '1' year to second;
153+
select '1' year to second;
154+
select interval 1 year '2-1' year to month;
155+
select 1 year '2-1' year to month;

0 commit comments

Comments
 (0)