Skip to content

Commit

Permalink
#399 Add ability to use date expressions in SQL expressions.
Browse files Browse the repository at this point in the history
  • Loading branch information
yruslan committed May 6, 2024
1 parent 72f4bce commit 5958342
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 27 deletions.
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1929,6 +1929,41 @@ Here is an example configuration for a JDBC source:
}
```

You can use date expressions and formatted dates in SQL expressions. Wrap a date expression in `@{}` to use
variables like `@infoDate` and the date functions referenced below inside the curly braces. You can also apply formatting
to variables or expressions by appending `%format%` (like `%yyyy-MM-dd%`) after them.
Examples:

For
```hocon
sql = "SELECT * FROM my_table_@infoDate%yyyyMMdd% WHERE a = b"
```
the result would look like:
```sql
SELECT * FROM my_table_20220218 WHERE a = b
```

For
```hocon
sql = "SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))}'"
```
the result would look like:
```sql
-- the beginning of the previous month
SELECT * FROM my_table WHERE snapshot_date = date'2022-01-01'
```

For
```hocon
sql = "SELECT * FROM my_table_@{plusMonths(@infoDate, 1)}%yyyyMMdd% WHERE a = b"
```
the result would look like:
```sql
SELECT * FROM my_table_20220318 WHERE a = b
-- ^the month is 3 (next month)
```


The above example also shows how you can add a pre-ingestion validation on the number of records in the table
using `minimum.records` parameter.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ object TableReaderJdbcNative {
}

def getFilteredSql(sqlExpression: String, infoDateBegin: LocalDate, infoDateEnd: LocalDate): String = {
val f1 = StringUtils.replaceFormattedDate(sqlExpression, "@dateFrom", infoDateBegin)
val f2 = StringUtils.replaceFormattedDate(f1, "@dateTo", infoDateEnd)
val f3 = StringUtils.replaceFormattedDate(f2, "@date", infoDateEnd)
val f4 = StringUtils.replaceFormattedDate(f3, "@infoDateBegin", infoDateBegin)
val f5 = StringUtils.replaceFormattedDate(f4, "@infoDateEnd", infoDateEnd)
StringUtils.replaceFormattedDate(f5, "@infoDate", infoDateEnd)
val f1 = StringUtils.replaceFormattedDateExpression(sqlExpression, "dateFrom", infoDateBegin)
val f2 = StringUtils.replaceFormattedDateExpression(f1, "dateTo", infoDateEnd)
val f3 = StringUtils.replaceFormattedDateExpression(f2, "date", infoDateEnd)
val f4 = StringUtils.replaceFormattedDateExpression(f3, "infoDateBegin", infoDateBegin)
val f5 = StringUtils.replaceFormattedDateExpression(f4, "infoDateEnd", infoDateEnd)
StringUtils.replaceFormattedDateExpression(f5, "infoDate", infoDateEnd)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,9 @@ object SparkUtils {

def applyFilters(df: DataFrame, filters: Seq[String], infoDate: LocalDate, dateFrom: LocalDate, dateTo: LocalDate): DataFrame = {
filters.foldLeft(df)((df, filter) => {
val f1 = StringUtils.replaceFormattedDate(filter, "@dateFrom", dateFrom)
val f2 = StringUtils.replaceFormattedDate(f1, "@dateTo", dateTo)
val f3 = StringUtils.replaceFormattedDate(f2, "@date", infoDate)
val f1 = StringUtils.replaceFormattedDateExpression(filter, "dateFrom", dateFrom)
val f2 = StringUtils.replaceFormattedDateExpression(f1, "dateTo", dateTo)
val f3 = StringUtils.replaceFormattedDateExpression(f2, "date", infoDate)
val actualFilter = f3.replaceAll("@infoDate", s"date'${infoDate.toString}'")

log.info(s"Applying filter: $actualFilter")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,37 @@ object StringUtils {
base + details
}

def replaceFormattedDate(template: String, dateVar: String, date: LocalDate): String = {
/**
* Replaces a template with date substitution.
*
* For example, given
* {{{
* SELECT * FROM my_table_@date%yyyyMMdd% WHERE a = b
* }}}
* and date is '2022-02-18' the result is:
* {{{
* SELECT * FROM my_table_20220218 WHERE a = b
* }}}
*
* and with date substitution:
* {{{
* SELECT * FROM my_table_@{plusMonths(@date, 1)}%yyyyMMdd% WHERE a = b
* }}}
* the result is
* {{{
* SELECT * FROM my_table_20220318 WHERE a = b
* }}}
*
*
* @param template A template to replace variables in.
* @param dateVar A variable name for the date (does not include '@') - case sensitive.
* @param date The date to replace the variable with.
* @return The processed template.
*/
def replaceFormattedDateExpression(template: String, dateVar: String, date: LocalDate): String = {
val output = new StringBuilder()
val outputPartial = new StringBuilder()
val outputExpression = new StringBuilder()
var state = 0
var i = 0
var j = 0
Expand All @@ -206,16 +234,26 @@ object StringUtils {
val CATCH_VARIABLE = 1
val END_OF_VARIABLE = 2
val END_OF_FORMAT = 3
val DATE_EXPRESSION = 4

val expr = new DateExprEvaluator
expr.setValue(dateVar, date)

while (i < template.length) {
val c = template(i)
state match {
case STATE_TEMPLATE_AS_IS =>
if (c == dateVar(0)) {
state = CATCH_VARIABLE
j = 1
if (c == '@') {
outputExpression.clear()
outputPartial.clear()
outputPartial.append(s"$c")
if (i < template.length - 2 && template(i + 1) == '{') {
i += 1
state = DATE_EXPRESSION
} else {
state = CATCH_VARIABLE
j = 0
outputPartial.append(s"$c")
}
} else {
output.append(s"$c")
}
Expand All @@ -239,22 +277,49 @@ object StringUtils {
state = END_OF_FORMAT
outputPartial.clear()
} else {
output.append(s"$date$c")
if (outputExpression.nonEmpty) {
val calculatedDate = expr.evalDate(outputExpression.toString())
output.append(s"$calculatedDate$c")
} else {
output.append(s"$date$c")
}
state = STATE_TEMPLATE_AS_IS
}
case END_OF_FORMAT =>
if (c == '%') {
state = STATE_TEMPLATE_AS_IS
val formatter = DateTimeFormatter.ofPattern(outputPartial.toString())
output.append(s"${formatter.format(date)}")
if (outputExpression.nonEmpty) {
val calculatedDate = expr.evalDate(outputExpression.toString())
val formatter = DateTimeFormatter.ofPattern(outputPartial.toString())
output.append(s"${formatter.format(calculatedDate)}")
} else {
val formatter = DateTimeFormatter.ofPattern(outputPartial.toString())
output.append(s"${formatter.format(date)}")
}
} else {
outputPartial.append(s"$c")
}
case DATE_EXPRESSION =>
if (c == '}') {
state = END_OF_VARIABLE
if (i == template.length - 1) {
val calculatedDate = expr.evalDate(outputExpression.toString())
output.append(s"$calculatedDate")
}
} else {
outputExpression.append(s"$c")
}

}
i += 1
}
if (state == DATE_EXPRESSION) {
throw new IllegalArgumentException(s"No matching '{' in the date expression: $template")
}
if (state == END_OF_FORMAT) {
throw new IllegalArgumentException(s"No matching '%' in the formatted date expression: $template")
}
output.toString()
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -227,72 +227,124 @@ class StringUtilsSuite extends AnyWordSpec {
}
}

"replaceFormattedDate" should {
"replaceFormattedDateExpression" should {
val infoDate = LocalDate.of(2022, 2, 18)

"work with normal variables" in {
val template = "SELECT @dat FROM my_table_@date + 1"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "SELECT @dat FROM my_table_2022-02-18 + 1")
}

"work with variables at the end" in {
val template = "SELECT @dat FROM my_table_@date"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "SELECT @dat FROM my_table_2022-02-18")
}

"work with just variables" in {
val template = "@date"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "2022-02-18")
}

"work with 2 variables" in {
val template = "@date @date"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "2022-02-18 2022-02-18")
}

"work with formatted variables" in {
val template = "SELECT * FROM my_table_@date%yyyyMMdd% WHERE a = b"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "SELECT * FROM my_table_20220218 WHERE a = b")
}

"work with just formatted variables" in {
val template = "@date%yyyyMMdd%"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "20220218")
}

"work with 2 formatted variables" in {
val template = "@date%yyyyMMdd%@date%ddMMyyy%"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "2022021818022022")
}

"work with partial formatter" in {
val template = "@date%yyyyMM%"

val replaced = replaceFormattedDate(template, "@date", infoDate)
val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "202202")
}

"work with expressions" in {
val template = "my_table_@{@date + 1}"

val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "my_table_2022-02-19")
}

"work with formatted expressions" in {
val template = "my_table_@{@date + 1}%yyyyMM%"

val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "my_table_202202")
}

"work with formatted expressions 2" in {
val template = "SELECT * FROM my_table_@{plusMonths(@date, 1)}%yyyyMMdd% WHERE a = b"

val replaced = replaceFormattedDateExpression(template, "date", infoDate)

assert(replaced == "SELECT * FROM my_table_20220318 WHERE a = b")
}

"work with formatted expressions 3" in {
val template = "SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))}'"

val replaced = replaceFormattedDateExpression(template, "infoDate", infoDate)

assert(replaced == "SELECT * FROM my_table WHERE snapshot_date = date'2022-01-01'")
}

"throw an exception if format is incomplete" in {
val template = "SELECT * FROM my_table WHERE snapshot_date = date'@infoDate%yyyy-mm-dd'"

val ex = intercept[IllegalArgumentException] {
replaceFormattedDateExpression(template, "infoDate", infoDate)
}

assert(ex.getMessage.contains("No matching '%' in the formatted date expression: SELECT * FROM my_table WHERE snapshot_date = date'@infoDate%yyyy-mm-dd'"))
}

"throw an exception if the expression is incomplete" in {
val template = "SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))'"

val ex = intercept[IllegalArgumentException] {
replaceFormattedDateExpression(template, "infoDate", infoDate)
}

assert(ex.getMessage.contains("No matching '{' in the date expression: SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))'"))
}
}

}

0 comments on commit 5958342

Please sign in to comment.