Skip to content

Commit 5958342

Browse files
committed
#399 Add ability to use date expressions in SQL expressions.
1 parent 72f4bce commit 5958342

File tree

5 files changed

+179
-27
lines changed

5 files changed

+179
-27
lines changed

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1929,6 +1929,41 @@ Here is an example configuration for a JDBC source:
19291929
}
19301930
```
19311931

1932+
You can use date expressions and formatted dates in SQL expressions. Wrap a date expression in `@{}` to use
1933+
variables like `@infoDate` and the date functions referenced below inside the curly braces. You can also format a date
1934+
by adding `%format%` (like `%yyyy-MM-dd%`) right after a variable or an expression.
1935+
Examples:
1936+
1937+
For
1938+
```hocon
1939+
sql = "SELECT * FROM my_table_@infoDate%yyyyMMdd% WHERE a = b"
1940+
```
1941+
the result would look like:
1942+
```sql
1943+
SELECT * FROM my_table_20220218 WHERE a = b
1944+
```
1945+
1946+
For
1947+
```hocon
1948+
sql = "SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))}'"
1949+
```
1950+
the result would look like:
1951+
```sql
1952+
-- the beginning of the previous month
1953+
SELECT * FROM my_table WHERE snapshot_date = date'2022-01-01'
1954+
```
1955+
1956+
For
1957+
```hocon
1958+
sql = "SELECT * FROM my_table_@{plusMonths(@infoDate, 1)}%yyyyMMdd% WHERE a = b"
1959+
```
1960+
the result would look like:
1961+
```sql
1962+
SELECT * FROM my_table_20220318 WHERE a = b
1963+
-- ^the month is 3 (next month)
1964+
```
1965+
1966+
19321967
The above example also shows how you can add a pre-ingestion validation on the number of records in the table
19331968
using `minimum.records` parameter.
19341969

pramen/core/src/main/scala/za/co/absa/pramen/core/reader/TableReaderJdbcNative.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,11 +102,11 @@ object TableReaderJdbcNative {
102102
}
103103

104104
def getFilteredSql(sqlExpression: String, infoDateBegin: LocalDate, infoDateEnd: LocalDate): String = {
105-
val f1 = StringUtils.replaceFormattedDate(sqlExpression, "@dateFrom", infoDateBegin)
106-
val f2 = StringUtils.replaceFormattedDate(f1, "@dateTo", infoDateEnd)
107-
val f3 = StringUtils.replaceFormattedDate(f2, "@date", infoDateEnd)
108-
val f4 = StringUtils.replaceFormattedDate(f3, "@infoDateBegin", infoDateBegin)
109-
val f5 = StringUtils.replaceFormattedDate(f4, "@infoDateEnd", infoDateEnd)
110-
StringUtils.replaceFormattedDate(f5, "@infoDate", infoDateEnd)
105+
val f1 = StringUtils.replaceFormattedDateExpression(sqlExpression, "dateFrom", infoDateBegin)
106+
val f2 = StringUtils.replaceFormattedDateExpression(f1, "dateTo", infoDateEnd)
107+
val f3 = StringUtils.replaceFormattedDateExpression(f2, "date", infoDateEnd)
108+
val f4 = StringUtils.replaceFormattedDateExpression(f3, "infoDateBegin", infoDateBegin)
109+
val f5 = StringUtils.replaceFormattedDateExpression(f4, "infoDateEnd", infoDateEnd)
110+
StringUtils.replaceFormattedDateExpression(f5, "infoDate", infoDateEnd)
111111
}
112112
}

pramen/core/src/main/scala/za/co/absa/pramen/core/utils/SparkUtils.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,9 @@ object SparkUtils {
194194

195195
def applyFilters(df: DataFrame, filters: Seq[String], infoDate: LocalDate, dateFrom: LocalDate, dateTo: LocalDate): DataFrame = {
196196
filters.foldLeft(df)((df, filter) => {
197-
val f1 = StringUtils.replaceFormattedDate(filter, "@dateFrom", dateFrom)
198-
val f2 = StringUtils.replaceFormattedDate(f1, "@dateTo", dateTo)
199-
val f3 = StringUtils.replaceFormattedDate(f2, "@date", infoDate)
197+
val f1 = StringUtils.replaceFormattedDateExpression(filter, "dateFrom", dateFrom)
198+
val f2 = StringUtils.replaceFormattedDateExpression(f1, "dateTo", dateTo)
199+
val f3 = StringUtils.replaceFormattedDateExpression(f2, "date", infoDate)
200200
val actualFilter = f3.replaceAll("@infoDate", s"date'${infoDate.toString}'")
201201

202202
log.info(s"Applying filter: $actualFilter")

pramen/core/src/main/scala/za/co/absa/pramen/core/utils/StringUtils.scala

Lines changed: 74 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -195,9 +195,37 @@ object StringUtils {
195195
base + details
196196
}
197197

198-
def replaceFormattedDate(template: String, dateVar: String, date: LocalDate): String = {
198+
/**
199+
* Substitutes a date variable and date expressions (optionally formatted) in a template.
200+
*
201+
* For example, given
202+
* {{{
203+
* SELECT * FROM my_table_@date%yyyyMMdd% WHERE a = b
204+
* }}}
205+
* and date is '2022-02-18' the result is:
206+
* {{{
207+
* SELECT * FROM my_table_20220218 WHERE a = b
208+
* }}}
209+
*
210+
* and with a date expression:
211+
* {{{
212+
* SELECT * FROM my_table_@{plusMonths(@date, 1)}%yyyyMMdd% WHERE a = b
213+
* }}}
214+
* the result is
215+
* {{{
216+
* SELECT * FROM my_table_20220318 WHERE a = b
217+
* }}}
218+
*
219+
*
220+
* @param template A template to replace variables in.
221+
* @param dateVar A variable name for the date (does not include '@') - case sensitive.
222+
* @param date     The date to replace the variable with.
223+
* @return The processed template.
224+
*/
225+
def replaceFormattedDateExpression(template: String, dateVar: String, date: LocalDate): String = {
199226
val output = new StringBuilder()
200227
val outputPartial = new StringBuilder()
228+
val outputExpression = new StringBuilder()
201229
var state = 0
202230
var i = 0
203231
var j = 0
@@ -206,16 +234,26 @@ object StringUtils {
206234
val CATCH_VARIABLE = 1
207235
val END_OF_VARIABLE = 2
208236
val END_OF_FORMAT = 3
237+
val DATE_EXPRESSION = 4
238+
239+
val expr = new DateExprEvaluator
240+
expr.setValue(dateVar, date)
209241

210242
while (i < template.length) {
211243
val c = template(i)
212244
state match {
213245
case STATE_TEMPLATE_AS_IS =>
214-
if (c == dateVar(0)) {
215-
state = CATCH_VARIABLE
216-
j = 1
246+
if (c == '@') {
247+
outputExpression.clear()
217248
outputPartial.clear()
218-
outputPartial.append(s"$c")
249+
if (i < template.length - 2 && template(i + 1) == '{') {
250+
i += 1
251+
state = DATE_EXPRESSION
252+
} else {
253+
state = CATCH_VARIABLE
254+
j = 0
255+
outputPartial.append(s"$c")
256+
}
219257
} else {
220258
output.append(s"$c")
221259
}
@@ -239,22 +277,49 @@ object StringUtils {
239277
state = END_OF_FORMAT
240278
outputPartial.clear()
241279
} else {
242-
output.append(s"$date$c")
280+
if (outputExpression.nonEmpty) {
281+
val calculatedDate = expr.evalDate(outputExpression.toString())
282+
output.append(s"$calculatedDate$c")
283+
} else {
284+
output.append(s"$date$c")
285+
}
243286
state = STATE_TEMPLATE_AS_IS
244287
}
245288
case END_OF_FORMAT =>
246289
if (c == '%') {
247290
state = STATE_TEMPLATE_AS_IS
248-
val formatter = DateTimeFormatter.ofPattern(outputPartial.toString())
249-
output.append(s"${formatter.format(date)}")
291+
if (outputExpression.nonEmpty) {
292+
val calculatedDate = expr.evalDate(outputExpression.toString())
293+
val formatter = DateTimeFormatter.ofPattern(outputPartial.toString())
294+
output.append(s"${formatter.format(calculatedDate)}")
295+
} else {
296+
val formatter = DateTimeFormatter.ofPattern(outputPartial.toString())
297+
output.append(s"${formatter.format(date)}")
298+
}
250299
} else {
251300
outputPartial.append(s"$c")
252301
}
302+
case DATE_EXPRESSION =>
303+
if (c == '}') {
304+
state = END_OF_VARIABLE
305+
if (i == template.length - 1) {
306+
val calculatedDate = expr.evalDate(outputExpression.toString())
307+
output.append(s"$calculatedDate")
308+
}
309+
} else {
310+
outputExpression.append(s"$c")
311+
}
312+
253313
}
254314
i += 1
255315
}
316+
if (state == DATE_EXPRESSION) {
317+
throw new IllegalArgumentException(s"No matching '{' in the date expression: $template")
318+
}
319+
if (state == END_OF_FORMAT) {
320+
throw new IllegalArgumentException(s"No matching '%' in the formatted date expression: $template")
321+
}
256322
output.toString()
257323
}
258324

259-
260325
}

pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/StringUtilsSuite.scala

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -227,72 +227,124 @@ class StringUtilsSuite extends AnyWordSpec {
227227
}
228228
}
229229

230-
"replaceFormattedDate" should {
230+
"replaceFormattedDateExpression" should {
231231
val infoDate = LocalDate.of(2022, 2, 18)
232232

233233
"work with normal variables" in {
234234
val template = "SELECT @dat FROM my_table_@date + 1"
235235

236-
val replaced = replaceFormattedDate(template, "@date", infoDate)
236+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
237237

238238
assert(replaced == "SELECT @dat FROM my_table_2022-02-18 + 1")
239239
}
240240

241241
"work with variables at the end" in {
242242
val template = "SELECT @dat FROM my_table_@date"
243243

244-
val replaced = replaceFormattedDate(template, "@date", infoDate)
244+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
245245

246246
assert(replaced == "SELECT @dat FROM my_table_2022-02-18")
247247
}
248248

249249
"work with just variables" in {
250250
val template = "@date"
251251

252-
val replaced = replaceFormattedDate(template, "@date", infoDate)
252+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
253253

254254
assert(replaced == "2022-02-18")
255255
}
256256

257257
"work with 2 variables" in {
258258
val template = "@date @date"
259259

260-
val replaced = replaceFormattedDate(template, "@date", infoDate)
260+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
261261

262262
assert(replaced == "2022-02-18 2022-02-18")
263263
}
264264

265265
"work with formatted variables" in {
266266
val template = "SELECT * FROM my_table_@date%yyyyMMdd% WHERE a = b"
267267

268-
val replaced = replaceFormattedDate(template, "@date", infoDate)
268+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
269269

270270
assert(replaced == "SELECT * FROM my_table_20220218 WHERE a = b")
271271
}
272272

273273
"work with just formatted variables" in {
274274
val template = "@date%yyyyMMdd%"
275275

276-
val replaced = replaceFormattedDate(template, "@date", infoDate)
276+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
277277

278278
assert(replaced == "20220218")
279279
}
280280

281281
"work with 2 formatted variables" in {
282282
val template = "@date%yyyyMMdd%@date%ddMMyyy%"
283283

284-
val replaced = replaceFormattedDate(template, "@date", infoDate)
284+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
285285

286286
assert(replaced == "2022021818022022")
287287
}
288288

289289
"work with partial formatter" in {
290290
val template = "@date%yyyyMM%"
291291

292-
val replaced = replaceFormattedDate(template, "@date", infoDate)
292+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
293293

294294
assert(replaced == "202202")
295295
}
296+
297+
"work with expressions" in {
298+
val template = "my_table_@{@date + 1}"
299+
300+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
301+
302+
assert(replaced == "my_table_2022-02-19")
303+
}
304+
305+
"work with formatted expressions" in {
306+
val template = "my_table_@{@date + 1}%yyyyMM%"
307+
308+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
309+
310+
assert(replaced == "my_table_202202")
311+
}
312+
313+
"work with formatted expressions 2" in {
314+
val template = "SELECT * FROM my_table_@{plusMonths(@date, 1)}%yyyyMMdd% WHERE a = b"
315+
316+
val replaced = replaceFormattedDateExpression(template, "date", infoDate)
317+
318+
assert(replaced == "SELECT * FROM my_table_20220318 WHERE a = b")
319+
}
320+
321+
"work with formatted expressions 3" in {
322+
val template = "SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))}'"
323+
324+
val replaced = replaceFormattedDateExpression(template, "infoDate", infoDate)
325+
326+
assert(replaced == "SELECT * FROM my_table WHERE snapshot_date = date'2022-01-01'")
327+
}
328+
329+
"throw an exception if format is incomplete" in {
330+
val template = "SELECT * FROM my_table WHERE snapshot_date = date'@infoDate%yyyy-mm-dd'"
331+
332+
val ex = intercept[IllegalArgumentException] {
333+
replaceFormattedDateExpression(template, "infoDate", infoDate)
334+
}
335+
336+
assert(ex.getMessage.contains("No matching '%' in the formatted date expression: SELECT * FROM my_table WHERE snapshot_date = date'@infoDate%yyyy-mm-dd'"))
337+
}
338+
339+
"throw an exception if the expression is incomplete" in {
340+
val template = "SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))'"
341+
342+
val ex = intercept[IllegalArgumentException] {
343+
replaceFormattedDateExpression(template, "infoDate", infoDate)
344+
}
345+
346+
assert(ex.getMessage.contains("No matching '{' in the date expression: SELECT * FROM my_table WHERE snapshot_date = date'@{beginOfMonth(minusMonths(@infoDate, 1))'"))
347+
}
296348
}
297349

298350
}

0 commit comments

Comments
 (0)