Skip to content

Commit cb68ec6

Browse files
committed
#501 Fix Jdbc Native treating of nullable fields.
1 parent 0c2e6a5 commit cb68ec6

File tree

7 files changed

+81
-53
lines changed

7 files changed

+81
-53
lines changed

pramen/core/src/main/scala/za/co/absa/pramen/core/utils/impl/ResultSetToRowIterator.scala

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,13 +112,27 @@ class ResultSetToRowIterator(rs: ResultSet, sanitizeDateTime: Boolean, incorrect
112112

113113
// WARNING. Do not forget that `null` is a valid value returned by RecordSet methods that return a reference type objects.
114114
dataType match {
115-
case BIT | BOOLEAN => rs.getBoolean(columnIndex)
116-
case TINYINT => rs.getByte(columnIndex)
117-
case SMALLINT => rs.getShort(columnIndex)
118-
case INTEGER => rs.getInt(columnIndex)
119-
case BIGINT => rs.getLong(columnIndex)
120-
case FLOAT => rs.getFloat(columnIndex)
121-
case DOUBLE => rs.getDouble(columnIndex)
115+
case BIT | BOOLEAN =>
116+
val v = rs.getBoolean(columnIndex)
117+
if (rs.wasNull()) null else v
118+
case TINYINT =>
119+
val v = rs.getByte(columnIndex)
120+
if (rs.wasNull()) null else v
121+
case SMALLINT =>
122+
val v = rs.getShort(columnIndex)
123+
if (rs.wasNull()) null else v
124+
case INTEGER =>
125+
val v = rs.getInt(columnIndex)
126+
if (rs.wasNull()) null else v
127+
case BIGINT =>
128+
val v = rs.getLong(columnIndex)
129+
if (rs.wasNull()) null else v
130+
case FLOAT =>
131+
val v = rs.getFloat(columnIndex)
132+
if (rs.wasNull()) null else v
133+
case DOUBLE =>
134+
val v = rs.getDouble(columnIndex)
135+
if (rs.wasNull()) null else v
122136
case REAL => rs.getBigDecimal(columnIndex)
123137
case NUMERIC => rs.getBigDecimal(columnIndex)
124138
case DATE => sanitizeDate(rs.getDate(columnIndex))

pramen/core/src/test/scala/za/co/absa/pramen/core/pipeline/IngestionJobSuite.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ class IngestionJobSuite extends AnyWordSpec with SparkTestBase with TextComparis
278278
assert(df.schema.fields(2).name == "DESCRIPTION")
279279
assert(df.schema.fields(3).name == "EMAIL")
280280
assert(df.schema.fields(4).name == "FOUNDED")
281-
assert(df.schema.fields(5).name == "LAST_UPDATED")
281+
assert(df.schema.fields(5).name == "IS_TAX_FREE")
282282
}
283283

284284
"get the source data frame for source with disabled count query" in {
@@ -301,7 +301,7 @@ class IngestionJobSuite extends AnyWordSpec with SparkTestBase with TextComparis
301301
assert(df.schema.fields(2).name == "DESCRIPTION")
302302
assert(df.schema.fields(3).name == "EMAIL")
303303
assert(df.schema.fields(4).name == "FOUNDED")
304-
assert(df.schema.fields(5).name == "LAST_UPDATED")
304+
assert(df.schema.fields(5).name == "IS_TAX_FREE")
305305

306306
TransientTableManager.reset()
307307
}

pramen/core/src/test/scala/za/co/absa/pramen/core/samples/RdbExampleTable.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ object RdbExampleTable {
6464
| description VARCHAR NOT NULL,
6565
| email VARCHAR(50) NOT NULL,
6666
| founded DATE NOT NULL,
67+
| is_tax_free BOOLEAN,
68+
| tax_id BIGINT,
6769
| last_updated TIMESTAMP NOT NULL,
6870
| info_date VARCHAR(10) NOT NULL,
6971
| PRIMARY KEY (id))
@@ -75,10 +77,10 @@ object RdbExampleTable {
7577
)
7678

7779
val inserts: Seq[String] = Seq(
78-
s"INSERT INTO $tableName VALUES (1,'Company1', 'description1', '[email protected]', DATE '2000-10-11', TIMESTAMP '2020-11-04 10:11:00+02:00', '2022-02-18')",
79-
s"INSERT INTO $tableName VALUES (2,'Company2', 'description2', '[email protected]', DATE '2005-03-29', TIMESTAMP '2020-11-04 10:22:33+02:00', '2022-02-18')",
80-
s"INSERT INTO $tableName VALUES (3,'Company3', 'description3', '[email protected]', DATE '2016-12-30', TIMESTAMP '2020-11-04 10:33:59+02:00', '2022-02-18')",
81-
s"INSERT INTO $tableName VALUES (4,'Company4', 'description4', '[email protected]', DATE '2016-12-31', TIMESTAMP '2020-11-04 10:34:22+02:00', '2022-02-19')"
80+
s"INSERT INTO $tableName VALUES (1,'Company1', 'description1', '[email protected]', DATE '2000-10-11', FALSE, 123, TIMESTAMP '2020-11-04 10:11:00+02:00', '2022-02-18')",
81+
s"INSERT INTO $tableName VALUES (2,'Company2', 'description2', '[email protected]', DATE '2005-03-29', TRUE, 456, TIMESTAMP '2020-11-04 10:22:33+02:00', '2022-02-18')",
82+
s"INSERT INTO $tableName VALUES (3,'Company3', 'description3', '[email protected]', DATE '2016-12-30', FALSE, NULL, TIMESTAMP '2020-11-04 10:33:59+02:00', '2022-02-18')",
83+
s"INSERT INTO $tableName VALUES (4,'Company4', 'description4', '[email protected]', DATE '2016-12-31', NULL, NULL, TIMESTAMP '2020-11-04 10:34:22+02:00', '2022-02-19')"
8284
)
8385
}
8486

pramen/core/src/test/scala/za/co/absa/pramen/core/tests/reader/TableReaderJdbcSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ class TableReaderJdbcSuite extends AnyWordSpec with BeforeAndAfterAll with Spark
386386
val df = reader.getData(Query.Table("company"), null, null, Seq.empty[String])
387387

388388
assert(df.count() == 4)
389-
assert(df.schema.fields.length == 7)
389+
assert(df.schema.fields.length == 9)
390390
}
391391

392392
"return selected column for a table snapshot-like query" in {

pramen/core/src/test/scala/za/co/absa/pramen/core/tests/sql/SqlGeneratorLoaderSuite.scala

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -711,15 +711,15 @@ class SqlGeneratorLoaderSuite extends AnyWordSpec with RelationalDbFixture {
711711
}
712712

713713
"generate data queries without date ranges" in {
714-
assert(gen.getDataQuery("company", Nil, None) == "SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company")
714+
assert(gen.getDataQuery("company", Nil, None) == "SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company")
715715
}
716716

717717
"generate data queries when list of columns is specified" in {
718718
assert(genEscaped.getDataQuery("company", columns, None) == "SELECT 'A'n, 'D'n, 'Column with spaces'n FROM 'company'n")
719719
}
720720

721721
"generate data queries with limit clause date ranges" in {
722-
assert(gen.getDataQuery("company", Nil, Some(100)) == "SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company LIMIT 100")
722+
assert(gen.getDataQuery("company", Nil, Some(100)) == "SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company LIMIT 100")
723723
}
724724

725725
"generate ranged count queries" when {
@@ -761,30 +761,30 @@ class SqlGeneratorLoaderSuite extends AnyWordSpec with RelationalDbFixture {
761761
"generate ranged data queries" when {
762762
"date is in DATE format" in {
763763
assert(gen.getDataQuery("company", date1, date1, Nil, None) ==
764-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = date'2020-08-17'")
764+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = date'2020-08-17'")
765765
assert(gen.getDataQuery("company", date1, date2, Nil, None) ==
766-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= date'2020-08-17' AND D <= date'2020-08-30'")
766+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= date'2020-08-17' AND D <= date'2020-08-30'")
767767
}
768768

769769
"date is in STRING format" in {
770770
assert(genStr.getDataQuery("company", date1, date1, Nil, None) ==
771-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = '2020-08-17'")
771+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = '2020-08-17'")
772772
assert(genStr.getDataQuery("company", date1, date2, Nil, None) ==
773-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= '2020-08-17' AND D <= '2020-08-30'")
773+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= '2020-08-17' AND D <= '2020-08-30'")
774774
}
775775

776776
"date is in NUMBER format" in {
777777
assert(genNum.getDataQuery("company", date1, date1, Nil, None) ==
778-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = 20200817")
778+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = 20200817")
779779
assert(genNum.getDataQuery("company", date1, date2, Nil, None) ==
780-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= 20200817 AND D <= 20200830")
780+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= 20200817 AND D <= 20200830")
781781
}
782782

783783
"with limit records" in {
784784
assert(gen.getDataQuery("company", date1, date1, Nil, Some(100)) ==
785-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = date'2020-08-17' LIMIT 100")
785+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D = date'2020-08-17' LIMIT 100")
786786
assert(gen.getDataQuery("company", date1, date2, Nil, Some(100)) ==
787-
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= date'2020-08-17' AND D <= date'2020-08-30' LIMIT 100")
787+
"SELECT ID 'ID', NAME 'NAME', DESCRIPTION 'DESCRIPTION', EMAIL 'EMAIL', FOUNDED 'FOUNDED', IS_TAX_FREE 'IS_TAX_FREE', TAX_ID 'TAX_ID', LAST_UPDATED 'LAST_UPDATED', INFO_DATE 'INFO_DATE' FROM company WHERE D >= date'2020-08-17' AND D <= date'2020-08-30' LIMIT 100")
788788
}
789789
}
790790

pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/JdbcNativeUtilsSuite.scala

Lines changed: 41 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,16 @@ class JdbcNativeUtilsSuite extends AnyWordSpec with RelationalDbFixture with Spa
138138
| "nullable" : true,
139139
| "metadata" : { }
140140
| }, {
141+
| "name" : "IS_TAX_FREE",
142+
| "type" : "boolean",
143+
| "nullable" : true,
144+
| "metadata" : { }
145+
| }, {
146+
| "name" : "TAX_ID",
147+
| "type" : "long",
148+
| "nullable" : true,
149+
| "metadata" : { }
150+
| }, {
141151
| "name" : "LAST_UPDATED",
142152
| "type" : "timestamp",
143153
| "nullable" : true,
@@ -163,25 +173,30 @@ class JdbcNativeUtilsSuite extends AnyWordSpec with RelationalDbFixture with Spa
163173
| "ID" : 1,
164174
| "NAME" : "Company1",
165175
| "EMAIL" : "[email protected]",
166-
| "FOUNDED" : "2000-10-11"
176+
| "FOUNDED" : "2000-10-11",
177+
| "IS_TAX_FREE" : false,
178+
| "TAX_ID" : 123
167179
|}, {
168180
| "ID" : 2,
169181
| "NAME" : "Company2",
170182
| "EMAIL" : "[email protected]",
171-
| "FOUNDED" : "2005-03-29"
183+
| "FOUNDED" : "2005-03-29",
184+
| "IS_TAX_FREE" : true,
185+
| "TAX_ID" : 456
172186
|}, {
173187
| "ID" : 3,
174188
| "NAME" : "Company3",
175189
| "EMAIL" : "[email protected]",
176-
| "FOUNDED" : "2016-12-30"
190+
| "FOUNDED" : "2016-12-30",
191+
| "IS_TAX_FREE" : false
177192
|}, {
178193
| "ID" : 4,
179194
| "NAME" : "Company4",
180195
| "EMAIL" : "[email protected]",
181196
| "FOUNDED" : "2016-12-31"
182197
|} ]""".stripMargin
183198

184-
val df = JdbcNativeUtils.getJdbcNativeDataFrame(jdbcConfig, jdbcConfig.primaryUrl.get, s"SELECT id, name, email, founded FROM $tableName")
199+
val df = JdbcNativeUtils.getJdbcNativeDataFrame(jdbcConfig, jdbcConfig.primaryUrl.get, s"SELECT id, name, email, founded, is_tax_free, tax_id FROM $tableName")
185200
val actual = SparkUtils.convertDataFrameToPrettyJSON(df)
186201

187202
compareText(actual, expected)
@@ -236,35 +251,35 @@ class JdbcNativeUtilsSuite extends AnyWordSpec with RelationalDbFixture with Spa
236251
}
237252

238253
"getDecimalDataType" should {
239-
val resultSet = mock(classOf[ResultSet])
240-
val resultSetMetaData = mock(classOf[ResultSetMetaData])
254+
val resultSet = mock(classOf[ResultSet])
255+
val resultSetMetaData = mock(classOf[ResultSetMetaData])
241256

242-
when(resultSetMetaData.getColumnCount).thenReturn(1)
243-
when(resultSet.getMetaData).thenReturn(resultSetMetaData)
257+
when(resultSetMetaData.getColumnCount).thenReturn(1)
258+
when(resultSet.getMetaData).thenReturn(resultSetMetaData)
244259

245-
"return normal decimal for correct precision and scale" in {
246-
val iterator = new ResultSetToRowIterator(resultSet, true, incorrectDecimalsAsString = false)
247-
when(resultSetMetaData.getPrecision(0)).thenReturn(10)
248-
when(resultSetMetaData.getScale(0)).thenReturn(2)
260+
"return normal decimal for correct precision and scale" in {
261+
val iterator = new ResultSetToRowIterator(resultSet, true, incorrectDecimalsAsString = false)
262+
when(resultSetMetaData.getPrecision(0)).thenReturn(10)
263+
when(resultSetMetaData.getScale(0)).thenReturn(2)
249264

250-
assert(iterator.getDecimalDataType(0) == NUMERIC)
251-
}
265+
assert(iterator.getDecimalDataType(0) == NUMERIC)
266+
}
252267

253-
"return fixed decimal for incorrect precision and scale" in {
254-
val iterator = new ResultSetToRowIterator(resultSet, true, incorrectDecimalsAsString = false)
255-
when(resultSetMetaData.getPrecision(0)).thenReturn(0)
256-
when(resultSetMetaData.getScale(0)).thenReturn(2)
268+
"return fixed decimal for incorrect precision and scale" in {
269+
val iterator = new ResultSetToRowIterator(resultSet, true, incorrectDecimalsAsString = false)
270+
when(resultSetMetaData.getPrecision(0)).thenReturn(0)
271+
when(resultSetMetaData.getScale(0)).thenReturn(2)
257272

258-
assert(iterator.getDecimalDataType(0) == NUMERIC)
259-
}
273+
assert(iterator.getDecimalDataType(0) == NUMERIC)
274+
}
260275

261-
"return string type for incorrect precision and scale" in {
262-
val iterator = new ResultSetToRowIterator(resultSet, true, incorrectDecimalsAsString = true)
263-
when(resultSetMetaData.getPrecision(0)).thenReturn(0)
264-
when(resultSetMetaData.getScale(0)).thenReturn(2)
276+
"return string type for incorrect precision and scale" in {
277+
val iterator = new ResultSetToRowIterator(resultSet, true, incorrectDecimalsAsString = true)
278+
when(resultSetMetaData.getPrecision(0)).thenReturn(0)
279+
when(resultSetMetaData.getScale(0)).thenReturn(2)
265280

266-
assert(iterator.getDecimalDataType(0) == VARCHAR)
267-
}
281+
assert(iterator.getDecimalDataType(0) == VARCHAR)
282+
}
268283
}
269284

270285
"sanitizeDateTime" when {

pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/SparkUtilsSuite.scala

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,6 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
263263
val newField2 = schema1Orig.fields.head.copy(metadata = metadata2)
264264
val schema2 = schema1Orig.copy(fields = newField2 +: schema1Orig.fields.tail)
265265

266-
println(schema1.prettyJson)
267-
println(schema2.prettyJson)
268-
269266
val diff = compareSchemas(schema1, schema2)
270267

271268
assert(diff.length == 1)

0 commit comments

Comments
 (0)