
Commit 1b76d34

#420 Add more test cases.
1 parent 9449d62 commit 1b76d34

File tree

3 files changed: +28 -4 lines changed


pramen/core/src/test/scala/za/co/absa/pramen/core/NestedDataFrameFactory.scala

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 
 object NestedDataFrameFactory {
-  private val testCaseSchema = StructType(
+  val testCaseSchema: StructType = StructType(
     Array(
       StructField("id", LongType),
       StructField("key1", LongType),

pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/SparkUtilsSuite.scala

Lines changed: 26 additions & 2 deletions
@@ -22,6 +22,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.sql.{DataFrame, Row}
 import org.scalatest.wordspec.AnyWordSpec
 import za.co.absa.pramen.api.FieldChange._
+import za.co.absa.pramen.core.NestedDataFrameFactory
 import za.co.absa.pramen.core.base.SparkTestBase
 import za.co.absa.pramen.core.fixtures.{TempDirFixture, TextComparisonFixture}
 import za.co.absa.pramen.core.pipeline.TransformExpression
@@ -643,7 +644,8 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
     }
 
     "work with decimals and nested arrays of struct" in {
-      val expectedDDL = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>"
+      val expectedDDLWithNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>"
+      val expectedDDLWithoutNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING>>>>>, `legid`: BIGINT>>"
 
       val comment1 = new MetadataBuilder().putString("comment", "This is my table").build()
       val comment2 = new MetadataBuilder().putString("comment", "decimal(10, 12)").build()
@@ -660,7 +662,12 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
 
       val actualDDL = escapeColumnsSparkDDL(schema.toDDL)
 
-      assert(actualDDL == expectedDDL)
+      // Depends on the version of Spark
+      if (actualDDL.contains("ARRAY<STRING> COMMENT")) {
+        assert(actualDDL == expectedDDLWithNestedComments)
+      } else {
+        assert(actualDDL == expectedDDLWithoutNestedComments)
+      }
     }
 
     "work with decimals and nested arrays of struct when the input is not escaped" in {
@@ -671,6 +678,23 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
 
       assert(actualDDL == expectedDDL)
     }
+
+    "work with another complex example" in {
+      val expectedDDL = "`id` BIGINT,`key1` BIGINT,`key2` BIGINT,`struct1` STRUCT<`key3`: INT, `key4`: INT>,`struct2` STRUCT<`inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>,`struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"
+
+      val actualDDL = escapeColumnsSparkDDL(NestedDataFrameFactory.testCaseSchema.toDDL)
+
+      assert(actualDDL == expectedDDL)
+    }
+
+    "work with another complex example (unescaped)" in {
+      val inputDDL = "id BIGINT, key1 BIGINT, key2 BIGINT, struct1 STRUCT<key3: INT, key4: INT>, struct2 STRUCT< inner1: STRUCT<key5: BIGINT, key6: BIGINT, skey1: STRING>>, struct3 STRUCT<inner3: STRUCT<array3: ARRAY<STRUCT<a1: BIGINT, a2: BIGINT, a3: STRING>>>>,array1 ARRAY<STRUCT<key7: BIGINT, key8: BIGINT, skey2: STRING>>,array2 ARRAY<STRUCT<key2: BIGINT, inner2: ARRAY<STRUCT<key9: BIGINT, `key10`: BIGINT, struct3: STRUCT<k1: INT, k2: INT>>>>>"
+      val expectedDDL = "`id` BIGINT, `key1` BIGINT, `key2` BIGINT, `struct1` STRUCT<`key3`: INT, `key4`: INT>, `struct2` STRUCT< `inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>, `struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"
+
+      val actualDDL = escapeColumnsSparkDDL(inputDDL)
+
+      assert(actualDDL == expectedDDL)
+    }
   }
 
 }
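The branching assertion added in the "decimals and nested arrays of struct" test reflects that Spark versions differ in whether comments on nested fields are rendered when a schema is converted to DDL. A minimal sketch of the behavior the test accommodates; the field name and comment text here are invented for illustration and are not part of this commit:

// Illustrative sketch: whether a comment on a nested field survives
// StructType.toDDL depends on the Spark version, hence the test accepts
// both expected DDL strings.
import org.apache.spark.sql.types._

val nestedComment = new MetadataBuilder().putString("comment", "nested note").build()

val schema = StructType(Seq(
  StructField("legs", ArrayType(StructType(Seq(
    StructField("legid", LongType, nullable = true, nestedComment)
  ))))
))

// Some Spark versions include "COMMENT 'nested note'" for the nested field,
// others drop it; escapeColumnsSparkDDL is applied to whichever form appears.
println(schema.toDDL)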

pramen/extras/src/test/scala/za/co/absa/pramen/extras/tests/avro/AvroUtilsSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ package za.co.absa.pramen.extras.tests.avro
 
 import org.apache.spark.sql.functions.struct
 import org.scalatest.wordspec.AnyWordSpec
-import za.co.absa.pramen.NestedDataFrameFactory
+import za.co.absa.pramen.extras.NestedDataFrameFactory
 import za.co.absa.pramen.extras.utils.ResourceUtils.getResourceString
 import za.co.absa.pramen.extras.utils.JsonUtils
 import za.co.absa.pramen.extras.avro.AvroUtils
