@@ -22,6 +22,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.sql.{DataFrame, Row}
 import org.scalatest.wordspec.AnyWordSpec
 import za.co.absa.pramen.api.FieldChange._
+import za.co.absa.pramen.core.NestedDataFrameFactory
 import za.co.absa.pramen.core.base.SparkTestBase
 import za.co.absa.pramen.core.fixtures.{TempDirFixture, TextComparisonFixture}
 import za.co.absa.pramen.core.pipeline.TransformExpression
@@ -643,7 +644,8 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
     }
 
     "work with decimals and nested arrays of struct" in {
-      val expectedDDL = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>"
+      val expectedDDLWithNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>"
+      val expectedDDLWithoutNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING>>>>>, `legid`: BIGINT>>"
 
       val comment1 = new MetadataBuilder().putString("comment", "This is my table").build()
       val comment2 = new MetadataBuilder().putString("comment", "decimal(10, 12)").build()
@@ -660,7 +662,12 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
 
       val actualDDL = escapeColumnsSparkDDL(schema.toDDL)
 
-      assert(actualDDL == expectedDDL)
+      // Depends on the version of Spark
+      if (actualDDL.contains("ARRAY<STRING> COMMENT")) {
+        assert(actualDDL == expectedDDLWithNestedComments)
+      } else {
+        assert(actualDDL == expectedDDLWithoutNestedComments)
+      }
     }
 
     "work with decimals and nested arrays of struct when the input is not escaped" in {
@@ -671,6 +678,23 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
 
       assert(actualDDL == expectedDDL)
     }
+
+    "work with another complex example" in {
+      val expectedDDL = "`id` BIGINT,`key1` BIGINT,`key2` BIGINT,`struct1` STRUCT<`key3`: INT, `key4`: INT>,`struct2` STRUCT<`inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>,`struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"
+
+      val actualDDL = escapeColumnsSparkDDL(NestedDataFrameFactory.testCaseSchema.toDDL)
+
+      assert(actualDDL == expectedDDL)
+    }
+
+    "work with another complex example (unescaped)" in {
+      val inputDDL = "id BIGINT, key1 BIGINT, key2 BIGINT, struct1 STRUCT<key3: INT, key4: INT>, struct2 STRUCT< inner1: STRUCT<key5: BIGINT, key6: BIGINT, skey1: STRING>>, struct3 STRUCT<inner3: STRUCT<array3: ARRAY<STRUCT<a1: BIGINT, a2: BIGINT, a3: STRING>>>>,array1 ARRAY<STRUCT<key7: BIGINT, key8: BIGINT, skey2: STRING>>,array2 ARRAY<STRUCT<key2: BIGINT, inner2: ARRAY<STRUCT<key9: BIGINT, `key10`: BIGINT, struct3: STRUCT<k1: INT, k2: INT>>>>>"
+      val expectedDDL = "`id` BIGINT, `key1` BIGINT, `key2` BIGINT, `struct1` STRUCT<`key3`: INT, `key4`: INT>, `struct2` STRUCT< `inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>, `struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"
+
+      val actualDDL = escapeColumnsSparkDDL(inputDDL)
+
+      assert(actualDDL == expectedDDL)
+    }
   }
 
 }
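Note: the new cases all exercise escapeColumnsSparkDDL, which backtick-quotes bare column and struct-field names in a Spark toDDL string. The sketch below is only an illustration of the input/output shape asserted above for simple schemas; it is not Pramen's actual implementation (which, as the tests show, also copes with nested comments, decimal types, and already-escaped names).

// Minimal sketch of the escaping behaviour asserted above (illustration only,
// NOT Pramen's escapeColumnsSparkDDL): backtick-quote bare field names that
// follow the start of the DDL, a comma, or a '<'. The (?![\w<]) lookahead
// keeps type keywords such as STRUCT in "ARRAY<STRUCT<...>" unquoted.
object EscapeColumnsSketch {
  def escapeColumnsNaive(ddl: String): String =
    ddl.replaceAll("(^|[,<])(\\s*)(\\w+)(?![\\w<])", "$1$2`$3`")

  def main(args: Array[String]): Unit = {
    val input    = "id BIGINT, struct1 STRUCT<key3: INT, key4: INT>"
    val expected = "`id` BIGINT, `struct1` STRUCT<`key3`: INT, `key4`: INT>"
    // Prints 'true' for this simple schema
    println(escapeColumnsNaive(input) == expected)
  }
}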