@@ -22,6 +22,7 @@ import org.apache.spark.sql.types._
22
22
import org .apache .spark .sql .{DataFrame , Row }
23
23
import org .scalatest .wordspec .AnyWordSpec
24
24
import za .co .absa .pramen .api .FieldChange ._
25
+ import za .co .absa .pramen .core .NestedDataFrameFactory
25
26
import za .co .absa .pramen .core .base .SparkTestBase
26
27
import za .co .absa .pramen .core .fixtures .{TempDirFixture , TextComparisonFixture }
27
28
import za .co .absa .pramen .core .pipeline .TransformExpression
@@ -643,7 +644,8 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
643
644
}
644
645
645
646
" work with decimals and nested arrays of struct" in {
646
- val expectedDDL = " `id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\ 'test\\ ': long'>>"
647
+ val expectedDDLWithNestedComments = " `id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\ 'test\\ ': long'>>"
648
+ val expectedDDLWithoutNestedComments = " `id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING>>>>>, `legid`: BIGINT>>"
647
649
648
650
val comment1 = new MetadataBuilder ().putString(" comment" , " This is my table" ).build()
649
651
val comment2 = new MetadataBuilder ().putString(" comment" , " decimal(10, 12)" ).build()
@@ -660,7 +662,12 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
660
662
661
663
val actualDDL = escapeColumnsSparkDDL(schema.toDDL)
662
664
663
- assert(actualDDL == expectedDDL)
665
+ // Depends on the version of Spark
666
+ if (actualDDL.contains(" ARRAY<STRING> COMMENT" )) {
667
+ assert(actualDDL == expectedDDLWithNestedComments)
668
+ } else {
669
+ assert(actualDDL == expectedDDLWithoutNestedComments)
670
+ }
664
671
}
665
672
666
673
" work with decimals and nested arrays of struct when the input is not escaped" in {
@@ -671,6 +678,23 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
671
678
672
679
assert(actualDDL == expectedDDL)
673
680
}
681
+
682
+ " work with another complex example" in {
683
+ val expectedDDL = " `id` BIGINT,`key1` BIGINT,`key2` BIGINT,`struct1` STRUCT<`key3`: INT, `key4`: INT>,`struct2` STRUCT<`inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>,`struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"
684
+
685
+ val actualDDL = escapeColumnsSparkDDL(NestedDataFrameFactory .testCaseSchema.toDDL)
686
+
687
+ assert(actualDDL == expectedDDL)
688
+ }
689
+
690
+ " work with another complex example (unescaped)" in {
691
+ val inputDDL = " id BIGINT, key1 BIGINT, key2 BIGINT, struct1 STRUCT<key3: INT, key4: INT>, struct2 STRUCT< inner1: STRUCT<key5: BIGINT, key6: BIGINT, skey1: STRING>>, struct3 STRUCT<inner3: STRUCT<array3: ARRAY<STRUCT<a1: BIGINT, a2: BIGINT, a3: STRING>>>>,array1 ARRAY<STRUCT<key7: BIGINT, key8: BIGINT, skey2: STRING>>,array2 ARRAY<STRUCT<key2: BIGINT, inner2: ARRAY<STRUCT<key9: BIGINT, `key10`: BIGINT, struct3: STRUCT<k1: INT, k2: INT>>>>>"
692
+ val expectedDDL = " `id` BIGINT, `key1` BIGINT, `key2` BIGINT, `struct1` STRUCT<`key3`: INT, `key4`: INT>, `struct2` STRUCT< `inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>, `struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"
693
+
694
+ val actualDDL = escapeColumnsSparkDDL(inputDDL)
695
+
696
+ assert(actualDDL == expectedDDL)
697
+ }
674
698
}
675
699
676
700
}
0 commit comments