diff --git a/pramen/core/src/test/scala/za/co/absa/pramen/core/NestedDataFrameFactory.scala b/pramen/core/src/test/scala/za/co/absa/pramen/core/NestedDataFrameFactory.scala index a340c504f..cffae6b01 100644 --- a/pramen/core/src/test/scala/za/co/absa/pramen/core/NestedDataFrameFactory.scala +++ b/pramen/core/src/test/scala/za/co/absa/pramen/core/NestedDataFrameFactory.scala @@ -20,7 +20,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row, SparkSession} object NestedDataFrameFactory { - private val testCaseSchema = StructType( + val testCaseSchema: StructType = StructType( Array( StructField("id", LongType), StructField("key1", LongType), diff --git a/pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/SparkUtilsSuite.scala b/pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/SparkUtilsSuite.scala index 832a47a5a..5cd6a4f50 100644 --- a/pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/SparkUtilsSuite.scala +++ b/pramen/core/src/test/scala/za/co/absa/pramen/core/tests/utils/SparkUtilsSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row} import org.scalatest.wordspec.AnyWordSpec import za.co.absa.pramen.api.FieldChange._ +import za.co.absa.pramen.core.NestedDataFrameFactory import za.co.absa.pramen.core.base.SparkTestBase import za.co.absa.pramen.core.fixtures.{TempDirFixture, TextComparisonFixture} import za.co.absa.pramen.core.pipeline.TransformExpression @@ -643,7 +644,8 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture } "work with decimals and nested arrays of struct" in { - val expectedDDL = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>" + val expectedDDLWithNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>" + val expectedDDLWithoutNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY>>>>, `legid`: BIGINT>>" val comment1 = new MetadataBuilder().putString("comment", "This is my table").build() val comment2 = new MetadataBuilder().putString("comment", "decimal(10, 12)").build() @@ -660,7 +662,12 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture val actualDDL = escapeColumnsSparkDDL(schema.toDDL) - assert(actualDDL == expectedDDL) + // Depends on the version of Spark + if (actualDDL.contains("ARRAY COMMENT")) { + assert(actualDDL == expectedDDLWithNestedComments) + } else { + assert(actualDDL == expectedDDLWithoutNestedComments) + } } "work with decimals and nested arrays of struct when the input is not escaped" in { @@ -671,6 +678,23 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture assert(actualDDL == expectedDDL) } + + "work with another complex example" in { + val expectedDDL = "`id` BIGINT,`key1` BIGINT,`key2` BIGINT,`struct1` STRUCT<`key3`: INT, `key4`: INT>,`struct2` STRUCT<`inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>,`struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY>>>,`array1` ARRAY>,`array2` ARRAY>>>>" + + val actualDDL = escapeColumnsSparkDDL(NestedDataFrameFactory.testCaseSchema.toDDL) + + assert(actualDDL == expectedDDL) + } + + "work with another complex example (unescaped)" in { + val inputDDL = "id BIGINT, key1 BIGINT, key2 BIGINT, struct1 STRUCT, struct2 STRUCT< inner1: STRUCT>, struct3 STRUCT>>>,array1 ARRAY>,array2 ARRAY>>>>" + val expectedDDL = "`id` BIGINT, `key1` BIGINT, `key2` BIGINT, `struct1` STRUCT<`key3`: INT, `key4`: INT>, `struct2` STRUCT< `inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>, `struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY>>>,`array1` ARRAY>,`array2` ARRAY>>>>" + + val actualDDL = escapeColumnsSparkDDL(inputDDL) + + assert(actualDDL == expectedDDL) + } } } diff --git a/pramen/extras/src/test/scala/za/co/absa/pramen/extras/tests/avro/AvroUtilsSuite.scala b/pramen/extras/src/test/scala/za/co/absa/pramen/extras/tests/avro/AvroUtilsSuite.scala index b2872887a..8e0f1e111 100644 --- a/pramen/extras/src/test/scala/za/co/absa/pramen/extras/tests/avro/AvroUtilsSuite.scala +++ b/pramen/extras/src/test/scala/za/co/absa/pramen/extras/tests/avro/AvroUtilsSuite.scala @@ -18,7 +18,7 @@ package za.co.absa.pramen.extras.tests.avro import org.apache.spark.sql.functions.struct import org.scalatest.wordspec.AnyWordSpec -import za.co.absa.pramen.NestedDataFrameFactory +import za.co.absa.pramen.extras.NestedDataFrameFactory import za.co.absa.pramen.extras.utils.ResourceUtils.getResourceString import za.co.absa.pramen.extras.utils.JsonUtils import za.co.absa.pramen.extras.avro.AvroUtils