Skip to content

Commit

Permalink
#420 Add more test cases.
Browse files Browse the repository at this point in the history
  • Loading branch information
yruslan committed Jun 6, 2024
1 parent 9449d62 commit 1b76d34
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

object NestedDataFrameFactory {
private val testCaseSchema = StructType(
val testCaseSchema: StructType = StructType(
Array(
StructField("id", LongType),
StructField("key1", LongType),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row}
import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.pramen.api.FieldChange._
import za.co.absa.pramen.core.NestedDataFrameFactory
import za.co.absa.pramen.core.base.SparkTestBase
import za.co.absa.pramen.core.fixtures.{TempDirFixture, TextComparisonFixture}
import za.co.absa.pramen.core.pipeline.TransformExpression
Expand Down Expand Up @@ -643,7 +644,8 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture
}

"work with decimals and nested arrays of struct" in {
val expectedDDL = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>"
val expectedDDLWithNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING> COMMENT 'decimal(10, 12)'>>>>, `legid`: BIGINT COMMENT 'This is a \\'test\\': long'>>"
val expectedDDLWithoutNestedComments = "`id` BIGINT COMMENT 'This is my table',`legs` ARRAY<STRUCT<`conditions`: ARRAY<STRUCT<`amount`: DECIMAL(18,4), `checks`: ARRAY<STRUCT<`checkNums`: ARRAY<STRING>>>>>, `legid`: BIGINT>>"

val comment1 = new MetadataBuilder().putString("comment", "This is my table").build()
val comment2 = new MetadataBuilder().putString("comment", "decimal(10, 12)").build()
Expand All @@ -660,7 +662,12 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture

val actualDDL = escapeColumnsSparkDDL(schema.toDDL)

assert(actualDDL == expectedDDL)
// Depends on the version of Spark
if (actualDDL.contains("ARRAY<STRING> COMMENT")) {
assert(actualDDL == expectedDDLWithNestedComments)
} else {
assert(actualDDL == expectedDDLWithoutNestedComments)
}
}

"work with decimals and nested arrays of struct when the input is not escaped" in {
Expand All @@ -671,6 +678,23 @@ class SparkUtilsSuite extends AnyWordSpec with SparkTestBase with TempDirFixture

assert(actualDDL == expectedDDL)
}

"work with another complex example" in {
  // Escape every column identifier in the shared factory schema's DDL and
  // verify it matches the fully backtick-quoted form.
  val producedDDL = escapeColumnsSparkDDL(NestedDataFrameFactory.testCaseSchema.toDDL)

  val ddlWithEscapedColumns = "`id` BIGINT,`key1` BIGINT,`key2` BIGINT,`struct1` STRUCT<`key3`: INT, `key4`: INT>,`struct2` STRUCT<`inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>,`struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"

  assert(producedDDL == ddlWithEscapedColumns)
}

"work with another complex example (unescaped)" in {
  // Input DDL with mostly unquoted (and one already-quoted) identifiers;
  // escaping must backtick-quote all of them while preserving whitespace.
  val rawDDL = "id BIGINT, key1 BIGINT, key2 BIGINT, struct1 STRUCT<key3: INT, key4: INT>, struct2 STRUCT< inner1: STRUCT<key5: BIGINT, key6: BIGINT, skey1: STRING>>, struct3 STRUCT<inner3: STRUCT<array3: ARRAY<STRUCT<a1: BIGINT, a2: BIGINT, a3: STRING>>>>,array1 ARRAY<STRUCT<key7: BIGINT, key8: BIGINT, skey2: STRING>>,array2 ARRAY<STRUCT<key2: BIGINT, inner2: ARRAY<STRUCT<key9: BIGINT, `key10`: BIGINT, struct3: STRUCT<k1: INT, k2: INT>>>>>"
  val escapedDDL = "`id` BIGINT, `key1` BIGINT, `key2` BIGINT, `struct1` STRUCT<`key3`: INT, `key4`: INT>, `struct2` STRUCT< `inner1`: STRUCT<`key5`: BIGINT, `key6`: BIGINT, `skey1`: STRING>>, `struct3` STRUCT<`inner3`: STRUCT<`array3`: ARRAY<STRUCT<`a1`: BIGINT, `a2`: BIGINT, `a3`: STRING>>>>,`array1` ARRAY<STRUCT<`key7`: BIGINT, `key8`: BIGINT, `skey2`: STRING>>,`array2` ARRAY<STRUCT<`key2`: BIGINT, `inner2`: ARRAY<STRUCT<`key9`: BIGINT, `key10`: BIGINT, `struct3`: STRUCT<`k1`: INT, `k2`: INT>>>>>"

  assert(escapeColumnsSparkDDL(rawDDL) == escapedDDL)
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ package za.co.absa.pramen.extras.tests.avro

import org.apache.spark.sql.functions.struct
import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.pramen.NestedDataFrameFactory
import za.co.absa.pramen.extras.NestedDataFrameFactory
import za.co.absa.pramen.extras.utils.ResourceUtils.getResourceString
import za.co.absa.pramen.extras.utils.JsonUtils
import za.co.absa.pramen.extras.avro.AvroUtils
Expand Down

0 comments on commit 1b76d34

Please sign in to comment.