diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryNumberDecoders.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryNumberDecoders.scala index 110c2e522..f665950e3 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryNumberDecoders.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryNumberDecoders.scala @@ -82,6 +82,13 @@ object BinaryNumberDecoders { if (v<0) null else v } + def decodeBinaryUnsignedIntBigEndianAsLong(bytes: Array[Byte]): java.lang.Long = { + if (bytes.length < 4) { + return null + } + ((bytes(0) & 255L) << 24L) | ((bytes(1) & 255L) << 16L) | ((bytes(2) & 255L) << 8L) | (bytes(3) & 255L) + } + def decodeBinaryUnsignedIntLittleEndian(bytes: Array[Byte]): Integer = { if (bytes.length < 4) { return null @@ -90,6 +97,13 @@ object BinaryNumberDecoders { if (v<0) null else v } + def decodeBinaryUnsignedIntLittleEndianAsLong(bytes: Array[Byte]): java.lang.Long = { + if (bytes.length < 4) { + return null + } + ((bytes(3) & 255L) << 24L) | ((bytes(2) & 255L) << 16L) | ((bytes(1) & 255L) << 8L) | (bytes(0) & 255L) + } + def decodeBinarySignedLongBigEndian(bytes: Array[Byte]): java.lang.Long = { if (bytes.length < 8) { return null @@ -112,6 +126,13 @@ object BinaryNumberDecoders { if (v < 0L) null else v } + def decodeBinaryUnsignedLongBigEndianAsDecimal(bytes: Array[Byte]): BigDecimal = { + if (bytes.length < 8) { + return null + } + BigDecimal(BigInt(1, bytes).toString()) + } + def decodeBinaryUnsignedLongLittleEndian(bytes: Array[Byte]): java.lang.Long = { if (bytes.length < 8) { return null @@ -120,6 +141,13 @@ object BinaryNumberDecoders { if (v < 0L) null else v } + def decodeBinaryUnsignedLongLittleEndianAsDecimal(bytes: Array[Byte]): BigDecimal = { + if (bytes.length < 8) { + return null + } + BigDecimal(BigInt(1, bytes.reverse).toString()) + } + def decodeBinaryAribtraryPrecision(bytes: Array[Byte], isBigEndian: 
Boolean, isSigned: Boolean): BigDecimal = { if (bytes.length == 0) { return null diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/DecoderSelector.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/DecoderSelector.scala index 80c4bc63f..462fbf03f 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/DecoderSelector.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/DecoderSelector.scala @@ -19,6 +19,7 @@ package za.co.absa.cobrix.cobol.parser.decoders import java.nio.charset.{Charset, StandardCharsets} import za.co.absa.cobrix.cobol.parser.ast.datatype._ import za.co.absa.cobrix.cobol.parser.common.Constants +import za.co.absa.cobrix.cobol.parser.common.Constants.{maxIntegerPrecision, maxLongPrecision} import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat import za.co.absa.cobrix.cobol.parser.encoding._ import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon} @@ -255,26 +256,32 @@ object DecoderSelector { val isSigned = signPosition.nonEmpty val numOfBytes = BinaryUtils.getBytesCount(compact, precision, isSigned, isExplicitDecimalPt = false, isSignSeparate = false) + val isMaxUnsignedPrecision = precision == maxIntegerPrecision || precision == maxLongPrecision + val decoder = if (strictIntegralPrecision) { (a: Array[Byte]) => BinaryNumberDecoders.decodeBinaryAribtraryPrecision(a, isBigEndian, isSigned) } else { - (isSigned, isBigEndian, numOfBytes) match { - case (true, true, 1) => BinaryNumberDecoders.decodeSignedByte _ - case (true, true, 2) => BinaryNumberDecoders.decodeBinarySignedShortBigEndian _ - case (true, true, 4) => BinaryNumberDecoders.decodeBinarySignedIntBigEndian _ - case (true, true, 8) => BinaryNumberDecoders.decodeBinarySignedLongBigEndian _ - case (true, false, 1) => BinaryNumberDecoders.decodeSignedByte _ - case (true, false, 2) => 
BinaryNumberDecoders.decodeBinarySignedShortLittleEndian _ - case (true, false, 4) => BinaryNumberDecoders.decodeBinarySignedIntLittleEndian _ - case (true, false, 8) => BinaryNumberDecoders.decodeBinarySignedLongLittleEndian _ - case (false, true, 1) => BinaryNumberDecoders.decodeUnsignedByte _ - case (false, true, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortBigEndian _ - case (false, true, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntBigEndian _ - case (false, true, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongBigEndian _ - case (false, false, 1) => BinaryNumberDecoders.decodeUnsignedByte _ - case (false, false, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortLittleEndian _ - case (false, false, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntLittleEndian _ - case (false, false, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongLittleEndian _ + (isSigned, isBigEndian, isMaxUnsignedPrecision, numOfBytes) match { + case (true, true, _, 1) => BinaryNumberDecoders.decodeSignedByte _ + case (true, true, _, 2) => BinaryNumberDecoders.decodeBinarySignedShortBigEndian _ + case (true, true, _, 4) => BinaryNumberDecoders.decodeBinarySignedIntBigEndian _ + case (true, true, _, 8) => BinaryNumberDecoders.decodeBinarySignedLongBigEndian _ + case (true, false, _, 1) => BinaryNumberDecoders.decodeSignedByte _ + case (true, false, _, 2) => BinaryNumberDecoders.decodeBinarySignedShortLittleEndian _ + case (true, false, _, 4) => BinaryNumberDecoders.decodeBinarySignedIntLittleEndian _ + case (true, false, _, 8) => BinaryNumberDecoders.decodeBinarySignedLongLittleEndian _ + case (false, true, _, 1) => BinaryNumberDecoders.decodeUnsignedByte _ + case (false, true, _, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortBigEndian _ + case (false, true, false, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntBigEndian _ + case (false, true, true, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntBigEndianAsLong _ + case (false, true, false, 8) => 
BinaryNumberDecoders.decodeBinaryUnsignedLongBigEndian _ + case (false, true, true, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongBigEndianAsDecimal _ + case (false, false, _, 1) => BinaryNumberDecoders.decodeUnsignedByte _ + case (false, false, _, 2) => BinaryNumberDecoders.decodeBinaryUnsignedShortLittleEndian _ + case (false, false, false, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntLittleEndian _ + case (false, false, true, 4) => BinaryNumberDecoders.decodeBinaryUnsignedIntLittleEndianAsLong _ + case (false, false, false, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongLittleEndian _ + case (false, false, true, 8) => BinaryNumberDecoders.decodeBinaryUnsignedLongLittleEndianAsDecimal _ case _ => (a: Array[Byte]) => BinaryNumberDecoders.decodeBinaryAribtraryPrecision(a, isBigEndian, isSigned) } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryDecoderSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryDecoderSpec.scala index 9c89b243c..9a681eb36 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryDecoderSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryDecoderSpec.scala @@ -460,10 +460,14 @@ class BinaryDecoderSpec extends AnyFunSuite { val decoderUnsignedShort = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 3, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) val decoderSignedInt = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 8, compact = Some(COMP4())), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) val decoderUnsignedIntBe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 8, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) 
+ val decoderUnsignedIntBeAsLong = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 9, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) val decoderUnsignedIntLe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 8, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) + val decoderUnsignedIntLeAsLong = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 9, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) val decoderSignedLong = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 15, compact = Some(COMP4())), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) val decoderUnsignedLongBe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 15, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) + val decoderUnsignedLongBeAsBig = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 18, compact = Some(COMP5()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) val decoderUnsignedLongLe = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 15, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) + val decoderUnsignedLongLeAsBig = DecoderSelector.getIntegralDecoder(integralType.copy(precision = 18, compact = Some(COMP9()), signPosition = None), strictSignOverpunch = false, improvedNullDetection = false, strictIntegralPrecision = false) val num1 = decoderSignedByte(Array(0x10).map(_.toByte)) assert(num1.isInstanceOf[Integer]) @@ -501,10 
+505,18 @@ class BinaryDecoderSpec extends AnyFunSuite { assert(num9.isInstanceOf[Integer]) assert(num9.asInstanceOf[Integer] == 9437184) + val num9a = decoderUnsignedIntBeAsLong(Array(0x00, 0x90, 0x00, 0x00).map(_.toByte)) + assert(num9a.isInstanceOf[java.lang.Long]) + assert(num9a.asInstanceOf[java.lang.Long] == 9437184L) + val num10 = decoderUnsignedIntLe(Array(0x00, 0x00, 0x90, 0x00).map(_.toByte)) assert(num10.isInstanceOf[Integer]) assert(num10.asInstanceOf[Integer] == 9437184) + val num10a = decoderUnsignedIntLeAsLong(Array(0x00, 0x00, 0x90, 0x00).map(_.toByte)) + assert(num10a.isInstanceOf[java.lang.Long]) + assert(num10a.asInstanceOf[java.lang.Long] == 9437184L) + val num11 = decoderSignedLong(Array(0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00).map(_.toByte)) assert(num11.isInstanceOf[Long]) assert(num11.asInstanceOf[Long] == 72057594037927936L) @@ -517,9 +529,17 @@ class BinaryDecoderSpec extends AnyFunSuite { assert(num13.isInstanceOf[Long]) assert(num13.asInstanceOf[Long] == 40532396646334464L) + val num13a = decoderUnsignedLongBeAsBig(Array(0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00).map(_.toByte)) + assert(num13a.isInstanceOf[BigDecimal]) + assert(num13a.asInstanceOf[BigDecimal] == BigDecimal("40532396646334464")) + val num14 = decoderUnsignedLongLe(Array(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00).map(_.toByte)) assert(num14.isInstanceOf[Long]) assert(num14.asInstanceOf[Long] == 40532396646334464L) + + val num14a = decoderUnsignedLongLeAsBig(Array(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00).map(_.toByte)) + assert(num14a.isInstanceOf[BigDecimal]) + assert(num14a.asInstanceOf[BigDecimal] == BigDecimal("40532396646334464")) } test("Test Binary strict integral precision numbers") { diff --git a/data/test17_expected/test17a_schema.json b/data/test17_expected/test17a_schema.json index 00b8ee6a4..d5a32c124 100644 --- a/data/test17_expected/test17a_schema.json +++ b/data/test17_expected/test17a_schema.json @@ -40,7 +40,7 @@ } }, { "name" : 
"TAXPAYER", - "type" : "integer", + "type" : "long", "nullable" : true, "metadata" : { } } ] diff --git a/data/test17_expected/test17b_schema.json b/data/test17_expected/test17b_schema.json index 59e80e2c8..0564c0ed9 100644 --- a/data/test17_expected/test17b_schema.json +++ b/data/test17_expected/test17b_schema.json @@ -61,7 +61,7 @@ } }, { "name" : "TAXPAYER", - "type" : "integer", + "type" : "long", "nullable" : true, "metadata" : { } } ] diff --git a/data/test17_expected/test17c_schema.json b/data/test17_expected/test17c_schema.json index 4f4101422..85b13c663 100644 --- a/data/test17_expected/test17c_schema.json +++ b/data/test17_expected/test17c_schema.json @@ -40,7 +40,7 @@ } }, { "name" : "TAXPAYER", - "type" : "integer", + "type" : "long", "nullable" : true, "metadata" : { } }, { diff --git a/data/test18 special_char_expected/test18a_schema.json b/data/test18 special_char_expected/test18a_schema.json index 00b8ee6a4..d5a32c124 100644 --- a/data/test18 special_char_expected/test18a_schema.json +++ b/data/test18 special_char_expected/test18a_schema.json @@ -40,7 +40,7 @@ } }, { "name" : "TAXPAYER", - "type" : "integer", + "type" : "long", "nullable" : true, "metadata" : { } } ] diff --git a/data/test24_expected/test24_schema.json b/data/test24_expected/test24_schema.json index 1e3c4e504..c0d8fcbee 100644 --- a/data/test24_expected/test24_schema.json +++ b/data/test24_expected/test24_schema.json @@ -712,7 +712,7 @@ } }, { "name" : "NUM_BIN_INT07", - "type" : "integer", + "type" : "long", "nullable" : true, "metadata" : { } }, { @@ -760,7 +760,7 @@ } }, { "name" : "NUM_BIN_INT11", - "type" : "long", + "type" : "decimal(20,0)", "nullable" : true, "metadata" : { } }, { diff --git a/data/test24_expected/test24b_schema.json b/data/test24_expected/test24b_schema.json index 7e14723e1..751aebd0c 100644 --- a/data/test24_expected/test24b_schema.json +++ b/data/test24_expected/test24b_schema.json @@ -712,7 +712,7 @@ } }, { "name" : "NUM_BIN_INT07", - "type" : "integer", 
+ "type" : "long", "nullable" : true, "metadata" : { } }, { @@ -760,7 +760,7 @@ } }, { "name" : "NUM_BIN_INT11", - "type" : "long", + "type" : "decimal(20,0)", "nullable" : true, "metadata" : { } }, { diff --git a/data/test6_expected/test6_schema.json b/data/test6_expected/test6_schema.json index ebacd62b0..ca2d7eb23 100644 --- a/data/test6_expected/test6_schema.json +++ b/data/test6_expected/test6_schema.json @@ -299,7 +299,7 @@ "metadata" : { } }, { "name" : "NUM_BIN_INT07", - "type" : "integer", + "type" : "long", "nullable" : true, "metadata" : { } }, { @@ -319,7 +319,7 @@ "metadata" : { } }, { "name" : "NUM_BIN_INT11", - "type" : "long", + "type" : "decimal(20,0)", "nullable" : true, "metadata" : { } }, { diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala index 681a9badc..2b8a7098e 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/schema/CobolSchema.scala @@ -20,7 +20,7 @@ import org.apache.spark.sql.types._ import za.co.absa.cobrix.cobol.internal.Logging import za.co.absa.cobrix.cobol.parser.Copybook import za.co.absa.cobrix.cobol.parser.ast._ -import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, COMP1, COMP2, Decimal, Integral} +import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, COMP1, COMP2, COMP4, COMP5, COMP9, Decimal, Integral} import za.co.absa.cobrix.cobol.parser.common.Constants import za.co.absa.cobrix.cobol.parser.encoding.RAW import za.co.absa.cobrix.cobol.parser.policies.MetadataPolicy @@ -66,23 +66,10 @@ class CobolSchema(copybook: Copybook, @throws(classOf[IllegalStateException]) private[this] lazy val sparkSchema = createSparkSchema() - @throws(classOf[IllegalStateException]) - private[this] lazy val sparkFlatSchema = { - val arraySchema = copybook.ast.children.toArray - val records = 
arraySchema.flatMap(record => { - parseGroupFlat(record.asInstanceOf[Group], s"${record.name}_") - }) - StructType(records) - } - def getSparkSchema: StructType = { sparkSchema } - def getSparkFlatSchema: StructType = { - sparkFlatSchema - } - @throws(classOf[IllegalStateException]) private def createSparkSchema(): StructType = { val records = for (record <- copybook.getRootRecords) yield { @@ -200,12 +187,16 @@ class CobolSchema(copybook: Copybook, case dt: Integral if strictIntegralPrecision => DecimalType(precision = dt.precision, scale = 0) case dt: Integral => + val isBinary = dt.compact.exists(c => c == COMP4() || c == COMP5() || c == COMP9()) if (dt.precision > Constants.maxLongPrecision) { DecimalType(precision = dt.precision, scale = 0) + } else if (dt.precision == Constants.maxLongPrecision && isBinary && dt.signPosition.isEmpty) { // promoting unsigned long to decimal(20) to be able to fit any value + DecimalType(precision = dt.precision + 2, scale = 0) } else if (dt.precision > Constants.maxIntegerPrecision) { LongType - } - else { + } else if (dt.precision == Constants.maxIntegerPrecision && isBinary && dt.signPosition.isEmpty) { // promoting unsigned int to long to be able to fit any value + LongType + } else { IntegerType } case _ => throw new IllegalStateException("Unknown AST object") @@ -290,53 +281,6 @@ class CobolSchema(copybook: Copybook, }) childSegments } - - @throws(classOf[IllegalStateException]) - private def parseGroupFlat(group: Group, structPath: String = ""): ArrayBuffer[StructField] = { - val fields = new ArrayBuffer[StructField]() - for (field <- group.children if !field.isFiller) { - field match { - case group: Group => - if (group.isArray) { - for (i <- Range(1, group.arrayMaxSize + 1)) { - val path = s"$structPath${group.name}_${i}_" - fields ++= parseGroupFlat(group, path) - } - } else { - val path = s"$structPath${group.name}_" - fields ++= parseGroupFlat(group, path) - } - case s: Primitive => - val dataType: DataType = 
s.dataType match { - case d: Decimal => - DecimalType(d.getEffectivePrecision, d.getEffectiveScale) - case a: AlphaNumeric => - a.enc match { - case Some(RAW) => BinaryType - case _ => StringType - } - case dt: Integral => - if (dt.precision > Constants.maxIntegerPrecision) { - LongType - } - else { - IntegerType - } - case _ => throw new IllegalStateException("Unknown AST object") - } - val path = s"$structPath" //${group.name}_" - if (s.isArray) { - for (i <- Range(1, s.arrayMaxSize + 1)) { - fields += StructField(s"$path{s.name}_$i", ArrayType(dataType), nullable = true) - } - } else { - fields += StructField(s"$path${s.name}", dataType, nullable = true) - } - } - } - - fields - } } object CobolSchema { diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/integration/Test38StrictBinaryTypeSpec.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/integration/Test38StrictBinaryTypeSpec.scala new file mode 100644 index 000000000..951d301f6 --- /dev/null +++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/integration/Test38StrictBinaryTypeSpec.scala @@ -0,0 +1,136 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.cobrix.spark.cobol.source.integration + +import org.scalatest.wordspec.AnyWordSpec +import za.co.absa.cobrix.spark.cobol.source.base.SparkTestBase +import za.co.absa.cobrix.spark.cobol.source.fixtures.{BinaryFileFixture, TextComparisonFixture} + +class Test38StrictBinaryTypeSpec extends AnyWordSpec with SparkTestBase with BinaryFileFixture with TextComparisonFixture { + "binary fields handling" should { + "support full integer and long decoding" in { + val copybook = + """ 01 R. + 05 NUM1 PIC 9(9) COMP. + 05 NUM2 PIC 9(9) COMP-9. + 05 NUM3 PIC S9(9) COMP-5. + 05 NUM4 PIC S9(9) COMP-9. + 05 STRUCT. + 10 NUM5 PIC 9(9) COMP. + 10 NUM6 PIC 9(9) COMP-9. + 10 NUM7 PIC S9(9) COMP-5. + 10 NUM8 PIC S9(9) COMP-9. + """ + + val integerDataSample: Array[Byte] = Array( + 0xFF, 0xFF, 0xFF, 0xFE, // 4294967294 + 0xFF, 0xFF, 0xFF, 0xFE, // 4278190079 + 0xFF, 0xFF, 0xFF, 0xFE, // -2 + 0xFF, 0xFF, 0xFF, 0xFE, // -16777217 + 0xF0, 0x00, 0x01, 0xF0, // 4026532336 + 0xF0, 0x00, 0x01, 0xF0, // 4026597616 + 0x84, 0xF0, 0x21, 0x67, // -2064637593 + 0x67, 0x21, 0xF0, 0x84 // -2064637593 + ).map(_.toByte) + + withTempBinFile("strict_bin_fields", ".tmp", integerDataSample) { tempFile => + val expectedSchema = + """root + | |-- NUM1: long (nullable = true) + | |-- NUM2: long (nullable = true) + | |-- NUM3: integer (nullable = true) + | |-- NUM4: integer (nullable = true) + | |-- STRUCT: struct (nullable = true) + | | |-- NUM5: long (nullable = true) + | | |-- NUM6: long (nullable = true) + | | |-- NUM7: integer (nullable = true) + | | |-- NUM8: integer (nullable = true) + |""".stripMargin + + val expectedData = """{"NUM1":4294967294,"NUM2":4278190079,"NUM3":-2,"NUM4":-16777217,"STRUCT":{"NUM5":4026532336,"NUM6":4026597616,"NUM7":-2064637593,"NUM8":-2064637593}}""" + + val df = spark.read + .format("cobol") + .option("copybook_contents", copybook) + .option("record_format", "F") + .option("pedantic", "true") + .load(tempFile) + + val actualSchema = 
df.schema.treeString + val actualData = df.toJSON.collect().mkString(",") + + compareText(actualSchema, expectedSchema) + assert(actualData == expectedData) + } + } + + "support full long and int decimal decoding" in { + val copybook = + """ 01 R. + 05 NUM1 PIC 9(18) COMP. + 05 NUM2 PIC 9(18) COMP-9. + 05 NUM3 PIC S9(18) COMP-5. + 05 NUM4 PIC S9(18) COMP-9. + 05 STRUCT. + 10 NUM5 PIC 9(18) COMP. + 10 NUM6 PIC 9(18) COMP-9. + 10 NUM7 PIC S9(18) COMP-5. + 10 NUM8 PIC S9(18) COMP-9. + """ + + val integerDataSample: Array[Byte] = Array( + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFE, // 18446462598732906494 + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFE, // 18374405004694978559 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, // -2 + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFE, // -72339069014573057 + 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xF0, // 17293822569102705136 + 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xF0, // 17294104044079415536 + 0x84, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x21, 0x67, // -8867587666292498073 + 0x67, 0x21, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x84 // -8867587666292498073 + ).map(_.toByte) + + withTempBinFile("strict_bin_fields", ".tmp", integerDataSample) { tempFile => + val expectedSchema = + """root + | |-- NUM1: decimal(20,0) (nullable = true) + | |-- NUM2: decimal(20,0) (nullable = true) + | |-- NUM3: long (nullable = true) + | |-- NUM4: long (nullable = true) + | |-- STRUCT: struct (nullable = true) + | | |-- NUM5: decimal(20,0) (nullable = true) + | | |-- NUM6: decimal(20,0) (nullable = true) + | | |-- NUM7: long (nullable = true) + | | |-- NUM8: long (nullable = true)""".stripMargin + + val expectedData = """{"NUM1":18446462598732906494,"NUM2":18374405004694978559,"NUM3":-2,"NUM4":-72339069014573057,"STRUCT":{"NUM5":17293822569102705136,"NUM6":17294104044079415536,"NUM7":-8867587666292498073,"NUM8":-8867587666292498073}}""" + + val df = spark.read + .format("cobol") + .option("copybook_contents", copybook) + .option("record_format", 
"F") + .option("pedantic", "true") + .load(tempFile) + + val actualSchema = df.schema.treeString + val actualData = df.toJSON.collect().mkString(",") + + compareText(actualSchema, expectedSchema) + assert(actualData == expectedData) + } + } + } +}