16
16
17
17
package za .co .absa .cobrix .cobol .reader .schema
18
18
19
+ import za .co .absa .cobrix .cobol .parser .encoding .codepage .CodePage
20
+
19
21
import java .time .ZonedDateTime
20
22
import java .time .format .DateTimeFormatter
21
- import za .co .absa .cobrix .cobol .parser .Copybook
23
+ import za .co .absa .cobrix .cobol .parser .{Copybook , CopybookParser }
24
+ import za .co .absa .cobrix .cobol .parser .encoding .{ASCII , EBCDIC }
22
25
import za .co .absa .cobrix .cobol .parser .policies .MetadataPolicy
26
+ import za .co .absa .cobrix .cobol .reader .parameters .ReaderParameters
23
27
import za .co .absa .cobrix .cobol .reader .policies .SchemaRetentionPolicy .SchemaRetentionPolicy
24
28
29
+ import java .nio .charset .{Charset , StandardCharsets }
30
+ import scala .collection .immutable .HashMap
31
+
25
32
26
33
/**
27
34
* This class provides a view on a COBOL schema from the perspective of Spark. When provided with a parsed copybook the class
@@ -59,3 +66,72 @@ class CobolSchema(val copybook: Copybook,
59
66
timestampFormat.format(now)
60
67
}
61
68
}
69
+
70
/**
  * Companion object with factory helpers for building [[CobolSchema]] instances.
  */
object CobolSchema {
  /**
    * Builds a [[CobolSchema]] from one or more copybook texts and the reader parameters.
    *
    * Every copybook is parsed with the same set of options taken from `readerParameters`.
    * When exactly one copybook is supplied its parse result is used directly; otherwise each
    * copybook is parsed on its own and the results are combined with `Copybook.merge`.
    *
    * @param copyBookContents contents of the copybooks to parse; must not be empty.
    * @param readerParameters reader options controlling parsing and the resulting schema.
    * @return the schema built from the parsed (and, if needed, merged) copybook(s).
    * @throws IllegalArgumentException if `copyBookContents` is empty.
    */
  def fromReaderParameters(copyBookContents: Seq[String], readerParameters: ReaderParameters): CobolSchema = {
    if (copyBookContents.isEmpty) {
      // NOTE(review): the message text is kept exactly as found, including its leading space.
      throw new IllegalArgumentException(" At least one copybook must be specified.")
    }

    val encoding = if (readerParameters.isEbcdic) EBCDIC else ASCII
    val segmentRedefines = readerParameters.multisegment.map(r => r.segmentIdRedefineMap.values.toList.distinct).getOrElse(Nil)
    val fieldParentMap = readerParameters.multisegment.map(r => r.fieldParentMap).getOrElse(HashMap[String, String]())
    val codePage = getCodePage(readerParameters.ebcdicCodePage, readerParameters.ebcdicCodePageClass)
    // An empty charset name falls back to UTF-8.
    val asciiCharset = if (readerParameters.asciiCharset.isEmpty) StandardCharsets.UTF_8 else Charset.forName(readerParameters.asciiCharset)

    // Single place where parser options are wired, shared by the single- and multi-copybook paths
    // so that the two paths cannot drift apart when an option is added.
    def parseCopybook(contents: String) =
      CopybookParser.parseTree(encoding,
        contents,
        readerParameters.dropGroupFillers,
        readerParameters.dropValueFillers,
        readerParameters.fillerNamingPolicy,
        segmentRedefines,
        fieldParentMap,
        readerParameters.stringTrimmingPolicy,
        readerParameters.commentPolicy,
        readerParameters.strictSignOverpunch,
        readerParameters.improvedNullDetection,
        readerParameters.decodeBinaryAsHex,
        codePage,
        asciiCharset,
        readerParameters.isUtf16BigEndian,
        readerParameters.floatingPointFormat,
        readerParameters.nonTerminals,
        readerParameters.occursMappings,
        readerParameters.debugFieldsPolicy,
        readerParameters.fieldCodePage)

    val schema = if (copyBookContents.size == 1) {
      parseCopybook(copyBookContents.head)
    } else {
      Copybook.merge(copyBookContents.map(cpb => parseCopybook(cpb)))
    }

    val segIdFieldCount = readerParameters.multisegment.map(p => p.segmentLevelIds.size).getOrElse(0)
    // NOTE(review): the default prefix literal is kept exactly as found (a single space) - confirm it is intended.
    val segmentIdPrefix = readerParameters.multisegment.map(p => p.segmentIdPrefix).getOrElse(" ")

    new CobolSchema(
      schema,
      readerParameters.schemaPolicy,
      readerParameters.inputFileNameColumn,
      readerParameters.generateRecordId,
      readerParameters.generateRecordBytes,
      segIdFieldCount,
      segmentIdPrefix,
      readerParameters.metadataPolicy)
  }

  /**
    * Resolves the code page to use.
    *
    * @param codePageName  name passed to `CodePage.getCodePageByName` when no class is given.
    * @param codePageClass optional class name; when defined it takes precedence and is passed
    *                      to `CodePage.getCodePageByClass`.
    * @return the resolved code page.
    */
  def getCodePage(codePageName: String, codePageClass: Option[String]): CodePage = {
    codePageClass match {
      case Some(c) => CodePage.getCodePageByClass(c)
      case None    => CodePage.getCodePageByName(codePageName)
    }
  }
}
0 commit comments