Skip to content

Commit 42cdebb

Browse files
committed
Add CP1147 code page (France), which is the same as CP297 except that 0x9F is replaced with the "€" (euro) character.
1 parent 2f9309a commit 42cdebb

File tree

6 files changed

+74
-1
lines changed

6 files changed

+74
-1
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,6 +1646,7 @@ The output looks like this:
16461646
| .option("ebcdic_code_page", "cp1143") | EBCDIC 1143 | Same as code page 278 with € at the position of the international currency symbol ¤. |
16471647
| .option("ebcdic_code_page", "cp1144") | EBCDIC 1144 | Same as code page 280 with € at the position of the international currency symbol ¤. |
16481648
| .option("ebcdic_code_page", "cp1145") | EBCDIC 1145 | Same as code page 284 with € at the position of the international currency symbol ¤. |
1649+
| .option("ebcdic_code_page", "cp1147") | EBCDIC 1147 | Same as code page 297 with € at the position of the international currency symbol ¤. |
16491650
| .option("ebcdic_code_page", "cp1148") | EBCDIC 1148 | Same as code page 500 with € at the position of the international currency symbol ¤. |
16501651
| .option("ebcdic_code_page", "cp1364") | EBCDIC 1364 | Double-byte code page CCSID-1364, Korean. |
16511652
| .option("ebcdic_code_page", "cp1388") | EBCDIC 1388 | Double-byte code page CCSID-1388, Simplified Chinese. |

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ object CodePage extends Logging {
6868
case "cp1144" => new CodePage1144
6969
case "cp1145" => new CodePage1145
7070
case "cp1146" => new CodePage1146
71+
case "cp1147" => new CodePage1147
7172
case "cp1148" => new CodePage1148
7273
case "cp1364" => new CodePage1364
7374
case "cp1388" => new CodePage1388
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/*
2+
* Copyright 2018 ABSA Group Limited
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package za.co.absa.cobrix.cobol.parser.encoding.codepage
18+
19+
/**
20+
* EBCDIC code page 1147 (France) is the same as code page 297,
21+
* with € at the position of the international currency symbol ¤.
22+
*/
23+
class CodePage1147 extends SingleByteCodePage(CodePage1147.ebcdicToAsciiMapping) {
24+
override def codePageShortName: String = "cp1147"
25+
}
26+
27+
object CodePage1147 {
28+
val ebcdicToAsciiMapping: Array[Char] = {
29+
import EbcdicNonPrintable._
30+
31+
/* This is the EBCDIC Code Page 1147 to ASCII conversion table
32+
derived from https://en.wikibooks.org/wiki/Character_Encodings/Code_Tables/EBCDIC/EBCDIC_297 (code page 297), with 0x9F mapped to '€' instead of '¤' */
33+
val ebcdic2ascii: Array[Char] = {
34+
// Non-printable characters map used: http://www.pacsys.com/asciitab.htm
35+
Array[Char](
36+
c00, c01, c02, c03, spc, c09, spc, del, spc, spc, spc, c0b, c0c, ccr, c0e, c0f, // 0 - 15
37+
c10, c11, c12, c13, spc, nel, c08, spc, c18, c19, spc, spc, c1c, c1d, c1e, c1f, // 16 - 31
38+
spc, spc, spc, spc, spc, clf, c17, c1b, spc, spc, spc, spc, spc, c05, c06, c07, // 32 - 47
39+
spc, spc, c16, spc, spc, spc, spc, c04, spc, spc, spc, spc, c14, c15, spc, c1a, // 48 - 63
40+
' ', rsp, 'â', 'ä', '@', 'á', 'ã', 'å', bsh, 'ñ', '°', '.', '<', '(', '+', '!', // 64 - 79
41+
'&', '{', 'ê', 'ë', '}', 'í', 'î', 'ï', 'ì', 'ß', '§', '$', '*', ')', ';', '^', // 80 - 95
42+
'-', '/', 'Â', 'Ä', 'À', 'Á', 'Ã', 'Å', 'Ç', 'Ñ', 'ù', ',', '%', '_', '>', '?', // 96 - 111
43+
'ø', 'É', 'Ê', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'µ', ':', '£', 'à', qts, '=', qtd, // 112 - 127
44+
'Ø', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', '«', '»', 'ð', 'ý', 'þ', '±', // 128 - 143
45+
'[', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ª', 'º', 'æ', '¸', 'Æ', '€', // 144 - 159
46+
'`', '¨', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¡', '¿', 'Ð', 'Ý', 'Þ', '®', // 160 - 175
47+
'¢', '#', '¥', '·', '©', ']', '¶', '¼', '½', '¾', '¬', '|', '¯', '~', '´', '×', // 176 - 191
48+
'é', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', shy, 'ô', 'ö', 'ò', 'ó', 'õ', // 192 - 207
49+
'è', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', '¹', 'û', 'ü', '¦', 'ú', 'ÿ', // 208 - 223
50+
'ç', '÷', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '²', 'Ô', 'Ö', 'Ò', 'Ó', 'Õ', // 224 - 239
51+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc) // 240 - 255
52+
}
53+
ebcdic2ascii
54+
}
55+
}

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,17 @@ class StringDecodersSpec extends AnyWordSpec {
373373
assert(actual == expected)
374374
}
375375

376+
"decode a CP1147 string special characters" in {
377+
val expected = " °$¨¢#¬¯ä£!üÜ^]ù§ß¢°Öéèæö¨åàÆØÅÄÉ€ "
378+
val bytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59,
379+
0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63,
380+
0x71, 0x9F, 0x40).map(_.toByte)
381+
382+
val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1147, improvedNullDetection = false)
383+
384+
assert(actual == expected)
385+
}
386+
376387
"decode a CP1148 string special characters" in {
377388
val expected = "âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ"
378389
val bytes = Array(

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageSingleByteSpec.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ class CodePageSingleByteSpec extends AnyFunSuite {
157157
assert(codePage.codePageShortName == "cp1146")
158158
}
159159

160+
test("Ensure codepage 'cp1147' gives the associated CodePage") {
161+
val codePage = CodePage.getCodePageByName("cp1147")
162+
assert(codePage.codePageShortName == "cp1147")
163+
}
164+
160165
test("Ensure codepage 'cp1148' gives the associated CodePage") {
161166
val codePage = CodePage.getCodePageByName("cp1148")
162167
assert(codePage.codePageShortName == "cp1148")

project/Dependencies.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import sbt._
1818

1919
object Dependencies {
2020
private val guavaVersion = "15.0"
21-
private val scodecCoreVersion = "1.11.10"
21+
private val scodecCoreVersion = "1.11.4"
2222
private val antlrValue = "4.8"
2323
private val slf4jVersion = "1.7.25"
2424
private val jacksonVersion = "2.13.0"

0 commit comments

Comments
 (0)