Skip to content

Commit

Permalink
Add CP1147 code page (France) which is same as CP297 with 9F is repla…
Browse files Browse the repository at this point in the history
…ced with the "€" (euro) character.
  • Loading branch information
yruslan committed Jan 15, 2025
1 parent 2f9309a commit 42cdebb
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1646,6 +1646,7 @@ The output looks like this:
| .option("ebcdic_code_page", "cp1143") | EBCDIC 1143 | Same as code page 278 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1144") | EBCDIC 1144 | Same as code page 280 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1145") | EBCDIC 1145 | Same as code page 284 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1147") | EBCDIC 1147 | Same as code page 297 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1148") | EBCDIC 1148 | Same as code page 500 with € at the position of the international currency symbol ¤. |
| .option("ebcdic_code_page", "cp1364") | EBCDIC 1364 | Double-byte code page CCSID-1364, Korean. |
| .option("ebcdic_code_page", "cp1388") | EBCDIC 1388 | Double-byte code page CCSID-1388, Simplified Chinese. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ object CodePage extends Logging {
case "cp1144" => new CodePage1144
case "cp1145" => new CodePage1145
case "cp1146" => new CodePage1146
case "cp1147" => new CodePage1147
case "cp1148" => new CodePage1148
case "cp1364" => new CodePage1364
case "cp1388" => new CodePage1388
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.encoding.codepage

/**
  * EBCDIC code page 1147 (France).
  *
  * It is the same as code page 297, but with € at the position of
  * the international currency symbol ¤.
  */
class CodePage1147 extends SingleByteCodePage(CodePage1147.ebcdicToAsciiMapping) {
  // Short name reported for this code page.
  override def codePageShortName: String = "cp1147"
}

object CodePage1147 {
  /**
    * EBCDIC code page 1147 to ASCII conversion table.
    *
    * The table has 256 entries laid out as 16 rows of 16, one entry per EBCDIC byte value
    * (the trailing comment on each row gives the byte range it covers).
    * It follows the EBCDIC 297 table published at
    * https://en.wikibooks.org/wiki/Character_Encodings/Code_Tables/EBCDIC/EBCDIC_297
    * with '€' placed at position 0x9F.
    *
    * Non-printable characters map used: http://www.pacsys.com/asciitab.htm
    */
  val ebcdicToAsciiMapping: Array[Char] = {
    // Brings the named non-literal entries (c00, spc, nel, rsp, bsh, qts, qtd, shy, ...) into scope.
    import EbcdicNonPrintable._

    Array[Char](
      c00, c01, c02, c03, spc, c09, spc, del, spc, spc, spc, c0b, c0c, ccr, c0e, c0f, // 0x00 - 0x0F
      c10, c11, c12, c13, spc, nel, c08, spc, c18, c19, spc, spc, c1c, c1d, c1e, c1f, // 0x10 - 0x1F
      spc, spc, spc, spc, spc, clf, c17, c1b, spc, spc, spc, spc, spc, c05, c06, c07, // 0x20 - 0x2F
      spc, spc, c16, spc, spc, spc, spc, c04, spc, spc, spc, spc, c14, c15, spc, c1a, // 0x30 - 0x3F
      ' ', rsp, 'â', 'ä', '@', 'á', 'ã', 'å', bsh, 'ñ', '°', '.', '<', '(', '+', '!', // 0x40 - 0x4F
      '&', '{', 'ê', 'ë', '}', 'í', 'î', 'ï', 'ì', 'ß', '§', '$', '*', ')', ';', '^', // 0x50 - 0x5F
      '-', '/', 'Â', 'Ä', 'À', 'Á', 'Ã', 'Å', 'Ç', 'Ñ', 'ù', ',', '%', '_', '>', '?', // 0x60 - 0x6F
      'ø', 'É', 'Ê', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', 'µ', ':', '£', 'à', qts, '=', qtd, // 0x70 - 0x7F
      'Ø', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', '«', '»', 'ð', 'ý', 'þ', '±', // 0x80 - 0x8F
      '[', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ª', 'º', 'æ', '¸', 'Æ', '€', // 0x90 - 0x9F
      '`', '¨', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¡', '¿', 'Ð', 'Ý', 'Þ', '®', // 0xA0 - 0xAF
      '¢', '#', '¥', '·', '©', ']', '¶', '¼', '½', '¾', '¬', '|', '¯', '~', '´', '×', // 0xB0 - 0xBF
      'é', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', shy, 'ô', 'ö', 'ò', 'ó', 'õ', // 0xC0 - 0xCF
      'è', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', '¹', 'û', 'ü', '¦', 'ú', 'ÿ', // 0xD0 - 0xDF
      'ç', '÷', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '²', 'Ô', 'Ö', 'Ò', 'Ó', 'Õ', // 0xE0 - 0xEF
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc  // 0xF0 - 0xFF
    )
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,17 @@ class StringDecodersSpec extends AnyWordSpec {
assert(actual == expected)
}

"decode a CP1147 string special characters" in {
  // EBCDIC input, one byte per expected character; 0x9F is the position that holds '€'.
  val ebcdicBytes = Array(
    0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59,
    0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63,
    0x71, 0x9F, 0x40
  ).map(_.toByte)
  val expected = " °$¨¢#¬¯ä£!üÜ^]ù§ß¢°Öéèæö¨åàÆØÅÄÉ€ "

  val decoded = decodeEbcdicString(ebcdicBytes, KeepAll, new CodePage1147, improvedNullDetection = false)

  assert(decoded == expected)
}

"decode a CP1148 string special characters" in {
val expected = "âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ"
val bytes = Array(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ class CodePageSingleByteSpec extends AnyFunSuite {
assert(codePage.codePageShortName == "cp1146")
}

test("Ensure codepage 'cp1147' gives the associated CodePage") {
  // Look the code page up by name and check that the returned instance reports the same short name.
  val shortName = "cp1147"
  assert(CodePage.getCodePageByName(shortName).codePageShortName == shortName)
}

test("Ensure codepage 'cp1148' gives the associated CodePage") {
val codePage = CodePage.getCodePageByName("cp1148")
assert(codePage.codePageShortName == "cp1148")
Expand Down
2 changes: 1 addition & 1 deletion project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import sbt._

object Dependencies {
private val guavaVersion = "15.0"
private val scodecCoreVersion = "1.11.10"
private val scodecCoreVersion = "1.11.4"
private val antlrValue = "4.8"
private val slf4jVersion = "1.7.25"
private val jacksonVersion = "2.13.0"
Expand Down

0 comments on commit 42cdebb

Please sign in to comment.