Skip to content

Commit

Permalink
Add CP284 code page (Spain and Latin America).
Browse files Browse the repository at this point in the history
  • Loading branch information
yruslan committed Nov 6, 2024
1 parent e391a23 commit 6c49999
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 1 deletion.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1629,7 +1629,8 @@ The output looks like this:
| .option("ebcdic_code_page", "cp275") | EBCDIC 275 | Brazil. |
| .option("ebcdic_code_page", "cp277") | EBCDIC 277 | Denmark and Norway. |
| .option("ebcdic_code_page", "cp278") | EBCDIC 278 | Finland and Sweden. |
| .option("ebcdic_code_page", "cp278") | EBCDIC 280 | Italy. |
| .option("ebcdic_code_page", "cp280") | EBCDIC 280 | Italy. |
| .option("ebcdic_code_page", "cp284") | EBCDIC 284 | Spain and Latin America. |
| .option("ebcdic_code_page", "cp300") | EBCDIC 300 | Double-byte code page with Japanese and Latin characters. |
| .option("ebcdic_code_page", "cp500") | EBCDIC 500 | Belgium, Canada, Switzerland, International. |
| .option("ebcdic_code_page", "cp838") | EBCDIC 838 | Double-byte code page with Thai and Latin characters. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ object CodePage extends Logging {
case "cp277" => new CodePage277
case "cp278" => new CodePage278
case "cp280" => new CodePage280
case "cp284" => new CodePage284
case "cp300" => new CodePage300
case "cp500" => new CodePage500
case "cp838" => new CodePage838
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.encoding.codepage

/**
* EBCDIC code page 284, the code page for Spain and Latin America.
*
* It is a single-byte code page: the class passes the conversion table
* `CodePage284.ebcdicToAsciiMapping` to `SingleByteCodePage`.
*/
class CodePage284 extends SingleByteCodePage(CodePage284.ebcdicToAsciiMapping) {
/** Short name of this code page. */
override def codePageShortName: String = "cp284"
}

object CodePage284 {
  /**
    * Conversion table for EBCDIC code page 284.
    *
    * The table has 256 entries; the entry at index `b` is the character used for the
    * EBCDIC byte value `b` (the trailing comment on each row gives its index range).
    *
    * Printable characters are taken from
    * https://en.wikibooks.org/wiki/Character_Encodings/Code_Tables/EBCDIC/EBCDIC_284
    *
    * Non-printable characters map used: http://www.pacsys.com/asciitab.htm
    */
  val ebcdicToAsciiMapping: Array[Char] = {
    // Brings the named non-printable character constants (c00, spc, del, nel, ...) into scope.
    import EbcdicNonPrintable._

    Array[Char](
      c00, c01, c02, c03, spc, c09, spc, del, spc, spc, spc, c0b, c0c, ccr, c0e, c0f, //   0 - 15
      c10, c11, c12, c13, spc, nel, c08, spc, c18, c19, spc, spc, c1c, c1d, c1e, c1f, //  16 - 31
      spc, spc, spc, spc, spc, clf, c17, c1b, spc, spc, spc, spc, spc, c05, c06, c07, //  32 - 47
      spc, spc, c16, spc, spc, spc, spc, c04, spc, spc, spc, spc, c14, c15, spc, c1a, //  48 - 63
      ' ', rsp, 'â', 'ä', 'à', 'á', 'ã', 'å', 'ç', '¦', '[', '.', '<', '(', '+', '|', //  64 - 79
      '&', 'é', 'ê', 'ë', 'è', 'í', 'î', 'ï', 'ì', 'ß', ']', '$', '*', ')', ';', '¬', //  80 - 95
      '-', '/', 'Â', 'Ä', 'À', 'Á', 'Ã', 'Å', 'Ç', '#', 'ñ', ',', '%', '_', '>', '?', //  96 - 111
      'ø', 'É', 'Ê', 'Ë', 'È', 'Í', 'Î', 'Ï', 'Ì', '`', ':', 'Ñ', '@', qts, '=', qtd, // 112 - 127
      'Ø', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', '«', '»', 'ð', 'ý', 'þ', '±', // 128 - 143
      '°', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 'ª', 'º', 'æ', '¸', 'Æ', '¤', // 144 - 159
      'µ', '¨', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¡', '¿', 'Ð', 'Ý', 'Þ', '®', // 160 - 175
      '¢', '£', '¥', '·', '©', '§', '¶', '¼', '½', '¾', '^', '!', '¯', '~', '´', '×', // 176 - 191
      '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', shy, 'ô', 'ö', 'ò', 'ó', 'õ', // 192 - 207
      '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', '¹', 'û', 'ü', 'ù', 'ú', 'ÿ', // 208 - 223
      bsh, '÷', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '²', 'Ô', 'Ö', 'Ò', 'Ó', 'Õ', // 224 - 239
      '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc  // 240 - 255
    )
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,28 @@ class StringDecodersSpec extends AnyWordSpec {
assert(actual == expected)
}

"decode a CP280 string special characters" in {
  // EBCDIC input, written as Int literals and narrowed to Byte afterwards
  // (values above 0x7F are outside the range of a Byte literal).
  val ebcdicBytes = Array(
    0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59,
    0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63,
    0x71, 0x9F, 0x40
  ).map(_.toByte)

  // One expected character per input byte, in the same order.
  val expected = " ä£!üÜ^@òéߢ°Öàèæöìå§ÆØÅÄɤ "

  val decoded = decodeEbcdicString(ebcdicBytes, KeepAll, new CodePage280, improvedNullDetection = false)

  assert(decoded == expected)
}

"decode a CP284 string special characters" in {
  // EBCDIC input, written as Int literals and narrowed to Byte afterwards
  // (values above 0x7F are outside the range of a Byte literal).
  val ebcdicBytes = Array(
    0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59,
    0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63,
    0x71, 0x9F, 0x40
  ).map(_.toByte)

  // One expected character per input byte, in the same order.
  val expected = " äÑ|üܬ§ñ]ߢ[Ö{}æö¨å@ÆØÅÄɤ "

  val decoded = decodeEbcdicString(ebcdicBytes, KeepAll, new CodePage284, improvedNullDetection = false)

  assert(decoded == expected)
}

"decode a CP500 string special characters" in {
val expected = "âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ"
val bytes = Array(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ class CodePageSingleByteSpec extends AnyFunSuite {
assert(codePage.codePageShortName == "cp280")
}

test("Ensure codepage 'cp284' gives the associated CodePage") {
  // Look the code page up by name and check that the returned instance reports the same short name.
  val requestedName = "cp284"
  val resolved = CodePage.getCodePageByName(requestedName)

  assert(resolved.codePageShortName == requestedName)
}

test("Ensure codepage 'cp300' gives the associated CodePage") {
val codePage = CodePage.getCodePageByName("cp300")
assert(codePage.codePageShortName == "cp300")
Expand Down

0 comments on commit 6c49999

Please sign in to comment.