/*
 * Routines for handling character sets
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
/*
 * Wikipedia's "Character encoding" template, giving a pile of character encodings and
 * Wikipedia pages for them:
 *
 *    http://en.wikipedia.org/wiki/Template:Character_encoding
 *
 * Unicode character encoding model:
 *
 *    http://www.unicode.org/reports/tr17/
 *
 * International Components for Unicode character set mapping tables:
 *
 *    http://site.icu-project.org/charts/charset
 *
 * MSDN information on code pages:
 *
 *    http://msdn.microsoft.com/en-us/library/dd317752(v=VS.85).aspx
 *
 * ASCII-based code pages, from IBM:
 *
 *    http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html
 *
 * EBCDIC code pages, from IBM:
 *
 *    http://www-03.ibm.com/systems/i/software/globalization/codepages.html
 */
/*
 * ASCII/EBCDIC conversion tables from
 * http://www.room42.com/store/computer_center/code_tables.shtml
 */
64 static guint8 ASCII_translate_EBCDIC [ 256 ] = {
65 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
66 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
67 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
68 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
69 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D,
70 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
71 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
72 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
73 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8,
74 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
75 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
76 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
77 0x7D, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
78 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
79 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
80 0xA8, 0xA9, 0xC0, 0x6A, 0xD0, 0xA1, 0x4B,
81 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
82 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
83 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
84 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
85 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
86 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
87 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
88 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
89 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
90 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
91 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
92 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
93 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
94 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
95 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
96 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B
100 ASCII_to_EBCDIC(guint8 *buf, guint bytes)
107 for (i = 0; i < bytes; i++, bufptr++) {
108 *bufptr = ASCII_translate_EBCDIC[*bufptr];
113 ASCII_to_EBCDIC1(guint8 c)
115 return ASCII_translate_EBCDIC[c];
119 static guint8 EBCDIC_translate_ASCII [ 256 ] = {
120 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
121 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
122 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
123 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
124 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
125 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
126 0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
127 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,
128 0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
129 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
130 0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
131 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
132 0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
133 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
134 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
135 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
136 0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
137 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
138 0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71,
139 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
140 0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
141 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,
142 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
143 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,
144 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
145 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
146 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51,
147 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
148 0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
149 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
150 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
151 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
155 EBCDIC_to_ASCII(guint8 *buf, guint bytes)
162 for (i = 0; i < bytes; i++, bufptr++) {
163 *bufptr = EBCDIC_translate_ASCII[*bufptr];
168 EBCDIC_to_ASCII1(guint8 c)
170 return EBCDIC_translate_ASCII[c];