2 Unix SMB/Netbios implementation.
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 Adding for Japanese language by <fujita@ainix.isac.co.jp> 1994.9.5
22 and extend coding system to EUC/SJIS/JIS/HEX at 1994.10.11
23 and add all jis codes sequence type at 1995.8.16
24 Notes: Hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>
31 * Function pointers that get overridden when multi-byte code pages
35 char *(*multibyte_strchr)(char *, int ) = (char *(*)(char *, int )) strchr;
36 char *(*multibyte_strrchr)(char *, int ) = (char *(*)(char *, int )) strrchr;
37 char *(*multibyte_strstr)(char *, char *) = (char *(*)(char *, char *)) strstr;
38 char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok;
41 * Kanji is treated differently here due to historical accident of
42 * it being the first non-English codepage added to Samba.
43 * The define 'KANJI' is being overloaded to mean 'use kanji codepage
44 * by default' and also 'this is the filename-to-disk conversion
45 * method to use'. This really should be removed and all control
46 * over this left in the smb.conf parameters 'client codepage'
47 * and 'coding system'.
53 * Set the default conversion to be the functions in
57 static int not_multibyte_char(char);
59 char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format;
60 char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format;
61 int (*is_multibyte_char)(char) = not_multibyte_char;
66 * Set the default conversion to be the function
67 * sj_to_sj in this file.
70 static char *sj_to_sj(char *from, BOOL overwrite);
71 static int kanji_multibyte_char(char);
73 char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj;
74 char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj;
75 int (*is_multibyte_char)(char) = kanji_multibyte_char;
79 /* jis si/so sequence */
80 static char jis_kso = JIS_KSO;
81 static char jis_ksi = JIS_KSI;
82 static char hex_tag = HEXTAG;
84 /*******************************************************************
86 ********************************************************************/
87 /*******************************************************************
88 Search S1 for a token delimited by any char of S2.
89 S1 contains SHIFT JIS chars.
90 ********************************************************************/
91 static char *sj_strtok(char *s1, char *s2)
93 static char *s = NULL;
101 for (q = s1; *s1; ) {
102 if (is_shift_jis (*s1)) {
104 } else if (is_kana (*s1)) {
107 char *p = strchr (s2, *s1);
126 /*******************************************************************
127 Search for string S2 in S1.
128 S1 contains SHIFT JIS chars.
129 ********************************************************************/
130 static char *sj_strstr(char *s1, char *s2)
132 int len = strlen ((char *) s2);
137 if (strncmp (s1, s2, len) == 0)
140 if (is_shift_jis (*s1)) {
149 /*******************************************************************
150 Search for char C from the beginning of S.
151 S contains SHIFT JIS chars.
152 ********************************************************************/
153 static char *sj_strchr (char *s, int c)
158 if (is_shift_jis (*s)) {
167 /*******************************************************************
168 Search for char C from the end of S.
169 S contains SHIFT JIS chars.
170 ********************************************************************/
171 static char *sj_strrchr(char *s, int c)
179 if (is_shift_jis (*s)) {
188 /*******************************************************************
189 Kanji multibyte char function.
190 *******************************************************************/
192 static int kanji_multibyte_char(char c)
194 if(is_shift_jis(c)) {
196 } else if (is_kana(c)) {
202 /*******************************************************************
203 Hangul (Korean - code page 949) functions
204 ********************************************************************/
205 /*******************************************************************
206 Search S1 for a token delimited by any char of S2.
207 S1 contains hangul chars.
208 ********************************************************************/
209 static char *hangul_strtok(char *s1, char *s2)
211 static char *s = NULL;
219 for (q = s1; *s1; ) {
220 if (is_hangul (*s1)) {
223 char *p = strchr (s2, *s1);
242 /*******************************************************************
243 Search for string S2 in S1.
244 S1 contains hangul chars.
245 ********************************************************************/
246 static char *hangul_strstr(char *s1, char *s2)
248 int len = strlen ((char *) s2);
253 if (strncmp (s1, s2, len) == 0)
256 if (is_hangul (*s1)) {
265 /*******************************************************************
266 Search for char C from the beginning of S.
267 S contains hangul chars.
268 ********************************************************************/
269 static char *hangul_strchr (char *s, int c)
274 if (is_hangul (*s)) {
283 /*******************************************************************
284 Search for char C from the end of S.
285 S contains hangul chars.
286 ********************************************************************/
287 static char *hangul_strrchr(char *s, int c)
295 if (is_hangul (*s)) {
304 /*******************************************************************
305 Hangul multibyte char function.
306 *******************************************************************/
308 static int hangul_multibyte_char(char c)
316 /*******************************************************************
317 Big5 Traditional Chinese (code page 950) functions
318 ********************************************************************/
320 /*******************************************************************
321 Search S1 for a token delimited by any char of S2.
322 S1 contains big5 chars.
323 ********************************************************************/
324 static char *big5_strtok(char *s1, char *s2)
326 static char *s = NULL;
334 for (q = s1; *s1; ) {
335 if (is_big5_c1 (*s1)) {
338 char *p = strchr (s2, *s1);
357 /*******************************************************************
358 Search for string S2 in S1.
359 S1 contains big5 chars.
360 ********************************************************************/
361 static char *big5_strstr(char *s1, char *s2)
363 int len = strlen ((char *) s2);
368 if (strncmp (s1, s2, len) == 0)
371 if (is_big5_c1 (*s1)) {
380 /*******************************************************************
381 Search for char C from the beginning of S.
382 S contains big5 chars.
383 ********************************************************************/
384 static char *big5_strchr (char *s, int c)
389 if (is_big5_c1 (*s)) {
398 /*******************************************************************
399 Search for char C from the end of S.
400 S contains big5 chars.
401 ********************************************************************/
402 static char *big5_strrchr(char *s, int c)
410 if (is_big5_c1 (*s)) {
419 /*******************************************************************
420 Big5 multibyte char function.
421 *******************************************************************/
423 static int big5_multibyte_char(char c)
431 /*******************************************************************
433 ********************************************************************/
434 /* conversion buffer */
435 static char cvtbuf[1024];
437 /*******************************************************************
439 ********************************************************************/
440 static int euc2sjis (int hi, int lo)
443 return ((hi / 2 + (hi < 0xdf ? 0x31 : 0x71)) << 8) |
444 (lo - (lo >= 0xe0 ? 0x60 : 0x61));
446 return ((hi / 2 + (hi < 0xdf ? 0x30 : 0x70)) << 8) | (lo - 2);
449 static int sjis2euc (int hi, int lo)
452 return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
454 return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
455 (lo + (lo >= 0x7f ? 0x60 : 0x61));
458 /*******************************************************************
459 Convert FROM, which contains SHIFT JIS codes, to EUC codes
460 return converted buffer
461 ********************************************************************/
462 static char *sj_to_euc(char *from, BOOL overwrite)
467 save = (char *) from;
468 for (out = cvtbuf; *from;) {
469 if (is_shift_jis (*from)) {
470 int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
471 *out++ = (code >> 8) & 0xff;
474 } else if (is_kana (*from)) {
483 strcpy((char *) save, (char *) cvtbuf);
484 return (char *) save;
490 /*******************************************************************
491 Convert FROM, which contains EUC codes, to SHIFT JIS codes
492 return converted buffer
493 ********************************************************************/
494 static char *euc_to_sj(char *from, BOOL overwrite)
499 save = (char *) from;
500 for (out = cvtbuf; *from; ) {
501 if (is_euc (*from)) {
502 int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
503 *out++ = (code >> 8) & 0xff;
506 } else if (is_euc_kana (*from)) {
515 strcpy(save, (char *) cvtbuf);
522 /*******************************************************************
523 JIS7,JIS8,JUNET <-> SJIS
524 ********************************************************************/
525 static int sjis2jis(int hi, int lo)
528 return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
530 return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) |
531 (lo - (lo >= 0x7f ? 0x20 : 0x1f));
534 static int jis2sjis(int hi, int lo)
537 return ((hi / 2 + (hi < 0x5f ? 0x71 : 0xb1)) << 8) |
538 (lo + (lo >= 0x60 ? 0x20 : 0x1f));
540 return ((hi / 2 + (hi < 0x5f ? 0x70 : 0xb0)) << 8) | (lo + 0x7e);
543 /*******************************************************************
544 Convert FROM, which contains JIS codes, to SHIFT JIS codes
545 return converted buffer
546 ********************************************************************/
547 static char *jis8_to_sj(char *from, BOOL overwrite)
554 save = (char *) from;
555 for (out = cvtbuf; *from;) {
556 if (is_esc (*from)) {
557 if (is_so1 (from[1]) && is_so2 (from[2])) {
560 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
563 } else { /* sequence error */
575 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
576 *out++ = (code >> 8) & 0xff;
586 strcpy (save, (char *) cvtbuf);
593 /*******************************************************************
594 Convert FROM, which contains SHIFT JIS codes, to JIS codes
595 return converted buffer
596 ********************************************************************/
597 static char *sj_to_jis8(char *from, BOOL overwrite)
604 save = (char *) from;
605 for (out = cvtbuf; *from; ) {
606 if (is_shift_jis (*from)) {
609 case _KJ_ROMAN: /* to KANJI */
616 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
617 *out++ = (code >> 8) & 0xff;
622 case _KJ_KANJI: /* to ROMAN/KANA */
633 case _KJ_KANJI: /* to ROMAN/KANA */
642 strcpy (save, (char *) cvtbuf);
649 /*******************************************************************
650 Convert FROM, which contains 7-bit JIS codes, to SHIFT JIS codes
651 return converted buffer
652 ********************************************************************/
653 static char *jis7_to_sj(char *from, BOOL overwrite)
660 save = (char *) from;
661 for (out = cvtbuf; *from;) {
662 if (is_esc (*from)) {
663 if (is_so1 (from[1]) && is_so2 (from[2])) {
666 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
669 } else { /* sequence error */
672 } else if (is_so (*from)) {
673 shifted = _KJ_KANA; /* to KANA */
675 } else if (is_si (*from)) {
676 shifted = _KJ_ROMAN; /* to ROMAN */
687 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
688 *out++ = (code >> 8) & 0xff;
694 *out++ = ((int) from[0]) + 0x80;
701 strcpy (save, (char *) cvtbuf);
708 /*******************************************************************
709 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS codes
710 return converted buffer
711 ********************************************************************/
712 static char *sj_to_jis7(char *from, BOOL overwrite)
719 save = (char *) from;
720 for (out = cvtbuf; *from; ) {
721 if (is_shift_jis (*from)) {
725 *out++ = jis_si; /* to ROMAN and through down */
726 case _KJ_ROMAN: /* to KANJI */
733 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
734 *out++ = (code >> 8) & 0xff;
737 } else if (is_kana (from[0])) {
739 case _KJ_KANJI: /* to ROMAN */
743 case _KJ_ROMAN: /* to KANA */
748 *out++ = ((int) *from++) - 0x80;
752 *out++ = jis_si; /* to ROMAN */
755 case _KJ_KANJI: /* to ROMAN */
767 *out++ = jis_si; /* to ROMAN */
769 case _KJ_KANJI: /* to ROMAN */
777 strcpy (save, (char *) cvtbuf);
784 /*******************************************************************
785 Convert FROM, which contains 7-bit JIS (junet) codes, to SHIFT JIS codes
786 return converted buffer
787 ********************************************************************/
788 static char *junet_to_sj(char *from, BOOL overwrite)
795 save = (char *) from;
796 for (out = cvtbuf; *from;) {
797 if (is_esc (*from)) {
798 if (is_so1 (from[1]) && is_so2 (from[2])) {
801 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
804 } else if (is_juk1(from[1]) && is_juk2 (from[2])) {
807 } else { /* sequence error */
819 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
820 *out++ = (code >> 8) & 0xff;
826 *out++ = ((int) from[0]) + 0x80;
833 strcpy (save, (char *) cvtbuf);
840 /*******************************************************************
841 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS (junet) codes
842 return converted buffer
843 ********************************************************************/
844 static char *sj_to_junet(char *from, BOOL overwrite)
851 save = (char *) from;
852 for (out = cvtbuf; *from; ) {
853 if (is_shift_jis (*from)) {
857 case _KJ_ROMAN: /* to KANJI */
864 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
865 *out++ = (code >> 8) & 0xff;
868 } else if (is_kana (from[0])) {
870 case _KJ_KANJI: /* to ROMAN */
871 case _KJ_ROMAN: /* to KANA */
873 *out++ = junet_kana1;
874 *out++ = junet_kana2;
878 *out++ = ((int) *from++) - 0x80;
882 case _KJ_KANJI: /* to ROMAN */
894 case _KJ_KANJI: /* to ROMAN */
902 strcpy (save, (char *) cvtbuf);
909 /*******************************************************************
911 ********************************************************************/
912 /* ":xx" -> a byte */
913 static char *hex_to_sj(char *from, BOOL overwrite)
920 if (*sp == hex_tag && isxdigit (sp[1]) && isxdigit (sp[2])) {
921 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
928 strcpy ((char *) from, (char *) cvtbuf);
929 return (char *) from;
935 /*******************************************************************
937 ********************************************************************/
938 static char *sj_to_hex(char *from, BOOL overwrite)
940 unsigned char *sp, *dp;
942 sp = (unsigned char*) from;
943 dp = (unsigned char*) cvtbuf;
947 *dp++ = bin2hex (((*sp)>>4)&0x0f);
948 *dp++ = bin2hex ((*sp)&0x0f);
950 } else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1])) {
952 *dp++ = bin2hex (((*sp)>>4)&0x0f);
953 *dp++ = bin2hex ((*sp)&0x0f);
956 *dp++ = bin2hex (((*sp)>>4)&0x0f);
957 *dp++ = bin2hex ((*sp)&0x0f);
964 strcpy ((char *) from, (char *) cvtbuf);
965 return (char *) from;
971 /*******************************************************************
973 ********************************************************************/
974 /* ":xx" CAP -> a byte */
975 static char *cap_to_sj(char *from, BOOL overwrite)
983 * The only change between this and hex_to_sj is here. sj_to_cap only
984 * translates characters greater or equal to 0x80 - make sure that here
985 * we only do the reverse (that's why the strchr is used rather than
986 * isxdigit). Based on a fix from ado@elsie.nci.nih.gov (Arthur David Olson).
988 if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit (sp[2])) {
989 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
996 strcpy ((char *) from, (char *) cvtbuf);
997 return (char *) from;
1003 /*******************************************************************
1004 kanji/kana -> ":xx" - CAP format.
1005 ********************************************************************/
1006 static char *sj_to_cap(char *from, BOOL overwrite)
1008 unsigned char *sp, *dp;
1010 sp = (unsigned char*) from;
1011 dp = (unsigned char*) cvtbuf;
1015 *dp++ = bin2hex (((*sp)>>4)&0x0f);
1016 *dp++ = bin2hex ((*sp)&0x0f);
1024 strcpy ((char *) from, (char *) cvtbuf);
1025 return (char *) from;
1031 /*******************************************************************
1033 ********************************************************************/
1034 static char *sj_to_sj(char *from, BOOL overwrite)
1037 strcpy (cvtbuf, (char *) from);
1040 return (char *) from;
1044 /************************************************************************
1046 _dos_to_unix _unix_to_dos
1047 ************************************************************************/
1049 static void setup_string_function(int codes)
1053 _dos_to_unix = dos2unix_format;
1054 _unix_to_dos = unix2dos_format;
1058 _dos_to_unix = sj_to_sj;
1059 _unix_to_dos = sj_to_sj;
1063 _dos_to_unix = sj_to_euc;
1064 _unix_to_dos = euc_to_sj;
1068 _dos_to_unix = sj_to_jis7;
1069 _unix_to_dos = jis7_to_sj;
1073 _dos_to_unix = sj_to_jis8;
1074 _unix_to_dos = jis8_to_sj;
1078 _dos_to_unix = sj_to_junet;
1079 _unix_to_dos = junet_to_sj;
1083 _dos_to_unix = sj_to_hex;
1084 _unix_to_dos = hex_to_sj;
1088 _dos_to_unix = sj_to_cap;
1089 _unix_to_dos = cap_to_sj;
1095 * Interpret coding system.
1097 void interpret_coding_system(char *str)
1099 int codes = UNKNOWN_CODE;
1101 if (strequal (str, "sjis")) {
1103 } else if (strequal (str, "euc")) {
1105 } else if (strequal (str, "cap")) {
1108 } else if (strequal (str, "hex")) {
1111 } else if (strncasecmp (str, "hex", 3)) {
1113 hex_tag = (str[3] ? str[3] : HEXTAG);
1114 } else if (strequal (str, "j8bb")) {
1118 } else if (strequal (str, "j8bj") || strequal (str, "jis8")) {
1122 } else if (strequal (str, "j8bh")) {
1126 } else if (strequal (str, "j8@b")) {
1130 } else if (strequal (str, "j8@j")) {
1134 } else if (strequal (str, "j8@h")) {
1138 } else if (strequal (str, "j7bb")) {
1142 } else if (strequal (str, "j7bj") || strequal (str, "jis7")) {
1146 } else if (strequal (str, "j7bh")) {
1150 } else if (strequal (str, "j7@b")) {
1154 } else if (strequal (str, "j7@j")) {
1158 } else if (strequal (str, "j7@h")) {
1162 } else if (strequal (str, "jubb")) {
1166 } else if (strequal (str, "jubj") || strequal (str, "junet")) {
1170 } else if (strequal (str, "jubh")) {
1174 } else if (strequal (str, "ju@b")) {
1178 } else if (strequal (str, "ju@j")) {
1182 } else if (strequal (str, "ju@h")) {
1187 setup_string_function (codes);
1190 /*******************************************************************
1191 Non multibyte char function.
1192 *******************************************************************/
1194 static int not_multibyte_char(char c)
1199 /*******************************************************************
1200 Setup the function pointers for the functions that are replaced
1201 when multi-byte codepages are used.
1203 The dos_to_unix and unix_to_dos function pointers are only
1204 replaced by setup_string_function called by interpret_coding_system
1206 *******************************************************************/
1208 void initialize_multibyte_vectors( int client_codepage)
1210 switch( client_codepage )
1212 case KANJI_CODEPAGE:
1213 multibyte_strchr = (char *(*)(char *, int )) sj_strchr;
1214 multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
1215 multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
1216 multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
1217 is_multibyte_char = kanji_multibyte_char;
1219 case HANGUL_CODEPAGE:
1220 multibyte_strchr = (char *(*)(char *, int )) hangul_strchr;
1221 multibyte_strrchr = (char *(*)(char *, int )) hangul_strrchr;
1222 multibyte_strstr = (char *(*)(char *, char *)) hangul_strstr;
1223 multibyte_strtok = (char *(*)(char *, char *)) hangul_strtok;
1224 is_multibyte_char = hangul_multibyte_char;
1227 multibyte_strchr = (char *(*)(char *, int )) big5_strchr;
1228 multibyte_strrchr = (char *(*)(char *, int )) big5_strrchr;
1229 multibyte_strstr = (char *(*)(char *, char *)) big5_strstr;
1230 multibyte_strtok = (char *(*)(char *, char *)) big5_strtok;
1231 is_multibyte_char = big5_multibyte_char;
1234 multibyte_strchr = (char *(*)(char *, int )) strchr;
1235 multibyte_strrchr = (char *(*)(char *, int )) strrchr;
1236 multibyte_strstr = (char *(*)(char *, char *)) strstr;
1237 multibyte_strtok = (char *(*)(char *, char *)) strtok;
1238 is_multibyte_char = not_multibyte_char;