2 Unix SMB/Netbios implementation.
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 Japanese language support added by <fujita@ainix.isac.co.jp> 1994.9.5,
22 coding systems extended to EUC/SJIS/JIS/HEX on 1994.10.11,
23 and all JIS code sequence types added on 1995.8.16.
24 Notes: Hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>
31 * Function pointers that get overridden when multi-byte code pages
35 char *(*multibyte_strchr)(char *, int ) = (char *(*)(char *, int )) strchr; /* first-occurrence char search; starts as the C library strchr, reassigned by initialize_multibyte_vectors() */
36 char *(*multibyte_strrchr)(char *, int ) = (char *(*)(char *, int )) strrchr; /* last-occurrence char search; starts as the C library strrchr */
37 char *(*multibyte_strstr)(char *, char *) = (char *(*)(char *, char *)) strstr; /* substring search; starts as the C library strstr */
38 char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok; /* tokenizer; starts as the C library strtok */
41 * Kanji is treated differently here due to historical accident of
42 * it being the first non-English codepage added to Samba.
43 * The define 'KANJI' is being overloaded to mean 'use kanji codepage
44 * by default' and also 'this is the filename-to-disk conversion
45 * method to use'. This really should be removed and all control
46 * over this left in the smb.conf parameters 'client codepage'
47 * and 'coding system'.
53 * Set the default conversion to be the functions in
57 static int skip_non_multibyte_char(char); /* forward declaration; the definition appears later in this file */
58 static BOOL not_multibyte_char_1(char); /* forward declaration; the definition appears later in this file */
60 char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format; /* string conversion hook; setup_string_function() reassigns it per coding system */
61 char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format; /* reverse-direction conversion hook; also reassigned by setup_string_function() */
62 int (*_skip_multibyte_char)(char) = skip_non_multibyte_char; /* per-character skip hook; initialize_multibyte_vectors() reassigns it per code page */
63 BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1; /* lead-byte predicate hook; initialize_multibyte_vectors() reassigns it per code page */
68 * Set the default conversion to be the function
69 * sj_to_sj in this file.
72 static char *sj_to_sj(char *from, BOOL overwrite); /* forward declaration; the definition appears later in this file */
73 static int skip_kanji_multibyte_char(char); /* forward declaration of the Kanji skip function */
74 static BOOL is_kanji_multibyte_char_1(char); /* forward declaration of the Kanji lead-byte predicate */
76 char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj; /* string conversion hook; setup_string_function() reassigns it per coding system */
77 char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj; /* reverse-direction conversion hook; same default */
78 int (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char; /* per-character skip hook; initialize_multibyte_vectors() reassigns it per code page */
79 BOOL (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1; /* declared BOOL to match the predicate's return type and the other definition of this pointer in this file */
83 /* JIS SI/SO sequence characters and the hex escape tag */
84 static char jis_kso = JIS_KSO; /* initialised from JIS_KSO; NOTE(review): presumably the byte completing the "to kanji" escape sequence - confirm against the converters */
85 static char jis_ksi = JIS_KSI; /* initialised from JIS_KSI; NOTE(review): presumably the byte completing the "to roman" escape sequence - confirm */
86 static char hex_tag = HEXTAG; /* byte that introduces a two-hex-digit escape in hex_to_sj()/cap_to_sj(); interpret_coding_system() overrides it from the 4th char of a "hex<c>" setting */
88 /*******************************************************************
90 ********************************************************************/
91 /*******************************************************************
92 Search for the next token in S1, delimited by any char of S2.
93 S1 contains SHIFT JIS chars.
94 ********************************************************************/
95 static char *sj_strtok(char *s1, char *s2)
97 static char *s = NULL;
105 for (q = s1; *s1; ) {
106 if (is_shift_jis (*s1)) {
108 } else if (is_kana (*s1)) {
111 char *p = strchr (s2, *s1);
130 /*******************************************************************
131 Search for string S2 in S1.
132 S1 contains SHIFT JIS chars.
133 ********************************************************************/
134 static char *sj_strstr(char *s1, char *s2)
136 int len = strlen ((char *) s2);
141 if (strncmp (s1, s2, len) == 0)
144 if (is_shift_jis (*s1)) {
153 /*******************************************************************
154 Search for char C from the beginning of S.
155 S contains SHIFT JIS chars.
156 ********************************************************************/
157 static char *sj_strchr (char *s, int c)
162 if (is_shift_jis (*s)) {
171 /*******************************************************************
172 Search for char C from the end of S.
173 S contains SHIFT JIS chars.
174 ********************************************************************/
175 static char *sj_strrchr(char *s, int c)
183 if (is_shift_jis (*s)) {
192 /*******************************************************************
193 Kanji multibyte char skip function.
194 *******************************************************************/
196 static int skip_kanji_multibyte_char(char c)
198 if(is_shift_jis(c)) {
200 } else if (is_kana(c)) {
206 /*******************************************************************
207 Kanji multibyte char identification.
208 *******************************************************************/
210 static BOOL is_kanji_multibyte_char_1(char c)
212 return is_shift_jis(c);
215 /*******************************************************************
216 The following functions are the only ones needed to do multibyte
217 support for Hangul, Big5 and Simplified Chinese. Most of the
218 real work for these codepages is done in the generic multibyte
219 functions. The only reason these functions are needed at all
220 is that the is_xxx(c) calls are really preprocessor macros.
221 ********************************************************************/
223 /*******************************************************************
224 Hangul (Korean - code page 949) function.
225 ********************************************************************/
227 static BOOL hangul_is_multibyte_char_1(char c)
232 /*******************************************************************
233 Big5 Traditional Chinese (code page 950) function.
234 ********************************************************************/
236 static BOOL big5_is_multibyte_char_1(char c)
238 return is_big5_c1(c);
241 /*******************************************************************
242 Simplified Chinese (code page 936) function.
243 ********************************************************************/
245 static BOOL simpch_is_multibyte_char_1(char c)
247 return is_simpch_c1(c);
250 /*******************************************************************
251 Generic multibyte functions - used by Hangul, Big5 and Simplified
253 ********************************************************************/
255 /*******************************************************************
256 Search for the next token in S1, delimited by any char of S2.
257 S1 contains generic multibyte chars.
258 ********************************************************************/
260 static char *generic_multibyte_strtok(char *s1, char *s2)
262 static char *s = NULL;
270 for (q = s1; *s1; ) {
271 if ((*is_multibyte_char_1)(*s1)) {
274 char *p = strchr (s2, *s1);
293 /*******************************************************************
294 Search for string S2 in S1.
295 S1 contains generic multibyte chars.
296 ********************************************************************/
298 static char *generic_multibyte_strstr(char *s1, char *s2)
300 int len = strlen ((char *) s2);
305 if (strncmp (s1, s2, len) == 0)
308 if ((*is_multibyte_char_1)(*s1)) {
317 /*******************************************************************
318 Search for char C from the beginning of S.
319 S contains generic multibyte chars.
320 ********************************************************************/
322 static char *generic_multibyte_strchr(char *s, int c)
327 if ((*is_multibyte_char_1)(*s)) {
336 /*******************************************************************
337 Search for char C from the end of S.
338 S contains generic multibyte chars.
339 ********************************************************************/
341 static char *generic_multibyte_strrchr(char *s, int c)
349 if ((*is_multibyte_char_1)(*s)) {
358 /*******************************************************************
359 Generic multibyte char skip function.
360 *******************************************************************/
362 static int skip_generic_multibyte_char(char c)
364 if( (*is_multibyte_char_1)(c)) {
370 /*******************************************************************
372 ********************************************************************/
373 /* Conversion buffer: scratch area in which the converters in this file build their output before strcpy()ing it back over the caller's string. */
374 static char cvtbuf[1024]; /* fixed size; NOTE(review): no length check against this size is visible in the converters - confirm callers bound their input */
376 /*******************************************************************
378 ********************************************************************/
379 static int euc2sjis (int hi, int lo)
382 return ((hi / 2 + (hi < 0xdf ? 0x31 : 0x71)) << 8) |
383 (lo - (lo >= 0xe0 ? 0x60 : 0x61));
385 return ((hi / 2 + (hi < 0xdf ? 0x30 : 0x70)) << 8) | (lo - 2);
388 static int sjis2euc (int hi, int lo)
391 return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
393 return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
394 (lo + (lo >= 0x7f ? 0x60 : 0x61));
397 /*******************************************************************
398 Convert FROM, which contains SHIFT JIS codes, to EUC codes;
399 return the converted buffer.
400 ********************************************************************/
401 static char *sj_to_euc(char *from, BOOL overwrite)
406 save = (char *) from;
407 for (out = cvtbuf; *from;) {
408 if (is_shift_jis (*from)) {
409 int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
410 *out++ = (code >> 8) & 0xff;
413 } else if (is_kana (*from)) {
422 strcpy((char *) save, (char *) cvtbuf);
423 return (char *) save;
429 /*******************************************************************
430 Convert FROM, which contains EUC codes, to SHIFT JIS codes;
431 return the converted buffer.
432 ********************************************************************/
433 static char *euc_to_sj(char *from, BOOL overwrite)
438 save = (char *) from;
439 for (out = cvtbuf; *from; ) {
440 if (is_euc (*from)) {
441 int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
442 *out++ = (code >> 8) & 0xff;
445 } else if (is_euc_kana (*from)) {
454 strcpy(save, (char *) cvtbuf);
461 /*******************************************************************
462 JIS7,JIS8,JUNET <-> SJIS
463 ********************************************************************/
464 static int sjis2jis(int hi, int lo)
467 return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
469 return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) |
470 (lo - (lo >= 0x7f ? 0x20 : 0x1f));
473 static int jis2sjis(int hi, int lo)
476 return ((hi / 2 + (hi < 0x5f ? 0x71 : 0xb1)) << 8) |
477 (lo + (lo >= 0x60 ? 0x20 : 0x1f));
479 return ((hi / 2 + (hi < 0x5f ? 0x70 : 0xb0)) << 8) | (lo + 0x7e);
482 /*******************************************************************
483 Convert FROM, which contains JIS codes, to SHIFT JIS codes;
484 return the converted buffer.
485 ********************************************************************/
486 static char *jis8_to_sj(char *from, BOOL overwrite)
493 save = (char *) from;
494 for (out = cvtbuf; *from;) {
495 if (is_esc (*from)) {
496 if (is_so1 (from[1]) && is_so2 (from[2])) {
499 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
502 } else { /* sequence error */
514 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
515 *out++ = (code >> 8) & 0xff;
525 strcpy (save, (char *) cvtbuf);
532 /*******************************************************************
533 Convert FROM, which contains SHIFT JIS codes, to JIS codes;
534 return the converted buffer.
535 ********************************************************************/
536 static char *sj_to_jis8(char *from, BOOL overwrite)
543 save = (char *) from;
544 for (out = cvtbuf; *from; ) {
545 if (is_shift_jis (*from)) {
548 case _KJ_ROMAN: /* to KANJI */
555 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
556 *out++ = (code >> 8) & 0xff;
561 case _KJ_KANJI: /* to ROMAN/KANA */
572 case _KJ_KANJI: /* to ROMAN/KANA */
581 strcpy (save, (char *) cvtbuf);
588 /*******************************************************************
589 Convert FROM, which contains 7-bit JIS codes, to SHIFT JIS codes;
590 return the converted buffer.
591 ********************************************************************/
592 static char *jis7_to_sj(char *from, BOOL overwrite)
599 save = (char *) from;
600 for (out = cvtbuf; *from;) {
601 if (is_esc (*from)) {
602 if (is_so1 (from[1]) && is_so2 (from[2])) {
605 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
608 } else { /* sequence error */
611 } else if (is_so (*from)) {
612 shifted = _KJ_KANA; /* to KANA */
614 } else if (is_si (*from)) {
615 shifted = _KJ_ROMAN; /* to ROMAN */
626 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
627 *out++ = (code >> 8) & 0xff;
633 *out++ = ((int) from[0]) + 0x80;
640 strcpy (save, (char *) cvtbuf);
647 /*******************************************************************
648 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS codes;
649 return the converted buffer.
650 ********************************************************************/
651 static char *sj_to_jis7(char *from, BOOL overwrite)
658 save = (char *) from;
659 for (out = cvtbuf; *from; ) {
660 if (is_shift_jis (*from)) {
664 *out++ = jis_si; /* to ROMAN and through down */
665 case _KJ_ROMAN: /* to KANJI */
672 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
673 *out++ = (code >> 8) & 0xff;
676 } else if (is_kana (from[0])) {
678 case _KJ_KANJI: /* to ROMAN */
682 case _KJ_ROMAN: /* to KANA */
687 *out++ = ((int) *from++) - 0x80;
691 *out++ = jis_si; /* to ROMAN */
694 case _KJ_KANJI: /* to ROMAN */
706 *out++ = jis_si; /* to ROMAN */
708 case _KJ_KANJI: /* to ROMAN */
716 strcpy (save, (char *) cvtbuf);
723 /*******************************************************************
724 Convert FROM, which contains 7-bit JIS (junet) codes, to SHIFT JIS codes;
725 return the converted buffer.
726 ********************************************************************/
727 static char *junet_to_sj(char *from, BOOL overwrite)
734 save = (char *) from;
735 for (out = cvtbuf; *from;) {
736 if (is_esc (*from)) {
737 if (is_so1 (from[1]) && is_so2 (from[2])) {
740 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
743 } else if (is_juk1(from[1]) && is_juk2 (from[2])) {
746 } else { /* sequence error */
758 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
759 *out++ = (code >> 8) & 0xff;
765 *out++ = ((int) from[0]) + 0x80;
772 strcpy (save, (char *) cvtbuf);
779 /*******************************************************************
780 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS (junet) codes;
781 return the converted buffer.
782 ********************************************************************/
783 static char *sj_to_junet(char *from, BOOL overwrite)
790 save = (char *) from;
791 for (out = cvtbuf; *from; ) {
792 if (is_shift_jis (*from)) {
796 case _KJ_ROMAN: /* to KANJI */
803 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
804 *out++ = (code >> 8) & 0xff;
807 } else if (is_kana (from[0])) {
809 case _KJ_KANJI: /* to ROMAN */
810 case _KJ_ROMAN: /* to KANA */
812 *out++ = junet_kana1;
813 *out++ = junet_kana2;
817 *out++ = ((int) *from++) - 0x80;
821 case _KJ_KANJI: /* to ROMAN */
833 case _KJ_KANJI: /* to ROMAN */
841 strcpy (save, (char *) cvtbuf);
848 /*******************************************************************
850 ********************************************************************/
851 /* ":xx" -> a byte */
852 static char *hex_to_sj(char *from, BOOL overwrite)
859 if (*sp == hex_tag && isxdigit (sp[1]) && isxdigit (sp[2])) {
860 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
867 strcpy ((char *) from, (char *) cvtbuf);
868 return (char *) from;
874 /*******************************************************************
876 ********************************************************************/
877 static char *sj_to_hex(char *from, BOOL overwrite)
879 unsigned char *sp, *dp;
881 sp = (unsigned char*) from;
882 dp = (unsigned char*) cvtbuf;
886 *dp++ = bin2hex (((*sp)>>4)&0x0f);
887 *dp++ = bin2hex ((*sp)&0x0f);
889 } else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1])) {
891 *dp++ = bin2hex (((*sp)>>4)&0x0f);
892 *dp++ = bin2hex ((*sp)&0x0f);
895 *dp++ = bin2hex (((*sp)>>4)&0x0f);
896 *dp++ = bin2hex ((*sp)&0x0f);
903 strcpy ((char *) from, (char *) cvtbuf);
904 return (char *) from;
910 /*******************************************************************
912 ********************************************************************/
913 /* ":xx" CAP -> a byte */
914 static char *cap_to_sj(char *from, BOOL overwrite)
922 * The only change between this and hex_to_sj is here. sj_to_cap only
923 * translates characters greater than or equal to 0x80 - make sure that here
924 * we only do the reverse (that's why strchr is used rather than
925 * isxdigit). Based on fix from ado@elsie.nci.nih.gov (Arthur David Olson).
927 if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit (sp[2])) {
928 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
935 strcpy ((char *) from, (char *) cvtbuf);
936 return (char *) from;
942 /*******************************************************************
943 kanji/kana -> ":xx" - CAP format.
944 ********************************************************************/
945 static char *sj_to_cap(char *from, BOOL overwrite)
947 unsigned char *sp, *dp;
949 sp = (unsigned char*) from;
950 dp = (unsigned char*) cvtbuf;
954 *dp++ = bin2hex (((*sp)>>4)&0x0f);
955 *dp++ = bin2hex ((*sp)&0x0f);
963 strcpy ((char *) from, (char *) cvtbuf);
964 return (char *) from;
970 /*******************************************************************
972 ********************************************************************/
973 static char *sj_to_sj(char *from, BOOL overwrite)
976 strcpy (cvtbuf, (char *) from);
979 return (char *) from;
983 /************************************************************************
985 _dos_to_unix _unix_to_dos
986 ************************************************************************/
988 static void setup_string_function(int codes)
992 _dos_to_unix = dos2unix_format;
993 _unix_to_dos = unix2dos_format;
997 _dos_to_unix = sj_to_sj;
998 _unix_to_dos = sj_to_sj;
1002 _dos_to_unix = sj_to_euc;
1003 _unix_to_dos = euc_to_sj;
1007 _dos_to_unix = sj_to_jis7;
1008 _unix_to_dos = jis7_to_sj;
1012 _dos_to_unix = sj_to_jis8;
1013 _unix_to_dos = jis8_to_sj;
1017 _dos_to_unix = sj_to_junet;
1018 _unix_to_dos = junet_to_sj;
1022 _dos_to_unix = sj_to_hex;
1023 _unix_to_dos = hex_to_sj;
1027 _dos_to_unix = sj_to_cap;
1028 _unix_to_dos = cap_to_sj;
1033 /************************************************************************
1034 Interpret coding system.
1035 ************************************************************************/
1037 void interpret_coding_system(char *str)
1039 int codes = UNKNOWN_CODE;
1041 if (strequal (str, "sjis")) {
1043 } else if (strequal (str, "euc")) {
1045 } else if (strequal (str, "cap")) {
1048 } else if (strequal (str, "hex")) {
1051 } else if (!strncasecmp (str, "hex", 3)) {
1053 hex_tag = (str[3] ? str[3] : HEXTAG);
1054 } else if (strequal (str, "j8bb")) {
1058 } else if (strequal (str, "j8bj") || strequal (str, "jis8")) {
1062 } else if (strequal (str, "j8bh")) {
1066 } else if (strequal (str, "j8@b")) {
1070 } else if (strequal (str, "j8@j")) {
1074 } else if (strequal (str, "j8@h")) {
1078 } else if (strequal (str, "j7bb")) {
1082 } else if (strequal (str, "j7bj") || strequal (str, "jis7")) {
1086 } else if (strequal (str, "j7bh")) {
1090 } else if (strequal (str, "j7@b")) {
1094 } else if (strequal (str, "j7@j")) {
1098 } else if (strequal (str, "j7@h")) {
1102 } else if (strequal (str, "jubb")) {
1106 } else if (strequal (str, "jubj") || strequal (str, "junet")) {
1110 } else if (strequal (str, "jubh")) {
1114 } else if (strequal (str, "ju@b")) {
1118 } else if (strequal (str, "ju@j")) {
1122 } else if (strequal (str, "ju@h")) {
1127 setup_string_function (codes);
1130 /*******************************************************************
1131 Non multibyte char function.
1132 *******************************************************************/
1134 static int skip_non_multibyte_char(char c)
1139 /*******************************************************************
1140 Function that always says a character isn't multibyte.
1141 *******************************************************************/
1143 static BOOL not_multibyte_char_1(char c)
1148 /*******************************************************************
1149 Setup the function pointers for the functions that are replaced
1150 when multi-byte codepages are used.
1152 The dos_to_unix and unix_to_dos function pointers are only
1153 replaced by setup_string_function called by interpret_coding_system
1155 *******************************************************************/
1157 void initialize_multibyte_vectors( int client_codepage)
1159 switch( client_codepage )
1161 case KANJI_CODEPAGE:
1162 multibyte_strchr = (char *(*)(char *, int )) sj_strchr;
1163 multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
1164 multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
1165 multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
1166 _skip_multibyte_char = skip_kanji_multibyte_char;
1167 is_multibyte_char_1 = is_kanji_multibyte_char_1;
1169 case HANGUL_CODEPAGE:
1170 multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
1171 multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
1172 multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
1173 multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
1174 _skip_multibyte_char = skip_generic_multibyte_char;
1175 is_multibyte_char_1 = hangul_is_multibyte_char_1;
1177 multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
1178 multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
1179 multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
1180 multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
1181 _skip_multibyte_char = skip_generic_multibyte_char;
1182 is_multibyte_char_1 = big5_is_multibyte_char_1;
1183 case SIMPLIFIED_CHINESE_CODEPAGE:
1184 multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
1185 multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
1186 multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
1187 multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
1188 _skip_multibyte_char = skip_generic_multibyte_char;
1189 is_multibyte_char_1 = simpch_is_multibyte_char_1;
1192 * Single char size code page.
1195 multibyte_strchr = (char *(*)(char *, int )) strchr;
1196 multibyte_strrchr = (char *(*)(char *, int )) strrchr;
1197 multibyte_strstr = (char *(*)(char *, char *)) strstr;
1198 multibyte_strtok = (char *(*)(char *, char *)) strtok;
1199 _skip_multibyte_char = skip_non_multibyte_char;
1200 is_multibyte_char_1 = not_multibyte_char_1;