2 Unix SMB/Netbios implementation.
5 Copyright (C) Andrew Tridgell 1992-1998
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 Japanese language support added by <fujita@ainix.isac.co.jp> 1994.9.5,
22 coding systems extended to EUC/SJIS/JIS/HEX on 1994.10.11,
23 and all JIS code sequence types added on 1995.8.16
24 Notes: Hexadecimal code by <ohki@gssm.otuka.tsukuba.ac.jp>
31 * Function pointers that get overridden when multi-byte code pages
35 char *(*multibyte_strchr)(char *, int ) = (char *(*)(char *, int )) strchr; /* default: libc strchr; initialize_multibyte_vectors() installs a multibyte-aware version */
36 char *(*multibyte_strrchr)(char *, int ) = (char *(*)(char *, int )) strrchr; /* default: libc strrchr; reassigned by initialize_multibyte_vectors() */
37 char *(*multibyte_strstr)(char *, char *) = (char *(*)(char *, char *)) strstr; /* default: libc strstr; reassigned by initialize_multibyte_vectors() */
38 char *(*multibyte_strtok)(char *, char *) = (char *(*)(char *, char *)) strtok; /* default: libc strtok; reassigned by initialize_multibyte_vectors() */
41 * Kanji is treated differently here due to historical accident of
42 * it being the first non-English codepage added to Samba.
43 * The define 'KANJI' is being overloaded to mean 'use kanji codepage
44 * by default' and also 'this is the filename-to-disk conversion
45 * method to use'. This really should be removed and all control
46 * over this left in the smb.conf parameters 'client codepage'
47 * and 'coding system'.
53 * Set the default conversion to be the functions in
57 static int skip_non_multibyte_char(char); /* forward declaration; defined later in this file */
58 static BOOL not_multibyte_char_1(char); /* forward declaration; defined later in this file */
60 char *(*_dos_to_unix)(char *, BOOL) = dos2unix_format; /* default conversion hook; reassigned by setup_string_function() */
61 char *(*_unix_to_dos)(char *, BOOL) = unix2dos_format; /* default conversion hook; reassigned by setup_string_function() */
62 int (*_skip_multibyte_char)(char) = skip_non_multibyte_char; /* default skip hook; reassigned by initialize_multibyte_vectors() */
63 BOOL (*is_multibyte_char_1)(char) = not_multibyte_char_1; /* default lead-byte test; reassigned by initialize_multibyte_vectors() */
68 * Set the default conversion to be the function
69 * sj_to_sj in this file.
72 static char *sj_to_sj(char *from, BOOL overwrite);
73 static int skip_kanji_multibyte_char(char);
74 static BOOL is_kanji_multibyte_char_1(char);
76 char *(*_dos_to_unix)(char *, BOOL) = sj_to_sj; /* default conversion hook; reassigned by setup_string_function() */
77 char *(*_unix_to_dos)(char *, BOOL) = sj_to_sj; /* default conversion hook; reassigned by setup_string_function() */
78 int (*_skip_multibyte_char)(char) = skip_kanji_multibyte_char; /* default skip hook; reassigned by initialize_multibyte_vectors() */
79 BOOL (*is_multibyte_char_1)(char) = is_kanji_multibyte_char_1; /* declared BOOL to match is_kanji_multibyte_char_1() and the other definition of this pointer in this file */
83 /* JIS si/so sequence bytes, plus the tag character used by the hex encoding */
84 static char jis_kso = JIS_KSO;
85 static char jis_ksi = JIS_KSI;
86 static char hex_tag = HEXTAG; /* interpret_coding_system() replaces this when the setting is "hex" followed by a tag character */
88 /*******************************************************************
90 ********************************************************************/
92 /*******************************************************************
93 Search S1 for a token delimited by any char in S2.
94 S1 contains SHIFT JIS chars.
95 ********************************************************************/
97 static char *sj_strtok(char *s1, char *s2)
99 static char *s = NULL;
107 for (q = s1; *s1; ) {
108 if (is_shift_jis (*s1)) {
110 } else if (is_kana (*s1)) {
113 char *p = strchr (s2, *s1);
132 /*******************************************************************
133 Search for string S2 in S1.
134 S1 contains SHIFT JIS chars.
135 ********************************************************************/
137 static char *sj_strstr(char *s1, char *s2)
139 int len = strlen ((char *) s2);
144 if (strncmp (s1, s2, len) == 0)
147 if (is_shift_jis (*s1)) {
156 /*******************************************************************
157 Search for char C from the beginning of S.
158 S contains SHIFT JIS chars.
159 ********************************************************************/
161 static char *sj_strchr (char *s, int c)
166 if (is_shift_jis (*s)) {
175 /*******************************************************************
176 Search for char C from the end of S.
177 S contains SHIFT JIS chars.
178 ********************************************************************/
180 static char *sj_strrchr(char *s, int c)
188 if (is_shift_jis (*s)) {
197 /*******************************************************************
198 Kanji multibyte char skip function.
199 *******************************************************************/
201 static int skip_kanji_multibyte_char(char c)
203 if(is_shift_jis(c)) {
205 } else if (is_kana(c)) {
211 /*******************************************************************
212 Kanji multibyte char identification.
213 *******************************************************************/
215 static BOOL is_kanji_multibyte_char_1(char c)
217 return is_shift_jis(c);
220 /*******************************************************************
221 The following functions are the only ones needed to do multibyte
222 support for Hangul, Big5 and Simplified Chinese. Most of the
223 real work for these codepages is done in the generic multibyte
224 functions. The only reason these functions are needed at all
225 is that the is_xxx(c) calls are really preprocessor macros.
226 ********************************************************************/
228 /*******************************************************************
229 Hangul (Korean - code page 949) function.
230 ********************************************************************/
232 static BOOL hangul_is_multibyte_char_1(char c)
237 /*******************************************************************
238 Big5 Traditional Chinese (code page 950) function.
239 ********************************************************************/
241 static BOOL big5_is_multibyte_char_1(char c)
243 return is_big5_c1(c);
246 /*******************************************************************
247 Simplified Chinese (code page 936) function.
248 ********************************************************************/
250 static BOOL simpch_is_multibyte_char_1(char c)
252 return is_simpch_c1(c);
255 /*******************************************************************
256 Generic multibyte functions - used by Hangul, Big5 and Simplified
258 ********************************************************************/
260 /*******************************************************************
261 Search S1 for a token delimited by any char in S2.
262 S1 contains generic multibyte chars.
263 ********************************************************************/
265 static char *generic_multibyte_strtok(char *s1, char *s2)
267 static char *s = NULL;
275 for (q = s1; *s1; ) {
276 if ((*is_multibyte_char_1)(*s1)) {
279 char *p = strchr (s2, *s1);
298 /*******************************************************************
299 Search for string S2 in S1.
300 S1 contains generic multibyte chars.
301 ********************************************************************/
303 static char *generic_multibyte_strstr(char *s1, char *s2)
305 int len = strlen ((char *) s2);
310 if (strncmp (s1, s2, len) == 0)
313 if ((*is_multibyte_char_1)(*s1)) {
322 /*******************************************************************
323 Search for char C from the beginning of S.
324 S contains generic multibyte chars.
325 ********************************************************************/
327 static char *generic_multibyte_strchr(char *s, int c)
332 if ((*is_multibyte_char_1)(*s)) {
341 /*******************************************************************
342 Search for char C from the end of S.
343 S contains generic multibyte chars.
344 ********************************************************************/
346 static char *generic_multibyte_strrchr(char *s, int c)
354 if ((*is_multibyte_char_1)(*s)) {
363 /*******************************************************************
364 Generic multibyte char skip function.
365 *******************************************************************/
367 static int skip_generic_multibyte_char(char c)
369 if( (*is_multibyte_char_1)(c)) {
375 /*******************************************************************
377 ********************************************************************/
379 /* conversion buffer: the converters below build their output here, then copy it back with pstrcpy() */
380 static char cvtbuf[1024];
382 /*******************************************************************
384 ********************************************************************/
386 static int euc2sjis (int hi, int lo)
389 return ((hi / 2 + (hi < 0xdf ? 0x31 : 0x71)) << 8) |
390 (lo - (lo >= 0xe0 ? 0x60 : 0x61));
392 return ((hi / 2 + (hi < 0xdf ? 0x30 : 0x70)) << 8) | (lo - 2);
395 static int sjis2euc (int hi, int lo)
398 return ((hi * 2 - (hi >= 0xe0 ? 0xe0 : 0x60)) << 8) | (lo + 2);
400 return ((hi * 2 - (hi >= 0xe0 ? 0xe1 : 0x61)) << 8) |
401 (lo + (lo >= 0x7f ? 0x60 : 0x61));
404 /*******************************************************************
405 Convert FROM, which contains SHIFT JIS codes, to EUC codes.
406 Returns the converted buffer.
407 ********************************************************************/
409 static char *sj_to_euc(char *from, BOOL overwrite)
414 save = (char *) from;
415 for (out = cvtbuf; *from;) {
416 if (is_shift_jis (*from)) {
417 int code = sjis2euc ((int) from[0] & 0xff, (int) from[1] & 0xff);
418 *out++ = (code >> 8) & 0xff;
421 } else if (is_kana (*from)) {
422 *out++ = (char)euc_kana;
430 pstrcpy((char *) save, (char *) cvtbuf);
431 return (char *) save;
437 /*******************************************************************
438 Convert FROM, which contains EUC codes, to SHIFT JIS codes.
439 Returns the converted buffer.
440 ********************************************************************/
442 static char *euc_to_sj(char *from, BOOL overwrite)
447 save = (char *) from;
448 for (out = cvtbuf; *from; ) {
449 if (is_euc (*from)) {
450 int code = euc2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
451 *out++ = (code >> 8) & 0xff;
454 } else if (is_euc_kana (*from)) {
463 pstrcpy(save, (char *) cvtbuf);
470 /*******************************************************************
471 JIS7,JIS8,JUNET <-> SJIS
472 ********************************************************************/
474 static int sjis2jis(int hi, int lo)
477 return ((hi * 2 - (hi >= 0xe0 ? 0x160 : 0xe0)) << 8) | (lo - 0x7e);
479 return ((hi * 2 - (hi >= 0xe0 ? 0x161 : 0xe1)) << 8) |
480 (lo - (lo >= 0x7f ? 0x20 : 0x1f));
483 static int jis2sjis(int hi, int lo)
486 return ((hi / 2 + (hi < 0x5f ? 0x71 : 0xb1)) << 8) |
487 (lo + (lo >= 0x60 ? 0x20 : 0x1f));
489 return ((hi / 2 + (hi < 0x5f ? 0x70 : 0xb0)) << 8) | (lo + 0x7e);
492 /*******************************************************************
493 Convert FROM, which contains JIS codes, to SHIFT JIS codes.
494 Returns the converted buffer.
495 ********************************************************************/
497 static char *jis8_to_sj(char *from, BOOL overwrite)
504 save = (char *) from;
505 for (out = cvtbuf; *from;) {
506 if (is_esc (*from)) {
507 if (is_so1 (from[1]) && is_so2 (from[2])) {
510 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
513 } else { /* sequence error */
525 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
526 *out++ = (code >> 8) & 0xff;
536 pstrcpy (save, (char *) cvtbuf);
543 /*******************************************************************
544 Convert FROM, which contains SHIFT JIS codes, to JIS codes.
545 Returns the converted buffer.
546 ********************************************************************/
548 static char *sj_to_jis8(char *from, BOOL overwrite)
555 save = (char *) from;
556 for (out = cvtbuf; *from; ) {
557 if (is_shift_jis (*from)) {
560 case _KJ_ROMAN: /* to KANJI */
567 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
568 *out++ = (code >> 8) & 0xff;
573 case _KJ_KANJI: /* to ROMAN/KANA */
584 case _KJ_KANJI: /* to ROMAN/KANA */
593 pstrcpy (save, (char *) cvtbuf);
600 /*******************************************************************
601 Convert FROM, which contains 7-bit JIS codes, to SHIFT JIS codes.
602 Returns the converted buffer.
603 ********************************************************************/
604 static char *jis7_to_sj(char *from, BOOL overwrite)
611 save = (char *) from;
612 for (out = cvtbuf; *from;) {
613 if (is_esc (*from)) {
614 if (is_so1 (from[1]) && is_so2 (from[2])) {
617 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
620 } else { /* sequence error */
623 } else if (is_so (*from)) {
624 shifted = _KJ_KANA; /* to KANA */
626 } else if (is_si (*from)) {
627 shifted = _KJ_ROMAN; /* to ROMAN */
638 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
639 *out++ = (code >> 8) & 0xff;
645 *out++ = ((int) from[0]) + 0x80;
652 pstrcpy (save, (char *) cvtbuf);
659 /*******************************************************************
660 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS codes.
661 Returns the converted buffer.
662 ********************************************************************/
663 static char *sj_to_jis7(char *from, BOOL overwrite)
670 save = (char *) from;
671 for (out = cvtbuf; *from; ) {
672 if (is_shift_jis (*from)) {
676 *out++ = jis_si; /* to ROMAN and through down */
677 case _KJ_ROMAN: /* to KANJI */
684 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
685 *out++ = (code >> 8) & 0xff;
688 } else if (is_kana (from[0])) {
690 case _KJ_KANJI: /* to ROMAN */
694 case _KJ_ROMAN: /* to KANA */
699 *out++ = ((int) *from++) - 0x80;
703 *out++ = jis_si; /* to ROMAN */
706 case _KJ_KANJI: /* to ROMAN */
718 *out++ = jis_si; /* to ROMAN */
720 case _KJ_KANJI: /* to ROMAN */
728 pstrcpy (save, (char *) cvtbuf);
735 /*******************************************************************
736 Convert FROM, which contains 7-bit JIS (junet) codes, to SHIFT JIS codes.
737 Returns the converted buffer.
738 ********************************************************************/
739 static char *junet_to_sj(char *from, BOOL overwrite)
746 save = (char *) from;
747 for (out = cvtbuf; *from;) {
748 if (is_esc (*from)) {
749 if (is_so1 (from[1]) && is_so2 (from[2])) {
752 } else if (is_si1 (from[1]) && is_si2 (from[2])) {
755 } else if (is_juk1(from[1]) && is_juk2 (from[2])) {
758 } else { /* sequence error */
770 int code = jis2sjis ((int) from[0] & 0xff, (int) from[1] & 0xff);
771 *out++ = (code >> 8) & 0xff;
777 *out++ = ((int) from[0]) + 0x80;
784 pstrcpy (save, (char *) cvtbuf);
791 /*******************************************************************
792 Convert FROM, which contains SHIFT JIS codes, to 7-bit JIS (junet) codes.
793 Returns the converted buffer.
794 ********************************************************************/
795 static char *sj_to_junet(char *from, BOOL overwrite)
802 save = (char *) from;
803 for (out = cvtbuf; *from; ) {
804 if (is_shift_jis (*from)) {
808 case _KJ_ROMAN: /* to KANJI */
815 code = sjis2jis ((int) from[0] & 0xff, (int) from[1] & 0xff);
816 *out++ = (code >> 8) & 0xff;
819 } else if (is_kana (from[0])) {
821 case _KJ_KANJI: /* to ROMAN */
822 case _KJ_ROMAN: /* to KANA */
824 *out++ = junet_kana1;
825 *out++ = junet_kana2;
829 *out++ = ((int) *from++) - 0x80;
833 case _KJ_KANJI: /* to ROMAN */
845 case _KJ_KANJI: /* to ROMAN */
853 pstrcpy (save, (char *) cvtbuf);
860 /*******************************************************************
862 ********************************************************************/
863 /* ":xx" -> a byte */
864 static char *hex_to_sj(char *from, BOOL overwrite)
871 if (*sp == hex_tag && isxdigit((int)sp[1]) && isxdigit((int)sp[2])) {
872 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
879 pstrcpy ((char *) from, (char *) cvtbuf);
880 return (char *) from;
886 /*******************************************************************
888 ********************************************************************/
889 static char *sj_to_hex(char *from, BOOL overwrite)
891 unsigned char *sp, *dp;
893 sp = (unsigned char*) from;
894 dp = (unsigned char*) cvtbuf;
898 *dp++ = bin2hex (((*sp)>>4)&0x0f);
899 *dp++ = bin2hex ((*sp)&0x0f);
901 } else if (is_shift_jis (*sp) && is_shift_jis2 (sp[1])) {
903 *dp++ = bin2hex (((*sp)>>4)&0x0f);
904 *dp++ = bin2hex ((*sp)&0x0f);
907 *dp++ = bin2hex (((*sp)>>4)&0x0f);
908 *dp++ = bin2hex ((*sp)&0x0f);
915 pstrcpy ((char *) from, (char *) cvtbuf);
916 return (char *) from;
922 /*******************************************************************
924 ********************************************************************/
925 /* ":xx" CAP -> a byte */
926 static char *cap_to_sj(char *from, BOOL overwrite)
934 * The only change between this and hex_to_sj is here. sj_to_cap only
935 * translates characters greater or equal to 0x80 - make sure that here
936 * we only do the reverse (that's why the strchr is used rather than
937 * isxdigit). Based on a fix from ado@elsie.nci.nih.gov (Arthur David Olson).
939 if (*sp == hex_tag && (strchr ("89abcdefABCDEF", sp[1]) != NULL) && isxdigit((int)sp[2])) {
940 *dp++ = (hex2bin (sp[1])<<4) | (hex2bin (sp[2]));
947 pstrcpy ((char *) from, (char *) cvtbuf);
948 return (char *) from;
954 /*******************************************************************
955 kanji/kana -> ":xx" - CAP format.
956 ********************************************************************/
957 static char *sj_to_cap(char *from, BOOL overwrite)
959 unsigned char *sp, *dp;
961 sp = (unsigned char*) from;
962 dp = (unsigned char*) cvtbuf;
966 *dp++ = bin2hex (((*sp)>>4)&0x0f);
967 *dp++ = bin2hex ((*sp)&0x0f);
975 pstrcpy ((char *) from, (char *) cvtbuf);
976 return (char *) from;
982 /*******************************************************************
984 ********************************************************************/
985 static char *sj_to_sj(char *from, BOOL overwrite)
988 pstrcpy (cvtbuf, (char *) from);
991 return (char *) from;
995 /************************************************************************
997 _dos_to_unix _unix_to_dos
998 ************************************************************************/
1000 static void setup_string_function(int codes)
1004 _dos_to_unix = dos2unix_format;
1005 _unix_to_dos = unix2dos_format;
1009 _dos_to_unix = sj_to_sj;
1010 _unix_to_dos = sj_to_sj;
1014 _dos_to_unix = sj_to_euc;
1015 _unix_to_dos = euc_to_sj;
1019 _dos_to_unix = sj_to_jis7;
1020 _unix_to_dos = jis7_to_sj;
1024 _dos_to_unix = sj_to_jis8;
1025 _unix_to_dos = jis8_to_sj;
1029 _dos_to_unix = sj_to_junet;
1030 _unix_to_dos = junet_to_sj;
1034 _dos_to_unix = sj_to_hex;
1035 _unix_to_dos = hex_to_sj;
1039 _dos_to_unix = sj_to_cap;
1040 _unix_to_dos = cap_to_sj;
1045 /************************************************************************
1046 Interpret coding system.
1047 ************************************************************************/
1049 void interpret_coding_system(char *str)
1051 int codes = UNKNOWN_CODE;
1053 if (strequal (str, "sjis")) {
1055 } else if (strequal (str, "euc")) {
1057 } else if (strequal (str, "cap")) {
1060 } else if (strequal (str, "hex")) {
1063 } else if (!strncasecmp (str, "hex", 3)) {
1065 hex_tag = (str[3] ? str[3] : HEXTAG);
1066 } else if (strequal (str, "j8bb")) {
1070 } else if (strequal (str, "j8bj") || strequal (str, "jis8")) {
1074 } else if (strequal (str, "j8bh")) {
1078 } else if (strequal (str, "j8@b")) {
1082 } else if (strequal (str, "j8@j")) {
1086 } else if (strequal (str, "j8@h")) {
1090 } else if (strequal (str, "j7bb")) {
1094 } else if (strequal (str, "j7bj") || strequal (str, "jis7")) {
1098 } else if (strequal (str, "j7bh")) {
1102 } else if (strequal (str, "j7@b")) {
1106 } else if (strequal (str, "j7@j")) {
1110 } else if (strequal (str, "j7@h")) {
1114 } else if (strequal (str, "jubb")) {
1118 } else if (strequal (str, "jubj") || strequal (str, "junet")) {
1122 } else if (strequal (str, "jubh")) {
1126 } else if (strequal (str, "ju@b")) {
1130 } else if (strequal (str, "ju@j")) {
1134 } else if (strequal (str, "ju@h")) {
1139 setup_string_function (codes);
1142 /*******************************************************************
1143 Non multibyte char function.
1144 *******************************************************************/
1146 static int skip_non_multibyte_char(char c)
1151 /*******************************************************************
1152 Function that always says a character isn't multibyte.
1153 *******************************************************************/
1155 static BOOL not_multibyte_char_1(char c)
1160 /*******************************************************************
1161 Function to determine if we are in a multibyte code page.
1162 *******************************************************************/
1164 static BOOL is_multibyte_codepage_val = False;
1166 BOOL is_multibyte_codepage(void)
1168 return is_multibyte_codepage_val;
1171 /*******************************************************************
1172 Setup the function pointers for the functions that are replaced
1173 when multi-byte codepages are used.
1175 The dos_to_unix and unix_to_dos function pointers are only
1176 replaced by setup_string_function called by interpret_coding_system
1178 *******************************************************************/
1180 void initialize_multibyte_vectors( int client_codepage)
1182 switch( client_codepage )
1184 case KANJI_CODEPAGE:
1185 multibyte_strchr = (char *(*)(char *, int )) sj_strchr;
1186 multibyte_strrchr = (char *(*)(char *, int )) sj_strrchr;
1187 multibyte_strstr = (char *(*)(char *, char *)) sj_strstr;
1188 multibyte_strtok = (char *(*)(char *, char *)) sj_strtok;
1189 _skip_multibyte_char = skip_kanji_multibyte_char;
1190 is_multibyte_char_1 = is_kanji_multibyte_char_1;
1191 is_multibyte_codepage_val = True;
1193 case HANGUL_CODEPAGE:
1194 multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
1195 multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
1196 multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
1197 multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
1198 _skip_multibyte_char = skip_generic_multibyte_char;
1199 is_multibyte_char_1 = hangul_is_multibyte_char_1;
1200 is_multibyte_codepage_val = True;
1203 multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
1204 multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
1205 multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
1206 multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
1207 _skip_multibyte_char = skip_generic_multibyte_char;
1208 is_multibyte_char_1 = big5_is_multibyte_char_1;
1209 is_multibyte_codepage_val = True;
1211 case SIMPLIFIED_CHINESE_CODEPAGE:
1212 multibyte_strchr = (char *(*)(char *, int )) generic_multibyte_strchr;
1213 multibyte_strrchr = (char *(*)(char *, int )) generic_multibyte_strrchr;
1214 multibyte_strstr = (char *(*)(char *, char *)) generic_multibyte_strstr;
1215 multibyte_strtok = (char *(*)(char *, char *)) generic_multibyte_strtok;
1216 _skip_multibyte_char = skip_generic_multibyte_char;
1217 is_multibyte_char_1 = simpch_is_multibyte_char_1;
1218 is_multibyte_codepage_val = True;
1221 * Single char size code page.
1224 multibyte_strchr = (char *(*)(char *, int )) strchr;
1225 multibyte_strrchr = (char *(*)(char *, int )) strrchr;
1226 multibyte_strstr = (char *(*)(char *, char *)) strstr;
1227 multibyte_strtok = (char *(*)(char *, char *)) strtok;
1228 _skip_multibyte_char = skip_non_multibyte_char;
1229 is_multibyte_char_1 = not_multibyte_char_1;
1230 is_multibyte_codepage_val = False;