Merge branch 'master' of ssh://git.samba.org/data/git/samba into wspp-schema
[vlendec/samba-autobuild/.git] / lib / util / charset / charset.h
1 /* 
2    Unix SMB/CIFS implementation.
3    charset defines
4    Copyright (C) Andrew Tridgell 2001
5    Copyright (C) Jelmer Vernooij 2002
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 /* This is a public header file that is installed as part of Samba. 
22  * If you remove any functions or change their signature, update 
23  * the so version number. */
24
25 #ifndef __CHARSET_H__
26 #define __CHARSET_H__
27
28 #include <talloc.h>
29
/*
 * The charset identifiers used in samba.
 *
 * CH_UTF16 and CH_UTF16LE share the value 0; every following enumerator
 * is one larger than the previous one.
 */
typedef enum {
        CH_UTF16LE = 0,
        CH_UTF16 = 0,           /* same value as CH_UTF16LE */
        CH_UNIX,
        CH_DISPLAY,
        CH_DOS,
        CH_UTF8,
        CH_UTF16BE,
        CH_UTF16MUNGED
} charset_t;

/* Count of distinct charset_t values (0 up to CH_UTF16MUNGED). */
#define NUM_CHARSETS 7
34
/*
 * Internal type for one SMB UCS2 (16-bit unicode) unit.
 * Data held in an smb_ucs2_t is *always* little endian, regardless of
 * the byte order of the host.
 */
typedef uint16_t smb_ucs2_t;
41
/*
 * Per-charset conversion hooks.
 *
 *   name - the charset's name
 *   pull - converts text in this charset into a ucs2 buffer
 *   push - converts a ucs2 buffer into text in this charset
 *
 * Both hooks share an iconv-like signature: an opaque first argument,
 * then in/out cursors with the number of bytes left on each side.
 *
 * prev/next: presumably links used to chain the registered charsets
 * into a list -- confirm against the registration code.
 */
struct charset_functions {
        const char *name;
        size_t (*pull)(void *cd,
                       const char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
        size_t (*push)(void *cd,
                       const char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
        struct charset_functions *prev;
        struct charset_functions *next;
};
55
/* Type used when manipulating individual unicode codepoints. */
typedef uint32_t codepoint_t;

/* Marker value: the all-ones codepoint_t, i.e. (codepoint_t)-1. */
#define INVALID_CODEPOINT ((codepoint_t)-1)
60
/*
 * Auxiliary struct used by the source/script/gen-8bit-gap.sh script
 * when it generates an encoding table for a charset module.
 *
 *   start, end - 16-bit bounds of one range
 *   idx        - signed 32-bit index associated with that range
 *
 * NOTE(review): presumably this is the element type of the from_idx[]
 * tables that SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP walks -- confirm
 * against the generated tables.
 */
struct charset_gap_table {
        uint16_t start;
        uint16_t end;
        int32_t idx;
};
71
72
/*
 * Generic iconv conversion structure; smb_iconv_t is a pointer to it.
 *
 * It carries three conversion hooks (direct, pull, push) with identical
 * iconv-like signatures, one opaque descriptor per hook (cd_direct,
 * cd_pull, cd_push) and the names of the source and target charsets.
 */
typedef struct smb_iconv_s {
        size_t (*direct)(void *cd,
                         const char **inbuf, size_t *inbytesleft,
                         char **outbuf, size_t *outbytesleft);
        size_t (*pull)(void *cd,
                       const char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
        size_t (*push)(void *cd,
                       const char **inbuf, size_t *inbytesleft,
                       char **outbuf, size_t *outbytesleft);
        void *cd_direct;
        void *cd_pull;
        void *cd_push;
        char *from_name;
        char *to_name;
} *smb_iconv_t;
84
/* string manipulation flags (each one is a distinct single bit) */
#define STR_TERMINATE       0x0001
#define STR_UPPER           0x0002
#define STR_ASCII           0x0004
#define STR_UNICODE         0x0008
#define STR_NOALIGN         0x0010
#define STR_NO_RANGE_CHECK  0x0020
#define STR_LEN8BIT         0x0040
#define STR_TERMINATE_ASCII 0x0080 /* only terminate if ascii */
#define STR_LEN_NOTERM      0x0100 /* the length field is the unterminated length */
95
/*
 * Opaque forward declarations: only pointers to these structures are
 * used by the prototypes in this header.
 */
struct smb_iconv_convenience;
struct loadparm_context;
98
99 /* replace some string functions with multi-byte
100    versions */
101 #define strlower(s) strlower_m(s)
102 #define strupper(s) strupper_m(s)
103
104 char *strchr_m(const char *s, char c);
105 size_t strlen_m_term(const char *s);
106 size_t strlen_m_term_null(const char *s);
107 size_t strlen_m(const char *s);
108 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength);
109 void string_replace_m(char *s, char oldc, char newc);
110 bool strcsequal_m(const char *s1,const char *s2);
111 bool strequal_m(const char *s1, const char *s2);
112 int strncasecmp_m(const char *s1, const char *s2, size_t n);
113 bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize);
114 int strcasecmp_m(const char *s1, const char *s2);
115 size_t count_chars_m(const char *s, char c);
116 void strupper_m(char *s);
117 void strlower_m(char *s);
118 char *strupper_talloc(TALLOC_CTX *ctx, const char *src);
119 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src);
120 char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n);
121 char *strlower_talloc(TALLOC_CTX *ctx, const char *src);
122 bool strhasupper(const char *string);
123 bool strhaslower(const char *string);
124 char *strrchr_m(const char *s, char c);
125 char *strchr_m(const char *s, char c);
126
127 bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
128 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size);
129 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
130 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
131 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size);
132 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
133 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags);
134 ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags);
135
136 bool convert_string_talloc(TALLOC_CTX *ctx, 
137                                        charset_t from, charset_t to, 
138                                        void const *src, size_t srclen, 
139                                        void *dest, size_t *converted_size, 
140                                            bool allow_badcharcnv);
141
142 size_t convert_string(charset_t from, charset_t to,
143                                 void const *src, size_t srclen, 
144                                 void *dest, size_t destlen, bool allow_badcharcnv);
145
146 ssize_t iconv_talloc(TALLOC_CTX *mem_ctx, 
147                                        smb_iconv_t cd,
148                                        void const *src, size_t srclen, 
149                                        void *dest);
150
151 extern struct smb_iconv_convenience *global_iconv_convenience;
152
153 codepoint_t next_codepoint(const char *str, size_t *size);
154
155 /* codepoints */
156 codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, 
157                             const char *str, size_t *size);
158 ssize_t push_codepoint(struct smb_iconv_convenience *ic, 
159                                 char *str, codepoint_t c);
160 codepoint_t toupper_m(codepoint_t val);
161 codepoint_t tolower_m(codepoint_t val);
162 int codepoint_cmpi(codepoint_t c1, codepoint_t c2);
163
164 /* Iconv convenience functions */
165 struct smb_iconv_convenience *smb_iconv_convenience_init(TALLOC_CTX *mem_ctx,
166                                                          const char *dos_charset,
167                                                          const char *unix_charset,
168                                                          bool native_iconv);
169
170 bool convert_string_convenience(struct smb_iconv_convenience *ic,
171                                 charset_t from, charset_t to,
172                                 void const *src, size_t srclen, 
173                                 void *dest, size_t destlen, size_t *converted_size,
174                                 bool allow_badcharcnv);
175 bool convert_string_talloc_convenience(TALLOC_CTX *ctx, 
176                                        struct smb_iconv_convenience *ic, 
177                                        charset_t from, charset_t to, 
178                                        void const *src, size_t srclen, 
179                                        void *dest, size_t *converted_size, bool allow_badcharcnv);
180 /* iconv */
181 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode);
182 int smb_iconv_close(smb_iconv_t cd);
183 size_t smb_iconv(smb_iconv_t cd, 
184                  const char **inbuf, size_t *inbytesleft,
185                  char **outbuf, size_t *outbytesleft);
186 smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 
187                               const char *fromcode, bool native_iconv);
188
189 void load_case_tables(void);
190 bool charset_register_backend(const void *_funcs);
191
/*
 *   Define stub for charset module which implements 8-bit encoding with gaps.
 *   Encoding tables for such module should be produced from glibc's CHARMAPs
 *   using script source/script/gen-8bit-gap.sh
 *   CHARSETNAME is CAPITALIZED charset name
 *
 *   The file expanding this macro must provide:
 *     from_idx[]  - range table with .start/.end/.idx members, terminated
 *                   by an entry whose start is 0xffff
 *     from_ucs2[] - byte table indexed by from_idx[i].idx + ch
 *     to_ucs2[]   - 16-bit table indexed by the input byte
 *     SVAL(), NTSTATUS, smb_register_charset(), errno/EINVAL/E2BIG
 *
 *   The expansion defines:
 *     CHARSETNAME_push - consumes 2 input bytes and emits 1 output byte
 *                        per character.  Fails with -1 (converted to
 *                        size_t) and errno EINVAL when the 16-bit value is
 *                        in no from_idx range or a single trailing input
 *                        byte remains, and errno E2BIG when input remains
 *                        but the output is full.
 *     CHARSETNAME_pull - consumes 1 input byte and emits 2 output bytes
 *                        per character; fails with -1 and errno E2BIG when
 *                        input remains but the output is full.
 *     CHARSETNAME_functions and charset_CHARSETNAME_init(), which passes
 *                        that structure to smb_register_charset().
 *
 *   _pull stores the 16-bit value one byte at a time, low byte first,
 *   rather than through a cast uint16 pointer: outbuf is a char buffer
 *   with no alignment guarantee, and the byte-wise store yields the same
 *   little endian layout on every host.
 *   NOTE(review): this assumes SVAL() in _push is the matching little
 *   endian 16-bit load -- confirm against its definition.
 *
 *     */
#define SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CHARSETNAME)                                      \
static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytesleft,           \
                         char **outbuf, size_t *outbytesleft)                                   \
{                                                                                               \
        while (*inbytesleft >= 2 && *outbytesleft >= 1) {                                       \
                int i;                                                                          \
                int done = 0;                                                                   \
                                                                                                \
                uint16_t ch = SVAL(*inbuf,0);                                                   \
                                                                                                \
                for (i=0; from_idx[i].start != 0xffff; i++) {                                   \
                        if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) {             \
                                ((unsigned char*)(*outbuf))[0] = from_ucs2[from_idx[i].idx+ch]; \
                                (*inbytesleft) -= 2;                                            \
                                (*outbytesleft) -= 1;                                           \
                                (*inbuf)  += 2;                                                 \
                                (*outbuf) += 1;                                                 \
                                done = 1;                                                       \
                                break;                                                          \
                        }                                                                       \
                }                                                                               \
                if (!done) {                                                                    \
                        errno = EINVAL;                                                         \
                        return -1;                                                              \
                }                                                                               \
                                                                                                \
        }                                                                                       \
                                                                                                \
        if (*inbytesleft == 1) {                                                                \
                errno = EINVAL;                                                                 \
                return -1;                                                                      \
        }                                                                                       \
                                                                                                \
        if (*inbytesleft > 1) {                                                                 \
                errno = E2BIG;                                                                  \
                return -1;                                                                      \
        }                                                                                       \
                                                                                                \
        return 0;                                                                               \
}                                                                                               \
                                                                                                \
static size_t CHARSETNAME ## _pull(void *cd, const char **inbuf, size_t *inbytesleft,           \
                         char **outbuf, size_t *outbytesleft)                                   \
{                                                                                               \
        while (*inbytesleft >= 1 && *outbytesleft >= 2) {                                       \
                uint16_t ucs2 = to_ucs2[((const unsigned char*)(*inbuf))[0]];                   \
                                                                                                \
                ((unsigned char*)(*outbuf))[0] = (unsigned char)(ucs2 & 0xFF);                  \
                ((unsigned char*)(*outbuf))[1] = (unsigned char)(ucs2 >> 8);                    \
                (*inbytesleft)  -= 1;                                                           \
                (*outbytesleft) -= 2;                                                           \
                (*inbuf)  += 1;                                                                 \
                (*outbuf) += 2;                                                                 \
        }                                                                                       \
                                                                                                \
        if (*inbytesleft > 0) {                                                                 \
                errno = E2BIG;                                                                  \
                return -1;                                                                      \
        }                                                                                       \
                                                                                                \
        return 0;                                                                               \
}                                                                                               \
                                                                                                \
struct charset_functions CHARSETNAME ## _functions =                                            \
                {#CHARSETNAME, CHARSETNAME ## _pull, CHARSETNAME ## _push};                     \
                                                                                                \
NTSTATUS charset_ ## CHARSETNAME ## _init(void);                                                \
NTSTATUS charset_ ## CHARSETNAME ## _init(void)                                                 \
{                                                                                               \
        return smb_register_charset(& CHARSETNAME ## _functions);                               \
}
267
268
269 #endif /* __CHARSET_H__ */