Add a new non-convenience version of push_codepoint.
[ira/wip.git] / lib / util / charset / charset.h
1 /* 
2    Unix SMB/CIFS implementation.
3    charset defines
4    Copyright (C) Andrew Tridgell 2001
5    Copyright (C) Jelmer Vernooij 2002
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 /* This is a public header file that is installed as part of Samba. 
22  * If you remove any functions or change their signature, update 
23  * the so version number. */
24
25 #ifndef __CHARSET_H__
26 #define __CHARSET_H__
27
28 #include <talloc.h>
29
30 /* this defines the charset types used in samba.
   CH_UTF16 is an alias of CH_UTF16LE (both are explicitly 0); the remaining
   enumerators count up from 1 (CH_UNIX) to 6 (CH_UTF16MUNGED). */
31 typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t;
32
33 #define NUM_CHARSETS 7 /* number of distinct charset_t values (0..6); keep in sync with the enum */
34
35 /*
36  * SMB UCS2 (16-bit unicode) internal type.
37  * smb_ucs2_t is *always* in little endian format.
38  */
39
40 typedef uint16_t smb_ucs2_t;
41
42 /*
43  *   for each charset we have a function that pulls from that charset to
44  *     a ucs2 buffer, and a function that pushes from a ucs2 buffer to
     *     that charset. Both callbacks share one iconv()-like signature: an
     *     opaque first argument, then input/output buffer cursors and their
     *     remaining-byte counters.
45  *     */
46
47 struct charset_functions {
48         const char *name; /* charset name; the 8-bit-gap generator macro stores the stringified CHARSETNAME here */
49         size_t (*pull)(void *, const char **inbuf, size_t *inbytesleft,
50                                    char **outbuf, size_t *outbytesleft); /* charset -> ucs2 */
51         size_t (*push)(void *, const char **inbuf, size_t *inbytesleft,
52                                    char **outbuf, size_t *outbytesleft); /* ucs2 -> charset */
53         struct charset_functions *prev, *next; /* NOTE(review): presumably list links between registered backends - confirm */
54 };
55
56 /* this type is used for manipulating unicode codepoints */
57 typedef uint32_t codepoint_t;
58
59 #define INVALID_CODEPOINT ((codepoint_t)-1) /* sentinel: all bits set, i.e. 0xFFFFFFFF for the 32-bit codepoint_t */
60
61 /*
62  * This is an auxiliary struct used by the source/script/gen-8bit-gap.sh script
63  * during generation of an encoding table for a charset module.
    * The field notes below describe how SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP
    * reads the entries of its from_idx table (presumably an array of this
    * struct - confirm against the generated tables).
64  *     */
65
66 struct charset_gap_table {
67   uint16_t start; /* first UCS2 value covered by this range (inclusive); 0xffff terminates the table */
68   uint16_t end; /* last UCS2 value covered by this range (inclusive) */
69   int32_t idx; /* signed offset added to the UCS2 value to index the from_ucs2 table */
70 };
71
72
73 /* generic iconv conversion structure.
   Note that smb_iconv_t is a POINTER to struct smb_iconv_s, not the struct
   itself. The three callbacks share one iconv()-like signature. */
74 typedef struct smb_iconv_s {
75         size_t (*direct)(void *cd, const char **inbuf, size_t *inbytesleft,
76                          char **outbuf, size_t *outbytesleft); /* NOTE(review): presumably a one-step from->to conversion - confirm */
77         size_t (*pull)(void *cd, const char **inbuf, size_t *inbytesleft,
78                        char **outbuf, size_t *outbytesleft); /* same shape as charset_functions.pull */
79         size_t (*push)(void *cd, const char **inbuf, size_t *inbytesleft,
80                        char **outbuf, size_t *outbytesleft); /* same shape as charset_functions.push */
81         void *cd_direct, *cd_pull, *cd_push; /* opaque handles; presumably the `cd` argument for the like-named callback - confirm */
82         char *from_name, *to_name; /* presumably the source and destination charset names - confirm */
83 } *smb_iconv_t;
84
85 /* string manipulation flags: each value is a distinct single bit, so
   several flags can be combined with bitwise OR in one int */
86 #define STR_TERMINATE 1
87 #define STR_UPPER 2
88 #define STR_ASCII 4
89 #define STR_UNICODE 8
90 #define STR_NOALIGN 16
91 #define STR_NO_RANGE_CHECK 32
92 #define STR_LEN8BIT 64
93 #define STR_TERMINATE_ASCII 128 /* only terminate if ascii */
94 #define STR_LEN_NOTERM 256 /* the length field is the unterminated length */
95
96 struct loadparm_context;
97 struct smb_iconv_convenience;
98
99 /* replace some string functions with multi-byte
100    versions */
101 #define strlower(s) strlower_m(s)
102 #define strupper(s) strupper_m(s)
103
104 char *strchr_m(const char *s, char c);
105 size_t strlen_m_term(const char *s);
106 size_t strlen_m_term_null(const char *s);
107 size_t strlen_m(const char *s);
108 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength);
109 void string_replace_m(char *s, char oldc, char newc);
110 bool strcsequal_m(const char *s1,const char *s2);
111 bool strequal_m(const char *s1, const char *s2);
112 int strncasecmp_m(const char *s1, const char *s2, size_t n);
113 bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize);
114 int strcasecmp_m(const char *s1, const char *s2);
115 size_t count_chars_m(const char *s, char c);
116 void strupper_m(char *s);
117 void strlower_m(char *s);
118 char *strupper_talloc(TALLOC_CTX *ctx, const char *src);
119 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src);
120 char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n);
121 char *strlower_talloc(TALLOC_CTX *ctx, const char *src);
122 bool strhasupper(const char *string);
123 bool strhaslower(const char *string);
124 char *strrchr_m(const char *s, char c);
125 char *strchr_m(const char *s, char c); /* NOTE(review): redundant - identical to the strchr_m declaration at the top of this list */
126
127 bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
128 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size);
129 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
130 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
131 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size);
132 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
133 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags);
134 ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags);
135
136 bool convert_string_talloc(TALLOC_CTX *ctx, 
137                                        charset_t from, charset_t to, 
138                                        void const *src, size_t srclen, 
139                                        void *dest, size_t *converted_size, 
140                                            bool allow_badcharcnv);
141
142 size_t convert_string(charset_t from, charset_t to,
143                                 void const *src, size_t srclen, 
144                                 void *dest, size_t destlen, bool allow_badcharcnv);
145
146 ssize_t iconv_talloc(TALLOC_CTX *mem_ctx, 
147                                        smb_iconv_t cd,
148                                        void const *src, size_t srclen, 
149                                        void *dest);
150
151 extern struct smb_iconv_convenience *global_iconv_convenience;
152
153 codepoint_t next_codepoint(const char *str, size_t *size);
154 ssize_t push_codepoint(char *str, codepoint_t c);
155
156 /* codepoints */
157 codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, 
158                             const char *str, size_t *size);
159 ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, 
160                                 char *str, codepoint_t c);
161 codepoint_t toupper_m(codepoint_t val);
162 codepoint_t tolower_m(codepoint_t val);
163 int codepoint_cmpi(codepoint_t c1, codepoint_t c2);
164
165 /* Iconv convenience functions */
166 struct smb_iconv_convenience *smb_iconv_convenience_init(TALLOC_CTX *mem_ctx,
167                                                          const char *dos_charset,
168                                                          const char *unix_charset,
169                                                          bool native_iconv);
170
171 bool convert_string_convenience(struct smb_iconv_convenience *ic,
172                                 charset_t from, charset_t to,
173                                 void const *src, size_t srclen, 
174                                 void *dest, size_t destlen, size_t *converted_size,
175                                 bool allow_badcharcnv);
176 bool convert_string_talloc_convenience(TALLOC_CTX *ctx, 
177                                        struct smb_iconv_convenience *ic, 
178                                        charset_t from, charset_t to, 
179                                        void const *src, size_t srclen, 
180                                        void *dest, size_t *converted_size, bool allow_badcharcnv);
181 /* iconv */
182 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode);
183 int smb_iconv_close(smb_iconv_t cd);
184 size_t smb_iconv(smb_iconv_t cd, 
185                  const char **inbuf, size_t *inbytesleft,
186                  char **outbuf, size_t *outbytesleft);
187 smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 
188                               const char *fromcode, bool native_iconv);
189
190 void load_case_tables(void);
191 bool charset_register_backend(const void *_funcs);
192
193 /*
194  *   Define stub for charset module which implements 8-bit encoding with gaps.
195  *   Encoding tables for such module should be produced from glibc's CHARMAPs
196  *   using script source/script/gen-8bit-gap.sh
197  *   CHARSETNAME is CAPITALIZED charset name
198  *
     *   The expansion defines, in this order:
     *     - static CHARSETNAME_push(): while at least 2 input bytes and 1
     *       output byte remain, reads one 16-bit value with SVAL, looks for
     *       the from_idx range containing it (table ends at start == 0xffff)
     *       and writes the byte from_ucs2[idx + value]. A value found in no
     *       range, or a single trailing input byte, sets errno to EINVAL;
     *       two or more input bytes left over (output exhausted) sets E2BIG.
     *     - static CHARSETNAME_pull(): while at least 1 input byte and 2
     *       output bytes remain, stores to_ucs2[byte] as a 16-bit value.
     *       Any input left over sets errno to E2BIG.
     *     - struct charset_functions CHARSETNAME_functions (not static),
     *       initialised with the stringified name, then pull, then push;
     *       prev/next are left zero-initialised.
     *     - charset_CHARSETNAME_init(), which passes that object to
     *       smb_register_charset().
     *   Both converters return 0 on success and -1 converted to size_t on
     *   error; on error the cursors stay at the first unconverted unit.
     *
     *   The expansion site must provide: from_idx, from_ucs2, to_ucs2, SVAL,
     *   uint16, NTSTATUS, smb_register_charset, errno, EINVAL and E2BIG.
     *
     *   NOTE(review): _pull stores through a (uint16 *) cast of *outbuf, i.e.
     *   in host byte order and assuming suitable alignment, whereas _push
     *   reads through SVAL. Confirm this agrees with "smb_ucs2_t is always
     *   little endian" on big-endian hosts.
     *   NOTE(review): this header declares charset_register_backend() but
     *   the macro calls smb_register_charset(); confirm the latter is
     *   available wherever the macro is expanded.
199  *     */
200 #define SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CHARSETNAME)                                      \
201 static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytesleft,                   \
202                          char **outbuf, size_t *outbytesleft)                                   \
203 {                                                                                               \
204         while (*inbytesleft >= 2 && *outbytesleft >= 1) {                                       \
205                 int i;                                                                          \
206                 int done = 0;                                                                   \
207                                                                                                 \
208                 uint16 ch = SVAL(*inbuf,0);                                                     \
209                                                                                                 \
210                 for (i=0; from_idx[i].start != 0xffff; i++) {                                   \
211                         if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) {             \
212                                 ((unsigned char*)(*outbuf))[0] = from_ucs2[from_idx[i].idx+ch]; \
213                                 (*inbytesleft) -= 2;                                            \
214                                 (*outbytesleft) -= 1;                                           \
215                                 (*inbuf)  += 2;                                                 \
216                                 (*outbuf) += 1;                                                 \
217                                 done = 1;                                                       \
218                                 break;                                                          \
219                         }                                                                       \
220                 }                                                                               \
221                 if (!done) {                                                                    \
222                         errno = EINVAL;                                                         \
223                         return -1;                                                              \
224                 }                                                                               \
225                                                                                                 \
226         }                                                                                       \
227                                                                                                 \
228         if (*inbytesleft == 1) {                                                                \
229                 errno = EINVAL;                                                                 \
230                 return -1;                                                                      \
231         }                                                                                       \
232                                                                                                 \
233         if (*inbytesleft > 1) {                                                                 \
234                 errno = E2BIG;                                                                  \
235                 return -1;                                                                      \
236         }                                                                                       \
237                                                                                                 \
238         return 0;                                                                               \
239 }                                                                                               \
240                                                                                                 \
241 static size_t CHARSETNAME ## _pull(void *cd, const char **inbuf, size_t *inbytesleft,                           \
242                          char **outbuf, size_t *outbytesleft)                                   \
243 {                                                                                               \
244         while (*inbytesleft >= 1 && *outbytesleft >= 2) {                                       \
245                 *(uint16*)(*outbuf) = to_ucs2[((unsigned char*)(*inbuf))[0]];                   \
246                 (*inbytesleft)  -= 1;                                                           \
247                 (*outbytesleft) -= 2;                                                           \
248                 (*inbuf)  += 1;                                                                 \
249                 (*outbuf) += 2;                                                                 \
250         }                                                                                       \
251                                                                                                 \
252         if (*inbytesleft > 0) {                                                                 \
253                 errno = E2BIG;                                                                  \
254                 return -1;                                                                      \
255         }                                                                                       \
256                                                                                                 \
257         return 0;                                                                               \
258 }                                                                                               \
259                                                                                                 \
260 struct charset_functions CHARSETNAME ## _functions =                                            \
261                 {#CHARSETNAME, CHARSETNAME ## _pull, CHARSETNAME ## _push};                     \
262                                                                                                 \
263 NTSTATUS charset_ ## CHARSETNAME ## _init(void);                                                        \
264 NTSTATUS charset_ ## CHARSETNAME ## _init(void)                                                 \
265 {                                                                                               \
266         return smb_register_charset(& CHARSETNAME ## _functions);                               \
267 }                                                                                               \
268
269
270 #endif /* __CHARSET_H__ */