This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
+ the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
+/*
+ * We have to use strcasecmp here as the character conversions
+ * haven't been initialised yet. JRA.
+ */
+
+#undef strcasecmp
/**
* @file
* @sa Samba Developers Guide
**/
-static size_t ascii_pull(void *,char **, size_t *, char **, size_t *);
-static size_t ascii_push(void *,char **, size_t *, char **, size_t *);
-static size_t utf8_pull(void *,char **, size_t *, char **, size_t *);
-static size_t utf8_push(void *,char **, size_t *, char **, size_t *);
-static size_t ucs2hex_pull(void *,char **, size_t *, char **, size_t *);
-static size_t ucs2hex_push(void *,char **, size_t *, char **, size_t *);
-static size_t iconv_copy(void *,char **, size_t *, char **, size_t *);
+static_decl_charset;
+
+static size_t ascii_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t ascii_push(void *,const char **, size_t *, char **, size_t *);
+static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
+static size_t utf8_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t utf8_push(void *,const char **, size_t *, char **, size_t *);
+static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
+static size_t iconv_copy(void *,const char **, size_t *, char **, size_t *);
+static size_t iconv_swab (void *,const char **, size_t *, char **, size_t *);
static struct charset_functions builtin_functions[] = {
+ /* windows is really neither UCS-2 not UTF-16 */
{"UCS-2LE", iconv_copy, iconv_copy},
+ {"UTF-16LE", iconv_copy, iconv_copy},
+ {"UCS-2BE", iconv_swab, iconv_swab},
+ {"UTF-16BE", iconv_swab, iconv_swab},
+
+ /* we include the UTF-8 alias to cope with differing locale settings */
{"UTF8", utf8_pull, utf8_push},
+ {"UTF-8", utf8_pull, utf8_push},
{"ASCII", ascii_pull, ascii_push},
+ {"646", ascii_pull, ascii_push},
+ {"ISO-8859-1", ascii_pull, latin1_push},
{"UCS2-HEX", ucs2hex_pull, ucs2hex_push},
{NULL, NULL, NULL}
};
return NULL;
}
-BOOL smb_register_charset(struct charset_functions *funcs)
+NTSTATUS smb_register_charset(struct charset_functions *funcs)
{
- struct charset_functions *c = charsets;
+ if (!funcs) {
+ return NT_STATUS_INVALID_PARAMETER;
+ }
DEBUG(5, ("Attempting to register new charset %s\n", funcs->name));
/* Check whether we already have this charset... */
-
if (find_charset_functions(funcs->name)) {
DEBUG(0, ("Duplicate charset %s, not registering\n", funcs->name));
- return False;
+ return NT_STATUS_OBJECT_NAME_COLLISION;
}
funcs->next = funcs->prev = NULL;
DEBUG(5, ("Registered charset %s\n", funcs->name));
DLIST_ADD(charsets, funcs);
- return True;
+ return NT_STATUS_OK;
}
-void lazy_initialize_iconv(void)
+static void lazy_initialize_iconv(void)
{
- static BOOL initialized;
+ static bool initialized;
int i;
if (!initialized) {
}
}
+#ifdef HAVE_NATIVE_ICONV
/* if there was an error then reset the internal state,
this ensures that we don't have a shift state remaining for
character sets like SJIS */
static size_t sys_iconv(void *cd,
- char **inbuf, size_t *inbytesleft,
+ const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
-#ifdef HAVE_NATIVE_ICONV
size_t ret = iconv((iconv_t)cd,
- inbuf, inbytesleft,
+ (void *)inbuf, inbytesleft,
outbuf, outbytesleft);
- if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
+ if (ret == (size_t)-1) {
+ int saved_errno = errno;
+ iconv(cd, NULL, NULL, NULL, NULL);
+ errno = saved_errno;
+ }
return ret;
-#else
- errno = EINVAL;
- return -1;
-#endif
}
+#endif
/**
* This is a simple portable iconv() implementaion.
/* in many cases we can go direct */
if (cd->direct) {
return cd->direct(cd->cd_direct,
- (char **)inbuf, inbytesleft, outbuf, outbytesleft);
+ inbuf, inbytesleft, outbuf, outbytesleft);
}
bufsize = sizeof(cvtbuf);
if (cd->pull(cd->cd_pull,
- (char **)inbuf, inbytesleft, &bufp, &bufsize) == -1
+ inbuf, inbytesleft, &bufp, &bufsize) == -1
&& errno != E2BIG) return -1;
bufp = cvtbuf;
bufsize = sizeof(cvtbuf) - bufsize;
if (cd->push(cd->cd_push,
- &bufp, &bufsize,
+ (const char **)&bufp, &bufsize,
outbuf, outbytesleft) == -1) return -1;
}
return 0;
}
+
+static bool is_utf16(const char *name)
+{
+ return strcasecmp(name, "UCS-2LE") == 0 ||
+ strcasecmp(name, "UTF-16LE") == 0;
+}
+
/*
simple iconv_open() wrapper
*/
from = charsets;
to = charsets;
- ret = (smb_iconv_t)malloc(sizeof(*ret));
+ ret = SMB_MALLOC_P(struct smb_iconv_s);
if (!ret) {
errno = ENOMEM;
return (smb_iconv_t)-1;
}
- memset(ret, 0, sizeof(*ret));
+ memset(ret, 0, sizeof(struct smb_iconv_s));
- ret->from_name = strdup(fromcode);
- ret->to_name = strdup(tocode);
+ ret->from_name = SMB_STRDUP(fromcode);
+ ret->to_name = SMB_STRDUP(tocode);
/* check for the simplest null conversion */
if (strcasecmp(fromcode, tocode) == 0) {
/* check if we can use iconv for this conversion */
#ifdef HAVE_NATIVE_ICONV
if (!ret->pull) {
- ret->cd_pull = iconv_open("UCS-2LE", fromcode);
+ ret->cd_pull = iconv_open("UTF-16LE", fromcode);
+ if (ret->cd_pull == (iconv_t)-1)
+ ret->cd_pull = iconv_open("UCS-2LE", fromcode);
if (ret->cd_pull != (iconv_t)-1)
ret->pull = sys_iconv;
}
if (!ret->push) {
- ret->cd_push = iconv_open(tocode, "UCS-2LE");
+ ret->cd_push = iconv_open(tocode, "UTF-16LE");
+ if (ret->cd_push == (iconv_t)-1)
+ ret->cd_push = iconv_open(tocode, "UCS-2LE");
if (ret->cd_push != (iconv_t)-1)
ret->push = sys_iconv;
}
#endif
/* check if there is a module available that can do this conversion */
- if (!ret->pull && smb_probe_module("charset", fromcode)) {
+ if (!ret->pull && NT_STATUS_IS_OK(smb_probe_module("charset", fromcode))) {
if(!(from = find_charset_functions(fromcode)))
DEBUG(0, ("Module %s doesn't provide charset %s!\n", fromcode, fromcode));
else
ret->pull = from->pull;
}
- if (!ret->push && smb_probe_module("charset", tocode)) {
+ if (!ret->push && NT_STATUS_IS_OK(smb_probe_module("charset", tocode))) {
if(!(to = find_charset_functions(tocode)))
DEBUG(0, ("Module %s doesn't provide charset %s!\n", tocode, tocode));
else
}
/* check for conversion to/from ucs2 */
- if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
+ if (is_utf16(fromcode) && to) {
ret->direct = to->push;
ret->push = ret->pull = NULL;
return ret;
}
- if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
+ if (is_utf16(tocode) && from) {
ret->direct = from->pull;
ret->push = ret->pull = NULL;
return ret;
/* Check if we can do the conversion direct */
#ifdef HAVE_NATIVE_ICONV
- if (strcasecmp(fromcode, "UCS-2LE") == 0) {
+ if (is_utf16(fromcode)) {
ret->direct = sys_iconv;
ret->cd_direct = ret->cd_push;
ret->cd_push = NULL;
return ret;
}
- if (strcasecmp(tocode, "UCS-2LE") == 0) {
+ if (is_utf16(tocode)) {
ret->direct = sys_iconv;
ret->cd_direct = ret->cd_pull;
ret->cd_pull = NULL;
multi-byte character set support for english users
***********************************************************************/
-static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 1 && *outbytesleft >= 2) {
return 0;
}
-static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
int ir_count=0;
return ir_count;
}
+static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
+{
+ int ir_count=0;
-static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
+ while (*inbytesleft >= 2 && *outbytesleft >= 1) {
+ (*outbuf)[0] = (*inbuf)[0];
+ if ((*inbuf)[1]) ir_count++;
+ (*inbytesleft) -= 2;
+ (*outbytesleft) -= 1;
+ (*inbuf) += 2;
+ (*outbuf) += 1;
+ }
+
+ if (*inbytesleft == 1) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (*inbytesleft > 1) {
+ errno = E2BIG;
+ return -1;
+ }
+
+ return ir_count;
+}
+
+static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 1 && *outbytesleft >= 2) {
return 0;
}
-static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 2 && *outbytesleft >= 1) {
return 0;
}
+static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
+{
+ int n;
+
+ n = MIN(*inbytesleft, *outbytesleft);
+
+ swab(*inbuf, *outbuf, (n&~1));
+ if (n&1) {
+ (*outbuf)[n-1] = 0;
+ }
-static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
+ (*inbytesleft) -= n;
+ (*outbytesleft) -= n;
+ (*inbuf) += n;
+ (*outbuf) += n;
+
+ if (*inbytesleft > 0) {
+ errno = E2BIG;
+ return -1;
+ }
+
+ return 0;
+}
+
+static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
int n;
return 0;
}
-static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
- while (*inbytesleft >= 1 && *outbytesleft >= 2) {
- unsigned char *c = (unsigned char *)*inbuf;
- unsigned char *uc = (unsigned char *)*outbuf;
- int len = 1;
+ size_t in_left=*inbytesleft, out_left=*outbytesleft;
+ const uint8 *c = (const uint8 *)*inbuf;
+ uint8 *uc = (uint8 *)*outbuf;
+
+ while (in_left >= 1 && out_left >= 2) {
+ unsigned int codepoint;
if ((c[0] & 0x80) == 0) {
uc[0] = c[0];
uc[1] = 0;
- } else if ((c[0] & 0xf0) == 0xe0) {
- if (*inbytesleft < 3) {
- DEBUG(0,("short utf8 char\n"));
- goto badseq;
+ c += 1;
+ in_left -= 1;
+ out_left -= 2;
+ uc += 2;
+ continue;
+ }
+
+ if ((c[0] & 0xe0) == 0xc0) {
+ if (in_left < 2 ||
+ (c[1] & 0xc0) != 0x80) {
+ errno = EILSEQ;
+ goto error;
}
- uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
- uc[0] = (c[1]<<6) | (c[2]&0x3f);
- len = 3;
- } else if ((c[0] & 0xe0) == 0xc0) {
- if (*inbytesleft < 2) {
- DEBUG(0,("short utf8 char\n"));
- goto badseq;
+ codepoint = (c[1]&0x3f) | ((c[0]&0x1f)<<6);
+ if (codepoint < 0x80) {
+ /* don't accept UTF-8 characters that are not minimally packed */
+ errno = EILSEQ;
+ goto error;
}
- uc[1] = (c[0]>>2) & 0x7;
- uc[0] = (c[0]<<6) | (c[1]&0x3f);
- len = 2;
+ uc[1] = codepoint >> 8;
+ uc[0] = codepoint & 0xff;
+ c += 2;
+ in_left -= 2;
+ out_left -= 2;
+ uc += 2;
+ continue;
}
- (*inbuf) += len;
- (*inbytesleft) -= len;
- (*outbytesleft) -= 2;
- (*outbuf) += 2;
+ if ((c[0] & 0xf0) == 0xe0) {
+ if (in_left < 3 ||
+ (c[1] & 0xc0) != 0x80 ||
+ (c[2] & 0xc0) != 0x80) {
+ errno = EILSEQ;
+ goto error;
+ }
+ codepoint = (c[2]&0x3f) | ((c[1]&0x3f)<<6) | ((c[0]&0xf)<<12);
+ if (codepoint < 0x800) {
+ /* don't accept UTF-8 characters that are not minimally packed */
+ errno = EILSEQ;
+ goto error;
+ }
+ uc[1] = codepoint >> 8;
+ uc[0] = codepoint & 0xff;
+ c += 3;
+ in_left -= 3;
+ out_left -= 2;
+ uc += 2;
+ continue;
+ }
+
+ if ((c[0] & 0xf8) == 0xf0) {
+ if (in_left < 4 ||
+ (c[1] & 0xc0) != 0x80 ||
+ (c[2] & 0xc0) != 0x80 ||
+ (c[3] & 0xc0) != 0x80) {
+ errno = EILSEQ;
+ goto error;
+ }
+ codepoint =
+ (c[3]&0x3f) |
+ ((c[2]&0x3f)<<6) |
+ ((c[1]&0x3f)<<12) |
+ ((c[0]&0x7)<<18);
+ if (codepoint < 0x10000 || codepoint > 0x10ffff) {
+ /* don't accept UTF-8 characters that are not minimally packed */
+ errno = EILSEQ;
+ goto error;
+ }
+
+ codepoint -= 0x10000;
+
+ if (out_left < 4) {
+ errno = E2BIG;
+ goto error;
+ }
+
+ uc[0] = (codepoint>>10) & 0xFF;
+ uc[1] = (codepoint>>18) | 0xd8;
+ uc[2] = codepoint & 0xFF;
+ uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
+ c += 4;
+ in_left -= 4;
+ out_left -= 4;
+ uc += 4;
+ continue;
+ }
+
+ /* we don't handle 5 byte sequences */
+ errno = EINVAL;
+ goto error;
}
- if (*inbytesleft > 0) {
+ if (in_left > 0) {
errno = E2BIG;
- return -1;
+ goto error;
}
-
+
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)c;
+ *outbuf = (char *)uc;
return 0;
-badseq:
- errno = EINVAL;
+error:
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)c;
+ *outbuf = (char *)uc;
return -1;
}
-static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
- char **outbuf, size_t *outbytesleft)
+static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
{
- while (*inbytesleft >= 2 && *outbytesleft >= 1) {
- unsigned char *c = (unsigned char *)*outbuf;
- unsigned char *uc = (unsigned char *)*inbuf;
- int len=1;
-
- if (uc[1] & 0xf8) {
- if (*outbytesleft < 3) {
- DEBUG(0,("short utf8 write\n"));
- goto toobig;
+ size_t in_left=*inbytesleft, out_left=*outbytesleft;
+ uint8 *c = (uint8 *)*outbuf;
+ const uint8 *uc = (const uint8 *)*inbuf;
+
+ while (in_left >= 2 && out_left >= 1) {
+ unsigned int codepoint;
+
+ if (uc[1] == 0 && !(uc[0] & 0x80)) {
+ /* simplest case */
+ c[0] = uc[0];
+ in_left -= 2;
+ out_left -= 1;
+ uc += 2;
+ c += 1;
+ continue;
+ }
+
+ if ((uc[1]&0xf8) == 0) {
+ /* next simplest case */
+ if (out_left < 2) {
+ errno = E2BIG;
+ goto error;
}
- c[0] = 0xe0 | (uc[1]>>4);
- c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
- c[2] = 0x80 | (uc[0]&0x3f);
- len = 3;
- } else if (uc[1] | (uc[0] & 0x80)) {
- if (*outbytesleft < 2) {
- DEBUG(0,("short utf8 write\n"));
- goto toobig;
+ c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
+ c[1] = 0x80 | (uc[0] & 0x3f);
+ in_left -= 2;
+ out_left -= 2;
+ uc += 2;
+ c += 2;
+ continue;
+ }
+
+ if ((uc[1] & 0xfc) == 0xdc) {
+ /* its the second part of a 4 byte sequence. Illegal */
+ if (in_left < 4) {
+ errno = EINVAL;
+ } else {
+ errno = EILSEQ;
}
- c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
- c[1] = 0x80 | (uc[0]&0x3f);
- len = 2;
- } else {
- c[0] = uc[0];
+ goto error;
}
+ if ((uc[1] & 0xfc) != 0xd8) {
+ codepoint = uc[0] | (uc[1]<<8);
+ if (out_left < 3) {
+ errno = E2BIG;
+ goto error;
+ }
+ c[0] = 0xe0 | (codepoint >> 12);
+ c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+ c[2] = 0x80 | (codepoint & 0x3f);
+
+ in_left -= 2;
+ out_left -= 3;
+ uc += 2;
+ c += 3;
+ continue;
+ }
- (*inbytesleft) -= 2;
- (*outbytesleft) -= len;
- (*inbuf) += 2;
- (*outbuf) += len;
+ /* its the first part of a 4 byte sequence */
+ if (in_left < 4) {
+ errno = EINVAL;
+ goto error;
+ }
+ if ((uc[3] & 0xfc) != 0xdc) {
+ errno = EILSEQ;
+ goto error;
+ }
+ codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
+ (uc[0]<<10) | ((uc[1] & 0x3)<<18));
+
+ if (out_left < 4) {
+ errno = E2BIG;
+ goto error;
+ }
+ c[0] = 0xf0 | (codepoint >> 18);
+ c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+ c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+ c[3] = 0x80 | (codepoint & 0x3f);
+
+ in_left -= 4;
+ out_left -= 4;
+ uc += 4;
+ c += 4;
}
- if (*inbytesleft == 1) {
+ if (in_left == 1) {
errno = EINVAL;
- return -1;
+ goto error;
}
- if (*inbytesleft > 1) {
+ if (in_left > 1) {
errno = E2BIG;
- return -1;
+ goto error;
}
+
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)uc;
+ *outbuf = (char *)c;
return 0;
-toobig:
- errno = E2BIG;
+error:
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)uc;
+ *outbuf = (char *)c;
return -1;
}