This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
+ the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
* @sa Samba Developers Guide
**/
-static size_t ascii_pull(void *,char **, size_t *, char **, size_t *);
-static size_t ascii_push(void *,char **, size_t *, char **, size_t *);
-static size_t latin1_push(void *,char **, size_t *, char **, size_t *);
-static size_t utf8_pull(void *,char **, size_t *, char **, size_t *);
-static size_t utf8_push(void *,char **, size_t *, char **, size_t *);
-static size_t ucs2hex_pull(void *,char **, size_t *, char **, size_t *);
-static size_t ucs2hex_push(void *,char **, size_t *, char **, size_t *);
-static size_t iconv_copy(void *,char **, size_t *, char **, size_t *);
+static_decl_charset;
+
+static size_t ascii_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t ascii_push(void *,const char **, size_t *, char **, size_t *);
+static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
+static size_t utf8_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t utf8_push(void *,const char **, size_t *, char **, size_t *);
+static size_t ucs2hex_pull(void *,const char **, size_t *, char **, size_t *);
+static size_t ucs2hex_push(void *,const char **, size_t *, char **, size_t *);
+static size_t iconv_copy(void *,const char **, size_t *, char **, size_t *);
+static size_t iconv_swab (void *,const char **, size_t *, char **, size_t *);
static struct charset_functions builtin_functions[] = {
+ /* windows is really neither UCS-2 not UTF-16 */
{"UCS-2LE", iconv_copy, iconv_copy},
+ {"UTF-16LE", iconv_copy, iconv_copy},
+ {"UCS-2BE", iconv_swab, iconv_swab},
+ {"UTF-16BE", iconv_swab, iconv_swab},
+
+ /* we include the UTF-8 alias to cope with differing locale settings */
{"UTF8", utf8_pull, utf8_push},
+ {"UTF-8", utf8_pull, utf8_push},
{"ASCII", ascii_pull, ascii_push},
{"646", ascii_pull, ascii_push},
{"ISO-8859-1", ascii_pull, latin1_push},
return NT_STATUS_OK;
}
-void lazy_initialize_iconv(void)
+static void lazy_initialize_iconv(void)
{
- static BOOL initialized;
+ static bool initialized;
int i;
if (!initialized) {
}
}
+#ifdef HAVE_NATIVE_ICONV
/* if there was an error then reset the internal state,
this ensures that we don't have a shift state remaining for
character sets like SJIS */
static size_t sys_iconv(void *cd,
- char **inbuf, size_t *inbytesleft,
+ const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
-#ifdef HAVE_NATIVE_ICONV
size_t ret = iconv((iconv_t)cd,
- inbuf, inbytesleft,
+ (void *)inbuf, inbytesleft,
outbuf, outbytesleft);
- if (ret == (size_t)-1) iconv(cd, NULL, NULL, NULL, NULL);
+ if (ret == (size_t)-1) {
+ int saved_errno = errno;
+ iconv(cd, NULL, NULL, NULL, NULL);
+ errno = saved_errno;
+ }
return ret;
-#else
- errno = EINVAL;
- return -1;
-#endif
}
+#endif
/**
* This is a simple portable iconv() implementaion.
* enough that Samba works on systems that don't have iconv.
**/
size_t smb_iconv(smb_iconv_t cd,
- char **inbuf, size_t *inbytesleft,
+ const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
char cvtbuf[2048];
/* in many cases we can go direct */
if (cd->direct) {
return cd->direct(cd->cd_direct,
- (char **)inbuf, inbytesleft, outbuf, outbytesleft);
+ inbuf, inbytesleft, outbuf, outbytesleft);
}
bufsize = sizeof(cvtbuf);
if (cd->pull(cd->cd_pull,
- (char **)inbuf, inbytesleft, &bufp, &bufsize) == -1
+ inbuf, inbytesleft, &bufp, &bufsize) == -1
&& errno != E2BIG) return -1;
bufp = cvtbuf;
bufsize = sizeof(cvtbuf) - bufsize;
if (cd->push(cd->cd_push,
- &bufp, &bufsize,
+ (const char **)&bufp, &bufsize,
outbuf, outbytesleft) == -1) return -1;
}
return 0;
}
+
+static bool is_utf16(const char *name)
+{
+ return strcasecmp(name, "UCS-2LE") == 0 ||
+ strcasecmp(name, "UTF-16LE") == 0;
+}
+
/*
simple iconv_open() wrapper
*/
from = charsets;
to = charsets;
- ret = (smb_iconv_t)malloc(sizeof(*ret));
+ ret = SMB_MALLOC_P(struct smb_iconv_s);
if (!ret) {
errno = ENOMEM;
return (smb_iconv_t)-1;
}
- memset(ret, 0, sizeof(*ret));
+ memset(ret, 0, sizeof(struct smb_iconv_s));
- ret->from_name = strdup(fromcode);
- ret->to_name = strdup(tocode);
+ ret->from_name = SMB_STRDUP(fromcode);
+ ret->to_name = SMB_STRDUP(tocode);
/* check for the simplest null conversion */
if (strcasecmp(fromcode, tocode) == 0) {
/* check if we can use iconv for this conversion */
#ifdef HAVE_NATIVE_ICONV
if (!ret->pull) {
- ret->cd_pull = iconv_open("UCS-2LE", fromcode);
+ ret->cd_pull = iconv_open("UTF-16LE", fromcode);
+ if (ret->cd_pull == (iconv_t)-1)
+ ret->cd_pull = iconv_open("UCS-2LE", fromcode);
if (ret->cd_pull != (iconv_t)-1)
ret->pull = sys_iconv;
}
if (!ret->push) {
- ret->cd_push = iconv_open(tocode, "UCS-2LE");
+ ret->cd_push = iconv_open(tocode, "UTF-16LE");
+ if (ret->cd_push == (iconv_t)-1)
+ ret->cd_push = iconv_open(tocode, "UCS-2LE");
if (ret->cd_push != (iconv_t)-1)
ret->push = sys_iconv;
}
}
/* check for conversion to/from ucs2 */
- if (strcasecmp(fromcode, "UCS-2LE") == 0 && to) {
+ if (is_utf16(fromcode) && to) {
ret->direct = to->push;
ret->push = ret->pull = NULL;
return ret;
}
- if (strcasecmp(tocode, "UCS-2LE") == 0 && from) {
+ if (is_utf16(tocode) && from) {
ret->direct = from->pull;
ret->push = ret->pull = NULL;
return ret;
/* Check if we can do the conversion direct */
#ifdef HAVE_NATIVE_ICONV
- if (strcasecmp(fromcode, "UCS-2LE") == 0) {
+ if (is_utf16(fromcode)) {
ret->direct = sys_iconv;
ret->cd_direct = ret->cd_push;
ret->cd_push = NULL;
return ret;
}
- if (strcasecmp(tocode, "UCS-2LE") == 0) {
+ if (is_utf16(tocode)) {
ret->direct = sys_iconv;
ret->cd_direct = ret->cd_pull;
ret->cd_pull = NULL;
multi-byte character set support for english users
***********************************************************************/
-static size_t ascii_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 1 && *outbytesleft >= 2) {
return 0;
}
-static size_t ascii_push(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
int ir_count=0;
return ir_count;
}
-static size_t latin1_push(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
int ir_count=0;
return ir_count;
}
-static size_t ucs2hex_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ucs2hex_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 1 && *outbytesleft >= 2) {
return 0;
}
-static size_t ucs2hex_push(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t ucs2hex_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 2 && *outbytesleft >= 1) {
return 0;
}
+static size_t iconv_swab(void *cd, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
+{
+ int n;
-static size_t iconv_copy(void *cd, char **inbuf, size_t *inbytesleft,
+ n = MIN(*inbytesleft, *outbytesleft);
+
+ swab(*inbuf, *outbuf, (n&~1));
+ if (n&1) {
+ (*outbuf)[n-1] = 0;
+ }
+
+ (*inbytesleft) -= n;
+ (*outbytesleft) -= n;
+ (*inbuf) += n;
+ (*outbuf) += n;
+
+ if (*inbytesleft > 0) {
+ errno = E2BIG;
+ return -1;
+ }
+
+ return 0;
+}
+
+static size_t iconv_copy(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
int n;
return 0;
}
-static size_t utf8_pull(void *cd, char **inbuf, size_t *inbytesleft,
+static size_t utf8_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
- while (*inbytesleft >= 1 && *outbytesleft >= 2) {
- unsigned char *c = (unsigned char *)*inbuf;
- unsigned char *uc = (unsigned char *)*outbuf;
- int len = 1;
+ size_t in_left=*inbytesleft, out_left=*outbytesleft;
+ const uint8 *c = (const uint8 *)*inbuf;
+ uint8 *uc = (uint8 *)*outbuf;
+
+ while (in_left >= 1 && out_left >= 2) {
+ unsigned int codepoint;
if ((c[0] & 0x80) == 0) {
uc[0] = c[0];
uc[1] = 0;
- } else if ((c[0] & 0xf0) == 0xe0) {
- if (*inbytesleft < 3) {
- DEBUG(0,("short utf8 char\n"));
- goto badseq;
+ c += 1;
+ in_left -= 1;
+ out_left -= 2;
+ uc += 2;
+ continue;
+ }
+
+ if ((c[0] & 0xe0) == 0xc0) {
+ if (in_left < 2 ||
+ (c[1] & 0xc0) != 0x80) {
+ errno = EILSEQ;
+ goto error;
}
- uc[1] = ((c[0]&0xF)<<4) | ((c[1]>>2)&0xF);
- uc[0] = (c[1]<<6) | (c[2]&0x3f);
- len = 3;
- } else if ((c[0] & 0xe0) == 0xc0) {
- if (*inbytesleft < 2) {
- DEBUG(0,("short utf8 char\n"));
- goto badseq;
+ codepoint = (c[1]&0x3f) | ((c[0]&0x1f)<<6);
+ if (codepoint < 0x80) {
+ /* don't accept UTF-8 characters that are not minimally packed */
+ errno = EILSEQ;
+ goto error;
}
- uc[1] = (c[0]>>2) & 0x7;
- uc[0] = (c[0]<<6) | (c[1]&0x3f);
- len = 2;
+ uc[1] = codepoint >> 8;
+ uc[0] = codepoint & 0xff;
+ c += 2;
+ in_left -= 2;
+ out_left -= 2;
+ uc += 2;
+ continue;
}
- (*inbuf) += len;
- (*inbytesleft) -= len;
- (*outbytesleft) -= 2;
- (*outbuf) += 2;
+ if ((c[0] & 0xf0) == 0xe0) {
+ if (in_left < 3 ||
+ (c[1] & 0xc0) != 0x80 ||
+ (c[2] & 0xc0) != 0x80) {
+ errno = EILSEQ;
+ goto error;
+ }
+ codepoint = (c[2]&0x3f) | ((c[1]&0x3f)<<6) | ((c[0]&0xf)<<12);
+ if (codepoint < 0x800) {
+ /* don't accept UTF-8 characters that are not minimally packed */
+ errno = EILSEQ;
+ goto error;
+ }
+ uc[1] = codepoint >> 8;
+ uc[0] = codepoint & 0xff;
+ c += 3;
+ in_left -= 3;
+ out_left -= 2;
+ uc += 2;
+ continue;
+ }
+
+ if ((c[0] & 0xf8) == 0xf0) {
+ if (in_left < 4 ||
+ (c[1] & 0xc0) != 0x80 ||
+ (c[2] & 0xc0) != 0x80 ||
+ (c[3] & 0xc0) != 0x80) {
+ errno = EILSEQ;
+ goto error;
+ }
+ codepoint =
+ (c[3]&0x3f) |
+ ((c[2]&0x3f)<<6) |
+ ((c[1]&0x3f)<<12) |
+ ((c[0]&0x7)<<18);
+ if (codepoint < 0x10000 || codepoint > 0x10ffff) {
+ /* don't accept UTF-8 characters that are not minimally packed */
+ errno = EILSEQ;
+ goto error;
+ }
+
+ codepoint -= 0x10000;
+
+ if (out_left < 4) {
+ errno = E2BIG;
+ goto error;
+ }
+
+ uc[0] = (codepoint>>10) & 0xFF;
+ uc[1] = (codepoint>>18) | 0xd8;
+ uc[2] = codepoint & 0xFF;
+ uc[3] = ((codepoint>>8) & 0x3) | 0xdc;
+ c += 4;
+ in_left -= 4;
+ out_left -= 4;
+ uc += 4;
+ continue;
+ }
+
+ /* we don't handle 5 byte sequences */
+ errno = EINVAL;
+ goto error;
}
- if (*inbytesleft > 0) {
+ if (in_left > 0) {
errno = E2BIG;
- return -1;
+ goto error;
}
-
+
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)c;
+ *outbuf = (char *)uc;
return 0;
-badseq:
- errno = EINVAL;
+error:
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)c;
+ *outbuf = (char *)uc;
return -1;
}
-static size_t utf8_push(void *cd, char **inbuf, size_t *inbytesleft,
- char **outbuf, size_t *outbytesleft)
+static size_t utf8_push(void *cd, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
{
- while (*inbytesleft >= 2 && *outbytesleft >= 1) {
- unsigned char *c = (unsigned char *)*outbuf;
- unsigned char *uc = (unsigned char *)*inbuf;
- int len=1;
-
- if (uc[1] & 0xf8) {
- if (*outbytesleft < 3) {
- DEBUG(0,("short utf8 write\n"));
- goto toobig;
+ size_t in_left=*inbytesleft, out_left=*outbytesleft;
+ uint8 *c = (uint8 *)*outbuf;
+ const uint8 *uc = (const uint8 *)*inbuf;
+
+ while (in_left >= 2 && out_left >= 1) {
+ unsigned int codepoint;
+
+ if (uc[1] == 0 && !(uc[0] & 0x80)) {
+ /* simplest case */
+ c[0] = uc[0];
+ in_left -= 2;
+ out_left -= 1;
+ uc += 2;
+ c += 1;
+ continue;
+ }
+
+ if ((uc[1]&0xf8) == 0) {
+ /* next simplest case */
+ if (out_left < 2) {
+ errno = E2BIG;
+ goto error;
}
- c[0] = 0xe0 | (uc[1]>>4);
- c[1] = 0x80 | ((uc[1]&0xF)<<2) | (uc[0]>>6);
- c[2] = 0x80 | (uc[0]&0x3f);
- len = 3;
- } else if (uc[1] | (uc[0] & 0x80)) {
- if (*outbytesleft < 2) {
- DEBUG(0,("short utf8 write\n"));
- goto toobig;
+ c[0] = 0xc0 | (uc[0]>>6) | (uc[1]<<2);
+ c[1] = 0x80 | (uc[0] & 0x3f);
+ in_left -= 2;
+ out_left -= 2;
+ uc += 2;
+ c += 2;
+ continue;
+ }
+
+ if ((uc[1] & 0xfc) == 0xdc) {
+ /* its the second part of a 4 byte sequence. Illegal */
+ if (in_left < 4) {
+ errno = EINVAL;
+ } else {
+ errno = EILSEQ;
}
- c[0] = 0xc0 | (uc[1]<<2) | (uc[0]>>6);
- c[1] = 0x80 | (uc[0]&0x3f);
- len = 2;
- } else {
- c[0] = uc[0];
+ goto error;
}
+ if ((uc[1] & 0xfc) != 0xd8) {
+ codepoint = uc[0] | (uc[1]<<8);
+ if (out_left < 3) {
+ errno = E2BIG;
+ goto error;
+ }
+ c[0] = 0xe0 | (codepoint >> 12);
+ c[1] = 0x80 | ((codepoint >> 6) & 0x3f);
+ c[2] = 0x80 | (codepoint & 0x3f);
+
+ in_left -= 2;
+ out_left -= 3;
+ uc += 2;
+ c += 3;
+ continue;
+ }
- (*inbytesleft) -= 2;
- (*outbytesleft) -= len;
- (*inbuf) += 2;
- (*outbuf) += len;
+ /* its the first part of a 4 byte sequence */
+ if (in_left < 4) {
+ errno = EINVAL;
+ goto error;
+ }
+ if ((uc[3] & 0xfc) != 0xdc) {
+ errno = EILSEQ;
+ goto error;
+ }
+ codepoint = 0x10000 + (uc[2] | ((uc[3] & 0x3)<<8) |
+ (uc[0]<<10) | ((uc[1] & 0x3)<<18));
+
+ if (out_left < 4) {
+ errno = E2BIG;
+ goto error;
+ }
+ c[0] = 0xf0 | (codepoint >> 18);
+ c[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+ c[2] = 0x80 | ((codepoint >> 6) & 0x3f);
+ c[3] = 0x80 | (codepoint & 0x3f);
+
+ in_left -= 4;
+ out_left -= 4;
+ uc += 4;
+ c += 4;
}
- if (*inbytesleft == 1) {
+ if (in_left == 1) {
errno = EINVAL;
- return -1;
+ goto error;
}
- if (*inbytesleft > 1) {
+ if (in_left > 1) {
errno = E2BIG;
- return -1;
+ goto error;
}
+
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)uc;
+ *outbuf = (char *)c;
return 0;
-toobig:
- errno = E2BIG;
+error:
+ *inbytesleft = in_left;
+ *outbytesleft = out_left;
+ *inbuf = (char *)uc;
+ *outbuf = (char *)c;
return -1;
}