#include "../lib/util/dlinklist.h"
#include "system/iconv.h"
#include "system/filesys.h"
+#include "charset_proto.h"
#ifdef strcasecmp
#undef strcasecmp
#endif
-#ifdef static_decl_charset
-static_decl_charset;
-#endif
-
/**
* @file
*
static size_t ascii_pull (void *,const char **, size_t *, char **, size_t *);
static size_t ascii_push (void *,const char **, size_t *, char **, size_t *);
+static size_t latin1_pull(void *,const char **, size_t *, char **, size_t *);
static size_t latin1_push(void *,const char **, size_t *, char **, size_t *);
static size_t utf8_pull (void *,const char **, size_t *, char **, size_t *);
static size_t utf8_push (void *,const char **, size_t *, char **, size_t *);
{"UTF-8", utf8_pull, utf8_push},
/* this handles the munging needed for String2Key */
- {"UTF16_MUNGED", utf16_munged_pull, iconv_copy},
+ {"UTF16_MUNGED", utf16_munged_pull, iconv_copy, true},
{"ASCII", ascii_pull, ascii_push},
{"646", ascii_pull, ascii_push},
- {"ISO-8859-1", ascii_pull, latin1_push},
- {"UCS2-HEX", ucs2hex_pull, ucs2hex_push}
-};
-
-static struct charset_functions *charsets = NULL;
-
-static struct charset_functions *find_charset_functions(const char *name)
-{
- struct charset_functions *c;
-
- /* Check whether we already have this charset... */
- for (c = charsets; c != NULL; c = c->next) {
- if(strcasecmp(c->name, name) == 0) {
- return c;
- }
- }
-
- return NULL;
-}
-
-bool smb_register_charset(const struct charset_functions *funcs_in)
-{
- struct charset_functions *funcs;
-
- DEBUG(5, ("Attempting to register new charset %s\n", funcs_in->name));
- /* Check whether we already have this charset... */
- if (find_charset_functions(funcs_in->name)) {
- DEBUG(0, ("Duplicate charset %s, not registering\n", funcs_in->name));
- return false;
- }
-
- funcs = talloc(NULL, struct charset_functions);
- if (!funcs) {
- DEBUG(0, ("Out of memory duplicating charset %s\n", funcs_in->name));
- return false;
- }
- *funcs = *funcs_in;
-
- funcs->next = funcs->prev = NULL;
- DEBUG(5, ("Registered charset %s\n", funcs->name));
- DLIST_ADD(charsets, funcs);
- return true;
-}
-
-static void lazy_initialize_iconv(void)
-{
- static bool initialized;
-
-#ifdef static_init_charset
- if (!initialized) {
- static_init_charset;
- initialized = true;
- }
+ {"ISO-8859-1", latin1_pull, latin1_push},
+#ifdef DEVELOPER
+ {"WEIRD", weird_pull, weird_push, true},
#endif
-}
+#ifdef DARWINOS
+ {"MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push, true},
+#endif
+ {"UCS2-HEX", ucs2hex_pull, ucs2hex_push, true}
+
+};
#ifdef HAVE_NATIVE_ICONV
/* if there was an error then reset the internal state,
const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
- char cvtbuf[2048];
- size_t bufsize;
-
/* in many cases we can go direct */
if (cd->direct) {
return cd->direct(cd->cd_direct,
inbuf, inbytesleft, outbuf, outbytesleft);
}
-
/* otherwise we have to do it chunks at a time */
- while (*inbytesleft > 0) {
- char *bufp1 = cvtbuf;
- const char *bufp2 = cvtbuf;
+ {
+#ifndef SMB_ICONV_BUFSIZE
+#define SMB_ICONV_BUFSIZE 2048
+#endif
+ TALLOC_CTX *mem_ctx;
+ size_t bufsize;
+ char *cvtbuf;
- bufsize = sizeof(cvtbuf);
-
- if (cd->pull(cd->cd_pull,
- inbuf, inbytesleft, &bufp1, &bufsize) == -1
- && errno != E2BIG) return -1;
+#if _SAMBA_BUILD_ == 3
+ mem_ctx = talloc_tos();
+#else
+ mem_ctx = cd;
+#endif
+ cvtbuf = talloc_array(mem_ctx, char, SMB_ICONV_BUFSIZE);
+
+ if (!cvtbuf) {
+ return (size_t)-1;
+ }
- bufsize = sizeof(cvtbuf) - bufsize;
+ while (*inbytesleft > 0) {
+ char *bufp1 = cvtbuf;
+ const char *bufp2 = cvtbuf;
+ int saved_errno = errno;
+ bool pull_failed = false;
+ bufsize = SMB_ICONV_BUFSIZE;
+
+ if (cd->pull(cd->cd_pull,
+ inbuf, inbytesleft, &bufp1, &bufsize) == -1
+ && errno != E2BIG) {
+ saved_errno = errno;
+ pull_failed = true;
+ }
- if (cd->push(cd->cd_push,
- &bufp2, &bufsize,
- outbuf, outbytesleft) == -1) return -1;
+ bufsize = SMB_ICONV_BUFSIZE - bufsize;
+
+ if (cd->push(cd->cd_push,
+ &bufp2, &bufsize,
+ outbuf, outbytesleft) == -1) {
+ talloc_free(cvtbuf);
+ return -1;
+ } else if (pull_failed) {
+ /* We want the pull errno if possible */
+ errno = saved_errno;
+ return -1;
+ }
+ }
+ talloc_free(cvtbuf);
}
return 0;
}
_PUBLIC_ smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode,
- const char *fromcode, bool native_iconv)
+ const char *fromcode, bool use_builtin_handlers)
{
smb_iconv_t ret;
const struct charset_functions *from=NULL, *to=NULL;
int i;
- lazy_initialize_iconv();
-
ret = (smb_iconv_t)talloc_named(mem_ctx,
sizeof(*ret),
"iconv(%s,%s)", tocode, fromcode);
return ret;
}
+ /* check if we have a builtin function for this conversion */
for (i=0;i<ARRAY_SIZE(builtin_functions);i++) {
if (strcasecmp(fromcode, builtin_functions[i].name) == 0) {
- from = &builtin_functions[i];
- }
- if (strcasecmp(tocode, builtin_functions[i].name) == 0) {
- to = &builtin_functions[i];
- }
- }
-
- if (from == NULL) {
- for (from=charsets; from; from=from->next) {
- if (strcasecmp(from->name, fromcode) == 0) break;
+ if (use_builtin_handlers || builtin_functions[i].samba_internal_charset) {
+ from = &builtin_functions[i];
+ }
}
- }
-
- if (to == NULL) {
- for (to=charsets; to; to=to->next) {
- if (strcasecmp(to->name, tocode) == 0) break;
+ if (strcasecmp(tocode, builtin_functions[i].name) == 0) {
+ if (use_builtin_handlers || builtin_functions[i].samba_internal_charset) {
+ to = &builtin_functions[i];
+ }
}
}
#ifdef HAVE_NATIVE_ICONV
- if ((!from || !to) && !native_iconv) {
- goto failed;
- }
- if (!from) {
- ret->pull = sys_iconv;
+ /* the from and to varaibles indicate a samba module or
+ * internal conversion, ret->pull and ret->push are
+ * initialised only in this block for iconv based
+ * conversions */
+
+ if (from == NULL) {
ret->cd_pull = iconv_open("UTF-16LE", fromcode);
if (ret->cd_pull == (iconv_t)-1)
ret->cd_pull = iconv_open("UCS-2LE", fromcode);
- if (ret->cd_pull == (iconv_t)-1) goto failed;
+ if (ret->cd_pull != (iconv_t)-1) {
+ ret->pull = sys_iconv;
+ }
}
-
- if (!to) {
- ret->push = sys_iconv;
+
+ if (to == NULL) {
ret->cd_push = iconv_open(tocode, "UTF-16LE");
if (ret->cd_push == (iconv_t)-1)
ret->cd_push = iconv_open(tocode, "UCS-2LE");
- if (ret->cd_push == (iconv_t)-1) goto failed;
+ if (ret->cd_push != (iconv_t)-1) {
+ ret->push = sys_iconv;
+ }
}
-#else
- if (!from || !to) {
+#endif
+
+ if (ret->pull == NULL && from == NULL) {
+ goto failed;
+ }
+
+ if (ret->push == NULL && to == NULL) {
goto failed;
}
-#endif
/* check for conversion to/from ucs2 */
if (is_utf16(fromcode) && to) {
and also the "test" character sets that are designed to test
multi-byte character set support for english users
***********************************************************************/
+
+/*
+ this takes an ASCII sequence and produces a UTF16 sequence
+
+ The first 127 codepoints of latin1 matches the first 127 codepoints
+ of unicode, and so can be put into the first byte of UTF16LE
+
+ */
+
static size_t ascii_pull(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 1 && *outbytesleft >= 2) {
+ if (((*inbuf)[0] & 0x7F) != (*inbuf)[0]) {
+ /* If this is multi-byte, then it isn't legal ASCII */
+ errno = EILSEQ;
+ return -1;
+ }
(*outbuf)[0] = (*inbuf)[0];
(*outbuf)[1] = 0;
(*inbytesleft) -= 1;
return 0;
}
+/*
+ this takes a UTF16 sequence and produces an ASCII sequence
+
+ The first 127 codepoints of ASCII matches the first 127 codepoints
+ of unicode, and so can be read directly from the first byte of UTF16LE
+
+ */
static size_t ascii_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
int ir_count=0;
while (*inbytesleft >= 2 && *outbytesleft >= 1) {
- (*outbuf)[0] = (*inbuf)[0] & 0x7F;
- if ((*inbuf)[1]) ir_count++;
+ if (((*inbuf)[0] & 0x7F) != (*inbuf)[0] ||
+ (*inbuf)[1] != 0) {
+ /* If this is multi-byte, then it isn't legal ASCII */
+ errno = EILSEQ;
+ return -1;
+ }
+ (*outbuf)[0] = (*inbuf)[0];
(*inbytesleft) -= 2;
(*outbytesleft) -= 1;
(*inbuf) += 2;
return ir_count;
}
+/*
+ this takes a latin1/ISO-8859-1 sequence and produces a UTF16 sequence
+
+ The first 256 codepoints of latin1 matches the first 256 codepoints
+ of unicode, and so can be put into the first byte of UTF16LE
+
+ */
+static size_t latin1_pull(void *cd, const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft)
+{
+ while (*inbytesleft >= 1 && *outbytesleft >= 2) {
+ (*outbuf)[0] = (*inbuf)[0];
+ (*outbuf)[1] = 0;
+ (*inbytesleft) -= 1;
+ (*outbytesleft) -= 2;
+ (*inbuf) += 1;
+ (*outbuf) += 2;
+ }
+
+ if (*inbytesleft > 0) {
+ errno = E2BIG;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ this takes a UTF16 sequence and produces a latin1/ISO-8859-1 sequence
+
+ The first 256 codepoints of latin1 matches the first 256 codepoints
+ of unicode, and so can be read directly from the first byte of UTF16LE
+
+ */
static size_t latin1_push(void *cd, const char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
{
while (*inbytesleft >= 2 && *outbytesleft >= 1) {
(*outbuf)[0] = (*inbuf)[0];
- if ((*inbuf)[1]) ir_count++;
+ if ((*inbuf)[1] != 0) {
+ /* If this is multi-byte, then it isn't legal latin1 */
+ errno = EILSEQ;
+ return -1;
+ }
(*inbytesleft) -= 2;
(*outbytesleft) -= 1;
(*inbuf) += 2;