#include "includes.h"
#include "lib/util/charset/charset.h"
#include "system/locale.h"
-#include "dynconfig.h"
+#include "dynconfig/dynconfig.h"
#ifdef strcasecmp
#undef strcasecmp
const char *unix_charset;
const char *dos_charset;
const char *display_charset;
- bool native_iconv;
+ bool use_builtin_handlers;
smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
};
{
if (global_iconv_handle == NULL)
global_iconv_handle = smb_iconv_handle_reinit(talloc_autofree_context(),
- "ASCII", "UTF-8", "ASCII", true, NULL);
+ "ASCII", "UTF-8", true, NULL);
return global_iconv_handle;
}
+struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx,
+ const char *dos_charset,
+ const char *unix_charset,
+ bool use_builtin_handlers)
+{
+ return smb_iconv_handle_reinit(mem_ctx,
+ dos_charset, unix_charset, use_builtin_handlers, NULL);
+}
+
/**
* Return the name of a charset to give to iconv().
**/
case CH_UTF16: return "UTF-16LE";
case CH_UNIX: return ic->unix_charset;
case CH_DOS: return ic->dos_charset;
- case CH_DISPLAY: return ic->display_charset;
case CH_UTF8: return "UTF8";
case CH_UTF16BE: return "UTF-16BE";
case CH_UTF16MUNGED: return "UTF16_MUNGED";
return 0;
}
-static const char *map_locale(const char *charset)
-{
- if (strcmp(charset, "LOCALE") != 0) {
- return charset;
- }
-#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
- {
- const char *ln;
- smb_iconv_t handle;
-
- ln = nl_langinfo(CODESET);
- if (ln == NULL) {
- DEBUG(1,("Unable to determine charset for LOCALE - using ASCII\n"));
- return "ASCII";
- }
- /* Check whether the charset name is supported
- by iconv */
- handle = smb_iconv_open(ln, "UCS-2LE");
- if (handle == (smb_iconv_t) -1) {
- DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
- return "ASCII";
- } else {
- DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
- smb_iconv_close(handle);
- }
- return ln;
- }
-#endif
- return "ASCII";
-}
-
/*
the old_ic is passed in here as the smb_iconv_handle structure
is used as a global pointer in some places (eg. python modules). We
_PUBLIC_ struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx,
const char *dos_charset,
const char *unix_charset,
- const char *display_charset,
- bool native_iconv,
+ bool use_builtin_handlers,
struct smb_iconv_handle *old_ic)
{
struct smb_iconv_handle *ret;
- display_charset = map_locale(display_charset);
-
if (old_ic != NULL) {
ret = old_ic;
close_iconv_handle(ret);
talloc_set_destructor(ret, close_iconv_handle);
+ if (strcasecmp(dos_charset, "UTF8") == 0 || strcasecmp(dos_charset, "UTF-8") == 0) {
+ DEBUG(0,("ERROR: invalid DOS charset: 'dos charset' must not be UTF8, using (default value) CP850 instead\n"));
+ dos_charset = "CP850";
+ }
+
ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset);
ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset);
- ret->display_charset = talloc_strdup(ret->child_ctx, display_charset);
- ret->native_iconv = native_iconv;
+ ret->use_builtin_handlers = use_builtin_handlers;
return ret;
}
charset_t from, charset_t to)
{
const char *n1, *n2;
- static bool initialised;
-
- if (initialised == false) {
- initialised = true;
- }
if (ic->conv_handles[from][to]) {
return ic->conv_handles[from][to];
n2 = charset_name(ic, to);
ic->conv_handles[from][to] = smb_iconv_open_ex(ic, n2, n1,
- ic->native_iconv);
+ ic->use_builtin_handlers);
if (ic->conv_handles[from][to] == (smb_iconv_t)-1) {
if ((from == CH_DOS || to == CH_DOS) &&
n2 = charset_name(ic, to);
ic->conv_handles[from][to] =
- smb_iconv_open_ex(ic, n2, n1, ic->native_iconv);
+ smb_iconv_open_ex(ic, n2, n1, ic->use_builtin_handlers);
}
}
*/
_PUBLIC_ codepoint_t next_codepoint_handle_ext(
struct smb_iconv_handle *ic,
- const char *str, charset_t src_charset,
+ const char *str, size_t len,
+ charset_t src_charset,
size_t *bytes_consumed)
{
/* it cannot occupy more than 4 bytes in UTF16 format */
size_t olen;
char *outbuf;
- if ((str[0] & 0x80) == 0) {
+
+ if (((str[0] & 0x80) == 0) && (src_charset == CH_DOS ||
+ src_charset == CH_UNIX ||
+ src_charset == CH_UTF8)) {
*bytes_consumed = 1;
return (codepoint_t)str[0];
}
* we assume that no multi-byte character can take more than 5 bytes.
* This is OK as we only support codepoints up to 1M (U+100000)
*/
- ilen_orig = strnlen(str, 5);
+ ilen_orig = MIN(len, 5);
ilen = ilen_orig;
descriptor = get_conv_handle(ic, src_charset, CH_UTF16);
return INVALID_CODEPOINT if the next character cannot be converted
*/
_PUBLIC_ codepoint_t next_codepoint_handle(struct smb_iconv_handle *ic,
- const char *str, size_t *size)
+ const char *str, size_t *size)
{
- return next_codepoint_handle_ext(ic, str, CH_UNIX, size);
+ /*
+ * We assume that no multi-byte character can take more than 5 bytes
+ * thus avoiding walking all the way down a long string. This is OK as
+ * Unicode codepoints only go up to (U+10ffff), which can always be
+ * encoded in 4 bytes or less.
+ */
+ return next_codepoint_handle_ext(ic, str, strnlen(str, 5), CH_UNIX,
+ size);
}
/*
return 5 - olen;
}
-_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
- size_t *size)
+_PUBLIC_ codepoint_t next_codepoint_ext(const char *str, size_t len,
+ charset_t src_charset, size_t *size)
{
- return next_codepoint_handle_ext(get_iconv_handle(), str,
- src_charset, size);
+ return next_codepoint_handle_ext(get_iconv_handle(), str, len,
+ src_charset, size);
}
_PUBLIC_ codepoint_t next_codepoint(const char *str, size_t *size)