From 5829cb0a984b6f3ee2ef114a4d2ff85f1a5a592b Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Fri, 29 Oct 2010 22:06:05 +0200 Subject: [PATCH] lib/charset/charcnv: add next_codepoint_convenience_ext() that accepts input charset. next_codepoint_convenience() takes a string in CH_UNIX encoding and returns the unicode codepoint of the next (possibly multibyte) character of the input string. The new next_codepoint_convenience_ext() function adds the encoding of the input string as a parameter. next_codepoint_convenience() now only calls next_codepoint_convenience_ext() with CH_UNIX as src_charset argument. --- lib/util/charset/charcnv.c | 45 ++++++++++++++++++++++++++++---------- lib/util/charset/charset.h | 3 +++ 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c index e9f6ab0d947..25a54cca13d 100644 --- a/lib/util/charset/charcnv.c +++ b/lib/util/charset/charcnv.c @@ -373,17 +373,25 @@ _PUBLIC_ bool convert_string_talloc_convenience(TALLOC_CTX *ctx, return true; } -/* - return the unicode codepoint for the next multi-byte CH_UNIX character - in the string - also return the number of bytes consumed (which tells the caller - how many bytes to skip to get to the next CH_UNIX character) - - return INVALID_CODEPOINT if the next character cannot be converted -*/ -_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, - const char *str, size_t *size) +/** + * Return the unicode codepoint for the next character in the input + * string in the given src_charset. + * The unicode codepoint (codepoint_t) is an unsigned 32 bit value. + * + * Also return the number of bytes consumed (which tells the caller + * how many bytes to skip to get to the next src_charset-character). + * + * This is implemented (in the non-ascii-case) by first converting the + * next character in the input string to UTF16_LE and then calculating + * the unicode codepoint from that. 
+ * + * Return INVALID_CODEPOINT if the next character cannot be converted. + */ +_PUBLIC_ codepoint_t next_codepoint_convenience_ext( + struct smb_iconv_convenience *ic, + const char *str, charset_t src_charset, + size_t *size) { /* it cannot occupy more than 4 bytes in UTF16 format */ uint8_t buf[4]; @@ -404,7 +412,7 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic ilen_orig = strnlen(str, 5); ilen = ilen_orig; - descriptor = get_conv_handle(ic, CH_UNIX, CH_UTF16); + descriptor = get_conv_handle(ic, src_charset, CH_UTF16); if (descriptor == (smb_iconv_t)-1) { *size = 1; return INVALID_CODEPOINT; @@ -445,6 +453,21 @@ _PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic return INVALID_CODEPOINT; } +/* + return the unicode codepoint for the next multi-byte CH_UNIX character + in the string + + also return the number of bytes consumed (which tells the caller + how many bytes to skip to get to the next CH_UNIX character) + + return INVALID_CODEPOINT if the next character cannot be converted +*/ +_PUBLIC_ codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, + const char *str, size_t *size) +{ + return next_codepoint_convenience_ext(ic, str, CH_UNIX, size); +} + /* push a single codepoint into a CH_UNIX string the target string must be able to hold the full character, which is guaranteed if it is at diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index a66e24f2b96..7960631484d 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -172,6 +172,9 @@ codepoint_t next_codepoint(const char *str, size_t *size); ssize_t push_codepoint(char *str, codepoint_t c); /* codepoints */ +codepoint_t next_codepoint_convenience_ext(struct smb_iconv_convenience *ic, + const char *str, charset_t src_charset, + size_t *size); codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, const char *str, size_t *size); ssize_t 
push_codepoint_convenience(struct smb_iconv_convenience *ic, -- 2.34.1