s3:lib/charcnv: add next_codepoint_ext() that accepts input charset.

author Michael Adam <obnox@samba.org>

Fri, 29 Oct 2010 20:06:05 +0000 (22:06 +0200)

committer Michael Adam <obnox@samba.org>

Wed, 3 Nov 2010 22:45:19 +0000 (22:45 +0000)
author Michael Adam <obnox@samba.org>
Fri, 29 Oct 2010 20:06:05 +0000 (22:06 +0200)
committer Michael Adam <obnox@samba.org>
Wed, 3 Nov 2010 22:45:19 +0000 (22:45 +0000)
diff --git a/source3/include/proto.h b/source3/include/proto.h

index 5095b1fde42e5ce5f237fb9f074393461f47d2c5..f76bc32aaa6213aae0e3065d45f2b3e289ab99f1 100644 (file)
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -480,6 +480,8 @@ size_t pull_string_talloc_fn(const char *function,
                         size_t src_len,
                         int flags);
  size_t align_string(const void *base_ptr, const char *p, int flags);
+codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
+                              size_t *size);
  codepoint_t next_codepoint(const char *str, size_t *size);
  
  /* The following definitions come from lib/clobber.c  */
diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c

index 9ac9930267ef6614738f0f5d75a933d3d8d958b9..ce01841f7e6456be381a13abcea4bb1f4fda0b69 100644 (file)
--- a/source3/lib/charcnv.c
+++ b/source3/lib/charcnv.c
@@ -1793,17 +1793,23 @@ size_t align_string(const void *base_ptr, const char *p, int flags)
         return 0;
  }
  
-/*
-  Return the unicode codepoint for the next multi-byte CH_UNIX character
-  in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
-
-  Also return the number of bytes consumed (which tells the caller
-  how many bytes to skip to get to the next CH_UNIX character).
-
-  Return INVALID_CODEPOINT if the next character cannot be converted.
-*/
+/**
+ * Return the unicode codepoint for the next character in the input
+ * string in the given src_charset.
+ * The unicode codepoint (codepoint_t) is an unsigned 32-bit value.
+ *
+ * Also return the number of bytes consumed (which tells the caller
+ * how many bytes to skip to get to the next src_charset-character).
+ *
+ * This is implemented (in the non-ascii-case) by first converting the
+ * next character in the input string to UTF16_LE and then calculating
+ * the unicode codepoint from that.
+ *
+ * Return INVALID_CODEPOINT if the next character cannot be converted.
+ */
  
-codepoint_t next_codepoint(const char *str, size_t *size)
+codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
+                              size_t *size)
  {
         /* It cannot occupy more than 4 bytes in UTF16 format */
         uint8_t buf[4];
@@ -1827,7 +1833,7 @@ codepoint_t next_codepoint(const char *str, size_t *size)
  
          lazy_initialize_conv();
  
-        descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
+       descriptor = conv_handles[src_charset][CH_UTF16LE];
         if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
                 *size = 1;
                 return INVALID_CODEPOINT;
@@ -1876,6 +1882,21 @@ codepoint_t next_codepoint(const char *str, size_t *size)
         return INVALID_CODEPOINT;
  }
  
+/*
+  Return the unicode codepoint for the next multi-byte CH_UNIX character
+  in the string. The unicode codepoint (codepoint_t) is an unsigned 32-bit value.
+
+  Also return the number of bytes consumed (which tells the caller
+  how many bytes to skip to get to the next CH_UNIX character).
+
+  Return INVALID_CODEPOINT if the next character cannot be converted.
+*/
+
+codepoint_t next_codepoint(const char *str, size_t *size)
+{
+       return next_codepoint_ext(str, CH_UNIX, size);
+}
+
  /*
    push a single codepoint into a CH_UNIX string the target string must
    be able to hold the full character, which is guaranteed if it is at
author	Michael Adam <obnox@samba.org>
	Fri, 29 Oct 2010 20:06:05 +0000 (22:06 +0200)
committer	Michael Adam <obnox@samba.org>
	Wed, 3 Nov 2010 22:45:19 +0000 (22:45 +0000)
source3/include/proto.h		patch \| blob \| history
source3/lib/charcnv.c		patch \| blob \| history