wsutil/unicode-utils.c

   1 /* unicode-utils.c
   2  * Unicode utility routines
   3  *
   4  * Wireshark - Network traffic analyzer
   5  * By Gerald Combs <gerald@wireshark.org>
   6  * Copyright 2006 Gerald Combs
   7  *
   8  * This program is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License
  10  * as published by the Free Software Foundation; either version 2
  11  * of the License, or (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  21  */
  22
  23 #include "unicode-utils.h"
  24
  25 int
  26 ws_utf8_char_len(guint8 ch)
  27 {
  28   if (ch >= 0xfe) return -1;
  29   if (ch >= 0xfc) return  6;
  30   if (ch >= 0xf8) return  5;
  31   if (ch >= 0xf0) return  4;
  32   if (ch >= 0xe0) return  3;
  33   if (ch >= 0xc0) return  2;
  34   else            return  1;
  35 }
  36
  37
  38 #ifdef _WIN32
  39
  40 #include <shellapi.h>
  41 #include <strsafe.h>
  42
  43 /** @file
  44  * Unicode utilities (internal interface)
  45  *
  46  * We define UNICODE and _UNICODE under Windows.  This means that
  47  * Windows SDK routines expect UTF-16 strings, in contrast to newer
  48  * versions of Glib and GTK+ which expect UTF-8.  This module provides
  49  * convenience routines for converting between UTF-8 and UTF-16.
  50  */
  51
  52 #define INITIAL_UTFBUF_SIZE 128
  53
  54 /*
  55  * XXX - Should we use g_utf8_to_utf16() and g_utf16_to_utf8()
  56  * instead?  The goal of the functions below was to provide simple
  57  * wrappers for UTF-8 <-> UTF-16 conversion without making the
  58  * caller worry about freeing up memory afterward.
  59  */
  60
  61 /* Convert from UTF-8 to UTF-16. */
  62 const wchar_t *
  63 utf_8to16(const char *utf8str)
  64 {
  65   static wchar_t *utf16buf[3];
  66   static int utf16buf_len[3];
  67   static int idx;
  68
  69   if (utf8str == NULL)
  70     return NULL;
  71
  72   idx = (idx + 1) % 3;
  73
  74   /*
  75    * Allocate the buffer if it's not already allocated.
  76    */
  77   if (utf16buf[idx] == NULL) {
  78     utf16buf_len[idx] = INITIAL_UTFBUF_SIZE;
  79     utf16buf[idx] = g_malloc(utf16buf_len[idx] * sizeof(wchar_t));
  80   }
  81
  82   while (MultiByteToWideChar(CP_UTF8, 0, utf8str,
  83       -1, NULL, 0) >= utf16buf_len[idx]) {
  84     /*
  85      * Double the buffer's size if it's not big enough.
  86      * The size of the buffer starts at 128, so doubling its size
  87      * adds at least another 128 bytes, which is more than enough
  88      * for one more character plus a terminating '\0'.
  89      */
  90     utf16buf_len[idx] *= 2;
  91     utf16buf[idx] = g_realloc(utf16buf[idx], utf16buf_len[idx] * sizeof(wchar_t));
  92   }
  93
  94   if (MultiByteToWideChar(CP_UTF8, 0, utf8str,
  95       -1, utf16buf[idx], utf16buf_len[idx]) == 0)
  96     return NULL;
  97
  98   return utf16buf[idx];
  99 }
 100
 101 void
 102 utf_8to16_snprintf(TCHAR *utf16buf, gint utf16buf_len, const gchar* fmt, ...)
 103 {
 104   va_list ap;
 105   gchar* dst;
 106
 107   va_start(ap,fmt);
 108   dst = g_strdup_vprintf(fmt, ap);
 109   va_end(ap);
 110
 111   StringCchPrintf(utf16buf, utf16buf_len, _T("%s"), utf_8to16(dst));
 112
 113   g_free(dst);
 114 }
 115
 116 /* Convert from UTF-16 to UTF-8. */
 117 gchar *
 118 utf_16to8(const wchar_t *utf16str)
 119 {
 120   static gchar *utf8buf[3];
 121   static int utf8buf_len[3];
 122   static int idx;
 123
 124   if (utf16str == NULL)
 125     return NULL;
 126
 127   idx = (idx + 1) % 3;
 128
 129   /*
 130    * Allocate the buffer if it's not already allocated.
 131    */
 132   if (utf8buf[idx] == NULL) {
 133     utf8buf_len[idx] = INITIAL_UTFBUF_SIZE;
 134     utf8buf[idx] = g_malloc(utf8buf_len[idx]);
 135   }
 136
 137   while (WideCharToMultiByte(CP_UTF8, 0, utf16str, -1,
 138       NULL, 0, NULL, NULL) >= utf8buf_len[idx]) {
 139     /*
 140      * Double the buffer's size if it's not big enough.
 141      * The size of the buffer starts at 128, so doubling its size
 142      * adds at least another 128 bytes, which is more than enough
 143      * for one more character plus a terminating '\0'.
 144      */
 145     utf8buf_len[idx] *= 2;
 146     utf8buf[idx] = g_realloc(utf8buf[idx], utf8buf_len[idx]);
 147   }
 148
 149   if (WideCharToMultiByte(CP_UTF8, 0, utf16str, -1,
 150       utf8buf[idx], utf8buf_len[idx], NULL, NULL) == 0)
 151     return NULL;
 152
 153   return utf8buf[idx];
 154 }
 155
 156 /* Convert our argument list from UTF-16 to UTF-8. */
 157 void
 158 arg_list_utf_16to8(int argc, char *argv[]) {
 159   LPWSTR              *wc_argv;
 160   int                  wc_argc, i;
 161
 162   /* Convert our arg list to UTF-8. */
 163   wc_argv = CommandLineToArgvW(GetCommandLineW(), &wc_argc);
 164   if (wc_argv && wc_argc == argc) {
 165     for (i = 0; i < argc; i++) {
 166       argv[i] = g_utf16_to_utf8(wc_argv[i], -1, NULL, NULL, NULL);
 167     }
 168   } /* XXX else bail because something is horribly, horribly wrong? */
 169   LocalFree(wc_argv);
 170 }
 171
 172 #endif
 173
 174 /*
 175  * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
 176  *
 177  * Local Variables:
 178  * c-basic-offset: 2
 179  * tab-width: 8
 180  * indent-tabs-mode: nil
 181  * End:
 182  *
 183  * ex: set shiftwidth=2 tabstop=8 expandtab:
 184  * :indentSize=2:tabSize=8:noTabs=true:
 185  */