wmem_stack_pop(data->stack);
}
+/*
+ * Complete one UTF-8 sequence in buf, whose lead byte the caller has
+ * already stored in buf[0].
+ *
+ * The expected sequence length is derived from buf[0]; the remaining
+ * (length - 1) bytes are copied from tvb, starting at offset + 1, into
+ * buf[1..].  The assembled sequence is then checked with g_utf8_validate().
+ *
+ * Returns the number of bytes now valid in buf: the sequence length on
+ * success, or 1 after replacing buf[0] with '?' when the lead byte is
+ * rejected, the sequence would reach offset_max, or validation fails.
+ */
+static int
+json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max)
+{
+	const int seq_len = ws_utf8_char_len((guint8) buf[0]);
+
+	/* XXX, before moving to core API check if it's off-by-one safe.
+	 * For JSON analyzer it's not a problem
+	 * (string always terminated by ", which is not valid UTF-8 continuation character) */
+	if (seq_len != -1 && ((guint) (offset + seq_len)) < (guint) offset_max) {
+		/* optimistically fetch the trailing bytes, then verify the whole sequence */
+		tvb_memcpy(tvb, &buf[1], offset + 1, seq_len - 1);
+
+		if (g_utf8_validate(buf, seq_len, NULL))
+			return seq_len;
+	}
+
+	/* every failure path: emit a single replacement character */
+	buf[0] = '?';
+	return 1;
+}
+
static char *json_string_unescape(tvbparse_elem_t *tok)
{
char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1);
case '\"':
case '\\':
case '/':
- default:
str[j++] = ch;
break;
str[j++] = '?';
break;
}
+
+ default:
+ /* not valid by JSON grammar (also tvbparse rules should not allow it) */
+ DISSECTOR_ASSERT_NOT_REACHED();
+ break;
}
- } else
- str[j++] = ch;
+ } else {
+ int utf_len;
+
+ str[j] = ch;
+ /* XXX if it's not valid UTF-8 character, add some expert info? (it violates JSON grammar) */
+ utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, i, tok->len);
+ j += utf_len;
+ i += (utf_len - 1);
+ }
}
str[j] = '\0';
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
-#ifndef _WIN32
-#error "This is only for Windows"
-#endif
-
#include "unicode-utils.h"
+/*
+ * Classify a byte as the first byte of a UTF-8 sequence and return the
+ * sequence length it announces.
+ *
+ *   0x00..0xbf -> 1   (note: this range includes 0x80..0xbf)
+ *   0xc0..0xdf -> 2
+ *   0xe0..0xef -> 3
+ *   0xf0..0xf7 -> 4
+ *   0xf8..0xfb -> 5
+ *   0xfc..0xfd -> 6
+ *   0xfe..0xff -> -1
+ *
+ * Only the first byte is inspected; the result says nothing about whether
+ * the following bytes form a well-formed sequence.
+ */
+int
+ws_utf8_char_len(guint8 ch)
+{
+  if (ch < 0xc0) return 1;
+  if (ch < 0xe0) return 2;
+  if (ch < 0xf0) return 3;
+  if (ch < 0xf8) return 4;
+  if (ch < 0xfc) return 5;
+  if (ch < 0xfe) return 6;
+
+  return -1;
+}
+
+
+#ifdef _WIN32
+
#include <shellapi.h>
/** @file
}
} /* XXX else bail because something is horribly, horribly wrong? */
}
+
+#endif
#ifndef __UNICODEUTIL_H__
#define __UNICODEUTIL_H__
-#include "ws_symbol_export.h"
-
-#ifdef _WIN32
-
#include "config.h"
+#include "ws_symbol_export.h"
+
#include <glib.h>
-#include <windows.h>
-#include <tchar.h>
-#include <wchar.h>
/**
* @file Unicode convenience routines.
*/
+WS_DLL_PUBLIC
+int ws_utf8_char_len(guint8 ch);
+
+#ifdef _WIN32
+
+#include <windows.h>
+#include <tchar.h>
+#include <wchar.h>
+
/** Given a UTF-8 string, convert it to UTF-16. This is meant to be used
* to convert between GTK+ 2.x (UTF-8) to Windows (UTF-16).
*