return NULL;
}
+extern const guint8 *ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles);
+
static inline const guint8*
guint8_pbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles, guchar *found_needle)
{
- gchar tmp[256] = { 0 };
- const guint8 *haystack_end;
-
- while (*needles)
- tmp[*needles++] = 1;
+ const guint8 *result = ws_mempbrk(haystack, haystacklen, needles); /* the search itself is delegated to ws_mempbrk(); a NULL result means no needle byte was found */
- haystack_end = haystack + haystacklen;
- while (haystack < haystack_end) {
- if (tmp[*haystack]) {
- if (found_needle)
- *found_needle = *haystack;
- return haystack;
- }
- haystack++;
- }
+ if (result && found_needle) /* found_needle is an optional out-parameter: on a hit, report which needle byte matched */
+ *found_needle = *result;
- return NULL;
+ return result;
}
gint
tvb_find_line_end(tvbuff_t *tvb, const gint offset, int len, gint *next_offset, const gboolean desegment)
{
+#ifdef WIN32
+ static const char __declspec(align(16)) crlf[] = "\r\n" ;
+#else
+ static const char crlf[] __attribute__((aligned(16))) = "\r\n" ;
+#endif
+
gint eob_offset;
gint eol_offset;
int linelen;
/*
* Look either for a CR or an LF.
*/
- eol_offset = tvb_pbrk_guint8(tvb, offset, len, "\r\n", &found_needle);
+ eol_offset = tvb_pbrk_guint8(tvb, offset, len, crlf, &found_needle);
if (eol_offset == -1) {
/*
* No CR or LF - line is presumably continued in next packet.
--- /dev/null
+/* ws_mempbrk.c
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 1998 Gerald Combs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#include <glib.h>
+#include "ws_symbol_export.h"
+#include "ws_cpuid.h"
+
+#ifdef HAVE_SSE42
+extern const char *_ws_mempbrk_sse42(const char* haystack, size_t haystacklen, const char *needles);
+#endif
+
+const guint8 *_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles);
+
+/* Portable byte-wise search: return a pointer to the first of the
+ * `haystacklen` bytes at `haystack` that occurs in the NUL-terminated set
+ * `needles`, or NULL when none of them does.
+ */
+const guint8 *
+_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles)
+{
+    /* One flag per possible byte value; non-zero marks a needle byte. */
+    gchar is_needle[256] = { 0 };
+    const guint8 *np;
+    size_t pos;
+
+    for (np = needles; *np != 0; np++)
+        is_needle[*np] = 1;
+
+    for (pos = 0; pos < haystacklen; pos++) {
+        if (is_needle[haystack[pos]])
+            return haystack + pos;
+    }
+
+    return NULL;
+}
+
+/* Return a pointer to the first of the `haystacklen` bytes at `haystack`
+ * that occurs in the NUL-terminated set `needles`, or NULL if there is no
+ * such byte (an empty set never matches).
+ *
+ * When built with HAVE_SSE42 and the haystack is at least 16 bytes long,
+ * the CPU is asked whether it supports SSE4.2 and, if so, the search is
+ * dispatched to _ws_mempbrk_sse42(); otherwise the portable _ws_mempbrk()
+ * is used.
+ */
+WS_DLL_PUBLIC const guint8 *
+ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles)
+{
+    if (*needles == 0)
+        return NULL;
+
+#ifdef HAVE_SSE42
+    /* The SSE4.2 routine is only entered for haystacks of 16 bytes or more,
+     * so do not bother executing CPUID for shorter ones. */
+    if (haystacklen >= 16) {
+        /* Zero-initialised so that the feature test below reads a defined
+         * value even if ws_cpuid() leaves the array untouched. */
+        guint32 CPUInfo[4] = { 0, 0, 0, 0 };
+
+        ws_cpuid(CPUInfo, 1);
+
+        /* 0x100000 is the SSE4.2 feature bit in CPUInfo[2]. */
+        if (CPUInfo[2] & 0x100000) {
+            /* The SSE4.2 routine is declared in terms of `char`; the casts
+             * make the signedness conversion explicit in both directions. */
+            return (const guint8 *) _ws_mempbrk_sse42((const char *) haystack,
+                                                      haystacklen,
+                                                      (const char *) needles);
+        }
+    }
+#endif
+
+    return _ws_mempbrk(haystack, haystacklen, needles);
+}
--- /dev/null
+/* strcspn with SSE4.2 intrinsics
+ Copyright (C) 2009-2014 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+
+#include <glib.h>
+
+#ifdef WIN32
+ #include <tmmintrin.h>
+ #include <stdint.h>
+#endif
+
+#include <nmmintrin.h>
+#include <string.h>
+
+extern const guint8 *_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles);
+const char *_ws_mempbrk_sse42(const char* haystack, size_t haystacklen, const char *needles);
+
+/* Helper for variable shifts of SSE registers.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+ */
+
+/* Shuffle-control table: a 16-byte window starting at index `n` selects
+ * source bytes n..15 and then -1 entries for the remaining positions. */
+static const int8_t ___m128i_shift_right[31] =
+{
+     0,  1,  2,  3,  4,  5,  6,  7,
+     8,  9, 10, 11, 12, 13, 14, 15,
+    -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1
+};
+
+/* Byte-wise variable shift of `value` by `offset` (0..15) positions, done
+ * with a byte shuffle whose control word is read from the table above. */
+static inline __m128i
+__m128i_shift_right (__m128i value, unsigned long int offset)
+{
+    const int8_t *window = ___m128i_shift_right + offset;
+
+    /* The window is generally not 16-byte aligned, hence the unaligned
+     * load; the pointer cast is safe for _mm_loadu_si128(). */
+    __m128i control = _mm_loadu_si128 ((const __m128i *) (const void *) window);
+
+    return _mm_shuffle_epi8 (value, control);
+}
+
+/* We use 0x2:
+ _SIDD_SBYTE_OPS
+ | _SIDD_CMP_EQUAL_ANY
+ | _SIDD_POSITIVE_POLARITY
+ | _SIDD_LEAST_SIGNIFICANT
+ on pcmpistri to compare xmm/mem128
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ X X X X X X X X X X X X X X X X
+
+ against xmm
+
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ A A A A A A A A A A A A A A A A
+
+ to find out if the first 16byte data element has any byte A and
+ the offset of the first byte. There are 3 cases:
+
+ 1. The first 16byte data element has the byte A at the offset X.
+ 2. The first 16byte data element has EOS and doesn't have the byte A.
+ 3. The first 16byte data element is valid and doesn't have the byte A.
+
+ Here is the table of ECX, CFlag, ZFlag and SFlag for the 3 cases:
+
+ 1 X 1 0/1 0
+ 2 16 0 1 0
+ 3 16 0 0 0
+
+ We exit from the loop for cases 1 and 2 with jbe which branches
+ when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
+ X for case 1. */
+
+/* Find the first byte in the `slen`-byte buffer `s` that also occurs in the
+ * NUL-terminated needle set `a`, using SSE4.2 string-compare intrinsics.
+ *
+ * Returns a pointer to the matching byte, or whatever the byte-wise
+ * _ws_mempbrk() fallback returns for the part of the buffer handed to it
+ * (NULL when nothing matches).
+ *
+ * The implicit-length compares treat a NUL byte as end-of-string, so the
+ * search is handed over to _ws_mempbrk() when `a` is longer than 16 bytes,
+ * when a NUL byte shows up in the haystack, and for the final tail of fewer
+ * than 16 bytes.  The unaligned-start path relies on slen >= 16, which
+ * ws_mempbrk() checks before dispatching here.
+ *
+ * _ws_mempbrk() is declared in terms of guint8 while this routine works on
+ * char, so every hand-over casts explicitly in both directions.
+ */
+const char *
+_ws_mempbrk_sse42(const char *s, size_t slen, const char *a)
+{
+    const char *aligned;
+    __m128i mask;
+    int offset;
+
+    /* ---- Load the needle set `a` into `mask`. ---- */
+    offset = (int) ((size_t) a & 15);
+    aligned = (const char *) ((size_t) a & -16L);
+    if (offset != 0)
+    {
+        int length;
+
+        /* Load masks. */
+        /* cast safe - _mm_load_si128() is given a 16B aligned address */
+        mask = __m128i_shift_right(_mm_load_si128 ((__m128i *) (void *) aligned), offset);
+
+        /* Find where the NUL terminator is.  Mode 0x3a is
+           _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_EACH
+           | _SIDD_MASKED_NEGATIVE_POLARITY; comparing a register against
+           itself in that mode yields the index of its first NUL byte. */
+        length = _mm_cmpistri (mask, mask, 0x3a);
+        if (length == 16 - offset)
+        {
+            /* There is no NUL terminator in the first aligned block. */
+            __m128i mask1 = _mm_load_si128 ((__m128i *) (void *) (aligned + 16));
+            int index = _mm_cmpistri (mask1, mask1, 0x3a);
+            length += index;
+
+            /* Don't use SSE4.2 if the length of A > 16. */
+            if (length > 16)
+                return (const char *) _ws_mempbrk((const guint8 *) s, slen, (const guint8 *) a);
+
+            if (index != 0)
+            {
+                /* Combine mask0 and mask1.  We could play games with
+                   palignr, but frankly this data should be in L1 now
+                   so do the merge via an unaligned load. */
+                mask = _mm_loadu_si128 ((__m128i *) (void *) a);
+            }
+        }
+    }
+    else
+    {
+        int length;
+
+        /* A is aligned. (cast safe) */
+        mask = _mm_load_si128 ((__m128i *) (void *) a);
+
+        /* Find where the NUL terminator is. */
+        length = _mm_cmpistri (mask, mask, 0x3a);
+        if (length == 16)
+        {
+            /* There is no NUL terminator.  Don't use SSE4.2 if the length
+               of A > 16. */
+            if (a[16] != 0)
+                return (const char *) _ws_mempbrk((const guint8 *) s, slen, (const guint8 *) a);
+        }
+    }
+
+    /* ---- Scan the haystack `s`. ---- */
+    offset = (int) ((size_t) s & 15);
+    aligned = (const char *) ((size_t) s & -16L);
+    if (offset != 0)
+    {
+        /* Check partial string. cast safe, the address is 16B aligned */
+        __m128i value = __m128i_shift_right (_mm_load_si128 ((__m128i *) (void *) aligned), offset);
+
+        int length = _mm_cmpistri (mask, value, 0x2);
+        /* No need to check ZFlag since ZFlag is always 1. */
+        int cflag = _mm_cmpistrc (mask, value, 0x2);
+        int index = _mm_cmpistri (value, value, 0x3a);
+
+        if (cflag)
+            return s + length;
+        /* Find where the NUL terminator is. */
+        if (index < 16 - offset)
+        {
+            /* Found NUL at 'index': switch to the slower mempbrk for
+               everything after it.  slen is at least 16 and index < 16,
+               so there is no underflow here. */
+            return (const char *) _ws_mempbrk((const guint8 *) (s + index + 1), slen - index - 1, (const guint8 *) a);
+        }
+        aligned += 16;
+        slen -= (16 - offset);
+    }
+    else
+        aligned = s;
+
+    /* Whole aligned 16-byte blocks. */
+    while (slen >= 16)
+    {
+        __m128i value = _mm_load_si128 ((__m128i *) (void *) aligned);
+        int index = _mm_cmpistri (mask, value, 0x2);
+        int cflag = _mm_cmpistrc (mask, value, 0x2);
+        int zflag = _mm_cmpistrz (mask, value, 0x2);
+
+        if (cflag)
+            return aligned + index;
+        if (zflag)
+        {
+            /* found NUL, need to switch to slower mempbrk */
+            return (const char *) _ws_mempbrk((const guint8 *) aligned, slen, (const guint8 *) a);
+        }
+        aligned += 16;
+        slen -= 16;
+    }
+
+    /* Fewer than 16 bytes remain: finish byte-wise. */
+    /* XXX, use mempbrk_slow here? */
+    return (const char *) _ws_mempbrk((const guint8 *) aligned, slen, (const guint8 *) a);
+}