nexmon – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 /* strcspn with SSE4.2 intrinsics
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5  
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10  
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15  
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19  
20  
21 #include "config.h"
22  
23 #ifdef HAVE_SSE4_2
24  
25 #include <glib.h>
26 #include "ws_cpuid.h"
27  
28 #ifdef _WIN32
29 #include <tmmintrin.h>
30 #endif
31  
32 #include <nmmintrin.h>
33 #include <string.h>
34 #include "ws_mempbrk.h"
35 #include "ws_mempbrk_int.h"
36  
37 /* __has_feature(address_sanitizer) is used later for Clang, this is for
38 * compatibility with other compilers (such as GCC and MSVC) */
39 #ifndef __has_feature
40 # define __has_feature(x) 0
41 #endif
42  
43 #define cast_128aligned__m128i(p) ((const __m128i *) (const void *) (p))
44  
45 /* Helper for variable shifts of SSE registers.
46 Copyright (C) 2010 Free Software Foundation, Inc.
47 */
48  
49 static const gint8 ___m128i_shift_right[31] =
50 {
51 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
52 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
53 };
54  
55 static inline __m128i
56 __m128i_shift_right (__m128i value, unsigned long int offset)
57 {
58 /* _mm_loadu_si128() works with unaligned data, cast safe */
59 return _mm_shuffle_epi8 (value,
60 _mm_loadu_si128 (cast_128aligned__m128i(___m128i_shift_right + offset)));
61 }
62  
63  
64 void
65 ws_mempbrk_sse42_compile(ws_mempbrk_pattern* pattern, const gchar *needles)
66 {
67 size_t length = strlen(needles);
68  
69 pattern->use_sse42 = ws_cpuid_sse42() && (length <= 16);
70  
71 if (pattern->use_sse42) {
72 pattern->mask = _mm_setzero_si128();
73 memcpy(&(pattern->mask), needles, length);
74 }
75 }
76  
77 /* We use 0x2:
78 _SIDD_SBYTE_OPS
79 | _SIDD_CMP_EQUAL_ANY
80 | _SIDD_POSITIVE_POLARITY
81 | _SIDD_LEAST_SIGNIFICANT
82 on pcmpistri to compare xmm/mem128
83  
84  
85 X X X X X X X X X X X X X X X X
86  
87 against xmm
88  
89  
90 A A A A A A A A A A A A A A A A
91  
92 to find out if the first 16byte data element has any byte A and
93 the offset of the first byte. There are 3 cases:
94  
95 1. The first 16byte data element has the byte A at the offset X.
96 2. The first 16byte data element has EOS and doesn't have the byte A.
97 3. The first 16byte data element is valid and doesn't have the byte A.
98  
99 Here is the table of ECX, CFlag, ZFlag and SFlag for the 3 cases:
100  
101 1 X 1 0/1 0
102 2 16 0 1 0
103 3 16 0 0 0
104  
105 We exit from the loop for cases 1 and 2 with jbe which branches
106 when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
107 X for case 1. */
108  
109 const char *
110 ws_mempbrk_sse42_exec(const char *s, size_t slen, const ws_mempbrk_pattern* pattern, guchar *found_needle)
111 {
112 const char *aligned;
113 int offset;
114  
115 offset = (int) ((size_t) s & 15);
116 aligned = (const char *) ((size_t) s & -16L);
117 if (offset != 0)
118 {
119 /* Check partial string. cast safe it's 16B aligned */
120 __m128i value = __m128i_shift_right (_mm_load_si128 (cast_128aligned__m128i(aligned)), offset);
121  
122 int length = _mm_cmpistri (pattern->mask, value, 0x2);
123 /* No need to check ZFlag since ZFlag is always 1. */
124 int cflag = _mm_cmpistrc (pattern->mask, value, 0x2);
125 /* XXX: why does this compare value with value? */
126 int idx = _mm_cmpistri (value, value, 0x3a);
127  
128 if (cflag) {
129 if (found_needle)
130 *found_needle = *(s + length);
131 return s + length;
132 }
133  
134 /* Find where the NULL terminator is. */
135 if (idx < 16 - offset)
136 {
137 /* found NUL @ 'idx', need to switch to slower mempbrk */
138 return ws_mempbrk_portable_exec(s + idx + 1, slen - idx - 1, pattern, found_needle); /* slen is bigger than 16 & idx < 16 so no undeflow here */
139 }
140 aligned += 16;
141 slen -= (16 - offset);
142 }
143 else
144 aligned = s;
145  
146 while (slen >= 16)
147 {
148 __m128i value = _mm_load_si128 (cast_128aligned__m128i(aligned));
149 int idx = _mm_cmpistri (pattern->mask, value, 0x2);
150 int cflag = _mm_cmpistrc (pattern->mask, value, 0x2);
151 int zflag = _mm_cmpistrz (pattern->mask, value, 0x2);
152  
153 if (cflag) {
154 if (found_needle)
155 *found_needle = *(aligned + idx);
156 return aligned + idx;
157 }
158  
159 if (zflag)
160 {
161 /* found NUL, need to switch to slower mempbrk */
162 return ws_mempbrk_portable_exec(aligned, slen, pattern, found_needle);
163 }
164 aligned += 16;
165 slen -= 16;
166 }
167  
168 /* XXX, use mempbrk_slow here? */
169 return ws_mempbrk_portable_exec(aligned, slen, pattern, found_needle);
170 }
171  
172 #endif /* HAVE_SSE4_2 */
173 /*
174 * Editor modelines
175 *
176 * Local Variables:
177 * c-basic-offset: 2
178 * tab-width: 8
179 * indent-tabs-mode: nil
180 * End:
181 *
182 * ex: set shiftwidth=2 tabstop=8 expandtab:
183 * :indentSize=2:tabSize=8:noTabs=true:
184 */