nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* GRegex -- regular expression API wrapper around PCRE. |
2 | * |
||
3 | * Copyright (C) 1999, 2000 Scott Wimer |
||
4 | * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com> |
||
5 | * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org> |
||
6 | * |
||
7 | * This library is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * This library is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with this library; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | #include "config.h" |
||
23 | |||
24 | #include <string.h> |
||
25 | |||
26 | #ifdef USE_SYSTEM_PCRE |
||
27 | #include <pcre.h> |
||
28 | #else |
||
29 | #include "pcre/pcre.h" |
||
30 | #endif |
||
31 | |||
32 | #include "gtypes.h" |
||
33 | #include "gregex.h" |
||
34 | #include "glibintl.h" |
||
35 | #include "glist.h" |
||
36 | #include "gmessages.h" |
||
37 | #include "gstrfuncs.h" |
||
38 | #include "gatomic.h" |
||
39 | #include "gthread.h" |
||
40 | |||
41 | /** |
||
42 | * SECTION:gregex |
||
43 | * @title: Perl-compatible regular expressions |
||
44 | * @short_description: matches strings against regular expressions |
||
45 | * @see_also: [Regular expression syntax][glib-regex-syntax] |
||
46 | * |
||
47 | * The g_regex_*() functions implement regular |
||
48 | * expression pattern matching using syntax and semantics similar to |
||
49 | * Perl regular expression. |
||
50 | * |
||
51 | * Some functions accept a @start_position argument; setting it differs |
||
52 | * from just passing over a shortened string and setting #G_REGEX_MATCH_NOTBOL |
||
53 | * in the case of a pattern that begins with any kind of lookbehind assertion. |
||
54 | * For example, consider the pattern "\Biss\B" which finds occurrences of "iss" |
||
55 | * in the middle of words. ("\B" matches only if the current position in the |
||
56 | * subject is not a word boundary.) When applied to the string "Mississippi" |
||
57 | * from the fifth byte, namely "issippi", it does not match, because "\B" is |
||
58 | * always false at the start of the subject, which is deemed to be a word |
||
59 | * boundary. However, if the entire string is passed, but with |
||
60 | * @start_position set to 4, it finds the second occurrence of "iss" because |
||
61 | * it is able to look behind the starting point to discover that it is |
||
62 | * preceded by a letter. |
||
63 | * |
||
64 | * Note that, unless you set the #G_REGEX_RAW flag, all the strings passed |
||
65 | * to these functions must be encoded in UTF-8. The lengths and the positions |
||
66 | * inside the strings are in bytes and not in characters, so, for instance, |
||
67 | * "\xc3\xa0" (i.e. "à") is two bytes long but it is treated as a |
||
68 | * single character. If you set #G_REGEX_RAW the strings can be non-valid |
||
69 | * UTF-8 strings and a byte is treated as a character, so "\xc3\xa0" is two |
||
70 | * bytes and two characters long. |
||
71 | * |
||
72 | * When matching a pattern, "\n" matches only against a "\n" character in |
||
73 | * the string, and "\r" matches only a "\r" character. To match any newline |
||
74 | * sequence use "\R". This particular group matches either the two-character |
||
75 | * sequence CR + LF ("\r\n"), or one of the single characters LF (linefeed, |
||
76 | * U+000A, "\n"), VT (vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"), |
||
77 | * CR (carriage return, U+000D, "\r"), NEL (next line, U+0085), LS (line |
||
78 | * separator, U+2028), or PS (paragraph separator, U+2029). |
||
79 | * |
||
80 | * The behaviour of the dot, circumflex, and dollar metacharacters is |
||
81 | * affected by newline characters; the default is to recognize any newline |
||
82 | * character (the same characters recognized by "\R"). This can be changed |
||
83 | * with #G_REGEX_NEWLINE_CR, #G_REGEX_NEWLINE_LF and #G_REGEX_NEWLINE_CRLF |
||
84 | * compile options, and with #G_REGEX_MATCH_NEWLINE_ANY, |
||
85 | * #G_REGEX_MATCH_NEWLINE_CR, #G_REGEX_MATCH_NEWLINE_LF and |
||
86 | * #G_REGEX_MATCH_NEWLINE_CRLF match options. These settings are also |
||
87 | * relevant when compiling a pattern if #G_REGEX_EXTENDED is set, and an |
||
88 | * unescaped "#" outside a character class is encountered. This indicates |
||
89 | * a comment that lasts until after the next newline. |
||
90 | * |
||
91 | * When setting the %G_REGEX_JAVASCRIPT_COMPAT flag, pattern syntax and pattern |
||
92 | * matching is changed to be compatible with the way that regular expressions |
||
93 | * work in JavaScript. More precisely, a lonely ']' character in the pattern |
||
94 | * is a syntax error; the '\x' escape only allows 0 to 2 hexadecimal digits, and |
||
95 | * you must use the '\u' escape sequence with 4 hex digits to specify a unicode |
||
96 | * codepoint instead of '\x' or '\x{....}'. If '\x' or '\u' are not followed by |
||
97 | * the specified number of hex digits, they match 'x' and 'u' literally; also |
||
98 | * '\U' always matches 'U' instead of being an error in the pattern. Finally, |
||
99 | * pattern matching is modified so that back references to an unset subpattern |
||
100 | * group produces a match with the empty string instead of an error. See |
||
101 | * pcreapi(3) for more information. |
||
102 | * |
||
103 | * Creating and manipulating the same #GRegex structure from different |
||
104 | * threads is not a problem as #GRegex does not modify its internal |
||
105 | * state between creation and destruction; on the other hand, #GMatchInfo |
||
106 | * is not threadsafe. |
||
107 | * |
||
108 | * The regular expressions low-level functionalities are obtained through |
||
109 | * the excellent |
||
110 | * [PCRE](http://www.pcre.org/) |
||
111 | * library written by Philip Hazel. |
||
112 | */ |
||
113 | |||
114 | /* Mask of all the possible values for GRegexCompileFlags. */ |
||
115 | #define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \ |
||
116 | G_REGEX_MULTILINE | \ |
||
117 | G_REGEX_DOTALL | \ |
||
118 | G_REGEX_EXTENDED | \ |
||
119 | G_REGEX_ANCHORED | \ |
||
120 | G_REGEX_DOLLAR_ENDONLY | \ |
||
121 | G_REGEX_UNGREEDY | \ |
||
122 | G_REGEX_RAW | \ |
||
123 | G_REGEX_NO_AUTO_CAPTURE | \ |
||
124 | G_REGEX_OPTIMIZE | \ |
||
125 | G_REGEX_FIRSTLINE | \ |
||
126 | G_REGEX_DUPNAMES | \ |
||
127 | G_REGEX_NEWLINE_CR | \ |
||
128 | G_REGEX_NEWLINE_LF | \ |
||
129 | G_REGEX_NEWLINE_CRLF | \ |
||
130 | G_REGEX_NEWLINE_ANYCRLF | \ |
||
131 | G_REGEX_BSR_ANYCRLF | \ |
||
132 | G_REGEX_JAVASCRIPT_COMPAT) |
||
133 | |||
134 | /* Mask of all GRegexCompileFlags values that are (not) passed through to PCRE */ |
||
135 | #define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) |
||
136 | #define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \ |
||
137 | G_REGEX_OPTIMIZE) |
||
138 | |||
139 | /* Mask of all the possible values for GRegexMatchFlags. */ |
||
140 | #define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \ |
||
141 | G_REGEX_MATCH_NOTBOL | \ |
||
142 | G_REGEX_MATCH_NOTEOL | \ |
||
143 | G_REGEX_MATCH_NOTEMPTY | \ |
||
144 | G_REGEX_MATCH_PARTIAL | \ |
||
145 | G_REGEX_MATCH_NEWLINE_CR | \ |
||
146 | G_REGEX_MATCH_NEWLINE_LF | \ |
||
147 | G_REGEX_MATCH_NEWLINE_CRLF | \ |
||
148 | G_REGEX_MATCH_NEWLINE_ANY | \ |
||
149 | G_REGEX_MATCH_NEWLINE_ANYCRLF | \ |
||
150 | G_REGEX_MATCH_BSR_ANYCRLF | \ |
||
151 | G_REGEX_MATCH_BSR_ANY | \ |
||
152 | G_REGEX_MATCH_PARTIAL_SOFT | \ |
||
153 | G_REGEX_MATCH_PARTIAL_HARD | \ |
||
154 | G_REGEX_MATCH_NOTEMPTY_ATSTART) |
||
155 | |||
156 | /* we rely on these flags having the same values */ |
||
157 | G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS); |
||
158 | G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE); |
||
159 | G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL); |
||
160 | G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED); |
||
161 | G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED); |
||
162 | G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY); |
||
163 | G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY); |
||
164 | G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE); |
||
165 | G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE); |
||
166 | G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES); |
||
167 | G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR); |
||
168 | G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF); |
||
169 | G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); |
||
170 | G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); |
||
171 | G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); |
||
172 | G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT); |
||
173 | |||
174 | G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED); |
||
175 | G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL); |
||
176 | G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL); |
||
177 | G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY); |
||
178 | G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL); |
||
179 | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR); |
||
180 | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF); |
||
181 | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); |
||
182 | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY); |
||
183 | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); |
||
184 | G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); |
||
185 | G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE); |
||
186 | G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT); |
||
187 | G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD); |
||
188 | G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART); |
||
189 | |||
190 | /* These PCRE flags are unused or not exposed publicly in GRegexFlags, so |
||
191 | * it should be ok to reuse them for different things. |
||
192 | */ |
||
193 | G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK); |
||
194 | G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8); |
||
195 | |||
196 | /* if the string is in UTF-8 use g_utf8_ functions, else use |
||
197 | * just +/- 1. */ |
||
198 | #define NEXT_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \ |
||
199 | ((s) + 1) : \ |
||
200 | g_utf8_next_char (s)) |
||
201 | #define PREV_CHAR(re, s) (((re)->compile_opts & G_REGEX_RAW) ? \ |
||
202 | ((s) - 1) : \ |
||
203 | g_utf8_prev_char (s)) |
||
204 | |||
205 | struct _GMatchInfo |
||
206 | { |
||
207 | volatile gint ref_count; /* the ref count */ |
||
208 | GRegex *regex; /* the regex */ |
||
209 | GRegexMatchFlags match_opts; /* options used at match time on the regex */ |
||
210 | gint matches; /* number of matching sub patterns */ |
||
211 | gint pos; /* position in the string where last match left off */ |
||
212 | gint n_offsets; /* number of offsets */ |
||
213 | gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 4,5 etc */ |
||
214 | gint *workspace; /* workspace for pcre_dfa_exec() */ |
||
215 | gint n_workspace; /* number of workspace elements */ |
||
216 | const gchar *string; /* string passed to the match function */ |
||
217 | gssize string_len; /* length of string */ |
||
218 | }; |
||
219 | |||
220 | struct _GRegex |
||
221 | { |
||
222 | volatile gint ref_count; /* the ref count for the immutable part */ |
||
223 | gchar *pattern; /* the pattern */ |
||
224 | pcre *pcre_re; /* compiled form of the pattern */ |
||
225 | GRegexCompileFlags compile_opts; /* options used at compile time on the pattern */ |
||
226 | GRegexMatchFlags match_opts; /* options used at match time on the regex */ |
||
227 | pcre_extra *extra; /* data stored when G_REGEX_OPTIMIZE is used */ |
||
228 | }; |
||
229 | |||
230 | /* TRUE if ret is an error code, FALSE otherwise. */ |
||
231 | #define IS_PCRE_ERROR(ret) ((ret) < PCRE_ERROR_NOMATCH && (ret) != PCRE_ERROR_PARTIAL) |
||
232 | |||
233 | typedef struct _InterpolationData InterpolationData; |
||
234 | static gboolean interpolation_list_needs_match (GList *list); |
||
235 | static gboolean interpolate_replacement (const GMatchInfo *match_info, |
||
236 | GString *result, |
||
237 | gpointer data); |
||
238 | static GList *split_replacement (const gchar *replacement, |
||
239 | GError **error); |
||
240 | static void free_interpolation_data (InterpolationData *data); |
||
241 | |||
242 | |||
243 | static const gchar * |
||
244 | match_error (gint errcode) |
||
245 | { |
||
246 | switch (errcode) |
||
247 | { |
||
248 | case PCRE_ERROR_NOMATCH: |
||
249 | /* not an error */ |
||
250 | break; |
||
251 | case PCRE_ERROR_NULL: |
||
252 | /* NULL argument, this should not happen in GRegex */ |
||
253 | g_warning ("A NULL argument was passed to PCRE"); |
||
254 | break; |
||
255 | case PCRE_ERROR_BADOPTION: |
||
256 | return "bad options"; |
||
257 | case PCRE_ERROR_BADMAGIC: |
||
258 | return _("corrupted object"); |
||
259 | case PCRE_ERROR_UNKNOWN_OPCODE: |
||
260 | return N_("internal error or corrupted object"); |
||
261 | case PCRE_ERROR_NOMEMORY: |
||
262 | return _("out of memory"); |
||
263 | case PCRE_ERROR_NOSUBSTRING: |
||
264 | /* not used by pcre_exec() */ |
||
265 | break; |
||
266 | case PCRE_ERROR_MATCHLIMIT: |
||
267 | return _("backtracking limit reached"); |
||
268 | case PCRE_ERROR_CALLOUT: |
||
269 | /* callouts are not implemented */ |
||
270 | break; |
||
271 | case PCRE_ERROR_BADUTF8: |
||
272 | case PCRE_ERROR_BADUTF8_OFFSET: |
||
273 | /* we do not check if strings are valid */ |
||
274 | break; |
||
275 | case PCRE_ERROR_PARTIAL: |
||
276 | /* not an error */ |
||
277 | break; |
||
278 | case PCRE_ERROR_BADPARTIAL: |
||
279 | return _("the pattern contains items not supported for partial matching"); |
||
280 | case PCRE_ERROR_INTERNAL: |
||
281 | return _("internal error"); |
||
282 | case PCRE_ERROR_BADCOUNT: |
||
283 | /* negative ovecsize, this should not happen in GRegex */ |
||
284 | g_warning ("A negative ovecsize was passed to PCRE"); |
||
285 | break; |
||
286 | case PCRE_ERROR_DFA_UITEM: |
||
287 | return _("the pattern contains items not supported for partial matching"); |
||
288 | case PCRE_ERROR_DFA_UCOND: |
||
289 | return _("back references as conditions are not supported for partial matching"); |
||
290 | case PCRE_ERROR_DFA_UMLIMIT: |
||
291 | /* the match_limit field is not used in GRegex */ |
||
292 | break; |
||
293 | case PCRE_ERROR_DFA_WSSIZE: |
||
294 | /* handled expanding the workspace */ |
||
295 | break; |
||
296 | case PCRE_ERROR_DFA_RECURSE: |
||
297 | case PCRE_ERROR_RECURSIONLIMIT: |
||
298 | return _("recursion limit reached"); |
||
299 | case PCRE_ERROR_BADNEWLINE: |
||
300 | return _("invalid combination of newline flags"); |
||
301 | case PCRE_ERROR_BADOFFSET: |
||
302 | return _("bad offset"); |
||
303 | case PCRE_ERROR_SHORTUTF8: |
||
304 | return _("short utf8"); |
||
305 | case PCRE_ERROR_RECURSELOOP: |
||
306 | return _("recursion loop"); |
||
307 | default: |
||
308 | break; |
||
309 | } |
||
310 | return _("unknown error"); |
||
311 | } |
||
312 | |||
313 | static void |
||
314 | translate_compile_error (gint *errcode, const gchar **errmsg) |
||
315 | { |
||
316 | /* Compile errors are created adding 100 to the error code returned |
||
317 | * by PCRE. |
||
318 | * If errcode is known we put the translatable error message in |
||
319 | * errmsg. If errcode is unknown we put the generic |
||
320 | * G_REGEX_ERROR_COMPILE error code in errcode and keep the |
||
321 | * untranslated error message returned by PCRE. |
||
322 | * Note that there can be more PCRE errors with the same GRegexError |
||
323 | * and that some PCRE errors are useless for us. |
||
324 | */ |
||
325 | *errcode += 100; |
||
326 | |||
327 | switch (*errcode) |
||
328 | { |
||
329 | case G_REGEX_ERROR_STRAY_BACKSLASH: |
||
330 | *errmsg = _("\\ at end of pattern"); |
||
331 | break; |
||
332 | case G_REGEX_ERROR_MISSING_CONTROL_CHAR: |
||
333 | *errmsg = _("\\c at end of pattern"); |
||
334 | break; |
||
335 | case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: |
||
336 | *errmsg = _("unrecognized character following \\"); |
||
337 | break; |
||
338 | case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: |
||
339 | *errmsg = _("numbers out of order in {} quantifier"); |
||
340 | break; |
||
341 | case G_REGEX_ERROR_QUANTIFIER_TOO_BIG: |
||
342 | *errmsg = _("number too big in {} quantifier"); |
||
343 | break; |
||
344 | case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: |
||
345 | *errmsg = _("missing terminating ] for character class"); |
||
346 | break; |
||
347 | case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: |
||
348 | *errmsg = _("invalid escape sequence in character class"); |
||
349 | break; |
||
350 | case G_REGEX_ERROR_RANGE_OUT_OF_ORDER: |
||
351 | *errmsg = _("range out of order in character class"); |
||
352 | break; |
||
353 | case G_REGEX_ERROR_NOTHING_TO_REPEAT: |
||
354 | *errmsg = _("nothing to repeat"); |
||
355 | break; |
||
356 | case 111: /* internal error: unexpected repeat */ |
||
357 | *errcode = G_REGEX_ERROR_INTERNAL; |
||
358 | *errmsg = _("unexpected repeat"); |
||
359 | break; |
||
360 | case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: |
||
361 | *errmsg = _("unrecognized character after (? or (?-"); |
||
362 | break; |
||
363 | case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: |
||
364 | *errmsg = _("POSIX named classes are supported only within a class"); |
||
365 | break; |
||
366 | case G_REGEX_ERROR_UNMATCHED_PARENTHESIS: |
||
367 | *errmsg = _("missing terminating )"); |
||
368 | break; |
||
369 | case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: |
||
370 | *errmsg = _("reference to non-existent subpattern"); |
||
371 | break; |
||
372 | case G_REGEX_ERROR_UNTERMINATED_COMMENT: |
||
373 | *errmsg = _("missing ) after comment"); |
||
374 | break; |
||
375 | case G_REGEX_ERROR_EXPRESSION_TOO_LARGE: |
||
376 | *errmsg = _("regular expression is too large"); |
||
377 | break; |
||
378 | case G_REGEX_ERROR_MEMORY_ERROR: |
||
379 | *errmsg = _("failed to get memory"); |
||
380 | break; |
||
381 | case 122: /* unmatched parentheses */ |
||
382 | *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; |
||
383 | *errmsg = _(") without opening ("); |
||
384 | break; |
||
385 | case 123: /* internal error: code overflow */ |
||
386 | *errcode = G_REGEX_ERROR_INTERNAL; |
||
387 | *errmsg = _("code overflow"); |
||
388 | break; |
||
389 | case 124: /* "unrecognized character after (?<\0 */ |
||
390 | *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; |
||
391 | *errmsg = _("unrecognized character after (?<"); |
||
392 | break; |
||
393 | case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: |
||
394 | *errmsg = _("lookbehind assertion is not fixed length"); |
||
395 | break; |
||
396 | case G_REGEX_ERROR_MALFORMED_CONDITION: |
||
397 | *errmsg = _("malformed number or name after (?("); |
||
398 | break; |
||
399 | case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: |
||
400 | *errmsg = _("conditional group contains more than two branches"); |
||
401 | break; |
||
402 | case G_REGEX_ERROR_ASSERTION_EXPECTED: |
||
403 | *errmsg = _("assertion expected after (?("); |
||
404 | break; |
||
405 | case 129: |
||
406 | *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; |
||
407 | /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of) |
||
408 | * sequences here, '(?-54' would be an example for the second group. |
||
409 | */ |
||
410 | *errmsg = _("(?R or (?[+-]digits must be followed by )"); |
||
411 | break; |
||
412 | case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: |
||
413 | *errmsg = _("unknown POSIX class name"); |
||
414 | break; |
||
415 | case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: |
||
416 | *errmsg = _("POSIX collating elements are not supported"); |
||
417 | break; |
||
418 | case G_REGEX_ERROR_HEX_CODE_TOO_LARGE: |
||
419 | *errmsg = _("character value in \\x{...} sequence is too large"); |
||
420 | break; |
||
421 | case G_REGEX_ERROR_INVALID_CONDITION: |
||
422 | *errmsg = _("invalid condition (?(0)"); |
||
423 | break; |
||
424 | case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: |
||
425 | *errmsg = _("\\C not allowed in lookbehind assertion"); |
||
426 | break; |
||
427 | case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */ |
||
428 | /* A number of Perl escapes are not handled by PCRE. |
||
429 | * Therefore it explicitly raises ERR37. |
||
430 | */ |
||
431 | *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; |
||
432 | *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported"); |
||
433 | break; |
||
434 | case G_REGEX_ERROR_INFINITE_LOOP: |
||
435 | *errmsg = _("recursive call could loop indefinitely"); |
||
436 | break; |
||
437 | case 141: /* unrecognized character after (?P\0 */ |
||
438 | *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; |
||
439 | *errmsg = _("unrecognized character after (?P"); |
||
440 | break; |
||
441 | case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: |
||
442 | *errmsg = _("missing terminator in subpattern name"); |
||
443 | break; |
||
444 | case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: |
||
445 | *errmsg = _("two named subpatterns have the same name"); |
||
446 | break; |
||
447 | case G_REGEX_ERROR_MALFORMED_PROPERTY: |
||
448 | *errmsg = _("malformed \\P or \\p sequence"); |
||
449 | break; |
||
450 | case G_REGEX_ERROR_UNKNOWN_PROPERTY: |
||
451 | *errmsg = _("unknown property name after \\P or \\p"); |
||
452 | break; |
||
453 | case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: |
||
454 | *errmsg = _("subpattern name is too long (maximum 32 characters)"); |
||
455 | break; |
||
456 | case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: |
||
457 | *errmsg = _("too many named subpatterns (maximum 10,000)"); |
||
458 | break; |
||
459 | case G_REGEX_ERROR_INVALID_OCTAL_VALUE: |
||
460 | *errmsg = _("octal value is greater than \\377"); |
||
461 | break; |
||
462 | case 152: /* internal error: overran compiling workspace */ |
||
463 | *errcode = G_REGEX_ERROR_INTERNAL; |
||
464 | *errmsg = _("overran compiling workspace"); |
||
465 | break; |
||
466 | case 153: /* internal error: previously-checked referenced subpattern not found */ |
||
467 | *errcode = G_REGEX_ERROR_INTERNAL; |
||
468 | *errmsg = _("previously-checked referenced subpattern not found"); |
||
469 | break; |
||
470 | case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: |
||
471 | *errmsg = _("DEFINE group contains more than one branch"); |
||
472 | break; |
||
473 | case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: |
||
474 | *errmsg = _("inconsistent NEWLINE options"); |
||
475 | break; |
||
476 | case G_REGEX_ERROR_MISSING_BACK_REFERENCE: |
||
477 | *errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or " |
||
478 | "number, or by a plain number"); |
||
479 | break; |
||
480 | case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: |
||
481 | *errmsg = _("a numbered reference must not be zero"); |
||
482 | break; |
||
483 | case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: |
||
484 | *errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"); |
||
485 | break; |
||
486 | case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: |
||
487 | *errmsg = _("(*VERB) not recognized"); |
||
488 | break; |
||
489 | case G_REGEX_ERROR_NUMBER_TOO_BIG: |
||
490 | *errmsg = _("number is too big"); |
||
491 | break; |
||
492 | case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: |
||
493 | *errmsg = _("missing subpattern name after (?&"); |
||
494 | break; |
||
495 | case G_REGEX_ERROR_MISSING_DIGIT: |
||
496 | *errmsg = _("digit expected after (?+"); |
||
497 | break; |
||
498 | case G_REGEX_ERROR_INVALID_DATA_CHARACTER: |
||
499 | *errmsg = _("] is an invalid data character in JavaScript compatibility mode"); |
||
500 | break; |
||
501 | case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: |
||
502 | *errmsg = _("different names for subpatterns of the same number are not allowed"); |
||
503 | break; |
||
504 | case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: |
||
505 | *errmsg = _("(*MARK) must have an argument"); |
||
506 | break; |
||
507 | case G_REGEX_ERROR_INVALID_CONTROL_CHAR: |
||
508 | *errmsg = _( "\\c must be followed by an ASCII character"); |
||
509 | break; |
||
510 | case G_REGEX_ERROR_MISSING_NAME: |
||
511 | *errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name"); |
||
512 | break; |
||
513 | case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: |
||
514 | *errmsg = _("\\N is not supported in a class"); |
||
515 | break; |
||
516 | case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: |
||
517 | *errmsg = _("too many forward references"); |
||
518 | break; |
||
519 | case G_REGEX_ERROR_NAME_TOO_LONG: |
||
520 | *errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"); |
||
521 | break; |
||
522 | case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: |
||
523 | *errmsg = _("character value in \\u.... sequence is too large"); |
||
524 | break; |
||
525 | |||
526 | case 116: /* erroffset passed as NULL */ |
||
527 | /* This should not happen as we never pass a NULL erroffset */ |
||
528 | g_warning ("erroffset passed as NULL"); |
||
529 | *errcode = G_REGEX_ERROR_COMPILE; |
||
530 | break; |
||
531 | case 117: /* unknown option bit(s) set */ |
||
532 | /* This should not happen as we check options before passing them |
||
533 | * to pcre_compile2() */ |
||
534 | g_warning ("unknown option bit(s) set"); |
||
535 | *errcode = G_REGEX_ERROR_COMPILE; |
||
536 | break; |
||
537 | case 132: /* this version of PCRE is compiled without UTF support */ |
||
538 | case 144: /* invalid UTF-8 string */ |
||
539 | case 145: /* support for \\P, \\p, and \\X has not been compiled */ |
||
540 | case 167: /* this version of PCRE is not compiled with Unicode property support */ |
||
541 | case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */ |
||
542 | case 174: /* invalid UTF-16 string */ |
||
543 | /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE |
||
544 | * and we do not check if strings are valid */ |
||
545 | case 170: /* internal error: unknown opcode in find_fixedlength() */ |
||
546 | *errcode = G_REGEX_ERROR_INTERNAL; |
||
547 | break; |
||
548 | |||
549 | default: |
||
550 | *errcode = G_REGEX_ERROR_COMPILE; |
||
551 | } |
||
552 | } |
||
553 | |||
554 | /* GMatchInfo */ |
||
555 | |||
556 | static GMatchInfo * |
||
557 | match_info_new (const GRegex *regex, |
||
558 | const gchar *string, |
||
559 | gint string_len, |
||
560 | gint start_position, |
||
561 | gint match_options, |
||
562 | gboolean is_dfa) |
||
563 | { |
||
564 | GMatchInfo *match_info; |
||
565 | |||
566 | if (string_len < 0) |
||
567 | string_len = strlen (string); |
||
568 | |||
569 | match_info = g_new0 (GMatchInfo, 1); |
||
570 | match_info->ref_count = 1; |
||
571 | match_info->regex = g_regex_ref ((GRegex *)regex); |
||
572 | match_info->string = string; |
||
573 | match_info->string_len = string_len; |
||
574 | match_info->matches = PCRE_ERROR_NOMATCH; |
||
575 | match_info->pos = start_position; |
||
576 | match_info->match_opts = match_options; |
||
577 | |||
578 | if (is_dfa) |
||
579 | { |
||
580 | /* These values should be enough for most cases, if they are not |
||
581 | * enough g_regex_match_all_full() will expand them. */ |
||
582 | match_info->n_offsets = 24; |
||
583 | match_info->n_workspace = 100; |
||
584 | match_info->workspace = g_new (gint, match_info->n_workspace); |
||
585 | } |
||
586 | else |
||
587 | { |
||
588 | gint capture_count; |
||
589 | pcre_fullinfo (regex->pcre_re, regex->extra, |
||
590 | PCRE_INFO_CAPTURECOUNT, &capture_count); |
||
591 | match_info->n_offsets = (capture_count + 1) * 3; |
||
592 | } |
||
593 | |||
594 | match_info->offsets = g_new0 (gint, match_info->n_offsets); |
||
595 | /* Set an invalid position for the previous match. */ |
||
596 | match_info->offsets[0] = -1; |
||
597 | match_info->offsets[1] = -1; |
||
598 | |||
599 | return match_info; |
||
600 | } |
||
601 | |||
602 | /** |
||
603 | * g_match_info_get_regex: |
||
604 | * @match_info: a #GMatchInfo |
||
605 | * |
||
606 | * Returns the #GRegex object used in @match_info. It belongs to GLib |
||
607 | * and must not be freed. Use g_regex_ref() if you need to keep it |
||
608 | * after you free @match_info object. |
||
609 | * |
||
610 | * Returns: #GRegex object used in @match_info |
||
611 | * |
||
612 | * Since: 2.14 |
||
613 | */ |
||
614 | GRegex * |
||
615 | g_match_info_get_regex (const GMatchInfo *match_info) |
||
616 | { |
||
617 | g_return_val_if_fail (match_info != NULL, NULL); |
||
618 | return match_info->regex; |
||
619 | } |
||
620 | |||
621 | /** |
||
622 | * g_match_info_get_string: |
||
623 | * @match_info: a #GMatchInfo |
||
624 | * |
||
625 | * Returns the string searched with @match_info. This is the |
||
626 | * string passed to g_regex_match() or g_regex_replace() so |
||
627 | * you may not free it before calling this function. |
||
628 | * |
||
629 | * Returns: the string searched with @match_info |
||
630 | * |
||
631 | * Since: 2.14 |
||
632 | */ |
||
633 | const gchar * |
||
634 | g_match_info_get_string (const GMatchInfo *match_info) |
||
635 | { |
||
636 | g_return_val_if_fail (match_info != NULL, NULL); |
||
637 | return match_info->string; |
||
638 | } |
||
639 | |||
640 | /** |
||
641 | * g_match_info_ref: |
||
642 | * @match_info: a #GMatchInfo |
||
643 | * |
||
644 | * Increases reference count of @match_info by 1. |
||
645 | * |
||
646 | * Returns: @match_info |
||
647 | * |
||
648 | * Since: 2.30 |
||
649 | */ |
||
650 | GMatchInfo * |
||
651 | g_match_info_ref (GMatchInfo *match_info) |
||
652 | { |
||
653 | g_return_val_if_fail (match_info != NULL, NULL); |
||
654 | g_atomic_int_inc (&match_info->ref_count); |
||
655 | return match_info; |
||
656 | } |
||
657 | |||
658 | /** |
||
659 | * g_match_info_unref: |
||
660 | * @match_info: a #GMatchInfo |
||
661 | * |
||
662 | * Decreases reference count of @match_info by 1. When reference count drops |
||
663 | * to zero, it frees all the memory associated with the match_info structure. |
||
664 | * |
||
665 | * Since: 2.30 |
||
666 | */ |
||
667 | void |
||
668 | g_match_info_unref (GMatchInfo *match_info) |
||
669 | { |
||
670 | if (g_atomic_int_dec_and_test (&match_info->ref_count)) |
||
671 | { |
||
672 | g_regex_unref (match_info->regex); |
||
673 | g_free (match_info->offsets); |
||
674 | g_free (match_info->workspace); |
||
675 | g_free (match_info); |
||
676 | } |
||
677 | } |
||
678 | |||
679 | /** |
||
680 | * g_match_info_free: |
||
681 | * @match_info: (allow-none): a #GMatchInfo, or %NULL |
||
682 | * |
||
683 | * If @match_info is not %NULL, calls g_match_info_unref(); otherwise does |
||
684 | * nothing. |
||
685 | * |
||
686 | * Since: 2.14 |
||
687 | */ |
||
688 | void |
||
689 | g_match_info_free (GMatchInfo *match_info) |
||
690 | { |
||
691 | if (match_info == NULL) |
||
692 | return; |
||
693 | |||
694 | g_match_info_unref (match_info); |
||
695 | } |
||
696 | |||
697 | /** |
||
698 | * g_match_info_next: |
||
699 | * @match_info: a #GMatchInfo structure |
||
700 | * @error: location to store the error occurring, or %NULL to ignore errors |
||
701 | * |
||
702 | * Scans for the next match using the same parameters of the previous |
||
703 | * call to g_regex_match_full() or g_regex_match() that returned |
||
704 | * @match_info. |
||
705 | * |
||
706 | * The match is done on the string passed to the match function, so you |
||
707 | * cannot free it before calling this function. |
||
708 | * |
||
709 | * Returns: %TRUE if the string matched, %FALSE otherwise |
||
710 | * |
||
711 | * Since: 2.14 |
||
712 | */ |
||
713 | gboolean |
||
714 | g_match_info_next (GMatchInfo *match_info, |
||
715 | GError **error) |
||
716 | { |
||
717 | gint prev_match_start; |
||
718 | gint prev_match_end; |
||
719 | |||
720 | g_return_val_if_fail (match_info != NULL, FALSE); |
||
721 | g_return_val_if_fail (error == NULL || *error == NULL, FALSE); |
||
722 | g_return_val_if_fail (match_info->pos >= 0, FALSE); |
||
723 | |||
724 | prev_match_start = match_info->offsets[0]; |
||
725 | prev_match_end = match_info->offsets[1]; |
||
726 | |||
727 | if (match_info->pos > match_info->string_len) |
||
728 | { |
||
729 | /* we have reached the end of the string */ |
||
730 | match_info->pos = -1; |
||
731 | match_info->matches = PCRE_ERROR_NOMATCH; |
||
732 | return FALSE; |
||
733 | } |
||
734 | |||
735 | match_info->matches = pcre_exec (match_info->regex->pcre_re, |
||
736 | match_info->regex->extra, |
||
737 | match_info->string, |
||
738 | match_info->string_len, |
||
739 | match_info->pos, |
||
740 | match_info->regex->match_opts | match_info->match_opts, |
||
741 | match_info->offsets, |
||
742 | match_info->n_offsets); |
||
743 | if (IS_PCRE_ERROR (match_info->matches)) |
||
744 | { |
||
745 | g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, |
||
746 | _("Error while matching regular expression %s: %s"), |
||
747 | match_info->regex->pattern, match_error (match_info->matches)); |
||
748 | return FALSE; |
||
749 | } |
||
750 | |||
751 | /* avoid infinite loops if the pattern is an empty string or something |
||
752 | * equivalent */ |
||
753 | if (match_info->pos == match_info->offsets[1]) |
||
754 | { |
||
755 | if (match_info->pos > match_info->string_len) |
||
756 | { |
||
757 | /* we have reached the end of the string */ |
||
758 | match_info->pos = -1; |
||
759 | match_info->matches = PCRE_ERROR_NOMATCH; |
||
760 | return FALSE; |
||
761 | } |
||
762 | |||
763 | match_info->pos = NEXT_CHAR (match_info->regex, |
||
764 | &match_info->string[match_info->pos]) - |
||
765 | match_info->string; |
||
766 | } |
||
767 | else |
||
768 | { |
||
769 | match_info->pos = match_info->offsets[1]; |
||
770 | } |
||
771 | |||
772 | /* it's possible to get two identical matches when we are matching |
||
773 | * empty strings, for instance if the pattern is "(?=[A-Z0-9])" and |
||
774 | * the string is "RegExTest" we have: |
||
775 | * - search at position 0: match from 0 to 0 |
||
776 | * - search at position 1: match from 3 to 3 |
||
777 | * - search at position 3: match from 3 to 3 (duplicate) |
||
778 | * - search at position 4: match from 5 to 5 |
||
779 | * - search at position 5: match from 5 to 5 (duplicate) |
||
780 | * - search at position 6: no match -> stop |
||
781 | * so we have to ignore the duplicates. |
||
782 | * see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */ |
||
783 | if (match_info->matches >= 0 && |
||
784 | prev_match_start == match_info->offsets[0] && |
||
785 | prev_match_end == match_info->offsets[1]) |
||
786 | { |
||
787 | /* ignore this match and search the next one */ |
||
788 | return g_match_info_next (match_info, error); |
||
789 | } |
||
790 | |||
791 | return match_info->matches >= 0; |
||
792 | } |
||
793 | |||
794 | /** |
||
795 | * g_match_info_matches: |
||
796 | * @match_info: a #GMatchInfo structure |
||
797 | * |
||
798 | * Returns whether the previous match operation succeeded. |
||
799 | * |
||
800 | * Returns: %TRUE if the previous match operation succeeded, |
||
801 | * %FALSE otherwise |
||
802 | * |
||
803 | * Since: 2.14 |
||
804 | */ |
||
805 | gboolean |
||
806 | g_match_info_matches (const GMatchInfo *match_info) |
||
807 | { |
||
808 | g_return_val_if_fail (match_info != NULL, FALSE); |
||
809 | |||
810 | return match_info->matches >= 0; |
||
811 | } |
||
812 | |||
813 | /** |
||
814 | * g_match_info_get_match_count: |
||
815 | * @match_info: a #GMatchInfo structure |
||
816 | * |
||
817 | * Retrieves the number of matched substrings (including substring 0, |
||
818 | * that is the whole matched text), so 1 is returned if the pattern |
||
819 | * has no substrings in it and 0 is returned if the match failed. |
||
820 | * |
||
821 | * If the last match was obtained using the DFA algorithm, that is |
||
822 | * using g_regex_match_all() or g_regex_match_all_full(), the retrieved |
||
823 | * count is not that of the number of capturing parentheses but that of |
||
824 | * the number of matched substrings. |
||
825 | * |
||
826 | * Returns: Number of matched substrings, or -1 if an error occurred |
||
827 | * |
||
828 | * Since: 2.14 |
||
829 | */ |
||
830 | gint |
||
831 | g_match_info_get_match_count (const GMatchInfo *match_info) |
||
832 | { |
||
833 | g_return_val_if_fail (match_info, -1); |
||
834 | |||
835 | if (match_info->matches == PCRE_ERROR_NOMATCH) |
||
836 | /* no match */ |
||
837 | return 0; |
||
838 | else if (match_info->matches < PCRE_ERROR_NOMATCH) |
||
839 | /* error */ |
||
840 | return -1; |
||
841 | else |
||
842 | /* match */ |
||
843 | return match_info->matches; |
||
844 | } |
||
845 | |||
846 | /** |
||
847 | * g_match_info_is_partial_match: |
||
848 | * @match_info: a #GMatchInfo structure |
||
849 | * |
||
850 | * Usually if the string passed to g_regex_match*() matches as far as |
||
851 | * it goes, but is too short to match the entire pattern, %FALSE is |
||
852 | * returned. There are circumstances where it might be helpful to |
||
853 | * distinguish this case from other cases in which there is no match. |
||
854 | * |
||
855 | * Consider, for example, an application where a human is required to |
||
856 | * type in data for a field with specific formatting requirements. An |
||
857 | * example might be a date in the form ddmmmyy, defined by the pattern |
||
858 | * "^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$". |
||
859 | * If the application sees the user’s keystrokes one by one, and can |
||
860 | * check that what has been typed so far is potentially valid, it is |
||
861 | * able to raise an error as soon as a mistake is made. |
||
862 | * |
||
863 | * GRegex supports the concept of partial matching by means of the |
||
864 | * #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD flags. |
||
865 | * When they are used, the return code for |
||
866 | * g_regex_match() or g_regex_match_full() is, as usual, %TRUE |
||
867 | * for a complete match, %FALSE otherwise. But, when these functions |
||
868 | * return %FALSE, you can check if the match was partial calling |
||
869 | * g_match_info_is_partial_match(). |
||
870 | * |
||
871 | * The difference between #G_REGEX_MATCH_PARTIAL_SOFT and |
||
872 | * #G_REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered |
||
873 | * with #G_REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a |
||
874 | * possible complete match, while with #G_REGEX_MATCH_PARTIAL_HARD matching |
||
875 | * stops at the partial match. |
||
876 | * When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD |
||
877 | * are set, the latter takes precedence. |
||
878 | * |
||
879 | * There were formerly some restrictions on the pattern for partial matching. |
||
880 | * The restrictions no longer apply. |
||
881 | * |
||
882 | * See pcrepartial(3) for more information on partial matching. |
||
883 | * |
||
884 | * Returns: %TRUE if the match was partial, %FALSE otherwise |
||
885 | * |
||
886 | * Since: 2.14 |
||
887 | */ |
||
888 | gboolean |
||
889 | g_match_info_is_partial_match (const GMatchInfo *match_info) |
||
890 | { |
||
891 | g_return_val_if_fail (match_info != NULL, FALSE); |
||
892 | |||
893 | return match_info->matches == PCRE_ERROR_PARTIAL; |
||
894 | } |
||
895 | |||
896 | /** |
||
897 | * g_match_info_expand_references: |
||
898 | * @match_info: (allow-none): a #GMatchInfo or %NULL |
||
899 | * @string_to_expand: the string to expand |
||
900 | * @error: location to store the error occurring, or %NULL to ignore errors |
||
901 | * |
||
902 | * Returns a new string containing the text in @string_to_expand with |
||
903 | * references and escape sequences expanded. References refer to the last |
||
904 | * match done with @string against @regex and have the same syntax used by |
||
905 | * g_regex_replace(). |
||
906 | * |
||
907 | * The @string_to_expand must be UTF-8 encoded even if #G_REGEX_RAW was |
||
908 | * passed to g_regex_new(). |
||
909 | * |
||
910 | * The backreferences are extracted from the string passed to the match |
||
911 | * function, so you cannot call this function after freeing the string. |
||
912 | * |
||
913 | * @match_info may be %NULL in which case @string_to_expand must not |
||
914 | * contain references. For instance "foo\n" does not refer to an actual |
||
915 | * pattern and '\n' merely will be replaced with \n character, |
||
916 | * while to expand "\0" (whole match) one needs the result of a match. |
||
917 | * Use g_regex_check_replacement() to find out whether @string_to_expand |
||
918 | * contains references. |
||
919 | * |
||
920 | * Returns: (allow-none): the expanded string, or %NULL if an error occurred |
||
921 | * |
||
922 | * Since: 2.14 |
||
923 | */ |
||
924 | gchar * |
||
925 | g_match_info_expand_references (const GMatchInfo *match_info, |
||
926 | const gchar *string_to_expand, |
||
927 | GError **error) |
||
928 | { |
||
929 | GString *result; |
||
930 | GList *list; |
||
931 | GError *tmp_error = NULL; |
||
932 | |||
933 | g_return_val_if_fail (string_to_expand != NULL, NULL); |
||
934 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
||
935 | |||
936 | list = split_replacement (string_to_expand, &tmp_error); |
||
937 | if (tmp_error != NULL) |
||
938 | { |
||
939 | g_propagate_error (error, tmp_error); |
||
940 | return NULL; |
||
941 | } |
||
942 | |||
943 | if (!match_info && interpolation_list_needs_match (list)) |
||
944 | { |
||
945 | g_critical ("String '%s' contains references to the match, can't " |
||
946 | "expand references without GMatchInfo object", |
||
947 | string_to_expand); |
||
948 | return NULL; |
||
949 | } |
||
950 | |||
951 | result = g_string_sized_new (strlen (string_to_expand)); |
||
952 | interpolate_replacement (match_info, result, list); |
||
953 | |||
954 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
||
955 | |||
956 | return g_string_free (result, FALSE); |
||
957 | } |
||
958 | |||
959 | /** |
||
960 | * g_match_info_fetch: |
||
961 | * @match_info: #GMatchInfo structure |
||
962 | * @match_num: number of the sub expression |
||
963 | * |
||
964 | * Retrieves the text matching the @match_num'th capturing |
||
965 | * parentheses. 0 is the full text of the match, 1 is the first paren |
||
966 | * set, 2 the second, and so on. |
||
967 | * |
||
968 | * If @match_num is a valid sub pattern but it didn't match anything |
||
969 | * (e.g. sub pattern 1, matching "b" against "(a)?b") then an empty |
||
970 | * string is returned. |
||
971 | * |
||
972 | * If the match was obtained using the DFA algorithm, that is using |
||
973 | * g_regex_match_all() or g_regex_match_all_full(), the retrieved |
||
974 | * string is not that of a set of parentheses but that of a matched |
||
975 | * substring. Substrings are matched in reverse order of length, so |
||
976 | * 0 is the longest match. |
||
977 | * |
||
978 | * The string is fetched from the string passed to the match function, |
||
979 | * so you cannot call this function after freeing the string. |
||
980 | * |
||
981 | * Returns: (allow-none): The matched substring, or %NULL if an error |
||
982 | * occurred. You have to free the string yourself |
||
983 | * |
||
984 | * Since: 2.14 |
||
985 | */ |
||
986 | gchar * |
||
987 | g_match_info_fetch (const GMatchInfo *match_info, |
||
988 | gint match_num) |
||
989 | { |
||
990 | /* we cannot use pcre_get_substring() because it allocates the |
||
991 | * string using pcre_malloc(). */ |
||
992 | gchar *match = NULL; |
||
993 | gint start, end; |
||
994 | |||
995 | g_return_val_if_fail (match_info != NULL, NULL); |
||
996 | g_return_val_if_fail (match_num >= 0, NULL); |
||
997 | |||
998 | /* match_num does not exist or it didn't matched, i.e. matching "b" |
||
999 | * against "(a)?b" then group 0 is empty. */ |
||
1000 | if (!g_match_info_fetch_pos (match_info, match_num, &start, &end)) |
||
1001 | match = NULL; |
||
1002 | else if (start == -1) |
||
1003 | match = g_strdup (""); |
||
1004 | else |
||
1005 | match = g_strndup (&match_info->string[start], end - start); |
||
1006 | |||
1007 | return match; |
||
1008 | } |
||
1009 | |||
1010 | /** |
||
1011 | * g_match_info_fetch_pos: |
||
1012 | * @match_info: #GMatchInfo structure |
||
1013 | * @match_num: number of the sub expression |
||
1014 | * @start_pos: (out) (allow-none): pointer to location where to store |
||
1015 | * the start position, or %NULL |
||
1016 | * @end_pos: (out) (allow-none): pointer to location where to store |
||
1017 | * the end position, or %NULL |
||
1018 | * |
||
1019 | * Retrieves the position in bytes of the @match_num'th capturing |
||
1020 | * parentheses. 0 is the full text of the match, 1 is the first |
||
1021 | * paren set, 2 the second, and so on. |
||
1022 | * |
||
1023 | * If @match_num is a valid sub pattern but it didn't match anything |
||
1024 | * (e.g. sub pattern 1, matching "b" against "(a)?b") then @start_pos |
||
1025 | * and @end_pos are set to -1 and %TRUE is returned. |
||
1026 | * |
||
1027 | * If the match was obtained using the DFA algorithm, that is using |
||
1028 | * g_regex_match_all() or g_regex_match_all_full(), the retrieved |
||
1029 | * position is not that of a set of parentheses but that of a matched |
||
1030 | * substring. Substrings are matched in reverse order of length, so |
||
1031 | * 0 is the longest match. |
||
1032 | * |
||
1033 | * Returns: %TRUE if the position was fetched, %FALSE otherwise. If |
||
1034 | * the position cannot be fetched, @start_pos and @end_pos are left |
||
1035 | * unchanged |
||
1036 | * |
||
1037 | * Since: 2.14 |
||
1038 | */ |
||
1039 | gboolean |
||
1040 | g_match_info_fetch_pos (const GMatchInfo *match_info, |
||
1041 | gint match_num, |
||
1042 | gint *start_pos, |
||
1043 | gint *end_pos) |
||
1044 | { |
||
1045 | g_return_val_if_fail (match_info != NULL, FALSE); |
||
1046 | g_return_val_if_fail (match_num >= 0, FALSE); |
||
1047 | |||
1048 | /* make sure the sub expression number they're requesting is less than |
||
1049 | * the total number of sub expressions that were matched. */ |
||
1050 | if (match_num >= match_info->matches) |
||
1051 | return FALSE; |
||
1052 | |||
1053 | if (start_pos != NULL) |
||
1054 | *start_pos = match_info->offsets[2 * match_num]; |
||
1055 | |||
1056 | if (end_pos != NULL) |
||
1057 | *end_pos = match_info->offsets[2 * match_num + 1]; |
||
1058 | |||
1059 | return TRUE; |
||
1060 | } |
||
1061 | |||
1062 | /* |
||
1063 | * Returns number of first matched subpattern with name @name. |
||
1064 | * There may be more than one in case when DUPNAMES is used, |
||
1065 | * and not all subpatterns with that name match; |
||
1066 | * pcre_get_stringnumber() does not work in that case. |
||
1067 | */ |
||
1068 | static gint |
||
1069 | get_matched_substring_number (const GMatchInfo *match_info, |
||
1070 | const gchar *name) |
||
1071 | { |
||
1072 | gint entrysize; |
||
1073 | gchar *first, *last; |
||
1074 | guchar *entry; |
||
1075 | |||
1076 | if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES)) |
||
1077 | return pcre_get_stringnumber (match_info->regex->pcre_re, name); |
||
1078 | |||
1079 | /* This code is copied from pcre_get.c: get_first_set() */ |
||
1080 | entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re, |
||
1081 | name, |
||
1082 | &first, |
||
1083 | &last); |
||
1084 | |||
1085 | if (entrysize <= 0) |
||
1086 | return entrysize; |
||
1087 | |||
1088 | for (entry = (guchar*) first; entry <= (guchar*) last; entry += entrysize) |
||
1089 | { |
||
1090 | gint n = (entry[0] << 8) + entry[1]; |
||
1091 | if (match_info->offsets[n*2] >= 0) |
||
1092 | return n; |
||
1093 | } |
||
1094 | |||
1095 | return (first[0] << 8) + first[1]; |
||
1096 | } |
||
1097 | |||
1098 | /** |
||
1099 | * g_match_info_fetch_named: |
||
1100 | * @match_info: #GMatchInfo structure |
||
1101 | * @name: name of the subexpression |
||
1102 | * |
||
1103 | * Retrieves the text matching the capturing parentheses named @name. |
||
1104 | * |
||
1105 | * If @name is a valid sub pattern name but it didn't match anything |
||
1106 | * (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b") |
||
1107 | * then an empty string is returned. |
||
1108 | * |
||
1109 | * The string is fetched from the string passed to the match function, |
||
1110 | * so you cannot call this function after freeing the string. |
||
1111 | * |
||
1112 | * Returns: (allow-none): The matched substring, or %NULL if an error |
||
1113 | * occurred. You have to free the string yourself |
||
1114 | * |
||
1115 | * Since: 2.14 |
||
1116 | */ |
||
1117 | gchar * |
||
1118 | g_match_info_fetch_named (const GMatchInfo *match_info, |
||
1119 | const gchar *name) |
||
1120 | { |
||
1121 | /* we cannot use pcre_get_named_substring() because it allocates the |
||
1122 | * string using pcre_malloc(). */ |
||
1123 | gint num; |
||
1124 | |||
1125 | g_return_val_if_fail (match_info != NULL, NULL); |
||
1126 | g_return_val_if_fail (name != NULL, NULL); |
||
1127 | |||
1128 | num = get_matched_substring_number (match_info, name); |
||
1129 | if (num < 0) |
||
1130 | return NULL; |
||
1131 | else |
||
1132 | return g_match_info_fetch (match_info, num); |
||
1133 | } |
||
1134 | |||
1135 | /** |
||
1136 | * g_match_info_fetch_named_pos: |
||
1137 | * @match_info: #GMatchInfo structure |
||
1138 | * @name: name of the subexpression |
||
1139 | * @start_pos: (out) (allow-none): pointer to location where to store |
||
1140 | * the start position, or %NULL |
||
1141 | * @end_pos: (out) (allow-none): pointer to location where to store |
||
1142 | * the end position, or %NULL |
||
1143 | * |
||
1144 | * Retrieves the position in bytes of the capturing parentheses named @name. |
||
1145 | * |
||
1146 | * If @name is a valid sub pattern name but it didn't match anything |
||
1147 | * (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b") |
||
1148 | * then @start_pos and @end_pos are set to -1 and %TRUE is returned. |
||
1149 | * |
||
1150 | * Returns: %TRUE if the position was fetched, %FALSE otherwise. |
||
1151 | * If the position cannot be fetched, @start_pos and @end_pos |
||
1152 | * are left unchanged. |
||
1153 | * |
||
1154 | * Since: 2.14 |
||
1155 | */ |
||
1156 | gboolean |
||
1157 | g_match_info_fetch_named_pos (const GMatchInfo *match_info, |
||
1158 | const gchar *name, |
||
1159 | gint *start_pos, |
||
1160 | gint *end_pos) |
||
1161 | { |
||
1162 | gint num; |
||
1163 | |||
1164 | g_return_val_if_fail (match_info != NULL, FALSE); |
||
1165 | g_return_val_if_fail (name != NULL, FALSE); |
||
1166 | |||
1167 | num = get_matched_substring_number (match_info, name); |
||
1168 | if (num < 0) |
||
1169 | return FALSE; |
||
1170 | |||
1171 | return g_match_info_fetch_pos (match_info, num, start_pos, end_pos); |
||
1172 | } |
||
1173 | |||
1174 | /** |
||
1175 | * g_match_info_fetch_all: |
||
1176 | * @match_info: a #GMatchInfo structure |
||
1177 | * |
||
1178 | * Bundles up pointers to each of the matching substrings from a match |
||
1179 | * and stores them in an array of gchar pointers. The first element in |
||
1180 | * the returned array is the match number 0, i.e. the entire matched |
||
1181 | * text. |
||
1182 | * |
||
1183 | * If a sub pattern didn't match anything (e.g. sub pattern 1, matching |
||
1184 | * "b" against "(a)?b") then an empty string is inserted. |
||
1185 | * |
||
1186 | * If the last match was obtained using the DFA algorithm, that is using |
||
1187 | * g_regex_match_all() or g_regex_match_all_full(), the retrieved |
||
1188 | * strings are not that matched by sets of parentheses but that of the |
||
1189 | * matched substring. Substrings are matched in reverse order of length, |
||
1190 | * so the first one is the longest match. |
||
1191 | * |
||
1192 | * The strings are fetched from the string passed to the match function, |
||
1193 | * so you cannot call this function after freeing the string. |
||
1194 | * |
||
1195 | * Returns: (transfer full): a %NULL-terminated array of gchar * |
||
1196 | * pointers. It must be freed using g_strfreev(). If the previous |
||
1197 | * match failed %NULL is returned |
||
1198 | * |
||
1199 | * Since: 2.14 |
||
1200 | */ |
||
1201 | gchar ** |
||
1202 | g_match_info_fetch_all (const GMatchInfo *match_info) |
||
1203 | { |
||
1204 | /* we cannot use pcre_get_substring_list() because the returned value |
||
1205 | * isn't suitable for g_strfreev(). */ |
||
1206 | gchar **result; |
||
1207 | gint i; |
||
1208 | |||
1209 | g_return_val_if_fail (match_info != NULL, NULL); |
||
1210 | |||
1211 | if (match_info->matches < 0) |
||
1212 | return NULL; |
||
1213 | |||
1214 | result = g_new (gchar *, match_info->matches + 1); |
||
1215 | for (i = 0; i < match_info->matches; i++) |
||
1216 | result[i] = g_match_info_fetch (match_info, i); |
||
1217 | result[i] = NULL; |
||
1218 | |||
1219 | return result; |
||
1220 | } |
||
1221 | |||
1222 | |||
1223 | /* GRegex */ |
||
1224 | |||
/* NOTE(review): presumably expands to the g_regex_error_quark() function
 * that backs the G_REGEX_ERROR domain used by the g_set_error() and
 * g_error_new() calls in this file -- confirm against the G_DEFINE_QUARK
 * definition, which is not visible here. */
G_DEFINE_QUARK (g-regex-error-quark, g_regex_error)
||
1226 | |||
1227 | /** |
||
1228 | * g_regex_ref: |
||
1229 | * @regex: a #GRegex |
||
1230 | * |
||
1231 | * Increases reference count of @regex by 1. |
||
1232 | * |
||
1233 | * Returns: @regex |
||
1234 | * |
||
1235 | * Since: 2.14 |
||
1236 | */ |
||
1237 | GRegex * |
||
1238 | g_regex_ref (GRegex *regex) |
||
1239 | { |
||
1240 | g_return_val_if_fail (regex != NULL, NULL); |
||
1241 | g_atomic_int_inc (®ex->ref_count); |
||
1242 | return regex; |
||
1243 | } |
||
1244 | |||
1245 | /** |
||
1246 | * g_regex_unref: |
||
1247 | * @regex: a #GRegex |
||
1248 | * |
||
1249 | * Decreases reference count of @regex by 1. When reference count drops |
||
1250 | * to zero, it frees all the memory associated with the regex structure. |
||
1251 | * |
||
1252 | * Since: 2.14 |
||
1253 | */ |
||
1254 | void |
||
1255 | g_regex_unref (GRegex *regex) |
||
1256 | { |
||
1257 | g_return_if_fail (regex != NULL); |
||
1258 | |||
1259 | if (g_atomic_int_dec_and_test (®ex->ref_count)) |
||
1260 | { |
||
1261 | g_free (regex->pattern); |
||
1262 | if (regex->pcre_re != NULL) |
||
1263 | pcre_free (regex->pcre_re); |
||
1264 | if (regex->extra != NULL) |
||
1265 | pcre_free (regex->extra); |
||
1266 | g_free (regex); |
||
1267 | } |
||
1268 | } |
||
1269 | |||
/*
 * regex_compile:
 * @pattern: the pattern text handed to pcre_compile2()
 * @compile_options: the requested compile flags
 * @compile_options_out: (out) (optional): if not %NULL, receives the
 *   compile flags in effect after compilation
 * @match_options: (inout) (optional): if not %NULL, PCRE_NO_UTF8_CHECK is
 *   added to it unless G_REGEX_RAW was requested
 * @error: return location for a #GError
 *
 * Forward declaration; the definition follows g_regex_new(), which is
 * the caller visible in this part of the file.
 */
static pcre *regex_compile (const gchar         *pattern,
                            GRegexCompileFlags   compile_options,
                            GRegexCompileFlags  *compile_options_out,
                            GRegexMatchFlags    *match_options,
                            GError             **error);
||
1278 | |||
1279 | /** |
||
1280 | * g_regex_new: |
||
1281 | * @pattern: the regular expression |
||
1282 | * @compile_options: compile options for the regular expression, or 0 |
||
1283 | * @match_options: match options for the regular expression, or 0 |
||
1284 | * @error: return location for a #GError |
||
1285 | * |
||
1286 | * Compiles the regular expression to an internal form, and does |
||
1287 | * the initial setup of the #GRegex structure. |
||
1288 | * |
||
1289 | * Returns: (nullable): a #GRegex structure or %NULL if an error occurred. Call |
||
1290 | * g_regex_unref() when you are done with it |
||
1291 | * |
||
1292 | * Since: 2.14 |
||
1293 | */ |
||
1294 | GRegex * |
||
1295 | g_regex_new (const gchar *pattern, |
||
1296 | GRegexCompileFlags compile_options, |
||
1297 | GRegexMatchFlags match_options, |
||
1298 | GError **error) |
||
1299 | { |
||
1300 | GRegex *regex; |
||
1301 | pcre *re; |
||
1302 | const gchar *errmsg; |
||
1303 | gboolean optimize = FALSE; |
||
1304 | static volatile gsize initialised = 0; |
||
1305 | |||
1306 | g_return_val_if_fail (pattern != NULL, NULL); |
||
1307 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
||
1308 | g_return_val_if_fail ((compile_options & ~G_REGEX_COMPILE_MASK) == 0, NULL); |
||
1309 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
||
1310 | |||
1311 | if (g_once_init_enter (&initialised)) |
||
1312 | { |
||
1313 | int supports_utf8, supports_ucp; |
||
1314 | |||
1315 | pcre_config (PCRE_CONFIG_UTF8, &supports_utf8); |
||
1316 | if (!supports_utf8) |
||
1317 | g_critical (_("PCRE library is compiled without UTF8 support")); |
||
1318 | |||
1319 | pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp); |
||
1320 | if (!supports_ucp) |
||
1321 | g_critical (_("PCRE library is compiled without UTF8 properties support")); |
||
1322 | |||
1323 | g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? 1 : 2); |
||
1324 | } |
||
1325 | |||
1326 | if (G_UNLIKELY (initialised != 1)) |
||
1327 | { |
||
1328 | g_set_error_literal (error, G_REGEX_ERROR, G_REGEX_ERROR_COMPILE, |
||
1329 | _("PCRE library is compiled with incompatible options")); |
||
1330 | return NULL; |
||
1331 | } |
||
1332 | |||
1333 | /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK, |
||
1334 | * as we do not need to wrap PCRE_NO_UTF8_CHECK. */ |
||
1335 | if (compile_options & G_REGEX_OPTIMIZE) |
||
1336 | optimize = TRUE; |
||
1337 | |||
1338 | re = regex_compile (pattern, compile_options, &compile_options, |
||
1339 | &match_options, error); |
||
1340 | |||
1341 | if (re == NULL) |
||
1342 | return NULL; |
||
1343 | |||
1344 | regex = g_new0 (GRegex, 1); |
||
1345 | regex->ref_count = 1; |
||
1346 | regex->pattern = g_strdup (pattern); |
||
1347 | regex->pcre_re = re; |
||
1348 | regex->compile_opts = compile_options; |
||
1349 | regex->match_opts = match_options; |
||
1350 | |||
1351 | if (optimize) |
||
1352 | { |
||
1353 | regex->extra = pcre_study (regex->pcre_re, 0, &errmsg); |
||
1354 | if (errmsg != NULL) |
||
1355 | { |
||
1356 | GError *tmp_error = g_error_new (G_REGEX_ERROR, |
||
1357 | G_REGEX_ERROR_OPTIMIZE, |
||
1358 | _("Error while optimizing " |
||
1359 | "regular expression %s: %s"), |
||
1360 | regex->pattern, |
||
1361 | errmsg); |
||
1362 | g_propagate_error (error, tmp_error); |
||
1363 | |||
1364 | g_regex_unref (regex); |
||
1365 | return NULL; |
||
1366 | } |
||
1367 | } |
||
1368 | |||
1369 | return regex; |
||
1370 | } |
||
1371 | |||
1372 | static pcre * |
||
1373 | regex_compile (const gchar *pattern, |
||
1374 | GRegexCompileFlags compile_options, |
||
1375 | GRegexCompileFlags *compile_options_out, |
||
1376 | GRegexMatchFlags *match_options, |
||
1377 | GError **error) |
||
1378 | { |
||
1379 | pcre *re; |
||
1380 | const gchar *errmsg; |
||
1381 | gint erroffset; |
||
1382 | gint errcode; |
||
1383 | GRegexCompileFlags nonpcre_compile_options; |
||
1384 | unsigned long int pcre_compile_options; |
||
1385 | |||
1386 | nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; |
||
1387 | |||
1388 | /* In GRegex the string are, by default, UTF-8 encoded. PCRE |
||
1389 | * instead uses UTF-8 only if required with PCRE_UTF8. */ |
||
1390 | if (compile_options & G_REGEX_RAW) |
||
1391 | { |
||
1392 | /* disable utf-8 */ |
||
1393 | compile_options &= ~G_REGEX_RAW; |
||
1394 | } |
||
1395 | else |
||
1396 | { |
||
1397 | /* enable utf-8 */ |
||
1398 | compile_options |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK; |
||
1399 | |||
1400 | if (match_options != NULL) |
||
1401 | *match_options |= PCRE_NO_UTF8_CHECK; |
||
1402 | } |
||
1403 | |||
1404 | /* PCRE_NEWLINE_ANY is the default for the internal PCRE but |
||
1405 | * not for the system one. */ |
||
1406 | if (!(compile_options & G_REGEX_NEWLINE_CR) && |
||
1407 | !(compile_options & G_REGEX_NEWLINE_LF)) |
||
1408 | { |
||
1409 | compile_options |= PCRE_NEWLINE_ANY; |
||
1410 | } |
||
1411 | |||
1412 | compile_options |= PCRE_UCP; |
||
1413 | |||
1414 | /* PCRE_BSR_UNICODE is the default for the internal PCRE but |
||
1415 | * possibly not for the system one. |
||
1416 | */ |
||
1417 | if (~compile_options & G_REGEX_BSR_ANYCRLF) |
||
1418 | compile_options |= PCRE_BSR_UNICODE; |
||
1419 | |||
1420 | /* compile the pattern */ |
||
1421 | re = pcre_compile2 (pattern, compile_options, &errcode, |
||
1422 | &errmsg, &erroffset, NULL); |
||
1423 | |||
1424 | /* if the compilation failed, set the error member and return |
||
1425 | * immediately */ |
||
1426 | if (re == NULL) |
||
1427 | { |
||
1428 | GError *tmp_error; |
||
1429 | |||
1430 | /* Translate the PCRE error code to GRegexError and use a translated |
||
1431 | * error message if possible */ |
||
1432 | translate_compile_error (&errcode, &errmsg); |
||
1433 | |||
1434 | /* PCRE uses byte offsets but we want to show character offsets */ |
||
1435 | erroffset = g_utf8_pointer_to_offset (pattern, &pattern[erroffset]); |
||
1436 | |||
1437 | tmp_error = g_error_new (G_REGEX_ERROR, errcode, |
||
1438 | _("Error while compiling regular " |
||
1439 | "expression %s at char %d: %s"), |
||
1440 | pattern, erroffset, errmsg); |
||
1441 | g_propagate_error (error, tmp_error); |
||
1442 | |||
1443 | return NULL; |
||
1444 | } |
||
1445 | |||
1446 | /* For options set at the beginning of the pattern, pcre puts them into |
||
1447 | * compile options, e.g. "(?i)foo" will make the pcre structure store |
||
1448 | * PCRE_CASELESS even though it wasn't explicitly given for compilation. */ |
||
1449 | pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options); |
||
1450 | compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK; |
||
1451 | |||
1452 | /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */ |
||
1453 | if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF) |
||
1454 | compile_options &= ~PCRE_NEWLINE_ANY; |
||
1455 | |||
1456 | compile_options |= nonpcre_compile_options; |
||
1457 | |||
1458 | if (!(compile_options & G_REGEX_DUPNAMES)) |
||
1459 | { |
||
1460 | gboolean jchanged = FALSE; |
||
1461 | pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged); |
||
1462 | if (jchanged) |
||
1463 | compile_options |= G_REGEX_DUPNAMES; |
||
1464 | } |
||
1465 | |||
1466 | if (compile_options_out != 0) |
||
1467 | *compile_options_out = compile_options; |
||
1468 | |||
1469 | return re; |
||
1470 | } |
||
1471 | |||
1472 | /** |
||
1473 | * g_regex_get_pattern: |
||
1474 | * @regex: a #GRegex structure |
||
1475 | * |
||
1476 | * Gets the pattern string associated with @regex, i.e. a copy of |
||
1477 | * the string passed to g_regex_new(). |
||
1478 | * |
||
1479 | * Returns: the pattern of @regex |
||
1480 | * |
||
1481 | * Since: 2.14 |
||
1482 | */ |
||
1483 | const gchar * |
||
1484 | g_regex_get_pattern (const GRegex *regex) |
||
1485 | { |
||
1486 | g_return_val_if_fail (regex != NULL, NULL); |
||
1487 | |||
1488 | return regex->pattern; |
||
1489 | } |
||
1490 | |||
1491 | /** |
||
1492 | * g_regex_get_max_backref: |
||
1493 | * @regex: a #GRegex |
||
1494 | * |
||
1495 | * Returns the number of the highest back reference |
||
1496 | * in the pattern, or 0 if the pattern does not contain |
||
1497 | * back references. |
||
1498 | * |
||
1499 | * Returns: the number of the highest back reference |
||
1500 | * |
||
1501 | * Since: 2.14 |
||
1502 | */ |
||
1503 | gint |
||
1504 | g_regex_get_max_backref (const GRegex *regex) |
||
1505 | { |
||
1506 | gint value; |
||
1507 | |||
1508 | pcre_fullinfo (regex->pcre_re, regex->extra, |
||
1509 | PCRE_INFO_BACKREFMAX, &value); |
||
1510 | |||
1511 | return value; |
||
1512 | } |
||
1513 | |||
1514 | /** |
||
1515 | * g_regex_get_capture_count: |
||
1516 | * @regex: a #GRegex |
||
1517 | * |
||
1518 | * Returns the number of capturing subpatterns in the pattern. |
||
1519 | * |
||
1520 | * Returns: the number of capturing subpatterns |
||
1521 | * |
||
1522 | * Since: 2.14 |
||
1523 | */ |
||
1524 | gint |
||
1525 | g_regex_get_capture_count (const GRegex *regex) |
||
1526 | { |
||
1527 | gint value; |
||
1528 | |||
1529 | pcre_fullinfo (regex->pcre_re, regex->extra, |
||
1530 | PCRE_INFO_CAPTURECOUNT, &value); |
||
1531 | |||
1532 | return value; |
||
1533 | } |
||
1534 | |||
1535 | /** |
||
1536 | * g_regex_get_has_cr_or_lf: |
||
1537 | * @regex: a #GRegex structure |
||
1538 | * |
||
1539 | * Checks whether the pattern contains explicit CR or LF references. |
||
1540 | * |
||
1541 | * Returns: %TRUE if the pattern contains explicit CR or LF references |
||
1542 | * |
||
1543 | * Since: 2.34 |
||
1544 | */ |
||
1545 | gboolean |
||
1546 | g_regex_get_has_cr_or_lf (const GRegex *regex) |
||
1547 | { |
||
1548 | gint value; |
||
1549 | |||
1550 | pcre_fullinfo (regex->pcre_re, regex->extra, |
||
1551 | PCRE_INFO_HASCRORLF, &value); |
||
1552 | |||
1553 | return !!value; |
||
1554 | } |
||
1555 | |||
1556 | /** |
||
1557 | * g_regex_get_max_lookbehind: |
||
1558 | * @regex: a #GRegex structure |
||
1559 | * |
||
1560 | * Gets the number of characters in the longest lookbehind assertion in the |
||
1561 | * pattern. This information is useful when doing multi-segment matching using |
||
1562 | * the partial matching facilities. |
||
1563 | * |
||
1564 | * Returns: the number of characters in the longest lookbehind assertion. |
||
1565 | * |
||
1566 | * Since: 2.38 |
||
1567 | */ |
||
1568 | gint |
||
1569 | g_regex_get_max_lookbehind (const GRegex *regex) |
||
1570 | { |
||
1571 | gint max_lookbehind; |
||
1572 | |||
1573 | pcre_fullinfo (regex->pcre_re, regex->extra, |
||
1574 | PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind); |
||
1575 | |||
1576 | return max_lookbehind; |
||
1577 | } |
||
1578 | |||
1579 | /** |
||
1580 | * g_regex_get_compile_flags: |
||
1581 | * @regex: a #GRegex |
||
1582 | * |
||
1583 | * Returns the compile options that @regex was created with. |
||
1584 | * |
||
1585 | * Returns: flags from #GRegexCompileFlags |
||
1586 | * |
||
1587 | * Since: 2.26 |
||
1588 | */ |
||
1589 | GRegexCompileFlags |
||
1590 | g_regex_get_compile_flags (const GRegex *regex) |
||
1591 | { |
||
1592 | g_return_val_if_fail (regex != NULL, 0); |
||
1593 | |||
1594 | return regex->compile_opts; |
||
1595 | } |
||
1596 | |||
1597 | /** |
||
1598 | * g_regex_get_match_flags: |
||
1599 | * @regex: a #GRegex |
||
1600 | * |
||
1601 | * Returns the match options that @regex was created with. |
||
1602 | * |
||
1603 | * Returns: flags from #GRegexMatchFlags |
||
1604 | * |
||
1605 | * Since: 2.26 |
||
1606 | */ |
||
1607 | GRegexMatchFlags |
||
1608 | g_regex_get_match_flags (const GRegex *regex) |
||
1609 | { |
||
1610 | g_return_val_if_fail (regex != NULL, 0); |
||
1611 | |||
1612 | return regex->match_opts & G_REGEX_MATCH_MASK; |
||
1613 | } |
||
1614 | |||
1615 | /** |
||
1616 | * g_regex_match_simple: |
||
1617 | * @pattern: the regular expression |
||
1618 | * @string: the string to scan for matches |
||
1619 | * @compile_options: compile options for the regular expression, or 0 |
||
1620 | * @match_options: match options, or 0 |
||
1621 | * |
||
1622 | * Scans for a match in @string for @pattern. |
||
1623 | * |
||
1624 | * This function is equivalent to g_regex_match() but it does not |
||
1625 | * require to compile the pattern with g_regex_new(), avoiding some |
||
1626 | * lines of code when you need just to do a match without extracting |
||
1627 | * substrings, capture counts, and so on. |
||
1628 | * |
||
1629 | * If this function is to be called on the same @pattern more than |
||
1630 | * once, it's more efficient to compile the pattern once with |
||
1631 | * g_regex_new() and then use g_regex_match(). |
||
1632 | * |
||
1633 | * Returns: %TRUE if the string matched, %FALSE otherwise |
||
1634 | * |
||
1635 | * Since: 2.14 |
||
1636 | */ |
||
1637 | gboolean |
||
1638 | g_regex_match_simple (const gchar *pattern, |
||
1639 | const gchar *string, |
||
1640 | GRegexCompileFlags compile_options, |
||
1641 | GRegexMatchFlags match_options) |
||
1642 | { |
||
1643 | GRegex *regex; |
||
1644 | gboolean result; |
||
1645 | |||
1646 | regex = g_regex_new (pattern, compile_options, 0, NULL); |
||
1647 | if (!regex) |
||
1648 | return FALSE; |
||
1649 | result = g_regex_match_full (regex, string, -1, 0, match_options, NULL, NULL); |
||
1650 | g_regex_unref (regex); |
||
1651 | return result; |
||
1652 | } |
||
1653 | |||
1654 | /** |
||
1655 | * g_regex_match: |
||
1656 | * @regex: a #GRegex structure from g_regex_new() |
||
1657 | * @string: the string to scan for matches |
||
1658 | * @match_options: match options |
||
1659 | * @match_info: (out) (allow-none): pointer to location where to store |
||
1660 | * the #GMatchInfo, or %NULL if you do not need it |
||
1661 | * |
||
1662 | * Scans for a match in string for the pattern in @regex. |
||
1663 | * The @match_options are combined with the match options specified |
||
1664 | * when the @regex structure was created, letting you have more |
||
1665 | * flexibility in reusing #GRegex structures. |
||
1666 | * |
||
1667 | * A #GMatchInfo structure, used to get information on the match, |
||
1668 | * is stored in @match_info if not %NULL. Note that if @match_info |
||
1669 | * is not %NULL then it is created even if the function returns %FALSE, |
||
1670 | * i.e. you must free it regardless if regular expression actually matched. |
||
1671 | * |
||
1672 | * To retrieve all the non-overlapping matches of the pattern in |
||
1673 | * string you can use g_match_info_next(). |
||
1674 | * |
||
1675 | * |[<!-- language="C" --> |
||
1676 | * static void |
||
1677 | * print_uppercase_words (const gchar *string) |
||
1678 | * { |
||
1679 | * // Print all uppercase-only words. |
||
1680 | * GRegex *regex; |
||
1681 | * GMatchInfo *match_info; |
||
1682 | * |
||
1683 | * regex = g_regex_new ("[A-Z]+", 0, 0, NULL); |
||
1684 | * g_regex_match (regex, string, 0, &match_info); |
||
1685 | * while (g_match_info_matches (match_info)) |
||
1686 | * { |
||
1687 | * gchar *word = g_match_info_fetch (match_info, 0); |
||
1688 | * g_print ("Found: %s\n", word); |
||
1689 | * g_free (word); |
||
1690 | * g_match_info_next (match_info, NULL); |
||
1691 | * } |
||
1692 | * g_match_info_free (match_info); |
||
1693 | * g_regex_unref (regex); |
||
1694 | * } |
||
1695 | * ]| |
||
1696 | * |
||
1697 | * @string is not copied and is used in #GMatchInfo internally. If |
||
1698 | * you use any #GMatchInfo method (except g_match_info_free()) after |
||
1699 | * freeing or modifying @string then the behaviour is undefined. |
||
1700 | * |
||
1701 | * Returns: %TRUE if the string matched, %FALSE otherwise |
||
1702 | * |
||
1703 | * Since: 2.14 |
||
1704 | */ |
||
1705 | gboolean |
||
1706 | g_regex_match (const GRegex *regex, |
||
1707 | const gchar *string, |
||
1708 | GRegexMatchFlags match_options, |
||
1709 | GMatchInfo **match_info) |
||
1710 | { |
||
1711 | return g_regex_match_full (regex, string, -1, 0, match_options, |
||
1712 | match_info, NULL); |
||
1713 | } |
||
1714 | |||
1715 | /** |
||
1716 | * g_regex_match_full: |
||
1717 | * @regex: a #GRegex structure from g_regex_new() |
||
1718 | * @string: (array length=string_len): the string to scan for matches |
||
1719 | * @string_len: the length of @string, or -1 if @string is nul-terminated |
||
1720 | * @start_position: starting index of the string to match, in bytes |
||
1721 | * @match_options: match options |
||
1722 | * @match_info: (out) (allow-none): pointer to location where to store |
||
1723 | * the #GMatchInfo, or %NULL if you do not need it |
||
1724 | * @error: location to store the error occurring, or %NULL to ignore errors |
||
1725 | * |
||
1726 | * Scans for a match in string for the pattern in @regex. |
||
1727 | * The @match_options are combined with the match options specified |
||
1728 | * when the @regex structure was created, letting you have more |
||
1729 | * flexibility in reusing #GRegex structures. |
||
1730 | * |
||
1731 | * Setting @start_position differs from just passing over a shortened |
||
1732 | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
||
1733 | * that begins with any kind of lookbehind assertion, such as "\b". |
||
1734 | * |
||
1735 | * A #GMatchInfo structure, used to get information on the match, is |
||
1736 | * stored in @match_info if not %NULL. Note that if @match_info is |
||
1737 | * not %NULL then it is created even if the function returns %FALSE, |
||
1738 | * i.e. you must free it regardless if regular expression actually |
||
1739 | * matched. |
||
1740 | * |
||
1741 | * @string is not copied and is used in #GMatchInfo internally. If |
||
1742 | * you use any #GMatchInfo method (except g_match_info_free()) after |
||
1743 | * freeing or modifying @string then the behaviour is undefined. |
||
1744 | * |
||
1745 | * To retrieve all the non-overlapping matches of the pattern in |
||
1746 | * string you can use g_match_info_next(). |
||
1747 | * |
||
1748 | * |[<!-- language="C" --> |
||
1749 | * static void |
||
1750 | * print_uppercase_words (const gchar *string) |
||
1751 | * { |
||
1752 | * // Print all uppercase-only words. |
||
1753 | * GRegex *regex; |
||
1754 | * GMatchInfo *match_info; |
||
1755 | * GError *error = NULL; |
||
1756 | * |
||
1757 | * regex = g_regex_new ("[A-Z]+", 0, 0, NULL); |
||
1758 | * g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error); |
||
1759 | * while (g_match_info_matches (match_info)) |
||
1760 | * { |
||
1761 | * gchar *word = g_match_info_fetch (match_info, 0); |
||
1762 | * g_print ("Found: %s\n", word); |
||
1763 | * g_free (word); |
||
1764 | * g_match_info_next (match_info, &error); |
||
1765 | * } |
||
1766 | * g_match_info_free (match_info); |
||
1767 | * g_regex_unref (regex); |
||
1768 | * if (error != NULL) |
||
1769 | * { |
||
1770 | * g_printerr ("Error while matching: %s\n", error->message); |
||
1771 | * g_error_free (error); |
||
1772 | * } |
||
1773 | * } |
||
1774 | * ]| |
||
1775 | * |
||
1776 | * Returns: %TRUE if the string matched, %FALSE otherwise |
||
1777 | * |
||
1778 | * Since: 2.14 |
||
1779 | */ |
||
1780 | gboolean |
||
1781 | g_regex_match_full (const GRegex *regex, |
||
1782 | const gchar *string, |
||
1783 | gssize string_len, |
||
1784 | gint start_position, |
||
1785 | GRegexMatchFlags match_options, |
||
1786 | GMatchInfo **match_info, |
||
1787 | GError **error) |
||
1788 | { |
||
1789 | GMatchInfo *info; |
||
1790 | gboolean match_ok; |
||
1791 | |||
1792 | g_return_val_if_fail (regex != NULL, FALSE); |
||
1793 | g_return_val_if_fail (string != NULL, FALSE); |
||
1794 | g_return_val_if_fail (start_position >= 0, FALSE); |
||
1795 | g_return_val_if_fail (error == NULL || *error == NULL, FALSE); |
||
1796 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); |
||
1797 | |||
1798 | info = match_info_new (regex, string, string_len, start_position, |
||
1799 | match_options, FALSE); |
||
1800 | match_ok = g_match_info_next (info, error); |
||
1801 | if (match_info != NULL) |
||
1802 | *match_info = info; |
||
1803 | else |
||
1804 | g_match_info_free (info); |
||
1805 | |||
1806 | return match_ok; |
||
1807 | } |
||
1808 | |||
1809 | /** |
||
1810 | * g_regex_match_all: |
||
1811 | * @regex: a #GRegex structure from g_regex_new() |
||
1812 | * @string: the string to scan for matches |
||
1813 | * @match_options: match options |
||
1814 | * @match_info: (out) (allow-none): pointer to location where to store |
||
1815 | * the #GMatchInfo, or %NULL if you do not need it |
||
1816 | * |
||
1817 | * Using the standard algorithm for regular expression matching only |
||
1818 | * the longest match in the string is retrieved. This function uses |
||
1819 | * a different algorithm so it can retrieve all the possible matches. |
||
1820 | * For more documentation see g_regex_match_all_full(). |
||
1821 | * |
||
1822 | * A #GMatchInfo structure, used to get information on the match, is |
||
1823 | * stored in @match_info if not %NULL. Note that if @match_info is |
||
1824 | * not %NULL then it is created even if the function returns %FALSE, |
||
1825 | * i.e. you must free it regardless if regular expression actually |
||
1826 | * matched. |
||
1827 | * |
||
1828 | * @string is not copied and is used in #GMatchInfo internally. If |
||
1829 | * you use any #GMatchInfo method (except g_match_info_free()) after |
||
1830 | * freeing or modifying @string then the behaviour is undefined. |
||
1831 | * |
||
1832 | * Returns: %TRUE if the string matched, %FALSE otherwise |
||
1833 | * |
||
1834 | * Since: 2.14 |
||
1835 | */ |
||
1836 | gboolean |
||
1837 | g_regex_match_all (const GRegex *regex, |
||
1838 | const gchar *string, |
||
1839 | GRegexMatchFlags match_options, |
||
1840 | GMatchInfo **match_info) |
||
1841 | { |
||
1842 | return g_regex_match_all_full (regex, string, -1, 0, match_options, |
||
1843 | match_info, NULL); |
||
1844 | } |
||
1845 | |||
1846 | /** |
||
1847 | * g_regex_match_all_full: |
||
1848 | * @regex: a #GRegex structure from g_regex_new() |
||
1849 | * @string: (array length=string_len): the string to scan for matches |
||
1850 | * @string_len: the length of @string, or -1 if @string is nul-terminated |
||
1851 | * @start_position: starting index of the string to match, in bytes |
||
1852 | * @match_options: match options |
||
1853 | * @match_info: (out) (allow-none): pointer to location where to store |
||
1854 | * the #GMatchInfo, or %NULL if you do not need it |
||
1855 | * @error: location to store the error occurring, or %NULL to ignore errors |
||
1856 | * |
||
1857 | * Using the standard algorithm for regular expression matching only |
||
1858 | * the longest match in the string is retrieved, it is not possible |
||
1859 | * to obtain all the available matches. For instance matching |
||
1860 | * "<a> <b> <c>" against the pattern "<.*>" |
||
1861 | * you get "<a> <b> <c>". |
||
1862 | * |
||
1863 | * This function uses a different algorithm (called DFA, i.e. deterministic |
||
1864 | * finite automaton), so it can retrieve all the possible matches, all |
||
1865 | * starting at the same point in the string. For instance matching |
||
1866 | * "<a> <b> <c>" against the pattern "<.*>" |
||
1867 | * you would obtain three matches: "<a> <b> <c>", |
||
1868 | * "<a> <b>" and "<a>". |
||
1869 | * |
||
1870 | * The number of matched strings is retrieved using |
||
1871 | * g_match_info_get_match_count(). To obtain the matched strings and |
||
1872 | * their position you can use, respectively, g_match_info_fetch() and |
||
1873 | * g_match_info_fetch_pos(). Note that the strings are returned in |
||
1874 | * reverse order of length; that is, the longest matching string is |
||
1875 | * given first. |
||
1876 | * |
||
1877 | * Note that the DFA algorithm is slower than the standard one and it |
||
1878 | * is not able to capture substrings, so backreferences do not work. |
||
1879 | * |
||
1880 | * Setting @start_position differs from just passing over a shortened |
||
1881 | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
||
1882 | * that begins with any kind of lookbehind assertion, such as "\b". |
||
1883 | * |
||
1884 | * A #GMatchInfo structure, used to get information on the match, is |
||
1885 | * stored in @match_info if not %NULL. Note that if @match_info is |
||
1886 | * not %NULL then it is created even if the function returns %FALSE, |
||
1887 | * i.e. you must free it regardless if regular expression actually |
||
1888 | * matched. |
||
1889 | * |
||
1890 | * @string is not copied and is used in #GMatchInfo internally. If |
||
1891 | * you use any #GMatchInfo method (except g_match_info_free()) after |
||
1892 | * freeing or modifying @string then the behaviour is undefined. |
||
1893 | * |
||
1894 | * Returns: %TRUE if the string matched, %FALSE otherwise |
||
1895 | * |
||
1896 | * Since: 2.14 |
||
1897 | */ |
||
1898 | gboolean |
||
1899 | g_regex_match_all_full (const GRegex *regex, |
||
1900 | const gchar *string, |
||
1901 | gssize string_len, |
||
1902 | gint start_position, |
||
1903 | GRegexMatchFlags match_options, |
||
1904 | GMatchInfo **match_info, |
||
1905 | GError **error) |
||
1906 | { |
||
1907 | GMatchInfo *info; |
||
1908 | gboolean done; |
||
1909 | pcre *pcre_re; |
||
1910 | pcre_extra *extra; |
||
1911 | |||
1912 | g_return_val_if_fail (regex != NULL, FALSE); |
||
1913 | g_return_val_if_fail (string != NULL, FALSE); |
||
1914 | g_return_val_if_fail (start_position >= 0, FALSE); |
||
1915 | g_return_val_if_fail (error == NULL || *error == NULL, FALSE); |
||
1916 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); |
||
1917 | |||
1918 | #ifdef PCRE_NO_AUTO_POSSESS |
||
1919 | /* For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which |
||
1920 | * is an optimization for normal regex matching, but results in omitting |
||
1921 | * some shorter matches here, and an observable behaviour change. |
||
1922 | * |
||
1923 | * DFA matching is rather niche, and very rarely used according to |
||
1924 | * codesearch.debian.net, so don't bother caching the recompiled RE. */ |
||
1925 | pcre_re = regex_compile (regex->pattern, |
||
1926 | regex->compile_opts | PCRE_NO_AUTO_POSSESS, |
||
1927 | NULL, NULL, error); |
||
1928 | |||
1929 | if (pcre_re == NULL) |
||
1930 | return FALSE; |
||
1931 | |||
1932 | /* Not bothering to cache the optimization data either, with similar |
||
1933 | * reasoning */ |
||
1934 | extra = NULL; |
||
1935 | #else |
||
1936 | /* For PCRE < 8.33 the precompiled regex is fine. */ |
||
1937 | pcre_re = regex->pcre_re; |
||
1938 | extra = regex->extra; |
||
1939 | #endif |
||
1940 | |||
1941 | info = match_info_new (regex, string, string_len, start_position, |
||
1942 | match_options, TRUE); |
||
1943 | |||
1944 | done = FALSE; |
||
1945 | while (!done) |
||
1946 | { |
||
1947 | done = TRUE; |
||
1948 | info->matches = pcre_dfa_exec (pcre_re, extra, |
||
1949 | info->string, info->string_len, |
||
1950 | info->pos, |
||
1951 | regex->match_opts | match_options, |
||
1952 | info->offsets, info->n_offsets, |
||
1953 | info->workspace, info->n_workspace); |
||
1954 | if (info->matches == PCRE_ERROR_DFA_WSSIZE) |
||
1955 | { |
||
1956 | /* info->workspace is too small. */ |
||
1957 | info->n_workspace *= 2; |
||
1958 | info->workspace = g_realloc (info->workspace, |
||
1959 | info->n_workspace * sizeof (gint)); |
||
1960 | done = FALSE; |
||
1961 | } |
||
1962 | else if (info->matches == 0) |
||
1963 | { |
||
1964 | /* info->offsets is too small. */ |
||
1965 | info->n_offsets *= 2; |
||
1966 | info->offsets = g_realloc (info->offsets, |
||
1967 | info->n_offsets * sizeof (gint)); |
||
1968 | done = FALSE; |
||
1969 | } |
||
1970 | else if (IS_PCRE_ERROR (info->matches)) |
||
1971 | { |
||
1972 | g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, |
||
1973 | _("Error while matching regular expression %s: %s"), |
||
1974 | regex->pattern, match_error (info->matches)); |
||
1975 | } |
||
1976 | } |
||
1977 | |||
1978 | #ifdef PCRE_NO_AUTO_POSSESS |
||
1979 | pcre_free (pcre_re); |
||
1980 | #endif |
||
1981 | |||
1982 | /* set info->pos to -1 so that a call to g_match_info_next() fails. */ |
||
1983 | info->pos = -1; |
||
1984 | |||
1985 | if (match_info != NULL) |
||
1986 | *match_info = info; |
||
1987 | else |
||
1988 | g_match_info_free (info); |
||
1989 | |||
1990 | return info->matches >= 0; |
||
1991 | } |
||
1992 | |||
1993 | /** |
||
1994 | * g_regex_get_string_number: |
||
1995 | * @regex: #GRegex structure |
||
1996 | * @name: name of the subexpression |
||
1997 | * |
||
1998 | * Retrieves the number of the subexpression named @name. |
||
1999 | * |
||
2000 | * Returns: The number of the subexpression or -1 if @name |
||
2001 | * does not exist |
||
2002 | * |
||
2003 | * Since: 2.14 |
||
2004 | */ |
||
2005 | gint |
||
2006 | g_regex_get_string_number (const GRegex *regex, |
||
2007 | const gchar *name) |
||
2008 | { |
||
2009 | gint num; |
||
2010 | |||
2011 | g_return_val_if_fail (regex != NULL, -1); |
||
2012 | g_return_val_if_fail (name != NULL, -1); |
||
2013 | |||
2014 | num = pcre_get_stringnumber (regex->pcre_re, name); |
||
2015 | if (num == PCRE_ERROR_NOSUBSTRING) |
||
2016 | num = -1; |
||
2017 | |||
2018 | return num; |
||
2019 | } |
||
2020 | |||
2021 | /** |
||
2022 | * g_regex_split_simple: |
||
2023 | * @pattern: the regular expression |
||
2024 | * @string: the string to scan for matches |
||
2025 | * @compile_options: compile options for the regular expression, or 0 |
||
2026 | * @match_options: match options, or 0 |
||
2027 | * |
||
2028 | * Breaks the string on the pattern, and returns an array of |
||
2029 | * the tokens. If the pattern contains capturing parentheses, |
||
2030 | * then the text for each of the substrings will also be returned. |
||
2031 | * If the pattern does not match anywhere in the string, then the |
||
2032 | * whole string is returned as the first token. |
||
2033 | * |
||
2034 | * This function is equivalent to g_regex_split() but it does |
||
2035 | * not require to compile the pattern with g_regex_new(), avoiding |
||
2036 | * some lines of code when you need just to do a split without |
||
2037 | * extracting substrings, capture counts, and so on. |
||
2038 | * |
||
2039 | * If this function is to be called on the same @pattern more than |
||
2040 | * once, it's more efficient to compile the pattern once with |
||
2041 | * g_regex_new() and then use g_regex_split(). |
||
2042 | * |
||
2043 | * As a special case, the result of splitting the empty string "" |
||
2044 | * is an empty vector, not a vector containing a single string. |
||
2045 | * The reason for this special case is that being able to represent |
||
2046 | * an empty vector is typically more useful than consistent handling |
||
2047 | * of empty elements. If you do need to represent empty elements, |
||
2048 | * you'll need to check for the empty string before calling this |
||
2049 | * function. |
||
2050 | * |
||
2051 | * A pattern that can match empty strings splits @string into |
||
2052 | * separate characters wherever it matches the empty string between |
||
2053 | * characters. For example splitting "ab c" using as a separator |
||
2054 | * "\s*", you will get "a", "b" and "c". |
||
2055 | * |
||
2056 | * Returns: (transfer full): a %NULL-terminated array of strings. Free |
||
2057 | * it using g_strfreev() |
||
2058 | * |
||
2059 | * Since: 2.14 |
||
2060 | **/ |
||
2061 | gchar ** |
||
2062 | g_regex_split_simple (const gchar *pattern, |
||
2063 | const gchar *string, |
||
2064 | GRegexCompileFlags compile_options, |
||
2065 | GRegexMatchFlags match_options) |
||
2066 | { |
||
2067 | GRegex *regex; |
||
2068 | gchar **result; |
||
2069 | |||
2070 | regex = g_regex_new (pattern, compile_options, 0, NULL); |
||
2071 | if (!regex) |
||
2072 | return NULL; |
||
2073 | |||
2074 | result = g_regex_split_full (regex, string, -1, 0, match_options, 0, NULL); |
||
2075 | g_regex_unref (regex); |
||
2076 | return result; |
||
2077 | } |
||
2078 | |||
2079 | /** |
||
2080 | * g_regex_split: |
||
2081 | * @regex: a #GRegex structure |
||
2082 | * @string: the string to split with the pattern |
||
2083 | * @match_options: match time option flags |
||
2084 | * |
||
2085 | * Breaks the string on the pattern, and returns an array of the tokens. |
||
2086 | * If the pattern contains capturing parentheses, then the text for each |
||
2087 | * of the substrings will also be returned. If the pattern does not match |
||
2088 | * anywhere in the string, then the whole string is returned as the first |
||
2089 | * token. |
||
2090 | * |
||
2091 | * As a special case, the result of splitting the empty string "" is an |
||
2092 | * empty vector, not a vector containing a single string. The reason for |
||
2093 | * this special case is that being able to represent an empty vector is |
||
2094 | * typically more useful than consistent handling of empty elements. If |
||
2095 | * you do need to represent empty elements, you'll need to check for the |
||
2096 | * empty string before calling this function. |
||
2097 | * |
||
2098 | * A pattern that can match empty strings splits @string into separate |
||
2099 | * characters wherever it matches the empty string between characters. |
||
2100 | * For example splitting "ab c" using as a separator "\s*", you will get |
||
2101 | * "a", "b" and "c". |
||
2102 | * |
||
2103 | * Returns: (transfer full): a %NULL-terminated gchar ** array. Free |
||
2104 | * it using g_strfreev() |
||
2105 | * |
||
2106 | * Since: 2.14 |
||
2107 | **/ |
||
2108 | gchar ** |
||
2109 | g_regex_split (const GRegex *regex, |
||
2110 | const gchar *string, |
||
2111 | GRegexMatchFlags match_options) |
||
2112 | { |
||
2113 | return g_regex_split_full (regex, string, -1, 0, |
||
2114 | match_options, 0, NULL); |
||
2115 | } |
||
2116 | |||
2117 | /** |
||
2118 | * g_regex_split_full: |
||
2119 | * @regex: a #GRegex structure |
||
2120 | * @string: (array length=string_len): the string to split with the pattern |
||
2121 | * @string_len: the length of @string, or -1 if @string is nul-terminated |
||
2122 | * @start_position: starting index of the string to match, in bytes |
||
2123 | * @match_options: match time option flags |
||
2124 | * @max_tokens: the maximum number of tokens to split @string into. |
||
2125 | * If this is less than 1, the string is split completely |
||
2126 | * @error: return location for a #GError |
||
2127 | * |
||
2128 | * Breaks the string on the pattern, and returns an array of the tokens. |
||
2129 | * If the pattern contains capturing parentheses, then the text for each |
||
2130 | * of the substrings will also be returned. If the pattern does not match |
||
2131 | * anywhere in the string, then the whole string is returned as the first |
||
2132 | * token. |
||
2133 | * |
||
2134 | * As a special case, the result of splitting the empty string "" is an |
||
2135 | * empty vector, not a vector containing a single string. The reason for |
||
2136 | * this special case is that being able to represent an empty vector is |
||
2137 | * typically more useful than consistent handling of empty elements. If |
||
2138 | * you do need to represent empty elements, you'll need to check for the |
||
2139 | * empty string before calling this function. |
||
2140 | * |
||
2141 | * A pattern that can match empty strings splits @string into separate |
||
2142 | * characters wherever it matches the empty string between characters. |
||
2143 | * For example splitting "ab c" using as a separator "\s*", you will get |
||
2144 | * "a", "b" and "c". |
||
2145 | * |
||
2146 | * Setting @start_position differs from just passing over a shortened |
||
2147 | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
||
2148 | * that begins with any kind of lookbehind assertion, such as "\b". |
||
2149 | * |
||
2150 | * Returns: (transfer full): a %NULL-terminated gchar ** array. Free |
||
2151 | * it using g_strfreev() |
||
2152 | * |
||
2153 | * Since: 2.14 |
||
2154 | **/ |
||
2155 | gchar ** |
||
2156 | g_regex_split_full (const GRegex *regex, |
||
2157 | const gchar *string, |
||
2158 | gssize string_len, |
||
2159 | gint start_position, |
||
2160 | GRegexMatchFlags match_options, |
||
2161 | gint max_tokens, |
||
2162 | GError **error) |
||
2163 | { |
||
2164 | GError *tmp_error = NULL; |
||
2165 | GMatchInfo *match_info; |
||
2166 | GList *list, *last; |
||
2167 | gint i; |
||
2168 | gint token_count; |
||
2169 | gboolean match_ok; |
||
2170 | /* position of the last separator. */ |
||
2171 | gint last_separator_end; |
||
2172 | /* was the last match 0 bytes long? */ |
||
2173 | gboolean last_match_is_empty; |
||
2174 | /* the returned array of char **s */ |
||
2175 | gchar **string_list; |
||
2176 | |||
2177 | g_return_val_if_fail (regex != NULL, NULL); |
||
2178 | g_return_val_if_fail (string != NULL, NULL); |
||
2179 | g_return_val_if_fail (start_position >= 0, NULL); |
||
2180 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
||
2181 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
||
2182 | |||
2183 | if (max_tokens <= 0) |
||
2184 | max_tokens = G_MAXINT; |
||
2185 | |||
2186 | if (string_len < 0) |
||
2187 | string_len = strlen (string); |
||
2188 | |||
2189 | /* zero-length string */ |
||
2190 | if (string_len - start_position == 0) |
||
2191 | return g_new0 (gchar *, 1); |
||
2192 | |||
2193 | if (max_tokens == 1) |
||
2194 | { |
||
2195 | string_list = g_new0 (gchar *, 2); |
||
2196 | string_list[0] = g_strndup (&string[start_position], |
||
2197 | string_len - start_position); |
||
2198 | return string_list; |
||
2199 | } |
||
2200 | |||
2201 | list = NULL; |
||
2202 | token_count = 0; |
||
2203 | last_separator_end = start_position; |
||
2204 | last_match_is_empty = FALSE; |
||
2205 | |||
2206 | match_ok = g_regex_match_full (regex, string, string_len, start_position, |
||
2207 | match_options, &match_info, &tmp_error); |
||
2208 | |||
2209 | while (tmp_error == NULL) |
||
2210 | { |
||
2211 | if (match_ok) |
||
2212 | { |
||
2213 | last_match_is_empty = |
||
2214 | (match_info->offsets[0] == match_info->offsets[1]); |
||
2215 | |||
2216 | /* we need to skip empty separators at the same position of the end |
||
2217 | * of another separator. e.g. the string is "a b" and the separator |
||
2218 | * is " *", so from 1 to 2 we have a match and at position 2 we have |
||
2219 | * an empty match. */ |
||
2220 | if (last_separator_end != match_info->offsets[1]) |
||
2221 | { |
||
2222 | gchar *token; |
||
2223 | gint match_count; |
||
2224 | |||
2225 | token = g_strndup (string + last_separator_end, |
||
2226 | match_info->offsets[0] - last_separator_end); |
||
2227 | list = g_list_prepend (list, token); |
||
2228 | token_count++; |
||
2229 | |||
2230 | /* if there were substrings, these need to be added to |
||
2231 | * the list. */ |
||
2232 | match_count = g_match_info_get_match_count (match_info); |
||
2233 | if (match_count > 1) |
||
2234 | { |
||
2235 | for (i = 1; i < match_count; i++) |
||
2236 | list = g_list_prepend (list, g_match_info_fetch (match_info, i)); |
||
2237 | } |
||
2238 | } |
||
2239 | } |
||
2240 | else |
||
2241 | { |
||
2242 | /* if there was no match, copy to end of string. */ |
||
2243 | if (!last_match_is_empty) |
||
2244 | { |
||
2245 | gchar *token = g_strndup (string + last_separator_end, |
||
2246 | match_info->string_len - last_separator_end); |
||
2247 | list = g_list_prepend (list, token); |
||
2248 | } |
||
2249 | /* no more tokens, end the loop. */ |
||
2250 | break; |
||
2251 | } |
||
2252 | |||
2253 | /* -1 to leave room for the last part. */ |
||
2254 | if (token_count >= max_tokens - 1) |
||
2255 | { |
||
2256 | /* we have reached the maximum number of tokens, so we copy |
||
2257 | * the remaining part of the string. */ |
||
2258 | if (last_match_is_empty) |
||
2259 | { |
||
2260 | /* the last match was empty, so we have moved one char |
||
2261 | * after the real position to avoid empty matches at the |
||
2262 | * same position. */ |
||
2263 | match_info->pos = PREV_CHAR (regex, &string[match_info->pos]) - string; |
||
2264 | } |
||
2265 | /* the if is needed in the case we have terminated the available |
||
2266 | * tokens, but we are at the end of the string, so there are no |
||
2267 | * characters left to copy. */ |
||
2268 | if (string_len > match_info->pos) |
||
2269 | { |
||
2270 | gchar *token = g_strndup (string + match_info->pos, |
||
2271 | string_len - match_info->pos); |
||
2272 | list = g_list_prepend (list, token); |
||
2273 | } |
||
2274 | /* end the loop. */ |
||
2275 | break; |
||
2276 | } |
||
2277 | |||
2278 | last_separator_end = match_info->pos; |
||
2279 | if (last_match_is_empty) |
||
2280 | /* if the last match was empty, g_match_info_next() has moved |
||
2281 | * forward to avoid infinite loops, but we still need to copy that |
||
2282 | * character. */ |
||
2283 | last_separator_end = PREV_CHAR (regex, &string[last_separator_end]) - string; |
||
2284 | |||
2285 | match_ok = g_match_info_next (match_info, &tmp_error); |
||
2286 | } |
||
2287 | g_match_info_free (match_info); |
||
2288 | if (tmp_error != NULL) |
||
2289 | { |
||
2290 | g_propagate_error (error, tmp_error); |
||
2291 | g_list_free_full (list, g_free); |
||
2292 | match_info->pos = -1; |
||
2293 | return NULL; |
||
2294 | } |
||
2295 | |||
2296 | string_list = g_new (gchar *, g_list_length (list) + 1); |
||
2297 | i = 0; |
||
2298 | for (last = g_list_last (list); last; last = g_list_previous (last)) |
||
2299 | string_list[i++] = last->data; |
||
2300 | string_list[i] = NULL; |
||
2301 | g_list_free (list); |
||
2302 | |||
2303 | return string_list; |
||
2304 | } |
||
2305 | |||
/* Kinds of pieces a replacement string is split into; stored in
 * InterpolationData.type. */
enum
{
  REPL_TYPE_STRING             = 0, /* literal text, kept in .text */
  REPL_TYPE_CHARACTER          = 1, /* one escaped character, kept in .c */
  REPL_TYPE_SYMBOLIC_REFERENCE = 2, /* \g<name>, name kept in .text */
  REPL_TYPE_NUMERIC_REFERENCE  = 3, /* \N or \g<N>, number kept in .num */
  REPL_TYPE_CHANGE_CASE        = 4  /* \l \u \L \U \E, mode kept in .change_case */
};
||
2314 | |||
/* Case-conversion state used while interpolating a replacement.
 * Each basic mode is a distinct bit so that the *_MASK values can test
 * for a whole family of modes with a single bitwise AND. */
typedef enum
{
  CHANGE_CASE_NONE         = 0x01, /* \E: copy text unchanged */
  CHANGE_CASE_UPPER        = 0x02, /* \U: upper-case everything that follows */
  CHANGE_CASE_LOWER        = 0x04, /* \L: lower-case everything that follows */
  CHANGE_CASE_UPPER_SINGLE = 0x08, /* \u: upper-case the next character only */
  CHANGE_CASE_LOWER_SINGLE = 0x10, /* \l: lower-case the next character only */

  /* any mode that applies to a single character */
  CHANGE_CASE_SINGLE_MASK  = CHANGE_CASE_UPPER_SINGLE | CHANGE_CASE_LOWER_SINGLE,
  /* any mode that converts to lower case */
  CHANGE_CASE_LOWER_MASK   = CHANGE_CASE_LOWER | CHANGE_CASE_LOWER_SINGLE,
  /* any mode that converts to upper case */
  CHANGE_CASE_UPPER_MASK   = CHANGE_CASE_UPPER | CHANGE_CASE_UPPER_SINGLE
} ChangeCase;
||
2326 | |||
2327 | struct _InterpolationData |
||
2328 | { |
||
2329 | gchar *text; |
||
2330 | gint type; |
||
2331 | gint num; |
||
2332 | gchar c; |
||
2333 | ChangeCase change_case; |
||
2334 | }; |
||
2335 | |||
2336 | static void |
||
2337 | free_interpolation_data (InterpolationData *data) |
||
2338 | { |
||
2339 | g_free (data->text); |
||
2340 | g_free (data); |
||
2341 | } |
||
2342 | |||
2343 | static const gchar * |
||
2344 | expand_escape (const gchar *replacement, |
||
2345 | const gchar *p, |
||
2346 | InterpolationData *data, |
||
2347 | GError **error) |
||
2348 | { |
||
2349 | const gchar *q, *r; |
||
2350 | gint x, d, h, i; |
||
2351 | const gchar *error_detail; |
||
2352 | gint base = 0; |
||
2353 | GError *tmp_error = NULL; |
||
2354 | |||
2355 | p++; |
||
2356 | switch (*p) |
||
2357 | { |
||
2358 | case 't': |
||
2359 | p++; |
||
2360 | data->c = '\t'; |
||
2361 | data->type = REPL_TYPE_CHARACTER; |
||
2362 | break; |
||
2363 | case 'n': |
||
2364 | p++; |
||
2365 | data->c = '\n'; |
||
2366 | data->type = REPL_TYPE_CHARACTER; |
||
2367 | break; |
||
2368 | case 'v': |
||
2369 | p++; |
||
2370 | data->c = '\v'; |
||
2371 | data->type = REPL_TYPE_CHARACTER; |
||
2372 | break; |
||
2373 | case 'r': |
||
2374 | p++; |
||
2375 | data->c = '\r'; |
||
2376 | data->type = REPL_TYPE_CHARACTER; |
||
2377 | break; |
||
2378 | case 'f': |
||
2379 | p++; |
||
2380 | data->c = '\f'; |
||
2381 | data->type = REPL_TYPE_CHARACTER; |
||
2382 | break; |
||
2383 | case 'a': |
||
2384 | p++; |
||
2385 | data->c = '\a'; |
||
2386 | data->type = REPL_TYPE_CHARACTER; |
||
2387 | break; |
||
2388 | case 'b': |
||
2389 | p++; |
||
2390 | data->c = '\b'; |
||
2391 | data->type = REPL_TYPE_CHARACTER; |
||
2392 | break; |
||
2393 | case '\\': |
||
2394 | p++; |
||
2395 | data->c = '\\'; |
||
2396 | data->type = REPL_TYPE_CHARACTER; |
||
2397 | break; |
||
2398 | case 'x': |
||
2399 | p++; |
||
2400 | x = 0; |
||
2401 | if (*p == '{') |
||
2402 | { |
||
2403 | p++; |
||
2404 | do |
||
2405 | { |
||
2406 | h = g_ascii_xdigit_value (*p); |
||
2407 | if (h < 0) |
||
2408 | { |
||
2409 | error_detail = _("hexadecimal digit or '}' expected"); |
||
2410 | goto error; |
||
2411 | } |
||
2412 | x = x * 16 + h; |
||
2413 | p++; |
||
2414 | } |
||
2415 | while (*p != '}'); |
||
2416 | p++; |
||
2417 | } |
||
2418 | else |
||
2419 | { |
||
2420 | for (i = 0; i < 2; i++) |
||
2421 | { |
||
2422 | h = g_ascii_xdigit_value (*p); |
||
2423 | if (h < 0) |
||
2424 | { |
||
2425 | error_detail = _("hexadecimal digit expected"); |
||
2426 | goto error; |
||
2427 | } |
||
2428 | x = x * 16 + h; |
||
2429 | p++; |
||
2430 | } |
||
2431 | } |
||
2432 | data->type = REPL_TYPE_STRING; |
||
2433 | data->text = g_new0 (gchar, 8); |
||
2434 | g_unichar_to_utf8 (x, data->text); |
||
2435 | break; |
||
2436 | case 'l': |
||
2437 | p++; |
||
2438 | data->type = REPL_TYPE_CHANGE_CASE; |
||
2439 | data->change_case = CHANGE_CASE_LOWER_SINGLE; |
||
2440 | break; |
||
2441 | case 'u': |
||
2442 | p++; |
||
2443 | data->type = REPL_TYPE_CHANGE_CASE; |
||
2444 | data->change_case = CHANGE_CASE_UPPER_SINGLE; |
||
2445 | break; |
||
2446 | case 'L': |
||
2447 | p++; |
||
2448 | data->type = REPL_TYPE_CHANGE_CASE; |
||
2449 | data->change_case = CHANGE_CASE_LOWER; |
||
2450 | break; |
||
2451 | case 'U': |
||
2452 | p++; |
||
2453 | data->type = REPL_TYPE_CHANGE_CASE; |
||
2454 | data->change_case = CHANGE_CASE_UPPER; |
||
2455 | break; |
||
2456 | case 'E': |
||
2457 | p++; |
||
2458 | data->type = REPL_TYPE_CHANGE_CASE; |
||
2459 | data->change_case = CHANGE_CASE_NONE; |
||
2460 | break; |
||
2461 | case 'g': |
||
2462 | p++; |
||
2463 | if (*p != '<') |
||
2464 | { |
||
2465 | error_detail = _("missing '<' in symbolic reference"); |
||
2466 | goto error; |
||
2467 | } |
||
2468 | q = p + 1; |
||
2469 | do |
||
2470 | { |
||
2471 | p++; |
||
2472 | if (!*p) |
||
2473 | { |
||
2474 | error_detail = _("unfinished symbolic reference"); |
||
2475 | goto error; |
||
2476 | } |
||
2477 | } |
||
2478 | while (*p != '>'); |
||
2479 | if (p - q == 0) |
||
2480 | { |
||
2481 | error_detail = _("zero-length symbolic reference"); |
||
2482 | goto error; |
||
2483 | } |
||
2484 | if (g_ascii_isdigit (*q)) |
||
2485 | { |
||
2486 | x = 0; |
||
2487 | do |
||
2488 | { |
||
2489 | h = g_ascii_digit_value (*q); |
||
2490 | if (h < 0) |
||
2491 | { |
||
2492 | error_detail = _("digit expected"); |
||
2493 | p = q; |
||
2494 | goto error; |
||
2495 | } |
||
2496 | x = x * 10 + h; |
||
2497 | q++; |
||
2498 | } |
||
2499 | while (q != p); |
||
2500 | data->num = x; |
||
2501 | data->type = REPL_TYPE_NUMERIC_REFERENCE; |
||
2502 | } |
||
2503 | else |
||
2504 | { |
||
2505 | r = q; |
||
2506 | do |
||
2507 | { |
||
2508 | if (!g_ascii_isalnum (*r)) |
||
2509 | { |
||
2510 | error_detail = _("illegal symbolic reference"); |
||
2511 | p = r; |
||
2512 | goto error; |
||
2513 | } |
||
2514 | r++; |
||
2515 | } |
||
2516 | while (r != p); |
||
2517 | data->text = g_strndup (q, p - q); |
||
2518 | data->type = REPL_TYPE_SYMBOLIC_REFERENCE; |
||
2519 | } |
||
2520 | p++; |
||
2521 | break; |
||
2522 | case '0': |
||
2523 | /* if \0 is followed by a number is an octal number representing a |
||
2524 | * character, else it is a numeric reference. */ |
||
2525 | if (g_ascii_digit_value (*g_utf8_next_char (p)) >= 0) |
||
2526 | { |
||
2527 | base = 8; |
||
2528 | p = g_utf8_next_char (p); |
||
2529 | } |
||
2530 | case '1': |
||
2531 | case '2': |
||
2532 | case '3': |
||
2533 | case '4': |
||
2534 | case '5': |
||
2535 | case '6': |
||
2536 | case '7': |
||
2537 | case '8': |
||
2538 | case '9': |
||
2539 | x = 0; |
||
2540 | d = 0; |
||
2541 | for (i = 0; i < 3; i++) |
||
2542 | { |
||
2543 | h = g_ascii_digit_value (*p); |
||
2544 | if (h < 0) |
||
2545 | break; |
||
2546 | if (h > 7) |
||
2547 | { |
||
2548 | if (base == 8) |
||
2549 | break; |
||
2550 | else |
||
2551 | base = 10; |
||
2552 | } |
||
2553 | if (i == 2 && base == 10) |
||
2554 | break; |
||
2555 | x = x * 8 + h; |
||
2556 | d = d * 10 + h; |
||
2557 | p++; |
||
2558 | } |
||
2559 | if (base == 8 || i == 3) |
||
2560 | { |
||
2561 | data->type = REPL_TYPE_STRING; |
||
2562 | data->text = g_new0 (gchar, 8); |
||
2563 | g_unichar_to_utf8 (x, data->text); |
||
2564 | } |
||
2565 | else |
||
2566 | { |
||
2567 | data->type = REPL_TYPE_NUMERIC_REFERENCE; |
||
2568 | data->num = d; |
||
2569 | } |
||
2570 | break; |
||
2571 | case 0: |
||
2572 | error_detail = _("stray final '\\'"); |
||
2573 | goto error; |
||
2574 | break; |
||
2575 | default: |
||
2576 | error_detail = _("unknown escape sequence"); |
||
2577 | goto error; |
||
2578 | } |
||
2579 | |||
2580 | return p; |
||
2581 | |||
2582 | error: |
||
2583 | /* G_GSSIZE_FORMAT doesn't work with gettext, so we use %lu */ |
||
2584 | tmp_error = g_error_new (G_REGEX_ERROR, |
||
2585 | G_REGEX_ERROR_REPLACE, |
||
2586 | _("Error while parsing replacement " |
||
2587 | "text \"%s\" at char %lu: %s"), |
||
2588 | replacement, |
||
2589 | (gulong)(p - replacement), |
||
2590 | error_detail); |
||
2591 | g_propagate_error (error, tmp_error); |
||
2592 | |||
2593 | return NULL; |
||
2594 | } |
||
2595 | |||
2596 | static GList * |
||
2597 | split_replacement (const gchar *replacement, |
||
2598 | GError **error) |
||
2599 | { |
||
2600 | GList *list = NULL; |
||
2601 | InterpolationData *data; |
||
2602 | const gchar *p, *start; |
||
2603 | |||
2604 | start = p = replacement; |
||
2605 | while (*p) |
||
2606 | { |
||
2607 | if (*p == '\\') |
||
2608 | { |
||
2609 | data = g_new0 (InterpolationData, 1); |
||
2610 | start = p = expand_escape (replacement, p, data, error); |
||
2611 | if (p == NULL) |
||
2612 | { |
||
2613 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
||
2614 | free_interpolation_data (data); |
||
2615 | |||
2616 | return NULL; |
||
2617 | } |
||
2618 | list = g_list_prepend (list, data); |
||
2619 | } |
||
2620 | else |
||
2621 | { |
||
2622 | p++; |
||
2623 | if (*p == '\\' || *p == '\0') |
||
2624 | { |
||
2625 | if (p - start > 0) |
||
2626 | { |
||
2627 | data = g_new0 (InterpolationData, 1); |
||
2628 | data->text = g_strndup (start, p - start); |
||
2629 | data->type = REPL_TYPE_STRING; |
||
2630 | list = g_list_prepend (list, data); |
||
2631 | } |
||
2632 | } |
||
2633 | } |
||
2634 | } |
||
2635 | |||
2636 | return g_list_reverse (list); |
||
2637 | } |
||
2638 | |||
/* Change the case of c based on change_case: lower-case it for the modes in
 * CHANGE_CASE_LOWER_MASK, upper-case it for the modes in
 * CHANGE_CASE_UPPER_MASK and leave it untouched for anything else (notably
 * CHANGE_CASE_NONE). The result has type gunichar.
 * Note that change_case may be evaluated twice, so it must not have side
 * effects. */
#define CHANGE_CASE(c, change_case) \
        (((change_case) & CHANGE_CASE_LOWER_MASK) ? \
                g_unichar_tolower (c) : \
         ((change_case) & CHANGE_CASE_UPPER_MASK) ? \
                g_unichar_toupper (c) : \
                (gunichar) (c))
||
2644 | |||
2645 | static void |
||
2646 | string_append (GString *string, |
||
2647 | const gchar *text, |
||
2648 | ChangeCase *change_case) |
||
2649 | { |
||
2650 | gunichar c; |
||
2651 | |||
2652 | if (text[0] == '\0') |
||
2653 | return; |
||
2654 | |||
2655 | if (*change_case == CHANGE_CASE_NONE) |
||
2656 | { |
||
2657 | g_string_append (string, text); |
||
2658 | } |
||
2659 | else if (*change_case & CHANGE_CASE_SINGLE_MASK) |
||
2660 | { |
||
2661 | c = g_utf8_get_char (text); |
||
2662 | g_string_append_unichar (string, CHANGE_CASE (c, *change_case)); |
||
2663 | g_string_append (string, g_utf8_next_char (text)); |
||
2664 | *change_case = CHANGE_CASE_NONE; |
||
2665 | } |
||
2666 | else |
||
2667 | { |
||
2668 | while (*text != '\0') |
||
2669 | { |
||
2670 | c = g_utf8_get_char (text); |
||
2671 | g_string_append_unichar (string, CHANGE_CASE (c, *change_case)); |
||
2672 | text = g_utf8_next_char (text); |
||
2673 | } |
||
2674 | } |
||
2675 | } |
||
2676 | |||
2677 | static gboolean |
||
2678 | interpolate_replacement (const GMatchInfo *match_info, |
||
2679 | GString *result, |
||
2680 | gpointer data) |
||
2681 | { |
||
2682 | GList *list; |
||
2683 | InterpolationData *idata; |
||
2684 | gchar *match; |
||
2685 | ChangeCase change_case = CHANGE_CASE_NONE; |
||
2686 | |||
2687 | for (list = data; list; list = list->next) |
||
2688 | { |
||
2689 | idata = list->data; |
||
2690 | switch (idata->type) |
||
2691 | { |
||
2692 | case REPL_TYPE_STRING: |
||
2693 | string_append (result, idata->text, &change_case); |
||
2694 | break; |
||
2695 | case REPL_TYPE_CHARACTER: |
||
2696 | g_string_append_c (result, CHANGE_CASE (idata->c, change_case)); |
||
2697 | if (change_case & CHANGE_CASE_SINGLE_MASK) |
||
2698 | change_case = CHANGE_CASE_NONE; |
||
2699 | break; |
||
2700 | case REPL_TYPE_NUMERIC_REFERENCE: |
||
2701 | match = g_match_info_fetch (match_info, idata->num); |
||
2702 | if (match) |
||
2703 | { |
||
2704 | string_append (result, match, &change_case); |
||
2705 | g_free (match); |
||
2706 | } |
||
2707 | break; |
||
2708 | case REPL_TYPE_SYMBOLIC_REFERENCE: |
||
2709 | match = g_match_info_fetch_named (match_info, idata->text); |
||
2710 | if (match) |
||
2711 | { |
||
2712 | string_append (result, match, &change_case); |
||
2713 | g_free (match); |
||
2714 | } |
||
2715 | break; |
||
2716 | case REPL_TYPE_CHANGE_CASE: |
||
2717 | change_case = idata->change_case; |
||
2718 | break; |
||
2719 | } |
||
2720 | } |
||
2721 | |||
2722 | return FALSE; |
||
2723 | } |
||
2724 | |||
2725 | /* whether actual match_info is needed for replacement, i.e. |
||
2726 | * whether there are references |
||
2727 | */ |
||
2728 | static gboolean |
||
2729 | interpolation_list_needs_match (GList *list) |
||
2730 | { |
||
2731 | while (list != NULL) |
||
2732 | { |
||
2733 | InterpolationData *data = list->data; |
||
2734 | |||
2735 | if (data->type == REPL_TYPE_SYMBOLIC_REFERENCE || |
||
2736 | data->type == REPL_TYPE_NUMERIC_REFERENCE) |
||
2737 | { |
||
2738 | return TRUE; |
||
2739 | } |
||
2740 | |||
2741 | list = list->next; |
||
2742 | } |
||
2743 | |||
2744 | return FALSE; |
||
2745 | } |
||
2746 | |||
2747 | /** |
||
2748 | * g_regex_replace: |
||
2749 | * @regex: a #GRegex structure |
||
2750 | * @string: (array length=string_len): the string to perform matches against |
||
2751 | * @string_len: the length of @string, or -1 if @string is nul-terminated |
||
2752 | * @start_position: starting index of the string to match, in bytes |
||
2753 | * @replacement: text to replace each match with |
||
2754 | * @match_options: options for the match |
||
2755 | * @error: location to store the error occurring, or %NULL to ignore errors |
||
2756 | * |
||
2757 | * Replaces all occurrences of the pattern in @regex with the |
||
2758 | * replacement text. Backreferences of the form '\number' or |
||
2759 | * '\g<number>' in the replacement text are interpolated by the |
||
2760 | * number-th captured subexpression of the match, '\g<name>' refers |
||
2761 | * to the captured subexpression with the given name. '\0' refers |
||
2762 | * to the complete match, but '\0' followed by a number is the octal |
||
2763 | * representation of a character. To include a literal '\' in the |
||
2764 | * replacement, write '\\'. |
||
2765 | * |
||
2766 | * There are also escapes that change the case of the following text: |
||
2767 | * |
||
2768 | * - \l: Convert to lower case the next character |
||
2769 | * - \u: Convert to upper case the next character |
||
2770 | * - \L: Convert to lower case till \E |
||
2771 | * - \U: Convert to upper case till \E |
||
2772 | * - \E: End case modification |
||
2773 | * |
||
2774 | * If you do not need to use backreferences use g_regex_replace_literal(). |
||
2775 | * |
||
2776 | * The @replacement string must be UTF-8 encoded even if #G_REGEX_RAW was |
||
2777 | * passed to g_regex_new(). If you want to use non-UTF-8 encoded strings |
||
2778 | * you can use g_regex_replace_literal(). |
||
2779 | * |
||
2780 | * Setting @start_position differs from just passing over a shortened |
||
2781 | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern that |
||
2782 | * begins with any kind of lookbehind assertion, such as "\b". |
||
2783 | * |
||
2784 | * Returns: a newly allocated string containing the replacements |
||
2785 | * |
||
2786 | * Since: 2.14 |
||
2787 | */ |
||
2788 | gchar * |
||
2789 | g_regex_replace (const GRegex *regex, |
||
2790 | const gchar *string, |
||
2791 | gssize string_len, |
||
2792 | gint start_position, |
||
2793 | const gchar *replacement, |
||
2794 | GRegexMatchFlags match_options, |
||
2795 | GError **error) |
||
2796 | { |
||
2797 | gchar *result; |
||
2798 | GList *list; |
||
2799 | GError *tmp_error = NULL; |
||
2800 | |||
2801 | g_return_val_if_fail (regex != NULL, NULL); |
||
2802 | g_return_val_if_fail (string != NULL, NULL); |
||
2803 | g_return_val_if_fail (start_position >= 0, NULL); |
||
2804 | g_return_val_if_fail (replacement != NULL, NULL); |
||
2805 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
||
2806 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
||
2807 | |||
2808 | list = split_replacement (replacement, &tmp_error); |
||
2809 | if (tmp_error != NULL) |
||
2810 | { |
||
2811 | g_propagate_error (error, tmp_error); |
||
2812 | return NULL; |
||
2813 | } |
||
2814 | |||
2815 | result = g_regex_replace_eval (regex, |
||
2816 | string, string_len, start_position, |
||
2817 | match_options, |
||
2818 | interpolate_replacement, |
||
2819 | (gpointer)list, |
||
2820 | &tmp_error); |
||
2821 | if (tmp_error != NULL) |
||
2822 | g_propagate_error (error, tmp_error); |
||
2823 | |||
2824 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
||
2825 | |||
2826 | return result; |
||
2827 | } |
||
2828 | |||
2829 | static gboolean |
||
2830 | literal_replacement (const GMatchInfo *match_info, |
||
2831 | GString *result, |
||
2832 | gpointer data) |
||
2833 | { |
||
2834 | g_string_append (result, data); |
||
2835 | return FALSE; |
||
2836 | } |
||
2837 | |||
2838 | /** |
||
2839 | * g_regex_replace_literal: |
||
2840 | * @regex: a #GRegex structure |
||
2841 | * @string: (array length=string_len): the string to perform matches against |
||
2842 | * @string_len: the length of @string, or -1 if @string is nul-terminated |
||
2843 | * @start_position: starting index of the string to match, in bytes |
||
2844 | * @replacement: text to replace each match with |
||
2845 | * @match_options: options for the match |
||
2846 | * @error: location to store the error occurring, or %NULL to ignore errors |
||
2847 | * |
||
2848 | * Replaces all occurrences of the pattern in @regex with the |
||
2849 | * replacement text. @replacement is replaced literally, to |
||
2850 | * include backreferences use g_regex_replace(). |
||
2851 | * |
||
2852 | * Setting @start_position differs from just passing over a |
||
2853 | * shortened string and setting #G_REGEX_MATCH_NOTBOL in the |
||
2854 | * case of a pattern that begins with any kind of lookbehind |
||
2855 | * assertion, such as "\b". |
||
2856 | * |
||
2857 | * Returns: a newly allocated string containing the replacements |
||
2858 | * |
||
2859 | * Since: 2.14 |
||
2860 | */ |
||
2861 | gchar * |
||
2862 | g_regex_replace_literal (const GRegex *regex, |
||
2863 | const gchar *string, |
||
2864 | gssize string_len, |
||
2865 | gint start_position, |
||
2866 | const gchar *replacement, |
||
2867 | GRegexMatchFlags match_options, |
||
2868 | GError **error) |
||
2869 | { |
||
2870 | g_return_val_if_fail (replacement != NULL, NULL); |
||
2871 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
||
2872 | |||
2873 | return g_regex_replace_eval (regex, |
||
2874 | string, string_len, start_position, |
||
2875 | match_options, |
||
2876 | literal_replacement, |
||
2877 | (gpointer)replacement, |
||
2878 | error); |
||
2879 | } |
||
2880 | |||
2881 | /** |
||
2882 | * g_regex_replace_eval: |
||
2883 | * @regex: a #GRegex structure from g_regex_new() |
||
2884 | * @string: (array length=string_len): string to perform matches against |
||
2885 | * @string_len: the length of @string, or -1 if @string is nul-terminated |
||
2886 | * @start_position: starting index of the string to match, in bytes |
||
2887 | * @match_options: options for the match |
||
2888 | * @eval: a function to call for each match |
||
2889 | * @user_data: user data to pass to the function |
||
2890 | * @error: location to store the error occurring, or %NULL to ignore errors |
||
2891 | * |
||
2892 | * Replaces occurrences of the pattern in regex with the output of |
||
2893 | * @eval for that occurrence. |
||
2894 | * |
||
2895 | * Setting @start_position differs from just passing over a shortened |
||
2896 | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
||
2897 | * that begins with any kind of lookbehind assertion, such as "\b". |
||
2898 | * |
||
2899 | * The following example uses g_regex_replace_eval() to replace multiple |
||
2900 | * strings at once: |
||
2901 | * |[<!-- language="C" --> |
||
2902 | * static gboolean |
||
2903 | * eval_cb (const GMatchInfo *info, |
||
2904 | * GString *res, |
||
2905 | * gpointer data) |
||
2906 | * { |
||
2907 | * gchar *match; |
||
2908 | * gchar *r; |
||
2909 | * |
||
2910 | * match = g_match_info_fetch (info, 0); |
||
2911 | * r = g_hash_table_lookup ((GHashTable *)data, match); |
||
2912 | * g_string_append (res, r); |
||
2913 | * g_free (match); |
||
2914 | * |
||
2915 | * return FALSE; |
||
2916 | * } |
||
2917 | * |
||
2918 | * ... |
||
2919 | * |
||
2920 | * GRegex *reg; |
||
2921 | * GHashTable *h; |
||
2922 | * gchar *res; |
||
2923 | * |
||
2924 | * h = g_hash_table_new (g_str_hash, g_str_equal); |
||
2925 | * |
||
2926 | * g_hash_table_insert (h, "1", "ONE"); |
||
2927 | * g_hash_table_insert (h, "2", "TWO"); |
||
2928 | * g_hash_table_insert (h, "3", "THREE"); |
||
2929 | * g_hash_table_insert (h, "4", "FOUR"); |
||
2930 | * |
||
2931 | * reg = g_regex_new ("1|2|3|4", 0, 0, NULL); |
||
2932 | * res = g_regex_replace_eval (reg, text, -1, 0, 0, eval_cb, h, NULL); |
||
2933 | * g_hash_table_destroy (h); |
||
2934 | * |
||
2935 | * ... |
||
2936 | * ]| |
||
2937 | * |
||
2938 | * Returns: a newly allocated string containing the replacements |
||
2939 | * |
||
2940 | * Since: 2.14 |
||
2941 | */ |
||
2942 | gchar * |
||
2943 | g_regex_replace_eval (const GRegex *regex, |
||
2944 | const gchar *string, |
||
2945 | gssize string_len, |
||
2946 | gint start_position, |
||
2947 | GRegexMatchFlags match_options, |
||
2948 | GRegexEvalCallback eval, |
||
2949 | gpointer user_data, |
||
2950 | GError **error) |
||
2951 | { |
||
2952 | GMatchInfo *match_info; |
||
2953 | GString *result; |
||
2954 | gint str_pos = 0; |
||
2955 | gboolean done = FALSE; |
||
2956 | GError *tmp_error = NULL; |
||
2957 | |||
2958 | g_return_val_if_fail (regex != NULL, NULL); |
||
2959 | g_return_val_if_fail (string != NULL, NULL); |
||
2960 | g_return_val_if_fail (start_position >= 0, NULL); |
||
2961 | g_return_val_if_fail (eval != NULL, NULL); |
||
2962 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
||
2963 | |||
2964 | if (string_len < 0) |
||
2965 | string_len = strlen (string); |
||
2966 | |||
2967 | result = g_string_sized_new (string_len); |
||
2968 | |||
2969 | /* run down the string making matches. */ |
||
2970 | g_regex_match_full (regex, string, string_len, start_position, |
||
2971 | match_options, &match_info, &tmp_error); |
||
2972 | while (!done && g_match_info_matches (match_info)) |
||
2973 | { |
||
2974 | g_string_append_len (result, |
||
2975 | string + str_pos, |
||
2976 | match_info->offsets[0] - str_pos); |
||
2977 | done = (*eval) (match_info, result, user_data); |
||
2978 | str_pos = match_info->offsets[1]; |
||
2979 | g_match_info_next (match_info, &tmp_error); |
||
2980 | } |
||
2981 | g_match_info_free (match_info); |
||
2982 | if (tmp_error != NULL) |
||
2983 | { |
||
2984 | g_propagate_error (error, tmp_error); |
||
2985 | g_string_free (result, TRUE); |
||
2986 | return NULL; |
||
2987 | } |
||
2988 | |||
2989 | g_string_append_len (result, string + str_pos, string_len - str_pos); |
||
2990 | return g_string_free (result, FALSE); |
||
2991 | } |
||
2992 | |||
2993 | /** |
||
2994 | * g_regex_check_replacement: |
||
2995 | * @replacement: the replacement string |
||
2996 | * @has_references: (out) (allow-none): location to store information about |
||
2997 | * references in @replacement or %NULL |
||
2998 | * @error: location to store error |
||
2999 | * |
||
3000 | * Checks whether @replacement is a valid replacement string |
||
3001 | * (see g_regex_replace()), i.e. that all escape sequences in |
||
3002 | * it are valid. |
||
3003 | * |
||
3004 | * If @has_references is not %NULL then @replacement is checked |
||
3005 | * for pattern references. For instance, replacement text 'foo\n' |
||
3006 | * does not contain references and may be evaluated without information |
||
3007 | * about actual match, but '\0\1' (whole match followed by first |
||
3008 | * subpattern) requires valid #GMatchInfo object. |
||
3009 | * |
||
3010 | * Returns: whether @replacement is a valid replacement string |
||
3011 | * |
||
3012 | * Since: 2.14 |
||
3013 | */ |
||
3014 | gboolean |
||
3015 | g_regex_check_replacement (const gchar *replacement, |
||
3016 | gboolean *has_references, |
||
3017 | GError **error) |
||
3018 | { |
||
3019 | GList *list; |
||
3020 | GError *tmp = NULL; |
||
3021 | |||
3022 | list = split_replacement (replacement, &tmp); |
||
3023 | |||
3024 | if (tmp) |
||
3025 | { |
||
3026 | g_propagate_error (error, tmp); |
||
3027 | return FALSE; |
||
3028 | } |
||
3029 | |||
3030 | if (has_references) |
||
3031 | *has_references = interpolation_list_needs_match (list); |
||
3032 | |||
3033 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
||
3034 | |||
3035 | return TRUE; |
||
3036 | } |
||
3037 | |||
3038 | /** |
||
3039 | * g_regex_escape_nul: |
||
3040 | * @string: the string to escape |
||
3041 | * @length: the length of @string |
||
3042 | * |
||
3043 | * Escapes the nul characters in @string to "\x00". It can be used |
||
3044 | * to compile a regex with embedded nul characters. |
||
3045 | * |
||
3046 | * For completeness, @length can be -1 for a nul-terminated string. |
||
3047 | * In this case the output string will be of course equal to @string. |
||
3048 | * |
||
3049 | * Returns: a newly-allocated escaped string |
||
3050 | * |
||
3051 | * Since: 2.30 |
||
3052 | */ |
||
3053 | gchar * |
||
3054 | g_regex_escape_nul (const gchar *string, |
||
3055 | gint length) |
||
3056 | { |
||
3057 | GString *escaped; |
||
3058 | const gchar *p, *piece_start, *end; |
||
3059 | gint backslashes; |
||
3060 | |||
3061 | g_return_val_if_fail (string != NULL, NULL); |
||
3062 | |||
3063 | if (length < 0) |
||
3064 | return g_strdup (string); |
||
3065 | |||
3066 | end = string + length; |
||
3067 | p = piece_start = string; |
||
3068 | escaped = g_string_sized_new (length + 1); |
||
3069 | |||
3070 | backslashes = 0; |
||
3071 | while (p < end) |
||
3072 | { |
||
3073 | switch (*p) |
||
3074 | { |
||
3075 | case '\0': |
||
3076 | if (p != piece_start) |
||
3077 | { |
||
3078 | /* copy the previous piece. */ |
||
3079 | g_string_append_len (escaped, piece_start, p - piece_start); |
||
3080 | } |
||
3081 | if ((backslashes & 1) == 0) |
||
3082 | g_string_append_c (escaped, '\\'); |
||
3083 | g_string_append_c (escaped, 'x'); |
||
3084 | g_string_append_c (escaped, '0'); |
||
3085 | g_string_append_c (escaped, '0'); |
||
3086 | piece_start = ++p; |
||
3087 | backslashes = 0; |
||
3088 | break; |
||
3089 | case '\\': |
||
3090 | backslashes++; |
||
3091 | ++p; |
||
3092 | break; |
||
3093 | default: |
||
3094 | backslashes = 0; |
||
3095 | p = g_utf8_next_char (p); |
||
3096 | break; |
||
3097 | } |
||
3098 | } |
||
3099 | |||
3100 | if (piece_start < end) |
||
3101 | g_string_append_len (escaped, piece_start, end - piece_start); |
||
3102 | |||
3103 | return g_string_free (escaped, FALSE); |
||
3104 | } |
||
3105 | |||
3106 | /** |
||
3107 | * g_regex_escape_string: |
||
3108 | * @string: (array length=length): the string to escape |
||
3109 | * @length: the length of @string, or -1 if @string is nul-terminated |
||
3110 | * |
||
3111 | * Escapes the special characters used for regular expressions |
||
3112 | * in @string, for instance "a.b*c" becomes "a\.b\*c". This |
||
3113 | * function is useful to dynamically generate regular expressions. |
||
3114 | * |
||
3115 | * @string can contain nul characters that are replaced with "\0", |
||
3116 | * in this case remember to specify the correct length of @string |
||
3117 | * in @length. |
||
3118 | * |
||
3119 | * Returns: a newly-allocated escaped string |
||
3120 | * |
||
3121 | * Since: 2.14 |
||
3122 | */ |
||
3123 | gchar * |
||
3124 | g_regex_escape_string (const gchar *string, |
||
3125 | gint length) |
||
3126 | { |
||
3127 | GString *escaped; |
||
3128 | const char *p, *piece_start, *end; |
||
3129 | |||
3130 | g_return_val_if_fail (string != NULL, NULL); |
||
3131 | |||
3132 | if (length < 0) |
||
3133 | length = strlen (string); |
||
3134 | |||
3135 | end = string + length; |
||
3136 | p = piece_start = string; |
||
3137 | escaped = g_string_sized_new (length + 1); |
||
3138 | |||
3139 | while (p < end) |
||
3140 | { |
||
3141 | switch (*p) |
||
3142 | { |
||
3143 | case '\0': |
||
3144 | case '\\': |
||
3145 | case '|': |
||
3146 | case '(': |
||
3147 | case ')': |
||
3148 | case '[': |
||
3149 | case ']': |
||
3150 | case '{': |
||
3151 | case '}': |
||
3152 | case '^': |
||
3153 | case '$': |
||
3154 | case '*': |
||
3155 | case '+': |
||
3156 | case '?': |
||
3157 | case '.': |
||
3158 | if (p != piece_start) |
||
3159 | /* copy the previous piece. */ |
||
3160 | g_string_append_len (escaped, piece_start, p - piece_start); |
||
3161 | g_string_append_c (escaped, '\\'); |
||
3162 | if (*p == '\0') |
||
3163 | g_string_append_c (escaped, '0'); |
||
3164 | else |
||
3165 | g_string_append_c (escaped, *p); |
||
3166 | piece_start = ++p; |
||
3167 | break; |
||
3168 | default: |
||
3169 | p = g_utf8_next_char (p); |
||
3170 | break; |
||
3171 | } |
||
3172 | } |
||
3173 | |||
3174 | if (piece_start < end) |
||
3175 | g_string_append_len (escaped, piece_start, end - piece_start); |
||
3176 | |||
3177 | return g_string_free (escaped, FALSE); |
||
3178 | } |