nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* GLIB - Library of useful routines for C programming |
2 | * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc. |
||
3 | * |
||
4 | * This library is free software; you can redistribute it and/or |
||
5 | * modify it under the terms of the GNU Lesser General Public |
||
6 | * License as published by the Free Software Foundation; either |
||
7 | * version 2 of the License, or (at your option) any later version. |
||
8 | * |
||
9 | * This library is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
12 | * Lesser General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU Lesser General Public |
||
15 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
||
16 | */ |
||
17 | |||
18 | #include "config.h" |
||
19 | |||
20 | #include <string.h> |
||
21 | |||
22 | #include "gpattern.h" |
||
23 | |||
24 | #include "gmacros.h" |
||
25 | #include "gmessages.h" |
||
26 | #include "gmem.h" |
||
27 | #include "gunicode.h" |
||
28 | #include "gutils.h" |
||
29 | |||
30 | /** |
||
31 | * SECTION:patterns |
||
32 | * @title: Glob-style pattern matching |
||
33 | * @short_description: matches strings against patterns containing '*' |
||
34 | * (wildcard) and '?' (joker) |
||
35 | * |
||
36 | * The g_pattern_match* functions match a string |
||
37 | * against a pattern containing '*' and '?' wildcards with similar |
||
38 | * semantics as the standard glob() function: '*' matches an arbitrary, |
||
39 | * possibly empty, string, '?' matches an arbitrary character. |
||
40 | * |
||
41 | * Note that in contrast to glob(), the '/' character can be matched by |
||
42 | * the wildcards, there are no '[...]' character ranges and '*' and '?' |
||
43 | * can not be escaped to include them literally in a pattern. |
||
44 | * |
||
45 | * When multiple strings must be matched against the same pattern, it |
||
46 | * is better to compile the pattern to a #GPatternSpec using |
||
47 | * g_pattern_spec_new() and use g_pattern_match_string() instead of |
||
48 | * g_pattern_match_simple(). This avoids the overhead of repeated |
||
49 | * pattern compilation. |
||
50 | **/ |
||
51 | |||
52 | /** |
||
53 | * GPatternSpec: |
||
54 | * |
||
55 | * A GPatternSpec struct is the 'compiled' form of a pattern. This |
||
56 | * structure is opaque and its fields cannot be accessed directly. |
||
57 | */ |
||
58 | |||
59 | /* keep enum and structure of gpattern.c and patterntest.c in sync */ |
||
60 | typedef enum |
||
61 | { |
||
62 | G_MATCH_ALL, /* "*A?A*" */ |
||
63 | G_MATCH_ALL_TAIL, /* "*A?AA" */ |
||
64 | G_MATCH_HEAD, /* "AAAA*" */ |
||
65 | G_MATCH_TAIL, /* "*AAAA" */ |
||
66 | G_MATCH_EXACT, /* "AAAAA" */ |
||
67 | G_MATCH_LAST |
||
68 | } GMatchType; |
||
69 | |||
70 | struct _GPatternSpec |
||
71 | { |
||
72 | GMatchType match_type; |
||
73 | guint pattern_length; |
||
74 | guint min_length; |
||
75 | guint max_length; |
||
76 | gchar *pattern; |
||
77 | }; |
||
78 | |||
79 | |||
80 | /* --- functions --- */ |
||
81 | static inline gboolean |
||
82 | g_pattern_ph_match (const gchar *match_pattern, |
||
83 | const gchar *match_string, |
||
84 | gboolean *wildcard_reached_p) |
||
85 | { |
||
86 | const gchar *pattern, *string; |
||
87 | gchar ch; |
||
88 | |||
89 | pattern = match_pattern; |
||
90 | string = match_string; |
||
91 | |||
92 | ch = *pattern; |
||
93 | pattern++; |
||
94 | while (ch) |
||
95 | { |
||
96 | switch (ch) |
||
97 | { |
||
98 | case '?': |
||
99 | if (!*string) |
||
100 | return FALSE; |
||
101 | string = g_utf8_next_char (string); |
||
102 | break; |
||
103 | |||
104 | case '*': |
||
105 | *wildcard_reached_p = TRUE; |
||
106 | do |
||
107 | { |
||
108 | ch = *pattern; |
||
109 | pattern++; |
||
110 | if (ch == '?') |
||
111 | { |
||
112 | if (!*string) |
||
113 | return FALSE; |
||
114 | string = g_utf8_next_char (string); |
||
115 | } |
||
116 | } |
||
117 | while (ch == '*' || ch == '?'); |
||
118 | if (!ch) |
||
119 | return TRUE; |
||
120 | do |
||
121 | { |
||
122 | gboolean next_wildcard_reached = FALSE; |
||
123 | while (ch != *string) |
||
124 | { |
||
125 | if (!*string) |
||
126 | return FALSE; |
||
127 | string = g_utf8_next_char (string); |
||
128 | } |
||
129 | string++; |
||
130 | if (g_pattern_ph_match (pattern, string, &next_wildcard_reached)) |
||
131 | return TRUE; |
||
132 | if (next_wildcard_reached) |
||
133 | /* the forthcoming pattern substring up to the next wildcard has |
||
134 | * been matched, but a mismatch occoured for the rest of the |
||
135 | * pattern, following the next wildcard. |
||
136 | * there's no need to advance the current match position any |
||
137 | * further if the rest pattern will not match. |
||
138 | */ |
||
139 | return FALSE; |
||
140 | } |
||
141 | while (*string); |
||
142 | break; |
||
143 | |||
144 | default: |
||
145 | if (ch == *string) |
||
146 | string++; |
||
147 | else |
||
148 | return FALSE; |
||
149 | break; |
||
150 | } |
||
151 | |||
152 | ch = *pattern; |
||
153 | pattern++; |
||
154 | } |
||
155 | |||
156 | return *string == 0; |
||
157 | } |
||
158 | |||
159 | /** |
||
160 | * g_pattern_match: |
||
161 | * @pspec: a #GPatternSpec |
||
162 | * @string_length: the length of @string (in bytes, i.e. strlen(), |
||
163 | * not g_utf8_strlen()) |
||
164 | * @string: the UTF-8 encoded string to match |
||
165 | * @string_reversed: (allow-none): the reverse of @string or %NULL |
||
166 | * |
||
167 | * Matches a string against a compiled pattern. Passing the correct |
||
168 | * length of the string given is mandatory. The reversed string can be |
||
169 | * omitted by passing %NULL, this is more efficient if the reversed |
||
170 | * version of the string to be matched is not at hand, as |
||
171 | * g_pattern_match() will only construct it if the compiled pattern |
||
172 | * requires reverse matches. |
||
173 | * |
||
174 | * Note that, if the user code will (possibly) match a string against a |
||
175 | * multitude of patterns containing wildcards, chances are high that |
||
176 | * some patterns will require a reversed string. In this case, it's |
||
177 | * more efficient to provide the reversed string to avoid multiple |
||
178 | * constructions thereof in the various calls to g_pattern_match(). |
||
179 | * |
||
180 | * Note also that the reverse of a UTF-8 encoded string can in general |
||
181 | * not be obtained by g_strreverse(). This works only if the string |
||
182 | * does not contain any multibyte characters. GLib offers the |
||
183 | * g_utf8_strreverse() function to reverse UTF-8 encoded strings. |
||
184 | * |
||
185 | * Returns: %TRUE if @string matches @pspec |
||
186 | **/ |
||
187 | gboolean |
||
188 | g_pattern_match (GPatternSpec *pspec, |
||
189 | guint string_length, |
||
190 | const gchar *string, |
||
191 | const gchar *string_reversed) |
||
192 | { |
||
193 | g_return_val_if_fail (pspec != NULL, FALSE); |
||
194 | g_return_val_if_fail (string != NULL, FALSE); |
||
195 | |||
196 | if (string_length < pspec->min_length || |
||
197 | string_length > pspec->max_length) |
||
198 | return FALSE; |
||
199 | |||
200 | switch (pspec->match_type) |
||
201 | { |
||
202 | gboolean dummy; |
||
203 | case G_MATCH_ALL: |
||
204 | return g_pattern_ph_match (pspec->pattern, string, &dummy); |
||
205 | case G_MATCH_ALL_TAIL: |
||
206 | if (string_reversed) |
||
207 | return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy); |
||
208 | else |
||
209 | { |
||
210 | gboolean result; |
||
211 | gchar *tmp; |
||
212 | tmp = g_utf8_strreverse (string, string_length); |
||
213 | result = g_pattern_ph_match (pspec->pattern, tmp, &dummy); |
||
214 | g_free (tmp); |
||
215 | return result; |
||
216 | } |
||
217 | case G_MATCH_HEAD: |
||
218 | if (pspec->pattern_length == string_length) |
||
219 | return strcmp (pspec->pattern, string) == 0; |
||
220 | else if (pspec->pattern_length) |
||
221 | return strncmp (pspec->pattern, string, pspec->pattern_length) == 0; |
||
222 | else |
||
223 | return TRUE; |
||
224 | case G_MATCH_TAIL: |
||
225 | if (pspec->pattern_length) |
||
226 | return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0; |
||
227 | else |
||
228 | return TRUE; |
||
229 | case G_MATCH_EXACT: |
||
230 | if (pspec->pattern_length != string_length) |
||
231 | return FALSE; |
||
232 | else |
||
233 | return strcmp (pspec->pattern, string) == 0; |
||
234 | default: |
||
235 | g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE); |
||
236 | return FALSE; |
||
237 | } |
||
238 | } |
||
239 | |||
240 | /** |
||
241 | * g_pattern_spec_new: |
||
242 | * @pattern: a zero-terminated UTF-8 encoded string |
||
243 | * |
||
244 | * Compiles a pattern to a #GPatternSpec. |
||
245 | * |
||
246 | * Returns: a newly-allocated #GPatternSpec |
||
247 | **/ |
||
248 | GPatternSpec* |
||
249 | g_pattern_spec_new (const gchar *pattern) |
||
250 | { |
||
251 | GPatternSpec *pspec; |
||
252 | gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE; |
||
253 | gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1; |
||
254 | gboolean follows_wildcard = FALSE; |
||
255 | guint pending_jokers = 0; |
||
256 | const gchar *s; |
||
257 | gchar *d; |
||
258 | guint i; |
||
259 | |||
260 | g_return_val_if_fail (pattern != NULL, NULL); |
||
261 | |||
262 | /* canonicalize pattern and collect necessary stats */ |
||
263 | pspec = g_new (GPatternSpec, 1); |
||
264 | pspec->pattern_length = strlen (pattern); |
||
265 | pspec->min_length = 0; |
||
266 | pspec->max_length = 0; |
||
267 | pspec->pattern = g_new (gchar, pspec->pattern_length + 1); |
||
268 | d = pspec->pattern; |
||
269 | for (i = 0, s = pattern; *s != 0; s++) |
||
270 | { |
||
271 | switch (*s) |
||
272 | { |
||
273 | case '*': |
||
274 | if (follows_wildcard) /* compress multiple wildcards */ |
||
275 | { |
||
276 | pspec->pattern_length--; |
||
277 | continue; |
||
278 | } |
||
279 | follows_wildcard = TRUE; |
||
280 | if (hw_pos < 0) |
||
281 | hw_pos = i; |
||
282 | tw_pos = i; |
||
283 | break; |
||
284 | case '?': |
||
285 | pending_jokers++; |
||
286 | pspec->min_length++; |
||
287 | pspec->max_length += 4; /* maximum UTF-8 character length */ |
||
288 | continue; |
||
289 | default: |
||
290 | for (; pending_jokers; pending_jokers--, i++) { |
||
291 | *d++ = '?'; |
||
292 | if (hj_pos < 0) |
||
293 | hj_pos = i; |
||
294 | tj_pos = i; |
||
295 | } |
||
296 | follows_wildcard = FALSE; |
||
297 | pspec->min_length++; |
||
298 | pspec->max_length++; |
||
299 | break; |
||
300 | } |
||
301 | *d++ = *s; |
||
302 | i++; |
||
303 | } |
||
304 | for (; pending_jokers; pending_jokers--) { |
||
305 | *d++ = '?'; |
||
306 | if (hj_pos < 0) |
||
307 | hj_pos = i; |
||
308 | tj_pos = i; |
||
309 | } |
||
310 | *d++ = 0; |
||
311 | seen_joker = hj_pos >= 0; |
||
312 | seen_wildcard = hw_pos >= 0; |
||
313 | more_wildcards = seen_wildcard && hw_pos != tw_pos; |
||
314 | if (seen_wildcard) |
||
315 | pspec->max_length = G_MAXUINT; |
||
316 | |||
317 | /* special case sole head/tail wildcard or exact matches */ |
||
318 | if (!seen_joker && !more_wildcards) |
||
319 | { |
||
320 | if (pspec->pattern[0] == '*') |
||
321 | { |
||
322 | pspec->match_type = G_MATCH_TAIL; |
||
323 | memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length); |
||
324 | pspec->pattern[pspec->pattern_length] = 0; |
||
325 | return pspec; |
||
326 | } |
||
327 | if (pspec->pattern_length > 0 && |
||
328 | pspec->pattern[pspec->pattern_length - 1] == '*') |
||
329 | { |
||
330 | pspec->match_type = G_MATCH_HEAD; |
||
331 | pspec->pattern[--pspec->pattern_length] = 0; |
||
332 | return pspec; |
||
333 | } |
||
334 | if (!seen_wildcard) |
||
335 | { |
||
336 | pspec->match_type = G_MATCH_EXACT; |
||
337 | return pspec; |
||
338 | } |
||
339 | } |
||
340 | |||
341 | /* now just need to distinguish between head or tail match start */ |
||
342 | tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */ |
||
343 | tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */ |
||
344 | if (seen_wildcard) |
||
345 | pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL; |
||
346 | else /* seen_joker */ |
||
347 | pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL; |
||
348 | if (pspec->match_type == G_MATCH_ALL_TAIL) { |
||
349 | gchar *tmp = pspec->pattern; |
||
350 | pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length); |
||
351 | g_free (tmp); |
||
352 | } |
||
353 | return pspec; |
||
354 | } |
||
355 | |||
356 | /** |
||
357 | * g_pattern_spec_free: |
||
358 | * @pspec: a #GPatternSpec |
||
359 | * |
||
360 | * Frees the memory allocated for the #GPatternSpec. |
||
361 | **/ |
||
362 | void |
||
363 | g_pattern_spec_free (GPatternSpec *pspec) |
||
364 | { |
||
365 | g_return_if_fail (pspec != NULL); |
||
366 | |||
367 | g_free (pspec->pattern); |
||
368 | g_free (pspec); |
||
369 | } |
||
370 | |||
371 | /** |
||
372 | * g_pattern_spec_equal: |
||
373 | * @pspec1: a #GPatternSpec |
||
374 | * @pspec2: another #GPatternSpec |
||
375 | * |
||
376 | * Compares two compiled pattern specs and returns whether they will |
||
377 | * match the same set of strings. |
||
378 | * |
||
379 | * Returns: Whether the compiled patterns are equal |
||
380 | **/ |
||
381 | gboolean |
||
382 | g_pattern_spec_equal (GPatternSpec *pspec1, |
||
383 | GPatternSpec *pspec2) |
||
384 | { |
||
385 | g_return_val_if_fail (pspec1 != NULL, FALSE); |
||
386 | g_return_val_if_fail (pspec2 != NULL, FALSE); |
||
387 | |||
388 | return (pspec1->pattern_length == pspec2->pattern_length && |
||
389 | pspec1->match_type == pspec2->match_type && |
||
390 | strcmp (pspec1->pattern, pspec2->pattern) == 0); |
||
391 | } |
||
392 | |||
393 | /** |
||
394 | * g_pattern_match_string: |
||
395 | * @pspec: a #GPatternSpec |
||
396 | * @string: the UTF-8 encoded string to match |
||
397 | * |
||
398 | * Matches a string against a compiled pattern. If the string is to be |
||
399 | * matched against more than one pattern, consider using |
||
400 | * g_pattern_match() instead while supplying the reversed string. |
||
401 | * |
||
402 | * Returns: %TRUE if @string matches @pspec |
||
403 | **/ |
||
404 | gboolean |
||
405 | g_pattern_match_string (GPatternSpec *pspec, |
||
406 | const gchar *string) |
||
407 | { |
||
408 | g_return_val_if_fail (pspec != NULL, FALSE); |
||
409 | g_return_val_if_fail (string != NULL, FALSE); |
||
410 | |||
411 | return g_pattern_match (pspec, strlen (string), string, NULL); |
||
412 | } |
||
413 | |||
414 | /** |
||
415 | * g_pattern_match_simple: |
||
416 | * @pattern: the UTF-8 encoded pattern |
||
417 | * @string: the UTF-8 encoded string to match |
||
418 | * |
||
419 | * Matches a string against a pattern given as a string. If this |
||
420 | * function is to be called in a loop, it's more efficient to compile |
||
421 | * the pattern once with g_pattern_spec_new() and call |
||
422 | * g_pattern_match_string() repeatedly. |
||
423 | * |
||
424 | * Returns: %TRUE if @string matches @pspec |
||
425 | **/ |
||
426 | gboolean |
||
427 | g_pattern_match_simple (const gchar *pattern, |
||
428 | const gchar *string) |
||
429 | { |
||
430 | GPatternSpec *pspec; |
||
431 | gboolean ergo; |
||
432 | |||
433 | g_return_val_if_fail (pattern != NULL, FALSE); |
||
434 | g_return_val_if_fail (string != NULL, FALSE); |
||
435 | |||
436 | pspec = g_pattern_spec_new (pattern); |
||
437 | ergo = g_pattern_match (pspec, strlen (string), string, NULL); |
||
438 | g_pattern_spec_free (pspec); |
||
439 | |||
440 | return ergo; |
||
441 | } |