nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */ |
2 | |||
3 | /* GLIB - Library of useful routines for C programming |
||
4 | * Copyright (C) 2008 Red Hat, Inc. |
||
5 | * |
||
6 | * This library is free software; you can redistribute it and/or |
||
7 | * modify it under the terms of the GNU Lesser General Public |
||
8 | * License as published by the Free Software Foundation; either |
||
9 | * version 2 of the License, or (at your option) any later version. |
||
10 | * |
||
11 | * This library is distributed in the hope that it will be useful, |
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
14 | * Lesser General Public License for more details. |
||
15 | * |
||
16 | * You should have received a copy of the GNU Lesser General |
||
17 | * Public License along with this library; if not, see <http://www.gnu.org/licenses/>. |
||
18 | */ |
||
19 | |||
20 | #include "config.h" |
||
21 | |||
22 | #include <string.h> |
||
23 | |||
24 | #include "ghostutils.h" |
||
25 | |||
26 | #include "garray.h" |
||
27 | #include "gmem.h" |
||
28 | #include "gstring.h" |
||
29 | #include "gstrfuncs.h" |
||
30 | #include "glibintl.h" |
||
31 | |||
32 | |||
33 | /** |
||
34 | * SECTION:ghostutils |
||
35 | * @short_description: Internet hostname utilities |
||
36 | * |
||
37 | * Functions for manipulating internet hostnames; in particular, for |
||
38 | * converting between Unicode and ASCII-encoded forms of |
||
39 | * Internationalized Domain Names (IDNs). |
||
40 | * |
||
41 | * The |
||
42 | * [Internationalized Domain Names for Applications (IDNA)](http://www.ietf.org/rfc/rfc3490.txt) |
||
43 | * standards allow for the use |
||
44 | * of Unicode domain names in applications, while providing |
||
45 | * backward-compatibility with the old ASCII-only DNS, by defining an |
||
46 | * ASCII-Compatible Encoding of any given Unicode name, which can be |
||
47 | * used with non-IDN-aware applications and protocols. (For example, |
||
48 | * "Παν語.org" maps to "xn--4wa8awb4637h.org".) |
||
49 | **/ |
||
50 | |||
/* Prefix that marks a label as ASCII-Compatible-Encoded (Punycode),
 * and the number of bytes in that prefix (not counting the NUL).
 */
#define IDNA_ACE_PREFIX "xn--"
#define IDNA_ACE_PREFIX_LEN 4

/* Punycode constants, from RFC 3492. */

#define PUNYCODE_BASE 36
#define PUNYCODE_TMIN 1
#define PUNYCODE_TMAX 26
#define PUNYCODE_SKEW 38
#define PUNYCODE_DAMP 700
#define PUNYCODE_INITIAL_BIAS 72
#define PUNYCODE_INITIAL_N 0x80

/* Evaluates to true when @cp, viewed as an unsigned value, is below
 * 0x80 (i.e. it is an ASCII / "basic" code point).
 */
#define PUNYCODE_IS_BASIC(cp) ((guint)(cp) < 0x80)
||
65 | |||
66 | /* Encode/decode a single base-36 digit */ |
||
67 | static inline gchar |
||
68 | encode_digit (guint dig) |
||
69 | { |
||
70 | if (dig < 26) |
||
71 | return dig + 'a'; |
||
72 | else |
||
73 | return dig - 26 + '0'; |
||
74 | } |
||
75 | |||
76 | static inline guint |
||
77 | decode_digit (gchar dig) |
||
78 | { |
||
79 | if (dig >= 'A' && dig <= 'Z') |
||
80 | return dig - 'A'; |
||
81 | else if (dig >= 'a' && dig <= 'z') |
||
82 | return dig - 'a'; |
||
83 | else if (dig >= '0' && dig <= '9') |
||
84 | return dig - '0' + 26; |
||
85 | else |
||
86 | return G_MAXUINT; |
||
87 | } |
||
88 | |||
89 | /* Punycode bias adaptation algorithm, RFC 3492 section 6.1 */ |
||
90 | static guint |
||
91 | adapt (guint delta, |
||
92 | guint numpoints, |
||
93 | gboolean firsttime) |
||
94 | { |
||
95 | guint k; |
||
96 | |||
97 | delta = firsttime ? delta / PUNYCODE_DAMP : delta / 2; |
||
98 | delta += delta / numpoints; |
||
99 | |||
100 | k = 0; |
||
101 | while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2) |
||
102 | { |
||
103 | delta /= PUNYCODE_BASE - PUNYCODE_TMIN; |
||
104 | k += PUNYCODE_BASE; |
||
105 | } |
||
106 | |||
107 | return k + ((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta / |
||
108 | (delta + PUNYCODE_SKEW)); |
||
109 | } |
||
110 | |||
/* Punycode encoder, RFC 3492 section 6.3. The algorithm is
 * sufficiently bizarre that it's not really worth trying to explain
 * here.
 *
 * @input_utf8: UTF-8 text of the label to encode
 * @input_utf8_length: length argument passed on to g_utf8_to_ucs4()
 * @output: string that the encoded form is appended to
 *
 * Returns TRUE on success. Returns FALSE if the UTF-8 -> UCS-4
 * conversion fails or if one of the overflow checks on @delta trips.
 * Note that on the overflow paths some characters may already have
 * been appended to @output; the caller is expected to discard it.
 */
static gboolean
punycode_encode (const gchar *input_utf8,
                 gsize input_utf8_length,
                 GString *output)
{
  guint delta, handled_chars, num_basic_chars, bias, j, q, k, t, digit;
  gunichar n, m, *input;
  glong input_length;
  gboolean success = FALSE;

  /* Convert from UTF-8 to Unicode code points */
  input = g_utf8_to_ucs4 (input_utf8, input_utf8_length, NULL,
                          &input_length, NULL);
  if (!input)
    return FALSE;

  /* Copy basic chars (lowercased), counting them as we go */
  for (j = num_basic_chars = 0; j < input_length; j++)
    {
      if (PUNYCODE_IS_BASIC (input[j]))
        {
          g_string_append_c (output, g_ascii_tolower (input[j]));
          num_basic_chars++;
        }
    }
  /* The '-' delimiter is only emitted when at least one basic char
   * was copied.
   */
  if (num_basic_chars)
    g_string_append_c (output, '-');

  handled_chars = num_basic_chars;

  /* Encode non-basic chars */
  delta = 0;
  bias = PUNYCODE_INITIAL_BIAS;
  n = PUNYCODE_INITIAL_N;
  while (handled_chars < input_length)
    {
      /* let m = the minimum {non-basic} code point >= n in the input */
      for (m = G_MAXUINT, j = 0; j < input_length; j++)
        {
          if (input[j] >= n && input[j] < m)
            m = input[j];
        }

      /* Refuse to continue if (m - n) * (handled_chars + 1) would
       * overflow delta.
       */
      if (m - n > (G_MAXUINT - delta) / (handled_chars + 1))
        goto fail;
      delta += (m - n) * (handled_chars + 1);
      n = m;

      for (j = 0; j < input_length; j++)
        {
          if (input[j] < n)
            {
              /* delta wrapping around to 0 means overflow */
              if (++delta == 0)
                goto fail;
            }
          else if (input[j] == n)
            {
              /* Emit delta as a variable-length base-36 integer */
              q = delta;
              for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
                {
                  if (k <= bias)
                    t = PUNYCODE_TMIN;
                  else if (k >= bias + PUNYCODE_TMAX)
                    t = PUNYCODE_TMAX;
                  else
                    t = k - bias;
                  if (q < t)
                    break;
                  digit = t + (q - t) % (PUNYCODE_BASE - t);
                  g_string_append_c (output, encode_digit (digit));
                  q = (q - t) / (PUNYCODE_BASE - t);
                }

              g_string_append_c (output, encode_digit (q));
              /* "first time" is true only for the first non-basic char */
              bias = adapt (delta, handled_chars + 1, handled_chars == num_basic_chars);
              delta = 0;
              handled_chars++;
            }
        }

      delta++;
      n++;
    }

  success = TRUE;

 fail:
  /* Shared exit: the UCS-4 buffer is released on both paths */
  g_free (input);
  return success;
}
||
205 | |||
/* From RFC 3454, Table B.1
 *
 * Evaluates to true when @ch equals one of the code points listed
 * below. @ch is evaluated many times, so it must be free of side
 * effects.
 */
#define idna_is_junk(ch) ((ch) == 0x00AD || (ch) == 0x1806 || (ch) == 0x200B || (ch) == 0x2060 || (ch) == 0xFEFF || (ch) == 0x034F || (ch) == 0x180B || (ch) == 0x180C || (ch) == 0x180D || (ch) == 0x200C || (ch) == 0x200D || ((ch) >= 0xFE00 && (ch) <= 0xFE0F))
||
208 | |||
209 | /* Scan @str for "junk" and return a cleaned-up string if any junk |
||
210 | * is found. Else return %NULL. |
||
211 | */ |
||
212 | static gchar * |
||
213 | remove_junk (const gchar *str, |
||
214 | gint len) |
||
215 | { |
||
216 | GString *cleaned = NULL; |
||
217 | const gchar *p; |
||
218 | gunichar ch; |
||
219 | |||
220 | for (p = str; len == -1 ? *p : p < str + len; p = g_utf8_next_char (p)) |
||
221 | { |
||
222 | ch = g_utf8_get_char (p); |
||
223 | if (idna_is_junk (ch)) |
||
224 | { |
||
225 | if (!cleaned) |
||
226 | { |
||
227 | cleaned = g_string_new (NULL); |
||
228 | g_string_append_len (cleaned, str, p - str); |
||
229 | } |
||
230 | } |
||
231 | else if (cleaned) |
||
232 | g_string_append_unichar (cleaned, ch); |
||
233 | } |
||
234 | |||
235 | if (cleaned) |
||
236 | return g_string_free (cleaned, FALSE); |
||
237 | else |
||
238 | return NULL; |
||
239 | } |
||
240 | |||
241 | static inline gboolean |
||
242 | contains_uppercase_letters (const gchar *str, |
||
243 | gint len) |
||
244 | { |
||
245 | const gchar *p; |
||
246 | |||
247 | for (p = str; len == -1 ? *p : p < str + len; p = g_utf8_next_char (p)) |
||
248 | { |
||
249 | if (g_unichar_isupper (g_utf8_get_char (p))) |
||
250 | return TRUE; |
||
251 | } |
||
252 | return FALSE; |
||
253 | } |
||
254 | |||
255 | static inline gboolean |
||
256 | contains_non_ascii (const gchar *str, |
||
257 | gint len) |
||
258 | { |
||
259 | const gchar *p; |
||
260 | |||
261 | for (p = str; len == -1 ? *p : p < str + len; p++) |
||
262 | { |
||
263 | if ((guchar)*p > 0x80) |
||
264 | return TRUE; |
||
265 | } |
||
266 | return FALSE; |
||
267 | } |
||
268 | |||
269 | /* RFC 3454, Appendix C. ish. */ |
||
270 | static inline gboolean |
||
271 | idna_is_prohibited (gunichar ch) |
||
272 | { |
||
273 | switch (g_unichar_type (ch)) |
||
274 | { |
||
275 | case G_UNICODE_CONTROL: |
||
276 | case G_UNICODE_FORMAT: |
||
277 | case G_UNICODE_UNASSIGNED: |
||
278 | case G_UNICODE_PRIVATE_USE: |
||
279 | case G_UNICODE_SURROGATE: |
||
280 | case G_UNICODE_LINE_SEPARATOR: |
||
281 | case G_UNICODE_PARAGRAPH_SEPARATOR: |
||
282 | case G_UNICODE_SPACE_SEPARATOR: |
||
283 | return TRUE; |
||
284 | |||
285 | case G_UNICODE_OTHER_SYMBOL: |
||
286 | if (ch == 0xFFFC || ch == 0xFFFD || |
||
287 | (ch >= 0x2FF0 && ch <= 0x2FFB)) |
||
288 | return TRUE; |
||
289 | return FALSE; |
||
290 | |||
291 | case G_UNICODE_NON_SPACING_MARK: |
||
292 | if (ch == 0x0340 || ch == 0x0341) |
||
293 | return TRUE; |
||
294 | return FALSE; |
||
295 | |||
296 | default: |
||
297 | return FALSE; |
||
298 | } |
||
299 | } |
||
300 | |||
301 | /* RFC 3491 IDN cleanup algorithm. */ |
||
302 | static gchar * |
||
303 | nameprep (const gchar *hostname, |
||
304 | gint len, |
||
305 | gboolean *is_unicode) |
||
306 | { |
||
307 | gchar *name, *tmp = NULL, *p; |
||
308 | |||
309 | /* It would be nice if we could do this without repeatedly |
||
310 | * allocating strings and converting back and forth between |
||
311 | * gunichars and UTF-8... The code does at least avoid doing most of |
||
312 | * the sub-operations when they would just be equivalent to a |
||
313 | * g_strdup(). |
||
314 | */ |
||
315 | |||
316 | /* Remove presentation-only characters */ |
||
317 | name = remove_junk (hostname, len); |
||
318 | if (name) |
||
319 | { |
||
320 | tmp = name; |
||
321 | len = -1; |
||
322 | } |
||
323 | else |
||
324 | name = (gchar *)hostname; |
||
325 | |||
326 | /* Convert to lowercase */ |
||
327 | if (contains_uppercase_letters (name, len)) |
||
328 | { |
||
329 | name = g_utf8_strdown (name, len); |
||
330 | g_free (tmp); |
||
331 | tmp = name; |
||
332 | len = -1; |
||
333 | } |
||
334 | |||
335 | /* If there are no UTF8 characters, we're done. */ |
||
336 | if (!contains_non_ascii (name, len)) |
||
337 | { |
||
338 | *is_unicode = FALSE; |
||
339 | if (name == (gchar *)hostname) |
||
340 | return len == -1 ? g_strdup (hostname) : g_strndup (hostname, len); |
||
341 | else |
||
342 | return name; |
||
343 | } |
||
344 | |||
345 | *is_unicode = TRUE; |
||
346 | |||
347 | /* Normalize */ |
||
348 | name = g_utf8_normalize (name, len, G_NORMALIZE_NFKC); |
||
349 | g_free (tmp); |
||
350 | tmp = name; |
||
351 | |||
352 | if (!name) |
||
353 | return NULL; |
||
354 | |||
355 | /* KC normalization may have created more capital letters (eg, |
||
356 | * angstrom -> capital A with ring). So we have to lowercasify a |
||
357 | * second time. (This is more-or-less how the nameprep algorithm |
||
358 | * does it. If tolower(nfkc(tolower(X))) is guaranteed to be the |
||
359 | * same as tolower(nfkc(X)), then we could skip the first tolower, |
||
360 | * but I'm not sure it is.) |
||
361 | */ |
||
362 | if (contains_uppercase_letters (name, -1)) |
||
363 | { |
||
364 | name = g_utf8_strdown (name, -1); |
||
365 | g_free (tmp); |
||
366 | tmp = name; |
||
367 | } |
||
368 | |||
369 | /* Check for prohibited characters */ |
||
370 | for (p = name; *p; p = g_utf8_next_char (p)) |
||
371 | { |
||
372 | if (idna_is_prohibited (g_utf8_get_char (p))) |
||
373 | { |
||
374 | name = NULL; |
||
375 | g_free (tmp); |
||
376 | goto done; |
||
377 | } |
||
378 | } |
||
379 | |||
380 | /* FIXME: We're supposed to verify certain constraints on bidi |
||
381 | * characters, but glib does not appear to have that information. |
||
382 | */ |
||
383 | |||
384 | done: |
||
385 | return name; |
||
386 | } |
||
387 | |||
/* RFC 3490, section 3.1 says '.', 0x3002, 0xFF0E, and 0xFF61 count as
 * label-separating dots. @str must be '\0'-terminated.
 *
 * The test is done on raw bytes: either a single '.' byte, or one of
 * three 3-byte sequences (E3 80 82, EF BC 8E, EF BD A1). Up to three
 * bytes starting at @str may be read; the && chains stop at the first
 * mismatching byte, which is why the NUL terminator is required.
 * @str is evaluated several times, so it must have no side effects.
 */
#define idna_is_dot(str) ( \
  ((guchar)(str)[0] == '.') || \
  ((guchar)(str)[0] == 0xE3 && (guchar)(str)[1] == 0x80 && (guchar)(str)[2] == 0x82) || \
  ((guchar)(str)[0] == 0xEF && (guchar)(str)[1] == 0xBC && (guchar)(str)[2] == 0x8E) || \
  ((guchar)(str)[0] == 0xEF && (guchar)(str)[1] == 0xBD && (guchar)(str)[2] == 0xA1) )
||
396 | |||
397 | static const gchar * |
||
398 | idna_end_of_label (const gchar *str) |
||
399 | { |
||
400 | for (; *str; str = g_utf8_next_char (str)) |
||
401 | { |
||
402 | if (idna_is_dot (str)) |
||
403 | return str; |
||
404 | } |
||
405 | return str; |
||
406 | } |
||
407 | |||
408 | /** |
||
409 | * g_hostname_to_ascii: |
||
410 | * @hostname: a valid UTF-8 or ASCII hostname |
||
411 | * |
||
412 | * Converts @hostname to its canonical ASCII form; an ASCII-only |
||
413 | * string containing no uppercase letters and not ending with a |
||
414 | * trailing dot. |
||
415 | * |
||
416 | * Returns: an ASCII hostname, which must be freed, or %NULL if |
||
417 | * @hostname is in some way invalid. |
||
418 | * |
||
419 | * Since: 2.22 |
||
420 | **/ |
||
421 | gchar * |
||
422 | g_hostname_to_ascii (const gchar *hostname) |
||
423 | { |
||
424 | gchar *name, *label, *p; |
||
425 | GString *out; |
||
426 | gssize llen, oldlen; |
||
427 | gboolean unicode; |
||
428 | |||
429 | label = name = nameprep (hostname, -1, &unicode); |
||
430 | if (!name || !unicode) |
||
431 | return name; |
||
432 | |||
433 | out = g_string_new (NULL); |
||
434 | |||
435 | do |
||
436 | { |
||
437 | unicode = FALSE; |
||
438 | for (p = label; *p && !idna_is_dot (p); p++) |
||
439 | { |
||
440 | if ((guchar)*p > 0x80) |
||
441 | unicode = TRUE; |
||
442 | } |
||
443 | |||
444 | oldlen = out->len; |
||
445 | llen = p - label; |
||
446 | if (unicode) |
||
447 | { |
||
448 | if (!strncmp (label, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN)) |
||
449 | goto fail; |
||
450 | |||
451 | g_string_append (out, IDNA_ACE_PREFIX); |
||
452 | if (!punycode_encode (label, llen, out)) |
||
453 | goto fail; |
||
454 | } |
||
455 | else |
||
456 | g_string_append_len (out, label, llen); |
||
457 | |||
458 | if (out->len - oldlen > 63) |
||
459 | goto fail; |
||
460 | |||
461 | label += llen; |
||
462 | if (*label) |
||
463 | label = g_utf8_next_char (label); |
||
464 | if (*label) |
||
465 | g_string_append_c (out, '.'); |
||
466 | } |
||
467 | while (*label); |
||
468 | |||
469 | g_free (name); |
||
470 | return g_string_free (out, FALSE); |
||
471 | |||
472 | fail: |
||
473 | g_free (name); |
||
474 | g_string_free (out, TRUE); |
||
475 | return NULL; |
||
476 | } |
||
477 | |||
478 | /** |
||
479 | * g_hostname_is_non_ascii: |
||
480 | * @hostname: a hostname |
||
481 | * |
||
482 | * Tests if @hostname contains Unicode characters. If this returns |
||
483 | * %TRUE, you need to encode the hostname with g_hostname_to_ascii() |
||
484 | * before using it in non-IDN-aware contexts. |
||
485 | * |
||
486 | * Note that a hostname might contain a mix of encoded and unencoded |
||
487 | * segments, and so it is possible for g_hostname_is_non_ascii() and |
||
488 | * g_hostname_is_ascii_encoded() to both return %TRUE for a name. |
||
489 | * |
||
490 | * Returns: %TRUE if @hostname contains any non-ASCII characters |
||
491 | * |
||
492 | * Since: 2.22 |
||
493 | **/ |
||
494 | gboolean |
||
495 | g_hostname_is_non_ascii (const gchar *hostname) |
||
496 | { |
||
497 | return contains_non_ascii (hostname, -1); |
||
498 | } |
||
499 | |||
/* Punycode decoder, RFC 3492 section 6.2. As with punycode_encode(),
 * read the RFC if you want to understand what this is actually doing.
 *
 * @input: the encoded label, without the "xn--" prefix
 * @input_length: number of bytes of @input to decode
 * @output: string that the decoded UTF-8 text is appended to
 *
 * Returns TRUE on success. Returns FALSE if a byte before the last
 * '-' is not a basic code point, if a character is not a valid
 * base-36 digit, if the input ends in the middle of a number, or if
 * one of the overflow checks trips. @output is only written to after
 * decoding has fully succeeded.
 */
static gboolean
punycode_decode (const gchar *input,
                 gsize input_length,
                 GString *output)
{
  GArray *output_chars;
  gunichar n;
  guint i, bias;
  guint oldi, w, k, digit, t;
  const gchar *split;

  n = PUNYCODE_INITIAL_N;
  i = 0;
  bias = PUNYCODE_INITIAL_BIAS;

  /* Search backwards for the last '-'; everything before it is the
   * literal (basic) portion. A '-' at position 0 is not treated as a
   * delimiter.
   */
  split = input + input_length - 1;
  while (split > input && *split != '-')
    split--;
  if (split > input)
    {
      output_chars = g_array_sized_new (FALSE, FALSE, sizeof (gunichar),
                                        split - input);
      /* Remaining length excludes the basic chars and the '-' */
      input_length -= (split - input) + 1;
      while (input < split)
        {
          gunichar ch = (gunichar)*input++;
          if (!PUNYCODE_IS_BASIC (ch))
            goto fail;
          g_array_append_val (output_chars, ch);
        }
      /* Skip the delimiter itself */
      input++;
    }
  else
    output_chars = g_array_new (FALSE, FALSE, sizeof (gunichar));

  while (input_length)
    {
      /* Decode one variable-length integer into i */
      oldi = i;
      w = 1;
      for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
        {
          /* Running out of input mid-number is an error */
          if (!input_length--)
            goto fail;
          digit = decode_digit (*input++);
          if (digit >= PUNYCODE_BASE)
            goto fail;
          /* Guard i += digit * w against overflow */
          if (digit > (G_MAXUINT - i) / w)
            goto fail;
          i += digit * w;
          if (k <= bias)
            t = PUNYCODE_TMIN;
          else if (k >= bias + PUNYCODE_TMAX)
            t = PUNYCODE_TMAX;
          else
            t = k - bias;
          if (digit < t)
            break;
          /* Guard w *= (BASE - t) against overflow */
          if (w > G_MAXUINT / (PUNYCODE_BASE - t))
            goto fail;
          w *= (PUNYCODE_BASE - t);
        }

      bias = adapt (i - oldi, output_chars->len + 1, oldi == 0);

      /* Guard n += i / (len + 1) against overflow */
      if (i / (output_chars->len + 1) > G_MAXUINT - n)
        goto fail;
      n += i / (output_chars->len + 1);
      i %= (output_chars->len + 1);

      /* Insert code point n at position i, then move past it */
      g_array_insert_val (output_chars, i++, n);
    }

  /* Success: convert the collected code points to UTF-8 */
  for (i = 0; i < output_chars->len; i++)
    g_string_append_unichar (output, g_array_index (output_chars, gunichar, i));
  g_array_free (output_chars, TRUE);
  return TRUE;

 fail:
  g_array_free (output_chars, TRUE);
  return FALSE;
}
||
584 | |||
585 | /** |
||
586 | * g_hostname_to_unicode: |
||
587 | * @hostname: a valid UTF-8 or ASCII hostname |
||
588 | * |
||
589 | * Converts @hostname to its canonical presentation form; a UTF-8 |
||
590 | * string in Unicode normalization form C, containing no uppercase |
||
591 | * letters, no forbidden characters, and no ASCII-encoded segments, |
||
592 | * and not ending with a trailing dot. |
||
593 | * |
||
594 | * Of course if @hostname is not an internationalized hostname, then |
||
595 | * the canonical presentation form will be entirely ASCII. |
||
596 | * |
||
597 | * Returns: a UTF-8 hostname, which must be freed, or %NULL if |
||
598 | * @hostname is in some way invalid. |
||
599 | * |
||
600 | * Since: 2.22 |
||
601 | **/ |
||
602 | gchar * |
||
603 | g_hostname_to_unicode (const gchar *hostname) |
||
604 | { |
||
605 | GString *out; |
||
606 | gssize llen; |
||
607 | |||
608 | out = g_string_new (NULL); |
||
609 | |||
610 | do |
||
611 | { |
||
612 | llen = idna_end_of_label (hostname) - hostname; |
||
613 | if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN)) |
||
614 | { |
||
615 | hostname += IDNA_ACE_PREFIX_LEN; |
||
616 | llen -= IDNA_ACE_PREFIX_LEN; |
||
617 | if (!punycode_decode (hostname, llen, out)) |
||
618 | { |
||
619 | g_string_free (out, TRUE); |
||
620 | return NULL; |
||
621 | } |
||
622 | } |
||
623 | else |
||
624 | { |
||
625 | gboolean unicode; |
||
626 | gchar *canonicalized = nameprep (hostname, llen, &unicode); |
||
627 | |||
628 | if (!canonicalized) |
||
629 | { |
||
630 | g_string_free (out, TRUE); |
||
631 | return NULL; |
||
632 | } |
||
633 | g_string_append (out, canonicalized); |
||
634 | g_free (canonicalized); |
||
635 | } |
||
636 | |||
637 | hostname += llen; |
||
638 | if (*hostname) |
||
639 | hostname = g_utf8_next_char (hostname); |
||
640 | if (*hostname) |
||
641 | g_string_append_c (out, '.'); |
||
642 | } |
||
643 | while (*hostname); |
||
644 | |||
645 | return g_string_free (out, FALSE); |
||
646 | } |
||
647 | |||
648 | /** |
||
649 | * g_hostname_is_ascii_encoded: |
||
650 | * @hostname: a hostname |
||
651 | * |
||
652 | * Tests if @hostname contains segments with an ASCII-compatible |
||
653 | * encoding of an Internationalized Domain Name. If this returns |
||
654 | * %TRUE, you should decode the hostname with g_hostname_to_unicode() |
||
655 | * before displaying it to the user. |
||
656 | * |
||
657 | * Note that a hostname might contain a mix of encoded and unencoded |
||
658 | * segments, and so it is possible for g_hostname_is_non_ascii() and |
||
659 | * g_hostname_is_ascii_encoded() to both return %TRUE for a name. |
||
660 | * |
||
661 | * Returns: %TRUE if @hostname contains any ASCII-encoded |
||
662 | * segments. |
||
663 | * |
||
664 | * Since: 2.22 |
||
665 | **/ |
||
666 | gboolean |
||
667 | g_hostname_is_ascii_encoded (const gchar *hostname) |
||
668 | { |
||
669 | while (1) |
||
670 | { |
||
671 | if (!g_ascii_strncasecmp (hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN)) |
||
672 | return TRUE; |
||
673 | hostname = idna_end_of_label (hostname); |
||
674 | if (*hostname) |
||
675 | hostname = g_utf8_next_char (hostname); |
||
676 | if (!*hostname) |
||
677 | return FALSE; |
||
678 | } |
||
679 | } |
||
680 | |||
681 | /** |
||
682 | * g_hostname_is_ip_address: |
||
683 | * @hostname: a hostname (or IP address in string form) |
||
684 | * |
||
685 | * Tests if @hostname is the string form of an IPv4 or IPv6 address. |
||
686 | * (Eg, "192.168.0.1".) |
||
687 | * |
||
688 | * Returns: %TRUE if @hostname is an IP address |
||
689 | * |
||
690 | * Since: 2.22 |
||
691 | **/ |
||
692 | gboolean |
||
693 | g_hostname_is_ip_address (const gchar *hostname) |
||
694 | { |
||
695 | gchar *p, *end; |
||
696 | gint nsegments, octet; |
||
697 | |||
698 | /* On Linux we could implement this using inet_pton, but the Windows |
||
699 | * equivalent of that requires linking against winsock, so we just |
||
700 | * figure this out ourselves. Tested by tests/hostutils.c. |
||
701 | */ |
||
702 | |||
703 | p = (char *)hostname; |
||
704 | |||
705 | if (strchr (p, ':')) |
||
706 | { |
||
707 | gboolean skipped; |
||
708 | |||
709 | /* If it contains a ':', it's an IPv6 address (assuming it's an |
||
710 | * IP address at all). This consists of eight ':'-separated |
||
711 | * segments, each containing a 1-4 digit hex number, except that |
||
712 | * optionally: (a) the last two segments can be replaced by an |
||
713 | * IPv4 address, and (b) a single span of 1 to 8 "0000" segments |
||
714 | * can be replaced with just "::". |
||
715 | */ |
||
716 | |||
717 | nsegments = 0; |
||
718 | skipped = FALSE; |
||
719 | while (*p && nsegments < 8) |
||
720 | { |
||
721 | /* Each segment after the first must be preceded by a ':'. |
||
722 | * (We also handle half of the "string starts with ::" case |
||
723 | * here.) |
||
724 | */ |
||
725 | if (p != (char *)hostname || (p[0] == ':' && p[1] == ':')) |
||
726 | { |
||
727 | if (*p != ':') |
||
728 | return FALSE; |
||
729 | p++; |
||
730 | } |
||
731 | |||
732 | /* If there's another ':', it means we're skipping some segments */ |
||
733 | if (*p == ':' && !skipped) |
||
734 | { |
||
735 | skipped = TRUE; |
||
736 | nsegments++; |
||
737 | |||
738 | /* Handle the "string ends with ::" case */ |
||
739 | if (!p[1]) |
||
740 | p++; |
||
741 | |||
742 | continue; |
||
743 | } |
||
744 | |||
745 | /* Read the segment, make sure it's valid. */ |
||
746 | for (end = p; g_ascii_isxdigit (*end); end++) |
||
747 | ; |
||
748 | if (end == p || end > p + 4) |
||
749 | return FALSE; |
||
750 | |||
751 | if (*end == '.') |
||
752 | { |
||
753 | if ((nsegments == 6 && !skipped) || (nsegments <= 6 && skipped)) |
||
754 | goto parse_ipv4; |
||
755 | else |
||
756 | return FALSE; |
||
757 | } |
||
758 | |||
759 | nsegments++; |
||
760 | p = end; |
||
761 | } |
||
762 | |||
763 | return !*p && (nsegments == 8 || skipped); |
||
764 | } |
||
765 | |||
766 | parse_ipv4: |
||
767 | |||
768 | /* Parse IPv4: N.N.N.N, where each N <= 255 and doesn't have leading 0s. */ |
||
769 | for (nsegments = 0; nsegments < 4; nsegments++) |
||
770 | { |
||
771 | if (nsegments != 0) |
||
772 | { |
||
773 | if (*p != '.') |
||
774 | return FALSE; |
||
775 | p++; |
||
776 | } |
||
777 | |||
778 | /* Check the segment; a little tricker than the IPv6 case since |
||
779 | * we can't allow extra leading 0s, and we can't assume that all |
||
780 | * strings of valid length are within range. |
||
781 | */ |
||
782 | octet = 0; |
||
783 | if (*p == '0') |
||
784 | end = p + 1; |
||
785 | else |
||
786 | { |
||
787 | for (end = p; g_ascii_isdigit (*end); end++) |
||
788 | octet = 10 * octet + (*end - '0'); |
||
789 | } |
||
790 | if (end == p || end > p + 3 || octet > 255) |
||
791 | return FALSE; |
||
792 | |||
793 | p = end; |
||
794 | } |
||
795 | |||
796 | /* If there's nothing left to parse, then it's ok. */ |
||
797 | return !*p; |
||
798 | } |