nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* gcharset.c - Charset information |
2 | * |
||
3 | * Copyright (C) 2011 Red Hat, Inc. |
||
4 | * |
||
5 | * This library is free software; you can redistribute it and/or |
||
6 | * modify it under the terms of the GNU Lesser General Public |
||
7 | * License as published by the Free Software Foundation; either |
||
8 | * version 2 of the License, or (at your option) any later version. |
||
9 | * |
||
10 | * This library is distributed in the hope that it will be useful, |
||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
13 | * Lesser General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU Lesser General Public |
||
16 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
||
17 | */ |
||
18 | |||
19 | #include "config.h" |
||
20 | |||
21 | #include "gcharsetprivate.h" |
||
22 | |||
23 | #include "garray.h" |
||
24 | #include "genviron.h" |
||
25 | #include "ghash.h" |
||
26 | #include "gmessages.h" |
||
27 | #include "gstrfuncs.h" |
||
28 | #include "gthread.h" |
||
29 | #ifdef G_OS_WIN32 |
||
30 | #include "gwin32.h" |
||
31 | #endif |
||
32 | |||
33 | #include "libcharset/libcharset.h" |
||
34 | |||
35 | #include <string.h> |
||
36 | #include <stdio.h> |
||
37 | |||
38 | G_LOCK_DEFINE_STATIC (aliases); |
||
39 | |||
40 | static GHashTable * |
||
41 | get_alias_hash (void) |
||
42 | { |
||
43 | static GHashTable *alias_hash = NULL; |
||
44 | const char *aliases; |
||
45 | |||
46 | G_LOCK (aliases); |
||
47 | |||
48 | if (!alias_hash) |
||
49 | { |
||
50 | alias_hash = g_hash_table_new (g_str_hash, g_str_equal); |
||
51 | |||
52 | aliases = _g_locale_get_charset_aliases (); |
||
53 | while (*aliases != '\0') |
||
54 | { |
||
55 | const char *canonical; |
||
56 | const char *alias; |
||
57 | const char **alias_array; |
||
58 | int count = 0; |
||
59 | |||
60 | alias = aliases; |
||
61 | aliases += strlen (aliases) + 1; |
||
62 | canonical = aliases; |
||
63 | aliases += strlen (aliases) + 1; |
||
64 | |||
65 | alias_array = g_hash_table_lookup (alias_hash, canonical); |
||
66 | if (alias_array) |
||
67 | { |
||
68 | while (alias_array[count]) |
||
69 | count++; |
||
70 | } |
||
71 | |||
72 | alias_array = g_renew (const char *, alias_array, count + 2); |
||
73 | alias_array[count] = alias; |
||
74 | alias_array[count + 1] = NULL; |
||
75 | |||
76 | g_hash_table_insert (alias_hash, (char *)canonical, alias_array); |
||
77 | } |
||
78 | } |
||
79 | |||
80 | G_UNLOCK (aliases); |
||
81 | |||
82 | return alias_hash; |
||
83 | } |
||
84 | |||
/* As an abuse of the alias table, this routine gets the charsets that
 * are aliases for the given canonical name: the result is whatever
 * get_alias_hash() has stored under canonical_name (a NULL-terminated
 * array of names), or NULL when nothing is stored for it.
 */
const char **
_g_charset_get_aliases (const char *canonical_name)
{
  return g_hash_table_lookup (get_alias_hash (), canonical_name);
}
||
95 | |||
96 | static gboolean |
||
97 | g_utf8_get_charset_internal (const char *raw_data, |
||
98 | const char **a) |
||
99 | { |
||
100 | const char *charset = g_getenv ("CHARSET"); |
||
101 | |||
102 | if (charset && *charset) |
||
103 | { |
||
104 | *a = charset; |
||
105 | |||
106 | if (charset && strstr (charset, "UTF-8")) |
||
107 | return TRUE; |
||
108 | else |
||
109 | return FALSE; |
||
110 | } |
||
111 | |||
112 | /* The libcharset code tries to be thread-safe without |
||
113 | * a lock, but has a memory leak and a missing memory |
||
114 | * barrier, so we lock for it |
||
115 | */ |
||
116 | G_LOCK (aliases); |
||
117 | charset = _g_locale_charset_unalias (raw_data); |
||
118 | G_UNLOCK (aliases); |
||
119 | |||
120 | if (charset && *charset) |
||
121 | { |
||
122 | *a = charset; |
||
123 | |||
124 | if (charset && strstr (charset, "UTF-8")) |
||
125 | return TRUE; |
||
126 | else |
||
127 | return FALSE; |
||
128 | } |
||
129 | |||
130 | /* Assume this for compatibility at present. */ |
||
131 | *a = "US-ASCII"; |
||
132 | |||
133 | return FALSE; |
||
134 | } |
||
135 | |||
136 | typedef struct _GCharsetCache GCharsetCache; |
||
137 | |||
138 | struct _GCharsetCache { |
||
139 | gboolean is_utf8; |
||
140 | gchar *raw; |
||
141 | gchar *charset; |
||
142 | }; |
||
143 | |||
144 | static void |
||
145 | charset_cache_free (gpointer data) |
||
146 | { |
||
147 | GCharsetCache *cache = data; |
||
148 | g_free (cache->raw); |
||
149 | g_free (cache->charset); |
||
150 | g_free (cache); |
||
151 | } |
||
152 | |||
153 | /** |
||
154 | * g_get_charset: |
||
155 | * @charset: (out) (optional) (transfer none): return location for character set |
||
156 | * name, or %NULL. |
||
157 | * |
||
158 | * Obtains the character set for the [current locale][setlocale]; you |
||
159 | * might use this character set as an argument to g_convert(), to convert |
||
160 | * from the current locale's encoding to some other encoding. (Frequently |
||
161 | * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.) |
||
162 | * |
||
163 | * On Windows the character set returned by this function is the |
||
164 | * so-called system default ANSI code-page. That is the character set |
||
165 | * used by the "narrow" versions of C library and Win32 functions that |
||
166 | * handle file names. It might be different from the character set |
||
167 | * used by the C library's current locale. |
||
168 | * |
||
169 | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
||
170 | * case you can perhaps avoid calling g_convert(). |
||
171 | * |
||
172 | * The string returned in @charset is not allocated, and should not be |
||
173 | * freed. |
||
174 | * |
||
175 | * Returns: %TRUE if the returned charset is UTF-8 |
||
176 | */ |
||
177 | gboolean |
||
178 | g_get_charset (const char **charset) |
||
179 | { |
||
180 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
||
181 | GCharsetCache *cache = g_private_get (&cache_private); |
||
182 | const gchar *raw; |
||
183 | |||
184 | if (!cache) |
||
185 | { |
||
186 | cache = g_new0 (GCharsetCache, 1); |
||
187 | g_private_set (&cache_private, cache); |
||
188 | } |
||
189 | |||
190 | G_LOCK (aliases); |
||
191 | raw = _g_locale_charset_raw (); |
||
192 | G_UNLOCK (aliases); |
||
193 | |||
194 | if (!(cache->raw && strcmp (cache->raw, raw) == 0)) |
||
195 | { |
||
196 | const gchar *new_charset; |
||
197 | |||
198 | g_free (cache->raw); |
||
199 | g_free (cache->charset); |
||
200 | cache->raw = g_strdup (raw); |
||
201 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
||
202 | cache->charset = g_strdup (new_charset); |
||
203 | } |
||
204 | |||
205 | if (charset) |
||
206 | *charset = cache->charset; |
||
207 | |||
208 | return cache->is_utf8; |
||
209 | } |
||
210 | |||
211 | /** |
||
212 | * g_get_codeset: |
||
213 | * |
||
214 | * Gets the character set for the current locale. |
||
215 | * |
||
216 | * Returns: a newly allocated string containing the name |
||
217 | * of the character set. This string must be freed with g_free(). |
||
218 | */ |
||
219 | gchar * |
||
220 | g_get_codeset (void) |
||
221 | { |
||
222 | const gchar *charset; |
||
223 | |||
224 | g_get_charset (&charset); |
||
225 | |||
226 | return g_strdup (charset); |
||
227 | } |
||
228 | |||
229 | #ifndef G_OS_WIN32 |
||
230 | |||
231 | /* read an alias file for the locales */ |
||
232 | static void |
||
233 | read_aliases (gchar *file, |
||
234 | GHashTable *alias_table) |
||
235 | { |
||
236 | FILE *fp; |
||
237 | char buf[256]; |
||
238 | |||
239 | fp = fopen (file,"r"); |
||
240 | if (!fp) |
||
241 | return; |
||
242 | while (fgets (buf, 256, fp)) |
||
243 | { |
||
244 | char *p, *q; |
||
245 | |||
246 | g_strstrip (buf); |
||
247 | |||
248 | /* Line is a comment */ |
||
249 | if ((buf[0] == '#') || (buf[0] == '\0')) |
||
250 | continue; |
||
251 | |||
252 | /* Reads first column */ |
||
253 | for (p = buf, q = NULL; *p; p++) { |
||
254 | if ((*p == '\t') || (*p == ' ') || (*p == ':')) { |
||
255 | *p = '\0'; |
||
256 | q = p+1; |
||
257 | while ((*q == '\t') || (*q == ' ')) { |
||
258 | q++; |
||
259 | } |
||
260 | break; |
||
261 | } |
||
262 | } |
||
263 | /* The line only had one column */ |
||
264 | if (!q || *q == '\0') |
||
265 | continue; |
||
266 | |||
267 | /* Read second column */ |
||
268 | for (p = q; *p; p++) { |
||
269 | if ((*p == '\t') || (*p == ' ')) { |
||
270 | *p = '\0'; |
||
271 | break; |
||
272 | } |
||
273 | } |
||
274 | |||
275 | /* Add to alias table if necessary */ |
||
276 | if (!g_hash_table_lookup (alias_table, buf)) { |
||
277 | g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q)); |
||
278 | } |
||
279 | } |
||
280 | fclose (fp); |
||
281 | } |
||
282 | |||
283 | #endif |
||
284 | |||
285 | static char * |
||
286 | unalias_lang (char *lang) |
||
287 | { |
||
288 | #ifndef G_OS_WIN32 |
||
289 | static GHashTable *alias_table = NULL; |
||
290 | char *p; |
||
291 | int i; |
||
292 | |||
293 | if (g_once_init_enter (&alias_table)) |
||
294 | { |
||
295 | GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal); |
||
296 | read_aliases ("/usr/share/locale/locale.alias", table); |
||
297 | g_once_init_leave (&alias_table, table); |
||
298 | } |
||
299 | |||
300 | i = 0; |
||
301 | while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0)) |
||
302 | { |
||
303 | lang = p; |
||
304 | if (i++ == 30) |
||
305 | { |
||
306 | static gboolean said_before = FALSE; |
||
307 | if (!said_before) |
||
308 | g_warning ("Too many alias levels for a locale, " |
||
309 | "may indicate a loop"); |
||
310 | said_before = TRUE; |
||
311 | return lang; |
||
312 | } |
||
313 | } |
||
314 | #endif |
||
315 | return lang; |
||
316 | } |
||
317 | |||
/* Bit flags naming the optional parts of a locale specification.
 * The bits are ordered from least significant component (codeset)
 * to most significant component (modifier).
 */
enum
{
  COMPONENT_CODESET   = 0x1,
  COMPONENT_TERRITORY = 0x2,
  COMPONENT_MODIFIER  = 0x4
};
||
327 | |||
328 | /* Break an X/Open style locale specification into components |
||
329 | */ |
||
330 | static guint |
||
331 | explode_locale (const gchar *locale, |
||
332 | gchar **language, |
||
333 | gchar **territory, |
||
334 | gchar **codeset, |
||
335 | gchar **modifier) |
||
336 | { |
||
337 | const gchar *uscore_pos; |
||
338 | const gchar *at_pos; |
||
339 | const gchar *dot_pos; |
||
340 | |||
341 | guint mask = 0; |
||
342 | |||
343 | uscore_pos = strchr (locale, '_'); |
||
344 | dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.'); |
||
345 | at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@'); |
||
346 | |||
347 | if (at_pos) |
||
348 | { |
||
349 | mask |= COMPONENT_MODIFIER; |
||
350 | *modifier = g_strdup (at_pos); |
||
351 | } |
||
352 | else |
||
353 | at_pos = locale + strlen (locale); |
||
354 | |||
355 | if (dot_pos) |
||
356 | { |
||
357 | mask |= COMPONENT_CODESET; |
||
358 | *codeset = g_strndup (dot_pos, at_pos - dot_pos); |
||
359 | } |
||
360 | else |
||
361 | dot_pos = at_pos; |
||
362 | |||
363 | if (uscore_pos) |
||
364 | { |
||
365 | mask |= COMPONENT_TERRITORY; |
||
366 | *territory = g_strndup (uscore_pos, dot_pos - uscore_pos); |
||
367 | } |
||
368 | else |
||
369 | uscore_pos = dot_pos; |
||
370 | |||
371 | *language = g_strndup (locale, uscore_pos - locale); |
||
372 | |||
373 | return mask; |
||
374 | } |
||
375 | |||
376 | /* |
||
377 | * Compute all interesting variants for a given locale name - |
||
378 | * by stripping off different components of the value. |
||
379 | * |
||
380 | * For simplicity, we assume that the locale is in |
||
381 | * X/Open format: language[_territory][.codeset][@modifier] |
||
382 | * |
||
383 | * TODO: Extend this to handle the CEN format (see the GNUlibc docs) |
||
384 | * as well. We could just copy the code from glibc wholesale |
||
385 | * but it is big, ugly, and complicated, so I'm reluctant |
||
386 | * to do so when this should handle 99% of the time... |
||
387 | */ |
||
388 | static void |
||
389 | append_locale_variants (GPtrArray *array, |
||
390 | const gchar *locale) |
||
391 | { |
||
392 | gchar *language = NULL; |
||
393 | gchar *territory = NULL; |
||
394 | gchar *codeset = NULL; |
||
395 | gchar *modifier = NULL; |
||
396 | |||
397 | guint mask; |
||
398 | guint i, j; |
||
399 | |||
400 | g_return_if_fail (locale != NULL); |
||
401 | |||
402 | mask = explode_locale (locale, &language, &territory, &codeset, &modifier); |
||
403 | |||
404 | /* Iterate through all possible combinations, from least attractive |
||
405 | * to most attractive. |
||
406 | */ |
||
407 | for (j = 0; j <= mask; ++j) |
||
408 | { |
||
409 | i = mask - j; |
||
410 | |||
411 | if ((i & ~mask) == 0) |
||
412 | { |
||
413 | gchar *val = g_strconcat (language, |
||
414 | (i & COMPONENT_TERRITORY) ? territory : "", |
||
415 | (i & COMPONENT_CODESET) ? codeset : "", |
||
416 | (i & COMPONENT_MODIFIER) ? modifier : "", |
||
417 | NULL); |
||
418 | g_ptr_array_add (array, val); |
||
419 | } |
||
420 | } |
||
421 | |||
422 | g_free (language); |
||
423 | if (mask & COMPONENT_CODESET) |
||
424 | g_free (codeset); |
||
425 | if (mask & COMPONENT_TERRITORY) |
||
426 | g_free (territory); |
||
427 | if (mask & COMPONENT_MODIFIER) |
||
428 | g_free (modifier); |
||
429 | } |
||
430 | |||
431 | /** |
||
432 | * g_get_locale_variants: |
||
433 | * @locale: a locale identifier |
||
434 | * |
||
435 | * Returns a list of derived variants of @locale, which can be used to |
||
436 | * e.g. construct locale-dependent filenames or search paths. The returned |
||
437 | * list is sorted from most desirable to least desirable. |
||
438 | * This function handles territory, charset and extra locale modifiers. |
||
439 | * |
||
440 | * For example, if @locale is "fr_BE", then the returned list |
||
441 | * is "fr_BE", "fr". |
||
442 | * |
||
443 | * If you need the list of variants for the current locale, |
||
444 | * use g_get_language_names(). |
||
445 | * |
||
446 | * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly |
||
447 | * allocated array of newly allocated strings with the locale variants. Free with |
||
448 | * g_strfreev(). |
||
449 | * |
||
450 | * Since: 2.28 |
||
451 | */ |
||
452 | gchar ** |
||
453 | g_get_locale_variants (const gchar *locale) |
||
454 | { |
||
455 | GPtrArray *array; |
||
456 | |||
457 | g_return_val_if_fail (locale != NULL, NULL); |
||
458 | |||
459 | array = g_ptr_array_sized_new (8); |
||
460 | append_locale_variants (array, locale); |
||
461 | g_ptr_array_add (array, NULL); |
||
462 | |||
463 | return (gchar **) g_ptr_array_free (array, FALSE); |
||
464 | } |
||
465 | |||
466 | /* The following is (partly) taken from the gettext package. |
||
467 | Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */ |
||
468 | |||
469 | static const gchar * |
||
470 | guess_category_value (const gchar *category_name) |
||
471 | { |
||
472 | const gchar *retval; |
||
473 | |||
474 | /* The highest priority value is the 'LANGUAGE' environment |
||
475 | variable. This is a GNU extension. */ |
||
476 | retval = g_getenv ("LANGUAGE"); |
||
477 | if ((retval != NULL) && (retval[0] != '\0')) |
||
478 | return retval; |
||
479 | |||
480 | /* 'LANGUAGE' is not set. So we have to proceed with the POSIX |
||
481 | methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'. On some |
||
482 | systems this can be done by the 'setlocale' function itself. */ |
||
483 | |||
484 | /* Setting of LC_ALL overwrites all other. */ |
||
485 | retval = g_getenv ("LC_ALL"); |
||
486 | if ((retval != NULL) && (retval[0] != '\0')) |
||
487 | return retval; |
||
488 | |||
489 | /* Next comes the name of the desired category. */ |
||
490 | retval = g_getenv (category_name); |
||
491 | if ((retval != NULL) && (retval[0] != '\0')) |
||
492 | return retval; |
||
493 | |||
494 | /* Last possibility is the LANG environment variable. */ |
||
495 | retval = g_getenv ("LANG"); |
||
496 | if ((retval != NULL) && (retval[0] != '\0')) |
||
497 | return retval; |
||
498 | |||
499 | #ifdef G_PLATFORM_WIN32 |
||
500 | /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and |
||
501 | * LANG, which we already did above. Oh well. The main point of |
||
502 | * calling g_win32_getlocale() is to get the thread's locale as used |
||
503 | * by Windows and the Microsoft C runtime (in the "English_United |
||
504 | * States" format) translated into the Unixish format. |
||
505 | */ |
||
506 | { |
||
507 | char *locale = g_win32_getlocale (); |
||
508 | retval = g_intern_string (locale); |
||
509 | g_free (locale); |
||
510 | return retval; |
||
511 | } |
||
512 | #endif |
||
513 | |||
514 | return NULL; |
||
515 | } |
||
516 | |||
517 | typedef struct _GLanguageNamesCache GLanguageNamesCache; |
||
518 | |||
519 | struct _GLanguageNamesCache { |
||
520 | gchar *languages; |
||
521 | gchar **language_names; |
||
522 | }; |
||
523 | |||
524 | static void |
||
525 | language_names_cache_free (gpointer data) |
||
526 | { |
||
527 | GLanguageNamesCache *cache = data; |
||
528 | g_free (cache->languages); |
||
529 | g_strfreev (cache->language_names); |
||
530 | g_free (cache); |
||
531 | } |
||
532 | |||
533 | /** |
||
534 | * g_get_language_names: |
||
535 | * |
||
536 | * Computes a list of applicable locale names, which can be used to |
||
537 | * e.g. construct locale-dependent filenames or search paths. The returned |
||
538 | * list is sorted from most desirable to least desirable and always contains |
||
539 | * the default locale "C". |
||
540 | * |
||
541 | * For example, if LANGUAGE=de:en_US, then the returned list is |
||
542 | * "de", "en_US", "en", "C". |
||
543 | * |
||
544 | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
||
545 | * `LC_MESSAGES` and `LANG` to find the list of locales specified by the |
||
546 | * user. |
||
547 | * |
||
548 | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib |
||
549 | * that must not be modified or freed. |
||
550 | * |
||
551 | * Since: 2.6 |
||
552 | **/ |
||
553 | const gchar * const * |
||
554 | g_get_language_names (void) |
||
555 | { |
||
556 | static GPrivate cache_private = G_PRIVATE_INIT (language_names_cache_free); |
||
557 | GLanguageNamesCache *cache = g_private_get (&cache_private); |
||
558 | const gchar *value; |
||
559 | |||
560 | if (!cache) |
||
561 | { |
||
562 | cache = g_new0 (GLanguageNamesCache, 1); |
||
563 | g_private_set (&cache_private, cache); |
||
564 | } |
||
565 | |||
566 | value = guess_category_value ("LC_MESSAGES"); |
||
567 | if (!value) |
||
568 | value = "C"; |
||
569 | |||
570 | if (!(cache->languages && strcmp (cache->languages, value) == 0)) |
||
571 | { |
||
572 | GPtrArray *array; |
||
573 | gchar **alist, **a; |
||
574 | |||
575 | g_free (cache->languages); |
||
576 | g_strfreev (cache->language_names); |
||
577 | cache->languages = g_strdup (value); |
||
578 | |||
579 | array = g_ptr_array_sized_new (8); |
||
580 | |||
581 | alist = g_strsplit (value, ":", 0); |
||
582 | for (a = alist; *a; a++) |
||
583 | append_locale_variants (array, unalias_lang (*a)); |
||
584 | g_strfreev (alist); |
||
585 | g_ptr_array_add (array, g_strdup ("C")); |
||
586 | g_ptr_array_add (array, NULL); |
||
587 | |||
588 | cache->language_names = (gchar **) g_ptr_array_free (array, FALSE); |
||
589 | } |
||
590 | |||
591 | return (const gchar * const *) cache->language_names; |
||
592 | } |