nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Copyright © 2014 Canonical Limited |
||
3 | * |
||
4 | * This library is free software; you can redistribute it and/or |
||
5 | * modify it under the terms of the GNU Lesser General Public |
||
6 | * License as published by the Free Software Foundation; either |
||
7 | * version 2 of the licence, or (at your option) any later version. |
||
8 | * |
||
9 | * This library is distributed in the hope that it will be useful, |
||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
12 | * Lesser General Public License for more details. |
||
13 | * |
||
14 | * You should have received a copy of the GNU Lesser General Public |
||
15 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
||
16 | * |
||
17 | * Author: Ryan Lortie <desrt@desrt.ca> |
||
18 | */ |
||
19 | |||
20 | #include <config.h> |
||
21 | |||
22 | #include "gstrfuncs.h" |
||
23 | |||
24 | #include <glib.h> |
||
25 | #include <locale.h> |
||
26 | #include <stdlib.h> |
||
27 | #include <string.h> |
||
28 | |||
/* One transliteration rule: an encoded source key and its encoded ASCII
 * replacement.
 *
 * Both fields use the encoding understood by get_src_char(), get_length()
 * and get_ascii_item(): when bit 0x8000 is clear the field holds a single
 * character directly; when it is set, the low 12 bits are an index into a
 * side table and bits 12-14 hold the length.
 */
struct mapping_entry
{
  guint16 src;   /* encoded source key; decoded against src_table */
  guint16 ascii; /* encoded replacement; decoded against ascii_table */
};
||
34 | |||
/* A contiguous run of entries inside mappings_table that is searched as one
 * mapping by lookup_in_item().
 */
struct mapping_range
{
  guint16 start;  /* index of the first entry of the run in mappings_table */
  guint16 length; /* number of entries in the run */
};
||
40 | |||
/* Index record associating a locale name with the item to use for it. */
struct locale_entry
{
  guint8 name_offset; /* offset of the NUL-terminated name within locale_names */
  guint8 item_id;     /* item identifier, as accepted by lookup_in_item() */
};
||
46 | |||
47 | #include "gtranslit-data.h" |
||
48 | |||
/* Decoding helpers for the 16-bit values stored in struct mapping_entry.
 *
 * If bit 0x8000 of @encoded is clear, the value is itself the (single)
 * character.  Otherwise the low 12 bits index into @array and bits 12-14
 * give the number of elements stored there.
 *
 * Every macro argument is parenthesised so that compound expressions
 * (for example "a | b") are decoded correctly.
 */

/* Character number @index of the source key described by @encoded. */
#define get_src_char(array, encoded, index) \
  (((encoded) & 0x8000) ? (array)[((encoded) & 0xfff) + (index)] : (encoded))

/* Number of characters described by @encoded. */
#define get_length(encoded) \
  (((encoded) & 0x8000) ? (((encoded) & 0x7000) >> 12) : 1)

/* Pointer to the first byte of the ASCII replacement described by @encoded.
 *
 * In the inline case the character is the low-order byte of the 16-bit
 * field itself, so @encoded must be an lvalue and the address of that byte
 * depends on the host byte order.
 */
#if G_BYTE_ORDER == G_BIG_ENDIAN
#define get_ascii_item(array, encoded) \
  (((encoded) & 0x8000) ? &(array)[(encoded) & 0xfff] : (gpointer) (((char *) &(encoded)) + 1))
#else
#define get_ascii_item(array, encoded) \
  (((encoded) & 0x8000) ? &(array)[(encoded) & 0xfff] : (gpointer) &(encoded))
#endif
||
57 | |||
/* Forward declaration: lookup_in_item() and lookup_in_chain() call each
 * other, so one of them has to be declared ahead of its definition.
 */
static const gchar * lookup_in_item (guint           item_id,
                                     const gunichar *key,
                                     gint           *result_len,
                                     gint           *key_consumed);
||
62 | |||
63 | static gint |
||
64 | compare_mapping_entry (gconstpointer user_data, |
||
65 | gconstpointer data) |
||
66 | { |
||
67 | const struct mapping_entry *entry = data; |
||
68 | const gunichar *key = user_data; |
||
69 | gunichar src_0; |
||
70 | |||
71 | G_STATIC_ASSERT(MAX_KEY_SIZE == 2); |
||
72 | |||
73 | src_0 = get_src_char (src_table, entry->src, 0); |
||
74 | |||
75 | if (key[0] > src_0) |
||
76 | return 1; |
||
77 | else if (key[0] < src_0) |
||
78 | return -1; |
||
79 | |||
80 | if (get_length (entry->src) > 1) |
||
81 | { |
||
82 | gunichar src_1; |
||
83 | |||
84 | src_1 = get_src_char (src_table, entry->src, 1); |
||
85 | |||
86 | if (key[1] > src_1) |
||
87 | return 1; |
||
88 | else if (key[1] < src_1) |
||
89 | return -1; |
||
90 | } |
||
91 | else if (key[1]) |
||
92 | return 1; |
||
93 | |||
94 | return 0; |
||
95 | } |
||
96 | |||
97 | static const gchar * |
||
98 | lookup_in_mapping (const struct mapping_entry *mapping, |
||
99 | gint mapping_size, |
||
100 | const gunichar *key, |
||
101 | gint *result_len, |
||
102 | gint *key_consumed) |
||
103 | { |
||
104 | const struct mapping_entry *hit; |
||
105 | |||
106 | hit = bsearch (key, mapping, mapping_size, sizeof (struct mapping_entry), compare_mapping_entry); |
||
107 | |||
108 | if (hit == NULL) |
||
109 | return NULL; |
||
110 | |||
111 | *key_consumed = get_length (hit->src); |
||
112 | *result_len = get_length (hit->ascii); |
||
113 | |||
114 | return get_ascii_item(ascii_table, hit->ascii); |
||
115 | } |
||
116 | |||
117 | static const gchar * |
||
118 | lookup_in_chain (const guint8 *chain, |
||
119 | const gunichar *key, |
||
120 | gint *result_len, |
||
121 | gint *key_consumed) |
||
122 | { |
||
123 | const gchar *result; |
||
124 | |||
125 | while (*chain != 0xff) |
||
126 | { |
||
127 | result = lookup_in_item (*chain, key, result_len, key_consumed); |
||
128 | |||
129 | if (result) |
||
130 | return result; |
||
131 | |||
132 | chain++; |
||
133 | } |
||
134 | |||
135 | return NULL; |
||
136 | } |
||
137 | |||
138 | static const gchar * |
||
139 | lookup_in_item (guint item_id, |
||
140 | const gunichar *key, |
||
141 | gint *result_len, |
||
142 | gint *key_consumed) |
||
143 | { |
||
144 | if (item_id & 0x80) |
||
145 | { |
||
146 | const guint8 *chain = chains_table + chain_starts[item_id & 0x7f]; |
||
147 | |||
148 | return lookup_in_chain (chain, key, result_len, key_consumed); |
||
149 | } |
||
150 | else |
||
151 | { |
||
152 | const struct mapping_range *range = &mapping_ranges[item_id]; |
||
153 | |||
154 | return lookup_in_mapping (mappings_table + range->start, range->length, key, result_len, key_consumed); |
||
155 | } |
||
156 | } |
||
157 | |||
158 | static gint |
||
159 | compare_locale_entry (gconstpointer user_data, |
||
160 | gconstpointer data) |
||
161 | { |
||
162 | const struct locale_entry *entry = data; |
||
163 | const gchar *key = user_data; |
||
164 | |||
165 | return strcmp (key, &locale_names[entry->name_offset]); |
||
166 | } |
||
167 | |||
168 | static gboolean |
||
169 | lookup_item_id_for_one_locale (const gchar *key, |
||
170 | guint *item_id) |
||
171 | { |
||
172 | const struct locale_entry *hit; |
||
173 | |||
174 | hit = bsearch (key, locale_index, G_N_ELEMENTS (locale_index), sizeof (struct locale_entry), compare_locale_entry); |
||
175 | |||
176 | if (hit == NULL) |
||
177 | return FALSE; |
||
178 | |||
179 | *item_id = hit->item_id; |
||
180 | return TRUE; |
||
181 | } |
||
182 | |||
183 | static guint |
||
184 | lookup_item_id_for_locale (const gchar *locale) |
||
185 | { |
||
186 | gchar key[MAX_LOCALE_NAME + 1]; |
||
187 | const gchar *language; |
||
188 | guint language_len; |
||
189 | const gchar *territory = NULL; |
||
190 | guint territory_len = 0; |
||
191 | const gchar *modifier = NULL; |
||
192 | guint modifier_len = 0; |
||
193 | const gchar *next_char; |
||
194 | guint id; |
||
195 | |||
196 | /* As per POSIX, a valid locale looks like: |
||
197 | * |
||
198 | * language[_territory][.codeset][@modifier] |
||
199 | */ |
||
200 | language = locale; |
||
201 | language_len = strcspn (language, "_.@"); |
||
202 | next_char = language + language_len; |
||
203 | |||
204 | if (*next_char == '_') |
||
205 | { |
||
206 | territory = next_char; |
||
207 | territory_len = strcspn (territory + 1, "_.@") + 1; |
||
208 | next_char = territory + territory_len; |
||
209 | } |
||
210 | |||
211 | if (*next_char == '.') |
||
212 | { |
||
213 | const gchar *codeset; |
||
214 | guint codeset_len; |
||
215 | |||
216 | codeset = next_char; |
||
217 | codeset_len = strcspn (codeset + 1, "_.@") + 1; |
||
218 | next_char = codeset + codeset_len; |
||
219 | } |
||
220 | |||
221 | if (*next_char == '@') |
||
222 | { |
||
223 | modifier = next_char; |
||
224 | modifier_len = strcspn (modifier + 1, "_.@") + 1; |
||
225 | next_char = modifier + modifier_len; |
||
226 | } |
||
227 | |||
228 | /* What madness is this? */ |
||
229 | if (language_len == 0 || *next_char) |
||
230 | return default_item_id; |
||
231 | |||
232 | /* We are not interested in codeset. |
||
233 | * |
||
234 | * For this locale: |
||
235 | * |
||
236 | * aa_BB@cc |
||
237 | * |
||
238 | * try in this order: |
||
239 | * |
||
240 | * Note: we have no locales of the form aa_BB@cc in the database. |
||
241 | * |
||
242 | * 1. aa@cc |
||
243 | * 2. aa_BB |
||
244 | * 3. aa |
||
245 | */ |
||
246 | |||
247 | /* 1. */ |
||
248 | if (modifier_len && language_len + modifier_len <= MAX_LOCALE_NAME) |
||
249 | { |
||
250 | memcpy (key, language, language_len); |
||
251 | memcpy (key + language_len, modifier, modifier_len); |
||
252 | key[language_len + modifier_len] = '\0'; |
||
253 | |||
254 | if (lookup_item_id_for_one_locale (key, &id)) |
||
255 | return id; |
||
256 | } |
||
257 | |||
258 | /* 2. */ |
||
259 | if (territory_len && language_len + territory_len <= MAX_LOCALE_NAME) |
||
260 | { |
||
261 | memcpy (key, language, language_len); |
||
262 | memcpy (key + language_len, territory, territory_len); |
||
263 | key[language_len + territory_len] = '\0'; |
||
264 | |||
265 | if (lookup_item_id_for_one_locale (key, &id)) |
||
266 | return id; |
||
267 | } |
||
268 | |||
269 | /* 3. */ |
||
270 | if (language_len <= MAX_LOCALE_NAME) |
||
271 | { |
||
272 | memcpy (key, language, language_len); |
||
273 | key[language_len] = '\0'; |
||
274 | |||
275 | if (lookup_item_id_for_one_locale (key, &id)) |
||
276 | return id; |
||
277 | } |
||
278 | |||
279 | return default_item_id; |
||
280 | } |
||
281 | |||
282 | static guint |
||
283 | get_default_item_id (void) |
||
284 | { |
||
285 | static guint item_id; |
||
286 | static gboolean done; |
||
287 | |||
288 | /* Doesn't need to be locked -- no harm in doing it twice. */ |
||
289 | if (!done) |
||
290 | { |
||
291 | const gchar *locale; |
||
292 | |||
293 | locale = setlocale (LC_CTYPE, NULL); |
||
294 | item_id = lookup_item_id_for_locale (locale); |
||
295 | done = TRUE; |
||
296 | } |
||
297 | |||
298 | return item_id; |
||
299 | } |
||
300 | |||
301 | /** |
||
302 | * g_str_to_ascii: |
||
303 | * @str: a string, in UTF-8 |
||
304 | * @from_locale: (allow-none): the source locale, if known |
||
305 | * |
||
306 | * Transliterate @str to plain ASCII. |
||
307 | * |
||
308 | * For best results, @str should be in composed normalised form. |
||
309 | * |
||
310 | * This function performs a reasonably good set of character |
||
311 | * replacements. The particular set of replacements that is done may |
||
312 | * change by version or even by runtime environment. |
||
313 | * |
||
314 | * If the source language of @str is known, it can used to improve the |
||
315 | * accuracy of the translation by passing it as @from_locale. It should |
||
316 | * be a valid POSIX locale string (of the form |
||
317 | * "language[_territory][.codeset][@modifier]"). |
||
318 | * |
||
319 | * If @from_locale is %NULL then the current locale is used. |
||
320 | * |
||
321 | * If you want to do translation for no specific locale, and you want it |
||
322 | * to be done independently of the currently locale, specify "C" for |
||
323 | * @from_locale. |
||
324 | * |
||
325 | * Returns: a string in plain ASCII |
||
326 | * |
||
327 | * Since: 2.40 |
||
328 | **/ |
||
329 | gchar * |
||
330 | g_str_to_ascii (const gchar *str, |
||
331 | const gchar *from_locale) |
||
332 | { |
||
333 | GString *result; |
||
334 | guint item_id; |
||
335 | |||
336 | g_return_val_if_fail (str != NULL, NULL); |
||
337 | |||
338 | if (g_str_is_ascii (str)) |
||
339 | return g_strdup (str); |
||
340 | |||
341 | if (from_locale) |
||
342 | item_id = lookup_item_id_for_locale (from_locale); |
||
343 | else |
||
344 | item_id = get_default_item_id (); |
||
345 | |||
346 | result = g_string_sized_new (strlen (str)); |
||
347 | |||
348 | while (*str) |
||
349 | { |
||
350 | /* We only need to transliterate non-ASCII values... */ |
||
351 | if (*str & 0x80) |
||
352 | { |
||
353 | gunichar key[MAX_KEY_SIZE]; |
||
354 | const gchar *r; |
||
355 | gint consumed; |
||
356 | gint r_len; |
||
357 | gunichar c; |
||
358 | |||
359 | G_STATIC_ASSERT(MAX_KEY_SIZE == 2); |
||
360 | |||
361 | c = g_utf8_get_char (str); |
||
362 | |||
363 | /* This is where it gets evil... |
||
364 | * |
||
365 | * We know that MAX_KEY_SIZE is 2. We also know that we |
||
366 | * only want to try another character if it's non-ascii. |
||
367 | */ |
||
368 | str = g_utf8_next_char (str); |
||
369 | |||
370 | key[0] = c; |
||
371 | if (*str & 0x80) |
||
372 | key[1] = g_utf8_get_char (str); |
||
373 | else |
||
374 | key[1] = 0; |
||
375 | |||
376 | r = lookup_in_item (item_id, key, &r_len, &consumed); |
||
377 | |||
378 | /* If we failed to map two characters, try again with one. |
||
379 | * |
||
380 | * gconv behaviour is a bit weird here -- it seems to |
||
381 | * depend in the randomness of the binary search and the |
||
382 | * size of the input buffer as to what result we get here. |
||
383 | * |
||
384 | * Doing it this way is more work, but should be |
||
385 | * more-correct. |
||
386 | */ |
||
387 | if (r == NULL && key[1]) |
||
388 | { |
||
389 | key[1] = 0; |
||
390 | r = lookup_in_item (item_id, key, &r_len, &consumed); |
||
391 | } |
||
392 | |||
393 | if (r != NULL) |
||
394 | { |
||
395 | g_string_append_len (result, r, r_len); |
||
396 | if (consumed == 2) |
||
397 | /* If it took both then skip again */ |
||
398 | str = g_utf8_next_char (str); |
||
399 | } |
||
400 | else /* no match found */ |
||
401 | g_string_append_c (result, '?'); |
||
402 | } |
||
403 | else if (*str & 0x80) /* Out-of-range non-ASCII case */ |
||
404 | { |
||
405 | g_string_append_c (result, '?'); |
||
406 | str = g_utf8_next_char (str); |
||
407 | } |
||
408 | else /* ASCII case */ |
||
409 | g_string_append_c (result, *str++); |
||
410 | } |
||
411 | |||
412 | return g_string_free (result, FALSE); |
||
413 | } |