WebSVN – nexmon – Blame – Rev 1 – /utilities/glib/glib/gunidecomp.c

Rev	Author	Line No.	Line
1	office	1	`/* decomp.c - Character decomposition.`
		2	`*`
		3	`* Copyright (C) 1999, 2000 Tom Tromey`
		4	`* Copyright 2000 Red Hat, Inc.`
		5	`*`
		6	`* The Gnome Library is free software; you can redistribute it and/or`
		7	`* modify it under the terms of the GNU Lesser General Public License as`
		8	`* published by the Free Software Foundation; either version 2 of the`
		9	`* License, or (at your option) any later version.`
		10	`*`
		11	`* The Gnome Library is distributed in the hope that it will be useful,`
		12	`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
		13	`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
		14	`* Lesser General Public License for more details.`
		15	`*`
		16	`* You should have received a copy of the GNU Lesser General Public`
		17	`* License along with the Gnome Library; see the file COPYING.LIB. If not,`
		18	`* see <http://www.gnu.org/licenses/>.`
		19	`*/`
		20
		21	`/**`
		22	`* SECTION:unicode`
		23	`* @Title: Unicode Manipulation`
		24	`* @Short_description: functions operating on Unicode characters and`
		25	`* UTF-8 strings`
		26	`* @See_also: g_locale_to_utf8(), g_locale_from_utf8()`
		27	`*`
		28	`* This section describes a number of functions for dealing with`
		29	`* Unicode characters and strings. There are analogues of the`
		30	* traditional `ctype.h` character classification and case conversion
		31	`* functions, UTF-8 analogues of some string utility functions,`
		32	`* functions to perform normalization, case conversion and collation`
		33	`* on UTF-8 strings and finally functions to convert between the UTF-8,`
		34	`* UTF-16 and UCS-4 encodings of Unicode.`
		35	`*`
		36	`* The implementations of the Unicode functions in GLib are based`
		37	`* on the Unicode Character Data tables, which are available from`
		38	`* [www.unicode.org](http://www.unicode.org/).`
		39	`* GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,`
		40	`* GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1,`
		41	`* GLib 2.30 supports Unicode 6.0.`
		42	`*/`
		43
		44	`#include "config.h"`
		45
		46	`#include <stdlib.h>`
		47
		48	`#include "gunicode.h"`
		49	`#include "gunidecomp.h"`
		50	`#include "gmem.h"`
		51	`#include "gunicomp.h"`
		52	`#include "gunicodeprivate.h"`
		53
		54
		55	`#define CC_PART1(Page, Char) \`
		56	`((combining_class_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \`
		57	`? (combining_class_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \`
		58	`: (cclass_data[combining_class_table_part1[Page]][Char]))`
		59
		60	`#define CC_PART2(Page, Char) \`
		61	`((combining_class_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \`
		62	`? (combining_class_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \`
		63	`: (cclass_data[combining_class_table_part2[Page]][Char]))`
		64
		65	`#define COMBINING_CLASS(Char) \`
		66	`(((Char) <= G_UNICODE_LAST_CHAR_PART1) \`
		67	`? CC_PART1 ((Char) >> 8, (Char) & 0xff) \`
		68	`: (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \`
		69	`? CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \`
		70	`: 0))`
		71
		72	`/**`
		73	`* g_unichar_combining_class:`
		74	`* @uc: a Unicode character`
		75	`*`
		76	`* Determines the canonical combining class of a Unicode character.`
		77	`*`
		78	`* Returns: the combining class of the character`
		79	`*`
		80	`* Since: 2.14`
		81	`**/`
		82	`gint`
		83	`g_unichar_combining_class (gunichar uc)`
		84	`{`
		85	`return COMBINING_CLASS (uc);`
		86	`}`
		87
		88	`/* constants for hangul syllable [de]composition */`
		89	`#define SBase 0xAC00`
		90	`#define LBase 0x1100`
		91	`#define VBase 0x1161`
		92	`#define TBase 0x11A7`
		93	`#define LCount 19`
		94	`#define VCount 21`
		95	`#define TCount 28`
		96	`#define NCount (VCount * TCount)`
		97	`#define SCount (LCount * NCount)`
		98
		99	`/**`
		100	`* g_unicode_canonical_ordering:`
		101	`* @string: a UCS-4 encoded string.`
		102	`* @len: the maximum length of @string to use.`
		103	`*`
		104	`* Computes the canonical ordering of a string in-place.`
		105	`* This rearranges decomposed characters in the string`
		106	`* according to their combining classes. See the Unicode`
		107	`* manual for more information.`
		108	`**/`
		109	`void`
		110	`g_unicode_canonical_ordering (gunichar *string,`
		111	`gsize len)`
		112	`{`
		113	`gsize i;`
		114	`int swap = 1;`
		115
		116	`while (swap)`
		117	`{`
		118	`int last;`
		119	`swap = 0;`
		120	`last = COMBINING_CLASS (string[0]);`
		121	`for (i = 0; i < len - 1; ++i)`
		122	`{`
		123	`int next = COMBINING_CLASS (string[i + 1]);`
		124	`if (next != 0 && last > next)`
		125	`{`
		126	`gsize j;`
		127	`/* Percolate item leftward through string. */`
		128	`for (j = i + 1; j > 0; --j)`
		129	`{`
		130	`gunichar t;`
		131	`if (COMBINING_CLASS (string[j - 1]) <= next)`
		132	`break;`
		133	`t = string[j];`
		134	`string[j] = string[j - 1];`
		135	`string[j - 1] = t;`
		136	`swap = 1;`
		137	`}`
		138	`/* We're re-entering the loop looking at the old`
		139	`character again. */`
		140	`next = last;`
		141	`}`
		142	`last = next;`
		143	`}`
		144	`}`
		145	`}`
		146
		147	`/* http://www.unicode.org/unicode/reports/tr15/#Hangul`
		148	`* r should be null or have sufficient space. Calling with r == NULL will`
		149	`* only calculate the result_len; however, a buffer with space for three`
		150	`* characters will always be big enough. */`
		151	`static void`
		152	`decompose_hangul (gunichar s,`
		153	`gunichar *r,`
		154	`gsize *result_len)`
		155	`{`
		156	`gint SIndex = s - SBase;`
		157	`gint TIndex = SIndex % TCount;`
		158
		159	`if (r)`
		160	`{`
		161	`r[0] = LBase + SIndex / NCount;`
		162	`r[1] = VBase + (SIndex % NCount) / TCount;`
		163	`}`
		164
		165	`if (TIndex)`
		166	`{`
		167	`if (r)`
		168	`r[2] = TBase + TIndex;`
		169	`*result_len = 3;`
		170	`}`
		171	`else`
		172	`*result_len = 2;`
		173	`}`
		174
		175	`/* returns a pointer to a null-terminated UTF-8 string */`
		176	`static const gchar *`
		177	`find_decomposition (gunichar ch,`
		178	`gboolean compat)`
		179	`{`
		180	`int start = 0;`
		181	`int end = G_N_ELEMENTS (decomp_table);`
		182
		183	`if (ch >= decomp_table[start].ch &&`
		184	`ch <= decomp_table[end - 1].ch)`
		185	`{`
		186	`while (TRUE)`
		187	`{`
		188	`int half = (start + end) / 2;`
		189	`if (ch == decomp_table[half].ch)`
		190	`{`
		191	`int offset;`
		192
		193	`if (compat)`
		194	`{`
		195	`offset = decomp_table[half].compat_offset;`
		196	`if (offset == G_UNICODE_NOT_PRESENT_OFFSET)`
		197	`offset = decomp_table[half].canon_offset;`
		198	`}`
		199	`else`
		200	`{`
		201	`offset = decomp_table[half].canon_offset;`
		202	`if (offset == G_UNICODE_NOT_PRESENT_OFFSET)`
		203	`return NULL;`
		204	`}`
		205
		206	`return &(decomp_expansion_string[offset]);`
		207	`}`
		208	`else if (half == start)`
		209	`break;`
		210	`else if (ch > decomp_table[half].ch)`
		211	`start = half;`
		212	`else`
		213	`end = half;`
		214	`}`
		215	`}`
		216
		217	`return NULL;`
		218	`}`
		219
		220	`/**`
		221	`* g_unicode_canonical_decomposition:`
		222	`* @ch: a Unicode character.`
		223	`* @result_len: location to store the length of the return value.`
		224	`*`
		225	`* Computes the canonical decomposition of a Unicode character.`
		226	`*`
		227	`* Returns: a newly allocated string of Unicode characters.`
		228	`* @result_len is set to the resulting length of the string.`
		229	`*`
		230	`* Deprecated: 2.30: Use the more flexible g_unichar_fully_decompose()`
		231	`* instead.`
		232	`**/`
		233	`gunichar *`
		234	`g_unicode_canonical_decomposition (gunichar ch,`
		235	`gsize *result_len)`
		236	`{`
		237	`const gchar *decomp;`
		238	`const gchar *p;`
		239	`gunichar *r;`
		240
		241	`/* Hangul syllable */`
		242	`if (ch >= SBase && ch < SBase + SCount)`
		243	`{`
		244	`decompose_hangul (ch, NULL, result_len);`
		245	`r = g_malloc (result_len sizeof (gunichar));`
		246	`decompose_hangul (ch, r, result_len);`
		247	`}`
		248	`else if ((decomp = find_decomposition (ch, FALSE)) != NULL)`
		249	`{`
		250	`/* Found it. */`
		251	`int i;`
		252
		253	`*result_len = g_utf8_strlen (decomp, -1);`
		254	`r = g_malloc (result_len sizeof (gunichar));`
		255
		256	`for (p = decomp, i = 0; *p != '\0'; p = g_utf8_next_char (p), i++)`
		257	`r[i] = g_utf8_get_char (p);`
		258	`}`
		259	`else`
		260	`{`
		261	`/* Not in our table. */`
		262	`r = g_malloc (sizeof (gunichar));`
		263	`*r = ch;`
		264	`*result_len = 1;`
		265	`}`
		266
		267	`return r;`
		268	`}`
		269
		270	`/* L,V => LV and LV,T => LVT */`
		271	`static gboolean`
		272	`combine_hangul (gunichar a,`
		273	`gunichar b,`
		274	`gunichar *result)`
		275	`{`
		276	`gint LIndex = a - LBase;`
		277	`gint SIndex = a - SBase;`
		278
		279	`gint VIndex = b - VBase;`
		280	`gint TIndex = b - TBase;`
		281
		282	`if (0 <= LIndex && LIndex < LCount`
		283	`&& 0 <= VIndex && VIndex < VCount)`
		284	`{`
		285	`result = SBase + (LIndex VCount + VIndex) * TCount;`
		286	`return TRUE;`
		287	`}`
		288	`else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0`
		289	`&& 0 < TIndex && TIndex < TCount)`
		290	`{`
		291	`*result = a + TIndex;`
		292	`return TRUE;`
		293	`}`
		294
		295	`return FALSE;`
		296	`}`
		297
		298	`#define CI(Page, Char) \`
		299	`((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \`
		300	`? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \`
		301	`: (compose_data[compose_table[Page]][Char]))`
		302
		303	`#define COMPOSE_INDEX(Char) \`
		304	`(((Char >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))`
		305
		306	`static gboolean`
		307	`combine (gunichar a,`
		308	`gunichar b,`
		309	`gunichar *result)`
		310	`{`
		311	`gushort index_a, index_b;`
		312
		313	`if (combine_hangul (a, b, result))`
		314	`return TRUE;`
		315
		316	`index_a = COMPOSE_INDEX(a);`
		317
		318	`if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)`
		319	`{`
		320	`if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])`
		321	`{`
		322	`*result = compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];`
		323	`return TRUE;`
		324	`}`
		325	`else`
		326	`return FALSE;`
		327	`}`
		328
		329	`index_b = COMPOSE_INDEX(b);`
		330
		331	`if (index_b >= COMPOSE_SECOND_SINGLE_START)`
		332	`{`
		333	`if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])`
		334	`{`
		335	`*result = compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];`
		336	`return TRUE;`
		337	`}`
		338	`else`
		339	`return FALSE;`
		340	`}`
		341
		342	`if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START &&`
		343	`index_b >= COMPOSE_SECOND_START && index_b < COMPOSE_SECOND_SINGLE_START)`
		344	`{`
		345	`gunichar res = compose_array[index_a - COMPOSE_FIRST_START][index_b - COMPOSE_SECOND_START];`
		346
		347	`if (res)`
		348	`{`
		349	`*result = res;`
		350	`return TRUE;`
		351	`}`
		352	`}`
		353
		354	`return FALSE;`
		355	`}`
		356
		357	`gunichar *`
		358	`_g_utf8_normalize_wc (const gchar *str,`
		359	`gssize max_len,`
		360	`GNormalizeMode mode)`
		361	`{`
		362	`gsize n_wc;`
		363	`gunichar *wc_buffer;`
		364	`const char *p;`
		365	`gsize last_start;`
		366	`gboolean do_compat = (mode == G_NORMALIZE_NFKC \|\|`
		367	`mode == G_NORMALIZE_NFKD);`
		368	`gboolean do_compose = (mode == G_NORMALIZE_NFC \|\|`
		369	`mode == G_NORMALIZE_NFKC);`
		370
		371	`n_wc = 0;`
		372	`p = str;`
		373	`while ((max_len < 0 \|\| p < str + max_len) && *p)`
		374	`{`
		375	`const gchar *decomp;`
		376	`gunichar wc = g_utf8_get_char (p);`
		377
		378	`if (wc >= SBase && wc < SBase + SCount)`
		379	`{`
		380	`gsize result_len;`
		381	`decompose_hangul (wc, NULL, &result_len);`
		382	`n_wc += result_len;`
		383	`}`
		384	`else`
		385	`{`
		386	`decomp = find_decomposition (wc, do_compat);`
		387
		388	`if (decomp)`
		389	`n_wc += g_utf8_strlen (decomp, -1);`
		390	`else`
		391	`n_wc++;`
		392	`}`
		393
		394	`p = g_utf8_next_char (p);`
		395	`}`
		396
		397	`wc_buffer = g_new (gunichar, n_wc + 1);`
		398
		399	`last_start = 0;`
		400	`n_wc = 0;`
		401	`p = str;`
		402	`while ((max_len < 0 \|\| p < str + max_len) && *p)`
		403	`{`
		404	`gunichar wc = g_utf8_get_char (p);`
		405	`const gchar *decomp;`
		406	`int cc;`
		407	`gsize old_n_wc = n_wc;`
		408
		409	`if (wc >= SBase && wc < SBase + SCount)`
		410	`{`
		411	`gsize result_len;`
		412	`decompose_hangul (wc, wc_buffer + n_wc, &result_len);`
		413	`n_wc += result_len;`
		414	`}`
		415	`else`
		416	`{`
		417	`decomp = find_decomposition (wc, do_compat);`
		418
		419	`if (decomp)`
		420	`{`
		421	`const char *pd;`
		422	`for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))`
		423	`wc_buffer[n_wc++] = g_utf8_get_char (pd);`
		424	`}`
		425	`else`
		426	`wc_buffer[n_wc++] = wc;`
		427	`}`
		428
		429	`if (n_wc > 0)`
		430	`{`
		431	`cc = COMBINING_CLASS (wc_buffer[old_n_wc]);`
		432
		433	`if (cc == 0)`
		434	`{`
		435	`g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);`
		436	`last_start = old_n_wc;`
		437	`}`
		438	`}`
		439
		440	`p = g_utf8_next_char (p);`
		441	`}`
		442
		443	`if (n_wc > 0)`
		444	`{`
		445	`g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);`
		446	`last_start = n_wc;`
		447	`}`
		448
		449	`wc_buffer[n_wc] = 0;`
		450
		451	`/* All decomposed and reordered */`
		452
		453	`if (do_compose && n_wc > 0)`
		454	`{`
		455	`gsize i, j;`
		456	`int last_cc = 0;`
		457	`last_start = 0;`
		458
		459	`for (i = 0; i < n_wc; i++)`
		460	`{`
		461	`int cc = COMBINING_CLASS (wc_buffer[i]);`
		462
		463	`if (i > 0 &&`
		464	`(last_cc == 0 \|\| last_cc < cc) &&`
		465	`combine (wc_buffer[last_start], wc_buffer[i],`
		466	`&wc_buffer[last_start]))`
		467	`{`
		468	`for (j = i + 1; j < n_wc; j++)`
		469	`wc_buffer[j-1] = wc_buffer[j];`
		470	`n_wc--;`
		471	`i--;`
		472
		473	`if (i == last_start)`
		474	`last_cc = 0;`
		475	`else`
		476	`last_cc = COMBINING_CLASS (wc_buffer[i-1]);`
		477
		478	`continue;`
		479	`}`
		480
		481	`if (cc == 0)`
		482	`last_start = i;`
		483
		484	`last_cc = cc;`
		485	`}`
		486	`}`
		487
		488	`wc_buffer[n_wc] = 0;`
		489
		490	`return wc_buffer;`
		491	`}`
		492
		493	`/**`
		494	`* g_utf8_normalize:`
		495	`* @str: a UTF-8 encoded string.`
		496	`* @len: length of @str, in bytes, or -1 if @str is nul-terminated.`
		497	`* @mode: the type of normalization to perform.`
		498	`*`
		499	`* Converts a string into canonical form, standardizing`
		500	`* such issues as whether a character with an accent`
		501	`* is represented as a base character and combining`
		502	`* accent or as a single precomposed character. The`
		503	`* string has to be valid UTF-8, otherwise %NULL is`
		504	`* returned. You should generally call g_utf8_normalize()`
		505	`* before comparing two Unicode strings.`
		506	`*`
		507	`* The normalization mode %G_NORMALIZE_DEFAULT only`
		508	`* standardizes differences that do not affect the`
		509	`* text content, such as the above-mentioned accent`
		510	`* representation. %G_NORMALIZE_ALL also standardizes`
		511	`* the "compatibility" characters in Unicode, such`
		512	`* as SUPERSCRIPT THREE to the standard forms`
		513	`* (in this case DIGIT THREE). Formatting information`
		514	`* may be lost but for most text operations such`
		515	`* characters should be considered the same.`
		516	`*`
		517	`* %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE`
		518	`* are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,`
		519	`* but returned a result with composed forms rather`
		520	`* than a maximally decomposed form. This is often`
		521	`* useful if you intend to convert the string to`
		522	`* a legacy encoding or pass it to a system with`
		523	`* less capable Unicode handling.`
		524	`*`
		525	`* Returns: a newly allocated string, that is the`
		526	`* normalized form of @str, or %NULL if @str is not`
		527	`* valid UTF-8.`
		528	`**/`
		529	`gchar *`
		530	`g_utf8_normalize (const gchar *str,`
		531	`gssize len,`
		532	`GNormalizeMode mode)`
		533	`{`
		534	`gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);`
		535	`gchar *result;`
		536
		537	`result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);`
		538	`g_free (result_wc);`
		539
		540	`return result;`
		541	`}`
		542
		543	`static gboolean`
		544	`decompose_hangul_step (gunichar ch,`
		545	`gunichar *a,`
		546	`gunichar *b)`
		547	`{`
		548	`gint SIndex, TIndex;`
		549
		550	`if (ch < SBase \|\| ch >= SBase + SCount)`
		551	`return FALSE; /* not a hangul syllable */`
		552
		553	`SIndex = ch - SBase;`
		554	`TIndex = SIndex % TCount;`
		555
		556	`if (TIndex)`
		557	`{`
		558	`/* split LVT -> LV,T */`
		559	`*a = ch - TIndex;`
		560	`*b = TBase + TIndex;`
		561	`}`
		562	`else`
		563	`{`
		564	`/* split LV -> L,V */`
		565	`*a = LBase + SIndex / NCount;`
		566	`*b = VBase + (SIndex % NCount) / TCount;`
		567	`}`
		568
		569	`return TRUE;`
		570	`}`
		571
		572	`/**`
		573	`* g_unichar_decompose:`
		574	`* @ch: a Unicode character`
		575	`* @a: return location for the first component of @ch`
		576	`* @b: return location for the second component of @ch`
		577	`*`
		578	`* Performs a single decomposition step of the`
		579	`* Unicode canonical decomposition algorithm.`
		580	`*`
		581	`* This function does not include compatibility`
		582	`* decompositions. It does, however, include algorithmic`
		583	`* Hangul Jamo decomposition, as well as 'singleton'`
		584	`* decompositions which replace a character by a single`
		585	`* other character. In the case of singletons *@b will`
		586	`* be set to zero.`
		587	`*`
		588	`* If @ch is not decomposable, @a is set to @ch and @b`
		589	`* is set to zero.`
		590	`*`
		591	`* Note that the way Unicode decomposition pairs are`
		592	`* defined, it is guaranteed that @b would not decompose`
		593	`* further, but @a may itself decompose. To get the full`
		594	`* canonical decomposition for @ch, one would need to`
		595	`* recursively call this function on @a. Or use`
		596	`* g_unichar_fully_decompose().`
		597	`*`
		598	`* See`
		599	`* [UAX#15](http://unicode.org/reports/tr15/)`
		600	`* for details.`
		601	`*`
		602	`* Returns: %TRUE if the character could be decomposed`
		603	`*`
		604	`* Since: 2.30`
		605	`*/`
		606	`gboolean`
		607	`g_unichar_decompose (gunichar ch,`
		608	`gunichar *a,`
		609	`gunichar *b)`
		610	`{`
		611	`gint start = 0;`
		612	`gint end = G_N_ELEMENTS (decomp_step_table);`
		613
		614	`if (decompose_hangul_step (ch, a, b))`
		615	`return TRUE;`
		616
		617	`/* TODO use bsearch() */`
		618	`if (ch >= decomp_step_table[start].ch &&`
		619	`ch <= decomp_step_table[end - 1].ch)`
		620	`{`
		621	`while (TRUE)`
		622	`{`
		623	`gint half = (start + end) / 2;`
		624	`const decomposition_step *p = &(decomp_step_table[half]);`
		625	`if (ch == p->ch)`
		626	`{`
		627	`*a = p->a;`
		628	`*b = p->b;`
		629	`return TRUE;`
		630	`}`
		631	`else if (half == start)`
		632	`break;`
		633	`else if (ch > p->ch)`
		634	`start = half;`
		635	`else`
		636	`end = half;`
		637	`}`
		638	`}`
		639
		640	`*a = ch;`
		641	`*b = 0;`
		642
		643	`return FALSE;`
		644	`}`
		645
		646	`/**`
		647	`* g_unichar_compose:`
		648	`* @a: a Unicode character`
		649	`* @b: a Unicode character`
		650	`* @ch: return location for the composed character`
		651	`*`
		652	`* Performs a single composition step of the`
		653	`* Unicode canonical composition algorithm.`
		654	`*`
		655	`* This function includes algorithmic Hangul Jamo composition,`
		656	`* but it is not exactly the inverse of g_unichar_decompose().`
		657	`* No composition can have either of @a or @b equal to zero.`
		658	`* To be precise, this function composes if and only if`
		659	`* there exists a Primary Composite P which is canonically`
		660	`* equivalent to the sequence <@a,@b>. See the Unicode`
		661	`* Standard for the definition of Primary Composite.`
		662	`*`
		663	`* If @a and @b do not compose a new character, @ch is set to zero.`
		664	`*`
		665	`* See`
		666	`* [UAX#15](http://unicode.org/reports/tr15/)`
		667	`* for details.`
		668	`*`
		669	`* Returns: %TRUE if the characters could be composed`
		670	`*`
		671	`* Since: 2.30`
		672	`*/`
		673	`gboolean`
		674	`g_unichar_compose (gunichar a,`
		675	`gunichar b,`
		676	`gunichar *ch)`
		677	`{`
		678	`if (combine (a, b, ch))`
		679	`return TRUE;`
		680
		681	`*ch = 0;`
		682	`return FALSE;`
		683	`}`
		684
		685	`/**`
		686	`* g_unichar_fully_decompose:`
		687	`* @ch: a Unicode character.`
		688	`* @compat: whether perform canonical or compatibility decomposition`
		689	`* @result: (allow-none): location to store decomposed result, or %NULL`
		690	`* @result_len: length of @result`
		691	`*`
		692	`* Computes the canonical or compatibility decomposition of a`
		693	`* Unicode character. For compatibility decomposition,`
		694	`* pass %TRUE for @compat; for canonical decomposition`
		695	`* pass %FALSE for @compat.`
		696	`*`
		697	`* The decomposed sequence is placed in @result. Only up to`
		698	`* @result_len characters are written into @result. The length`
		699	`* of the full decomposition (irrespective of @result_len) is`
		700	`* returned by the function. For canonical decomposition,`
		701	`* currently all decompositions are of length at most 4, but`
		702	`* this may change in the future (very unlikely though).`
		703	`* At any rate, Unicode does guarantee that a buffer of length`
		704	`* 18 is always enough for both compatibility and canonical`
		705	`* decompositions, so that is the size recommended. This is provided`
		706	`* as %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.`
		707	`*`
		708	`* See`
		709	`* [UAX#15](http://unicode.org/reports/tr15/)`
		710	`* for details.`
		711	`*`
		712	`* Returns: the length of the full decomposition.`
		713	`*`
		714	`* Since: 2.30`
		715	`**/`
		716	`gsize`
		717	`g_unichar_fully_decompose (gunichar ch,`
		718	`gboolean compat,`
		719	`gunichar *result,`
		720	`gsize result_len)`
		721	`{`
		722	`const gchar *decomp;`
		723	`const gchar *p;`
		724
		725	`/* Hangul syllable */`
		726	`if (ch >= SBase && ch < SBase + SCount)`
		727	`{`
		728	`gsize len, i;`
		729	`gunichar buffer[3];`
		730	`decompose_hangul (ch, result ? buffer : NULL, &len);`
		731	`if (result)`
		732	`for (i = 0; i < len && i < result_len; i++)`
		733	`result[i] = buffer[i];`
		734	`return len;`
		735	`}`
		736	`else if ((decomp = find_decomposition (ch, compat)) != NULL)`
		737	`{`
		738	`/* Found it. */`
		739	`gsize len, i;`
		740
		741	`len = g_utf8_strlen (decomp, -1);`
		742
		743	`for (p = decomp, i = 0; i < len && i < result_len; p = g_utf8_next_char (p), i++)`
		744	`result[i] = g_utf8_get_char (p);`
		745
		746	`return len;`
		747	`}`
		748
		749	`/* Does not decompose */`
		750	`if (result && result_len >= 1)`
		751	`*result = ch;`
		752	`return 1;`
		753	`}`

nexmon – Blame information for rev 1