nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Copyright (C) 1999-2001 Free Software Foundation, Inc. |
||
3 | * This file is part of the GNU LIBICONV Library. |
||
4 | * |
||
5 | * The GNU LIBICONV Library is free software; you can redistribute it |
||
6 | * and/or modify it under the terms of the GNU Library General Public |
||
7 | * License as published by the Free Software Foundation; either version 2 |
||
8 | * of the License, or (at your option) any later version. |
||
9 | * |
||
10 | * The GNU LIBICONV Library is distributed in the hope that it will be |
||
11 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
13 | * Library General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU Library General Public |
||
16 | * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
||
17 | * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, |
||
18 | * Fifth Floor, Boston, MA 02110-1301, USA. |
||
19 | */ |
||
20 | |||
21 | /* |
||
22 | * JOHAB Hangul |
||
23 | * |
||
24 | * Ken Lunde writes in his "CJKV Information Processing" book, p. 114: |
||
25 | * "Hangul can be composed of two or three jamo (some jamo are considered |
||
26 | * compound). Johab uses 19 initial jamo (consonants), 21 medial jamo (vowels) |
||
27 | * and 27 final jamo (consonants; 28 when you include the "fill" character |
||
28 | * for Hangul containing only two jamo). Multiplying these numbers results in |
||
29 | * 11172." |
||
30 | * |
||
31 | * Structure of the Johab encoding (see p. 181-184): |
||
32 | * bit 15 = 1 |
||
33 | * bit 14..10 = initial jamo, only 19+1 out of 32 possible values are used |
||
34 | * bit 9..5 = medial jamo, only 21+1 out of 32 possible values are used |
||
35 | * bit 4..0 = final jamo, only 27+1 out of 32 possible values are used |
||
36 | * |
||
37 | * Structure of the Unicode encoding: |
||
38 | * grep '^0x\([8-C]...\|D[0-7]..\)' unicode.org-mappings/EASTASIA/KSC/JOHAB.TXT |
||
39 | * You see that all characters there are marked "HANGUL LETTER" or "HANGUL |
||
40 | * SYLLABLE". If you eliminate the "HANGUL LETTER"s, the table is sorted |
||
41 | * in ascending order according to Johab encoding and according to the Unicode |
||
42 | * encoding. Now look a little more carefully, and you see that the following |
||
43 | * formula holds: |
||
44 | * unicode == 0xAC00 |
||
45 | * + 21 * 28 * (jamo_initial_index[(johab >> 10) & 31] - 1) |
||
46 | * + 28 * (jamo_medial_index[(johab >> 5) & 31] - 1) |
||
47 | * + jamo_final_index[johab & 31] |
||
48 | * where the index tables are defined as below. |
||
49 | */ |
||
50 | |||
/* Tables mapping 5-bit groups to jamo letters. */
/* Note that Jamo XX = UHC 0xA4A0+XX = Unicode 0x3130+XX */
/* NONE marks a 5-bit value that is not assigned to any jamo.
   FILL marks the 5-bit value that stands for "no jamo in this position". */
#define NONE 0xfd
#define FILL 0xff
/* Initial jamo, indexed by bits 14..10 of the Johab code. */
static const unsigned char jamo_initial[32] = {
  NONE, FILL, 0x01, 0x02, 0x04, 0x07, 0x08, 0x09,
  0x11, 0x12, 0x13, 0x15, 0x16, 0x17, 0x18, 0x19,
  0x1a, 0x1b, 0x1c, 0x1d, 0x1e, NONE, NONE, NONE,
  NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
/* Medial jamo, indexed by bits 9..5 of the Johab code. */
static const unsigned char jamo_medial[32] = {
  NONE, NONE, FILL, 0x1f, 0x20, 0x21, 0x22, 0x23,
  NONE, NONE, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
  NONE, NONE, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
  NONE, NONE, 0x30, 0x31, 0x32, 0x33, NONE, NONE,
};
/* Final jamo, indexed by bits 4..0 of the Johab code. */
static const unsigned char jamo_final[32] = {
  NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  0x07, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, NONE, 0x12, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, NONE, NONE,
};
/* Same as jamo_final, except that it excludes characters already
   contained in jamo_initial. 11 characters instead of 27. */
static const unsigned char jamo_final_notinitial[32] = {
  NONE, NONE, NONE, NONE, 0x03, NONE, 0x05, 0x06,
  NONE, NONE, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, NONE, NONE, NONE, 0x14, NONE, NONE, NONE,
  NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
};
||
81 | |||
/* Tables mapping 5-bit groups to packed indices. */
/* none marks a 5-bit value that is not assigned; fill marks the value that
   stands for "no jamo in this position".  Assigned jamo are numbered
   consecutively starting at 1. */
#define none (-1)
#define fill 0
/* Packed index of the initial jamo, indexed by bits 14..10. */
static const signed char jamo_initial_index[32] = {
  none, fill, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
  0x0f, 0x10, 0x11, 0x12, 0x13, none, none, none,
  none, none, none, none, none, none, none, none,
};
/* Packed index of the medial jamo, indexed by bits 9..5. */
static const signed char jamo_medial_index[32] = {
  none, none, fill, 0x01, 0x02, 0x03, 0x04, 0x05,
  none, none, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
  none, none, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  none, none, 0x12, 0x13, 0x14, 0x15, none, none,
};
/* Packed index of the final jamo, indexed by bits 4..0. */
static const signed char jamo_final_index[32] = {
  none, fill, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
  0x0f, 0x10, none, 0x11, 0x12, 0x13, 0x14, 0x15,
  0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, none, none,
};
||
103 | |||
104 | static int |
||
105 | johab_hangul_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) |
||
106 | { |
||
107 | unsigned char c1 = s[0]; |
||
108 | if ((c1 >= 0x84 && c1 <= 0xd3)) { |
||
109 | if (n >= 2) { |
||
110 | unsigned char c2 = s[1]; |
||
111 | if ((c2 >= 0x41 && c2 < 0x7f) || (c2 >= 0x81 && c2 < 0xff)) { |
||
112 | unsigned int johab = (c1 << 8) | c2; |
||
113 | unsigned int bitspart1 = (johab >> 10) & 31; |
||
114 | unsigned int bitspart2 = (johab >> 5) & 31; |
||
115 | unsigned int bitspart3 = johab & 31; |
||
116 | int index1 = jamo_initial_index[bitspart1]; |
||
117 | int index2 = jamo_medial_index[bitspart2]; |
||
118 | int index3 = jamo_final_index[bitspart3]; |
||
119 | /* Exclude "none" values. */ |
||
120 | if (index1 >= 0 && index2 >= 0 && index3 >= 0) { |
||
121 | /* Deal with "fill" values in initial or medial position. */ |
||
122 | if (index1 == fill) { |
||
123 | if (index2 == fill) { |
||
124 | unsigned char jamo3 = jamo_final_notinitial[bitspart3]; |
||
125 | if (jamo3 != NONE) { |
||
126 | *pwc = (ucs4_t) 0x3130 + jamo3; |
||
127 | return 2; |
||
128 | } |
||
129 | } else if (index3 == fill) { |
||
130 | unsigned char jamo2 = jamo_medial[bitspart2]; |
||
131 | if (jamo2 != NONE && jamo2 != FILL) { |
||
132 | *pwc = (ucs4_t) 0x3130 + jamo2; |
||
133 | return 2; |
||
134 | } |
||
135 | } |
||
136 | /* Syllables composed only of medial and final don't exist. */ |
||
137 | } else if (index2 == fill) { |
||
138 | if (index3 == fill) { |
||
139 | unsigned char jamo1 = jamo_initial[bitspart1]; |
||
140 | if (jamo1 != NONE && jamo1 != FILL) { |
||
141 | *pwc = (ucs4_t) 0x3130 + jamo1; |
||
142 | return 2; |
||
143 | } |
||
144 | } |
||
145 | /* Syllables composed only of initial and final don't exist. */ |
||
146 | } else { |
||
147 | /* index1 and index2 are not fill, but index3 may be fill. */ |
||
148 | /* Nothing more to exclude. All 11172 code points are valid. */ |
||
149 | *pwc = 0xac00 + ((index1 - 1) * 21 + (index2 - 1)) * 28 + index3; |
||
150 | return 2; |
||
151 | } |
||
152 | } |
||
153 | } |
||
154 | return RET_ILSEQ; |
||
155 | } |
||
156 | return RET_TOOFEW(0); |
||
157 | } |
||
158 | return RET_ILSEQ; |
||
159 | } |
||
160 | |||
/* 51 Jamo: 19 initial, 21 medial, 11 final not initial. */
/* Johab codes for U+3131..U+3163; entry i corresponds to U+3131 + i.
   The range comments give the low byte of the Unicode code points
   covered by each row. */
static const unsigned short johab_hangul_page31[51] = {
          0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441, /*0x31-0x37*/
  0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, /*0x38-0x3f*/
  0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441, /*0x40-0x47*/
  0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461, /*0x48-0x4f*/
  0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1, /*0x50-0x57*/
  0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1, /*0x58-0x5f*/
  0x8741, 0x8761, 0x8781, 0x87a1,                                 /*0x60-0x63*/
};
||
171 | |||
/* Tables mapping packed indices to 5-bit groups. */
/* index1+1 = jamo_initial_index[bitspart1]  <==>
   bitspart1 = jamo_initial_index_inverse[index1] */
static const char jamo_initial_index_inverse[19] = {
              0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14,
};
/* index2+1 = jamo_medial_index[bitspart2]  <==>
   bitspart2 = jamo_medial_index_inverse[index2] */
static const char jamo_medial_index_inverse[21] = {
                    0x03, 0x04, 0x05, 0x06, 0x07,
              0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
              0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
              0x1a, 0x1b, 0x1c, 0x1d,
};
/* index3 = jamo_final_index[bitspart3]  <==>
   bitspart3 = jamo_final_index_inverse[index3] */
static const char jamo_final_index_inverse[28] = {
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11,       0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
};
||
196 | |||
197 | static int |
||
198 | johab_hangul_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) |
||
199 | { |
||
200 | if (n >= 2) { |
||
201 | if (wc >= 0x3131 && wc < 0x3164) { |
||
202 | unsigned short c = johab_hangul_page31[wc-0x3131]; |
||
203 | r[0] = (c >> 8); r[1] = (c & 0xff); |
||
204 | return 2; |
||
205 | } else if (wc >= 0xac00 && wc < 0xd7a4) { |
||
206 | unsigned int index1; |
||
207 | unsigned int index2; |
||
208 | unsigned int index3; |
||
209 | unsigned short c; |
||
210 | unsigned int tmp = wc - 0xac00; |
||
211 | index3 = tmp % 28; tmp = tmp / 28; |
||
212 | index2 = tmp % 21; tmp = tmp / 21; |
||
213 | index1 = tmp; |
||
214 | c = (((((1 << 5) |
||
215 | | jamo_initial_index_inverse[index1]) << 5) |
||
216 | | jamo_medial_index_inverse[index2]) << 5) |
||
217 | | jamo_final_index_inverse[index3]; |
||
218 | r[0] = (c >> 8); r[1] = (c & 0xff); |
||
219 | return 2; |
||
220 | } |
||
221 | return RET_ILUNI; |
||
222 | } |
||
223 | return RET_TOOSMALL; |
||
224 | } |
||
225 | |||
226 | /* |
||
227 | * Decomposition of JOHAB Hangul in one to three Johab Jamo elements. |
||
228 | */ |
||
229 | |||
230 | /* Decompose wc into r[0..2], and return the number of resulting Jamo elements. |
||
231 | Return RET_ILUNI if decomposition is not possible. */ |
||
232 | |||
233 | static int johab_hangul_decompose (conv_t conv, ucs4_t* r, ucs4_t wc) |
||
234 | { |
||
235 | unsigned char buf[2]; |
||
236 | int ret = johab_hangul_wctomb(conv,buf,wc,2); |
||
237 | if (ret != RET_ILUNI) { |
||
238 | unsigned int hangul = (buf[0] << 8) | buf[1]; |
||
239 | unsigned char jamo1 = jamo_initial[(hangul >> 10) & 31]; |
||
240 | unsigned char jamo2 = jamo_medial[(hangul >> 5) & 31]; |
||
241 | unsigned char jamo3 = jamo_final[hangul & 31]; |
||
242 | if ((hangul >> 15) != 1) abort(); |
||
243 | if (jamo1 != NONE && jamo2 != NONE && jamo3 != NONE) { |
||
244 | /* They are not all three == FILL because that would correspond to |
||
245 | johab = 0x8441, which doesn't exist. */ |
||
246 | ucs4_t* p = r; |
||
247 | if (jamo1 != FILL) |
||
248 | *p++ = 0x3130 + jamo1; |
||
249 | if (jamo2 != FILL) |
||
250 | *p++ = 0x3130 + jamo2; |
||
251 | if (jamo3 != FILL) |
||
252 | *p++ = 0x3130 + jamo3; |
||
253 | return p-r; |
||
254 | } |
||
255 | } |
||
256 | return RET_ILUNI; |
||
257 | } |
||
258 | |||
/* Remove the table-helper macros defined earlier in this file so that they
   do not leak into code that follows. */
#undef fill
#undef none
#undef FILL
#undef NONE