nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Copyright (C) 1999-2001, 2004 Free Software Foundation, Inc. |
||
3 | * This file is part of the GNU LIBICONV Library. |
||
4 | * |
||
5 | * The GNU LIBICONV Library is free software; you can redistribute it |
||
6 | * and/or modify it under the terms of the GNU Library General Public |
||
7 | * License as published by the Free Software Foundation; either version 2 |
||
8 | * of the License, or (at your option) any later version. |
||
9 | * |
||
10 | * The GNU LIBICONV Library is distributed in the hope that it will be |
||
11 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
13 | * Library General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU Library General Public |
||
16 | * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
||
17 | * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, |
||
18 | * Fifth Floor, Boston, MA 02110-1301, USA. |
||
19 | */ |
||
20 | |||
21 | /* |
||
22 | * CP1258 |
||
23 | */ |
||
24 | |||
25 | #include "flushwc.h" |
||
26 | #include "vietcomb.h" |
||
27 | |||
/* CP1258 byte codes of the five combining tone marks.  The entries are in
   the same order as the tone indices 0..4 used by cp1258_mbtowc
   (U+0300, U+0301, U+0303, U+0309, U+0323); cp1258_wctomb indexes this
   table with the comb1 field of a viet_decomp_table entry. */
static const unsigned char cp1258_comb_table[] = {
  0xcc, /* U+0300 */
  0xec, /* U+0301 */
  0xde, /* U+0303 */
  0xd2, /* U+0309 */
  0xf2, /* U+0323 */
};
||
31 | |||
/* Bitmap of the characters that can occur as a base in viet_comp_table_data.
   For a character wc in 0x0040..0x01bf, bit (wc & 0x1f) of word
   ((wc - 0x0040) >> 5) is set exactly when wc is a possible base.
   The possible bases are:
     0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057,
     0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070,
     0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00A8, 0x00C2, 0x00C5..0x00C7,
     0x00CA, 0x00CF, 0x00D3..0x00D4, 0x00D6, 0x00D8, 0x00DA, 0x00DC, 0x00E2,
     0x00E5..0x00E7, 0x00EA, 0x00EF, 0x00F3..0x00F4, 0x00F6, 0x00F8, 0x00FA,
     0x00FC, 0x0102..0x0103, 0x01A0..0x01A1, 0x01AF..0x01B0. */
static const unsigned int cp1258_comp_bases[] = {
  0x06fdfbbe, /* 0x0040..0x005f */
  0x06fdfbbe, /* 0x0060..0x007f */
  0x00000000, /* 0x0080..0x009f */
  0x00000120, /* 0x00a0..0x00bf */
  0x155884e4, /* 0x00c0..0x00df */
  0x155884e4, /* 0x00e0..0x00ff */
  0x0000000c, /* 0x0100..0x011f */
  0x00000000, /* 0x0120..0x013f */
  0x00000000, /* 0x0140..0x015f */
  0x00000000, /* 0x0160..0x017f */
  0x00000000, /* 0x0180..0x019f */
  0x00018003  /* 0x01a0..0x01bf */
};
||
43 | |||
/* Unicode value of each CP1258 byte 0x80..0xff, indexed by (byte - 0x80).
   0xfffd marks a byte without a mapping; cp1258_mbtowc rejects it with
   RET_ILSEQ. */
static const unsigned short cp1258_2uni[128] = {
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, /* 0x80-0x87 */
  0x02c6, 0x2030, 0xfffd, 0x2039, 0x0152, 0xfffd, 0xfffd, 0xfffd, /* 0x88-0x8f */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, /* 0x90-0x97 */
  0x02dc, 0x2122, 0xfffd, 0x203a, 0x0153, 0xfffd, 0xfffd, 0x0178, /* 0x98-0x9f */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, /* 0xa0-0xa7 */
  0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, /* 0xa8-0xaf */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, /* 0xb0-0xb7 */
  0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, /* 0xb8-0xbf */
  0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x00c5, 0x00c6, 0x00c7, /* 0xc0-0xc7 */
  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x0300, 0x00cd, 0x00ce, 0x00cf, /* 0xc8-0xcf */
  0x0110, 0x00d1, 0x0309, 0x00d3, 0x00d4, 0x01a0, 0x00d6, 0x00d7, /* 0xd0-0xd7 */
  0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x01af, 0x0303, 0x00df, /* 0xd8-0xdf */
  0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x00e5, 0x00e6, 0x00e7, /* 0xe0-0xe7 */
  0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x0301, 0x00ed, 0x00ee, 0x00ef, /* 0xe8-0xef */
  0x0111, 0x00f1, 0x0323, 0x00f3, 0x00f4, 0x01a1, 0x00f6, 0x00f7, /* 0xf0-0xf7 */
  0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x01b0, 0x20ab, 0x00ff, /* 0xf8-0xff */
};
||
70 | |||
71 | /* In the CP1258 to Unicode direction, the state contains a buffered |
||
72 | character, or 0 if none. */ |
||
73 | |||
74 | static int |
||
75 | cp1258_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) |
||
76 | { |
||
77 | unsigned char c = *s; |
||
78 | unsigned short wc; |
||
79 | unsigned short last_wc; |
||
80 | if (c < 0x80) { |
||
81 | wc = c; |
||
82 | } else { |
||
83 | wc = cp1258_2uni[c-0x80]; |
||
84 | if (wc == 0xfffd) |
||
85 | return RET_ILSEQ; |
||
86 | } |
||
87 | last_wc = conv->istate; |
||
88 | if (last_wc) { |
||
89 | if (wc >= 0x0300 && wc < 0x0340) { |
||
90 | /* See whether last_wc and wc can be combined. */ |
||
91 | unsigned int k; |
||
92 | unsigned int i1, i2; |
||
93 | switch (wc) { |
||
94 | case 0x0300: k = 0; break; |
||
95 | case 0x0301: k = 1; break; |
||
96 | case 0x0303: k = 2; break; |
||
97 | case 0x0309: k = 3; break; |
||
98 | case 0x0323: k = 4; break; |
||
99 | default: abort(); |
||
100 | } |
||
101 | i1 = viet_comp_table[k].idx; |
||
102 | i2 = i1 + viet_comp_table[k].len-1; |
||
103 | if (last_wc >= viet_comp_table_data[i1].base |
||
104 | && last_wc <= viet_comp_table_data[i2].base) { |
||
105 | unsigned int i; |
||
106 | for (;;) { |
||
107 | i = (i1+i2)>>1; |
||
108 | if (last_wc == viet_comp_table_data[i].base) |
||
109 | break; |
||
110 | if (last_wc < viet_comp_table_data[i].base) { |
||
111 | if (i1 == i) |
||
112 | goto not_combining; |
||
113 | i2 = i; |
||
114 | } else { |
||
115 | if (i1 != i) |
||
116 | i1 = i; |
||
117 | else { |
||
118 | i = i2; |
||
119 | if (last_wc == viet_comp_table_data[i].base) |
||
120 | break; |
||
121 | goto not_combining; |
||
122 | } |
||
123 | } |
||
124 | } |
||
125 | last_wc = viet_comp_table_data[i].composed; |
||
126 | /* Output the combined character. */ |
||
127 | conv->istate = 0; |
||
128 | *pwc = (ucs4_t) last_wc; |
||
129 | return 1; |
||
130 | } |
||
131 | } |
||
132 | not_combining: |
||
133 | /* Output the buffered character. */ |
||
134 | conv->istate = 0; |
||
135 | *pwc = (ucs4_t) last_wc; |
||
136 | return 0; /* Don't advance the input pointer. */ |
||
137 | } |
||
138 | if (wc >= 0x0041 && wc <= 0x01b0 |
||
139 | && ((cp1258_comp_bases[(wc - 0x0040) >> 5] >> (wc & 0x1f)) & 1)) { |
||
140 | /* wc is a possible match in viet_comp_table_data. Buffer it. */ |
||
141 | conv->istate = wc; |
||
142 | return RET_TOOFEW(1); |
||
143 | } else { |
||
144 | /* Output wc immediately. */ |
||
145 | *pwc = (ucs4_t) wc; |
||
146 | return 1; |
||
147 | } |
||
148 | } |
||
149 | |||
/* The CP1258 flush entry point is an alias for normal_flushwc.
   NOTE(review): normal_flushwc is presumably declared in "flushwc.h" and
   emits the character buffered in conv->istate -- confirm against that
   header. */
#define cp1258_flushwc normal_flushwc
||
151 | |||
/* CP1258 byte for each Unicode character U+00C0..U+0117, indexed by
   (wc - 0x00c0).  An entry of 0x00 means there is no single-byte encoding
   for that character. */
static const unsigned char cp1258_page00[88] = {
  0xc0, 0xc1, 0xc2, 0x00, 0xc4, 0xc5, 0xc6, 0xc7, /* U+00C0..U+00C7 */
  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, /* U+00C8..U+00CF */
  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, /* U+00D0..U+00D7 */
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* U+00D8..U+00DF */
  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, /* U+00E0..U+00E7 */
  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, /* U+00E8..U+00EF */
  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, /* U+00F0..U+00F7 */
  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, /* U+00F8..U+00FF */
  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, /* U+0100..U+0107 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0108..U+010F */
  0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0110..U+0117 */
};
||
/* CP1258 byte for each Unicode character U+0150..U+01B7, indexed by
   (wc - 0x0150).  An entry of 0x00 means there is no single-byte encoding
   for that character. */
static const unsigned char cp1258_page01[104] = {
  0x00, 0x00, 0x8c, 0x9c, 0x00, 0x00, 0x00, 0x00, /* U+0150..U+0157 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0158..U+015F */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0160..U+0167 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0168..U+016F */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0170..U+0177 */
  0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0178..U+017F */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0180..U+0187 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0188..U+018F */
  0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0190..U+0197 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0198..U+019F */
  0xd5, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+01A0..U+01A7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, /* U+01A8..U+01AF */
  0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+01B0..U+01B7 */
};
||
/* CP1258 byte for each Unicode character U+02C0..U+02DF, indexed by
   (wc - 0x02c0).  An entry of 0x00 means there is no single-byte encoding
   for that character. */
static const unsigned char cp1258_page02[32] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* U+02C0..U+02C7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+02C8..U+02CF */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+02D0..U+02D7 */
  0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* U+02D8..U+02DF */
};
||
/* CP1258 byte for each Unicode character U+0300..U+0327, indexed by
   (wc - 0x0300).  An entry of 0x00 means there is no single-byte encoding
   for that character.  cp1258_wctomb also uses the first two entries for
   U+0340 and U+0341. */
static const unsigned char cp1258_page03[40] = {
  0xcc, 0xec, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, /* U+0300..U+0307 */
  0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0308..U+030F */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0310..U+0317 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+0318..U+031F */
  0x00, 0x00, 0x00, 0xf2, 0x00, 0x00, 0x00, 0x00, /* U+0320..U+0327 */
};
||
/* CP1258 byte for each Unicode character U+2010..U+203F, indexed by
   (wc - 0x2010).  An entry of 0x00 means there is no single-byte encoding
   for that character. */
static const unsigned char cp1258_page20[48] = {
  0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* U+2010..U+2017 */
  0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* U+2018..U+201F */
  0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* U+2020..U+2027 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+2028..U+202F */
  0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+2030..U+2037 */
  0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* U+2038..U+203F */
};
||
202 | |||
203 | static int |
||
204 | cp1258_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) |
||
205 | { |
||
206 | unsigned char c = 0; |
||
207 | if (wc < 0x0080) { |
||
208 | *r = wc; |
||
209 | return 1; |
||
210 | } |
||
211 | else if (wc >= 0x00a0 && wc < 0x00c0) |
||
212 | c = wc; |
||
213 | else if (wc >= 0x00c0 && wc < 0x0118) |
||
214 | c = cp1258_page00[wc-0x00c0]; |
||
215 | else if (wc >= 0x0150 && wc < 0x01b8) |
||
216 | c = cp1258_page01[wc-0x0150]; |
||
217 | else if (wc >= 0x02c0 && wc < 0x02e0) |
||
218 | c = cp1258_page02[wc-0x02c0]; |
||
219 | else if (wc >= 0x0300 && wc < 0x0328) |
||
220 | c = cp1258_page03[wc-0x0300]; |
||
221 | else if (wc >= 0x0340 && wc < 0x0342) /* deprecated Vietnamese tone marks */ |
||
222 | c = cp1258_page03[wc-0x0340]; |
||
223 | else if (wc >= 0x2010 && wc < 0x2040) |
||
224 | c = cp1258_page20[wc-0x2010]; |
||
225 | else if (wc == 0x20ab) |
||
226 | c = 0xfe; |
||
227 | else if (wc == 0x20ac) |
||
228 | c = 0x80; |
||
229 | else if (wc == 0x2122) |
||
230 | c = 0x99; |
||
231 | if (c != 0) { |
||
232 | *r = c; |
||
233 | return 1; |
||
234 | } |
||
235 | /* Try canonical decomposition. */ |
||
236 | { |
||
237 | /* Binary search through viet_decomp_table. */ |
||
238 | unsigned int i1 = 0; |
||
239 | unsigned int i2 = sizeof(viet_decomp_table)/sizeof(viet_decomp_table[0])-1; |
||
240 | if (wc >= viet_decomp_table[i1].composed |
||
241 | && wc <= viet_decomp_table[i2].composed) { |
||
242 | unsigned int i; |
||
243 | for (;;) { |
||
244 | /* Here i2 - i1 > 0. */ |
||
245 | i = (i1+i2)>>1; |
||
246 | if (wc == viet_decomp_table[i].composed) |
||
247 | break; |
||
248 | if (wc < viet_decomp_table[i].composed) { |
||
249 | if (i1 == i) |
||
250 | return RET_ILUNI; |
||
251 | /* Here i1 < i < i2. */ |
||
252 | i2 = i; |
||
253 | } else { |
||
254 | /* Here i1 <= i < i2. */ |
||
255 | if (i1 != i) |
||
256 | i1 = i; |
||
257 | else { |
||
258 | /* Here i2 - i1 = 1. */ |
||
259 | i = i2; |
||
260 | if (wc == viet_decomp_table[i].composed) |
||
261 | break; |
||
262 | else |
||
263 | return RET_ILUNI; |
||
264 | } |
||
265 | } |
||
266 | } |
||
267 | /* Found a canonical decomposition. */ |
||
268 | wc = viet_decomp_table[i].base; |
||
269 | /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8, |
||
270 | 0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6, |
||
271 | 0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef, |
||
272 | 0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0, |
||
273 | 0x01a1, 0x01af, 0x01b0. */ |
||
274 | if (wc < 0x0100) |
||
275 | c = wc; |
||
276 | else if (wc < 0x0118) |
||
277 | c = cp1258_page00[wc-0x00c0]; |
||
278 | else |
||
279 | c = cp1258_page01[wc-0x0150]; |
||
280 | if (n < 2) |
||
281 | return RET_TOOSMALL; |
||
282 | r[0] = c; |
||
283 | r[1] = cp1258_comb_table[viet_decomp_table[i].comb1]; |
||
284 | return 2; |
||
285 | } |
||
286 | } |
||
287 | return RET_ILUNI; |
||
288 | } |