nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Copyright (C) 1999-2002, 2006 Free Software Foundation, Inc. |
||
3 | * This file is part of the GNU LIBICONV Library. |
||
4 | * |
||
5 | * The GNU LIBICONV Library is free software; you can redistribute it |
||
6 | * and/or modify it under the terms of the GNU Library General Public |
||
7 | * License as published by the Free Software Foundation; either version 2 |
||
8 | * of the License, or (at your option) any later version. |
||
9 | * |
||
10 | * The GNU LIBICONV Library is distributed in the hope that it will be |
||
11 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
13 | * Library General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU Library General Public |
||
16 | * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
||
17 | * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, |
||
18 | * Fifth Floor, Boston, MA 02110-1301, USA. |
||
19 | */ |
||
20 | |||
21 | /* |
||
22 | * BIG5-HKSCS:1999 |
||
23 | */ |
||
24 | |||
25 | /* |
||
26 | * BIG5-HKSCS:1999 can be downloaded from |
||
27 | * http://www.info.gov.hk/digital21/eng/hkscs/download.html |
||
28 | * http://www.info.gov.hk/digital21/eng/hkscs/index.html |
||
29 | * |
||
30 | * It extends BIG5 (without the rows 0xC6..0xC7) through the ranges |
||
31 | * |
||
32 | * 0x{88..8D}{40..7E,A1..FE} 641 characters |
||
33 | * 0x{8E..A0}{40..7E,A1..FE} 2898 characters |
||
34 | * 0x{C6..C8}{40..7E,A1..FE} 359 characters |
||
35 | * 0xF9{D6..FE} 41 characters |
||
36 | * 0x{FA..FE}{40..7E,A1..FE} 763 characters |
||
37 | * |
||
38 | * Note that some HKSCS characters are not contained in Unicode 3.2 |
||
39 | * and are therefore best represented as sequences of Unicode characters: |
||
40 | * 0x8862 U+00CA U+0304 LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON |
||
41 | * 0x8864 U+00CA U+030C LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON |
||
42 | * 0x88A3 U+00EA U+0304 LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON |
||
43 | * 0x88A5 U+00EA U+030C LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON |
||
44 | */ |
||
45 | |||
46 | #include "hkscs1999.h" |
||
47 | #include "flushwc.h" |
||
48 | |||
49 | static int |
||
50 | big5hkscs1999_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) |
||
51 | { |
||
52 | ucs4_t last_wc = conv->istate; |
||
53 | if (last_wc) { |
||
54 | /* Output the buffered character. */ |
||
55 | conv->istate = 0; |
||
56 | *pwc = last_wc; |
||
57 | return 0; /* Don't advance the input pointer. */ |
||
58 | } else { |
||
59 | unsigned char c = *s; |
||
60 | /* Code set 0 (ASCII) */ |
||
61 | if (c < 0x80) |
||
62 | return ascii_mbtowc(conv,pwc,s,n); |
||
63 | /* Code set 1 (BIG5 extended) */ |
||
64 | if (c >= 0xa1 && c < 0xff) { |
||
65 | if (n < 2) |
||
66 | return RET_TOOFEW(0); |
||
67 | { |
||
68 | unsigned char c2 = s[1]; |
||
69 | if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0xa1 && c2 < 0xff)) { |
||
70 | if (!((c == 0xc6 && c2 >= 0xa1) || c == 0xc7)) { |
||
71 | int ret = big5_mbtowc(conv,pwc,s,2); |
||
72 | if (ret != RET_ILSEQ) |
||
73 | return ret; |
||
74 | } |
||
75 | } |
||
76 | } |
||
77 | } |
||
78 | { |
||
79 | int ret = hkscs1999_mbtowc(conv,pwc,s,n); |
||
80 | if (ret != RET_ILSEQ) |
||
81 | return ret; |
||
82 | } |
||
83 | if (c == 0x88) { |
||
84 | if (n < 2) |
||
85 | return RET_TOOFEW(0); |
||
86 | { |
||
87 | unsigned char c2 = s[1]; |
||
88 | if (c2 == 0x62 || c2 == 0x64 || c2 == 0xa3 || c2 == 0xa5) { |
||
89 | /* It's a composed character. */ |
||
90 | ucs4_t wc1 = ((c2 >> 3) << 2) + 0x009a; /* = 0x00ca or 0x00ea */ |
||
91 | ucs4_t wc2 = ((c2 & 6) << 2) + 0x02fc; /* = 0x0304 or 0x030c */ |
||
92 | /* We cannot output two Unicode characters at once. So, |
||
93 | output the first character and buffer the second one. */ |
||
94 | *pwc = wc1; |
||
95 | conv->istate = wc2; |
||
96 | return 2; |
||
97 | } |
||
98 | } |
||
99 | } |
||
100 | return RET_ILSEQ; |
||
101 | } |
||
102 | } |
||
103 | |||
104 | #define big5hkscs1999_flushwc normal_flushwc |
||
105 | |||
106 | static int |
||
107 | big5hkscs1999_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) |
||
108 | { |
||
109 | int count = 0; |
||
110 | unsigned char last = conv->ostate; |
||
111 | |||
112 | if (last) { |
||
113 | /* last is = 0x66 or = 0xa7. */ |
||
114 | if (wc == 0x0304 || wc == 0x030c) { |
||
115 | /* Output the combined character. */ |
||
116 | if (n >= 2) { |
||
117 | r[0] = 0x88; |
||
118 | r[1] = last + ((wc & 24) >> 2) - 4; /* = 0x62 or 0x64 or 0xa3 or 0xa5 */ |
||
119 | conv->ostate = 0; |
||
120 | return 2; |
||
121 | } else |
||
122 | return RET_TOOSMALL; |
||
123 | } |
||
124 | |||
125 | /* Output the buffered character. */ |
||
126 | if (n < 2) |
||
127 | return RET_TOOSMALL; |
||
128 | r[0] = 0x88; |
||
129 | r[1] = last; |
||
130 | r += 2; |
||
131 | count = 2; |
||
132 | } |
||
133 | |||
134 | /* Code set 0 (ASCII) */ |
||
135 | if (wc < 0x0080) { |
||
136 | /* Plain ASCII character. */ |
||
137 | if (n > count) { |
||
138 | r[0] = (unsigned char) wc; |
||
139 | conv->ostate = 0; |
||
140 | return count+1; |
||
141 | } else |
||
142 | return RET_TOOSMALL; |
||
143 | } else { |
||
144 | unsigned char buf[2]; |
||
145 | int ret; |
||
146 | |||
147 | /* Code set 1 (BIG5 extended) */ |
||
148 | ret = big5_wctomb(conv,buf,wc,2); |
||
149 | if (ret != RET_ILUNI) { |
||
150 | if (ret != 2) abort(); |
||
151 | if (!((buf[0] == 0xc6 && buf[1] >= 0xa1) || buf[0] == 0xc7)) { |
||
152 | if (n >= count+2) { |
||
153 | r[0] = buf[0]; |
||
154 | r[1] = buf[1]; |
||
155 | conv->ostate = 0; |
||
156 | return count+2; |
||
157 | } else |
||
158 | return RET_TOOSMALL; |
||
159 | } |
||
160 | } |
||
161 | ret = hkscs1999_wctomb(conv,buf,wc,2); |
||
162 | if (ret != RET_ILUNI) { |
||
163 | if (ret != 2) abort(); |
||
164 | if ((wc & ~0x0020) == 0x00ca) { |
||
165 | /* A possible first character of a multi-character sequence. We have to |
||
166 | buffer it. */ |
||
167 | if (!(buf[0] == 0x88 && (buf[1] == 0x66 || buf[1] == 0xa7))) abort(); |
||
168 | conv->ostate = buf[1]; /* = 0x66 or = 0xa7 */ |
||
169 | return count+0; |
||
170 | } |
||
171 | if (n >= count+2) { |
||
172 | r[0] = buf[0]; |
||
173 | r[1] = buf[1]; |
||
174 | conv->ostate = 0; |
||
175 | return count+2; |
||
176 | } else |
||
177 | return RET_TOOSMALL; |
||
178 | } |
||
179 | return RET_ILUNI; |
||
180 | } |
||
181 | } |
||
182 | |||
183 | static int |
||
184 | big5hkscs1999_reset (conv_t conv, unsigned char *r, int n) |
||
185 | { |
||
186 | unsigned char last = conv->ostate; |
||
187 | |||
188 | if (last) { |
||
189 | if (n < 2) |
||
190 | return RET_TOOSMALL; |
||
191 | r[0] = 0x88; |
||
192 | r[1] = last; |
||
193 | /* conv->ostate = 0; will be done by the caller */ |
||
194 | return 2; |
||
195 | } else |
||
196 | return 0; |
||
197 | } |