nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc. |
||
3 | * This file is part of the GNU LIBICONV Library. |
||
4 | * |
||
5 | * The GNU LIBICONV Library is free software; you can redistribute it |
||
6 | * and/or modify it under the terms of the GNU Library General Public |
||
7 | * License as published by the Free Software Foundation; either version 2 |
||
8 | * of the License, or (at your option) any later version. |
||
9 | * |
||
10 | * The GNU LIBICONV Library is distributed in the hope that it will be |
||
11 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
13 | * Library General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU Library General Public |
||
16 | * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
||
17 | * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, |
||
18 | * Fifth Floor, Boston, MA 02110-1301, USA. |
||
19 | */ |
||
20 | |||
21 | /* |
||
22 | * ISO-IR-165 |
||
23 | */ |
||
24 | |||
25 | /* |
||
26 | * ISO-IR-165 is an extension of GB 2312, consisting of: |
||
27 | * 1. GB 6345.1-86 corrections: |
||
28 | * Two corrections to GB 2312, at 0x2367 and 0x6F71. |
||
29 | * 2. GB 6345.1-86 additions: |
||
30 | * - 6 new full-width pinyin characters in row 0x28. |
||
31 | * - ISO646-CN in row 0x2A. |
||
32 | * - 32 half-width pinyin characters in row 0x2B. |
||
33 | * 3. GB 8565.2-88 additions: |
||
34 | * - 50 characters in row 0x2D. |
||
35 | * - 92 characters in row 0x2E. |
||
36 | * - 93 characters in row 0x2F. |
||
37 | * - 470 characters in rows 0x7A-0x7E. |
||
38 | * 4. ISO-IR-165 additions: |
||
39 | * - 22 characters in row 0x26. |
||
40 | * - 94 characters in row 0x2C. |
||
41 | * - 44 new characters in row 0x2D. |
||
42 | * - 1 new character in row 0x2F. |
||
43 | * |
||
44 | * The conversion table was created from the following sources: |
||
45 | * Ad 1. The 0x2367 correction is already integrated in the unicode.org |
||
46 | * GB2312.TXT table. The 0x6F71 mapping is the same in the unicode.org |
||
47 | * GB2312.TXT and UNIHAN.TXT table and in Koichi Yasuoka's Uni2GB table, |
||
48 | * so we assume it's correct. |
||
49 | * The unicode.org UNIHAN.TXT table about GB 8565 is not usable: it has |
||
50 | * extraneous code points at rows 0x28, 0x2C, 0x2D. Note also that it does |
||
51 | * not list the 69 non-hanzi in row 0x2F. Moreover, it has the characters |
||
52 | * 0x2F7A-0x2F7D shifted down by one to 0x2F79-0x2F7C. |
||
53 | * Therefore we take the GB8565 and ISO-IR-165 data from Koichi Yasuoka's |
||
54 | * Uni2GB table. |
||
55 | * Ad 1. Yasuoka maps 0x2367 to U+0261 (small script g) and 0x2840 to U+FF47 |
||
56 | * (full-width small normal g). While coherent with ISO-IR's 165.pdf, |
||
57 | * this disagrees with Ken Lunde's book: He says that ISO-IR-165 |
||
58 | * includes the GB6345 correction, i.e. maps 0x2367 to U+FF47 or U+0067 |
||
59 | * and _not_ to U+0261 (small script g). |
||
60 | * To overcome the confusion, we just map both 0x2367 and 0x2840 to |
||
61 | * U+FF47. |
||
62 | * Ad 2. Row 0x28: Add a mapping from 0x283F to U+01F9. |
||
63 | * Row 0x2A: Mapping is well-known, also present in Koichi Yasuoka's |
||
64 | * table. |
||
65 | * Row 0x2B: Typed in by hand from appendix E in Ken Lunde's book. |
||
66 | * When converting from Unicode to ISO-IR-165, prefer the half-width |
||
67 | * range 0x2B{21..40} to the full-width range 0x28{21..40}. |
||
68 | * Ad 3. Rows 0x2D, 0x2E: Both Koichi Yasuoka's Uni2GB table and the UNIHAN.TXT |
||
69 | * data for GB 8565 agree here. |
||
70 | * Row 0x2F: Taken from Koichi Yasuoka's Uni2GB table. |
||
71 | * Rows 0x7A-0x7E: Koichi Yasuoka's Uni2GB table and the UNIHAN.TXT |
||
72 | * data for GB 8565 agree here mostly. Differences: |
||
73 | * 0x7C38 -> U+6F26 or U+527A ? We choose U+6F26. |
||
74 | * 0x7C5A -> U+7A40 or U+6996 ? We choose U+6996. |
||
75 | * Ad 4. Row 0x26: Mapping unknown. |
||
76 | * Rows 0x2C, 0x2D: Both Koichi Yasuoka's Uni2GB table and the UNIHAN.TXT |
||
77 | * data for GB 8565 (!) agree here. |
||
78 | * Row 0x2F: Taken from Koichi Yasuoka's Uni2GB table. |
||
79 | */ |
||
80 | |||
81 | #include "isoir165ext.h" |
||
82 | |||
83 | static int |
||
84 | isoir165_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) |
||
85 | { |
||
86 | int ret; |
||
87 | |||
88 | /* Map full-width pinyin (row 0x28) like half-width pinyin (row 0x2B). */ |
||
89 | if (s[0] == 0x28) { |
||
90 | if (n >= 2) { |
||
91 | unsigned char c2 = s[1]; |
||
92 | if (c2 >= 0x21 && c2 <= 0x40) { |
||
93 | unsigned char buf[2]; |
||
94 | buf[0] = 0x2b; |
||
95 | buf[1] = c2; |
||
96 | ret = isoir165ext_mbtowc(conv,pwc,buf,2); |
||
97 | if (ret != RET_ILSEQ) |
||
98 | return ret; |
||
99 | } |
||
100 | } |
||
101 | } |
||
102 | /* Try the GB2312 -> Unicode table. */ |
||
103 | ret = gb2312_mbtowc(conv,pwc,s,n); |
||
104 | if (ret != RET_ILSEQ) |
||
105 | return ret; |
||
106 | /* Row 0x2A is GB_1988-80. */ |
||
107 | if (s[0] == 0x2a) { |
||
108 | if (n >= 2) { |
||
109 | unsigned char c2 = s[1]; |
||
110 | if (c2 >= 0x21 && c2 < 0x7f) { |
||
111 | int ret = iso646_cn_mbtowc(conv,pwc,s+1,1); |
||
112 | if (ret != 1) abort(); |
||
113 | return 2; |
||
114 | } |
||
115 | return RET_ILSEQ; |
||
116 | } |
||
117 | return RET_TOOFEW(0); |
||
118 | } |
||
119 | /* Try the ISO-IR-165 extensions -> Unicode table. */ |
||
120 | ret = isoir165ext_mbtowc(conv,pwc,s,n); |
||
121 | return ret; |
||
122 | } |
||
123 | |||
124 | static int |
||
125 | isoir165_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) |
||
126 | { |
||
127 | unsigned char buf[2]; |
||
128 | int ret; |
||
129 | |||
130 | /* Try the Unicode -> GB2312 table. */ |
||
131 | ret = gb2312_wctomb(conv,buf,wc,2); |
||
132 | if (ret != RET_ILUNI) { |
||
133 | if (ret != 2) abort(); |
||
134 | if (!(buf[0] == 0x28 && buf[1] >= 0x21 && buf[1] <= 0x40)) { |
||
135 | if (n >= 2) { |
||
136 | r[0] = buf[0]; |
||
137 | r[1] = buf[1]; |
||
138 | return 2; |
||
139 | } |
||
140 | return RET_TOOSMALL; |
||
141 | } |
||
142 | } |
||
143 | /* Row 0x2A is GB_1988-80. */ |
||
144 | ret = iso646_cn_wctomb(conv,buf,wc,1); |
||
145 | if (ret != RET_ILUNI) { |
||
146 | if (ret != 1) abort(); |
||
147 | if (buf[0] >= 0x21 && buf[0] < 0x7f) { |
||
148 | if (n >= 2) { |
||
149 | r[0] = 0x2a; |
||
150 | r[1] = buf[0]; |
||
151 | return 2; |
||
152 | } |
||
153 | return RET_TOOSMALL; |
||
154 | } |
||
155 | } |
||
156 | /* Try the Unicode -> ISO-IR-165 extensions table. */ |
||
157 | ret = isoir165ext_wctomb(conv,r,wc,n); |
||
158 | return ret; |
||
159 | } |