nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Copyright (C) 1999-2008, 2011 Free Software Foundation, Inc. |
||
3 | * This file is part of the GNU LIBICONV Library. |
||
4 | * |
||
5 | * The GNU LIBICONV Library is free software; you can redistribute it |
||
6 | * and/or modify it under the terms of the GNU Library General Public |
||
7 | * License as published by the Free Software Foundation; either version 2 |
||
8 | * of the License, or (at your option) any later version. |
||
9 | * |
||
10 | * The GNU LIBICONV Library is distributed in the hope that it will be |
||
11 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
13 | * Library General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU Library General Public |
||
16 | * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
||
17 | * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, |
||
18 | * Fifth Floor, Boston, MA 02110-1301, USA. |
||
19 | */ |
||
20 | |||
21 | /* Part 1 of iconv_open. |
||
22 | Input: const char* tocode, const char* fromcode. |
||
23 | Output: |
||
24 | unsigned int from_index; |
||
25 | int from_wchar; |
||
26 | unsigned int to_index; |
||
27 | int to_wchar; |
||
28 | int transliterate; |
||
29 | int discard_ilseq; |
||
30 | Jumps to 'invalid' in case of error. |
||
31 | */ |
||
32 | { |
||
33 | char buf[MAX_WORD_LENGTH+10+1]; |
||
34 | const char* cp; |
||
35 | char* bp; |
||
36 | const struct alias * ap; |
||
37 | unsigned int count; |
||
38 | |||
39 | transliterate = 0; |
||
40 | discard_ilseq = 0; |
||
41 | |||
42 | /* Before calling aliases_lookup, convert the input string to upper case, |
||
43 | * and check whether it's entirely ASCII (we call gperf with option "-7" |
||
44 | * to achieve a smaller table) and non-empty. If it's not entirely ASCII, |
||
45 | * or if it's too long, it is not a valid encoding name. |
||
46 | */ |
||
47 | for (to_wchar = 0;;) { |
||
48 | /* Search tocode in the table. */ |
||
49 | for (cp = tocode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { |
||
50 | unsigned char c = * (unsigned char *) cp; |
||
51 | if (c >= 0x80) |
||
52 | goto invalid; |
||
53 | if (c >= 'a' && c <= 'z') |
||
54 | c -= 'a'-'A'; |
||
55 | *bp = c; |
||
56 | if (c == '\0') |
||
57 | break; |
||
58 | if (--count == 0) |
||
59 | goto invalid; |
||
60 | } |
||
61 | for (;;) { |
||
62 | if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { |
||
63 | bp -= 10; |
||
64 | *bp = '\0'; |
||
65 | transliterate = 1; |
||
66 | continue; |
||
67 | } |
||
68 | if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { |
||
69 | bp -= 8; |
||
70 | *bp = '\0'; |
||
71 | discard_ilseq = 1; |
||
72 | continue; |
||
73 | } |
||
74 | break; |
||
75 | } |
||
76 | if (buf[0] == '\0') { |
||
77 | tocode = locale_charset(); |
||
78 | /* Avoid an endless loop that could occur when using an older version |
||
79 | of localcharset.c. */ |
||
80 | if (tocode[0] == '\0') |
||
81 | goto invalid; |
||
82 | continue; |
||
83 | } |
||
84 | ap = aliases_lookup(buf,bp-buf); |
||
85 | if (ap == NULL) { |
||
86 | ap = aliases2_lookup(buf); |
||
87 | if (ap == NULL) |
||
88 | goto invalid; |
||
89 | } |
||
90 | if (ap->encoding_index == ei_local_char) { |
||
91 | tocode = locale_charset(); |
||
92 | /* Avoid an endless loop that could occur when using an older version |
||
93 | of localcharset.c. */ |
||
94 | if (tocode[0] == '\0') |
||
95 | goto invalid; |
||
96 | continue; |
||
97 | } |
||
98 | if (ap->encoding_index == ei_local_wchar_t) { |
||
99 | /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. |
||
100 | This is also the case on native Woe32 systems and Cygwin >= 1.7, where |
||
101 | we know that it is UTF-16. */ |
||
102 | #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) |
||
103 | if (sizeof(wchar_t) == 4) { |
||
104 | to_index = ei_ucs4internal; |
||
105 | break; |
||
106 | } |
||
107 | if (sizeof(wchar_t) == 2) { |
||
108 | # if WORDS_LITTLEENDIAN |
||
109 | to_index = ei_utf16le; |
||
110 | # else |
||
111 | to_index = ei_utf16be; |
||
112 | # endif |
||
113 | break; |
||
114 | } |
||
115 | #elif __STDC_ISO_10646__ |
||
116 | if (sizeof(wchar_t) == 4) { |
||
117 | to_index = ei_ucs4internal; |
||
118 | break; |
||
119 | } |
||
120 | if (sizeof(wchar_t) == 2) { |
||
121 | to_index = ei_ucs2internal; |
||
122 | break; |
||
123 | } |
||
124 | if (sizeof(wchar_t) == 1) { |
||
125 | to_index = ei_iso8859_1; |
||
126 | break; |
||
127 | } |
||
128 | #endif |
||
129 | #if HAVE_MBRTOWC |
||
130 | to_wchar = 1; |
||
131 | tocode = locale_charset(); |
||
132 | continue; |
||
133 | #endif |
||
134 | goto invalid; |
||
135 | } |
||
136 | to_index = ap->encoding_index; |
||
137 | break; |
||
138 | } |
||
139 | for (from_wchar = 0;;) { |
||
140 | /* Search fromcode in the table. */ |
||
141 | for (cp = fromcode, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) { |
||
142 | unsigned char c = * (unsigned char *) cp; |
||
143 | if (c >= 0x80) |
||
144 | goto invalid; |
||
145 | if (c >= 'a' && c <= 'z') |
||
146 | c -= 'a'-'A'; |
||
147 | *bp = c; |
||
148 | if (c == '\0') |
||
149 | break; |
||
150 | if (--count == 0) |
||
151 | goto invalid; |
||
152 | } |
||
153 | for (;;) { |
||
154 | if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) { |
||
155 | bp -= 10; |
||
156 | *bp = '\0'; |
||
157 | continue; |
||
158 | } |
||
159 | if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) { |
||
160 | bp -= 8; |
||
161 | *bp = '\0'; |
||
162 | continue; |
||
163 | } |
||
164 | break; |
||
165 | } |
||
166 | if (buf[0] == '\0') { |
||
167 | fromcode = locale_charset(); |
||
168 | /* Avoid an endless loop that could occur when using an older version |
||
169 | of localcharset.c. */ |
||
170 | if (fromcode[0] == '\0') |
||
171 | goto invalid; |
||
172 | continue; |
||
173 | } |
||
174 | ap = aliases_lookup(buf,bp-buf); |
||
175 | if (ap == NULL) { |
||
176 | ap = aliases2_lookup(buf); |
||
177 | if (ap == NULL) |
||
178 | goto invalid; |
||
179 | } |
||
180 | if (ap->encoding_index == ei_local_char) { |
||
181 | fromcode = locale_charset(); |
||
182 | /* Avoid an endless loop that could occur when using an older version |
||
183 | of localcharset.c. */ |
||
184 | if (fromcode[0] == '\0') |
||
185 | goto invalid; |
||
186 | continue; |
||
187 | } |
||
188 | if (ap->encoding_index == ei_local_wchar_t) { |
||
189 | /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. |
||
190 | This is also the case on native Woe32 systems and Cygwin >= 1.7, where |
||
191 | we know that it is UTF-16. */ |
||
192 | #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007) |
||
193 | if (sizeof(wchar_t) == 4) { |
||
194 | from_index = ei_ucs4internal; |
||
195 | break; |
||
196 | } |
||
197 | if (sizeof(wchar_t) == 2) { |
||
198 | # if WORDS_LITTLEENDIAN |
||
199 | from_index = ei_utf16le; |
||
200 | # else |
||
201 | from_index = ei_utf16be; |
||
202 | # endif |
||
203 | break; |
||
204 | } |
||
205 | #elif __STDC_ISO_10646__ |
||
206 | if (sizeof(wchar_t) == 4) { |
||
207 | from_index = ei_ucs4internal; |
||
208 | break; |
||
209 | } |
||
210 | if (sizeof(wchar_t) == 2) { |
||
211 | from_index = ei_ucs2internal; |
||
212 | break; |
||
213 | } |
||
214 | if (sizeof(wchar_t) == 1) { |
||
215 | from_index = ei_iso8859_1; |
||
216 | break; |
||
217 | } |
||
218 | #endif |
||
219 | #if HAVE_WCRTOMB |
||
220 | from_wchar = 1; |
||
221 | fromcode = locale_charset(); |
||
222 | continue; |
||
223 | #endif |
||
224 | goto invalid; |
||
225 | } |
||
226 | from_index = ap->encoding_index; |
||
227 | break; |
||
228 | } |
||
229 | } |