nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
/*
 * iconv library implemented with the Win32 API.
 *
 * This file is placed in the public domain.
 *
 * Maintainer: Yukihiro Nakadaira <yukihiro.nakadaira@gmail.com>
 *
 * If the $WINICONV_LIBICONV_DLL environment variable is defined, win_iconv
 * loads the specified DLL dynamically and uses it.  If loading the DLL
 * or iconv_open() fails, it falls back to the internal conversion.
 * $WINICONV_LIBICONV_DLL is a comma-separated list; the first loadable
 * DLL is used.  The specified DLL should have iconv_open(),
 * iconv_close() and iconv() functions, or alternatively
 * libiconv_open(), libiconv_close() and libiconv().
 *
 * The Win32 API does not support strict encoding conversion for some
 * code pages, and the MLang functions drop or replace invalid bytes and do
 * not return a useful error status the way iconv does.  This implementation
 * therefore cannot be used for encoding validation.
 */
21 | |||
22 | /* for WC_NO_BEST_FIT_CHARS */ |
||
23 | #ifndef WINVER |
||
24 | # define WINVER 0x0500 |
||
25 | #endif |
||
26 | |||
27 | #define STRICT |
||
28 | #include <windows.h> |
||
29 | #include <errno.h> |
||
30 | #include <string.h> |
||
31 | #include <stdlib.h> |
||
32 | |||
33 | #ifdef __GNUC__ |
||
34 | #define UNUSED __attribute__((unused)) |
||
35 | #else |
||
36 | #define UNUSED |
||
37 | #endif |
||
38 | |||
39 | /* WORKAROUND: */ |
||
40 | #ifndef UNDER_CE |
||
41 | #define GetProcAddressA GetProcAddress |
||
42 | #endif |
||
43 | |||
44 | #if 0 |
||
45 | # define MAKE_EXE |
||
46 | # define MAKE_DLL |
||
47 | # define USE_LIBICONV_DLL |
||
48 | #endif |
||
49 | |||
50 | #if !defined(DEFAULT_LIBICONV_DLL) |
||
51 | # define DEFAULT_LIBICONV_DLL "" |
||
52 | #endif |
||
53 | |||
54 | #define MB_CHAR_MAX 16 |
||
55 | |||
56 | #define UNICODE_MODE_BOM_DONE 1 |
||
57 | #define UNICODE_MODE_SWAPPED 2 |
||
58 | |||
59 | #define FLAG_USE_BOM 1 |
||
60 | #define FLAG_TRANSLIT 2 /* //TRANSLIT */ |
||
61 | #define FLAG_IGNORE 4 /* //IGNORE */ |
||
62 | |||
63 | typedef unsigned char uchar; |
||
64 | typedef unsigned short ushort; |
||
65 | typedef unsigned int uint; |
||
66 | |||
67 | typedef void* iconv_t; |
||
68 | |||
69 | iconv_t iconv_open(const char *tocode, const char *fromcode); |
||
70 | int iconv_close(iconv_t cd); |
||
71 | size_t iconv(iconv_t cd, /* const */ char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); |
||
72 | |||
73 | /* libiconv interface for vim */ |
||
74 | #if defined(MAKE_DLL) |
||
75 | int |
||
76 | iconvctl (iconv_t cd, int request, void* argument) |
||
77 | { |
||
78 | /* not supported */ |
||
79 | return 0; |
||
80 | } |
||
81 | #endif |
||
82 | |||
83 | typedef struct compat_t compat_t; |
||
84 | typedef struct csconv_t csconv_t; |
||
85 | typedef struct rec_iconv_t rec_iconv_t; |
||
86 | |||
87 | typedef iconv_t (*f_iconv_open)(const char *tocode, const char *fromcode); |
||
88 | typedef int (*f_iconv_close)(iconv_t cd); |
||
89 | typedef size_t (*f_iconv)(iconv_t cd, /* const */ char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); |
||
90 | typedef int* (*f_errno)(void); |
||
91 | typedef int (*f_mbtowc)(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); |
||
92 | typedef int (*f_wctomb)(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); |
||
93 | typedef int (*f_mblen)(csconv_t *cv, const uchar *buf, int bufsize); |
||
94 | typedef int (*f_flush)(csconv_t *cv, uchar *buf, int bufsize); |
||
95 | |||
96 | #define COMPAT_IN 1 |
||
97 | #define COMPAT_OUT 2 |
||
98 | |||
99 | /* unicode mapping for compatibility with other conversion table. */ |
||
100 | struct compat_t { |
||
101 | uint in; |
||
102 | uint out; |
||
103 | uint flag; |
||
104 | }; |
||
105 | |||
106 | struct csconv_t { |
||
107 | int codepage; |
||
108 | int flags; |
||
109 | f_mbtowc mbtowc; |
||
110 | f_wctomb wctomb; |
||
111 | f_mblen mblen; |
||
112 | f_flush flush; |
||
113 | DWORD mode; |
||
114 | compat_t *compat; |
||
115 | }; |
||
116 | |||
117 | struct rec_iconv_t { |
||
118 | iconv_t cd; |
||
119 | f_iconv_close iconv_close; |
||
120 | f_iconv iconv; |
||
121 | f_errno _errno; |
||
122 | csconv_t from; |
||
123 | csconv_t to; |
||
124 | #if defined(USE_LIBICONV_DLL) |
||
125 | HMODULE hlibiconv; |
||
126 | #endif |
||
127 | }; |
||
128 | |||
129 | static int win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); |
||
130 | static int win_iconv_close(iconv_t cd); |
||
131 | static size_t win_iconv(iconv_t cd, /* const */ char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft); |
||
132 | |||
133 | static int load_mlang(void); |
||
134 | static int make_csconv(const char *name, csconv_t *cv); |
||
135 | static int name_to_codepage(const char *name); |
||
136 | static uint utf16_to_ucs4(const ushort *wbuf); |
||
137 | static void ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize); |
||
138 | static int mbtowc_flags(int codepage); |
||
139 | static int must_use_null_useddefaultchar(int codepage); |
||
140 | static char *strrstr(const char *str, const char *token); |
||
141 | static char *xstrndup(const char *s, size_t n); |
||
142 | static int seterror(int err); |
||
143 | |||
144 | #if defined(USE_LIBICONV_DLL) |
||
145 | static int libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode); |
||
146 | static PVOID MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size); |
||
147 | static FARPROC find_imported_function(HMODULE hModule, const char *funcname); |
||
148 | |||
149 | static HMODULE hwiniconv; |
||
150 | #endif |
||
151 | |||
152 | static int sbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); |
||
153 | static int dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); |
||
154 | static int mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize); |
||
155 | static int utf8_mblen(csconv_t *cv, const uchar *buf, int bufsize); |
||
156 | static int eucjp_mblen(csconv_t *cv, const uchar *buf, int bufsize); |
||
157 | |||
158 | static int kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); |
||
159 | static int kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); |
||
160 | static int mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); |
||
161 | static int mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); |
||
162 | static int utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); |
||
163 | static int utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); |
||
164 | static int utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); |
||
165 | static int utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); |
||
166 | static int iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize); |
||
167 | static int iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize); |
||
168 | static int iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize); |
||
169 | |||
/*
 * Encoding-name aliases and the Windows code page each one stands for.
 * The table is terminated by a {0, NULL} entry.  Names come from
 * libiconv's `iconv -l` list and from the MSDN "Code Page Identifiers"
 * page, so several names appear more than once (sometimes differing only
 * in case).  NOTE(review): which duplicate wins depends on the lookup
 * code, which is not visible here.
 */
static struct {
    int codepage;
    const char *name;
} codepage_alias[] = {
    {65001, "CP65001"},
    {65001, "UTF8"},
    {65001, "UTF-8"},

    {1200, "CP1200"},
    {1200, "UTF16LE"},
    {1200, "UTF-16LE"},
    {1200, "UCS2LE"},
    {1200, "UCS-2LE"},

    {1201, "CP1201"},
    {1201, "UTF16BE"},
    {1201, "UTF-16BE"},
    {1201, "UCS2BE"},
    {1201, "UCS-2BE"},
    {1201, "unicodeFFFE"},

    {12000, "CP12000"},
    {12000, "UTF32LE"},
    {12000, "UTF-32LE"},
    {12000, "UCS4LE"},
    {12000, "UCS-4LE"},

    {12001, "CP12001"},
    {12001, "UTF32BE"},
    {12001, "UTF-32BE"},
    {12001, "UCS4BE"},
    {12001, "UCS-4BE"},

#ifndef GLIB_COMPILATION
    /*
     * Default is big endian.
     * See rfc2781 4.3 Interpreting text labelled as UTF-16.
     */
    {1201, "UTF16"},
    {1201, "UTF-16"},
    {1201, "UCS2"},
    {1201, "UCS-2"},
    {12001, "UTF32"},
    {12001, "UTF-32"},
    {12001, "UCS-4"},
    {12001, "UCS4"},
#else
    /* Default is little endian, because the platform is */
    {1200, "UTF16"},
    {1200, "UTF-16"},
    {1200, "UCS2"},
    {1200, "UCS-2"},
    {12000, "UTF32"},
    {12000, "UTF-32"},
    {12000, "UCS4"},
    {12000, "UCS-4"},
#endif

    /* copy from libiconv `iconv -l` */
    /* !IsValidCodePage(367) */
    {20127, "ANSI_X3.4-1968"},
    {20127, "ANSI_X3.4-1986"},
    {20127, "ASCII"},
    {20127, "CP367"},
    {20127, "IBM367"},
    {20127, "ISO-IR-6"},
    {20127, "ISO646-US"},
    {20127, "ISO_646.IRV:1991"},
    {20127, "US"},
    {20127, "US-ASCII"},
    {20127, "CSASCII"},

    /* !IsValidCodePage(819) */
    {1252, "CP819"},
    {1252, "IBM819"},
    {28591, "ISO-8859-1"},
    {28591, "ISO-IR-100"},
    {28591, "ISO8859-1"},
    {28591, "ISO_8859-1"},
    {28591, "ISO_8859-1:1987"},
    {28591, "L1"},
    {28591, "LATIN1"},
    {28591, "CSISOLATIN1"},

    {1250, "CP1250"},
    {1250, "MS-EE"},
    {1250, "WINDOWS-1250"},

    {1251, "CP1251"},
    {1251, "MS-CYRL"},
    {1251, "WINDOWS-1251"},

    {1252, "CP1252"},
    {1252, "MS-ANSI"},
    {1252, "WINDOWS-1252"},

    {1253, "CP1253"},
    {1253, "MS-GREEK"},
    {1253, "WINDOWS-1253"},

    {1254, "CP1254"},
    {1254, "MS-TURK"},
    {1254, "WINDOWS-1254"},

    {1255, "CP1255"},
    {1255, "MS-HEBR"},
    {1255, "WINDOWS-1255"},

    {1256, "CP1256"},
    {1256, "MS-ARAB"},
    {1256, "WINDOWS-1256"},

    {1257, "CP1257"},
    {1257, "WINBALTRIM"},
    {1257, "WINDOWS-1257"},

    {1258, "CP1258"},
    {1258, "WINDOWS-1258"},

    {850, "850"},
    {850, "CP850"},
    {850, "IBM850"},
    {850, "CSPC850MULTILINGUAL"},

    /* !IsValidCodePage(862) */
    {862, "862"},
    {862, "CP862"},
    {862, "IBM862"},
    {862, "CSPC862LATINHEBREW"},

    {866, "866"},
    {866, "CP866"},
    {866, "IBM866"},
    {866, "CSIBM866"},

    /* !IsValidCodePage(154) */
    {154, "CP154"},
    {154, "CYRILLIC-ASIAN"},
    {154, "PT154"},
    {154, "PTCP154"},
    {154, "CSPTCP154"},

    /* !IsValidCodePage(1133) */
    {1133, "CP1133"},
    {1133, "IBM-CP1133"},

    {874, "CP874"},
    {874, "WINDOWS-874"},

    /* !IsValidCodePage(51932) */
    {51932, "CP51932"},
    {51932, "MS51932"},
    {51932, "WINDOWS-51932"},
    {51932, "EUC-JP"},

    {932, "CP932"},
    {932, "MS932"},
    {932, "SHIFT_JIS"},
    {932, "SHIFT_JIS-MS"},
    /* Misspelled aliases from earlier revisions, kept for compatibility. */
    {932, "SHIFFT_JIS"},
    {932, "SHIFFT_JIS-MS"},
    {932, "SJIS"},
    {932, "SJIS-MS"},
    {932, "SJIS-OPEN"},
    {932, "SJIS-WIN"},
    {932, "WINDOWS-31J"},
    {932, "WINDOWS-932"},
    {932, "CSWINDOWS31J"},

    {50221, "CP50221"},
    {50221, "ISO-2022-JP"},
    {50221, "ISO-2022-JP-MS"},
    {50221, "ISO2022-JP"},
    {50221, "ISO2022-JP-MS"},
    {50221, "MS50221"},
    {50221, "WINDOWS-50221"},

    {936, "CP936"},
    {936, "GBK"},
    {936, "MS936"},
    {936, "WINDOWS-936"},

    {950, "CP950"},
    {950, "BIG5"},
    {950, "BIG5HKSCS"},
    {950, "BIG5-HKSCS"},

    {949, "CP949"},
    {949, "UHC"},
    {949, "EUC-KR"},

    {1361, "CP1361"},
    {1361, "JOHAB"},

    {437, "437"},
    {437, "CP437"},
    {437, "IBM437"},
    {437, "CSPC8CODEPAGE437"},

    {737, "CP737"},

    {775, "CP775"},
    {775, "IBM775"},
    {775, "CSPC775BALTIC"},

    {852, "852"},
    {852, "CP852"},
    {852, "IBM852"},
    {852, "CSPCP852"},

    /* !IsValidCodePage(853) */
    {853, "CP853"},

    {855, "855"},
    {855, "CP855"},
    {855, "IBM855"},
    {855, "CSIBM855"},

    {857, "857"},
    {857, "CP857"},
    {857, "IBM857"},
    {857, "CSIBM857"},

    /* !IsValidCodePage(858) */
    {858, "CP858"},

    {860, "860"},
    {860, "CP860"},
    {860, "IBM860"},
    {860, "CSIBM860"},

    {861, "861"},
    {861, "CP-IS"},
    {861, "CP861"},
    {861, "IBM861"},
    {861, "CSIBM861"},

    {863, "863"},
    {863, "CP863"},
    {863, "IBM863"},
    {863, "CSIBM863"},

    {864, "CP864"},
    {864, "IBM864"},
    {864, "CSIBM864"},

    {865, "865"},
    {865, "CP865"},
    {865, "IBM865"},
    {865, "CSIBM865"},

    {869, "869"},
    {869, "CP-GR"},
    {869, "CP869"},
    {869, "IBM869"},
    {869, "CSIBM869"},

    /* !IsValidCodePage(1125) */
    {1125, "CP1125"},

    /*
     * Code Page Identifiers
     * http://msdn2.microsoft.com/en-us/library/ms776446.aspx
     */
    {37, "IBM037"}, /* IBM EBCDIC US-Canada */
    {437, "IBM437"}, /* OEM United States */
    {500, "IBM500"}, /* IBM EBCDIC International */
    {708, "ASMO-708"}, /* Arabic (ASMO 708) */
    /* 709 Arabic (ASMO-449+, BCON V4) */
    /* 710 Arabic - Transparent Arabic */
    {720, "DOS-720"}, /* Arabic (Transparent ASMO); Arabic (DOS) */
    {737, "ibm737"}, /* OEM Greek (formerly 437G); Greek (DOS) */
    {775, "ibm775"}, /* OEM Baltic; Baltic (DOS) */
    {850, "ibm850"}, /* OEM Multilingual Latin 1; Western European (DOS) */
    {852, "ibm852"}, /* OEM Latin 2; Central European (DOS) */
    {855, "IBM855"}, /* OEM Cyrillic (primarily Russian) */
    {857, "ibm857"}, /* OEM Turkish; Turkish (DOS) */
    {858, "IBM00858"}, /* OEM Multilingual Latin 1 + Euro symbol */
    {860, "IBM860"}, /* OEM Portuguese; Portuguese (DOS) */
    {861, "ibm861"}, /* OEM Icelandic; Icelandic (DOS) */
    {862, "DOS-862"}, /* OEM Hebrew; Hebrew (DOS) */
    {863, "IBM863"}, /* OEM French Canadian; French Canadian (DOS) */
    {864, "IBM864"}, /* OEM Arabic; Arabic (864) */
    {865, "IBM865"}, /* OEM Nordic; Nordic (DOS) */
    {866, "cp866"}, /* OEM Russian; Cyrillic (DOS) */
    {869, "ibm869"}, /* OEM Modern Greek; Greek, Modern (DOS) */
    {870, "IBM870"}, /* IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2 */
    {874, "windows-874"}, /* ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows) */
    {875, "cp875"}, /* IBM EBCDIC Greek Modern */
    {932, "shift_jis"}, /* ANSI/OEM Japanese; Japanese (Shift-JIS) */
    {932, "shift-jis"}, /* alternative name for it */
    {936, "gb2312"}, /* ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312) */
    {949, "ks_c_5601-1987"}, /* ANSI/OEM Korean (Unified Hangul Code) */
    {950, "big5"}, /* ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5) */
    {950, "big5hkscs"}, /* ANSI/OEM Traditional Chinese (Hong Kong SAR); Chinese Traditional (Big5-HKSCS) */
    {950, "big5-hkscs"}, /* alternative name for it */
    {1026, "IBM1026"}, /* IBM EBCDIC Turkish (Latin 5) */
    {1047, "IBM01047"}, /* IBM EBCDIC Latin 1/Open System */
    {1140, "IBM01140"}, /* IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro) */
    {1141, "IBM01141"}, /* IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro) */
    {1142, "IBM01142"}, /* IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro) */
    {1143, "IBM01143"}, /* IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro) */
    {1144, "IBM01144"}, /* IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro) */
    {1145, "IBM01145"}, /* IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro) */
    {1146, "IBM01146"}, /* IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro) */
    {1147, "IBM01147"}, /* IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro) */
    {1148, "IBM01148"}, /* IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro) */
    {1149, "IBM01149"}, /* IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro) */
    {1250, "windows-1250"}, /* ANSI Central European; Central European (Windows) */
    {1251, "windows-1251"}, /* ANSI Cyrillic; Cyrillic (Windows) */
    {1252, "windows-1252"}, /* ANSI Latin 1; Western European (Windows) */
    {1253, "windows-1253"}, /* ANSI Greek; Greek (Windows) */
    {1254, "windows-1254"}, /* ANSI Turkish; Turkish (Windows) */
    {1255, "windows-1255"}, /* ANSI Hebrew; Hebrew (Windows) */
    {1256, "windows-1256"}, /* ANSI Arabic; Arabic (Windows) */
    {1257, "windows-1257"}, /* ANSI Baltic; Baltic (Windows) */
    {1258, "windows-1258"}, /* ANSI/OEM Vietnamese; Vietnamese (Windows) */
    {1361, "Johab"}, /* Korean (Johab) */
    {10000, "macintosh"}, /* MAC Roman; Western European (Mac) */
    {10001, "x-mac-japanese"}, /* Japanese (Mac) */
    {10002, "x-mac-chinesetrad"}, /* MAC Traditional Chinese (Big5); Chinese Traditional (Mac) */
    {10003, "x-mac-korean"}, /* Korean (Mac) */
    {10004, "x-mac-arabic"}, /* Arabic (Mac) */
    {10005, "x-mac-hebrew"}, /* Hebrew (Mac) */
    {10006, "x-mac-greek"}, /* Greek (Mac) */
    {10007, "x-mac-cyrillic"}, /* Cyrillic (Mac) */
    {10008, "x-mac-chinesesimp"}, /* MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac) */
    {10010, "x-mac-romanian"}, /* Romanian (Mac) */
    {10017, "x-mac-ukrainian"}, /* Ukrainian (Mac) */
    {10021, "x-mac-thai"}, /* Thai (Mac) */
    {10029, "x-mac-ce"}, /* MAC Latin 2; Central European (Mac) */
    {10079, "x-mac-icelandic"}, /* Icelandic (Mac) */
    {10081, "x-mac-turkish"}, /* Turkish (Mac) */
    {10082, "x-mac-croatian"}, /* Croatian (Mac) */
    {20000, "x-Chinese_CNS"}, /* CNS Taiwan; Chinese Traditional (CNS) */
    {20001, "x-cp20001"}, /* TCA Taiwan */
    {20002, "x_Chinese-Eten"}, /* Eten Taiwan; Chinese Traditional (Eten) */
    {20003, "x-cp20003"}, /* IBM5550 Taiwan */
    {20004, "x-cp20004"}, /* TeleText Taiwan */
    {20005, "x-cp20005"}, /* Wang Taiwan */
    {20105, "x-IA5"}, /* IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5) */
    {20106, "x-IA5-German"}, /* IA5 German (7-bit) */
    {20107, "x-IA5-Swedish"}, /* IA5 Swedish (7-bit) */
    {20108, "x-IA5-Norwegian"}, /* IA5 Norwegian (7-bit) */
    {20127, "us-ascii"}, /* US-ASCII (7-bit) */
    {20261, "x-cp20261"}, /* T.61 */
    {20269, "x-cp20269"}, /* ISO 6937 Non-Spacing Accent */
    {20273, "IBM273"}, /* IBM EBCDIC Germany */
    {20277, "IBM277"}, /* IBM EBCDIC Denmark-Norway */
    {20278, "IBM278"}, /* IBM EBCDIC Finland-Sweden */
    {20280, "IBM280"}, /* IBM EBCDIC Italy */
    {20284, "IBM284"}, /* IBM EBCDIC Latin America-Spain */
    {20285, "IBM285"}, /* IBM EBCDIC United Kingdom */
    {20290, "IBM290"}, /* IBM EBCDIC Japanese Katakana Extended */
    {20297, "IBM297"}, /* IBM EBCDIC France */
    {20420, "IBM420"}, /* IBM EBCDIC Arabic */
    {20423, "IBM423"}, /* IBM EBCDIC Greek */
    {20424, "IBM424"}, /* IBM EBCDIC Hebrew */
    {20833, "x-EBCDIC-KoreanExtended"}, /* IBM EBCDIC Korean Extended */
    {20838, "IBM-Thai"}, /* IBM EBCDIC Thai */
    {20866, "koi8-r"}, /* Russian (KOI8-R); Cyrillic (KOI8-R) */
    {20871, "IBM871"}, /* IBM EBCDIC Icelandic */
    {20880, "IBM880"}, /* IBM EBCDIC Cyrillic Russian */
    {20905, "IBM905"}, /* IBM EBCDIC Turkish */
    {20924, "IBM00924"}, /* IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) */
    {20932, "EUC-JP"}, /* Japanese (JIS 0208-1990 and 0121-1990) */
    {20936, "x-cp20936"}, /* Simplified Chinese (GB2312); Chinese Simplified (GB2312-80) */
    {20949, "x-cp20949"}, /* Korean Wansung */
    {21025, "cp1025"}, /* IBM EBCDIC Cyrillic Serbian-Bulgarian */
    /* 21027 (deprecated) */
    {21866, "koi8-u"}, /* Ukrainian (KOI8-U); Cyrillic (KOI8-U) */
    {28591, "iso-8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
    {28591, "iso8859-1"}, /* ISO 8859-1 Latin 1; Western European (ISO) */
    {28591, "iso_8859-1"},
    {28591, "iso_8859_1"},
    {28592, "iso-8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
    {28592, "iso8859-2"}, /* ISO 8859-2 Central European; Central European (ISO) */
    {28592, "iso_8859-2"},
    {28592, "iso_8859_2"},
    {28593, "iso-8859-3"}, /* ISO 8859-3 Latin 3 */
    {28593, "iso8859-3"}, /* ISO 8859-3 Latin 3 */
    {28593, "iso_8859-3"},
    {28593, "iso_8859_3"},
    {28594, "iso-8859-4"}, /* ISO 8859-4 Baltic */
    {28594, "iso8859-4"}, /* ISO 8859-4 Baltic */
    {28594, "iso_8859-4"},
    {28594, "iso_8859_4"},
    {28595, "iso-8859-5"}, /* ISO 8859-5 Cyrillic */
    {28595, "iso8859-5"}, /* ISO 8859-5 Cyrillic */
    {28595, "iso_8859-5"},
    {28595, "iso_8859_5"},
    {28596, "iso-8859-6"}, /* ISO 8859-6 Arabic */
    {28596, "iso8859-6"}, /* ISO 8859-6 Arabic */
    {28596, "iso_8859-6"},
    {28596, "iso_8859_6"},
    {28597, "iso-8859-7"}, /* ISO 8859-7 Greek */
    {28597, "iso8859-7"}, /* ISO 8859-7 Greek */
    {28597, "iso_8859-7"},
    {28597, "iso_8859_7"},
    {28598, "iso-8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
    {28598, "iso8859-8"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Visual) */
    {28598, "iso_8859-8"},
    {28598, "iso_8859_8"},
    {28599, "iso-8859-9"}, /* ISO 8859-9 Turkish */
    {28599, "iso8859-9"}, /* ISO 8859-9 Turkish */
    {28599, "iso_8859-9"},
    {28599, "iso_8859_9"},
    {28603, "iso-8859-13"}, /* ISO 8859-13 Estonian */
    {28603, "iso8859-13"}, /* ISO 8859-13 Estonian */
    {28603, "iso_8859-13"},
    {28603, "iso_8859_13"},
    {28605, "iso-8859-15"}, /* ISO 8859-15 Latin 9 */
    {28605, "iso8859-15"}, /* ISO 8859-15 Latin 9 */
    {28605, "iso_8859-15"},
    {28605, "iso_8859_15"},
    {29001, "x-Europa"}, /* Europa 3 */
    {38598, "iso-8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
    {38598, "iso8859-8-i"}, /* ISO 8859-8 Hebrew; Hebrew (ISO-Logical) */
    {38598, "iso_8859-8-i"},
    {38598, "iso_8859_8-i"},
    {50220, "iso-2022-jp"}, /* ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) */
    {50221, "csISO2022JP"}, /* ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana) */
    {50222, "iso-2022-jp"}, /* ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI) */
    {50225, "iso-2022-kr"}, /* ISO 2022 Korean */
    {50225, "iso2022-kr"}, /* ISO 2022 Korean */
    {50227, "x-cp50227"}, /* ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022) */
    /* 50229 ISO 2022 Traditional Chinese */
    /* 50930 EBCDIC Japanese (Katakana) Extended */
    /* 50931 EBCDIC US-Canada and Japanese */
    /* 50933 EBCDIC Korean Extended and Korean */
    /* 50935 EBCDIC Simplified Chinese Extended and Simplified Chinese */
    /* 50936 EBCDIC Simplified Chinese */
    /* 50937 EBCDIC US-Canada and Traditional Chinese */
    /* 50939 EBCDIC Japanese (Latin) Extended and Japanese */
    {51932, "euc-jp"}, /* EUC Japanese */
    {51936, "EUC-CN"}, /* EUC Simplified Chinese; Chinese Simplified (EUC) */
    {51949, "euc-kr"}, /* EUC Korean */
    /* 51950 EUC Traditional Chinese */
    {52936, "hz-gb-2312"}, /* HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ) */
    {54936, "GB18030"}, /* Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030) */
    {57002, "x-iscii-de"}, /* ISCII Devanagari */
    {57003, "x-iscii-be"}, /* ISCII Bengali */
    {57004, "x-iscii-ta"}, /* ISCII Tamil */
    {57005, "x-iscii-te"}, /* ISCII Telugu */
    {57006, "x-iscii-as"}, /* ISCII Assamese */
    {57007, "x-iscii-or"}, /* ISCII Oriya */
    {57008, "x-iscii-ka"}, /* ISCII Kannada */
    {57009, "x-iscii-ma"}, /* ISCII Malayalam */
    {57010, "x-iscii-gu"}, /* ISCII Gujarati */
    {57011, "x-iscii-pa"}, /* ISCII Punjabi */

    {0, NULL}
};
621 | |||
622 | /* |
||
623 | * SJIS SHIFTJIS table CP932 table |
||
624 | * ---- --------------------------- -------------------------------- |
||
625 | * 5C U+00A5 YEN SIGN U+005C REVERSE SOLIDUS |
||
626 | * 7E U+203E OVERLINE U+007E TILDE |
||
627 | * 815C U+2014 EM DASH U+2015 HORIZONTAL BAR |
||
628 | * 815F U+005C REVERSE SOLIDUS U+FF3C FULLWIDTH REVERSE SOLIDUS |
||
629 | * 8160 U+301C WAVE DASH U+FF5E FULLWIDTH TILDE |
||
630 | * 8161 U+2016 DOUBLE VERTICAL LINE U+2225 PARALLEL TO |
||
631 | * 817C U+2212 MINUS SIGN U+FF0D FULLWIDTH HYPHEN-MINUS |
||
632 | * 8191 U+00A2 CENT SIGN U+FFE0 FULLWIDTH CENT SIGN |
||
633 | * 8192 U+00A3 POUND SIGN U+FFE1 FULLWIDTH POUND SIGN |
||
634 | * 81CA U+00AC NOT SIGN U+FFE2 FULLWIDTH NOT SIGN |
||
635 | * |
||
636 | * EUC-JP and ISO-2022-JP should be compatible with CP932. |
||
637 | * |
||
638 | * Kernel and MLang have different Unicode mapping table. Make sure |
||
639 | * which API is used. |
||
640 | */ |
||
641 | static compat_t cp932_compat[] = { |
||
642 | {0x00A5, 0x005C, COMPAT_OUT}, |
||
643 | {0x203E, 0x007E, COMPAT_OUT}, |
||
644 | {0x2014, 0x2015, COMPAT_OUT}, |
||
645 | {0x301C, 0xFF5E, COMPAT_OUT}, |
||
646 | {0x2016, 0x2225, COMPAT_OUT}, |
||
647 | {0x2212, 0xFF0D, COMPAT_OUT}, |
||
648 | {0x00A2, 0xFFE0, COMPAT_OUT}, |
||
649 | {0x00A3, 0xFFE1, COMPAT_OUT}, |
||
650 | {0x00AC, 0xFFE2, COMPAT_OUT}, |
||
651 | {0, 0, 0} |
||
652 | }; |
||
653 | |||
654 | static compat_t cp20932_compat[] = { |
||
655 | {0x00A5, 0x005C, COMPAT_OUT}, |
||
656 | {0x203E, 0x007E, COMPAT_OUT}, |
||
657 | {0x2014, 0x2015, COMPAT_OUT}, |
||
658 | {0xFF5E, 0x301C, COMPAT_OUT|COMPAT_IN}, |
||
659 | {0x2225, 0x2016, COMPAT_OUT|COMPAT_IN}, |
||
660 | {0xFF0D, 0x2212, COMPAT_OUT|COMPAT_IN}, |
||
661 | {0xFFE0, 0x00A2, COMPAT_OUT|COMPAT_IN}, |
||
662 | {0xFFE1, 0x00A3, COMPAT_OUT|COMPAT_IN}, |
||
663 | {0xFFE2, 0x00AC, COMPAT_OUT|COMPAT_IN}, |
||
664 | {0, 0, 0} |
||
665 | }; |
||
666 | |||
667 | static compat_t *cp51932_compat = cp932_compat; |
||
668 | |||
669 | /* cp20932_compat for kernel. cp932_compat for mlang. */ |
||
670 | static compat_t *cp5022x_compat = cp932_compat; |
||
671 | |||
672 | typedef HRESULT (WINAPI *CONVERTINETSTRING)( |
||
673 | LPDWORD lpdwMode, |
||
674 | DWORD dwSrcEncoding, |
||
675 | DWORD dwDstEncoding, |
||
676 | LPCSTR lpSrcStr, |
||
677 | LPINT lpnSrcSize, |
||
678 | LPBYTE lpDstStr, |
||
679 | LPINT lpnDstSize |
||
680 | ); |
||
681 | typedef HRESULT (WINAPI *CONVERTINETMULTIBYTETOUNICODE)( |
||
682 | LPDWORD lpdwMode, |
||
683 | DWORD dwSrcEncoding, |
||
684 | LPCSTR lpSrcStr, |
||
685 | LPINT lpnMultiCharCount, |
||
686 | LPWSTR lpDstStr, |
||
687 | LPINT lpnWideCharCount |
||
688 | ); |
||
689 | typedef HRESULT (WINAPI *CONVERTINETUNICODETOMULTIBYTE)( |
||
690 | LPDWORD lpdwMode, |
||
691 | DWORD dwEncoding, |
||
692 | LPCWSTR lpSrcStr, |
||
693 | LPINT lpnWideCharCount, |
||
694 | LPSTR lpDstStr, |
||
695 | LPINT lpnMultiCharCount |
||
696 | ); |
||
697 | typedef HRESULT (WINAPI *ISCONVERTINETSTRINGAVAILABLE)( |
||
698 | DWORD dwSrcEncoding, |
||
699 | DWORD dwDstEncoding |
||
700 | ); |
||
701 | typedef HRESULT (WINAPI *LCIDTORFC1766A)( |
||
702 | LCID Locale, |
||
703 | LPSTR pszRfc1766, |
||
704 | int nChar |
||
705 | ); |
||
706 | typedef HRESULT (WINAPI *LCIDTORFC1766W)( |
||
707 | LCID Locale, |
||
708 | LPWSTR pszRfc1766, |
||
709 | int nChar |
||
710 | ); |
||
711 | typedef HRESULT (WINAPI *RFC1766TOLCIDA)( |
||
712 | LCID *pLocale, |
||
713 | LPSTR pszRfc1766 |
||
714 | ); |
||
715 | typedef HRESULT (WINAPI *RFC1766TOLCIDW)( |
||
716 | LCID *pLocale, |
||
717 | LPWSTR pszRfc1766 |
||
718 | ); |
||
719 | static CONVERTINETSTRING ConvertINetString; |
||
720 | static CONVERTINETMULTIBYTETOUNICODE ConvertINetMultiByteToUnicode; |
||
721 | static CONVERTINETUNICODETOMULTIBYTE ConvertINetUnicodeToMultiByte; |
||
722 | static ISCONVERTINETSTRINGAVAILABLE IsConvertINetStringAvailable; |
||
723 | static LCIDTORFC1766A LcidToRfc1766A; |
||
724 | static RFC1766TOLCIDA Rfc1766ToLcidA; |
||
725 | |||
726 | static int |
||
727 | load_mlang(void) |
||
728 | { |
||
729 | HMODULE h; |
||
730 | if (ConvertINetString != NULL) |
||
731 | return TRUE; |
||
732 | h = LoadLibrary(TEXT("mlang.dll")); |
||
733 | if (!h) |
||
734 | return FALSE; |
||
735 | ConvertINetString = (CONVERTINETSTRING)GetProcAddressA(h, "ConvertINetString"); |
||
736 | ConvertINetMultiByteToUnicode = (CONVERTINETMULTIBYTETOUNICODE)GetProcAddressA(h, "ConvertINetMultiByteToUnicode"); |
||
737 | ConvertINetUnicodeToMultiByte = (CONVERTINETUNICODETOMULTIBYTE)GetProcAddressA(h, "ConvertINetUnicodeToMultiByte"); |
||
738 | IsConvertINetStringAvailable = (ISCONVERTINETSTRINGAVAILABLE)GetProcAddressA(h, "IsConvertINetStringAvailable"); |
||
739 | LcidToRfc1766A = (LCIDTORFC1766A)GetProcAddressA(h, "LcidToRfc1766A"); |
||
740 | Rfc1766ToLcidA = (RFC1766TOLCIDA)GetProcAddressA(h, "Rfc1766ToLcidA"); |
||
741 | return TRUE; |
||
742 | } |
||
743 | |||
744 | iconv_t |
||
745 | iconv_open(const char *tocode, const char *fromcode) |
||
746 | { |
||
747 | rec_iconv_t *cd; |
||
748 | |||
749 | cd = (rec_iconv_t *)calloc(1, sizeof(rec_iconv_t)); |
||
750 | if (cd == NULL) |
||
751 | return (iconv_t)(-1); |
||
752 | |||
753 | #if defined(USE_LIBICONV_DLL) |
||
754 | errno = 0; |
||
755 | if (libiconv_iconv_open(cd, tocode, fromcode)) |
||
756 | return (iconv_t)cd; |
||
757 | #endif |
||
758 | |||
759 | /* reset the errno to prevent reporting wrong error code. |
||
760 | * 0 for unsorted error. */ |
||
761 | errno = 0; |
||
762 | if (win_iconv_open(cd, tocode, fromcode)) |
||
763 | return (iconv_t)cd; |
||
764 | |||
765 | free(cd); |
||
766 | |||
767 | return (iconv_t)(-1); |
||
768 | } |
||
769 | |||
770 | int |
||
771 | iconv_close(iconv_t _cd) |
||
772 | { |
||
773 | rec_iconv_t *cd = (rec_iconv_t *)_cd; |
||
774 | int r = cd->iconv_close(cd->cd); |
||
775 | int e = *(cd->_errno()); |
||
776 | #if defined(USE_LIBICONV_DLL) |
||
777 | if (cd->hlibiconv != NULL) |
||
778 | FreeLibrary(cd->hlibiconv); |
||
779 | #endif |
||
780 | free(cd); |
||
781 | errno = e; |
||
782 | return r; |
||
783 | } |
||
784 | |||
785 | size_t |
||
786 | iconv(iconv_t _cd, /* const */ char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft) |
||
787 | { |
||
788 | rec_iconv_t *cd = (rec_iconv_t *)_cd; |
||
789 | size_t r = cd->iconv(cd->cd, inbuf, inbytesleft, outbuf, outbytesleft); |
||
790 | errno = *(cd->_errno()); |
||
791 | return r; |
||
792 | } |
||
793 | |||
794 | static int |
||
795 | win_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode) |
||
796 | { |
||
797 | if (!make_csconv(fromcode, &cd->from) || !make_csconv(tocode, &cd->to)) |
||
798 | return FALSE; |
||
799 | cd->iconv_close = win_iconv_close; |
||
800 | cd->iconv = win_iconv; |
||
801 | cd->_errno = _errno; |
||
802 | cd->cd = (iconv_t)cd; |
||
803 | return TRUE; |
||
804 | } |
||
805 | |||
/*
 * Close hook of the built-in converter.  The argument is unused and the
 * function always reports success (0); the descriptor itself is freed by
 * iconv_close().
 */
static int
win_iconv_close(iconv_t cd UNUSED)
{
    return 0;
}
811 | |||
/*
 * Conversion hook of the built-in converter.
 *
 * Converts one character at a time: source bytes -> UTF-16 (wbuf) via
 * cd->from.mbtowc, optional compatibility remapping, then UTF-16 -> target
 * bytes via cd->to.wctomb.  The in/out pointers and byte counts are advanced
 * only after a character has been fully converted.
 *
 * When inbuf or *inbuf is NULL the call is a reset: the target's flush hook
 * (if any) is run into outbuf and both conversion modes are cleared.
 *
 * Returns 0 on success and (size_t)(-1) on failure, with errno as set by
 * the failing hook.  With FLAG_IGNORE on the target, undecodable input is
 * skipped one byte at a time and unencodable characters are dropped, but an
 * E2BIG from the output side is still reported.
 */
static size_t
win_iconv(iconv_t _cd, /* const */ char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
{
    rec_iconv_t *cd = (rec_iconv_t *)_cd;
    ushort wbuf[MB_CHAR_MAX]; /* enough room for one character */
    int insize;
    int outsize;
    int wsize;
    DWORD frommode;
    DWORD tomode;
    uint wc;
    compat_t *cp;
    int i;

    /* Reset/flush request. */
    if (inbuf == NULL || *inbuf == NULL)
    {
        if (outbuf != NULL && *outbuf != NULL && cd->to.flush != NULL)
        {
            tomode = cd->to.mode;
            outsize = cd->to.flush(&cd->to, (uchar *)*outbuf, *outbytesleft);
            if (outsize == -1)
            {
                if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
                {
                    outsize = 0;
                }
                else
                {
                    /* undo any mode change made by the failed flush */
                    cd->to.mode = tomode;
                    return (size_t)(-1);
                }
            }
            *outbuf += outsize;
            *outbytesleft -= outsize;
        }
        cd->from.mode = 0;
        cd->to.mode = 0;
        return 0;
    }

    while (*inbytesleft != 0)
    {
        /* Remember both modes so a failed step can be rolled back. */
        frommode = cd->from.mode;
        tomode = cd->to.mode;
        wsize = MB_CHAR_MAX;

        insize = cd->from.mbtowc(&cd->from, (const uchar *)*inbuf, *inbytesleft, wbuf, &wsize);
        if (insize == -1)
        {
            if (cd->to.flags & FLAG_IGNORE)
            {
                /* skip a single input byte and produce nothing */
                cd->from.mode = frommode;
                insize = 1;
                wsize = 0;
            }
            else
            {
                cd->from.mode = frommode;
                return (size_t)(-1);
            }
        }

        /* Input was consumed without yielding a character (for example a
         * BOM, or a byte skipped above). */
        if (wsize == 0)
        {
            *inbuf += insize;
            *inbytesleft -= insize;
            continue;
        }

        /* Source-side compatibility table: replace `out` by `in` for
         * entries flagged COMPAT_IN. */
        if (cd->from.compat != NULL)
        {
            wc = utf16_to_ucs4(wbuf);
            cp = cd->from.compat;
            for (i = 0; cp[i].in != 0; ++i)
            {
                if ((cp[i].flag & COMPAT_IN) && cp[i].out == wc)
                {
                    ucs4_to_utf16(cp[i].in, wbuf, &wsize);
                    break;
                }
            }
        }

        /* Target-side compatibility table: replace `in` by `out` for
         * entries flagged COMPAT_OUT. */
        if (cd->to.compat != NULL)
        {
            wc = utf16_to_ucs4(wbuf);
            cp = cd->to.compat;
            for (i = 0; cp[i].in != 0; ++i)
            {
                if ((cp[i].flag & COMPAT_OUT) && cp[i].in == wc)
                {
                    ucs4_to_utf16(cp[i].out, wbuf, &wsize);
                    break;
                }
            }
        }

        outsize = cd->to.wctomb(&cd->to, wbuf, wsize, (uchar *)*outbuf, *outbytesleft);
        if (outsize == -1)
        {
            if ((cd->to.flags & FLAG_IGNORE) && errno != E2BIG)
            {
                /* drop the character but keep the consumed input */
                cd->to.mode = tomode;
                outsize = 0;
            }
            else
            {
                /* nothing was consumed: restore both modes */
                cd->from.mode = frommode;
                cd->to.mode = tomode;
                return (size_t)(-1);
            }
        }

        *inbuf += insize;
        *outbuf += outsize;
        *inbytesleft -= insize;
        *outbytesleft -= outsize;
    }

    return 0;
}
933 | |||
/*
 * Initialize the converter `cv` for the encoding named `_name`.
 *
 * The name may carry "//option" suffixes ("enc//opt1//opt2").  They are
 * stripped from right to left; "nocompat", "translit" and "ignore" are
 * recognized case-insensitively, any other option is removed silently.
 *
 * Returns TRUE on success.  Returns FALSE when the working copy of the name
 * cannot be allocated, or with errno = EINVAL when the encoding is not
 * supported.
 * NOTE(review): `_name` is passed to strlen() unchecked, so it must not be
 * NULL.
 */
static int
make_csconv(const char *_name, csconv_t *cv)
{
    CPINFO cpinfo;
    int use_compat = TRUE;
    int flag = 0;
    char *name;
    char *p;

    /* work on a private copy because the option suffixes are cut off */
    name = xstrndup(_name, strlen(_name));
    if (name == NULL)
        return FALSE;

    /* check for option "enc_name//opt1//opt2" */
    while ((p = strrstr(name, "//")) != NULL)
    {
        if (_stricmp(p + 2, "nocompat") == 0)
            use_compat = FALSE;
        else if (_stricmp(p + 2, "translit") == 0)
            flag |= FLAG_TRANSLIT;
        else if (_stricmp(p + 2, "ignore") == 0)
            flag |= FLAG_IGNORE;
        *p = 0;
    }

    cv->mode = 0;
    cv->flags = flag;
    cv->mblen = NULL;
    cv->flush = NULL;
    cv->compat = NULL;
    cv->codepage = name_to_codepage(name);
    if (cv->codepage == 1200 || cv->codepage == 1201)
    {
        cv->mbtowc = utf16_mbtowc;
        cv->wctomb = utf16_wctomb;
        /* only the endian-neutral names get BOM handling */
        if (_stricmp(name, "UTF-16") == 0 || _stricmp(name, "UTF16") == 0 ||
            _stricmp(name, "UCS-2") == 0 || _stricmp(name, "UCS2") == 0)
            cv->flags |= FLAG_USE_BOM;
    }
    else if (cv->codepage == 12000 || cv->codepage == 12001)
    {
        cv->mbtowc = utf32_mbtowc;
        cv->wctomb = utf32_wctomb;
        /* only the endian-neutral names get BOM handling */
        if (_stricmp(name, "UTF-32") == 0 || _stricmp(name, "UTF32") == 0 ||
            _stricmp(name, "UCS-4") == 0 || _stricmp(name, "UCS4") == 0)
            cv->flags |= FLAG_USE_BOM;
    }
    else if (cv->codepage == 65001)
    {
        cv->mbtowc = kernel_mbtowc;
        cv->wctomb = kernel_wctomb;
        cv->mblen = utf8_mblen;
    }
    /* If load_mlang() fails for the two MLang-backed branches below,
     * control falls through to the generic code page branch. */
    else if ((cv->codepage == 50220 || cv->codepage == 50221 || cv->codepage == 50222) && load_mlang())
    {
        cv->mbtowc = iso2022jp_mbtowc;
        cv->wctomb = iso2022jp_wctomb;
        cv->flush = iso2022jp_flush;
    }
    else if (cv->codepage == 51932 && load_mlang())
    {
        cv->mbtowc = mlang_mbtowc;
        cv->wctomb = mlang_wctomb;
        cv->mblen = eucjp_mblen;
    }
    else if (IsValidCodePage(cv->codepage)
             && GetCPInfo(cv->codepage, &cpinfo) != 0)
    {
        cv->mbtowc = kernel_mbtowc;
        cv->wctomb = kernel_wctomb;
        /* pick the length function from the code page's maximum
         * bytes-per-character */
        if (cpinfo.MaxCharSize == 1)
            cv->mblen = sbcs_mblen;
        else if (cpinfo.MaxCharSize == 2)
            cv->mblen = dbcs_mblen;
        else
            cv->mblen = mbcs_mblen;
    }
    else
    {
        /* not supported */
        free(name);
        errno = EINVAL;
        return FALSE;
    }

    /* attach the compatibility table unless "//nocompat" was given */
    if (use_compat)
    {
        switch (cv->codepage)
        {
        case 932: cv->compat = cp932_compat; break;
        case 20932: cv->compat = cp20932_compat; break;
        case 51932: cv->compat = cp51932_compat; break;
        case 50220: case 50221: case 50222: cv->compat = cp5022x_compat; break;
        }
    }

    free(name);

    return TRUE;
}
1034 | |||
1035 | static int |
||
1036 | name_to_codepage(const char *name) |
||
1037 | { |
||
1038 | int i; |
||
1039 | |||
1040 | if (*name == '\0' || |
||
1041 | strcmp(name, "char") == 0) |
||
1042 | return GetACP(); |
||
1043 | else if (strcmp(name, "wchar_t") == 0) |
||
1044 | return 1200; |
||
1045 | else if (_strnicmp(name, "cp", 2) == 0) |
||
1046 | return atoi(name + 2); /* CP123 */ |
||
1047 | else if ('0' <= name[0] && name[0] <= '9') |
||
1048 | return atoi(name); /* 123 */ |
||
1049 | else if (_strnicmp(name, "xx", 2) == 0) |
||
1050 | return atoi(name + 2); /* XX123 for debug */ |
||
1051 | |||
1052 | for (i = 0; codepage_alias[i].name != NULL; ++i) |
||
1053 | if (_stricmp(name, codepage_alias[i].name) == 0) |
||
1054 | return codepage_alias[i].codepage; |
||
1055 | return -1; |
||
1056 | } |
||
1057 | |||
1058 | /* |
||
1059 | * http://www.faqs.org/rfcs/rfc2781.html |
||
1060 | */ |
||
1061 | static uint |
||
1062 | utf16_to_ucs4(const ushort *wbuf) |
||
1063 | { |
||
1064 | uint wc = wbuf[0]; |
||
1065 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) |
||
1066 | wc = ((wbuf[0] & 0x3FF) << 10) + (wbuf[1] & 0x3FF) + 0x10000; |
||
1067 | return wc; |
||
1068 | } |
||
1069 | |||
1070 | static void |
||
1071 | ucs4_to_utf16(uint wc, ushort *wbuf, int *wbufsize) |
||
1072 | { |
||
1073 | if (wc < 0x10000) |
||
1074 | { |
||
1075 | wbuf[0] = wc; |
||
1076 | *wbufsize = 1; |
||
1077 | } |
||
1078 | else |
||
1079 | { |
||
1080 | wc -= 0x10000; |
||
1081 | wbuf[0] = 0xD800 | ((wc >> 10) & 0x3FF); |
||
1082 | wbuf[1] = 0xDC00 | (wc & 0x3FF); |
||
1083 | *wbufsize = 2; |
||
1084 | } |
||
1085 | } |
||
1086 | |||
1087 | /* |
||
1088 | * Check if codepage is one of those for which the dwFlags parameter |
||
1089 | * to MultiByteToWideChar() must be zero. Return zero or |
||
1090 | * MB_ERR_INVALID_CHARS. The docs in Platform SDK for Windows |
||
1091 | * Server 2003 R2 claims that also codepage 65001 is one of these, but |
||
1092 | * that doesn't seem to be the case. The MSDN docs for MSVS2008 leave |
||
1093 | * out 65001 (UTF-8), and that indeed seems to be the case on XP, it |
||
1094 | * works fine to pass MB_ERR_INVALID_CHARS in dwFlags when converting |
||
1095 | * from UTF-8. |
||
1096 | */ |
||
1097 | static int |
||
1098 | mbtowc_flags(int codepage) |
||
1099 | { |
||
1100 | return (codepage == 50220 || codepage == 50221 || |
||
1101 | codepage == 50222 || codepage == 50225 || |
||
1102 | codepage == 50227 || codepage == 50229 || |
||
1103 | codepage == 52936 || codepage == 54936 || |
||
1104 | (codepage >= 57002 && codepage <= 57011) || |
||
1105 | codepage == 65000 || codepage == 42) ? 0 : MB_ERR_INVALID_CHARS; |
||
1106 | } |
||
1107 | |||
1108 | /* |
||
1109 | * Check if codepage is one of those for which the lpUsedDefaultChar |
||
1110 | * parameter to WideCharToMultiByte() must be NULL. The docs in |
||
1111 | * Platform SDK for Windows Server 2003 R2 claims that this is the |
||
1112 | * list below, while the MSDN docs for MSVS2008 claim that it is only |
||
1113 | * for 65000 (UTF-7) and 65001 (UTF-8). This time the earlier Platform |
||
1114 | * SDK seems to be correct, at least for XP. |
||
1115 | */ |
||
/*
 * Return 1 when `codepage` is one of the code pages for which
 * WideCharToMultiByte() must be given a NULL lpUsedDefaultChar, else 0.
 */
static int
must_use_null_useddefaultchar(int codepage)
{
    if (codepage >= 57002 && codepage <= 57011)
        return 1;

    switch (codepage)
    {
    case 42:
    case 50220:
    case 50221:
    case 50222:
    case 50225:
    case 50227:
    case 50229:
    case 52936:
    case 54936:
    case 65000:
    case 65001:
        return 1;
    default:
        return 0;
    }
}
1127 | |||
/*
 * Find the last occurrence of `token` in `str`.
 *
 * Returns a pointer to the start of the last match, or NULL when there is
 * none.  An empty `token` never matches, so NULL is returned for it.
 *
 * The scan uses an index rather than decrementing a pointer, so no pointer
 * before the start of `str` is ever formed (the previous `--p` loop did
 * that on its final iteration, and immediately for an empty `str`).
 */
static char *
strrstr(const char *str, const char *token)
{
    size_t len = strlen(token);
    size_t pos = strlen(str);

    while (pos > 0)
    {
        const char *p = str + --pos;

        /* strncmp stops at the terminating NUL of str, so comparing near
         * the end of the string is safe. */
        if (p[0] == token[0] && strncmp(p, token, len) == 0)
            return (char *)p;
    }
    return NULL;
}
1139 | |||
/*
 * Return a newly allocated, NUL-terminated copy of the first `n` bytes of
 * `s`, or NULL when the allocation fails.  Exactly `n` bytes are copied;
 * the copy does not stop early at a NUL inside `s`.  The caller frees the
 * result with free().
 */
static char *
xstrndup(const char *s, size_t n)
{
    char *copy = (char *)malloc(n + 1);

    if (copy != NULL)
    {
        memcpy(copy, s, n);
        copy[n] = '\0';
    }
    return copy;
}
1152 | |||
/*
 * Store `err` in errno and return -1, so that callers can write
 * `return seterror(E...);` in one statement.
 */
static int
seterror(int err)
{
    return (errno = err, -1);
}
1159 | |||
1160 | #if defined(USE_LIBICONV_DLL) |
||
/*
 * Try to open the conversion through an external libiconv DLL.
 *
 * The DLL candidates come from $WINICONV_LIBICONV_DLL (comma separated),
 * falling back to DEFAULT_LIBICONV_DLL when the variable is not set.  The
 * first candidate that loads and is not this module itself is used.  Its
 * open/close/convert entry points are looked up under both the
 * "libiconv_*" and the "iconv_*" names, and the `_errno` function the DLL
 * imports is located so its errno can be read later.
 *
 * Returns TRUE and fills in `cd` on success.  Returns FALSE when no DLL
 * could be used or its iconv_open() failed; the DLL is unloaded again in
 * that case.
 */
static int
libiconv_iconv_open(rec_iconv_t *cd, const char *tocode, const char *fromcode)
{
    HMODULE hlibiconv = NULL;
    char *dllname;
    const char *p;
    const char *e;
    f_iconv_open _iconv_open;

    /*
     * always try to load dll, so that we can switch dll in runtime.
     */

    /* XXX: getenv() can't get variable set by SetEnvironmentVariable() */
    p = getenv("WINICONV_LIBICONV_DLL");
    if (p == NULL)
        p = DEFAULT_LIBICONV_DLL;
    /* parse comma separated value */
    for ( ; *p != 0; p = (*e == ',') ? e + 1 : e)
    {
        e = strchr(p, ',');
        if (p == e)
            continue;               /* empty list entry */
        else if (e == NULL)
            e = p + strlen(p);      /* last entry: runs to end of string */
        dllname = xstrndup(p, e - p);
        if (dllname == NULL)
            return FALSE;
        hlibiconv = LoadLibraryA(dllname);
        free(dllname);
        if (hlibiconv != NULL)
        {
            /* do not delegate to ourselves */
            if (hlibiconv == hwiniconv)
            {
                FreeLibrary(hlibiconv);
                hlibiconv = NULL;
                continue;
            }
            break;
        }
    }

    if (hlibiconv == NULL)
        goto failed;

    /* prefer the "libiconv_*" names, fall back to the plain "iconv_*" ones */
    _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "libiconv_open");
    if (_iconv_open == NULL)
        _iconv_open = (f_iconv_open)GetProcAddressA(hlibiconv, "iconv_open");
    cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "libiconv_close");
    if (cd->iconv_close == NULL)
        cd->iconv_close = (f_iconv_close)GetProcAddressA(hlibiconv, "iconv_close");
    cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "libiconv");
    if (cd->iconv == NULL)
        cd->iconv = (f_iconv)GetProcAddressA(hlibiconv, "iconv");
    /* _errno is looked up among the functions the DLL imports, not among
     * its exports */
    cd->_errno = (f_errno)find_imported_function(hlibiconv, "_errno");
    if (_iconv_open == NULL || cd->iconv_close == NULL
        || cd->iconv == NULL || cd->_errno == NULL)
        goto failed;

    cd->cd = _iconv_open(tocode, fromcode);
    if (cd->cd == (iconv_t)(-1))
        goto failed;

    cd->hlibiconv = hlibiconv;
    return TRUE;

failed:
    if (hlibiconv != NULL)
        FreeLibrary(hlibiconv);
    return FALSE;
}
1232 | |||
1233 | /* |
||
1234 | * Reference: |
||
1235 | * http://forums.belution.com/ja/vc/000/234/78s.shtml |
||
1236 | * http://nienie.com/~masapico/api_ImageDirectoryEntryToData.html |
||
1237 | * |
||
1238 | * The formal way is |
||
1239 | * imagehlp.h or dbghelp.h |
||
1240 | * imagehlp.lib or dbghelp.lib |
||
1241 | * ImageDirectoryEntryToData() |
||
1242 | */ |
||
1243 | #define TO_DOS_HEADER(base) ((PIMAGE_DOS_HEADER)(base)) |
||
1244 | #define TO_NT_HEADERS(base) ((PIMAGE_NT_HEADERS)((LPBYTE)(base) + TO_DOS_HEADER(base)->e_lfanew)) |
||
1245 | static PVOID |
||
1246 | MyImageDirectoryEntryToData(LPVOID Base, BOOLEAN MappedAsImage, USHORT DirectoryEntry, PULONG Size) |
||
1247 | { |
||
1248 | /* TODO: MappedAsImage? */ |
||
1249 | PIMAGE_DATA_DIRECTORY p; |
||
1250 | p = TO_NT_HEADERS(Base)->OptionalHeader.DataDirectory + DirectoryEntry; |
||
1251 | if (p->VirtualAddress == 0) { |
||
1252 | *Size = 0; |
||
1253 | return NULL; |
||
1254 | } |
||
1255 | *Size = p->Size; |
||
1256 | return (PVOID)((LPBYTE)Base + p->VirtualAddress); |
||
1257 | } |
||
1258 | |||
/*
 * Find a function that the loaded module `hModule` imports by name.
 *
 * Walks the module's import descriptors and, for every import that is
 * referenced by name rather than by ordinal, compares the name with
 * `funcname`.  Returns the address stored in the matching import address
 * table slot, or NULL when the module has no import directory or does not
 * import a function of that name.
 */
static FARPROC
find_imported_function(HMODULE hModule, const char *funcname)
{
    DWORD_PTR Base;
    ULONG Size;
    PIMAGE_IMPORT_DESCRIPTOR Imp;
    PIMAGE_THUNK_DATA Address; /* Import Address Table */
    PIMAGE_THUNK_DATA Name; /* Import Name Table */
    PIMAGE_IMPORT_BY_NAME ImpName;

    /* the module handle is used as the image base address */
    Base = (DWORD_PTR)hModule;
    Imp = (PIMAGE_IMPORT_DESCRIPTOR)MyImageDirectoryEntryToData(
            (LPVOID)Base,
            TRUE,
            IMAGE_DIRECTORY_ENTRY_IMPORT,
            &Size);
    if (Imp == NULL)
        return NULL;
    /* the descriptor list ends at an entry whose OriginalFirstThunk is 0 */
    for ( ; Imp->OriginalFirstThunk != 0; ++Imp)
    {
        Address = (PIMAGE_THUNK_DATA)(Base + Imp->FirstThunk);
        Name = (PIMAGE_THUNK_DATA)(Base + Imp->OriginalFirstThunk);
        /* the name table and the address table are walked in step */
        for ( ; Name->u1.Ordinal != 0; ++Name, ++Address)
        {
            if (!IMAGE_SNAP_BY_ORDINAL(Name->u1.Ordinal))
            {
                ImpName = (PIMAGE_IMPORT_BY_NAME)
                    (Base + (DWORD_PTR)Name->u1.AddressOfData);
                if (strcmp((char *)ImpName->Name, funcname) == 0)
                    return (FARPROC)Address->u1.Function;
            }
        }
    }
    return NULL;
}
1294 | #endif |
||
1295 | |||
/*
 * Character length function for single-byte code pages: every character is
 * one byte long, so none of the arguments are inspected.
 */
static int
sbcs_mblen(csconv_t *cv UNUSED, const uchar *buf UNUSED, int bufsize UNUSED)
{
    return 1;
}
1301 | |||
1302 | static int |
||
1303 | dbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) |
||
1304 | { |
||
1305 | int len = IsDBCSLeadByteEx(cv->codepage, buf[0]) ? 2 : 1; |
||
1306 | if (bufsize < len) |
||
1307 | return seterror(EINVAL); |
||
1308 | return len; |
||
1309 | } |
||
1310 | |||
1311 | static int |
||
1312 | mbcs_mblen(csconv_t *cv, const uchar *buf, int bufsize) |
||
1313 | { |
||
1314 | int len = 0; |
||
1315 | |||
1316 | if (cv->codepage == 54936) { |
||
1317 | if (buf[0] <= 0x7F) len = 1; |
||
1318 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && |
||
1319 | bufsize >= 2 && |
||
1320 | ((buf[1] >= 0x40 && buf[1] <= 0x7E) || |
||
1321 | (buf[1] >= 0x80 && buf[1] <= 0xFE))) len = 2; |
||
1322 | else if (buf[0] >= 0x81 && buf[0] <= 0xFE && |
||
1323 | bufsize >= 4 && |
||
1324 | buf[1] >= 0x30 && buf[1] <= 0x39) len = 4; |
||
1325 | else |
||
1326 | return seterror(EINVAL); |
||
1327 | return len; |
||
1328 | } |
||
1329 | else |
||
1330 | return seterror(EINVAL); |
||
1331 | } |
||
1332 | |||
1333 | static int |
||
1334 | utf8_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize) |
||
1335 | { |
||
1336 | int len = 0; |
||
1337 | |||
1338 | if (buf[0] < 0x80) len = 1; |
||
1339 | else if ((buf[0] & 0xE0) == 0xC0) len = 2; |
||
1340 | else if ((buf[0] & 0xF0) == 0xE0) len = 3; |
||
1341 | else if ((buf[0] & 0xF8) == 0xF0) len = 4; |
||
1342 | else if ((buf[0] & 0xFC) == 0xF8) len = 5; |
||
1343 | else if ((buf[0] & 0xFE) == 0xFC) len = 6; |
||
1344 | |||
1345 | if (len == 0) |
||
1346 | return seterror(EILSEQ); |
||
1347 | else if (bufsize < len) |
||
1348 | return seterror(EINVAL); |
||
1349 | return len; |
||
1350 | } |
||
1351 | |||
1352 | static int |
||
1353 | eucjp_mblen(csconv_t *cv UNUSED, const uchar *buf, int bufsize) |
||
1354 | { |
||
1355 | if (buf[0] < 0x80) /* ASCII */ |
||
1356 | return 1; |
||
1357 | else if (buf[0] == 0x8E) /* JIS X 0201 */ |
||
1358 | { |
||
1359 | if (bufsize < 2) |
||
1360 | return seterror(EINVAL); |
||
1361 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xDF)) |
||
1362 | return seterror(EILSEQ); |
||
1363 | return 2; |
||
1364 | } |
||
1365 | else if (buf[0] == 0x8F) /* JIS X 0212 */ |
||
1366 | { |
||
1367 | if (bufsize < 3) |
||
1368 | return seterror(EINVAL); |
||
1369 | else if (!(0xA1 <= buf[1] && buf[1] <= 0xFE) |
||
1370 | || !(0xA1 <= buf[2] && buf[2] <= 0xFE)) |
||
1371 | return seterror(EILSEQ); |
||
1372 | return 3; |
||
1373 | } |
||
1374 | else /* JIS X 0208 */ |
||
1375 | { |
||
1376 | if (bufsize < 2) |
||
1377 | return seterror(EINVAL); |
||
1378 | else if (!(0xA1 <= buf[0] && buf[0] <= 0xFE) |
||
1379 | || !(0xA1 <= buf[1] && buf[1] <= 0xFE)) |
||
1380 | return seterror(EILSEQ); |
||
1381 | return 2; |
||
1382 | } |
||
1383 | } |
||
1384 | |||
1385 | static int |
||
1386 | kernel_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) |
||
1387 | { |
||
1388 | int len; |
||
1389 | |||
1390 | len = cv->mblen(cv, buf, bufsize); |
||
1391 | if (len == -1) |
||
1392 | return -1; |
||
1393 | /* If converting from ASCII, reject 8bit |
||
1394 | * chars. MultiByteToWideChar() doesn't. Note that for ASCII we |
||
1395 | * know that the mblen function is sbcs_mblen() so len is 1. |
||
1396 | */ |
||
1397 | if (cv->codepage == 20127 && buf[0] >= 0x80) |
||
1398 | return seterror(EILSEQ); |
||
1399 | *wbufsize = MultiByteToWideChar(cv->codepage, mbtowc_flags (cv->codepage), |
||
1400 | (const char *)buf, len, (wchar_t *)wbuf, *wbufsize); |
||
1401 | if (*wbufsize == 0) |
||
1402 | return seterror(EILSEQ); |
||
1403 | return len; |
||
1404 | } |
||
1405 | |||
1406 | static int |
||
1407 | kernel_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) |
||
1408 | { |
||
1409 | BOOL usedDefaultChar = 0; |
||
1410 | BOOL *p = NULL; |
||
1411 | int flags = 0; |
||
1412 | int len; |
||
1413 | |||
1414 | if (bufsize == 0) |
||
1415 | return seterror(E2BIG); |
||
1416 | if (!must_use_null_useddefaultchar(cv->codepage)) |
||
1417 | { |
||
1418 | p = &usedDefaultChar; |
||
1419 | #ifdef WC_NO_BEST_FIT_CHARS |
||
1420 | if (!(cv->flags & FLAG_TRANSLIT)) |
||
1421 | flags |= WC_NO_BEST_FIT_CHARS; |
||
1422 | #endif |
||
1423 | } |
||
1424 | len = WideCharToMultiByte(cv->codepage, flags, |
||
1425 | (const wchar_t *)wbuf, wbufsize, (char *)buf, bufsize, NULL, p); |
||
1426 | if (len == 0) |
||
1427 | { |
||
1428 | if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) |
||
1429 | return seterror(E2BIG); |
||
1430 | return seterror(EILSEQ); |
||
1431 | } |
||
1432 | else if (usedDefaultChar && !(cv->flags & FLAG_TRANSLIT)) |
||
1433 | return seterror(EILSEQ); |
||
1434 | else if (cv->mblen(cv, buf, len) != len) /* validate result */ |
||
1435 | return seterror(EILSEQ); |
||
1436 | return len; |
||
1437 | } |
||
1438 | |||
1439 | /* |
||
1440 | * It seems that the mode (cv->mode) is fixnum. |
||
1441 | * For example, when converting iso-2022-jp(cp50221) to unicode: |
||
1442 | * in ascii sequence: mode=0xC42C0000 |
||
1443 | * in jisx0208 sequence: mode=0xC42C0001 |
||
1444 | * "C42C" is same for each convert session. |
||
1445 | * It should be: ((codepage-1)<<16)|state |
||
1446 | */ |
||
1447 | static int |
||
1448 | mlang_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) |
||
1449 | { |
||
1450 | int len; |
||
1451 | int insize; |
||
1452 | HRESULT hr; |
||
1453 | |||
1454 | len = cv->mblen(cv, buf, bufsize); |
||
1455 | if (len == -1) |
||
1456 | return -1; |
||
1457 | insize = len; |
||
1458 | hr = ConvertINetMultiByteToUnicode(&cv->mode, cv->codepage, |
||
1459 | (const char *)buf, &insize, (wchar_t *)wbuf, wbufsize); |
||
1460 | if (hr != S_OK || insize != len) |
||
1461 | return seterror(EILSEQ); |
||
1462 | return len; |
||
1463 | } |
||
1464 | |||
1465 | static int |
||
1466 | mlang_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) |
||
1467 | { |
||
1468 | char tmpbuf[MB_CHAR_MAX]; /* enough room for one character */ |
||
1469 | int tmpsize = MB_CHAR_MAX; |
||
1470 | int insize = wbufsize; |
||
1471 | HRESULT hr; |
||
1472 | |||
1473 | hr = ConvertINetUnicodeToMultiByte(&cv->mode, cv->codepage, |
||
1474 | (const wchar_t *)wbuf, &wbufsize, tmpbuf, &tmpsize); |
||
1475 | if (hr != S_OK || insize != wbufsize) |
||
1476 | return seterror(EILSEQ); |
||
1477 | else if (bufsize < tmpsize) |
||
1478 | return seterror(E2BIG); |
||
1479 | else if (cv->mblen(cv, (uchar *)tmpbuf, tmpsize) != tmpsize) |
||
1480 | return seterror(EILSEQ); |
||
1481 | memcpy(buf, tmpbuf, tmpsize); |
||
1482 | return tmpsize; |
||
1483 | } |
||
1484 | |||
/*
 * Decode one UTF-16 character (one unit or a surrogate pair) from `buf`.
 *
 * The byte order follows cv->codepage (1200 little endian, 1201 big
 * endian), inverted when UNICODE_MODE_SWAPPED is set in cv->mode.  With
 * FLAG_USE_BOM the first unit decoded is examined once: a byte-swapped BOM
 * (0xFFFE) switches the byte order, and either form of BOM is consumed
 * without producing output (*wbufsize = 0).
 *
 * Returns the number of bytes consumed (2 or 4) and stores the number of
 * units written in *wbufsize.  Returns -1 with errno = EINVAL when the
 * buffer ends inside the character, or errno = EILSEQ for an unpaired
 * surrogate.
 */
static int
utf16_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize)
{
    int codepage = cv->codepage;

    /* swap endian: 1200 <-> 1201 */
    if (cv->mode & UNICODE_MODE_SWAPPED)
        codepage ^= 1;

    if (bufsize < 2)
        return seterror(EINVAL);
    if (codepage == 1200) /* little endian */
        wbuf[0] = (buf[1] << 8) | buf[0];
    else if (codepage == 1201) /* big endian */
        wbuf[0] = (buf[0] << 8) | buf[1];

    /* BOM detection happens only for the first unit of the stream */
    if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE))
    {
        cv->mode |= UNICODE_MODE_BOM_DONE;
        if (wbuf[0] == 0xFFFE)
        {
            /* BOM seen in the opposite byte order */
            cv->mode |= UNICODE_MODE_SWAPPED;
            *wbufsize = 0;
            return 2;
        }
        else if (wbuf[0] == 0xFEFF)
        {
            *wbufsize = 0;
            return 2;
        }
    }

    /* a low surrogate may not come first */
    if (0xDC00 <= wbuf[0] && wbuf[0] <= 0xDFFF)
        return seterror(EILSEQ);
    if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF)
    {
        /* high surrogate: the low surrogate must follow */
        if (bufsize < 4)
            return seterror(EINVAL);
        if (codepage == 1200) /* little endian */
            wbuf[1] = (buf[3] << 8) | buf[2];
        else if (codepage == 1201) /* big endian */
            wbuf[1] = (buf[2] << 8) | buf[3];
        if (!(0xDC00 <= wbuf[1] && wbuf[1] <= 0xDFFF))
            return seterror(EILSEQ);
        *wbufsize = 2;
        return 4;
    }
    *wbufsize = 1;
    return 2;
}
1535 | |||
1536 | static int |
||
1537 | utf16_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) |
||
1538 | { |
||
1539 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) |
||
1540 | { |
||
1541 | int r; |
||
1542 | |||
1543 | cv->mode |= UNICODE_MODE_BOM_DONE; |
||
1544 | if (bufsize < 2) |
||
1545 | return seterror(E2BIG); |
||
1546 | if (cv->codepage == 1200) /* little endian */ |
||
1547 | memcpy(buf, "\xFF\xFE", 2); |
||
1548 | else if (cv->codepage == 1201) /* big endian */ |
||
1549 | memcpy(buf, "\xFE\xFF", 2); |
||
1550 | |||
1551 | r = utf16_wctomb(cv, wbuf, wbufsize, buf + 2, bufsize - 2); |
||
1552 | if (r == -1) |
||
1553 | return -1; |
||
1554 | return r + 2; |
||
1555 | } |
||
1556 | |||
1557 | if (bufsize < 2) |
||
1558 | return seterror(E2BIG); |
||
1559 | if (cv->codepage == 1200) /* little endian */ |
||
1560 | { |
||
1561 | buf[0] = (wbuf[0] & 0x00FF); |
||
1562 | buf[1] = (wbuf[0] & 0xFF00) >> 8; |
||
1563 | } |
||
1564 | else if (cv->codepage == 1201) /* big endian */ |
||
1565 | { |
||
1566 | buf[0] = (wbuf[0] & 0xFF00) >> 8; |
||
1567 | buf[1] = (wbuf[0] & 0x00FF); |
||
1568 | } |
||
1569 | if (0xD800 <= wbuf[0] && wbuf[0] <= 0xDBFF) |
||
1570 | { |
||
1571 | if (bufsize < 4) |
||
1572 | return seterror(E2BIG); |
||
1573 | if (cv->codepage == 1200) /* little endian */ |
||
1574 | { |
||
1575 | buf[2] = (wbuf[1] & 0x00FF); |
||
1576 | buf[3] = (wbuf[1] & 0xFF00) >> 8; |
||
1577 | } |
||
1578 | else if (cv->codepage == 1201) /* big endian */ |
||
1579 | { |
||
1580 | buf[2] = (wbuf[1] & 0xFF00) >> 8; |
||
1581 | buf[3] = (wbuf[1] & 0x00FF); |
||
1582 | } |
||
1583 | return 4; |
||
1584 | } |
||
1585 | return 2; |
||
1586 | } |
||
1587 | |||
1588 | static int |
||
1589 | utf32_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) |
||
1590 | { |
||
1591 | int codepage = cv->codepage; |
||
1592 | uint wc = 0xD800; |
||
1593 | |||
1594 | /* swap endian: 12000 <-> 12001 */ |
||
1595 | if (cv->mode & UNICODE_MODE_SWAPPED) |
||
1596 | codepage ^= 1; |
||
1597 | |||
1598 | if (bufsize < 4) |
||
1599 | return seterror(EINVAL); |
||
1600 | if (codepage == 12000) /* little endian */ |
||
1601 | wc = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; |
||
1602 | else if (codepage == 12001) /* big endian */ |
||
1603 | wc = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; |
||
1604 | |||
1605 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) |
||
1606 | { |
||
1607 | cv->mode |= UNICODE_MODE_BOM_DONE; |
||
1608 | if (wc == 0xFFFE0000) |
||
1609 | { |
||
1610 | cv->mode |= UNICODE_MODE_SWAPPED; |
||
1611 | *wbufsize = 0; |
||
1612 | return 4; |
||
1613 | } |
||
1614 | else if (wc == 0x0000FEFF) |
||
1615 | { |
||
1616 | *wbufsize = 0; |
||
1617 | return 4; |
||
1618 | } |
||
1619 | } |
||
1620 | |||
1621 | if ((0xD800 <= wc && wc <= 0xDFFF) || 0x10FFFF < wc) |
||
1622 | return seterror(EILSEQ); |
||
1623 | ucs4_to_utf16(wc, wbuf, wbufsize); |
||
1624 | return 4; |
||
1625 | } |
||
1626 | |||
1627 | static int |
||
1628 | utf32_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) |
||
1629 | { |
||
1630 | uint wc; |
||
1631 | |||
1632 | if ((cv->flags & FLAG_USE_BOM) && !(cv->mode & UNICODE_MODE_BOM_DONE)) |
||
1633 | { |
||
1634 | int r; |
||
1635 | |||
1636 | cv->mode |= UNICODE_MODE_BOM_DONE; |
||
1637 | if (bufsize < 4) |
||
1638 | return seterror(E2BIG); |
||
1639 | if (cv->codepage == 12000) /* little endian */ |
||
1640 | memcpy(buf, "\xFF\xFE\x00\x00", 4); |
||
1641 | else if (cv->codepage == 12001) /* big endian */ |
||
1642 | memcpy(buf, "\x00\x00\xFE\xFF", 4); |
||
1643 | |||
1644 | r = utf32_wctomb(cv, wbuf, wbufsize, buf + 4, bufsize - 4); |
||
1645 | if (r == -1) |
||
1646 | return -1; |
||
1647 | return r + 4; |
||
1648 | } |
||
1649 | |||
1650 | if (bufsize < 4) |
||
1651 | return seterror(E2BIG); |
||
1652 | wc = utf16_to_ucs4(wbuf); |
||
1653 | if (cv->codepage == 12000) /* little endian */ |
||
1654 | { |
||
1655 | buf[0] = wc & 0x000000FF; |
||
1656 | buf[1] = (wc & 0x0000FF00) >> 8; |
||
1657 | buf[2] = (wc & 0x00FF0000) >> 16; |
||
1658 | buf[3] = (wc & 0xFF000000) >> 24; |
||
1659 | } |
||
1660 | else if (cv->codepage == 12001) /* big endian */ |
||
1661 | { |
||
1662 | buf[0] = (wc & 0xFF000000) >> 24; |
||
1663 | buf[1] = (wc & 0x00FF0000) >> 16; |
||
1664 | buf[2] = (wc & 0x0000FF00) >> 8; |
||
1665 | buf[3] = wc & 0x000000FF; |
||
1666 | } |
||
1667 | return 4; |
||
1668 | } |
||
1669 | |||
1670 | /* |
||
1671 | * 50220: ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS) |
||
1672 | * 50221: ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow |
||
1673 | * 1 byte Kana) |
||
1674 | * 50222: ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte |
||
1675 | * Kana - SO/SI) |
||
1676 | * |
||
1677 | * MultiByteToWideChar() and WideCharToMultiByte() behave differently |
||
1678 | * depending on Windows version. On XP, WideCharToMultiByte() doesn't |
||
1679 | * terminate result sequence with ascii escape. But Vista does. |
||
1680 | * Use MLang instead. |
||
1681 | */ |
||
1682 | |||
1683 | #define ISO2022_MODE(cs, shift) (((cs) << 8) | (shift)) |
||
1684 | #define ISO2022_MODE_CS(mode) (((mode) >> 8) & 0xFF) |
||
1685 | #define ISO2022_MODE_SHIFT(mode) ((mode) & 0xFF) |
||
1686 | |||
1687 | #define ISO2022_SI 0 |
||
1688 | #define ISO2022_SO 1 |
||
1689 | |||
1690 | /* shift in */ |
||
1691 | static const char iso2022_SI_seq[] = "\x0F"; |
||
1692 | /* shift out */ |
||
1693 | static const char iso2022_SO_seq[] = "\x0E"; |
||
1694 | |||
/* One row of an ISO 2022 escape sequence table. */
typedef struct iso2022_esc_t {
    const char *esc;    /* escape sequence bytes that select the set */
    int esc_len;        /* number of bytes in esc */
    int len;            /* bytes per character while the set is active */
    int cs;             /* character set id stored in the conversion mode */
} iso2022_esc_t;
||
1702 | |||
1703 | #define ISO2022JP_CS_ASCII 0 |
||
1704 | #define ISO2022JP_CS_JISX0201_ROMAN 1 |
||
1705 | #define ISO2022JP_CS_JISX0201_KANA 2 |
||
1706 | #define ISO2022JP_CS_JISX0208_1978 3 |
||
1707 | #define ISO2022JP_CS_JISX0208_1983 4 |
||
1708 | #define ISO2022JP_CS_JISX0212 5 |
||
1709 | |||
1710 | static iso2022_esc_t iso2022jp_esc[] = { |
||
1711 | {"\x1B\x28\x42", 3, 1, ISO2022JP_CS_ASCII}, |
||
1712 | {"\x1B\x28\x4A", 3, 1, ISO2022JP_CS_JISX0201_ROMAN}, |
||
1713 | {"\x1B\x28\x49", 3, 1, ISO2022JP_CS_JISX0201_KANA}, |
||
1714 | {"\x1B\x24\x40", 3, 2, ISO2022JP_CS_JISX0208_1983}, /* unify 1978 with 1983 */ |
||
1715 | {"\x1B\x24\x42", 3, 2, ISO2022JP_CS_JISX0208_1983}, |
||
1716 | {"\x1B\x24\x28\x44", 4, 2, ISO2022JP_CS_JISX0212}, |
||
1717 | {NULL, 0, 0, 0} |
||
1718 | }; |
||
1719 | |||
1720 | static int |
||
1721 | iso2022jp_mbtowc(csconv_t *cv, const uchar *buf, int bufsize, ushort *wbuf, int *wbufsize) |
||
1722 | { |
||
1723 | iso2022_esc_t *iesc = iso2022jp_esc; |
||
1724 | char tmp[MB_CHAR_MAX]; |
||
1725 | int insize; |
||
1726 | HRESULT hr; |
||
1727 | DWORD dummy = 0; |
||
1728 | int len; |
||
1729 | int esc_len; |
||
1730 | int cs; |
||
1731 | int shift; |
||
1732 | int i; |
||
1733 | |||
1734 | if (buf[0] == 0x1B) |
||
1735 | { |
||
1736 | for (i = 0; iesc[i].esc != NULL; ++i) |
||
1737 | { |
||
1738 | esc_len = iesc[i].esc_len; |
||
1739 | if (bufsize < esc_len) |
||
1740 | { |
||
1741 | if (strncmp((char *)buf, iesc[i].esc, bufsize) == 0) |
||
1742 | return seterror(EINVAL); |
||
1743 | } |
||
1744 | else |
||
1745 | { |
||
1746 | if (strncmp((char *)buf, iesc[i].esc, esc_len) == 0) |
||
1747 | { |
||
1748 | cv->mode = ISO2022_MODE(iesc[i].cs, ISO2022_SI); |
||
1749 | *wbufsize = 0; |
||
1750 | return esc_len; |
||
1751 | } |
||
1752 | } |
||
1753 | } |
||
1754 | /* not supported escape sequence */ |
||
1755 | return seterror(EILSEQ); |
||
1756 | } |
||
1757 | else if (buf[0] == iso2022_SO_seq[0]) |
||
1758 | { |
||
1759 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SO); |
||
1760 | *wbufsize = 0; |
||
1761 | return 1; |
||
1762 | } |
||
1763 | else if (buf[0] == iso2022_SI_seq[0]) |
||
1764 | { |
||
1765 | cv->mode = ISO2022_MODE(ISO2022_MODE_CS(cv->mode), ISO2022_SI); |
||
1766 | *wbufsize = 0; |
||
1767 | return 1; |
||
1768 | } |
||
1769 | |||
1770 | cs = ISO2022_MODE_CS(cv->mode); |
||
1771 | shift = ISO2022_MODE_SHIFT(cv->mode); |
||
1772 | |||
1773 | /* reset the mode for informal sequence */ |
||
1774 | if (buf[0] < 0x20) |
||
1775 | { |
||
1776 | cs = ISO2022JP_CS_ASCII; |
||
1777 | shift = ISO2022_SI; |
||
1778 | } |
||
1779 | |||
1780 | len = iesc[cs].len; |
||
1781 | if (bufsize < len) |
||
1782 | return seterror(EINVAL); |
||
1783 | for (i = 0; i < len; ++i) |
||
1784 | if (!(buf[i] < 0x80)) |
||
1785 | return seterror(EILSEQ); |
||
1786 | esc_len = iesc[cs].esc_len; |
||
1787 | memcpy(tmp, iesc[cs].esc, esc_len); |
||
1788 | if (shift == ISO2022_SO) |
||
1789 | { |
||
1790 | memcpy(tmp + esc_len, iso2022_SO_seq, 1); |
||
1791 | esc_len += 1; |
||
1792 | } |
||
1793 | memcpy(tmp + esc_len, buf, len); |
||
1794 | |||
1795 | if ((cv->codepage == 50220 || cv->codepage == 50221 |
||
1796 | || cv->codepage == 50222) && shift == ISO2022_SO) |
||
1797 | { |
||
1798 | /* XXX: shift-out cannot be used for mbtowc (both kernel and |
||
1799 | * mlang) */ |
||
1800 | esc_len = iesc[ISO2022JP_CS_JISX0201_KANA].esc_len; |
||
1801 | memcpy(tmp, iesc[ISO2022JP_CS_JISX0201_KANA].esc, esc_len); |
||
1802 | memcpy(tmp + esc_len, buf, len); |
||
1803 | } |
||
1804 | |||
1805 | insize = len + esc_len; |
||
1806 | hr = ConvertINetMultiByteToUnicode(&dummy, cv->codepage, |
||
1807 | (const char *)tmp, &insize, (wchar_t *)wbuf, wbufsize); |
||
1808 | if (hr != S_OK || insize != len + esc_len) |
||
1809 | return seterror(EILSEQ); |
||
1810 | |||
1811 | /* Check for conversion error. Assuming defaultChar is 0x3F. */ |
||
1812 | /* ascii should be converted from ascii */ |
||
1813 | if (wbuf[0] == buf[0] |
||
1814 | && cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) |
||
1815 | return seterror(EILSEQ); |
||
1816 | |||
1817 | /* reset the mode for informal sequence */ |
||
1818 | if (cv->mode != ISO2022_MODE(cs, shift)) |
||
1819 | cv->mode = ISO2022_MODE(cs, shift); |
||
1820 | |||
1821 | return len; |
||
1822 | } |
||
1823 | |||
1824 | static int |
||
1825 | iso2022jp_wctomb(csconv_t *cv, ushort *wbuf, int wbufsize, uchar *buf, int bufsize) |
||
1826 | { |
||
1827 | iso2022_esc_t *iesc = iso2022jp_esc; |
||
1828 | char tmp[MB_CHAR_MAX]; |
||
1829 | int tmpsize = MB_CHAR_MAX; |
||
1830 | int insize = wbufsize; |
||
1831 | HRESULT hr; |
||
1832 | DWORD dummy = 0; |
||
1833 | int len; |
||
1834 | int esc_len; |
||
1835 | int cs; |
||
1836 | int shift; |
||
1837 | int i; |
||
1838 | |||
1839 | /* |
||
1840 | * MultiByte = [escape sequence] + character + [escape sequence] |
||
1841 | * |
||
1842 | * Whether trailing escape sequence is added depends on which API is |
||
1843 | * used (kernel or MLang, and its version). |
||
1844 | */ |
||
1845 | hr = ConvertINetUnicodeToMultiByte(&dummy, cv->codepage, |
||
1846 | (const wchar_t *)wbuf, &wbufsize, tmp, &tmpsize); |
||
1847 | if (hr != S_OK || insize != wbufsize) |
||
1848 | return seterror(EILSEQ); |
||
1849 | else if (bufsize < tmpsize) |
||
1850 | return seterror(E2BIG); |
||
1851 | |||
1852 | if (tmpsize == 1) |
||
1853 | { |
||
1854 | cs = ISO2022JP_CS_ASCII; |
||
1855 | esc_len = 0; |
||
1856 | } |
||
1857 | else |
||
1858 | { |
||
1859 | for (i = 1; iesc[i].esc != NULL; ++i) |
||
1860 | { |
||
1861 | esc_len = iesc[i].esc_len; |
||
1862 | if (strncmp(tmp, iesc[i].esc, esc_len) == 0) |
||
1863 | { |
||
1864 | cs = iesc[i].cs; |
||
1865 | break; |
||
1866 | } |
||
1867 | } |
||
1868 | if (iesc[i].esc == NULL) |
||
1869 | /* not supported escape sequence */ |
||
1870 | return seterror(EILSEQ); |
||
1871 | } |
||
1872 | |||
1873 | shift = ISO2022_SI; |
||
1874 | if (tmp[esc_len] == iso2022_SO_seq[0]) |
||
1875 | { |
||
1876 | shift = ISO2022_SO; |
||
1877 | esc_len += 1; |
||
1878 | } |
||
1879 | |||
1880 | len = iesc[cs].len; |
||
1881 | |||
1882 | /* Check for converting error. Assuming defaultChar is 0x3F. */ |
||
1883 | /* ascii should be converted from ascii */ |
||
1884 | if (cs == ISO2022JP_CS_ASCII && !(wbuf[0] < 0x80)) |
||
1885 | return seterror(EILSEQ); |
||
1886 | else if (tmpsize < esc_len + len) |
||
1887 | return seterror(EILSEQ); |
||
1888 | |||
1889 | if (cv->mode == ISO2022_MODE(cs, shift)) |
||
1890 | { |
||
1891 | /* remove escape sequence */ |
||
1892 | if (esc_len != 0) |
||
1893 | memmove(tmp, tmp + esc_len, len); |
||
1894 | esc_len = 0; |
||
1895 | } |
||
1896 | else |
||
1897 | { |
||
1898 | if (cs == ISO2022JP_CS_ASCII) |
||
1899 | { |
||
1900 | esc_len = iesc[ISO2022JP_CS_ASCII].esc_len; |
||
1901 | memmove(tmp + esc_len, tmp, len); |
||
1902 | memcpy(tmp, iesc[ISO2022JP_CS_ASCII].esc, esc_len); |
||
1903 | } |
||
1904 | if (ISO2022_MODE_SHIFT(cv->mode) == ISO2022_SO) |
||
1905 | { |
||
1906 | /* shift-in before changing to other mode */ |
||
1907 | memmove(tmp + 1, tmp, len + esc_len); |
||
1908 | memcpy(tmp, iso2022_SI_seq, 1); |
||
1909 | esc_len += 1; |
||
1910 | } |
||
1911 | } |
||
1912 | |||
1913 | if (bufsize < len + esc_len) |
||
1914 | return seterror(E2BIG); |
||
1915 | memcpy(buf, tmp, len + esc_len); |
||
1916 | cv->mode = ISO2022_MODE(cs, shift); |
||
1917 | return len + esc_len; |
||
1918 | } |
||
1919 | |||
1920 | static int |
||
1921 | iso2022jp_flush(csconv_t *cv, uchar *buf, int bufsize) |
||
1922 | { |
||
1923 | iso2022_esc_t *iesc = iso2022jp_esc; |
||
1924 | int esc_len; |
||
1925 | |||
1926 | if (cv->mode != ISO2022_MODE(ISO2022JP_CS_ASCII, ISO2022_SI)) |
||
1927 | { |
||
1928 | esc_len = 0; |
||
1929 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) |
||
1930 | esc_len += 1; |
||
1931 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) |
||
1932 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; |
||
1933 | if (bufsize < esc_len) |
||
1934 | return seterror(E2BIG); |
||
1935 | |||
1936 | esc_len = 0; |
||
1937 | if (ISO2022_MODE_SHIFT(cv->mode) != ISO2022_SI) |
||
1938 | { |
||
1939 | memcpy(buf, iso2022_SI_seq, 1); |
||
1940 | esc_len += 1; |
||
1941 | } |
||
1942 | if (ISO2022_MODE_CS(cv->mode) != ISO2022JP_CS_ASCII) |
||
1943 | { |
||
1944 | memcpy(buf + esc_len, iesc[ISO2022JP_CS_ASCII].esc, |
||
1945 | iesc[ISO2022JP_CS_ASCII].esc_len); |
||
1946 | esc_len += iesc[ISO2022JP_CS_ASCII].esc_len; |
||
1947 | } |
||
1948 | return esc_len; |
||
1949 | } |
||
1950 | return 0; |
||
1951 | } |
||
1952 | |||
#if defined(MAKE_DLL) && defined(USE_LIBICONV_DLL)
/*
 * DLL entry point.  Stores this module's handle in hwiniconv when the
 * process attaches; every other notification is ignored.  Always
 * returns TRUE.
 */
BOOL WINAPI
DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
{
    if (fdwReason == DLL_PROCESS_ATTACH)
        hwiniconv = (HMODULE)hinstDLL;
    return TRUE;
}
#endif
||
1970 | |||
#if defined(MAKE_EXE)
#include <stdio.h>
#include <fcntl.h>
#include <io.h>
/*
 * Minimal iconv command line tool.
 *
 *   -l             list the known encoding names and exit
 *   -f from-enc    source encoding
 *   -t to-enc      destination encoding
 *   -c             append "//IGNORE" to the destination encoding
 *   --output file  write to file instead of stdout
 *   file           read from file instead of stdin (ends option parsing)
 *
 * Returns 0 on success (and after printing the usage text), 1 on error.
 */
int
main(int argc, char **argv)
{
    char *fromcode = NULL;
    char *tocode = NULL;
    char *tocode_buf = NULL;    /* heap copy of tocode when -c is given */
    int i;
    int j;
    char inbuf[BUFSIZ];
    char outbuf[BUFSIZ];
    const char *pin;
    char *pout;
    size_t inbytesleft;
    size_t outbytesleft;
    size_t outlen;
    size_t rest = 0;
    iconv_t cd = (iconv_t)(-1);
    size_t r;
    FILE *in = stdin;
    FILE *out = stdout;
    FILE *fp;
    int ignore = 0;
    int saved_errno;
    int rc = 1;

    _setmode(_fileno(stdin), _O_BINARY);
    _setmode(_fileno(stdout), _O_BINARY);

    for (i = 1; i < argc; ++i)
    {
        if (strcmp(argv[i], "-l") == 0)
        {
            for (j = 0; codepage_alias[j].name != NULL; ++j)
                printf("%s\n", codepage_alias[j].name);
            rc = 0;
            goto done;
        }

        /* argv[argc] is NULL, so a missing -f/-t value yields the usage */
        if (strcmp(argv[i], "-f") == 0)
            fromcode = argv[++i];
        else if (strcmp(argv[i], "-t") == 0)
            tocode = argv[++i];
        else if (strcmp(argv[i], "-c") == 0)
            ignore = 1;
        else if (strcmp(argv[i], "--output") == 0)
        {
            if (i + 1 >= argc)
            {
                /* never hand a NULL file name to fopen() */
                fprintf(stderr, "missing file name after --output\n");
                goto done;
            }
            fp = fopen(argv[++i], "wb");
            if (fp == NULL)
            {
                fprintf(stderr, "cannot open %s\n", argv[i]);
                goto done;
            }
            if (out != stdout)
                fclose(out);
            out = fp;
        }
        else
        {
            fp = fopen(argv[i], "rb");
            if (fp == NULL)
            {
                fprintf(stderr, "cannot open %s\n", argv[i]);
                goto done;
            }
            in = fp;
            break;
        }
    }

    if (fromcode == NULL || tocode == NULL)
    {
        printf("usage: %s [-c] -f from-enc -t to-enc [file]\n", argv[0]);
        rc = 0;
        goto done;
    }

    if (ignore)
    {
        tocode_buf = (char *)malloc(strlen(tocode) + strlen("//IGNORE") + 1);
        if (tocode_buf == NULL)
        {
            perror("fatal error");
            goto done;
        }
        strcpy(tocode_buf, tocode);
        strcat(tocode_buf, "//IGNORE");
        tocode = tocode_buf;
    }

    cd = iconv_open(tocode, fromcode);
    if (cd == (iconv_t)(-1))
    {
        perror("iconv_open error");
        goto done;
    }

    /* keep going while there is fresh input or an unconverted remainder */
    while ((inbytesleft = fread(inbuf + rest, 1, sizeof(inbuf) - rest, in)) != 0
            || rest != 0)
    {
        inbytesleft += rest;
        pin = inbuf;
        pout = outbuf;
        outbytesleft = sizeof(outbuf);
        r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft);
        /* fwrite() below may overwrite errno */
        saved_errno = errno;
        outlen = sizeof(outbuf) - outbytesleft;
        if (fwrite(outbuf, 1, outlen, out) != outlen)
        {
            perror("write error");
            goto done;
        }
        /* E2BIG, and EINVAL before end of input, just mean "call again" */
        if (r == (size_t)(-1) && saved_errno != E2BIG
                && (saved_errno != EINVAL || feof(in)))
        {
            errno = saved_errno;
            perror("conversion error");
            goto done;
        }
        memmove(inbuf, pin, inbytesleft);
        rest = inbytesleft;
    }

    /* flush the converter's pending shift state */
    pout = outbuf;
    outbytesleft = sizeof(outbuf);
    r = iconv(cd, NULL, NULL, &pout, &outbytesleft);
    saved_errno = errno;
    outlen = sizeof(outbuf) - outbytesleft;
    if (fwrite(outbuf, 1, outlen, out) != outlen)
    {
        perror("write error");
        goto done;
    }
    if (r == (size_t)(-1))
    {
        errno = saved_errno;
        perror("conversion error");
        goto done;
    }

    if (fflush(out) != 0)
    {
        perror("write error");
        goto done;
    }

    rc = 0;

done:
    if (cd != (iconv_t)(-1))
        iconv_close(cd);
    if (in != stdin)
        fclose(in);
    if (out != stdout && fclose(out) != 0 && rc == 0)
    {
        perror("write error");
        rc = 1;
    }
    free(tocode_buf);
    return rc;
}
#endif