nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Copyright (C) 1999-2001, 2008 Free Software Foundation, Inc. |
||
3 | * This file is part of the GNU LIBICONV Library. |
||
4 | * |
||
5 | * The GNU LIBICONV Library is free software; you can redistribute it |
||
6 | * and/or modify it under the terms of the GNU Library General Public |
||
7 | * License as published by the Free Software Foundation; either version 2 |
||
8 | * of the License, or (at your option) any later version. |
||
9 | * |
||
10 | * The GNU LIBICONV Library is distributed in the hope that it will be |
||
11 | * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
13 | * Library General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU Library General Public |
||
16 | * License along with the GNU LIBICONV Library; see the file COPYING.LIB. |
||
17 | * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, |
||
18 | * Fifth Floor, Boston, MA 02110-1301, USA. |
||
19 | */ |
||
20 | |||
21 | /* |
||
22 | * UTF-16 |
||
23 | */ |
||
24 | |||
25 | /* Specification: RFC 2781 */ |
||
26 | |||
27 | /* Here we accept FFFE/FEFF marks as endianness indicators everywhere |
||
28 | in the stream, not just at the beginning. (This is contrary to what |
||
29 | RFC 2781 section 3.2 specifies, but it allows concatenation of byte |
||
30 | sequences to work flawlessly, while disagreeing with the RFC behaviour |
||
31 | only for strings containing U+FEFF characters, which is quite rare.) |
||
32 | The default is big-endian. */ |
||
33 | /* The state is 0 if big-endian, 1 if little-endian. */ |
||
34 | static int |
||
35 | utf16_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) |
||
36 | { |
||
37 | state_t state = conv->istate; |
||
38 | int count = 0; |
||
39 | for (; n >= 2;) { |
||
40 | ucs4_t wc = (state ? s[0] + (s[1] << 8) : (s[0] << 8) + s[1]); |
||
41 | if (wc == 0xfeff) { |
||
42 | } else if (wc == 0xfffe) { |
||
43 | state ^= 1; |
||
44 | } else if (wc >= 0xd800 && wc < 0xdc00) { |
||
45 | if (n >= 4) { |
||
46 | ucs4_t wc2 = (state ? s[2] + (s[3] << 8) : (s[2] << 8) + s[3]); |
||
47 | if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) |
||
48 | goto ilseq; |
||
49 | *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00); |
||
50 | conv->istate = state; |
||
51 | return count+4; |
||
52 | } else |
||
53 | break; |
||
54 | } else if (wc >= 0xdc00 && wc < 0xe000) { |
||
55 | goto ilseq; |
||
56 | } else { |
||
57 | *pwc = wc; |
||
58 | conv->istate = state; |
||
59 | return count+2; |
||
60 | } |
||
61 | s += 2; n -= 2; count += 2; |
||
62 | } |
||
63 | conv->istate = state; |
||
64 | return RET_TOOFEW(count); |
||
65 | |||
66 | ilseq: |
||
67 | conv->istate = state; |
||
68 | return RET_SHIFT_ILSEQ(count); |
||
69 | } |
||
70 | |||
71 | /* We output UTF-16 in big-endian order, with byte-order mark. |
||
72 | See RFC 2781 section 3.3 for a rationale: Some document formats |
||
73 | mandate a BOM; the file concatenation issue is not so severe as |
||
74 | long as the above utf16_mbtowc function is used. */ |
||
75 | /* The state is 0 at the beginning, 1 after the BOM has been written. */ |
||
76 | static int |
||
77 | utf16_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) |
||
78 | { |
||
79 | if (wc != 0xfffe && !(wc >= 0xd800 && wc < 0xe000)) { |
||
80 | int count = 0; |
||
81 | if (!conv->ostate) { |
||
82 | if (n >= 2) { |
||
83 | r[0] = 0xFE; |
||
84 | r[1] = 0xFF; |
||
85 | r += 2; n -= 2; count += 2; |
||
86 | } else |
||
87 | return RET_TOOSMALL; |
||
88 | } |
||
89 | if (wc < 0x10000) { |
||
90 | if (n >= 2) { |
||
91 | r[0] = (unsigned char) (wc >> 8); |
||
92 | r[1] = (unsigned char) wc; |
||
93 | conv->ostate = 1; |
||
94 | return count+2; |
||
95 | } else |
||
96 | return RET_TOOSMALL; |
||
97 | } |
||
98 | else if (wc < 0x110000) { |
||
99 | if (n >= 4) { |
||
100 | ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10); |
||
101 | ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); |
||
102 | r[0] = (unsigned char) (wc1 >> 8); |
||
103 | r[1] = (unsigned char) wc1; |
||
104 | r[2] = (unsigned char) (wc2 >> 8); |
||
105 | r[3] = (unsigned char) wc2; |
||
106 | conv->ostate = 1; |
||
107 | return count+4; |
||
108 | } else |
||
109 | return RET_TOOSMALL; |
||
110 | } |
||
111 | } |
||
112 | return RET_ILUNI; |
||
113 | } |