BadVPN – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /** |
2 | * @file Utf16Decoder.h |
||
3 | * @author Ambroz Bizjak <ambrop7@gmail.com> |
||
4 | * |
||
5 | * @section LICENSE |
||
6 | * |
||
7 | * Redistribution and use in source and binary forms, with or without |
||
8 | * modification, are permitted provided that the following conditions are met: |
||
9 | * 1. Redistributions of source code must retain the above copyright |
||
10 | * notice, this list of conditions and the following disclaimer. |
||
11 | * 2. Redistributions in binary form must reproduce the above copyright |
||
12 | * notice, this list of conditions and the following disclaimer in the |
||
13 | * documentation and/or other materials provided with the distribution. |
||
14 | * 3. Neither the name of the author nor the |
||
15 | * names of its contributors may be used to endorse or promote products |
||
16 | * derived from this software without specific prior written permission. |
||
17 | * |
||
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
||
19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||
20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||
21 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||
22 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||
23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||
24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||
25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||
27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
28 | */ |
||
29 | |||
30 | #ifndef BADVPN_UTF16DECODER_H |
||
31 | #define BADVPN_UTF16DECODER_H |
||
32 | |||
33 | #include <stdint.h> |
||
34 | |||
35 | #include <misc/debug.h> |
||
36 | |||
/**
 * Incremental UTF-16 decoder.
 *
 * 16-bit code units are fed in one at a time with Utf16Decoder_Input(),
 * which reports a Unicode code point whenever one has been completed.
 * The only state kept between calls is a pending high surrogate.
 */
typedef struct {
    // nonzero while a high surrogate has been seen and its low surrogate is awaited
    int cont;
    // while cont is set: payload of the pending high surrogate, shifted into
    // bits 10..19; after a completed pair: the last decoded code point
    uint32_t ch;
} Utf16Decoder;

/**
 * Initializes the UTF-16 decoder.
 * After this call the decoder has no pending high surrogate.
 *
 * @param o the object
 */
static void Utf16Decoder_Init (Utf16Decoder *o);

/**
 * Inputs a 16-bit value to the decoder.
 *
 * Values outside the surrogate range are reported immediately. A high
 * surrogate (0xD800 - 0xDBFF) is remembered and nothing is reported; a
 * following low surrogate (0xDC00 - 0xDFFF) completes the pair and the
 * combined supplementary-plane character is reported. A low surrogate
 * with no pending high surrogate is dropped. A pending high surrogate
 * that is followed by anything other than a low surrogate is discarded.
 *
 * @param o the object
 * @param b 16-bit value to input
 * @param out_ch will receive a Unicode character if this function returns 1.
 *               If written, the character will be in the range 0 - 0x10FFFF,
 *               excluding the surrogate range 0xD800 - 0xDFFF.
 * @return 1 if a Unicode character has been written to *out_ch, 0 if not
 */
static int Utf16Decoder_Input (Utf16Decoder *o, uint16_t b, uint32_t *out_ch);

static void Utf16Decoder_Init (Utf16Decoder *o)
{
    o->cont = 0;
    // not read before being written by a high surrogate, but keep the
    // object fully initialized
    o->ch = 0;
}

static int Utf16Decoder_Input (Utf16Decoder *o, uint16_t b, uint32_t *out_ch)
{
    // high surrogate
    if (b >= UINT16_C(0xD800) && b <= UINT16_C(0xDBFF)) {
        // set continuation state; a previously pending high surrogate,
        // if any, is simply replaced
        o->cont = 1;

        // store high ten bits
        o->ch = (uint32_t)(b - UINT16_C(0xD800)) << 10;

        return 0;
    }

    // low surrogate
    if (b >= UINT16_C(0xDC00) && b <= UINT16_C(0xDFFF)) {
        // unpaired low surrogate, drop it
        if (!o->cont) {
            return 0;
        }

        // reset state
        o->cont = 0;

        // add low ten bits, then the 0x10000 offset that places surrogate
        // pairs in the supplementary planes. The result is always within
        // 0x10000 - 0x10FFFF, so it can never be a surrogate value.
        o->ch = UINT32_C(0x10000) + (o->ch | (uint32_t)(b - UINT16_C(0xDC00)));

        // return character
        *out_ch = o->ch;
        return 1;
    }

    // ordinary BMP character; any pending high surrogate is discarded
    o->cont = 0;

    // return character
    *out_ch = b;
    return 1;
}
||
112 | |||
113 | #endif |