BadVPN – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /** |
2 | * @file Utf8Encoder.h |
||
3 | * @author Ambroz Bizjak <ambrop7@gmail.com> |
||
4 | * |
||
5 | * @section LICENSE |
||
6 | * |
||
7 | * Redistribution and use in source and binary forms, with or without |
||
8 | * modification, are permitted provided that the following conditions are met: |
||
9 | * 1. Redistributions of source code must retain the above copyright |
||
10 | * notice, this list of conditions and the following disclaimer. |
||
11 | * 2. Redistributions in binary form must reproduce the above copyright |
||
12 | * notice, this list of conditions and the following disclaimer in the |
||
13 | * documentation and/or other materials provided with the distribution. |
||
14 | * 3. Neither the name of the author nor the |
||
15 | * names of its contributors may be used to endorse or promote products |
||
16 | * derived from this software without specific prior written permission. |
||
17 | * |
||
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
||
19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||
20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||
21 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||
22 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||
23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||
24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||
25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||
27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
28 | */ |
||
29 | |||
30 | #ifndef BADVPN_UTF8ENCODER_H |
||
31 | #define BADVPN_UTF8ENCODER_H |
||
32 | |||
33 | #include <stdint.h> |
||
34 | |||
35 | /** |
||
36 | * Encodes a Unicode character into a sequence of bytes according to UTF-8. |
||
37 | * |
||
38 | * @param ch Unicode character to encode |
||
39 | * @param out will receive the encoded bytes. Must have space for 4 bytes. |
||
40 | * @return number of bytes written, 0-4, with 0 meaning the character cannot |
||
41 | * be encoded |
||
42 | */ |
||
43 | static int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out); |
||
44 | |||
/*
 * Encodes the Unicode code point ch as UTF-8 into out, which must have room
 * for 4 bytes. Returns the number of bytes written (1-4), or 0 if ch is a
 * surrogate (U+D800..U+DFFF) or lies above U+10FFFF; out is left untouched
 * when 0 is returned.
 */
int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out)
{
    // one byte: 0xxxxxxx
    if (ch <= UINT32_C(0x007F)) {
        out[0] = (uint8_t)ch;
        return 1;
    }

    // two bytes: 110xxxxx 10xxxxxx
    if (ch <= UINT32_C(0x07FF)) {
        out[0] = (uint8_t)(0xC0 | (ch >> 6));
        out[1] = (uint8_t)(0x80 | ((ch >> 0) & 0x3F));
        return 2;
    }

    // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if (ch <= UINT32_C(0xFFFF)) {
        // surrogates are not valid scalar values and cannot be encoded
        if (ch >= UINT32_C(0xD800) && ch <= UINT32_C(0xDFFF)) {
            return 0;
        }

        out[0] = (uint8_t)(0xE0 | (ch >> 12));
        out[1] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
        out[2] = (uint8_t)(0x80 | ((ch >> 0) & 0x3F));
        return 3;
    }

    // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    // The upper bound is inclusive: U+10FFFF is the last valid code point.
    if (ch <= UINT32_C(0x10FFFF)) {
        out[0] = (uint8_t)(0xF0 | (ch >> 18));
        out[1] = (uint8_t)(0x80 | ((ch >> 12) & 0x3F));
        out[2] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
        out[3] = (uint8_t)(0x80 | ((ch >> 0) & 0x3F));
        return 4;
    }

    // beyond the Unicode code space
    return 0;
}
||
80 | |||
81 | #endif |