BadVPN – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 /**
2 * @file Utf8Encoder.h
3 * @author Ambroz Bizjak <ambrop7@gmail.com>
4 *
5 * @section LICENSE
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the author nor the
15 * names of its contributors may be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29  
30 #ifndef BADVPN_UTF8ENCODER_H
31 #define BADVPN_UTF8ENCODER_H
32  
33 #include <stdint.h>
34  
35 /**
36 * Encodes a Unicode character into a sequence of bytes according to UTF-8.
37 *
38 * @param ch Unicode character to encode
39 * @param out will receive the encoded bytes. Must have space for 4 bytes.
40 * @return number of bytes written, 0-4, with 0 meaning the character cannot
41 * be encoded
42 */
43 static int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out);
44  
/*
 * Encodes the Unicode code point `ch` as UTF-8 into `out`.
 *
 * `out` must have room for 4 bytes. Returns the number of bytes written
 * (1-4), or 0 if `ch` cannot be encoded: it is a UTF-16 surrogate
 * (U+D800..U+DFFF) or lies above U+10FFFF. Nothing is written to `out`
 * when 0 is returned.
 */
int Utf8Encoder_EncodeCharacter (uint32_t ch, uint8_t *out)
{
    // 1 byte: 0xxxxxxx
    if (ch <= UINT32_C(0x007F)) {
        out[0] = (uint8_t)ch;
        return 1;
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if (ch <= UINT32_C(0x07FF)) {
        out[0] = (uint8_t)(0xC0 | (ch >> 6));
        out[1] = (uint8_t)(0x80 | (ch & 0x3F));
        return 2;
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if (ch <= UINT32_C(0xFFFF)) {
        // surrogates are reserved for UTF-16 and have no UTF-8 encoding
        if (ch >= UINT32_C(0xD800) && ch <= UINT32_C(0xDFFF)) {
            return 0;
        }

        out[0] = (uint8_t)(0xE0 | (ch >> 12));
        out[1] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
        out[2] = (uint8_t)(0x80 | (ch & 0x3F));
        return 3;
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    // U+10FFFF is the last Unicode code point and is itself encodable,
    // so the upper bound is inclusive.
    if (ch <= UINT32_C(0x10FFFF)) {
        out[0] = (uint8_t)(0xF0 | (ch >> 18));
        out[1] = (uint8_t)(0x80 | ((ch >> 12) & 0x3F));
        out[2] = (uint8_t)(0x80 | ((ch >> 6) & 0x3F));
        out[3] = (uint8_t)(0x80 | (ch & 0x3F));
        return 4;
    }

    // beyond the Unicode range
    return 0;
}
80  
81 #endif