nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | # This file is derived from |
2 | # |
||
3 | # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt |
||
4 | # |
||
5 | # Which was created by Markus Kuhn <mkuhn@acm.org> - 2000-09-02 |
||
6 | # |
||
7 | # Lines beginning with # and blank lines are ignored |
||
8 | # |
||
9 | # Beyond that, this file consists of a series of test cases. Each test case consists of |
||
10 | # 2 or 3 lines: |
||
11 | # |
||
12 | # 1. A UTF-8 string |
||
13 | # 2. A status |
||
14 | # VALID : The string is a valid UTF-8 representation of valid Unicode |
||
15 | # INCOMPLETE : The string has a partial character at the end |
||
16 | # NOTUNICODE : The string is valid UTF-8, but the characters represented |
||
17 | # are not valid Unicode (e.g. surrogates, or code points above 10FFFF) |
||
18 | # OVERLONG : The string includes overlong sequences |
||
19 | # MALFORMED : The string is not valid UTF-8 |
||
20 | # 3. If the status is VALID or NOTUNICODE, the UCS-4 representation of the string, |
||
21 | # as a series of hex numbers. |
||
22 | |||
23 | # 1 Some correct UTF-8 text |
||
24 | κόσμε |
||
25 | VALID |
||
26 | 03ba 1f79 03c3 03bc 03b5 |
||
27 | |||
28 | # 2.1 First possible sequence of a certain length |
||
29 | # |
||
30 | # FIXME - handle NUL bytes? |
||
31 | # |
||
32 | # [ NULL BYTE ] |
||
33 | #VALID |
||
34 | #0000 |
||
35 | |||
36 | |
||
37 | VALID |
||
38 | 0080 |
||
39 | |||
40 | ࠀ |
||
41 | VALID |
||
42 | 0800 |
||
43 | |||
44 | 𐀀 |
||
45 | VALID |
||
46 | 00010000 |
||
47 | |||
48 |
|
||
49 | NOTUNICODE |
||
50 | 00200000 |
||
51 | |||
52 |
|
||
53 | NOTUNICODE |
||
54 | 04000000 |
||
55 | |||
56 | |
||
57 | VALID |
||
58 | 0000007f |
||
59 | |||
60 | ߿ |
||
61 | VALID |
||
62 | 000007ff |
||
63 | |||
64 |
|
||
65 | VALID |
||
66 | 0000ffff |
||
67 | |||
68 |
|
||
69 | NOTUNICODE |
||
70 | 001fffff |
||
71 | |||
72 |
|
||
73 | NOTUNICODE |
||
74 | 03ffffff |
||
75 | |||
76 |
|
||
77 | NOTUNICODE |
||
78 | 7fffffff |
||
79 | |||
80 | # 2.3 Other boundary conditions |
||
81 | |||
82 | |
||
83 | VALID |
||
84 | d7ff |
||
85 | |||
86 | |
||
87 | VALID |
||
88 | e000 |
||
89 | |||
90 | � |
||
91 | VALID |
||
92 | fffd |
||
93 | |||
94 | |
||
95 | VALID |
||
96 | 0010fffd |
||
97 | |||
98 | |
||
99 | VALID |
||
100 | 0010ffff |
||
101 | |||
102 |
|
||
103 | NOTUNICODE |
||
104 | 00110000 |
||
105 | |||
106 | # 3.1 Unexpected continuation bytes |
||
107 | |||
108 |
|
||
109 | MALFORMED |
||
110 |
|
||
111 | MALFORMED |
||
112 |
|
||
113 | MALFORMED |
||
114 |
|
||
115 | MALFORMED |
||
116 |
|
||
117 | MALFORMED |
||
118 |
|
||
119 | MALFORMED |
||
120 |
|
||
121 | MALFORMED |
||
122 |
|
||
123 | MALFORMED |
||
124 |
|
||
125 | MALFORMED |
||
126 | |||
127 | # 3.2 Lonely start characters |
||
128 | |||
129 | |
||
130 | MALFORMED |
||
131 | |
||
132 | MALFORMED |
||
133 | |
||
134 | MALFORMED |
||
135 | |
||
136 | MALFORMED |
||
137 | |
||
138 | MALFORMED |
||
139 | |||
140 | # 3.3 Sequences with last continuation byte missing |
||
141 | |||
142 |
|
||
143 | INCOMPLETE |
||
144 |
|
||
145 | INCOMPLETE |
||
146 |
|
||
147 | INCOMPLETE |
||
148 |
|
||
149 | INCOMPLETE |
||
150 |
|
||
151 | INCOMPLETE |
||
152 |
|
||
153 | INCOMPLETE |
||
154 |
|
||
155 | INCOMPLETE |
||
156 |
|
||
157 | INCOMPLETE |
||
158 |
|
||
159 | INCOMPLETE |
||
160 |
|
||
161 | INCOMPLETE |
||
162 | |||
163 | # 3.4 Concatenation of incomplete sequences |
||
164 | |||
165 |
|
||
166 | MALFORMED |
||
167 | |||
168 | # 3.5 Impossible bytes |
||
169 | |||
170 |
|
||
171 | MALFORMED |
||
172 |
|
||
173 | MALFORMED |
||
174 |
|
||
175 | MALFORMED |
||
176 | |||
177 | # Examples of an overlong ASCII character |
||
178 | |||
179 |
|
||
180 | OVERLONG |
||
181 |
|
||
182 | OVERLONG |
||
183 |
|
||
184 | OVERLONG |
||
185 |
|
||
186 | OVERLONG |
||
187 |
|
||
188 | OVERLONG |
||
189 | |||
190 | # Maximum overlong sequences |
||
191 | |||
192 |
|
||
193 | OVERLONG |
||
194 |
|
||
195 | OVERLONG |
||
196 |
|
||
197 | OVERLONG |
||
198 |
|
||
199 | OVERLONG |
||
200 |
|
||
201 | OVERLONG |
||
202 | |||
203 | # Overlong representation of the NUL character |
||
204 | |||
205 |
|
||
206 | OVERLONG |
||
207 |
|
||
208 | OVERLONG |
||
209 |
|
||
210 | OVERLONG |
||
211 |
|
||
212 | OVERLONG |
||
213 |
|
||
214 | OVERLONG |
||
215 | |||
216 | # Illegal code positions |
||
217 | |||
218 | # Single UTF-16 surrogates |
||
219 | |||
220 |
|
||
221 | NOTUNICODE |
||
222 | d800 |
||
223 | |||
224 |
|
||
225 | NOTUNICODE |
||
226 | db7f |
||
227 | |||
228 |
|
||
229 | NOTUNICODE |
||
230 | db80 |
||
231 | |||
232 |
|
||
233 | NOTUNICODE |
||
234 | dbff |
||
235 | |||
236 |
|
||
237 | NOTUNICODE |
||
238 | dc00 |
||
239 | |||
240 |
|
||
241 | NOTUNICODE |
||
242 | df80 |
||
243 | |||
244 |
|
||
245 | NOTUNICODE |
||
246 | dfff |
||
247 | |||
248 | # Paired UTF-16 surrogates |
||
249 | |||
250 |
|
||
251 | NOTUNICODE |
||
252 | d800 dc00 |
||
253 | |||
254 |
|
||
255 | NOTUNICODE |
||
256 | d800 dfff |
||
257 | |||
258 |
|
||
259 | NOTUNICODE |
||
260 | db7f dc00 |
||
261 | |||
262 |
|
||
263 | NOTUNICODE |
||
264 | db7f dfff |
||
265 | |||
266 |
|
||
267 | NOTUNICODE |
||
268 | db80 dc00 |
||
269 | |||
270 |
|
||
271 | NOTUNICODE |
||
272 | db80 dfff |
||
273 | |||
274 |
|
||
275 | NOTUNICODE |
||
276 | dbff dc00 |
||
277 | |||
278 |
|
||
279 | NOTUNICODE |
||
280 | dbff dfff |
||
281 | |||
282 | ################ |
||
283 | # |
||
284 | # Some more tests, not from Markus Kuhn's file |
||
285 | # |
||
286 | |||
287 | # Mixed plane 0 and higher planes |
||
288 | |||
289 | A𐀀BC |
||
290 | VALID |
||
291 | 41 00010000 42 10fffd 43 |