wasCSharpSQLite – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | using System; |
2 | using System.Diagnostics; |
||
3 | using System.Text; |
||
4 | using u32 = System.UInt32; |
||
5 | |||
6 | namespace Community.CsharpSqlite |
||
7 | { |
||
8 | public partial class Sqlite3 |
||
9 | { |
||
10 | /* |
||
11 | ** 2004 April 13 |
||
12 | ** |
||
13 | ** The author disclaims copyright to this source code. In place of |
||
14 | ** a legal notice, here is a blessing: |
||
15 | ** |
||
16 | ** May you do good and not evil. |
||
17 | ** May you find forgiveness for yourself and forgive others. |
||
18 | ** May you share freely, never taking more than you give. |
||
19 | ** |
||
20 | ************************************************************************* |
||
21 | ** This file contains routines used to translate between UTF-8, |
||
22 | ** UTF-16, UTF-16BE, and UTF-16LE. |
||
23 | ** |
||
24 | ** Notes on UTF-8: |
||
25 | ** |
||
26 | ** Byte-0 Byte-1 Byte-2 Byte-3 Value |
||
27 | ** 0xxxxxxx 00000000 00000000 0xxxxxxx |
||
28 | ** 110yyyyy 10xxxxxx 00000000 00000yyy yyxxxxxx |
||
29 | ** 1110zzzz 10yyyyyy 10xxxxxx 00000000 zzzzyyyy yyxxxxxx |
||
30 | ** 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx 000uuuuu zzzzyyyy yyxxxxxx |
||
31 | ** |
||
32 | ** |
||
33 | ** Notes on UTF-16: (with wwww+1==uuuuu) |
||
34 | ** |
||
35 | ** Word-0 Word-1 Value |
||
36 | ** 110110ww wwzzzzyy 110111yy yyxxxxxx 000uuuuu zzzzyyyy yyxxxxxx |
||
37 | ** zzzzyyyy yyxxxxxx 00000000 zzzzyyyy yyxxxxxx |
||
38 | ** |
||
39 | ** |
||
40 | ** BOM or Byte Order Mark: |
||
41 | ** 0xff 0xfe little-endian utf-16 follows |
||
42 | ** 0xfe 0xff big-endian utf-16 follows |
||
43 | ** |
||
44 | ************************************************************************* |
||
45 | ** Included in SQLite3 port to C#-SQLite; 2008 Noah B Hart |
||
46 | ** C#-SQLite is an independent reimplementation of the SQLite software library |
||
47 | ** |
||
48 | ** SQLITE_SOURCE_ID: 2011-06-23 19:49:22 4374b7e83ea0a3fbc3691f9c0c936272862f32f2 |
||
49 | ** |
||
50 | ************************************************************************* |
||
51 | */ |
||
52 | //#include "sqliteInt.h" |
||
53 | //#include <assert.h> |
||
54 | //#include "vdbeInt.h" |
||
55 | |||
56 | #if !SQLITE_AMALGAMATION |
||
57 | /* |
||
58 | ** The following constant value is used by the SQLITE_BIGENDIAN and |
||
59 | ** SQLITE_LITTLEENDIAN macros. |
||
60 | */ |
||
61 | //const int sqlite3one = 1; |
||
62 | #endif //* SQLITE_AMALGAMATION */ |
||
63 | |||
64 | /* |
||
65 | ** This lookup table is used to help decode the first byte of |
||
66 | ** a multi-byte UTF8 character. |
||
67 | */ |
||
/*
** Lead-byte payload table for multi-byte UTF-8 sequences.
**
** The table has 64 entries, one for each possible lead byte 0xc0..0xff.
** The reference READ_UTF8 macro indexes it with (c - 0xc0) to obtain the
** data bits carried by the first byte of the sequence.
*/
static byte[] sqlite3Utf8Trans1 = {
  /* 0xc0..0xcf */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0xd0..0xdf */
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  /* 0xe0..0xef */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  /* 0xf0..0xff */
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
||
78 | |||
79 | |||
80 | //#define WRITE_UTF8(zOut, c) { \ |
||
81 | // if( c<0x00080 ){ \ |
||
82 | // *zOut++ = (u8)(c&0xFF); \ |
||
83 | // } \ |
||
84 | // else if( c<0x00800 ){ \ |
||
85 | // *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ |
||
86 | // *zOut++ = 0x80 + (u8)(c & 0x3F); \ |
||
87 | // } \ |
||
88 | // else if( c<0x10000 ){ \ |
||
89 | // *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ |
||
90 | // *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ |
||
91 | // *zOut++ = 0x80 + (u8)(c & 0x3F); \ |
||
92 | // }else{ \ |
||
93 | // *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ |
||
94 | // *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ |
||
95 | // *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ |
||
96 | // *zOut++ = 0x80 + (u8)(c & 0x3F); \ |
||
97 | // } \ |
||
98 | //} |
||
99 | |||
100 | //#define WRITE_UTF16LE(zOut, c) { \ |
||
101 | // if( c<=0xFFFF ){ \ |
||
102 | // *zOut++ = (u8)(c&0x00FF); \ |
||
103 | // *zOut++ = (u8)((c>>8)&0x00FF); \ |
||
104 | // }else{ \ |
||
105 | // *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \ |
||
106 | // *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \ |
||
107 | // *zOut++ = (u8)(c&0x00FF); \ |
||
108 | // *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \ |
||
109 | // } \ |
||
110 | //} |
||
111 | |||
112 | //#define WRITE_UTF16BE(zOut, c) { \ |
||
113 | // if( c<=0xFFFF ){ \ |
||
114 | // *zOut++ = (u8)((c>>8)&0x00FF); \ |
||
115 | // *zOut++ = (u8)(c&0x00FF); \ |
||
116 | // }else{ \ |
||
117 | // *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \ |
||
118 | // *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \ |
||
119 | // *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \ |
||
120 | // *zOut++ = (u8)(c&0x00FF); \ |
||
121 | // } \ |
||
122 | //} |
||
123 | |||
124 | //#define READ_UTF16LE(zIn, TERM, c){ \ |
||
125 | // c = (*zIn++); \ |
||
126 | // c += ((*zIn++)<<8); \ |
||
127 | // if( c>=0xD800 && c<0xE000 && TERM ){ \ |
||
128 | // int c2 = (*zIn++); \ |
||
129 | // c2 += ((*zIn++)<<8); \ |
||
130 | // c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ |
||
131 | // } \ |
||
132 | //} |
||
133 | |||
134 | //#define READ_UTF16BE(zIn, TERM, c){ \ |
||
135 | // c = ((*zIn++)<<8); \ |
||
136 | // c += (*zIn++); \ |
||
137 | // if( c>=0xD800 && c<0xE000 && TERM ){ \ |
||
138 | // int c2 = ((*zIn++)<<8); \ |
||
139 | // c2 += (*zIn++); \ |
||
140 | // c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ |
||
141 | // } \ |
||
142 | //} |
||
143 | |||
144 | /* |
||
145 | ** Translate a single UTF-8 character. Return the unicode value. |
||
146 | ** |
||
147 | ** During translation, assume that the byte that zTerm points to |
||
148 | ** is a 0x00. |
||
149 | ** |
||
150 | ** Write a pointer to the next unread byte back into pzNext. |
||
151 | ** |
||
152 | ** Notes On Invalid UTF-8: |
||
153 | ** |
||
154 | ** * This routine never allows a 7-bit character (0x00 through 0x7f) to |
||
155 | ** be encoded as a multi-byte character. Any multi-byte character that |
||
156 | ** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd. |
||
157 | ** |
||
158 | ** * This routine never allows a UTF16 surrogate value to be encoded. |
||
159 | ** If a multi-byte character attempts to encode a value between |
||
160 | ** 0xd800 and 0xe000 then it is rendered as 0xfffd. |
||
161 | ** |
||
162 | ** * Bytes in the range of 0x80 through 0xbf which occur as the first |
||
163 | ** byte of a character are interpreted as single-byte characters |
||
164 | ** and rendered as themselves even though they are technically |
||
165 | ** invalid characters. |
||
166 | ** |
||
167 | ** * This routine accepts an infinite number of different UTF8 encodings |
||
168 | ** for unicode values 0x80 and greater. It does not change over-length |
||
169 | ** encodings to 0xfffd as some systems recommend. |
||
170 | */ |
||
171 | //#define READ_UTF8(zIn, zTerm, c) \ |
||
172 | // c = *(zIn++); \ |
||
173 | // if( c>=0xc0 ){ \ |
||
174 | // c = sqlite3Utf8Trans1[c-0xc0]; \ |
||
175 | // while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ |
||
176 | // c = (c<<6) + (0x3f & *(zIn++)); \ |
||
177 | // } \ |
||
178 | // if( c<0x80 \ |
||
179 | // || (c&0xFFFFF800)==0xD800 \ |
||
180 | // || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ |
||
181 | // } |
||
/*
** C# rendition of READ_UTF8 operating on the chars of a .NET string.
**
** Returns the value read from the front of zIn and stores the unread
** remainder of zIn in pzNext. A null or empty zIn yields 0 and leaves
** pzNext untouched.
**
** Unlike the C macro, the first char is NOT mapped through
** sqlite3Utf8Trans1; its full value seeds the accumulator. Every following
** char whose (value & 0xc0) == 0x80 is folded in as six more bits.
** When the first char is >= 0xc0, a result below 0x80, in 0xD800..0xDFFF,
** or equal to 0xFFFE/0xFFFF is replaced by 0xFFFD.
*/
static u32 sqlite3Utf8Read(
  string zIn,           /* Text whose first character is to be read */
  ref string pzNext     /* Receives the text following that character */
)
{
  if ( zIn == null || zIn.Length == 0 )
  {
    return 0;
  }

  u32 value = zIn[0];
  int consumed = 1;

  if ( value >= 0xc0 )
  {
    /* Absorb continuation units (10xxxxxx in the low byte) until the end
    ** of the string or the first non-continuation unit. */
    while ( consumed < zIn.Length )
    {
      int unit = zIn[consumed];
      if ( ( unit & 0xc0 ) != 0x80 )
      {
        break;
      }
      value = (u32)( ( value << 6 ) + ( 0x3f & unit ) );
      consumed++;
    }

    bool belowMultiByteRange = value < 0x80;
    bool isSurrogate = ( value & 0xFFFFF800 ) == 0xD800;
    bool isNonCharacter = ( value & 0xFFFFFFFE ) == 0xFFFE;
    if ( belowMultiByteRange || isSurrogate || isNonCharacter )
    {
      value = 0xFFFD;
    }
  }

  pzNext = zIn.Substring( consumed );
  return value;
}
||
229 | |||
230 | |||
231 | |||
232 | /* |
||
233 | ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is |
||
234 | ** printed on stderr on the way into and out of sqlite3VdbeMemTranslate(). |
||
235 | */ |
||
236 | /* #define TRANSLATE_TRACE 1 */ |
||
237 | |||
238 | #if !SQLITE_OMIT_UTF16 |
||
239 | |||
240 | /* |
||
241 | ** This routine transforms the internal text encoding used by pMem to |
||
242 | ** desiredEnc. It is an error if the string is already of the desired |
||
243 | ** encoding, or if pMem does not contain a string value. |
||
244 | */ |
||
/*
** C# port status: only the precondition checks and the output-size
** computation are live code. The byte-swap and transcoding loops are still
** the commented-out C reference below, so pMem.z and pMem.enc are left
** unchanged and SQLITE_OK is returned. Each Debugger.Break() marks a step
** that has not been ported yet.
**
** pMem        Value whose text should be re-encoded (must hold a string).
** desiredEnc  Target encoding; must differ from pMem.enc.
** Returns     SQLITE_OK.
*/
static int sqlite3VdbeMemTranslate( Mem pMem, int desiredEnc )
{
  int len;                    /* Maximum length of output string in bytes */
  Debugger.Break();           // TODO -
  //unsigned char *zOut;                  /* Output buffer */
  //unsigned char *zIn;                   /* Input iterator */
  //unsigned char *zTerm;                 /* End of input */
  //unsigned char *z;                     /* Output iterator */
  //unsigned int c;

  Debug.Assert( pMem.db == null || sqlite3_mutex_held( pMem.db.mutex ) );
  Debug.Assert( ( pMem.flags & MEM_Str ) != 0 );
  Debug.Assert( pMem.enc != desiredEnc );
  Debug.Assert( pMem.enc != 0 );
  Debug.Assert( pMem.n >= 0 );

#if TRANSLATE_TRACE && SQLITE_DEBUG
  {
    char zBuf[100];
    sqlite3VdbeMemPrettyPrint(pMem, zBuf);
    fprintf(stderr, "INPUT: %s\n", zBuf);
  }
#endif

  /* If the translation is between UTF-16 little and big endian, then
  ** all that is required is to swap the byte order. This case is handled
  ** differently from the others.
  */
  Debugger.Break();           // TODO -
  //if( pMem->enc!=SQLITE_UTF8 && desiredEnc!=SQLITE_UTF8 ){
  //  u8 temp;
  //  int rc;
  //  rc = sqlite3VdbeMemMakeWriteable(pMem);
  //  if( rc!=SQLITE_OK ){
  //    Debug.Assert( rc==SQLITE_NOMEM );
  //    return SQLITE_NOMEM;
  //  }
  //  zIn = (u8*)pMem.z;
  //  zTerm = &zIn[pMem->n&~1];
  //  while( zIn<zTerm ){
  //    temp = *zIn;
  //    *zIn = *(zIn+1);
  //    zIn++;
  //    *zIn++ = temp;
  //  }
  //  pMem->enc = desiredEnc;
  //  goto translate_out;
  //}

  /* Set len to the maximum number of bytes required in the output buffer.
  ** len is not consumed yet; it is kept for the unported allocation below. */
  if ( desiredEnc == SQLITE_UTF8 )
  {
    /* When converting from UTF-16, the maximum growth results from
    ** translating a 2-byte character to a 4-byte UTF-8 character.
    ** A single byte is required for the output string
    ** nul-terminator.
    */
    pMem.n &= ~1;             /* was the C form "pMem->n", which is not valid C# */
    len = pMem.n * 2 + 1;
  }
  else
  {
    /* When converting from UTF-8 to UTF-16 the maximum growth is caused
    ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16
    ** character. Two bytes are required in the output buffer for the
    ** nul-terminator.
    */
    len = pMem.n * 2 + 2;
  }

  /* Set zIn to point at the start of the input buffer and zTerm to point 1
  ** byte past the end.
  **
  ** Variable zOut is set to point at the output buffer, space obtained
  ** from sqlite3Malloc().
  */
  Debugger.Break();           // TODO -
  //zIn = (u8*)pMem.z;
  //zTerm = &zIn[pMem->n];
  //zOut = sqlite3DbMallocRaw(pMem->db, len);
  //if( !zOut ){
  //  return SQLITE_NOMEM;
  //}
  //z = zOut;

  //if( pMem->enc==SQLITE_UTF8 ){
  //  if( desiredEnc==SQLITE_UTF16LE ){
  //    /* UTF-8 -> UTF-16 Little-endian */
  //    while( zIn<zTerm ){
  ///* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
  //READ_UTF8(zIn, zTerm, c);
  //      WRITE_UTF16LE(z, c);
  //    }
  //  }else{
  //    Debug.Assert( desiredEnc==SQLITE_UTF16BE );
  //    /* UTF-8 -> UTF-16 Big-endian */
  //    while( zIn<zTerm ){
  ///* c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn); */
  //READ_UTF8(zIn, zTerm, c);
  //      WRITE_UTF16BE(z, c);
  //    }
  //  }
  //  pMem->n = (int)(z - zOut);
  //  *z++ = 0;
  //}else{
  //  Debug.Assert( desiredEnc==SQLITE_UTF8 );
  //  if( pMem->enc==SQLITE_UTF16LE ){
  //    /* UTF-16 Little-endian -> UTF-8 */
  //    while( zIn<zTerm ){
  //      READ_UTF16LE(zIn, zIn<zTerm, c);
  //      WRITE_UTF8(z, c);
  //    }
  //  }else{
  //    /* UTF-16 Big-endian -> UTF-8 */
  //    while( zIn<zTerm ){
  //      READ_UTF16BE(zIn, zIn<zTerm, c);
  //      WRITE_UTF8(z, c);
  //    }
  //  }
  //  pMem->n = (int)(z - zOut);
  //}
  //*z = 0;
  //Debug.Assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len );

  //sqlite3VdbeMemRelease(pMem);
  //pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem);
  //pMem->enc = desiredEnc;
  //pMem->flags |= (MEM_Term|MEM_Dyn);
  //pMem.z = (char*)zOut;
  //pMem.zMalloc = pMem.z;

  /* The only "goto translate_out" lives in the commented-out byte-swap
  ** branch above, so the label stays commented out with it to avoid an
  ** unreferenced-label warning. */
  //translate_out:
#if TRANSLATE_TRACE && SQLITE_DEBUG
  {
    char zBuf[100];
    sqlite3VdbeMemPrettyPrint(pMem, zBuf);
    fprintf(stderr, "OUTPUT: %s\n", zBuf);
  }
#endif
  return SQLITE_OK;
}
||
382 | |||
383 | /* |
||
384 | ** This routine checks for a byte-order mark at the beginning of the |
||
385 | ** UTF-16 string stored in pMem. If one is present, it is removed and |
||
386 | ** the encoding of the Mem adjusted. This routine does not do any |
||
387 | ** byte-swapping, it just sets Mem.enc appropriately. |
||
388 | ** |
||
389 | ** The allocation (static, dynamic etc.) and encoding of the Mem may be |
||
390 | ** changed by this function. |
||
391 | */ |
||
/*
** C# port notes: pMem.z is a .NET string, so the byte-order mark is detected
** by encoding its first character as UTF-16LE and comparing the two
** resulting bytes (FE FF -> SQLITE_UTF16BE, FF FE -> SQLITE_UTF16LE).
**
** When a BOM is found and sqlite3VdbeMemMakeWriteable() succeeds, pMem.n is
** reduced by 2, MEM_Term is set and pMem.enc becomes the detected encoding.
** Removing the BOM from pMem.z itself is NOT ported yet (see the
** Debugger.Break() and the commented-out memmove).
**
** Returns SQLITE_OK, or the error code from sqlite3VdbeMemMakeWriteable().
*/
static int sqlite3VdbeMemHandleBom( Mem pMem )
{
  int rc = SQLITE_OK;
  int bom = 0;

  Debug.Assert( pMem.n >= 0 );

  /* Only inspect the text when there is room for a 2-byte BOM and there is
  ** at least one character to encode; GetBytes() would throw otherwise. */
  if ( pMem.n > 1 && !string.IsNullOrEmpty( pMem.z ) )
  {
    byte[] b01 = new byte[2];
    Encoding.Unicode.GetBytes( pMem.z, 0, 1, b01, 0 );
    //  u8 b1 = *(u8 *)pMem.z;
    //  u8 b2 = *(((u8 *)pMem.z) + 1);
    if ( b01[0] == 0xFE && b01[1] == 0xFF )
    { // if( b1==0xFE && b2==0xFF ){
      bom = SQLITE_UTF16BE;
    }
    if ( b01[0] == 0xFF && b01[1] == 0xFE )
    { // if( b1==0xFF && b2==0xFE ){
      bom = SQLITE_UTF16LE;
    }
  }

  if ( bom != 0 )
  {
    rc = sqlite3VdbeMemMakeWriteable( pMem );
    if ( rc == SQLITE_OK )
    {
      pMem.n -= 2;
      Debugger.Break(); // TODO -
      //memmove(pMem.z, pMem.z[2], pMem.n);
      //pMem.z[pMem.n] = '\0';
      //pMem.z[pMem.n+1] = '\0';
      pMem.flags |= MEM_Term;
      pMem.enc = bom;
    }
  }
  return rc;
}
||
423 | #endif // * SQLITE_OMIT_UTF16 */ |
||
424 | |||
425 | /* |
||
426 | ** pZ is a UTF-8 encoded unicode string. If nByte is less than zero, |
||
427 | ** return the number of unicode characters in pZ up to (but not including) |
||
428 | ** the first 0x00 byte. If nByte is not less than zero, return the |
||
429 | ** number of unicode characters in the first nByte of pZ (or up to |
||
430 | ** the first 0x00, whichever comes first). |
||
431 | */ |
||
/*
** C# port notes: zIn is a .NET string, so "characters" here are the chars
** of that string and nByte is applied as a limit on the number of chars.
**
** zIn     Text to measure; null or empty yields 0.
** nByte   Upper bound on the chars examined. A negative value, or a value
**         larger than zIn.Length, means "the whole string".
** Returns the number of chars before the first '\0' within that bound, or
**         the bound itself when no '\0' occurs inside it.
*/
static int sqlite3Utf8CharLen( string zIn, int nByte )
{
  if ( string.IsNullOrEmpty( zIn ) )
    return 0;

  int limit = ( nByte >= 0 && nByte <= zIn.Length ) ? nByte : zIn.Length;

  /* Stop at the first terminator, wherever it is, not only at a trailing one. */
  int nul = zIn.IndexOf( '\0', 0, limit );
  return nul >= 0 ? nul : limit;
}
||
452 | |||
453 | /* This test function is not currently used by the automated test-suite. |
||
454 | ** Hence it is only available in debug builds. |
||
455 | */ |
||
456 | #if SQLITE_TEST && SQLITE_DEBUG |
||
457 | /* |
||
458 | ** Translate UTF-8 to UTF-8. |
||
459 | ** |
||
460 | ** This has the effect of making sure that the string is well-formed |
||
461 | ** UTF-8. Miscoded characters are removed. |
||
462 | ** |
||
463 | ** The translation is done in-place and aborted if the output |
||
464 | ** overruns the input. |
||
465 | */ |
||
/*
** C# port notes: the buffer is decoded with Encoding.UTF8, which turns every
** malformed sequence into U+FFFD. All U+FFFD characters are then dropped
** (the C reference likewise skips any character that reads as 0xfffd), the
** remainder is re-encoded and written back to the start of zIn, and the
** unused tail of zIn is zero-filled.
**
** zIn     Buffer holding the UTF-8 bytes; rewritten in place. Null or empty
**         yields 0.
** Returns the number of bytes of cleaned UTF-8 now stored at the front of
**         zIn (never more than zIn.Length).
*/
static int sqlite3Utf8To8( byte[] zIn )
{
  //byte[] zOut = zIn;
  //byte[] zStart = zIn;
  //u32 c;

  // while( zIn[0] && zOut<=zIn ){
  // c = sqlite3Utf8Read(zIn, (const u8**)&zIn);
  // if( c!=0xfffd ){
  // WRITE_UTF8(zOut, c);
  // }
  //}
  //zOut = 0;
  //return (int)(zOut - zStart);
  if ( zIn == null || zIn.Length == 0 )
    return 0;

  string decoded = Encoding.UTF8.GetString( zIn, 0, zIn.Length );
  string cleaned = decoded.Replace( "\uFFFD", string.Empty );
  byte[] zOut = Encoding.UTF8.GetBytes( cleaned );

  /* Removing characters can only shrink the encoding; the clamp merely
  ** guarantees the copy can never run past the caller's buffer. */
  int nOut = Math.Min( zOut.Length, zIn.Length );
  Array.Copy( zOut, 0, zIn, 0, nOut );
  Array.Clear( zIn, nOut, zIn.Length - nOut );
  return nOut;
}
||
495 | #endif |
||
496 | |||
497 | #if !SQLITE_OMIT_UTF16 |
||
498 | /* |
||
499 | ** Convert a UTF-16 string in the native encoding into a UTF-8 string. |
||
500 | ** Memory to hold the UTF-8 string is obtained from sqlite3Malloc and must |
||
501 | ** be freed by the calling function. |
||
502 | ** |
||
503 | ** NULL is returned if there is an allocation error. |
||
504 | */ |
||
/*
** C# port status: the conversion is NOT implemented. The method breaks into
** the debugger, obtains a Mem from Pool.Allocate_Mem() and returns that
** Mem's z field without touching it; db, z, nByte and enc are unused. The
** C reference, including its post-condition asserts, is kept below as
** comments until the conversion is ported.
**
** The enc parameter is spelled "byte" because this file declares no u8
** alias (NOTE(review): u8 is presumably System.Byte elsewhere in the
** project, which would make this the same signature - confirm).
*/
static string sqlite3Utf16to8( sqlite3 db, string z, int nByte, byte enc )
{
  Debugger.Break(); // TODO -
  Mem m = Pool.Allocate_Mem();
  //  memset(&m, 0, sizeof(m));
  //  m.db = db;
  //  sqlite3VdbeMemSetStr(&m, z, nByte, enc, SQLITE_STATIC);
  //  sqlite3VdbeChangeEncoding(&m, SQLITE_UTF8);
  //  if( db.mallocFailed !=0 ){
  //    sqlite3VdbeMemRelease(&m);
  //    m.z = 0;
  //  }
  //  Debug.Assert( (m.flags & MEM_Term)!=0 || db.mallocFailed !=0);
  //  Debug.Assert( (m.flags & MEM_Str)!=0 || db.mallocFailed !=0);
  //  Debug.Assert( (m.flags & MEM_Dyn)!=0 || db.mallocFailed !=0);
  //  Debug.Assert( m.z != null || db.mallocFailed !=0);
  return m.z;
}
||
522 | |||
523 | /* |
||
524 | ** Convert a UTF-8 string to the UTF-16 encoding specified by parameter |
||
525 | ** enc. A pointer to the new string is returned, and the value of *pnOut |
||
526 | ** is set to the length of the returned string in bytes. The call should |
||
527 | ** arrange to call sqlite3DbFree() on the returned pointer when it is |
||
528 | ** no longer required. |
||
529 | ** |
||
530 | ** If a malloc failure occurs, NULL is returned and the db.mallocFailed |
||
531 | ** flag set. |
||
532 | */ |
||
533 | #if SQLITE_ENABLE_STAT2 |
||
// NOTE: this routine is still the original C text (pointers, memset, C-style
// assert); it has not been ported to C#. It is only reached by the compiler
// when SQLITE_ENABLE_STAT2 is defined and SQLITE_OMIT_UTF16 is not.
//
// Reference behaviour: wrap the n-byte UTF-8 text z in a zeroed Mem, translate
// it to encoding enc with sqlite3VdbeMemTranslate(), store the resulting byte
// length in *pnOut and return the translated buffer. A non-zero result from
// the translation returns 0 instead.
534 | char *sqlite3Utf8to16(sqlite3 db, u8 enc, char *z, int n, int *pnOut){ |
||
535 | Mem m; |
||
536 | memset(&m, 0, sizeof(m)); |
||
537 | m.db = db; |
||
538 | sqlite3VdbeMemSetStr(&m, z, n, SQLITE_UTF8, SQLITE_STATIC); |
||
539 | if( sqlite3VdbeMemTranslate(&m, enc) ){ |
||
540 | assert( db->mallocFailed ); |
||
541 | return 0; |
||
542 | } |
||
543 | assert( m.z==m.zMalloc ); |
||
544 | *pnOut = m.n; |
||
545 | return m.z; |
||
546 | } |
||
547 | #endif |
||
548 | |||
549 | /* |
||
550 | ** zIn is a UTF-16 encoded unicode string at least nChar characters long. |
||
551 | ** Return the number of bytes in the first nChar unicode characters |
||
552 | ** in pZ. nChar must be non-negative. |
||
553 | */ |
||
// NOTE: this routine is still the original C text; it has not been ported to
// C#. READ_UTF16BE / READ_UTF16LE exist in this file only as commented-out
// macro definitions.
//
// Reference behaviour: starting at zIn, read nChar characters with the reader
// macro selected by SQLITE_UTF16NATIVE, then return how many bytes the read
// pointer z advanced from zIn. The decoded value c is discarded; only the
// pointer movement matters.
554 | int sqlite3Utf16ByteLen(const void *zIn, int nChar){ |
||
555 | int c; |
||
556 | unsigned char const *z = zIn; |
||
557 | int n = 0; |
||
558 | |||
559 | if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ |
||
560 | while( n<nChar ){ |
||
561 | READ_UTF16BE(z, 1, c); |
||
562 | n++; |
||
563 | } |
||
564 | }else{ |
||
565 | while( n<nChar ){ |
||
566 | READ_UTF16LE(z, 1, c); |
||
567 | n++; |
||
568 | } |
||
569 | } |
||
570 | return (int)(z-(unsigned char const *)zIn); |
||
572 | |||
573 | #if SQLITE_TEST |
||
574 | /* |
||
575 | ** This routine is called from the TCL test function "translate_selftest". |
||
576 | ** It checks that the primitives for serializing and deserializing |
||
577 | ** characters in each encoding are inverses of each other. |
||
578 | */ |
||
579 | /* |
||
580 | ** This routine is called from the TCL test function "translate_selftest". |
||
581 | ** It checks that the primitives for serializing and deserializing |
||
582 | ** characters in each encoding are inverses of each other. |
||
583 | */ |
||
// NOTE: this self-test is still the original C text; it has not been ported
// to C#. It uses the WRITE_*/READ_* macros (present in this file only as
// comments) and calls sqlite3Utf8Read() with the C two-argument pointer form,
// not the (string, ref string) form of the C# method in this file.
//
// Each of the three loops walks every value i in 0..0x10FFFF, serializes it
// into zBuf, reads it back, and asserts that both the decoded value and the
// number of bytes consumed match what was written.
584 | void sqlite3UtfSelfTest(void){ |
||
585 | unsigned int i, t; |
||
586 | unsigned char zBuf[20]; |
||
587 | unsigned char *z; |
||
588 | int n; |
||
589 | unsigned int c; |
||
590 | |||
// Pass 1: UTF-8 write / read. Surrogates (0xD800..0xDFFF) and 0xFFFE/0xFFFF
// are expected to read back as 0xFFFD.
591 | for(i=0; i<0x00110000; i++){ |
||
592 | z = zBuf; |
||
593 | WRITE_UTF8(z, i); |
||
594 | n = (int)(z-zBuf); |
||
595 | assert( n>0 && n<=4 ); |
||
596 | z[0] = 0; |
||
597 | z = zBuf; |
||
598 | c = sqlite3Utf8Read(z, (const u8**)&z); |
||
599 | t = i; |
||
600 | if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD; |
||
601 | if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD; |
||
602 | assert( c==t ); |
||
603 | assert( (z-zBuf)==n ); |
||
604 | } |
||
// Pass 2: UTF-16 little-endian write / read; surrogate values are skipped.
605 | for(i=0; i<0x00110000; i++){ |
||
606 | if( i>=0xD800 && i<0xE000 ) continue; |
||
607 | z = zBuf; |
||
608 | WRITE_UTF16LE(z, i); |
||
609 | n = (int)(z-zBuf); |
||
610 | assert( n>0 && n<=4 ); |
||
611 | z[0] = 0; |
||
612 | z = zBuf; |
||
613 | READ_UTF16LE(z, 1, c); |
||
614 | assert( c==i ); |
||
615 | assert( (z-zBuf)==n ); |
||
616 | } |
||
// Pass 3: UTF-16 big-endian write / read; surrogate values are skipped.
617 | for(i=0; i<0x00110000; i++){ |
||
618 | if( i>=0xD800 && i<0xE000 ) continue; |
||
619 | z = zBuf; |
||
620 | WRITE_UTF16BE(z, i); |
||
621 | n = (int)(z-zBuf); |
||
622 | assert( n>0 && n<=4 ); |
||
623 | z[0] = 0; |
||
624 | z = zBuf; |
||
625 | READ_UTF16BE(z, 1, c); |
||
626 | assert( c==i ); |
||
627 | assert( (z-zBuf)==n ); |
||
628 | } |
||
629 | } |
||
630 | #endif // * SQLITE_TEST */ |
||
631 | #endif // * SQLITE_OMIT_UTF16 */ |
||
632 | } |
||
633 | } |