nexmon – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | #! /usr/bin/perl -w |
2 | |||
3 | # Copyright (C) 1998, 1999 Tom Tromey |
||
4 | # Copyright (C) 2001 Red Hat Software |
||
5 | |||
6 | # This program is free software; you can redistribute it and/or modify |
||
7 | # it under the terms of the GNU General Public License as published by |
||
8 | # the Free Software Foundation; either version 2, or (at your option) |
||
9 | # any later version. |
||
10 | |||
11 | # This program is distributed in the hope that it will be useful, |
||
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
14 | # GNU General Public License for more details. |
||
15 | |||
16 | # You should have received a copy of the GNU General Public License |
||
17 | # along with this program; if not, see <http://www.gnu.org/licenses/>. |
||
18 | |||
19 | # gen-casemap-test.pl - Generate test cases for case mapping from Unicode data. |
||
20 | # See http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html |
||
21 | # I consider the output of this program to be unrestricted. Use it as |
||
22 | # you will. |
||
23 | |||
24 | require 5.006; |
||
25 | use utf8; |
||
26 | |||
27 | if (@ARGV != 3) { |
||
28 | $0 =~ s@.*/@@; |
||
29 | die "Usage: $0 UNICODE-VERSION UnicodeData.txt SpecialCasing.txt\n"; |
||
30 | } |
||
31 | |||
32 | use vars qw($CODE $NAME $CATEGORY $COMBINING_CLASSES $BIDI_CATEGORY $DECOMPOSITION $DECIMAL_VALUE $DIGIT_VALUE $NUMERIC_VALUE $MIRRORED $OLD_NAME $COMMENT $UPPER $LOWER $TITLE $BREAK_CODE $BREAK_CATEGORY $BREAK_NAME $CASE_CODE $CASE_LOWER $CASE_TITLE $CASE_UPPER $CASE_CONDITION); |
||
33 | |||
34 | # Names of fields in Unicode data table. |
||
35 | $CODE = 0; |
||
36 | $NAME = 1; |
||
37 | $CATEGORY = 2; |
||
38 | $COMBINING_CLASSES = 3; |
||
39 | $BIDI_CATEGORY = 4; |
||
40 | $DECOMPOSITION = 5; |
||
41 | $DECIMAL_VALUE = 6; |
||
42 | $DIGIT_VALUE = 7; |
||
43 | $NUMERIC_VALUE = 8; |
||
44 | $MIRRORED = 9; |
||
45 | $OLD_NAME = 10; |
||
46 | $COMMENT = 11; |
||
47 | $UPPER = 12; |
||
48 | $LOWER = 13; |
||
49 | $TITLE = 14; |
||
50 | |||
51 | # Names of fields in the SpecialCasing table |
||
52 | $CASE_CODE = 0; |
||
53 | $CASE_LOWER = 1; |
||
54 | $CASE_TITLE = 2; |
||
55 | $CASE_UPPER = 3; |
||
56 | $CASE_CONDITION = 4; |
||
57 | |||
58 | my @upper; |
||
59 | my @title; |
||
60 | my @lower; |
||
61 | |||
62 | binmode STDOUT, ":utf8"; |
||
63 | open (INPUT, "< $ARGV[1]") || exit 1; |
||
64 | |||
65 | $last_code = -1; |
||
66 | while (<INPUT>) |
||
67 | { |
||
68 | chop; |
||
69 | @fields = split (';', $_, 30); |
||
70 | if ($#fields != 14) |
||
71 | { |
||
72 | printf STDERR ("Entry for $fields[$CODE] has wrong number of fields (%d)\n", $#fields); |
||
73 | } |
||
74 | |||
75 | $code = hex ($fields[$CODE]); |
||
76 | |||
77 | if ($code > $last_code + 1) |
||
78 | { |
||
79 | # Found a gap. |
||
80 | if ($fields[$NAME] =~ /Last>/) |
||
81 | { |
||
82 | # Fill the gap with the last character read, |
||
83 | # since this was a range specified in the char database |
||
84 | @gfields = @fields; |
||
85 | } |
||
86 | else |
||
87 | { |
||
88 | # The gap represents undefined characters. Only the type |
||
89 | # matters. |
||
90 | @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '', |
||
91 | '', '', '', ''); |
||
92 | } |
||
93 | for (++$last_code; $last_code < $code; ++$last_code) |
||
94 | { |
||
95 | $gfields{$CODE} = sprintf ("%04x", $last_code); |
||
96 | &process_one ($last_code, @gfields); |
||
97 | } |
||
98 | } |
||
99 | &process_one ($code, @fields); |
||
100 | $last_code = $code; |
||
101 | } |
||
102 | |||
103 | close INPUT; |
||
104 | |||
105 | open (INPUT, "< $ARGV[2]") || exit 1; |
||
106 | |||
107 | while (<INPUT>) |
||
108 | { |
||
109 | my $code; |
||
110 | |||
111 | chop; |
||
112 | |||
113 | next if /^#/; |
||
114 | next if /^\s*$/; |
||
115 | |||
116 | s/\s*#.*//; |
||
117 | |||
118 | @fields = split ('\s*;\s*', $_, 30); |
||
119 | |||
120 | $raw_code = $fields[$CASE_CODE]; |
||
121 | $code = hex ($raw_code); |
||
122 | |||
123 | if ($#fields != 4 && $#fields != 5) |
||
124 | { |
||
125 | printf STDERR ("Entry for $raw_code has wrong number of fields (%d)\n", $#fields); |
||
126 | next; |
||
127 | } |
||
128 | |||
129 | if (defined $fields[5]) { |
||
130 | # Ignore conditional special cases - we'll handle them manually |
||
131 | next; |
||
132 | } |
||
133 | |||
134 | $upper[$code] = &make_hex ($fields[$CASE_UPPER]); |
||
135 | $lower[$code] = &make_hex ($fields[$CASE_LOWER]); |
||
136 | $title[$code] = &make_hex ($fields[$CASE_TITLE]); |
||
137 | } |
||
138 | |||
139 | close INPUT; |
||
140 | |||
141 | print <<EOT; |
||
142 | # Test cases generated from Unicode $ARGV[0] data |
||
143 | # by gen-case-tests.pl. Do not edit. |
||
144 | # |
||
145 | # Some special hand crafted tests |
||
146 | # |
||
147 | tr_TR\ti\ti\t\x{0130}\t\x{0130}\t# i => LATIN CAPITAL LETTER I WITH DOT ABOVE |
||
148 | tr_TR\tI\t\x{0131}\tI\tI\t# I => LATIN SMALL LETTER DOTLESS I |
||
149 | tr_TR\tI\x{0307}\ti\tI\x{0307}\tI\x{0307}\t# I => LATIN SMALL LETTER DOTLESS I |
||
150 | tr_TR.UTF-8\ti\ti\t\x{0130}\t\x{0130}\t# i => LATIN CAPITAL LETTER I WITH DOT ABOVE |
||
151 | tr_TR.UTF-8\tI\t\x{0131}\tI\tI\t# I => LATIN SMALL LETTER DOTLESS I |
||
152 | tr_TR.UTF-8\tI\x{0307}\ti\tI\x{0307}\tI\x{0307}\t# I => LATIN SMALL LETTER DOTLESS I |
||
153 | # Test reordering of YPOGEGRAMMENI across other accents |
||
154 | \t\x{03b1}\x{0345}\x{0314}\t\x{03b1}\x{0345}\x{314}\t\x{0391}\x{0345}\x{0314}\t\x{0391}\x{0314}\x{0399}\t |
||
155 | \t\x{03b1}\x{0314}\x{0345}\t\x{03b1}\x{314}\x{0345}\t\x{0391}\x{0314}\x{0345}\t\x{0391}\x{0314}\x{0399}\t |
||
156 | # Handling of final and nonfinal sigma |
||
157 | ΜΆΙΟΣ μάιος Μάιος ΜΆΙΟΣ |
||
158 | ΜΆΙΟΣ μάιος Μάιος ΜΆΙΟΣ |
||
159 | ΣΙΓΜΑ σιγμα Σιγμα ΣΙΓΜΑ |
||
160 | # Lithuanian rule of i followed by letter with dot. Not at all sure |
||
161 | # about the titlecase part here |
||
162 | lt_LT\ti\x{117}\ti\x{117}\tIe\tIE\t |
||
163 | lt_LT\tie\x{307}\tie\x{307}\tIe\tIE\t |
||
164 | lt_LT\t\x{00cc}\ti\x{0307}\x{0300}\t\x{00cc}\t\x{00cc}\t # LATIN CAPITAL LETTER I WITH GRAVE |
||
165 | lt_LT\t\x{00CD}\ti\x{0307}\x{0301}\t\x{00CD}\t\x{00CD}\t # LATIN CAPITAL LETTER I WITH ACUTE |
||
166 | lt_LT\t\x{0128}\ti\x{0307}\x{0303}\t\x{0128}\t\x{0128}\t # LATIN CAPITAL LETTER I WITH TILDE |
||
167 | lt_LT\tI\x{0301}\ti\x{0307}\x{0301}\tI\x{0301}\tI\x{0301}\t # LATIN CAPITAL LETTER I (with acute accent) |
||
168 | lt_LT\tI\x{0300}\ti\x{0307}\x{0300}\tI\x{0300}\tI\x{0300}\t # LATIN CAPITAL LETTER I (with grave accent) |
||
169 | lt_LT\tI\x{0303}\ti\x{0307}\x{0303}\tI\x{0303}\tI\x{0303}\t # LATIN CAPITAL LETTER I (with tilde above) |
||
170 | lt_LT\tI\x{0328}\x{0301}\ti\x{0307}\x{0328}\x{0301}\tI\x{0328}\x{0301}\tI\x{0328}\x{0301}\t # LATIN CAPITAL LETTER I (with ogonek and acute accent) |
||
171 | lt_LT\tJ\x{0301}\tj\x{0307}\x{0301}\tJ\x{0301}\tJ\x{0301}\t # LATIN CAPITAL LETTER J (with acute accent) |
||
172 | lt_LT\t\x{012e}\x{0301}\t\x{012f}\x{0307}\x{0301}\t\x{012e}\x{0301}\t\x{012e}\x{0301}\t # LATIN CAPITAL LETTER I WITH OGONEK (with acute accent) |
||
173 | lt_LT.UTF-8\ti\x{117}\ti\x{117}\tIe\tIE\t |
||
174 | lt_LT.UTF-8\tie\x{307}\tie\x{307}\tIe\tIE\t |
||
175 | lt_LT.UTF-8\t\x{00cc}\ti\x{0307}\x{0300}\t\x{00cc}\t\x{00cc}\t # LATIN CAPITAL LETTER I WITH GRAVE |
||
176 | lt_LT.UTF-8\t\x{00CD}\ti\x{0307}\x{0301}\t\x{00CD}\t\x{00CD}\t # LATIN CAPITAL LETTER I WITH ACUTE |
||
177 | lt_LT.UTF-8\t\x{0128}\ti\x{0307}\x{0303}\t\x{0128}\t\x{0128}\t # LATIN CAPITAL LETTER I WITH TILDE |
||
178 | lt_LT.UTF-8\tI\x{0301}\ti\x{0307}\x{0301}\tI\x{0301}\tI\x{0301}\t # LATIN CAPITAL LETTER I (with acute accent) |
||
179 | lt_LT.UTF-8\tI\x{0300}\ti\x{0307}\x{0300}\tI\x{0300}\tI\x{0300}\t # LATIN CAPITAL LETTER I (with grave accent) |
||
180 | lt_LT.UTF-8\tI\x{0303}\ti\x{0307}\x{0303}\tI\x{0303}\tI\x{0303}\t # LATIN CAPITAL LETTER I (with tilde above) |
||
181 | lt_LT.UTF-8\tI\x{0328}\x{0301}\ti\x{0307}\x{0328}\x{0301}\tI\x{0328}\x{0301}\tI\x{0328}\x{0301}\t # LATIN CAPITAL LETTER I (with ogonek and acute accent) |
||
182 | lt_LT.UTF-8\tJ\x{0301}\tj\x{0307}\x{0301}\tJ\x{0301}\tJ\x{0301}\t # LATIN CAPITAL LETTER J (with acute accent) |
||
183 | lt_LT.UTF-8\t\x{012e}\x{0301}\t\x{012f}\x{0307}\x{0301}\t\x{012e}\x{0301}\t\x{012e}\x{0301}\t # LATIN CAPITAL LETTER I WITH OGONEK (with acute accent) |
||
184 | # Special case not at initial position |
||
185 | \ta\x{fb04}\ta\x{fb04}\tAffl\tAFFL\t# FB04 |
||
186 | # |
||
187 | # Now the automatic tests |
||
188 | # |
||
189 | EOT |
||
190 | &print_tests; |
||
191 | |||
192 | exit 0; |
||
193 | |||
194 | # Process a single character. |
||
195 | sub process_one |
||
196 | { |
||
197 | my ($code, @fields) = @_; |
||
198 | |||
199 | my $type = $fields[$CATEGORY]; |
||
200 | if ($type eq 'Ll') |
||
201 | { |
||
202 | $upper[$code] = make_hex ($fields[$UPPER]); |
||
203 | $lower[$code] = pack ("U", $code); |
||
204 | $title[$code] = make_hex ($fields[$TITLE]); |
||
205 | } |
||
206 | elsif ($type eq 'Lu') |
||
207 | { |
||
208 | $lower[$code] = make_hex ($fields[$LOWER]); |
||
209 | $upper[$code] = pack ("U", $code); |
||
210 | $title[$code] = make_hex ($fields[$TITLE]); |
||
211 | } |
||
212 | |||
213 | if ($type eq 'Lt') |
||
214 | { |
||
215 | $upper[$code] = make_hex ($fields[$UPPER]); |
||
216 | $lower[$code] = pack ("U", hex ($fields[$LOWER])); |
||
217 | $title[$code] = make_hex ($fields[$LOWER]); |
||
218 | } |
||
219 | } |
||
220 | |||
221 | sub print_tests |
||
222 | { |
||
223 | for ($i = 0; $i < 0x10ffff; $i++) { |
||
224 | if ($i == 0x3A3) { |
||
225 | # Greek sigma needs special tests |
||
226 | next; |
||
227 | } |
||
228 | |||
229 | my $lower = $lower[$i]; |
||
230 | my $title = $title[$i]; |
||
231 | my $upper = $upper[$i]; |
||
232 | |||
233 | if (defined $upper || defined $lower || defined $title) { |
||
234 | printf "\t%s\t%s\t%s\t%s\t# %4X\n", |
||
235 | pack ("U", $i), |
||
236 | (defined $lower ? $lower : ""), |
||
237 | (defined $title ? $title : ""), |
||
238 | (defined $upper ? $upper : ""), |
||
239 | $i; |
||
240 | } |
||
241 | } |
||
242 | } |
||
243 | |||
244 | sub make_hex |
||
245 | { |
||
246 | my $codes = shift; |
||
247 | |||
248 | $codes =~ s/^\s+//; |
||
249 | $codes =~ s/\s+$//; |
||
250 | |||
251 | if ($codes eq "0" || $codes eq "") { |
||
252 | return ""; |
||
253 | } else { |
||
254 | return pack ("U*", map { hex ($_) } split /\s+/, $codes); |
||
255 | } |
||
256 | } |