wasCSharpSQLite – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /* |
2 | * Regsub.java |
||
3 | * |
||
4 | * See the file "license.terms" for information on usage and |
||
5 | * redistribution of this file, and for a DISCLAIMER OF ALL |
||
6 | * WARRANTIES. |
||
7 | * |
||
8 | * SCCS: %Z% %M% %I% %E% %U% |
||
9 | */ |
||
10 | // Included in SQLite3 port to C# for use in testharness only; 2008 Noah B Hart |
||
11 | //$Header$ |
||
12 | |||
13 | using System; |
||
namespace sunlabs.brazil.util.regexp
{
  /// <summary>
  /// Iterator-like object that steps through a string and exposes, for each
  /// match of a regular expression, the matched text and the unmatched text
  /// that precedes it.
  /// </summary>
  /// <remarks>
  /// <para>
  /// After each match is found, the portion of the string already examined
  /// is not searched again: the next search resumes at the position where
  /// the previous match ended.
  /// </para>
  /// <para>
  /// Example (adapted from the original Java documentation): replace every
  /// "%XX" sequence in a string with the character whose code is the hex
  /// value "XX".
  /// </para>
  /// <example>
  /// <code>
  /// Regexp re = new Regexp( "%[a-fA-F0-9][a-fA-F0-9]" );
  /// Regsub rs = new Regsub( re, input );
  ///
  /// System.Text.StringBuilder sb = new System.Text.StringBuilder();
  ///
  /// while ( rs.nextMatch() )
  /// {
  ///   sb.Append( rs.skipped() );
  ///
  ///   string match = rs.matched();
  ///   sb.Append( (char)System.Convert.ToInt32( match.Substring( 1, 2 ), 16 ) );
  /// }
  /// sb.Append( rs.rest() );
  ///
  /// System.Console.WriteLine( sb );
  /// </code>
  /// </example>
  /// <para>Original author: Colin Stevens (colin.stevens@sun.com); version 1.4, 99/10/14.</para>
  /// </remarks>
  /// <seealso cref="Regexp"/>
  public class Regsub
  {
    // Compiled regular expression used for every search.
    internal Regexp r;

    // The string being searched.
    internal string str;

    // Start of the unmatched ("skipped") region that precedes the current match.
    internal int ustart;

    // Start index of the current match; -1 until the first match is found.
    internal int mstart;

    // Index just past the current match; the next search resumes here.
    internal int end;

    // Result of the most recent call to r.exec; null when that call found nothing.
    internal Regexp.Match m;

    /// <summary>
    /// Constructs a new <c>Regsub</c> that can be used to step through the
    /// given string, finding each substring that matches the given regular
    /// expression.
    /// </summary>
    /// <remarks>
    /// According to the original documentation, <c>Regexp</c> offers two
    /// substitution methods, <c>sub</c> and <c>subAll</c>, that can be used
    /// instead of <c>Regsub</c> when only simple substitutions are needed.
    /// </remarks>
    /// <param name="r">The compiled regular expression.</param>
    /// <param name="str">The string to search.</param>
    /// <seealso cref="Regexp"/>
    public Regsub( Regexp r, string str )
    {
      this.r = r;
      this.str = str;
      this.ustart = 0;
      this.mstart = -1; // sentinel: no match has been found yet
      this.end = 0;
    }

    /// <summary>
    /// Searches for the next substring that matches the regular expression.
    /// After a successful call, use <see cref="skipped"/>,
    /// <see cref="matched"/>, <see cref="submatch"/> and <see cref="rest"/>
    /// to query the matched region.
    /// </summary>
    /// <remarks>
    /// Calling this method again searches for the next match, resuming where
    /// the previous match ended.
    /// </remarks>
    /// <returns>
    /// <c>true</c> if a match was found, <c>false</c> if there are no more
    /// matches.
    /// </returns>
    public bool nextMatch()
    {
      ustart = end;

      // ustart was just set to the previous 'end', so ustart == mstart means
      // the previous match was empty.  Step over one character in that case,
      // otherwise the same empty match would be found forever.
      int off = ustart;
      if ( off == mstart )
      {
        off++;
        if ( off >= str.Length )
        {
          return false;
        }
      }

      // NOTE(review): the second argument is passed as a constant 0 and 'off'
      // is the resume position; confirm the exact parameter meaning against
      // Regexp.exec.
      m = r.exec( str, 0, off );
      if ( m == null )
      {
        return false;
      }

      // indices[0] and indices[1] delimit the whole match.
      mstart = m.indices[0];
      end = m.indices[1];

      return true;
    }

    /// <summary>
    /// Returns the characters skipped between the end of the previous match
    /// (or the start of the search string) and the start of the current match.
    /// </summary>
    /// <remarks>
    /// This method can be used to extract all the portions of the string that
    /// <b>did not</b> match the regular expression.  It is only meaningful
    /// after <see cref="nextMatch"/> has returned <c>true</c>.
    /// </remarks>
    /// <returns>The characters that did not match.</returns>
    public string skipped()
    {
      return str.Substring( ustart, mstart - ustart );
    }

    /// <summary>
    /// Returns the characters that matched the entire regular expression
    /// during the last successful call to <see cref="nextMatch"/>.
    /// </summary>
    /// <returns>The characters that did match.</returns>
    /// <seealso cref="submatch"/>
    public string matched()
    {
      return str.Substring( mstart, end - mstart );
    }

    /// <summary>
    /// Returns the characters that matched the given parenthesized
    /// subexpression during the last call to <see cref="nextMatch"/>.
    /// </summary>
    /// <param name="i">
    /// The index of the parenthesized subexpression; index 0 addresses the
    /// same index pair that <see cref="nextMatch"/> reads for the whole match.
    /// </param>
    /// <returns>
    /// The characters that matched the subexpression, or <c>null</c> if no
    /// match is currently held, the index is negative, the subexpression does
    /// not exist, or it did not take part in the match.
    /// </returns>
    public string submatch( int i )
    {
      // No match result available (nextMatch never succeeded, or the last
      // search found nothing): report "no such submatch" instead of throwing.
      if ( m == null )
      {
        return null;
      }

      // indices holds (start, end) pairs, so valid pair numbers are
      // 0 .. Length/2 - 1.  Comparing against Length / 2 is equivalent to the
      // original "i * 2 + 1 >= Length" test but cannot overflow, and the
      // explicit lower bound rejects negative indices.
      if ( i < 0 || i >= m.indices.Length / 2 )
      {
        return null;
      }

      int subStart = m.indices[i * 2];
      int subEnd = m.indices[i * 2 + 1];

      // A negative offset marks a subexpression that did not match.
      if ( ( subStart < 0 ) || ( subEnd < 0 ) )
      {
        return null;
      }
      return str.Substring( subStart, subEnd - subStart );
    }

    /// <summary>
    /// Returns all the characters that come after the last match.  As the
    /// matches progress, the result gets shorter.  Once
    /// <see cref="nextMatch"/> returns <c>false</c>, this is the remainder of
    /// the string that could not be matched.
    /// </summary>
    /// <returns>The rest of the characters after the last match.</returns>
    public string rest()
    {
      return str.Substring( end );
    }
  }
}