wasCSharpSQLite – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 /*
2 * Regsub.java
3 *
4 * See the file "license.terms" for information on usage and
5 * redistribution of this file, and for a DISCLAIMER OF ALL
6 * WARRANTIES.
7 *
8 * SCCS: %Z% %M% %I% %E% %U%
9 */
10 // Included in SQLite3 port to C# for use in testharness only; 2008 Noah B Hart
11 //$Header$
12  
13 using System;
14 namespace sunlabs.brazil.util.regexp
15 {
16  
17 /// <summary> The <code>Regsub</code> class provides an iterator-like object to
18 /// extract the matched and unmatched portions of a string with respect to
19 /// a given regular expression.
20 /// <p>
21 /// After each match is found, the portions of the string already
22 /// checked are not searched again -- searching for the next match will
23 /// begin at the character just after where the last match ended.
24 /// <p>
25 /// Here is an example of using Regsub to replace all "%XX" sequences in
26 /// a string with the ASCII character represented by the hex digits "XX":
27 /// <pre>
28 /// public static void
29 /// main(String[] args)
30 /// throws Exception
31 /// {
32 /// Regexp re = new Regexp("%[a-fA-F0-9][a-fA-F0-9]");
33 /// Regsub rs = new Regsub(re, args[0]);
34 ///
35 /// StringBuffer sb = new StringBuffer();
36 ///
37 /// while (rs.nextMatch()) {
38 /// sb.append(rs.skipped());
39 ///
40 /// String match = rs.matched();
41 ///
42 /// int hi = Character.digit(match.charAt(1), 16);
43 /// int lo = Character.digit(match.charAt(2), 16);
44 /// sb.append((char) ((hi &lt;&lt; 4) | lo));
45 /// }
46 /// sb.append(rs.rest());
47 ///
48 /// System.out.println(sb);
49 /// }
50 /// </pre>
51 ///
52 /// </summary>
53 /// <author> Colin Stevens (colin.stevens@sun.com)
54 /// </author>
55 /// <version> 1.4, 99/10/14
56 /// </version>
57 /// <seealso cref="Regexp">
58 /// </seealso>
public class Regsub
{
    // Compiled regular expression used for every search.
    internal Regexp r;

    // The string being stepped through.
    internal string str;

    // Index where the text skipped before the current match begins
    // (i.e. where the previous match ended).
    internal int ustart;

    // Index where the current match begins; -1 until a match has been found.
    internal int mstart;

    // Index just past the current match; the next search resumes from here.
    internal int end;

    // Result of the most recent exec call. Null before the first search and
    // after a search in which exec reported no match.
    internal Regexp.Match m;

    /// <summary>
    /// Constructs a new <c>Regsub</c> that can be used to step through the
    /// given string, finding each substring that matches the given regular
    /// expression.
    /// <para>
    /// <c>Regexp</c> contains two substitution methods, <c>sub</c> and
    /// <c>subAll</c>, that can be used instead of <c>Regsub</c> if just
    /// simple substitutions are being done.
    /// </para>
    /// </summary>
    /// <param name="r">The compiled regular expression.</param>
    /// <param name="str">The string to search.</param>
    /// <seealso cref="Regexp.sub"/>
    /// <seealso cref="Regexp.subAll"/>
    public Regsub( Regexp r, string str )
    {
        this.r = r;
        this.str = str;
        this.ustart = 0;
        this.mstart = -1;   // sentinel: nothing has matched yet
        this.end = 0;
    }

    /// <summary>
    /// Searches for the next substring that matches the regular expression.
    /// After a successful call, use <c>skipped</c>, <c>matched</c>,
    /// <c>submatch</c> and <c>rest</c> to query the matched region.
    /// <para>
    /// Calling this method again searches for the next match, beginning at
    /// the character just after where the last match ended.
    /// </para>
    /// </summary>
    /// <returns>
    /// <c>true</c> if a match was found, <c>false</c> if there are no more
    /// matches.
    /// </returns>
    public bool nextMatch()
    {
        ustart = end;

        // If the previous match was empty (it ended where it started), begin
        // the search one character further on; otherwise the same empty match
        // would be found forever.
        int off = ustart;
        if ( off == mstart )
        {
            off++;
            if ( off >= str.Length )
            {
                return false;
            }
        }

        m = r.exec( str, 0, off );
        if ( m == null )
        {
            return false;
        }

        // indices[0] and indices[1] bound the whole-expression match.
        mstart = m.indices[0];
        end = m.indices[1];

        return true;
    }

    /// <summary>
    /// Returns the characters skipped between the end of the previous match
    /// (or the start of the original search string) and the start of the
    /// current match.
    /// <para>
    /// This method can be used to extract all the portions of the string
    /// that <b>didn't</b> match the regular expression. It is only meaningful
    /// after <c>nextMatch</c> has returned <c>true</c>.
    /// </para>
    /// </summary>
    /// <returns>The characters that didn't match.</returns>
    public string skipped()
    {
        return str.Substring( ustart, mstart - ustart );
    }

    /// <summary>
    /// Returns the characters that matched the entire regular expression
    /// during the last successful call to <c>nextMatch</c>.
    /// </summary>
    /// <returns>The characters that did match.</returns>
    /// <seealso cref="submatch"/>
    public string matched()
    {
        return str.Substring( mstart, end - mstart );
    }

    /// <summary>
    /// Returns the characters that matched the given parenthesized
    /// subexpression during the last call to <c>nextMatch</c>.
    /// </summary>
    /// <param name="i">
    /// The index of the parenthesized subexpression; index 0 addresses the
    /// same index pair that <c>nextMatch</c> uses for the whole match.
    /// </param>
    /// <returns>
    /// The characters that matched the subexpression, or <c>null</c> if
    /// there is no current match, the given subexpression does not exist,
    /// or it did not take part in the match.
    /// </returns>
    public string submatch( int i )
    {
        // No match result to query: nextMatch has not been called yet, or
        // its last search found nothing.
        if ( m == null )
        {
            return null;
        }

        // Each subexpression owns the pair indices[2*i], indices[2*i + 1].
        // Comparing against Length / 2 rejects negative and out-of-range
        // values without the overflow that "i * 2 + 1" can hit for huge i.
        if ( i < 0 || i >= m.indices.Length / 2 )
        {
            return null;
        }

        int from = m.indices[i * 2];
        int to = m.indices[i * 2 + 1];

        // Negative bounds are treated as "this group did not match".
        if ( from < 0 || to < 0 )
        {
            return null;
        }

        return str.Substring( from, to - from );
    }

    /// <summary>
    /// Returns all the characters that come after the last match. As the
    /// matches progress, the <c>rest</c> gets shorter. When <c>nextMatch</c>
    /// returns <c>false</c>, this method returns the remainder of the string
    /// that can't be matched.
    /// </summary>
    /// <returns>The rest of the characters after the last match.</returns>
    public string rest()
    {
        return str.Substring( end );
    }
}
214 }