vanilla-wow-addons – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1  
-- Register this file as the "regex" module on the addon's root table.
local mod, me = klhtm, { }
mod.regex = me
6  
7 --[[
8 Regex.lua
9  
10 The Regex module converts printing formatted strings to parsing formatted strings, in a locale independent way.
11  
12 e.g.
13 "Your %s hits %s for %d." -> {"Your (.+) hits (.+) for (%d+)%.", {1, 2, 3}}
14 "Le %3$s de %2$s vous fait gagner %1$d points de vie." -> {"Le (.+) de (.+) vous fait gagner (%d+) points de vie%.", {3, 2, 1}}
15  
16 First a bit of background. We want to be able to read the combat log on all clients, whether the language is english or french or chinese or otherwise. Furthermore, we don't want to rely on localisers working out the parser strings manually, because there is a likelihood of human error, and it would take too long to get a new string added.
17  
18 Fortunately, we have all the information we need (at runtime, at least). For instance, in the example above, the value of the format string is given in the variable SPELLLOGSELFOTHER. If you open the GlobalStrings.lua (may need the WoW interface extractor to see it), on english clients you will see
19 ...
20 SPELLLOGSELFOTHER = "Your %s hits %s for %d."
21 ...
22 and on french clients you will see
23 ...
24 SPELLLOGSELFOTHER = "Le %3$s de %2$s vous fait gagner %1$d points de vie."
25 ...
26 When the WoW client is printing to the combat log, it will run a command like
27 ChatFrame2:AddMessage(string.format(SPELLLOGSELFOTHER, "Mortal Strike", "Mottled Boar", 352))
28  
29 So, at Runtime (that is, when the addon loads, but not when i am writing it - i only have the english values) the mod has access to all the printing string format variables, like SPELLLOGSELFOTHER. We have a list of all the important ones, for all the abilities that the mod needs, so we want to make a big parser to scan them all at runtime. So the first thing we do when the addon loads is create all these parsers, then use them for all our combat log parsing.
30  
31 ------------------------------------------------------------
32  
33 Structures:
34  
35 1) Small Parser:
36  
37 local parser =
38 {
39 ["formatstring"] = formatstring, "You hit %s for %s."
40 ["regexstring"] = regexstring, "You hit (.+) for (.+)%."
41 numarguments = me.numarguments, 2
42 ordering = me.ordering, {1, 2}
43 argtypes = me.types, {"string", "number"}
44 }
45 Note that the values of <argtypes> matches the canonical ordering (1, 2, 3, ...), not the localised ordering
46 as in <ordering>.
47  
48 2) Big Parser:
49  
50 local value =
51 {
52 ["parser"] = parser, a <Small Parser> structure
53 ["globalstring"] = globalstringname, COMBATHITSELFOTHER
54 ["identifier"] = identifier, "whiteattackhit"
55 }
56  
57 3) Parser Set:
58  
59 First level is a key-value list. The keys are event names, e.g. "CHAT_MSG_SPELL_SELF_BUFF".
60 The values are ordered lists of <Big Parser>s.
61  
62 4) Parser Output:
63  
64 local output =
65 {
66 hit = <flag. Nil or non-nil>,
67 temp = { }, list of up to 4 values, the captures with localised ordering
68 final = { }, list of up to 4 values, the captures with canonical ordering
69 }
70  
71 The idea is to reuse the <Parser Output> structure, so the flag <hit> just records whether the last parse
72 succeeded (non-nil for success). It is assumed that all parse strings have at most 4 arguments.
73  
74 5) BigParser Output:
75  
76 same as <Parser Output>, but has the property <parser>, which is a <BigParser> structure.
77 ]]
78  
79 --[[
80 ------------------------------------------------------------------------------
81 Section A: Parsing a String With the Parser Engine
82 ------------------------------------------------------------------------------
83 ]]
84  
-- Shared <BigParser Output> record. mod.regex.parse() hands back this same table on every
-- call, so a caller has to read the results before parsing again.
--   hit    : nil until a parse succeeds; overwritten by every parse
--   temp   : captures in the order they appear in the localised string
--   final  : captures rearranged into the canonical ordering
--   parser : the <Big Parser> that produced the hit; absent until the first successful parse
me.output = { temp = { }, final = { } }
93  
--[[
mod.regex.parse(parserset, inputstring, event)
Given a string, checks whether it matches any parser registered for <event>. The return value is
always the shared <BigParser Output> structure me.output; test its <hit> field to see whether the
parse succeeded.
<parserset> is a <Parser Set>, i.e. a table keyed by event name whose values are ordered lists of <Big Parser>s.
<inputstring> is e.g. a line from your combat log to be parsed.
<event> is the event the string was received on, e.g. "CHAT_MSG_SPELL_SELF_BUFF"
]]
me.parse = function(parserset, inputstring, event)

    local output = me.output

    -- 0) Reset output
    output.hit = nil

    -- 1) Check that the event is handled by the parser
    local parsersubset = parserset[event]
    if parsersubset == nil then
        return output
    end

    -- 2) Look for a parser. The list was built in priority order by me.addparsestring, so it
    -- has to be walked in index order; ipairs guarantees that, whereas iterating the table
    -- directly gives no ordering guarantee and is not accepted by Lua versions after 5.0.
    for _, bigparser in ipairs(parsersubset) do
        local parser = bigparser.parser

        if me.parsestring(parser, inputstring, output) then
            output.parser = bigparser

            -- verify numeric arguments; only the first offending argument is reported
            for y = 1, parser.numarguments do
                if (parser.argtypes[y] == "number") and (tonumber(output.final[y]) == nil) then

                    -- error occur! The value goes through tostring() because it may be nil,
                    -- which "%s" would otherwise reject.
                    if mod.out.checktrace("error", me, "regex") then
                        mod.out.printtrace(string.format("The value |cffffff00%s|r of argument %d is not a number as it should be! Parser = %s, format string = %s. Event = %s, string = %s.", tostring(output.final[y]), y, bigparser.identifier, parser.formatstring, event, inputstring))
                    end

                    break
                end
            end

            -- the hit is still reported to the caller, even if the check above complained
            return output
        end
    end

    -- 3) No hit - oh well!
    return output

end
142  
--[[
me.parsestring(parser, inputstring, output)
Parses a string with the specified parser. Returns true if the string satisfies the parser, nil otherwise.
<parser> is a <Small Parser> structure, i.e. an output of me.formattoregex().
<inputstring> is the string to parse, e.g. a combat log line.
<output> is a structure to store the output. It must have .temp and .final properties which are lists.
On return, <output.hit> is nil on failure, otherwise the end index of the match as given by string.find.
<output.temp> receives up to 5 captures in the order they occur in the string, and <output.final>
receives the first <parser.numarguments> of them moved to their canonical positions. Entries of
<output.final> beyond those positions are not cleared.
]]
me.parsestring = function(parser, inputstring, output)

    local temp = output.temp

    -- The start index of the match is not needed. It is kept in a local so that the parse does
    -- not overwrite the global variable "_" on every call.
    local matchstart
    matchstart, output.hit, temp[1], temp[2], temp[3], temp[4], temp[5] = string.find(inputstring, parser.regexstring)

    -- early exit on fail
    if output.hit == nil then
        return nil
    end

    -- now reorder arguments: the x'th capture belongs at canonical position ordering[x]
    for x = 1, parser.numarguments do
        output.final[parser.ordering[x]] = temp[x]
    end

    return true
end
168  
169  
170 --[[
171 ------------------------------------------------------------------------------
172 Section B: Creating the Parser Engine at Startup
173 ------------------------------------------------------------------------------
174 ]]
175  
--[[
me.addparsestring(parserset, identifier, globalstringname, event)
Adds a new parser to the parser set.
<parserset> is a key-value list, keyed by event names, values are a list of parsers listening to that event
<identifier> is a description of the capture, e.g. "spellcrit"
<globalstringname> is the name of the variable that holds the format pattern, e.g. "SPELLLOGHIT"
<event> is the event in which the capture comes, e.g. "CHAT_MSG_SPELL_SELF_BUFF"
Nothing is added (and an error trace is printed, if enabled) when the global string does not exist
or when the generated parser fails me.testparser. The list for <event> is created either way.
]]
me.addparsestring = function(parserset, identifier, globalstringname, event)

    -- make sure the event owns a parser list before anything else happens
    local parsers = parserset[event]
    if parsers == nil then
        parsers = { }
        parserset[event] = parsers
    end

    -- fetch the localised format string through the name of its global variable
    local formatstring = getglobal(globalstringname)
    if formatstring == nil then
        if mod.out.checktrace("error", me, "regex") then
            mod.out.printtrace(string.format("No global string %s found. ID = %s, event = %s.", globalstringname, identifier, event))
        end
        return
    end

    -- build the small parser and make sure it can read back its own output
    local parser = me.formattoregex(formatstring)

    if me.testparser(parser) == nil then
        if mod.out.checktrace("error", me, "regex") then
            mod.out.printtrace(string.format("parser failed on %s.", identifier))
        end
        return
    end

    -- wrap it up as a <Big Parser>
    local bigparser =
    {
        parser = parser,
        globalstring = globalstringname,
        identifier = identifier,
    }

    -- Ordered insert, so that a less specific parser never blocks a more specific one that shares
    -- the event. The new parser goes in front of the first existing parser that
    -- me.compareregexstrings ranks behind it; if there is none, it goes to the end of the list.
    local count = table.getn(parsers)
    local position = count + 1

    for index = 1, count do
        if me.compareregexstrings(parsers[index].parser, parser) == 1 then
            position = index
            break
        end
    end

    table.insert(parsers, position, bigparser)
end
242  
--[[
me.formattoregex(formatstring)
Returns a <Small Parser> structure from a print formatting string.
<formatstring> is e.g. "You hit %s for %s.".
The output describes how to convert this to a parser. Only "%s" and "%d" blocks are recognised,
optionally with a one digit position, e.g. "%2$s".
The function communicates with me.gsubreplacement through me.numarguments, me.ordering and me.types,
which are reset here on every call.
]]
me.formattoregex = function(formatstring)

    --[[
    Escape every character of the literal text that has a special meaning in a pattern, by putting
    a % in front of it: ^ ( ) . [ ] * + - ?
    Without this, text such as a hyphenated word would be read as pattern syntax and the parser
    would not match the very strings its format produces.
    "%" and "$" are left alone on purpose, because the formatting blocks handled below are made of them.
    ]]
    local regexstring = string.gsub(formatstring, "([%^%(%)%.%[%]%*%+%-%?])", "%%%1")

    --[[
    Formatting blocks have two types. If the arguments are in the same order as the english, the
    patterns will look like "%s %s %d %s" etc. If they have a different argument ordering, it would
    be e.g. "%3$s %1$d %2$s". So we need to check for both these circumstances.
    ]]

    -- fresh scratch state for me.gsubreplacement to fill in
    me.numarguments = 0
    me.ordering = { }
    me.types = { }

    --[[
    string.gsub will search the string regexstring, identify captures of the form "(%%(%d?)$?([sd]))",
    then replace them with the value me.gsubreplacement(<captures>). See me.gsubreplacement comments
    for more details.
    ]]
    regexstring = string.gsub(regexstring, "(%%(%d?)$?([sd]))", me.gsubreplacement)

    --[[
    Adding a ^ character to the search string means that the string.find() is only allowed to match
    the test string starting at the first character.
    ]]
    regexstring = "^" .. regexstring

    local parser =
    {
        ["formatstring"] = formatstring,
        ["regexstring"] = regexstring,
        numarguments = me.numarguments,
        ordering = me.ordering,
        argtypes = me.types,
    }

    return parser

end
293  
-- Scratch state used below. It is (re)initialised in me.formattoregex:
-- me.numarguments = 0
-- me.ordering = { }
-- me.types = { }

--[[
me.gsubreplacement(totalstring, index, formattype)
Replacement callback for the string.gsub call in me.formattoregex.
The round brackets in the pattern "(%%(%d?)$?([sd]))" denote captures, which arrive here as arguments
in the order of their open brackets:
<totalstring> is the whole formatting block, e.g. "%3$s" or "%s". It is not used.
<index> is the position digit as a string, e.g. "3", or an empty string when the block has none.
<formattype> is "s" or "d", i.e. whether the print format is a string or an integer.
Returns the capture pattern that replaces the block. As a side effect it counts the block in
me.numarguments, appends its canonical position to me.ordering and records its type in me.types.
]]
me.gsubreplacement = function(totalstring, index, formattype)

    local count = me.numarguments + 1
    me.numarguments = count

    -- blocks without an explicit position are numbered by order of appearance (1, 2, 3, ...)
    local position = tonumber(index) or count

    table.insert(me.ordering, position)

    -- the return value is the actual replacement
    if formattype == "d" then
        me.types[position] = "number"
        return "(%d+)"
    end

    me.types[position] = "string"
    return "(.+)"

end
328  
--[[
me.compareregexstrings(parser1, parser2)
We are given two <Small Parser>s, and we want to know in which order to check their patterns. e.g.
(1) "You gain (%d+) health from (.+)%." vs
(2) "You gain (%d+) (.+) from (.+)%."
In this case we should check for (1) first, then (2). The patterns are walked token by token
(see me.getnexttoken) up to the first place where they differ, and then:
1) If one pattern goes to a capture and the other goes to text, do the text first.
2) If both go to captures of different types, then don't worry.
3) If both go to different text, regex2 goes first only when the rest of regex1, used as a pattern,
   is found in the rest of regex2.
If one pattern runs out before any difference is found, the other one goes first.

return values:
-1: regex1 first
+1: regex2 first

Where possible, prefer to return -1.
]]
me.compareregexstrings = function(parser1, parser2)

    local regex1, regex2 = parser1.regexstring, parser2.regexstring
    local start1, start2 = 1, 1
    local token1, token2

    -- skip over the common prefix of the two patterns
    while true do

        token1 = me.getnexttoken(regex1, start1)
        token2 = me.getnexttoken(regex2, start2)

        -- check for end of strings. Identical patterns end up here too, and give -1.
        if token2 == nil then
            return -1
        elseif token1 == nil then
            return 1
        end

        if token1 ~= token2 then
            break
        end

        -- equal so far, step over the token in both patterns
        start1 = start1 + string.len(token1)
        start2 = start2 + string.len(token2)

    end

    -- to get here, they have arrived at different tokens, therefore they must be orderable.
    -- Plain characters are 1 long and escaped characters are 2 long, so anything longer is a capture.
    local capture1 = string.len(token1) > 2
    local capture2 = string.len(token2) > 2

    if capture1 and capture2 then
        -- they are different, so one is a number, one a string, so who cares
        return -1
    end

    if capture1 then
        -- prefer the non-capture first
        return 1
    end

    if capture2 then
        return -1
    end

    -- neither at a capture
    if string.find(string.sub(regex2, start2), string.sub(regex1, start1)) then
        return 1
    end

    -- An older tie-break on argument count and pattern length used to follow here. It had been
    -- disabled by an unconditional return and could never run, so only that return is left.
    return -1

end
424  
--[[
me.getnexttoken(regex, start)
Returns the regex token that begins at a given position of a string, or nil when the position is
past the end of the string.
<regex> is the regex string, e.g. "hello (.+)%." .
<start> is the 1-based index of the string to start from.
Tokens are captures, e.g. "(.+)" or "(%d+)", or escaped characters, e.g. "%." or "%(", or normal
letters, e.g. "a", ",".
]]
me.getnexttoken = function(regex, start)

    if start > string.len(regex) then
        return nil
    end

    local first = string.sub(regex, start, start)

    -- work out how many characters the token spans; a normal letter is just itself
    local width = 1

    if first == "%" then
        -- escaped character: the % and the character behind it
        width = 2

    elseif first == "(" then
        -- capture: "(%d+)" is 5 long, anything else is taken to be "(.+)", 4 long
        if string.sub(regex, start + 1, start + 1) == "%" then
            width = 5
        else
            width = 4
        end
    end

    return string.sub(regex, start, start + width - 1)

end
458  
459 --[[
460 ------------------------------------------------------------------------------
461 Section C: Testing the Regex System
462 ------------------------------------------------------------------------------
463 ]]
464  
--[[
mod.regex.test()
Checks that the parsers created from print format strings are working correctly, over a range of tough strings.
Will print out the results. Stops at the first string that fails.
]]
me.test = function()

    -- kept local, so that running the test does not create a global variable called "strings"
    local strings =
    {
        "%3$s vous fait gagner %1$d %2$s.",
        "Votre %4$s inflige %2$d points de degats de %3$s a %1$s.",
        "Vous utilisez %s sur votre %s.",
    }

    local count = table.getn(strings)

    for x = 1, count do
        if me.testformatstring(strings[x]) == nil then
            mod.out.print(string.format("test failed on string %d, '%s'.", x, strings[x]))
            return
        end
    end

    mod.out.print(string.format("all %d strings passed their tests.", count))

end
485  
--[[
me.testformatstring(value)
Builds a parser from a print formatting string, prints a summary of it, and runs me.testparser on it.
<value> is e.g. "You hit %s for %s."
Returns: whatever me.testparser returns, i.e. non-nil if the test succeeds.
]]
me.testformatstring = function(value)

    local parser = me.formattoregex(value)

    -- show what the conversion came up with
    local summary = string.format("Format string = |cffffff00%s|r, regex string = |cffffff00%s|r, numargs = |cffffff00%d|r.", parser.formatstring, parser.regexstring, parser.numarguments)
    mod.out.print(summary)

    return me.testparser(parser)

end
502  
--[[
me.testparser(parser, debug)
Checks experimentally that a parser accepts text produced by its own print format string.
<parser> is a <Small Parser> structure.
<debug> is a flag, if non-nil some debugging will be printed.
Returns: true if the test succeeds, nil otherwise.
The method generates random arguments (a random string or a random number from 1 to 1000, depending
on the argument type), formats them with <parser>'s format string, and parses the result with the
parser. Only the success of the parse is checked; the captured values are printed in debug mode
but are not compared against the generated arguments.
]]
me.testparser = function(parser, debug)

    local argcount = parser.numarguments

    -- 1) Generate random arguments, stored at their canonical positions
    local arguments = { }

    for position = 1, argcount do
        local slot = parser.ordering[position]

        if parser.argtypes[slot] == "string" then
            arguments[slot] = me.generaterandomstring()
        else
            arguments[slot] = math.random(1000)
        end
    end

    -- debug print; a missing argument aborts the test
    if debug then
        for index = 1, argcount do
            local argument = arguments[index]

            if argument == nil then
                mod.out.print("arg " .. index .. " is nil!")
                return
            end

            mod.out.print("arg" .. index .. " = " .. argument)
        end
    end

    -- 2) Print them the way the game client would
    local teststring = string.format(parser.formatstring, unpack(arguments))

    if debug then
        mod.out.print("the test string = " .. teststring)
    end

    -- 3) Try to parse the result, into a private output structure
    local output = { temp = { }, final = { } }

    if me.parsestring(parser, teststring, output) == nil then
        mod.out.print("The string did not parse.")
        return nil
    end

    if debug then
        for index = 1, argcount do
            mod.out.print("output" .. index .. " = " .. output.final[index])
        end
    end

    return true

end
569  
--[[
me.generaterandomstring()
Generates a random string of capital letters and spaces, 11 to 20 characters long. Each character
is a space one time in three, otherwise a letter. Will look something like "AJ WFDSO ECL SFOE".
]]
me.generaterandomstring = function()

    local pieces = { }

    -- the length is rolled once, before the first character
    for position = 1, 10 + math.random(10) do
        if math.random(3) == 3 then
            pieces[position] = " "
        else
            -- character codes 65 to 90 are "A" to "Z"
            pieces[position] = string.format("%c", 64 + math.random(26))
        end
    end

    return table.concat(pieces)
end