scratch – Blame information for rev 75

Subversion Repositories:
Rev:
Rev Author Line No. Line
75 office 1  
2 Inline = require './Inline'
3 Pattern = require './Pattern'
4 Utils = require './Utils'
5 ParseException = require './Exception/ParseException'
6  
7 # Parser parses YAML strings to convert them to JavaScript objects.
8 #
# Parser parses YAML strings to convert them to JavaScript objects.
#
# An instance keeps a cursor (@currentLineNb / @currentLine) over the lines of
# the document being parsed, plus a table of anchors (@refs). Nested blocks are
# handled by spawning sub-parsers that share the same @refs table.
#
class Parser

    # Pre-compiled patterns (named groups are read back as properties of the
    # match object, e.g. `matches.ref`)
    #
    PATTERN_FOLDED_SCALAR_ALL: new Pattern '^(?:(?<type>![^\\|>]*)\\s+)?(?<separator>\\||>)(?<modifiers>\\+|\\-|\\d+|\\+\\d+|\\-\\d+|\\d+\\+|\\d+\\-)?(?<comments> +#.*)?$'
    PATTERN_FOLDED_SCALAR_END: new Pattern '(?<separator>\\||>)(?<modifiers>\\+|\\-|\\d+|\\+\\d+|\\-\\d+|\\d+\\+|\\d+\\-)?(?<comments> +#.*)?$'
    PATTERN_SEQUENCE_ITEM: new Pattern '^\\-((?<leadspaces>\\s+)(?<value>.+?))?\\s*$'
    PATTERN_ANCHOR_VALUE: new Pattern '^&(?<ref>[^ ]+) *(?<value>.*)'
    PATTERN_COMPACT_NOTATION: new Pattern '^(?<key>'+Inline.REGEX_QUOTED_STRING+'|[^ \'"\\{\\[].*?) *\\:(\\s+(?<value>.+?))?\\s*$'
    PATTERN_MAPPING_ITEM: new Pattern '^(?<key>'+Inline.REGEX_QUOTED_STRING+'|[^ \'"\\[\\{].*?) *\\:(\\s+(?<value>.+?))?\\s*$'
    PATTERN_DECIMAL: new Pattern '\\d+'
    PATTERN_INDENT_SPACES: new Pattern '^ +'
    PATTERN_TRAILING_LINES: new Pattern '(\n*)$'
    PATTERN_YAML_HEADER: new Pattern '^\\%YAML[: ][\\d\\.]+.*\n'
    PATTERN_LEADING_COMMENTS: new Pattern '^(\\#.*?\n)+'
    PATTERN_DOCUMENT_MARKER_START: new Pattern '^\\-\\-\\-.*?\n'
    PATTERN_DOCUMENT_MARKER_END: new Pattern '^\\.\\.\\.\\s*$'

    # Cache of "strip N leading spaces" patterns, keyed by indentation width.
    # It lives on the prototype, so it is shared by every Parser instance.
    PATTERN_FOLDED_SCALAR_BY_INDENTATION: {}

    # Context types
    #
    CONTEXT_NONE: 0
    CONTEXT_SEQUENCE: 1
    CONTEXT_MAPPING: 2


    # Constructor
    #
    # @param [Integer] offset The offset of YAML document (used for line numbers in error messages)
    #
    constructor: (@offset = 0) ->
        @lines = []
        @currentLineNb = -1
        @currentLine = ''
        @refs = {}


    # Parses a YAML string to a JavaScript value.
    #
    # @param [String] value A YAML string
    # @param [Boolean] exceptionOnInvalidType true if an exception must be thrown on invalid types (a JavaScript resource or object), false otherwise
    # @param [Function] objectDecoder A function to deserialize custom objects, null otherwise
    #
    # @return [Object] A JavaScript value
    #
    # @throw [ParseException] If the YAML is not valid
    #
    parse: (value, exceptionOnInvalidType = false, objectDecoder = null) ->
        @currentLineNb = -1
        @currentLine = ''
        @lines = @cleanup(value).split "\n"

        data = null
        context = @CONTEXT_NONE
        allowOverwrite = false
        while @moveToNextLine()
            if @isCurrentLineEmpty()
                continue

            # Tab?
            if "\t" is @currentLine[0]
                throw new ParseException 'A YAML file cannot contain tabs as indentation.', @getRealCurrentLineNb() + 1, @currentLine

            isRef = mergeNode = false
            if values = @PATTERN_SEQUENCE_ITEM.exec @currentLine
                if @CONTEXT_MAPPING is context
                    throw new ParseException 'You cannot define a sequence item when in a mapping', @getRealCurrentLineNb() + 1, @currentLine
                context = @CONTEXT_SEQUENCE
                data ?= []

                # "- &anchor value": remember the anchor name and keep only the value
                if values.value? and matches = @PATTERN_ANCHOR_VALUE.exec values.value
                    isRef = matches.ref
                    values.value = matches.value

                # Array
                if not(values.value?) or '' is Utils.trim(values.value, ' ') or Utils.ltrim(values.value, ' ').indexOf('#') is 0
                    # Item without inline value: it is either a nested block or null
                    if @currentLineNb < @lines.length - 1 and not @isNextLineUnIndentedCollection()
                        c = @getRealCurrentLineNb() + 1
                        parser = new Parser c
                        parser.refs = @refs
                        data.push parser.parse(@getNextEmbedBlock(null, true), exceptionOnInvalidType, objectDecoder)
                    else
                        data.push null

                else
                    if values.leadspaces?.length and matches = @PATTERN_COMPACT_NOTATION.exec values.value

                        # This is a compact notation element, add to next block and parse
                        c = @getRealCurrentLineNb()
                        parser = new Parser c
                        parser.refs = @refs

                        block = values.value
                        indent = @getCurrentLineIndentation()
                        if @isNextLineIndented(false)
                            block += "\n"+@getNextEmbedBlock(indent + values.leadspaces.length + 1, true)

                        data.push parser.parse block, exceptionOnInvalidType, objectDecoder

                    else
                        data.push @parseValue values.value, exceptionOnInvalidType, objectDecoder

            else if (values = @PATTERN_MAPPING_ITEM.exec @currentLine) and values.key.indexOf(' #') is -1
                if @CONTEXT_SEQUENCE is context
                    throw new ParseException 'You cannot define a mapping item when in a sequence', @getRealCurrentLineNb() + 1, @currentLine
                context = @CONTEXT_MAPPING
                data ?= {}

                # Force correct settings
                Inline.configure exceptionOnInvalidType, objectDecoder
                try
                    key = Inline.parseScalar values.key
                catch e
                    e.parsedLine = @getRealCurrentLineNb() + 1
                    e.snippet = @currentLine

                    throw e

                if '<<' is key
                    mergeNode = true
                    allowOverwrite = true
                    if values.value?.indexOf('*') is 0
                        # "<<: *alias": merge a previously anchored value
                        refName = values.value[1..]
                        unless @refs[refName]?
                            throw new ParseException 'Reference "'+refName+'" does not exist.', @getRealCurrentLineNb() + 1, @currentLine

                        refValue = @refs[refName]

                        if typeof refValue isnt 'object'
                            throw new ParseException 'YAML merge keys used with a scalar value instead of an object.', @getRealCurrentLineNb() + 1, @currentLine

                        @mergeMissingKeys data, refValue

                    else
                        # The merged value is given inline or as a nested block.
                        # A dedicated local is used so that the `value` parameter
                        # (read by the flow-collection fallback below) is left intact.
                        if values.value? and values.value isnt ''
                            mergeBlock = values.value
                        else
                            mergeBlock = @getNextEmbedBlock()

                        c = @getRealCurrentLineNb() + 1
                        parser = new Parser c
                        parser.refs = @refs
                        parsed = parser.parse mergeBlock, exceptionOnInvalidType, objectDecoder

                        unless typeof parsed is 'object'
                            throw new ParseException 'YAML merge keys used with a scalar value instead of an object.', @getRealCurrentLineNb() + 1, @currentLine

                        if parsed instanceof Array
                            # If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes
                            # and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier
                            # in the sequence override keys specified in later mapping nodes.
                            for parsedItem in parsed
                                unless typeof parsedItem is 'object'
                                    throw new ParseException 'Merge items must be objects.', @getRealCurrentLineNb() + 1, parsedItem

                                @mergeMissingKeys data, parsedItem

                        else
                            # If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the
                            # current mapping, unless the key already exists in it.
                            @mergeMissingKeys data, parsed

                else if values.value? and matches = @PATTERN_ANCHOR_VALUE.exec values.value
                    isRef = matches.ref
                    values.value = matches.value


                # Merge keys have already been folded into `data` above
                unless mergeNode
                    if not(values.value?) or '' is Utils.trim(values.value, ' ') or Utils.ltrim(values.value, ' ').indexOf('#') is 0
                        # Hash
                        # if next line is less indented or equal, then it means that the current value is null
                        if not(@isNextLineIndented()) and not(@isNextLineUnIndentedCollection())
                            # Spec: Keys MUST be unique; first one wins.
                            # But overwriting is allowed when a merge node is used in current block.
                            if allowOverwrite or data[key] is undefined
                                data[key] = null

                        else
                            c = @getRealCurrentLineNb() + 1
                            parser = new Parser c
                            parser.refs = @refs
                            val = parser.parse @getNextEmbedBlock(), exceptionOnInvalidType, objectDecoder

                            # Spec: Keys MUST be unique; first one wins.
                            # But overwriting is allowed when a merge node is used in current block.
                            if allowOverwrite or data[key] is undefined
                                data[key] = val

                    else
                        val = @parseValue values.value, exceptionOnInvalidType, objectDecoder

                        # Spec: Keys MUST be unique; first one wins.
                        # But overwriting is allowed when a merge node is used in current block.
                        if allowOverwrite or data[key] is undefined
                            data[key] = val

            else
                # 1-liner optionally followed by newline
                lineCount = @lines.length
                if 1 is lineCount or (2 is lineCount and Utils.isEmpty(@lines[1]))
                    try
                        value = Inline.parse @lines[0], exceptionOnInvalidType, objectDecoder
                    catch e
                        e.parsedLine = @getRealCurrentLineNb() + 1
                        e.snippet = @currentLine

                        throw e

                    if typeof value is 'object'
                        # Peek at the first element to detect a collection of aliases
                        if value instanceof Array
                            first = value[0]
                        else
                            for key of value
                                first = value[key]
                                break

                        if typeof first is 'string' and first.indexOf('*') is 0
                            data = []
                            for alias in value
                                data.push @refs[alias[1..]]
                            value = data

                    return value

                else if Utils.ltrim(value).charAt(0) in ['[', '{']
                    # Flow collection spanning several lines: hand the whole input to Inline
                    try
                        return Inline.parse value, exceptionOnInvalidType, objectDecoder
                    catch e
                        e.parsedLine = @getRealCurrentLineNb() + 1
                        e.snippet = @currentLine

                        throw e

                throw new ParseException 'Unable to parse.', @getRealCurrentLineNb() + 1, @currentLine

            # Register the anchor on the value that has just been added to `data`
            if isRef
                if data instanceof Array
                    @refs[isRef] = data[data.length-1]
                else
                    lastKey = null
                    for key of data
                        lastKey = key
                    @refs[isRef] = data[lastKey]


        if Utils.isEmpty(data)
            return null
        else
            return data


    # Copies into `target` every entry of `source` whose key `target` does not
    # already own (existing keys win, even when their value is null).
    # When `source` is an array, its indices are used as string keys.
    #
    # @param [Object] target The mapping receiving the merged entries (modified in place)
    # @param [Object] source The mapping or array to merge from
    #
    mergeMissingKeys: (target, source) ->
        if source instanceof Array
            for item, index in source
                name = String(index)
                unless Object::hasOwnProperty.call(target, name)
                    target[name] = item
        else
            for name, item of source
                unless Object::hasOwnProperty.call(target, name)
                    target[name] = item

        return


    # Returns the current line number (takes the offset into account).
    #
    # @return [Integer] The current line number
    #
    getRealCurrentLineNb: ->
        return @currentLineNb + @offset


    # Returns the current line indentation.
    #
    # @return [Integer] The number of leading spaces of the current line
    #
    getCurrentLineIndentation: ->
        return @currentLine.length - Utils.ltrim(@currentLine, ' ').length


    # Returns the next embed block of YAML.
    #
    # The cursor is first advanced by one line; that line is the first line of the block.
    #
    # @param [Integer] indentation The indent level at which the block is to be read, or null to use the indentation of the block's first line
    # @param [Boolean] includeUnindentedCollection When false, a block starting with an un-indented collection item stops at the first line of same indentation that is not such an item
    #
    # @return [String] A YAML string (the block lines with `indentation` leading characters removed, joined by "\n")
    #
    # @throw [ParseException] When indentation problem are detected
    #
    getNextEmbedBlock: (indentation = null, includeUnindentedCollection = false) ->
        @moveToNextLine()

        if not indentation?
            newIndent = @getCurrentLineIndentation()

            unindentedEmbedBlock = @isStringUnIndentedCollectionItem @currentLine

            if not(@isCurrentLineEmpty()) and 0 is newIndent and not(unindentedEmbedBlock)
                throw new ParseException 'Indentation problem.', @getRealCurrentLineNb() + 1, @currentLine

        else
            newIndent = indentation


        data = [@currentLine[newIndent..]]

        unless includeUnindentedCollection
            isItUnindentedCollection = @isStringUnIndentedCollectionItem @currentLine

        # Comments must not be removed inside a string block (ie. after a line ending with "|")
        # They must not be removed inside a sub-embedded block as well
        removeCommentsPattern = @PATTERN_FOLDED_SCALAR_END
        removeComments = not removeCommentsPattern.test @currentLine

        while @moveToNextLine()
            indent = @getCurrentLineIndentation()

            if indent is newIndent
                removeComments = not removeCommentsPattern.test @currentLine

            if isItUnindentedCollection and not @isStringUnIndentedCollectionItem(@currentLine) and indent is newIndent
                @moveToPreviousLine()
                break

            if @isCurrentLineBlank()
                data.push @currentLine[newIndent..]
                continue

            if removeComments and @isCurrentLineComment()
                if indent is newIndent
                    continue

            if indent >= newIndent
                data.push @currentLine[newIndent..]
            else if Utils.ltrim(@currentLine).charAt(0) isnt '#'
                # Less indented comment lines are silently dropped; anything
                # else either ends the block (column 0) or is an error.
                if 0 is indent
                    @moveToPreviousLine()
                    break
                else
                    throw new ParseException 'Indentation problem.', @getRealCurrentLineNb() + 1, @currentLine


        return data.join "\n"


    # Moves the parser to the next line.
    #
    # @return [Boolean] false when the cursor is already on the last line (nothing is changed), true otherwise
    #
    moveToNextLine: ->
        if @currentLineNb >= @lines.length - 1
            return false

        @currentLine = @lines[++@currentLineNb]

        return true


    # Moves the parser to the previous line.
    #
    # No bounds check is performed.
    #
    moveToPreviousLine: ->
        @currentLine = @lines[--@currentLineNb]
        return


    # Parses a YAML value.
    #
    # @param [String] value A YAML value
    # @param [Boolean] exceptionOnInvalidType true if an exception must be thrown on invalid types false otherwise
    # @param [Function] objectDecoder A function to deserialize custom objects, null otherwise
    #
    # @return [Object] A JavaScript value
    #
    # @throw [ParseException] When reference does not exist
    #
    parseValue: (value, exceptionOnInvalidType, objectDecoder) ->
        if 0 is value.indexOf('*')
            # Alias: strip the leading "*" and, if present, the trailing " #comment"
            pos = value.indexOf '#'
            if pos isnt -1
                value = value.substr(1, pos-2)
            else
                value = value[1..]

            if @refs[value] is undefined
                throw new ParseException 'Reference "'+value+'" does not exist.', @getRealCurrentLineNb() + 1, @currentLine

            return @refs[value]


        if matches = @PATTERN_FOLDED_SCALAR_ALL.exec value
            modifiers = matches.modifiers ? ''

            # The modifiers may hold an explicit indentation width and/or a chomping indicator
            foldedIndent = Math.abs(parseInt(modifiers, 10))
            if isNaN(foldedIndent) then foldedIndent = 0
            val = @parseFoldedScalar matches.separator, @PATTERN_DECIMAL.replace(modifiers, ''), foldedIndent
            if matches.type?
                # Force correct settings
                Inline.configure exceptionOnInvalidType, objectDecoder
                return Inline.parseScalar matches.type+' '+val
            else
                return val

        try
            return Inline.parse value, exceptionOnInvalidType, objectDecoder
        catch e
            # Try to parse multiline compact sequence or mapping
            if value.charAt(0) in ['[', '{'] and e instanceof ParseException and @isNextLineIndented()
                value += "\n" + @getNextEmbedBlock()
                try
                    return Inline.parse value, exceptionOnInvalidType, objectDecoder
                catch e
                    e.parsedLine = @getRealCurrentLineNb() + 1
                    e.snippet = @currentLine

                    throw e

            else
                e.parsedLine = @getRealCurrentLineNb() + 1
                e.snippet = @currentLine

                throw e

        return


    # Parses a folded scalar.
    #
    # @param [String] separator The separator that was used to begin this folded scalar (| or >)
    # @param [String] indicator The indicator that was used to begin this folded scalar (+ or -)
    # @param [Integer] indentation The indentation that was used to begin this folded scalar (0 to detect it from the first non-blank line)
    #
    # @return [String] The text value
    #
    parseFoldedScalar: (separator, indicator = '', indentation = 0) ->
        notEOF = @moveToNextLine()
        if not notEOF
            return ''

        isCurrentLineBlank = @isCurrentLineBlank()
        text = ''

        # Leading blank lines are consumed before determining indentation
        while notEOF and isCurrentLineBlank
            # newline only if not EOF
            if notEOF = @moveToNextLine()
                text += "\n"
                isCurrentLineBlank = @isCurrentLineBlank()


        # Determine indentation if not specified
        if 0 is indentation
            if matches = @PATTERN_INDENT_SPACES.exec @currentLine
                indentation = matches[0].length


        if indentation > 0
            # Reuse the pattern cached on the prototype for this indentation width
            pattern = @PATTERN_FOLDED_SCALAR_BY_INDENTATION[indentation]
            unless pattern?
                pattern = new Pattern '^ {'+indentation+'}(.*)$'
                Parser::PATTERN_FOLDED_SCALAR_BY_INDENTATION[indentation] = pattern

            while notEOF and (isCurrentLineBlank or matches = pattern.exec @currentLine)
                if isCurrentLineBlank
                    text += @currentLine[indentation..]
                else
                    text += matches[1]

                # newline only if not EOF
                if notEOF = @moveToNextLine()
                    text += "\n"
                    isCurrentLineBlank = @isCurrentLineBlank()

        else if notEOF
            text += "\n"


        # The loop stopped on a line that is not part of the scalar: give it back
        if notEOF
            @moveToPreviousLine()


        # Remove line breaks of each lines except the empty and more indented ones
        if '>' is separator
            newText = ''
            for line in text.split "\n"
                if line.length is 0 or line.charAt(0) is ' '
                    newText = Utils.rtrim(newText, ' ') + line + "\n"
                else
                    newText += line + ' '
            text = newText

        if '+' isnt indicator
            # Remove any extra space or new line as we are adding them after
            text = Utils.rtrim(text)

        # Deal with trailing newlines as indicated
        if '' is indicator
            text = @PATTERN_TRAILING_LINES.replace text, "\n"
        else if '-' is indicator
            text = @PATTERN_TRAILING_LINES.replace text, ''

        return text


    # Returns true if the next line is indented.
    #
    # NOTE: the cursor is moved past any skipped lines and then rewound by a
    # single line only, so it ends up on the line just before the first
    # significant one; when the end of the document is reached it is not
    # rewound at all. getNextEmbedBlock() is called right after this check in
    # several places, so this positioning is kept as is.
    #
    # @param [Boolean] ignoreComments true to skip blank and comment lines, false to skip blank lines only
    #
    # @return [Boolean] Returns true if the next line is indented, false otherwise
    #
    isNextLineIndented: (ignoreComments = true) ->
        currentIndentation = @getCurrentLineIndentation()
        EOF = not @moveToNextLine()

        if ignoreComments
            while not(EOF) and @isCurrentLineEmpty()
                EOF = not @moveToNextLine()
        else
            while not(EOF) and @isCurrentLineBlank()
                EOF = not @moveToNextLine()

        if EOF
            return false

        ret = false
        if @getCurrentLineIndentation() > currentIndentation
            ret = true

        @moveToPreviousLine()

        return ret


    # Returns true if the current line is blank or if it is a comment line.
    #
    # @return [Boolean] Returns true if the current line is empty or if it is a comment line, false otherwise
    #
    isCurrentLineEmpty: ->
        trimmedLine = Utils.trim(@currentLine, ' ')
        return trimmedLine.length is 0 or trimmedLine.charAt(0) is '#'


    # Returns true if the current line is blank.
    #
    # @return [Boolean] Returns true if the current line is blank, false otherwise
    #
    isCurrentLineBlank: ->
        return '' is Utils.trim(@currentLine, ' ')


    # Returns true if the current line is a comment line.
    #
    # @return [Boolean] Returns true if the current line is a comment line, false otherwise
    #
    isCurrentLineComment: ->
        # Checking explicitly the first char of the trim is faster than loops or strpos
        ltrimmedLine = Utils.ltrim(@currentLine, ' ')

        return ltrimmedLine.charAt(0) is '#'


    # Cleanups a YAML string to be parsed.
    #
    # Normalizes line endings, strips the %YAML header, leading comments and
    # document markers, and removes the indentation common to all lines.
    # @offset is increased by the amounts computed for the removed header,
    # leading comments and start marker.
    #
    # @param [String] value The input YAML string
    #
    # @return [String] A cleaned up YAML string
    #
    cleanup: (value) ->
        if value.indexOf("\r") isnt -1
            value = value.split("\r\n").join("\n").split("\r").join("\n")

        # Strip YAML header
        count = 0
        [value, count] = @PATTERN_YAML_HEADER.replaceAll value, ''
        @offset += count

        # Remove leading comments
        [trimmedValue, count] = @PATTERN_LEADING_COMMENTS.replaceAll value, '', 1
        if count is 1
            # Items have been removed, update the offset
            @offset += Utils.subStrCount(value, "\n") - Utils.subStrCount(trimmedValue, "\n")
            value = trimmedValue

        # Remove start of the document marker (---)
        [trimmedValue, count] = @PATTERN_DOCUMENT_MARKER_START.replaceAll value, '', 1
        if count is 1
            # Items have been removed, update the offset
            @offset += Utils.subStrCount(value, "\n") - Utils.subStrCount(trimmedValue, "\n")
            value = trimmedValue

        # Remove end of the document marker (...)
        value = @PATTERN_DOCUMENT_MARKER_END.replace value, ''

        # Ensure the block is not indented
        lines = value.split("\n")
        smallestIndent = -1
        for line in lines
            continue if Utils.trim(line, ' ').length is 0
            indent = line.length - Utils.ltrim(line).length
            if smallestIndent is -1 or indent < smallestIndent
                smallestIndent = indent
        if smallestIndent > 0
            for line, i in lines
                lines[i] = line[smallestIndent..]
            value = lines.join("\n")

        return value


    # Returns true if the next line starts unindented collection
    #
    # NOTE: as in isNextLineIndented(), the cursor is rewound by one line only
    # after skipping blank/comment lines, and not at all at end of document.
    #
    # @param [Integer] currentIndentation The indentation to compare with, or null to use the current line's
    #
    # @return [Boolean] Returns true if the next line starts unindented collection, false otherwise
    #
    isNextLineUnIndentedCollection: (currentIndentation = null) ->
        currentIndentation ?= @getCurrentLineIndentation()
        notEOF = @moveToNextLine()

        while notEOF and @isCurrentLineEmpty()
            notEOF = @moveToNextLine()

        if false is notEOF
            return false

        ret = false
        if @getCurrentLineIndentation() is currentIndentation and @isStringUnIndentedCollectionItem(@currentLine)
            ret = true

        @moveToPreviousLine()

        return ret


    # Returns true if the string is un-indented collection item
    #
    # @param [String] line The line to test (defaults to the current line)
    #
    # @return [Boolean] Returns true if the string is un-indented collection item, false otherwise
    #
    isStringUnIndentedCollectionItem: (line = @currentLine) ->
        return line is '-' or line[0...2] is '- '
653  
654 module.exports = Parser