// corrade-nucleus-nucleons – Rev 21
//
// Subversion Repositories:
// Rev:
// XML 1.0 name productions that the patterns below are modelled on:
//[4]           NameStartChar      ::=          ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
//[4a]          NameChar           ::=          NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
//[5]           Name       ::=          NameStartChar (NameChar)*
// Note: ":" is left out of the class on purpose (tagNamePattern handles the
// prefix separator itself) and the [#x10000-#xEFFFF] range is not covered.
var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
// NameChar = the NameStartChar class (brackets stripped) plus "-", ".", digits and combining marks.
var nameChar = new RegExp(
        [
                "[\\-\\.0-9",
                nameStartChar.source.slice(1,-1),
                "\\u00B7\\u0300-\\u036F\\u203F-\\u2040]"
        ].join('')
);
// A qualified name: one name, optionally followed by ":" and a second name.
var tagNamePattern = new RegExp(
        [
                '^',
                nameStartChar.source, nameChar.source,
                '*(?:\:',
                nameStartChar.source, nameChar.source,
                '*)?$'
        ].join('')
);
//var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')

// Scanner states used while reading an element start tag.
// Order: S_TAG, S_ATTR, S_ATTR_SPACE, S_EQ, S_ATTR_NOQUOT_VALUE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
var S_TAG = 0,                  // reading the tag name
        S_ATTR = 1,             // reading an attribute name
        S_ATTR_SPACE = 2,       // attribute name finished, whitespace seen after it
        S_EQ = 3,               // "=" seen (optional whitespace may follow)
        S_ATTR_NOQUOT_VALUE = 4,// reading an attribute value that has no opening quote
        S_ATTR_END = 5,         // quoted attribute value just ended, no whitespace yet
        S_TAG_SPACE = 6,        // whitespace after the tag name or after an attribute value
        S_TAG_CLOSE = 7;        // "/" of a self-closing element seen, e.g. <el />

/**
 * SAX-style reader. Callers are expected to assign `domBuilder` and
 * `errorHandler` on the instance before calling parse().
 */
function XMLReader(){
}

XMLReader.prototype = {
        /**
         * Parse `source`, bracketing the work with startDocument()/endDocument()
         * on this.domBuilder.
         * @param source        text to parse
         * @param defaultNSMap  prefix -> namespace map; copied, never mutated
         * @param entityMap     entity name -> replacement text
         */
        parse:function(source,defaultNSMap,entityMap){
                var builder = this.domBuilder;
                var nsMapCopy = {};
                builder.startDocument();
                // work on a private copy so the caller's map stays untouched
                _copy(defaultNSMap,nsMapCopy);
                parse(source,nsMapCopy,entityMap,builder,this.errorHandler);
                builder.endDocument();
        }
}
/**
 * Main scanning loop: walks `source` from left to right, splitting it into
 * text runs and markup, and reports what it finds to `domBuilder`.
 *
 * Markup is dispatched on the character after '<':
 *   '/'  end tag, handled inline against the parse stack
 *   '?'  processing instruction, delegated to parseInstruction()
 *   '!'  comment / CDATA / doctype, delegated to parseDCC()
 *   else element start tag, delegated to parseElementStartPart()/appendElement()
 *
 * Any exception thrown while handling one piece of markup is reported through
 * errorHandler.error() and the offending '<' is then emitted as text, so the
 * loop always makes progress.
 *
 * @param source            text to parse
 * @param defaultNSMapCopy  prefix -> namespace map used for the root scope
 * @param entityMap         entity name -> replacement text
 * @param domBuilder        receives characters()/endElement()/... callbacks; its
 *                          `locator`, when present, is updated with line/column
 * @param errorHandler      receives warning()/error()/fatalError() messages
 */
function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
        // String.fromCharCode cannot produce characters above U+FFFF directly,
        // so such code points are emitted as a UTF-16 surrogate pair.
        function fixedFromCharCode(code) {
                if (code > 0xffff) {
                        code -= 0x10000;
                        var surrogate1 = 0xd800 + (code >> 10)
                                , surrogate2 = 0xdc00 + (code & 0x3ff);

                        return String.fromCharCode(surrogate1, surrogate2);
                } else {
                        return String.fromCharCode(code);
                }
        }
        // Replacement callback for /&#?\w+;/g: named entities come from entityMap,
        // "&#NN;" / "&#xHH;" are numeric references, anything else is reported
        // and left in the text unchanged.
        function entityReplacer(a){
                var k = a.slice(1,-1);
                if(k in entityMap){
                        return entityMap[k];
                }else if(k.charAt(0) === '#'){
                        return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
                }else{
                        errorHandler.error('entity not found:'+a);
                        return a;
                }
        }
        // Emit source[start, end) as character data and advance `start`.
        function appendText(end){//has some bugs
                if(end>start){
                        var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
                        locator&&position(start);
                        domBuilder.characters(xt,0,end-start);
                        start = end
                }
        }
        // Advance the locator so that it describes offset `p`.
        function position(p,m){
                while(p>=lineEnd && (m = linePattern.exec(source))){
                        lineStart = m.index;
                        lineEnd = lineStart + m[0].length;
                        locator.lineNumber++;
                }
                locator.columnNumber = p-lineStart+1;
        }
        var lineStart = 0;
        var lineEnd = 0;
        var linePattern = /.*(?:\r\n?|\n)|.*$/g
        var locator = domBuilder.locator;

        // Bottom entry carries only the default namespace scope; open elements are pushed above it.
        var parseStack = [{currentNSMap:defaultNSMapCopy}]
        var closeMap = {};
        var start = 0;
        while(true){
                try{
                        var tagStart = source.indexOf('<',start);
                        if(tagStart<0){
                                // No more markup: keep any non-blank tail as a text node.
                                if(!source.substr(start).match(/^\s*$/)){
                                        var doc = domBuilder.doc;
                                        var text = doc.createTextNode(source.substr(start));
                                        doc.appendChild(text);
                                        domBuilder.currentElement = text;
                                }
                                return;
                        }
                        if(tagStart>start){
                                appendText(tagStart);
                        }
                        switch(source.charAt(tagStart+1)){
                        case '/':
                                var end = source.indexOf('>',tagStart+3);
                                var tagName = source.substring(tagStart+2,end);
                                var config = parseStack.pop();
                                if(end<0){
                                        // "</name" with no '>' anywhere after it
                                        tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
                                        errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
                                        end = tagStart+1+tagName.length;
                                }else if(tagName.match(/\s</)){
                                        // The '>' that was found belongs to a later tag ("</name <next>"):
                                        // cut the name at the first whitespace / '<'.
                                        tagName = tagName.replace(/[\s<].*/,'');
                                        errorHandler.error("end tag name: "+tagName+' maybe not complete');
                                        end = tagStart+1+tagName.length;
                                }
                                var localNSMap = config.localNSMap;
                                var endMatch = config.tagName == tagName;
                                var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
                                if(endIgnoreCaseMach){
                                        domBuilder.endElement(config.uri,config.localName,tagName);
                                        if(localNSMap){
                                                for(var prefix in localNSMap){
                                                        domBuilder.endPrefixMapping(prefix) ;
                                                }
                                        }
                                        if(!endMatch){
                                                errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName );
                                        }
                                }else{
                                        // Stray end tag: leave the open element on the stack.
                                        parseStack.push(config)
                                }

                                end++;
                                break;
                                // end element
                        case '?':// <?...?>
                                locator&&position(tagStart);
                                end = parseInstruction(source,tagStart,domBuilder);
                                break;
                        case '!':// <!doctype,<![CDATA,<!--
                                locator&&position(tagStart);
                                end = parseDCC(source,tagStart,domBuilder,errorHandler);
                                break;
                        default:
                                locator&&position(tagStart);
                                var el = new ElementAttributes();
                                var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
                                //elStartEnd
                                var end = parseElementStartPart(source,tagStart,el,currentNSMap,entityReplacer,errorHandler);
                                var len = el.length;


                                if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
                                        el.closed = true;
                                        if(!entityMap.nbsp){
                                                errorHandler.warning('unclosed xml attribute');
                                        }
                                }
                                if(locator && len){
                                        // Give every attribute its own locator snapshot, then restore
                                        // the element-level position for the startElement call.
                                        var locator2 = copyLocator(locator,{});
                                        for(var i = 0;i<len;i++){
                                                var a = el[i];
                                                position(a.offset);
                                                a.locator = copyLocator(locator,{});
                                        }
                                        domBuilder.locator = locator2
                                        if(appendElement(el,domBuilder,currentNSMap)){
                                                parseStack.push(el)
                                        }
                                        domBuilder.locator = locator;
                                }else{
                                        if(appendElement(el,domBuilder,currentNSMap)){
                                                parseStack.push(el)
                                        }
                                }



                                if(el.uri === 'http://www.w3.org/1999/xhtml' && !el.closed){
                                        end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
                                }else{
                                        end++;
                                }
                        }
                }catch(e){
                        errorHandler.error('element parse error: '+e)
                        end = -1;
                }
                if(end>start){
                        start = end;
                }else{
                        //TODO: SAX may fall back here; the reported position can be wrong
                        appendText(Math.max(tagStart,start)+1);
                }
        }
}
/**
 * Copy the line/column position from locator `f` onto `t`.
 * @return t
 */
function copyLocator(f,t){
        var fields = ['lineNumber','columnNumber'];
        for(var i = 0;i<fields.length;i++){
                t[fields[i]] = f[fields[i]];
        }
        return t;
}

/**
 * Scan one element start tag with a small state machine (the S_* constants)
 * and record the tag name and attributes on `el`.
 *
 * @param source          text being parsed
 * @param start           index of the '<' that opens the tag
 * @param el              receives setTagName() / add(name,value,offset) calls;
 *                        `el.closed` is set to true when a self-closing '/' is seen
 * @param currentNSMap    only its '' (default namespace) entry is read here, to
 *                        decide whether a value-less disabled/checked/selected
 *                        attribute deserves a warning
 * @param entityReplacer  replacement callback applied to /&#?\w+;/g matches in
 *                        attribute values
 * @param errorHandler    receives warning() / error() messages
 * @return index of the '>' that ends the start tag, or the index at which the
 *         input ran out (charAt() returned '')
 * @throws Error when the attribute syntax cannot be recovered from
 */
function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){
        var attrName;
        var value;
        var p = ++start;
        var s = S_TAG;//status
        while(true){
                var c = source.charAt(p);
                switch(c){
                case '=':
                        if(s === S_ATTR){//attrName
                                attrName = source.slice(start,p);
                                s = S_EQ;
                        }else if(s === S_ATTR_SPACE){
                                s = S_EQ;
                        }else{
                                //fatalError: '=' must follow an attribute name (optionally separated by whitespace)
                                throw new Error('attribute equal must after attrName');
                        }
                        break;
                case '\'':
                case '"':
                        if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
                                ){//equal
                                if(s === S_ATTR){
                                        // quote directly after the name: tolerate the missing '='
                                        errorHandler.warning('attribute value must after "="')
                                        attrName = source.slice(start,p)
                                }
                                start = p+1;
                                // jump straight to the matching closing quote
                                p = source.indexOf(c,start)
                                if(p>0){
                                        value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
                                        el.add(attrName,value,start-1);
                                        s = S_ATTR_END;
                                }else{
                                        //fatalError: no end quot match
                                        throw new Error('attribute value no end \''+c+'\' match');
                                }
                        }else if(s == S_ATTR_NOQUOT_VALUE){
                                // unquoted value terminated by a quote: the opening quote was missing
                                value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
                                //console.log(attrName,value,start,p)
                                el.add(attrName,value,start);
                                //console.dir(el)
                                errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
                                start = p+1;
                                s = S_ATTR_END
                        }else{
                                //fatalError: no equal before
                                throw new Error('attribute value must after "="');
                        }
                        break;
                case '/':
                        // NOTE: the cases below fall through on purpose
                        switch(s){
                        case S_TAG:
                                el.setTagName(source.slice(start,p));
                        case S_ATTR_END:
                        case S_TAG_SPACE:
                        case S_TAG_CLOSE:
                                s =S_TAG_CLOSE;
                                el.closed = true;
                        case S_ATTR_NOQUOT_VALUE:
                        case S_ATTR:
                        case S_ATTR_SPACE:
                                // inside a name or an unquoted value '/' is just another character
                                break;
                        //case S_EQ:
                        default:
                                throw new Error("attribute invalid close char('/')")
                        }
                        break;
                case ''://end document
                        //throw new Error('unexpected end of input')
                        errorHandler.error('unexpected end of input');
                        if(s == S_TAG){
                                el.setTagName(source.slice(start,p));
                        }
                        return p;
                case '>':
                        // NOTE: the cases below fall through on purpose
                        switch(s){
                        case S_TAG:
                                el.setTagName(source.slice(start,p));
                        case S_ATTR_END:
                        case S_TAG_SPACE:
                        case S_TAG_CLOSE:
                                break;//normal
                        case S_ATTR_NOQUOT_VALUE://Compatible state
                        case S_ATTR:
                                value = source.slice(start,p);
                                // a trailing '/' here is the self-closing marker, not part of the value
                                if(value.slice(-1) === '/'){
                                        el.closed  = true;
                                        value = value.slice(0,-1)
                                }
                        case S_ATTR_SPACE:
                                if(s === S_ATTR_SPACE){
                                        value = attrName;
                                }
                                if(s == S_ATTR_NOQUOT_VALUE){
                                        errorHandler.warning('attribute "'+value+'" missed quot(")!!');
                                        el.add(attrName,value.replace(/&#?\w+;/g,entityReplacer),start)
                                }else{
                                        // value-less attribute: its name is used as its value
                                        if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !value.match(/^(?:disabled|checked|selected)$/i)){
                                                errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
                                        }
                                        el.add(value,value,start)
                                }
                                break;
                        case S_EQ:
                                throw new Error('attribute value missed!!');
                        }
//                      console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
                        return p;
                /*xml space '\x20' | #x9 | #xD | #xA; */
                case '\u0080':
                        // treated as a space; falls through to the default branch
                        c = ' ';
                default:
                        if(c<= ' '){//space
                                switch(s){
                                case S_TAG:
                                        el.setTagName(source.slice(start,p));//tagName
                                        s = S_TAG_SPACE;
                                        break;
                                case S_ATTR:
                                        attrName = source.slice(start,p)
                                        s = S_ATTR_SPACE;
                                        break;
                                case S_ATTR_NOQUOT_VALUE:
                                        var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
                                        errorHandler.warning('attribute "'+value+'" missed quot(")!!');
                                        el.add(attrName,value,start)
                                        // falls through: whitespace also ends the attribute
                                case S_ATTR_END:
                                        s = S_TAG_SPACE;
                                        break;
                                //case S_TAG_SPACE:
                                //case S_EQ:
                                //case S_ATTR_SPACE:
                                //      void();break;
                                //case S_TAG_CLOSE:
                                        //ignore warning
                                }
                        }else{//not space
//S_TAG,        S_ATTR, S_EQ,   S_ATTR_NOQUOT_VALUE
//S_ATTR_SPACE, S_ATTR_END,     S_TAG_SPACE, S_TAG_CLOSE
                                switch(s){
                                //case S_TAG:void();break;
                                //case S_ATTR:void();break;
                                //case S_ATTR_NOQUOT_VALUE:void();break;
                                case S_ATTR_SPACE:
                                        // previous attribute had no value and a new name starts here
                                        var tagName =  el.tagName;
                                        if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !attrName.match(/^(?:disabled|checked|selected)$/i)){
                                                errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
                                        }
                                        el.add(attrName,attrName,start);
                                        start = p;
                                        s = S_ATTR;
                                        break;
                                case S_ATTR_END:
                                        errorHandler.warning('attribute space is required"'+attrName+'"!!')
                                        // falls through: start the next attribute name anyway
                                case S_TAG_SPACE:
                                        s = S_ATTR;
                                        start = p;
                                        break;
                                case S_EQ:
                                        s = S_ATTR_NOQUOT_VALUE;
                                        start = p;
                                        break;
                                case S_TAG_CLOSE:
                                        throw new Error("elements closed character '/' and '>' must be connected to");
                                }
                        }
                }//end outer switch
                //console.log('p++',p)
                p++;
        }
}
/**
 * Resolve namespaces for a scanned start tag and report it to the DOM builder.
 *
 * Steps: (1) split every attribute name into prefix/localName and register
 * xmlns / xmlns:* declarations in a copy of the namespace map, (2) assign a
 * namespace URI to every prefixed attribute, (3) resolve the element's own
 * namespace and call domBuilder.startElement(). A closed element additionally
 * gets endElement() and endPrefixMapping() calls right away.
 *
 * @param el            attribute list with `tagName`, `length`, `closed` and
 *                      indexed {qName,value} entries; annotated in place
 * @param domBuilder    receives startPrefixMapping/startElement/endElement/
 *                      endPrefixMapping callbacks
 * @param currentNSMap  prefix -> namespace map of the enclosing scope; never
 *                      mutated (it is copied before the first declaration is added)
 * @return true when the element stays open and must be pushed on the parse
 *         stack by the caller; undefined when the element was already closed
 */
function appendElement(el,domBuilder,currentNSMap){
        var tagName = el.tagName;
        var localNSMap = null;
        var prefix;
        var localName;
        var nsPrefix;
        var a;
        var i = el.length;
        while(i--){
                a = el[i];
                var qName = a.qName;
                var value = a.value;
                var nsp = qName.indexOf(':');
                if(nsp>0){
                        prefix = a.prefix = qName.slice(0,nsp);
                        localName = qName.slice(nsp+1);
                        nsPrefix = prefix === 'xmlns' && localName
                }else{
                        localName = qName;
                        prefix = null
                        nsPrefix = qName === 'xmlns' && ''
                }
                //can not set prefix,because prefix !== ''
                a.localName = localName ;
                //prefix == null for no ns prefix attribute
                // nsPrefix is false for ordinary attributes, '' for "xmlns" and the
                // declared prefix for "xmlns:prefix" - hence the strict comparison.
                if(nsPrefix !== false){//hack!!
                        if(localNSMap == null){
                                localNSMap = {}
                                // copy on first declaration so the parent scope is untouched
                                _copy(currentNSMap,currentNSMap={})
                        }
                        currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
                        a.uri = 'http://www.w3.org/2000/xmlns/'
                        domBuilder.startPrefixMapping(nsPrefix, value)
                }
        }
        i = el.length;
        while(i--){
                a = el[i];
                prefix = a.prefix;
                if(prefix){//no prefix attribute has no namespace
                        if(prefix === 'xml'){
                                // The "xml" prefix is bound to this namespace by definition; it must
                                // not be overwritten by a (possibly missing) entry in the map.
                                a.uri = 'http://www.w3.org/XML/1998/namespace';
                        }else if(prefix !== 'xmlns'){
                                a.uri = currentNSMap[prefix || '']
                        }
                }
        }
        nsp = tagName.indexOf(':');
        if(nsp>0){
                prefix = el.prefix = tagName.slice(0,nsp);
                localName = el.localName = tagName.slice(nsp+1);
        }else{
                prefix = null;//important!!
                localName = el.localName = tagName;
        }
        //no prefix element has default namespace
        var ns = el.uri = currentNSMap[prefix || ''];
        domBuilder.startElement(ns,localName,tagName,el);
        //endPrefixMapping and startPrefixMapping have not any help for dom builder
        if(el.closed){
                domBuilder.endElement(ns,localName,tagName);
                if(localNSMap){
                        for(prefix in localNSMap){
                                domBuilder.endPrefixMapping(prefix)
                        }
                }
        }else{
                // remembered so the matching end tag can close the scope
                el.currentNSMap = currentNSMap;
                el.localNSMap = localNSMap;
                return true;
        }
}
/**
 * Handle the raw-text content of an XHTML <script> or <textarea> element.
 *
 * When the content up to the matching end tag contains '&' or '<', it is
 * reported in one characters() call instead of being parsed as markup:
 * verbatim for script, with entities replaced for textarea.
 *
 * @param source          text being parsed
 * @param elStartEnd      index of the '>' that ends the start tag
 * @param tagName         tag name as written in the source
 * @param entityReplacer  replacement callback for /&#?\w+;/g matches
 * @param domBuilder      receives the characters() call
 * @return index of the '<' of the end tag when the content was consumed here,
 *         otherwise elStartEnd+1 so that normal parsing continues after the
 *         start tag
 */
function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
        if(/^(?:script|textarea)$/i.test(tagName)){
                var elEndStart =  source.indexOf('</'+tagName+'>',elStartEnd);
                if(elEndStart<0){
                        // No end tag: substring() with -1 would swap its arguments and hand
                        // back the beginning of the document, so fall back to normal parsing.
                        return elStartEnd+1;
                }
                var text = source.substring(elStartEnd+1,elEndStart);
                if(/[&<]/.test(text)){
                        if(!/^script$/i.test(tagName)){
                                // textarea content still has its entities expanded
                                text = text.replace(/&#?\w+;/g,entityReplacer);
                        }
                        domBuilder.characters(text,0,text.length);
                        return elEndStart;
                }
        }
        return elStartEnd+1;
}
/**
 * Decide whether an element that was not written as self-closing should be
 * treated as closed because no end tag for it exists after `elStartEnd`.
 *
 * The position of the last end tag per tag name is cached in `closeMap`.
 *
 * @param source      text being parsed
 * @param elStartEnd  index where the start tag ends
 * @param tagName     tag name to look for
 * @param closeMap    cache: tag name -> index of the last end tag (or -1)
 * @return true when the last end tag lies before elStartEnd (or is absent)
 */
function fixSelfClosed(source,elStartEnd,tagName,closeMap){
        var lastClose = closeMap[tagName];
        if(lastClose == null){
                var closer = '</'+tagName;
                lastClose = source.lastIndexOf(closer+'>');
                if(lastClose<elStartEnd){
                        // the author may have forgotten the '>' of the end tag
                        lastClose = source.lastIndexOf(closer);
                }
                closeMap[tagName] = lastClose;
        }
        return lastClose<elStartEnd;
}
/**
 * Shallow-copy every enumerable property of `source` (inherited ones
 * included) onto `target`.
 */
function _copy(source,target){
        for(var key in source){
                target[key] = source[key];
        }
}
/**
 * Parse markup that starts with '<!': a comment, a CDATA section or a
 * DOCTYPE declaration.
 *
 * @param source        text being parsed
 * @param start         index of the '<' (the caller guarantees "<!" follows)
 * @param domBuilder    receives comment() / startCDATA()+characters()+endCDATA() /
 *                      startDTD()+endDTD() callbacks
 * @param errorHandler  receives error() for unterminated constructs
 * @return index just past the construct, or -1 when nothing was recognised
 */
function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
        var next= source.charAt(start+2)
        var end;
        switch(next){
        case '-':
                if(source.charAt(start + 3) === '-'){
                        end = source.indexOf('-->',start+4);
                        //append comment source.substring(4,end)//<!--
                        if(end>start){
                                domBuilder.comment(source,start+4,end-start-4);
                                return end+3;
                        }else{
                                errorHandler.error("Unclosed comment");
                                return -1;
                        }
                }else{
                        //error
                        return -1;
                }
        default:
                if(source.substr(start+3,6) == 'CDATA['){
                        end = source.indexOf(']]>',start+9);
                        if(end<0){
                                // Without this guard a negative length would be reported and the
                                // returned position (2) would point back into already parsed input.
                                errorHandler.error("Unclosed CDATA");
                                return -1;
                        }
                        domBuilder.startCDATA();
                        domBuilder.characters(source,start+9,end-start-9);
                        domBuilder.endCDATA()
                        return end+3;
                }
                //<!DOCTYPE
                //startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
                var matchs = split(source,start);
                var len = matchs.length;
                if(len>1 && /!doctype/i.test(matchs[0][0])){
                        var name = matchs[1][0];
                        var pubid = len>3 && /^public$/i.test(matchs[2][0]) && matchs[3][0]
                        var sysid = len>4 && matchs[4][0];
                        var lastMatch = matchs[len-1]
                        // strip the surrounding quotes from the identifiers
                        domBuilder.startDTD(name,pubid && pubid.replace(/^(['"])(.*?)\1$/,'$2'),
                                        sysid && sysid.replace(/^(['"])(.*?)\1$/,'$2'));
                        domBuilder.endDTD();

                        return lastMatch.index+lastMatch[0].length
                }
        }
        return -1;
}



/**
 * Parse a processing instruction ("<?target data?>") and report it through
 * domBuilder.processingInstruction(target, data).
 *
 * @param source      text being parsed
 * @param start       index of the '<' that opens the instruction
 * @param domBuilder  receives the processingInstruction() call
 * @return index just past the closing "?>", or -1 when the instruction is
 *         unterminated or malformed
 */
function parseInstruction(source,start,domBuilder){
        var end = source.indexOf('?>',start);
        // indexOf() signals "not found" with -1, which is truthy; test it explicitly,
        // otherwise substring(start,-1) would re-read the text before `start`.
        if(end<0){
                return -1;
        }
        var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
        if(!match){//error
                return -1;
        }
        domBuilder.processingInstruction(match[1], match[2]) ;
        return end+2;
}

/**
 * Array-like container for the tag name and attributes of one start tag.
 * Attributes live at numeric indexes as {qName, value, offset}; later stages
 * add localName / uri / locator to these entries.
 * @param source unused
 */
function ElementAttributes(source){
}
ElementAttributes.prototype = {
        /** Store the tag name; throws Error when it is not a valid qualified name. */
        setTagName:function(tagName){
                if(tagNamePattern.test(tagName)){
                        this.tagName = tagName;
                        return;
                }
                throw new Error('invalid tagName:'+tagName);
        },
        /** Append an attribute; throws Error when qName is not a valid qualified name. */
        add:function(qName,value,offset){
                if(!tagNamePattern.test(qName)){
                        throw new Error('invalid attribute:'+qName);
                }
                var index = this.length++;
                this[index] = {qName:qName,value:value,offset:offset};
        },
        // number of attributes; shadowed by an own property after the first add()
        length:0,
        getLocalName:function(i){
                return this[i].localName;
        },
        getLocator:function(i){
                return this[i].locator;
        },
        getQName:function(i){
                return this[i].qName;
        },
        getURI:function(i){
                return this[i].uri;
        },
        getValue:function(i){
                return this[i].value;
        }
}




/**
 * Make `parent` the prototype of `thiz` via the __proto__ accessor.
 * @return thiz
 */
function _set_proto_(thiz,parent){
        thiz.__proto__ = parent;
        return thiz;
}
// Feature probe: if assigning __proto__ did not change the prototype chain,
// fall back to building a new object on top of `parent` and copying the
// enumerable properties of `thiz` onto it.
if((_set_proto_({},_set_proto_.prototype) instanceof _set_proto_) === false){
        _set_proto_ = function(thiz,parent){
                function Surrogate(){}
                Surrogate.prototype = parent;
                var derived = new Surrogate();
                for(var key in thiz){
                        derived[key] = thiz[key];
                }
                return derived;
        }
}

/**
 * Tokenise a "<!...>" declaration starting at `start`.
 *
 * Tokens are quoted strings, bare words (optionally ending in '='), and the
 * delimiters '<' / '>' / '/>'. The leading '<' is skipped.
 *
 * @param source  text being parsed
 * @param start   index of the opening '<'
 * @return array of RegExp match objects up to and including the first
 *         delimiter token; undefined when the input ends before one is found
 */
function split(source,start){
        var tokens = [];
        var tokenizer = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
        tokenizer.lastIndex = start;
        tokenizer.exec(source);//skip <
        for(var m = tokenizer.exec(source); m; m = tokenizer.exec(source)){
                tokens.push(m);
                if(m[1]){
                        // a delimiter ends the declaration
                        return tokens;
                }
        }
}

// Expose the XMLReader constructor through the CommonJS `exports` object.
exports.XMLReader = XMLReader;