VYPR
High severity · NVD Advisory · Published Jul 29, 2024 · Updated Oct 11, 2024

ReDoS in currency parsing in fast-xml-parser

CVE-2024-41818

Description

fast-xml-parser is an open-source, pure JavaScript XML parser. A ReDoS (regular expression denial of service) exists in currency.js. This vulnerability is fixed in 4.4.1.

Affected packages

Versions sourced from the GitHub Security Advisory.

Package | Affected versions | Patched versions
fast-xml-parser (npm) | >= 4.3.5, < 4.4.1 | 4.4.1

Affected products

1

Patches

2
d0bfe8a3a281

fix maxlength for currency value

https://github.com/NaturalIntelligence/fast-xml-parser · amit kumar gupta · Jul 28, 2024 · via ghsa
5 files changed · +19 -7
  • src/v5/OutputBuilders/JsArrBuilder.js · +1 -1 · modified
    @@ -3,7 +3,7 @@ const {buildOptions,registerCommonValueParsers} = require("./ParserOptionsBuilde
     class OutputBuilder{
       constructor(options){
         this.options = buildOptions(options);
    -      this.registeredParsers = registerCommonValueParsers();
    +      this.registeredParsers = registerCommonValueParsers(this.options);
         }
         
         registerValueParser(name,parserInstance){//existing name will override the parser without warning
    
  • src/v5/OutputBuilders/JsMinArrBuilder.js · +1 -1 · modified
    @@ -3,7 +3,7 @@ const {buildOptions,registerCommonValueParsers} = require("./ParserOptionsBuilde
     class OutputBuilder{
       constructor(options){
         this.options = buildOptions(options);
    -      this.registeredParsers = registerCommonValueParsers();
    +      this.registeredParsers = registerCommonValueParsers(this.options);
         }
         
         registerValueParser(name,parserInstance){//existing name will override the parser without warning
    
  • src/v5/OutputBuilders/JsObjBuilder.js · +1 -1 · modified
    @@ -5,7 +5,7 @@ const {buildOptions,registerCommonValueParsers} = require("./ParserOptionsBuilde
     class OutputBuilder{
       constructor(builderOptions){
           this.options = buildOptions(builderOptions);
    -      this.registeredParsers = registerCommonValueParsers();
    +      this.registeredParsers = registerCommonValueParsers(this.options);
       }
     
       registerValueParser(name,parserInstance){//existing name will override the parser without warning
    
  • src/v5/OutputBuilders/ParserOptionsBuilder.js · +4 -1 · modified
    @@ -34,6 +34,9 @@ const defaultOptions={
           // "currency",
           // "date",
         ]
    +  },
    +  dataType:{
    +
       }
     }
     
    @@ -75,7 +78,7 @@ function copyProperties(target, source) {
       }
     }
     
    -function registerCommonValueParsers(){
    +function registerCommonValueParsers(options){
       return {
         "trim": new trimParser(),
         // "join": this.entityParser.parse,
    
  • src/v5/valueParsers/currency.js · +12 -3 · modified
    @@ -1,20 +1,27 @@
    -
    +const defaultOptions = {
    +    maxLength: 200,
    +    // locale: "en-IN"
    +}
     const localeMap = {
         "$":"en-US",
         "€":"de-DE",
         "£":"en-GB",
         "¥":"ja-JP",
         "₹":"en-IN",
     }
    +const sign = "(?:-|\+)?";
    +const digitsAndSeparator = "(?:\d+|\d{1,3}(?:,\d{3})+)";
    +const decimalPart = "(?:\.\d{1,2})?";
    +const symbol = "(?:\$|€|¥|₹)?";
     
     const currencyCheckRegex = /^\s*(?:-|\+)?(?:\d+|\d{1,3}(?:,\d{3})+)?(?:\.\d{1,2})?\s*(?:\$|€|¥|₹)?\s*$/u;
     
     class CurrencyParser{
         constructor(options){
    -        this.options = options;
    +        this.options = options || defaultOptions;
         }
         parse(val){
    -        if (typeof val === 'string') {
    +        if (typeof val === 'string' && val.length <= this.options.maxLength) {
                 if(val.indexOf(",,") !== -1 && val.indexOf(".." !== -1)){
                     const match = val.match(currencyCheckRegex);
                     if(match){
    @@ -28,4 +35,6 @@ class CurrencyParser{
             return val;
         }
     }
    +CurrencyParser.defaultOptions = defaultOptions;
    +
     module.exports = CurrencyParser;
    \ No newline at end of file
    
ba5f35e76804

XML Parser v5

https://github.com/NaturalIntelligence/fast-xml-parser · amit kumar gupta · Feb 24, 2024 · via ghsa
30 files changed · +2183 -17
  • CHANGELOG.md · +3 -0 · modified
    @@ -1,5 +1,8 @@
     Note: If you find missing information about particular minor version, that version must have been changed without any functional change in this library.
     
    +**4.3.5 / 2024-02-24**
    +* code for v5 is added for experimental use
    +
     **4.3.4 / 2024-01-10**
     * fix: Don't escape entities in CDATA sections (#633) (By [wackbyte](https://github.com/wackbyte))
     
    
  • docs/v5/1. Getting Started.md · +217 -0 · added
    @@ -0,0 +1,217 @@
    +
    +
    +Example
    +
    +```js
    +const options = {
    +  preserveOrder: true,
    +  removeNSPrefix: false, // remove NS from tag name or attribute name if true
    +  stopNodes: [], //nested tags will not be parsed even for errors
    +  htmlEntities: false,
    +  tags:{
    +    unpaired: [],
    +    nameFor:{
    +      cdata: false,
    +      comment: false,
    +      text: '#text'
    +    },
    +    separateTextProperty: false,
    +    //"join" only if preserveOrder: true
    +    valueParsers: ["trim","entities","join","boolean","number","currency","date"]
    +  },
    +  attributes: {
    +      ignore: false,
    +      booleanType:true,
    +      entities: true,
    +      //"groupBy": "att"
    +  },
    +  OutputBuilder: new JsObjOutputBuilder()
    +};
    +const parser = new XMLParser(options);
    +let result = parser.parse(xmlData, true);
    +```
    +
    +- You can build your own Output Builder. FXP provides 3 builders
    +  - JsObjOutputBuilder
    +  - JsArrBuilder
    +  - JsMinArrBuilder
    +- You can control the sequence of value parsing for a tag or attribute
    +- You can pass a string or bytes array as input.
    +
    +### Value Parser
    +You can change the sequence of value parsers or remove one or provide your own parser to control the parsing.
    +
    +### Output builders
    +You can use provided output builds or your own output builder.
    +
    +JsObjOutputBuilder
    +```js
    +{
    +    "soap:Envelope": {
    +        "@_xmlns:soap": "http://schemas.xmlsoap.org/soap/envelope/",
    +        "soap:Body": {
    +            "rpt:loadReportFileResponseElem": {
    +                "@_xmlns:s": "http://bus.x.com/common/support/v1",
    +                "@_xmlns:rpt": "http://bus.x.com/service/statement/v1",
    +                "s:code": 0,
    +                "s:responseTime": 2588,
    +                "s:responseDbTime": 1893,
    +                "s:requestId": "6b408fd09eb211e7a0807e34820340ec",
    +                "s:route": "172.16.x.x:9192",
    +                "rpt:result": {
    +                    "rpt:file": "<soap:Envelope xmlns:soap=\"http://schemas.xmlsoap.org/soap/envelope/\">\n    <soap:Body>\n        <rpt:loadReportFileResponseElem\n                xmlns:s=\"http://bus.x.com/common/support/v1\"\n                xmlns:rpt=\"http://bus.x.com/service/statement/v1\">\n            <s:code>0</s:code>\n            <s:responseTime>2588</s:responseTime>\n            <s:responseDbTime>1893</s:responseDbTime>\n            <s:requestId>6b408fd09eb211e7a0807e34820340ec</s:requestId>\n            <s:route>172.16.x.x:9192</s:route>\n            <rpt:result>\n <rpt:file></rpt:file>\n            </rpt:result>\n        </rpt:loadReportFileResponseElem>\n    </soap:Body>\n</soap:Envelope>"
    +                }
    +            }
    +        }
    +    }
    +}
    +```
    +
    +JsArrBuilder
    +```js
    +{
    +    "tagname": "soap:Envelope",
    +    "child": [
    +        {
    +            "tagname": "soap:Body",
    +            "child": [
    +                {
    +                    "tagname": "rpt:loadReportFileResponseElem",
    +                    "child": [
    +                        {
    +                            "tagname": "s:code",
    +                            "child": [
    +                                {
    +                                    "#text": 0
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "tagname": "s:responseTime",
    +                            "child": [
    +                                {
    +                                    "#text": 2588
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "tagname": "s:responseDbTime",
    +                            "child": [
    +                                {
    +                                    "#text": 1893
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "tagname": "s:requestId",
    +                            "child": [
    +                                {
    +                                    "#text": "6b408fd09eb211e7a0807e34820340ec"
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "tagname": "s:route",
    +                            "child": [
    +                                {
    +                                    "#text": "172.16.x.x:9192"
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "tagname": "rpt:result",
    +                            "child": [
    +                                {
    +                                    "tagname": "rpt:file",
    +                                    "child": [
    +                                        {
    +                                            "#text": "<soap:Envelope xmlns:soap=\"http://schemas.xmlsoap.org/soap/envelope/\">\n    <soap:Body>\n        <rpt:loadReportFileResponseElem\n                xmlns:s=\"http://bus.x.com/common/support/v1\"\n                xmlns:rpt=\"http://bus.x.com/service/statement/v1\">\n            <s:code>0</s:code>\n            <s:responseTime>2588</s:responseTime>\n            <s:responseDbTime>1893</s:responseDbTime>\n            <s:requestId>6b408fd09eb211e7a0807e34820340ec</s:requestId>\n            <s:route>172.16.x.x:9192</s:route>\n            <rpt:result>\n <rpt:file></rpt:file>\n            </rpt:result>\n        </rpt:loadReportFileResponseElem>\n    </soap:Body>\n</soap:Envelope>"
    +                                        }
    +                                    ]
    +                                }
    +                            ]
    +                        }
    +                    ],
    +                    ":@": {
    +                        "@_xmlns:s": "http://bus.x.com/common/support/v1",
    +                        "@_xmlns:rpt": "http://bus.x.com/service/statement/v1"
    +                    }
    +                }
    +            ]
    +        }
    +    ],
    +    ":@": {
    +        "@_xmlns:soap": "http://schemas.xmlsoap.org/soap/envelope/"
    +    }
    +}
    +```
    +
    +JsMinArrBuilder
    +```js
    +{
    +    "soap:Envelope": [
    +        {
    +            "soap:Body": [
    +                {
    +                    "rpt:loadReportFileResponseElem": [
    +                        {
    +                            "s:code": [
    +                                {
    +                                    "#text": 0
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "s:responseTime": [
    +                                {
    +                                    "#text": 2588
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "s:responseDbTime": [
    +                                {
    +                                    "#text": 1893
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "s:requestId": [
    +                                {
    +                                    "#text": "6b408fd09eb211e7a0807e34820340ec"
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "s:route": [
    +                                {
    +                                    "#text": "172.16.x.x:9192"
    +                                }
    +                            ]
    +                        },
    +                        {
    +                            "rpt:result": [
    +                                {
    +                                    "rpt:file": [
    +                                        {
    +                                            "#text": "<soap:Envelope xmlns:soap=\"http://schemas.xmlsoap.org/soap/envelope/\">\n    <soap:Body>\n        <rpt:loadReportFileResponseElem\n                xmlns:s=\"http://bus.x.com/common/support/v1\"\n                xmlns:rpt=\"http://bus.x.com/service/statement/v1\">\n            <s:code>0</s:code>\n            <s:responseTime>2588</s:responseTime>\n            <s:responseDbTime>1893</s:responseDbTime>\n            <s:requestId>6b408fd09eb211e7a0807e34820340ec</s:requestId>\n            <s:route>172.16.x.x:9192</s:route>\n            <rpt:result>\n <rpt:file></rpt:file>\n            </rpt:result>\n        </rpt:loadReportFileResponseElem>\n    </soap:Body>\n</soap:Envelope>"
    +                                        }
    +                                    ]
    +                                }
    +                            ]
    +                        }
    +                    ],
    +                    ":@": {
    +                        "@_xmlns:s": "http://bus.x.com/common/support/v1",
    +                        "@_xmlns:rpt": "http://bus.x.com/service/statement/v1"
    +                    }
    +                }
    +            ]
    +        }
    +    ],
    +    ":@": {
    +        "@_xmlns:soap": "http://schemas.xmlsoap.org/soap/envelope/"
    +    }
    +}
    +```
    +
    
  • package.json · +1 -1 · modified
    @@ -1,6 +1,6 @@
     {
       "name": "fast-xml-parser",
    -  "version": "4.3.4",
    +  "version": "4.3.5",
       "description": "Validate XML, Parse XML, Build XML without C/C++ based libraries",
       "main": "./src/fxp.js",
       "scripts": {
    
  • package-lock.json · +2 -2 · modified
    @@ -1,12 +1,12 @@
     {
       "name": "fast-xml-parser",
    -  "version": "4.3.4",
    +  "version": "4.3.5",
       "lockfileVersion": 2,
       "requires": true,
       "packages": {
         "": {
           "name": "fast-xml-parser",
    -      "version": "4.3.4",
    +      "version": "4.3.5",
           "funding": [
             {
               "type": "paypal",
    
  • README.md · +7 -14 · modified
    @@ -11,7 +11,8 @@
     
     Validate XML, Parse XML to JS Object, or Build XML from JS Object without C/C++ based libraries and no callback.
     
    -<font size="6">I need a Career advice. I've posted the query on my <a href="github.com/amitguptagwl">profile</a>. Your support would be appreciable.</font>
    +> XML Parser v5 is added for experimental use
    +> https://solothought.com
     
     Sponsor this project 👉 
     <a href="https://github.com/sponsors/NaturalIntelligence"> 
    @@ -91,6 +92,11 @@ If you want to be an anonymous user of this application and don't want to be hig
     * Supports parsing of PI (Processing Instruction) tags with XML declaration tags
     * And many more other features.
     
    +## v5
    +I developed v5 in Apr 2023. And I didn't get the chance to complete all the features. I've ensured that new features don't impact performance. With v5, you have more control on parsing output. Check [docs](./docs/v5) for syntax help and basic understanding.
    +
    +Please leave a comment in discussion forum for your suggestions and if you really need v5.
    +
     ## How to use
     
     To use as package dependency
    @@ -174,19 +180,6 @@ Check lib folder for different browser bundles
     
     [![](static/img/ni_ads_ads.gif)](https://github.com/NaturalIntelligence/ads/)
     
    -## Our other projects and research you must try
    -
    -* **[BigBit standard](https://github.com/amitguptagwl/bigbit)** : 
    -  * Single text encoding to replace UTF-8, UTF-16, UTF-32 and more with less memory.
    -  * Single Numeric datatype alternative of integer, float, double, long, decimal and more without precision loss.
    -* **[Cytorus](https://github.com/NaturalIntelligence/cytorus)**:  Be specific and flexible while running E2E tests.
    -  * Run tests only for a particular User Story
    -  * Run tests for a route or from a route
    -  * Customizable reporting
    -  * Central dashboard for better monitoring
    -  * Options to integrate E2E tests with Jira, Github etc using Central dashboard `Tian`.
    -* **[Stubmatic](https://github.com/NaturalIntelligence/Stubmatic)** : Create fake webservices, DynamoDB or S3 servers, Manage fake/mock stub data, Or fake any HTTP(s) call.
    -
     
     ## Supporters
     ### Contributors
    
  • spec/v5/test.js · +32 -0 · added
    @@ -0,0 +1,32 @@
    +const XMLParser = require("../../src/v5/XMLParser");
    +const JsObjOutputBuilder = require("../../src/v5/OutputBuilders/JsObjBuilder");
    +const JsArrBuilder = require("../../src/v5/OutputBuilders/JsArrBuilder");
    +const JsMinArrBuilder = require("../../src/v5/OutputBuilders/JsMinArrBuilder");
    +
    +const fs = require("fs");
    +const path = require("path");
    +const fileNamePath = path.join(__dirname, "../assets/ptest.xml");//with CDATA
    +// const fileNamePath = path.join(__dirname, "../assets/ptest_with_prolog.xml");//with CDATA
    +// const fileNamePath = path.join(__dirname, "../assets/sample.xml");//1.5k
    +// const fileNamePath = path.join(__dirname, "../assets/midsize.xml");//13m
    +// const fileNamePath = path.join(__dirname, "../assets/large.xml");//98m
    +const xmlData = fs.readFileSync(fileNamePath).toString();
    +
    +describe("XMLParser Entities", function() {
    +
    +  it("should parse", function() {
    +      
    +      const options = {
    +          attributes: {
    +              ignore: false,
    +              booleanType:true
    +          },
    +          OutputBuilder: new JsMinArrBuilder()
    +      };
    +      const parser = new XMLParser(options);
    +      let result = parser.parse(xmlData);
    +
    +      console.log(JSON.stringify(result,null,4));
    +    //   expect(result).toEqual(expected);
    +  });
    +});
    \ No newline at end of file
    
  • src/v5/CharsSymbol.js · +16 -0 · added
    @@ -0,0 +1,16 @@
    +modules.export = {
    +  "<" : "<", //tag start
    +  ">" : ">", //tag end
    +  "/" : "/", //close tag
    +  "!" : "!", //comment or docttype
    +  "!--" : "!--", //comment
    +  "-->" : "-->", //comment end
    +  "?" : "?", //pi
    +  "?>" : "?>", //pi end
    +  "?xml" : "?xml", //pi end
    +  "![" : "![", //cdata
    +  "]]>" : "]]>", //cdata end
    +  "[" : "[",
    +  "-" : "-",
    +  "D" : "D",
    +}
    \ No newline at end of file
    
  • src/v5/EntitiesParser.js · +105 -0 · added
    @@ -0,0 +1,105 @@
    +const ampEntity = { regex: /&(amp|#38|#x26);/g, val : "&"};
    +const htmlEntities = {
    +    "space": { regex: /&(nbsp|#160);/g, val: " " },
    +    // "lt" : { regex: /&(lt|#60);/g, val: "<" },
    +    // "gt" : { regex: /&(gt|#62);/g, val: ">" },
    +    // "amp" : { regex: /&(amp|#38);/g, val: "&" },
    +    // "quot" : { regex: /&(quot|#34);/g, val: "\"" },
    +    // "apos" : { regex: /&(apos|#39);/g, val: "'" },
    +    "cent" : { regex: /&(cent|#162);/g, val: "¢" },
    +    "pound" : { regex: /&(pound|#163);/g, val: "£" },
    +    "yen" : { regex: /&(yen|#165);/g, val: "¥" },
    +    "euro" : { regex: /&(euro|#8364);/g, val: "€" },
    +    "copyright" : { regex: /&(copy|#169);/g, val: "©" },
    +    "reg" : { regex: /&(reg|#174);/g, val: "®" },
    +    "inr" : { regex: /&(inr|#8377);/g, val: "₹" },
    +};
    +
    +class EntitiesParser{
    +    constructor(replaceHtmlEntities) {
    +      this.replaceHtmlEntities = replaceHtmlEntities;
    +      this.docTypeEntities = {};
    +      this.lastEntities = {
    +        "apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
    +        "gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
    +        "lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
    +        "quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
    +      };
    +    }
    +
    +    addExternalEntities(externalEntities){
    +        const entKeys = Object.keys(externalEntities);
    +        for (let i = 0; i < entKeys.length; i++) {
    +          const ent = entKeys[i];
    +          this.addExternalEntity(ent,externalEntities[ent])
    +        }
    +    }
    +    addExternalEntity(key,val){
    +      validateEntityName(key);
    +      if(val.indexOf("&") !== -1) {
    +        reportWarning(`Entity ${key} is not added as '&' is found in value;`)
    +        return;
    +      }else{
    +        this.lastEntities[ent] = {
    +          regex: new RegExp("&"+key+";","g"),
    +          val : val
    +        }
    +      }
    +    }
    +
    +    addDocTypeEntities(entities){
    +        const entKeys = Object.keys(entities);
    +        for (let i = 0; i < entKeys.length; i++) {
    +          const ent = entKeys[i];
    +          this.docTypeEntities[ent] = {
    +             regex: new RegExp("&"+ent+";","g"),
    +             val : entities[ent]
    +          }
    +        }
    +    }
    +
    +    parse(val){
    +        return this.replaceEntitiesValue(val)
    +    }
    +
    +    /**
    +     * 1. Replace DOCTYPE entities 
    +     * 2. Replace external entities 
    +     * 3. Replace HTML entities if asked
    +     * @param {string} val 
    +     */
    +    replaceEntitiesValue(val){
    +        if(typeof val === "string" && val.length > 0){
    +            for(let entityName in this.docTypeEntities){
    +                const entity = this.docTypeEntities[entityName];
    +                val = val.replace( entity.regx, entity.val);
    +              }
    +              for(let entityName in this.lastEntities){
    +                const entity = this.lastEntities[entityName];
    +                val = val.replace( entity.regex, entity.val);
    +              }
    +              if(this.replaceHtmlEntities){
    +                for(let entityName in htmlEntities){
    +                  const entity = htmlEntities[entityName];
    +                  val = val.replace( entity.regex, entity.val);
    +                }
    +              }
    +            val = val.replace( ampEntity.regex, ampEntity.val);
    +        }
    +        return val;
    +    }
    +};
    +
    +//an entity name should not contains special characters that may be used in regex
    +//Eg !?\\\/[]$%{}^&*()<>
    +const specialChar = "!?\\\/[]$%{}^&*()<>|+";
    +
    +function validateEntityName(name){
    +    for (let i = 0; i < specialChar.length; i++) {
    +        const ch = specialChar[i];
    +        if(name.indexOf(ch) !== -1) throw new Error(`Invalid character ${ch} in entity name`);
    +    }
    +    return name;
    +}
    +
    +module.exports = EntitiesParser;
    \ No newline at end of file
    
  • src/v5/inputSource/BufferSource.js · +118 -0 · added
    @@ -0,0 +1,118 @@
    +const Constants = {
    +  space: 32,
    +  tab: 9
    +}
    +class BufferSource{
    +  constructor(bytesArr){
    +    this.line = 1;
    +    this.cols = 0;
    +    this.buffer = bytesArr;
    +    this.startIndex = 0;
    +  }
    +
    +
    +
    +  readCh() {
    +    return String.fromCharCode(this.buffer[this.startIndex++]);
    +  }
    +
    +  readChAt(index) {
    +    return String.fromCharCode(this.buffer[this.startIndex+index]);
    +  }
    +
    +  readStr(n,from){
    +    if(typeof from === "undefined") from = this.startIndex;
    +    return this.buffer.slice(from, from + n).toString();
    +  }
    +
    +  readUpto(stopStr) {
    +    const inputLength = this.buffer.length;
    +    const stopLength = stopStr.length;
    +    const stopBuffer = Buffer.from(stopStr);
    +
    +    for (let i = this.startIndex; i < inputLength; i++) {
    +        let match = true;
    +        for (let j = 0; j < stopLength; j++) {
    +            if (this.buffer[i + j] !== stopBuffer[j]) {
    +                match = false;
    +                break;
    +            }
    +        }
    +
    +        if (match) {
    +            const result = this.buffer.slice(this.startIndex, i).toString();
    +            this.startIndex = i + stopLength;
    +            return result;
    +        }
    +    }
    +
    +    throw new Error(`Unexpected end of source. Reading '${stopStr}'`);
    +}
    +
    +readUptoCloseTag(stopStr) { //stopStr: "</tagname"
    +    const inputLength = this.buffer.length;
    +    const stopLength = stopStr.length;
    +    const stopBuffer = Buffer.from(stopStr);
    +    let stopIndex = 0;
    +    //0: non-matching, 1: matching stop string, 2: matching closing
    +    let match = 0;
    +
    +    for (let i = this.startIndex; i < inputLength; i++) {
    +        if(match === 1){//initial part matched
    +            if(stopIndex === 0) stopIndex = i;
    +            if(this.buffer[i] === Constants.space || this.buffer[i] === Constants.tab) continue;
    +            else if(this.buffer[i] === '>'){ //TODO: if it should be equivalent ASCII
    +                match = 2;
    +                //tag boundary found
    +                // this.startIndex
    +            }
    +        }else{
    +            match = 1;
    +            for (let j = 0; j < stopLength; j++) {
    +                if (this.buffer[i + j] !== stopBuffer[j]) {
    +                    match = 0;
    +                    break;
    +                }
    +            }
    +        }
    +        if (match === 2) {//matched closing part
    +            const result = this.buffer.slice(this.startIndex, stopIndex - 1 ).toString();
    +            this.startIndex = i + 1;
    +            return result;
    +        }
    +    }
    +
    +    throw new Error(`Unexpected end of source. Reading '${stopStr}'`);
    +}
    +
    +  readFromBuffer(n, shouldUpdate) {
    +    let ch;
    +    if (n === 1) {
    +      ch = this.buffer[this.startIndex];
    +      if (ch === 10) {
    +        this.line++;
    +        this.cols = 1;
    +      } else {
    +        this.cols++;
    +      }
    +      ch = String.fromCharCode(ch);
    +    } else {
    +      this.cols += n;
    +      ch = this.buffer.slice(this.startIndex, this.startIndex + n).toString();
    +    }
    +    if (shouldUpdate) this.updateBuffer(n);
    +    return ch;
    +  }
    +
    +  updateBufferBoundary(n = 1) { //n: number of characters read
    +    this.startIndex += n;
    +  }
    +
    +  canRead(n){
    +    n = n || this.startIndex;
    +    return this.buffer.length - n + 1 > 0;
    +  }
    +  
    +}
    +
    +module.exports = BufferSource;
    \ No newline at end of file
    
  • src/v5/inputSource/StringSource.js · +123 -0 · added
    @@ -0,0 +1,123 @@
    +const whiteSpaces = [" ", "\n", "\t"];
    +
    +
    +class StringSource{
    +  constructor(str){
    +    this.line = 1;
    +    this.cols = 0;
    +    this.buffer = str;
    +    //a boundary pointer to indicate where from the buffer dat should be read
    +    // data before this pointer can be deleted to free the memory
    +    this.startIndex = 0;
    +  }
    +
    +  readCh() {
    +    return this.buffer[this.startIndex++];
    +  }
    +
    +  readChAt(index) {
    +    return this.buffer[this.startIndex+index];
    +  }
    +
    +  readStr(n,from){
    +    if(typeof from === "undefined") from = this.startIndex;
    +    return this.buffer.substring(from, from + n);
    +  }
    +
    +  readUpto(stopStr) {
    +    const inputLength = this.buffer.length;
    +    const stopLength = stopStr.length;
    +
    +    for (let i = this.startIndex; i < inputLength; i++) {
    +      let match = true;
    +      for (let j = 0; j < stopLength; j++) {
    +        if (this.buffer[i + j] !== stopStr[j]) {
    +          match = false;
    +          break;
    +        }
    +      }
    +
    +      if (match) {
    +        const result = this.buffer.substring(this.startIndex, i);
    +        this.startIndex = i + stopLength;
    +        return result;
    +      }
    +    }
    +
    +    throw new Error(`Unexpected end of source. Reading '${stopStr}'`);
    +  }
    +
    +  readUptoCloseTag(stopStr) { //stopStr: "</tagname"
    +    const inputLength = this.buffer.length;
    +    const stopLength = stopStr.length;
    +    let stopIndex = 0;
    +    //0: non-matching, 1: matching stop string, 2: matching closing
    +    let match = 0;
    +
    +    for (let i = this.startIndex; i < inputLength; i++) {
    +      if(match === 1){//initial part matched
    +        if(stopIndex === 0) stopIndex = i;
    +        if(this.buffer[i] === ' ' || this.buffer[i] === '\t') continue;
    +        else if(this.buffer[i] === '>'){
    +          match = 2;
    +          //tag boundary found
    +          // this.startIndex
    +        }
    +      }else{
    +        match = 1;
    +        for (let j = 0; j < stopLength; j++) {
    +          if (this.buffer[i + j] !== stopStr[j]) {
    +            match = 0;
    +            break;
    +          }
    +        }
    +      }
    +      if (match === 2) {//matched closing part
    +        const result = this.buffer.substring(this.startIndex, stopIndex - 1 );
    +        this.startIndex = i + 1;
    +        return result;
    +      }
    +    }
    +
    +    throw new Error(`Unexpected end of source. Reading '${stopStr}'`);
    +  }
    +
    +  readFromBuffer(n, updateIndex){
    +    let ch;
    +    if(n===1){
    +      ch = this.buffer[this.startIndex];
    +      // if(ch === "\n") {
    +      //   this.line++;
    +      //   this.cols = 1;
    +      // }else{
    +      //   this.cols++;
    +      // }
    +    }else{
    +      ch = this.buffer.substring(this.startIndex, this.startIndex + n);
    +      // if("".indexOf("\n") !== -1){
    +      //   //TODO: handle the scenario when there are multiple lines
    +      //   //TODO: col should be set to number of chars after last '\n'
    +      //   // this.cols = 1;
    +      // }else{
    +      //   this.cols += n;
    +
    +      // }
    +    }
    +    if(updateIndex) this.updateBufferBoundary(n);
    +    return ch;
    +  }
    +
    +  //TODO: rename to updateBufferReadIndex
    +  
    +  updateBufferBoundary(n = 1) { //n: number of characters read
    +    this.startIndex += n;
    +  }
    +
    +  canRead(n){
    +    n = n || this.startIndex;
    +    return this.buffer.length - n + 1 > 0;
    +  }
    +  
    +}
    +
    +module.exports = StringSource;
    \ No newline at end of file
    
  • src/v5/OptionsBuilder.js · +73 -0 · added
    @@ -0,0 +1,73 @@
    +
    +const JsArrBuilder = require("./OutputBuilders/JsArrBuilder");
    +
    +// Default parser options. buildOptions() starts from this object and then
    +// overlays the options supplied by the caller.
    +const defaultOptions = {
    +  preserveOrder: false,
    +  removeNSPrefix: false, // remove NS from tag name or attribute name if true
    +  //ignoreRootElement : false,
    +  stopNodes: [], //nested tags will not be parsed even for errors
    +  // isArray: () => false, //User will set it
    +  htmlEntities: false,
    +  // skipEmptyListItem: false
    +  tags:{
    +    unpaired: [],
    +    nameFor:{
    +      cdata: false,
    +      comment: false,
    +      text: '#text'
    +    },
    +    separateTextProperty: false,
    +    valueParsers: []
    +  },
    +  attributes:{
    +    ignore: false,
    +    booleanType: true,
    +    entities: true
    +  },
    +
    +  // select: ["img[src]"],
    +  // stop: ["anim", "[ads]"]
    +  only: [], // rest tags will be skipped. It will result in flat array
    +  hierarchy: false, //will be used when a particular tag is set to be parsed.
    +  skip: [], // will be skipped from parse result. on('skip') will be triggered
    +
    +  select: [], // on('select', tag => tag ) will be called if match
    +  stop: [], //given tagPath will not be parsed. innerXML will be set as string value
    +  // a single JsArrBuilder factory is created when this module is loaded
    +  OutputBuilder: new JsArrBuilder(),
    +};
    +   
    +/**
    + * Build the effective parser options by layering the caller's options over
    + * the defaults.
    + *
    + * The nested `tags` and `attributes` objects and their lists are copied on
    + * every call. A plain spread of defaultOptions would share them, so each call
    + * would push the parser names again into the same list and merge the
    + * caller's nested options into defaultOptions itself.
    + *
    + * @param {object} [options] options supplied by the caller
    + * @returns {object} a new options object
    + */
    +const buildOptions = function(options) {
    +  const finalOptions = {
    +    ...defaultOptions,
    +    tags: {
    +      ...defaultOptions.tags,
    +      unpaired: [...defaultOptions.tags.unpaired],
    +      nameFor: { ...defaultOptions.tags.nameFor },
    +      valueParsers: [...defaultOptions.tags.valueParsers],
    +    },
    +    attributes: { ...defaultOptions.attributes },
    +  };
    +
    +  const parsers = finalOptions.tags.valueParsers;
    +  parsers.push("trim", "entities");
    +  //preserveOrder is an option of the caller; it was read from `this` before,
    +  //which is never the options object for this module level function
    +  if(!(options && options.preserveOrder)) parsers.push("join");
    +  parsers.push("boolean", "number", "currency", "date");
    +
    +  copyProperties(finalOptions, options);
    +  return finalOptions;
    +};
    +
    +/**
    + * Recursively copy the own enumerable properties of `source` into `target`.
    + * Nested plain objects are merged key by key. Arrays, primitives, null and
    + * the `OutputBuilder` value are assigned as they are.
    + *
    + * The keys `__proto__`, `constructor` and `prototype` are skipped, so options
    + * built from untrusted data (for example parsed JSON) cannot write into
    + * Object.prototype through the recursive merge.
    + *
    + * @param {object} target object updated in place
    + * @param {object} [source] object to read from; nothing happens when it is missing
    + */
    +function copyProperties(target, source) {
    +  if (source === null || typeof source !== 'object') return;
    +  for (const key of Object.keys(source)) {
    +    if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue;
    +    const value = source[key];
    +    const mergeable = key !== 'OutputBuilder'
    +      && value !== null && typeof value === 'object' && !Array.isArray(value);
    +    if (!mergeable) {
    +      // Copy non-nested properties (null included: typeof null is 'object')
    +      target[key] = value;
    +      continue;
    +    }
    +    // Recursively copy nested properties; never merge into a primitive, null or an array
    +    const current = target[key];
    +    if (current === null || typeof current !== 'object' || Array.isArray(current)) {
    +      target[key] = {};
    +    }
    +    copyProperties(target[key], value);
    +  }
    +}
    +
    +exports.buildOptions = buildOptions;
    +exports.defaultOptions = defaultOptions;
    \ No newline at end of file
    
  • src/v5/OutputBuilders/BaseOutputBuilder.js+69 0 added
    @@ -0,0 +1,69 @@
    +/**
    + * Behaviour shared by the output builders.
    + * It reads `this.options`, `this.registeredParsers` and `this.attributes`, and
    + * calls `this.addValue()` and `this.addPi()`; none of them is defined here, so
    + * the extending builder has to provide them.
    + */
    +class BaseOutputBuilder{
    +  constructor(){
    +    // this.attributes = {};
    +  }
    +
    +  /**
    +   * Collect an attribute of the tag being read.
    +   * With an `onAttribute` option the callback decides: a truthy result is
    +   * stored as `result.name` => `result.value`, a falsy result drops the attribute.
    +   * Otherwise the name gets the configured prefix/suffix and the value goes
    +   * through the attribute value parsers.
    +   * @param {string} name
    +   * @param {string} value
    +   */
    +  addAttribute(name, value){
    +    if(this.options.onAttribute){
    +      //TODO: better to pass tag path
    +      const v = this.options.onAttribute(name, value, this.tagName);
    +      //the check was inverted before: it dereferenced `v` only when it was falsy
    +      if(v) this.attributes[v.name] = v.value;
    +    }else{
    +      name = this.options.attributes.prefix + name + this.options.attributes.suffix;
    +      this.attributes[name] = this.parseValue(value, this.options.attributes.valueParsers);
    +    }
    +  }
    +
    +  /**
    +   * parse value by chain of parsers
    +   * @param {string} val 
    +   * @param {string[]} valParsers names of registered parsers, applied in order;
    +   *   names without a registered parser are skipped
    +   * @returns {any} value returned by the last parser applied
    +   */
    +    parseValue = function(val, valParsers){
    +      for (let i = 0; i < valParsers.length; i++) {
    +        let valParser = this.registeredParsers[valParsers[i]];
    +        if(valParser){
    +          val = valParser.parse(val);
    +          // if(!valParser.chainable) break;
    +        }
    +      }
    +      return val;
    +    }
    +
    +  /**
    +   * To add a nested empty tag.
    +   * Does nothing here; the builders override it.
    +   * @param {string} key 
    +   * @param {any} val 
    +   */
    +  _addChild(key, val){}
    +
    +  /**
    +   * skip the comment if property is not set
    +   */
    +  addComment(text){
    +    if(this.options.nameFor.comment)
    +      this._addChild(this.options.nameFor.comment, text);
    +  }
    +
    +  //store CDATA separately if property is set
    +  //otherwise add to tag's value
    +  addCdata(text){
    +    if (this.options.nameFor.cdata) {
    +      this._addChild(this.options.nameFor.cdata, text);
    +    } else {
    +      this.addRawValue(text || "");
    +    }
    +  }
    +
    +  addRawValue = text => this.addValue(text);
    +
    +  /**
    +   * Handle the XML declaration: it is added as a "?xml" PI tag only when the
    +   * `declaration` option is set. The collected attributes are cleared either way.
    +   */
    +  addDeclaration(){
    +    if(this.options.declaration){
    +      this.addPi("?xml");
    +    }
    +    this.attributes = {}
    +  }
    +}
    +
    +module.exports = BaseOutputBuilder;
    \ No newline at end of file
    
  • src/v5/OutputBuilders/JsArrBuilder.js+102 0 added
    @@ -0,0 +1,102 @@
    +const {buildOptions,registerCommonValueParsers} = require("./ParserOptionsBuilder");
    +
    +/**
    + * Factory for JsArrBuilder. The builder options and the common value parsers
    + * are resolved once here and handed to every builder created by getInstance().
    + */
    +class OutputBuilder{
    +  constructor(options){
    +    const resolvedOptions = buildOptions(options);
    +    const parsers = registerCommonValueParsers();
    +    this.options = resolvedOptions;
    +    this.registeredParsers = parsers;
    +  }
    +
    +  /**
    +   * Register a value parser under `name`.
    +   * A parser already registered with that name is replaced without warning.
    +   */
    +  registerValueParser(name,parserInstance){
    +    const parsers = this.registeredParsers;
    +    parsers[name] = parserInstance;
    +  }
    +
    +  /** @returns {JsArrBuilder} a new builder using this factory's options and parsers */
    +  getInstance(){
    +    const {options, registeredParsers} = this;
    +    return new JsArrBuilder(options, registeredParsers);
    +  }
    +}
    +
    +const rootName = '!js_arr';
    +const BaseOutputBuilder = require("./BaseOutputBuilder");
    +
    +/**
    + * Builds the output as a tree of Node objects: each tag becomes a Node and
    + * its nested tags, text, CDATA and comments are pushed to its `child` array.
    + */
    +class JsArrBuilder extends BaseOutputBuilder{
    +
    +  constructor(options,registeredParsers) {
    +    super();
    +    this.tagsStack = [];
    +    this.options = options;
    +    this.registeredParsers = registeredParsers;
    +
    +    this.root = new Node(rootName);
    +    this.currentNode = this.root;
    +    this.attributes = {};
    +  }
    +
    +  /**
    +   * Open a tag: the current node is kept on the stack and a new node, carrying
    +   * the attributes collected so far, becomes the current node.
    +   * @param {{name: string}} tag
    +   */
    +  addTag(tag){
    +    //when a new tag is added, it should be added as child of current node
    +    //TODO: shift this check to the parser
    +    if(tag.name === "__proto__") tag.name = "#__proto__";
    +
    +    this.tagsStack.push(this.currentNode);
    +    this.currentNode = new Node(tag.name, this.attributes);
    +    this.attributes = {};
    +  }
    +
    +  /**
    +   * Close the current tag and put its parent back in scope.
    +   * When an `onClose` option is set it is called with the closed node and a
    +   * TagPathMatcher; a truthy result means the node is not added to its parent.
    +   */
    +  closeTag(){
    +    const node = this.currentNode;
    +    this.currentNode = this.tagsStack.pop(); //set parent node in scope
    +    if(this.options.onClose !== undefined){
    +      //TagPathMatcher is not required at the top of this file, so it is loaded
    +      //here; without it this branch ends in a ReferenceError
    +      const TagPathMatcher = require("../TagPathMatcher");
    +      //TODO TagPathMatcher 
    +      const resultTag = this.options.onClose(node, 
    +        new TagPathMatcher(this.tagsStack,node));
    +
    +      if(resultTag) return;
    +    }
    +    this.currentNode.child.push(node);  //to parent node
    +  }
    +
    +  //Called by parent class methods
    +  _addChild(key, val){
    +    // if(key === "__proto__") tagName = "#__proto__";
    +    this.currentNode.child.push( {[key]: val });
    +    // this.currentNode.leafType = false;
    +  }
    +
    +  /**
    +   * Add text value child node 
    +   * @param {string} text 
    +   */
    +  addValue(text){
    +    this.currentNode.child.push( {[this.options.nameFor.text]: this.parseValue(text, this.options.tags.valueParsers) });
    +  }
    +
    +  /**
    +   * Add a processing instruction as a child node unless `ignorePiTags` is set.
    +   * The collected attributes are cleared either way.
    +   * @param {string} name
    +   */
    +  addPi(name){
    +    //TODO: set pi flag
    +    if(!this.options.ignorePiTags){
    +      const node = new Node(name, this.attributes);
    +      //NOTE(review): this also overwrites ":@" of the current (parent) node with
    +      //the attributes of the PI tag - confirm that it is intended
    +      this.currentNode[":@"] = this.attributes;
    +      this.currentNode.child.push(node);
    +    }
    +    this.attributes = {};
    +  }
    +
    +  /** @returns the first child of the root node */
    +  getOutput(){
    +    return this.root.child[0];
    +  }
    +}
    +
    +
    +
    +/**
    + * Output node: a tag name, its children and, when there are any, its
    + * attributes under the ":@" key.
    + */
    +class Node{
    +  constructor(tagname, attributes){
    +    this.tagname = tagname;
    +    this.child = []; //nested tags, text, cdata, comments
    +    const hasAttributes = Boolean(attributes) && Object.keys(attributes).length > 0;
    +    if(hasAttributes){
    +      this[":@"] = attributes;
    +    }
    +  }
    +}
    +
    +module.exports = OutputBuilder;
    \ No newline at end of file
    
  • src/v5/OutputBuilders/JsMinArrBuilder.js+101 0 added
    @@ -0,0 +1,101 @@
    +const {buildOptions,registerCommonValueParsers} = require("./ParserOptionsBuilder");
    +
    +/**
    + * Factory for JsMinArrBuilder. The builder options and the common value
    + * parsers are resolved once here and handed to every builder created by
    + * getInstance().
    + */
    +class OutputBuilder{
    +  constructor(options){
    +    const resolvedOptions = buildOptions(options);
    +    const parsers = registerCommonValueParsers();
    +    this.options = resolvedOptions;
    +    this.registeredParsers = parsers;
    +  }
    +
    +  /**
    +   * Register a value parser under `name`.
    +   * A parser already registered with that name is replaced without warning.
    +   */
    +  registerValueParser(name,parserInstance){
    +    const parsers = this.registeredParsers;
    +    parsers[name] = parserInstance;
    +  }
    +
    +  /** @returns {JsMinArrBuilder} a new builder using this factory's options and parsers */
    +  getInstance(){
    +    const {options, registeredParsers} = this;
    +    return new JsMinArrBuilder(options, registeredParsers);
    +  }
    +}
    +
    +const BaseOutputBuilder = require("./BaseOutputBuilder");
    +const rootName = '^';
    +
    +/**
    + * Builds the output as nested `{ tagName: [children] }` objects. The
    + * attributes of a tag, when there are any, are stored under the ":@" key of
    + * the same object.
    + */
    +class JsMinArrBuilder extends BaseOutputBuilder{
    +
    +  constructor(options,registeredParsers) {
    +    super();
    +    this.tagsStack = [];
    +    this.options = options;
    +    this.registeredParsers = registeredParsers;
    +
    +    this.root = {[rootName]: []};
    +    this.currentNode = this.root;
    +    this.currentNodeTagName = rootName;
    +    this.attributes = {};
    +  }
    +
    +  /**
    +   * Open a tag: the current node and its name are kept on the stack and a new
    +   * `{ name: [] }` node becomes the current node.
    +   * @param {{name: string}} tag
    +   */
    +  addTag(tag){
    +    //when a new tag is added, it should be added as child of current node
    +    //TODO: shift this check to the parser
    +    if(tag.name === "__proto__") tag.name = "#__proto__";
    +
    +    this.tagsStack.push([this.currentNodeTagName,this.currentNode]); //this.currentNode is parent node here
    +    this.currentNodeTagName = tag.name;
    +    this.currentNode = { [tag.name]:[]}
    +    if(Object.keys(this.attributes).length > 0){
    +      this.currentNode[":@"] = this.attributes;
    +      this.attributes = {};
    +    }
    +  }
    +
    +  /**
    +   * Close the current tag and put its parent back in scope.
    +   * When an `onClose` option is set it is called with the closed node and a
    +   * TagPathMatcher; a truthy result means the node is not added to its parent.
    +   */
    +  closeTag(){
    +    const node = this.currentNode;
    +    const arr = this.tagsStack.pop(); //set parent node in scope
    +    this.currentNodeTagName = arr[0];
    +    this.currentNode = arr[1];
    +
    +    if(this.options.onClose !== undefined){
    +      //TagPathMatcher is not required at the top of this file, so it is loaded
    +      //here; without it this branch ends in a ReferenceError
    +      const TagPathMatcher = require("../TagPathMatcher");
    +      //TODO TagPathMatcher 
    +      const resultTag = this.options.onClose(node, 
    +        new TagPathMatcher(this.tagsStack,node));
    +
    +      if(resultTag) return;
    +    }
    +    this.currentNode[this.currentNodeTagName].push(node);  //to parent node
    +  }
    +
    +  //Called by parent class methods
    +  _addChild(key, val){
    +    // if(key === "__proto__") tagName = "#__proto__";
    +    //the current node is an object; its children live in the array stored under its tag name
    +    this.currentNode[this.currentNodeTagName].push( {[key]: val });
    +    // this.currentNode.leafType = false;
    +  }
    +
    +  /**
    +   * Add text value child node 
    +   * @param {string} text 
    +   */
    +  addValue(text){
    +    this.currentNode[this.currentNodeTagName].push( {[this.options.nameFor.text]: this.parseValue(text, this.options.tags.valueParsers) });
    +  }
    +
    +  /**
    +   * Add a processing instruction as a child node unless `ignorePiTags` is set.
    +   * The collected attributes are cleared either way.
    +   * @param {string} name
    +   */
    +  addPi(name){
    +    if(!this.options.ignorePiTags){
    +      const node = { [name]:[]}
    +      //same rule as addTag: ":@" is only set when there are attributes
    +      if(Object.keys(this.attributes).length > 0){
    +        node[":@"] = this.attributes;
    +      }
    +      this.currentNode[this.currentNodeTagName].push(node);
    +    }
    +    this.attributes = {};
    +  }
    +
    +  /** @returns {Array} the children collected under the root */
    +  getOutput(){
    +    return this.root[rootName];
    +  }
    +}
    +
    +module.exports = OutputBuilder;
    \ No newline at end of file
    
  • src/v5/OutputBuilders/JsObjBuilder.js+155 0 added
    @@ -0,0 +1,155 @@
    +
    +
    +const {buildOptions,registerCommonValueParsers} = require("./ParserOptionsBuilder");
    +
    +/**
    + * Factory for JsObjBuilder. The builder options and the common value parsers
    + * are resolved once here and handed to every builder created by getInstance().
    + */
    +class OutputBuilder{
    +  constructor(options){
    +    const resolvedOptions = buildOptions(options);
    +    const parsers = registerCommonValueParsers();
    +    this.options = resolvedOptions;
    +    this.registeredParsers = parsers;
    +  }
    +
    +  /**
    +   * Register a value parser under `name`.
    +   * A parser already registered with that name is replaced without warning.
    +   */
    +  registerValueParser(name,parserInstance){
    +    const parsers = this.registeredParsers;
    +    parsers[name] = parserInstance;
    +  }
    +
    +  /** @returns {JsObjBuilder} a new builder using this factory's options and parsers */
    +  getInstance(){
    +    const {options, registeredParsers} = this;
    +    return new JsObjBuilder(options, registeredParsers);
    +  }
    +}
    +
    +const BaseOutputBuilder = require("./BaseOutputBuilder");
    +const rootName = '^';
    +
    +/**
    + * Builds the output as a plain JS object: a tag becomes a property of its
    + * parent, and a repeated tag name becomes an array of values.
    + */
    +class JsObjBuilder extends BaseOutputBuilder{
    +
    +  constructor(options,registeredParsers) {
    +    super();
    +    //hold the raw detail of a tag and sequence with reference to the output
    +    this.tagsStack = [];
    +    this.options = options;
    +    this.registeredParsers = registeredParsers;
    +
    +    this.root = {};
    +    this.parent = this.root;
    +    this.tagName = rootName;
    +    this.value = {};
    +    this.textValue = "";
    +    this.attributes = {};
    +  }
    +
    +  /**
    +   * Open a tag: the state of the parent is kept on the stack and the new tag
    +   * starts with the collected attributes as its value, or "" without attributes.
    +   * @param {{name: string}} tag
    +   */
    +  addTag(tag){
    +
    +    let value = "";
    +    if( !isEmpty(this.attributes)){
    +      value = {};
    +      if(this.options.attributes.groupBy){
    +        value[this.options.attributes.groupBy] = this.attributes;
    +      }else{
    +        value = this.attributes;
    +      }
    +    }
    +
    +    this.tagsStack.push([this.tagName, this.textValue, this.value]); //parent tag, parent text value, parent tag value (jsobj)
    +    this.tagName = tag.name;
    +    this.value = value;
    +    this.textValue = "";
    +    this.attributes = {};
    +  }
    +
    +  /**
    +   * Close the current tag: finish its value, put the parent back in scope and
    +   * add the tag to the parent.
    +   * When an `onTagClose` option is set it is called with the tag name, value,
    +   * text value and a TagPathMatcher. Its result replaces `{tagName, value}`;
    +   * a falsy result drops the tag.
    +   */
    +  closeTag(){
    +    let value = this.value;
    +    const textValue = this.textValue;
    +
    +    //update tag text value
    +    if(typeof value !== "object"){
    +      value = this.parseValue(textValue.trim(), this.options.tags.valueParsers);
    +    }else if(textValue.length > 0){
    +      value[this.options.nameFor.text] = this.parseValue(textValue.trim(), this.options.tags.valueParsers);
    +    }
    +
    +    let resultTag= {
    +      tagName: this.tagName,
    +      value: value
    +    };
    +
    +    //set parent node in scope first, so that the builder is back at the
    +    //parent level even when the closed tag is dropped below
    +    const arr = this.tagsStack.pop();
    +    this.tagName = arr[0];
    +    this.textValue = arr[1];
    +    this.value = arr[2];
    +
    +    if(this.options.onTagClose !== undefined){
    +      //TagPathMatcher is not required at the top of this file, so it is loaded here
    +      const TagPathMatcher = require("../TagPathMatcher");
    +      //TODO TagPathMatcher 
    +      //the guarded option is the one called now (it was `onClose` before), and
    +      //the closed tag is passed where an undefined `node` was referenced
    +      //NOTE(review): confirm the node shape expected by TagPathMatcher
    +      resultTag = this.options.onTagClose(resultTag.tagName, value, textValue, new TagPathMatcher(this.tagsStack,resultTag));
    +
    +      if(!resultTag) return;
    +    }
    +
    +    this.value = this._addChildTo(resultTag.tagName, resultTag.value, this.value);
    +  }
    +
    +  //Called by parent class methods
    +  _addChild(key, val){
    +    if(typeof this.value === "string"){
    +      this.value = { [this.options.nameFor.text] : this.value };
    +    }
    +
    +    this._addChildTo(key, val, this.value);
    +    // this.currentNode.leafType = false;
    +    this.attributes = {};
    +  }
    +
    +  /**
    +   * Set `val` under `key` of `node`; a key that is already set becomes an array.
    +   * @param {string} key
    +   * @param {any} val
    +   * @param {object|string} node a string is replaced by a new object
    +   * @returns {object} the object that received the value
    +   */
    +  _addChildTo(key, val, node){
    +    if(typeof node === 'string') node = {};
    +    //an own-property check: a truthiness check loses a first value of "", 0 or
    +    //false and treats inherited names such as "toString" as repeated tags
    +    if(!Object.prototype.hasOwnProperty.call(node, key)){
    +      node[key] = val;
    +    }else{ //Repeated
    +      if(!Array.isArray(node[key])){ //but not stored as array
    +        node[key] = [node[key]];
    +      }
    +      node[key].push(val);
    +    }
    +    return node;
    +  }
    +
    +
    +  /**
    +   * Add text value child node 
    +   * @param {string} text 
    +   */
    +  addValue(text){
    +    //TODO: use bytes join
    +    if(this.textValue.length > 0) this.textValue += " " + text;
    +    else this.textValue = text;
    +  }
    +
    +  /**
    +   * Add a processing instruction as a child of the current tag. Its value is
    +   * the collected attributes, or "" when there are none.
    +   * @param {string} name
    +   */
    +  addPi(name){
    +    let value = "";
    +    if( !isEmpty(this.attributes)){
    +      value = {};
    +      if(this.options.attributes.groupBy){
    +        value[this.options.attributes.groupBy] = this.attributes;
    +      }else{
    +        value = this.attributes;
    +      }
    +    }
    +    this._addChild(name, value);
    +    
    +  }
    +
    +  /** @returns the value built for the tag currently in scope */
    +  getOutput(){
    +    return this.value;
    +  }
    +}
    +
    +/**
    + * @param {object} obj
    + * @returns {boolean} true when `obj` has no own enumerable string keys
    + */
    +function isEmpty(obj) {
    +  const ownKeys = Object.keys(obj);
    +  return ownKeys.length === 0;
    +}
    +
    +module.exports = OutputBuilder;
    \ No newline at end of file
    
  • src/v5/OutputBuilders/ParserOptionsBuilder.js+94 0 added
    @@ -0,0 +1,94 @@
    +const trimParser = require("../valueParsers/trim")
    +const booleanParser = require("../valueParsers/booleanParser")
    +const currencyParser = require("../valueParsers/currency")
    +const numberParser = require("../valueParsers/number")
    +
    +// Default options of the output builders. buildOptions() starts from this
    +// object, adds the default value parser names and then overlays the options
    +// supplied by the caller.
    +const defaultOptions={
    +  // property names used in the output for text, comments and CDATA;
    +  // an empty name is falsy, so comments are skipped and CDATA is added as text
    +  nameFor:{
    +    text: "#text",
    +    comment: "",
    +    cdata: "",
    +  },
    +  // onTagClose: () => {},
    +  // onAttribute: () => {},
    +  piTag: false,
    +  declaration: false, //"?xml"
    +  tags: {
    +    valueParsers: [
    +      // "trim",
    +      // "boolean",
    +      // "number",
    +      // "currency",
    +      // "date",
    +    ]
    +  },
    +  attributes:{
    +    prefix: "@_",
    +    suffix: "",
    +    groupBy: "",
    +    
    +    valueParsers: [
    +      // "trim",
    +      // "boolean",
    +      // "number",
    +      // "currency",
    +      // "date",
    +    ]
    +  }
    +}
    +/**
    + * Build the effective builder options by layering the caller's options over
    + * the defaults.
    + *
    + * The nested objects and the valueParsers lists are copied on every call. A
    + * plain spread of defaultOptions would share them, so each call would push the
    + * four parser names again into the same lists (running every parser once more
    + * per builder created) and merge the caller's nested options into
    + * defaultOptions itself.
    + *
    + * @param {object} [options] options supplied by the caller
    + * @returns {object} a new options object
    + */
    +function buildOptions(options){
    +  //clone, including the nested objects and lists
    +  const finalOptions = {
    +    ...defaultOptions,
    +    nameFor: { ...defaultOptions.nameFor },
    +    tags: {
    +      ...defaultOptions.tags,
    +      valueParsers: [...defaultOptions.tags.valueParsers],
    +    },
    +    attributes: {
    +      ...defaultOptions.attributes,
    +      valueParsers: [...defaultOptions.attributes.valueParsers],
    +    },
    +  };
    +
    +  //default parsers, applied in this order
    +  finalOptions.tags.valueParsers.push("trim", "boolean", "number", "currency");
    +  finalOptions.attributes.valueParsers.push("trim", "boolean", "number", "currency");
    +
    +  copyProperties(finalOptions,options);
    +  return finalOptions;
    +}
    +
    +/**
    + * Recursively copy the own enumerable properties of `source` into `target`.
    + * Nested plain objects are merged key by key. Arrays, primitives and null are
    + * assigned as they are.
    + *
    + * The keys `__proto__`, `constructor` and `prototype` are skipped, so options
    + * built from untrusted data (for example parsed JSON) cannot write into
    + * Object.prototype through the recursive merge.
    + *
    + * @param {object} target object updated in place
    + * @param {object} [source] object to read from; nothing happens when it is missing
    + */
    +function copyProperties(target, source) {
    +  if (source === null || typeof source !== 'object') return;
    +  for (const key of Object.keys(source)) {
    +    if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue;
    +    const value = source[key];
    +    const mergeable = value !== null && typeof value === 'object' && !Array.isArray(value);
    +    if (!mergeable) {
    +      // Copy non-nested properties (null included: typeof null is 'object')
    +      target[key] = value;
    +      continue;
    +    }
    +    // Recursively copy nested properties; never merge into a primitive, null or an array
    +    const current = target[key];
    +    if (current === null || typeof current !== 'object' || Array.isArray(current)) {
    +      target[key] = {};
    +    }
    +    copyProperties(target[key], value);
    +  }
    +}
    +
    +/**
    + * Create the value parsers that every output builder starts with.
    + * @returns {object} parser instances keyed by the name used in `valueParsers`
    + */
    +function registerCommonValueParsers(){
    +  const numberOptions = {
    +    hex: true,
    +    leadingZeros: true,
    +    eNotation: true
    +  };
    +  const parsers = {};
    +  parsers["trim"] = new trimParser();
    +  // "join": this.entityParser.parse,
    +  parsers["boolean"] = new booleanParser();
    +  parsers["number"] = new numberParser(numberOptions);
    +  parsers["currency"] = new currencyParser();
    +  // "date": this.entityParser.parse,
    +  return parsers;
    +}
    +
    +module.exports = {
    +  buildOptions : buildOptions,
    +  registerCommonValueParsers: registerCommonValueParsers
    +}
    \ No newline at end of file
    
  • src/v5/Report.js+0 0 added
  • src/v5/TagPath.js+81 0 added
    @@ -0,0 +1,81 @@
    +/**
    + * Parsed form of a tag path expression such as "a[a] b[b] c": a list of
    + * TagPathNode entries, one per space separated tag, each optionally carrying
    + * an attribute name and value read from a [name='value'] suffix.
    + *
    + * NOTE(review): no module.exports is visible in this file, although
    + * TagPathMatcher loads it with require("./TagPath") - confirm how it is exported.
    + */
    +class TagPath{
    +  /**
    +   * @param {string} pathStr path expression
    +   * @throws {Error} when a quoted attribute value or a "[" is never closed
    +   */
    +  constructor(pathStr){
    +    let text = "";
    +    let tName = "";
    +    let pos; //the position is not parsed from the path yet, so it stays undefined
    +    let aName = "";
    +    let aVal = "";
    +    this.stack = []
    +
    +    for (let i = 0; i < pathStr.length; i++) {
    +      let ch = pathStr[i];
    +      if(ch === " ") {
    +        if(text.length === 0) continue;
    +        tName = text; text = "";
    +      }else if(ch === "["){
    +        if(tName.length === 0){
    +          tName = text; text = "";
    +        }
    +        let closed = false;
    +        for (i++; i < pathStr.length; i++) {
    +          ch = pathStr[i];
    +          if(ch === "=") continue;
    +          else if(ch === "]") {aName = text.trim(); text = ""; closed = true; break;}
    +          else if(ch === "'" || ch === '"'){
    +            const attrEnd = pathStr.indexOf(ch,i+1);
    +            //without this check i is set to -1 and the same path is scanned forever
    +            if(attrEnd === -1) throw new Error(`Unclosed quote in tag path '${pathStr}'`);
    +            aVal = pathStr.substring(i+1, attrEnd);
    +            i = attrEnd;
    +          }else{
    +            text +=ch;
    +          }
    +        }
    +        //otherwise the attribute text read so far would end up as a tag name
    +        if(!closed) throw new Error(`Missing ']' in tag path '${pathStr}'`);
    +      }else if(text.length === 0 && tName.length > 0){//reading tagName
    +        //save previous tag
    +        this.stack.push(new TagPathNode(tName,pos,aName,aVal));
    +        text = ch; tName = ""; aName = ""; aVal = "";
    +      }else{
    +        text+=ch;
    +      }
    +    }
    +
    +    //last tag in the path
    +    if(tName.length >0 || text.length>0){
    +      this.stack.push(new TagPathNode(text||tName,pos,aName,aVal));
    +    }
    +  }
    +
    +  /**
    +   * Test this path against the open tags and the tag being processed.
    +   * Unless the path starts with "*", the path must be one entry longer than
    +   * `tagStack` and its entries must match the stack entries in order. The last
    +   * path entry must always match `node`.
    +   * @param {Array} tagStack entries for the ancestors of `node`
    +   * @param {object} node the tag being processed
    +   * @returns {boolean}
    +   */
    +  match(tagStack,node){
    +    if(this.stack.length === 0) return false; //an empty path matches nothing
    +    if(this.stack[0].name !== "*"){
    +      if(this.stack.length !== tagStack.length +1) return false;
    +  
    +      //loop through tagPath and tagStack and match
    +      //(the bound was `this.tagStack.length` before, a property that is never set)
    +      for (let i = 0; i < tagStack.length; i++) {
    +        if(!this.stack[i].match(tagStack[i])) return false;
    +      }
    +    }
    +    if(!this.stack[this.stack.length - 1].match(node)) return false;
    +    return true;
    +  }
    +}
    +
    +/**
    + * One tag of a tag path: its name and, optionally, a position and an
    + * attribute name/value that a node has to carry.
    + */
    +class TagPathNode{
    +  constructor(name,position,attrName,attrVal){
    +    this.name = name;
    +    this.position = position;
    +    this.attrName = attrName;
    +    this.attrVal = attrVal;
    +  }
    +
    +  /**
    +   * Every condition set on this path node has to hold; before, each check
    +   * overwrote the result of the previous one and the attribute checks indexed
    +   * `node.attrs` with a boolean.
    +   * @param {object} node object with `name` and, optionally, `position` and `attrs`
    +   *   (NOTE(review): confirm that the builders pass nodes of this shape)
    +   * @returns {boolean}
    +   */
    +  match(node){
    +    if(node.name !== this.name) return false;
    +    if(this.position && node.position !== this.position) return false;
    +    if(this.attrName){
    +      const attrs = node.attrs || {};
    +      if(attrs[this.attrName] === undefined) return false;
    +      if(this.attrVal && attrs[this.attrName] !== this.attrVal) return false;
    +    }
    +    return true;
    +  }
    +}
    +
    +// console.log((new TagPath("* b[b]")).stack);
    +// console.log((new TagPath("a[a] b[b] c")).stack);
    +// console.log((new TagPath(" b [ b= 'cf  sdadwa' ] a  ")).stack);
    \ No newline at end of file
    
  • src/v5/TagPathMatcher.js+15 0 added
    @@ -0,0 +1,15 @@
    +const TagPath = require("./TagPath");
    +
    +/**
    + * Keeps the tag stack and the node of the tag being processed, so that a
    + * path expression can be tested against them.
    + */
    +class TagPathMatcher{
    +  constructor(stack,node){
    +    Object.assign(this, {stack, node});
    +  }
    +
    +  /**
    +   * @param {string} path tag path expression, parsed with TagPath
    +   * @returns result of TagPath#match for the kept stack and node
    +   */
    +  match(path){
    +    return new TagPath(path).match(this.stack, this.node);
    +  }
    +}
    +
    +module.exports = TagPathMatcher;
    \ No newline at end of file
    
  • src/v5/valueParsers/booleanParserExt.js+20 0 added
    @@ -0,0 +1,20 @@
    +/**
    + * Convert boolean-like strings with parse(). The items of an array are
    + * converted in place, one by one.
    + * @param {any} val a value or an array of values
    + * @returns {any} the converted value, or the same array with converted items
    + */
    +function boolParserExt(val){
    +    //Array.isArray: a bare `isArray` is not defined anywhere in this file
    +    if(Array.isArray(val)){
    +        for (let i = 0; i < val.length; i++) {
    +            val[i] = parse(val[i])
    +        }
    +    }else{
    +        val = parse(val)
    +    }
    +    return val;
    +}
    +
    +/**
    + * Map "true"/"yes"/"1" to true and "false"/"no"/"0" to false, ignoring case.
    + * @param {any} val
    + * @returns {any} a boolean for the strings above, otherwise `val` unchanged
    + */
    +function parse(val){
    +    if (typeof val !== 'string') return val;
    +    switch (val.toLowerCase()) {
    +        case 'true':
    +        case "yes":
    +        case "1":
    +            return true;
    +        case 'false':
    +        case "no":
    +        case "0":
    +            return false;
    +        default:
    +            return val;
    +    }
    +}
    +module.exports = boolParserExt;
    \ No newline at end of file
    
  • src/v5/valueParsers/booleanParser.js+23 0 added
    @@ -0,0 +1,23 @@
    +/**
    + * Value parser turning the strings of a "true" list and a "false" list into
    + * booleans. The value is lower-cased before it is looked up in the lists.
    + */
    +class boolParser{
    +    /**
    +     * @param {string[]} [trueList] defaults to ["true"]
    +     * @param {string[]} [falseList] defaults to ["false"]
    +     */
    +    constructor(trueList, falseList){
    +        this.trueList = trueList ? trueList : ["true"];
    +        this.falseList = falseList ? falseList : ["false"];
    +    }
    +
    +    /**
    +     * @param {any} val
    +     * @returns {any} true/false for a listed string, otherwise `val` unchanged
    +     */
    +    parse(val){
    +        if (typeof val !== 'string') return val;
    +        //TODO: performance: don't convert
    +        const lowered = val.toLowerCase();
    +        if(this.trueList.indexOf(lowered) !== -1) return true;
    +        if(this.falseList.indexOf(lowered) !== -1) return false;
    +        return val;
    +    }
    +}
    +module.exports = boolParser;
    \ No newline at end of file
    
  • src/v5/valueParsers/currency.js+31 0 added
    @@ -0,0 +1,31 @@
    +
    +//locale used to read an amount, chosen by its currency symbol
    +const localeMap = {
    +    "$":"en-US",
    +    "€":"de-DE",
    +    "£":"en-GB",
    +    "¥":"ja-JP",
    +    "₹":"en-IN",
    +}
    +
    +//longest string that is inspected; a longer value is returned untouched, so
    +//the cost of a check does not grow with the size of the input
    +const defaultMaxLength = 200;
    +const currencySymbols = Object.keys(localeMap);
    +//separators and validation regex per locale, built once and reused
    +const localeRules = new Map();
    +
    +//escape the characters that have a meaning in a regular expression
    +function escapeRegex(text){
    +    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    +}
    +
    +/**
    + * Separators of a locale and a regex for an amount written with them:
    + * optional sign, digits (plain or in groups of three), up to two decimals.
    + * The regex has no whitespace runs and no nested quantifier over the same
    + * characters, unlike the single `\s*…\s*…\s*` pattern it replaces.
    + * @param {string} locale BCP 47 tag
    + * @returns {{group: string, decimal: string, check: RegExp}}
    + */
    +function getLocaleRules(locale){
    +    let rules = localeRules.get(locale);
    +    if(rules === undefined){
    +        const parts = new Intl.NumberFormat(locale).formatToParts(1234567.5);
    +        const group = (parts.find(part => part.type === "group") || {value: ","}).value;
    +        const decimal = (parts.find(part => part.type === "decimal") || {value: "."}).value;
    +        const g = escapeRegex(group);
    +        const d = escapeRegex(decimal);
    +        rules = {
    +            group,
    +            decimal,
    +            check: new RegExp(`^([-+])?(\\d{1,3}(?:${g}\\d{3})+|\\d+)?(${d}\\d{1,2})?$`),
    +        };
    +        localeRules.set(locale, rules);
    +    }
    +    return rules;
    +}
    +
    +/**
    + * Value parser turning a formatted amount such as "1,000.50 $" into a number.
    + */
    +class CurrencyParser{
    +    /**
    +     * @param {object} [options]
    +     * @param {string} [options.locale] locale to read every amount with; by
    +     *   default it follows the currency symbol, "₹" (en-IN) when there is none
    +     * @param {number} [options.maxLength] longest string inspected (default 200)
    +     */
    +    constructor(options){
    +        //the parser is also created without options, so never leave this undefined
    +        this.options = options || {};
    +    }
    +
    +    /**
    +     * @param {any} val
    +     * @returns {any} the amount as a number; `val` unchanged when it is not a
    +     *   string, is too long, or is not a well formed amount
    +     */
    +    parse(val){
    +        if (typeof val !== 'string') return val;
    +        const maxLength = this.options.maxLength > 0 ? this.options.maxLength : defaultMaxLength;
    +        if (val.length > maxLength) return val;
    +
    +        //take one currency symbol off the start or the end
    +        let text = val.trim();
    +        let symbol;
    +        if (currencySymbols.includes(text[0])) {
    +            symbol = text[0];
    +            text = text.slice(1).trimStart();
    +        } else if (currencySymbols.includes(text[text.length - 1])) {
    +            symbol = text[text.length - 1];
    +            text = text.slice(0, -1).trimEnd();
    +        }
    +
    +        const rules = getLocaleRules(this.options.locale || localeMap[symbol || "₹"]);
    +        const match = rules.check.exec(text);
    +        //every part of the pattern is optional: an empty string, a lone sign or
    +        //a lone symbol is not an amount
    +        if (match === null || (match[2] === undefined && match[3] === undefined)) return val;
    +
    +        const sign = match[1] || "";
    +        const integerPart = (match[2] || "0").split(rules.group).join("");
    +        const fraction = match[3] === undefined ? "" : "." + match[3].slice(rules.decimal.length);
    +        return Number(sign + integerPart + fraction);
    +    }
    +}
    +module.exports = CurrencyParser;
    \ No newline at end of file
    
  • src/v5/valueParsers/EntitiesParser.js+105 0 added
    @@ -0,0 +1,105 @@
    +// "&amp;" and its numeric forms; replaceEntitiesValue() applies it after all
    +// the other replacements
    +const ampEntity = { regex: /&(amp|#38|#x26);/g, val : "&"};
    +// named HTML entities, replaced only when the parser is created with
    +// replaceHtmlEntities set
    +const htmlEntities = {
    +    "space": { regex: /&(nbsp|#160);/g, val: " " },
    +    // "lt" : { regex: /&(lt|#60);/g, val: "<" },
    +    // "gt" : { regex: /&(gt|#62);/g, val: ">" },
    +    // "amp" : { regex: /&(amp|#38);/g, val: "&" },
    +    // "quot" : { regex: /&(quot|#34);/g, val: "\"" },
    +    // "apos" : { regex: /&(apos|#39);/g, val: "'" },
    +    "cent" : { regex: /&(cent|#162);/g, val: "¢" },
    +    "pound" : { regex: /&(pound|#163);/g, val: "£" },
    +    "yen" : { regex: /&(yen|#165);/g, val: "¥" },
    +    "euro" : { regex: /&(euro|#8364);/g, val: "€" },
    +    "copyright" : { regex: /&(copy|#169);/g, val: "©" },
    +    "reg" : { regex: /&(reg|#174);/g, val: "®" },
    +    "inr" : { regex: /&(inr|#8377);/g, val: "₹" },
    +};
    +
    +/**
    + * Replaces entity references (&name;) in text values: entities declared in
    + * the DOCTYPE, the predefined XML entities plus the ones added by the user,
    + * and optionally a set of HTML entities.
    + */
    +class EntitiesParser{
    +    /**
    +     * @param {boolean} replaceHtmlEntities when truthy the HTML entities are replaced too
    +     */
    +    constructor(replaceHtmlEntities) {
    +      this.replaceHtmlEntities = replaceHtmlEntities;
    +      this.docTypeEntities = {};
    +      this.lastEntities = {
    +        "apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
    +        "gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
    +        "lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
    +        "quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
    +      };
    +    }
    +
    +    /**
    +     * Regex matching every "&name;" reference. The name is escaped, so a
    +     * character such as "." in it is matched literally.
    +     * @param {string} name entity name
    +     * @returns {RegExp}
    +     */
    +    _entityRegex(name){
    +      const escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    +      return new RegExp("&"+escapedName+";","g");
    +    }
    +
    +    /**
    +     * @param {object} externalEntities entity values keyed by entity name
    +     */
    +    addExternalEntities(externalEntities){
    +        const entKeys = Object.keys(externalEntities);
    +        for (let i = 0; i < entKeys.length; i++) {
    +          const ent = entKeys[i];
    +          this.addExternalEntity(ent,externalEntities[ent])
    +        }
    +    }
    +
    +    /**
    +     * Add one entity. It is not added (only a warning is written) when its
    +     * value contains "&".
    +     * @param {string} key entity name; checked with validateEntityName
    +     * @param {string} val replacement text
    +     */
    +    addExternalEntity(key,val){
    +      validateEntityName(key);
    +      if(val.indexOf("&") !== -1) {
    +        //`reportWarning` is not defined anywhere in this file; write the warning to the console
    +        console.warn(`Entity ${key} is not added as '&' is found in value;`)
    +        return;
    +      }else{
    +        //stored under `key`: the `ent` used here before is not defined in this method
    +        this.lastEntities[key] = {
    +          regex: this._entityRegex(key),
    +          val : val
    +        }
    +      }
    +    }
    +
    +    /**
    +     * @param {object} entities DOCTYPE entity values keyed by entity name
    +     */
    +    addDocTypeEntities(entities){
    +        const entKeys = Object.keys(entities);
    +        for (let i = 0; i < entKeys.length; i++) {
    +          const ent = entKeys[i];
    +          this.docTypeEntities[ent] = {
    +             regex: this._entityRegex(ent),
    +             val : entities[ent]
    +          }
    +        }
    +    }
    +
    +    parse(val){
    +        return this.replaceEntitiesValue(val)
    +    }
    +
    +    /**
    +     * 1. Replace DOCTYPE entities 
    +     * 2. Replace external entities 
    +     * 3. Replace HTML entities if asked
    +     * 4. Replace &amp;
    +     * @param {string} val 
    +     * @returns {any} the text with the entities replaced; any other value unchanged
    +     */
    +    replaceEntitiesValue(val){
    +        if(typeof val === "string" && val.length > 0){
    +            for(let entityName in this.docTypeEntities){
    +                const entity = this.docTypeEntities[entityName];
    +                //`entity.regx` was read here before: an undefined pattern, which
    +                //replace() treats as the literal text "undefined"
    +                val = val.replace( entity.regex, entity.val);
    +              }
    +              for(let entityName in this.lastEntities){
    +                const entity = this.lastEntities[entityName];
    +                val = val.replace( entity.regex, entity.val);
    +              }
    +              if(this.replaceHtmlEntities){
    +                for(let entityName in htmlEntities){
    +                  const entity = htmlEntities[entityName];
    +                  val = val.replace( entity.regex, entity.val);
    +                }
    +              }
    +            val = val.replace( ampEntity.regex, ampEntity.val);
    +        }
    +        return val;
    +    }
    +};
    +
    +//an entity name should not contains special characters that may be used in regex
    +//Eg !?\\\/[]$%{}^&*()<>
    +const specialChar = "!?\\\/[]$%{}^&*()<>|+";
    +
    +/**
    + * Reject an entity name containing one of the characters of `specialChar`.
    + * @param {string} name
    + * @returns {string} the same name when it is accepted
    + * @throws {Error} naming the first special character found
    + */
    +function validateEntityName(name){
    +    for (const ch of specialChar) {
    +        if(name.indexOf(ch) === -1) continue;
    +        throw new Error(`Invalid character ${ch} in entity name`);
    +    }
    +    return name;
    +}
    +
    +module.exports = EntitiesParser;
    \ No newline at end of file
    
  • src/v5/valueParsers/join.js+14 0 added
    @@ -0,0 +1,14 @@
    +/**
    + * Join the items of an array into one string.
    + * @param {array} val value to join; anything that is not an array is returned as it is
    + * @param {string} by separator put between the items
    + * @returns {any} the joined string for an array, otherwise `val`
    + */
    +function join(val, by=" "){
    +    //the joined string is returned now; it was computed and thrown away before,
    +    //behind a call to an undefined `isArray`
    +    return Array.isArray(val) ? val.join(by) : val;
    +}
    +
    +module.exports = join;
    \ No newline at end of file
    
  • src/v5/valueParsers/number.js+16 0 added
    @@ -0,0 +1,16 @@
    +const toNumber = require("strnum");
    +
    +
    +/**
    + * Value parser handing strings to strnum's toNumber together with the options
    + * given to the constructor.
    + */
    +class numParser{
    +    constructor(options){
    +        this.options = options;
    +    }
    +
    +    /**
    +     * @param {any} val
    +     * @returns {any} result of toNumber for a string, otherwise `val` unchanged
    +     */
    +    parse(val){
    +        return typeof val === 'string' ? toNumber(val,this.options) : val;
    +    }
    +}
    +
    +module.exports = numParser;
    \ No newline at end of file
    
  • src/v5/valueParsers/trim.js+8 0 added
    @@ -0,0 +1,8 @@
    +/**
    + * Value parser removing the whitespace around a string.
    + */
    +class trimmer{
    +    /**
    +     * @param {any} val
    +     * @returns {any} the trimmed string, or `val` unchanged when it is not a string
    +     */
    +    parse(val){
    +        return typeof val === "string" ? val.trim() : val;
    +    }
    +}
    +
    +module.exports = trimmer;
    \ No newline at end of file
    
  • src/v5/Xml2JsParser.js+237 0 added
    @@ -0,0 +1,237 @@
    +const StringSource = require("./inputSource/StringSource");
    +const BufferSource = require("./inputSource/BufferSource");
    +const {readTagExp,readClosingTagName} = require("./XmlPartReader");
    +const {readComment, readCdata,readDocType,readPiTag} = require("./XmlSpecialTagsReader");
    +const TagPath = require("./TagPath");
    +const TagPathMatcher = require("./TagPathMatcher");
    +const EntitiesParser = require('./EntitiesParser');
    +
    +//To hold the data of current tag
    +//This is usually used to compare jpath expression against current tag
    +class TagDetail{
    +  /**
    +   * @param {string} name tag name; the position starts at 0
    +   */
    +  constructor(name){
    +    Object.assign(this, { name, position: 0 });
    +    // this.attributes = {};
    +  }
    +}
    +
    +class Xml2JsParser {
    +    /**
    +     * @param {object} options parser options; `htmlEntities` and `stopNodes`
    +     *   are read here, every stop node path is parsed into a TagPath
    +     */
    +    constructor(options) {
    +      this.options = options;
    +
    +      this.currentTagDetail = null;
    +      this.tagTextData = "";
    +      this.tagsStack = [];
    +      this.entityParser = new EntitiesParser(options.htmlEntities);
    +      this.stopNodes = [];
    +      for (const stopNodePath of this.options.stopNodes) {
    +        const tagPath = new TagPath(stopNodePath);
    +        this.stopNodes.push(tagPath);
    +      }
    +    }
    +
    +    /**
    +     * Parse XML given as a string.
    +     * @param {string} strData
    +     * @returns the output of the output builder
    +     */
    +    parse(strData) {
    +        const input = new StringSource(strData);
    +        this.source = input;
    +        this.parseXml();
    +        const {outputBuilder} = this;
    +        return outputBuilder.getOutput();
    +    }
    +    /**
    +     * Parse XML read through a BufferSource.
    +     * @param data input handed to BufferSource
    +     * @returns the output of the output builder
    +     */
    +    parseBytesArr(data) {
    +        const input = new BufferSource(data);
    +        this.source = input;
    +        this.parseXml();
    +        const {outputBuilder} = this;
    +        return outputBuilder.getOutput();
    +    }
    +  
    +    parseXml() {
    +      //TODO: Separate TagValueParser as separate class. So no scope issue in node builder class 
    +
    +      //OutputBuilder should be set in XML Parser
    +      this.outputBuilder = this.options.OutputBuilder.getInstance(this.options);
    +      this.root = { root: true};
    +      this.currentTagDetail = this.root;
    +
    +      while(this.source.canRead()){
    +        let ch = this.source.readCh();
    +        if (ch === "") break;
    +        
    +        if(ch === "<"){//tagStart
    +          let nextChar = this.source.readChAt(0);
    +          if (nextChar === "" ) throw new Error("Unexpected end of source");
    +          
    +        
    +          if(nextChar === "!" || nextChar === "?"){
    +            this.source.updateBufferBoundary();
    +            //previously collected text should be added to current node
    +            this.addTextNode(); 
    +            
    +            this.readSpecialTag(nextChar);// Read DOCTYPE, comment, CDATA, PI tag
    +          }else if(nextChar === "/"){
    +            this.source.updateBufferBoundary();
    +            this.readClosingTag();
    +            // console.log(this.source.buffer.length, this.source.readable);
    +            // console.log(this.tagsStack.length);
    +          }else{//opening tag
    +            this.readOpeningTag();
    +          }
    +        }else{
    +          this.tagTextData += ch;
    +        }
    +      }//End While loop
    +      if(this.tagsStack.length > 0 || ( this.tagTextData !== "undefined" && this.tagTextData.trimEnd().length > 0) ) throw new Error("Unexpected data in the end of document");
    +    }
    +  
    +    /**
    +     * read closing paired tag. Set parent tag in scope.
    +     * skip a node on user's choice
    +     */
    +    readClosingTag(){
    +      const tagName = this.processTagName(readClosingTagName(this.source));
    +      // console.log(tagName, this.tagsStack.length);
    +      this.validateClosingTag(tagName);
    +      // All the text data collected, belongs to current tag.
    +      if(!this.currentTagDetail.root) this.addTextNode();
    +      this.outputBuilder.closeTag();
    +      // Since the tag is closed now, parent tag comes in scope
    +      this.currentTagDetail = this.tagsStack.pop(); 
    +    }
    +
    +    validateClosingTag(tagName){
    +      // This can't be unpaired tag, or a stop tag.
    +      if(this.isUnpaired(tagName) || this.isStopNode(tagName)) throw new Error(`Unexpected closing tag '${tagName}'`);
    +      // This must match with last opening tag
    +      else if(tagName !== this.currentTagDetail.name) 
    +        throw new Error(`Unexpected closing tag '${tagName}' expecting '${this.currentTagDetail.name}'`)
    +    }
    +
    +    /**
    +     * Read paired, unpaired, self-closing, stop and special tags.
    +     * Create a new node
    +     * Push paired tag in stack.
    +     */
    +    readOpeningTag(){
    +      //save previously collected text data to current node
    +      this.addTextNode();
    +
    +      //create new tag
    +      let tagExp = readTagExp(this, ">" );
    +      
    +      // process and skip from tagsStack For unpaired tag, self closing tag, and stop node
    +      const tagDetail = new TagDetail(tagExp.tagName);
    +      if(this.isUnpaired(tagExp.tagName)) {
    +        //TODO: this will lead 2 extra stack operation
    +        this.outputBuilder.addTag(tagDetail);
    +        this.outputBuilder.closeTag();
    +      } else if(tagExp.selfClosing){
    +        this.outputBuilder.addTag(tagDetail);
    +        this.outputBuilder.closeTag();
    +      } else if(this.isStopNode(this.currentTagDetail)){
    +        // TODO: let's user set a stop node boundary detector for complex contents like script tag
    +        //TODO: pass tag name only to avoid string operations
    +        const content = source.readUptoCloseTag(`</${tagExp.tagName}`);
    +        this.outputBuilder.addTag(tagDetail);
    +        this.outputBuilder.addValue(content);
    +        this.outputBuilder.closeTag();
    +      }else{//paired tag
    +        //set new nested tag in scope.
    +        this.tagsStack.push(this.currentTagDetail);
    +        this.outputBuilder.addTag(tagDetail);
    +        this.currentTagDetail = tagDetail;
    +      }
    +      // console.log(tagExp.tagName,this.tagsStack.length);
    +      // this.options.onClose()
    +
    +    }
    +
    +    readSpecialTag(startCh){
    +      if(startCh == "!"){
    +        let nextChar = this.source.readCh();
    +        if (nextChar === null || nextChar === undefined) throw new Error("Unexpected ending of the source");
    +        
    +        if(nextChar === "-"){//comment
    +          readComment(this);
    +        }else if(nextChar === "["){//CDATA
    +          readCdata(this);
    +        }else if(nextChar === "D"){//DOCTYPE
    +          readDocType(this);
    +        }
    +      }else if(startCh === "?"){
    +        readPiTag(this);
    +      }else{
    +        throw new Error(`Invalid tag '<${startCh}' at ${this.source.line}:${this.source.col}`)
    +      }
    +    }
    +    addTextNode = function() {
    +      // if(this.currentTagDetail){
    +        //save text as child node
    +        // if(this.currentTagDetail.tagname !== '!xml')
    +        if (this.tagTextData !== undefined && this.tagTextData !== "") { //store previously collected data as textNode
    +          if(this.tagTextData.trim().length > 0){
    +            //TODO: shift parsing to output builder
    +
    +            this.outputBuilder.addValue(this.replaceEntities(this.tagTextData));
    +          }
    +          this.tagTextData = "";
    +        }
    +      // }
    +    }
    +
    +    processAttrName(name){
    +      if(name === "__proto__") name  = "#__proto__";
    +      name = resolveNameSpace(name, this.removeNSPrefix);
    +      return name;
    +    }
    +    
    +    processTagName(name){
    +      if(name === "__proto__") name  = "#__proto__";
    +      name = resolveNameSpace(name, this.removeNSPrefix);
    +      return name;
    +    }
    +
    +    /**
    +     * Generate tags path from tagsStack
    +     */
    +    tagsPath(tagName){
    +      //TODO: return TagPath Object. User can call match method with path
    +      return "";
    +    }
    +
    +    isUnpaired(tagName){
    +      return this.options.tags.unpaired.indexOf(tagName) !== -1;
    +    }
    +
    +    /**
    +     * valid expressions are 
    +     * tag nested
    +     * * nested
    +     * tag nested[attribute]
    +     * tag nested[attribute=""]
    +     * tag nested[attribute!=""]
    +     * tag nested:0 //for future
    +     * @param {string} tagName 
    +     * @returns 
    +     */
    +    isStopNode(node){
    +      for (let i = 0; i < this.stopNodes.length; i++) {
    +        const givenPath = this.stopNodes[i];
    +        if(givenPath.match(this.tagsStack, node)) return true;
    +      }
    +      return false 
    +    }
    +
    +    replaceEntities(text){
    +      //TODO: if option is set then replace entities
    +      return this.entityParser.parse(text)
    +    }
    +}
    +
    +/**
    + * Strip the namespace prefix from a tag or attribute name.
    + * @param {string} name raw name, optionally in `prefix:local` form
    + * @param {boolean} removeNSPrefix when falsy, `name` is returned untouched
    + * @returns {string} the local name; '' for `xmlns:*` declarations
    + * @throws {Error} when `name` contains more than one ':'
    + */
    +function resolveNameSpace(name, removeNSPrefix) {
    +  if (!removeNSPrefix) return name;
    +
    +  const parts = name.split(':');
    +  // A name without any prefix has nothing to strip.
    +  if (parts.length === 1) return name;
    +  // `reportError` was never defined in this module; raise the error directly.
    +  if (parts.length > 2) throw new Error(`Multiple namespaces ${name}`);
    +
    +  return parts[0] === 'xmlns' ? '' : parts[1];
    +}
    +
    +module.exports = Xml2JsParser;
    \ No newline at end of file
    
  • src/v5/XMLParser.js+85 0 added
    @@ -0,0 +1,85 @@
    +const { buildOptions} = require("./OptionsBuilder");
    +const Xml2JsParser = require("./Xml2JsParser");
    +
    +/**
    + * Public entry point: builds the parser options once and creates a fresh
    + * Xml2JsParser for every parse call.
    + */
    +class XMLParser{
    +
    +    /**
    +     * @param {object} [options] user options, normalised through `buildOptions`
    +     */
    +    constructor(options){
    +        this.externalEntities = {};
    +        this.options = buildOptions(options);
    +    }
    +    /**
    +     * Parse XML data string to JS object
    +     * @param {string|Buffer} xmlData
    +     * @returns {*} output of the configured output builder
    +     * @throws {Error} when `xmlData` cannot be converted to a string
    +     */
    +    parse(xmlData){
    +        if(Array.isArray(xmlData) && xmlData.byteLength !== undefined){
    +            // Delegate to the bytes parser; calling parse() again here recursed forever.
    +            return this.parseBytesArr(xmlData);
    +        }else if(xmlData !== null && xmlData !== undefined && xmlData.toString){
    +            xmlData = xmlData.toString();
    +        }else{
    +            throw new Error("XML data is accepted in String or Bytes[] form.")
    +        }
    +        const parser = new Xml2JsParser(this.options);
    +        parser.entityParser.addExternalEntities(this.externalEntities);
    +        return parser.parse(xmlData);
    +    }
    +    /**
    +     * Parse XML data buffer to JS object
    +     * @param {*} xmlData bytes to parse
    +     * @returns {*} output of the configured output builder
    +     * @throws {Error} when `xmlData` is not accepted as bytes
    +     */
    +    parseBytesArr(xmlData){
    +        // NOTE(review): Buffers and typed arrays are not `Array.isArray`, and plain
    +        // arrays have no `byteLength`, so this check looks like it rejects every
    +        // realistic input - confirm the intended type against BufferSource.
    +        if(!(Array.isArray(xmlData) && xmlData.byteLength !== undefined)){
    +            throw new Error("XML data is accepted in Bytes[] form.")
    +        }
    +        const parser = new Xml2JsParser(this.options);
    +        parser.entityParser.addExternalEntities(this.externalEntities);
    +        return parser.parseBytesArr(xmlData);
    +    }
    +    /**
    +     * Parse XML data stream to JS object
    +     * @param {fs.ReadableStream} xmlDataStream
    +     * @throws {Error} when the input is not a stream, or the underlying parser
    +     *   has no stream support
    +     */
    +    parseStream(xmlDataStream){
    +        if(!isStream(xmlDataStream)) throw new Error("FXP: Invalid stream input");
    +
    +        const orderedObjParser = new Xml2JsParser(this.options);
    +        orderedObjParser.entityParser.addExternalEntities(this.externalEntities);
    +        // Fail with a clear message instead of "parseStream is not a function".
    +        if(typeof orderedObjParser.parseStream !== "function"){
    +            throw new Error("FXP: Stream parsing is not supported by the parser");
    +        }
    +        return orderedObjParser.parseStream(xmlDataStream);
    +    }
    +
    +    /**
    +     * Add Entity which is not by default supported by this library
    +     * @param {string} key entity name without '&' and ';'
    +     * @param {string} value replacement text; must not contain '&'
    +     * @throws {Error} when key or value contains a forbidden character
    +     */
    +    addEntity(key, value){
    +        // This also covers a value that is exactly "&", which previously had an
    +        // unreachable branch of its own.
    +        if(value.indexOf("&") !== -1){
    +            throw new Error("Entity value can't have '&'")
    +        }else if(key.indexOf("&") !== -1 || key.indexOf(";") !== -1){
    +            throw new Error("An entity must be set without '&' and ';'. Eg. use '#xD' for '&#xD;'")
    +        }else{
    +            this.externalEntities[key] = value;
    +        }
    +    }
    +}
    +
    +/**
    + * Duck-type check for a readable stream.
    + * @param {*} stream candidate value
    + * @returns {boolean} true when `stream` has `read` and `on` functions and a
    + *   boolean `readableEnded` property
    + */
    +function isStream(stream){
    +    return Boolean(stream)
    +        && typeof stream.read === "function"
    +        && typeof stream.on === "function"
    +        && typeof stream.readableEnded === "boolean";
    +}
    +
    +module.exports = XMLParser;
    \ No newline at end of file
    
  • src/v5/XmlPartReader.js+212 0 added
    @@ -0,0 +1,212 @@
    +'use strict';
    +
    +/**
    + * Find the paired closing tag for a stop node.
    + *
    + * Scans `xmlDoc` forward from index `i`, counting nested opening tags that have
    + * the same name, until the matching closing tag is found.
    + *
    + * NOTE(review): `findSubStrIndex` is neither defined nor imported in this file,
    + * and `readTagExp` below takes a single `parser` argument rather than
    + * `(xmlDoc, i, '>')` - this function looks like it still targets an older
    + * string-based API; confirm before relying on it.
    + *
    + * @param {string} xmlDoc 
    + * @param {string} tagName 
    + * @param {number} i : start index
    + * @returns {{tagContent: string, i: number}|undefined} text between the start
    + *   index and the matching closing tag, plus the index returned for that tag's
    + *   '>'; undefined when the loop ends without finding a match
    + */
    +function readStopNode(xmlDoc, tagName, i){
    +    const startIndex = i;
    +    // Starting at 1 since we already have an open tag
    +    let openTagCount = 1;
    +  
    +    for (; i < xmlDoc.length; i++) {
    +      if( xmlDoc[i] === "<"){ 
    +        if (xmlDoc[i+1] === "/") {//close tag
    +            const closeIndex = findSubStrIndex(xmlDoc, ">", i, `${tagName} is not closed`);
    +            let closeTagName = xmlDoc.substring(i+2,closeIndex).trim();
    +            if(closeTagName === tagName){
    +              openTagCount--;
    +              // every same-named opening tag has been closed: this is the pair
    +              if (openTagCount === 0) {
    +                return {
    +                  tagContent: xmlDoc.substring(startIndex, i),
    +                  i : closeIndex
    +                }
    +              }
    +            }
    +            i=closeIndex;
    +          } else if(xmlDoc[i+1] === '?') { 
    +            // processing instruction: jump to its terminator
    +            const closeIndex = findSubStrIndex(xmlDoc, "?>", i+1, "StopNode is not closed.")
    +            i=closeIndex;
    +          } else if(xmlDoc.substr(i + 1, 3) === '!--') { 
    +            // comment: jump to its terminator
    +            const closeIndex = findSubStrIndex(xmlDoc, "-->", i+3, "StopNode is not closed.")
    +            i=closeIndex;
    +          } else if(xmlDoc.substr(i + 1, 2) === '![') { 
    +            // "<![" section (e.g. CDATA): jump to its terminator
    +            const closeIndex = findSubStrIndex(xmlDoc, "]]>", i, "StopNode is not closed.") - 2;
    +            i=closeIndex;
    +          } else {
    +            // any other opening tag
    +            const tagData = readTagExp(xmlDoc, i, '>')
    +  
    +            if (tagData) {
    +              const openTagName = tagData && tagData.tagName;
    +              // a same-named tag that is not self-closing adds one nesting level
    +              if (openTagName === tagName && tagData.tagExp[tagData.tagExp.length-1] !== "/") {
    +                openTagCount++;
    +              }
    +              i=tagData.closeIndex;
    +            }
    +          }
    +        }
    +    }//end for loop
    +}
    +
    +/**
    + * Read closing tag name
    + * Consumes characters from the source up to and including the next '>'.
    + * @param {Source} source reader exposing `canRead()` and `readCh()`
    + * @returns {string} tag name with trailing whitespace removed
    + * @throws {Error} when the source ends before '>' is found
    + */
    +function readClosingTagName(source){
    +  let text = ""; //temporary data
    +  while(source.canRead()){
    +    const ch = source.readCh();
    +
    +    if (ch === ">") return text.trimEnd();
    +    text += ch;
    +  }
    +  // Report what was read so far; the previous message interpolated an
    +  // undeclared `substr` and therefore raised a ReferenceError instead.
    +  throw new Error(`Unexpected end of source. Reading '${text}'`);
    +}
    +
    +/**
    + * Read XML tag and build attributes map
    + * This function can be used to read normal tag, pi tag.
    + * This function can't be used to read comment, CDATA, DOCTYPE.
    + * Eg <tag attr = ' some"' attr= ">" bool>
    + * @param {object} parser parser whose `source` is positioned just after '<'
    + * @returns tag expression includes tag name & attribute string
    + * @throws {Error} when a quote is left open or the source ends before '>'
    + */
    +function readTagExp(parser) {
    +  let singleQuoted = false;
    +  let doubleQuoted = false;
    +  let terminated = false;
    +  let pos = 0;
    +
    +  // Look ahead without consuming until the first '>' that is outside quotes.
    +  while (parser.source.canRead(pos)) {
    +    const ch = parser.source.readChAt(pos);
    +
    +    if (ch === '>' && !singleQuoted && !doubleQuoted) {
    +      terminated = true;
    +      break;
    +    }
    +    if (ch === "'") {
    +      if (!doubleQuoted) singleQuoted = !singleQuoted;
    +    } else if (ch === '"') {
    +      if (!singleQuoted) doubleQuoted = !doubleQuoted;
    +    }
    +    pos++;
    +  }
    +
    +  if (singleQuoted || doubleQuoted) {
    +    throw new Error("Invalid attribute expression. Quote is not properly closed");
    +  }
    +  if (!terminated) throw new Error("Unexpected closing of source. Waiting for '>'");
    +
    +  // Take the expression, then move the buffer boundary past the closing '>'.
    +  const expression = parser.source.readStr(pos);
    +  parser.source.updateBufferBoundary(pos + 1);
    +  return buildTagExpObj(expression, parser)
    +}
    +
    +/**
    + * Read a processing-instruction expression up to its closing '?>'.
    + * @param {object} parser parser whose `source` is positioned just after '<'
    + * @returns tag expression object built from the text before '?>'
    + * @throws {Error} when a quote is left open or the source ends before '?>'
    + */
    +function readPiExp(parser) {
    +  let singleQuoted = false;
    +  let doubleQuoted = false;
    +  let terminated = false;
    +  let pos = 0;
    +
    +  while (parser.source.canRead(pos)) {
    +    const current = parser.source.readChAt(pos);
    +    const following = parser.source.readChAt(pos + 1);
    +
    +    if (current === "'" && !doubleQuoted) {
    +      singleQuoted = !singleQuoted;
    +    } else if (current === '"' && !singleQuoted) {
    +      doubleQuoted = !doubleQuoted;
    +    }
    +
    +    // '?>' only terminates the expression outside quotes
    +    if (!singleQuoted && !doubleQuoted && current === '?' && following === '>') {
    +      terminated = true;
    +      break;
    +    }
    +    pos++;
    +  }
    +
    +  if (singleQuoted || doubleQuoted) {
    +    throw new Error("Invalid attribute expression. Quote is not properly closed in PI tag expression");
    +  }
    +  if (!terminated) throw new Error("Unexpected closing of source. Waiting for '?>'");
    +
    +  if(!parser.options.attributes.ignore){
    +    //TODO: use regex to verify attributes if not set to ignore
    +  }
    +
    +  const expression = parser.source.readStr(pos);
    +  parser.source.updateBufferBoundary(pos + 1);
    +  return buildTagExpObj(expression, parser)
    +}
    +
    +/**
    + * Split a raw tag expression into its tag name and attribute string.
    + * Attributes are forwarded to `parseAttributesExp` unless
    + * `parser.options.attributes.ignore` is set.
    + * @param {string} exp text between '<' and '>' (or '?>'), e.g. `a href="x"/`
    + * @param {object} parser parser providing `options.attributes.ignore`
    + * @returns {{tagName: string, selfClosing: boolean}}
    + */
    +function buildTagExpObj(exp, parser){
    +  const tagExp = {
    +    tagName: "",
    +    selfClosing: false
    +  };
    +  let body = exp;
    +  let attrsExp = "";
    +
    +  if(body[body.length -1] === "/"){
    +    tagExp.selfClosing = true;
    +    // drop the '/' so it leaks into neither the tag name nor the attributes
    +    body = body.substring(0, body.length - 1);
    +  }
    +
    +  //separate tag name; any XML whitespace (space, tab, CR, LF) ends the name
    +  const separatorIndex = body.search(/\s/);
    +  if(separatorIndex === -1){
    +    //only tag
    +    tagExp.tagName = body;
    +  }else{
    +    tagExp.tagName = body.substring(0, separatorIndex);
    +    attrsExp = body.substring(separatorIndex + 1);
    +  }
    +
    +  tagExp.tagName = tagExp.tagName.trimEnd();
    +
    +  if(!parser.options.attributes.ignore && attrsExp.length > 0){
    +    parseAttributesExp(attrsExp,parser)
    +  }
    +
    +  return tagExp;
    +}
    +
    +// name, optionally followed by ="value" or ='value'
    +const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
    +
    +/**
    + * Parse an attribute string and report every attribute to the output builder.
    + * An attribute without a quoted value is reported with the boolean `true`.
    + * @param {string} attrStr attribute part of a tag expression
    + * @param {object} parser provides `processAttrName`, `replaceEntities` and
    + *   `outputBuilder.addAttribute`
    + */
    +function parseAttributesExp(attrStr, parser) {
    +  // matchAll works on a copy of the regex, so the shared `attrsRegx` keeps no state
    +  for (const match of attrStr.matchAll(attrsRegx)) {
    +    const attrName = parser.processAttrName(match[1]);
    +    // Decide on the presence of the `="..."` part, not on truthiness of the
    +    // value: `attr=""` is an empty string, not a boolean attribute.
    +    const hasValue = match[2] !== undefined;
    +    const attrVal = hasValue ? parser.replaceEntities(match[4]) : true;
    +
    +    parser.outputBuilder.addAttribute(attrName, attrVal);
    +  }
    +}
    +
    +
    +/**
    + * Collect every match of a global regex in a string.
    + * @param {string} string text to search
    + * @param {RegExp} regex expression that is run repeatedly with `exec`
    + * @returns {Array<Array>} one array per match holding the full match and its
    + *   groups, with an extra `startIndex` property
    + */
    +const getAllMatches = function(string, regex) {
    +  const collected = [];
    +  for (let found = regex.exec(string); found; found = regex.exec(string)) {
    +    const groups = [...found];
    +    groups.startIndex = regex.lastIndex - found[0].length;
    +    collected.push(groups);
    +  }
    +  return collected;
    +};
    +
    +module.exports = {
    +    readStopNode: readStopNode,
    +    readClosingTagName: readClosingTagName,
    +    readTagExp: readTagExp,
    +    readPiExp: readPiExp,
    +}
    \ No newline at end of file
    
  • src/v5/XmlSpecialTagsReader.js+118 0 added
    @@ -0,0 +1,118 @@
    +const {readPiExp} = require("./XmlPartReader");
    +
    +/**
    + * Read a CDATA section and pass its text to the output builder.
    + * `<![` has already been read when this is called.
    + * @param {object} parser
    + * @throws {Error} when the next six characters are not `CDATA[`
    + */
    +function readCdata(parser){
    +  const { source } = parser;
    +  const marker = source.readStr(6); //CDATA[
    +  source.updateBufferBoundary(6);
    +
    +  if(marker !== "CDATA["){
    +    throw new Error(`Invalid CDATA expression at ${parser.source.line}:${parser.source.cols}`);
    +  }
    +
    +  parser.outputBuilder.addCdata(source.readUpto("]]>"));
    +}
    +/**
    + * Read a processing instruction and report it to the output builder.
    + * `<?` has already been read when this is called.
    + * @param {object} parser
    + * @throws {Error} when no tag expression could be read
    + */
    +function readPiTag(parser){
    +  const piExp = readPiExp(parser, "?>");
    +  if(!piExp) throw new Error("Invalid Pi Tag expression.");
    +
    +  const { tagName } = piExp;
    +  if (tagName !== "?xml") {
    +    // NOTE(review): a "?" is prepended to the tag name here - confirm the
    +    // name does not already start with one.
    +    parser.outputBuilder.addPi("?"+tagName);
    +    return;
    +  }
    +  //TODO: test if tagName is just xml
    +  parser.outputBuilder.addDeclaration();
    +}
    +
    +/**
    + * Read a comment and pass its text to the output builder.
    + * `<!-` has already been read when this is called.
    + * @param {object} parser
    + * @throws {Error} when the next character is not the second '-'
    + */
    +function readComment(parser){
    +  const { source } = parser;
    +  const second = source.readCh();
    +  if(second !== "-"){
    +    throw new Error(`Invalid comment expression at ${parser.source.line}:${parser.source.cols}`);
    +  }
    +
    +  parser.outputBuilder.addComment(source.readUpto("-->"));
    +}
    +
    +// Validation patterns for the remainder of a declaration, keyed by the first
    +// two letters that follow "<!" (ELEMENT, ATTLIST, NOTATION).
    +const DOCTYPE_tags = {
    +  "EL":/^EMENT\s+([^\s>]+)\s+(ANY|EMPTY|\(.+\)\s*$)/m,
    +  "AT":/^TLIST\s+[^\s]+\s+[^\s]+\s+[^\s]+\s+[^\s]+\s+$/m,
    +  "NO":/^TATION.+$/m
    +}
    +
    +/**
    + * Read one declaration inside the DOCTYPE internal subset.
    + * The opening '<' has already been read.
    + * @param {object} parser
    + * @throws {Error} when the declaration is not a comment, ENTITY, ELEMENT,
    + *   ATTLIST or NOTATION, or fails its validation pattern
    + */
    +function readDocTypeDeclaration(parser){
    +  const source = parser.source;
    +
    +  const marker = source.readStr(2);
    +  if(marker === "!-"){ //comment; readComment expects "<!-" to be consumed
    +    source.updateBufferBoundary(2);
    +    readComment(parser);
    +    return;
    +  }
    +  // Every other declaration starts with "<!" followed by its keyword. The "!"
    +  // has to be consumed first, otherwise the two-letter lookup can never match.
    +  if(!marker || marker[0] !== "!") throw new Error("Invalid DOCTYPE");
    +  source.updateBufferBoundary(1);
    +
    +  const kind = source.readStr(2);
    +  source.updateBufferBoundary(2);
    +  if(kind === "EN"){ //ENTITY
    +    const rest = source.readStr(4);
    +    source.updateBufferBoundary(4);
    +    if(rest !== "TITY") throw new Error("Invalid DOCTYPE ENTITY expression");
    +
    +    registerEntity(parser);
    +    return;
    +  }
    +
    +  //ELEMENT, ATTLIST, NOTATION
    +  const dTagExp = source.readUpto(">");
    +  const regx = DOCTYPE_tags[kind];
    +  if(!regx || !dTagExp.match(regx)) throw new Error("Invalid DOCTYPE");
    +}
    +
    +/**
    + * Read a DOCTYPE declaration, registering any entities declared in its
    + * internal subset. `<!D` has already been read when this is called.
    + * Returns silently if the source ends before the DOCTYPE is closed.
    + * @param {object} parser
    + * @throws {Error} when the keyword is not DOCTYPE or a declaration is invalid
    + */
    +function readDocType(parser){
    +  const source = parser.source;
    +  const str = source.readStr(6); //OCTYPE
    +  source.updateBufferBoundary(6);
    +
    +  if(str !== "OCTYPE") throw new Error(`Invalid DOCTYPE expression at ${parser.source.line}:${parser.source.cols}`);
    +
    +  let hasBody = false, lastch = "";
    +
    +  while(source.canRead()){
    +    //TODO: use readChAt like used in partReader
    +    const ch = source.readCh();
    +    if(hasBody){
    +      if (ch === '<') {
    +        readDocTypeDeclaration(parser);
    +        lastch = "";
    +      }else if( ch === '>' && lastch === "]"){//end of doctype
    +        return;
    +      }else if(ch.trim() !== ""){
    +        // Track the last significant character inside the subset too, so the
    +        // closing "]>" (with optional whitespace in between) is recognised.
    +        lastch = ch;
    +      }
    +    }else if( ch === '>'){//end of doctype
    +      return;
    +    }else if( ch === '['){
    +      hasBody = true;
    +      lastch = "";
    +    }else{
    +      lastch = ch;
    +    }
    +  }//End While loop
    +
    +}
    +
    +/**
    + * Read the remainder of an `<!ENTITY` declaration (everything after the
    + * keyword) and register the entity on the parser's entity parser.
    + * @param {object} parser provides `source` and `entityParser.addExternalEntity`
    + * @throws {Error} when the source ends before the declaration is closed
    + */
    +function registerEntity(parser){
    +  // `source` and `text` were used without being declared, which raised a
    +  // ReferenceError on the first character.
    +  const source = parser.source;
    +  let attrBoundary="";
    +  let name ="", val ="", text ="";
    +  while(source.canRead()){
    +    const ch = source.readCh();
    +
    +    if(attrBoundary){
    +      if (ch === attrBoundary){
    +        // closing quote: the collected text is the value
    +        val = text;
    +        text = "";
    +        attrBoundary = "";
    +      }else{
    +        text += ch;
    +      }
    +    }else if(ch === " " || ch === "\t" || ch === "\n" || ch === "\r"){
    +      // the first whitespace-delimited token is the entity name
    +      if(!name){
    +        name = text.trimStart();
    +        text = "";
    +      }
    +    }else if (ch === '"' || ch === "'") {//start of attrBoundary
    +      attrBoundary = ch;
    +      text = ""; // the value starts after the quote
    +    }else if(ch === ">"){
    +      parser.entityParser.addExternalEntity(name,val);
    +      return;
    +    }else{
    +      text+=ch;
    +    }
    +  }
    +  throw new Error("Unexpected end of source. Reading ENTITY declaration");
    +}
    +
    +module.exports = {
    +  readCdata: readCdata,
    +  readComment:readComment,
    +  readDocType:readDocType,
    +  readPiTag:readPiTag
    +}
    \ No newline at end of file
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.