fast-xml-parser affected by DoS through entity expansion in DOCTYPE (no expansion limit)
Description
fast-xml-parser allows users to validate XML, parse XML into a JS object, or build XML from a JS object, without C/C++-based libraries and without callbacks. In versions 4.1.3 through 5.3.5, the XML parser can be forced to perform an unlimited amount of entity expansion. With a very small XML input, it’s possible to make the parser spend seconds or even minutes processing a single request, effectively freezing the application. Version 5.3.6 fixes the issue. As a workaround, disable DOCTYPE entity processing by setting the `processEntities: false` option.
Affected packages
Versions sourced from the GitHub Security Advisory.
| Package | Affected versions | Patched versions |
|---|---|---|
| fast-xml-parser (npm) | >= 4.1.3, < 4.5.4 | 4.5.4 |
| fast-xml-parser (npm) | >= 5.0.0, < 5.3.6 | 5.3.6 |
Affected products
1- Range: >= 5.0.0, < 5.3.6
Patches
1910dae5be2de: fix entities performance & security issues
6 files changed · +1138 −309
lib/fxp.d.cts+76 −13 modified@@ -1,4 +1,60 @@ -type X2jOptions = { +type ProcessEntitiesOptions = { + /** + * Whether to enable entity processing + * + * Defaults to `true` + */ + enabled?: boolean; + + /** + * Maximum size in characters for a single entity definition + * + * Defaults to `10000` + */ + maxEntitySize?: number; + + /** + * Maximum depth for nested entity references (reserved for future use) + * + * Defaults to `10` + */ + maxExpansionDepth?: number; + + /** + * Maximum total number of entity expansions allowed + * + * Defaults to `1000` + */ + maxTotalExpansions?: number; + + /** + * Maximum total expanded content length in characters + * + * Defaults to `100000` + */ + maxExpandedLength?: number; + + /** + * Array of tag names where entity replacement is allowed. + * If null, entities are replaced in all tags. + * + * Defaults to `null` + */ + allowedTags?: string[] | null; + + /** + * Custom filter function to determine if entities should be replaced in a tag + * + * @param tagName - The name of the current tag + * @param jPath - The jPath of the current tag + * @returns `true` to allow entity replacement, `false` to skip + * + * Defaults to `null` + */ + tagFilter?: ((tagName: string, jPath: string) => boolean) | null; +}; + +export type X2jOptions = { /** * Preserve the order of tags in resulting JS object * @@ -10,7 +66,7 @@ type X2jOptions = { * Give a prefix to the attribute name in the resulting JS object * * Defaults to '@_' - */ + */ attributeNamePrefix?: string; /** @@ -64,7 +120,7 @@ type X2jOptions = { parseTagValue?: boolean; /** - * Whether to parse tag value with `strnum` package + * Whether to parse attribute value with `strnum` package * * Defaults to `false` */ @@ -161,9 +217,15 @@ type X2jOptions = { /** * Whether to process default and DOCTYPE entities * + * When `true` - enables entity processing with default limits + * + * When `false` - disables all entity processing + * + * When `ProcessEntitiesOptions` - enables entity 
processing with custom configuration + * * Defaults to `true` */ - processEntities?: boolean; + processEntities?: boolean | ProcessEntitiesOptions; /** * Whether to process HTML entities @@ -209,7 +271,7 @@ type X2jOptions = { * * Defaults to `(tagName, jPath, attrs) => tagName` */ - updateTag?: (tagName: string, jPath: string, attrs: {[k: string]: string}) => string | boolean; + updateTag?: (tagName: string, jPath: string, attrs: { [k: string]: string }) => string | boolean; /** * If true, adds a Symbol to all object nodes, accessible by {@link XMLParser.getMetaDataSymbol} with @@ -232,7 +294,7 @@ type validationOptions = { * Defaults to `false` */ allowBooleanAttributes?: boolean; - + /** * List of tags without closing tags * @@ -246,7 +308,7 @@ type XmlBuilderOptions = { * Give a prefix to the attribute name in the resulting JS object * * Defaults to '@_' - */ + */ attributeNamePrefix?: string; /** @@ -393,20 +455,20 @@ type XmlBuilderOptions = { oneListGroup?: boolean; }; -type ESchema = string | object | Array<string|object>; +type ESchema = string | object | Array<string | object>; type ValidationError = { - err: { + err: { code: string; msg: string, line: number, - col: number + col: number }; }; declare class XMLParser { constructor(options?: X2jOptions); - parse(xmlData: string | Uint8Array ,validationOptions?: validationOptions | boolean): any; + parse(xmlData: string | Uint8Array, validationOptions?: validationOptions | boolean): any; /** * Add Entity which is not by default supported by this library * @param entityIdentifier {string} Eg: 'ent' for &ent; @@ -424,10 +486,10 @@ declare class XMLParser { * The XMLMetaData property is only present when {@link X2jOptions.captureMetaData} * is true in the options. 
*/ - static getMetaDataSymbol() : Symbol; + static getMetaDataSymbol(): Symbol; } -declare class XMLValidator{ +declare class XMLValidator { static validate(xmlData: string, options?: validationOptions): true | ValidationError; } @@ -458,6 +520,7 @@ declare namespace fxp { ValidationError, strnumOptions, validationOptions, + ProcessEntitiesOptions, } }
spec/entities_security_spec.js+587 −0 added@@ -0,0 +1,587 @@ +import { XMLParser } from "../src/fxp.js"; + +describe("XMLParser entity expansion security", function () { + + // ================================================================= + // MAX ENTITY SIZE TESTS + // ================================================================= + + describe("maxEntitySize limit", function () { + it("should throw error when entity size exceeds maxEntitySize", function () { + const entity = 'A'.repeat(15000); + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>&big;</root>`; + + const options = { + processEntities: { + maxEntitySize: 10000 + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Entity "big" size .* exceeds maximum allowed size/); + }); + + it("should allow entity within maxEntitySize", function () { + const entity = 'A'.repeat(5000); + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>&big;</root>`; + + const options = { + processEntities: { + maxEntitySize: 10000 + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual(entity); + }); + + it("should use default maxEntitySize of 10000", function () { + const entity = 'A'.repeat(15000); + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>&big;</root>`; + + const options = { + processEntities: {} // Use defaults + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Entity "big" size .* exceeds maximum allowed size/); + }); + }); + + // ================================================================= + // MAX TOTAL EXPANSIONS TESTS + // ================================================================= + + describe("maxTotalExpansions limit", function () { + it("should throw error when total expansions exceed limit", function () { + const entity = 'A'.repeat(10); + const refs = 
'&big;'.repeat(1500); // 1500 expansions + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxTotalExpansions: 1000 + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Entity expansion limit exceeded/); + }); + + it("should allow expansions within limit", function () { + const entity = 'A'.repeat(10); + const refs = '&big;'.repeat(500); // 500 expansions + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxTotalExpansions: 1000 + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual('A'.repeat(5000)); + }); + + it("should count expansions across multiple tags", function () { + const entity = 'X'; + const refs = '&e;'.repeat(600); + const xmlData = `<!DOCTYPE root [<!ENTITY e "${entity}">]> + <root> + <tag1>${refs}</tag1> + <tag2>${refs}</tag2> + </root>`; + + const options = { + processEntities: { + maxTotalExpansions: 1000 + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Entity expansion limit exceeded/); + }); + }); + + // ================================================================= + // MAX EXPANDED LENGTH TESTS + // ================================================================= + + describe("maxExpandedLength limit", function () { + it("should throw error when expanded content exceeds maxExpandedLength", function () { + const entity = 'A'.repeat(1000); + const refs = '&big;'.repeat(150); // 150 * 1000 = 150,000 chars + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxExpandedLength: 100000 + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Total 
expanded content size exceeded/); + }); + + it("should allow expansions within maxExpandedLength", function () { + const entity = 'A'.repeat(100); + const refs = '&big;'.repeat(500); // 500 * 100 = 50,000 chars + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxExpandedLength: 100000 + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual('A'.repeat(50000)); + }); + }); + + // ================================================================= + // BILLION LAUGHS ATTACK PREVENTION + // ================================================================= + + describe("Billion laughs attack prevention", function () { + it("should prevent billion laughs attack with maxEntitySize", function () { + const entity = 'A'.repeat(50000); + const refs = '&big;'.repeat(100); // Would be 5MB but blocked at definition + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxEntitySize: 10000 + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Entity "big" size .* exceeds maximum allowed size/); + }); + + it("should prevent billion laughs with maxTotalExpansions", function () { + const entity = 'A'.repeat(100); + const refs = '&big;'.repeat(5000); // Too many expansions + const xmlData = `<!DOCTYPE foo [<!ENTITY big "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxTotalExpansions: 1000 + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Entity expansion limit exceeded/); + }); + + it("should prevent billion laughs with maxExpandedLength", function () { + const entity = 'A'.repeat(1000); + const refs = '&big;'.repeat(200); // 200KB output + const xmlData = `<!DOCTYPE foo [<!ENTITY big 
"${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxExpandedLength: 100000 + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Total expanded content size exceeded/); + }); + }); + + // ================================================================= + // TAG FILTERING - ALLOWEDTAGS TESTS + // ================================================================= + + describe("allowedTags filtering", function () { + it("should only expand entities in allowed tags", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "REPLACED">]> + <root> + <allowed>&test;</allowed> + <blocked>&test;</blocked> + </root>`; + + const options = { + processEntities: { + allowedTags: ['allowed'] + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root.allowed).toEqual("REPLACED"); + expect(result.root.blocked).toEqual("&test;"); + }); + + it("should expand entities in all tags when allowedTags is null", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "REPLACED">]> + <root> + <tag1>&test;</tag1> + <tag2>&test;</tag2> + </root>`; + + const options = { + processEntities: { + allowedTags: null + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root.tag1).toEqual("REPLACED"); + expect(result.root.tag2).toEqual("REPLACED"); + }); + + it("should work with multiple allowed tags", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "OK">]> + <root> + <description>&test;</description> + <content>&test;</content> + <script>&test;</script> + </root>`; + + const options = { + processEntities: { + allowedTags: ['description', 'content'] + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root.description).toEqual("OK"); + expect(result.root.content).toEqual("OK"); + 
expect(result.root.script).toEqual("&test;"); + }); + }); + + // ================================================================= + // TAG FILTERING - TAGFILTER FUNCTION TESTS + // ================================================================= + + describe("tagFilter function", function () { + it("should use custom filter function to control entity expansion", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "REPLACED">]> + <root> + <safe>&test;</safe> + <script>&test;</script> + <code>&test;</code> + </root>`; + + const options = { + processEntities: { + tagFilter: (tagName, jPath) => { + return !['script', 'code'].includes(tagName); + } + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root.safe).toEqual("REPLACED"); + expect(result.root.script).toEqual("&test;"); + expect(result.root.code).toEqual("&test;"); + }); + + it("should pass jPath to tagFilter function", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "OK">]> + <root> + <level1> + <dangerous>&test;</dangerous> + </level1> + <safe>&test;</safe> + </root>`; + + const options = { + processEntities: { + tagFilter: (tagName, jPath) => { + // Block expansion in nested dangerous paths + return !jPath.includes('level1.dangerous'); + } + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root.safe).toEqual("OK"); + expect(result.root.level1.dangerous).toEqual("&test;"); + }); + }); + + // ================================================================= + // PERFORMANCE OPTIMIZATION TESTS + // ================================================================= + + describe("Performance optimization", function () { + it("should skip entity processing when no ampersand present", function () { + const xmlData = `<root> + <tag1>No entities here</tag1> + <tag2>Just plain text</tag2> + <tag3>More text without entities</tag3> + </root>`; + + const options = { + processEntities: 
true + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root.tag1).toEqual("No entities here"); + expect(result.root.tag2).toEqual("Just plain text"); + expect(result.root.tag3).toEqual("More text without entities"); + }); + + it("should process standard entities correctly", function () { + const xmlData = `<root> + <tag1><test></tag1> + <tag2>& " '</tag2> + </root>`; + + const options = { + processEntities: true + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root.tag1).toEqual("<test>"); + expect(result.root.tag2).toEqual('& " \''); + }); + }); + + // ================================================================= + // COMBINED LIMITS TESTS + // ================================================================= + + describe("Combined security limits", function () { + it("should work with all limits configured", function () { + const entity = 'A'.repeat(100); + const refs = '&e;'.repeat(50); + const xmlData = `<!DOCTYPE root [<!ENTITY e "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxEntitySize: 1000, + maxTotalExpansions: 100, + maxExpandedLength: 10000, + allowedTags: ['root'] + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual('A'.repeat(5000)); + }); + + it("should enforce strictest applicable limit", function () { + const entity = 'X'.repeat(100); + const refs = '&e;'.repeat(200); // Would be 20,000 chars + const xmlData = `<!DOCTYPE root [<!ENTITY e "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxEntitySize: 1000, // OK (100 < 1000) + maxTotalExpansions: 1000, // OK (200 < 1000) + maxExpandedLength: 10000 // FAIL (20000 > 10000) + } + }; + const parser = new XMLParser(options); + + expect(function () { + parser.parse(xmlData); + }).toThrowError(/Total expanded content size exceeded/); + }); + }); + + // 
================================================================= + // ENTITY IN ATTRIBUTES TESTS + // ================================================================= + + describe("Entities in attributes", function () { + it("should expand entities in attributes when enabled", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "value">]> + <root attr="&test;">text</root>`; + + const options = { + ignoreAttributes: false, + processEntities: true + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root["@_attr"]).toEqual("value"); + }); + + it("should respect limits for entities in attributes", function () { + const entity = 'A'.repeat(100); + const refs = '&e;'.repeat(50); + const xmlData = `<!DOCTYPE root [<!ENTITY e "${entity}">]> + <root attr="${refs}">text</root>`; + + const options = { + ignoreAttributes: false, + processEntities: { + maxTotalExpansions: 100 + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root["@_attr"]).toEqual('A'.repeat(5000)); + }); + }); + + // ================================================================= + // EDGE CASES + // ================================================================= + + describe("Edge cases", function () { + it("should handle empty entity definitions", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY empty "">]> + <root>∅test</root>`; + + const options = { + processEntities: true + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual("test"); + }); + + it("should handle multiple different entities", function () { + const xmlData = `<!DOCTYPE root [ + <!ENTITY e1 "AAA"> + <!ENTITY e2 "BBB"> + <!ENTITY e3 "CCC"> + ]><root>&e1;&e2;&e3;</root>`; + + const options = { + processEntities: true + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual("AAABBBCCC"); 
+ }); + + it("should reset counters between parse calls", function () { + const entity = 'X'; + const refs = '&e;'.repeat(600); + const xmlData = `<!DOCTYPE root [<!ENTITY e "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxTotalExpansions: 1000 + } + }; + const parser = new XMLParser(options); + + // First parse should succeed + const result1 = parser.parse(xmlData); + expect(result1.root).toEqual('X'.repeat(600)); + + // Second parse should also succeed (counters reset) + const result2 = parser.parse(xmlData); + expect(result2.root).toEqual('X'.repeat(600)); + }); + + it("should handle entity expansion with enabled: false", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "value">]> + <root>&test;</root>`; + + const options = { + processEntities: { + enabled: false + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual("&test;"); + }); + + it("should work with small limits for strict security", function () { + const entity = 'A'.repeat(50); + const refs = '&e;'.repeat(10); + const xmlData = `<!DOCTYPE root [<!ENTITY e "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxEntitySize: 100, + maxTotalExpansions: 20, + maxExpandedLength: 1000 + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual('A'.repeat(500)); + }); + }); + + // ================================================================= + // CONFIGURATION VALIDATION TESTS + // ================================================================= + + describe("Configuration normalization", function () { + it("should normalize boolean true to object with enabled: true", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "value">]><root>&test;</root>`; + + const options = { + processEntities: true + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + 
expect(result.root).toEqual("value"); + }); + + it("should normalize boolean false to object with enabled: false", function () { + const xmlData = `<!DOCTYPE root [<!ENTITY test "value">]><root>&test;</root>`; + + const options = { + processEntities: false + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + expect(result.root).toEqual("&test;"); + }); + + it("should merge partial config with defaults", function () { + const entity = 'A'.repeat(100); + const refs = '&e;'.repeat(50); + const xmlData = `<!DOCTYPE root [<!ENTITY e "${entity}">]><root>${refs}</root>`; + + const options = { + processEntities: { + maxEntitySize: 200 // Only set one option + } + }; + const parser = new XMLParser(options); + const result = parser.parse(xmlData); + + // Should use defaults for other limits + expect(result.root).toEqual('A'.repeat(5000)); + }); + }); +}); \ No newline at end of file
src/fxp.d.ts+74 −12 modified@@ -1,3 +1,59 @@ +export type ProcessEntitiesOptions = { + /** + * Whether to enable entity processing + * + * Defaults to `true` + */ + enabled?: boolean; + + /** + * Maximum size in characters for a single entity definition + * + * Defaults to `10000` + */ + maxEntitySize?: number; + + /** + * Maximum depth for nested entity references (reserved for future use) + * + * Defaults to `10` + */ + maxExpansionDepth?: number; + + /** + * Maximum total number of entity expansions allowed + * + * Defaults to `1000` + */ + maxTotalExpansions?: number; + + /** + * Maximum total expanded content length in characters + * + * Defaults to `100000` + */ + maxExpandedLength?: number; + + /** + * Array of tag names where entity replacement is allowed. + * If null, entities are replaced in all tags. + * + * Defaults to `null` + */ + allowedTags?: string[] | null; + + /** + * Custom filter function to determine if entities should be replaced in a tag + * + * @param tagName - The name of the current tag + * @param jPath - The jPath of the current tag + * @returns `true` to allow entity replacement, `false` to skip + * + * Defaults to `null` + */ + tagFilter?: ((tagName: string, jPath: string) => boolean) | null; +}; + export type X2jOptions = { /** * Preserve the order of tags in resulting JS object @@ -10,7 +66,7 @@ export type X2jOptions = { * Give a prefix to the attribute name in the resulting JS object * * Defaults to '@_' - */ + */ attributeNamePrefix?: string; /** @@ -161,9 +217,15 @@ export type X2jOptions = { /** * Whether to process default and DOCTYPE entities * + * When `true` - enables entity processing with default limits + * + * When `false` - disables all entity processing + * + * When `ProcessEntitiesOptions` - enables entity processing with custom configuration + * * Defaults to `true` */ - processEntities?: boolean; + processEntities?: boolean | ProcessEntitiesOptions; /** * Whether to process HTML entities @@ -209,7 +271,7 @@ export 
type X2jOptions = { * * Defaults to `(tagName, jPath, attrs) => tagName` */ - updateTag?: (tagName: string, jPath: string, attrs: {[k: string]: string}) => string | boolean; + updateTag?: (tagName: string, jPath: string, attrs: { [k: string]: string }) => string | boolean; /** * If true, adds a Symbol to all object nodes, accessible by {@link XMLParser.getMetaDataSymbol} with @@ -232,7 +294,7 @@ export type validationOptions = { * Defaults to `false` */ allowBooleanAttributes?: boolean; - + /** * List of tags without closing tags * @@ -246,7 +308,7 @@ export type XmlBuilderOptions = { * Give a prefix to the attribute name in the resulting JS object * * Defaults to '@_' - */ + */ attributeNamePrefix?: string; /** @@ -393,14 +455,14 @@ export type XmlBuilderOptions = { oneListGroup?: boolean; }; -type ESchema = string | object | Array<string|object>; +type ESchema = string | object | Array<string | object>; export type ValidationError = { - err: { + err: { code: string; msg: string, line: number, - col: number + col: number }; }; @@ -424,11 +486,11 @@ export class XMLParser { * The XMLMetaData property is only present when {@link X2jOptions.captureMetaData} * is true in the options. */ - static getMetaDataSymbol() : Symbol; + static getMetaDataSymbol(): Symbol; } -export class XMLValidator{ - static validate( xmlData: string, options?: validationOptions): true | ValidationError; +export class XMLValidator { + static validate(xmlData: string, options?: validationOptions): true | ValidationError; } export class XMLBuilder { constructor(options?: XmlBuilderOptions); @@ -442,4 +504,4 @@ export class XMLBuilder { export interface XMLMetaData { /** The index, if available, of the character where the XML node began in the input stream. */ startIndex?: number; -} +} \ No newline at end of file
src/xmlparser/DocTypeReader.js+63 −53 modified@@ -1,82 +1,82 @@ -import {isName} from '../util.js'; +import { isName } from '../util.js'; -export default class DocTypeReader{ - constructor(processEntities){ - this.suppressValidationErr = !processEntities; +export default class DocTypeReader { + constructor(options) { + this.suppressValidationErr = !options; + this.options = options; } - - readDocType(xmlData, i){ - + + readDocType(xmlData, i) { + const entities = {}; - if( xmlData[i + 3] === 'O' && + if (xmlData[i + 3] === 'O' && xmlData[i + 4] === 'C' && xmlData[i + 5] === 'T' && xmlData[i + 6] === 'Y' && xmlData[i + 7] === 'P' && - xmlData[i + 8] === 'E') - { - i = i+9; + xmlData[i + 8] === 'E') { + i = i + 9; let angleBracketsCount = 1; let hasBody = false, comment = false; let exp = ""; - for(;i<xmlData.length;i++){ + for (; i < xmlData.length; i++) { if (xmlData[i] === '<' && !comment) { //Determine the tag type - if( hasBody && hasSeq(xmlData, "!ENTITY",i)){ - i += 7; + if (hasBody && hasSeq(xmlData, "!ENTITY", i)) { + i += 7; let entityName, val; - [entityName, val,i] = this.readEntityExp(xmlData,i+1,this.suppressValidationErr); - if(val.indexOf("&") === -1){ //Parameter entities are not supported + [entityName, val, i] = this.readEntityExp(xmlData, i + 1, this.suppressValidationErr); + if (val.indexOf("&") === -1) { //Parameter entities are not supported const escaped = entityName.replace(/[.\-+*:]/g, '\\.'); - entities[ entityName ] = { - regx : RegExp( `&${escaped};`,"g"), + entities[entityName] = { + regx: RegExp(`&${escaped};`, "g"), val: val }; } } - else if( hasBody && hasSeq(xmlData, "!ELEMENT",i)) { + else if (hasBody && hasSeq(xmlData, "!ELEMENT", i)) { i += 8;//Not supported - const {index} = this.readElementExp(xmlData,i+1); + const { index } = this.readElementExp(xmlData, i + 1); i = index; - }else if( hasBody && hasSeq(xmlData, "!ATTLIST",i)){ + } else if (hasBody && hasSeq(xmlData, "!ATTLIST", i)) { i += 8;//Not supported // const {index} = 
this.readAttlistExp(xmlData,i+1); // i = index; - }else if( hasBody && hasSeq(xmlData, "!NOTATION",i)) { + } else if (hasBody && hasSeq(xmlData, "!NOTATION", i)) { i += 9;//Not supported - const {index} = this.readNotationExp(xmlData,i+1,this.suppressValidationErr); + const { index } = this.readNotationExp(xmlData, i + 1, this.suppressValidationErr); i = index; - }else if( hasSeq(xmlData, "!--",i) ) comment = true; + } else if (hasSeq(xmlData, "!--", i)) comment = true; else throw new Error(`Invalid DOCTYPE`); angleBracketsCount++; exp = ""; } else if (xmlData[i] === '>') { //Read tag content - if(comment){ - if( xmlData[i - 1] === "-" && xmlData[i - 2] === "-"){ + if (comment) { + if (xmlData[i - 1] === "-" && xmlData[i - 2] === "-") { comment = false; angleBracketsCount--; } - }else{ + } else { angleBracketsCount--; } if (angleBracketsCount === 0) { - break; + break; } - }else if( xmlData[i] === '['){ + } else if (xmlData[i] === '[') { hasBody = true; - }else{ + } else { exp += xmlData[i]; } } - if(angleBracketsCount !== 0){ + if (angleBracketsCount !== 0) { throw new Error(`Unclosed DOCTYPE`); } - }else{ + } else { throw new Error(`Invalid Tag instead of DOCTYPE`); } - return {entities, i}; + return { entities, i }; } - readEntityExp(xmlData, i) { + readEntityExp(xmlData, i) { //External entities are not supported // <!ENTITY ext SYSTEM "http://normal-website.com" > @@ -101,19 +101,29 @@ export default class DocTypeReader{ i = skipWhitespace(xmlData, i); // Check for unsupported constructs (external entities or parameter entities) - if(!this.suppressValidationErr){ + if (!this.suppressValidationErr) { if (xmlData.substring(i, i + 6).toUpperCase() === "SYSTEM") { throw new Error("External entities are not supported"); - }else if (xmlData[i] === "%") { + } else if (xmlData[i] === "%") { throw new Error("Parameter entities are not supported"); } } // Read entity value (internal entity) let entityValue = ""; [i, entityValue] = this.readIdentifierVal(xmlData, i, 
"entity"); + + // Validate entity size + if (this.options.enabled !== false && + this.options.maxEntitySize && + entityValue.length > this.options.maxEntitySize) { + throw new Error( + `Entity "${entityName}" size (${entityValue.length}) exceeds maximum allowed size (${this.options.maxEntitySize})` + ); + } + i--; - return [entityName, entityValue, i ]; + return [entityName, entityValue, i]; } readNotationExp(xmlData, i) { @@ -146,25 +156,25 @@ export default class DocTypeReader{ let systemIdentifier = null; if (identifierType === "PUBLIC") { - [i, publicIdentifier ] = this.readIdentifierVal(xmlData, i, "publicIdentifier"); + [i, publicIdentifier] = this.readIdentifierVal(xmlData, i, "publicIdentifier"); // Skip whitespace after public identifier i = skipWhitespace(xmlData, i); // Optionally read system identifier if (xmlData[i] === '"' || xmlData[i] === "'") { - [i, systemIdentifier ] = this.readIdentifierVal(xmlData, i,"systemIdentifier"); + [i, systemIdentifier] = this.readIdentifierVal(xmlData, i, "systemIdentifier"); } } else if (identifierType === "SYSTEM") { // Read system identifier (mandatory for SYSTEM) - [i, systemIdentifier ] = this.readIdentifierVal(xmlData, i, "systemIdentifier"); + [i, systemIdentifier] = this.readIdentifierVal(xmlData, i, "systemIdentifier"); if (!this.suppressValidationErr && !systemIdentifier) { throw new Error("Missing mandatory system identifier for SYSTEM notation"); } } - - return {notationName, publicIdentifier, systemIdentifier, index: --i}; + + return { notationName, publicIdentifier, systemIdentifier, index: --i }; } readIdentifierVal(xmlData, i, type) { @@ -193,7 +203,7 @@ export default class DocTypeReader{ // <!ELEMENT title (#PCDATA)> // <!ELEMENT book (title, author+)> // <!ELEMENT name (content-model)> - + // Skip leading whitespace after <!ELEMENT i = skipWhitespace(xmlData, i); @@ -213,8 +223,8 @@ export default class DocTypeReader{ i = skipWhitespace(xmlData, i); let contentModel = ""; // Expect '(' to start 
content model - if(xmlData[i] === "E" && hasSeq(xmlData, "MPTY",i)) i+=4; - else if(xmlData[i] === "A" && hasSeq(xmlData, "NY",i)) i+=2; + if (xmlData[i] === "E" && hasSeq(xmlData, "MPTY", i)) i += 4; + else if (xmlData[i] === "A" && hasSeq(xmlData, "NY", i)) i += 2; else if (xmlData[i] === "(") { i++; // Move past '(' @@ -227,10 +237,10 @@ export default class DocTypeReader{ throw new Error("Unterminated content model"); } - }else if(!this.suppressValidationErr){ + } else if (!this.suppressValidationErr) { throw new Error(`Invalid Element Expression, found "${xmlData[i]}"`); } - + return { elementName, contentModel: contentModel.trim(), @@ -366,16 +376,16 @@ const skipWhitespace = (data, index) => { -function hasSeq(data, seq,i){ - for(let j=0;j<seq.length;j++){ - if(seq[j]!==data[i+j+1]) return false; +function hasSeq(data, seq, i) { + for (let j = 0; j < seq.length; j++) { + if (seq[j] !== data[i + j + 1]) return false; } return true; } -function validateEntityName(name){ +function validateEntityName(name) { if (isName(name)) - return name; + return name; else throw new Error(`Invalid entity name ${name}`); -} +} \ No newline at end of file
src/xmlparser/OptionsBuilder.js+84 −44 modified@@ -1,46 +1,86 @@ - export const defaultOptions = { - preserveOrder: false, - attributeNamePrefix: '@_', - attributesGroupName: false, - textNodeName: '#text', - ignoreAttributes: true, - removeNSPrefix: false, // remove NS from tag name or attribute name if true - allowBooleanAttributes: false, //a tag can have attributes without any value - //ignoreRootElement : false, - parseTagValue: true, - parseAttributeValue: false, - trimValues: true, //Trim string values of tag and attributes - cdataPropName: false, - numberParseOptions: { - hex: true, - leadingZeros: true, - eNotation: true - }, - tagValueProcessor: function(tagName, val) { - return val; - }, - attributeValueProcessor: function(attrName, val) { - return val; - }, - stopNodes: [], //nested tags will not be parsed even for errors - alwaysCreateTextNode: false, - isArray: () => false, - commentPropName: false, - unpairedTags: [], - processEntities: true, - htmlEntities: false, - ignoreDeclaration: false, - ignorePiTags: false, - transformTagName: false, - transformAttributeName: false, - updateTag: function(tagName, jPath, attrs){ - return tagName - }, - // skipEmptyListItem: false - captureMetaData: false, -}; - -export const buildOptions = function(options) { - return Object.assign({}, defaultOptions, options); + preserveOrder: false, + attributeNamePrefix: '@_', + attributesGroupName: false, + textNodeName: '#text', + ignoreAttributes: true, + removeNSPrefix: false, // remove NS from tag name or attribute name if true + allowBooleanAttributes: false, //a tag can have attributes without any value + //ignoreRootElement : false, + parseTagValue: true, + parseAttributeValue: false, + trimValues: true, //Trim string values of tag and attributes + cdataPropName: false, + numberParseOptions: { + hex: true, + leadingZeros: true, + eNotation: true + }, + tagValueProcessor: function (tagName, val) { + return val; + }, + attributeValueProcessor: function (attrName, val) 
{ + return val; + }, + stopNodes: [], //nested tags will not be parsed even for errors + alwaysCreateTextNode: false, + isArray: () => false, + commentPropName: false, + unpairedTags: [], + processEntities: true, + htmlEntities: false, + ignoreDeclaration: false, + ignorePiTags: false, + transformTagName: false, + transformAttributeName: false, + updateTag: function (tagName, jPath, attrs) { + return tagName + }, + // skipEmptyListItem: false + captureMetaData: false, }; + +/** + * Normalizes processEntities option for backward compatibility + * @param {boolean|object} value + * @returns {object} Always returns normalized object + */ +function normalizeProcessEntities(value) { + // Boolean backward compatibility + if (typeof value === 'boolean') { + return { + enabled: value, // true or false + maxEntitySize: 10000, + maxExpansionDepth: 10, + maxTotalExpansions: 1000, + maxExpandedLength: 100000, + allowedTags: null, + tagFilter: null + }; + } + + // Object config - merge with defaults + if (typeof value === 'object' && value !== null) { + return { + enabled: value.enabled !== false, // default true if not specified + maxEntitySize: value.maxEntitySize ?? 10000, + maxExpansionDepth: value.maxExpansionDepth ?? 10, + maxTotalExpansions: value.maxTotalExpansions ?? 1000, + maxExpandedLength: value.maxExpandedLength ?? 100000, + allowedTags: value.allowedTags ?? null, + tagFilter: value.tagFilter ?? null + }; + } + + // Default to enabled with limits + return normalizeProcessEntities(true); +} + +export const buildOptions = function (options) { + const built = Object.assign({}, defaultOptions, options); + + // Always normalize processEntities for backward compatibility and validation + built.processEntities = normalizeProcessEntities(built.processEntities); + //console.debug(built.processEntities) + return built; +}; \ No newline at end of file
src/xmlparser/OrderedObjParser.js+254 −187 modified@@ -1,7 +1,7 @@ 'use strict'; ///@ts-check -import {getAllMatches, isExist} from '../util.js'; +import { getAllMatches, isExist } from '../util.js'; import xmlNode from './xmlNode.js'; import DocTypeReader from './DocTypeReader.js'; import toNumber from "strnum"; @@ -14,35 +14,35 @@ import getIgnoreAttributesFn from "../ignoreAttributes.js"; //const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g"); //const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g"); -export default class OrderedObjParser{ - constructor(options){ +export default class OrderedObjParser { + constructor(options) { this.options = options; this.currentNode = null; this.tagsNodeStack = []; this.docTypeEntities = {}; this.lastEntities = { - "apos" : { regex: /&(apos|#39|#x27);/g, val : "'"}, - "gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"}, - "lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"}, - "quot" : { regex: /&(quot|#34|#x22);/g, val : "\""}, + "apos": { regex: /&(apos|#39|#x27);/g, val: "'" }, + "gt": { regex: /&(gt|#62|#x3E);/g, val: ">" }, + "lt": { regex: /&(lt|#60|#x3C);/g, val: "<" }, + "quot": { regex: /&(quot|#34|#x22);/g, val: "\"" }, }; - this.ampEntity = { regex: /&(amp|#38|#x26);/g, val : "&"}; + this.ampEntity = { regex: /&(amp|#38|#x26);/g, val: "&" }; this.htmlEntities = { "space": { regex: /&(nbsp|#160);/g, val: " " }, // "lt" : { regex: /&(lt|#60);/g, val: "<" }, // "gt" : { regex: /&(gt|#62);/g, val: ">" }, // "amp" : { regex: /&(amp|#38);/g, val: "&" }, // "quot" : { regex: /&(quot|#34);/g, val: "\"" }, // "apos" : { regex: /&(apos|#39);/g, val: "'" }, - "cent" : { regex: /&(cent|#162);/g, val: "¢" }, - "pound" : { regex: /&(pound|#163);/g, val: "£" }, - "yen" : { regex: /&(yen|#165);/g, val: "¥" }, - "euro" : { regex: /&(euro|#8364);/g, val: "€" }, - "copyright" : { regex: /&(copy|#169);/g, val: "©" }, - "reg" : { regex: 
/&(reg|#174);/g, val: "®" }, - "inr" : { regex: /&(inr|#8377);/g, val: "₹" }, - "num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => fromCodePoint(str, 10, "&#") }, - "num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => fromCodePoint(str, 16, "&#x") }, + "cent": { regex: /&(cent|#162);/g, val: "¢" }, + "pound": { regex: /&(pound|#163);/g, val: "£" }, + "yen": { regex: /&(yen|#165);/g, val: "¥" }, + "euro": { regex: /&(euro|#8364);/g, val: "€" }, + "copyright": { regex: /&(copy|#169);/g, val: "©" }, + "reg": { regex: /&(reg|#174);/g, val: "®" }, + "inr": { regex: /&(inr|#8377);/g, val: "₹" }, + "num_dec": { regex: /&#([0-9]{1,7});/g, val: (_, str) => fromCodePoint(str, 10, "&#") }, + "num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val: (_, str) => fromCodePoint(str, 16, "&#x") }, }; this.addExternalEntities = addExternalEntities; this.parseXml = parseXml; @@ -55,16 +55,18 @@ export default class OrderedObjParser{ this.saveTextToParentTag = saveTextToParentTag; this.addChild = addChild; this.ignoreAttributesFn = getIgnoreAttributesFn(this.options.ignoreAttributes) + this.entityExpansionCount = 0; + this.currentExpandedLength = 0; - if(this.options.stopNodes && this.options.stopNodes.length > 0){ + if (this.options.stopNodes && this.options.stopNodes.length > 0) { this.stopNodesExact = new Set(); this.stopNodesWildcard = new Set(); - for(let i = 0; i < this.options.stopNodes.length; i++){ + for (let i = 0; i < this.options.stopNodes.length; i++) { const stopNodeExp = this.options.stopNodes[i]; - if(typeof stopNodeExp !== 'string') continue; - if(stopNodeExp.startsWith("*.")){ + if (typeof stopNodeExp !== 'string') continue; + if (stopNodeExp.startsWith("*.")) { this.stopNodesWildcard.add(stopNodeExp.substring(2)); - }else{ + } else { this.stopNodesExact.add(stopNodeExp); } } @@ -73,14 +75,14 @@ export default class OrderedObjParser{ } -function addExternalEntities(externalEntities){ +function addExternalEntities(externalEntities) { const entKeys = 
Object.keys(externalEntities); for (let i = 0; i < entKeys.length; i++) { const ent = entKeys[i]; const escaped = ent.replace(/[.\-+*:]/g, '\\.'); this.lastEntities[ent] = { - regex: new RegExp("&"+escaped+";","g"), - val : externalEntities[ent] + regex: new RegExp("&" + escaped + ";", "g"), + val: externalEntities[ent] } } } @@ -99,23 +101,23 @@ function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode, if (this.options.trimValues && !dontTrim) { val = val.trim(); } - if(val.length > 0){ - if(!escapeEntities) val = this.replaceEntitiesValue(val); - + if (val.length > 0) { + if (!escapeEntities) val = this.replaceEntitiesValue(val, tagName, jPath); + const newval = this.options.tagValueProcessor(tagName, val, jPath, hasAttributes, isLeafNode); - if(newval === null || newval === undefined){ + if (newval === null || newval === undefined) { //don't parse return val; - }else if(typeof newval !== typeof val || newval !== val){ + } else if (typeof newval !== typeof val || newval !== val) { //overwrite return newval; - }else if(this.options.trimValues){ + } else if (this.options.trimValues) { return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions); - }else{ + } else { const trimmedVal = val.trim(); - if(trimmedVal === val){ + if (trimmedVal === val) { return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions); - }else{ + } else { return val; } } @@ -141,7 +143,7 @@ function resolveNameSpace(tagname) { //const attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm"); const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm'); -function buildAttributesMap(attrStr, jPath) { +function buildAttributesMap(attrStr, jPath, tagName) { if (this.options.ignoreAttributes !== true && typeof attrStr === 'string') { // attrStr = attrStr.replace(/\r?\n/g, ' '); //attrStr = attrStr || attrStr.trim(); @@ -160,20 +162,20 @@ function buildAttributesMap(attrStr, jPath) { if 
(this.options.transformAttributeName) { aName = this.options.transformAttributeName(aName); } - if(aName === "__proto__") aName = "#__proto__"; + if (aName === "__proto__") aName = "#__proto__"; if (oldVal !== undefined) { if (this.options.trimValues) { oldVal = oldVal.trim(); } - oldVal = this.replaceEntitiesValue(oldVal); + oldVal = this.replaceEntitiesValue(oldVal, tagName, jPath); const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPath); - if(newVal === null || newVal === undefined){ + if (newVal === null || newVal === undefined) { //don't parse attrs[aName] = oldVal; - }else if(typeof newVal !== typeof oldVal || newVal !== oldVal){ + } else if (typeof newVal !== typeof oldVal || newVal !== oldVal) { //overwrite attrs[aName] = newVal; - }else{ + } else { //parse attrs[aName] = parseValue( oldVal, @@ -198,109 +200,114 @@ function buildAttributesMap(attrStr, jPath) { } } -const parseXml = function(xmlData) { +const parseXml = function (xmlData) { xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line const xmlObj = new xmlNode('!xml'); let currentNode = xmlObj; let textData = ""; let jPath = ""; + + // Reset entity expansion counters for this document + this.entityExpansionCount = 0; + this.currentExpandedLength = 0; + const docTypeReader = new DocTypeReader(this.options.processEntities); - for(let i=0; i< xmlData.length; i++){//for each char in XML data + for (let i = 0; i < xmlData.length; i++) {//for each char in XML data const ch = xmlData[i]; - if(ch === '<'){ + if (ch === '<') { // const nextIndex = i+1; // const _2ndChar = xmlData[nextIndex]; - if( xmlData[i+1] === '/') {//Closing Tag + if (xmlData[i + 1] === '/') {//Closing Tag const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.") - let tagName = xmlData.substring(i+2,closeIndex).trim(); + let tagName = xmlData.substring(i + 2, closeIndex).trim(); - if(this.options.removeNSPrefix){ + if (this.options.removeNSPrefix) { const colonIndex = 
tagName.indexOf(":"); - if(colonIndex !== -1){ - tagName = tagName.substr(colonIndex+1); + if (colonIndex !== -1) { + tagName = tagName.substr(colonIndex + 1); } } - if(this.options.transformTagName) { + if (this.options.transformTagName) { tagName = this.options.transformTagName(tagName); } - if(currentNode){ + if (currentNode) { textData = this.saveTextToParentTag(textData, currentNode, jPath); } //check if last tag of nested tag was unpaired tag - const lastTagName = jPath.substring(jPath.lastIndexOf(".")+1); - if(tagName && this.options.unpairedTags.indexOf(tagName) !== -1 ){ + const lastTagName = jPath.substring(jPath.lastIndexOf(".") + 1); + if (tagName && this.options.unpairedTags.indexOf(tagName) !== -1) { throw new Error(`Unpaired tag can not be used as closing tag: </${tagName}>`); } let propIndex = 0 - if(lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1 ){ - propIndex = jPath.lastIndexOf('.', jPath.lastIndexOf('.')-1) + if (lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1) { + propIndex = jPath.lastIndexOf('.', jPath.lastIndexOf('.') - 1) this.tagsNodeStack.pop(); - }else{ + } else { propIndex = jPath.lastIndexOf("."); } jPath = jPath.substring(0, propIndex); currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope textData = ""; i = closeIndex; - } else if( xmlData[i+1] === '?') { + } else if (xmlData[i + 1] === '?') { - let tagData = readTagExp(xmlData,i, false, "?>"); - if(!tagData) throw new Error("Pi Tag is not closed."); + let tagData = readTagExp(xmlData, i, false, "?>"); + if (!tagData) throw new Error("Pi Tag is not closed."); textData = this.saveTextToParentTag(textData, currentNode, jPath); - if( (this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags){ + if ((this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags) { //do nothing - }else{ - + } else { + const childNode = new xmlNode(tagData.tagName); 
childNode.add(this.options.textNodeName, ""); - - if(tagData.tagName !== tagData.tagExp && tagData.attrExpPresent){ - childNode[":@"] = this.buildAttributesMap(tagData.tagExp, jPath); + + if (tagData.tagName !== tagData.tagExp && tagData.attrExpPresent) { + childNode[":@"] = this.buildAttributesMap(tagData.tagExp, jPath, tagData.tagName); } this.addChild(currentNode, childNode, jPath, i); } i = tagData.closeIndex + 1; - } else if(xmlData.substr(i + 1, 3) === '!--') { - const endIndex = findClosingIndex(xmlData, "-->", i+4, "Comment is not closed.") - if(this.options.commentPropName){ + } else if (xmlData.substr(i + 1, 3) === '!--') { + const endIndex = findClosingIndex(xmlData, "-->", i + 4, "Comment is not closed.") + if (this.options.commentPropName) { const comment = xmlData.substring(i + 4, endIndex - 2); textData = this.saveTextToParentTag(textData, currentNode, jPath); - currentNode.add(this.options.commentPropName, [ { [this.options.textNodeName] : comment } ]); + currentNode.add(this.options.commentPropName, [{ [this.options.textNodeName]: comment }]); } i = endIndex; - } else if( xmlData.substr(i + 1, 2) === '!D') { + } else if (xmlData.substr(i + 1, 2) === '!D') { const result = docTypeReader.readDocType(xmlData, i); this.docTypeEntities = result.entities; i = result.i; - }else if(xmlData.substr(i + 1, 2) === '![') { + } else if (xmlData.substr(i + 1, 2) === '![') { const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2; - const tagExp = xmlData.substring(i + 9,closeIndex); + const tagExp = xmlData.substring(i + 9, closeIndex); textData = this.saveTextToParentTag(textData, currentNode, jPath); let val = this.parseTextData(tagExp, currentNode.tagname, jPath, true, false, true, true); - if(val == undefined) val = ""; + if (val == undefined) val = ""; //cdata should be set even if it is 0 length string - if(this.options.cdataPropName){ - currentNode.add(this.options.cdataPropName, [ { [this.options.textNodeName] : tagExp } ]); - 
}else{ + if (this.options.cdataPropName) { + currentNode.add(this.options.cdataPropName, [{ [this.options.textNodeName]: tagExp }]); + } else { currentNode.add(this.options.textNodeName, val); } - + i = closeIndex + 2; - }else {//Opening tag - let result = readTagExp(xmlData,i, this.options.removeNSPrefix); - let tagName= result.tagName; + } else {//Opening tag + let result = readTagExp(xmlData, i, this.options.removeNSPrefix); + let tagName = result.tagName; const rawTagName = result.rawTagName; let tagExp = result.tagExp; let attrExpPresent = result.attrExpPresent; @@ -309,104 +316,103 @@ const parseXml = function(xmlData) { if (this.options.transformTagName) { //console.log(tagExp, tagName) const newTagName = this.options.transformTagName(tagName); - if(tagExp === tagName) { + if (tagExp === tagName) { tagExp = newTagName } tagName = newTagName; } - + //save text as child node if (currentNode && textData) { - if(currentNode.tagname !== '!xml'){ + if (currentNode.tagname !== '!xml') { //when nested tag is found textData = this.saveTextToParentTag(textData, currentNode, jPath, false); } } //check if last tag was unpaired tag const lastTag = currentNode; - if(lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1 ){ + if (lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1) { currentNode = this.tagsNodeStack.pop(); jPath = jPath.substring(0, jPath.lastIndexOf(".")); } - if(tagName !== xmlObj.tagname){ + if (tagName !== xmlObj.tagname) { jPath += jPath ? "." 
+ tagName : tagName; } const startIndex = i; if (this.isItStopNode(this.stopNodesExact, this.stopNodesWildcard, jPath, tagName)) { let tagContent = ""; //self-closing tag - if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){ - if(tagName[tagName.length - 1] === "/"){ //remove trailing '/' + if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) { + if (tagName[tagName.length - 1] === "/") { //remove trailing '/' tagName = tagName.substr(0, tagName.length - 1); jPath = jPath.substr(0, jPath.length - 1); tagExp = tagName; - }else{ + } else { tagExp = tagExp.substr(0, tagExp.length - 1); } i = result.closeIndex; } //unpaired tag - else if(this.options.unpairedTags.indexOf(tagName) !== -1){ - + else if (this.options.unpairedTags.indexOf(tagName) !== -1) { + i = result.closeIndex; } //normal tag - else{ + else { //read until closing tag is found const result = this.readStopNodeData(xmlData, rawTagName, closeIndex + 1); - if(!result) throw new Error(`Unexpected end of ${rawTagName}`); + if (!result) throw new Error(`Unexpected end of ${rawTagName}`); i = result.i; tagContent = result.tagContent; } const childNode = new xmlNode(tagName); - if(tagName !== tagExp && attrExpPresent){ - childNode[":@"] = this.buildAttributesMap(tagExp, jPath - ); + if (tagName !== tagExp && attrExpPresent) { + childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName); } - if(tagContent) { + if (tagContent) { tagContent = this.parseTextData(tagContent, tagName, jPath, true, attrExpPresent, true, true); } - + jPath = jPath.substr(0, jPath.lastIndexOf(".")); childNode.add(this.options.textNodeName, tagContent); - + this.addChild(currentNode, childNode, jPath, startIndex); - }else{ - //selfClosing tag - if(tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1){ - if(tagName[tagName.length - 1] === "/"){ //remove trailing '/' + } else { + //selfClosing tag + if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) { + if 
(tagName[tagName.length - 1] === "/") { //remove trailing '/' tagName = tagName.substr(0, tagName.length - 1); jPath = jPath.substr(0, jPath.length - 1); tagExp = tagName; - }else{ + } else { tagExp = tagExp.substr(0, tagExp.length - 1); } - - if(this.options.transformTagName) { + + if (this.options.transformTagName) { const newTagName = this.options.transformTagName(tagName); - if(tagExp === tagName) { + if (tagExp === tagName) { tagExp = newTagName } tagName = newTagName; } const childNode = new xmlNode(tagName); - if(tagName !== tagExp && attrExpPresent){ - childNode[":@"] = this.buildAttributesMap(tagExp, jPath); + if (tagName !== tagExp && attrExpPresent) { + childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName); } this.addChild(currentNode, childNode, jPath, startIndex); jPath = jPath.substr(0, jPath.lastIndexOf(".")); } - //opening tag - else{ - const childNode = new xmlNode( tagName); + //opening tag + else { + const childNode = new xmlNode(tagName); this.tagsNodeStack.push(currentNode); - - if(tagName !== tagExp && attrExpPresent){ - childNode[":@"] = this.buildAttributesMap(tagExp, jPath); + + if (tagName !== tagExp && attrExpPresent) { + childNode[":@"] = this.buildAttributesMap(tagExp, jPath, tagName); } this.addChild(currentNode, childNode, jPath, startIndex); currentNode = childNode; @@ -415,52 +421,113 @@ const parseXml = function(xmlData) { i = closeIndex; } } - }else{ + } else { textData += xmlData[i]; } } return xmlObj.child; } -function addChild(currentNode, childNode, jPath, startIndex){ +function addChild(currentNode, childNode, jPath, startIndex) { // unset startIndex if not requested if (!this.options.captureMetaData) startIndex = undefined; const result = this.options.updateTag(childNode.tagname, jPath, childNode[":@"]) - if(result === false){ + if (result === false) { //do nothing - } else if(typeof result === "string"){ + } else if (typeof result === "string") { childNode.tagname = result currentNode.addChild(childNode, 
startIndex); - }else{ + } else { currentNode.addChild(childNode, startIndex); } } -const replaceEntitiesValue = function(val){ +const replaceEntitiesValue = function (val, tagName, jPath) { + // Performance optimization: Early return if no entities to replace + if (val.indexOf('&') === -1) { + return val; + } + + const entityConfig = this.options.processEntities; - if(this.options.processEntities){ - for(let entityName in this.docTypeEntities){ - const entity = this.docTypeEntities[entityName]; - val = val.replace( entity.regx, entity.val); + if (!entityConfig.enabled) { + return val; + } + + // Check tag-specific filtering + if (entityConfig.allowedTags) { + if (!entityConfig.allowedTags.includes(tagName)) { + return val; // Skip entity replacement for current tag as not set } - for(let entityName in this.lastEntities){ - const entity = this.lastEntities[entityName]; - val = val.replace( entity.regex, entity.val); + } + + if (entityConfig.tagFilter) { + if (!entityConfig.tagFilter(tagName, jPath)) { + return val; // Skip based on custom filter } - if(this.options.htmlEntities){ - for(let entityName in this.htmlEntities){ - const entity = this.htmlEntities[entityName]; - val = val.replace( entity.regex, entity.val); + } + + // Replace DOCTYPE entities + for (let entityName in this.docTypeEntities) { + const entity = this.docTypeEntities[entityName]; + const matches = val.match(entity.regx); + + if (matches) { + // Track expansions + this.entityExpansionCount += matches.length; + + // Check expansion limit + if (entityConfig.maxTotalExpansions && + this.entityExpansionCount > entityConfig.maxTotalExpansions) { + throw new Error( + `Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}` + ); + } + + // Store length before replacement + const lengthBefore = val.length; + val = val.replace(entity.regx, entity.val); + + // Check expanded length immediately after replacement + if (entityConfig.maxExpandedLength) { + 
this.currentExpandedLength += (val.length - lengthBefore); + + if (this.currentExpandedLength > entityConfig.maxExpandedLength) { + throw new Error( + `Total expanded content size exceeded: ${this.currentExpandedLength} > ${entityConfig.maxExpandedLength}` + ); + } } } - val = val.replace( this.ampEntity.regex, this.ampEntity.val); } + if (val.indexOf('&') === -1) return val; // Early exit + + // Replace standard entities + for (let entityName in this.lastEntities) { + const entity = this.lastEntities[entityName]; + val = val.replace(entity.regex, entity.val); + } + if (val.indexOf('&') === -1) return val; // Early exit + + // Replace HTML entities if enabled + if (this.options.htmlEntities) { + for (let entityName in this.htmlEntities) { + const entity = this.htmlEntities[entityName]; + val = val.replace(entity.regex, entity.val); + } + } + + // Replace ampersand entity last + val = val.replace(this.ampEntity.regex, this.ampEntity.val); + return val; } + + function saveTextToParentTag(textData, currentNode, jPath, isLeafNode) { if (textData) { //store previously collected data as textNode - if(isLeafNode === undefined) isLeafNode = currentNode.child.length === 0 - + if (isLeafNode === undefined) isLeafNode = currentNode.child.length === 0 + textData = this.parseTextData(textData, currentNode.tagname, jPath, @@ -482,9 +549,9 @@ function saveTextToParentTag(textData, currentNode, jPath, isLeafNode) { * @param {string} jPath * @param {string} currentTagName */ -function isItStopNode(stopNodesExact, stopNodesWildcard, jPath, currentTagName){ - if(stopNodesWildcard && stopNodesWildcard.has(currentTagName)) return true; - if(stopNodesExact && stopNodesExact.has(jPath)) return true; +function isItStopNode(stopNodesExact, stopNodesWildcard, jPath, currentTagName) { + if (stopNodesWildcard && stopNodesWildcard.has(currentTagName)) return true; + if (stopNodesExact && stopNodesExact.has(jPath)) return true; return false; } @@ -494,24 +561,24 @@ function 
isItStopNode(stopNodesExact, stopNodesWildcard, jPath, currentTagName){ * @param {number} i starting index * @returns */ -function tagExpWithClosingIndex(xmlData, i, closingChar = ">"){ +function tagExpWithClosingIndex(xmlData, i, closingChar = ">") { let attrBoundary; let tagExp = ""; for (let index = i; index < xmlData.length; index++) { let ch = xmlData[index]; if (attrBoundary) { - if (ch === attrBoundary) attrBoundary = "";//reset + if (ch === attrBoundary) attrBoundary = "";//reset } else if (ch === '"' || ch === "'") { - attrBoundary = ch; + attrBoundary = ch; } else if (ch === closingChar[0]) { - if(closingChar[1]){ - if(xmlData[index + 1] === closingChar[1]){ + if (closingChar[1]) { + if (xmlData[index + 1] === closingChar[1]) { return { data: tagExp, index: index } } - }else{ + } else { return { data: tagExp, index: index @@ -524,33 +591,33 @@ function tagExpWithClosingIndex(xmlData, i, closingChar = ">"){ } } -function findClosingIndex(xmlData, str, i, errMsg){ +function findClosingIndex(xmlData, str, i, errMsg) { const closingIndex = xmlData.indexOf(str, i); - if(closingIndex === -1){ + if (closingIndex === -1) { throw new Error(errMsg) - }else{ + } else { return closingIndex + str.length - 1; } } -function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){ - const result = tagExpWithClosingIndex(xmlData, i+1, closingChar); - if(!result) return; +function readTagExp(xmlData, i, removeNSPrefix, closingChar = ">") { + const result = tagExpWithClosingIndex(xmlData, i + 1, closingChar); + if (!result) return; let tagExp = result.data; const closeIndex = result.index; const separatorIndex = tagExp.search(/\s/); let tagName = tagExp; let attrExpPresent = true; - if(separatorIndex !== -1){//separate tag name and attributes expression + if (separatorIndex !== -1) {//separate tag name and attributes expression tagName = tagExp.substring(0, separatorIndex); tagExp = tagExp.substring(separatorIndex + 1).trimStart(); } const rawTagName = tagName; - 
if(removeNSPrefix){ + if (removeNSPrefix) { const colonIndex = tagName.indexOf(":"); - if(colonIndex !== -1){ - tagName = tagName.substr(colonIndex+1); + if (colonIndex !== -1) { + tagName = tagName.substr(colonIndex + 1); attrExpPresent = tagName !== result.data.substr(colonIndex + 1); } } @@ -569,56 +636,56 @@ function readTagExp(xmlData,i, removeNSPrefix, closingChar = ">"){ * @param {string} tagName * @param {number} i */ -function readStopNodeData(xmlData, tagName, i){ +function readStopNodeData(xmlData, tagName, i) { const startIndex = i; // Starting at 1 since we already have an open tag let openTagCount = 1; for (; i < xmlData.length; i++) { - if( xmlData[i] === "<"){ - if (xmlData[i+1] === "/") {//close tag - const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`); - let closeTagName = xmlData.substring(i+2,closeIndex).trim(); - if(closeTagName === tagName){ - openTagCount--; - if (openTagCount === 0) { - return { - tagContent: xmlData.substring(startIndex, i), - i : closeIndex - } + if (xmlData[i] === "<") { + if (xmlData[i + 1] === "/") {//close tag + const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`); + let closeTagName = xmlData.substring(i + 2, closeIndex).trim(); + if (closeTagName === tagName) { + openTagCount--; + if (openTagCount === 0) { + return { + tagContent: xmlData.substring(startIndex, i), + i: closeIndex } } - i=closeIndex; - } else if(xmlData[i+1] === '?') { - const closeIndex = findClosingIndex(xmlData, "?>", i+1, "StopNode is not closed.") - i=closeIndex; - } else if(xmlData.substr(i + 1, 3) === '!--') { - const closeIndex = findClosingIndex(xmlData, "-->", i+3, "StopNode is not closed.") - i=closeIndex; - } else if(xmlData.substr(i + 1, 2) === '![') { - const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2; - i=closeIndex; - } else { - const tagData = readTagExp(xmlData, i, '>') + } + i = closeIndex; + } else if (xmlData[i + 1] === '?') { + const 
closeIndex = findClosingIndex(xmlData, "?>", i + 1, "StopNode is not closed.") + i = closeIndex; + } else if (xmlData.substr(i + 1, 3) === '!--') { + const closeIndex = findClosingIndex(xmlData, "-->", i + 3, "StopNode is not closed.") + i = closeIndex; + } else if (xmlData.substr(i + 1, 2) === '![') { + const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2; + i = closeIndex; + } else { + const tagData = readTagExp(xmlData, i, '>') - if (tagData) { - const openTagName = tagData && tagData.tagName; - if (openTagName === tagName && tagData.tagExp[tagData.tagExp.length-1] !== "/") { - openTagCount++; - } - i=tagData.closeIndex; + if (tagData) { + const openTagName = tagData && tagData.tagName; + if (openTagName === tagName && tagData.tagExp[tagData.tagExp.length - 1] !== "/") { + openTagCount++; } + i = tagData.closeIndex; } } + } }//end for loop } function parseValue(val, shouldParse, options) { if (shouldParse && typeof val === 'string') { //console.log(options) const newval = val.trim(); - if(newval === 'true' ) return true; - else if(newval === 'false' ) return false; + if (newval === 'true') return true; + else if (newval === 'false') return false; else return toNumber(val, options); } else { if (isExist(val)) { @@ -629,12 +696,12 @@ function parseValue(val, shouldParse, options) { } } -function fromCodePoint(str, base, prefix){ +function fromCodePoint(str, base, prefix) { const codePoint = Number.parseInt(str, base); if (codePoint >= 0 && codePoint <= 0x10FFFF) { - return String.fromCodePoint(codePoint); + return String.fromCodePoint(codePoint); } else { - return prefix +str + ";"; + return prefix + str + ";"; } } \ No newline at end of file
Vulnerability mechanics
Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.
References
5 references:
- github.com/advisories/GHSA-jmr7-xgp7-cmfj [ghsa, ADVISORY]
- nvd.nist.gov/vuln/detail/CVE-2026-26278 [ghsa, ADVISORY]
- github.com/NaturalIntelligence/fast-xml-parser/commit/910dae5be2de2955e968558fadf6e8f74f117a77 [ghsa, x_refsource_MISC, WEB]
- github.com/NaturalIntelligence/fast-xml-parser/releases/tag/v5.3.6 [ghsa, x_refsource_MISC, WEB]
- github.com/NaturalIntelligence/fast-xml-parser/security/advisories/GHSA-jmr7-xgp7-cmfj [ghsa, x_refsource_CONFIRM, WEB]
News mentions
0 — No linked articles in our index yet.