Spamworldpro Mini Shell
Spamworldpro


Server : Apache
System : Linux server2.corals.io 4.18.0-348.2.1.el8_5.x86_64 #1 SMP Mon Nov 15 09:17:08 EST 2021 x86_64
User : corals ( 1002)
PHP Version : 7.4.33
Disable Function : exec,passthru,shell_exec,system
Directory :  /home/corals/vreg/node_modules/node-html-parser/dist/esm/nodes/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : //home/corals/vreg/node_modules/node-html-parser/dist/esm/nodes/html.js
import he from 'he';
import { selectAll, selectOne } from 'css-select';
import Node from './node';
import NodeType from './type';
import TextNode from './text';
import Matcher from '../matcher';
import arr_back from '../back';
import CommentNode from './comment';
// const { decode } = he;
/**
 * Decode HTML entities in `val` with `he.decode` and return the result as a
 * freshly built string.
 * @param {string} val text that may contain HTML entities
 * @return {string} decoded text
 */
function decode(val) {
    const decoded = he.decode(val);
    // Round-trip through JSON to clone the string (kept from the original;
    // presumably detaches the result from larger backing strings — TODO confirm).
    const serialized = JSON.stringify(decoded);
    return JSON.parse(serialized);
}
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
// Tag names treated as block-level when building structured text. Every name is
// registered in lower case first and then in upper case, because lookups are
// done with the raw (case-preserved) tag name.
const kBlockElements = new Set([
    'address',
    'article',
    'aside',
    'blockquote',
    'br',
    'details',
    'dialog',
    'dd',
    'div',
    'dl',
    'dt',
    'fieldset',
    'figcaption',
    'figure',
    'footer',
    'form',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'header',
    'hgroup',
    'hr',
    'li',
    'main',
    'nav',
    'ol',
    'p',
    'pre',
    'section',
    'table',
    'td',
    'tr',
    'ul'
].flatMap((tag) => [tag, tag.toUpperCase()]));
/**
 * Small token list: an insertion-ordered set of tokens plus a callback that is
 * invoked with the list after it has been mutated.
 */
class DOMTokenList {
    /**
     * @param {Iterable<string>} [valuesInit] initial tokens
     * @param {Function} [afterUpdate] called with this list after a mutation
     */
    constructor(valuesInit = [], afterUpdate = (() => null)) {
        this._set = new Set(valuesInit);
        this._afterUpdate = afterUpdate;
    }
    /**
     * Reject tokens that contain whitespace.
     * @param {string} c token to check
     */
    _validate(c) {
        const hasWhitespace = /\s/.test(c);
        if (!hasWhitespace) {
            return;
        }
        throw new Error(`DOMException in DOMTokenList.add: The token '${c}' contains HTML space characters, which are not valid in tokens.`);
    }
    /** Add a token and notify. */
    add(c) {
        this._validate(c);
        this._set.add(c);
        this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
    }
    /** Drop `c1`, add `c2` (only `c2` is validated) and notify. */
    replace(c1, c2) {
        this._validate(c2);
        const tokens = this._set;
        tokens.delete(c1);
        tokens.add(c2);
        this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
    }
    /** Remove a token; the callback only fires when something was removed. */
    remove(c) {
        const wasPresent = this._set.delete(c);
        if (wasPresent) {
            this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
        }
    }
    /** Flip the presence of a token and notify. */
    toggle(c) {
        this._validate(c);
        const tokens = this._set;
        if (tokens.has(c)) {
            tokens.delete(c);
        }
        else {
            tokens.add(c);
        }
        this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
    }
    /** @return {boolean} whether the token is in the list */
    contains(c) {
        return this._set.has(c);
    }
    /** Number of tokens. */
    get length() {
        return this._set.size;
    }
    /** @return {Iterator<string>} iterator over the tokens in insertion order */
    values() {
        return this._set.values();
    }
    /** Tokens as a new array. */
    get value() {
        return [...this._set.values()];
    }
    /** Tokens joined by single spaces. */
    toString() {
        return [...this._set.values()].join(' ');
    }
}
/**
 * HTMLElement, which contains a set of children.
 *
 * Note: this is a minimalist implementation; only part of the DOM tree
 *   API is provided (e.g. parentNode and nextSibling are available,
 *   previousSibling is not defined in this class).
 * @class HTMLElement
 * @extends {Node}
 */
export default class HTMLElement extends Node {
    /**
     * Creates an instance of HTMLElement.
     * @param keyAttrs	id and class attribute
     * @param [rawAttrs]	attributes in string
     *
     * @memberof HTMLElement
     */
    constructor(tagName, keyAttrs, rawAttrs = '', parentNode) {
        super(parentNode);
        this.rawAttrs = rawAttrs;
        /**
         * Node Type declaration.
         */
        this.nodeType = NodeType.ELEMENT_NODE;
        this.rawTagName = tagName;
        this.rawAttrs = rawAttrs || '';
        this.id = keyAttrs.id || '';
        this.childNodes = [];
        this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], (classList) => (this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
        ));
        if (keyAttrs.id) {
            if (!rawAttrs) {
                this.rawAttrs = `id="${keyAttrs.id}"`;
            }
        }
        if (keyAttrs.class) {
            if (!rawAttrs) {
                const cls = `class="${this.classList.toString()}"`;
                if (this.rawAttrs) {
                    this.rawAttrs += ` ${cls}`;
                }
                else {
                    this.rawAttrs = cls;
                }
            }
        }
    }
    /**
     * Quote attribute values
     * @param attr attribute value
     * @returns {string} quoted value
     */
    quoteAttribute(attr) {
        if (attr === null) {
            return "null";
        }
        return JSON.stringify(attr.replace(/"/g, '"'));
    }
    /**
     * Remove current element
     */
    remove() {
        if (this.parentNode) {
            const children = this.parentNode.childNodes;
            this.parentNode.childNodes = children.filter((child) => {
                return this !== child;
            });
        }
    }
    /**
     * Remove Child element from childNodes array
     * @param {HTMLElement} node     node to remove
     */
    removeChild(node) {
        this.childNodes = this.childNodes.filter((child) => {
            return (child !== node);
        });
    }
    /**
     * Exchanges given child with new child
     * @param {HTMLElement} oldNode     node to exchange
     * @param {HTMLElement} newNode     new node
     */
    exchangeChild(oldNode, newNode) {
        const children = this.childNodes;
        this.childNodes = children.map((child) => {
            if (child === oldNode) {
                return newNode;
            }
            return child;
        });
    }
    get tagName() {
        return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
    }
    get localName() {
        return this.rawTagName.toLowerCase();
    }
    /**
     * Get escaped (as-is) text value of current node and its children.
     * @return {string} text content
     */
    get rawText() {
        return this.childNodes.reduce((pre, cur) => {
            return (pre += cur.rawText);
        }, '');
    }
    get textContent() {
        return this.rawText;
    }
    set textContent(val) {
        const content = [new TextNode(val, this)];
        this.childNodes = content;
    }
    /**
     * Get unescaped text value of current node and its children.
     * @return {string} text content
     */
    get text() {
        return decode(this.rawText);
    }
    /**
     * Get structured Text (with '\n' etc.)
     * @return {string} structured text
     */
    get structuredText() {
        let currentBlock = [];
        const blocks = [currentBlock];
        function dfs(node) {
            if (node.nodeType === NodeType.ELEMENT_NODE) {
                if (kBlockElements.has(node.rawTagName)) {
                    if (currentBlock.length > 0) {
                        blocks.push(currentBlock = []);
                    }
                    node.childNodes.forEach(dfs);
                    if (currentBlock.length > 0) {
                        blocks.push(currentBlock = []);
                    }
                }
                else {
                    node.childNodes.forEach(dfs);
                }
            }
            else if (node.nodeType === NodeType.TEXT_NODE) {
                if (node.isWhitespace) {
                    // Whitespace node, postponed output
                    currentBlock.prependWhitespace = true;
                }
                else {
                    let text = node.trimmedText;
                    if (currentBlock.prependWhitespace) {
                        text = ` ${text}`;
                        currentBlock.prependWhitespace = false;
                    }
                    currentBlock.push(text);
                }
            }
        }
        dfs(this);
        return blocks.map((block) => {
            // Normalize each line's whitespace
            return block.join('').replace(/\s{2,}/g, ' ');
        })
            .join('\n').replace(/\s+$/, ''); // trimRight;
    }
    toString() {
        const tag = this.rawTagName;
        if (tag) {
            // const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
            // const is_void = void_tags.has(tag);
            const is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
            const attrs = this.rawAttrs ? ` ${this.rawAttrs}` : '';
            if (is_void) {
                return `<${tag}${attrs}>`;
            }
            return `<${tag}${attrs}>${this.innerHTML}</${tag}>`;
        }
        return this.innerHTML;
    }
    get innerHTML() {
        return this.childNodes.map((child) => {
            return child.toString();
        }).join('');
    }
    set innerHTML(content) {
        //const r = parse(content, global.options); // TODO global.options ?
        const r = parse(content);
        this.childNodes = r.childNodes.length ? r.childNodes : [new TextNode(content, this)];
    }
    set_content(content, options = {}) {
        if (content instanceof Node) {
            content = [content];
        }
        else if (typeof content == 'string') {
            const r = parse(content, options);
            content = r.childNodes.length ? r.childNodes : [new TextNode(content, this)];
        }
        this.childNodes = content;
    }
    replaceWith(...nodes) {
        const content = nodes.map((node) => {
            if (node instanceof Node) {
                return [node];
            }
            else if (typeof node == 'string') {
                // const r = parse(content, global.options); // TODO global.options ?
                const r = parse(node);
                return r.childNodes.length ? r.childNodes : [new TextNode(node, this)];
            }
            return [];
        }).flat();
        const idx = this.parentNode.childNodes.findIndex((child) => {
            return child === this;
        });
        this.parentNode.childNodes = [
            ...this.parentNode.childNodes.slice(0, idx),
            ...content,
            ...this.parentNode.childNodes.slice(idx + 1),
        ];
    }
    get outerHTML() {
        return this.toString();
    }
    /**
     * Trim element from right (in block) after seeing pattern in a TextNode.
     * @param  {RegExp} pattern pattern to find
     * @return {HTMLElement}    reference to current node
     */
    trimRight(pattern) {
        for (let i = 0; i < this.childNodes.length; i++) {
            const childNode = this.childNodes[i];
            if (childNode.nodeType === NodeType.ELEMENT_NODE) {
                childNode.trimRight(pattern);
            }
            else {
                const index = childNode.rawText.search(pattern);
                if (index > -1) {
                    childNode.rawText = childNode.rawText.substr(0, index);
                    // trim all following nodes.
                    this.childNodes.length = i + 1;
                }
            }
        }
        return this;
    }
    /**
     * Get DOM structure
     * @return {string} structure
     */
    get structure() {
        const res = [];
        let indention = 0;
        function write(str) {
            res.push('  '.repeat(indention) + str);
        }
        function dfs(node) {
            const idStr = node.id ? (`#${node.id}`) : '';
            const classStr = node.classList.length ? (`.${node.classList.value.join('.')}`) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
            write(`${node.rawTagName}${idStr}${classStr}`);
            indention++;
            node.childNodes.forEach((childNode) => {
                if (childNode.nodeType === NodeType.ELEMENT_NODE) {
                    dfs(childNode);
                }
                else if (childNode.nodeType === NodeType.TEXT_NODE) {
                    if (!childNode.isWhitespace) {
                        write('#text');
                    }
                }
            });
            indention--;
        }
        dfs(this);
        return res.join('\n');
    }
    /**
     * Remove whitespaces in this sub tree.
     * @return {HTMLElement} pointer to this
     */
    removeWhitespace() {
        let o = 0;
        this.childNodes.forEach((node) => {
            if (node.nodeType === NodeType.TEXT_NODE) {
                if (node.isWhitespace) {
                    return;
                }
                node.rawText = node.trimmedText;
            }
            else if (node.nodeType === NodeType.ELEMENT_NODE) {
                node.removeWhitespace();
            }
            this.childNodes[o++] = node;
        });
        this.childNodes.length = o;
        return this;
    }
    /**
     * Query CSS selector to find matching nodes.
     * @param  {string}         selector Simplified CSS selector
     * @return {HTMLElement[]}  matching elements
     */
    querySelectorAll(selector) {
        return selectAll(selector, this, {
            xmlMode: true,
            adapter: Matcher
        });
        // let matcher: Matcher;
        // if (selector instanceof Matcher) {
        // 	matcher = selector;
        // 	matcher.reset();
        // } else {
        // 	if (selector.includes(',')) {
        // 		const selectors = selector.split(',');
        // 		return Array.from(selectors.reduce((pre, cur) => {
        // 			const result = this.querySelectorAll(cur.trim());
        // 			return result.reduce((p, c) => {
        // 				return p.add(c);
        // 			}, pre);
        // 		}, new Set<HTMLElement>()));
        // 	}
        // 	matcher = new Matcher(selector);
        // }
        // interface IStack {
        // 	0: Node;	// node
        // 	1: number;	// children
        // 	2: boolean;	// found flag
        // }
        // const stack = [] as IStack[];
        // return this.childNodes.reduce((res, cur) => {
        // 	stack.push([cur, 0, false]);
        // 	while (stack.length) {
        // 		const state = arr_back(stack);	// get last element
        // 		const el = state[0];
        // 		if (state[1] === 0) {
        // 			// Seen for first time.
        // 			if (el.nodeType !== NodeType.ELEMENT_NODE) {
        // 				stack.pop();
        // 				continue;
        // 			}
        // 			const html_el = el as HTMLElement;
        // 			state[2] = matcher.advance(html_el);
        // 			if (state[2]) {
        // 				if (matcher.matched) {
        // 					res.push(html_el);
        // 					res.push(...(html_el.querySelectorAll(selector)));
        // 					// no need to go further.
        // 					matcher.rewind();
        // 					stack.pop();
        // 					continue;
        // 				}
        // 			}
        // 		}
        // 		if (state[1] < el.childNodes.length) {
        // 			stack.push([el.childNodes[state[1]++], 0, false]);
        // 		} else {
        // 			if (state[2]) {
        // 				matcher.rewind();
        // 			}
        // 			stack.pop();
        // 		}
        // 	}
        // 	return res;
        // }, [] as HTMLElement[]);
    }
    /**
     * Query CSS Selector to find matching node.
     * @param  {string}         selector Simplified CSS selector
     * @return {HTMLElement}    matching node
     */
    querySelector(selector) {
        return selectOne(selector, this, {
            xmlMode: true,
            adapter: Matcher
        });
        // let matcher: Matcher;
        // if (selector instanceof Matcher) {
        // 	matcher = selector;
        // 	matcher.reset();
        // } else {
        // 	matcher = new Matcher(selector);
        // }
        // const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
        // for (const node of this.childNodes) {
        // 	stack.push([node, 0, false]);
        // 	while (stack.length) {
        // 		const state = arr_back(stack);
        // 		const el = state[0];
        // 		if (state[1] === 0) {
        // 			// Seen for first time.
        // 			if (el.nodeType !== NodeType.ELEMENT_NODE) {
        // 				stack.pop();
        // 				continue;
        // 			}
        // 			state[2] = matcher.advance(el as HTMLElement);
        // 			if (state[2]) {
        // 				if (matcher.matched) {
        // 					return el as HTMLElement;
        // 				}
        // 			}
        // 		}
        // 		if (state[1] < el.childNodes.length) {
        // 			stack.push([el.childNodes[state[1]++], 0, false]);
        // 		} else {
        // 			if (state[2]) {
        // 				matcher.rewind();
        // 			}
        // 			stack.pop();
        // 		}
        // 	}
        // }
        // return null;
    }
    /**
     * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
     * @param selector a DOMString containing a selector list
     */
    closest(selector) {
        // Maps every node on the path from this element up to the root to the
        // path node directly below it (this element itself maps to null).
        const mapChild = new Map();
        let el = this;
        let old = null;
        // Adapter `findOne`: return the first node that passes `test`, looking at
        // each given element and then following mapChild downwards from it.
        function findOne(test, elems) {
            let elem = null;
            // The loop stops as soon as a match has been found.
            for (let i = 0, l = elems.length; i < l && !elem; i++) {
                const el = elems[i];
                if (test(el)) {
                    elem = el;
                }
                else {
                    const child = mapChild.get(el);
                    if (child) {
                        elem = findOne(test, [child]);
                    }
                }
            }
            return elem;
        }
        // Walk up through parentNode, recording the path in mapChild.
        while (el) {
            mapChild.set(el, old);
            old = el;
            el = el.parentNode;
        }
        // Second pass: starting at this element and moving upwards, run the
        // selector with an adapter that only exposes the recorded path.
        el = this;
        while (el) {
            const e = selectOne(selector, el, {
                xmlMode: true,
                adapter: {
                    ...Matcher,
                    // A node's only visible child is its successor on the path.
                    getChildren(node) {
                        const child = mapChild.get(node);
                        return child && [child];
                    },
                    // Siblings are hidden: a node is its own only sibling.
                    getSiblings(node) {
                        return [node];
                    },
                    findOne,
                    findAll() {
                        return [];
                    }
                }
            });
            if (e) {
                return e;
            }
            el = el.parentNode;
        }
        // Nothing matched.
        return null;
    }
    /**
     * Append a child node to childNodes
     * @param  {Node} node node to append
     * @return {Node}      node appended
     */
    appendChild(node) {
        // node.parentNode = this;
        this.childNodes.push(node);
        node.parentNode = this;
        return node;
    }
    /**
     * Get first child node
     * @return {Node} first child node
     */
    get firstChild() {
        return this.childNodes[0];
    }
    /**
     * Get last child node
     * @return {Node} last child node
     */
    get lastChild() {
        return arr_back(this.childNodes);
    }
    /**
     * Get attributes
     * @access private
     * @return {Object} parsed and unescaped attributes
     */
    get attrs() {
        if (this._attrs) {
            return this._attrs;
        }
        this._attrs = {};
        const attrs = this.rawAttributes;
        for (const key in attrs) {
            const val = attrs[key] || '';
            this._attrs[key.toLowerCase()] = decode(val);
        }
        return this._attrs;
    }
    get attributes() {
        const ret_attrs = {};
        const attrs = this.rawAttributes;
        for (const key in attrs) {
            const val = attrs[key] || '';
            ret_attrs[key] = decode(val);
        }
        return ret_attrs;
    }
    /**
     * Get escaped (as-is) attributes
     * @return {Object} parsed attributes
     */
    get rawAttributes() {
        if (this._rawAttrs) {
            return this._rawAttrs;
        }
        const attrs = {};
        if (this.rawAttrs) {
            const re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
            let match;
            while ((match = re.exec(this.rawAttrs))) {
                attrs[match[1]] = match[2] || match[3] || match[4] || null;
            }
        }
        this._rawAttrs = attrs;
        return attrs;
    }
    removeAttribute(key) {
        const attrs = this.rawAttributes;
        delete attrs[key];
        // Update this.attribute
        if (this._attrs) {
            delete this._attrs[key];
        }
        // Update rawString
        this.rawAttrs = Object.keys(attrs).map((name) => {
            const val = JSON.stringify(attrs[name]);
            if (val === undefined || val === 'null') {
                return name;
            }
            return `${name}=${val}`;
        }).join(' ');
        // Update this.id
        if (key === 'id') {
            this.id = '';
        }
    }
    hasAttribute(key) {
        return key.toLowerCase() in this.attrs;
    }
    /**
     * Get an attribute
     * @return {string} value of the attribute
     */
    getAttribute(key) {
        return this.attrs[key.toLowerCase()];
    }
    /**
     * Set an attribute value to the HTMLElement
     * @param {string} key The attribute name
     * @param {string} value The value to set; it is converted with String(), so null / undefined are stored as the strings "null" / "undefined"
     */
    setAttribute(key, value) {
        if (arguments.length < 2) {
            throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
        }
        const k2 = key.toLowerCase();
        const attrs = this.rawAttributes;
        for (const k in attrs) {
            if (k.toLowerCase() === k2) {
                key = k;
                break;
            }
        }
        attrs[key] = String(value);
        // update this.attrs
        if (this._attrs) {
            this._attrs[k2] = decode(attrs[key]);
        }
        // Update rawString
        this.rawAttrs = Object.keys(attrs).map((name) => {
            const val = this.quoteAttribute(attrs[name]);
            if (val === 'null' || val === '""') {
                return name;
            }
            return `${name}=${val}`;
        }).join(' ');
        // Update this.id
        if (key === 'id') {
            this.id = value;
        }
    }
    /**
     * Replace all the attributes of the HTMLElement by the provided attributes
     * @param {Attributes} attributes the new attribute set
     */
    setAttributes(attributes) {
        // Invalidate current this.attributes
        if (this._attrs) {
            delete this._attrs;
        }
        // Invalidate current this.rawAttributes
        if (this._rawAttrs) {
            delete this._rawAttrs;
        }
        // Update rawString
        this.rawAttrs = Object.keys(attributes).map((name) => {
            const val = attributes[name];
            if (val === 'null' || val === '""') {
                return name;
            }
            return `${name}=${this.quoteAttribute(String(val))}`;
        }).join(' ');
    }
    insertAdjacentHTML(where, html) {
        if (arguments.length < 2) {
            throw new Error('2 arguments required');
        }
        const p = parse(html);
        if (where === 'afterend') {
            const idx = this.parentNode.childNodes.findIndex((child) => {
                return child === this;
            });
            this.parentNode.childNodes.splice(idx + 1, 0, ...p.childNodes);
            p.childNodes.forEach((n) => {
                if (n instanceof HTMLElement) {
                    n.parentNode = this.parentNode;
                }
            });
        }
        else if (where === 'afterbegin') {
            this.childNodes.unshift(...p.childNodes);
        }
        else if (where === 'beforeend') {
            p.childNodes.forEach((n) => {
                this.appendChild(n);
            });
        }
        else if (where === 'beforebegin') {
            const idx = this.parentNode.childNodes.findIndex((child) => {
                return child === this;
            });
            this.parentNode.childNodes.splice(idx, 0, ...p.childNodes);
            p.childNodes.forEach((n) => {
                if (n instanceof HTMLElement) {
                    n.parentNode = this.parentNode;
                }
            });
        }
        else {
            throw new Error(`The value provided ('${where}') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'`);
        }
        // if (!where || html === undefined || html === null) {
        // 	return;
        // }
    }
    get nextSibling() {
        if (this.parentNode) {
            const children = this.parentNode.childNodes;
            let i = 0;
            while (i < children.length) {
                const child = children[i++];
                if (this === child) {
                    return children[i] || null;
                }
            }
            return null;
        }
    }
    get nextElementSibling() {
        if (this.parentNode) {
            const children = this.parentNode.childNodes;
            let i = 0;
            let find = false;
            while (i < children.length) {
                const child = children[i++];
                if (find) {
                    if (child instanceof HTMLElement) {
                        return child || null;
                    }
                }
                else if (this === child) {
                    find = true;
                }
            }
            return null;
        }
    }
    get classNames() {
        return this.classList.toString();
    }
}
// Matches either an HTML comment (`<!-- ... -->`) or an opening / closing tag.
// Capture groups for tags: 1 = '/' of a closing tag, 2 = tag name,
// 3 = raw attribute text, 4 = trailing '/' of a self-closing tag.
// The pattern is global, so exec() keeps its position in lastIndex between calls.
// Tag-name syntax reference:
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
const kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
// Earlier pattern drafts, not used by the code:
// <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
// <([a-z][-.:0-9_a-z]*)\s*\/>
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
// Matches `id` / `class` attributes inside raw attribute text.
// Capture groups: 2 = attribute name, 3 = value as written (including quotes),
// 4 = double-quoted content, 5 = single-quoted content, 6 = unquoted value.
const kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
// Lookup table (tag name -> true) of self-closing elements. Each name is
// present in lower case and in upper case.
const kSelfClosingElements = Object.fromEntries([
    'area',
    'base',
    'br',
    'col',
    'hr',
    'img',
    'input',
    'link',
    'meta',
    'source',
    'embed',
    'param',
    'track',
    'wbr'
].flatMap((tag) => [[tag, true], [tag.toUpperCase(), true]]));
// Two-level lookup table keyed by tag name, in both lower and upper case.
// NOTE(review): presumably maps a currently open element to the opening tags
// that implicitly close it; the consumer is not visible here — confirm in the parser.
const kElementsClosedByOpening = (() => {
    const rules = [
        ['li', ['li']],
        ['p', ['p', 'div']],
        ['b', ['div']],
        ['td', ['td', 'th']],
        ['th', ['td', 'th']],
        ['h1', ['h1']],
        ['h2', ['h2']],
        ['h3', ['h3']],
        ['h4', ['h4']],
        ['h5', ['h5']],
        ['h6', ['h6']]
    ];
    // Inner table: all lower-case names first, then their upper-case forms.
    const toLookup = (tags) => Object.fromEntries([...tags, ...tags.map((tag) => tag.toUpperCase())].map((tag) => [tag, true]));
    const table = {};
    for (const [tag, related] of rules) {
        table[tag] = toLookup(related);
        table[tag.toUpperCase()] = toLookup(related);
    }
    return table;
})();
// Two-level lookup table keyed by tag name, in both lower and upper case.
// NOTE(review): presumably maps a currently open element to the closing tags
// that implicitly close it; the consumer is not visible here — confirm in the parser.
const kElementsClosedByClosing = (() => {
    const rules = [
        ['li', ['ul', 'ol']],
        ['a', ['div']],
        ['b', ['div']],
        ['i', ['div']],
        ['p', ['div']],
        ['td', ['tr', 'table']],
        ['th', ['tr', 'table']]
    ];
    // Inner table: all lower-case names first, then their upper-case forms.
    const toLookup = (tags) => Object.fromEntries([...tags, ...tags.map((tag) => tag.toUpperCase())].map((tag) => [tag, true]));
    const table = {};
    for (const [tag, related] of rules) {
        table[tag] = toLookup(related);
        table[tag.toUpperCase()] = toLookup(related);
    }
    return table;
})();
// Tag name of the synthetic wrapper element that base_parse puts around the
// input before matching; matches for this tag are skipped by the parser loop.
const frameflag = 'documentfragmentcontainer';
/**
 * Parses HTML and returns a root element
 * Parse a chunk of HTML source.
 * @param  {string} data      html
 * @return {HTMLElement}      root element
 */
export function base_parse(data, options = { lowerCaseTagName: false, comment: false }) {
    const elements = options.blockTextElements || {
        script: true,
        noscript: true,
        style: true,
        pre: true
    };
    const element_names = Object.keys(elements);
    const kBlockTextElements = element_names.map((it) => {
        return new RegExp(it, 'i');
    });
    const kIgnoreElements = element_names.filter((it) => {
        return elements[it];
    }).map((it) => {
        return new RegExp(it, 'i');
    });
    function element_should_be_ignore(tag) {
        return kIgnoreElements.some((it) => {
            return it.test(tag);
        });
    }
    function is_block_text_element(tag) {
        return kBlockTextElements.some((it) => {
            return it.test(tag);
        });
    }
    const root = new HTMLElement(null, {}, '', null);
    let currentParent = root;
    const stack = [root];
    let lastTextPos = -1;
    let match;
    // https://github.com/taoqf/node-html-parser/issues/38
    data = `<${frameflag}>${data}</${frameflag}>`;
    while ((match = kMarkupPattern.exec(data))) {
        if (lastTextPos > -1) {
            if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
                // if has content
                const text = data.substring(lastTextPos, kMarkupPattern.lastIndex - match[0].length);
                currentParent.appendChild(new TextNode(text, currentParent));
            }
        }
        lastTextPos = kMarkupPattern.lastIndex;
        if (match[2] === frameflag) {
            continue;
        }
        if (match[0][1] === '!') {
            // this is a comment
            if (options.comment) {
                // Only keep what is in between <!-- and -->
                const text = data.substring(lastTextPos - 3, lastTextPos - match[0].length + 4);
                currentParent.appendChild(new CommentNode(text, currentParent));
            }
            continue;
        }
        if (options.lowerCaseTagName) {
            match[2] = match[2].toLowerCase();
        }
        if (!match[1]) {
            // not </ tags
            const attrs = {};
            for (let attMatch; (attMatch = kAttributePattern.exec(match[3]));) {
                attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
            }
            const tagName = currentParent.rawTagName;
            if (!match[4] && kElementsClosedByOpening[tagName]) {
                if (kElementsClosedByOpening[tagName][match[2]]) {
                    stack.pop();
                    currentParent = arr_back(stack);
                }
            }
            // ignore container tag we add above
            // https://github.com/taoqf/node-html-parser/issues/38
            currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3], null));
            stack.push(currentParent);
            if (is_block_text_element(match[2])) {
                // a little test to find next </script> or </style> ...
                const closeMarkup = `</${match[2]}>`;
                const index = (() => {
                    if (options.lowerCaseTagName) {
                        return data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex);
                    }
                    return data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
                })();
                if (element_should_be_ignore(match[2])) {
                    let text;
                    if (index === -1) {
                        // there is no matching ending for the text element.
                        text = data.substr(kMarkupPattern.lastIndex);
                    }
                    else {
                        text = data.substring(kMarkupPattern.lastIndex, index);
                    }
                    if (text.length > 0) {
                        currentParent.appendChild(new TextNode(text, currentParent));
                    }
                }
                if (index === -1) {
                    lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
                }
                else {
                    lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup.length;
                    match[1] = 'true';
                }
            }
        }
        if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
            // </ or /> or <br> etc.
            while (true) {
                if (currentParent.rawTagName === match[2]) {
                    stack.pop();
                    currentParent = arr_back(stack);
                    break;
                }
                else {
                    const tagName = currentParent.tagName;
                    // Trying to close current tag, and move on
                    if (kElementsClosedByClosing[tagName]) {
                        if (kElementsClosedByClosing[tagName][match[2]]) {
                            stack.pop();
                            currentParent = arr_back(stack);
                            continue;
                        }
                    }
                    // Use aggressive strategy to handle unmatching markups.
                    break;
                }
            }
        }
    }
    return stack;
}
/**
 * Parses a chunk of HTML source and returns the root element.
 *
 * Runs base_parse, then unwinds every element that was left open at the end
 * of the input: the element is removed and its children are handed to a
 * surviving ancestor.
 */
export function parse(data, options = { lowerCaseTagName: false, comment: false }) {
    const stack = base_parse(data, options);
    const root = stack[0];
    while (stack.length > 1) {
        // Handle each unclosed element, innermost first.
        const unclosed = stack.pop();
        const enclosing = arr_back(stack);
        const hasGrandParent = unclosed.parentNode && unclosed.parentNode.parentNode;
        if (!hasGrandParent) {
            // If it's the final element, just skip it.
            continue;
        }
        // Pair error  <h3> <h3>        : fixed to <h3> </h3>, children go one level up.
        // Single error <div> <h3> </div>: <h3> is dropped, children go to the enclosing node.
        const isPairError = unclosed.parentNode === enclosing && unclosed.tagName === enclosing.tagName;
        enclosing.removeChild(unclosed);
        const adopter = isPairError ? enclosing.parentNode : enclosing;
        unclosed.childNodes.forEach((child) => {
            adopter.appendChild(child);
        });
        if (isPairError) {
            // the enclosing twin is settled as well
            stack.pop();
        }
    }
    return root;
}

Spamworldpro Mini