// parse a single path portion import { parseClass } from './brace-expressions.js'; import { unescape } from './unescape.js'; const types = new Set(['!', '?', '+', '*', '@']); const isExtglobType = (c) => types.has(c); // Patterns that get prepended to bind to the start of either the // entire string, or just a single path portion, to prevent dots // and/or traversal patterns, when needed. // Exts don't need the ^ or / bit, because the root binds that already. const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))'; const startNoDot = '(?!\\.)'; // characters that indicate a start of pattern needs the "no dots" bit, // because a dot *might* be matched. ( is not in the list, because in // the case of a child extglob, it will handle the prevention itself. const addPatternStart = new Set(['[', '.']); // cases where traversal is A-OK, no dot prevention needed const justDots = new Set(['..', '.']); const reSpecials = new Set('().*{}+?[]^$\\!'); const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&'); // any single thing other than / const qmark = '[^/]'; // * => any number of characters const star = qmark + '*?'; // use + when we need to ensure that *something* matches, because the * is // the only thing in the path portion. const starNoEmpty = qmark + '+?'; // remove the \ chars that we added if we end up doing a nonmagic compare // const deslash = (s: string) => s.replace(/\\(.)/g, '$1') export class AST { type; #root; #hasMagic; #uflag = false; #parts = []; #parent; #parentIndex; #negs; #filledNegs = false; #options; #toString; // set to true if it's an extglob with no children // (which really means one child of '') #emptyExt = false; constructor(type, parent, options = {}) { this.type = type; // extglobs are inherently magical if (type) this.#hasMagic = true; this.#parent = parent; this.#root = this.#parent ? this.#parent.#root : this; this.#options = this.#root === this ? options : this.#root.#options; this.#negs = this.#root === this ? [] : this.#root.#negs; if (type === '!' && !this.#root.#filledNegs) this.#negs.push(this); this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0; } get hasMagic() { /* c8 ignore start */ if (this.#hasMagic !== undefined) return this.#hasMagic; /* c8 ignore stop */ for (const p of this.#parts) { if (typeof p === 'string') continue; if (p.type || p.hasMagic) return (this.#hasMagic = true); } // note: will be undefined until we generate the regexp src and find out return this.#hasMagic; } // reconstructs the pattern toString() { if (this.#toString !== undefined) return this.#toString; if (!this.type) { return (this.#toString = this.#parts.map(p => String(p)).join('')); } else { return (this.#toString = this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')'); } } #fillNegs() { /* c8 ignore start */ if (this !== this.#root) throw new Error('should only call on root'); if (this.#filledNegs) return this; /* c8 ignore stop */ // call toString() once to fill this out this.toString(); this.#filledNegs = true; let n; while ((n = this.#negs.pop())) { if (n.type !== '!') continue; // walk up the tree, appending everthing that comes AFTER parentIndex let p = n; let pp = p.#parent; while (pp) { for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) { for (const part of n.#parts) { /* c8 ignore start */ if (typeof part === 'string') { throw new Error('string part in extglob AST??'); } /* c8 ignore stop */ part.copyIn(pp.#parts[i]); } } p = pp; pp = p.#parent; } } return this; } push(...parts) { for (const p of parts) { if (p === '') continue; /* c8 ignore start */ if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) { throw new Error('invalid part: ' + p); } /* c8 ignore stop */ this.#parts.push(p); } } toJSON() { const ret = this.type === null ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON())) : [this.type, ...this.#parts.map(p => p.toJSON())]; if (this.isStart() && !this.type) ret.unshift([]); if (this.isEnd() && (this === this.#root || (this.#root.#filledNegs && this.#parent?.type === '!'))) { ret.push({}); } return ret; } isStart() { if (this.#root === this) return true; // if (this.type) return !!this.#parent?.isStart() if (!this.#parent?.isStart()) return false; if (this.#parentIndex === 0) return true; // if everything AHEAD of this is a negation, then it's still the "start" const p = this.#parent; for (let i = 0; i < this.#parentIndex; i++) { const pp = p.#parts[i]; if (!(pp instanceof AST && pp.type === '!')) { return false; } } return true; } isEnd() { if (this.#root === this) return true; if (this.#parent?.type === '!') return true; if (!this.#parent?.isEnd()) return false; if (!this.type) return this.#parent?.isEnd(); // if not root, it'll always have a parent /* c8 ignore start */ const pl = this.#parent ? this.#parent.#parts.length : 0; /* c8 ignore stop */ return this.#parentIndex === pl - 1; } copyIn(part) { if (typeof part === 'string') this.push(part); else this.push(part.clone(this)); } clone(parent) { const c = new AST(this.type, parent); for (const p of this.#parts) { c.copyIn(p); } return c; } static #parseAST(str, ast, pos, opt) { let escaping = false; let inBrace = false; let braceStart = -1; let braceNeg = false; if (ast.type === null) { // outside of a extglob, append until we find a start let i = pos; let acc = ''; while (i < str.length) { const c = str.charAt(i++); // still accumulate escapes at this point, but we do ignore // starts that are escaped if (escaping || c === '\\') { escaping = !escaping; acc += c; continue; } if (inBrace) { if (i === braceStart + 1) { if (c === '^' || c === '!') { braceNeg = true; } } else if (c === ']' && !(i === braceStart + 2 && braceNeg)) { inBrace = false; } acc += c; continue; } else if (c === '[') { inBrace = true; braceStart = i; braceNeg = false; acc += c; continue; } if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') { ast.push(acc); acc = ''; const ext = new AST(c, ast); i = AST.#parseAST(str, ext, i, opt); ast.push(ext); continue; } acc += c; } ast.push(acc); return i; } // some kind of extglob, pos is at the ( // find the next | or ) let i = pos + 1; let part = new AST(null, ast); const parts = []; let acc = ''; while (i < str.length) { const c = str.charAt(i++); // still accumulate escapes at this point, but we do ignore // starts that are escaped if (escaping || c === '\\') { escaping = !escaping; acc += c; continue; } if (inBrace) { if (i === braceStart + 1) { if (c === '^' || c === '!') { braceNeg = true; } } else if (c === ']' && !(i === braceStart + 2 && braceNeg)) { inBrace = false; } acc += c; continue; } else if (c === '[') { inBrace = true; braceStart = i; braceNeg = false; acc += c; continue; } if (isExtglobType(c) && str.charAt(i) === '(') { part.push(acc); acc = ''; const ext = new AST(c, part); part.push(ext); i = AST.#parseAST(str, ext, i, opt); continue; } if (c === '|') { part.push(acc); acc = ''; parts.push(part); part = new AST(null, ast); continue; } if (c === ')') { if (acc === '' && ast.#parts.length === 0) { ast.#emptyExt = true; } part.push(acc); acc = ''; ast.push(...parts, part); return i; } acc += c; } // unfinished extglob // if we got here, it was a malformed extglob! not an extglob, but // maybe something else in there. ast.type = null; ast.#hasMagic = undefined; ast.#parts = [str.substring(pos - 1)]; return i; } static fromGlob(pattern, options = {}) { const ast = new AST(null, undefined, options); AST.#parseAST(pattern, ast, 0, options); return ast; } // returns the regular expression if there's magic, or the unescaped // string if not. toMMPattern() { // should only be called on root /* c8 ignore start */ if (this !== this.#root) return this.#root.toMMPattern(); /* c8 ignore stop */ const glob = this.toString(); const [re, body, hasMagic, uflag] = this.toRegExpSource(); // if we're in nocase mode, and not nocaseMagicOnly, then we do // still need a regular expression if we have to case-insensitively // match capital/lowercase characters. const anyMagic = hasMagic || this.#hasMagic || (this.#options.nocase && !this.#options.nocaseMagicOnly && glob.toUpperCase() !== glob.toLowerCase()); if (!anyMagic) { return body; } const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : ''); return Object.assign(new RegExp(`^${re}$`, flags), { _src: re, _glob: glob, }); } get options() { return this.#options; } // returns the string match, the regexp source, whether there's magic // in the regexp (so a regular expression is required) and whether or // not the uflag is needed for the regular expression (for posix classes) // TODO: instead of injecting the start/end at this point, just return // the BODY of the regexp, along with the start/end portions suitable // for binding the start/end in either a joined full-path makeRe context // (where we bind to (^|/), or a standalone matchPart context (where // we bind to ^, and not /). Otherwise slashes get duped! // // In part-matching mode, the start is: // - if not isStart: nothing // - if traversal possible, but not allowed: ^(?!\.\.?$) // - if dots allowed or not possible: ^ // - if dots possible and not allowed: ^(?!\.) // end is: // - if not isEnd(): nothing // - else: $ // // In full-path matching mode, we put the slash at the START of the // pattern, so start is: // - if first pattern: same as part-matching mode // - if not isStart(): nothing // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/)) // - if dots allowed or not possible: / // - if dots possible and not allowed: /(?!\.) // end is: // - if last pattern, same as part-matching mode // - else nothing // // Always put the (?:$|/) on negated tails, though, because that has to be // there to bind the end of the negated pattern portion, and it's easier to // just stick it in now rather than try to inject it later in the middle of // the pattern. // // We can just always return the same end, and leave it up to the caller // to know whether it's going to be used joined or in parts. // And, if the start is adjusted slightly, can do the same there: // - if not isStart: nothing // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$) // - if dots allowed or not possible: (?:/|^) // - if dots possible and not allowed: (?:/|^)(?!\.) // // But it's better to have a simpler binding without a conditional, for // performance, so probably better to return both start options. // // Then the caller just ignores the end if it's not the first pattern, // and the start always gets applied. // // But that's always going to be $ if it's the ending pattern, or nothing, // so the caller can just attach $ at the end of the pattern when building. // // So the todo is: // - better detect what kind of start is needed // - return both flavors of starting pattern // - attach $ at the end of the pattern when creating the actual RegExp // // Ah, but wait, no, that all only applies to the root when the first pattern // is not an extglob. If the first pattern IS an extglob, then we need all // that dot prevention biz to live in the extglob portions, because eg // +(*|.x*) can match .xy but not .yx. // // So, return the two flavors if it's #root and the first child is not an // AST, otherwise leave it to the child AST to handle it, and there, // use the (?:^|/) style of start binding. // // Even simplified further: // - Since the start for a join is eg /(?!\.) and the start for a part // is ^(?!\.), we can just prepend (?!\.) to the pattern (either root // or start or whatever) and prepend ^ or / at the Regexp construction. toRegExpSource(allowDot) { const dot = allowDot ?? !!this.#options.dot; if (this.#root === this) this.#fillNegs(); if (!this.type) { const noEmpty = this.isStart() && this.isEnd(); const src = this.#parts .map(p => { const [re, _, hasMagic, uflag] = typeof p === 'string' ? AST.#parseGlob(p, this.#hasMagic, noEmpty) : p.toRegExpSource(allowDot); this.#hasMagic = this.#hasMagic || hasMagic; this.#uflag = this.#uflag || uflag; return re; }) .join(''); let start = ''; if (this.isStart()) { if (typeof this.#parts[0] === 'string') { // this is the string that will match the start of the pattern, // so we need to protect against dots and such. // '.' and '..' cannot match unless the pattern is that exactly, // even if it starts with . or dot:true is set. const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]); if (!dotTravAllowed) { const aps = addPatternStart; // check if we have a possibility of matching . or .., // and prevent that. const needNoTrav = // dots are allowed, and the pattern starts with [ or . (dot && aps.has(src.charAt(0))) || // the pattern starts with \., and then [ or . (src.startsWith('\\.') && aps.has(src.charAt(2))) || // the pattern starts with \.\., and then [ or . (src.startsWith('\\.\\.') && aps.has(src.charAt(4))); // no need to prevent dots if it can't match a dot, or if a // sub-pattern will be preventing it anyway. const needNoDot = !dot && !allowDot && aps.has(src.charAt(0)); start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : ''; } } } // append the "end of path portion" pattern to negation tails let end = ''; if (this.isEnd() && this.#root.#filledNegs && this.#parent?.type === '!') { end = '(?:$|\\/)'; } const final = start + src + end; return [ final, unescape(src), (this.#hasMagic = !!this.#hasMagic), this.#uflag, ]; } // We need to calculate the body *twice* if it's a repeat pattern // at the start, once in nodot mode, then again in dot mode, so a // pattern like *(?) can match 'x.y' const repeated = this.type === '*' || this.type === '+'; // some kind of extglob const start = this.type === '!' ? '(?:(?!(?:' : '(?:'; let body = this.#partsToRegExp(dot); if (this.isStart() && this.isEnd() && !body && this.type !== '!') { // invalid extglob, has to at least be *something* present, if it's // the entire path portion. const s = this.toString(); this.#parts = [s]; this.type = null; this.#hasMagic = undefined; return [s, unescape(this.toString()), false, false]; } // XXX abstract out this map method let bodyDotAllowed = !repeated || allowDot || dot || !startNoDot ? '' : this.#partsToRegExp(true); if (bodyDotAllowed === body) { bodyDotAllowed = ''; } if (bodyDotAllowed) { body = `(?:${body})(?:${bodyDotAllowed})*?`; } // an empty !() is exactly equivalent to a starNoEmpty let final = ''; if (this.type === '!' && this.#emptyExt) { final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty; } else { const close = this.type === '!' ? // !() must match something,but !(x) can match '' '))' + (this.isStart() && !dot && !allowDot ? startNoDot : '') + star + ')' : this.type === '@' ? ')' : this.type === '?' ? ')?' : this.type === '+' && bodyDotAllowed ? ')' : this.type === '*' && bodyDotAllowed ? `)?` : `)${this.type}`; final = start + body + close; } return [ final, unescape(body), (this.#hasMagic = !!this.#hasMagic), this.#uflag, ]; } #partsToRegExp(dot) { return this.#parts .map(p => { // extglob ASTs should only contain parent ASTs /* c8 ignore start */ if (typeof p === 'string') { throw new Error('string type in extglob ast??'); } /* c8 ignore stop */ // can ignore hasMagic, because extglobs are already always magic const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot); this.#uflag = this.#uflag || uflag; return re; }) .filter(p => !(this.isStart() && this.isEnd()) || !!p) .join('|'); } static #parseGlob(glob, hasMagic, noEmpty = false) { let escaping = false; let re = ''; let uflag = false; for (let i = 0; i < glob.length; i++) { const c = glob.charAt(i); if (escaping) { escaping = false; re += (reSpecials.has(c) ? '\\' : '') + c; continue; } if (c === '\\') { if (i === glob.length - 1) { re += '\\\\'; } else { escaping = true; } continue; } if (c === '[') { const [src, needUflag, consumed, magic] = parseClass(glob, i); if (consumed) { re += src; uflag = uflag || needUflag; i += consumed - 1; hasMagic = hasMagic || magic; continue; } } if (c === '*') { if (noEmpty && glob === '*') re += starNoEmpty; else re += star; hasMagic = true; continue; } if (c === '?') { re += qmark; hasMagic = true; continue; } re += regExpEscape(c); } return [re, unescape(glob), !!hasMagic, uflag]; } } //# sourceMappingURL=ast.js.map