// securityos/node_modules/safe-regex/test/regex.spec.js

const safeRegex = require("../");

// Repetition count that the tests below expect safeRegex to accept when no
// `limit` option is passed.
const REPETITION_LIMIT = 25;
// One repetition past REPETITION_LIMIT; the tests below expect a pattern of
// this size to be reported as unsafe when no `limit` option is passed.
const REPETITION_TOO_MUCH = REPETITION_LIMIT + 1;

// TODO Named character classes

// Simple patterns must be reported as safe, whether they arrive as RegExp
// literals, as a pattern string, or as a value that is neither.
test("linear-time regexes are safe", () => {
  const safeInputs = [
    // RegExp literals
    /a/,
    /a*/,
    /a+/,
    /a?/,
    /a{3,5}/,
    /a|b/,
    /(ab)/,
    /(ab)\1/,
    /\bOakland\b/,
    /(a+)|(b+)/,

    // Pattern supplied as a string
    "^foo/bar",

    // Neither a RegExp nor a string
    1,
  ];

  for (const input of safeInputs) {
    const verdict = safeRegex(input);
    expect(verdict).toBe(true);
  }
});

// A pattern whose repetition count sits exactly at the limit is still safe,
// both when no `limit` option is passed and when a custom `limit` is supplied.
test("linear-time regexes are safe, under varying repetition limits", () => {
  const re1 = RegExp("a?".repeat(REPETITION_LIMIT) + "a".repeat(REPETITION_LIMIT));
  expect(safeRegex(re1)).toBe(true);

  const LOW_REPETITION_LIMIT = 3;
  // String.prototype.repeat gives exactly LOW_REPETITION_LIMIT copies of "a?".
  // Array(n).join("a?") would give only n - 1, which misses the boundary case
  // this test is named for.
  const re2 = RegExp("a?".repeat(LOW_REPETITION_LIMIT));

  expect(safeRegex(re2, { limit: LOW_REPETITION_LIMIT })).toBe(true);
});

// Polynomial-time patterns (QOA = quantified overlapping adjacency) are
// reported as safe by the current heuristics; this test pins that behavior.
test("poly-time regexes are safe (at least according to our heuristics)", () => {
  const polynomialPatterns = [
    /^a+a+$/,        // QOA
    /^a+aa+$/,       // QOA with obvious intermediate run
    /^a+aaaa+$/,     // QOA with obvious intermediate run
    /^a+[a-z]a+$/,   // QOA with obvious intermediate run
    /^a+\wa+$/,      // QOA with intermediate character class
    /^a+(\w|\d)a+$/, // QOA with valid path through
    /^a+b?a+$/,      // QOA with valid path through
    /^a+(cde)*a+$/,  // QOA with valid path through
    /^.*a*$/,        // QOA by subset
    /^\w*\d*$/,      // QOA by intersection
    /^\S+@\S+\.\S+$/, // Example from Django
    /a+$/,           // QOA under partial-match
    /abc.*$/,        // QOA under partial-match
    // TODO It would be nice to have one of the regexes that are poly-time even when they match, due to non-greedy quantifiers (p-NFA)
  ];

  for (const pattern of polynomialPatterns) {
    const verdict = safeRegex(pattern);
    expect(verdict).toBe(true);
  }
});

// Patterns with a quantifier nested inside another quantifier (star height
// greater than one) must be reported as unsafe.
test("exp-time regexes due to star height are unsafe", () => {
  const nestedQuantifierPatterns = [
    // Plain nested quantifiers
    /(a*)*$/,
    /(a?)*$/,
    /(a*)?$/,
    /(a*)+$/,
    /(a+)*$/,
    /(\wa+)*$/, // Prefix
    /(\..*)*$/, // Suffix

    // Nested quantifiers reached through alternation or extra groups
    /(a*|b)+$/,
    /(a|b*)+$/,
    /(((b*)))+$/,
    /(((b*))+)$/,
    /(((b*)+))$/,
    /(((b)*)+)$/,
    /(((b)*))+$/,

    // Assorted more involved cases
    /^(a?){25}(a){25}$/,
    /(x+x+)+y/,
    /foo|(x+x+)+y/,
    /(a+){10}y/,
    /(a+){2}y/,
    /(.*){1,32000}[bc]/,


    // Pattern supplied as a string
    "(a+)+",
  ];

  for (const pattern of nestedQuantifierPatterns) {
    const verdict = safeRegex(pattern);
    expect(verdict).toBe(false);
  }
});

// Pins current behavior: the title calls these patterns linear-time, yet they
// are asserted to come back as unsafe.
test("linear-time regexes with star height > 1", () => {
  // TODO These are false positives, Fix once we improve analysis
  const falsePositives = [
    /(ab*)+$/,
    /(b*a)+$/,
  ];

  for (const pattern of falsePositives) {
    const verdict = safeRegex(pattern);
    expect(verdict).toBe(false);
  }
});

// Pins current behavior: quantified overlapping disjunctions (QOD) are
// asserted to come back as safe even though the title calls them exp-time.
test("exp-time regexes due to disjunction are safe (according to current heuristics)", () => {
  // TODO These are false negatives. Fix once we improve analysis
  const falseNegatives = [
    /(a|a)*$/,       // QOD: obvious
    /(a|\w)*$/,      // QOD due to overlap
    /([abc]|b)*$/,   // QOD due to overlap
    /(\w\w\w|bab)*$/, // QOD due to overlap, with multi-step internal paths
  ];

  for (const pattern of falseNegatives) {
    const verdict = safeRegex(pattern);
    expect(verdict).toBe(true);
  }
});

// One repetition beyond the limit must flip the verdict to unsafe, both with
// no `limit` option and with a caller-supplied `limit`.
test("regex that exceeds repetition limit is unsafe", () => {
  const optionalRun = "a?".repeat(REPETITION_TOO_MUCH);
  const requiredRun = "a".repeat(REPETITION_TOO_MUCH);
  const overDefault = RegExp(optionalRun + requiredRun);
  expect(safeRegex(overDefault)).toBe(false);

  const customLimit = 3;
  const overCustom = RegExp("a?".repeat(customLimit + 1));
  const options = { limit: customLimit };
  expect(safeRegex(overCustom, options)).toBe(false);
});

// Pattern strings listed here as invalid regexes must be reported as unsafe.
test("invalid regexes default to unsafe", () => {
  const malformedSources = [
    "(a+",
    "[a-z",
    "*Oakland*",
    "hey(yoo) )",
    "abcde(?>hellow)",
    "[abc",
  ];

  for (const source of malformedSources) {
    const verdict = safeRegex(source);
    expect(verdict).toBe(false);
  }
});
First commit 2024-09-06 15:32:35 +00:00			`const safeRegex = require("../");`

			`const REPETITION_LIMIT = 25;`
			`const REPETITION_TOO_MUCH = REPETITION_LIMIT + 1;`

			`// TODO Named character classes`

			`test("linear-time regexes are safe", () => {`
			`const linTime = [`
			`// regular RE's`
			`/a/,`
			`/a*/,`
			`/a+/,`
			`/a?/,`
			`/a{3,5}/,`
			`/a\|b/,`
			`/(ab)/,`
			`/(ab)\1/,`
			`/\bOakland\b/,`
			`/(a+)\|(b+)/,`

			`// RE's in a string`
			`"^foo/bar",`

			`// non-RE, non-string`
			`1,`
			`];`

			`linTime.forEach(re => {`
			`expect(safeRegex(re)).toBe(true);`
			`});`
			`});`

			`test("linear-time regexes are safe, under varying repetition limits", () => {`
			`const re1 = RegExp("a?".repeat(REPETITION_LIMIT) + "a".repeat(REPETITION_LIMIT));`
			`expect(safeRegex(re1)).toBe(true);`

			`const LOW_REPETITION_LIMIT = 3;`
			`const re2 = RegExp(Array(LOW_REPETITION_LIMIT).join("a?"));`

			`expect(safeRegex(re2, { limit: LOW_REPETITION_LIMIT })).toBe(true);`
			`});`

			`test("poly-time regexes are safe (at least according to our heuristics)", () => {`
			`const polyTime = [`
			`/^a+a+$/, // QOA`
			`/^a+aa+$/, // QOA with obvious intermediate run`
			`/^a+aaaa+$/, // QOA with obvious intermediate run`
			`/^a+[a-z]a+$/, // QOA with obvious intermediate run`
			`/^a+\wa+$/, // QOA with intermediate character class`
			`/^a+(\w\|\d)a+$/, // QOA with valid path through`
			`/^a+b?a+$/, // QOA with valid path through`
			`/^a+(cde)*a+$/, // QOA with valid path through`
			`/^.*a*$/, // QOA by subset`
			`/^\w*\d*$/, // QOA by intersection`
			`/^\S+@\S+\.\S+$/, // Example from Django`
			`/a+$/, // QOA under partial-match`
			`/abc.*$/, // QOA under partial-match`
			`// TODO It would be nice to have one of the regexes that are poly-time even when they match, due to non-greedy quantifiers (p-NFA)`
			`];`

			`polyTime.forEach(re => {`
			`expect(safeRegex(re)).toBe(true);`
			`});`
			`});`

			`test("exp-time regexes due to star height are unsafe", () => {`
			`const expTime = [`
			`// Straightforward star height`
			`/(a*)*$/,`
			`/(a?)*$/,`
			`/(a*)?$/,`
			`/(a*)+$/,`
			`/(a+)*$/,`
			`/(\wa+)*$/, // Prefix`
			`/(\..*)*$/, // Suffix`

			`// Branching and nesting.`
			`/(a*\|b)+$/,`
			`/(a\|b*)+$/,`
			`/(((b*)))+$/,`
			`/(((b*))+)$/,`
			`/(((b*)+))$/,`
			`/(((b)*)+)$/,`
			`/(((b)*))+$/,`

			`// Misc. more complex cases`
			`/^(a?){25}(a){25}$/,`
			`/(x+x+)+y/,`
			`/foo\|(x+x+)+y/,`
			`/(a+){10}y/,`
			`/(a+){2}y/,`
			`/(.*){1,32000}[bc]/,`


			`// RE's in a string`
			`"(a+)+",`
			`];`

			`expTime.forEach(re => {`
			`expect(safeRegex(re)).toBe(false);`
			`});`
			`});`

			`test("linear-time regexes with star height > 1", () => {`
			`// TODO These are false positives, Fix once we improve analysis`
			`const linTime = [`
			`/(ab*)+$/,`
			`/(b*a)+$/,`
			`];`

			`linTime.forEach(re => {`
			`expect(safeRegex(re)).toBe(false);`
			`});`
			`});`

			`test("exp-time regexes due to disjunction are safe (according to current heuristics)", () => {`
			`// TODO These are false negatives. Fix once we improve analysis`
			`const expTime = [`
			`/(a\|a)*$/, // QOD: obvious`
			`/(a\|\w)*$/, // QOD due to overlap`
			`/([abc]\|b)*$/, // QOD due to overlap`
			`/(\w\w\w\|bab)*$/, // QOD due to overlap, with multi-step internal paths`
			`];`

			`expTime.forEach(re => {`
			`expect(safeRegex(re)).toBe(true);`
			`});`
			`});`

			`test("regex that exceeds repetition limit is unsafe", () => {`
			`const re1 = RegExp("a?".repeat(REPETITION_TOO_MUCH) + "a".repeat(REPETITION_TOO_MUCH));`
			`expect(safeRegex(re1)).toBe(false);`

			`const LOW_REPETITION_LIMIT = 3;`
			`const re2 = RegExp("a?".repeat(LOW_REPETITION_LIMIT + 1));`
			`expect(safeRegex(re2, { limit: LOW_REPETITION_LIMIT })).toBe(false);`
			`});`

			`test("invalid regexes default to unsafe", () => {`
			`const invalid = [`
			`"(a+",`
			`"[a-z",`
			`"*Oakland*",`
			`"hey(yoo) )",`
			`"abcde(?>hellow)",`
			`"[abc",`
			`];`

			`invalid.forEach(re => {`
			`expect(safeRegex(re)).toBe(false);`
			`});`
			`});`