securityos/node_modules/lunr/lib/query_lexer.js

/**
 * Lexer that splits a query string into typed lexemes.
 *
 * Construction only stores the input and resets the cursors; call `run`
 * to actually populate `lexemes`.
 *
 * @constructor
 * @param {string} str - The query string to lex.
 */
lunr.QueryLexer = function (str) {
  this.lexemes = []              // lexemes emitted so far, in input order
  this.str = str                 // the raw query string
  this.length = str.length       // cached length of the query string
  this.pos = 0                   // index of the next character to read
  this.start = 0                 // index where the pending lexeme begins
  this.escapeCharPositions = []  // indexes of "\" characters in the pending lexeme
}

/**
 * Drives the lexer's state machine over the whole input.
 *
 * Starts in `lexText` and keeps calling whatever state function the
 * previous one returned, stopping as soon as a state returns a falsy value.
 */
lunr.QueryLexer.prototype.run = function () {
  for (var step = lunr.QueryLexer.lexText; step; step = step(this)) {
    // Nothing to do here: each state function advances the lexer itself.
  }
}

/**
 * Builds the text of the pending lexeme (`start` up to `pos`) with every
 * recorded escape character removed.
 *
 * Side effect: the list of recorded escape positions is emptied.
 *
 * @returns {string} The pending lexeme text without its escape characters.
 */
lunr.QueryLexer.prototype.sliceString = function () {
  var escapes = this.escapeCharPositions,
      text = '',
      from = this.start

  // Copy the stretch before each escape character, then step over it.
  for (var k = 0; k < escapes.length; k++) {
    text += this.str.slice(from, escapes[k])
    from = escapes[k] + 1
  }

  // Whatever follows the last escape character (or the whole span when
  // there were none).
  text += this.str.slice(from, this.pos)

  escapes.length = 0

  return text
}

/**
 * Appends a lexeme covering the pending span (`start` up to `pos`) to
 * `lexemes`, then begins a new, empty pending span at `pos`.
 *
 * @param {string} type - Lexeme type, e.g. `lunr.QueryLexer.TERM`.
 */
lunr.QueryLexer.prototype.emit = function (type) {
  var lexeme = {
    type: type,
    str: this.sliceString(),
    start: this.start,
    end: this.pos
  }

  this.lexemes.push(lexeme)
  this.start = this.pos
}

/**
 * Handles an escape character that has just been read with `next`.
 *
 * Records the position of the escape character (the character before
 * `pos`) so that `sliceString` can drop it, then skips over the following
 * character so it is taken literally instead of being treated as query
 * syntax.
 *
 * When the escape character is the last character of the input there is
 * nothing to skip, so `pos` is left at the end of the string. This keeps
 * the `end` offset of the final lexeme within the bounds of the query.
 */
lunr.QueryLexer.prototype.escapeCharacter = function () {
  this.escapeCharPositions.push(this.pos - 1)

  // Only step over the escaped character if there is one.
  if (this.pos < this.length) {
    this.pos += 1
  }
}

/**
 * Reads the character at `pos` and advances `pos` past it.
 *
 * @returns {string} The character read, or `lunr.QueryLexer.EOS` when the
 * input is exhausted (in which case `pos` is not moved).
 */
lunr.QueryLexer.prototype.next = function () {
  var exhausted = this.pos >= this.length

  if (exhausted) {
    return lunr.QueryLexer.EOS
  }

  var current = this.str.charAt(this.pos)
  this.pos += 1

  return current
}

/**
 * Size of the pending span.
 *
 * @returns {number} The distance between `start` and `pos`.
 */
lunr.QueryLexer.prototype.width = function () {
  var consumed = this.pos - this.start

  return consumed
}

/**
 * Discards the pending span so it will not appear in any lexeme.
 *
 * If the pending span is empty, the character at `pos` is skipped first;
 * either way the next lexeme then starts at `pos`.
 */
lunr.QueryLexer.prototype.ignore = function () {
  var nothingPending = this.start === this.pos

  if (nothingPending) {
    // Nothing has been consumed yet: step over the current character.
    this.pos += 1
  }

  this.start = this.pos
}

/**
 * Moves `pos` back by one, un-reading the most recently read character.
 */
lunr.QueryLexer.prototype.backup = function () {
  this.pos = this.pos - 1
}

/**
 * Consumes consecutive ASCII digits ("0" to "9") starting at `pos`.
 *
 * On return `pos` sits just after the last digit read. The first non-digit
 * character is un-read again; when the run is ended by the end of the
 * input there is nothing to un-read.
 */
lunr.QueryLexer.prototype.acceptDigitRun = function () {
  var ZERO = 48, // char code of "0"
      NINE = 57, // char code of "9"
      current,
      code

  for (;;) {
    current = this.next()
    code = current.charCodeAt(0)

    // Written as a negated range check so that anything that is not a
    // digit code ends the run.
    if (!(code >= ZERO && code <= NINE)) {
      break
    }
  }

  if (current !== lunr.QueryLexer.EOS) {
    this.backup()
  }
}

/**
 * Reports whether any input is left to read.
 *
 * @returns {boolean} True while `pos` is before the end of the string.
 */
lunr.QueryLexer.prototype.more = function () {
  var hasUnread = this.pos < this.length

  return hasUnread
}

// Sentinel returned by `next` once the input is exhausted. It is three
// characters long, whereas `next` otherwise returns a single character.
lunr.QueryLexer.EOS = 'EOS'

// Values used for the `type` property of emitted lexemes.
lunr.QueryLexer.FIELD = 'FIELD'                 // text before a ":" (lexField)
lunr.QueryLexer.TERM = 'TERM'                   // term text (lexTerm, lexEOS, lexText)
lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE' // digits after a "~" (lexEditDistance)
lunr.QueryLexer.BOOST = 'BOOST'                 // digits after a "^" (lexBoost)
lunr.QueryLexer.PRESENCE = 'PRESENCE'           // leading "+" or "-" (lexText)

/**
 * State entered after `lexText` has read a ":".
 *
 * Emits everything before the ":" as a FIELD lexeme and discards the ":"
 * itself.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The next state, always `lexText`.
 */
lunr.QueryLexer.lexField = function (lexer) {
  var QL = lunr.QueryLexer

  lexer.backup()        // un-read the ":" so it is not part of the field name
  lexer.emit(QL.FIELD)
  lexer.ignore()        // the pending span is now empty, so this skips the ":"

  return QL.lexText
}

/**
 * State entered after `lexText` has read a term separator character.
 *
 * Emits the text before the separator as a TERM lexeme (when there is
 * any), discards the separator, and carries on with `lexText` while input
 * remains.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function|undefined} `lexText`, or undefined once the input is
 * exhausted, which stops the state machine.
 */
lunr.QueryLexer.lexTerm = function (lexer) {
  // The separator itself counts towards the width, so a width of one
  // means there is no term text in front of it.
  var hasTermText = lexer.width() > 1

  if (hasTermText) {
    lexer.backup()
    lexer.emit(lunr.QueryLexer.TERM)
  }

  lexer.ignore()

  return lexer.more() ? lunr.QueryLexer.lexText : undefined
}

/**
 * State entered when `lexText` has found a "~" and un-read it.
 *
 * Discards the "~", consumes the run of digits after it and emits them as
 * an EDIT_DISTANCE lexeme. The lexeme is emitted even when no digits
 * follow, in which case its text is empty.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The next state, always `lexText`.
 */
lunr.QueryLexer.lexEditDistance = function (lexer) {
  var QL = lunr.QueryLexer

  lexer.ignore()         // the pending span is empty here, so this skips the "~"
  lexer.acceptDigitRun()
  lexer.emit(QL.EDIT_DISTANCE)

  return QL.lexText
}

/**
 * State entered when `lexText` has found a "^" and un-read it.
 *
 * Discards the "^", consumes the run of digits after it and emits them as
 * a BOOST lexeme. The lexeme is emitted even when no digits follow, in
 * which case its text is empty.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The next state, always `lexText`.
 */
lunr.QueryLexer.lexBoost = function (lexer) {
  var QL = lunr.QueryLexer

  lexer.ignore()         // the pending span is empty here, so this skips the "^"
  lexer.acceptDigitRun()
  lexer.emit(QL.BOOST)

  return QL.lexText
}

/**
 * Final state, entered when `lexText` reaches the end of the input.
 *
 * Emits whatever is still pending as a TERM lexeme. It returns nothing,
 * which stops the state machine in `run`.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 */
lunr.QueryLexer.lexEOS = function (lexer) {
  var pending = lexer.width()

  if (pending > 0) {
    lexer.emit(lunr.QueryLexer.TERM)
  }
}

// Pattern that lexText uses to decide that a character ends the current
// term.
//
// It is the separator used when tokenising fields within a document. The
// two should match, otherwise it is not possible to search for some
// tokens within a document.
//
// The separator on the tokenizer can be changed by the user, so it
// _might_ clash with the special characters already used within the
// search string, e.g. ":". Changing the separator in such a way makes
// some words unsearchable using a search string.
//
// Note that the value is copied here when this line runs; assigning a new
// value to lunr.tokenizer.separator afterwards does not update this
// property.
lunr.QueryLexer.termSeparator = lunr.tokenizer.separator

/**
 * Default state of the lexer: reads characters until one of them has a
 * special meaning, then hands over to the state that deals with it.
 *
 * Checks are made in this order for every character read:
 *   1. end of input          -> lexEOS
 *   2. "\" (escape)          -> skip the escaped character and keep reading
 *   3. ":"                   -> lexField
 *   4. "~" / "^"             -> emit any pending text as a TERM, then
 *                               lexEditDistance / lexBoost
 *   5. leading "+" / "-"     -> emit a PRESENCE lexeme, then lexText again
 *   6. term separator        -> lexTerm
 * Any other character simply becomes part of the pending span.
 *
 * @param {lunr.QueryLexer} lexer - The lexer being driven.
 * @returns {Function} The state function to run next.
 */
lunr.QueryLexer.lexText = function (lexer) {
  var QL = lunr.QueryLexer,
      BACKSLASH = 92, // char code of the escape character "\"
      ch

  for (;;) {
    ch = lexer.next()

    if (ch == QL.EOS) {
      return QL.lexEOS
    }

    if (ch.charCodeAt(0) == BACKSLASH) {
      lexer.escapeCharacter()
      continue
    }

    switch (ch) {
      case ":":
        return QL.lexField

      case "~":
      case "^":
        // Un-read the modifier and flush the text before it as a term;
        // the modifier state consumes the symbol itself.
        lexer.backup()

        if (lexer.width() > 0) {
          lexer.emit(QL.TERM)
        }

        return ch == "~" ? QL.lexEditDistance : QL.lexBoost

      case "+":
      case "-":
        // "+" marks a term as required and "-" as prohibited, but only
        // when it is the first character of the pending span. Anywhere
        // else the character falls through to the separator check below.
        if (lexer.width() === 1) {
          lexer.emit(QL.PRESENCE)
          return QL.lexText
        }
        break
    }

    if (ch.match(QL.termSeparator)) {
      return QL.lexTerm
    }
  }
}
First commit 2024-09-06 15:32:35 +00:00			`lunr.QueryLexer = function (str) {`
			`this.lexemes = []`
			`this.str = str`
			`this.length = str.length`
			`this.pos = 0`
			`this.start = 0`
			`this.escapeCharPositions = []`
			`}`

			`lunr.QueryLexer.prototype.run = function () {`
			`var state = lunr.QueryLexer.lexText`

			`while (state) {`
			`state = state(this)`
			`}`
			`}`

			`lunr.QueryLexer.prototype.sliceString = function () {`
			`var subSlices = [],`
			`sliceStart = this.start,`
			`sliceEnd = this.pos`

			`for (var i = 0; i < this.escapeCharPositions.length; i++) {`
			`sliceEnd = this.escapeCharPositions[i]`
			`subSlices.push(this.str.slice(sliceStart, sliceEnd))`
			`sliceStart = sliceEnd + 1`
			`}`

			`subSlices.push(this.str.slice(sliceStart, this.pos))`
			`this.escapeCharPositions.length = 0`

			`return subSlices.join('')`
			`}`

			`lunr.QueryLexer.prototype.emit = function (type) {`
			`this.lexemes.push({`
			`type: type,`
			`str: this.sliceString(),`
			`start: this.start,`
			`end: this.pos`
			`})`

			`this.start = this.pos`
			`}`

			`lunr.QueryLexer.prototype.escapeCharacter = function () {`
			`this.escapeCharPositions.push(this.pos - 1)`
			`this.pos += 1`
			`}`

			`lunr.QueryLexer.prototype.next = function () {`
			`if (this.pos >= this.length) {`
			`return lunr.QueryLexer.EOS`
			`}`

			`var char = this.str.charAt(this.pos)`
			`this.pos += 1`
			`return char`
			`}`

			`lunr.QueryLexer.prototype.width = function () {`
			`return this.pos - this.start`
			`}`

			`lunr.QueryLexer.prototype.ignore = function () {`
			`if (this.start == this.pos) {`
			`this.pos += 1`
			`}`

			`this.start = this.pos`
			`}`

			`lunr.QueryLexer.prototype.backup = function () {`
			`this.pos -= 1`
			`}`

			`lunr.QueryLexer.prototype.acceptDigitRun = function () {`
			`var char, charCode`

			`do {`
			`char = this.next()`
			`charCode = char.charCodeAt(0)`
			`} while (charCode > 47 && charCode < 58)`

			`if (char != lunr.QueryLexer.EOS) {`
			`this.backup()`
			`}`
			`}`

			`lunr.QueryLexer.prototype.more = function () {`
			`return this.pos < this.length`
			`}`

			`lunr.QueryLexer.EOS = 'EOS'`
			`lunr.QueryLexer.FIELD = 'FIELD'`
			`lunr.QueryLexer.TERM = 'TERM'`
			`lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'`
			`lunr.QueryLexer.BOOST = 'BOOST'`
			`lunr.QueryLexer.PRESENCE = 'PRESENCE'`

			`lunr.QueryLexer.lexField = function (lexer) {`
			`lexer.backup()`
			`lexer.emit(lunr.QueryLexer.FIELD)`
			`lexer.ignore()`
			`return lunr.QueryLexer.lexText`
			`}`

			`lunr.QueryLexer.lexTerm = function (lexer) {`
			`if (lexer.width() > 1) {`
			`lexer.backup()`
			`lexer.emit(lunr.QueryLexer.TERM)`
			`}`

			`lexer.ignore()`

			`if (lexer.more()) {`
			`return lunr.QueryLexer.lexText`
			`}`
			`}`

			`lunr.QueryLexer.lexEditDistance = function (lexer) {`
			`lexer.ignore()`
			`lexer.acceptDigitRun()`
			`lexer.emit(lunr.QueryLexer.EDIT_DISTANCE)`
			`return lunr.QueryLexer.lexText`
			`}`

			`lunr.QueryLexer.lexBoost = function (lexer) {`
			`lexer.ignore()`
			`lexer.acceptDigitRun()`
			`lexer.emit(lunr.QueryLexer.BOOST)`
			`return lunr.QueryLexer.lexText`
			`}`

			`lunr.QueryLexer.lexEOS = function (lexer) {`
			`if (lexer.width() > 0) {`
			`lexer.emit(lunr.QueryLexer.TERM)`
			`}`
			`}`

			`// This matches the separator used when tokenising fields`
			`// within a document. These should match otherwise it is`
			`// not possible to search for some tokens within a document.`
			`//`
			`// It is possible for the user to change the separator on the`
			`// tokenizer so it _might_ clash with any other of the special`
			`// characters already used within the search string, e.g. :.`
			`//`
			`// This means that it is possible to change the separator in`
			`// such a way that makes some words unsearchable using a search`
			`// string.`
			`lunr.QueryLexer.termSeparator = lunr.tokenizer.separator`

			`lunr.QueryLexer.lexText = function (lexer) {`
			`while (true) {`
			`var char = lexer.next()`

			`if (char == lunr.QueryLexer.EOS) {`
			`return lunr.QueryLexer.lexEOS`
			`}`

			`// Escape character is '\'`
			`if (char.charCodeAt(0) == 92) {`
			`lexer.escapeCharacter()`
			`continue`
			`}`

			`if (char == ":") {`
			`return lunr.QueryLexer.lexField`
			`}`

			`if (char == "~") {`
			`lexer.backup()`
			`if (lexer.width() > 0) {`
			`lexer.emit(lunr.QueryLexer.TERM)`
			`}`
			`return lunr.QueryLexer.lexEditDistance`
			`}`

			`if (char == "^") {`
			`lexer.backup()`
			`if (lexer.width() > 0) {`
			`lexer.emit(lunr.QueryLexer.TERM)`
			`}`
			`return lunr.QueryLexer.lexBoost`
			`}`

			`// "+" indicates term presence is required`
			`// checking for length to ensure that only`
			`// leading "+" are considered`
			`if (char == "+" && lexer.width() === 1) {`
			`lexer.emit(lunr.QueryLexer.PRESENCE)`
			`return lunr.QueryLexer.lexText`
			`}`

			`// "-" indicates term presence is prohibited`
			`// checking for length to ensure that only`
			`// leading "-" are considered`
			`if (char == "-" && lexer.width() === 1) {`
			`lexer.emit(lunr.QueryLexer.PRESENCE)`
			`return lunr.QueryLexer.lexText`
			`}`

			`if (char.match(lunr.QueryLexer.termSeparator)) {`
			`return lunr.QueryLexer.lexTerm`
			`}`
			`}`
			`}`