securityos/node_modules/lunr/lib/query_lexer.js

210 lines
4.7 KiB
JavaScript

lunr.QueryLexer = function (str) {
this.lexemes = []
this.str = str
this.length = str.length
this.pos = 0
this.start = 0
this.escapeCharPositions = []
}
lunr.QueryLexer.prototype.run = function () {
var state = lunr.QueryLexer.lexText
while (state) {
state = state(this)
}
}
lunr.QueryLexer.prototype.sliceString = function () {
var subSlices = [],
sliceStart = this.start,
sliceEnd = this.pos
for (var i = 0; i < this.escapeCharPositions.length; i++) {
sliceEnd = this.escapeCharPositions[i]
subSlices.push(this.str.slice(sliceStart, sliceEnd))
sliceStart = sliceEnd + 1
}
subSlices.push(this.str.slice(sliceStart, this.pos))
this.escapeCharPositions.length = 0
return subSlices.join('')
}
lunr.QueryLexer.prototype.emit = function (type) {
this.lexemes.push({
type: type,
str: this.sliceString(),
start: this.start,
end: this.pos
})
this.start = this.pos
}
lunr.QueryLexer.prototype.escapeCharacter = function () {
this.escapeCharPositions.push(this.pos - 1)
this.pos += 1
}
lunr.QueryLexer.prototype.next = function () {
if (this.pos >= this.length) {
return lunr.QueryLexer.EOS
}
var char = this.str.charAt(this.pos)
this.pos += 1
return char
}
lunr.QueryLexer.prototype.width = function () {
return this.pos - this.start
}
lunr.QueryLexer.prototype.ignore = function () {
if (this.start == this.pos) {
this.pos += 1
}
this.start = this.pos
}
lunr.QueryLexer.prototype.backup = function () {
this.pos -= 1
}
lunr.QueryLexer.prototype.acceptDigitRun = function () {
var char, charCode
do {
char = this.next()
charCode = char.charCodeAt(0)
} while (charCode > 47 && charCode < 58)
if (char != lunr.QueryLexer.EOS) {
this.backup()
}
}
lunr.QueryLexer.prototype.more = function () {
return this.pos < this.length
}
lunr.QueryLexer.EOS = 'EOS'
lunr.QueryLexer.FIELD = 'FIELD'
lunr.QueryLexer.TERM = 'TERM'
lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'
lunr.QueryLexer.BOOST = 'BOOST'
lunr.QueryLexer.PRESENCE = 'PRESENCE'
lunr.QueryLexer.lexField = function (lexer) {
lexer.backup()
lexer.emit(lunr.QueryLexer.FIELD)
lexer.ignore()
return lunr.QueryLexer.lexText
}
lunr.QueryLexer.lexTerm = function (lexer) {
if (lexer.width() > 1) {
lexer.backup()
lexer.emit(lunr.QueryLexer.TERM)
}
lexer.ignore()
if (lexer.more()) {
return lunr.QueryLexer.lexText
}
}
lunr.QueryLexer.lexEditDistance = function (lexer) {
lexer.ignore()
lexer.acceptDigitRun()
lexer.emit(lunr.QueryLexer.EDIT_DISTANCE)
return lunr.QueryLexer.lexText
}
lunr.QueryLexer.lexBoost = function (lexer) {
lexer.ignore()
lexer.acceptDigitRun()
lexer.emit(lunr.QueryLexer.BOOST)
return lunr.QueryLexer.lexText
}
lunr.QueryLexer.lexEOS = function (lexer) {
if (lexer.width() > 0) {
lexer.emit(lunr.QueryLexer.TERM)
}
}
// This matches the separator used when tokenising fields
// within a document. These should match otherwise it is
// not possible to search for some tokens within a document.
//
// It is possible for the user to change the separator on the
// tokenizer so it _might_ clash with any other of the special
// characters already used within the search string, e.g. :.
//
// This means that it is possible to change the separator in
// such a way that makes some words unsearchable using a search
// string.
lunr.QueryLexer.termSeparator = lunr.tokenizer.separator
lunr.QueryLexer.lexText = function (lexer) {
while (true) {
var char = lexer.next()
if (char == lunr.QueryLexer.EOS) {
return lunr.QueryLexer.lexEOS
}
// Escape character is '\'
if (char.charCodeAt(0) == 92) {
lexer.escapeCharacter()
continue
}
if (char == ":") {
return lunr.QueryLexer.lexField
}
if (char == "~") {
lexer.backup()
if (lexer.width() > 0) {
lexer.emit(lunr.QueryLexer.TERM)
}
return lunr.QueryLexer.lexEditDistance
}
if (char == "^") {
lexer.backup()
if (lexer.width() > 0) {
lexer.emit(lunr.QueryLexer.TERM)
}
return lunr.QueryLexer.lexBoost
}
// "+" indicates term presence is required
// checking for length to ensure that only
// leading "+" are considered
if (char == "+" && lexer.width() === 1) {
lexer.emit(lunr.QueryLexer.PRESENCE)
return lunr.QueryLexer.lexText
}
// "-" indicates term presence is prohibited
// checking for length to ensure that only
// leading "-" are considered
if (char == "-" && lexer.width() === 1) {
lexer.emit(lunr.QueryLexer.PRESENCE)
return lunr.QueryLexer.lexText
}
if (char.match(lunr.QueryLexer.termSeparator)) {
return lunr.QueryLexer.lexTerm
}
}
}