/**
 * A lexer that walks a query string and records the lexemes it finds
 * (fields, terms, edit distances, boosts and presence markers) in
 * `this.lexemes`.
 *
 * @constructor
 * @param {string} str - The query string to lex.
 */
lunr.QueryLexer = function (str) {
  this.lexemes = []
  this.str = str
  this.length = str.length
  this.pos = 0
  this.start = 0
  this.escapeCharPositions = []
}

/**
 * Drives the lexer: starts in the lexText state and keeps calling the
 * returned state function until one returns a falsy value.
 */
lunr.QueryLexer.prototype.run = function () {
  var state = lunr.QueryLexer.lexText

  while (state) {
    state = state(this)
  }
}

/**
 * Returns the text between `start` and `pos` with every recorded escape
 * character removed, and clears the recorded escape positions.
 *
 * @returns {string} The current lexeme text without escape characters.
 */
lunr.QueryLexer.prototype.sliceString = function () {
  var subSlices = [],
      sliceStart = this.start,
      sliceEnd

  for (var i = 0; i < this.escapeCharPositions.length; i++) {
    sliceEnd = this.escapeCharPositions[i]
    subSlices.push(this.str.slice(sliceStart, sliceEnd))
    // Resume just after the escape character so it is left out.
    sliceStart = sliceEnd + 1
  }

  subSlices.push(this.str.slice(sliceStart, this.pos))
  this.escapeCharPositions.length = 0

  return subSlices.join('')
}

/**
 * Pushes a lexeme of the given type covering `start`..`pos` and then moves
 * `start` up to `pos`.
 *
 * @param {string} type - One of the lexeme type constants on lunr.QueryLexer.
 */
lunr.QueryLexer.prototype.emit = function (type) {
  this.lexemes.push({
    type: type,
    str: this.sliceString(),
    start: this.start,
    end: this.pos
  })

  this.start = this.pos
}

/**
 * Records the position of the escape character that was just read and skips
 * over the character that follows it, so that character is not interpreted
 * by the lexer.
 */
lunr.QueryLexer.prototype.escapeCharacter = function () {
  this.escapeCharPositions.push(this.pos - 1)

  // A trailing escape character has nothing after it to skip; moving on
  // regardless would leave `pos` (and the emitted lexeme's `end`) beyond
  // the end of the input.
  if (this.pos < this.length) {
    this.pos += 1
  }
}

/**
 * Consumes and returns the next character, or lunr.QueryLexer.EOS when the
 * whole input has been consumed.
 *
 * @returns {string} A single character or the EOS sentinel.
 */
lunr.QueryLexer.prototype.next = function () {
  if (this.pos >= this.length) {
    return lunr.QueryLexer.EOS
  }

  var char = this.str.charAt(this.pos)
  this.pos += 1
  return char
}

/**
 * @returns {number} How many characters have been consumed since `start`.
 */
lunr.QueryLexer.prototype.width = function () {
  return this.pos - this.start
}

/**
 * Discards the pending input. If nothing is pending (`start` equals `pos`)
 * one character is skipped first; `start` is then moved up to `pos`.
 */
lunr.QueryLexer.prototype.ignore = function () {
  if (this.start === this.pos) {
    this.pos += 1
  }

  this.start = this.pos
}

/**
 * Steps back one character.
 */
lunr.QueryLexer.prototype.backup = function () {
  this.pos -= 1
}

/**
 * Consumes a run of ASCII digits (char codes 48-57). The first non-digit
 * character is put back unless the end of the input was reached.
 */
lunr.QueryLexer.prototype.acceptDigitRun = function () {
  var char, charCode

  do {
    char = this.next()
    charCode = char.charCodeAt(0)
  } while (charCode > 47 && charCode < 58)

  if (char !== lunr.QueryLexer.EOS) {
    this.backup()
  }
}

/**
 * @returns {boolean} True while there is unconsumed input.
 */
lunr.QueryLexer.prototype.more = function () {
  return this.pos < this.length
}

// Sentinel returned by next() at the end of the input, followed by the
// lexeme types that can be emitted.
lunr.QueryLexer.EOS = 'EOS'
lunr.QueryLexer.FIELD = 'FIELD'
lunr.QueryLexer.TERM = 'TERM'
lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'
lunr.QueryLexer.BOOST = 'BOOST'
lunr.QueryLexer.PRESENCE = 'PRESENCE'

/**
 * State entered after a ":" has been read: emits everything before the ":"
 * as a FIELD lexeme, skips the ":" itself and returns to lexText.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state function.
 */
lunr.QueryLexer.lexField = function (lexer) {
  lexer.backup()
  lexer.emit(lunr.QueryLexer.FIELD)
  lexer.ignore()
  return lunr.QueryLexer.lexText
}

/**
 * State entered after a term separator has been read: emits the text before
 * the separator as a TERM lexeme (when there is any), drops the separator
 * and continues with lexText while input remains.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function|undefined} The next state, or undefined to stop.
 */
lunr.QueryLexer.lexTerm = function (lexer) {
  // A width of 1 means only the separator itself is pending.
  if (lexer.width() > 1) {
    lexer.backup()
    lexer.emit(lunr.QueryLexer.TERM)
  }

  lexer.ignore()

  if (lexer.more()) {
    return lunr.QueryLexer.lexText
  }
}

/**
 * State entered when the next character is "~": skips it, consumes the
 * digits that follow and emits them as an EDIT_DISTANCE lexeme.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state function.
 */
lunr.QueryLexer.lexEditDistance = function (lexer) {
  lexer.ignore()
  lexer.acceptDigitRun()
  lexer.emit(lunr.QueryLexer.EDIT_DISTANCE)
  return lunr.QueryLexer.lexText
}

/**
 * State entered when the next character is "^": skips it, consumes the
 * digits that follow and emits them as a BOOST lexeme.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state function.
 */
lunr.QueryLexer.lexBoost = function (lexer) {
  lexer.ignore()
  lexer.acceptDigitRun()
  lexer.emit(lunr.QueryLexer.BOOST)
  return lunr.QueryLexer.lexText
}

/**
 * Final state: emits any pending input as a TERM lexeme. Returns nothing,
 * which stops the lexer.
 *
 * @param {lunr.QueryLexer} lexer
 */
lunr.QueryLexer.lexEOS = function (lexer) {
  if (lexer.width() > 0) {
    lexer.emit(lunr.QueryLexer.TERM)
  }
}

// This matches the separator used when tokenising fields
// within a document. These should match, otherwise it is
// not possible to search for some tokens within a document.
//
// It is possible for the user to change the separator on the
// tokenizer so it _might_ clash with any other of the special
// characters already used within the search string, e.g. :.
//
// This means that it is possible to change the separator in
// such a way that makes some words unsearchable using a search
// string.
// Pattern that splits query text into terms; it is the tokenizer's own
// separator.
lunr.QueryLexer.termSeparator = lunr.tokenizer.separator

/**
 * Default lexer state. Consumes characters until one with special meaning is
 * found and returns the state function that handles it:
 *
 *   - end of input      -> lexEOS
 *   - ":"               -> lexField
 *   - "~"               -> lexEditDistance (pending text is emitted as a TERM)
 *   - "^"               -> lexBoost (pending text is emitted as a TERM)
 *   - leading "+" / "-" -> emitted as a PRESENCE lexeme, stays in lexText
 *   - term separator    -> lexTerm
 *
 * A backslash escapes the character that follows it.
 *
 * @param {lunr.QueryLexer} lexer
 * @returns {Function} The next state function.
 */
lunr.QueryLexer.lexText = function (lexer) {
  while (true) {
    var char = lexer.next()

    if (char === lunr.QueryLexer.EOS) {
      return lunr.QueryLexer.lexEOS
    }

    // Char code 92 is the backslash, which is the escape character.
    if (char.charCodeAt(0) === 92) {
      lexer.escapeCharacter()
      continue
    }

    if (char === ":") {
      return lunr.QueryLexer.lexField
    }

    if (char === "~") {
      lexer.backup()
      if (lexer.width() > 0) {
        lexer.emit(lunr.QueryLexer.TERM)
      }
      return lunr.QueryLexer.lexEditDistance
    }

    if (char === "^") {
      lexer.backup()
      if (lexer.width() > 0) {
        lexer.emit(lunr.QueryLexer.TERM)
      }
      return lunr.QueryLexer.lexBoost
    }

    // "+" indicates term presence is required and "-" that it is
    // prohibited. Checking the width ensures that only a leading
    // "+" or "-" is treated as a presence marker.
    if ((char === "+" || char === "-") && lexer.width() === 1) {
      lexer.emit(lunr.QueryLexer.PRESENCE)
      return lunr.QueryLexer.lexText
    }

    if (char.match(lunr.QueryLexer.termSeparator)) {
      return lunr.QueryLexer.lexTerm
    }
  }
}