securityos/node_modules/lunr/lib/tokenizer.js

/*!
 * lunr.tokenizer
 * Copyright (C) @YEAR Oliver Nightingale
 */

/**
 * Converts a value into the list of tokens that will be fed to the search
 * index.
 *
 * Behaviour depends on the kind of value supplied:
 *
 * - `null` / `undefined` produce an empty array.
 * - An array produces one token per element; each element is stringified
 *   with `lunr.utils.asString`, lower-cased and paired with a clone of
 *   `metadata`. No splitting is performed on array elements.
 * - Any other value is stringified with its own `toString`, lower-cased and
 *   scanned one character at a time; every character matching
 *   `lunr.tokenizer.separator` ends the current token. Empty tokens (runs of
 *   consecutive separators, leading or trailing separators) are dropped.
 *
 * For tokens produced by splitting, the cloned metadata additionally carries
 * `position` (`[start, length]`, measured in the lower-cased string) and
 * `index` (the zero-based ordinal of the token in the returned array).
 *
 * @static
 * @param {?(string|object|object[])} obj - The object to convert into tokens
 * @param {?object} metadata - Optional metadata to associate with every token
 * @returns {lunr.Token[]}
 * @see {@link lunr.Pipeline}
 */
lunr.tokenizer = function (obj, metadata) {
  // Loose comparison with null also catches undefined.
  if (obj == null) {
    return []
  }

  if (Array.isArray(obj)) {
    return obj.map(function (element) {
      var text = lunr.utils.asString(element).toLowerCase()
      return new lunr.Token(text, lunr.utils.clone(metadata))
    })
  }

  var text = obj.toString().toLowerCase(),
      total = text.length,
      result = [],
      start = 0

  // Walk one step past the last character so the trailing token is flushed.
  for (var cursor = 0; cursor <= total; cursor += 1) {
    var atBoundary = text.charAt(cursor).match(lunr.tokenizer.separator) || cursor == total

    if (!atBoundary) {
      continue
    }

    var width = cursor - start

    // A zero-width slice means two boundaries were adjacent; emit nothing.
    if (width > 0) {
      var meta = lunr.utils.clone(metadata) || {}
      meta.position = [start, width]
      meta.index = result.length

      result.push(new lunr.Token(text.slice(start, cursor), meta))
    }

    // The next token can begin no earlier than the character after the boundary.
    start = cursor + 1
  }

  return result
}

/**
 * The separator used to split a string into tokens. Override this property to change how
 * `lunr.tokenizer` behaves when tokenizing strings. By default this splits on whitespace and hyphens.
 *
 * `lunr.tokenizer` tests this pattern against one character of the input at a time, so a
 * replacement should be written to match a single separator character.
 *
 * @static
 * @see lunr.tokenizer
 */
lunr.tokenizer.separator = /[\s\-]+/
First commit 2024-09-06 15:32:35 +00:00			`/*!`
			`* lunr.tokenizer`
			`* Copyright (C) @YEAR Oliver Nightingale`
			`*/`

			`/**`
			`* A function for splitting a string into tokens ready to be inserted into`
			* the search index. Uses `lunr.tokenizer.separator` to split strings, change
			`* the value of this property to change how strings are split into tokens.`
			`*`
			* This tokenizer will convert its parameter to a string by calling `toString` and
			* then will split this string on the character in `lunr.tokenizer.separator`.
			`* Arrays will have their elements converted to strings and wrapped in a lunr.Token.`
			`*`
			`* Optional metadata can be passed to the tokenizer, this metadata will be cloned and`
			`* added as metadata to every token that is created from the object to be tokenized.`
			`*`
			`* @static`
			`* @param {?(string\|object\|object[])} obj - The object to convert into tokens`
			`* @param {?object} metadata - Optional metadata to associate with every token`
			`* @returns {lunr.Token[]}`
			`* @see {@link lunr.Pipeline}`
			`*/`
			`lunr.tokenizer = function (obj, metadata) {`
			`if (obj == null \|\| obj == undefined) {`
			`return []`
			`}`

			`if (Array.isArray(obj)) {`
			`return obj.map(function (t) {`
			`return new lunr.Token(`
			`lunr.utils.asString(t).toLowerCase(),`
			`lunr.utils.clone(metadata)`
			`)`
			`})`
			`}`

			`var str = obj.toString().toLowerCase(),`
			`len = str.length,`
			`tokens = []`

			`for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) {`
			`var char = str.charAt(sliceEnd),`
			`sliceLength = sliceEnd - sliceStart`

			`if ((char.match(lunr.tokenizer.separator) \|\| sliceEnd == len)) {`

			`if (sliceLength > 0) {`
			`var tokenMetadata = lunr.utils.clone(metadata) \|\| {}`
			`tokenMetadata["position"] = [sliceStart, sliceLength]`
			`tokenMetadata["index"] = tokens.length`

			`tokens.push(`
			`new lunr.Token (`
			`str.slice(sliceStart, sliceEnd),`
			`tokenMetadata`
			`)`
			`)`
			`}`

			`sliceStart = sliceEnd + 1`
			`}`

			`}`

			`return tokens`
			`}`

			`/**`
			`* The separator used to split a string into tokens. Override this property to change the behaviour of`
			* `lunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens.
			`*`
			`* @static`
			`* @see lunr.tokenizer`
			`*/`
			`lunr.tokenizer.separator = /[\s\-]+/`