<!DOCTYPE html>
<html lang="en">
|
|
<head>
  <!-- Declare the encoding first so the parser never has to guess it. -->
  <meta charset="utf-8">
  <title>lunr.js - A bit like Solr, but much smaller and not as bright</title>
  <link rel="stylesheet" href="/styles.css">
  <!-- The library is loaded on the page so the examples can be tried from the console. -->
  <script src="/lunr.min.js"></script>
  <script>
    // Google Analytics asynchronous tracking snippet.
    // `_gaq` is a global command queue: commands pushed here are picked up
    // once ga.js has loaded. An existing queue is reused if one is present.
    var _gaq = _gaq || [];
    _gaq.push(['_setAccount', 'UA-25695442-4']);
    _gaq.push(['_trackPageview']);

    (function() {
      // Build an async script element pointing at ga.js, using the host that
      // matches the protocol this page was served over.
      var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
      ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
      // Insert it ahead of the first script element in the document.
      var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
    })();
  </script>
</head>
|
|
<body>
|
|
<div class='wrap'>
|
|
<header>
|
|
<h1>lunr<span>.js</span></h1>
|
|
<h2>Simple full-text search in your browser</h2>
|
|
</header>
|
|
|
|
<nav>
|
|
<ul>
|
|
<li><a href="/docs">Docs</a></li>
|
|
<li><a href="/example">Examples</a></li>
|
|
<li><a href="http://github.com/olivernn/lunr.js">Code</a></li>
|
|
</ul>
|
|
</nav>
|
|
|
|
<section class="columns">
|
|
<article>
|
|
<header>
|
|
<h3>Get Started</h3>
|
|
</header>
|
|
|
|
<p>Open your browser's <a id="developer-tools" target="_blank">developer tools</a> on this page to follow along.</p>
|
|
|
|
<p>Set up an index for your notes:</p>
|
|
<pre>
var index = lunr(function () {
  this.field('title', {boost: 10})
  this.field('body')
  this.ref('id')
})</pre>
|
|
|
|
<p>Add documents to your index:</p>
|
|
<pre>
index.add({
  id: 1,
  title: 'Foo',
  body: 'Foo foo foo!'
})

index.add({
  id: 2,
  title: 'Bar',
  body: 'Bar bar bar!'
})</pre>
|
|
|
|
<p>Search your documents:</p>
|
|
<pre>
index.search('foo')</pre>
|
|
</article>
|
|
|
|
<article>
|
|
<header>
|
|
<h3>About</h3>
|
|
</header>
|
|
|
|
<p>lunr.js is a simple full text search engine for your client side applications. It is designed to be small, yet full featured, enabling you to provide a great search experience without the need for external, server side, search services.</p>
|
|
|
|
<p>lunr.js has no external dependencies, although it does require a modern browser with ES5 support. In older browsers you can use an ES5 shim, such as <a href="https://olivernn.github.io/augment.js/">augment.js</a>, to provide any missing JavaScript functionality.</p>
|
|
</article>
|
|
|
|
<article class="download">
|
|
<header>
|
|
<h3>Download</h3>
|
|
</header>
|
|
|
|
<ul>
|
|
<li><a href="https://raw.github.com/olivernn/lunr.js/master/lunr.js">lunr.js</a> - uncompressed</li>
|
|
<li><a href="https://raw.github.com/olivernn/lunr.js/master/lunr.min.js">lunr.min.js</a> - minified</li>
|
|
</ul>
|
|
</article>
|
|
</section>
|
|
|
|
<section>
|
|
<article>
|
|
<header>
|
|
<h3>Pipeline</h3>
|
|
</header>
|
|
|
|
<p>Every document and search query that enters lunr is passed through a text <a href="/docs#Pipeline">processing pipeline</a>. The pipeline is simply a stack of functions that perform some processing on the text. Pipeline functions act on the text one token at a time, and what they return is passed to the next function in the pipeline.</p>
|
|
|
|
<p>By default lunr adds a <a href="/docs#stopWordFilter">stop word filter</a> and <a href="/docs#stemmer">stemmer</a> to the pipeline. You can also add your own processors or remove the default ones depending on your requirements. The stemmer currently used is an English language stemmer, which could be replaced with a non-English language stemmer if required, or a <a href="http://en.wikipedia.org/wiki/Metaphone">Metaphoning</a> processor could be added.</p>
|
|
|
|
|
|
<pre>
var index = lunr(function () {
  this.pipeline.add(function (token, tokenIndex, tokens) {
    // text processing in here
  })

  this.pipeline.after(lunr.stopWordFilter, function (token, tokenIndex, tokens) {
    // text processing in here
  })
})
</pre>
|
|
|
|
<p>Functions in the pipeline are called with three arguments: the current token being processed, the index of that token in the array of tokens, and the whole list of tokens that are part of the document being processed. This enables simple unigram processing of tokens as well as more sophisticated n-gram processing.</p>
|
|
|
|
<p>The function should return the processed version of the text, which will in turn be passed to the next function in the pipeline. Returning <code>undefined</code> will prevent any further processing of the token, and that token will not make it to the index.</p>
|
|
</article>
|
|
</section>
|
|
|
|
<section class="columns">
|
|
<article>
|
|
<header>
|
|
<h3>Tokenization</h3>
|
|
</header>
|
|
|
|
<p>Tokenization is how lunr converts documents and searches into individual tokens, ready to be run through the text processing pipeline and entered or looked up in the index.</p>
|
|
|
|
<p>The default tokenizer included with lunr is designed to handle general English text well, although application- or language-specific tokenizers can be used instead.</p>
|
|
</article>
|
|
|
|
<article>
|
|
<header>
|
|
<h3>Stemming</h3>
|
|
</header>
|
|
|
|
<p>Stemming increases the recall of the search index by reducing related words down to their stem, so that non-exact search terms still match relevant documents. For example 'search', 'searching' and 'searched' all get reduced to the stem 'search'.</p>
|
|
|
|
<p>lunr automatically includes a stemmer based on <a href="http://tartarus.org/martin/PorterStemmer/">Martin Porter's</a> algorithms.</p>
|
|
</article>
|
|
|
|
<article>
|
|
<header>
|
|
<h3>Stop words</h3>
|
|
</header>
|
|
|
|
<p>Stop words are words that are very common and are not useful in differentiating between documents. These are automatically removed by lunr. This helps to reduce the size of the index and improve search speed and accuracy.</p>
|
|
|
|
<p>The default stop word filter contains a large list of very common words in English. For best results a corpus specific stop word filter can also be added to the pipeline. The search algorithm already penalises more common words, but preventing them from entering the index at all can be very beneficial for both space and speed performance.</p>
|
|
</article>
|
|
|
|
</section>
|
|
<footer>
|
|
<ul>
|
|
<li>Code by <a href="http://twitter.com/olivernn">Oliver Nightingale</a></li>
|
|
<li><a href="http://github.com/olivernn/lunr.js">Code</a></li>
|
|
<li><a href="/docs">Documentation</a></li>
|
|
<li><a href="http://github.com/olivernn/lunr.js/issues">Issues</a></li>
|
|
</ul>
|
|
</footer>
|
|
</div>
|
|
|
|
<script>
|
|
(function (ua) {
  // Some _really_ basic browser sniffing to choose the right dev tools link.
  //
  // `ua` is the user agent string (navigator.userAgent). The rules below are
  // tried in order and the first match wins. When nothing matches, the
  // "developer tools" anchor is left without an href.

  // Every pattern lives in this single `var` statement so that none of them
  // escapes the function as an implicit global.
  var consoleSrc,
      CHROME = /Chrome/,
      CHROMIUM = /Chromium/,
      FIREFOX = /Firefox/,
      MSIE = /MSIE|Trident/,    // IE 11 has no "MSIE" token but still reports "Trident"
      OPERA = /(OPR|Opera)/,
      SAFARI = /Safari/,
      SEAMONKEY = /SeaMonkey/i  // the token is spelled "SeaMonkey"; ignore case to be safe

  if (FIREFOX.test(ua) && !SEAMONKEY.test(ua)) {
    // firefox (SeaMonkey also carries a "Firefox" token, so it is excluded)
    consoleSrc = "https://developer.mozilla.org/en-US/docs/Tools/Web_Console"
  } else if (CHROME.test(ua)) {
    // chrome, plus any other browser whose user agent carries a "Chrome" token
    consoleSrc = "https://developer.chrome.com/devtools"
  } else if (SAFARI.test(ua) && !CHROME.test(ua) && !CHROMIUM.test(ua)) {
    // safari
    consoleSrc = "https://developer.apple.com/library/safari/documentation/AppleApplications/Conceptual/Safari_Developer_Guide/GettingStarted/GettingStarted.html#//apple_ref/doc/uid/TP40007874-CH2-SW1"
  } else if (OPERA.test(ua)) {
    // opera (only reached when the user agent has no "Chrome" or "Safari" token)
    consoleSrc = "http://www.opera.com/dragonfly/"
  } else if (MSIE.test(ua)) {
    // IE
    consoleSrc = "http://msdn.microsoft.com/en-us/library/ie/hh673541(v=vs.85).aspx"
  }

  // Only touch the anchor when there is both a link to set and an element to set it on.
  var link = document.getElementById('developer-tools')
  if (consoleSrc && link) link.href = consoleSrc
})(navigator.userAgent)
|
|
</script>
|
|
|
|
<script>
|
|
(function (hijs) {
  //
  // hijs - JavaScript Syntax Highlighter
  //
  // Copyright (c) 2010 Alexis Sellier
  //
  // Highlighting happens in two passes over the selected elements:
  //
  //   1. Each direct text-node child is scanned with the `syntax` patterns.
  //      Every match is wrapped in marker characters and has its ASCII
  //      characters shifted into the Braille block (U+2800 onwards), so
  //      that later patterns cannot match inside it again.
  //   2. The element's innerHTML is rewritten, turning every marked region
  //      into a <span> whose class is the syntax name and whose content is
  //      the decoded, HTML-escaped original text.
  //
  // `hijs` (window.hijs) may hold a selector string; it defaults to 'pre'.
  //

  // All elements which match this will be syntax highlighted.
  var selector = hijs || 'pre';

  var keywords = ('var function if else for while break switch case do new null in with void '
                 + 'continue delete return this true false throw catch typeof instanceof').split(' '),
      special  = ('eval window document undefined NaN Infinity parseInt parseFloat '
                 + 'encodeURI decodeURI encodeURIComponent decodeURIComponent').split(' ');

  // Syntax definition
  // The key becomes the class name of the <span>
  // around the matched block of code.
  // Order matters: earlier entries claim their text first.
  var syntax = [
    ['comment', /(\/\*(?:[^*\n]|\*+[^\/*])*\*+\/)/g],
    ['comment', /(\/\/[^\n]*)/g],
    ['string' , /("(?:(?!")[^\\\n]|\\.)*"|'(?:(?!')[^\\\n]|\\.)*')/g],
    ['regexp' , /(\/.+\/[mgi]*)(?!\s*\w)/g],
    ['class'  , /\b([A-Z][a-zA-Z]+)\b/g],
    ['number' , /\b([0-9]+(?:\.[0-9]+)?)\b/g],
    ['keyword', new RegExp('\\b(' + keywords.join('|') + ')\\b', 'g')],
    ['special', new RegExp('\\b(' + special.join('|') + ')\\b', 'g')]
  ];

  // `table` remembers encoded -> original text for every string passed to encode().
  var nodes, table = {};

  // Resolve the selector with the cheapest lookup that can handle it.
  if (/^[a-z]+$/.test(selector)) {
    nodes = document.getElementsByTagName(selector);
  } else if (/^\.[\w-]+$/.test(selector)) {
    nodes = document.getElementsByClassName(selector.slice(1));
  } else if (document.querySelectorAll) {
    nodes = document.querySelectorAll(selector);
  } else {
    nodes = [];
  }

  var i, j, children;

  // Pass 1: mark up the text nodes.
  for (i = 0; i < nodes.length; i++) {
    children = nodes[i].childNodes;

    for (j = 0; j < children.length; j++) {
      markTextNode(children[j]);
    }
  }

  // Pass 2: turn the markers into <span> elements.
  for (i = 0; i < nodes.length; i++) {
    nodes[i].innerHTML =
      nodes[i].innerHTML.replace(/\u00ab(.+?)\u00b7(.+?)\u00b7\1\u00bb/g, function (_, name, value) {
        // Drop the markers of any regions nested inside this one, keeping only their text.
        value = value.replace(/\u00ab[^\u00b7]+\u00b7/g, '').replace(/\u00b7[^\u00bb]+\u00bb/g, '');
        return '<span class="' + decode(name) + '">' + escapeHTML(decode(value)) + '</span>';
      });
  }

  // Wrap every syntax match inside `node` in encoded markers.
  // Nodes that are not text nodes are ignored.
  function markTextNode(node) {
    if (node.nodeType !== 3) { return } // 3 === Node.TEXT_NODE

    // Don't highlight command-line snippets
    if (/^\$/.test(node.nodeValue.trim())) { return }

    syntax.forEach(function (s) {
      var k = s[0], v = s[1];
      node.nodeValue = node.nodeValue.replace(v, function (_, m) {
        return '\u00ab' + encode(k) + '\u00b7'
                        + encode(m) +
               '\u00b7' + encode(k) + '\u00bb';
      });
    });
  }

  // Escape text for insertion into innerHTML. The ampersand is replaced
  // first so the entities produced for < and > are not escaped again.
  function escapeHTML(str) {
    return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
  }

  // Encode ASCII characters to, and from Braille
  //
  // encode() shifts every character with a code of 127 or below into the
  // Braille block, leaves other characters untouched, records the mapping
  // in `table` and returns the encoded string.
  function encode (str) {
    var encoded = str.split('').map(function (s) {
      if (s.charCodeAt(0) > 127) { return s }
      return String.fromCharCode(s.charCodeAt(0) + 0x2800);
    }).join('');

    table[encoded] = str;
    return encoded;
  }

  // decode() returns the recorded original when `str` was produced by
  // encode(). Otherwise it shifts back only the characters that lie in the
  // encoded range, leaving every other character as it is.
  function decode (str) {
    if (str in table) {
      return table[str];
    }

    return str.trim().split('').map(function (s) {
      var shifted = s.charCodeAt(0) - 0x2800;
      if (shifted < 0 || shifted > 127) { return s }
      return String.fromCharCode(shifted);
    }).join('');
  }

})(window.hijs);
|
|
</script>
|
|
</body>
|
|
</html>
|