diff --git a/src/renderer/html_handlebars/search.rs b/src/renderer/html_handlebars/search.rs
index 0a59ffe9f9..b5f8c4a2ac 100644
--- a/src/renderer/html_handlebars/search.rs
+++ b/src/renderer/html_handlebars/search.rs
@@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::path::Path;
-use elasticlunr::Index;
+use elasticlunr::{Index, IndexBuilder};
use pulldown_cmark::*;
use crate::book::{Book, BookItem};
@@ -13,9 +13,25 @@ use crate::utils;
use serde::Serialize;
+const MAX_WORD_LENGTH_TO_INDEX: usize = 80;
+
+/// Tokenizes in the same way as elasticlunr-rs (for English), but also drops long tokens.
+fn tokenize(text: &str) -> Vec<String> {
+ text.split(|c: char| c.is_whitespace() || c == '-')
+ .filter(|s| !s.is_empty())
+ .map(|s| s.trim().to_lowercase())
+ .filter(|s| s.len() <= MAX_WORD_LENGTH_TO_INDEX)
+ .collect()
+}
+
/// Creates all files required for search.
pub fn create_files(search_config: &Search, destination: &Path, book: &Book) -> Result<()> {
- let mut index = Index::new(&["title", "body", "breadcrumbs"]);
+ let mut index = IndexBuilder::new()
+ .add_field_with_tokenizer("title", Box::new(&tokenize))
+ .add_field_with_tokenizer("body", Box::new(&tokenize))
+ .add_field_with_tokenizer("breadcrumbs", Box::new(&tokenize))
+ .build();
+
let mut doc_urls = Vec::with_capacity(book.sections.len());
for item in book.iter() {
diff --git a/tests/dummy_book/src/first/no-headers.md b/tests/dummy_book/src/first/no-headers.md
index 8f9a6d17ef..5d799aa684 100644
--- a/tests/dummy_book/src/first/no-headers.md
+++ b/tests/dummy_book/src/first/no-headers.md
@@ -1,3 +1,5 @@
Capybara capybara capybara.
-Capybara capybara capybara.
\ No newline at end of file
+Capybara capybara capybara.
+
+ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex.
diff --git a/tests/rendered_output.rs b/tests/rendered_output.rs
index 873a622df5..c6267830e3 100644
--- a/tests/rendered_output.rs
+++ b/tests/rendered_output.rs
@@ -772,7 +772,7 @@ mod search {
);
assert_eq!(
docs[&no_headers]["body"],
- "Capybara capybara capybara. Capybara capybara capybara."
+ "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex."
);
}
diff --git a/tests/searchindex_fixture.json b/tests/searchindex_fixture.json
index 9c349b6b20..3d7062d237 100644
--- a/tests/searchindex_fixture.json
+++ b/tests/searchindex_fixture.json
@@ -229,7 +229,7 @@
"title": "Unicode stress tests"
},
"18": {
- "body": "Capybara capybara capybara. Capybara capybara capybara.",
+ "body": "Capybara capybara capybara. Capybara capybara capybara. ThisLongWordIsIncludedSoWeCanCheckThatSufficientlyLongWordsAreOmittedFromTheSearchIndex.",
"breadcrumbs": "First Chapter » No Headers",
"id": "18",
"title": "First Chapter"