10
10
//! assert_eq!(stream.next().unwrap().text, "crafty");
11
11
//! assert!(stream.next().is_none());
12
12
//! ```
13
+ #[ cfg( feature = "stopwords" ) ]
14
+ #[ rustfmt:: skip]
15
+ mod stopwords;
16
+
13
17
use std:: sync:: Arc ;
14
18
15
19
use rustc_hash:: FxHashSet ;
@@ -31,14 +35,87 @@ impl StopWordFilter {
31
35
}
32
36
}
33
37
34
- fn english ( ) -> StopWordFilter {
35
- let words: [ & ' static str ; 33 ] = [
38
+ fn from_word_list ( words : & [ & str ] ) -> Self { // private helper: builds a filter from a borrowed list of words
39
+ Self :: remove ( words. iter ( ) . map ( |& word| word. to_owned ( ) ) ) // each `&str` is copied into an owned `String` and handed to `Self::remove`
40
+ }
41
+
42
+ #[ cfg( feature = "stopwords" ) ]
43
+ /// Create a `StopWordFilter` for the Danish language from the `stopwords::DANISH` word list (only compiled with the `stopwords` feature)
44
+ pub fn danish ( ) -> Self {
45
+ Self :: from_word_list ( stopwords:: DANISH )
46
+ }
47
+
48
+ #[ cfg( feature = "stopwords" ) ]
49
+ /// Create a `StopWordFilter` for the Dutch language from the `stopwords::DUTCH` word list (only compiled with the `stopwords` feature)
50
+ pub fn dutch ( ) -> Self {
51
+ Self :: from_word_list ( stopwords:: DUTCH )
52
+ }
53
+
54
+ /// Create a `StopWordFilter` for the English language from the 33-word list embedded below (this constructor is not gated behind the `stopwords` feature)
55
+ pub fn english ( ) -> Self {
56
+ // This is the same list of words used by the Apache-licensed Lucene project,
57
+ // cf. https://github.com/apache/lucene/blob/d5d6dc079395c47cd6d12dcce3bcfdd2c7d9dc63/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java#L46
58
+ const WORDS : & [ & str ] = & [
36
59
"a" , "an" , "and" , "are" , "as" , "at" , "be" , "but" , "by" , "for" , "if" , "in" , "into" ,
37
60
"is" , "it" , "no" , "not" , "of" , "on" , "or" , "such" , "that" , "the" , "their" , "then" ,
38
61
"there" , "these" , "they" , "this" , "to" , "was" , "will" , "with" ,
39
62
] ;
40
63
41
- StopWordFilter :: remove ( words. iter ( ) . map ( |& s| s. to_string ( ) ) )
64
+ Self :: from_word_list ( WORDS )
65
+ }
66
+
67
+ #[ cfg( feature = "stopwords" ) ]
68
+ /// Create a `StopWordFilter` for the Finnish language from the `stopwords::FINNISH` word list (only compiled with the `stopwords` feature)
69
+ pub fn finnish ( ) -> Self {
70
+ Self :: from_word_list ( stopwords:: FINNISH )
71
+ }
72
+
73
+ #[ cfg( feature = "stopwords" ) ]
74
+ /// Create a `StopWordFilter` for the French language from the `stopwords::FRENCH` word list (only compiled with the `stopwords` feature)
75
+ pub fn french ( ) -> Self {
76
+ Self :: from_word_list ( stopwords:: FRENCH )
77
+ }
78
+
79
+ #[ cfg( feature = "stopwords" ) ]
80
+ /// Create a `StopWordFilter` for the German language from the `stopwords::GERMAN` word list (only compiled with the `stopwords` feature)
81
+ pub fn german ( ) -> Self {
82
+ Self :: from_word_list ( stopwords:: GERMAN )
83
+ }
84
+
85
+ #[ cfg( feature = "stopwords" ) ]
86
+ /// Create a `StopWordFilter` for the Italian language from the `stopwords::ITALIAN` word list (only compiled with the `stopwords` feature)
87
+ pub fn italian ( ) -> Self {
88
+ Self :: from_word_list ( stopwords:: ITALIAN )
89
+ }
90
+
91
+ #[ cfg( feature = "stopwords" ) ]
92
+ /// Create a `StopWordFilter` for the Norwegian language from the `stopwords::NORWEGIAN` word list (only compiled with the `stopwords` feature)
93
+ pub fn norwegian ( ) -> Self {
94
+ Self :: from_word_list ( stopwords:: NORWEGIAN )
95
+ }
96
+
97
+ #[ cfg( feature = "stopwords" ) ]
98
+ /// Create a `StopWordFilter` for the Portuguese language from the `stopwords::PORTUGUESE` word list (only compiled with the `stopwords` feature)
99
+ pub fn portuguese ( ) -> Self {
100
+ Self :: from_word_list ( stopwords:: PORTUGUESE )
101
+ }
102
+
103
+ #[ cfg( feature = "stopwords" ) ]
104
+ /// Create a `StopWordFilter` for the Russian language from the `stopwords::RUSSIAN` word list (only compiled with the `stopwords` feature)
105
+ pub fn russian ( ) -> Self {
106
+ Self :: from_word_list ( stopwords:: RUSSIAN )
107
+ }
108
+
109
+ #[ cfg( feature = "stopwords" ) ]
110
+ /// Create a `StopWordFilter` for the Spanish language from the `stopwords::SPANISH` word list (only compiled with the `stopwords` feature)
111
+ pub fn spanish ( ) -> Self {
112
+ Self :: from_word_list ( stopwords:: SPANISH )
113
+ }
114
+
115
+ #[ cfg( feature = "stopwords" ) ]
116
+ /// Create a `StopWordFilter` for the Swedish language from the `stopwords::SWEDISH` word list (only compiled with the `stopwords` feature)
117
+ pub fn swedish ( ) -> Self {
118
+ Self :: from_word_list ( stopwords:: SWEDISH )
42
119
}
43
120
}
44
121
0 commit comments