@@ -15,7 +15,7 @@ use crate::postings::{
 use crate::schema::{FieldEntry, FieldType, Schema, Term, Value, DATE_TIME_PRECISION_INDEXED};
 use crate::store::{StoreReader, StoreWriter};
 use crate::tokenizer::{FacetTokenizer, PreTokenizedStream, TextAnalyzer, Tokenizer};
-use crate::{DocId, Document, Opstamp, SegmentComponent};
+use crate::{DocId, Document, Opstamp, SegmentComponent, TantivyError};
 
 /// Computes the initial size of the hash table.
 ///
@@ -98,14 +98,18 @@ impl SegmentWriter {
                     }
                     _ => None,
                 };
-                text_options
-                    .and_then(|text_index_option| {
-                        let tokenizer_name = &text_index_option.tokenizer();
-                        tokenizer_manager.get(tokenizer_name)
-                    })
-                    .unwrap_or_default()
+                let tokenizer_name = text_options
+                    .and_then(|text_index_option| Some(text_index_option.tokenizer()))
+                    .unwrap_or("default");
+
+                tokenizer_manager.get(tokenizer_name).ok_or_else(|| {
+                    TantivyError::SchemaError(format!(
+                        "Error getting tokenizer for field: {}",
+                        field_entry.name()
+                    ))
+                })
             })
-            .collect();
+            .collect::<Result<Vec<_>, _>>()?;
         Ok(SegmentWriter {
             max_doc: 0,
             ctx: IndexingContext::new(table_size),
@@ -438,15 +442,19 @@ fn remap_and_write(
 
 #[cfg(test)]
 mod tests {
-    use std::path::Path;
+    use std::path::{Path, PathBuf};
+
+    use tempfile::TempDir;
 
     use super::compute_initial_table_size;
    use crate::collector::Count;
     use crate::core::json_utils::JsonTermWriter;
     use crate::directory::RamDirectory;
     use crate::postings::TermInfo;
     use crate::query::PhraseQuery;
-    use crate::schema::{IndexRecordOption, Schema, Type, STORED, STRING, TEXT};
+    use crate::schema::{
+        IndexRecordOption, Schema, TextFieldIndexing, TextOptions, Type, STORED, STRING, TEXT,
+    };
     use crate::store::{Compressor, StoreReader, StoreWriter};
     use crate::time::format_description::well_known::Rfc3339;
     use crate::time::OffsetDateTime;
@@ -900,4 +908,32 @@ mod tests {
         postings.positions(&mut positions);
         assert_eq!(positions, &[4]); //< as opposed to 3 if we had a position length of 1.
     }
+
+    #[test]
+    fn test_show_error_when_tokenizer_not_registered() {
+        let text_field_indexing = TextFieldIndexing::default()
+            .set_tokenizer("custom_en")
+            .set_index_option(IndexRecordOption::WithFreqsAndPositions);
+        let text_options = TextOptions::default()
+            .set_indexing_options(text_field_indexing)
+            .set_stored();
+        let mut schema_builder = Schema::builder();
+        schema_builder.add_text_field("title", text_options);
+        let schema = schema_builder.build();
+        let tempdir = TempDir::new().unwrap();
+        let tempdir_path = PathBuf::from(tempdir.path());
+        Index::create_in_dir(&tempdir_path, schema).unwrap();
+        let index = Index::open_in_dir(tempdir_path).unwrap();
+        let schema = index.schema();
+        let mut index_writer = index.writer(50_000_000).unwrap();
+        let title = schema.get_field("title").unwrap();
+        let mut document = Document::default();
+        document.add_text(title, "The Old Man and the Sea");
+        index_writer.add_document(document).unwrap();
+        let error = index_writer.commit().unwrap_err();
+        assert_eq!(
+            error.to_string(),
+            "Schema error: 'Error getting tokenizer for field: title'"
+        );
+    }
 }
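
The heart of the change is the switch from `.unwrap_or_default()` to a fallible lookup: each field's closure now yields `Result<TextAnalyzer, TantivyError>`, and `.collect::<Result<Vec<_>, _>>()?` short-circuits on the first missing tokenizer instead of silently substituting a default analyzer. A minimal standalone sketch of that standard-library pattern, with illustrative names not taken from tantivy:

// Sketch: collecting an iterator of Result<T, E> into Result<Vec<T>, E>
// stops at the first Err, which `?` would then return to the caller.
fn upper_all(names: &[&str]) -> Result<Vec<String>, String> {
    names
        .iter()
        .map(|name| {
            if name.is_empty() {
                // The first Err aborts the collect and becomes the result.
                Err("empty name".to_string())
            } else {
                Ok(name.to_uppercase())
            }
        })
        .collect::<Result<Vec<_>, _>>()
}

fn main() {
    assert_eq!(upper_all(&["a", "b"]), Ok(vec!["A".to_string(), "B".to_string()]));
    assert_eq!(upper_all(&["a", ""]), Err("empty name".to_string()));
}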
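For callers that hit the new `SchemaError`, the remedy is to register an analyzer under the exact name the schema references before indexing. A hedged sketch, assuming the `TokenizerManager::register` API and the `TextAnalyzer::from(...).filter(...)` builder style of the tantivy version this diff targets; the analyzer chain shown is an arbitrary example, not part of this change:

use tantivy::tokenizer::{LowerCaser, SimpleTokenizer, TextAnalyzer};
use tantivy::Index;

fn register_custom_en(index: &Index) {
    // "custom_en" must match the name passed to set_tokenizer() in the
    // schema; once registered, tokenizer_manager.get("custom_en") returns
    // Some, and commit() no longer fails with the SchemaError added above.
    index.tokenizers().register(
        "custom_en",
        TextAnalyzer::from(SimpleTokenizer).filter(LowerCaser),
    );
}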