@@ -5,13 +5,14 @@ use combine::parser::range::{take_while, take_while1};
5
5
use combine:: parser:: repeat:: escaped;
6
6
use combine:: parser:: Parser ;
7
7
use combine:: {
8
- attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by,
8
+ any , attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by,
9
9
skip_many1, value,
10
10
} ;
11
11
use once_cell:: sync:: Lazy ;
12
12
use regex:: Regex ;
13
13
14
14
use super :: user_input_ast:: { UserInputAst , UserInputBound , UserInputLeaf , UserInputLiteral } ;
15
+ use crate :: user_input_ast:: Delimiter ;
15
16
use crate :: Occur ;
16
17
17
18
// Note: '-' char is only forbidden at the beginning of a field name, would be clearer to add it to
@@ -133,16 +134,41 @@ fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
133
134
recognize ( ( date, char ( 'T' ) , time) )
134
135
}
135
136
136
- fn term_val < ' a > ( ) -> impl Parser < & ' a str , Output = String > {
137
- let phrase = char ( '"' ) . with ( many1 ( satisfy ( |c| c != '"' ) ) ) . skip ( char ( '"' ) ) ;
138
- negative_number ( ) . or ( phrase. or ( word ( ) ) )
137
+ fn escaped_character < ' a > ( ) -> impl Parser < & ' a str , Output = char > {
138
+ ( char ( '\\' ) , any ( ) ) . map ( |( _, x) | x)
139
+ }
140
+
141
+ fn escaped_string < ' a > ( delimiter : char ) -> impl Parser < & ' a str , Output = String > {
142
+ (
143
+ char ( delimiter) ,
144
+ many ( choice ( (
145
+ escaped_character ( ) ,
146
+ satisfy ( move |c : char | c != delimiter) ,
147
+ ) ) ) ,
148
+ char ( delimiter) ,
149
+ )
150
+ . map ( |( _, s, _) | s)
151
+ }
152
+
153
+ fn term_val < ' a > ( ) -> impl Parser < & ' a str , Output = ( Delimiter , String ) > {
154
+ let double_quotes = escaped_string ( '"' ) . map ( |phrase| ( Delimiter :: DoubleQuotes , phrase) ) ;
155
+ let single_quotes = escaped_string ( '\'' ) . map ( |phrase| ( Delimiter :: SingleQuotes , phrase) ) ;
156
+ let text_no_delimiter = word ( ) . map ( |text| ( Delimiter :: None , text) ) ;
157
+ negative_number ( )
158
+ . map ( |negative_number_str| ( Delimiter :: None , negative_number_str) )
159
+ . or ( double_quotes)
160
+ . or ( single_quotes)
161
+ . or ( text_no_delimiter)
139
162
}
140
163
141
164
fn term_query < ' a > ( ) -> impl Parser < & ' a str , Output = UserInputLiteral > {
142
- ( field_name ( ) , term_val ( ) , slop_val ( ) ) . map ( |( field_name, phrase, slop) | UserInputLiteral {
143
- field_name : Some ( field_name) ,
144
- phrase,
145
- slop,
165
+ ( field_name ( ) , term_val ( ) , slop_val ( ) ) . map ( |( field_name, ( delimiter, phrase) , slop) | {
166
+ UserInputLiteral {
167
+ field_name : Some ( field_name) ,
168
+ phrase,
169
+ delimiter,
170
+ slop,
171
+ }
146
172
} )
147
173
}
148
174
@@ -159,11 +185,13 @@ fn slop_val<'a>() -> impl Parser<&'a str, Output = u32> {
159
185
}
160
186
161
187
fn literal < ' a > ( ) -> impl Parser < & ' a str , Output = UserInputLeaf > {
162
- let term_default_field = ( term_val ( ) , slop_val ( ) ) . map ( |( phrase, slop) | UserInputLiteral {
163
- field_name : None ,
164
- phrase,
165
- slop,
166
- } ) ;
188
+ let term_default_field =
189
+ ( term_val ( ) , slop_val ( ) ) . map ( |( ( delimiter, phrase) , slop) | UserInputLiteral {
190
+ field_name : None ,
191
+ phrase,
192
+ delimiter,
193
+ slop,
194
+ } ) ;
167
195
168
196
attempt ( term_query ( ) )
169
197
. or ( term_default_field)
@@ -268,7 +296,11 @@ fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
268
296
/// Function that parses a set out of a Stream
269
297
/// Supports ranges like: `IN [val1 val2 val3]`
270
298
fn set < ' a > ( ) -> impl Parser < & ' a str , Output = UserInputLeaf > {
271
- let term_list = between ( char ( '[' ) , char ( ']' ) , sep_by ( term_val ( ) , spaces ( ) ) ) ;
299
+ let term_list = between (
300
+ char ( '[' ) ,
301
+ char ( ']' ) ,
302
+ sep_by ( term_val ( ) . map ( |( _delimiter, text) | text) , spaces ( ) ) ,
303
+ ) ;
272
304
273
305
let set_content = ( ( string ( "IN" ) , spaces ( ) ) , term_list) . map ( |( _, elements) | elements) ;
274
306
@@ -486,6 +518,7 @@ mod test {
486
518
assert_eq ! ( remaining, "" ) ;
487
519
}
488
520
521
+ #[ track_caller]
489
522
fn test_parse_query_to_ast_helper ( query : & str , expected : & str ) {
490
523
let query = parse_to_ast ( ) . parse ( query) . unwrap ( ) . 0 ;
491
524
let query_str = format ! ( "{query:?}" ) ;
@@ -504,8 +537,9 @@ mod test {
504
537
#[ test]
505
538
fn test_parse_query_to_ast_hyphen ( ) {
506
539
test_parse_query_to_ast_helper ( "\" www-form-encoded\" " , "\" www-form-encoded\" " ) ;
507
- test_parse_query_to_ast_helper ( "www-form-encoded" , "\" www-form-encoded\" " ) ;
508
- test_parse_query_to_ast_helper ( "www-form-encoded" , "\" www-form-encoded\" " ) ;
540
+ test_parse_query_to_ast_helper ( "'www-form-encoded'" , "'www-form-encoded'" ) ;
541
+ test_parse_query_to_ast_helper ( "www-form-encoded" , "www-form-encoded" ) ;
542
+ test_parse_query_to_ast_helper ( "www-form-encoded" , "www-form-encoded" ) ;
509
543
}
510
544
511
545
#[ test]
@@ -514,25 +548,25 @@ mod test {
514
548
format!( "{:?}" , parse_to_ast( ) . parse( "NOT" ) ) ,
515
549
"Err(UnexpectedParse)"
516
550
) ;
517
- test_parse_query_to_ast_helper ( "NOTa" , "\" NOTa\" " ) ;
518
- test_parse_query_to_ast_helper ( "NOT a" , "(-\" a \" )" ) ;
551
+ test_parse_query_to_ast_helper ( "NOTa" , "NOTa" ) ;
552
+ test_parse_query_to_ast_helper ( "NOT a" , "(-a )" ) ;
519
553
}
520
554
521
555
#[ test]
522
556
fn test_boosting ( ) {
523
557
assert ! ( parse_to_ast( ) . parse( "a^2^3" ) . is_err( ) ) ;
524
558
assert ! ( parse_to_ast( ) . parse( "a^2^" ) . is_err( ) ) ;
525
- test_parse_query_to_ast_helper ( "a^3" , "(\" a \" )^3" ) ;
526
- test_parse_query_to_ast_helper ( "a^3 b^2" , "(*(\" a \" )^3 *(\" b \" )^2)" ) ;
527
- test_parse_query_to_ast_helper ( "a^1" , "\" a \" " ) ;
559
+ test_parse_query_to_ast_helper ( "a^3" , "(a )^3" ) ;
560
+ test_parse_query_to_ast_helper ( "a^3 b^2" , "(*(a )^3 *(b )^2)" ) ;
561
+ test_parse_query_to_ast_helper ( "a^1" , "a " ) ;
528
562
}
529
563
530
564
#[ test]
531
565
fn test_parse_query_to_ast_binary_op ( ) {
532
- test_parse_query_to_ast_helper ( "a AND b" , "(+\" a \" + \" b \" )" ) ;
533
- test_parse_query_to_ast_helper ( "a OR b" , "(?\" a \" ? \" b \" )" ) ;
534
- test_parse_query_to_ast_helper ( "a OR b AND c" , "(?\" a \" ?(+\" b \" + \" c \" ))" ) ;
535
- test_parse_query_to_ast_helper ( "a AND b AND c" , "(+\" a \" + \" b \" + \" c \" )" ) ;
566
+ test_parse_query_to_ast_helper ( "a AND b" , "(+a +b )" ) ;
567
+ test_parse_query_to_ast_helper ( "a OR b" , "(?a ?b )" ) ;
568
+ test_parse_query_to_ast_helper ( "a OR b AND c" , "(?a ?(+b +c ))" ) ;
569
+ test_parse_query_to_ast_helper ( "a AND b AND c" , "(+a +b +c )" ) ;
536
570
assert_eq ! (
537
571
format!( "{:?}" , parse_to_ast( ) . parse( "a OR b aaa" ) ) ,
538
572
"Err(UnexpectedParse)"
@@ -574,7 +608,7 @@ mod test {
574
608
fn test_occur_leaf ( ) {
575
609
let ( ( occur, ast) , _) = super :: occur_leaf ( ) . parse ( "+abc" ) . unwrap ( ) ;
576
610
assert_eq ! ( occur, Some ( Occur :: Must ) ) ;
577
- assert_eq ! ( format!( "{ast:?}" ) , "\" abc\" " ) ;
611
+ assert_eq ! ( format!( "{ast:?}" ) , "abc" ) ;
578
612
}
579
613
580
614
#[ test]
@@ -728,56 +762,62 @@ mod test {
728
762
729
763
#[ test]
730
764
fn test_parse_query_to_triming_spaces ( ) {
731
- test_parse_query_to_ast_helper ( " abc" , "\" abc\" " ) ;
732
- test_parse_query_to_ast_helper ( "abc " , "\" abc\" " ) ;
733
- test_parse_query_to_ast_helper ( "( a OR abc)" , "(?\" a \" ? \" abc\" )" ) ;
734
- test_parse_query_to_ast_helper ( "(a OR abc)" , "(?\" a \" ? \" abc\" )" ) ;
735
- test_parse_query_to_ast_helper ( "(a OR abc)" , "(?\" a \" ? \" abc\" )" ) ;
736
- test_parse_query_to_ast_helper ( "a OR abc " , "(?\" a \" ? \" abc\" )" ) ;
737
- test_parse_query_to_ast_helper ( "(a OR abc )" , "(?\" a \" ? \" abc\" )" ) ;
738
- test_parse_query_to_ast_helper ( "(a OR abc) " , "(?\" a \" ? \" abc\" )" ) ;
765
+ test_parse_query_to_ast_helper ( " abc" , "abc" ) ;
766
+ test_parse_query_to_ast_helper ( "abc " , "abc" ) ;
767
+ test_parse_query_to_ast_helper ( "( a OR abc)" , "(?a ? abc)" ) ;
768
+ test_parse_query_to_ast_helper ( "(a OR abc)" , "(?a ? abc)" ) ;
769
+ test_parse_query_to_ast_helper ( "(a OR abc)" , "(?a ? abc)" ) ;
770
+ test_parse_query_to_ast_helper ( "a OR abc " , "(?a ? abc)" ) ;
771
+ test_parse_query_to_ast_helper ( "(a OR abc )" , "(?a ? abc)" ) ;
772
+ test_parse_query_to_ast_helper ( "(a OR abc) " , "(?a ? abc)" ) ;
739
773
}
740
774
741
775
#[ test]
742
776
fn test_parse_query_single_term ( ) {
743
- test_parse_query_to_ast_helper ( "abc" , "\" abc\" " ) ;
777
+ test_parse_query_to_ast_helper ( "abc" , "abc" ) ;
744
778
}
745
779
746
780
#[ test]
747
781
fn test_parse_query_default_clause ( ) {
748
- test_parse_query_to_ast_helper ( "a b" , "(*\" a \" * \" b \" )" ) ;
782
+ test_parse_query_to_ast_helper ( "a b" , "(*a *b )" ) ;
749
783
}
750
784
751
785
#[ test]
752
786
fn test_parse_query_must_default_clause ( ) {
753
- test_parse_query_to_ast_helper ( "+(a b)" , "(*\" a \" * \" b \" )" ) ;
787
+ test_parse_query_to_ast_helper ( "+(a b)" , "(*a *b )" ) ;
754
788
}
755
789
756
790
#[ test]
757
791
fn test_parse_query_must_single_term ( ) {
758
- test_parse_query_to_ast_helper ( "+d" , "\" d \" " ) ;
792
+ test_parse_query_to_ast_helper ( "+d" , "d " ) ;
759
793
}
760
794
761
795
#[ test]
762
796
fn test_single_term_with_field ( ) {
763
- test_parse_query_to_ast_helper ( "abc:toto" , "\" abc\" :\" toto\" " ) ;
797
+ test_parse_query_to_ast_helper ( "abc:toto" , "\" abc\" :toto" ) ;
798
+ }
799
+
800
+ #[ test]
801
+ fn test_phrase_with_field ( ) {
802
+ test_parse_query_to_ast_helper ( "abc:\" happy tax payer\" " , "\" abc\" :\" happy tax payer\" " ) ;
803
+ test_parse_query_to_ast_helper ( "abc:'happy tax payer'" , "\" abc\" :'happy tax payer'" ) ;
764
804
}
765
805
766
806
#[ test]
767
807
fn test_single_term_with_float ( ) {
768
- test_parse_query_to_ast_helper ( "abc:1.1" , "\" abc\" :\" 1.1\" " ) ;
769
- test_parse_query_to_ast_helper ( "a.b.c:1.1" , "\" a.b.c\" :\" 1.1\" " ) ;
770
- test_parse_query_to_ast_helper ( "a\\ b\\ c:1.1" , "\" a b c\" :\" 1.1\" " ) ;
808
+ test_parse_query_to_ast_helper ( "abc:1.1" , "\" abc\" :1.1" ) ;
809
+ test_parse_query_to_ast_helper ( "a.b.c:1.1" , "\" a.b.c\" :1.1" ) ;
810
+ test_parse_query_to_ast_helper ( "a\\ b\\ c:1.1" , "\" a b c\" :1.1" ) ;
771
811
}
772
812
773
813
#[ test]
774
814
fn test_must_clause ( ) {
775
- test_parse_query_to_ast_helper ( "(+a +b)" , "(+\" a \" + \" b \" )" ) ;
815
+ test_parse_query_to_ast_helper ( "(+a +b)" , "(+a +b )" ) ;
776
816
}
777
817
778
818
#[ test]
779
819
fn test_parse_test_query_plus_a_b_plus_d ( ) {
780
- test_parse_query_to_ast_helper ( "+(a b) +d" , "(+(*\" a \" * \" b \" ) +\" d \" )" ) ;
820
+ test_parse_query_to_ast_helper ( "+(a b) +d" , "(+(*a *b ) +d )" ) ;
781
821
}
782
822
783
823
#[ test]
@@ -790,13 +830,13 @@ mod test {
790
830
791
831
#[ test]
792
832
fn test_parse_test_query_other ( ) {
793
- test_parse_query_to_ast_helper ( "(+a +b) d" , "(*(+\" a \" + \" b \" ) *\" d \" )" ) ;
794
- test_parse_query_to_ast_helper ( "+abc:toto" , "\" abc\" :\" toto\" " ) ;
795
- test_parse_query_to_ast_helper ( "+a\\ +b\\ +c:toto" , "\" a+b+c\" :\" toto\" " ) ;
796
- test_parse_query_to_ast_helper ( "(+abc:toto -titi)" , "(+\" abc\" :\" toto\" - \" titi\" )" ) ;
797
- test_parse_query_to_ast_helper ( "-abc:toto" , "(-\" abc\" :\" toto\" )" ) ;
833
+ test_parse_query_to_ast_helper ( "(+a +b) d" , "(*(+a +b ) *d )" ) ;
834
+ test_parse_query_to_ast_helper ( "+abc:toto" , "\" abc\" :toto" ) ;
835
+ test_parse_query_to_ast_helper ( "+a\\ +b\\ +c:toto" , "\" a+b+c\" :toto" ) ;
836
+ test_parse_query_to_ast_helper ( "(+abc:toto -titi)" , "(+\" abc\" :toto - titi)" ) ;
837
+ test_parse_query_to_ast_helper ( "-abc:toto" , "(-\" abc\" :toto)" ) ;
798
838
test_is_parse_err ( "--abc:toto" ) ;
799
- test_parse_query_to_ast_helper ( "abc:a b" , "(*\" abc\" :\" a \" * \" b \" )" ) ;
839
+ test_parse_query_to_ast_helper ( "abc:a b" , "(*\" abc\" :a *b )" ) ;
800
840
test_parse_query_to_ast_helper ( "abc:\" a b\" " , "\" abc\" :\" a b\" " ) ;
801
841
test_parse_query_to_ast_helper ( "foo:[1 TO 5]" , "\" foo\" :[\" 1\" TO \" 5\" ]" ) ;
802
842
}
@@ -821,11 +861,10 @@ mod test {
821
861
assert ! ( parse_to_ast( ) . parse( "foo:\" a b\" ~" ) . is_err( ) ) ;
822
862
assert ! ( parse_to_ast( ) . parse( "\" a b\" ~a" ) . is_err( ) ) ;
823
863
assert ! ( parse_to_ast( ) . parse( "\" a b\" ~100000000000000000" ) . is_err( ) ) ;
824
-
825
- test_parse_query_to_ast_helper ( "\" a b\" ^2~4" , "(*(\" a b\" )^2 *\" ~4\" )" ) ;
864
+ test_parse_query_to_ast_helper ( "\" a b\" ^2~4" , "(*(\" a b\" )^2 *~4)" ) ;
826
865
test_parse_query_to_ast_helper ( "\" ~Document\" " , "\" ~Document\" " ) ;
827
- test_parse_query_to_ast_helper ( "~Document" , "\" ~Document\" " ) ;
828
- test_parse_query_to_ast_helper ( "a~2" , "\" a~2\" " ) ;
866
+ test_parse_query_to_ast_helper ( "~Document" , "~Document" ) ;
867
+ test_parse_query_to_ast_helper ( "a~2" , "a~2" ) ;
829
868
test_parse_query_to_ast_helper ( "\" a b\" ~0" , "\" a b\" " ) ;
830
869
test_parse_query_to_ast_helper ( "\" a b\" ~1" , "\" a b\" ~1" ) ;
831
870
test_parse_query_to_ast_helper ( "\" a b\" ~3" , "\" a b\" ~3" ) ;
@@ -835,7 +874,19 @@ mod test {
835
874
836
875
#[ test]
837
876
fn test_not_queries_are_consistent ( ) {
838
- test_parse_query_to_ast_helper ( "tata -toto" , "(*\" tata\" -\" toto\" )" ) ;
839
- test_parse_query_to_ast_helper ( "tata NOT toto" , "(*\" tata\" -\" toto\" )" ) ;
877
+ test_parse_query_to_ast_helper ( "tata -toto" , "(*tata -toto)" ) ;
878
+ test_parse_query_to_ast_helper ( "tata NOT toto" , "(*tata -toto)" ) ;
879
+ }
880
+
881
+ #[ test]
882
+ fn test_escaping ( ) {
883
+ test_parse_query_to_ast_helper (
884
+ r#"myfield:"hello\"happy\'tax""# ,
885
+ r#""myfield":"hello"happy'tax""# ,
886
+ ) ;
887
+ test_parse_query_to_ast_helper (
888
+ r#"myfield:'hello\"happy\'tax'"# ,
889
+ r#""myfield":'hello"happy'tax'"# ,
890
+ ) ;
840
891
}
841
892
}
0 commit comments