forked from cplusplus/draft
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregex.tex
3840 lines (3365 loc) · 127 KB
/
regex.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%!TEX root = std.tex
\rSec0[re]{Regular expressions library}
\indextext{regular expression|(}
\rSec1[re.general]{General}
\pnum
This Clause describes components that \Cpp{} programs may use to
perform operations involving regular expression matching and
searching.
\pnum
The following subclauses describe a basic regular expression class template and its
traits that can handle char-like\iref{strings.general} template arguments,
two specializations of this class template that handle sequences of \tcode{char} and \tcode{wchar_t},
a class template that holds the
result of a regular expression match, a series of algorithms that allow a character
sequence to be operated upon by a regular expression,
and two iterator types for
enumerating regular expression matches, as summarized in \tref{re.summary}.
\begin{libsumtab}{Regular expressions library summary}{re.summary}
\ref{re.def} & Definitions & \\
\ref{re.req} & Requirements & \\ \rowsep
\ref{re.const} & Constants & \tcode{<regex>} \\
\ref{re.badexp} & Exception type & \\
\ref{re.traits} & Traits & \\
\ref{re.regex} & Regular expression template & \\
\ref{re.submatch} & Submatches & \\
\ref{re.results} & Match results & \\
\ref{re.alg} & Algorithms & \\
\ref{re.iter} & Iterators & \\ \rowsep
\ref{re.grammar} & Grammar & \\
\end{libsumtab}
\rSec1[re.def]{Definitions}
\pnum
The following definitions shall apply to this Clause:
\indextext{collating element}%
\indextext{locale}%
\definition{collating element}{defns.regex.collating.element}
a sequence of one or more characters within the
current locale that collate as if they were a single character.
\indextext{finite state machine}%
\definition{finite state machine}{defns.regex.finite.state.machine}
an unspecified data structure that is used to
represent a regular expression, and which permits efficient matches
against the regular expression to be obtained.
\indextext{format specifier}%
\definition{format specifier}{defns.regex.format.specifier}
a sequence of one or more characters that is to be
replaced with some part of a regular expression match.
\indextext{matched}%
\indextext{regular expression!matched}%
\definition{matched}{defns.regex.matched}
a sequence of zero or more characters is matched by
a regular expression when the characters in the sequence
correspond to a sequence of characters defined by the pattern.
\indextext{primary equivalence class}%
\indextext{locale}%
\definition{primary equivalence class}{defns.regex.primary.equivalence.class}
a set of one or more characters which
share the same primary sort key: that is, the sort key weighting that
depends only upon character shape, and not accents, case, or
locale-specific tailorings.
\definition{regular expression}{defns.regex.regular.expression}
a pattern that selects specific strings
from a set of character strings.
\indextext{sub-expression!regular expression}%
\definition{sub-expression}{defns.regex.subexpression}
a subset of a regular expression that has
been marked by parentheses.
\rSec1[re.req]{Requirements}
\pnum
This subclause defines requirements on classes representing regular
expression traits.
\begin{note}
The class template
\tcode{regex_traits}, defined in \ref{re.traits},
meets these requirements.
\end{note}
\pnum
The class template \tcode{basic_regex}, defined in
\ref{re.regex}, needs a set of related types and
functions to complete the definition of its semantics. These types
and functions are provided as a set of member \grammarterm{typedef-name}{s} and functions
in the template parameter \tcode{traits} used by the \tcode{basic_regex} class
template. This subclause defines the semantics of these
members.
\pnum
To specialize class template \tcode{basic_regex} for a character
container \tcode{CharT} and its related regular
expression traits class \tcode{Traits}, use \tcode{basic_regex<CharT, Traits>}.
\pnum
\indextext{regular expression traits!requirements}%
\indextext{requirements!regular expression traits}%
\indextext{regular expression!requirements}%
\indextext{locale}%
In \tref{re.req}, \tcode{X} denotes a traits class
defining types and functions for the character container
type \tcode{charT}; \tcode{u} is an object of
type \tcode{X}; \tcode{v} is an object of type \tcode{const
X}; \tcode{p} is a value of type \tcode{const charT*}; \tcode{I1}
and \tcode{I2} are input iterators\iref{input.iterators};
\tcode{F1} and \tcode{F2} are forward iterators\iref{forward.iterators};
\tcode{c} is a value of type \tcode{const charT};
\tcode{s} is an object of type \tcode{X::string_type};
\tcode{cs} is an object of type \tcode{const X::string_type};
\tcode{b} is a value of type \tcode{bool};
\tcode{I} is a value of type \tcode{int};
\tcode{cl} is an object of type \tcode{X::char_class_type};
and \tcode{loc} is an object of type \tcode{X::locale_type}.
\begin{libreqtab3}
{Regular expression traits class requirements}
{re.req}
\\ \topline
\lhdr{Expression} & \chdr{Return type} & \rhdr{Assertion/note pre-/post-condition } \\ \capsep
\endfirsthead
\continuedcaption\\
\hline
\lhdr{Expression} & \chdr{Return type} & \rhdr{Assertion/note pre-/post-condition } \\ \capsep
\endhead
%%
\tcode{X::char_type}
& \tcode{charT}
& The character container type used in the implementation of class
template \tcode{basic_regex}.
\\ \rowsep
\tcode{X::string_type}
& \tcode{basic_string<charT>}
&
\\ \rowsep
\tcode{X::locale_type}
& A copy constructible type
& A type that represents the locale used by the traits class. \indextext{locale}
\\ \rowsep
\tcode{X::char_class_type}
& A bitmask type\iref{bitmask.types}.
& A bitmask type representing a particular character classification.
\indextext{regular expression traits!\idxcode{char_class_type}}%
\indextext{\idxcode{char_class_type}!regular expression traits}%
\\ \rowsep
\tcode{X::length(p)}
& \tcode{size_t}
& Yields the smallest \tcode{i} such that \tcode{p[i] == 0}. Complexity is
linear in \tcode{i}.
\\ \rowsep
\tcode{v.translate(c)}
& \tcode{X::char_type}
& Returns a character such that, for any character \tcode{d} that is to
be considered equivalent to \tcode{c}, \tcode{v.translate(c) == v.translate(d)}.
\indextext{regular expression traits!\idxcode{translate}}%
\indextext{\idxcode{translate}!regular expression traits}%
\\ \rowsep
\tcode{v.translate_nocase(c)}
& \tcode{X::char_type}
& Returns a character such that, for all characters \tcode{C} that are to be
considered equivalent to \tcode{c} when comparisons are to be performed without
regard to case, \tcode{v.translate_nocase(c) == v.translate_nocase(C)}.
\indextext{regular expression traits!\idxcode{translate_nocase}}%
\indextext{\idxcode{translate_nocase}!regular expression traits}%
\\ \rowsep
\tcode{v.transform(F1, F2)}
& \tcode{X::string_type}
& Returns a sort key for the character sequence designated by the
iterator range \range{F1}{F2} such that if the character sequence
\range{G1}{G2} sorts before the character sequence \range{H1}{H2}
then \tcode{v.transform(G1, G2) < v.transform(H1, H2)}.
\indextext{regular expression traits!\idxcode{transform}}%
\indextext{\idxcode{transform}!regular expression traits}%
\\ \rowsep
\tcode{v.transform_primary(F1, F2)}
& \tcode{X::string_type}
& Returns a sort key for the character sequence designated by the
iterator range \range{F1}{F2} such that if the character sequence
\range{G1}{G2} sorts before the character sequence \range{H1}{H2}
when character case is not considered
then \tcode{v.transform_primary(G1, G2) < v.transform_primary(H1, H2)}.
\indextext{regular expression traits!\idxcode{transform_primary}}%
\indextext{transform_primary@\tcode{transform_primary}!regular expression traits}%
\\ \rowsep
\tcode{v.lookup_collatename(F1, F2)}
& \tcode{X::string_type}
& Returns a sequence of characters that represents the collating element
consisting of the character sequence designated by the iterator range
\range{F1}{F2}. Returns an empty string if the character sequence is not
a valid collating element.
\indextext{regular expression traits!\idxcode{lookup_collatename}}%
\indextext{\idxcode{lookup_collatename}!regular expression traits}%
\\ \rowsep
\tcode{v.lookup_classname(F1, F2, b)}
& \tcode{X::char_class_type}
& Converts the character sequence designated by the iterator range
\range{F1}{F2} into a value of a bitmask type that can
subsequently be passed to \tcode{isctype}. Values returned from
\tcode{lookup_classname} can be bitwise \logop{OR}'ed together; the
resulting value represents membership in either of the
corresponding character classes.
If \tcode{b} is \tcode{true}, the returned bitmask is suitable for
matching characters without regard to their case.
Returns \tcode{0} if the character
sequence is not the name of a character class recognized by
\tcode{X}. The value returned shall be independent of the case of
the characters in the sequence.
\indextext{regular expression traits!\idxcode{lookup_classname}}%
\indextext{\idxcode{lookup_classname}!regular expression traits}%
\\ \rowsep
\tcode{v.isctype(c, cl)}
& \tcode{bool}
& Returns \tcode{true} if character \tcode{c} is a member of
one of the character classes designated by \tcode{cl},
\tcode{false} otherwise.
\indextext{regular expression traits!\idxcode{isctype}}%
\indextext{\idxcode{isctype}!regular expression traits}%
\\ \rowsep
\tcode{v.value(c, I)}
& \tcode{int}
& Returns the value represented by the digit \tcode{c} in base
\tcode{I} if the character \tcode{c} is a valid digit in base \tcode{I};
otherwise returns \tcode{-1}.
\begin{tailnote}
The value of \tcode{I} will only
be 8, 10, or 16.
\end{tailnote}
\\ \rowsep
\tcode{u.imbue(loc)}
& \tcode{X::locale_type}
& Imbues \tcode{u} with the locale \tcode{loc} and returns the previous locale
used by \tcode{u}, if any. \indextext{locale}%
\\ \rowsep
\tcode{v.getloc()}
& \tcode{X::locale_type}
& Returns the current locale used by \tcode{v}, if any. \indextext{locale}%
\\
\end{libreqtab3}
\pnum
\begin{note}
Class template \tcode{regex_traits} meets the requirements for a
regular expression traits class when it is specialized for
\tcode{char} or \tcode{wchar_t}. This class template is declared in
the header \libheader{regex}, and is described in \ref{re.traits}.
\end{note}
\rSec1[re.syn]{Header \tcode{<regex>} synopsis}
\indexheader{regex}%
\indexlibraryglobal{basic_regex}%
\indexlibraryglobal{regex}%
\indexlibraryglobal{wregex}%
\begin{codeblock}
#include <compare> // see \ref{compare.syn}
#include <initializer_list> // see \ref{initializer.list.syn}
namespace std {
// \ref{re.const}, regex constants
namespace regex_constants {
using syntax_option_type = @\placeholder{T1}@;
using match_flag_type = @\placeholder{T2}@;
using error_type = @\placeholder{T3}@;
}
// \ref{re.badexp}, class \tcode{regex_error}
class regex_error;
// \ref{re.traits}, class template \tcode{regex_traits}
template<class charT> struct regex_traits;
// \ref{re.regex}, class template \tcode{basic_regex}
template<class charT, class traits = regex_traits<charT>> class basic_regex;
using regex = basic_regex<char>;
using wregex = basic_regex<wchar_t>;
// \ref{re.regex.swap}, \tcode{basic_regex} swap
template<class charT, class traits>
void swap(basic_regex<charT, traits>& e1, basic_regex<charT, traits>& e2);
// \ref{re.submatch}, class template \tcode{sub_match}
template<class BidirectionalIterator>
class sub_match;
using csub_match = sub_match<const char*>;
using wcsub_match = sub_match<const wchar_t*>;
using ssub_match = sub_match<string::const_iterator>;
using wssub_match = sub_match<wstring::const_iterator>;
// \ref{re.submatch.op}, \tcode{sub_match} non-member operators
template<class BiIter>
bool operator==(const sub_match<BiIter>& lhs, const sub_match<BiIter>& rhs);
template<class BiIter>
auto operator<=>(const sub_match<BiIter>& lhs, const sub_match<BiIter>& rhs);
template<class BiIter, class ST, class SA>
bool operator==(
const sub_match<BiIter>& lhs,
const basic_string<typename iterator_traits<BiIter>::value_type, ST, SA>& rhs);
template<class BiIter, class ST, class SA>
auto operator<=>(
const sub_match<BiIter>& lhs,
const basic_string<typename iterator_traits<BiIter>::value_type, ST, SA>& rhs);
template<class BiIter>
bool operator==(const sub_match<BiIter>& lhs,
const typename iterator_traits<BiIter>::value_type* rhs);
template<class BiIter>
auto operator<=>(const sub_match<BiIter>& lhs,
const typename iterator_traits<BiIter>::value_type* rhs);
template<class BiIter>
bool operator==(const sub_match<BiIter>& lhs,
const typename iterator_traits<BiIter>::value_type& rhs);
template<class BiIter>
auto operator<=>(const sub_match<BiIter>& lhs,
const typename iterator_traits<BiIter>::value_type& rhs);
template<class charT, class ST, class BiIter>
basic_ostream<charT, ST>&
operator<<(basic_ostream<charT, ST>& os, const sub_match<BiIter>& m);
// \ref{re.results}, class template \tcode{match_results}
template<class BidirectionalIterator,
class Allocator = allocator<sub_match<BidirectionalIterator>>>
class match_results;
using cmatch = match_results<const char*>;
using wcmatch = match_results<const wchar_t*>;
using smatch = match_results<string::const_iterator>;
using wsmatch = match_results<wstring::const_iterator>;
// \tcode{match_results} comparisons
template<class BidirectionalIterator, class Allocator>
bool operator==(const match_results<BidirectionalIterator, Allocator>& m1,
const match_results<BidirectionalIterator, Allocator>& m2);
// \ref{re.results.swap}, \tcode{match_results} swap
template<class BidirectionalIterator, class Allocator>
void swap(match_results<BidirectionalIterator, Allocator>& m1,
match_results<BidirectionalIterator, Allocator>& m2);
// \ref{re.alg.match}, function template \tcode{regex_match}
template<class BidirectionalIterator, class Allocator, class charT, class traits>
bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
match_results<BidirectionalIterator, Allocator>& m,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class BidirectionalIterator, class charT, class traits>
bool regex_match(BidirectionalIterator first, BidirectionalIterator last,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class charT, class Allocator, class traits>
bool regex_match(const charT* str, match_results<const charT*, Allocator>& m,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class ST, class SA, class Allocator, class charT, class traits>
bool regex_match(const basic_string<charT, ST, SA>& s,
match_results<typename basic_string<charT, ST, SA>::const_iterator,
Allocator>& m,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class ST, class SA, class Allocator, class charT, class traits>
bool regex_match(const basic_string<charT, ST, SA>&&,
match_results<typename basic_string<charT, ST, SA>::const_iterator,
Allocator>&,
const basic_regex<charT, traits>&,
regex_constants::match_flag_type = regex_constants::match_default) = delete;
template<class charT, class traits>
bool regex_match(const charT* str,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class ST, class SA, class charT, class traits>
bool regex_match(const basic_string<charT, ST, SA>& s,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
// \ref{re.alg.search}, function template \tcode{regex_search}
template<class BidirectionalIterator, class Allocator, class charT, class traits>
bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
match_results<BidirectionalIterator, Allocator>& m,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class BidirectionalIterator, class charT, class traits>
bool regex_search(BidirectionalIterator first, BidirectionalIterator last,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class charT, class Allocator, class traits>
bool regex_search(const charT* str,
match_results<const charT*, Allocator>& m,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class charT, class traits>
bool regex_search(const charT* str,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class ST, class SA, class charT, class traits>
bool regex_search(const basic_string<charT, ST, SA>& s,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class ST, class SA, class Allocator, class charT, class traits>
bool regex_search(const basic_string<charT, ST, SA>& s,
match_results<typename basic_string<charT, ST, SA>::const_iterator,
Allocator>& m,
const basic_regex<charT, traits>& e,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class ST, class SA, class Allocator, class charT, class traits>
bool regex_search(const basic_string<charT, ST, SA>&&,
match_results<typename basic_string<charT, ST, SA>::const_iterator,
Allocator>&,
const basic_regex<charT, traits>&,
regex_constants::match_flag_type
= regex_constants::match_default) = delete;
// \ref{re.alg.replace}, function template \tcode{regex_replace}
template<class OutputIterator, class BidirectionalIterator,
class traits, class charT, class ST, class SA>
OutputIterator
regex_replace(OutputIterator out,
BidirectionalIterator first, BidirectionalIterator last,
const basic_regex<charT, traits>& e,
const basic_string<charT, ST, SA>& fmt,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class OutputIterator, class BidirectionalIterator, class traits, class charT>
OutputIterator
regex_replace(OutputIterator out,
BidirectionalIterator first, BidirectionalIterator last,
const basic_regex<charT, traits>& e,
const charT* fmt,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class traits, class charT, class ST, class SA, class FST, class FSA>
basic_string<charT, ST, SA>
regex_replace(const basic_string<charT, ST, SA>& s,
const basic_regex<charT, traits>& e,
const basic_string<charT, FST, FSA>& fmt,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class traits, class charT, class ST, class SA>
basic_string<charT, ST, SA>
regex_replace(const basic_string<charT, ST, SA>& s,
const basic_regex<charT, traits>& e,
const charT* fmt,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class traits, class charT, class ST, class SA>
basic_string<charT>
regex_replace(const charT* s,
const basic_regex<charT, traits>& e,
const basic_string<charT, ST, SA>& fmt,
regex_constants::match_flag_type flags = regex_constants::match_default);
template<class traits, class charT>
basic_string<charT>
regex_replace(const charT* s,
const basic_regex<charT, traits>& e,
const charT* fmt,
regex_constants::match_flag_type flags = regex_constants::match_default);
// \ref{re.regiter}, class template \tcode{regex_iterator}
template<class BidirectionalIterator,
class charT = typename iterator_traits<BidirectionalIterator>::value_type,
class traits = regex_traits<charT>>
class regex_iterator;
using cregex_iterator = regex_iterator<const char*>;
using wcregex_iterator = regex_iterator<const wchar_t*>;
using sregex_iterator = regex_iterator<string::const_iterator>;
using wsregex_iterator = regex_iterator<wstring::const_iterator>;
// \ref{re.tokiter}, class template \tcode{regex_token_iterator}
template<class BidirectionalIterator,
class charT = typename iterator_traits<BidirectionalIterator>::value_type,
class traits = regex_traits<charT>>
class regex_token_iterator;
using cregex_token_iterator = regex_token_iterator<const char*>;
using wcregex_token_iterator = regex_token_iterator<const wchar_t*>;
using sregex_token_iterator = regex_token_iterator<string::const_iterator>;
using wsregex_token_iterator = regex_token_iterator<wstring::const_iterator>;
namespace pmr {
template<class BidirectionalIterator>
using match_results =
std::match_results<BidirectionalIterator,
polymorphic_allocator<sub_match<BidirectionalIterator>>>;
using cmatch = match_results<const char*>;
using wcmatch = match_results<const wchar_t*>;
using smatch = match_results<string::const_iterator>;
using wsmatch = match_results<wstring::const_iterator>;
}
}
\end{codeblock}
\rSec1[re.const]{Namespace \tcode{std::regex_constants}}
\rSec2[re.const.general]{General}
\pnum
\indexlibraryglobal{regex_constants}%
The namespace \tcode{std::regex_constants} holds
symbolic constants used by the regular expression library. This
namespace provides three types, \tcode{syntax_option_type},
\tcode{match_flag_type}, and \tcode{error_type}, along with several
constants of these types.
\rSec2[re.synopt]{Bitmask type \tcode{syntax_option_type}}
\indexlibraryglobal{syntax_option_type}%
\indexlibrarymember{regex_constants}{syntax_option_type}%
\begin{codeblock}
namespace std::regex_constants {
using syntax_option_type = @\textit{T1}@;
inline constexpr syntax_option_type icase = @\unspec@;
inline constexpr syntax_option_type nosubs = @\unspec@;
inline constexpr syntax_option_type optimize = @\unspec@;
inline constexpr syntax_option_type collate = @\unspec@;
inline constexpr syntax_option_type ECMAScript = @\unspec@;
inline constexpr syntax_option_type basic = @\unspec@;
inline constexpr syntax_option_type extended = @\unspec@;
inline constexpr syntax_option_type awk = @\unspec@;
inline constexpr syntax_option_type grep = @\unspec@;
inline constexpr syntax_option_type egrep = @\unspec@;
inline constexpr syntax_option_type multiline = @\unspec@;
}
\end{codeblock}
\pnum
\indexlibraryglobal{syntax_option_type}%
\indexlibrarymember{syntax_option_type}{icase}%
\indexlibrarymember{syntax_option_type}{nosubs}%
\indexlibrarymember{syntax_option_type}{optimize}%
\indexlibrarymember{syntax_option_type}{collate}%
\indexlibrarymember{syntax_option_type}{ECMAScript}%
\indexlibrarymember{syntax_option_type}{basic}%
\indexlibrarymember{syntax_option_type}{extended}%
\indexlibrarymember{syntax_option_type}{awk}%
\indexlibrarymember{syntax_option_type}{grep}%
\indexlibrarymember{syntax_option_type}{egrep}%
\indexlibrarymember{syntax_option_type}{multiline}%
The type \tcode{syntax_option_type} is an \impldef{type of \tcode{syntax_option_type}} bitmask
type\iref{bitmask.types}. Setting its elements has the effects listed in
\tref{re.synopt}. A valid value of type
\tcode{syntax_option_type} shall have at most one of the grammar elements
\tcode{ECMAScript}, \tcode{basic}, \tcode{extended}, \tcode{awk}, \tcode{grep}, \tcode{egrep}, set.
If no grammar element is set, the default grammar is \tcode{ECMAScript}.
\begin{libefftab}
{\tcode{syntax_option_type} effects}
{re.synopt}
%
\tcode{icase} &
Specifies that matching of regular expressions against a character
container sequence shall be performed without regard to case.
\indexlibrarymember{syntax_option_type}{icase}%
\\ \rowsep
%
\tcode{nosubs} &
Specifies that no sub-expressions shall be considered to be marked, so that
when a regular expression is matched against a
character container sequence, no sub-expression matches shall be
stored in the supplied \tcode{match_results} object.
\indexlibrarymember{syntax_option_type}{nosubs}%
\\ \rowsep
%
\tcode{optimize} &
Specifies that the regular expression engine should pay more attention
to the speed with which regular expressions are matched, and less to
the speed with which regular expression objects are
constructed. Otherwise it has no detectable effect on the program
output.
\indexlibrarymember{syntax_option_type}{optimize}%
\\ \rowsep
%
\tcode{collate} &
Specifies that character ranges of the form \tcode{"[a-b]"} shall be
locale-sensitive.
\indexlibrarymember{syntax_option_type}{collate}%
\indextext{locale}%
\\ \rowsep
%
\tcode{ECMAScript} &
Specifies that the grammar recognized by the regular expression engine
shall be that used by ECMAScript in ECMA-262, as modified in~\ref{re.grammar}.
\newline \xref ECMA-262 15.10
\indextext{ECMAScript}%
\indexlibrarymember{syntax_option_type}{ECMAScript}%
\\ \rowsep
%
\tcode{basic} &
Specifies that the grammar recognized by the regular expression engine
shall be that used by basic regular expressions in POSIX.
\newline \xref POSIX, Base Definitions and Headers, Section 9.3
\indextext{POSIX!regular expressions}%
\indexlibrarymember{syntax_option_type}{basic}%
\\ \rowsep
%
\tcode{extended} &
Specifies that the grammar recognized by the regular expression engine
shall be that used by extended regular expressions in POSIX.
\newline \xref POSIX, Base Definitions and Headers, Section 9.4
\indextext{POSIX!extended regular expressions}%
\indexlibrarymember{syntax_option_type}{extended}%
\\ \rowsep
%
\tcode{awk} &
Specifies that the grammar recognized by the regular expression engine
shall be that used by the utility awk in POSIX.
\indextext{\idxcode{awk}}%
\indexlibrarymember{syntax_option_type}{awk}%
\\ \rowsep
%
\tcode{grep} &
Specifies that the grammar recognized by the regular expression engine
shall be that used by the utility grep in POSIX.
\indextext{\idxcode{grep}}%
\indexlibrarymember{syntax_option_type}{grep}%
\\ \rowsep
%
\tcode{egrep} &
Specifies that the grammar recognized by the regular expression engine
shall be that used by the utility grep when given the -E
option in POSIX.
\indextext{\idxcode{egrep}}%
\indexlibrarymember{syntax_option_type}{egrep}%
\\ \rowsep
%
\tcode{multiline} &
Specifies that \tcode{\caret} shall match the beginning of a line and
\tcode{\$} shall match the end of a line,
if the \tcode{ECMAScript} grammar is selected.
\indextext{\idxcode{multiline}}%
\indexlibrarymember{syntax_option_type}{multiline}%
\\
%
\end{libefftab}
\rSec2[re.matchflag]{Bitmask type \tcode{match_flag_type}}
\indexlibraryglobal{match_flag_type}%
\indexlibrarymember{regex_constants}{match_flag_type}%
\indexlibraryglobal{match_default}%
\indexlibraryglobal{match_not_bol}%
\indexlibraryglobal{match_not_eol}%
\indexlibraryglobal{match_not_bow}%
\indexlibraryglobal{match_not_eow}%
\indexlibraryglobal{match_any}%
\indexlibraryglobal{match_not_null}%
\indexlibraryglobal{match_continuous}%
\indexlibraryglobal{match_prev_avail}%
\indexlibraryglobal{format_default}%
\indexlibraryglobal{format_sed}%
\indexlibraryglobal{format_no_copy}%
\indexlibraryglobal{format_first_only}%
\begin{codeblock}
namespace std::regex_constants {
using match_flag_type = @\textit{T2}@;
inline constexpr match_flag_type match_default = {};
inline constexpr match_flag_type match_not_bol = @\unspec@;
inline constexpr match_flag_type match_not_eol = @\unspec@;
inline constexpr match_flag_type match_not_bow = @\unspec@;
inline constexpr match_flag_type match_not_eow = @\unspec@;
inline constexpr match_flag_type match_any = @\unspec@;
inline constexpr match_flag_type match_not_null = @\unspec@;
inline constexpr match_flag_type match_continuous = @\unspec@;
inline constexpr match_flag_type match_prev_avail = @\unspec@;
inline constexpr match_flag_type format_default = {};
inline constexpr match_flag_type format_sed = @\unspec@;
inline constexpr match_flag_type format_no_copy = @\unspec@;
inline constexpr match_flag_type format_first_only = @\unspec@;
}
\end{codeblock}
\pnum
\indexlibraryglobal{match_flag_type}%
The type \tcode{match_flag_type} is an
\impldef{type of \tcode{regex_constants::match_flag_type}} bitmask type\iref{bitmask.types}.
The constants of that type, except for \tcode{match_default} and
\tcode{format_default}, are bitmask elements. The \tcode{match_default} and
\tcode{format_default} constants are empty bitmasks.
Matching a regular expression against a sequence of characters
\range{first}{last} proceeds according to the rules of the grammar specified for the regular
expression object, modified according to the effects listed in \tref{re.matchflag} for
any bitmask elements set.
\begin{longlibefftab}
{\tcode{regex_constants::match_flag_type} effects when obtaining a match against a
character container sequence \range{first}{last}.}
{re.matchflag}
%
\indexlibraryglobal{match_not_bol}%
\tcode{match_not_bol} &
The first character in the sequence \range{first}{last} shall be treated
as though it is not at the beginning of a line, so the character
\verb|^| in the regular expression shall not match \range{first}{first}.
\\ \rowsep
%
\indexlibraryglobal{match_not_eol}%
\tcode{match_not_eol} &
The last character in the sequence \range{first}{last} shall be treated
as though it is not at the end of a line, so the character
\verb|$| in the regular expression shall not match \range{last}{last}.
\\ \rowsep
%
\indexlibraryglobal{match_not_bow}%
\tcode{match_not_bow} &
The expression \verb|"\\b"| shall not match the
sub-sequence \range{first}{first}.
\\ \rowsep
%
\indexlibraryglobal{match_not_eow}%
\tcode{match_not_eow} &
The expression \verb|"\\b"| shall not match the
sub-sequence \range{last}{last}.
\\ \rowsep
%
\indexlibraryglobal{match_any}%
\tcode{match_any} &
If more than one match is possible then any match is an
acceptable result.
\\ \rowsep
%
\indexlibraryglobal{match_not_null}%
\tcode{match_not_null} &
The expression shall not match an empty
sequence.
\\ \rowsep
%
\indexlibraryglobal{match_continuous}%
\tcode{match_continuous} &
The expression shall only match a sub-sequence that begins at
\tcode{first}.
\\ \rowsep
%
\indexlibraryglobal{match_prev_avail}%
\tcode{match_prev_avail} &
\verb!--first! is a valid iterator position. When this flag is
set, the flags \tcode{match_not_bol} and \tcode{match_not_bow} shall be ignored by the
regular expression algorithms\iref{re.alg} and iterators\iref{re.iter}.
\\ \rowsep
%
\indexlibraryglobal{format_default}%
\tcode{format_default} &
When a regular expression match is to be replaced by a
new string, the new string shall be constructed using the rules used by
the ECMAScript replace function in ECMA-262,
part 15.5.4.11 String.prototype.replace. In
addition, during search and replace operations all non-overlapping
occurrences of the regular expression shall be located and replaced, and
sections of the input that did not match the expression shall be copied
unchanged to the output string.
\\ \rowsep
%
\indexlibraryglobal{format_sed}%
\tcode{format_sed} &
When a regular expression match is to be replaced by a
new string, the new string shall be constructed using the rules used by
the sed utility in POSIX.
\\ \rowsep
%
\indexlibraryglobal{format_no_copy}%
\tcode{format_no_copy} &
During a search and replace operation, sections of
the character container sequence being searched that do not match the
regular expression shall not be copied to the output string. \\ \rowsep
%
\indexlibraryglobal{format_first_only}%
\tcode{format_first_only} &
When specified during a search and replace operation, only the
first occurrence of the regular expression shall be replaced.
\\
\end{longlibefftab}
\rSec2[re.err]{Implementation-defined \tcode{error_type}}
\indexlibraryglobal{error_type}%
\indexlibrarymember{regex_constants}{error_type}%
\begin{codeblock}
namespace std::regex_constants {
using error_type = @\textit{T3}@;
inline constexpr error_type error_collate = @\unspec@;
inline constexpr error_type error_ctype = @\unspec@;
inline constexpr error_type error_escape = @\unspec@;
inline constexpr error_type error_backref = @\unspec@;
inline constexpr error_type error_brack = @\unspec@;
inline constexpr error_type error_paren = @\unspec@;
inline constexpr error_type error_brace = @\unspec@;
inline constexpr error_type error_badbrace = @\unspec@;
inline constexpr error_type error_range = @\unspec@;
inline constexpr error_type error_space = @\unspec@;
inline constexpr error_type error_badrepeat = @\unspec@;
inline constexpr error_type error_complexity = @\unspec@;
inline constexpr error_type error_stack = @\unspec@;
}
\end{codeblock}
\pnum
\indexlibraryglobal{error_type}%
\indexlibrarymember{regex_constants}{error_type}%
The type \tcode{error_type} is an \impldef{type of
\tcode{regex_constants::error_type}} enumerated type\iref{enumerated.types}.
Values of type \tcode{error_type} represent the error
conditions described in \tref{re.err}:
\begin{longliberrtab}
{\tcode{error_type} values in the C locale}
{re.err}
\tcode{error_collate}
&
The expression contained an invalid collating element name. \\ \rowsep
%
\tcode{error_ctype}
&
The expression contained an invalid character class name. \\ \rowsep
%
\tcode{error_escape}
&
The expression contained an invalid escaped character, or a trailing
escape. \\ \rowsep
%
\tcode{error_backref}
&
The expression contained an invalid back reference. \\ \rowsep
%
\tcode{error_brack}
&
The expression contained mismatched \verb|[| and \verb|]|. \\ \rowsep
%
\tcode{error_paren}
&
The expression contained mismatched \verb|(| and \verb|)|. \\ \rowsep
%
\tcode{error_brace}
&
The expression contained mismatched \verb|{| and \verb|}|. \\ \rowsep
%
\tcode{error_badbrace}
&
The expression contained an invalid range in a \verb|{}| expression. \\
\rowsep
%
\tcode{error_range}
&
The expression contained an invalid character range, such as
\verb|[b-a]| in most encodings. \\ \rowsep
%
\tcode{error_space}
&
There was insufficient memory to convert the expression into a finite
state machine. \\ \rowsep
%
\tcode{error_badrepeat}
&
One of \verb|*?+{| was not preceded by a valid regular expression. \\ \rowsep
%
\tcode{error_complexity}
&
The complexity of an attempted match against a regular expression
exceeded a pre-set level. \\ \rowsep
%
\tcode{error_stack}
&
There was insufficient memory to determine whether the regular
expression could match the specified character sequence. \\
%
\end{longliberrtab}
\rSec1[re.badexp]{Class \tcode{regex_error}}
\indexlibraryglobal{regex_error}%
\begin{codeblock}
class regex_error : public runtime_error {
public:
explicit regex_error(regex_constants::error_type ecode);
regex_constants::error_type code() const;
};
\end{codeblock}
\pnum
The class \tcode{regex_error} defines the type of objects thrown as
exceptions to report errors from the regular expression library.
\indexlibraryctor{regex_error}%
\begin{itemdecl}
regex_error(regex_constants::error_type ecode);
\end{itemdecl}
\begin{itemdescr}
\pnum
\ensures
\tcode{ecode == code()}.
\end{itemdescr}
\indexlibraryglobal{error_type}%
\indexlibrarymember{regex_constants}{error_type}%
\begin{itemdecl}
regex_constants::error_type code() const;
\end{itemdecl}
\begin{itemdescr}
\pnum
\returns
The error code that was passed to the constructor.
\end{itemdescr}
\rSec1[re.traits]{Class template \tcode{regex_traits}}
\indexlibraryglobal{regex_traits}%
\begin{codeblock}
namespace std {
template<class charT>
struct regex_traits {
using char_type = charT;
using string_type = basic_string<char_type>;
using locale_type = locale;
using char_class_type = @\placeholdernc{bitmask_type}@;
regex_traits();
static size_t length(const char_type* p);
charT translate(charT c) const;
charT translate_nocase(charT c) const;
template<class ForwardIterator>
string_type transform(ForwardIterator first, ForwardIterator last) const;
template<class ForwardIterator>
string_type transform_primary(
ForwardIterator first, ForwardIterator last) const;
template<class ForwardIterator>
string_type lookup_collatename(
ForwardIterator first, ForwardIterator last) const;
template<class ForwardIterator>
char_class_type lookup_classname(
ForwardIterator first, ForwardIterator last, bool icase = false) const;
bool isctype(charT c, char_class_type f) const;
int value(charT ch, int radix) const;
locale_type imbue(locale_type l);
locale_type getloc() const;
};
}
\end{codeblock}
\pnum
\indextext{regular expression traits!requirements}%
\indextext{requirements!regular expression traits}%
\indextext{\idxcode{regex_traits}!specializations}%
The specializations \tcode{regex_traits<char>} and
\tcode{regex_traits<wchar_t>} meet the
requirements for a regular expression traits class\iref{re.req}.
\indexlibrarymember{regex_traits}{char_class_type}%
\begin{itemdecl}
using char_class_type = @\textit{bitmask_type}@;
\end{itemdecl}
\begin{itemdescr}
\pnum
The type \tcode{char_class_type} is used to represent a character
classification and is capable of holding an implementation-specific
set returned by \tcode{lookup_classname}.
\end{itemdescr}
\indexlibrarymember{length}{regex_traits}%
\begin{itemdecl}
static size_t length(const char_type* p);
\end{itemdecl}
\begin{itemdescr}
\pnum
\returns
\tcode{char_traits<charT>::length(p)}.
\end{itemdescr}
\indexlibrarymember{regex_traits}{translate}%
\begin{itemdecl}
charT translate(charT c) const;
\end{itemdecl}
\begin{itemdescr}
\pnum
\returns
\tcode{c}.
\end{itemdescr}
\indexlibrarymember{regex_traits}{translate_nocase}%
\begin{itemdecl}
charT translate_nocase(charT c) const;
\end{itemdecl}
\begin{itemdescr}
\pnum
\returns
\tcode{use_facet<ctype<charT>>(getloc()).tolower(c)}.
\end{itemdescr}
\indexlibrarymember{regex_traits}{transform}%