@@ -1970,9 +1970,11 @@ void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
1970
1970
}
1971
1971
}
1972
1972
1973
+ namespace {
1974
+
1973
1975
// Check for [0-9A-Z_a-z].
1974
- static void EmitWordCheck (RegExpMacroAssembler* assembler, Label* word,
1975
- Label* non_word, bool fall_through_on_word) {
1976
+ void EmitWordCheck (RegExpMacroAssembler* assembler, Label* word,
1977
+ Label* non_word, bool fall_through_on_word) {
1976
1978
if (assembler->CheckSpecialCharacterClass (
1977
1979
fall_through_on_word ? 'w' : 'W' ,
1978
1980
fall_through_on_word ? non_word : word)) {
@@ -1994,24 +1996,37 @@ static void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
1994
1996
1995
1997
// Emit the code to check for a ^ in multiline mode (1-character lookbehind
1996
1998
// that matches newline or the start of input).
1997
- static void EmitHat (RegExpCompiler* compiler, RegExpNode* on_success,
1998
- Trace* trace) {
1999
+ void EmitHat (RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
1999
2000
RegExpMacroAssembler* assembler = compiler->macro_assembler ();
2000
- // We will be loading the previous character into the current character
2001
- // register.
2001
+
2002
+ // We will load the previous character into the current character register.
2002
2003
Trace new_trace (*trace);
2003
2004
new_trace.InvalidateCurrentCharacter ();
2004
2005
2006
+ // A positive (> 0) cp_offset means we've already successfully matched a
2007
+ // non-empty-width part of the pattern, and thus cannot be at or before the
2008
+ // start of the subject string. We can thus skip both at-start and
2009
+ // bounds-checks when loading the one-character lookbehind.
2010
+ const bool may_be_at_or_before_subject_string_start =
2011
+ new_trace.cp_offset () <= 0 ;
2012
+
2005
2013
Label ok;
2006
- if (new_trace.cp_offset () == 0 ) {
2007
- // The start of input counts as a newline in this context, so skip to
2008
- // ok if we are at the start.
2009
- assembler->CheckAtStart (&ok);
2014
+ if (may_be_at_or_before_subject_string_start) {
2015
+ // The start of input counts as a newline in this context, so skip to ok if
2016
+ // we are at the start.
2017
+ // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
2018
+ // that currently does not support a non-zero cp_offset.
2019
+ Label not_at_start;
2020
+ assembler->CheckNotAtStart (new_trace.cp_offset (), &not_at_start);
2021
+ assembler->GoTo (&ok);
2022
+ assembler->Bind (&not_at_start);
2010
2023
}
2011
- // We already checked that we are not at the start of input so it must be
2012
- // OK to load the previous character.
2024
+
2025
+ // If we've already checked that we are not at the start of input, it's okay
2026
+ // to load the previous character without bounds checks.
2027
+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2013
2028
assembler->LoadCurrentCharacter (new_trace.cp_offset () - 1 ,
2014
- new_trace.backtrack (), false );
2029
+ new_trace.backtrack (), can_skip_bounds_check );
2015
2030
if (!assembler->CheckSpecialCharacterClass ('n' , new_trace.backtrack ())) {
2016
2031
// Newline means \n, \r, 0x2028 or 0x2029.
2017
2032
if (!compiler->one_byte ()) {
@@ -2024,6 +2039,8 @@ static void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success,
2024
2039
on_success->Emit (compiler, &new_trace);
2025
2040
}
2026
2041
2042
+ } // namespace
2043
+
2027
2044
// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2028
2045
void AssertionNode::EmitBoundaryCheck (RegExpCompiler* compiler, Trace* trace) {
2029
2046
RegExpMacroAssembler* assembler = compiler->macro_assembler ();
@@ -2080,21 +2097,35 @@ void AssertionNode::BacktrackIfPrevious(
2080
2097
Trace new_trace (*trace);
2081
2098
new_trace.InvalidateCurrentCharacter ();
2082
2099
2083
- Label fall_through, dummy;
2084
-
2100
+ Label fall_through;
2085
2101
Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack ()
2086
2102
: &fall_through;
2087
2103
Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
2088
2104
: new_trace.backtrack ();
2089
2105
2090
- if (new_trace.cp_offset () == 0 ) {
2106
+ // A positive (> 0) cp_offset means we've already successfully matched a
2107
+ // non-empty-width part of the pattern, and thus cannot be at or before the
2108
+ // start of the subject string. We can thus skip both at-start and
2109
+ // bounds-checks when loading the one-character lookbehind.
2110
+ const bool may_be_at_or_before_subject_string_start =
2111
+ new_trace.cp_offset () <= 0 ;
2112
+
2113
+ if (may_be_at_or_before_subject_string_start) {
2091
2114
// The start of input counts as a non-word character, so the question is
2092
2115
// decided if we are at the start.
2093
- assembler->CheckAtStart (non_word);
2094
- }
2095
- // We already checked that we are not at the start of input so it must be
2096
- // OK to load the previous character.
2097
- assembler->LoadCurrentCharacter (new_trace.cp_offset () - 1 , &dummy, false );
2116
+ // TODO(jgruber): It would be less awkward to use CheckAtStart here, but
2117
+ // that currently does not support a non-zero cp_offset.
2118
+ Label not_at_start;
2119
+ assembler->CheckNotAtStart (new_trace.cp_offset (), &not_at_start);
2120
+ assembler->GoTo (non_word);
2121
+ assembler->Bind (&not_at_start);
2122
+ }
2123
+
2124
+ // If we've already checked that we are not at the start of input, it's okay
2125
+ // to load the previous character without bounds checks.
2126
+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2127
+ assembler->LoadCurrentCharacter (new_trace.cp_offset () - 1 , non_word,
2128
+ can_skip_bounds_check);
2098
2129
EmitWordCheck (assembler, word, non_word, backtrack_if_previous == kIsNonWord );
2099
2130
2100
2131
assembler->Bind (&fall_through);
0 commit comments