Skip to content

Commit 5803973

Browse files
addaleaxtargos
authored andcommitted
src: minor refactor to string_search.h
- Use `std::max` instead of a custom variant - Use member method pointers to avoid an extra layer of indirection - Stop transferring `Vector` into the `node` namespace PR-URL: #20546 Reviewed-By: James M Snell <[email protected]> Reviewed-By: Daniel Bevenius <[email protected]> Reviewed-By: Tiancheng "Timothy" Gu <[email protected]>
1 parent 105f606 commit 5803973

File tree

1 file changed

+54
-91
lines changed

1 file changed

+54
-91
lines changed

src/string_search.h

+54-91
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,11 @@
99

1010
#include "node_internals.h"
1111
#include <string.h>
12+
#include <algorithm>
1213

1314
namespace node {
1415
namespace stringsearch {
1516

16-
17-
// Returns the maximum of the two parameters.
18-
template <typename T>
19-
T Max(T a, T b) {
20-
return a < b ? b : a;
21-
}
22-
23-
2417
static const uint32_t kMaxOneByteCharCodeU = 0xff;
2518

2619
template <typename T>
@@ -98,7 +91,9 @@ class StringSearchBase {
9891
template <typename Char>
9992
class StringSearch : private StringSearchBase {
10093
public:
101-
explicit StringSearch(Vector<const Char> pattern)
94+
typedef stringsearch::Vector<const Char> Vector;
95+
96+
explicit StringSearch(Vector pattern)
10297
: pattern_(pattern), start_(0) {
10398
if (pattern.length() >= kBMMaxShift) {
10499
start_ = pattern.length() - kBMMaxShift;
@@ -108,17 +103,17 @@ class StringSearch : private StringSearchBase {
108103
CHECK_GT(pattern_length, 0);
109104
if (pattern_length < kBMMinPatternLength) {
110105
if (pattern_length == 1) {
111-
strategy_ = &SingleCharSearch;
106+
strategy_ = &StringSearch::SingleCharSearch;
112107
return;
113108
}
114-
strategy_ = &LinearSearch;
109+
strategy_ = &StringSearch::LinearSearch;
115110
return;
116111
}
117-
strategy_ = &InitialSearch;
112+
strategy_ = &StringSearch::InitialSearch;
118113
}
119114

120-
size_t Search(Vector<const Char> subject, size_t index) {
121-
return strategy_(this, subject, index);
115+
size_t Search(Vector subject, size_t index) {
116+
return (this->*strategy_)(subject, index);
122117
}
123118

124119
static inline int AlphabetSize() {
@@ -136,31 +131,12 @@ class StringSearch : private StringSearchBase {
136131
}
137132

138133
private:
139-
typedef size_t (*SearchFunction)(
140-
StringSearch<Char>*,
141-
Vector<const Char>,
142-
size_t);
143-
144-
static size_t SingleCharSearch(StringSearch<Char>* search,
145-
Vector<const Char> subject,
146-
size_t start_index);
147-
148-
static size_t LinearSearch(StringSearch<Char>* search,
149-
Vector<const Char> subject,
150-
size_t start_index);
151-
152-
static size_t InitialSearch(StringSearch<Char>* search,
153-
Vector<const Char> subject,
154-
size_t start_index);
155-
156-
static size_t BoyerMooreHorspoolSearch(
157-
StringSearch<Char>* search,
158-
Vector<const Char> subject,
159-
size_t start_index);
160-
161-
static size_t BoyerMooreSearch(StringSearch<Char>* search,
162-
Vector<const Char> subject,
163-
size_t start_index);
134+
typedef size_t (StringSearch::*SearchFunction)(Vector, size_t);
135+
size_t SingleCharSearch(Vector subject, size_t start_index);
136+
size_t LinearSearch(Vector subject, size_t start_index);
137+
size_t InitialSearch(Vector subject, size_t start_index);
138+
size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index);
139+
size_t BoyerMooreSearch(Vector subject, size_t start_index);
164140

165141
void PopulateBoyerMooreHorspoolTable();
166142

@@ -197,7 +173,7 @@ class StringSearch : private StringSearchBase {
197173
}
198174

199175
// The pattern to search for.
200-
Vector<const Char> pattern_;
176+
Vector pattern_;
201177
// Pointer to implementation of the search.
202178
SearchFunction strategy_;
203179
// Cache value of Max(0, pattern_length() - kBMMaxShift)
@@ -213,8 +189,8 @@ inline T AlignDown(T value, U alignment) {
213189

214190

215191
inline uint8_t GetHighestValueByte(uint16_t character) {
216-
return Max(static_cast<uint8_t>(character & 0xFF),
217-
static_cast<uint8_t>(character >> 8));
192+
return std::max(static_cast<uint8_t>(character & 0xFF),
193+
static_cast<uint8_t>(character >> 8));
218194
}
219195

220196

@@ -319,11 +295,10 @@ inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,
319295

320296
template <typename Char>
321297
size_t StringSearch<Char>::SingleCharSearch(
322-
StringSearch<Char>* search,
323-
Vector<const Char> subject,
298+
Vector subject,
324299
size_t index) {
325-
CHECK_EQ(1, search->pattern_.length());
326-
return FindFirstCharacter(search->pattern_, subject, index);
300+
CHECK_EQ(1, pattern_.length());
301+
return FindFirstCharacter(pattern_, subject, index);
327302
}
328303

329304
//---------------------------------------------------------------------
@@ -333,22 +308,19 @@ size_t StringSearch<Char>::SingleCharSearch(
333308
// Simple linear search for short patterns. Never bails out.
334309
template <typename Char>
335310
size_t StringSearch<Char>::LinearSearch(
336-
StringSearch<Char>* search,
337-
Vector<const Char> subject,
311+
Vector subject,
338312
size_t index) {
339-
Vector<const Char> pattern = search->pattern_;
340-
CHECK_GT(pattern.length(), 1);
341-
const size_t pattern_length = pattern.length();
342-
const size_t n = subject.length() - pattern_length;
313+
CHECK_GT(pattern_.length(), 1);
314+
const size_t n = subject.length() - pattern_.length();
343315
for (size_t i = index; i <= n; i++) {
344-
i = FindFirstCharacter(pattern, subject, i);
316+
i = FindFirstCharacter(pattern_, subject, i);
345317
if (i == subject.length())
346318
return subject.length();
347319
CHECK_LE(i, n);
348320

349321
bool matches = true;
350-
for (size_t j = 1; j < pattern_length; j++) {
351-
if (pattern[j] != subject[i + j]) {
322+
for (size_t j = 1; j < pattern_.length(); j++) {
323+
if (pattern_[j] != subject[i + j]) {
352324
matches = false;
353325
break;
354326
}
@@ -366,19 +338,17 @@ size_t StringSearch<Char>::LinearSearch(
366338

367339
template <typename Char>
368340
size_t StringSearch<Char>::BoyerMooreSearch(
369-
StringSearch<Char>* search,
370-
Vector<const Char> subject,
341+
Vector subject,
371342
size_t start_index) {
372-
Vector<const Char> pattern = search->pattern_;
373343
const size_t subject_length = subject.length();
374-
const size_t pattern_length = pattern.length();
344+
const size_t pattern_length = pattern_.length();
375345
// Only preprocess at most kBMMaxShift last characters of pattern.
376-
size_t start = search->start_;
346+
size_t start = start_;
377347

378-
int* bad_char_occurrence = search->bad_char_table();
379-
int* good_suffix_shift = search->good_suffix_shift_table();
348+
int* bad_char_occurrence = bad_char_table();
349+
int* good_suffix_shift = good_suffix_shift_table();
380350

381-
Char last_char = pattern[pattern_length - 1];
351+
Char last_char = pattern_[pattern_length - 1];
382352
size_t index = start_index;
383353
// Continue search from i.
384354
while (index <= subject_length - pattern_length) {
@@ -391,7 +361,7 @@ size_t StringSearch<Char>::BoyerMooreSearch(
391361
return subject.length();
392362
}
393363
}
394-
while (pattern[j] == (c = subject[index + j])) {
364+
while (pattern_[j] == (c = subject[index + j])) {
395365
if (j == 0) {
396366
return index;
397367
}
@@ -420,7 +390,6 @@ size_t StringSearch<Char>::BoyerMooreSearch(
420390
template <typename Char>
421391
void StringSearch<Char>::PopulateBoyerMooreTable() {
422392
const size_t pattern_length = pattern_.length();
423-
Vector<const Char> pattern = pattern_;
424393
// Only look at the last kBMMaxShift characters of pattern (from start_
425394
// to pattern_length).
426395
const size_t start = start_;
@@ -448,8 +417,8 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
448417
{
449418
size_t i = pattern_length;
450419
while (i > start) {
451-
Char c = pattern[i - 1];
452-
while (suffix <= pattern_length && c != pattern[suffix - 1]) {
420+
Char c = pattern_[i - 1];
421+
while (suffix <= pattern_length && c != pattern_[suffix - 1]) {
453422
if (static_cast<size_t>(shift_table[suffix]) == length) {
454423
shift_table[suffix] = suffix - i;
455424
}
@@ -458,7 +427,7 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
458427
suffix_table[--i] = --suffix;
459428
if (suffix == pattern_length) {
460429
// No suffix to extend, so we check against last_char only.
461-
while ((i > start) && (pattern[i - 1] != last_char)) {
430+
while ((i > start) && (pattern_[i - 1] != last_char)) {
462431
if (static_cast<size_t>(shift_table[pattern_length]) == length) {
463432
shift_table[pattern_length] = pattern_length - i;
464433
}
@@ -489,17 +458,15 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
489458

490459
template <typename Char>
491460
size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
492-
StringSearch<Char>* search,
493-
Vector<const Char> subject,
461+
Vector subject,
494462
size_t start_index) {
495-
Vector<const Char> pattern = search->pattern_;
496463
const size_t subject_length = subject.length();
497-
const size_t pattern_length = pattern.length();
498-
int* char_occurrences = search->bad_char_table();
464+
const size_t pattern_length = pattern_.length();
465+
int* char_occurrences = bad_char_table();
499466
int64_t badness = -pattern_length;
500467

501468
// How bad we are doing without a good-suffix table.
502-
Char last_char = pattern[pattern_length - 1];
469+
Char last_char = pattern_[pattern_length - 1];
503470
int last_char_shift =
504471
pattern_length - 1 -
505472
CharOccurrence(char_occurrences, static_cast<Char>(last_char));
@@ -519,7 +486,7 @@ size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
519486
}
520487
}
521488
j--;
522-
while (pattern[j] == (subject[index + j])) {
489+
while (pattern_[j] == (subject[index + j])) {
523490
if (j == 0) {
524491
return index;
525492
}
@@ -532,9 +499,9 @@ size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
532499
// compared to reading each character exactly once.
533500
badness += (pattern_length - j) - last_char_shift;
534501
if (badness > 0) {
535-
search->PopulateBoyerMooreTable();
536-
search->strategy_ = &BoyerMooreSearch;
537-
return BoyerMooreSearch(search, subject, index);
502+
PopulateBoyerMooreTable();
503+
strategy_ = &StringSearch::BoyerMooreSearch;
504+
return BoyerMooreSearch(subject, index);
538505
}
539506
}
540507
return subject.length();
@@ -575,11 +542,9 @@ void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
575542
// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
576543
template <typename Char>
577544
size_t StringSearch<Char>::InitialSearch(
578-
StringSearch<Char>* search,
579-
Vector<const Char> subject,
545+
Vector subject,
580546
size_t index) {
581-
Vector<const Char> pattern = search->pattern_;
582-
const size_t pattern_length = pattern.length();
547+
const size_t pattern_length = pattern_.length();
583548
// Badness is a count of how much work we have done. When we have
584549
// done enough work we decide it's probably worth switching to a better
585550
// algorithm.
@@ -590,13 +555,13 @@ size_t StringSearch<Char>::InitialSearch(
590555
for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) {
591556
badness++;
592557
if (badness <= 0) {
593-
i = FindFirstCharacter(pattern, subject, i);
558+
i = FindFirstCharacter(pattern_, subject, i);
594559
if (i == subject.length())
595560
return subject.length();
596561
CHECK_LE(i, n);
597562
size_t j = 1;
598563
do {
599-
if (pattern[j] != subject[i + j]) {
564+
if (pattern_[j] != subject[i + j]) {
600565
break;
601566
}
602567
j++;
@@ -606,9 +571,9 @@ size_t StringSearch<Char>::InitialSearch(
606571
}
607572
badness += j;
608573
} else {
609-
search->PopulateBoyerMooreHorspoolTable();
610-
search->strategy_ = &BoyerMooreHorspoolSearch;
611-
return BoyerMooreHorspoolSearch(search, subject, i);
574+
PopulateBoyerMooreHorspoolTable();
575+
strategy_ = &StringSearch::BoyerMooreHorspoolSearch;
576+
return BoyerMooreHorspoolSearch(subject, i);
612577
}
613578
}
614579
return subject.length();
@@ -629,7 +594,6 @@ size_t SearchString(Vector<const Char> subject,
629594
} // namespace node
630595

631596
namespace node {
632-
using node::stringsearch::Vector;
633597

634598
template <typename Char>
635599
size_t SearchString(const Char* haystack,
@@ -643,9 +607,8 @@ size_t SearchString(const Char* haystack,
643607
// code, create two vectors that are reversed views into the input strings.
644608
// For example, v_needle[0] would return the *last* character of the needle.
645609
// So we're searching for the first instance of rev(needle) in rev(haystack)
646-
Vector<const Char> v_needle = Vector<const Char>(
647-
needle, needle_length, is_forward);
648-
Vector<const Char> v_haystack = Vector<const Char>(
610+
stringsearch::Vector<const Char> v_needle(needle, needle_length, is_forward);
611+
stringsearch::Vector<const Char> v_haystack(
649612
haystack, haystack_length, is_forward);
650613
size_t diff = haystack_length - needle_length;
651614
size_t relative_start_index;

0 commit comments

Comments
 (0)