Skip to content

Commit 43c152c

Browse files
Merge pull request #110 from UmbrellaDocs/109fix-regex
Fix: Fixed failing ignore and replacement patterns
2 parents 2b8aa0f + 73a9a30 commit 43c152c

File tree

7 files changed

+170
-24
lines changed

7 files changed

+170
-24
lines changed

index.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ program
6161
// Update the current file name
6262
currentFile = file
6363
if (!cmd.json) {
64-
spinner.text = `Checking ${currentFile}...\n`
64+
spinner.text = `Checking ${currentFile}...`
6565
}
6666

6767
// Increment file count for statistics
@@ -120,7 +120,7 @@ program
120120
`${currentFile}:${linkStatusObj.line_number}:${linkStatusObj.position.start.column}: 🚫 ${linkStatusObj.link} Status:${linkStatusObj.status_code}${linkStatusObj.error_message ? ` ${linkStatusObj.error_message}` : ' Cannot reach link'}`
121121
)
122122
)
123-
spinner.start(`Checking ${currentFile}...\n`)
123+
spinner.start(`Checking ${currentFile}...`)
124124
}
125125
hasErrorLinks = true
126126
} else if (

lib/handle-links-modification.js

+66-11
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,65 @@
99
*
1010
* @returns {Array} The modified nodes.
1111
*/
12-
import { escapeRegExp } from 'lodash-es'
1312

1413
function doReplacements(nodes, opts = {}) {
1514
const { ignorePatterns = [], replacementPatterns = [], baseUrl } = opts
1615

16+
// Safer regex compilation with timeout protection
17+
function createSafeRegex(pattern) {
18+
try {
19+
// Validate pattern complexity before creating RegExp
20+
// Check for common problematic patterns that could lead to ReDoS
21+
if (
22+
pattern.includes('(.*)*') ||
23+
pattern.includes('(.+)+') ||
24+
pattern.match(/\([^)]+\)\+\+/) ||
25+
pattern.match(/\(\[.*?\]\+\)\+/) ||
26+
pattern.match(/\(a\+\)\+/)
27+
) {
28+
console.warn(`Potentially unsafe regex pattern detected: ${pattern}`)
29+
return null
30+
}
31+
32+
// Apply length limits for safety
33+
if (pattern.length > 100) {
34+
console.warn(
35+
`Pattern exceeds maximum safe length: ${pattern.substring(0, 50)}...`
36+
)
37+
return null
38+
}
39+
40+
return new RegExp(pattern)
41+
} catch (e) {
42+
console.warn(`Invalid regex pattern: ${pattern}. Error: ${e.message}`)
43+
return null
44+
}
45+
}
46+
47+
// Pre-compile regular expressions with safer approach
48+
const ignoreRegexes = ignorePatterns
49+
.map(({ pattern }) => createSafeRegex(pattern))
50+
.filter(Boolean)
51+
52+
const replacementRegexes = replacementPatterns
53+
.map(({ pattern, replacement }) => {
54+
const regex = createSafeRegex(pattern)
55+
return regex ? { regex, replacement } : null
56+
})
57+
.filter(Boolean)
58+
1759
return nodes.filter((node) => {
1860
let { url } = node
61+
1962
// Skip link checking if it matches any ignore pattern
2063
if (
21-
ignorePatterns.some(({ pattern }) => {
22-
// Sanitize the pattern before creating the RegExp
23-
const sanitizedPattern = escapeRegExp(pattern)
24-
const regex = new RegExp(sanitizedPattern)
25-
return regex.test(url)
64+
ignoreRegexes.some((regex) => {
65+
try {
66+
return regex.test(url)
67+
} catch (e) {
68+
console.warn(`Error testing URL against pattern: ${e.message}`)
69+
return false
70+
}
2671
})
2772
) {
2873
return false // Exclude this node
@@ -34,13 +79,23 @@ function doReplacements(nodes, opts = {}) {
3479
}
3580

3681
// Replace link URL based on replacement patterns
37-
replacementPatterns.forEach(({ pattern, replacement }) => {
38-
// Sanitize the pattern before creating the RegExp
39-
const sanitizedPattern = escapeRegExp(pattern)
40-
url = url.replace(new RegExp(sanitizedPattern), replacement)
82+
replacementRegexes.forEach(({ regex, replacement }) => {
83+
try {
84+
// Use a safer string replace approach
85+
const oldUrl = url
86+
url = url.replace(regex, replacement)
87+
88+
// If replacement leads to an extremely long string, revert
89+
if (url.length > oldUrl.length * 3 && url.length > 2000) {
90+
console.warn(`Suspicious replacement result detected. Reverting.`)
91+
url = oldUrl
92+
}
93+
} catch (e) {
94+
console.warn(`Error replacing URL: ${e.message}`)
95+
}
4196
})
42-
node.url = url
4397

98+
node.url = url
4499
return true // Include this node
45100
})
46101
}

package-lock.json

+2-9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@umbrelladocs/linkspector",
3-
"version": "0.4.1",
3+
"version": "0.4.2",
44
"description": "Uncover broken links in your content.",
55
"type": "module",
66
"main": "linkspector.js",
@@ -56,7 +56,6 @@
5656
"joi": "^17.13.3",
5757
"js-yaml": "^4.1.0",
5858
"kleur": "^4.1.5",
59-
"lodash-es": "^4.17.21",
6059
"ora": "^8.2.0",
6160
"puppeteer": "^24.4.0",
6261
"remark-gfm": "^4.0.1",

test/fixtures/patterns/patterns.md

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Test Patterns
2+
3+
## Links that should be ignored
4+
5+
- [Ignored Link 1](https://ignored-domain.example.com/page1)
6+
- [Ignored Link 2](https://ignored-domain.example.com/page2)
7+
- [Ignored Link 3](https://another-ignored.example.com/test)
8+
9+
## Links that should be replaced
10+
11+
- [Replace Example 1](https://example.com/old/path1)
12+
- [Replace Example 2](https://example.com/old/path2)
13+
- [Replace Example 3](https://replace-domain.example.com/path3)
14+
15+
## Normal links that should be checked
16+
17+
- [Google](https://www.google.com)
18+
- [GitHub](https://github.com)
+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import { expect, test } from 'vitest'
2+
import { linkspector } from './linkspector.js'
3+
4+
let cmd = {
5+
json: true,
6+
}
7+
8+
test('linkspector should correctly apply ignorePatterns and replacementPatterns', async () => {
9+
let currentFile = ''
10+
let results = []
11+
12+
for await (const { file, result } of linkspector(
13+
'./test/fixtures/patterns/patternsTest.yml',
14+
cmd
15+
)) {
16+
currentFile = file
17+
for (const linkStatusObj of result) {
18+
results.push({
19+
file: currentFile,
20+
link: linkStatusObj.link,
21+
status_code: linkStatusObj.status_code,
22+
line_number: linkStatusObj.line_number,
23+
position: linkStatusObj.position,
24+
status: linkStatusObj.status,
25+
error_message: linkStatusObj.error_message,
26+
})
27+
}
28+
}
29+
30+
// Test expectations for pattern checks
31+
32+
// 1. Check that ignored links are not in the results
33+
const ignoredLinks = [
34+
'https://ignored-domain.example.com/page1',
35+
'https://ignored-domain.example.com/page2',
36+
'https://another-ignored.example.com/test',
37+
]
38+
39+
ignoredLinks.forEach((link) => {
40+
expect(results.find((r) => r.link === link)).toBeUndefined()
41+
})
42+
43+
// 2. Check that replacement patterns were applied
44+
expect(
45+
results.find((r) => r.link === 'https://example.com/new/path1')
46+
).toBeDefined()
47+
expect(
48+
results.find((r) => r.link === 'https://example.com/new/path2')
49+
).toBeDefined()
50+
expect(
51+
results.find((r) => r.link === 'https://new-domain.example.com/path3')
52+
).toBeDefined()
53+
54+
// 3. Confirm original links (before replacement) are not in the results
55+
expect(
56+
results.find((r) => r.link === 'https://example.com/old/path1')
57+
).toBeUndefined()
58+
expect(
59+
results.find((r) => r.link === 'https://example.com/old/path2')
60+
).toBeUndefined()
61+
expect(
62+
results.find((r) => r.link === 'https://replace-domain.example.com/path3')
63+
).toBeUndefined()
64+
65+
// 4. Check that normal links are still being checked
66+
expect(results.find((r) => r.link === 'https://www.google.com')).toBeDefined()
67+
expect(results.find((r) => r.link === 'https://github.com')).toBeDefined()
68+
69+
// Total number of links should be 5 (2 normal + 3 replaced)
70+
expect(results.length).toBe(5)
71+
})
+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
dirs:
2+
- ./test/fixtures/patterns
3+
ignorePatterns:
4+
- pattern: '^https://ignored-domain.example.com/.*$'
5+
- pattern: '^https://another-ignored.example.com/.*$'
6+
replacementPatterns:
7+
- pattern: 'https://example.com/old/(.*)'
8+
replacement: 'https://example.com/new/$1'
9+
- pattern: 'https://replace-domain.example.com/(.*)'
10+
replacement: 'https://new-domain.example.com/$1'

0 commit comments

Comments
 (0)