Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url: handle "unsafe" characters properly in pathToFileURL #54545

Merged
merged 14 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/internal/process/execution.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function tryGetCwd() {

let evalIndex = 0;
function getEvalModuleUrl() {
return pathToFileURL(`${process.cwd()}/[eval${++evalIndex}]`).href;
return `${pathToFileURL(process.cwd())}/[eval${++evalIndex}]`;
}

/**
Expand Down
94 changes: 57 additions & 37 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -1498,44 +1498,75 @@ function fileURLToPath(path, options = kEmptyObject) {
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
}

// The following characters are percent-encoded when converting from file path
// to URL:
// - %: The percent character is the only character not encoded by the
// `pathname` setter.
// - \: Backslash is encoded on non-windows platforms since it's a valid
// character but the `pathname` setters replaces it by a forward slash.
// - LF: The newline character is stripped out by the `pathname` setter.
// (See whatwg/url#419)
// - CR: The carriage return character is also stripped out by the `pathname`
// setter.
// - TAB: The tab character is also stripped out by the `pathname` setter.
// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
const percentRegEx = /%/g;
const backslashRegEx = /\\/g;
const newlineRegEx = /\n/g;
const carriageReturnRegEx = /\r/g;
const tabRegEx = /\t/g;
const questionRegex = /\?/g;
const quoteRegEx = /"/g;
const hashRegex = /#/g;
const spaceRegEx = / /g;
const questionMarkRegex = /\?/g;
const openSquareBracketRegEx = /\[/g;
const backslashRegEx = /\\/g;
const closeSquareBracketRegEx = /]/g;
const caretRegEx = /\^/g;
const verticalBarRegEx = /\|/g;
const tildeRegEx = /~/g;

function encodePathChars(filepath, options = kEmptyObject) {
const windows = options?.windows;
if (StringPrototypeIndexOf(filepath, '%') !== -1)
if (StringPrototypeIncludes(filepath, '%')) {
filepath = RegExpPrototypeSymbolReplace(percentRegEx, filepath, '%25');
// In posix, backslash is a valid character in paths:
if (!(windows ?? isWindows) && StringPrototypeIndexOf(filepath, '\\') !== -1)
filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C');
if (StringPrototypeIndexOf(filepath, '\n') !== -1)
}

if (StringPrototypeIncludes(filepath, '\t')) {
filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09');
}
if (StringPrototypeIncludes(filepath, '\n')) {
filepath = RegExpPrototypeSymbolReplace(newlineRegEx, filepath, '%0A');
if (StringPrototypeIndexOf(filepath, '\r') !== -1)
}
if (StringPrototypeIncludes(filepath, '\r')) {
filepath = RegExpPrototypeSymbolReplace(carriageReturnRegEx, filepath, '%0D');
if (StringPrototypeIndexOf(filepath, '\t') !== -1)
filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09');
}
if (StringPrototypeIncludes(filepath, ' ')) {
filepath = RegExpPrototypeSymbolReplace(spaceRegEx, filepath, '%20');
}
if (StringPrototypeIncludes(filepath, '"')) {
filepath = RegExpPrototypeSymbolReplace(quoteRegEx, filepath, '%22');
}
if (StringPrototypeIncludes(filepath, '#')) {
filepath = RegExpPrototypeSymbolReplace(hashRegex, filepath, '%23');
}
if (StringPrototypeIncludes(filepath, '?')) {
filepath = RegExpPrototypeSymbolReplace(questionMarkRegex, filepath, '%3F');
}
if (StringPrototypeIncludes(filepath, '[')) {
filepath = RegExpPrototypeSymbolReplace(openSquareBracketRegEx, filepath, '%5B');
}
// Back-slashes must be special-cased on Windows, where they are treated as path separator.
if (!options.windows && StringPrototypeIncludes(filepath, '\\')) {
filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C');
}
if (StringPrototypeIncludes(filepath, ']')) {
filepath = RegExpPrototypeSymbolReplace(closeSquareBracketRegEx, filepath, '%5D');
}
if (StringPrototypeIncludes(filepath, '^')) {
filepath = RegExpPrototypeSymbolReplace(caretRegEx, filepath, '%5E');
}
if (StringPrototypeIncludes(filepath, '|')) {
filepath = RegExpPrototypeSymbolReplace(verticalBarRegEx, filepath, '%7C');
}
if (StringPrototypeIncludes(filepath, '~')) {
filepath = RegExpPrototypeSymbolReplace(tildeRegEx, filepath, '%7E');
}

return filepath;
}

function pathToFileURL(filepath, options = kEmptyObject) {
const windows = options?.windows;
if ((windows ?? isWindows) && StringPrototypeStartsWith(filepath, '\\\\')) {
const windows = options?.windows ?? isWindows;
if (windows && StringPrototypeStartsWith(filepath, '\\\\')) {
const outURL = new URL('file://');
// UNC path format: \\server\share\resource
// Handle extended UNC path and standard UNC path
Expand Down Expand Up @@ -1566,20 +1597,9 @@ function pathToFileURL(filepath, options = kEmptyObject) {
);
return outURL;
}
let resolved = (windows ?? isWindows) ? path.win32.resolve(filepath) : path.posix.resolve(filepath);

// Call encodePathChars first to avoid encoding % again for ? and #.
resolved = encodePathChars(resolved, { windows });
const resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath);

// Question and hash character should be included in pathname.
// Therefore, encoding is required to eliminate parsing them in different states.
// This is done as an optimization to not creating a URL instance and
// later triggering pathname setter, which impacts performance
if (StringPrototypeIndexOf(resolved, '?') !== -1)
resolved = RegExpPrototypeSymbolReplace(questionRegex, resolved, '%3F');
if (StringPrototypeIndexOf(resolved, '#') !== -1)
resolved = RegExpPrototypeSymbolReplace(hashRegex, resolved, '%23');
return new URL(`file://${resolved}`);
return new URL(`file://${encodePathChars(resolved, { windows })}`);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having a third option in URL constructor, preferably a symbol, and later doing the same operation in C++ would be the fastest solution imho.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're probably correct, but that's very much out of my depth :)

}

function toPathIfFileURL(fileURLOrPath) {
Expand Down
13 changes: 9 additions & 4 deletions test/parallel/test-url-pathtofileurl.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@ const url = require('url');
{
const fileURL = url.pathToFileURL('test\\').href;
assert.ok(fileURL.startsWith('file:///'));
if (isWindows)
assert.ok(fileURL.endsWith('/'));
else
assert.ok(fileURL.endsWith('%5C'));
assert.match(fileURL, isWindows ? /\/$/ : /%5C$/);
}

{
Expand Down Expand Up @@ -104,6 +101,12 @@ const windowsTestCases = [
{ path: 'C:\\€', expected: 'file:///C:/%E2%82%AC' },
// Rocket emoji (non-BMP code point)
{ path: 'C:\\🚀', expected: 'file:///C:/%F0%9F%9A%80' },
// caret
{ path: 'C:\\foo^bar', expected: 'file:///C:/foo%5Ebar' },
// left bracket
{ path: 'C:\\foo[bar', expected: 'file:///C:/foo%5Bbar' },
// right bracket
{ path: 'C:\\foo]bar', expected: 'file:///C:/foo%5Dbar' },
// Local extended path
{ path: '\\\\?\\C:\\path\\to\\file.txt', expected: 'file:///C:/path/to/file.txt' },
// UNC path (see https://docs.microsoft.com/en-us/archive/blogs/ie/file-uris-in-windows)
Expand Down Expand Up @@ -154,6 +157,8 @@ const posixTestCases = [
{ path: '/€', expected: 'file:///%E2%82%AC' },
// Rocket emoji (non-BMP code point)
{ path: '/🚀', expected: 'file:///%F0%9F%9A%80' },
// "unsafe" chars
{ path: '/foo\r\n\t<>"#%{}|^[\\~]`?bar', expected: 'file:///foo%0D%0A%09%3C%3E%22%23%25%7B%7D%7C%5E%5B%5C%7E%5D%60%3Fbar' },
];

for (const { path, expected } of windowsTestCases) {
Expand Down
Loading