diff --git a/benchmark/url/file-url-to-path.js b/benchmark/url/file-url-to-path.js
new file mode 100644
index 00000000000000..b575502e938273
--- /dev/null
+++ b/benchmark/url/file-url-to-path.js
@@ -0,0 +1,42 @@
+'use strict';
+const common = require('../common.js');
+const { fileURLToPath } = require('url');
+
+// Benchmark inputs carrying this prefix are converted to a URL object
+// before the timed loop; all other inputs are passed as plain strings.
+const toUrl = '(url) ';
+// Strings that parse as URLs. Several are rejected by fileURLToPath (encoded
+// slash, non-file scheme and, on POSIX, a non-empty host), so the error
+// paths are exercised as well.
+const validUrls = [
+  'file:///home/user/test/index.js',
+  'file:///home/user/test%20index.js',
+  'file:///home/user/test%2Findex.js',
+  'file://google.com/home/user/test%2Findex.js',
+  'http://google.com/home/test.js',
+];
+const bench = common.createBenchmark(main, {
+  url: [
+    ...validUrls,
+    ...validUrls.map(url => toUrl + url),
+    'not-even-a-url',
+  ],
+  n: [1e5],
+});
+
+function main({ url, n }) {
+  // Build the input before starting the timer so that URL construction is not
+  // part of the measurement.
+  const urlToTest = url.startsWith(toUrl) ? new URL(url.slice(toUrl.length)) : url;
+
+  bench.start();
+  for (let i = 0; i < n; i += 1) {
+    try {
+      fileURLToPath(urlToTest);
+    } catch {
+      // Several inputs are expected to throw; ignore the error so the loop
+      // keeps measuring that path.
+    }
+  }
+  bench.end(n);
+}
diff --git a/lib/internal/url.js b/lib/internal/url.js
index ccd89830f91ded..840da25d23e6e6 100644
--- a/lib/internal/url.js
+++ b/lib/internal/url.js
@@ -1298,67 +1298,23 @@ function urlToHttpOptions(url) {
   return options;
 }
 
-function getPathFromURLWin32(url) {
-  const hostname = url.hostname;
-  let pathname = url.pathname;
-  for (let n = 0; n < pathname.length; n++) {
-    if (pathname[n] === '%') {
-      const third = StringPrototypeCodePointAt(pathname, n + 2) | 0x20;
-      if ((pathname[n + 1] === '2' && third === 102) || // 2f 2F /
-          (pathname[n + 1] === '5' && third === 99)) { // 5c 5C \
-        throw new ERR_INVALID_FILE_URL_PATH(
-          'must not include encoded \\ or / characters',
-        );
-      }
-    }
-  }
-  pathname = SideEffectFreeRegExpPrototypeSymbolReplace(FORWARD_SLASH, pathname, '\\');
-  pathname = decodeURIComponent(pathname);
-  if (hostname !== '') {
-    // If hostname is set, then we have a UNC path
-    // Pass the hostname through domainToUnicode just in case
-    // it is an IDN using punycode encoding. We do not need to worry
-    // about percent encoding because the URL parser will have
-    // already taken care of that for us. Note that this only
-    // causes IDNs with an appropriate `xn--` prefix to be decoded.
-    return `\\\\${domainToUnicode(hostname)}${pathname}`;
-  }
-  // Otherwise, it's a local path that requires a drive letter
-  const letter = StringPrototypeCodePointAt(pathname, 1) | 0x20;
-  const sep = StringPrototypeCharAt(pathname, 2);
-  if (letter < CHAR_LOWERCASE_A || letter > CHAR_LOWERCASE_Z || // a..z A..Z
-      (sep !== ':')) {
-    throw new ERR_INVALID_FILE_URL_PATH('must be absolute');
-  }
-  return StringPrototypeSlice(pathname, 1);
-}
-
-function getPathFromURLPosix(url) {
-  if (url.hostname !== '') {
-    throw new ERR_INVALID_FILE_URL_HOST(platform);
-  }
-  const pathname = url.pathname;
-  for (let n = 0; n < pathname.length; n++) {
-    if (pathname[n] === '%') {
-      const third = StringPrototypeCodePointAt(pathname, n + 2) | 0x20;
-      if (pathname[n + 1] === '2' && third === 102) {
-        throw new ERR_INVALID_FILE_URL_PATH(
-          'must not include encoded / characters',
-        );
-      }
-    }
-  }
-  return decodeURIComponent(pathname);
-}
-
+/**
+ * Converts a file: URL string or URL object to a platform-specific path.
+ * @param {string | URL} path
+ * @returns {string}
+ */
 function fileURLToPath(path) {
   if (typeof path === 'string')
-    path = new URL(path);
-  else if (!isURL(path))
-    throw new ERR_INVALID_ARG_TYPE('path', ['string', 'URL'], path);
-  if (path.protocol !== 'file:')
-    throw new ERR_INVALID_URL_SCHEME('file');
-  return isWindows ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
+    return bindingUrl.fileURLToPath(path);
+
+  // Keep rejecting values that are neither strings nor URL instances with
+  // ERR_INVALID_ARG_TYPE, as the previous implementation did.
+  if (!isURL(path))
+    throw new ERR_INVALID_ARG_TYPE('path', ['string', 'URL'], path);
+
+  // `fileURLToPath` is the only conversion function registered on the
+  // binding; it takes the serialized URL string.
+  return bindingUrl.fileURLToPath(path.href);
 }
 
 // The following characters are percent-encoded when converting from file path
diff --git a/src/node_errors.h b/src/node_errors.h
index ddb87df20ef4af..8ea9a409a0e8a2 100644
--- a/src/node_errors.h
+++ b/src/node_errors.h
@@ -68,11 +68,15 @@ void AppendExceptionLine(Environment* env,
   V(ERR_INVALID_ARG_VALUE, TypeError)                                          \
   V(ERR_OSSL_EVP_INVALID_DIGEST, Error)                                        \
   V(ERR_INVALID_ARG_TYPE, TypeError)                                           \
+  V(ERR_INVALID_FILE_URL_HOST, TypeError)                                      \
+  V(ERR_INVALID_FILE_URL_PATH, TypeError)                                      \
   V(ERR_INVALID_OBJECT_DEFINE_PROPERTY, TypeError)                             \
   V(ERR_INVALID_MODULE, Error)                                                 \
   V(ERR_INVALID_STATE, Error)                                                  \
+  V(ERR_INVALID_URL_SCHEME, TypeError)                                         \
   V(ERR_INVALID_THIS, TypeError)                                               \
   V(ERR_INVALID_TRANSFER_OBJECT, TypeError)                                    \
+  V(ERR_INVALID_URL, TypeError)                                                \
   V(ERR_MEMORY_ALLOCATION_FAILED, Error)                                       \
   V(ERR_MESSAGE_TARGET_CONTEXT_UNAVAILABLE, Error)                             \
   V(ERR_MISSING_ARGS, TypeError)                                               \
@@ -161,8 +165,10 @@ ERRORS_WITH_CODE(V)
   V(ERR_INVALID_ADDRESS, "Invalid socket address")                             \
   V(ERR_INVALID_MODULE, "No such module")                                      \
   V(ERR_INVALID_STATE, "Invalid state")                                        \
+  V(ERR_INVALID_URL_SCHEME, "The URL must be of scheme file:")                 \
   V(ERR_INVALID_THIS, "Value of \"this\" is the wrong type")                   \
   V(ERR_INVALID_TRANSFER_OBJECT, "Found invalid object in transferList")       \
+  V(ERR_INVALID_URL, "Invalid URL")                                            \
   V(ERR_MEMORY_ALLOCATION_FAILED, "Failed to allocate memory")                 \
   V(ERR_OSSL_EVP_INVALID_DIGEST, "Invalid digest used")                        \
   V(ERR_MESSAGE_TARGET_CONTEXT_UNAVAILABLE,                                    \
diff --git a/src/node_url.cc b/src/node_url.cc
index ef845c612ef464..5d7f208ac09e26 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -4,6 +4,7 @@
 #include "node_errors.h"
 #include "node_external_reference.h"
 #include "node_i18n.h"
+#include "node_metadata.h"
 #include "util-inl.h"
 #include "v8-fast-api-calls.h"
#include "v8.h" @@ -117,6 +118,126 @@ void BindingData::DomainToUnicode(const FunctionCallbackInfo& args) { .ToLocalChecked()); } +bool FileURLToPathImpl(Environment* env, + const ada::url_aggregator& file_url, + std::string& result_file_path) { + if (file_url.type != ada::scheme::FILE) { + env->isolate()->ThrowException(ERR_INVALID_URL_SCHEME(env->isolate())); + + return false; + } + + std::string_view pathname = file_url.get_pathname(); +#ifdef _WIN_32 + std::string pathname_escaped_slash; + + for (size_t i = 0; (i + 2) < pathname.size(); i++) { + pathname_escaped_slash += pathname[i]; + + if (pathname[i] == '\\') pathname_escaped_slash += '\\'; + + if (pathname[i] != '%') continue; + + char third = pathname[i + 2] | 0x20; + + bool is_slash = pathname[i + 1] == '2' && third == 102; // 2f 2F / + bool is_forward_slash = pathname[i + 1] == '5' && third == 99; // 5c 5C \ + + if (!is_slash && !is_forward_slash) continue; + + env->isolate()->ThrowException(ERR_INVALID_FILE_URL_PATH( + env->isolate(), + "File URL path must not include encoded \\ or / characters")); + + return false; + } + + std::string decoded_pathname = ada::unicode::percent_decode( + std::string_view(pathname_escaped_slash), pathname_escaped_slash.size()); + + if (hostname.size() > 0) { + // If hostname is set, then we have a UNC path + // Pass the hostname through domainToUnicode just in case + // it is an IDN using punycode encoding. We do not need to worry + // about percent encoding because the URL parser will have + // already taken care of that for us. Note that this only + // causes IDNs with an appropriate `xn--` prefix to be decoded. 
+ *result_file_path = + "\\\\" + ada::unicode::to_unicode(hostname) + decoded_pathname; + + return true; + } + + char letter = decoded_pathname[1] | 0x20; + char sep = decoded_pathname[2]; + + // a..z A..Z + if (letter < 'a' || letter > 'z' || sep != ':') { + env->isolate()->ThrowException(ERR_INVALID_FILE_URL_PATH( + env->isolate(), "File URL path must be absolute")); + + return false; + } + + result_file_path = decoded_pathname.substr(1); + + return true; +#else + std::string_view hostname = file_url.get_hostname(); + + if (hostname.size() > 0) { + std::string error_message = + std::string("File URL host must be \"localhost\" or empty on ") + + std::string(per_process::metadata.platform); + env->isolate()->ThrowException( + ERR_INVALID_FILE_URL_HOST(env->isolate(), error_message.c_str())); + + return false; + } + + for (size_t i = 0; (i + 2) < pathname.size(); i++) { + if (pathname[i] == '%' && pathname[i + 1] == '2' && + (pathname[i + 2] | 0x20) == 102) { + env->isolate()->ThrowException(ERR_INVALID_FILE_URL_PATH( + env->isolate(), + "File URL path must not include encoded / characters")); + + return false; + } + } + + result_file_path = ada::unicode::percent_decode(pathname, pathname.size()); + + return true; +#endif +} + +void BindingData::FileURLToPath(const FunctionCallbackInfo& args) { + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); // url + + Environment* env = Environment::GetCurrent(args); + + Utf8Value input(env->isolate(), args[0]); + + auto file_url = ada::parse(input.ToStringView()); + + if (!file_url) { + env->isolate()->ThrowException(ERR_INVALID_URL(env->isolate())); + + return; + } + + std::string result_file_path; + + if (!FileURLToPathImpl(env, file_url.value(), result_file_path)) + return; + + args.GetReturnValue().Set( + ToV8Value(env->context(), result_file_path, env->isolate()) + .ToLocalChecked()); +} + void BindingData::CanParse(const FunctionCallbackInfo& args) { CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); // 
input @@ -321,6 +442,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data, Isolate* isolate = isolate_data->isolate(); SetMethodNoSideEffect(isolate, target, "domainToASCII", DomainToASCII); SetMethodNoSideEffect(isolate, target, "domainToUnicode", DomainToUnicode); + SetMethodNoSideEffect(isolate, target, "fileURLToPath", FileURLToPath); SetMethodNoSideEffect(isolate, target, "format", Format); SetMethod(isolate, target, "parse", Parse); SetMethod(isolate, target, "update", Update); @@ -344,6 +466,7 @@ void BindingData::RegisterExternalReferences( registry->Register(Parse); registry->Register(Update); registry->Register(CanParse); + registry->Register(FileURLToPath); registry->Register(FastCanParse); registry->Register(fast_can_parse_.GetTypeInfo()); } diff --git a/src/node_url.h b/src/node_url.h index dffe4b63ef11ad..a53d508abe0a7d 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -47,6 +47,7 @@ class BindingData : public SnapshotableObject { static void DomainToASCII(const v8::FunctionCallbackInfo& args); static void DomainToUnicode(const v8::FunctionCallbackInfo& args); + static void FileURLToPath(const v8::FunctionCallbackInfo& args); static void CanParse(const v8::FunctionCallbackInfo& args); static bool FastCanParse(v8::Local receiver, @@ -74,6 +75,9 @@ class BindingData : public SnapshotableObject { static v8::CFunction fast_can_parse_; }; +bool FileURLToPathImpl(Environment* env, + const ada::url_aggregator& file_url, + std::string& result_file_path); std::string FromFilePath(const std::string_view file_path); } // namespace url