diff --git a/idna/tests/punycode.rs b/idna/tests/punycode.rs index 1851a32cf..aac0307bc 100644 --- a/idna/tests/punycode.rs +++ b/idna/tests/punycode.rs @@ -7,8 +7,8 @@ // except according to those terms. use idna::punycode::{decode, encode_str}; -use serde_json::Value; use serde_json::map::Map; +use serde_json::Value; use std::str::FromStr; use test::TestFn; diff --git a/idna/tests/tests.rs b/idna/tests/tests.rs index 995bbadd6..15d381b64 100644 --- a/idna/tests/tests.rs +++ b/idna/tests/tests.rs @@ -1,6 +1,6 @@ extern crate idna; -extern crate serde_json; extern crate rustc_test as test; +extern crate serde_json; mod punycode; mod uts46; diff --git a/src/lib.rs b/src/lib.rs index 92777e592..cd6bdf326 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,7 +105,7 @@ assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css"); # run().unwrap(); */ -#![doc(html_root_url = "https://docs.rs/url/2.0.0")] +// #![doc(html_root_url = "https://docs.rs/url/2.0.0")] #[macro_use] extern crate matches; @@ -1922,9 +1922,15 @@ impl Url { pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { let mut parser = Parser::for_setter(String::new()); let remaining = parser.parse_scheme(parser::Input::new(scheme))?; - if !remaining.is_empty() - || (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) - { + let new_scheme_type = SchemeType::from(&parser.serialization); + let old_scheme_type = SchemeType::from(self.scheme()); + // Switching from special scheme to non special scheme + // and switching from file to non file is not allowed + if old_scheme_type != new_scheme_type { + return Err(()); + } + + if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { return Err(()); } let old_scheme_end = self.scheme_end; diff --git a/src/parser.rs b/src/parser.rs index 96906f94a..62dcbeeed 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -138,7 +138,7 @@ impl fmt::Display for SyntaxViolation { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq)] pub enum SchemeType { File, SpecialNotFile, @@ -470,15 +470,102 @@ impl<'a> Parser<'a> { mut self, input: Input, scheme_type: SchemeType, - mut base_file_url: Option<&Url>, + base_file_url: Option<&Url>, ) -> ParseResult { use SyntaxViolation::Backslash; // file state debug_assert!(self.serialization.is_empty()); let (first_char, input_after_first_char) = input.split_first(); - match first_char { - None => { - if let Some(base_url) = base_file_url { + if matches!(first_char, Some('/') | Some('\\')) { + self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\')); + // file slash state + let (next_char, input_after_next_char) = input_after_first_char.split_first(); + if matches!(next_char, Some('/') | Some('\\')) { + self.log_violation_if(Backslash, || next_char == Some('\\')); + // file host state + self.serialization.push_str("file://"); + let scheme_end = "file".len() as u32; + let host_start = "file://".len() as u32; + let (path_start, mut host, remaining) = + self.parse_file_host(input_after_next_char)?; + let mut host_end = to_u32(self.serialization.len())?; + let mut has_host = !matches!(host, HostInternal::None); + let remaining = if path_start { + self.parse_path_start(SchemeType::File, &mut has_host, remaining) + } else { + let path_start = self.serialization.len(); + self.serialization.push('/'); + self.parse_path(SchemeType::File, &mut has_host, path_start, remaining) + }; + // For file URLs that have a host and whose path starts + // with the windows drive letter we just remove the host. + if !has_host { + self.serialization + .drain(host_start as usize..host_end as usize); + host_end = host_start; + host = HostInternal::None; + } + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; + return Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: host_start, + host_start: host_start, + host_end: host_end, + host: host, + port: None, + path_start: host_end, + query_start: query_start, + fragment_start: fragment_start, + }); + } else { + self.serialization.push_str("file://"); + let scheme_end = "file".len() as u32; + let host_start = "file://".len(); + let mut host_end = host_start; + let mut host = HostInternal::None; + if !starts_with_windows_drive_letter_segment(&input_after_first_char) { + if let Some(base_url) = base_file_url { + let first_segment = base_url.path_segments().unwrap().next().unwrap(); + if is_normalized_windows_drive_letter(first_segment) { + self.serialization.push('/'); + self.serialization.push_str(first_segment); + } else if let Some(host_str) = base_url.host_str() { + self.serialization.push_str(host_str); + host_end = self.serialization.len(); + host = base_url.host.clone(); + } + } + } + self.serialization.push('/'); + let remaining = self.parse_path( + SchemeType::File, + &mut false, + host_end, + input_after_first_char, + ); + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; + let host_start = host_start as u32; + let host_end = host_end as u32; + return Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: host_start, + host_start, + host_end, + host, + port: None, + path_start: host_end, + query_start: query_start, + fragment_start: fragment_start, + }); + } + } + if let Some(base_url) = base_file_url { + match first_char { + None => { // Copy everything except the fragment let before_fragment = match base_url.fragment_start { Some(i) => &base_url.serialization[..i as usize], @@ -490,26 +577,8 @@ impl<'a> Parser<'a> { fragment_start: None, ..*base_url }) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len() as u32; - Ok(Url { - serialization: self.serialization, - scheme_end: scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start: path_start, - query_start: None, - fragment_start: None, - }) } - } - Some('?') => { - if let Some(base_url) = base_file_url { + Some('?') => { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, @@ -524,179 +593,77 @@ impl<'a> Parser<'a> { fragment_start: fragment_start, ..*base_url }) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len() as u32; - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_type, scheme_end, input)?; - Ok(Url { - serialization: self.serialization, - scheme_end: scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start: path_start, - query_start: query_start, - fragment_start: fragment_start, - }) - } - } - Some('#') => { - if let Some(base_url) = base_file_url { - self.fragment_only(base_url, input) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len() as u32; - let fragment_start = "file:///".len() as u32; - self.serialization.push('#'); - self.parse_fragment(input_after_first_char); - Ok(Url { - serialization: self.serialization, - scheme_end: scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start: path_start, - query_start: None, - fragment_start: Some(fragment_start), - }) } - } - Some('/') | Some('\\') => { - self.log_violation_if(Backslash, || first_char == Some('\\')); - // file slash state - let (next_char, input_after_next_char) = input_after_first_char.split_first(); - self.log_violation_if(Backslash, || next_char == Some('\\')); - if matches!(next_char, Some('/') | Some('\\')) { - // file host state - self.serialization.push_str("file://"); - let scheme_end = "file".len() as u32; - let host_start = "file://".len() as u32; - let (path_start, mut host, remaining) = - self.parse_file_host(input_after_next_char)?; - let mut host_end = to_u32(self.serialization.len())?; - let mut has_host = !matches!(host, HostInternal::None); - let remaining = if path_start { - self.parse_path_start(SchemeType::File, &mut has_host, remaining) + Some('#') => self.fragment_only(base_url, input), + _ => { + if !starts_with_windows_drive_letter_segment(&input) { + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), + }; + self.serialization.push_str(before_query); + self.pop_path(SchemeType::File, base_url.path_start as usize); + let remaining = self.parse_path( + SchemeType::File, + &mut true, + base_url.path_start as usize, + input, + ); + self.with_query_and_fragment( + SchemeType::File, + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) } else { - let path_start = self.serialization.len(); - self.serialization.push('/'); - self.parse_path(SchemeType::File, &mut has_host, path_start, remaining) - }; - // For file URLs that have a host and whose path starts - // with the windows drive letter we just remove the host. - if !has_host { - self.serialization - .drain(host_start as usize..host_end as usize); - host_end = host_start; - host = HostInternal::None; - } - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; - Ok(Url { - serialization: self.serialization, - scheme_end: scheme_end, - username_end: host_start, - host_start: host_start, - host_end: host_end, - host: host, - port: None, - path_start: host_end, - query_start: query_start, - fragment_start: fragment_start, - }) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len(); - if let Some(base_url) = base_file_url { - let first_segment = base_url.path_segments().unwrap().next().unwrap(); - // FIXME: *normalized* drive letter - if is_windows_drive_letter(first_segment) { - self.serialization.push_str(first_segment); - self.serialization.push('/'); - } + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + let remaining = + self.parse_path(SchemeType::File, &mut false, path_start, input); + let (query_start, fragment_start) = + self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start, + }) } - let remaining = self.parse_path( - SchemeType::File, - &mut false, - path_start, - input_after_first_char, - ); - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; - let path_start = path_start as u32; - Ok(Url { - serialization: self.serialization, - scheme_end: scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start: path_start, - query_start: query_start, - fragment_start: fragment_start, - }) - } - } - _ => { - if starts_with_windows_drive_letter_segment(&input) { - base_file_url = None; - } - if let Some(base_url) = base_file_url { - let before_query = match (base_url.query_start, base_url.fragment_start) { - (None, None) => &*base_url.serialization, - (Some(i), _) | (None, Some(i)) => base_url.slice(..i), - }; - self.serialization.push_str(before_query); - self.pop_path(SchemeType::File, base_url.path_start as usize); - let remaining = self.parse_path( - SchemeType::File, - &mut true, - base_url.path_start as usize, - input, - ); - self.with_query_and_fragment( - SchemeType::File, - base_url.scheme_end, - base_url.username_end, - base_url.host_start, - base_url.host_end, - base_url.host, - base_url.port, - base_url.path_start, - remaining, - ) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len(); - let remaining = - self.parse_path(SchemeType::File, &mut false, path_start, input); - let (query_start, fragment_start) = - self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; - let path_start = path_start as u32; - Ok(Url { - serialization: self.serialization, - scheme_end: scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start: path_start, - query_start: query_start, - fragment_start: fragment_start, - }) } } + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input); + let (query_start, fragment_start) = + self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start, + }) } } @@ -1311,14 +1278,8 @@ impl<'a> Parser<'a> { self.log_violation(SyntaxViolation::NullInFragment) } else { self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode( - utf8_c, - // FIXME: tests fail when we use the FRAGMENT set here - // as defined in the spec as of 2019-07-17, - // likely because tests are out of date. - // See https://github.com/servo/rust-url/issues/290 - CONTROLS, - )); + self.serialization + .extend(utf8_percent_encode(utf8_c, FRAGMENT)); } } } @@ -1391,6 +1352,10 @@ pub fn to_u32(i: usize) -> ParseResult { } } +fn is_normalized_windows_drive_letter(segment: &str) -> bool { + is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':' +} + /// Wether the scheme is file:, the path has a single segment, and that segment /// is a Windows drive letter fn is_windows_drive_letter(segment: &str) -> bool { diff --git a/tests/setters_tests.json b/tests/setters_tests.json index a45171bf3..db23d9247 100644 --- a/tests/setters_tests.json +++ b/tests/setters_tests.json @@ -27,7 +27,7 @@ "href": "a://example.net", "new_value": "", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -35,16 +35,24 @@ "href": "a://example.net", "new_value": "b", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, + { + "href": "javascript:alert(1)", + "new_value": "defuse", + "expected": { + "href": "defuse:alert(1)", + "protocol": "defuse:" + } + }, { "comment": "Upper-case ASCII is lower-cased", "href": "a://example.net", "new_value": "B", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, @@ -53,7 +61,7 @@ "href": "a://example.net", "new_value": "é", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -62,7 +70,7 @@ "href": "a://example.net", "new_value": "0b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -71,7 +79,7 @@ "href": "a://example.net", "new_value": "+b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -79,7 +87,7 @@ "href": "a://example.net", "new_value": "bC0+-.", "expected": { - "href": "bc0+-.://example.net/", + "href": "bc0+-.://example.net", "protocol": "bc0+-.:" } }, @@ -88,7 +96,7 @@ "href": "a://example.net", "new_value": "b,c", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -97,10 +105,35 @@ "href": "a://example.net", "new_value": "bé", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, + { + "comment": "Can’t switch from URL containing username/password/port to file", + "href": "http://test@example.net", + "new_value": "file", + "expected": { + "href": "http://test@example.net/", + "protocol": "http:" + } + }, + { + "href": "gopher://example.net:1234", + "new_value": "file", + "expected": { + "href": "gopher://example.net:1234/", + "protocol": "gopher:" + } + }, + { + "href": "wss://x:x@example.net:1234", + "new_value": "file", + "expected": { + "href": "wss://x:x@example.net:1234/", + "protocol": "wss:" + } + }, { "comment": "Can’t switch from file URL with no host", "href": "file://localhost/", @@ -127,12 +160,36 @@ } }, { - "comment": "Spec deviation: from special scheme to not is not problematic. https://github.com/whatwg/url/issues/104", + "comment": "Can’t switch from special scheme to non-special", "href": "http://example.net", "new_value": "b", "expected": { - "href": "b://example.net/", - "protocol": "b:" + "href": "http://example.net/", + "protocol": "http:" + } + }, + { + "href": "file://hi/path", + "new_value": "s", + "expected": { + "href": "file://hi/path", + "protocol": "file:" + } + }, + { + "href": "https://example.net", + "new_value": "s", + "expected": { + "href": "https://example.net/", + "protocol": "https:" + } + }, + { + "href": "ftp://example.net", + "new_value": "test", + "expected": { + "href": "ftp://example.net/", + "protocol": "ftp:" } }, { @@ -145,12 +202,44 @@ } }, { - "comment": "Spec deviation: from non-special scheme with a host to special is not problematic. https://github.com/whatwg/url/issues/104", + "comment": "Can’t switch from non-special scheme to special", "href": "ssh://me@example.net", "new_value": "http", "expected": { - "href": "http://me@example.net/", - "protocol": "http:" + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "gopher", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "file", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://example.net", + "new_value": "file", + "expected": { + "href": "ssh://example.net", + "protocol": "ssh:" + } + }, + { + "href": "nonsense:///test", + "new_value": "https", + "expected": { + "href": "nonsense:///test", + "protocol": "nonsense:" } }, { @@ -170,6 +259,16 @@ "href": "view-source+data:text/html,

Test", "protocol": "view-source+data:" } + }, + { + "comment": "Port is set to null if it is the default for new scheme.", + "href": "http://foo.com:443/", + "new_value": "https", + "expected": { + "href": "https://foo.com/", + "protocol": "https:", + "port": "" + } } ], "username": [ @@ -266,14 +365,6 @@ "username": "" } }, - { - "href": "file://test/", - "new_value": "test", - "expected": { - "href": "file://test/", - "username": "" - } - }, { "href": "javascript://x/", "new_value": "wario", @@ -281,6 +372,14 @@ "href": "javascript://wario@x/", "username": "wario" } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "username": "" + } } ], "password": [ @@ -369,14 +468,6 @@ "password": "" } }, - { - "href": "file://test/", - "new_value": "test", - "expected": { - "href": "file://test/", - "password": "" - } - }, { "href": "javascript://x/", "new_value": "bowser", @@ -384,9 +475,27 @@ "href": "javascript://:bowser@x/", "password": "bowser" } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "password": "" + } } ], "host": [ + { + "comment": "Non-special scheme", + "href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "\u0009", @@ -414,6 +523,15 @@ "hostname": "" } }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "#", @@ -459,6 +577,16 @@ "hostname": "%C3%9F" } }, + { + "comment": "IDNA Nontransitional_Processing", + "href": "https://x/", + "new_value": "ß", + "expected": { + "href": "https://xn--zca/", + "host": "xn--zca", + "hostname": "xn--zca" + } + }, { "comment": "Cannot-be-a-base means no host", "href": "mailto:me@example.net", @@ -499,14 +627,14 @@ } }, { - "comment": "Port number is removed if empty in the new value: https://github.com/whatwg/url/pull/113", + "comment": "Port number is unchanged if not specified", "href": "http://example.net:8080", "new_value": "example.com:", "expected": { - "href": "http://example.com/", - "host": "example.com", + "href": "http://example.com:8080/", + "host": "example.com:8080", "hostname": "example.com", - "port": "" + "port": "8080" } }, { @@ -591,6 +719,17 @@ "port": "80" } }, + { + "comment": "Port number is removed if new port is scheme default and existing URL has a non-default port", + "href": "http://example.net:8080", + "new_value": "example.com:80", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, { "comment": "Stuff after a / delimiter is ignored", "href": "http://example.net/path", @@ -790,9 +929,69 @@ "host": "example.net", "hostname": "example.net" } + }, + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } } ], "hostname": [ + { + "comment": "Non-special scheme", + "href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "\u0009", @@ -820,6 +1019,15 @@ "hostname": "" } }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "#", @@ -1055,6 +1263,56 @@ "host": "example.net", "hostname": "example.net" } + }, + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } } ], "port": [ @@ -1324,12 +1582,12 @@ } }, { - "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed. Leading or training C0 controls and space are removed.", + "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { - "href": "a:/!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", - "pathname": "/!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", + "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { @@ -1376,6 +1634,33 @@ "href": "sc://example.net/%23", "pathname": "/%23" } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file://monkey/", + "new_value": "\\\\", + "expected": { + "href": "file://monkey/", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//\\/", + "expected": { + "href": "file:///", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//monkey/..//", + "expected": { + "href": "file:///", + "pathname": "/" + } } ], "search": [ @@ -1444,12 +1729,12 @@ } }, { - "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed. Leading or training C0 controls and space are removed.", + "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed.", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { - "href": "a:/?!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", - "search": "?!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { @@ -1511,13 +1796,53 @@ "hash": "" } }, + { + "href": "http://example.net", + "new_value": "#foo bar", + "expected": { + "href": "http://example.net/#foo%20bar", + "hash": "#foo%20bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo\"bar", + "expected": { + "href": "http://example.net/#foo%22bar", + "hash": "#foo%22bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foobar", + "expected": { + "href": "http://example.net/#foo%3Ebar", + "hash": "#foo%3Ebar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo`bar", + "expected": { + "href": "http://example.net/#foo%60bar", + "hash": "#foo%60bar" + } + }, { "comment": "Simple percent-encoding; nuls, tabs, and newlines are removed", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { - "href": "a:/#!%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", - "hash": "#!%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + "href": "a:/#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "hash": "#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { @@ -1528,6 +1853,14 @@ "href": "http://example.net/#%c3%89t%C3%A9", "hash": "#%c3%89t%C3%A9" } + }, + { + "href": "javascript:alert(1)", + "new_value": "castle", + "expected": { + "href": "javascript:alert(1)#castle", + "hash": "#castle" + } } ] } diff --git a/tests/urltestdata.json b/tests/urltestdata.json index 5565c938f..bf4e2a783 100644 --- a/tests/urltestdata.json +++ b/tests/urltestdata.json @@ -153,7 +153,7 @@ { "input": "http://f:21/ b ? d # e ", "base": "http://example.org/foo/bar", - "href": "http://f:21/%20b%20?%20d%20# e", + "href": "http://f:21/%20b%20?%20d%20#%20e", "origin": "http://f:21", "protocol": "http:", "username": "", @@ -163,12 +163,12 @@ "port": "21", "pathname": "/%20b%20", "search": "?%20d%20", - "hash": "# e" + "hash": "#%20e" }, { "input": "lolscheme:x x#x x", "base": "about:blank", - "href": "lolscheme:x x#x x", + "href": "lolscheme:x x#x%20x", "protocol": "lolscheme:", "username": "", "password": "", @@ -177,7 +177,7 @@ "port": "", "pathname": "x x", "search": "", - "hash": "#x x" + "hash": "#x%20x" }, { "input": "http://f:/c", @@ -572,7 +572,7 @@ { "input": "foo://", "base": "http://example.org/foo/bar", - "href": "foo:///", + "href": "foo://", "origin": "null", "protocol": "foo:", "username": "", @@ -580,7 +580,7 @@ "host": "", "hostname": "", "port": "", - "pathname": "/", + "pathname": "", "search": "", "hash": "" }, @@ -1433,6 +1433,22 @@ "search": "", "hash": "" }, + "# Based on https://felixfbecker.github.io/whatwg-url-custom-host-repro/", + { + "input": "ssh://example.com/foo/bar.git", + "base": "http://example.org/", + "href": "ssh://example.com/foo/bar.git", + "origin": "null", + "protocol": "ssh:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", { "input": "file:c:\\foo\\bar.html", @@ -2260,7 +2276,7 @@ { "input": "http://www.google.com/foo?bar=baz# »", "base": "about:blank", - "href": "http://www.google.com/foo?bar=baz# %C2%BB", + "href": "http://www.google.com/foo?bar=baz#%20%C2%BB", "origin": "http://www.google.com", "protocol": "http:", "username": "", @@ -2270,12 +2286,12 @@ "port": "", "pathname": "/foo", "search": "?bar=baz", - "hash": "# %C2%BB" + "hash": "#%20%C2%BB" }, { "input": "data:test# »", "base": "about:blank", - "href": "data:test# %C2%BB", + "href": "data:test#%20%C2%BB", "origin": "null", "protocol": "data:", "username": "", @@ -2285,7 +2301,7 @@ "port": "", "pathname": "test", "search": "", - "hash": "# %C2%BB" + "hash": "#%20%C2%BB" }, { "input": "http://www.google.com", @@ -4015,6 +4031,37 @@ "search": "?`{}", "hash": "" }, + "byte is ' and url is special", + { + "input": "http://host/?'", + "base": "about:blank", + "href": "http://host/?%27", + "origin": "http://host", + "protocol": "http:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?%27", + "hash": "" + }, + { + "input": "notspecial://host/?'", + "base": "about:blank", + "href": "notspecial://host/?'", + "origin": "null", + "protocol": "notspecial:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?'", + "hash": "" + }, "# Credentials in base", { "input": "/some/path", @@ -4473,6 +4520,26 @@ "search": "", "hash": "" }, + { + "input": "sc://@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://te@s:t@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:12/", + "base": "about:blank", + "failure": true + }, { "input": "sc://[/", "base": "about:blank", @@ -4566,6 +4633,22 @@ "search": "", "hash": "" }, + "# unknown scheme with non-URL characters in the path", + { + "input": "wow:\uFFFF", + "base": "about:blank", + "href": "wow:%EF%BF%BF", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%EF%BF%BF", + "search": "", + "hash": "" + }, "# Hosts and percent-encoding", { "input": "ftp://example.com%80/", @@ -4767,6 +4850,70 @@ "searchParams": "qux=", "hash": "#foo%08bar" }, + { + "input": "http://foo.bar/baz?qux#foo\"bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%22bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%22bar" + }, + { + "input": "http://foo.bar/baz?qux#foobar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%3Ebar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%3Ebar" + }, + { + "input": "http://foo.bar/baz?qux#foo`bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%60bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%60bar" + }, "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)", { "input": "http://192.168.257", @@ -4954,6 +5101,11 @@ "hash": "" }, "More IPv4 parsing (via https://github.com/jsdom/whatwg-url/issues/92)", + { + "input": "https://0x100000000/test", + "base": "about:blank", + "failure": true + }, { "input": "https://256.0.0.1/test", "base": "about:blank", @@ -5187,6 +5339,90 @@ "hash": "#x" }, "# File URLs and many (back)slashes", + { + "input": "file:\\\\//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\?fox", + "base": "about:blank", + "href": "file:///?fox", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "?fox", + "hash": "" + }, + { + "input": "file:\\\\\\\\#guppy", + "base": "about:blank", + "href": "file:///#guppy", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "#guppy" + }, + { + "input": "file://spider///", + "base": "about:blank", + "href": "file://spider/", + "protocol": "file:", + "username": "", + "password": "", + "host": "spider", + "hostname": "spider", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\localhost//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, { "input": "file:///localhost//cat", "base": "about:blank", @@ -5201,6 +5437,48 @@ "search": "", "hash": "" }, + { + "input": "file://\\/localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://localhost//a//../..//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/////mouse", + "base": "file:///elephant", + "href": "file:///mouse", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/mouse", + "search": "", + "hash": "" + }, { "input": "\\//pig", "base": "file://lion/", @@ -5215,6 +5493,48 @@ "search": "", "hash": "" }, + { + "input": "\\/localhost//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "//localhost//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "/..//localhost//pig", + "base": "file://lion/", + "href": "file://lion/localhost//pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "lion", + "hostname": "lion", + "port": "", + "pathname": "/localhost//pig", + "search": "", + "hash": "" + }, { "input": "file://", "base": "file://ape/", @@ -5229,7 +5549,50 @@ "search": "", "hash": "" }, + "# File URLs with non-empty hosts", + { + "input": "/rooibos", + "base": "file://tea/", + "href": "file://tea/rooibos", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/rooibos", + "search": "", + "hash": "" + }, + { + "input": "/?chai", + "base": "file://tea/", + "href": "file://tea/?chai", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/", + "search": "?chai", + "hash": "" + }, "# Windows drive letter handling with the 'file:' base URL", + { + "input": "C|", + "base": "file://host/dir/file", + "href": "file:///C:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, { "input": "C|#", "base": "file://host/dir/file", @@ -5329,6 +5692,48 @@ "hash": "" }, "# Windows drive letter quirk in the file slash state", + { + "input": "/c:/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/c|/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "file:\\c:\\foo\\bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, { "input": "/c:/foo/bar", "base": "file://host/path", @@ -5343,9 +5748,9 @@ "search": "", "hash": "" }, - "# Windows drive letter quirk (no host)", + "# Windows drive letter quirk with not empty host", { - "input": "file:/C|/", + "input": "file://example.net/C:/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5359,7 +5764,7 @@ "hash": "" }, { - "input": "file://C|/", + "input": "file://1.2.3.4/C:/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5372,9 +5777,8 @@ "search": "", "hash": "" }, - "# Windows drive letter quirk with not empty host", { - "input": "file://example.net/C:/", + "input": "file://[1::8]/C:/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5387,8 +5791,9 @@ "search": "", "hash": "" }, + "# Windows drive letter quirk (no host)", { - "input": "file://1.2.3.4/C:/", + "input": "file:/C|/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5402,7 +5807,7 @@ "hash": "" }, { - "input": "file://[1::8]/C:/", + "input": "file://C|/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5544,6 +5949,109 @@ "failure": true }, "# Non-special-URL path tests", + { + "input": "sc://ñ", + "base": "about:blank", + "href": "sc://%C3%B1", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://ñ?x", + "base": "about:blank", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://ñ#x", + "base": "about:blank", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "sc://ñ", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "?x", + "base": "sc://ñ", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://?", + "base": "about:blank", + "href": "sc://?", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://#", + "base": "about:blank", + "href": "sc://#", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, { "input": "///", "base": "sc://x/", @@ -5558,6 +6066,34 @@ "search": "", "hash": "" }, + { + "input": "////", + "base": "sc://x/", + "href": "sc:////", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "////x/", + "base": "sc://x/", + "href": "sc:////x/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//x/", + "search": "", + "hash": "" + }, { "input": "tftp://foobar.com/someconfig;mode=netascii", "base": "about:blank", @@ -6048,27 +6584,34 @@ "search": "?a", "hash": "#%GH" }, - "Bad bases", + "URLs that require a non-about:blank base. (Also serve as invalid base tests.)", { - "input": "test-a.html", - "base": "a", + "input": "a", + "base": "about:blank", "failure": true }, { - "input": "test-a-slash.html", - "base": "a/", + "input": "a/", + "base": "about:blank", "failure": true }, { - "input": "test-a-slash-slash.html", - "base": "a//", + "input": "a//", + "base": "about:blank", "failure": true }, + "Bases that don't fail to parse but fail to be bases", { "input": "test-a-colon.html", "base": "a:", "failure": true }, + { + "input": "test-a-colon-b.html", + "base": "a:b", + "failure": true + }, + "Other base URL tests, that must succeed", { "input": "test-a-colon-slash.html", "base": "a:/", @@ -6097,11 +6640,6 @@ "search": "", "hash": "" }, - { - "input": "test-a-colon-b.html", - "base": "a:b", - "failure": true - }, { "input": "test-a-colon-slash-b.html", "base": "a:/b",