diff --git a/index.d.ts b/index.d.ts index 9339d73..2b73768 100644 --- a/index.d.ts +++ b/index.d.ts @@ -200,8 +200,8 @@ import normalizeUrl = require('normalize-url'); normalizeUrl('sindresorhus.com'); //=> 'http://sindresorhus.com' -normalizeUrl('HTTP://xn--xample-hva.com:80/?b=bar&a=foo'); -//=> 'http://êxample.com/?a=foo&b=bar' +normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo'); +//=> 'http://sindresorhus.com/baz?a=foo&b=bar' ``` */ declare function normalizeUrl(url: string, options?: normalizeUrl.Options): string; diff --git a/index.js b/index.js index 4bf5823..e615cf8 100644 --- a/index.js +++ b/index.js @@ -117,7 +117,7 @@ const normalizeUrl = (urlString, options) => { // Remove duplicate slashes if not preceded by a protocol if (urlObj.pathname) { - urlObj.pathname = urlObj.pathname.replace(/(? { urlObj.hostname = urlObj.hostname.replace(/\.$/, ''); // Remove `www.` - if (options.stripWWW && /^www\.(?:[a-z\-\d]{2,63})\.(?:[a-z.]{2,5})$/.test(urlObj.hostname)) { - // Each label should be max 63 at length (min: 2). - // The extension should be max 5 at length (min: 2). + if (options.stripWWW && /^www\.(?!www\.)(?:[a-z\-\d]{1,63})\.(?:[a-z.\-\d]{2,63})$/.test(urlObj.hostname)) { + // Each label should be max 63 at length (min: 1). // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names + // Each TLD should be up to 63 characters long (min: 2). + // It is technically possible to have a single character TLD, but none currently exist. urlObj.hostname = urlObj.hostname.replace(/^www\./, ''); } } diff --git a/package.json b/package.json index 39b953a..f177130 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "normalize-url", - "version": "5.0.0", + "version": "5.1.0", "description": "Normalize a URL", "license": "MIT", "repository": "sindresorhus/normalize-url", diff --git a/readme.md b/readme.md index ac99b1a..d09d3f6 100644 --- a/readme.md +++ b/readme.md @@ -10,6 +10,8 @@ Useful when you need to display, store, deduplicate, sort, compare, etc, URLs. $ npm install normalize-url ``` +*If you need to use this in the browser, use version 4: `npm i normalize-url@4`* + ## Usage ```js @@ -18,8 +20,8 @@ const normalizeUrl = require('normalize-url'); normalizeUrl('sindresorhus.com'); //=> 'http://sindresorhus.com' -normalizeUrl('HTTP://xn--xample-hva.com:80/?b=bar&a=foo'); -//=> 'http://êxample.com/?a=foo&b=bar' +normalizeUrl('//www.sindresorhus.com:80/../baz?b=bar&a=foo'); +//=> 'http://sindresorhus.com/baz?a=foo&b=bar' ``` ## API diff --git a/test.js b/test.js index b2b226a..fc5b5c2 100644 --- a/test.js +++ b/test.js @@ -75,6 +75,13 @@ test('stripWWW option', t => { t.is(normalizeUrl('www.sindresorhus.com', options), 'http://www.sindresorhus.com'); t.is(normalizeUrl('http://www.êxample.com', options), 'http://www.xn--xample-hva.com'); t.is(normalizeUrl('sindre://www.sorhus.com', options), 'sindre://www.sorhus.com'); + + const options2 = {stripWWW: true}; + t.is(normalizeUrl('http://www.vue.amsterdam', options2), 'http://vue.amsterdam'); + t.is(normalizeUrl('http://www.sorhus.xx--bck1b9a5dre4c', options2), 'http://sorhus.xx--bck1b9a5dre4c'); + + const tooLongTLDURL = 'http://www.sorhus.' + ''.padEnd(64, 'a'); + t.is(normalizeUrl(tooLongTLDURL, options2), tooLongTLDURL); }); test('removeQueryParameters option', t => { @@ -205,6 +212,18 @@ test('remove duplicate pathname slashes', t => { t.is(normalizeUrl('http://sindresorhus.com///foo'), 'http://sindresorhus.com/foo'); t.is(normalizeUrl('http://sindresorhus.com:5000//foo'), 'http://sindresorhus.com:5000/foo'); t.is(normalizeUrl('http://sindresorhus.com//foo'), 'http://sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com/s3://sindresorhus.com'), 'http://sindresorhus.com/s3://sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/s3://sindresorhus.com//foo'), 'http://sindresorhus.com/s3://sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com//foo/s3://sindresorhus.com'), 'http://sindresorhus.com/foo/s3://sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/git://sindresorhus.com'), 'http://sindresorhus.com/git://sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/git://sindresorhus.com//foo'), 'http://sindresorhus.com/git://sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com//foo/git://sindresorhus.com//foo'), 'http://sindresorhus.com/foo/git://sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com/a://sindresorhus.com//foo'), 'http://sindresorhus.com/a:/sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com/alongprotocolwithin50charlimitxxxxxxxxxxxxxxxxxxxx://sindresorhus.com//foo'), 'http://sindresorhus.com/alongprotocolwithin50charlimitxxxxxxxxxxxxxxxxxxxx://sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com/alongprotocolexceeds50charlimitxxxxxxxxxxxxxxxxxxxxx://sindresorhus.com//foo'), 'http://sindresorhus.com/alongprotocolexceeds50charlimitxxxxxxxxxxxxxxxxxxxxx:/sindresorhus.com/foo'); + t.is(normalizeUrl('http://sindresorhus.com/a2-.+://sindresorhus.com'), 'http://sindresorhus.com/a2-.+://sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/a2-.+_://sindresorhus.com'), 'http://sindresorhus.com/a2-.+_:/sindresorhus.com'); + t.is(normalizeUrl('http://sindresorhus.com/2abc://sindresorhus.com'), 'http://sindresorhus.com/2abc:/sindresorhus.com'); }); test('data URL', t => { @@ -263,3 +282,8 @@ test('data URL', t => { t.is(normalizeUrl('data:,foo#bar', options), 'data:,foo'); t.is(normalizeUrl('data:,www.sindresorhus.com', options), 'data:,www.sindresorhus.com'); }); + +test('prevents homograph attack', t => { + // The input string uses Unicode to make it look like a valid `ebay.com` URL. + t.is(normalizeUrl('https://ebаy.com'), 'https://xn--eby-7cd.com'); +});