/**
Create a regular expression that matches email addresses, assembled from the `addr-spec` grammar of RFC 5322 (https://datatracker.ietf.org/doc/html/rfc5322).

A new `RegExp` is built on every call. Without `exact` the regex carries the `g` flag, so `.test()`/`.exec()` on the same instance advance `lastIndex`; call `emailRegex()` again (or reset `lastIndex`) before reusing it on another string.

If the regex is run against untrusted input in a server context, give it a timeout and limit the input to a reasonable length.

@param {object} [options] - Matching options. `null`/`undefined` means "all defaults"; an option that is explicitly `undefined` also falls back to its default.
@param {boolean} [options.exact=false] - Only match an exact string (anchored with `^`/`$`, no `g` flag). Useful with `RegExp#test` to check if a string is an email address.
@param {boolean} [options.allowSingleLabelDomain=true] - Allow emails with a domain that doesn't have a dot, such as `user@localhost` or `user@internal`.
@param {boolean} [options.allowAmpersandEntity=false] - Allow the ampersand HTML entity `&amp;` to correspond to an ampersand `&` in the email address.
@returns {RegExp} The email-matching regular expression.
*/
export default function emailRegex(options) {
	// Spreading tolerates `null`/`undefined` options; destructuring defaults make an
	// explicitly `undefined` option fall back to its default instead of disabling it.
	const {
		exact = false,
		allowSingleLabelDomain = true,
		allowAmpersandEntity = false
	} = {...options};

	// --- Atoms (RFC 5322 section 3.2.3) ---
	const alpha = '[A-Za-z]';
	const digit = String.raw`\d`;
	// `&` alone is already part of the character class, so the opt-in alternative has to be
	// the full five-character entity `&amp;` (whose `;` is otherwise not a valid `atext`).
	const atext = String.raw`(?:${alpha}|${digit}|[!#$%&'*+\-/=?^_\`{|}~]${allowAmpersandEntity ? '|&amp;' : ''})`;
	// `dot-atom` here requires at least one dot; dot-less forms are covered by the
	// `obs-local-part` / `obs-domain` alternatives below.
	const dotAtomText = String.raw`(?:${atext}+(?:\.${atext}+)+)`;
	const dotAtom = `${dotAtomText}`;
	const atom = `${atext}+`;

	// --- Folding white space (section 3.2.2) ---
	const dquote = '"';
	const sp = ' ';
	const htab = String.raw`\u0009`;
	const wsp = `(?:${sp}|${htab})`;
	const cr = String.raw`\u000D`;
	const lf = String.raw`\u000A`;
	const crlf = `(?:${cr}${lf})`;
	const obsFws = `(?:${wsp}+(?:${crlf}${wsp}+)*)`;
	const fws = `(?:(?:(?:${wsp}*${crlf})?${wsp}+)|${obsFws})`;

	// --- Quoted strings (sections 3.2.1 and 3.2.4, plus obsolete forms from 4.1) ---
	const obsNoWsCtl = String.raw`(?:[\u0001-\u0008]|\u000B|\u000C|[\u000E-\u001F]|\u007F)`;
	const obsQtext = `${obsNoWsCtl}`;
	const qtext = String.raw`(?:!|[\u0023-\u005B]|[\u005D-\u007E]|${obsQtext})`;
	const vchar = String.raw`[\u0021-\u007E]`;
	const obsQp = String.raw`(?:\\(?:\u0000|${obsNoWsCtl}|${lf}|${cr}))`;
	const quotedPair = String.raw`(?:(?:\\(?:${vchar}|${wsp}))|${obsQp})`;
	const qcontent = `(?:${qtext}|${quotedPair})`;
	const quotedString = `(?:${dquote}(?:${fws}?${qcontent})*${fws}?${dquote})`;

	// --- Local part (section 3.4.1) ---
	const word = `(?:${atom}|${quotedString})`;
	const obsLocalPart = String.raw`(?:${word}(?:\.${word})*)`;
	const localPart = `(?:${dotAtom}|${quotedString}|${obsLocalPart})`;

	// --- Domain (section 3.4.1) ---
	const obsDtext = `(?:${obsNoWsCtl}|${quotedPair})`;
	const dtext = String.raw`(?:[\u0021-\u005A]|[\u005E-\u007E]|${obsDtext})`;
	const domainLiteral = String.raw`(?:\[(?:${fws}?${dtext})*${fws}?])`;
	// `*` lets a single label (no dot) through; `+` demands at least one dot.
	const obsDomain = String.raw`(?:${atom}(?:\.${atom})${allowSingleLabelDomain ? '*' : '+'})`;
	const domain = `(?:${dotAtom}|${domainLiteral}|${obsDomain})`;

	const addrSpec = `${localPart}@${domain}`;

	return exact ? new RegExp(`^${addrSpec}$`) : new RegExp(addrSpec, 'g');
}
diff --git a/test.js b/test.js index ba9ed30..05706fe 100644 --- a/test.js +++ b/test.js @@ -12,22 +12,61 @@ const fixtures = [ 'test@e.com', 'test@xn--hxajbheg2az3al.xn--jxalpdlp', 'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@sindresorhus.com', - '!#$%&`*+/=?^`{|}~@sindresorhus.com', 'test@g--a.com', 'a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hij', '123@sindresorhus.com', - '"\\a"@sindresorhus.com', + String.raw`"\a"@sindresorhus.com`, '""@sindresorhus.com', '"test"@sindresorhus.com', - '"\\""@sindresorhus.com', + String.raw`"\""@sindresorhus.com`, 'abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklmn@sindresorhus.com', 'test@iana.co-uk', 'a@a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v', 'test@foo-bar.com', 'foo@x.solutions', - 'foo@[IPv6:2001:db8::2]' + 'foo@[IPv6:2001:db8::2]', + // https://github.com/sindresorhus/email-regex/issues/2#issuecomment-404654677 + 'email@example.com', + 'firstname.lastname@example.com', + 'email@subdomain.example.com', + 'firstname+lastname@example.com', + 'email@123.123.123.123', + 'email@[123.123.123.123]', + '"email"@example.com', + '1234567890@example.com', + 'email@example-one.com', + '_______@example.com', + 'email@example.name', + 'email@example.museum', + 'email@example.co.jp', + 'firstname-lastname@example.com', + // 'very.unusual.”@”.unusual.com', + 'email@example', + 'email@-example.com', + 'email@example.web', + 'email@111.222.333.44444' ]; +const fixturesCustomMatch = new Map([ + // https://github.com/sindresorhus/email-regex/issues/9#issue-569014279 + 
['f="nr@context",c=e("gos")', 'nr@context'], + // https://github.com/sindresorhus/email-regex/issues/2#issuecomment-404654677 + [String.raw`very.”(),:;<>[]”.VERY.”very@\ "very”.unusual@strange.example.com`, 'unusual@strange.example.com'], + ['#@%^%#$@#$@#.com', '#@%^%#$'], + ['Joe Smith email@example.com', 'email@example.com'], + ['email@example@example.com', 'email@example'], + ['.email@example.com', 'email@example.com'], + ['email..email@example.com', 'email@example.com'], + ['email@example.com (Joe Smith)', 'email@example.com'], + ['just”not”right@example.com', 'right@example.com'], + [String.raw`this\ is"really"not\allowed@example.com`, 'allowed@example.com'] +]); + +for (const [input, expected] of fixturesCustomMatch) { + // If they match, we can't use them as notFixtures + console.assert(input !== expected, `Custom match fixture "${input}" does not match expected "${expected}"`); +} + const fixturesNot = [ '@', '@io', @@ -39,25 +78,67 @@ const fixturesNot = [ 'sindre@sindre@sindre.com', 'mailto:sindresorhus@gmail.com', 'foo.example.com', - 'test.@example.com' + 'test.@example.com', + '!#$%&`*+/=?^`{|}~@sindresorhus.com', + // https://github.com/sindresorhus/email-regex/issues/9#issue-569014279 + 'f="nr@context",c=e("gos")', + // https://github.com/sindresorhus/email-regex/issues/2#issuecomment-404654677 + 'plainaddress', + '@example.com', + 'email.example.com', + 'email@example..com', + 'email.@example.com', + 'あいうえお@example.com', + 'Abc..123@example.com', + '”(),:;<>[]@example.com', + '"(),:;<>[]@example.com', + String.raw`much.”more\ unusual”@example.com` ]; +function getFirstMatch(regex, text) { + const matches = regex.exec(text); + + if (matches) { + return matches[0]; + } +} + test('extract', t => { for (const fixture of fixtures) { - t.is((emailRegex().exec(`foo ${fixture} bar`) || [])[0], fixture); + t.is(getFirstMatch(emailRegex(), `foo ${fixture} bar`), fixture); } - t.is(emailRegex().exec('mailto:sindresorhus@gmail.com')[0], 
'sindresorhus@gmail.com'); + for (const [input, expected] of fixturesCustomMatch) { + t.is(getFirstMatch(emailRegex(), input), expected, input); // eslint-disable-line ava/assertion-arguments + } + + t.is(getFirstMatch(emailRegex(), 'mailto:sindresorhus@gmail.com'), 'sindresorhus@gmail.com'); }); test('exact', t => { for (const fixture of fixtures) { - t.true(emailRegex({exact: true}).test(fixture)); + t.true(emailRegex({exact: true}).test(fixture), fixture); // eslint-disable-line ava/assertion-arguments } }); +test('allowSingleLabelDomain', t => { + t.true(emailRegex({exact: true, allowSingleLabelDomain: true}).test('abc@sindresorhus')); + t.false(emailRegex({exact: true, allowSingleLabelDomain: false}).test('abc@sindresorhus')); + t.is(getFirstMatch(emailRegex({exact: false, allowSingleLabelDomain: true}), '#@%^%#$@#$@#.com'), '#@%^%#$'); + t.is(getFirstMatch(emailRegex({exact: false, allowSingleLabelDomain: false}), '#@%^%#$@#$@#.com'), '#$@#.com'); +}); + +test('allowAmpersandEntity', t => { + t.true(emailRegex({exact: true, allowAmpersandEntity: true}).test('!#$%&`*+/=?^`{|}~@sindresorhus.com')); + t.false(emailRegex({exact: true, allowAmpersandEntity: false}).test('!#$%&`*+/=?^`{|}~@sindresorhus.com')); +}); + test('failures', t => { for (const fixture of fixturesNot) { - t.false(emailRegex({exact: true}).test(fixture)); + t.false(emailRegex({exact: true}).test(fixture), fixture); // eslint-disable-line ava/assertion-arguments + } + + for (const input of fixturesCustomMatch.keys()) { + t.false(emailRegex({exact: true}).test(input), input); // eslint-disable-line ava/assertion-arguments } });