diff --git a/docs/src/api/class-browsercontext.md b/docs/src/api/class-browsercontext.md index 91e43e22dd600..171f1e1365f3a 100644 --- a/docs/src/api/class-browsercontext.md +++ b/docs/src/api/class-browsercontext.md @@ -1203,9 +1203,7 @@ Enabling routing disables http cache. * since: v1.8 - `url` <[string]|[RegExp]|[function]\([URL]\):[boolean]> -A glob pattern, regex pattern or predicate receiving [URL] to match while routing. -When a [`option: Browser.newContext.baseURL`] via the context options was provided and the passed URL is a path, -it gets merged via the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. +A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If [`option: Browser.newContext.baseURL`] is set in the context options and the provided URL is a string that does not start with `*`, it is resolved using the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. ### param: BrowserContext.route.handler * since: v1.8 diff --git a/docs/src/api/class-page.md b/docs/src/api/class-page.md index b59f2cb3fe59f..a69ac2446e4cb 100644 --- a/docs/src/api/class-page.md +++ b/docs/src/api/class-page.md @@ -3607,9 +3607,7 @@ Enabling routing disables http cache. * since: v1.8 - `url` <[string]|[RegExp]|[function]\([URL]\):[boolean]> -A glob pattern, regex pattern or predicate receiving [URL] to match while routing. -When a [`option: Browser.newContext.baseURL`] via the context options was provided and the passed URL is a path, -it gets merged via the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. +A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If [`option: Browser.newContext.baseURL`] is set in the context options and the provided URL is a string that does not start with `*`, it is resolved using the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. 
### param: Page.route.handler * since: v1.8 diff --git a/docs/src/network.md b/docs/src/network.md index bdb5b71c6ee07..6d0c676d6eaf3 100644 --- a/docs/src/network.md +++ b/docs/src/network.md @@ -706,15 +706,13 @@ Playwright uses simplified glob patterns for URL matching in network interceptio 1. Asterisks: - A single `*` matches any characters except `/` - A double `**` matches any characters including `/` -1. Question mark `?` matches any single character except `/` +1. Question mark `?` matches only a literal question mark `?`. To match any character, use `*` instead. 1. Curly braces `{}` can be used to match a list of options separated by commas `,` -1. Square brackets `[]` can be used to match a set of characters 1. Backslash `\` can be used to escape any of special characters (note to escape backslash itself as `\\`) Examples: - `https://example.com/*.js` matches `https://example.com/file.js` but not `https://example.com/path/file.js` -- `https://example.com/\\?page=1` matches `https://example.com/?page=1` but not `https://example.com` -- `**/v[0-9]*` matches `https://example.com/v1/` but not `https://example.com/vote/` +- `https://example.com/?page=1` matches `https://example.com/?page=1` but not `https://example.com` - `**/*.js` matches both `https://example.com/file.js` and `https://example.com/path/file.js` - `**/*.{png,jpg,jpeg}` matches all image requests diff --git a/packages/playwright-client/types/types.d.ts b/packages/playwright-client/types/types.d.ts index abdc85e923892..b327677c09c4a 100644 --- a/packages/playwright-client/types/types.d.ts +++ b/packages/playwright-client/types/types.d.ts @@ -3974,9 +3974,9 @@ export interface Page { * * **NOTE** Enabling routing disables http cache. * - * @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. 
When a - * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context - * options was provided and the passed URL is a path, it gets merged via the + * @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If + * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the + * context options and the provided URL is a string that does not start with `*`, it is resolved using the * [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. * @param handler handler function to route the request. * @param options @@ -9068,9 +9068,9 @@ export interface BrowserContext { * * **NOTE** Enabling routing disables http cache. * - * @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. When a - * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context - * options was provided and the passed URL is a path, it gets merged via the + * @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If + * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the + * context options and the provided URL is a string that does not start with `*`, it is resolved using the * [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. * @param handler handler function to route the request. 
* @param options diff --git a/packages/playwright-core/src/utils/isomorphic/urlMatch.ts b/packages/playwright-core/src/utils/isomorphic/urlMatch.ts index 1d3bd011dc746..a6afce68dde77 100644 --- a/packages/playwright-core/src/utils/isomorphic/urlMatch.ts +++ b/packages/playwright-core/src/utils/isomorphic/urlMatch.ts @@ -50,15 +50,6 @@ export function globToRegex(glob: string): RegExp { } switch (c) { - case '?': - tokens.push('.'); - break; - case '[': - tokens.push('['); - break; - case ']': - tokens.push(']'); - break; case '{': inGroup = true; tokens.push('('); @@ -101,7 +92,37 @@ export function urlMatches(baseURL: string | undefined, urlString: string, match // Allow http(s) baseURL to match ws(s) urls. if (baseURL && /^https?:\/\//.test(baseURL) && /^wss?:\/\//.test(urlString)) baseURL = baseURL.replace(/^http/, 'ws'); - match = constructURLBasedOnBaseURL(baseURL, match); + + const tokenMap = new Map<string, string>(); + function mapToken(original: string, replacement: string) { + if (original.length === 0) + return ''; + tokenMap.set(replacement, original); + return replacement; + } + // Escaped `\\?` behaves the same as `?` in our glob patterns. + match = match.replaceAll(/\\\\\?/g, '?'); + // Glob symbols may be escaped in the URL and some of them such as ? affect resolution, + // so we replace them with safe components first. + const relativePath = match.split('/').map((token, index) => { + if (token === '.' || token === '..' || token === '') + return token; + // Handle special case of http*://, note that the new scheme has to be + // a web scheme so that slashes are properly inserted after domain. 
+ if (index === 0 && token.endsWith(':')) + return mapToken(token, 'http:'); + const questionIndex = token.indexOf('?'); + if (questionIndex === -1) + return mapToken(token, `$_${index}_$`); + const newPrefix = mapToken(token.substring(0, questionIndex), `$_${index}_$`); + const newSuffix = mapToken(token.substring(questionIndex), `?$_${index}_$`); + return newPrefix + newSuffix; + }).join('/'); + let resolved = constructURLBasedOnBaseURL(baseURL, relativePath); + // Pass a replacer function: a string replacement would expand "$&", "$$", etc. found in the original token. + for (const [token, original] of tokenMap) + resolved = resolved.replace(token, () => original); + match = resolved; } if (isString(match)) match = globToRegex(match); diff --git a/packages/playwright-core/types/types.d.ts b/packages/playwright-core/types/types.d.ts index abdc85e923892..b327677c09c4a 100644 --- a/packages/playwright-core/types/types.d.ts +++ b/packages/playwright-core/types/types.d.ts @@ -3974,9 +3974,9 @@ export interface Page { * * **NOTE** Enabling routing disables http cache. * - * @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. When a - * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context - * options was provided and the passed URL is a path, it gets merged via the + * @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If + * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the + * context options and the provided URL is a string that does not start with `*`, it is resolved using the * [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. * @param handler handler function to route the request. * @param options @@ -9068,9 +9068,9 @@ export interface BrowserContext { * * **NOTE** Enabling routing disables http cache. 
* - * @param url A glob pattern, regex pattern or predicate receiving [URL] to match while routing. 
When a - * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) via the context - * options was provided and the passed URL is a path, it gets merged via the + * @param url A glob pattern, regex pattern, or predicate that receives a [URL] to match during routing. If + * [`baseURL`](https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url) is set in the + * context options and the provided URL is a string that does not start with `*`, it is resolved using the * [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor. * @param handler handler function to route the request. * @param options diff --git a/tests/page/interception.spec.ts b/tests/page/interception.spec.ts index d3443f9015f27..1c2a8f5f4665a 100644 --- a/tests/page/interception.spec.ts +++ b/tests/page/interception.spec.ts @@ -16,7 +16,7 @@ */ import { test as it, expect } from './pageTest'; -import { globToRegex } from '../../packages/playwright-core/lib/utils/isomorphic/urlMatch'; +import { globToRegex, urlMatches } from '../../packages/playwright-core/lib/utils/isomorphic/urlMatch'; import vm from 'vm'; it('should work with navigation @smoke', async ({ page, server }) => { @@ -76,7 +76,6 @@ it('should work with glob', async () => { expect(globToRegex('*.js').test('https://localhost:8080/foo.js')).toBeFalsy(); expect(globToRegex('https://**/*.js').test('https://localhost:8080/foo.js')).toBeTruthy(); expect(globToRegex('http://localhost:8080/simple/path.js').test('http://localhost:8080/simple/path.js')).toBeTruthy(); - expect(globToRegex('http://localhost:8080/?imple/path.js').test('http://localhost:8080/Simple/path.js')).toBeTruthy(); expect(globToRegex('**/{a,b}.js').test('https://localhost:8080/a.js')).toBeTruthy(); expect(globToRegex('**/{a,b}.js').test('https://localhost:8080/b.js')).toBeTruthy(); expect(globToRegex('**/{a,b}.js').test('https://localhost:8080/c.js')).toBeFalsy(); @@ -90,21 +89,56 @@ 
it('should work with glob', async () => { expect(globToRegex('http://localhost:3000/signin-oidc*').test('http://localhost:3000/signin-oidc/foo')).toBeFalsy(); expect(globToRegex('http://localhost:3000/signin-oidc*').test('http://localhost:3000/signin-oidcnice')).toBeTruthy(); - // range [] - expect(globToRegex('**/api/v[0-9]').test('http://example.com/api/v1')).toBeTruthy(); + // range [] is NOT supported + expect(globToRegex('**/api/v[0-9]').test('http://example.com/api/v[0-9]')).toBeTruthy(); expect(globToRegex('**/api/v[0-9]').test('http://example.com/api/version')).toBeFalsy(); // query params expect(globToRegex('**/api\\?param').test('http://example.com/api?param')).toBeTruthy(); expect(globToRegex('**/api\\?param').test('http://example.com/api-param')).toBeFalsy(); - expect(globToRegex('**/three-columns/settings.html\\?**id=[a-z]**').test('http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah')).toBeTruthy(); + expect(globToRegex('**/three-columns/settings.html\\?**id=settings-**').test('http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah')).toBeTruthy(); expect(globToRegex('\\?')).toEqual(/^\?$/); expect(globToRegex('\\')).toEqual(/^\\$/); expect(globToRegex('\\\\')).toEqual(/^\\$/); expect(globToRegex('\\[')).toEqual(/^\[$/); - expect(globToRegex('[a-z]')).toEqual(/^[a-z]$/); + expect(globToRegex('[a-z]')).toEqual(/^\[a-z\]$/); expect(globToRegex('$^+.\\*()|\\?\\{\\}\\[\\]')).toEqual(/^\$\^\+\.\*\(\)\|\?\{\}\[\]$/); + + expect(urlMatches(undefined, 'http://playwright.dev/', 'http://playwright.dev')).toBeTruthy(); + expect(urlMatches(undefined, 'http://playwright.dev/?a=b', 'http://playwright.dev?a=b')).toBeTruthy(); + expect(urlMatches(undefined, 'http://playwright.dev/', 'h*://playwright.dev')).toBeTruthy(); + expect(urlMatches(undefined, 'http://api.playwright.dev/?x=y', 'http://*.playwright.dev?x=y')).toBeTruthy(); + 
expect(urlMatches(undefined, 'http://playwright.dev/foo/bar', '**/foo/**')).toBeTruthy(); + expect(urlMatches('http://playwright.dev', 'http://playwright.dev/?x=y', '?x=y')).toBeTruthy(); + expect(urlMatches('http://playwright.dev/foo/', 'http://playwright.dev/foo/bar?x=y', './bar?x=y')).toBeTruthy(); + + // This is not supported, we treat ? as a query separator. + expect(globToRegex('http://localhost:8080/?imple/path.js').test('http://localhost:8080/Simple/path.js')).toBeFalsy(); + expect(urlMatches(undefined, 'http://playwright.dev/', 'http://playwright.?ev')).toBeFalsy(); + expect(urlMatches(undefined, 'http://playwright./?ev', 'http://playwright.?ev')).toBeTruthy(); + expect(urlMatches(undefined, 'http://playwright.dev/foo', 'http://playwright.dev/f??')).toBeFalsy(); + expect(urlMatches(undefined, 'http://playwright.dev/f??', 'http://playwright.dev/f??')).toBeTruthy(); + expect(urlMatches(undefined, 'http://playwright.dev/?x=y', 'http://playwright.dev\\?x=y')).toBeTruthy(); + expect(urlMatches(undefined, 'http://playwright.dev/?x=y', 'http://playwright.dev/\\?x=y')).toBeTruthy(); + expect(urlMatches('http://playwright.dev/foo', 'http://playwright.dev/foo?bar', '?bar')).toBeTruthy(); + expect(urlMatches('http://playwright.dev/foo', 'http://playwright.dev/foo?bar', '\\\\?bar')).toBeTruthy(); + expect(urlMatches('http://first.host/', 'http://second.host/foo', '**/foo')).toBeTruthy(); + expect(urlMatches('http://playwright.dev/', 'http://localhost/', '*//localhost/')).toBeTruthy(); +}); + +it('should intercept by glob', async function({ page, server, isAndroid }) { + it.skip(isAndroid); + + await page.goto(server.EMPTY_PAGE); + await page.route('http://localhos**?*oo', async route => { + await route.fulfill({ + status: 200, + body: 'intercepted', + }); + }); + const result = await page.evaluate(url => fetch(url).then(r => r.text()), server.PREFIX + '/?foo'); + expect(result).toBe('intercepted'); }); it('should intercept network activity from worker', async 
function({ page, server, isAndroid }) { diff --git a/tests/page/page-route.spec.ts b/tests/page/page-route.spec.ts index 4ee9df0e044e4..51a07d20d4065 100644 --- a/tests/page/page-route.spec.ts +++ b/tests/page/page-route.spec.ts @@ -71,10 +71,11 @@ it('should unroute', async ({ page, server }) => { expect(intercepted).toEqual([1]); }); -it('should support ? in glob pattern', async ({ page, server }) => { +it('should not support ? in glob pattern', async ({ page, server }) => { server.setRoute('/index', (req, res) => res.end('index-no-hello')); server.setRoute('/index123hello', (req, res) => res.end('index123hello')); server.setRoute('/index?hello', (req, res) => res.end('index?hello')); + server.setRoute('/index1hello', (req, res) => res.end('index1hello')); await page.route('**/index?hello', async (route, request) => { await route.fulfill({ body: 'intercepted any character' }); @@ -91,7 +92,7 @@ it('should support ? in glob pattern', async ({ page, server }) => { expect(await page.content()).toContain('index-no-hello'); await page.goto(server.PREFIX + '/index1hello'); - expect(await page.content()).toContain('intercepted any character'); + expect(await page.content()).toContain('index1hello'); await page.goto(server.PREFIX + '/index123hello'); expect(await page.content()).toContain('index123hello');