Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: split helpers in other file + add polling function
.
  • Loading branch information
kangoo13 committed May 7, 2023
commit 1266e6a1a20552330c22a96b86b98ac168543135
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"devDependencies": {
"@types/debug": "^4.1.5",
"@types/node-fetch": "^2.5.4",
"@types/psl": "^1.1.0",
"@types/puppeteer": "*",
"ava": "^2.4.0",
"npm-run-all": "^4.1.5",
Expand All @@ -66,6 +67,7 @@
"debug": "^4.1.1",
"devtools-protocol": "^0.0.1138159",
"node-fetch": "^2.6.0",
"psl": "^1.9.0",
"puppeteer-extra-plugin": "^3.2.3",
"redis": "^4.6.6"
},
Expand Down
7 changes: 7 additions & 0 deletions packages/puppeteer-extra-plugin-session-persistence/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ This TypeScript library provides a Puppeteer Extra plugin for persisting session
npm install puppeteer-extra-plugin-session-persistence
```

## Strategies

The plugin supports different strategies for persisting session data, all activated by default:
- A polling strategy: every X milliseconds (default 1000 ms), the cookies of every page are updated. This is very useful for XHR requests whose cookies are set by JavaScript
- On each HTTP response, the cookies are updated from the response's `set-cookie` header
- On the `onFrameNavigated` event, the cookies and localStorage data are updated for every frame

## Usage

```javascript
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import test from 'ava';

import { getDomainFromUrl, getBaseDomainFromUrl } from './helpers';

// getDomainFromUrl keeps the full hostname (subdomains included) whatever the scheme is.
test('getDomainFromUrl', (t) => {
  // Each entry is [input URL, expected hostname].
  const expectations: Array<[string, string]> = [
    ['https://www.example.com', 'www.example.com'],
    ['http://www.example.com', 'www.example.com'],
    ['https://example.com', 'example.com'],
    ['http://example.com', 'example.com'],
    ['https://www.example.co.uk', 'www.example.co.uk'],
    ['http://www.example.co.uk', 'www.example.co.uk'],
    ['https://example.co.uk', 'example.co.uk'],
    ['http://example.co.uk', 'example.co.uk'],
    ['https://subdomain.example.com', 'subdomain.example.com'],
    ['http://subdomain.example.com', 'subdomain.example.com'],
  ];

  for (const [inputUrl, expectedHostname] of expectations) {
    t.is(getDomainFromUrl(inputUrl), expectedHostname);
  }
});

// getBaseDomainFromUrl strips subdomains and keeps the registrable domain,
// including multi-label public suffixes such as `co.uk`.
test('getBaseDomainFromUrl', (t) => {
  // Each entry is [input URL, expected base domain].
  const expectations: Array<[string, string]> = [
    ['https://www.example.com', 'example.com'],
    ['http://www.example.com', 'example.com'],
    ['https://example.com', 'example.com'],
    ['http://example.com', 'example.com'],
    ['https://www.example.co.uk', 'example.co.uk'],
    ['http://www.example.co.uk', 'example.co.uk'],
    ['https://example.co.uk', 'example.co.uk'],
    ['http://example.co.uk', 'example.co.uk'],
    ['https://subdomain.example.com', 'example.com'],
    ['http://subdomain.example.com', 'example.com'],
  ];

  for (const [inputUrl, expectedBaseDomain] of expectations) {
    t.is(getBaseDomainFromUrl(inputUrl), expectedBaseDomain);
  }
});
24 changes: 24 additions & 0 deletions packages/puppeteer-extra-plugin-session-persistence/src/helpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import debug from "debug";

const psl = require('psl');

/**
 * Extracts the hostname (subdomains included) from an absolute URL.
 *
 * @param url - Absolute URL to parse, e.g. `https://www.example.com/path`.
 * @returns The hostname of `url`, or an empty string when `url` cannot be parsed.
 */
export function getDomainFromUrl(url: string): string {
  try {
    return new URL(url).hostname;
  } catch (error) {
    // Invoke the debugger instance itself. `instance.log` is only an optional
    // output override that is unset by default, so calling it would throw from
    // this handler and the empty-string fallback would never be returned.
    debug(`puppeteer-extra-plugin:session-persistence`)('getDomainFromUrl() Error parsing url', url, error);
    return '';
  }
}

/**
 * Extracts the base domain from an absolute URL by passing the URL's hostname
 * to `psl.parse` and reading the `domain` field of the result
 * (e.g. `https://www.example.co.uk` -> `example.co.uk`).
 *
 * @param url - Absolute URL to parse.
 * @returns The `domain` reported by `psl.parse`, or an empty string when the
 *          URL cannot be parsed or the result carries no `domain`.
 */
export function getBaseDomainFromUrl(url: string): string {
  try {
    const { hostname } = new URL(url);
    const parsedDomain = psl.parse(hostname);
    // The result has no usable `domain` when psl could not resolve one.
    return parsedDomain.domain || '';
  } catch (error) {
    // Invoke the debugger instance itself. `instance.log` is only an optional
    // output override that is unset by default, so calling it would throw from
    // this handler and the empty-string fallback would never be returned.
    debug(`puppeteer-extra-plugin:session-persistence`)('getBaseDomainFromUrl() Error parsing url', url, error);
    return '';
  }
}
91 changes: 64 additions & 27 deletions packages/puppeteer-extra-plugin-session-persistence/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ import {Page, HTTPResponse} from 'puppeteer';
import {
Storage,
PluginOptions,
LocalStorageData,createStorage,
LocalStorageData, createStorage,
Cookie,
} from './types';
import {FileSystemStorage} from './storage/fileSystemStorage';
import {Protocol} from 'devtools-protocol';
import CookieSourceScheme = Protocol.Network.CookieSourceScheme;
import CookiePriority = Protocol.Network.CookiePriority;
import {getBaseDomainFromUrl, getDomainFromUrl} from './helpers';
import CookieSameSite = Protocol.Network.CookieSameSite;


/**
Expand Down Expand Up @@ -45,13 +47,17 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
private localStorageData: LocalStorageData = {};
private cookies: Cookie[] = [];
private storage: Storage;
private domainCookiesTrigger: string[] = [];
private needUpdateCookies: boolean = false;
private pageList: Page[] = [];
private pollingInterval: NodeJS.Timeout | null = null;

constructor(opts: Partial<PluginOptions>, localStorageData: LocalStorageData = {}, cookies: Cookie[] = []) {
super(opts)
this.storage = opts.storage ? createStorage(opts.storage) : new FileSystemStorage();
this.localStorageData = localStorageData;
this.cookies = cookies;
this.debug('constructor', {opts: this.opts, localStorageData: this.localStorageData, cookies: this.cookies});
this.debug('constructor', {opts: this.opts, localStorageData: this.localStorageData, cookies: this.cookies.map((c) => c.name)});
}

get name() {
Expand Down Expand Up @@ -82,8 +88,10 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin

get defaults(): PluginOptions {
return {
persistCookies: true,
persistLocalStorage: true,
persistCookiesEnabled: true,
persistLocalStorageEnabled: true,
cookiesPollingEnabled: true,
cookiesPollingInterval: 1000,
storage: {
name: "filesystem",
options: {},
Expand All @@ -95,26 +103,59 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
this.debug('onPluginRegistered');
await this.loadCookies();
await this.loadLocalStorageData();
if (this.opts.cookiesPollingEnabled && this.opts.persistCookiesEnabled && !this.pollingInterval) {
this.debug('onPluginRegistered starting cookies polling');
this.pollingInterval = setInterval(() => {
this.debug('setInterval polling cookies', {needUpdateCookies: this.needUpdateCookies});
if (this.needUpdateCookies) {
this.pageList.forEach(async (page) => {
if (!page.isClosed()) {
try {
this.debug("Updating cookies for page (polling strategy)", page.url());
await this.mergePageCookies(page);
} catch (error) {
this.debug('setInterval error with cookies, removing page from the list', {error});
this.pageList = this.pageList.filter((p) => p !== page);
}
} else {
this.debug('setInterval page is closed, removing page from the list');
this.pageList = this.pageList.filter((p) => p !== page);
}
});
this.needUpdateCookies = false;
}
}, this.opts.cookiesPollingInterval || 1000);
} else {
this.debug('onPluginRegistered cookies polling disabled');
}
this.debug('onPluginRegistered ended', {localStorageData: this.localStorageData});
}

async onClose() {
this.debug('onClose', {localStorageData: this.localStorageData});
if (this.opts.persistCookies) {
if (this.opts.persistCookiesEnabled) {
await this.saveCookies();
}
if (this.opts.persistLocalStorage) {
if (this.opts.persistLocalStorageEnabled) {
await this.saveLocalStorageData();
}
if (this.pollingInterval) {
this.debug('onClose clearing cookies polling interval');
clearInterval(this.pollingInterval);
}
}

// onPageCreated create all the event listeners for the page.
async onPageCreated(page: Page) {
async onPageCreated(page: Page) {
this.debug('onPageCreated adding event listeners');
if (this.opts.cookiesPollingEnabled && this.opts.persistCookiesEnabled) {
this.pageList.push(page);
}
await this.setPageCookies(page);
await page.setBypassCSP(true);
page.on('framenavigated', () => this.onFrameNavigated(page));
page.on('response', this.onResponseReceived.bind(this));

}

// loadLocalStorageData loads the localStorage data from the localStorageData file, if the file does not exist, it will try to create it.
Expand All @@ -138,7 +179,7 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
try {
const cookies = await this.storage.loadCookies();
this.mergeCookies(cookies);
this.debug('loadCookies loaded', {cookies: this.cookies});
this.debug('loadCookies loaded', {cookies: this.cookies.map((c) => c.name)});
} catch (err) {
this.debug('loadCookies ended with error', {err});
await this.saveCookies();
Expand All @@ -152,7 +193,7 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
}

async setPageCookies(page: Page) {
this.debug('setPageCookies', this.cookies);
this.debug('setPageCookies', this.cookies.map((c) => c.name));
const pageTarget = page.target();
const client = await pageTarget.createCDPSession();
const rtValue = await client.send('Network.setCookies', {cookies: this.cookies});
Expand All @@ -174,11 +215,11 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
let secure = false;
let session = false;
let path = '/';
let sameSite: 'Lax' | 'Strict' | 'None' = 'Lax';
let sameSite: CookieSameSite = 'Lax';
let sameParty = false;
let sourceScheme = 'Secure';
let sourceScheme: CookieSourceScheme = 'Secure';
let sourcePort = 443;
let priority = 'Low';
let priority: CookiePriority = 'Low';

cookieParts.slice(1).forEach((part) => {
const [key, value] = part.trim().split('=');
Expand All @@ -203,13 +244,13 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
sameParty = true;
break;
case 'sourcescheme':
sourceScheme = value || 'Secure';
sourceScheme = value as CookieSourceScheme || 'Secure';
break;
case 'sourceport':
sourcePort = parseInt(value, 10) || 443;
break;
case 'priority':
priority = value || 'Low';
priority = value as CookiePriority || 'Low';
break;
}
});
Expand Down Expand Up @@ -243,6 +284,9 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
const parsedCookies = await this.extractCookiesFromResponse(cookies, response.url());
await this.mergeCookies(parsedCookies);
}
if (this.domainCookiesTrigger.includes(getBaseDomainFromUrl(response.url()))) {
this.needUpdateCookies = true;
}
}

// mergeCookies merges the cookies we have with incoming cookies.
Expand Down Expand Up @@ -276,7 +320,11 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin

async onFrameNavigated(page: Page) {
this.debug('onFrameNavigated');
const domainUrl = this.getDomainFromUrl(page.url());
const domainUrl = getDomainFromUrl(page.url());
const baseDomainUrl = getBaseDomainFromUrl(page.url());
if (!this.domainCookiesTrigger.includes(baseDomainUrl)) {
this.domainCookiesTrigger.push(baseDomainUrl);
}

try {
await this.setLocalStorageValues(page, domainUrl);
Expand Down Expand Up @@ -339,21 +387,10 @@ export class PuppeteerExtraPluginSessionPersistence extends PuppeteerExtraPlugin
this.mergeCookies(cookies.cookies);
}

getDomainFromUrl(url: string): string {
try {
const parsedUrl = new URL(url);
return parsedUrl.hostname;
} catch (error) {
this.debug('getDomainFromUrl error', {error});
return '';
}
}


}

const defaultExport = (options?: Partial<PluginOptions>, localStorageData: LocalStorageData = {}, cookies: Cookie[] = []) => {
return new PuppeteerExtraPluginSessionPersistence(options??{}, localStorageData, cookies)
return new PuppeteerExtraPluginSessionPersistence(options ?? {}, localStorageData, cookies)
}

export default defaultExport
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,11 @@ export type InMemoryStorageConfig = {
};

export interface PluginOptions {
persistCookies?: boolean;
persistLocalStorage?: boolean;
persistCookiesEnabled?: boolean;
persistLocalStorageEnabled?: boolean;
storage: StorageConfig;
cookiesPollingEnabled?: boolean;
cookiesPollingInterval?: number;
}

export interface LocalStorageData {
Expand Down