Skip to content

Commit 16fdf45

Browse files
patrickhulcepaulirish
authored andcommitted
core(tracehouse): make main-thread-tasks creation more forgiving (#9491)
1 parent 39305be commit 16fdf45

File tree

2 files changed

+475
-70
lines changed

2 files changed

+475
-70
lines changed

lighthouse-core/lib/tracehouse/main-thread-tasks.js

Lines changed: 254 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -44,104 +44,292 @@ const {taskGroups, taskNameToGroup} = require('./task-groups.js');
4444
class MainThreadTasks {
4545
/**
4646
* @param {LH.TraceEvent} event
47-
* @param {TaskNode} [parent]
47+
* @param {Pick<LH.TraceEvent, 'ph'|'ts'>} [endEvent]
4848
* @return {TaskNode}
4949
*/
50-
static _createNewTaskNode(event, parent) {
50+
static _createNewTaskNode(event, endEvent) {
51+
const isCompleteEvent = event.ph === 'X' && !endEvent;
52+
const isStartEndEventPair = event.ph === 'B' && endEvent && endEvent.ph === 'E';
53+
if (!isCompleteEvent && !isStartEndEventPair) {
54+
throw new Error('Invalid parameters for _createNewTaskNode');
55+
}
56+
57+
const startTime = event.ts;
58+
const endTime = endEvent ? endEvent.ts : event.ts + Number(event.dur || 0);
59+
5160
const newTask = {
5261
event,
53-
startTime: event.ts,
54-
endTime: event.ph === 'X' ? event.ts + Number(event.dur || 0) : NaN,
55-
parent: parent,
56-
children: [],
62+
startTime,
63+
endTime,
64+
duration: endTime - startTime,
5765

5866
// These properties will be filled in later
67+
parent: undefined,
68+
children: [],
5969
attributableURLs: [],
6070
group: taskGroups.other,
61-
duration: NaN,
6271
selfTime: NaN,
6372
};
6473

65-
if (parent) {
66-
parent.children.push(newTask);
67-
}
68-
6974
return newTask;
7075
}
7176

7277
/**
73-
* @param {LH.TraceEvent[]} mainThreadEvents
78+
*
79+
* @param {TaskNode} currentTask
80+
* @param {number} stopTs
7481
* @param {PriorTaskData} priorTaskData
82+
* @param {Array<LH.TraceEvent>} reverseEventsQueue
83+
*/
84+
static _assignAllTimersUntilTs(
85+
currentTask,
86+
stopTs,
87+
priorTaskData,
88+
reverseEventsQueue
89+
) {
90+
while (reverseEventsQueue.length) {
91+
const nextTimerInstallEvent = reverseEventsQueue.pop();
92+
// We're out of events to look at; we're done.
93+
if (!nextTimerInstallEvent) break;
94+
95+
// Timer event is after our current task; push it back on for next time, and we're done.
96+
if (nextTimerInstallEvent.ts > stopTs) {
97+
reverseEventsQueue.push(nextTimerInstallEvent);
98+
break;
99+
}
100+
101+
// Timer event is before the current task, just skip it.
102+
if (nextTimerInstallEvent.ts < currentTask.startTime) {
103+
continue;
104+
}
105+
106+
// We're right where we need to be, point the timerId to our `currentTask`
107+
/** @type {string} */
108+
// @ts-ignore - timerId exists on `TimerInstall` events.
109+
const timerId = nextTimerInstallEvent.args.data.timerId;
110+
priorTaskData.timers.set(timerId, currentTask);
111+
}
112+
}
113+
114+
/**
115+
* This function takes the start and end events from a thread and creates tasks from them.
116+
* We do this by iterating through the start and end event arrays simultaneously. For each start
117+
* event we attempt to find its end event.
118+
*
119+
* Because of this matching of start/end events and the need to be mutating our end events queue,
120+
* we reverse the array to more efficiently `.pop()` them off rather than `.shift()` them off.
121+
* While it's true the worst case runtime here is O(n^2), ~99.999% of the time the reverse loop is O(1)
122+
* because the overwhelmingly common case is that end event for a given start event is simply the very next event in our queue.
123+
*
124+
* @param {LH.TraceEvent[]} taskStartEvents
125+
* @param {LH.TraceEvent[]} taskEndEvents
75126
* @param {number} traceEndTs
76127
* @return {TaskNode[]}
77128
*/
78-
static _createTasksFromEvents(mainThreadEvents, priorTaskData, traceEndTs) {
129+
static _createTasksFromStartAndEndEvents(taskStartEvents, taskEndEvents, traceEndTs) {
79130
/** @type {TaskNode[]} */
80131
const tasks = [];
81-
/** @type {TaskNode|undefined} */
82-
let currentTask;
132+
// Create a reversed copy of the array to avoid copying the rest of the queue on every mutation.
133+
// i.e. pop() is O(1) while shift() is O(n), we take the earliest ts element off the queue *a lot*
134+
// so we'll optimize for having the earliest timestamp events at the end of the array.
135+
const taskEndEventsReverseQueue = taskEndEvents.slice().reverse();
136+
137+
for (let i = 0; i < taskStartEvents.length; i++) {
138+
const taskStartEvent = taskStartEvents[i];
139+
if (taskStartEvent.ph === 'X') {
140+
// Task is a complete X event, we have all the information we need already.
141+
tasks.push(MainThreadTasks._createNewTaskNode(taskStartEvent));
142+
continue;
143+
}
83144

84-
for (const event of mainThreadEvents) {
85-
// Save the timer data, TimerInstall events are instant events `ph === 'I'` so process them first.
86-
if (event.name === 'TimerInstall' && currentTask) {
87-
/** @type {string} */
88-
// @ts-ignore - timerId exists when name is TimerInstall
89-
const timerId = event.args.data.timerId;
90-
priorTaskData.timers.set(timerId, currentTask);
145+
// Task is a B/E event pair, we need to find the matching E event.
146+
let matchedEventIndex = -1;
147+
let matchingNestedEventCount = 0;
148+
let matchingNestedEventIndex = i + 1;
149+
150+
// We loop through the reversed end events queue from back to front because we still want to
151+
// see end events in increasing timestamp order.
152+
// While worst case we will loop through all events, the overwhelmingly common case is that
153+
// the immediate next event is our event of interest which makes this loop typically O(1).
154+
for (let j = taskEndEventsReverseQueue.length - 1; j >= 0; j--) {
155+
const endEvent = taskEndEventsReverseQueue[j];
156+
// We are considering an end event, so we'll count how many nested events we saw along the way.
157+
for (; matchingNestedEventIndex < taskStartEvents.length; matchingNestedEventIndex++) {
158+
if (taskStartEvents[matchingNestedEventIndex].ts >= endEvent.ts) break;
159+
160+
if (taskStartEvents[matchingNestedEventIndex].name === taskStartEvent.name) {
161+
matchingNestedEventCount++;
162+
}
163+
}
164+
165+
// The event doesn't have a matching name, skip it.
166+
if (endEvent.name !== taskStartEvent.name) continue;
167+
// The event has a timestamp that is too early, skip it.
168+
if (endEvent.ts < taskStartEvent.ts) continue;
169+
170+
// The event matches our name and happened after start, the last thing to check is if it was for a nested event.
171+
if (matchingNestedEventCount > 0) {
172+
// If it was for a nested event, decrement our counter and move on.
173+
matchingNestedEventCount--;
174+
continue;
175+
}
176+
177+
// If it wasn't, we found our matching E event! Mark the index and stop the loop.
178+
matchedEventIndex = j;
179+
break;
91180
}
92181

93-
// Only look at X (Complete), B (Begin), and E (End) events as they have most data
94-
if (event.ph !== 'X' && event.ph !== 'B' && event.ph !== 'E') continue;
182+
/** @type {Pick<LH.TraceEvent, 'ph'|'ts'>} */
183+
let taskEndEvent;
184+
if (matchedEventIndex === -1) {
185+
// If we couldn't find an end event, we'll assume it's the end of the trace.
186+
// If this creates invalid parent/child relationships it will be caught in the next step.
187+
taskEndEvent = {ph: 'E', ts: traceEndTs};
188+
} else if (matchedEventIndex === taskEndEventsReverseQueue.length - 1) {
189+
// Use .pop() in the common case where the immediately next event is needed.
190+
// It's ~25x faster, https://jsperf.com/pop-vs-splice.
191+
taskEndEvent = /** @type {LH.TraceEvent} */ (taskEndEventsReverseQueue.pop());
192+
} else {
193+
taskEndEvent = taskEndEventsReverseQueue.splice(matchedEventIndex, 1)[0];
194+
}
95195

96-
// Update currentTask based on the elapsed time.
97-
// The next event may be after currentTask has ended.
196+
tasks.push(MainThreadTasks._createNewTaskNode(taskStartEvent, taskEndEvent));
197+
}
198+
199+
if (taskEndEventsReverseQueue.length) {
200+
throw new Error(
201+
`Fatal trace logic error - ${taskEndEventsReverseQueue.length} unmatched end events`
202+
);
203+
}
204+
205+
return tasks;
206+
}
207+
208+
/**
209+
* This function iterates through the tasks to set the `.parent`/`.children` properties of tasks
210+
* according to their implied nesting structure. If any of these relationships seem impossible based on
211+
* the timestamps, this method will throw.
212+
*
213+
* @param {TaskNode[]} sortedTasks
214+
* @param {LH.TraceEvent[]} timerInstallEvents
215+
* @param {PriorTaskData} priorTaskData
216+
*/
217+
static _createTaskRelationships(sortedTasks, timerInstallEvents, priorTaskData) {
218+
/** @type {TaskNode|undefined} */
219+
let currentTask;
220+
// Create a reversed copy of the array to avoid copying the rest of the queue on every mutation.
221+
const timerInstallEventsReverseQueue = timerInstallEvents.slice().reverse();
222+
223+
for (let i = 0; i < sortedTasks.length; i++) {
224+
const nextTask = sortedTasks[i];
225+
226+
// This inner loop updates what our `currentTask` is at `nextTask.startTime - ε`.
227+
// While `nextTask` starts after our `currentTask`, close out the task, popup to the parent, and repeat.
228+
// If at the end `currentTask` is undefined, then `nextTask` is a toplevel task.
229+
// Otherwise, `nextTask` is a child of `currentTask`.
98230
while (
99231
currentTask &&
100232
Number.isFinite(currentTask.endTime) &&
101-
currentTask.endTime <= event.ts
233+
currentTask.endTime <= nextTask.startTime
102234
) {
235+
MainThreadTasks._assignAllTimersUntilTs(
236+
currentTask,
237+
currentTask.endTime,
238+
priorTaskData,
239+
timerInstallEventsReverseQueue
240+
);
103241
currentTask = currentTask.parent;
104242
}
105243

106-
// If we don't have a current task, start a new one.
107-
if (!currentTask) {
108-
// We can't start a task with an end event
109-
if (event.ph === 'E') {
110-
throw new Error('Fatal trace logic error - unexpected end event');
244+
// If there's a `currentTask`, `nextTask` must be a child.
245+
// Set the `.parent`/`.children` relationships and timer bookkeeping accordingly.
246+
if (currentTask) {
247+
if (nextTask.endTime > currentTask.endTime) {
248+
const timeDelta = nextTask.endTime - currentTask.endTime;
249+
// The child task is taking longer than the parent task, which should be impossible.
250+
// If it's less than 1ms, we'll let it slide by increasing the duration of the parent.
251+
// If it's more, throw an error.
252+
if (timeDelta < 1000) {
253+
currentTask.endTime = nextTask.endTime;
254+
currentTask.duration += timeDelta;
255+
} else {
256+
// If we fell into this error, it's usually because of one of three reasons.
257+
// - We were missing an E event for a child task and we assumed the child ended at the end of the trace.
258+
// - There was slop in the opposite direction (child started 1ms before parent) and the child was assumed to be parent instead.
259+
// - The child timestamp ended more than 1ms after tha parent.
260+
// These have more complicated fixes, so handling separately https://github.com/GoogleChrome/lighthouse/pull/9491#discussion_r327331204.
261+
throw new Error('Fatal trace logic error - child cannot end after parent');
262+
}
111263
}
112264

113-
currentTask = MainThreadTasks._createNewTaskNode(event);
114-
tasks.push(currentTask);
115-
116-
continue;
265+
nextTask.parent = currentTask;
266+
currentTask.children.push(nextTask);
267+
MainThreadTasks._assignAllTimersUntilTs(
268+
currentTask,
269+
nextTask.startTime,
270+
priorTaskData,
271+
timerInstallEventsReverseQueue
272+
);
117273
}
118274

119-
if (event.ph === 'X' || event.ph === 'B') {
120-
// We're starting a nested event, create it as a child and make it the currentTask
121-
const newTask = MainThreadTasks._createNewTaskNode(event, currentTask);
122-
tasks.push(newTask);
123-
currentTask = newTask;
124-
} else {
125-
if (currentTask.event.ph !== 'B') {
126-
throw new Error(
127-
`Fatal trace logic error - expected start event, got ${currentTask.event.ph}`);
128-
}
275+
currentTask = nextTask;
276+
}
129277

130-
// We're ending an event, update the end time and the currentTask to its parent
131-
currentTask.endTime = event.ts;
132-
currentTask = currentTask.parent;
133-
}
278+
if (currentTask) {
279+
MainThreadTasks._assignAllTimersUntilTs(
280+
currentTask,
281+
currentTask.endTime,
282+
priorTaskData,
283+
timerInstallEventsReverseQueue
284+
);
134285
}
286+
}
135287

136-
// Starting from the last and bottom-most task, we finish any tasks that didn't end yet.
137-
while (currentTask && !Number.isFinite(currentTask.endTime)) {
138-
// The last event didn't finish before tracing stopped, use traceEnd timestamp instead.
139-
currentTask.endTime = traceEndTs;
140-
currentTask = currentTask.parent;
288+
/**
289+
* This function takes the raw trace events sorted in increasing timestamp order and outputs connected task nodes.
290+
* To create the task heirarchy we make several passes over the events.
291+
*
292+
* 1. Create three arrays of X/B events, E events, and TimerInstall events.
293+
* 2. Create tasks for each X/B event, throwing if a matching E event cannot be found for a given B.
294+
* 3. Sort the tasks by ↑ startTime, ↓ duration.
295+
* 4. Match each task to its parent, throwing if there is any invalid overlap between tasks.
296+
*
297+
* @param {LH.TraceEvent[]} mainThreadEvents
298+
* @param {PriorTaskData} priorTaskData
299+
* @param {number} traceEndTs
300+
* @return {TaskNode[]}
301+
*/
302+
static _createTasksFromEvents(mainThreadEvents, priorTaskData, traceEndTs) {
303+
/** @type {Array<LH.TraceEvent>} */
304+
const taskStartEvents = [];
305+
/** @type {Array<LH.TraceEvent>} */
306+
const taskEndEvents = [];
307+
/** @type {Array<LH.TraceEvent>} */
308+
const timerInstallEvents = [];
309+
310+
// Phase 1 - Create three arrays of X/B events, E events, and TimerInstall events.
311+
for (const event of mainThreadEvents) {
312+
if (event.ph === 'X' || event.ph === 'B') taskStartEvents.push(event);
313+
if (event.ph === 'E') taskEndEvents.push(event);
314+
if (event.name === 'TimerInstall') timerInstallEvents.push(event);
141315
}
142316

143-
// At this point we expect all tasks to have a finite startTime and endTime.
144-
return tasks;
317+
// Phase 2 - Create tasks for each taskStartEvent.
318+
const tasks = MainThreadTasks._createTasksFromStartAndEndEvents(
319+
taskStartEvents,
320+
taskEndEvents,
321+
traceEndTs
322+
);
323+
324+
// Phase 3 - Sort the tasks by increasing startTime, decreasing duration.
325+
const sortedTasks = tasks.sort(
326+
(taskA, taskB) => taskA.startTime - taskB.startTime || taskB.duration - taskA.duration
327+
);
328+
329+
// Phase 4 - Match each task to its parent.
330+
MainThreadTasks._createTaskRelationships(sortedTasks, timerInstallEvents, priorTaskData);
331+
332+
return sortedTasks;
145333
}
146334

147335
/**
@@ -157,7 +345,6 @@ class MainThreadTasks {
157345
const childTime = task.children
158346
.map(child => MainThreadTasks._computeRecursiveSelfTime(child, task))
159347
.reduce((sum, child) => sum + child, 0);
160-
task.duration = task.endTime - task.startTime;
161348
task.selfTime = task.duration - childTime;
162349
return task.duration;
163350
}
@@ -213,7 +400,8 @@ class MainThreadTasks {
213400

214401
task.attributableURLs = attributableURLs;
215402
task.children.forEach(child =>
216-
MainThreadTasks._computeRecursiveAttributableURLs(child, attributableURLs, priorTaskData));
403+
MainThreadTasks._computeRecursiveAttributableURLs(child, attributableURLs, priorTaskData)
404+
);
217405
}
218406

219407
/**
@@ -227,14 +415,18 @@ class MainThreadTasks {
227415
}
228416

229417
/**
230-
* @param {LH.TraceEvent[]} traceEvents
418+
* @param {LH.TraceEvent[]} mainThreadEvents
231419
* @param {number} traceEndTs
232420
* @return {TaskNode[]}
233421
*/
234-
static getMainThreadTasks(traceEvents, traceEndTs) {
422+
static getMainThreadTasks(mainThreadEvents, traceEndTs) {
235423
const timers = new Map();
236424
const priorTaskData = {timers};
237-
const tasks = MainThreadTasks._createTasksFromEvents(traceEvents, priorTaskData, traceEndTs);
425+
const tasks = MainThreadTasks._createTasksFromEvents(
426+
mainThreadEvents,
427+
priorTaskData,
428+
traceEndTs
429+
);
238430

239431
// Compute the recursive properties we couldn't compute earlier, starting at the toplevel tasks
240432
for (const task of tasks) {

0 commit comments

Comments
 (0)