Skip to content
113 changes: 113 additions & 0 deletions benchmark/timers/immediate.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
'use strict';
var common = require('../common.js');

var bench = common.createBenchmark(main, {
thousands: [2000],
type: ['depth', 'depth1', 'breadth', 'breadth1', 'breadth4', 'clear']
});

function main(conf) {
var N = +conf.thousands * 1e3;
switch (conf.type) {
case 'depth':
depth(N);
break;
case 'depth1':
depth1(N);
break;
case 'breadth':
breadth(N);
break;
case 'breadth1':
breadth1(N);
break;
case 'breadth4':
breadth4(N);
break;
case 'clear':
clear(N);
break;
}
}

// setImmediate tail recursion, 0 arguments
function depth(N) {
var n = 0;
bench.start();
setImmediate(cb);
function cb() {
n++;
if (n === N)
bench.end(N / 1e3);
else
setImmediate(cb);
}
}

// setImmediate tail recursion, 1 argument
function depth1(N) {
var n = 0;
bench.start();
setImmediate(cb, 1);
function cb(a1) {
n++;
if (n === N)
bench.end(N / 1e3);
else
setImmediate(cb, 1);
}
}

// concurrent setImmediate, 0 arguments
function breadth(N) {
var n = 0;
bench.start();
function cb() {
n++;
if (n === N)
bench.end(N / 1e3);
}
for (var i = 0; i < N; i++) {
setImmediate(cb);
}
}

// concurrent setImmediate, 1 argument
function breadth1(N) {
var n = 0;
bench.start();
function cb(a1) {
n++;
if (n === N)
bench.end(N / 1e3);
}
for (var i = 0; i < N; i++) {
setImmediate(cb, 1);
}
}

// concurrent setImmediate, 4 arguments
function breadth4(N) {
var n = 0;
bench.start();
function cb(a1, a2, a3, a4) {
n++;
if (n === N)
bench.end(N / 1e3);
}
for (var i = 0; i < N; i++) {
setImmediate(cb, 1, 2, 3, 4);
}
}

function clear(N) {
bench.start();
function cb(a1) {
if (a1 === 2)
bench.end(N / 1e3);
}
for (var i = 0; i < N; i++) {
clearImmediate(setImmediate(cb, 1));
}
setImmediate(cb, 2);
}
18 changes: 16 additions & 2 deletions lib/internal/linkedlist.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ function init(list) {
}
exports.init = init;

// create a new linked list
function create() {
const list = { _idleNext: null, _idlePrev: null };
init(list);
return list;
}
exports.create = create;

// show the most idle item
function peek(list) {
Expand Down Expand Up @@ -42,10 +49,17 @@ exports.remove = remove;

// remove a item from its list and place at the end.
function append(list, item) {
remove(item);
if (item._idleNext || item._idlePrev) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would the common case of no trailing list element make it better to check these in the opposite order?

Copy link
Author

@andrasq andrasq Apr 28, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

linkedlist.js implements a circular list, the expected case is that both will be set or not set together.
This is just a fast-path optimization for newly created list nodes that have never been linked.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right. Forgot about that.

remove(item);
}

// items are linked with _idleNext -> (older) and _idlePrev -> (newer)
// TODO: swap the linkage to match the intuitive older items at "prev"
item._idleNext = list._idleNext;
list._idleNext._idlePrev = item;
item._idlePrev = list;

// the list _idleNext points to tail (newest) and _idlePrev to head (oldest)
list._idleNext._idlePrev = item;
list._idleNext = item;
}
exports.append = append;
Expand Down
82 changes: 44 additions & 38 deletions lib/timers.js
Original file line number Diff line number Diff line change
Expand Up @@ -502,24 +502,26 @@ Timeout.prototype.close = function() {
};


var immediateQueue = {};
L.init(immediateQueue);
var immediateQueue = L.create();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand why this change is needed or is helpful but I guess it does clean things up a little.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

prevents a hidden class change

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at top-level it was changed to match processImmediate (and it cleans things up a bit).
In processImmediate it's a speedup, L.create() returns an access-optimized object.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I bet the object ends up being access-optimized after enough runs anyway. Not to mention we can make objects access-optimized explicitly.

That said - this change improves style anyway and is better coding.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably, though depends on how many immediates are queued in an event loop cycle.
The immediateQueue is re-created every time, so the optimization would not persist
(and there would be a run-time cost for the conversion).

Out of curiosity, what are the ways of creating access-optimized objects? I know about
objects created with { ... }, the prototype properties of new objects, (the this. properties
as assigned in the constructor?), and assigning an object as the prototype of a function
forces optimization. Any others?

Copy link
Member

@benjamingr benjamingr Apr 30, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andrasq conveniently, here is a StackOverflow answer I wrote about a technique petkaantonov used in bluebird (with making an object as a prototype of a function).

This also works with Object.create so I guess that's another one.

this. properties assigned in the constructor is the "standard" way though, it's even "easier" than object literals and it makes the fact it's static obvious to v8 (object literals work fine too usually).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be explicit - this specific change LGTM, even if it's not faster but it probably is given the old code.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's a great writeup, and a handy reference, thanks!



function processImmediate() {
var queue = immediateQueue;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While we're touching this code, const might be nice.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed

var domain, immediate;

immediateQueue = {};
L.init(immediateQueue);
immediateQueue = L.create();

while (L.isEmpty(queue) === false) {
immediate = L.shift(queue);
domain = immediate.domain;

if (!immediate._onImmediate)
continue;

if (domain)
domain.enter();

immediate._callback = immediate._onImmediate;
tryOnImmediate(immediate, queue);

if (domain)
Expand All @@ -540,7 +542,8 @@ function processImmediate() {
function tryOnImmediate(immediate, queue) {
var threw = true;
try {
immediate._onImmediate();
// make the actual call outside the try/catch to allow it to be optimized
runCallback(immediate);
threw = false;
} finally {
if (threw && !L.isEmpty(queue)) {
Expand All @@ -554,67 +557,73 @@ function tryOnImmediate(immediate, queue) {
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a breaking change

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, nvm, #6206 has not landed yet

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this would be breaking if #6206 had landed, as it doesn't get rid of the Immediate class, and instead just moves the property assignments into the constructor.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That code changed since the comment was left here.


function runCallback(timer) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure we don't do this optimization anywhere else in the code more generically? It sounds like something that would go in util. If we don't then LGTM.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I use this construct in other projects, and found it hard to make generic;
the call semantics don't cover all uses cases well. I have 3 variants,
this version I used here is a 4th.

That being said, I agree, an optimized call invoker is a nice utility to have.

var argv = timer._argv;
var argc = argv ? argv.length : 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

switch (argc) {
// fast-path callbacks with 0-3 arguments
case 0:
return timer._callback();
case 1:
return timer._callback(argv[0]);
case 2:
return timer._callback(argv[0], argv[1]);
case 3:
return timer._callback(argv[0], argv[1], argv[2]);
// more then 3 arguments run slower with .apply
default:
return timer._callback.apply(timer, argv);
}
}

function Immediate() { }

Immediate.prototype.domain = undefined;
Immediate.prototype._onImmediate = undefined;
Immediate.prototype._idleNext = undefined;
Immediate.prototype._idlePrev = undefined;

function createImmediate(callback, args) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why we need the stylistic change of returning an Object rather than using a constructor. I think it would be more consistent to still have a function Immediate that sets all these properties on this. It would change less things and would be as fast.

That said, setting properties locally rather than on the prototype definitely helps with hidden classes and is preferable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If that is possible then we should probably go that way imo.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried various constructs, used the fastest, and used style as a tie-breaker.

I timed the new Immediate(callback, args) version setting properties on this with
and without a prototype, and the current static object is 50% faster on the breadth tests
in benchmarks/timers/immediate.js (10% for the slow 4+ args case)

my experimental Immediate was (a separate test predeclared all properties as null)

function Immediate(callback, args) {
  this._callback = callback;
  this._argv = args;
  this._onImmediate = callback;
  this.domain = process.domain;
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't buy it that new Immediate was slower than returning static object. It should be as fast while providing better debug traces (because it is named) - both get the same hidden class exactly.

Putting object properties on the prototype is a perf killer. Not suggesting we stick to that. Definitely going to see an improvement compared to prototype properties anyway.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's weird, I just ran a set of micro-benchmarks and they're exactly like you say,
setting this.x properties is very fast, and 50% faster than inheriting from prototype.
But inside timers.js a { ... } style struct is faster.

I ran the same benchmark side-by-side with --trace-gc, and the new Immediate version
prints 20% more gc scavenge lines (207 vs 248). But it also runs 20% longer, 5200
vs 6250 ms, so the gc may be normal (proportional to runtime), not sure. The only change
was how the immediate objects are allocated, createImmediate vs new Immediate
with the object

function Immediate(callback, args) {
    this._idleNext = null;
    this._idlePrev = null;
    this._callback = callback;
    this._argv = args;
    this._onImmediate = callback;
    this.domain = process.domain;
}

The timers/immediate.js breadth benchmark shows a larger 55% difference in favor of { ... }
(the 0 and 1 arg cases; the 4-args case is 11%)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Care to share the actual code you benched in both cases so I could have a look?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I double checked and it's identical in terms of performance. I think we should go with new Immediate and fix the benchmarks.

Copy link
Author

@andrasq andrasq May 1, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you test? I'm still seeing a very consisten difference with the sources from
this pull request. The following test can be run standalone to show the difference:

var n = 0;
function counter() { n += 1; }

var count = 1000000;
var t1 = Date.now();
for (var i=0; i<count; i++) setImmediate(counter);
setImmediate(function() {
    var t2 = Date.now();
    console.log("ran %dk setImmediate in %d ms (n calls)", count/1000, (t2 - t1), n + 1, process.memoryUsage());
})

createImmediate version: (run with $ taskset 8 ./node time-setImmediate.js):
ran 1000k setImmediate in 301 ms (n calls) 1000001 { rss: 64249856, heapTotal: 51257344, heapUsed: 38976796 }
new Immediate version:
ran 1000k setImmediate in 5295 ms (n calls) 1000001 { rss: 97222656, heapTotal: 82714624, heapUsed: 66396932 }

This is much more extreme a difference than in the other tests, but it's consistent.
The rss and heap footprint is much larger, I'm seeing a lot of calls to futex() and 12k
context switches / second while it's running. I don't know what it's doing to run so slow.
On other tests I'm seeing a steady 15% difference.

The "other test" is the one-liner from a larger suite

case '4a1':
  runit(nloops, 200, 10000, "set and run 10k setImmediate, 1 arg", function(cb) { for (var i=0; i<10000; i++) setImmediate(f, 1); setImmediate(cb) });
  break;

I packaged up my timeit / runit utility as qtimeit, so the test can be run in isolation

timeit = require('qtimeit');
function f(){}
timeit.runit(10, 200, 10000, "set and run 10k setImmediate, 1 arg", function(cb) { for (var i=0; i<10000; i++) setImmediate(f, 1); setImmediate(cb) });

createImmediate: 200 loops in 0.5001 sec: 399.92 / sec,
new Immediate: 200 loops in 0.5859 sec: 341.35 / sec,

Copy link
Author

@andrasq andrasq May 1, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. When I patch the original v6.0.0 branch with the setImmediate function and
create var immediate = createImmediate(null, null) and leave everything else alone,
there is a straight-forward 24% speedup from 524ms to 420ms for 1 million immediates
(ran the stand-alone test above). No context switching, no huge slowdown.
I'll try to isolate the cause.

Edit: clarifiction: the 524ms is the new setImmediate without a prototype, I didn't notice that
I had grabbed that too from the diffs. The original Immediate object (with a prototype)
runs in 1017ms.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found the reason, and it makes no sense, but the good news is that the workaround is
easy, and yes new Immediate is just as fast, and the weird super-slow performance is gone.
(and the weird context switches and rss/heap discrepancy also go away)

If new Immediate() stores to this a passed-in function, the code runs slow.
If it's not passed in, or if the callback is not stored, the call runs fast. The args
parameter can be passed and assigned without penalty. Annotating the created object
is very fast and also incurs no penalty. (I have not been able to replicate this anomaly
standalone.)

Measuring small runs of tasks with a tool that subtracts the timing loop overhead,
the difference between createImmediate and new Immediate is 7% or so, close enough.
The microbenchmarks use a much much larger dataset (2m and 4m vs 10k) which seems
to favor createImmediate (by 70%), but the smaller dataset is more likely.

Either way, the immediates processing rate is over 250% faster than before this change
(standalone microbenchmark) or 350% (small runs, overhead subtracted).

return {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly here RE: creating a new instance with a no prototype inheritance

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no difference; see reply above

_idleNext: null,
_idlePrev: null,
_callback: callback,
_argv: args,
_onImmediate: callback,
domain: process.domain,
};
}

exports.setImmediate = function(callback, arg1, arg2, arg3) {
if (typeof callback !== 'function') {
throw new TypeError('"callback" argument must be a function');
}

var i, args;
var len = arguments.length;
var immediate = new Immediate();

L.init(immediate);

switch (len) {
switch (arguments.length) {
// fast cases
case 0:
case 1:
immediate._onImmediate = callback;
break;
case 2:
immediate._onImmediate = function() {
callback.call(immediate, arg1);
};
args = [arg1];
break;
case 3:
immediate._onImmediate = function() {
callback.call(immediate, arg1, arg2);
};
args = [arg1, arg2];
break;
case 4:
immediate._onImmediate = function() {
callback.call(immediate, arg1, arg2, arg3);
};
args = [arg1, arg2, arg3];
break;
// slow case
default:
var len = arguments.length;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the 1/2/3/more argument optimization really help with anything here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's a good deal faster than iterating over arguments iirc

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

correct, x = [1, 2, 3] runs faster than x = new Array(3), and setting the values is extra.
Some 50+% faster to create pre-populated arrays of fixed length 3. (In fact, [1], [1,2,3] and
[1,2,3,4,5,6,7,8,9] all run equally fast, while explicitly setting values is linear in array length.)

Normally either would be blazing fast, 75 million vs 48 million / second, but when tuning
code that itself runs at 4 million operations / second, the difference amounts to 3% of the total.
(the baseline was 1 million / second, where the difference was under 1%)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but we're not actually invoking anything here, is it still slow to pass arguments around to the same function where you can have this sort of dispatch anyway?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so that's a great question, I checked, and the answer turns to be weird -- the "/more" case
(where the arguments are copied out) runs much faster. Like 3x faster. But the special-cased
0/1/2/3 args run 25-30% slower. I guess passing arguments still disables optimization.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@benjamingr I changed the three lines that copy the callback params out of arguments,
for an added 2x speedup of the "many" (more than 3) arguments handling. (And yes, it's
still faster to special-case the 1/2/3 handling, but 4/5 no longer, so 4 is the crossover.)

Turns out node v6 is much much faster to .apply args in an array that was dynamically
extended than if it was allocated to the exact size. Seems to have changed with v6;
v5 and v4 were faster to .apply arrays that were created the correct size.
The change is significant, now 4x slower than before.

From the commit message:

        // faster to copy args, but v6 very slow to .apply them:
        var argv = new Array(arguments.length - 1);   // exact size
        for (var i = 1; i < arguments.length; i++)
          argv[i - 1] = arguments[i];

        // slower to copy args, but v6 much faster to .apply them:
        var argv = new Array();                       // grow as needed
        for (var i = 1; i < arguments.length; i++)
          argv[i - 1] = arguments[i];

args = new Array(len - 1);
for (i = 1; i < len; i++)
args[i - 1] = arguments[i];

immediate._onImmediate = function() {
callback.apply(immediate, args);
};
break;
}
var immediate = createImmediate(callback, args);

if (!process._needImmediateCallback) {
process._needImmediateCallback = true;
process._immediateCallback = processImmediate;
}

if (process.domain)
immediate.domain = process.domain;

L.append(immediateQueue, immediate);

return immediate;
Expand All @@ -626,9 +635,6 @@ exports.clearImmediate = function(immediate) {

immediate._onImmediate = undefined;

L.remove(immediate);

if (L.isEmpty(immediateQueue)) {
process._needImmediateCallback = false;
}
// leave queued, much faster overall to skip it later in processImmediate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

much faster

I don't think this is true, L.remove() is a constant-time operation. The performance gained will be minimal at best, insignificant either way.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the result is very reproducible, it's 3x faster to set/clear without the remove.
It could be caching effects, ie during processing next item has already been prefetched
when the previous one was removed for processing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's ever possible to even have this show 1% in a v8 profile of an actual application, could you compile some benchmark results for us on these?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect actual applications make very few calls to clearImmediate
so you're most likely right. I was focusing on the effect of deferring the remove a bit.

The output of test from benchmark/timers/immediate.js
With the remove as it existed:
timers/immediate.js thousands=2000 type=clear: 1388.82567
Modified to not remove (ie, the git diff shown above for lines 629-633 vs 639):
timers/immediate.js thousands=2000 type=clear: 3536.98189
The test:

function clear(N) {
  bench.start();
  function cb(a1) {
    if (a1 === 2)
      bench.end(N / 1e3);
  }
  for (var i = 0; i < N; i++) {
    clearImmediate(setImmediate(cb, 1));
  }
  setImmediate(cb, 2);
}

This test creates 2m cleared immediates and shows 254% speedup.
My other test that loops 10k gets 302%.

return;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oops, indeed, left over from my first attempt to bypass the unlink. Fixing.

};
5 changes: 5 additions & 0 deletions test/parallel/test-timers-linked-list.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,8 @@ assert.equal(C, L.shift(list));
// list
assert.ok(L.isEmpty(list));

const list2 = L.create();
const list3 = L.create();
assert.ok(L.isEmpty(list2));
assert.ok(L.isEmpty(list3));
assert.ok(list2 != list3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe also add:

L.init(list);
assert.deepEqual(list2, list);

(I think that should pass)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, we'd block-scope the tests and the variables. For a later PR I suppose.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the assertion failed... which makes sense in hindsight, circular lists point to themselves,
ie will not be the same. The isEqual test above checks equivalence, ie that the circular
links are set up correctly.