Skip to content

Commit d64a56c

Browse files
bnoordhuisrvagg
authored andcommitted
cluster: remove handles when disconnecting worker
Due to the race window between the master's "disconnect" message and the worker's "handle received" message, connections sometimes got stuck in the pending handles queue when calling `worker.disconnect()` in the master process. The observable effect from the client's perspective was a TCP or HTTP connection that simply stalled. This commit fixes that by closing open handles in the master when the "disconnect" message is sent. Fixes: #3551 PR-URL: #3677 Reviewed-By: Colin Ihrig <[email protected]> Reviewed-By: Fedor Indutny <[email protected]> Reviewed-By: James M Snell <[email protected]>
1 parent b48dbf9 commit d64a56c

File tree

4 files changed

+96
-23
lines changed

4 files changed

+96
-23
lines changed

lib/cluster.js

+26-23
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ function masterInit() {
217217
// Keyed on address:port:etc. When a worker dies, we walk over the handles
218218
// and remove() the worker from each one. remove() may do a linear scan
219219
// itself so we might end up with an O(n*m) operation. Ergo, FIXME.
220-
var handles = {};
220+
const handles = require('internal/cluster').handles;
221221

222222
var initialized = false;
223223
cluster.setupMaster = function(options) {
@@ -308,6 +308,26 @@ function masterInit() {
308308

309309
var ids = 0;
310310

311+
function removeWorker(worker) {
312+
assert(worker);
313+
314+
delete cluster.workers[worker.id];
315+
316+
if (Object.keys(cluster.workers).length === 0) {
317+
assert(Object.keys(handles).length === 0, 'Resource leak detected.');
318+
intercom.emit('disconnect');
319+
}
320+
}
321+
322+
function removeHandlesForWorker(worker) {
323+
assert(worker);
324+
325+
for (var key in handles) {
326+
var handle = handles[key];
327+
if (handle.remove(worker)) delete handles[key];
328+
}
329+
}
330+
311331
cluster.fork = function(env) {
312332
cluster.setupMaster();
313333
const id = ++ids;
@@ -319,26 +339,6 @@ function masterInit() {
319339

320340
worker.on('message', this.emit.bind(this, 'message'));
321341

322-
function removeWorker(worker) {
323-
assert(worker);
324-
325-
delete cluster.workers[worker.id];
326-
327-
if (Object.keys(cluster.workers).length === 0) {
328-
assert(Object.keys(handles).length === 0, 'Resource leak detected.');
329-
intercom.emit('disconnect');
330-
}
331-
}
332-
333-
function removeHandlesForWorker(worker) {
334-
assert(worker);
335-
336-
for (var key in handles) {
337-
var handle = handles[key];
338-
if (handle.remove(worker)) delete handles[key];
339-
}
340-
}
341-
342342
worker.process.once('exit', function(exitCode, signalCode) {
343343
/*
344344
* Remove the worker from the workers list only
@@ -404,6 +404,8 @@ function masterInit() {
404404
Worker.prototype.disconnect = function() {
405405
this.suicide = true;
406406
send(this, { act: 'disconnect' });
407+
removeHandlesForWorker(this);
408+
removeWorker(this);
407409
};
408410

409411
Worker.prototype.destroy = function(signo) {
@@ -490,11 +492,12 @@ function masterInit() {
490492
cluster.emit('listening', worker, info);
491493
}
492494

493-
// Server in worker is closing, remove from list.
495+
// Server in worker is closing, remove from list. The handle may have been
496+
// removed by a prior call to removeHandlesForWorker() so guard against that.
494497
function close(worker, message) {
495498
var key = message.key;
496499
var handle = handles[key];
497-
if (handle.remove(worker)) delete handles[key];
500+
if (handle && handle.remove(worker)) delete handles[key];
498501
}
499502

500503
function send(worker, message, handle, cb) {

lib/internal/cluster.js

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
'use strict';
2+
3+
// Used in tests.
4+
exports.handles = {};

node.gyp

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
'lib/vm.js',
7171
'lib/zlib.js',
7272
'lib/internal/child_process.js',
73+
'lib/internal/cluster.js',
7374
'lib/internal/freelist.js',
7475
'lib/internal/linkedlist.js',
7576
'lib/internal/module.js',
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/* eslint-disable no-debugger */
2+
// Flags: --expose_internals
3+
'use strict';
4+
5+
const common = require('../common');
6+
const assert = require('assert');
7+
const cluster = require('cluster');
8+
const net = require('net');
9+
10+
const Protocol = require('_debugger').Protocol;
11+
12+
if (common.isWindows) {
13+
console.log('1..0 # Skipped: SCHED_RR not reliable on Windows');
14+
return;
15+
}
16+
17+
cluster.schedulingPolicy = cluster.SCHED_RR;
18+
19+
// Worker sends back a "I'm here" message, then immediately suspends
20+
// inside the debugger. The master connects to the debug agent first,
21+
// connects to the TCP server second, then disconnects the worker and
22+
// unsuspends it again. The ultimate goal of this tortured exercise
23+
// is to make sure the connection is still sitting in the master's
24+
// pending handle queue.
25+
if (cluster.isMaster) {
26+
const handles = require('internal/cluster').handles;
27+
// FIXME(bnoordhuis) lib/cluster.js scans the execArgv arguments for
28+
// debugger flags and renumbers any port numbers it sees starting
29+
// from the default port 5858. Add a '.' that circumvents the
30+
// scanner but is ignored by atoi(3). Heinous hack.
31+
cluster.setupMaster({ execArgv: [`--debug=${common.PORT}.`] });
32+
const worker = cluster.fork();
33+
worker.on('message', common.mustCall(message => {
34+
assert.strictEqual(Array.isArray(message), true);
35+
assert.strictEqual(message[0], 'listening');
36+
const address = message[1];
37+
const host = address.address;
38+
const debugClient = net.connect({ host, port: common.PORT });
39+
const protocol = new Protocol();
40+
debugClient.setEncoding('utf8');
41+
debugClient.on('data', data => protocol.execute(data));
42+
debugClient.once('connect', common.mustCall(() => {
43+
protocol.onResponse = common.mustCall(res => {
44+
protocol.onResponse = () => {};
45+
const conn = net.connect({ host, port: address.port });
46+
conn.once('connect', common.mustCall(() => {
47+
conn.destroy();
48+
assert.notDeepStrictEqual(handles, {});
49+
worker.disconnect();
50+
assert.deepStrictEqual(handles, {});
51+
const req = protocol.serialize({ command: 'continue' });
52+
debugClient.write(req);
53+
}));
54+
});
55+
}));
56+
}));
57+
process.on('exit', () => assert.deepStrictEqual(handles, {}));
58+
} else {
59+
const server = net.createServer(socket => socket.pipe(socket));
60+
server.listen(() => {
61+
process.send(['listening', server.address()]);
62+
debugger;
63+
});
64+
process.on('disconnect', process.exit);
65+
}

0 commit comments

Comments
 (0)