Graceful exit with cluster and pm

2013-06-07 19:20

Graceful exit with cluster and pm

by MK2

at 2013-06-07 11:20:00

original http://www.cnblogs.com/fengmk2/archive/2013/06/07/graceful-exit-pm-cluster.html

相信大家看完 @不四 的《Node.js 异步异常的处理与domain模块解析》后,已经对 domain 模块有了更深一步的了解。



本次分享主要介绍一下使用 graceful 配合 cluster 或 pm 实现当发生 uncaughtException 的时候优雅退出(graceful exit)。



Cluster



官方cluster模块的例子原型代码来自: Warning: Don’t Ignore Errors!



graceful 的默认示例是基于 cluster 写的: connect with cluster example



一般地,我会将代码结构分为3个文件:




  • dispatch.js: master, 管理worker进程,报警等

  • worker.js: worker,实际工作进程的容器

  • app.js: web server, hsf, other services and other logic.



dispatch.js



/**
 * dispatch.js: the cluster master.
 *
 * Runs worker.js in two worker processes, forks a replacement as soon as a
 * worker disconnects, and logs worker disconnect/exit events to stderr.
 */
var cluster = require('cluster');
var path = require('path');

cluster.setupMaster({
  // `__dirname` is the directory containing this module, so worker.js is
  // resolved next to dispatch.js. (A bare `dirname` is not defined and would
  // throw a ReferenceError before any worker is forked.)
  exec: path.join(__dirname, 'worker.js')
});

// Start with two workers.
cluster.fork();
cluster.fork();

// A worker disconnected: fork a replacement immediately and log both pids.
cluster.on('disconnect', function (worker) {
  var w = cluster.fork();
  console.error('[%s] [master:%s] wroker:%s disconnect! new worker:%s fork',
    new Date(), process.pid, worker.process.pid, w.process.pid);
});

// Log once the worker process has actually exited.
cluster.on('exit', function (worker) {
  console.error('[%s] [master:%s] wroker:%s exit!',
    new Date(), process.pid, worker.process.pid);
});


app.js



app.js 不会涉及 cluster 和 graceful 的代码,让业务逻辑不需要关注系统的基本功能。



var http = require('http');
var connect = require('connect');

/**
 * Main middleware: once the request has ended, either trigger an
 * asynchronous error (for the '/asycerror' url) or answer with a JSON
 * description of the request.
 */
function echoRequest(req, res, next) {
  req.on('end', function () {
    if (req.url !== '/asycerror') {
      process.nextTick(function () {
        res.setHeader('content-type', 'text/json');
        var info = {
          method: req.method,
          url: req.url,
          headers: req.headers,
          Connection: res.getHeader('connection') || 'keep-alive',
          pid: process.pid,
        };
        res.end(JSON.stringify(info));
      });
      return;
    }

    // `foo` is not defined in this file, so this throws from a timer
    // callback 10 ms later; no response is written for this request.
    setTimeout(function () {
      foo.bar();
    }, 10);
  });

  // Start the request stream flowing; the 'end' handler above runs when it
  // has finished.
  req.resume();
}

/**
 * Error middleware (four-argument form): logs the error together with its
 * domain flag and answers with a plain-text 500 containing the same message.
 */
function reportError(err, req, res, next) {
  var thrownByDomain = err.domain_thrown || err.domainThrown;
  var text = 'domainThrown: ' + thrownByDomain + '\n' + err.stack;
  console.error('%s %s\n%s', req.method, req.url, text);

  res.statusCode = 500;
  res.setHeader('content-type', 'text/plain');
  res.end(text + '\n');
}

var app = connect(echoRequest, reportError);

// Export the (not yet listening) HTTP server.
var server = http.createServer(app);
module.exports = server;


worker.js



// worker.js: starts the web server and a second "rest api" server inside a
// worker process, then registers both with graceful.
var PORT = Number(process.env.PORT) || 1337;
var REST_API_PORT = 1985;

var graceful = require('graceful');
var server = require('./app');

server.listen(PORT);
console.log('[%s] [worker:%s] web server start listen on %s', new Date(), process.pid, PORT);

var restapi = require('http').createServer().listen(REST_API_PORT);
console.log('[%s] [worker:%s] rest api start listen on %s', new Date(), process.pid, REST_API_PORT);

/**
 * Error hook passed to graceful. Appends the throw count and the worker pid
 * to the error message when the error has one.
 * Custom reporting (log, email, phone call, ...) belongs here, e.g.
 * logger.error(err).
 */
function onUncaughtError(err, throwErrorCount) {
  if (!err.message) {
    return;
  }
  err.message += ' (uncaughtException throw ' + throwErrorCount + ' times on pid:' + process.pid + ')';
}

graceful({
  server: [server, restapi],
  killTimeout: 10000,
  error: onUncaughtError
});


stdout



$ node example/connect_with_cluster/dispatch.js 
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10231] web server start listen on 1337
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10231] rest api start listen on 1985
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10232] web server start listen on 1337
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10232] rest api start listen on 1985

$ curl localhost:1337/
{"method":"GET","url":"/","headers":{"user-agent":"curl/7.24.0 (x86_64-apple-darwin12.0) libcurl/7.24.0 OpenSSL/0.9.8r zlib/1.2.5","host":"localhost:1337","accept":"*/*"},"Connection":"keep-alive","pid":10232}

$ curl localhost:1337/asycerror
# hang for a while
curl: (52) Empty reply from server

# Master
$ node example/connect_with_cluster/dispatch.js
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10231] web server start listen on 1337
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10231] rest api start listen on 1985
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10232] web server start listen on 1337
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10232] rest api start listen on 1985
[uncaughtException] throw error 1 times
[ReferenceError: foo is not defined (uncaughtException throw 1 times on pid:10232)]
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10232] close 2 servers!
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10232] worker disconnect!
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [master:10230] wroker:10232 disconnect! new worker:10288 fork
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10288] web server start listen on 1337
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10288] rest api start listen on 1985
[Fri Apr 19 2013 09:34:31 GMT+0800 (CST)] [worker:10232] kill timeout, exit now.
[Fri Apr 19 2013 09:34:31 GMT+0800 (CST)] [master:10230] wroker:10232 exit!

# test again
$ curl localhost:1337/asycerror
# hang for a while
curl: (52) Empty reply from server

# Master
$ node example/connect_with_cluster/dispatch.js
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10231] web server start listen on 1337
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10231] rest api start listen on 1985
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10232] web server start listen on 1337
[Fri Apr 19 2013 09:32:29 GMT+0800 (CST)] [worker:10232] rest api start listen on 1985
[uncaughtException] throw error 1 times
[ReferenceError: foo is not defined (uncaughtException throw 1 times on pid:10232)]
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10232] close 2 servers!
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10232] worker disconnect!
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [master:10230] wroker:10232 disconnect! new worker:10288 fork
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10288] web server start listen on 1337
[Fri Apr 19 2013 09:34:21 GMT+0800 (CST)] [worker:10288] rest api start listen on 1985
[Fri Apr 19 2013 09:34:31 GMT+0800 (CST)] [worker:10232] kill timeout, exit now.
[Fri Apr 19 2013 09:34:31 GMT+0800 (CST)] [master:10230] wroker:10232 exit!

[uncaughtException] throw error 1 times
[ReferenceError: foo is not defined (uncaughtException throw 1 times on pid:10288)]
[Fri Apr 19 2013 09:36:17 GMT+0800 (CST)] [worker:10288] close 2 servers!
[Fri Apr 19 2013 09:36:17 GMT+0800 (CST)] [worker:10288] worker disconnect!
[Fri Apr 19 2013 09:36:17 GMT+0800 (CST)] [master:10230] wroker:10288 disconnect! new worker:10351 fork
[Fri Apr 19 2013 09:36:17 GMT+0800 (CST)] [worker:10351] web server start listen on 1337
[Fri Apr 19 2013 09:36:17 GMT+0800 (CST)] [worker:10351] rest api start listen on 1985
[Fri Apr 19 2013 09:36:27 GMT+0800 (CST)] [worker:10288] kill timeout, exit now.
[Fri Apr 19 2013 09:36:27 GMT+0800 (CST)] [master:10230] wroker:10288 exit!


Graceful exit with pm



pm: A graceful node library to contribute a permanent “master-worker” server.



同样,也是按照3个文件划分职能,示例见: graceful exit demo



dispatch.js



/**
 * dispatch.js: the pm master.
 *
 * Registers worker.js as the "web" process group (port 1984, 2 children),
 * logs the master's lifecycle events, and forks a replacement whenever a
 * worker disconnects.
 */
var master = require('pm').createMaster();

// pm gave up restarting a process group for now; the log line reports the
// group name, the failure count and the pause before the next attempt.
master.on('giveup', function (name, fatals, pause) {
  console.log('[%s] [master:%s] giveup to restart "%s" process after %d times. pm will try after %d ms.',
    new Date(), process.pid, name, fatals, pause);
});

// A worker disconnected: fork a replacement in the same group right away.
master.on('disconnect', function (name, pid) {
  // console.log('%s %s disconnect', name, pid)
  var w = master.fork(name);
  console.error('[%s] [master:%s] worker:%s disconnect! new worker:%s fork',
    new Date(), process.pid, pid, w.process.pid);
});

master.on('fork', function (name, pid) {
  console.log('[%s] [master:%s] new %s:worker:%s fork',
    new Date(), process.pid, name, pid);
});

master.on('quit', function (name, pid, code, signal) {
  console.log('[%s] [master:%s] %s:worker:%s quit, code: %s, signal: %s',
    new Date(), process.pid, name, pid, code, signal);
});

// `__dirname` is the directory containing this module, so worker.js is
// resolved next to dispatch.js. (A bare `dirname` is not defined and would
// throw a ReferenceError here.)
master.register('web', __dirname + '/worker.js', {
  listen: 1984,
  children: 2
});

master.dispatch();


app.js



var http = require('http');

/**
 * Request handler: answers with a JSON body holding the request url and the
 * worker pid, except for '/asyncerror', which triggers an asynchronous error
 * instead of responding.
 */
function handleRequest(req, res) {
  if (req.url !== '/asyncerror') {
    var body = JSON.stringify({
      url: req.url,
      pid: process.pid,
    });
    res.end(body);
    return;
  }

  // `asyncError` is not defined in this file, so this throws from a timer
  // callback 10 ms later; no response is written for this request.
  setTimeout(function () {
    asyncError();
  }, 10);
}

// Export the (not yet listening) HTTP server.
var server = http.createServer(handleRequest);

module.exports = server;


worker.js



// worker.js: pm worker. Registers the app server and the pm worker with
// graceful, then feeds sockets received from pm into the server.
var graceful = require('graceful');
var worker = require('pm').createWorker();
var server = require('./app');

// Hack for pm: server.handle is empty (per the original note), so close()
// is replaced with a no-op before the server is handed to graceful.
server.close = function () {};

/** Error hook passed to graceful: logs the stack of the uncaught error. */
function logUncaughtError(err) {
  console.log('[%s] [worker:%s] error: %s', new Date(), process.pid, err.stack);
}

/** Hands a socket received through worker.ready to the HTTP server. */
function acceptSocket(socket, port) {
  server.emit('connection', socket);
}

graceful({
  server: server,
  worker: worker,
  error: logUncaughtError,
  killTimeout: 10000,
});

worker.ready(acceptSocket);


stdout



$ node demo/graceful_exit/dispatch.js 
[Fri Apr 19 2013 09:28:38 GMT+0800 (CST)] [master:9773] new web:worker:9774 fork
[Fri Apr 19 2013 09:28:38 GMT+0800 (CST)] [master:9773] new web:worker:9775 fork

$ curl localhost:1984/
{"url":"/","pid":9775}

$ curl localhost:1984/asyncerror
# hang for a while
curl: (52) Empty reply from server

# Master
$ node demo/graceful_exit/dispatch.js
[Fri Apr 19 2013 09:28:38 GMT+0800 (CST)] [master:9773] new web:worker:9774 fork
[Fri Apr 19 2013 09:28:38 GMT+0800 (CST)] [master:9773] new web:worker:9775 fork
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [worker:9775] error: ReferenceError: asyncError is not defined
at Object._onTimeout (/Users/mk2/git/pm/demo/graceful_exit/app.js:18:7)
at Timer.list.ontimeout (timers.js:101:19)
[uncaughtException] throw error 1 times
[ReferenceError: asyncError is not defined]
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [worker:9775] close 1 servers!
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [worker:9775] worker disconnect!
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [master:9773] new web:worker:10089 fork
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [master:9773] worker:9775 disconnect! new worker:10089 fork
[Fri Apr 19 2013 09:29:45 GMT+0800 (CST)] [worker:9775] kill timeout, exit now.
[Fri Apr 19 2013 09:29:45 GMT+0800 (CST)] [master:9773] web:worker:9775 quit, code: 1, signal: null

# test again
$ curl localhost:1984/asyncerror
# hang for a while
curl: (52) Empty reply from server

# Master
$ node demo/graceful_exit/dispatch.js
[Fri Apr 19 2013 09:28:38 GMT+0800 (CST)] [master:9773] new web:worker:9774 fork
[Fri Apr 19 2013 09:28:38 GMT+0800 (CST)] [master:9773] new web:worker:9775 fork
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [worker:9775] error: ReferenceError: asyncError is not defined
at Object._onTimeout (/Users/mk2/git/pm/demo/graceful_exit/app.js:18:7)
at Timer.list.ontimeout (timers.js:101:19)
[uncaughtException] throw error 1 times
[ReferenceError: asyncError is not defined]
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [worker:9775] close 1 servers!
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [worker:9775] worker disconnect!
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [master:9773] new web:worker:10089 fork
[Fri Apr 19 2013 09:29:35 GMT+0800 (CST)] [master:9773] worker:9775 disconnect! new worker:10089 fork
[Fri Apr 19 2013 09:29:45 GMT+0800 (CST)] [worker:9775] kill timeout, exit now.
[Fri Apr 19 2013 09:29:45 GMT+0800 (CST)] [master:9773] web:worker:9775 quit, code: 1, signal: null

[Fri Apr 19 2013 09:30:29 GMT+0800 (CST)] [worker:9774] error: ReferenceError: asyncError is not defined
at Object._onTimeout (/Users/mk2/git/pm/demo/graceful_exit/app.js:18:7)
at Timer.list.ontimeout (timers.js:101:19)
[uncaughtException] throw error 1 times
[ReferenceError: asyncError is not defined]
[Fri Apr 19 2013 09:30:29 GMT+0800 (CST)] [worker:9774] close 1 servers!
[Fri Apr 19 2013 09:30:29 GMT+0800 (CST)] [worker:9774] worker disconnect!
[Fri Apr 19 2013 09:30:29 GMT+0800 (CST)] [master:9773] new web:worker:10119 fork
[Fri Apr 19 2013 09:30:29 GMT+0800 (CST)] [master:9773] worker:9774 disconnect! new worker:10119 fork
[Fri Apr 19 2013 09:30:39 GMT+0800 (CST)] [worker:9774] kill timeout, exit now.
[Fri Apr 19 2013 09:30:39 GMT+0800 (CST)] [master:9773] web:worker:9774 quit, code: 1, signal: null


展望



之前想依赖 domain 实现 uncaughtException 发生的时候,给当前请求响应友好的500错误,现在看来是无法100%实现的。



而且引入 domain 会造成一定的性能损耗,参见 nodejs domain module hello world and benchmark:




  • normal: 7624.59 trans/sec

  • domain: 7068.83 trans/sec



所以,按目前的情况还是不使用 domain ,然后通过优雅退出来解决未捕获异常发生后可能出现的一切问题。

本文链接