How to fix Error: Worker disconnect timeout (worker did not exit cleanly) in Node.js

Node.js · INTERMEDIATE · MEDIUM

This error occurs in Node.js cluster mode when a worker process fails to exit within the expected timeout period after disconnect() is called. It typically indicates that the worker has open connections or pending operations preventing graceful shutdown.

What this error means

This error appears when using the Node.js cluster module to manage worker processes and indicates that a worker failed to shut down gracefully within an expected timeframe. When the primary process calls worker.disconnect(), it expects the worker to close all server connections, clean up resources, and exit naturally. However, if the worker has long-lived connections, open handles, or pending operations, it may not exit cleanly. The disconnect() method in Node.js cluster is designed for graceful shutdowns: the worker stops accepting new connections, lets existing connections complete, and then closes the IPC channel between the primary and the worker. The problem arises when these existing connections or resources don't close within a reasonable time, leaving the worker process in a limbo state. This is different from worker.kill(), which forcefully terminates the worker. The disconnect timeout error indicates that your application attempted a graceful shutdown but had to fall back on a timeout to avoid hanging indefinitely.

How to fix "Error: Worker disconnect timeout (worker did not exit cleanly)"

1. Implement proper timeout handling in primary process

Add a timeout mechanism that forces worker termination if graceful disconnect takes too long:

javascript

const cluster = require('cluster');

if (cluster.isPrimary) {
  // How long a worker may take to exit on its own before it is force-killed.
  const FORCE_KILL_DELAY_MS = 5000; // 5 second timeout

  const worker = cluster.fork();

  /**
   * Ask a worker to shut down gracefully and force-kill it if it lingers.
   *
   * Sends the 'shutdown' message, disconnects the worker, and arms a timer
   * that kills the worker unless it has exited before the timer fires.
   *
   * @param {import('cluster').Worker} worker - Worker to stop.
   */
  function shutdownWorker(worker) {
    // A worker that has already exited will never emit 'exit' again, so there
    // is nothing to wait for.
    if (worker.isDead()) {
      return;
    }

    // Messaging a worker whose IPC channel is already closed fails, so only
    // signal and disconnect workers that are still connected.
    if (worker.isConnected()) {
      worker.send('shutdown'); // Signal worker to clean up
      worker.disconnect();
    }

    const timeout = setTimeout(() => {
      console.warn(`Worker ${worker.id} did not exit cleanly, forcing kill`);
      worker.kill('SIGTERM');
    }, FORCE_KILL_DELAY_MS);

    // 'disconnect' only reports that the IPC channel closed. Open handles can
    // still keep the process alive afterwards, so the kill timer stays armed
    // until the worker has actually exited.
    worker.once('disconnect', () => {
      console.log(`Worker ${worker.id} disconnected gracefully`);
    });

    worker.once('exit', () => {
      clearTimeout(timeout);
      console.log(`Worker ${worker.id} exited`);
    });
  }

  // Handle primary process shutdown
  process.on('SIGTERM', () => {
    Object.values(cluster.workers || {}).forEach(shutdownWorker);
  });
}

This pattern ensures workers have time to clean up but won't hang indefinitely.

2. Add shutdown message handler in worker process

Implement proper cleanup when the worker receives a shutdown signal:

javascript

if (cluster.isWorker) {
  const server = require('http').createServer(/* your app */);

  // Upper bound on cleanup time; chosen so the worker exits on its own before
  // the primary's timeout expires.
  const FORCED_EXIT_DELAY_MS = 4000;

  // Set once shutdown has started so repeated messages do not arm extra timers.
  let isShuttingDown = false;

  // Listen for shutdown message from primary
  process.on('message', (msg) => {
    if (msg !== 'shutdown' || isShuttingDown) {
      return;
    }
    isShuttingDown = true;

    console.log('Received shutdown signal, cleaning up...');

    // Stop accepting new connections
    server.close(() => {
      console.log('Server closed, exiting...');
      process.exit(0);
    });

    // Idle keep-alive sockets would otherwise hold the server open until they
    // time out. The method is missing on older Node.js releases, hence the
    // optional call.
    server.closeIdleConnections?.();

    // Force exit if cleanup takes too long
    setTimeout(() => {
      console.error('Forced exit after timeout');
      process.exit(1);
    }, FORCED_EXIT_DELAY_MS); // Exit before primary timeout
  });

  server.listen(3000);
}

The worker should handle cleanup before the primary timeout expires.

3. Close all active connections properly

Ensure database connections, external service clients, and other resources are cleaned up:

javascript

// Example with database connection pool
const pool = require('./database-pool');
const redis = require('./redis-client');

/**
 * Start closing every external client in parallel.
 *
 * @returns {Promise<unknown[]>} Settles once all close operations have finished.
 */
const closeExternalClients = () =>
  Promise.all([
    pool.end(), // Close database pool
    redis.quit(), // Close Redis connection
    // Add other cleanup tasks
  ]);

// On 'shutdown', release external connections and exit: code 0 when every
// close succeeded, code 1 when any of them failed.
process.on('message', async (msg) => {
  if (msg !== 'shutdown') {
    return;
  }

  try {
    await closeExternalClients();

    console.log('All connections closed');
    process.exit(0);
  } catch (error) {
    console.error('Cleanup error:', error);
    process.exit(1);
  }
});

Explicitly close all external connections to allow clean exit.

4. Handle long-polling and WebSocket connections

For applications with persistent connections, implement graceful connection draining:

javascript

const WebSocket = require('ws');
const wss = new WebSocket.Server({ server });

// Every socket that is currently open; entries remove themselves on 'close'.
const activeConnections = new Set();

wss.on('connection', (ws) => {
  activeConnections.add(ws);
  ws.on('close', () => activeConnections.delete(ws));
});

// On 'shutdown', tell each client the server is going away, close its socket,
// then poll until the set is empty before closing the server and exiting.
process.on('message', (msg) => {
  if (msg !== 'shutdown') {
    return;
  }

  console.log(`Closing ${activeConnections.size} active connections`);

  // Notify clients and close connections
  const shutdownNotice = JSON.stringify({ type: 'server_shutdown' });
  for (const socket of activeConnections) {
    socket.send(shutdownNotice);
    socket.close(1000, 'Server shutting down');
  }

  // Wait for connections to close
  const drainPoller = setInterval(() => {
    if (activeConnections.size > 0) {
      return;
    }
    clearInterval(drainPoller);
    server.close(() => process.exit(0));
  }, 100);
});

This ensures persistent connections are handled during shutdown.

5. Clear timers and intervals

Remove all active timers that might prevent the event loop from completing:

javascript

// Track all timers
// Each entry is { id, type }, where type is 'interval' or 'timeout'.
const timers = new Set();

/**
 * Start an interval and remember it so it can be cleared on shutdown.
 *
 * @param {Function} fn - Callback to run on every tick.
 * @param {number} delay - Interval in milliseconds.
 * @returns {*} The handle returned by setInterval.
 */
function setTrackedInterval(fn, delay) {
  const id = setInterval(fn, delay);
  timers.add({ id, type: 'interval' });
  return id;
}

/**
 * Start a one-shot timeout and remember it until it fires or is cleared on
 * shutdown.
 *
 * @param {Function} fn - Callback to run once the delay has elapsed.
 * @param {number} delay - Delay in milliseconds.
 * @returns {*} The handle returned by setTimeout.
 */
function setTrackedTimeout(fn, delay) {
  const entry = { id: undefined, type: 'timeout' };
  entry.id = setTimeout(function onTimeout() {
    // A fired timeout no longer needs clearing; dropping it here keeps the
    // set from growing for the lifetime of the process.
    timers.delete(entry);
    fn.call(this);
  }, delay);
  timers.add(entry);
  return entry.id;
}

// On 'shutdown', cancel everything that is still pending so that no tracked
// timer keeps the event loop alive.
process.on('message', (msg) => {
  if (msg === 'shutdown') {
    // Clear all tracked timers
    timers.forEach(({ id, type }) => {
      if (type === 'interval') {
        clearInterval(id);
      } else {
        clearTimeout(id);
      }
    });
    timers.clear();

    console.log('All timers cleared');
  }
});

Clearing timers allows the Node.js event loop to complete and exit.

6. Monitor disconnect to exit delay

Add monitoring to identify which resources are preventing clean exit:

javascript

if (cluster.isPrimary) {
  // Exit delays above this many milliseconds are reported as warnings.
  const SLOW_EXIT_THRESHOLD_MS = 2000;

  // Measure how long each worker takes to exit after its 'disconnect' event.
  cluster.on('disconnect', (worker) => {
    const disconnectedAt = Date.now();
    console.log(`Worker ${worker.id} disconnected at ${disconnectedAt}`);

    worker.once('exit', () => {
      const elapsed = Date.now() - disconnectedAt;

      console.log(`Worker ${worker.id} exit delay: ${elapsed}ms`);

      if (elapsed <= SLOW_EXIT_THRESHOLD_MS) {
        return;
      }

      console.warn(`Worker took ${elapsed}ms to exit after disconnect`);
      // Log this metric to your monitoring system
    });
  });
}

This helps identify patterns in slow shutdowns for debugging.

How to fix Error: Worker disconnect timeout (worker did not exit cleanly) in Node.js

What this error means

Typical symptoms

Common causes

How to fix "Error: Worker disconnect timeout (worker did not exit cleanly)"

Advanced notes

Related errors

Official resources & further reading