Lentil Hoffman b5c0c010a9
feat: Refactor Docker orchestrator and enhance test utilities
This commit includes a major refactoring of the Docker orchestrator implementation
along with improvements to the testing infrastructure:

- Refactored Docker orchestrator to handle dynamic port assignment
- Added comprehensive test utilities in docker-test-utils.ts
- Improved error handling and resource cleanup in test environments
- Enhanced NodeStatus interface with containerId and networkId
- Added support for different storage types in NodeConfig
- Fixed request port handling in TestOrchestrator
- Added proper cleanup method to NodeOrchestrator interface

The changes ensure more reliable container management and better test isolation
while maintaining backward compatibility with existing implementations.

BREAKING CHANGE: The NodeOrchestrator interface now requires a cleanup() method.
2025-06-19 16:58:07 -05:00

438 lines
15 KiB
TypeScript

import { Container, Network } from 'dockerode';
import { BaseOrchestrator } from '../base-orchestrator';
import { NodeConfig, NodeHandle, NodeStatus, NetworkPartition } from '../types';
import { DockerNodeHandle, DockerOrchestratorOptions } from './types';
import { ContainerManager } from './managers/container-manager';
import { NetworkManager } from './managers/network-manager';
import { ResourceManager } from './managers/resource-manager';
import { StatusManager } from './managers/status-manager';
import { ImageManager } from './managers/image-manager';
import { getRandomPort } from './utils/port-utils';
const DEFAULT_OPTIONS: DockerOrchestratorOptions = {
image: 'rhizome-node-test',
containerWorkDir: '/app',
autoBuildTestImage: true,
};
export class DockerOrchestrator extends BaseOrchestrator {
private options: DockerOrchestratorOptions;
private containers: Map<string, Container> = new Map();
private networks: Map<string, Network> = new Map();
private containerLogStreams: Map<string, NodeJS.ReadableStream> = new Map();
private nodeHandles: Map<string, DockerNodeHandle> = new Map();
// Managers
private readonly containerManager: ContainerManager;
private readonly networkManager: NetworkManager;
private readonly resourceManager: ResourceManager;
private readonly statusManager: StatusManager;
private readonly imageManager: ImageManager;
constructor(options: Partial<DockerOrchestratorOptions> = {}) {
super();
this.options = { ...DEFAULT_OPTIONS, ...options };
// Initialize Docker client in managers
const dockerOptions = this.options.dockerOptions || {};
this.containerManager = new ContainerManager(dockerOptions);
this.networkManager = new NetworkManager(dockerOptions);
this.resourceManager = new ResourceManager();
this.statusManager = new StatusManager();
this.imageManager = new ImageManager(dockerOptions);
}
/**
* Start a new node with the given configuration
*/
async startNode(config: NodeConfig): Promise<NodeHandle> {
const nodeId = config.id || `node-${Date.now()}`;
config.network = config.network || {};
config.network.port = config.network.port || getRandomPort();
config.network.requestPort = config.network.requestPort || getRandomPort();
try {
// Ensure test image is built
if (this.options.autoBuildTestImage) {
await this.imageManager.buildTestImage(this.options.image);
}
// Create a network for this node using NetworkManager
const network = await this.networkManager.createNetwork(nodeId);
this.networks.set(nodeId, network);
// Create container using ContainerManager
const container = await this.containerManager.createContainer(
nodeId,
config,
network.id
);
// Store container reference before starting it
this.containers.set(nodeId, container);
// Start the container
await this.containerManager.startContainer(container);
// Create node handle
const handle: DockerNodeHandle = {
id: nodeId,
containerId: container.id,
networkId: network.id,
config,
status: () => this.getNodeStatus({ id: nodeId } as NodeHandle),
stop: () => this.stopNode({ id: nodeId } as NodeHandle),
getRequestPort: () => config.network?.requestPort,
getApiUrl: () => `http://localhost:${config.network?.port}/api`,
};
// Store handle
this.nodeHandles.set(nodeId, handle);
// Wait for node to be ready using StatusManager
await this.statusManager.waitForNodeReady( container, config.network.port);
return handle;
} catch (error) {
await this.cleanupFailedStart(nodeId);
throw error;
}
}
/**
* Stop a running node
*/
async stopNode(handle: NodeHandle): Promise<void> {
const nodeId = handle.id;
const container = this.containers.get(nodeId);
if (!container) {
throw new Error(`No container found for node ${nodeId}`);
}
try {
// Stop and remove the container using ContainerManager
try {
await this.containerManager.stopContainer(container);
await this.containerManager.removeContainer(container);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.warn(`Error managing container ${nodeId}:`, errorMessage);
// Continue with cleanup even if container operations fail
}
// Clean up network using NetworkManager
const network = this.networks.get(nodeId);
if (network) {
try {
await this.networkManager.removeNetwork(network.id);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.warn(`Error removing network for node ${nodeId}:`, errorMessage);
} finally {
this.networks.delete(nodeId);
}
}
// Clean up log stream
this.cleanupLogStream(nodeId);
// Remove from internal maps
this.containers.delete(nodeId);
this.nodeHandles.delete(nodeId);
console.log(`Stopped and cleaned up node ${nodeId}`);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`Error during cleanup of node ${nodeId}:`, errorMessage);
throw new Error(`Failed to stop node ${nodeId}: ${errorMessage}`);
}
}
/**
* Clean up log stream for a node
* @private
*/
private cleanupLogStream(nodeId: string): void {
const logStream = this.containerLogStreams.get(nodeId);
if (!logStream) return;
try {
if ('destroy' in logStream) {
(logStream as { destroy: () => void }).destroy();
} else if ('end' in logStream) {
(logStream as { end: () => void }).end();
}
} catch (error) {
console.warn(`Error cleaning up log stream for node ${nodeId}:`, error);
} finally {
this.containerLogStreams.delete(nodeId);
}
}
/**
* Get status of a node
*/
async getNodeStatus(handle: NodeHandle): Promise<NodeStatus> {
const container = this.containers.get(handle.id);
// If container not found, return stopped status
if (!container) {
return {
id: handle.id,
status: 'stopped',
error: 'Container not found',
network: {
address: '',
httpPort: 0,
requestPort: 0,
peers: []
},
resources: {
cpu: { usage: 0, limit: 0 },
memory: { usage: 0, limit: 0 }
}
};
}
try {
// Delegate to StatusManager to get the node status
return await this.statusManager.getNodeStatus(handle, container);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`Error getting status for node ${handle.id}:`, errorMessage);
return {
id: handle.id,
status: 'error',
error: errorMessage,
network: {
address: '',
httpPort: 0,
requestPort: 0,
peers: []
},
resources: {
cpu: { usage: 0, limit: 0 },
memory: { usage: 0, limit: 0 }
}
};
}
}
/**
* Create network partitions
*/
async partitionNetwork(partitions: NetworkPartition): Promise<void> {
// Implementation for network partitioning
// This is a simplified version - in a real implementation, you would:
// 1. Create separate networks for each partition
// 2. Connect containers to their respective partition networks
// 3. Disconnect them from other networks
console.warn('Network partitioning not fully implemented');
}
/**
* Set resource limits for a node
*/
async setResourceLimits(
handle: NodeHandle,
limits: Partial<NodeConfig['resources']> = {}
): Promise<void> {
const container = this.containers.get(handle.id);
if (!container) {
throw new Error(`No container found for node ${handle.id}`);
}
try {
// Delegate to ResourceManager
await this.resourceManager.setResourceLimits(container, {
cpu: limits.cpu,
memory: limits.memory,
memorySwap: limits.memory // Default to same as memory limit if not specified
});
console.log(`Updated resource limits for node ${handle.id}:`, limits);
} catch (error) {
console.error(`Failed to update resource limits for node ${handle.id}:`, error);
throw new Error(`Failed to update resource limits: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
/**
* Connect two nodes in the network
*/
async connectNodes(handle1: NodeHandle, handle2: NodeHandle): Promise<void> {
const dockerHandle1 = handle1 as DockerNodeHandle;
const dockerHandle2 = handle2 as DockerNodeHandle;
const container1 = this.containers.get(handle1.id);
const container2 = this.containers.get(handle2.id);
if (!container1 || !container2) {
throw new Error('One or both containers not found');
}
try {
// Get the network from the first container
const networkId = dockerHandle1.networkId;
if (!networkId) {
throw new Error(`No network found for node ${handle1.id}`);
}
// Connect the second container to the same network
const network = this.networks.get(handle1.id);
if (!network) {
throw new Error(`Network not found for node ${handle1.id}`);
}
await network.connect({
Container: container2.id,
EndpointConfig: {
Aliases: [`node-${handle2.id}`]
}
});
// Update the network ID in the second handle
dockerHandle2.networkId = networkId;
} catch (error) {
console.error(`Error connecting nodes ${handle1.id} and ${handle2.id}:`, error);
throw error;
}
}
/**
* Clean up resources if node startup fails
* @param nodeId ID of the node that failed to start
* @private
*/
private async cleanupFailedStart(nodeId: string): Promise<void> {
console.log(`Cleaning up failed start for node ${nodeId}...`);
// Get references to resources before starting cleanup
const container = this.containers.get(nodeId);
const network = this.networks.get(nodeId);
// Create a map of containers to clean up
const containersToCleanup = new Map<string, Container>();
if (container) {
containersToCleanup.set(nodeId, container);
}
// Create a map of networks to clean up
const networksToCleanup = new Map<string, Network>();
if (network) {
networksToCleanup.set(nodeId, network);
}
try {
// Run container and network cleanup in parallel
const [containerErrors, networkErrors] = await Promise.all([
// Clean up containers using ContainerManager
this.containerManager.cleanupContainers(containersToCleanup),
// Clean up networks using NetworkManager
this.networkManager.cleanupNetworks(networksToCleanup)
]);
// Log any errors that occurred during cleanup
if (containerErrors.length > 0) {
console.warn(`Encountered ${containerErrors.length} error(s) while cleaning up containers for node ${nodeId}:`);
containerErrors.forEach(({ resource, error }) => {
console.warn(`- ${resource}:`, error instanceof Error ? error.message : 'Unknown error');
});
}
if (networkErrors.length > 0) {
console.warn(`Encountered ${networkErrors.length} error(s) while cleaning up networks for node ${nodeId}:`);
networkErrors.forEach(({ resource, error }) => {
console.warn(`- ${resource}:`, error instanceof Error ? error.message : 'Unknown error');
});
}
console.log(`Completed cleanup for node ${nodeId}`);
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`Unexpected error during cleanup of node ${nodeId}:`, errorMessage);
} finally {
// Always clean up internal state, even if errors occurred
this.containers.delete(nodeId);
this.networks.delete(nodeId);
this.nodeHandles.delete(nodeId);
this.containerLogStreams.delete(nodeId);
}
}
/**
* Get a container by ID
* @param containerId The ID of the container to retrieve
* @returns The container instance or undefined if not found
*/
async getContainer(containerId: string): Promise<Container | undefined> {
// First try to get from our containers map
const container = this.containers.get(containerId);
if (container) {
return container;
}
// If not found, try to get it from the container manager
try {
return await this.containerManager.getContainer(containerId);
} catch (error) {
console.warn(`Failed to get container ${containerId}:`, error);
return undefined;
}
}
/**
* Clean up all resources
*/
async cleanup(): Promise<void> {
console.log('Starting cleanup of all resources...');
// Create copies of the maps to avoid modification during iteration
const containersToCleanup = new Map(this.containers);
const networksToCleanup = new Map(this.networks);
try {
// First, clean up all containers
console.log('Stopping and removing all containers...');
const containerErrors = await this.containerManager.cleanupContainers(containersToCleanup);
// Wait a short time to ensure all container cleanup is complete
await new Promise(resolve => setTimeout(resolve, 1000));
// Then clean up all networks
console.log('Removing all networks...');
const networkErrors = await this.networkManager.cleanupNetworks(networksToCleanup);
// Log any errors that occurred during cleanup
if (containerErrors.length > 0) {
console.warn(`Encountered ${containerErrors.length} error(s) while cleaning up containers:`);
containerErrors.forEach(({ resource, error }) => {
console.warn(`- ${resource}:`, error instanceof Error ? error.message : 'Unknown error');
});
}
if (networkErrors.length > 0) {
console.warn(`Encountered ${networkErrors.length} error(s) while cleaning up networks:`);
networkErrors.forEach(({ resource, error }) => {
console.warn(`- ${resource}:`, error instanceof Error ? error.message : 'Unknown error');
});
}
console.log('Completed cleanup of all resources');
} catch (error) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error('Unexpected error during cleanup:', errorMessage);
throw error; // Re-throw to allow callers to handle the error
} finally {
// Always clear internal state, even if errors occurred
this.containers.clear();
this.networks.clear();
this.nodeHandles.clear();
this.containerLogStreams.clear();
}
}
}