import { Container, Network } from 'dockerode';
import { BaseOrchestrator } from '../base-orchestrator';
import { NodeConfig, NodeHandle, NodeStatus, NetworkPartition } from '../types';
import { DockerNodeHandle, DockerOrchestratorOptions } from './types';
import { ContainerManager } from './managers/container-manager';
import { NetworkManager } from './managers/network-manager';
import { ResourceManager } from './managers/resource-manager';
import { StatusManager } from './managers/status-manager';
import { ImageManager } from './managers/image-manager';
import { getRandomPort } from './utils/port-utils';

/** Defaults that the constructor merges underneath the caller-supplied options. */
const DEFAULT_OPTIONS: DockerOrchestratorOptions = {
  image: 'rhizome-node-test',
  containerWorkDir: '/app',
  autoBuildTestImage: true,
};

/** Shape of one entry in the error lists produced by the managers' bulk cleanup calls. */
type CleanupError = { resource: unknown; error: unknown };

/** Extracts a printable message from a caught value; non-Error values map to 'Unknown error'. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : 'Unknown error';
}

/**
 * Orchestrator that runs each node in its own Docker container attached to
 * its own Docker network, delegating the Docker work to the manager classes.
 *
 * Internal state is keyed by node ID: the container, the network, the handle
 * returned to the caller, and (optionally) a log stream.
 *
 * NOTE(review): the generic type arguments in this class were reconstructed
 * from how the values are used here; confirm them against '../types' and
 * './types' (in particular the `limits` parameter of setResourceLimits and
 * the element type of `containerLogStreams`).
 */
export class DockerOrchestrator extends BaseOrchestrator {
  private options: DockerOrchestratorOptions;
  private containers: Map<string, Container> = new Map();
  private networks: Map<string, Network> = new Map();
  // Nothing in this class populates this map; entries are only probed for
  // `destroy` / `end` in cleanupLogStream, so the element type is kept opaque.
  private containerLogStreams: Map<string, object> = new Map();
  private nodeHandles: Map<string, DockerNodeHandle> = new Map();

  // Managers
  private readonly containerManager: ContainerManager;
  private readonly networkManager: NetworkManager;
  private readonly resourceManager: ResourceManager;
  private readonly statusManager: StatusManager;
  private readonly imageManager: ImageManager;

  /**
   * @param options Overrides applied on top of DEFAULT_OPTIONS.
   */
  constructor(options: Partial<DockerOrchestratorOptions> = {}) {
    super();
    this.options = { ...DEFAULT_OPTIONS, ...options };

    // The Docker-facing managers all receive the same client options.
    const dockerOptions = this.options.dockerOptions || {};
    this.containerManager = new ContainerManager(dockerOptions);
    this.networkManager = new NetworkManager(dockerOptions);
    this.resourceManager = new ResourceManager();
    this.statusManager = new StatusManager();
    this.imageManager = new ImageManager(dockerOptions);
  }

  /**
   * Start a new node with the given configuration.
   *
   * Note that `config` is mutated: `config.network` is created if absent and
   * `port` / `requestPort` are filled in with random ports when not set.
   *
   * @param config Node configuration; `config.id` falls back to `node-<timestamp>`.
   * @returns A handle for the started node.
   * @throws Re-throws whatever failed during startup, after cleaning up the
   *         partially created resources for this node.
   */
  async startNode(config: NodeConfig): Promise<NodeHandle> {
    const nodeId = config.id || `node-${Date.now()}`;
    config.network = config.network || {};
    config.network.port = config.network.port || getRandomPort();
    config.network.requestPort = config.network.requestPort || getRandomPort();

    try {
      // Ensure test image is built
      if (this.options.autoBuildTestImage) {
        await this.imageManager.buildTestImage(this.options.image);
      }

      // Create a network for this node using NetworkManager
      const network = await this.networkManager.createNetwork(nodeId);
      this.networks.set(nodeId, network);

      // Create container using ContainerManager
      const container = await this.containerManager.createContainer(nodeId, config, network.id);

      // Store the reference before starting so a failed start can still be cleaned up.
      this.containers.set(nodeId, container);

      // Start the container
      await this.containerManager.startContainer(container);

      // Create node handle
      const handle: DockerNodeHandle = {
        id: nodeId,
        containerId: container.id,
        networkId: network.id,
        config,
        status: () => this.getNodeStatus({ id: nodeId } as NodeHandle),
        stop: () => this.stopNode({ id: nodeId } as NodeHandle),
        getRequestPort: () => config.network?.requestPort,
        getApiUrl: () => `http://localhost:${config.network?.port}/api`,
      };

      // Store handle
      this.nodeHandles.set(nodeId, handle);

      // Wait for node to be ready using StatusManager
      await this.statusManager.waitForNodeReady(container, config.network.port);

      return handle;
    } catch (error) {
      await this.cleanupFailedStart(nodeId);
      throw error;
    }
  }

  /**
   * Stop a running node and release its container, network and log stream.
   *
   * Container and network failures are logged as warnings and do not abort
   * the rest of the teardown.
   *
   * @param handle Handle of the node to stop; only `handle.id` is used.
   * @throws Error if no container is tracked for the node, or if the teardown
   *         itself fails unexpectedly.
   */
  async stopNode(handle: NodeHandle): Promise<void> {
    const nodeId = handle.id;
    const container = this.containers.get(nodeId);

    if (!container) {
      throw new Error(`No container found for node ${nodeId}`);
    }

    try {
      // Stop and remove the container using ContainerManager
      try {
        await this.containerManager.stopContainer(container);
        await this.containerManager.removeContainer(container);
      } catch (error) {
        console.warn(`Error managing container ${nodeId}:`, describeError(error));
        // Continue with cleanup even if container operations fail
      }

      // Clean up network using NetworkManager
      const network = this.networks.get(nodeId);
      if (network) {
        try {
          await this.networkManager.removeNetwork(network.id);
        } catch (error) {
          console.warn(`Error removing network for node ${nodeId}:`, describeError(error));
        } finally {
          this.networks.delete(nodeId);
        }
      }

      // Clean up log stream
      this.cleanupLogStream(nodeId);

      // Remove from internal maps
      this.containers.delete(nodeId);
      this.nodeHandles.delete(nodeId);

      console.log(`Stopped and cleaned up node ${nodeId}`);
    } catch (error) {
      const errorMessage = describeError(error);
      console.error(`Error during cleanup of node ${nodeId}:`, errorMessage);
      throw new Error(`Failed to stop node ${nodeId}: ${errorMessage}`);
    }
  }

  /**
   * Close and forget the log stream tracked for a node, if any.
   *
   * Calls `destroy()` when the stream has it, otherwise `end()`; failures are
   * logged and the map entry is removed either way.
   * @private
   */
  private cleanupLogStream(nodeId: string): void {
    const logStream = this.containerLogStreams.get(nodeId);
    if (!logStream) return;

    try {
      if ('destroy' in logStream) {
        (logStream as { destroy: () => void }).destroy();
      } else if ('end' in logStream) {
        (logStream as { end: () => void }).end();
      }
    } catch (error) {
      console.warn(`Error cleaning up log stream for node ${nodeId}:`, error);
    } finally {
      this.containerLogStreams.delete(nodeId);
    }
  }

  /**
   * Build the zeroed status object returned when no real status is available.
   * @private
   */
  private placeholderStatus(id: string, status: 'stopped' | 'error', error: string): NodeStatus {
    return {
      id,
      status,
      error,
      network: {
        address: '',
        httpPort: 0,
        requestPort: 0,
        peers: []
      },
      resources: {
        cpu: { usage: 0, limit: 0 },
        memory: { usage: 0, limit: 0 }
      }
    };
  }

  /**
   * Get status of a node.
   *
   * Never rejects: an untracked node yields a 'stopped' placeholder and a
   * StatusManager failure yields an 'error' placeholder carrying the message.
   */
  async getNodeStatus(handle: NodeHandle): Promise<NodeStatus> {
    const container = this.containers.get(handle.id);

    // If container not found, return stopped status
    if (!container) {
      return this.placeholderStatus(handle.id, 'stopped', 'Container not found');
    }

    try {
      // Delegate to StatusManager to get the node status
      return await this.statusManager.getNodeStatus(handle, container);
    } catch (error) {
      const errorMessage = describeError(error);
      console.error(`Error getting status for node ${handle.id}:`, errorMessage);
      return this.placeholderStatus(handle.id, 'error', errorMessage);
    }
  }

  /**
   * Create network partitions.
   *
   * Currently a stub: it only logs a warning and ignores `partitions`.
   */
  async partitionNetwork(partitions: NetworkPartition): Promise<void> {
    // Implementation for network partitioning
    // This is a simplified version - in a real implementation, you would:
    // 1. Create separate networks for each partition
    // 2. Connect containers to their respective partition networks
    // 3. Disconnect them from other networks
    console.warn('Network partitioning not fully implemented');
  }

  /**
   * Set resource limits for a node.
   *
   * @param handle Handle of the node whose container is updated.
   * @param limits CPU / memory limits; the memory value is also passed as the
   *               swap limit.
   * @throws Error if the node has no tracked container or the update fails.
   */
  async setResourceLimits(
    handle: NodeHandle,
    limits: Partial<NonNullable<NodeConfig['resources']>> = {}
  ): Promise<void> {
    const container = this.containers.get(handle.id);
    if (!container) {
      throw new Error(`No container found for node ${handle.id}`);
    }

    try {
      // Delegate to ResourceManager
      await this.resourceManager.setResourceLimits(container, {
        cpu: limits.cpu,
        memory: limits.memory,
        memorySwap: limits.memory // Swap limit mirrors the memory limit
      });

      console.log(`Updated resource limits for node ${handle.id}:`, limits);
    } catch (error) {
      console.error(`Failed to update resource limits for node ${handle.id}:`, error);
      throw new Error(`Failed to update resource limits: ${describeError(error)}`);
    }
  }

  /**
   * Connect two nodes by attaching the second node's container to the first
   * node's network (aliased as `node-<handle2.id>`).
   *
   * On success `handle2.networkId` is overwritten with the first node's
   * network ID.
   *
   * @throws Error if either container, or the first node's network, is not tracked;
   *         re-throws any failure from the network connect call.
   */
  async connectNodes(handle1: NodeHandle, handle2: NodeHandle): Promise<void> {
    const dockerHandle1 = handle1 as DockerNodeHandle;
    const dockerHandle2 = handle2 as DockerNodeHandle;

    const container1 = this.containers.get(handle1.id);
    const container2 = this.containers.get(handle2.id);

    if (!container1 || !container2) {
      throw new Error('One or both containers not found');
    }

    try {
      // Get the network from the first container
      const networkId = dockerHandle1.networkId;
      if (!networkId) {
        throw new Error(`No network found for node ${handle1.id}`);
      }

      // Connect the second container to the same network
      const network = this.networks.get(handle1.id);
      if (!network) {
        throw new Error(`Network not found for node ${handle1.id}`);
      }

      await network.connect({
        Container: container2.id,
        EndpointConfig: {
          Aliases: [`node-${handle2.id}`]
        }
      });

      // Update the network ID in the second handle
      dockerHandle2.networkId = networkId;
    } catch (error) {
      console.error(`Error connecting nodes ${handle1.id} and ${handle2.id}:`, error);
      throw error;
    }
  }

  /**
   * Log the per-resource errors returned by a bulk cleanup call, if any.
   *
   * @param kind   Which resource family the errors belong to.
   * @param errors Error entries as returned by the manager.
   * @param scope  Optional text appended to the summary line (e.g. " for node X").
   * @private
   */
  private logCleanupErrors(
    kind: 'containers' | 'networks',
    errors: ReadonlyArray<CleanupError>,
    scope = ''
  ): void {
    if (errors.length === 0) return;

    console.warn(`Encountered ${errors.length} error(s) while cleaning up ${kind}${scope}:`);
    for (const { resource, error } of errors) {
      console.warn(`- ${resource}:`, describeError(error));
    }
  }

  /**
   * Clean up resources if node startup fails.
   *
   * The container is removed before the network, mirroring the order used by
   * cleanup(): the container may still be attached to the network, so the
   * two removals must not race. Never throws; failures are only logged.
   *
   * @param nodeId ID of the node that failed to start
   * @private
   */
  private async cleanupFailedStart(nodeId: string): Promise<void> {
    console.log(`Cleaning up failed start for node ${nodeId}...`);

    // Get references to resources before starting cleanup
    const container = this.containers.get(nodeId);
    const network = this.networks.get(nodeId);

    // The managers' bulk cleanup calls take maps keyed by node ID.
    const containersToCleanup = new Map<string, Container>();
    if (container) {
      containersToCleanup.set(nodeId, container);
    }

    const networksToCleanup = new Map<string, Network>();
    if (network) {
      networksToCleanup.set(nodeId, network);
    }

    const scope = ` for node ${nodeId}`;

    try {
      // Step 1: containers. A failure here must not prevent the network attempt.
      try {
        const containerErrors = await this.containerManager.cleanupContainers(containersToCleanup);
        this.logCleanupErrors('containers', containerErrors, scope);
      } catch (error) {
        console.error(`Unexpected error during cleanup of node ${nodeId}:`, describeError(error));
      }

      // Step 2: networks, only once container cleanup has settled.
      try {
        const networkErrors = await this.networkManager.cleanupNetworks(networksToCleanup);
        this.logCleanupErrors('networks', networkErrors, scope);
      } catch (error) {
        console.error(`Unexpected error during cleanup of node ${nodeId}:`, describeError(error));
      }

      console.log(`Completed cleanup for node ${nodeId}`);
    } finally {
      // Always clean up internal state, even if errors occurred.
      // cleanupLogStream closes the stream (if any) before dropping the entry.
      this.cleanupLogStream(nodeId);
      this.containers.delete(nodeId);
      this.networks.delete(nodeId);
      this.nodeHandles.delete(nodeId);
    }
  }

  /**
   * Get a container by ID.
   *
   * Looks in the orchestrator's own map first, then asks the ContainerManager.
   *
   * @param containerId The ID of the container to retrieve
   * @returns The container instance or undefined if not found
   */
  async getContainer(containerId: string): Promise<Container | undefined> {
    // First try to get from our containers map
    const container = this.containers.get(containerId);
    if (container) {
      return container;
    }

    // If not found, try to get it from the container manager
    try {
      return await this.containerManager.getContainer(containerId);
    } catch (error) {
      console.warn(`Failed to get container ${containerId}:`, error);
      return undefined;
    }
  }

  /**
   * Clean up all resources tracked by this orchestrator.
   *
   * Containers are removed first, then (after a one-second pause) networks.
   * Internal state is cleared and log streams are closed regardless of outcome.
   *
   * @throws Re-throws any unexpected error raised by the managers.
   */
  async cleanup(): Promise<void> {
    console.log('Starting cleanup of all resources...');

    // Create copies of the maps to avoid modification during iteration
    const containersToCleanup = new Map(this.containers);
    const networksToCleanup = new Map(this.networks);

    try {
      // First, clean up all containers
      console.log('Stopping and removing all containers...');
      const containerErrors = await this.containerManager.cleanupContainers(containersToCleanup);

      // Wait a short time to ensure all container cleanup is complete
      await new Promise(resolve => setTimeout(resolve, 1000));

      // Then clean up all networks
      console.log('Removing all networks...');
      const networkErrors = await this.networkManager.cleanupNetworks(networksToCleanup);

      // Log any errors that occurred during cleanup
      this.logCleanupErrors('containers', containerErrors);
      this.logCleanupErrors('networks', networkErrors);

      console.log('Completed cleanup of all resources');
    } catch (error) {
      console.error('Unexpected error during cleanup:', describeError(error));
      throw error; // Re-throw to allow callers to handle the error
    } finally {
      // Close every tracked log stream before forgetting it; snapshot the keys
      // because cleanupLogStream deletes from the map as it goes.
      for (const nodeId of Array.from(this.containerLogStreams.keys())) {
        this.cleanupLogStream(nodeId);
      }

      // Always clear internal state, even if errors occurred
      this.containers.clear();
      this.networks.clear();
      this.nodeHandles.clear();
      this.containerLogStreams.clear();
    }
  }
}