Source: services/vsom/VSOMService.js

/**
 * VSOM Service - Backend service for managing Vector Self-Organizing Map instances
 * 
 * Provides a service layer that orchestrates multiple VSOM instances, handles training,
 * manages data loading, and integrates with the broader Semem ecosystem.
 */

import VSOM from '../../ragno/algorithms/VSOM.js';
import { logger } from '../../Utils.js';

export default class VSOMService {
    constructor(options = {}) {
        this.instances = new Map(); // Store multiple VSOM instances
        this.defaultOptions = {
            mapSize: [20, 20],
            topology: 'rectangular',
            embeddingDimension: 1536,
            maxIterations: 1000,
            initialLearningRate: 0.1,
            finalLearningRate: 0.01,
            clusterThreshold: 0.8,
            minClusterSize: 3,
            maxInstancesPerSession: 5,
            ...options
        };
        
        this.logger = logger;
        this.nextInstanceId = 1;
        this.trainingProcesses = new Map(); // Track ongoing training
        
        this.logger.info('VSOMService initialized with default options:', this.defaultOptions);
    }
    
    /**
     * Create a new VSOM instance
     * @param {Object} config - Configuration for the VSOM instance
     * @returns {Promise<Object>} Instance creation result
     */
    async createInstance(config = {}) {
        // Check instance limit
        if (this.instances.size >= this.defaultOptions.maxInstancesPerSession) {
            throw new Error(`Maximum number of instances (${this.defaultOptions.maxInstancesPerSession}) reached`);
        }
        
        const instanceId = `vsom_${this.nextInstanceId++}`;
        const instanceConfig = { ...this.defaultOptions, ...config };
        
        try {
            const vsom = new VSOM(instanceConfig);
            
            const instanceData = {
                instanceId,
                vsom,
                config: instanceConfig,
                status: 'created',
                created: new Date(),
                dataLoaded: false,
                entitiesCount: 0,
                trainingProgress: null
            };
            
            this.instances.set(instanceId, instanceData);
            
            this.logger.info(`Created VSOM instance: ${instanceId}`, { config: instanceConfig });
            
            return {
                instanceId,
                config: instanceConfig,
                status: 'created',
                created: instanceData.created
            };
        } catch (error) {
            this.logger.error(`Failed to create VSOM instance: ${error.message}`);
            throw error;
        }
    }
    
    /**
     * Load data into a VSOM instance
     * @param {string} instanceId - Instance identifier
     * @param {Array} entities - Array of entity objects with embeddings
     * @returns {Promise<Object>} Loading result
     */
    async loadData(instanceId, entities) {
        const instance = this.getInstance(instanceId);
        
        if (!Array.isArray(entities) || entities.length === 0) {
            throw new Error('Entities must be a non-empty array');
        }
        
        // Validate entity format
        for (const entity of entities) {
            if (!entity.id || !entity.embedding || !Array.isArray(entity.embedding)) {
                throw new Error('Each entity must have id and embedding array');
            }
        }
        
        try {
            // Load entities into VSOM
            await instance.vsom.loadEntities(entities);
            
            // Update instance metadata
            instance.dataLoaded = true;
            instance.entitiesCount = entities.length;
            instance.status = 'data_loaded';
            
            this.logger.info(`Loaded ${entities.length} entities into VSOM instance: ${instanceId}`);
            
            return {
                success: true,
                entitiesLoaded: entities.length,
                status: 'data_loaded'
            };
        } catch (error) {
            this.logger.error(`Failed to load data into VSOM instance ${instanceId}: ${error.message}`);
            throw error;
        }
    }
    
    /**
     * Train a VSOM instance
     * @param {string} instanceId - Instance identifier
     * @param {Object} options - Training options
     * @returns {Promise<Object>} Training result
     */
    async train(instanceId, options = {}) {
        const instance = this.getInstance(instanceId);
        
        if (!instance.dataLoaded) {
            throw new Error('Cannot train VSOM: no data loaded');
        }
        
        if (this.trainingProcesses.has(instanceId)) {
            throw new Error('Training already in progress for this instance');
        }
        
        const {
            epochs = this.defaultOptions.defaultTrainingEpochs || 100,
            batchSize = 10,
            progressCallback = null
        } = options;
        
        try {
            instance.status = 'training';
            instance.trainingProgress = {
                startTime: new Date(),
                epochs: 0,
                totalEpochs: epochs,
                currentError: null
            };
            
            // Mark training as active
            this.trainingProcesses.set(instanceId, {
                startTime: new Date(),
                epochs,
                batchSize,
                stopped: false
            });
            
            this.logger.info(`Starting training for VSOM instance: ${instanceId}`, { epochs, batchSize });
            
            // Create progress callback that updates instance state
            const internalProgressCallback = (progress) => {
                if (instance.trainingProgress) {
                    instance.trainingProgress.epochs = progress.epoch;
                    instance.trainingProgress.currentError = progress.error;
                }
                
                // Call external callback if provided
                if (progressCallback && typeof progressCallback === 'function') {
                    progressCallback(progress);
                }
            };
            
            // Execute training
            const result = await instance.vsom.train({
                epochs,
                batchSize,
                progressCallback: internalProgressCallback
            });
            
            // Update instance state
            instance.status = 'trained';
            instance.trainingProgress.endTime = new Date();
            instance.trainingProgress.finalError = result.finalError;
            
            // Clean up training process tracking
            this.trainingProcesses.delete(instanceId);
            
            this.logger.info(`Training completed for VSOM instance: ${instanceId}`, {
                epochs: result.epochs,
                finalError: result.finalError,
                duration: Date.now() - instance.trainingProgress.startTime.getTime()
            });
            
            return {
                instanceId,
                epochs: result.epochs,
                finalError: result.finalError,
                duration: instance.trainingProgress.endTime - instance.trainingProgress.startTime,
                status: 'completed'
            };
        } catch (error) {
            // Clean up on error
            instance.status = 'error';
            this.trainingProcesses.delete(instanceId);
            
            this.logger.error(`Training failed for VSOM instance ${instanceId}: ${error.message}`);
            throw error;
        }
    }
    
    /**
     * Stop ongoing training for an instance
     * @param {string} instanceId - Instance identifier
     * @returns {Promise<Object>} Stop result
     */
    async stopTraining(instanceId) {
        const instance = this.getInstance(instanceId);
        const trainingProcess = this.trainingProcesses.get(instanceId);
        
        if (!trainingProcess) {
            return { stopped: false, status: instance.status, message: 'No training in progress' };
        }
        
        try {
            // Mark as stopped
            trainingProcess.stopped = true;
            
            // Update instance state
            instance.status = 'training_stopped';
            if (instance.trainingProgress) {
                instance.trainingProgress.stopped = true;
                instance.trainingProgress.endTime = new Date();
            }
            
            // Clean up
            this.trainingProcesses.delete(instanceId);
            
            this.logger.info(`Training stopped for VSOM instance: ${instanceId}`);
            
            return { stopped: true, status: 'training_stopped' };
        } catch (error) {
            this.logger.error(`Failed to stop training for VSOM instance ${instanceId}: ${error.message}`);
            throw error;
        }
    }
    
    /**
     * Get grid state for visualization
     * @param {string} instanceId - Instance identifier
     * @param {Object} options - Grid state options
     * @returns {Promise<Object>} Grid state
     */
    async getGridState(instanceId, options = {}) {
        const instance = this.getInstance(instanceId);
        const { includeWeights = false } = options;
        
        try {
            const grid = await instance.vsom.getGrid();
            const mappings = await instance.vsom.getEntityMappings();
            
            return {
                grid: includeWeights ? grid : this.stripWeights(grid),
                mappings,
                metadata: {
                    mapSize: instance.config.mapSize,
                    entitiesCount: instance.entitiesCount,
                    trained: instance.status === 'trained'
                }
            };
        } catch (error) {
            this.logger.error(`Failed to get grid state for VSOM instance ${instanceId}: ${error.message}`);
            throw error;
        }
    }
    
    /**
     * Get feature maps (U-Matrix, component planes)
     * @param {string} instanceId - Instance identifier
     * @param {Object} options - Feature map options
     * @returns {Promise<Object>} Feature maps
     */
    async getFeatureMaps(instanceId, options = {}) {
        const instance = this.getInstance(instanceId);
        const { mapType = 'umatrix' } = options;
        
        try {
            let featureMap;
            let statistics = {};
            
            switch (mapType) {
                case 'umatrix':
                    featureMap = await instance.vsom.calculateUMatrix();
                    statistics = this.calculateUMatrixStats(featureMap);
                    break;
                case 'component':
                    featureMap = await instance.vsom.getComponentPlanes();
                    statistics = this.calculateComponentStats(featureMap);
                    break;
                default:
                    throw new Error(`Unsupported map type: ${mapType}`);
            }
            
            return {
                featureMap,
                statistics,
                mapType,
                metadata: {
                    mapSize: instance.config.mapSize,
                    generated: new Date()
                }
            };
        } catch (error) {
            this.logger.error(`Failed to get feature maps for VSOM instance ${instanceId}: ${error.message}`);
            throw error;
        }
    }
    
    /**
     * Perform clustering on the trained SOM
     * @param {string} instanceId - Instance identifier
     * @param {Object} options - Clustering options
     * @returns {Promise<Object>} Clustering result
     */
    async cluster(instanceId, options = {}) {
        const instance = this.getInstance(instanceId);
        
        if (instance.status !== 'trained') {
            throw new Error('Cannot cluster: VSOM instance is not trained');
        }
        
        const {
            algorithm = 'umatrix',
            threshold = instance.config.clusterThreshold,
            minClusterSize = instance.config.minClusterSize
        } = options;
        
        try {
            const clusters = await instance.vsom.cluster({
                algorithm,
                threshold,
                minClusterSize
            });
            
            const statistics = {
                totalClusters: clusters.length,
                totalEntities: instance.entitiesCount,
                averageClusterSize: clusters.reduce((sum, c) => sum + c.entities.length, 0) / clusters.length,
                largestCluster: Math.max(...clusters.map(c => c.entities.length)),
                smallestCluster: Math.min(...clusters.map(c => c.entities.length))
            };
            
            this.logger.info(`Clustering completed for VSOM instance: ${instanceId}`, statistics);
            
            return {
                clusters,
                statistics,
                algorithm,
                parameters: { threshold, minClusterSize }
            };
        } catch (error) {
            this.logger.error(`Failed to cluster VSOM instance ${instanceId}: ${error.message}`);
            throw error;
        }
    }
    
    /**
     * Get training status for an instance
     * @param {string} instanceId - Instance identifier
     * @returns {Promise<Object>} Training status
     */
    async getTrainingStatus(instanceId) {
        const instance = this.getInstance(instanceId);
        const trainingProcess = this.trainingProcesses.get(instanceId);
        
        return {
            status: instance.status,
            trainingProgress: instance.trainingProgress,
            isTraining: !!trainingProcess,
            dataLoaded: instance.dataLoaded,
            entitiesCount: instance.entitiesCount
        };
    }
    
    /**
     * Get training status for all instances
     * @returns {Promise<Array>} All training statuses
     */
    async getAllTrainingStatus() {
        const statuses = [];
        
        for (const [instanceId, instance] of this.instances) {
            const status = await this.getTrainingStatus(instanceId);
            statuses.push({ instanceId, ...status });
        }
        
        return statuses;
    }
    
    /**
     * List all VSOM instances
     * @returns {Promise<Array>} List of instances
     */
    async listInstances() {
        const instances = [];
        
        for (const [instanceId, instance] of this.instances) {
            instances.push({
                instanceId,
                config: instance.config,
                status: instance.status,
                created: instance.created,
                dataLoaded: instance.dataLoaded,
                entitiesCount: instance.entitiesCount
            });
        }
        
        return instances;
    }
    
    /**
     * Delete a VSOM instance
     * @param {string} instanceId - Instance identifier
     * @returns {Promise<Object>} Deletion result
     */
    async deleteInstance(instanceId) {
        const instance = this.getInstance(instanceId);
        
        // Stop training if in progress
        if (this.trainingProcesses.has(instanceId)) {
            await this.stopTraining(instanceId);
        }
        
        // Clean up resources
        if (instance.vsom && typeof instance.vsom.cleanup === 'function') {
            await instance.vsom.cleanup();
        }
        
        // Remove from instances
        this.instances.delete(instanceId);
        
        this.logger.info(`Deleted VSOM instance: ${instanceId}`);
        
        return { deleted: true };
    }
    
    /**
     * Stop all training processes
     */
    async stopAllTraining() {
        const stopPromises = [];
        
        for (const instanceId of this.trainingProcesses.keys()) {
            stopPromises.push(this.stopTraining(instanceId));
        }
        
        await Promise.allSettled(stopPromises);
    }
    
    /**
     * Get instance by ID with validation
     * @param {string} instanceId - Instance identifier
     * @returns {Object} Instance data
     * @private
     */
    getInstance(instanceId) {
        const instance = this.instances.get(instanceId);
        if (!instance) {
            throw new Error(`VSOM instance not found: ${instanceId}`);
        }
        return instance;
    }
    
    /**
     * Strip weights from grid for lightweight transfer
     * @param {Array} grid - Grid with weights
     * @returns {Array} Grid without weights
     * @private
     */
    stripWeights(grid) {
        return grid.map(row => 
            row.map(node => ({
                x: node.x,
                y: node.y,
                entityCount: node.entities ? node.entities.length : 0
            }))
        );
    }
    
    /**
     * Calculate U-Matrix statistics
     * @param {Array} uMatrix - U-Matrix data
     * @returns {Object} Statistics
     * @private
     */
    calculateUMatrixStats(uMatrix) {
        const flatValues = uMatrix.flat();
        const validValues = flatValues.filter(v => v !== null && v !== undefined);
        
        return {
            min: Math.min(...validValues),
            max: Math.max(...validValues),
            mean: validValues.reduce((sum, v) => sum + v, 0) / validValues.length,
            nonNullCount: validValues.length,
            totalCells: flatValues.length
        };
    }
    
    /**
     * Calculate component plane statistics
     * @param {Object} componentPlanes - Component planes data
     * @returns {Object} Statistics
     * @private
     */
    calculateComponentStats(componentPlanes) {
        const stats = {};
        
        for (const [component, plane] of Object.entries(componentPlanes)) {
            const flatValues = plane.flat();
            const validValues = flatValues.filter(v => v !== null && v !== undefined);
            
            stats[component] = {
                min: Math.min(...validValues),
                max: Math.max(...validValues),
                mean: validValues.reduce((sum, v) => sum + v, 0) / validValues.length
            };
        }
        
        return stats;
    }
}