Source: ragno/algorithms/vsom/VSOMCore.js

/**
 * VSOMCore.js - Core Vectorized Self-Organizing Map Engine
 * 
 * This module implements the core VSOM algorithm with vectorized operations
 * for efficient batch processing and training. It provides the fundamental
 * mathematical operations needed for SOM training and inference.
 * 
 * Key Features:
 * - Vectorized distance calculations
 * - Batch Best Matching Unit (BMU) finding
 * - Efficient neighborhood weight computation
 * - Memory-efficient matrix operations
 * - Support for different distance metrics
 */

import { logger } from '../../../Utils.js'

export default class VSOMCore {
    constructor(options = {}) {
        this.options = {
            distanceMetric: options.distanceMetric || 'cosine',
            batchSize: options.batchSize || 100,
            numericPrecision: options.numericPrecision || 1e-10,
            ...options
        }
        
        // Core matrices - will be initialized when map is created
        this.weights = null          // [mapNodes x inputDimension] - SOM weight vectors
        this.mapSize = null          // [width, height] - Map dimensions
        this.inputDimension = null   // Dimension of input vectors
        this.totalNodes = 0          // Total number of map nodes
        
        // Distance computation cache
        this.distanceCache = new Map()
        this.cacheHits = 0
        this.cacheMisses = 0
        
        // Performance statistics
        this.stats = {
            totalComputations: 0,
            averageComputationTime: 0,
            lastBMUTime: 0,
            lastUpdateTime: 0,
            memoryUsage: 0
        }
        
        logger.debug('VSOMCore initialized with options:', this.options)
    }
    
    /**
     * Initialize the SOM weight matrix with random values
     * @param {Array} mapSize - [width, height] dimensions
     * @param {number} inputDimension - Dimension of input vectors
     * @param {string} initMethod - Initialization method ('random', 'linear', 'pca')
     */
    initializeWeights(mapSize, inputDimension, initMethod = 'random') {
        this.mapSize = mapSize
        this.inputDimension = inputDimension
        this.totalNodes = mapSize[0] * mapSize[1]
        
        // Initialize weight matrix [totalNodes x inputDimension]
        this.weights = new Array(this.totalNodes)
        
        switch (initMethod) {
            case 'random':
                this.initializeRandomWeights()
                break
            case 'linear':
                this.initializeLinearWeights()
                break
            case 'pca':
                this.initializePCAWeights()
                break
            default:
                this.initializeRandomWeights()
        }
        
        logger.debug(`Initialized VSOM weights: ${this.totalNodes} nodes, ${inputDimension}D input`)
    }
    
    /**
     * Initialize weights with random values (Gaussian distribution)
     */
    initializeRandomWeights() {
        for (let i = 0; i < this.totalNodes; i++) {
            this.weights[i] = new Array(this.inputDimension)
            for (let j = 0; j < this.inputDimension; j++) {
                // Initialize with small random values (Gaussian distribution)
                this.weights[i][j] = this.gaussianRandom() * 0.1
            }
        }
    }
    
    /**
     * Initialize weights with linear interpolation across map
     */
    initializeLinearWeights() {
        for (let i = 0; i < this.totalNodes; i++) {
            const [x, y] = this.indexToCoordinates(i)
            this.weights[i] = new Array(this.inputDimension)
            
            for (let j = 0; j < this.inputDimension; j++) {
                // Linear interpolation based on position
                const xFactor = x / (this.mapSize[0] - 1)
                const yFactor = y / (this.mapSize[1] - 1)
                this.weights[i][j] = (xFactor + yFactor) / 2 * 0.1 - 0.05
            }
        }
    }
    
    /**
     * Initialize weights using PCA (placeholder - would need input data)
     */
    initializePCAWeights() {
        // For now, fall back to random initialization
        // In a full implementation, this would use the first two principal components
        this.initializeRandomWeights()
        logger.warn('PCA initialization not implemented, using random initialization')
    }
    
    /**
     * Find Best Matching Units (BMUs) for a batch of input vectors
     * @param {Array} inputBatch - Array of input vectors
     * @returns {Array} Array of BMU indices for each input
     */
    findBestMatchingUnits(inputBatch) {
        const startTime = Date.now()
        const bmus = new Array(inputBatch.length)
        
        for (let i = 0; i < inputBatch.length; i++) {
            bmus[i] = this.findSingleBMU(inputBatch[i])
        }
        
        this.stats.lastBMUTime = Date.now() - startTime
        this.stats.totalComputations += inputBatch.length
        
        return bmus
    }
    
    /**
     * Find Best Matching Unit for a single input vector
     * @param {Array} inputVector - Input vector
     * @returns {number} Index of the best matching unit
     */
    findSingleBMU(inputVector) {
        let bestDistance = Infinity
        let bestIndex = 0
        
        for (let i = 0; i < this.totalNodes; i++) {
            const distance = this.calculateDistance(inputVector, this.weights[i])
            if (distance < bestDistance) {
                bestDistance = distance
                bestIndex = i
            }
        }
        
        return bestIndex
    }
    
    /**
     * Calculate distance between two vectors
     * @param {Array} vector1 - First vector
     * @param {Array} vector2 - Second vector
     * @returns {number} Distance value
     */
    calculateDistance(vector1, vector2) {
        switch (this.options.distanceMetric) {
            case 'cosine':
                return this.cosineDistance(vector1, vector2)
            case 'euclidean':
                return this.euclideanDistance(vector1, vector2)
            case 'manhattan':
                return this.manhattanDistance(vector1, vector2)
            default:
                return this.euclideanDistance(vector1, vector2)
        }
    }
    
    /**
     * Calculate cosine distance (1 - cosine similarity)
     * @param {Array} vector1 - First vector
     * @param {Array} vector2 - Second vector
     * @returns {number} Cosine distance
     */
    cosineDistance(vector1, vector2) {
        let dotProduct = 0
        let norm1 = 0
        let norm2 = 0
        
        for (let i = 0; i < vector1.length; i++) {
            dotProduct += vector1[i] * vector2[i]
            norm1 += vector1[i] * vector1[i]
            norm2 += vector2[i] * vector2[i]
        }
        
        norm1 = Math.sqrt(norm1)
        norm2 = Math.sqrt(norm2)
        
        if (norm1 < this.options.numericPrecision || norm2 < this.options.numericPrecision) {
            return 1.0 // Maximum distance for zero vectors
        }
        
        const cosineSimilarity = dotProduct / (norm1 * norm2)
        return 1.0 - Math.max(-1.0, Math.min(1.0, cosineSimilarity)) // Clamp to [-1, 1]
    }
    
    /**
     * Calculate Euclidean distance
     * @param {Array} vector1 - First vector
     * @param {Array} vector2 - Second vector
     * @returns {number} Euclidean distance
     */
    euclideanDistance(vector1, vector2) {
        let sum = 0
        for (let i = 0; i < vector1.length; i++) {
            const diff = vector1[i] - vector2[i]
            sum += diff * diff
        }
        return Math.sqrt(sum)
    }
    
    /**
     * Calculate Manhattan distance
     * @param {Array} vector1 - First vector
     * @param {Array} vector2 - Second vector
     * @returns {number} Manhattan distance
     */
    manhattanDistance(vector1, vector2) {
        let sum = 0
        for (let i = 0; i < vector1.length; i++) {
            sum += Math.abs(vector1[i] - vector2[i])
        }
        return sum
    }
    
    /**
     * Update weights for a batch of training samples
     * @param {Array} inputBatch - Batch of input vectors
     * @param {Array} bmuIndices - BMU indices for each input
     * @param {number} learningRate - Current learning rate
     * @param {number} neighborhoodRadius - Current neighborhood radius
     * @param {Function} neighborhoodFunction - Neighborhood weight function
     */
    updateWeights(inputBatch, bmuIndices, learningRate, neighborhoodRadius, neighborhoodFunction) {
        const startTime = Date.now()
        
        // Create weight updates accumulator
        const weightUpdates = new Array(this.totalNodes)
        const updateCounts = new Array(this.totalNodes)
        
        for (let i = 0; i < this.totalNodes; i++) {
            weightUpdates[i] = new Array(this.inputDimension).fill(0)
            updateCounts[i] = 0
        }
        
        // Accumulate updates for each training sample
        for (let sampleIdx = 0; sampleIdx < inputBatch.length; sampleIdx++) {
            const inputVector = inputBatch[sampleIdx]
            const bmuIndex = bmuIndices[sampleIdx]
            const bmuCoords = this.indexToCoordinates(bmuIndex)
            
            // Update all nodes based on neighborhood function
            for (let nodeIdx = 0; nodeIdx < this.totalNodes; nodeIdx++) {
                const nodeCoords = this.indexToCoordinates(nodeIdx)
                const distance = this.calculateMapDistance(bmuCoords, nodeCoords)
                const neighborhoodWeight = neighborhoodFunction(distance, neighborhoodRadius)
                
                if (neighborhoodWeight > this.options.numericPrecision) {
                    const updateFactor = learningRate * neighborhoodWeight
                    
                    for (let dim = 0; dim < this.inputDimension; dim++) {
                        const weightDelta = updateFactor * (inputVector[dim] - this.weights[nodeIdx][dim])
                        weightUpdates[nodeIdx][dim] += weightDelta
                    }
                    updateCounts[nodeIdx] += 1
                }
            }
        }
        
        // Apply accumulated updates
        for (let nodeIdx = 0; nodeIdx < this.totalNodes; nodeIdx++) {
            if (updateCounts[nodeIdx] > 0) {
                for (let dim = 0; dim < this.inputDimension; dim++) {
                    this.weights[nodeIdx][dim] += weightUpdates[nodeIdx][dim] / updateCounts[nodeIdx]
                }
            }
        }
        
        this.stats.lastUpdateTime = Date.now() - startTime
    }
    
    /**
     * Convert linear index to 2D map coordinates
     * @param {number} index - Linear index
     * @returns {Array} [x, y] coordinates
     */
    indexToCoordinates(index) {
        const x = index % this.mapSize[0]
        const y = Math.floor(index / this.mapSize[0])
        return [x, y]
    }
    
    /**
     * Convert 2D map coordinates to linear index
     * @param {number} x - X coordinate
     * @param {number} y - Y coordinate
     * @returns {number} Linear index
     */
    coordinatesToIndex(x, y) {
        return y * this.mapSize[0] + x
    }
    
    /**
     * Calculate distance between two points on the map
     * @param {Array} coords1 - [x, y] coordinates of first point
     * @param {Array} coords2 - [x, y] coordinates of second point
     * @returns {number} Distance on the map
     */
    calculateMapDistance(coords1, coords2) {
        const dx = coords1[0] - coords2[0]
        const dy = coords1[1] - coords2[1]
        return Math.sqrt(dx * dx + dy * dy)
    }
    
    /**
     * Generate Gaussian random number (Box-Muller transform)
     * @returns {number} Random number from standard normal distribution
     */
    gaussianRandom() {
        if (this.spare !== undefined) {
            const value = this.spare
            delete this.spare
            return value
        }
        
        const u = Math.random()
        const v = Math.random()
        const mag = Math.sqrt(-2.0 * Math.log(u))
        this.spare = mag * Math.cos(2.0 * Math.PI * v)
        return mag * Math.sin(2.0 * Math.PI * v)
    }
    
    /**
     * Calculate quantization error for the current map
     * @param {Array} inputData - Array of input vectors
     * @returns {number} Average quantization error
     */
    calculateQuantizationError(inputData) {
        let totalError = 0
        
        for (const inputVector of inputData) {
            const bmuIndex = this.findSingleBMU(inputVector)
            const error = this.calculateDistance(inputVector, this.weights[bmuIndex])
            totalError += error
        }
        
        return totalError / inputData.length
    }
    
    /**
     * Calculate topographic error for the current map
     * @param {Array} inputData - Array of input vectors
     * @returns {number} Topographic error (0-1)
     */
    calculateTopographicError(inputData) {
        let topographicErrors = 0
        
        for (const inputVector of inputData) {
            const bmuIndex = this.findSingleBMU(inputVector)
            const bmuCoords = this.indexToCoordinates(bmuIndex)
            
            // Find second best matching unit
            let secondBestDistance = Infinity
            let secondBestIndex = -1
            
            for (let i = 0; i < this.totalNodes; i++) {
                if (i !== bmuIndex) {
                    const distance = this.calculateDistance(inputVector, this.weights[i])
                    if (distance < secondBestDistance) {
                        secondBestDistance = distance
                        secondBestIndex = i
                    }
                }
            }
            
            if (secondBestIndex !== -1) {
                const secondBmuCoords = this.indexToCoordinates(secondBestIndex)
                const mapDistance = this.calculateMapDistance(bmuCoords, secondBmuCoords)
                
                // If BMU and second BMU are not adjacent, it's a topographic error
                if (mapDistance > Math.sqrt(2) + this.options.numericPrecision) {
                    topographicErrors++
                }
            }
        }
        
        return topographicErrors / inputData.length
    }
    
    /**
     * Get weight vector for a specific map node
     * @param {number} nodeIndex - Index of the map node
     * @returns {Array} Weight vector
     */
    getNodeWeights(nodeIndex) {
        if (nodeIndex < 0 || nodeIndex >= this.totalNodes) {
            throw new Error(`Invalid node index: ${nodeIndex}`)
        }
        return [...this.weights[nodeIndex]] // Return copy
    }
    
    /**
     * Set weight vector for a specific map node
     * @param {number} nodeIndex - Index of the map node
     * @param {Array} weights - New weight vector
     */
    setNodeWeights(nodeIndex, weights) {
        if (nodeIndex < 0 || nodeIndex >= this.totalNodes) {
            throw new Error(`Invalid node index: ${nodeIndex}`)
        }
        if (weights.length !== this.inputDimension) {
            throw new Error(`Weight vector dimension mismatch: ${weights.length} vs ${this.inputDimension}`)
        }
        this.weights[nodeIndex] = [...weights] // Store copy
    }
    
    /**
     * Get current algorithm statistics
     * @returns {Object} Performance and usage statistics
     */
    getStatistics() {
        return {
            ...this.stats,
            cacheHits: this.cacheHits,
            cacheMisses: this.cacheMisses,
            cacheHitRatio: this.cacheHits / (this.cacheHits + this.cacheMisses) || 0,
            memoryUsage: this.estimateMemoryUsage()
        }
    }
    
    /**
     * Estimate memory usage of the algorithm
     * @returns {number} Estimated memory usage in bytes
     */
    estimateMemoryUsage() {
        if (!this.weights) return 0
        
        // Weight matrix: totalNodes * inputDimension * 8 bytes (float64)
        const weightsMemory = this.totalNodes * this.inputDimension * 8
        
        // Additional overhead for arrays and objects
        const overhead = this.totalNodes * 100 // Rough estimate
        
        return weightsMemory + overhead
    }
    
    /**
     * Reset algorithm statistics
     */
    resetStatistics() {
        this.stats = {
            totalComputations: 0,
            averageComputationTime: 0,
            lastBMUTime: 0,
            lastUpdateTime: 0,
            memoryUsage: 0
        }
        this.cacheHits = 0
        this.cacheMisses = 0
        this.distanceCache.clear()
        
        logger.debug('VSOM core statistics reset')
    }
}