Source: ragno/search/index.js

/**
 * search/index.js - Ragno Search System Integration Module
 * 
 * This module provides a unified interface to the complete Ragno search system,
 * integrating vector indexing, dual search capabilities, and REST API endpoints.
 * It serves as the main entry point for search functionality within the Ragno
 * knowledge graph system.
 * 
 * Components:
 * - VectorIndex: HNSW-based vector similarity search
 * - DualSearch: Combined exact match + vector similarity + PPR traversal
 * - SearchAPI: REST endpoints for HTTP access
 * 
 * Usage:
 * ```javascript
 * import RagnoSearch from './search/index.js'
 * 
 * const search = new RagnoSearch(config)
 * await search.initialize()
 * const results = await search.search("query text")
 * ```
 */

import VectorIndex from './VectorIndex.js'
import DualSearch from './DualSearch.js'
import SearchAPI from './SearchAPI.js'
import { logger } from '../../Utils.js'

/**
 * Facade over the Ragno search components.
 *
 * Owns one VectorIndex, one DualSearch and one SearchAPI instance, wires the
 * external dependencies (SPARQL endpoint, LLM handler, embedding handler)
 * into them, and exposes search, index-maintenance, persistence and
 * status/statistics methods. Most methods throw until initialize() has
 * completed successfully.
 */
export default class RagnoSearch {
    /**
     * Build the effective configuration and reset all runtime state.
     * No component is constructed here; that happens in initialize().
     *
     * @param {Object} [options] - Configuration overrides
     * @param {Object} [options.vectorIndex] - Overrides merged over the vector index defaults
     * @param {Object} [options.dualSearch] - Overrides merged over the dual search defaults
     * @param {Object} [options.api] - Overrides merged over the API defaults
     * @param {boolean} [options.autoIndex] - Treated as enabled unless explicitly false
     * @param {boolean} [options.indexPersistence] - Treated as enabled unless explicitly false
     * @param {string} [options.indexPath] - Path of the persisted vector index
     * @param {string} [options.metadataPath] - Path of the persisted index metadata
     * @param {*} [options.sparqlEndpoint] - SPARQL endpoint handed to the components
     * @param {Object} [options.llmHandler] - LLM handler handed to the components
     * @param {Object} [options.embeddingHandler] - Embedding handler handed to the components
     */
    constructor(options = {}) {
        // Vector index configuration: top-level shortcuts first, then the
        // explicit nested overrides, which take precedence.
        const vectorIndexConfig = {
            dimension: options.dimension || 1536,
            maxElements: options.maxElements || 100000,
            efConstruction: options.efConstruction || 200,
            mMax: options.mMax || 16,
            efSearch: options.efSearch || 100,
            ...options.vectorIndex
        }

        // Dual search configuration
        const dualSearchConfig = {
            exactMatchTypes: ['ragno:Entity', 'ragno:Attribute'],
            vectorSimilarityTypes: [
                'ragno:Unit',
                'ragno:Attribute',
                'ragno:CommunityElement',
                'ragno:TextElement'
            ],
            vectorSimilarityK: 10,
            similarityThreshold: 0.7,
            pprAlpha: 0.15,
            pprIterations: 2,
            topKPerType: 5,
            ...options.dualSearch
        }

        // API configuration
        const apiConfig = {
            enableCORS: true,
            maxQueryLength: 1000,
            defaultResultLimit: 20,
            maxResultLimit: 100,
            enableCache: true,
            cacheTimeout: 300000, // 5 minutes
            ...options.api
        }

        this.options = {
            // System configuration
            autoIndex: options.autoIndex !== false,
            indexPersistence: options.indexPersistence !== false,
            indexPath: options.indexPath || './data/ragno-vector.index',
            metadataPath: options.metadataPath || './data/ragno-metadata.json',

            // Keep every caller-supplied key reachable through this.options
            ...options,

            // Assigned after the spread on purpose: otherwise a partial
            // options.vectorIndex / dualSearch / api object would replace the
            // merged configuration and silently drop all the defaults.
            vectorIndex: vectorIndexConfig,
            dualSearch: dualSearchConfig,
            api: apiConfig
        }

        // Components are created by initialize()
        this.vectorIndex = null
        this.dualSearch = null
        this.searchAPI = null

        // External dependencies (may also be supplied later via the setters)
        this.sparqlEndpoint = options.sparqlEndpoint || null
        this.llmHandler = options.llmHandler || null
        this.embeddingHandler = options.embeddingHandler || null

        // System state
        this.initialized = false
        this.indexLoaded = false

        // Statistics
        this.stats = {
            initializationTime: null, // milliseconds, set by initialize()
            totalSearches: 0,
            systemUptime: new Date(), // creation time; getStatus() derives uptime from it
            lastUpdate: null
        }

        logger.info('RagnoSearch system created')
    }

    /**
     * Initialize the complete search system: create the vector index, the
     * dual search and the search API, wire the external dependencies into
     * the API, and optionally load a persisted index from disk.
     *
     * @param {Object} [options] - Initialization options
     * @param {boolean} [options.loadIndex] - Pass false to skip loading a persisted index
     * @returns {Promise<void>}
     * @throws {Error} Re-throws any error raised while building or loading components
     */
    async initialize(options = {}) {
        const startTime = Date.now()
        logger.info('Initializing Ragno search system...')

        try {
            // Phase 1: Initialize vector index
            logger.info('Phase 1: Initializing vector index...')
            this.vectorIndex = new VectorIndex(this.options.vectorIndex)

            // Phase 2: Initialize dual search system
            logger.info('Phase 2: Initializing dual search...')
            this.dualSearch = new DualSearch({
                ...this.options.dualSearch,
                vectorIndex: this.vectorIndex,
                sparqlEndpoint: this.sparqlEndpoint,
                llmHandler: this.llmHandler,
                embeddingHandler: this.embeddingHandler
            })

            // Phase 3: Initialize search API
            // NOTE(review): `dualSearch` below is the dual-search configuration
            // object, not the DualSearch instance created in phase 2 - confirm
            // that this is what SearchAPI expects.
            logger.info('Phase 3: Initializing search API...')
            this.searchAPI = new SearchAPI({
                ...this.options.api,
                dualSearch: this.options.dualSearch,
                vectorIndex: this.vectorIndex
            })

            // Configure API dependencies
            this.searchAPI.setVectorIndex(this.vectorIndex)
            if (this.sparqlEndpoint) {
                this.searchAPI.setSPARQLEndpoint(this.sparqlEndpoint)
            }
            if (this.llmHandler) {
                this.searchAPI.setLLMHandler(this.llmHandler)
            }
            if (this.embeddingHandler) {
                this.searchAPI.setEmbeddingHandler(this.embeddingHandler)
            }

            // Phase 4: Load existing index if available.
            // The internal loader is used because the public loadVectorIndex()
            // requires `initialized` to be true, which is not the case yet.
            if (this.options.indexPersistence && options.loadIndex !== false) {
                await this._loadIndexFromDisk()
            }

            // Mark as initialized
            this.initialized = true
            this.stats.initializationTime = Date.now() - startTime
            this.stats.lastUpdate = new Date()

            logger.info(`Ragno search system initialized in ${this.stats.initializationTime}ms`)

        } catch (error) {
            logger.error('Failed to initialize Ragno search system:', error)
            throw error
        }
    }

    /**
     * Main search interface; delegates to DualSearch.search().
     * The search counter is only incremented when the search succeeds.
     *
     * @param {string} query - Search query
     * @param {Object} [options] - Search options, passed through unchanged
     * @returns {Promise<Object>} Search results
     * @throws {Error} If the system is not initialized or the search fails
     */
    async search(query, options = {}) {
        this.ensureInitialized()

        try {
            const results = await this.dualSearch.search(query, options)
            this.stats.totalSearches++
            return results
        } catch (error) {
            logger.error(`Search failed for query "${query}":`, error)
            throw error
        }
    }

    /**
     * Exact match search only
     * @param {string} query - Search query
     * @param {Object} [options] - Search options
     * @returns {Promise<Array>} Exact match results
     * @throws {Error} If the system is not initialized
     */
    async searchExact(query, options = {}) {
        this.ensureInitialized()

        const queryData = await this.dualSearch.processQuery(query, options)
        return await this.dualSearch.performExactMatch(queryData, options)
    }

    /**
     * Vector similarity search only
     * @param {string} query - Search query
     * @param {Object} [options] - Search options
     * @returns {Promise<Array>} Vector similarity results
     * @throws {Error} If the system is not initialized
     */
    async searchSimilarity(query, options = {}) {
        this.ensureInitialized()

        const queryData = await this.dualSearch.processQuery(query, options)
        return await this.dualSearch.performVectorSimilarity(queryData, options)
    }

    /**
     * PPR traversal search
     * @param {Array} entityUris - Starting entity URIs
     * @param {Object} [options] - Traversal options
     * @returns {Promise<Object>} PPR traversal results
     * @throws {Error} If the system is not initialized
     */
    async searchTraversal(entityUris, options = {}) {
        this.ensureInitialized()

        return await this.dualSearch.performPPRTraversal(entityUris, options)
    }

    /**
     * Add nodes to vector index
     * @param {Array} nodes - Array of {uri, embedding, metadata} objects
     * @returns {Array} Array of node IDs added
     * @throws {Error} If the system is not initialized
     */
    addNodesToIndex(nodes) {
        this.ensureInitialized()

        const nodeIds = this.vectorIndex.addNodesBatch(nodes)
        this.stats.lastUpdate = new Date()

        logger.info(`Added ${nodeIds.length} nodes to vector index`)
        return nodeIds
    }

    /**
     * Add single node to vector index
     * @param {string} uri - Node URI
     * @param {Array} embedding - Vector embedding
     * @param {Object} [metadata] - Node metadata
     * @returns {number} Node ID
     * @throws {Error} If the system is not initialized
     */
    addNodeToIndex(uri, embedding, metadata = {}) {
        this.ensureInitialized()

        const nodeId = this.vectorIndex.addNode(uri, embedding, metadata)
        this.stats.lastUpdate = new Date()

        return nodeId
    }

    /**
     * Remove node from vector index. `lastUpdate` is only refreshed when
     * the index reports that something was removed.
     * @param {string} uri - Node URI
     * @returns {boolean} True if removed
     * @throws {Error} If the system is not initialized
     */
    removeNodeFromIndex(uri) {
        this.ensureInitialized()

        const removed = this.vectorIndex.removeNode(uri)
        if (removed) {
            this.stats.lastUpdate = new Date()
        }

        return removed
    }

    /**
     * Check if node exists in index
     * @param {string} uri - Node URI
     * @returns {boolean} True if node exists
     * @throws {Error} If the system is not initialized
     */
    hasNode(uri) {
        this.ensureInitialized()
        return this.vectorIndex.hasNode(uri)
    }

    /**
     * Get node metadata
     * @param {string} uri - Node URI
     * @returns {Object|null} Node metadata
     * @throws {Error} If the system is not initialized
     */
    getNodeMetadata(uri) {
        this.ensureInitialized()
        return this.vectorIndex.getNodeMetadata(uri)
    }

    /**
     * Find similar nodes
     * @param {string} uri - Reference node URI
     * @param {number} [k=10] - Number of similar nodes
     * @param {Object} [options] - Search options
     * @returns {Array} Similar nodes
     * @throws {Error} If the system is not initialized
     */
    findSimilarNodes(uri, k = 10, options = {}) {
        this.ensureInitialized()
        return this.vectorIndex.findSimilarNodes(uri, k, options)
    }

    /**
     * Get nodes by type
     * @param {string} type - Ragno type
     * @param {number} [limit] - Maximum number of nodes
     * @returns {Array} Nodes of specified type
     * @throws {Error} If the system is not initialized
     */
    getNodesByType(type, limit) {
        this.ensureInitialized()
        return this.vectorIndex.getNodesByType(type, limit)
    }

    /**
     * Save vector index to disk at the configured index and metadata paths.
     * Logs a warning and does nothing when persistence is disabled.
     * @returns {Promise<void>}
     * @throws {Error} If the system is not initialized or saving fails
     */
    async saveVectorIndex() {
        this.ensureInitialized()

        if (!this.options.indexPersistence) {
            logger.warn('Index persistence is disabled')
            return
        }

        try {
            await this.vectorIndex.saveIndex(this.options.indexPath, this.options.metadataPath)
            logger.info('Vector index saved successfully')
        } catch (error) {
            logger.error('Failed to save vector index:', error)
            throw error
        }
    }

    /**
     * Load vector index from disk. Missing files are not an error: the
     * system simply continues with the current (empty) index.
     * @returns {Promise<void>}
     * @throws {Error} If the system is not initialized or loading fails
     */
    async loadVectorIndex() {
        this.ensureInitialized()
        await this._loadIndexFromDisk()
    }

    /**
     * Internal loader shared by initialize() and loadVectorIndex().
     * It deliberately does not check `initialized`, because initialize()
     * has to load the index before it can mark the system as initialized.
     * @private
     * @returns {Promise<void>}
     * @throws {Error} Any load failure other than a missing file (ENOENT)
     */
    async _loadIndexFromDisk() {
        if (!this.options.indexPersistence) {
            logger.debug('Index persistence disabled, skipping load')
            return
        }

        try {
            // Check if files exist; fs.access rejects with code ENOENT otherwise
            const fs = await import('fs/promises')
            await fs.access(this.options.indexPath)
            await fs.access(this.options.metadataPath)

            await this.vectorIndex.loadIndex(this.options.indexPath, this.options.metadataPath)
            this.indexLoaded = true
            logger.info('Vector index loaded successfully')

        } catch (error) {
            if (error.code === 'ENOENT') {
                logger.info('No existing vector index found, starting fresh')
            } else {
                logger.error('Failed to load vector index:', error)
                throw error
            }
        }
    }

    /**
     * Clear vector index
     * @throws {Error} If the system is not initialized
     */
    clearVectorIndex() {
        this.ensureInitialized()

        this.vectorIndex.clear()
        this.stats.lastUpdate = new Date()

        logger.info('Vector index cleared')
    }

    /**
     * Optimize vector index
     * @param {Object} [options] - Optimization options
     * @throws {Error} If the system is not initialized
     */
    optimizeVectorIndex(options = {}) {
        this.ensureInitialized()

        this.vectorIndex.optimizeIndex(options)
        this.stats.lastUpdate = new Date()
    }

    /**
     * Get Express.js route handlers for HTTP API
     * @returns {Object} Route handlers
     * @throws {Error} If the system is not initialized
     */
    getAPIRouteHandlers() {
        this.ensureInitialized()
        return this.searchAPI.getRouteHandlers()
    }

    /**
     * Get comprehensive system statistics. Component statistics are only
     * included once the system is initialized; before that only the
     * system-level counters and flags are returned.
     * @returns {Object} System statistics
     */
    getStatistics() {
        const baseStats = {
            system: this.stats,
            initialized: this.initialized,
            indexLoaded: this.indexLoaded
        }

        if (this.initialized) {
            return {
                ...baseStats,
                vectorIndex: this.vectorIndex.getStatistics(),
                dualSearch: this.dualSearch.getStatistics(),
                searchAPI: this.searchAPI.getStatistics()
            }
        }

        return baseStats
    }

    /**
     * Get system status: which components and dependencies are present,
     * which capabilities follow from them, and the time in milliseconds
     * since this instance was constructed.
     * @returns {Object} System status
     */
    getStatus() {
        return {
            status: this.initialized ? 'operational' : 'initializing',
            components: {
                vectorIndex: !!this.vectorIndex,
                dualSearch: !!this.dualSearch,
                searchAPI: !!this.searchAPI
            },
            dependencies: {
                sparqlEndpoint: !!this.sparqlEndpoint,
                llmHandler: !!this.llmHandler,
                embeddingHandler: !!this.embeddingHandler
            },
            capabilities: {
                exactMatch: !!this.sparqlEndpoint,
                vectorSimilarity: !!this.vectorIndex,
                pprTraversal: !!this.sparqlEndpoint,
                entityExtraction: !!this.llmHandler,
                embeddingGeneration: !!this.embeddingHandler
            },
            uptime: Date.now() - this.stats.systemUptime.getTime(),
            initialized: this.initialized,
            indexLoaded: this.indexLoaded
        }
    }

    /**
     * Set SPARQL endpoint and forward it to the components that already exist
     * @param {string} sparqlEndpoint - SPARQL endpoint URL
     */
    setSPARQLEndpoint(sparqlEndpoint) {
        this.sparqlEndpoint = sparqlEndpoint

        if (this.dualSearch) {
            this.dualSearch.setSPARQLEndpoint(sparqlEndpoint)
        }
        if (this.searchAPI) {
            this.searchAPI.setSPARQLEndpoint(sparqlEndpoint)
        }

        logger.info(`SPARQL endpoint configured: ${sparqlEndpoint}`)
    }

    /**
     * Set LLM handler and forward it to the components that already exist
     * @param {Object} llmHandler - LLM handler instance
     */
    setLLMHandler(llmHandler) {
        this.llmHandler = llmHandler

        if (this.dualSearch) {
            this.dualSearch.setLLMHandler(llmHandler)
        }
        if (this.searchAPI) {
            this.searchAPI.setLLMHandler(llmHandler)
        }

        logger.info('LLM handler configured')
    }

    /**
     * Set embedding handler and forward it to the components that already exist
     * @param {Object} embeddingHandler - Embedding handler instance
     */
    setEmbeddingHandler(embeddingHandler) {
        this.embeddingHandler = embeddingHandler

        if (this.dualSearch) {
            this.dualSearch.setEmbeddingHandler(embeddingHandler)
        }
        if (this.searchAPI) {
            this.searchAPI.setEmbeddingHandler(embeddingHandler)
        }

        logger.info('Embedding handler configured')
    }

    /**
     * Ensure system is initialized
     * @throws {Error} If system is not initialized
     */
    ensureInitialized() {
        if (!this.initialized) {
            throw new Error('RagnoSearch system not initialized. Call initialize() first.')
        }
    }

    /**
     * Shutdown search system and cleanup resources. Safe to call more than
     * once and on a system whose initialization never completed.
     * @returns {Promise<void>}
     * @throws {Error} Re-throws any error raised while saving or clearing caches
     */
    async shutdown() {
        logger.info('Shutting down Ragno search system...')

        try {
            // Save index if persistence enabled. Only a fully initialized
            // system is persisted: saveVectorIndex() would throw otherwise,
            // and a half-built index must not overwrite the files on disk.
            if (this.initialized && this.options.indexPersistence && this.vectorIndex) {
                await this.saveVectorIndex()
            }

            // Clear caches
            if (this.searchAPI) {
                this.searchAPI.clearCache()
            }

            // Reset state
            this.initialized = false
            this.indexLoaded = false

            logger.info('Ragno search system shutdown complete')

        } catch (error) {
            logger.error('Error during shutdown:', error)
            throw error
        }
    }
}

// Re-export the individual components imported at the top of this module so
// callers can use them directly without going through RagnoSearch.
export {
    VectorIndex,
    DualSearch,
    SearchAPI
}