Source: handlers/LLMHandler.js

import logger from 'loglevel'
import PromptTemplates from '../PromptTemplates.js'
import ParseHelper from '../utils/ParseHelper.js'

/**
 * @typedef {Object} LLMProvider - Language model provider interface
 * @typedef {Object} ChatMessage - Chat message object
 */

export default class LLMHandler {
    /**
     * @param {LLMProvider} llmProvider
     * @param {string} chatModel
     * @param {number} [temperature=0.7]
     * @param {Object} [options={}]
     * @param {boolean} [options.enableFallbacks=false] - Opt in to fallback responses when a call fails
     * @param {number} [options.timeoutMs=60000] - Timeout in milliseconds for a single LLM call
     * @param {string} [options.fallbackResponse] - Text returned when fallbacks are enabled and a call fails
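     * @example
     * // Minimal usage sketch; `provider` and the model name are illustrative, not defined in this module
     * const handler = new LLMHandler(provider, 'my-chat-model', 0.7, { enableFallbacks: true })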
     */
    constructor(llmProvider, chatModel, temperature = 0.7, options = {}) {
        this.llmProvider = llmProvider
        this.chatModel = chatModel
        this.temperature = temperature
        
        // Simple fallback configuration (opt-in for backward compatibility)
        // Spread caller options first so the normalised values below are not overwritten
        this.options = {
            ...options,
            enableFallbacks: options.enableFallbacks === true, // Default disabled for backward compatibility
            timeoutMs: options.timeoutMs || 60000,
            fallbackResponse: options.fallbackResponse || 'Unable to generate response due to service unavailability.'
        }
        
        // Detect provider interface type
        this._hasHyperdataClientsInterface = typeof this.llmProvider.chat === 'function'
        this._hasLegacyInterface = typeof this.llmProvider.generateChat === 'function'
    }

    /**
     * Call chat method using the appropriate provider interface
     */
    async _callChat(model, messages, options = {}) {
        if (this._hasHyperdataClientsInterface) {
            // hyperdata-clients interface: chat(messages, options)
            return await this.llmProvider.chat(messages, { model, ...options })
        } else if (this._hasLegacyInterface) {
            // Legacy interface: generateChat(model, messages, options)
            return await this.llmProvider.generateChat(model, messages, options)
        } else {
            throw new Error('Provider does not support chat operations')
        }
    }

    /**
     * Call completion method using the appropriate provider interface
     */
    async _callCompletion(model, prompt, options = {}) {
        if (this._hasHyperdataClientsInterface) {
            // hyperdata-clients interface: complete(prompt, options)
            return await this.llmProvider.complete(prompt, { model, ...options })
        } else if (this._hasLegacyInterface) {
            // Legacy interface: generateCompletion(model, prompt, options)
            return await this.llmProvider.generateCompletion(model, prompt, options)
        } else {
            throw new Error('Provider does not support completion operations')
        }
    }

    /**
     * Generate a chat response for a prompt with supporting context
     * @param {string} prompt - The user's input prompt
     * @param {string} context - Additional context for the prompt
     * @param {Object} [options] - Additional options
     * @param {string} [options.systemPrompt] - System prompt to use
     * @param {string} [options.model] - Override the default model
     * @param {number} [options.temperature] - Override the default temperature
     * @param {number} [options.maxRetries=3] - Retry attempts for rate-limited calls
     * @param {number} [options.baseDelay=1000] - Base delay in ms for exponential backoff
     * @param {number} [options.timeoutMs] - Timeout in ms for a single LLM call
     * @returns {Promise<string>}
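     * @example
     * // Illustrative call; `handler` and `retrievedContext` are assumed to exist elsewhere
     * const answer = await handler.generateResponse('What did we decide last time?', retrievedContext, { temperature: 0.5 })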
     */
    async generateResponse(prompt, context, {
        systemPrompt = "You're a helpful assistant with memory of past interactions.",
        model = this.chatModel,
        temperature = this.temperature,
        maxRetries = 3,
        baseDelay = 1000,
        timeoutMs = this.options.timeoutMs
    } = {}) {
        try {
            logger.log(`LLMHandler.generateResponse,
                prompt = ${prompt}
                context = ${context}
                `)
            const messages = PromptTemplates.formatChatPrompt(
                model,
                systemPrompt,
                context,
                prompt
            )
            logger.log(`LLMHandler.generateResponse, model = ${model}`)
            
            return await this.withRateLimit(async () => {
                return await this.withTimeout(
                    this._callChat(model, messages, { temperature }),
                    timeoutMs
                )
            }, maxRetries, baseDelay)

        } catch (error) {
            logger.error('Error generating chat response:', error)
            // Opt-in fallback instead of throwing (see constructor options)
            if (this.options.enableFallbacks) {
                return this.generateFallbackResponse(prompt, context, error)
            }
            throw error
        }
    }

    /**
     * Execute a function with rate limiting and exponential backoff
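     * With the defaults (baseDelay = 1000), retry delays are roughly 1s, 2s, 4s plus up to 500ms of jitter.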
     */
    async withRateLimit(fn, maxRetries = 3, baseDelay = 1000) {
        let lastError = null;
        
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
            try {
                // Add a small delay before each request to avoid overwhelming the API
                if (attempt > 0) {
                    const delay = baseDelay * Math.pow(2, attempt - 1) + Math.random() * 500; // Add jitter
                    logger.log(`Retrying after ${delay.toFixed(0)}ms (attempt ${attempt}/${maxRetries})`);
                    await new Promise(resolve => setTimeout(resolve, delay));
                } else {
                    // Even on first attempt, add a small delay to avoid rapid-fire requests
                    await new Promise(resolve => setTimeout(resolve, Math.random() * 200 + 100));
                }
                
                return await fn();
                
            } catch (error) {
                lastError = error;

                // Check if it's a rate limit / overload error worth retrying
                const isRateLimitError = error.code === 529 ||
                    error.message?.includes('overloaded') ||
                    error.message?.includes('rate limit');

                // For non-rate-limit errors, throw immediately
                if (!isRateLimitError) {
                    throw error;
                }

                if (attempt < maxRetries) {
                    logger.warn(`Rate limited (attempt ${attempt + 1}/${maxRetries + 1}), retrying...`);
                }
            }
        }
        
        // If we've exhausted all retries, throw the last error
        logger.error(`All ${maxRetries + 1} attempts failed due to rate limiting`);
        throw lastError;
    }

    /**
     * Extract key concepts from text, returned as an array of strings
     * @param {string} text
     * @param {Object} [options={}] - Currently unused
     * @returns {Promise<string[]>}
     */
    async extractConcepts(text, options = {}) {
        try {
            const prompt = PromptTemplates.formatConceptPrompt(this.chatModel, text)
            
            // Determine if we should use chat or completion based on prompt format
            const isMessagesFormat = Array.isArray(prompt)
            
            const response = await this.withRateLimit(async () => {
                if (isMessagesFormat) {
                    return await this._callChat(
                        this.chatModel,
                        prompt, // pass the full messages array
                        { temperature: 0.2 }
                    )
                } else {
                    return await this._callCompletion(
                        this.chatModel,
                        prompt,
                        { temperature: 0.2 }
                    )
                }
            })
            
            logger.info(`LLM response for concept extraction: ${response}`)
            
            // Use ParseHelper to resolve syntax
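            // The model is expected to return a JSON array of concept strings,
            // e.g. ["semantic memory", "graph", "embedding"] (illustrative values only)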
            const cleanedResponse = ParseHelper.resolveSyntax(response)
            if (cleanedResponse === false) {
                logger.warn('ParseHelper could not resolve syntax, falling back to regex extraction')
                
                // Fallback to regex-based extraction
                const match = response.match(/\[.*\]/)
                if (!match) {
                    logger.warn('No concept array found in LLM response')
                    logger.warn('Raw response was:', response)
                    throw new Error('No JSON array found in LLM response for concept extraction')
                }
                
                try {
                    const parsed = JSON.parse(match[0])
                    logger.info(`Successfully parsed ${parsed.length} concepts using regex fallback`)
                    return parsed
                } catch (parseError) {
                    logger.error('Failed to parse concepts array:', parseError)
                    logger.error('Raw match was:', match[0])
                    throw new Error(`Failed to parse JSON from LLM response: ${parseError.message}`)
                }
            }
            
            // Parse the cleaned response
            try {
                const parsed = JSON.parse(cleanedResponse)
                if (Array.isArray(parsed)) {
                    logger.info(`Successfully parsed ${parsed.length} concepts using ParseHelper`)
                    return parsed
                } else {
                    throw new Error('Parsed result is not an array')
                }
            } catch (parseError) {
                logger.error('Failed to parse cleaned response:', parseError)
                logger.error('Cleaned response was:', cleanedResponse)
                throw new Error(`Failed to parse JSON from cleaned response: ${parseError.message}`)
            }
        } catch (error) {
            logger.error('Error extracting concepts:', error)
            throw error
        }
    }

    /**
     * @param {string} text
     * @param {string} model
     * @param {number} [retries=3]
     * @returns {Promise<number[]>}
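     * Note: calls the provider's generateEmbedding(model, text) directly;
     * the chat/completion interface detection above is not applied here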
     */
    async generateEmbedding(text, model, retries = 3) {
        let lastError = null

        for (let attempt = 0; attempt < retries; attempt++) {
            try {
                return await this.llmProvider.generateEmbedding(model, text)
            } catch (error) {
                lastError = error
                logger.warn(`Embedding generation attempt ${attempt + 1} failed:`, error)
                await new Promise(resolve => setTimeout(resolve, 1000 * (attempt + 1)))
            }
        }

        throw new Error(`Failed to generate embedding after ${retries} attempts: ${lastError?.message}`)
    }

    /**
     * @param {number} temperature
     * @throws {Error} If temperature is invalid
     */
    setTemperature(temperature) {
        if (temperature < 0 || temperature > 1) {
            throw new Error('Temperature must be between 0 and 1')
        }
        this.temperature = temperature
    }

    /**
     * @param {string} model
     * @returns {boolean}
     */
    validateModel(model) {
        return typeof model === 'string' && model.length > 0
    }

    // Simple timeout wrapper
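    // Note: Promise.race only rejects the wrapper promise; the underlying LLM request is not cancelled on timeout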
    async withTimeout(promise, timeoutMs) {
        return Promise.race([
            promise,
            new Promise((_, reject) => 
                setTimeout(() => reject(new Error('LLM operation timed out')), timeoutMs)
            )
        ])
    }

    // Simple fallback response generation
    generateFallbackResponse(prompt, context, error) {
        const isTimeoutError = error.message?.includes('timeout') || error.message?.includes('timed out')
        
        if (isTimeoutError) {
            return `${this.options.fallbackResponse} The request took too long to process. Please try with a simpler query.`
        }
        
        return this.options.fallbackResponse
    }

    // Simple fallback concept extraction using basic text analysis
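    // e.g. extractBasicConcepts('The quick brown fox jumps over the lazy dog')
    //      returns ['quick', 'brown', 'fox', 'jumps', 'over', 'lazy', 'dog']
    //      (all counts are equal, so the stable sort preserves input order)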
    extractBasicConcepts(text) {
        if (!text || typeof text !== 'string') return []
        
        // Simple keyword extraction - remove common words and extract meaningful terms
        const commonWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those'])
        
        const words = text.toLowerCase()
            .replace(/[^\w\s]/g, ' ')
            .split(/\s+/)
            .filter(word => word.length > 2 && !commonWords.has(word))
        
        // Count frequency and return top concepts
        const frequency = {}
        words.forEach(word => {
            frequency[word] = (frequency[word] || 0) + 1
        })
        
        return Object.entries(frequency)
            .sort(([,a], [,b]) => b - a)
            .slice(0, 10)
            .map(([word]) => word)
    }
}