Source: utils/ParseHelper.js

/**
 * ParseHelper - Utility for handling and cleaning LLM response parsing
 * 
 * This helper addresses common issues with LLM responses that may include
 * markdown formatting, code fences, or other wrapper syntax that interferes
 * with JSON parsing.
 */

export default class ParseHelper {
    /**
     * Resolve and clean JSON syntax from LLM responses
     * 
     * This method handles common patterns where LLMs wrap JSON in markdown
     * code fences or add explanatory text around the JSON content.
     * 
     * @param {string} responseText - Raw LLM response text
     * @returns {string|false} - Cleaned JSON string if pattern matches, false otherwise
     */
    static resolveSyntax(responseText) {
        if (!responseText || typeof responseText !== 'string') {
            return false;
        }

        // Trim whitespace
        const trimmed = responseText.trim();
        
        // Pattern 1: JSON wrapped in markdown code fences
        // Matches: ```json\n{...}\n``` or ```\n{...}\n```
        const markdownFencePattern = /^```(?:json)?\s*\n?([\s\S]*?)\n?```$/;
        const markdownMatch = trimmed.match(markdownFencePattern);
        
        if (markdownMatch) {
            const extracted = markdownMatch[1].trim();
            // Validate that the extracted content looks like JSON
            if (this._looksLikeJson(extracted)) {
                return extracted;
            }
        }

        // Pattern 2: JSON with leading/trailing text
        // Look for JSON array or object patterns within the text
        const jsonArrayPattern = /(\[[\s\S]*?\])/;
        const jsonObjectPattern = /(\{[\s\S]*?\})/;
        
        const arrayMatch = trimmed.match(jsonArrayPattern);
        if (arrayMatch && this._looksLikeJson(arrayMatch[1])) {
            return arrayMatch[1].trim();
        }
        
        const objectMatch = trimmed.match(jsonObjectPattern);
        if (objectMatch && this._looksLikeJson(objectMatch[1])) {
            return objectMatch[1].trim();
        }

        // Pattern 3: Multiple JSON blocks - take the first valid one
        const multipleJsonPattern = /```(?:json)?\s*\n?([\s\S]*?)\n?```/g;
        let match;
        while ((match = multipleJsonPattern.exec(trimmed)) !== null) {
            const candidate = match[1].trim();
            if (this._looksLikeJson(candidate)) {
                return candidate;
            }
        }

        // Pattern 4: JSON with explanatory text before/after
        // Common patterns like "Here's the JSON:" or "The result is:"
        const explanationPatterns = [
            /(?:here's|here is|the result is|output|response)[\s\S]*?(\[[\s\S]*?\]|\{[\s\S]*?\})/i,
            /json[\s\S]*?(\[[\s\S]*?\]|\{[\s\S]*?\})/i
        ];
        
        for (const pattern of explanationPatterns) {
            const explanationMatch = trimmed.match(pattern);
            if (explanationMatch && this._looksLikeJson(explanationMatch[1])) {
                return explanationMatch[1].trim();
            }
        }

        // If no patterns match, return false
        return false;
    }

    /**
     * Quick heuristic check if a string looks like valid JSON
     * 
     * @param {string} str - String to check
     * @returns {boolean} - True if it looks like JSON
     * @private
     */
    static _looksLikeJson(str) {
        if (!str || typeof str !== 'string') {
            return false;
        }
        
        const trimmed = str.trim();
        
        // Must start and end with appropriate JSON delimiters
        const isArray = trimmed.startsWith('[') && trimmed.endsWith(']');
        const isObject = trimmed.startsWith('{') && trimmed.endsWith('}');
        
        if (!isArray && !isObject) {
            return false;
        }
        
        // Basic bracket balance check
        let arrayDepth = 0;
        let objectDepth = 0;
        let inString = false;
        let escaped = false;
        
        for (let i = 0; i < trimmed.length; i++) {
            const char = trimmed[i];
            
            if (escaped) {
                escaped = false;
                continue;
            }
            
            if (char === '\\') {
                escaped = true;
                continue;
            }
            
            if (char === '"' && !escaped) {
                inString = !inString;
                continue;
            }
            
            if (inString) {
                continue;
            }
            
            switch (char) {
                case '[':
                    arrayDepth++;
                    break;
                case ']':
                    arrayDepth--;
                    break;
                case '{':
                    objectDepth++;
                    break;
                case '}':
                    objectDepth--;
                    break;
            }
            
            // Early exit if brackets become unbalanced
            if (arrayDepth < 0 || objectDepth < 0) {
                return false;
            }
        }
        
        // Check final balance
        return arrayDepth === 0 && objectDepth === 0;
    }

    /**
     * Attempt to parse JSON with syntax resolution
     * 
     * This is a convenience method that combines resolveSyntax with JSON.parse
     * and provides detailed error information.
     * 
     * @param {string} responseText - Raw LLM response text
     * @returns {Object} - {success: boolean, data?: any, error?: string, cleaned?: string}
     */
    static parseJsonResponse(responseText) {
        try {
            // First try direct parsing
            const directParse = JSON.parse(responseText);
            return { success: true, data: directParse };
        } catch (directError) {
            // Try with syntax resolution
            const cleaned = this.resolveSyntax(responseText);
            
            if (cleaned === false) {
                return {
                    success: false,
                    error: 'No recognizable JSON pattern found in response',
                    originalError: directError.message
                };
            }
            
            try {
                const resolvedParse = JSON.parse(cleaned);
                return { 
                    success: true, 
                    data: resolvedParse, 
                    cleaned: cleaned,
                    resolved: true 
                };
            } catch (resolvedError) {
                return {
                    success: false,
                    error: 'JSON parsing failed even after syntax resolution',
                    cleaned: cleaned,
                    originalError: directError.message,
                    resolvedError: resolvedError.message
                };
            }
        }
    }

    /**
     * Validate that parsed JSON matches expected structure
     * 
     * @param {any} data - Parsed JSON data
     * @param {string} expectedType - 'array' or 'object'
     * @param {Object} options - Validation options
     * @returns {boolean} - True if valid
     */
    static validateJsonStructure(data, expectedType = 'array', options = {}) {
        const { minLength = 0, maxLength = Infinity, requiredKeys = [] } = options;
        
        if (expectedType === 'array') {
            if (!Array.isArray(data)) return false;
            if (data.length < minLength || data.length > maxLength) return false;
            return true;
        }
        
        if (expectedType === 'object') {
            if (!data || typeof data !== 'object' || Array.isArray(data)) return false;
            
            // Check required keys
            for (const key of requiredKeys) {
                if (!(key in data)) return false;
            }
            
            return true;
        }
        
        return false;
    }
}