JSDoc: Source: compose/workflows/BeerQAWorkflow.js

Source: compose/workflows/BeerQAWorkflow.js
/**
 * BeerQAWorkflow - Standard BeerQA question answering workflow
 * 
 * This workflow implements the standard BeerQA processing pipeline:
 * 1. Question augmentation and relationship building
 * 2. Context retrieval using ZPT navigation
 * 3. LLM-based answer generation
 * 
 * API: execute(input, resources, options)
 */

import BaseWorkflow from './BaseWorkflow.js';
import logger from 'loglevel';
import crypto from 'crypto';

export default class BeerQAWorkflow extends BaseWorkflow {
    constructor() {
        super('beerqa');
    }

    /**
     * Execute the standard BeerQA workflow
     * 
     * @param {Object} input - Workflow input data
     * @param {Object} input.question - Question object with text and optional uri
     * @param {string} input.question.text - The question text
     * @param {string} input.question.uri - Question URI (optional)
     * @param {Object} resources - External dependencies
     * @param {Object} resources.llmHandler - LLM handler for answer generation
     * @param {Object} resources.sparqlHelper - SPARQL helper for database operations
     * @param {Object} resources.config - Configuration object
     * @param {Object} resources.contextManager - Context manager for retrieval (optional)
     * @param {Object} options - Configuration options
     * @param {number} options.maxContextTokens - Maximum context tokens (default: 4000)
     * @param {boolean} options.useEnhancedContext - Use enhanced context retrieval (default: true)
     * @param {string} options.answerStyle - Answer generation style (default: 'comprehensive')
     * @returns {Promise<Object>} Workflow execution results with answer
     */
    async execute(input, resources, options = {}) {
        const startTime = Date.now();
        
        try {
            // Validate inputs and resources
            this._validateInput(input, ['question']);
            this._validateResources(resources, ['llmHandler', 'sparqlHelper', 'config']);
            
            const { question } = input;
            const { llmHandler, sparqlHelper, config } = resources;
            
            const workflowConfig = this._mergeConfig(options, {
                maxContextTokens: 4000,
                useEnhancedContext: true,
                answerStyle: 'comprehensive',
                maxRelatedCorpuscles: 10
            });

            this._logStep('START', `Processing question: "${question.text}"`);

            // Step 1: Question augmentation and relationship discovery
            const augmentationResult = await this._augmentQuestion(
                question, 
                sparqlHelper, 
                config, 
                workflowConfig
            );

            if (!augmentationResult.success) {
                return this._handleError(new Error(augmentationResult.error), 'augmentation');
            }

            // Step 2: Context retrieval and navigation
            const contextResult = await this._retrieveContext(
                augmentationResult.data.augmentedQuestion,
                sparqlHelper,
                config,
                workflowConfig
            );

            if (!contextResult.success) {
                return this._handleError(new Error(contextResult.error), 'context-retrieval');
            }

            // Step 3: Answer generation
            const answerResult = await this._generateAnswer(
                question,
                contextResult.data.context,
                llmHandler,
                workflowConfig
            );

            if (!answerResult.success) {
                return this._handleError(new Error(answerResult.error), 'answer-generation');
            }

            this._logStep('COMPLETE', 'BeerQA workflow completed successfully');

            return this._createResult(true, {
                question: question,
                augmentedQuestion: augmentationResult.data.augmentedQuestion,
                context: contextResult.data.context,
                answer: answerResult.data.answer,
                relationships: augmentationResult.data.relationships || [],
                corpusclesUsed: contextResult.data.corpusclesUsed || 0
            }, null, this._getWorkflowMetadata(startTime, {
                augmentationDuration: augmentationResult.duration,
                contextRetrievalDuration: contextResult.duration,
                answerGenerationDuration: answerResult.duration,
                totalSteps: 3
            }));

        } catch (error) {
            return this._handleError(error, 'workflow-execution');
        }
    }

    /**
     * Augment question with relationships and metadata
     * @private
     */
    async _augmentQuestion(question, sparqlHelper, config, workflowConfig) {
        const stepStart = Date.now();
        
        try {
            this._logStep('AUGMENT', 'Augmenting question with relationships');

            // For now, we'll create a basic augmented question structure
            // In a full implementation, this would call actual augmentation logic
            const augmentedQuestion = {
                ...question,
                uri: question.uri || this._generateQuestionURI(question.text, config),
                relationships: [],
                entities: [],
                concepts: [],
                timestamp: new Date().toISOString()
            };

            // TODO: Implement actual relationship discovery
            // This would involve:
            // - Entity extraction from question text
            // - SPARQL queries to find related corpuscles
            // - Ragno relationship creation

            return {
                success: true,
                data: {
                    augmentedQuestion,
                    relationships: []
                },
                duration: Date.now() - stepStart
            };

        } catch (error) {
            logger.error('Question augmentation failed:', error.message);
            return {
                success: false,
                error: error.message,
                duration: Date.now() - stepStart
            };
        }
    }

    /**
     * Retrieve context using ZPT navigation or direct queries
     * @private
     */
    async _retrieveContext(augmentedQuestion, sparqlHelper, config, workflowConfig) {
        const stepStart = Date.now();
        
        try {
            this._logStep('CONTEXT', 'Retrieving context for question');

            // Basic context retrieval - in full implementation this would use:
            // - ZPT navigation for semantic context discovery
            // - Context manager for token-aware retrieval
            // - Multiple graph queries for comprehensive context

            const contextQuery = `
PREFIX ragno: <http://purl.org/stuff/ragno/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?corpuscle ?label ?content WHERE {
    GRAPH <${config.beerqaGraphURI}> {
        ?corpuscle a ragno:Corpuscle ;
                  rdfs:label ?label .
        
        OPTIONAL {
            ?corpuscle ragno:hasTextElement ?textElement .
            ?textElement ragno:content ?content .
        }
    }
}
LIMIT ${workflowConfig.maxRelatedCorpuscles}`;

            const contextResult = await sparqlHelper.executeSelect(contextQuery);
            
            let context = `Question: ${augmentedQuestion.text}\n\nRelevant context:\n`;
            let corpusclesUsed = 0;

            if (contextResult.success && contextResult.data.results.bindings.length > 0) {
                for (const binding of contextResult.data.results.bindings) {
                    const label = binding.label?.value || '';
                    const content = binding.content?.value || '';
                    
                    if (label) {
                        context += `- ${label}`;
                        if (content) {
                            context += `: ${content.substring(0, 200)}${content.length > 200 ? '...' : ''}`;
                        }
                        context += '\n';
                        corpusclesUsed++;
                    }
                }
            } else {
                context += 'No specific context found in knowledge base.\n';
            }

            return {
                success: true,
                data: {
                    context,
                    corpusclesUsed
                },
                duration: Date.now() - stepStart
            };

        } catch (error) {
            logger.error('Context retrieval failed:', error.message);
            return {
                success: false,
                error: error.message,
                duration: Date.now() - stepStart
            };
        }
    }

    /**
     * Generate answer using LLM with context
     * @private
     */
    async _generateAnswer(question, context, llmHandler, workflowConfig) {
        const stepStart = Date.now();
        
        try {
            this._logStep('ANSWER', 'Generating answer with LLM');

            const prompt = this._buildAnswerPrompt(question.text, context, workflowConfig.answerStyle);
            const answer = await llmHandler.generateResponse(prompt);

            return {
                success: true,
                data: {
                    answer,
                    prompt: prompt.substring(0, 200) + '...' // Truncated for metadata
                },
                duration: Date.now() - stepStart
            };

        } catch (error) {
            logger.error('Answer generation failed:', error.message);
            return {
                success: false,
                error: error.message,
                duration: Date.now() - stepStart
            };
        }
    }

    /**
     * Build answer generation prompt
     * @private
     */
    _buildAnswerPrompt(questionText, context, answerStyle) {
        const styleInstructions = {
            comprehensive: 'Provide a comprehensive, detailed answer that covers all aspects of the question.',
            concise: 'Provide a clear, concise answer that directly addresses the question.',
            analytical: 'Provide an analytical answer that examines different perspectives and implications.'
        };

        return `You are an AI assistant answering questions based on provided context. ${styleInstructions[answerStyle] || styleInstructions.comprehensive}

CONTEXT:
${context}

QUESTION: ${questionText}

Please provide your answer based on the context provided. If the context doesn't contain sufficient information, clearly state what information is missing and provide the best answer possible with available information.

ANSWER:`;
    }

    /**
     * Generate a question URI
     * @private
     */
    _generateQuestionURI(questionText, config) {
        const hash = crypto
            .createHash('sha256')
            .update(questionText)
            .digest('hex')
            .substring(0, 16);
        
        return `${config.beerqaGraphURI}/corpuscle/question_${hash}`;
    }
}