import { fileURLToPath } from 'url';
import { dirname, join, resolve } from 'path';
import fs from 'fs';
import dotenv from 'dotenv';

// Load the project-level .env as early as possible so environment variables
// are available module-wide.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
dotenv.config({ path: resolve(__dirname, '../.env') });
/**
* Configuration management for Semem system
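 *
 * Typical usage (a sketch; the config file path is illustrative):
 * @example
 *   const config = new Config('./config/config.json');
 *   await config.init();
 *   const chatModel = config.get('models.chat.model');
 *   const queryUrl = config.get('storage.options.query');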
*/
export default class Config {
static defaults = {
storage: {
type: 'sparql',
options: {
query: 'http://${SPARQL_HOST:-localhost}:${SPARQL_PORT:-3030}/semem/sparql',
update: 'http://${SPARQL_HOST:-localhost}:${SPARQL_PORT:-3030}/semem/update',
data: 'http://${SPARQL_HOST:-localhost}:${SPARQL_PORT:-3030}/semem/data',
graphName: 'http://hyperdata.it/content',
user: '${SPARQL_USER:-admin}',
password: '${SPARQL_PASSWORD:-admin123}'
}
},
models: {
chat: {
// provider: 'ollama',
// model: 'qwen2:1.5b',
provider: 'mistral',
model: 'mistral-small-latest',
options: {}
},
embedding: {
provider: 'ollama',
model: 'nomic-embed-text',
options: {
baseUrl: 'http://localhost:11434'
}
},
// Alternative embedding providers
embeddingProviders: {
ollama: {
model: 'nomic-embed-text',
options: {
baseUrl: 'http://localhost:11434'
}
},
nomic: {
model: 'nomic-embed-text-v1.5',
options: {
apiKey: '${NOMIC_API_KEY}'
}
}
}
},
memory: {
dimension: 1536,
similarityThreshold: 40,
contextWindow: 3,
decayRate: 0.0001,
promotionThreshold: 2.0, // Score needed for long-term promotion (lowered for testing)
classificationChance: 0.5 // Probability of running classification per retrieval (increased for testing)
},
sparqlEndpoints: [{
label: "Hyperdata Fuseki",
user: "admin",
password: "admin123",
urlBase: "https://fuseki.hyperdata.it",
dataset: "hyperdata.it",
query: "/hyperdata.it/query",
update: "/hyperdata.it/update",
upload: "/hyperdata.it/upload",
gspRead: "/hyperdata.it/data",
gspWrite: "/hyperdata.it/data"
        }]
        /*
        Alternative local test endpoint:
        sparqlEndpoints: [{
            label: "test-mem",
            user: "admin",
            password: "admin123",
            urlBase: "http://${SPARQL_HOST:-localhost}:${SPARQL_PORT:-3030}",
            dataset: "test-mem",
            query: "/test-mem",
            update: "/test-mem",
            upload: "/test-mem/upload",
            gspRead: "/test-mem/data",
            gspWrite: "/test-mem/data"
        }]
        */
}
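    /**
     * @param {string|null} configPath path to a JSON config file to merge over
     *        the defaults; when null, only defaults and environment variables
     *        are used. Call init() before get()/set().
     */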
constructor(configPath = null) {
this.config = {};
this.configFilePath = configPath || null;
this.initialized = false;
}
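    /**
     * Initialize the configuration: load .env, read the optional config file,
     * merge it over the defaults, apply environment overrides, then validate.
     * Subsequent calls are no-ops.
     */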
async init() {
if (this.initialized) return
try {
// Load environment variables from .env file
this.loadEnvironmentVariables();
// Apply defaults after environment variables are loaded
this.config = { ...Config.defaults };
let fileConfig = {}
// Load config file if requested
if (this.configFilePath) {
fileConfig = this.loadConfigFile()
                console.log('Loaded config file')
                // console.log('Config file contents:', JSON.stringify(fileConfig, null, 2))
// Transform config file format to internal format
fileConfig = this.transformJsonConfig(fileConfig)
// console.log('Transformed config:', JSON.stringify(fileConfig, null, 2))
} else {
console.log('No config file path provided, using defaults')
}
console.log('Merging configs...')
            // console.log('Defaults:', JSON.stringify(Config.defaults, null, 2))
            // Merge the file config over the defaults
this.config = this.mergeConfigs(Config.defaults, fileConfig, 0)
// console.log('After merging, config is:', JSON.stringify(this.config, null, 2))
this.initialized = true
this.applyEnvironmentOverrides()
this.validateConfig()
// console.log('Final config after overrides and validation:', JSON.stringify(this.config, null, 2))
} catch (error) {
console.error('Config initialization error details:', error);
console.error('Error stack:', error.stack);
throw new Error(`Config initialization failed: ${error.message}`)
}
}
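    /**
     * Load and parse the JSON config file. Uses the path supplied to the
     * constructor when present, otherwise searches a list of common locations.
     * Returns an empty object on any failure so the defaults still apply.
     */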
loadConfigFile() {
try {
// If config file path was provided in constructor, use it directly
if (this.configFilePath) {
console.log('Checking provided config path:', this.configFilePath);
if (!fs.existsSync(this.configFilePath)) {
console.warn('Config file not found at provided path:', this.configFilePath);
return {};
}
console.log('Loading config from provided path:', this.configFilePath);
const fileContent = fs.readFileSync(this.configFilePath, 'utf8');
return JSON.parse(fileContent);
}
// Otherwise, try to find the config file in common locations
const possiblePaths = [
// Local development path (when running from project root)
join(process.cwd(), 'config', 'config.json'),
// Path when running from src directory
join(process.cwd(), '..', 'config', 'config.json'),
// Path when running from src/mcp directory
join(process.cwd(), '..', '..', 'config', 'config.json'),
// Docker container path
'/app/config/config.json',
// Fallback to environment variable if set
process.env.CONFIG_PATH,
// Absolute path as a last resort
'/home/danny/hyperdata/semem/config/config.json'
].filter(Boolean);
console.log('Searching for config in these locations:', possiblePaths);
for (const path of possiblePaths) {
console.log(' Checking:', path);
if (path && fs.existsSync(path)) {
console.log('✓ Config file found at:', path);
this.configFilePath = path;
const fileContent = fs.readFileSync(path, 'utf8');
console.log('Config file content:', fileContent);
return JSON.parse(fileContent);
}
}
console.warn('❌ Config file not found in any of these locations:', possiblePaths);
return {};
} catch (error) {
console.error('Error loading config file:', error);
return {};
}
}
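    /**
     * Normalize a raw config.json object into the internal shape: maps both the
     * old (queryEndpoint) and new (urlBase + query/update) SPARQL endpoint
     * formats, preserves llmProviders, and derives default chat/embedding models
     * from provider capabilities.
     */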
transformJsonConfig(jsonConfig) {
const transformed = { ...jsonConfig } // Start with a copy of the original config
// Map server configs (preserve existing if none in jsonConfig)
if (jsonConfig.servers) {
transformed.servers = jsonConfig.servers
}
// Map SPARQL endpoints if they exist in the format we expect
if (jsonConfig.sparqlEndpoints && jsonConfig.sparqlEndpoints.length > 0) {
const endpoint = jsonConfig.sparqlEndpoints[0]
// Handle old format with queryEndpoint
if (endpoint.queryEndpoint) {
transformed.sparqlEndpoints = [{
label: "config-file",
user: endpoint.auth?.user || "admin",
password: endpoint.auth?.password || "admin",
urlBase: endpoint.queryEndpoint.replace('/semem/query', ''),
dataset: "semem",
query: "/semem",
update: "/semem",
upload: "/semem/upload",
gspRead: "/semem/data",
gspWrite: "/semem/data"
}]
}
// Handle new format with urlBase and query/update paths
else if (endpoint.urlBase && endpoint.query && endpoint.update) {
// Only update storage configuration if it uses a single endpoint string
// and we can safely enhance it with proper query/update URLs
if (jsonConfig.storage &&
jsonConfig.storage.type === 'sparql' &&
jsonConfig.storage.options &&
jsonConfig.storage.options.endpoint &&
typeof jsonConfig.storage.options.endpoint === 'string') {
transformed.storage = {
...jsonConfig.storage,
options: {
...jsonConfig.storage.options,
query: endpoint.urlBase + endpoint.query,
update: endpoint.urlBase + endpoint.update,
user: endpoint.user || jsonConfig.storage.options.user,
password: endpoint.password || jsonConfig.storage.options.password,
graphName: jsonConfig.storage.options.graphName || jsonConfig.graphName
}
}
}
}
}
// Preserve llmProviders array as-is
if (jsonConfig.llmProviders && Array.isArray(jsonConfig.llmProviders)) {
transformed.llmProviders = jsonConfig.llmProviders;
// Still set default models based on provider capabilities
const chatProvider = jsonConfig.llmProviders.find(p => p.capabilities?.includes('chat'))
const embeddingProvider = jsonConfig.llmProviders.find(p => p.capabilities?.includes('embedding'))
if (chatProvider) {
transformed.models = transformed.models || {}
transformed.models.chat = {
provider: chatProvider.type,
model: chatProvider.chatModel,
options: {}
}
}
if (embeddingProvider) {
transformed.models = transformed.models || {}
transformed.models.embedding = {
provider: embeddingProvider.type,
model: embeddingProvider.embeddingModel,
options: {}
}
}
}
// Map other top-level configs (only if they don't exist already)
if (jsonConfig.chatModel && !transformed.models?.chat?.model) {
transformed.models = transformed.models || {}
transformed.models.chat = transformed.models.chat || {}
transformed.models.chat.model = jsonConfig.chatModel
}
if (jsonConfig.embeddingModel && !transformed.models?.embedding?.model) {
transformed.models = transformed.models || {}
transformed.models.embedding = transformed.models.embedding || {}
transformed.models.embedding.model = jsonConfig.embeddingModel
}
return transformed
}
    /**
     * Deep-merge a user/file config over the defaults. Nested objects are merged
     * recursively; arrays and primitives from the user config replace the defaults
     * wholesale. The depth guard prevents runaway recursion on unexpected input.
     */
mergeConfigs(defaults, user, depth = 0) {
if (depth > 10) { // Set reasonable recursion limit
throw new Error('Config merge exceeded maximum depth')
}
// If user value is an array, replace the default entirely
if (Array.isArray(user)) {
return [...user];
}
        // If user is not an object, it overrides the default; only null/undefined fall back
        if (!user || typeof user !== 'object') {
            return user ?? defaults;
        }
// If defaults is not an object, return user (overriding defaults)
if (!defaults || typeof defaults !== 'object') {
return { ...user };
}
// Create a new object to hold the merged result
const merged = { ...defaults };
// Merge each property
for (const [key, value] of Object.entries(user)) {
if (value && typeof value === 'object' && !Array.isArray(value) &&
defaults[key] && typeof defaults[key] === 'object') {
// Recursively merge objects
merged[key] = this.mergeConfigs(defaults[key], value, depth + 1);
} else {
// Replace with user value (including arrays and primitives)
merged[key] = value !== undefined ? value : defaults[key];
}
}
return merged;
}
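    /**
     * Ensure the required sections exist and hold minimally valid values;
     * throws on the first problem found.
     */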
validateConfig() {
// Required sections
const required = ['storage', 'models', 'sparqlEndpoints']
for (const key of required) {
if (!this.config[key]) {
throw new Error(`Missing required config section: ${key}`)
}
}
// Storage validation
        const validStorageTypes = ['memory', 'json', 'sparql', 'cached-sparql']
        if (!validStorageTypes.includes(this.config.storage.type)) {
            throw new Error(`Invalid storage type: ${this.config.storage.type}`)
        }
// Model validation
const models = this.config.models
if (!models.chat?.provider || !models.chat?.model ||
!models.embedding?.provider || !models.embedding?.model) {
throw new Error('Invalid model configuration')
}
// SPARQL endpoint validation
const endpoint = this.config.sparqlEndpoints[0]
if (!endpoint?.urlBase || !endpoint?.query || !endpoint?.update) {
throw new Error('Invalid SPARQL endpoint configuration')
}
}
/**
* Detect if running in tbox environment and adjust SPARQL configuration accordingly
*/
detectTboxEnvironment() {
// Check for tbox environment indicators
const isTboxEnvironment = (
// Check if running in docker container with tbox network
process.env.HOSTNAME?.includes('tbox') ||
process.env.DOCKER_ENV === 'tbox' ||
            // Check if tbox Fuseki has been explicitly configured (see isTboxFusekiAvailable)
            this.isTboxFusekiAvailable() ||
            // Check if we're in a docker container and can resolve the 'fuseki' hostname
            (process.env.NODE_ENV === 'production' && this.canResolveFusekiHostname()) ||
// Explicit tbox environment flag
process.env.USE_TBOX_FUSEKI === 'true'
);
if (isTboxEnvironment) {
console.log('🔧 Tbox environment detected - adjusting SPARQL configuration');
this.adjustForTboxEnvironment();
} else {
console.log('📍 Standalone environment detected - using default SPARQL configuration');
}
}
/**
     * Check whether tbox Fuseki has been explicitly configured via environment
     * variables (no network probe is performed)
*/
isTboxFusekiAvailable() {
try {
// Only detect tbox environment if explicitly configured via environment variables
// Don't auto-detect based on port availability to avoid switching configs unintentionally
return process.env.FUSEKI_URL === 'http://fuseki:3030' ||
process.env.TBOX_FUSEKI_PORT === '4030' ||
process.env.USE_TBOX_FUSEKI === 'true';
} catch (error) {
return false;
}
}
/**
     * Check whether the docker-internal 'fuseki' hostname is expected to resolve,
     * inferred from FUSEKI_URL (no actual DNS lookup is performed)
*/
canResolveFusekiHostname() {
try {
// In docker environment, 'fuseki' hostname should be resolvable
return process.env.FUSEKI_URL === 'http://fuseki:3030';
} catch (error) {
return false;
}
}
/**
* Adjust configuration for tbox environment
*/
adjustForTboxEnvironment() {
// Override storage SPARQL endpoints to use tbox Fuseki
if (this.config.storage && this.config.storage.type === 'sparql') {
            // Storage endpoints are configured via ${...} placeholders in the defaults
            // and resolved later by environment variable substitution, so there is
            // nothing to rewrite here.
            console.log('🔧 Tbox environment - SPARQL storage endpoints resolved from environment variables');
}
// Update SPARQL endpoints list to prioritize tbox Fuseki
if (this.config.sparqlEndpoints) {
// Find existing tbox endpoint or create new one
let tboxEndpoint = this.config.sparqlEndpoints.find(ep => ep.label === 'Tbox Fuseki');
if (!tboxEndpoint) {
tboxEndpoint = {
label: 'Tbox Fuseki',
user: '${SPARQL_USER}',
password: '${SPARQL_PASSWORD}',
urlBase: 'http://${SPARQL_HOST:-localhost}:${SPARQL_PORT:-3030}',
dataset: 'semem',
query: '/semem/query',
update: '/semem/update',
upload: '/semem/upload',
gspRead: '/semem/data',
gspWrite: '/semem/data'
};
this.config.sparqlEndpoints.unshift(tboxEndpoint); // Add at beginning for priority
} else {
// Update existing endpoint to use correct dataset
tboxEndpoint.dataset = 'semem';
tboxEndpoint.query = '/semem/query';
tboxEndpoint.update = '/semem/update';
tboxEndpoint.upload = '/semem/upload';
tboxEndpoint.gspRead = '/semem/data';
tboxEndpoint.gspWrite = '/semem/data';
}
// If running inside docker, use internal hostname
if (process.env.FUSEKI_URL === 'http://fuseki:3030') {
tboxEndpoint.urlBase = 'http://fuseki:3030';
}
console.log(`🎯 Configured tbox SPARQL endpoint: ${tboxEndpoint.urlBase}/semem`);
}
}
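    /**
     * Apply environment-driven configuration: SEMEM_* variables map onto
     * dot-separated config paths (e.g. SEMEM_STORAGE_TYPE -> storage.type) and
     * ${VAR:-default} placeholders in string values are substituted from
     * process.env.
     */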
applyEnvironmentOverrides() {
// Auto-detect tbox environment and adjust SPARQL endpoints
this.detectTboxEnvironment();
// Handle environment variables with SEMEM_ prefix
for (const [key, value] of Object.entries(process.env)) {
if (key.startsWith('SEMEM_')) {
const configPath = key.slice(6).toLowerCase().split('_')
this.set(configPath.join('.'), value)
}
}
// Replace ${VAR_NAME} placeholders with environment variables
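        // e.g. 'http://${SPARQL_HOST:-localhost}:${SPARQL_PORT:-3030}/semem/sparql'
        //       -> 'http://localhost:3030/semem/sparql' when neither variable is set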
console.log('🔍 Starting environment variable substitution...');
const replaceEnvVars = (obj) => {
if (typeof obj === 'string' && obj.includes('${')) {
console.log('🔍 Processing string:', obj);
return obj.replace(/\$\{([^}]+)\}/g, (match, varName) => {
// Handle default values: ${VAR_NAME:-default}
if (varName.includes(':-')) {
const [envVar, defaultValue] = varName.split(':-');
const result = process.env[envVar] || defaultValue || '';
console.log(`🔄 Substituting ${match} -> ${result} (envVar: ${envVar}, env: ${process.env[envVar]}, default: ${defaultValue})`);
return result;
} else {
// Simple variable without default
const result = process.env[varName] || '';
console.log(`🔄 Substituting ${match} -> ${result} (simple var: ${varName}, env: ${process.env[varName]})`);
return result;
}
});
} else if (Array.isArray(obj)) {
return obj.map(item => replaceEnvVars(item));
} else if (obj && typeof obj === 'object') {
const result = {};
for (const [key, value] of Object.entries(obj)) {
result[key] = replaceEnvVars(value);
}
return result;
}
return obj;
};
// Apply environment variable substitution to the entire config
this.config = replaceEnvVars(this.config);
}
/**
* Load environment variables from .env file
*/
loadEnvironmentVariables() {
try {
console.log('🔍 Loading environment variables...');
console.log('🔍 Current SPARQL_HOST:', process.env.SPARQL_HOST);
console.log('🔍 Current SPARQL_PORT:', process.env.SPARQL_PORT);
// Get the project root directory (semem root)
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const projectRoot = join(__dirname, '..');
const envPath = join(projectRoot, '.env');
// Load .env file if it exists
if (fs.existsSync(envPath)) {
dotenv.config({ path: envPath });
console.log('✅ Environment variables loaded from .env file');
} else {
                // Fall back to a .env in the current working directory, if any
                dotenv.config();
                console.log('ℹ️ No project-level .env found; tried process.cwd()/.env');
}
console.log('🔍 After dotenv - SPARQL_HOST:', process.env.SPARQL_HOST);
console.log('🔍 After dotenv - SPARQL_PORT:', process.env.SPARQL_PORT);
} catch (error) {
console.warn('⚠️ Could not load .env file:', error.message);
// Don't throw - environment variables might be set via other means
}
}
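    /**
     * Read a value by dot-separated path, e.g. get('storage.options.query').
     * Returns undefined when any intermediate segment is missing.
     */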
get(path) {
if (!this.initialized) {
throw new Error('Config not initialized')
}
return path.split('.').reduce((obj, key) => {
            return obj == null ? undefined : obj[key]
}, this.config)
}
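    /**
     * Set a value by dot-separated path, creating intermediate objects as
     * needed, e.g. set('models.chat.model', 'qwen2:1.5b').
     */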
set(path, value) {
if (!this.initialized) {
throw new Error('Config not initialized')
}
const keys = path.split('.')
const last = keys.pop()
const target = keys.reduce((obj, key) => {
if (!obj[key]) obj[key] = {}
return obj[key]
}, this.config)
target[last] = value
}
    /**
     * Convenience factory: construct a Config and run init() in one call.
     * Returns a promise that resolves to the initialized instance.
     */
    static async create(configPath = null) {
        const config = new Config(configPath)
        await config.init()
        return config
    }
    static createFromFile(configPath) {
        return Config.create(configPath)
    }
    static createWithoutFile() {
        return Config.create(null)
    }
    toJSON() {
        // Serialize the config while stripping password fields at any depth,
        // so exported or logged config never leaks credentials.
        return JSON.parse(JSON.stringify(this.config, (key, value) =>
            key === 'password' ? undefined : value))
    }
}