// PoC: NL → ODMDB query (ALL OBJECTS) - Multi-schema support with intelligent routing // Usage: // 1) export OPENAI_API_KEY=sk-... // 2) node poc.js import fs from "node:fs"; import OpenAI from "openai"; import axios from "axios"; import jq from "node-jq"; import { ODMDBMappingManager } from "./schema-mappings/mapping-manager.js"; // ---- Config ---- const MODEL = process.env.OPENAI_MODEL || "gpt-5"; // ODMDB paths - point to actual ODMDB structure const ODMDB_BASE_PATH = "../smatchitObjectOdmdb"; const SCHEMA_PATH = `${ODMDB_BASE_PATH}/schema`; const OBJECTS_PATH = `${ODMDB_BASE_PATH}/objects`; // ODMDB execution config const ODMDB_BASE_URL = process.env.ODMDB_BASE_URL || "http://localhost:3000"; const ODMDB_TRIBE = process.env.ODMDB_TRIBE || "smatchit"; const EXECUTE_QUERY = process.env.EXECUTE_QUERY === "true"; // Set to "true" to execute queries // Test queries for different objects const TEST_QUERIES = { seekers: "find seekers looking for jobs urgently with their contact info and salary expectations", jobads: "show me recent job postings with salary range and requirements", recruiters: "get active recruiters with their contact information", persons: "find people with their basic profile information", sirets: "show me companies with their business information", }; // Hardcoded NL query for the PoC (no multi-turn) - can be overridden by TEST_OBJECT env var const TEST_OBJECT = process.env.TEST_OBJECT || "seekers"; const NL_QUERY = TEST_QUERIES[TEST_OBJECT] || TEST_QUERIES.seekers; // ---- Initialize Mapping Manager ---- console.log("šŸš€ Initializing ODMDB Multi-Schema PoC"); console.log("=".repeat(60)); const mappingManager = new ODMDBMappingManager(); // Query validation - detect outrageous requests function validateQuery(nlQuery) { const query = nlQuery.toLowerCase(); // Check for reasonable data limits const excessiveKeywords = [ "all users", "all people", "everyone", "entire database", "complete list", "every", "dump", "export everything", "all data", "full database", "everything", ]; const hasExcessiveRequest = excessiveKeywords.some((keyword) => query.includes(keyword) ); if (hasExcessiveRequest) { return { valid: false, reason: "Query requests excessive data - please be more specific", suggestion: "Try requesting specific criteria or a limited number of results", }; } // Check for sensitive/inappropriate requests const sensitiveKeywords = [ "password", "private", "confidential", "secret", "admin", "delete", "remove", "drop", "destroy", "hack", ]; const hasSensitiveRequest = sensitiveKeywords.some((keyword) => query.includes(keyword) ); if (hasSensitiveRequest) { return { valid: false, reason: "Query contains inappropriate or sensitive terms", suggestion: "Please rephrase your request with appropriate business terms", }; } return { valid: true }; } // ---- Multi-Object Query Processing ---- function detectTargetObject(nlQuery) { console.log(`šŸ” Analyzing query: "${nlQuery}"`); // Use mapping manager to detect target object const detectedObjects = mappingManager.detectObjectFromQuery(nlQuery); if (detectedObjects.length === 0) { return { object: "seekers", // Default fallback confidence: 0.1, reason: "No specific object detected, defaulting to seekers", }; } // Sort by confidence and return the best match detectedObjects.sort((a, b) => b.confidence - a.confidence); const bestMatch = detectedObjects[0]; console.log( `šŸŽÆ Detected object: ${bestMatch.object} (confidence: ${bestMatch.confidence})` ); console.log(` Reason: ${bestMatch.reason}`); // Check if data is available for this object const availability = mappingManager.dataAvailability.get(bestMatch.object); if (!availability?.dataAvailable) { console.log( `āš ļø No data available for ${bestMatch.object}, checking alternatives...` ); // Find alternative with available data const alternativeWithData = detectedObjects.find((detection) => { const alt = mappingManager.dataAvailability.get(detection.object); return alt?.dataAvailable; }); if (alternativeWithData) { console.log(`āœ… Using alternative: ${alternativeWithData.object}`); return alternativeWithData; } else { return { object: bestMatch.object, confidence: bestMatch.confidence, reason: bestMatch.reason, dataUnavailable: true, }; } } return bestMatch; } // ---- Dynamic Query Schema Generation ---- function getObjectMapping(objectName) { return mappingManager.mappings.get(objectName); } function getReadableFields(objectName) { const mapping = getObjectMapping(objectName); if (!mapping?.available) return []; // Try to get readable fields from access rights (for recruiters, seekers, etc.) const accessRights = mapping.accessRights; if (accessRights) { // For seekers, check recruiters.R if ( accessRights.recruiters?.R && Array.isArray(accessRights.recruiters.R) ) { return accessRights.recruiters.R; } // For jobads/recruiters, check seekers.R if (accessRights.seekers?.R && Array.isArray(accessRights.seekers.R)) { return accessRights.seekers.R; } // For other objects, check owner.R if (accessRights.owner?.R && Array.isArray(accessRights.owner.R)) { return accessRights.owner.R; } } // Fallback to all available properties (first 10 for safety) return mapping?.properties ? Object.keys(mapping.properties).slice(0, 10) : []; } function getAllObjectFields(objectName) { const mapping = getObjectMapping(objectName); if (!mapping?.available) return []; return mapping?.properties ? Object.keys(mapping.properties) : []; } function getObjectFallbackFields(objectName) { // Object-specific fallback fields when no readable fields are available const fallbacks = { seekers: ["alias", "email"], jobads: ["jobadid", "jobtitle"], recruiters: ["alias", "email"], persons: ["alias", "email"], sirets: ["alias", "name"], jobsteps: ["alias", "name"], jobtitles: ["jobtitleid", "name"], }; return fallbacks[objectName] || ["id", "name"]; } // ---- JSON Schema for Structured Outputs (no zod, no oneOf) ---- function buildResponseJsonSchema(targetObject) { const availableObjects = Array.from(mappingManager.mappings.keys()); const readableFields = getReadableFields(targetObject); return { type: "object", additionalProperties: false, properties: { object: { type: "string", enum: availableObjects.length > 0 ? availableObjects : ["seekers"], }, condition: { type: "array", items: { type: "string" }, minItems: 1 }, fields: { type: "array", items: { type: "string", enum: readableFields.length > 0 ? readableFields : getObjectFallbackFields(targetObject), }, minItems: 1, }, }, required: ["object", "condition", "fields"], }; } // ---- Prompt builders ---- function systemPrompt(targetObject) { const objectMapping = getObjectMapping(targetObject); const availableFields = getAllObjectFields(targetObject); const readableFields = getReadableFields(targetObject); const availableObjects = Array.from(mappingManager.mappings.keys()); // Get object-specific synonyms from mapping const synonyms = objectMapping?.synonyms || {}; const synonymList = Object.entries(synonyms) .slice(0, 10) .map(([field, syns]) => { const synArray = Array.isArray(syns) ? syns : [syns]; return `- '${synArray.slice(0, 2).join("', '")}' → ${field}`; }) .join("\n "); return [ "You convert a natural language request into an ODMDB search payload.", "Return ONLY a compact JSON object that matches the provided JSON Schema.", "", "ODMDB DSL:", "- join(remoteObject:localKey:remoteProp:operator:value)", "- idx.(value) - for indexed fields", "- prop.(operator:value) - for direct property queries", "", `Available objects: ${availableObjects.join(", ")}`, `Target object: ${targetObject}`, "", `Available ${targetObject} fields:`, availableFields.slice(0, 15).join(", ") + (availableFields.length > 15 ? "..." : ""), "", `Readable fields for ${targetObject} (use these for field selection):`, readableFields.join(", "), "", "Field mappings for natural language:", synonymList || "- No specific mappings available", "", "Date handling:", "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))", "- 'updated' → dt_update", "", "Rules:", `- Object should be '${targetObject}' unless query clearly indicates another object`, "- Use indexes when available for better performance", "- For date filters, use prop.dt_create/dt_update with absolute dates", "- Only return readable fields in 'fields' array", `- Default fields if request is generic: ${readableFields .slice(0, 5) .join(", ")}`, "", "Timezone is Europe/Paris. Today is 2025-10-15.", "Interpret 'last week' as now minus 7 days → 2025-10-08.", "Interpret 'yesterday' as → 2025-10-14.", ].join("\n"); } function userPrompt(nl) { return `Natural language request: "${nl}"\nReturn ONLY the JSON object.`; } // ---- OpenAI call using Responses API (text.format) ---- const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); async function inferQuery(nlText, targetObject) { const resp = await client.responses.create({ model: MODEL, input: [ { role: "system", content: systemPrompt(targetObject) }, { role: "user", content: userPrompt(nlText) }, ], text: { format: { name: "OdmdbQuery", type: "json_schema", schema: buildResponseJsonSchema(targetObject), strict: true, }, }, }); const jsonText = resp.output_text || resp.output?.[0]?.content?.[0]?.text || (() => { throw new Error("Empty model output"); })(); const parsed = JSON.parse(jsonText); return parsed; } // ---- Validate using the ODMDB schema (not zod) ---- function validateWithOdmdbSchema(candidate, targetObject) { // Basic shape checks (already enforced by Structured Outputs, but keep defensive) if (!candidate || typeof candidate !== "object") throw new Error("Invalid response (not an object)."); const availableObjects = Array.from(mappingManager.mappings.keys()); if (!availableObjects.includes(candidate.object)) { throw new Error( `Invalid object '${ candidate.object }'; must be one of: ${availableObjects.join(", ")}` ); } if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) { throw new Error( "Invalid 'condition'; must be a non-empty array of strings." ); } if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) { throw new Error("Invalid 'fields'; must be a non-empty array of strings."); } // Validate fields against schema for the specific object const availableFields = getAllObjectFields(candidate.object); const readableFields = getReadableFields(candidate.object); for (const field of candidate.fields) { if (!availableFields.includes(field)) { throw new Error( `Invalid field '${field}'; not found in ${candidate.object} schema.` ); } if (!readableFields.includes(field)) { console.warn( `Warning: Field '${field}' may not be readable for ${candidate.object}.` ); } } // DSL token sanity const allowedTokens = ["join(", "idx.", "prop."]; for (const c of candidate.condition) { if (typeof c !== "string") throw new Error("Condition entries must be strings."); const tokenOK = allowedTokens.some((t) => c.includes(t)); const ascii = /^[\x09\x0A\x0D\x20-\x7E()_:\[\].,=> !objectAvailableFields.includes(f) ); if (unknown.length) { // Drop unknown but continue (PoC behavior) console.warn( `āš ļø Dropping unknown fields (not in ${candidate.object} schema):`, unknown ); candidate.fields = candidate.fields.filter((f) => objectAvailableFields.includes(f) ); if (!candidate.fields.length) { // If all dropped, fallback to object-specific default fields const fallback = getObjectFallbackFields(candidate.object); candidate.fields = fallback; console.warn( `šŸ”„ Using fallback fields for ${candidate.object}:`, fallback ); } } } else { // If we can't read the schema, at least ensure strings & dedupe candidate.fields = [ ...new Set( candidate.fields.filter((f) => typeof f === "string" && f.trim()) ), ]; } return candidate; } // ---- Local ODMDB Data Access ---- function loadSeekersData() { const seekersItemsPath = `${OBJECTS_PATH}/seekers/itm`; try { if (!fs.existsSync(seekersItemsPath)) { console.error(`āŒ Seekers data directory not found: ${seekersItemsPath}`); return []; } const files = fs .readdirSync(seekersItemsPath) .filter((file) => file.endsWith(".json") && file !== "backup") .slice(0, 50); // Limit to first 50 files for PoC performance console.log( `šŸ“ Loading ${files.length} seeker files from ${seekersItemsPath}` ); const seekers = []; for (const file of files) { try { const filePath = `${seekersItemsPath}/${file}`; const data = JSON.parse(fs.readFileSync(filePath, "utf-8")); seekers.push(data); } catch (error) { console.warn(`āš ļø Could not load ${file}:`, error.message); } } return seekers; } catch (error) { console.error("āŒ Error loading seekers data:", error.message); return []; } } // ---- Local ODMDB Query Execution ---- async function executeOdmdbQuery(query) { if (!EXECUTE_QUERY) { console.log( "šŸ’” Query execution disabled. Set EXECUTE_QUERY=true to enable." ); return null; } try { console.log( `\nšŸ” Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/` ); console.log("Query conditions:", query.condition); console.log("Requested fields:", query.fields); // Load all seekers data const allSeekers = loadSeekersData(); if (allSeekers.length === 0) { console.log("āŒ No seekers data found"); return { data: [] }; } console.log(`ļæ½ Loaded ${allSeekers.length} seekers for filtering`); // Apply basic filtering (simplified DSL processing) let filteredSeekers = allSeekers; for (const condition of query.condition) { if (condition.includes("prop.dt_create(>=:")) { // Extract date from condition like "prop.dt_create(>=:2025-10-07)" const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/); if (dateMatch) { const filterDate = new Date(dateMatch[1]); filteredSeekers = filteredSeekers.filter((seeker) => { if (!seeker.dt_create) return false; const seekerDate = new Date(seeker.dt_create); return seekerDate >= filterDate; }); console.log( `šŸ—“ļø Filtered by date >= ${dateMatch[1]}: ${filteredSeekers.length} results` ); } } if (condition.includes("idx.seekstatus_alias(")) { // Extract status from condition like "idx.seekstatus_alias(startasap)" const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/); if (statusMatch) { const status = statusMatch[1]; filteredSeekers = filteredSeekers.filter( (seeker) => seeker.seekstatus === status ); console.log( `šŸ‘¤ Filtered by status ${status}: ${filteredSeekers.length} results` ); } } } // Select only requested fields const results = filteredSeekers.map((seeker) => { const filtered = {}; for (const field of query.fields) { if (seeker.hasOwnProperty(field)) { filtered[field] = seeker[field]; } } return filtered; }); console.log( `āœ… Query executed successfully! Found ${results.length} matching seekers` ); return { data: results, meta: { total: results.length, source: "local_files", path: `${OBJECTS_PATH}/seekers/itm/`, }, }; } catch (error) { console.error("āŒ Local query execution failed:", error.message); return null; } } // ---- Result Processing with jq ---- async function processResults(results, jqFilter = ".") { if (!results || !results.data) { console.log("No results to process."); return null; } try { // Use jq to filter and format results (pass data directly, not as string) const processed = await jq.run(jqFilter, results.data, { input: "json" }); // Return the processed result return processed; } catch (error) { console.error("āŒ jq processing failed:", error.message); return JSON.stringify(results.data, null, 2); // Return formatted JSON if jq fails } } // ---- Run PoC (generate query and optionally execute) ---- (async () => { try { if (!process.env.OPENAI_API_KEY) throw new Error("Missing OPENAI_API_KEY env var."); console.log(`šŸ¤– Processing NL query: "${NL_QUERY}"`); console.log(`šŸŽÆ Target object: ${TEST_OBJECT}`); console.log("=".repeat(60)); // Step 1: Generate ODMDB query from natural language const out = await inferQuery(NL_QUERY, TEST_OBJECT); const validated = validateWithOdmdbSchema(out, TEST_OBJECT); console.log("āœ… Generated ODMDB Query:"); const generatedQuery = { object: validated.object, condition: validated.condition, fields: validated.fields, }; console.log(JSON.stringify(generatedQuery, null, 2)); // Step 2: Execute query if enabled if (EXECUTE_QUERY) { console.log("\n" + "=".repeat(60)); const results = await executeOdmdbQuery(generatedQuery); if (results) { console.log("āœ… Query executed successfully!"); console.log(`šŸ“Š Found ${results.data?.length || 0} results`); // Step 3: Process results with jq console.log("\nšŸ“‹ Results Summary:"); const summary = await processResults( results, `.[0:3] | map({email, salaryexpectation, salarydevise, salaryunit})` ); console.log(JSON.stringify(summary, null, 2)); // Optional: Show full results count if (results.data?.length > 3) { console.log(`\n... and ${results.data.length - 3} more results`); } // Step 4: Export to CSV format console.log("\nšŸ“„ CSV Preview:"); const csvData = await processResults( results, ` map([.email // "N/A", (.salaryexpectation | tostring) // "N/A", .salarydevise // "N/A", .salaryunit // "N/A"]) | ["email","salary","currency","unit"] as $header | [$header] + .[0:5] | .[] | @csv ` ); if (csvData) { console.log(csvData); } } } else { console.log( "\nšŸ’” To execute this query against ODMDB, set EXECUTE_QUERY=true" ); console.log(` Example: EXECUTE_QUERY=true npm start`); } } catch (e) { console.error("āŒ PoC failed:", e.message || e); process.exit(1); } })();