Files
Poc-dashboard/poc.js
Eliyan 663cf45704 feat: Enhance ODMDB query handling with multi-schema support and intelligent routing
- Updated `poc.js` to support queries for multiple object types (seekers, jobads, recruiters, etc.) with intelligent routing based on natural language input.
- Implemented a query validation mechanism to prevent excessive or sensitive requests.
- Introduced a mapping manager for dynamic schema handling and object detection.
- Enhanced the response schema generation to accommodate various object types and their respective fields.
- Added a new script `verify-mapping.js` to verify and display the mapping details for the seekers schema, including available properties, indexes, access rights, and synonyms.
2025-10-15 13:54:24 +02:00

640 lines
20 KiB
JavaScript
Raw Blame History

// PoC: NL → ODMDB query (ALL OBJECTS) - Multi-schema support with intelligent routing
// Usage:
// 1) export OPENAI_API_KEY=sk-...
// 2) node poc.js
import fs from "node:fs";
import OpenAI from "openai";
import axios from "axios";
import jq from "node-jq";
import { ODMDBMappingManager } from "./schema-mappings/mapping-manager.js";
// ---- Config ----
const MODEL = process.env.OPENAI_MODEL || "gpt-5";
// ODMDB paths - point to actual ODMDB structure
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
const SCHEMA_PATH = `${ODMDB_BASE_PATH}/schema`;
const OBJECTS_PATH = `${ODMDB_BASE_PATH}/objects`;
// ODMDB execution config
const ODMDB_BASE_URL = process.env.ODMDB_BASE_URL || "http://localhost:3000";
const ODMDB_TRIBE = process.env.ODMDB_TRIBE || "smatchit";
const EXECUTE_QUERY = process.env.EXECUTE_QUERY === "true"; // Set to "true" to execute queries
// Test queries for different objects
const TEST_QUERIES = {
seekers:
"find seekers looking for jobs urgently with their contact info and salary expectations",
jobads: "show me recent job postings with salary range and requirements",
recruiters: "get active recruiters with their contact information",
persons: "find people with their basic profile information",
sirets: "show me companies with their business information",
};
// Hardcoded NL query for the PoC (no multi-turn) - can be overridden by TEST_OBJECT env var
const TEST_OBJECT = process.env.TEST_OBJECT || "seekers";
const NL_QUERY = TEST_QUERIES[TEST_OBJECT] || TEST_QUERIES.seekers;
// ---- Initialize Mapping Manager ----
console.log("🚀 Initializing ODMDB Multi-Schema PoC");
console.log("=".repeat(60));
const mappingManager = new ODMDBMappingManager();
// Query validation - detect outrageous requests
function validateQuery(nlQuery) {
const query = nlQuery.toLowerCase();
// Check for reasonable data limits
const excessiveKeywords = [
"all users",
"all people",
"everyone",
"entire database",
"complete list",
"every",
"dump",
"export everything",
"all data",
"full database",
"everything",
];
const hasExcessiveRequest = excessiveKeywords.some((keyword) =>
query.includes(keyword)
);
if (hasExcessiveRequest) {
return {
valid: false,
reason: "Query requests excessive data - please be more specific",
suggestion:
"Try requesting specific criteria or a limited number of results",
};
}
// Check for sensitive/inappropriate requests
const sensitiveKeywords = [
"password",
"private",
"confidential",
"secret",
"admin",
"delete",
"remove",
"drop",
"destroy",
"hack",
];
const hasSensitiveRequest = sensitiveKeywords.some((keyword) =>
query.includes(keyword)
);
if (hasSensitiveRequest) {
return {
valid: false,
reason: "Query contains inappropriate or sensitive terms",
suggestion:
"Please rephrase your request with appropriate business terms",
};
}
return { valid: true };
}
// ---- Multi-Object Query Processing ----
function detectTargetObject(nlQuery) {
console.log(`🔍 Analyzing query: "${nlQuery}"`);
// Use mapping manager to detect target object
const detectedObjects = mappingManager.detectObjectFromQuery(nlQuery);
if (detectedObjects.length === 0) {
return {
object: "seekers", // Default fallback
confidence: 0.1,
reason: "No specific object detected, defaulting to seekers",
};
}
// Sort by confidence and return the best match
detectedObjects.sort((a, b) => b.confidence - a.confidence);
const bestMatch = detectedObjects[0];
console.log(
`🎯 Detected object: ${bestMatch.object} (confidence: ${bestMatch.confidence})`
);
console.log(` Reason: ${bestMatch.reason}`);
// Check if data is available for this object
const availability = mappingManager.dataAvailability.get(bestMatch.object);
if (!availability?.dataAvailable) {
console.log(
`⚠️ No data available for ${bestMatch.object}, checking alternatives...`
);
// Find alternative with available data
const alternativeWithData = detectedObjects.find((detection) => {
const alt = mappingManager.dataAvailability.get(detection.object);
return alt?.dataAvailable;
});
if (alternativeWithData) {
console.log(`✅ Using alternative: ${alternativeWithData.object}`);
return alternativeWithData;
} else {
return {
object: bestMatch.object,
confidence: bestMatch.confidence,
reason: bestMatch.reason,
dataUnavailable: true,
};
}
}
return bestMatch;
}
// ---- Dynamic Query Schema Generation ----
function getObjectMapping(objectName) {
return mappingManager.mappings.get(objectName);
}
function getReadableFields(objectName) {
const mapping = getObjectMapping(objectName);
if (!mapping?.available) return [];
// Try to get readable fields from access rights (for recruiters, seekers, etc.)
const accessRights = mapping.accessRights;
if (accessRights) {
// For seekers, check recruiters.R
if (
accessRights.recruiters?.R &&
Array.isArray(accessRights.recruiters.R)
) {
return accessRights.recruiters.R;
}
// For jobads/recruiters, check seekers.R
if (accessRights.seekers?.R && Array.isArray(accessRights.seekers.R)) {
return accessRights.seekers.R;
}
// For other objects, check owner.R
if (accessRights.owner?.R && Array.isArray(accessRights.owner.R)) {
return accessRights.owner.R;
}
}
// Fallback to all available properties (first 10 for safety)
return mapping?.properties
? Object.keys(mapping.properties).slice(0, 10)
: [];
}
function getAllObjectFields(objectName) {
const mapping = getObjectMapping(objectName);
if (!mapping?.available) return [];
return mapping?.properties ? Object.keys(mapping.properties) : [];
}
function getObjectFallbackFields(objectName) {
// Object-specific fallback fields when no readable fields are available
const fallbacks = {
seekers: ["alias", "email"],
jobads: ["jobadid", "jobtitle"],
recruiters: ["alias", "email"],
persons: ["alias", "email"],
sirets: ["alias", "name"],
jobsteps: ["alias", "name"],
jobtitles: ["jobtitleid", "name"],
};
return fallbacks[objectName] || ["id", "name"];
}
// ---- JSON Schema for Structured Outputs (no zod, no oneOf) ----
function buildResponseJsonSchema(targetObject) {
const availableObjects = Array.from(mappingManager.mappings.keys());
const readableFields = getReadableFields(targetObject);
return {
type: "object",
additionalProperties: false,
properties: {
object: {
type: "string",
enum: availableObjects.length > 0 ? availableObjects : ["seekers"],
},
condition: { type: "array", items: { type: "string" }, minItems: 1 },
fields: {
type: "array",
items: {
type: "string",
enum:
readableFields.length > 0
? readableFields
: getObjectFallbackFields(targetObject),
},
minItems: 1,
},
},
required: ["object", "condition", "fields"],
};
}
// ---- Prompt builders ----
function systemPrompt(targetObject) {
const objectMapping = getObjectMapping(targetObject);
const availableFields = getAllObjectFields(targetObject);
const readableFields = getReadableFields(targetObject);
const availableObjects = Array.from(mappingManager.mappings.keys());
// Get object-specific synonyms from mapping
const synonyms = objectMapping?.synonyms || {};
const synonymList = Object.entries(synonyms)
.slice(0, 10)
.map(([field, syns]) => {
const synArray = Array.isArray(syns) ? syns : [syns];
return `- '${synArray.slice(0, 2).join("', '")}' → ${field}`;
})
.join("\n ");
return [
"You convert a natural language request into an ODMDB search payload.",
"Return ONLY a compact JSON object that matches the provided JSON Schema.",
"",
"ODMDB DSL:",
"- join(remoteObject:localKey:remoteProp:operator:value)",
"- idx.<indexName>(value) - for indexed fields",
"- prop.<field>(operator:value) - for direct property queries",
"",
`Available objects: ${availableObjects.join(", ")}`,
`Target object: ${targetObject}`,
"",
`Available ${targetObject} fields:`,
availableFields.slice(0, 15).join(", ") +
(availableFields.length > 15 ? "..." : ""),
"",
`Readable fields for ${targetObject} (use these for field selection):`,
readableFields.join(", "),
"",
"Field mappings for natural language:",
synonymList || "- No specific mappings available",
"",
"Date handling:",
"- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
"- 'updated' → dt_update",
"",
"Rules:",
`- Object should be '${targetObject}' unless query clearly indicates another object`,
"- Use indexes when available for better performance",
"- For date filters, use prop.dt_create/dt_update with absolute dates",
"- Only return readable fields in 'fields' array",
`- Default fields if request is generic: ${readableFields
.slice(0, 5)
.join(", ")}`,
"",
"Timezone is Europe/Paris. Today is 2025-10-15.",
"Interpret 'last week' as now minus 7 days → 2025-10-08.",
"Interpret 'yesterday' as → 2025-10-14.",
].join("\n");
}
function userPrompt(nl) {
return `Natural language request: "${nl}"\nReturn ONLY the JSON object.`;
}
// ---- OpenAI call using Responses API (text.format) ----
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
async function inferQuery(nlText, targetObject) {
const resp = await client.responses.create({
model: MODEL,
input: [
{ role: "system", content: systemPrompt(targetObject) },
{ role: "user", content: userPrompt(nlText) },
],
text: {
format: {
name: "OdmdbQuery",
type: "json_schema",
schema: buildResponseJsonSchema(targetObject),
strict: true,
},
},
});
const jsonText =
resp.output_text ||
resp.output?.[0]?.content?.[0]?.text ||
(() => {
throw new Error("Empty model output");
})();
const parsed = JSON.parse(jsonText);
return parsed;
}
// ---- Validate using the ODMDB schema (not zod) ----
function validateWithOdmdbSchema(candidate, targetObject) {
// Basic shape checks (already enforced by Structured Outputs, but keep defensive)
if (!candidate || typeof candidate !== "object")
throw new Error("Invalid response (not an object).");
const availableObjects = Array.from(mappingManager.mappings.keys());
if (!availableObjects.includes(candidate.object)) {
throw new Error(
`Invalid object '${
candidate.object
}'; must be one of: ${availableObjects.join(", ")}`
);
}
if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) {
throw new Error(
"Invalid 'condition'; must be a non-empty array of strings."
);
}
if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) {
throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
}
// Validate fields against schema for the specific object
const availableFields = getAllObjectFields(candidate.object);
const readableFields = getReadableFields(candidate.object);
for (const field of candidate.fields) {
if (!availableFields.includes(field)) {
throw new Error(
`Invalid field '${field}'; not found in ${candidate.object} schema.`
);
}
if (!readableFields.includes(field)) {
console.warn(
`Warning: Field '${field}' may not be readable for ${candidate.object}.`
);
}
}
// DSL token sanity
const allowedTokens = ["join(", "idx.", "prop."];
for (const c of candidate.condition) {
if (typeof c !== "string")
throw new Error("Condition entries must be strings.");
const tokenOK = allowedTokens.some((t) => c.includes(t));
const ascii = /^[\x09\x0A\x0D\x20-\x7E()_:\[\].,=><!'"-]+$/.test(c);
if (!tokenOK || !ascii) throw new Error(`Malformed condition: ${c}`);
}
// Additional field validation and cleanup
const objectAvailableFields = getAllObjectFields(candidate.object);
if (objectAvailableFields.length) {
const unknown = candidate.fields.filter(
(f) => !objectAvailableFields.includes(f)
);
if (unknown.length) {
// Drop unknown but continue (PoC behavior)
console.warn(
`⚠️ Dropping unknown fields (not in ${candidate.object} schema):`,
unknown
);
candidate.fields = candidate.fields.filter((f) =>
objectAvailableFields.includes(f)
);
if (!candidate.fields.length) {
// If all dropped, fallback to object-specific default fields
const fallback = getObjectFallbackFields(candidate.object);
candidate.fields = fallback;
console.warn(
`🔄 Using fallback fields for ${candidate.object}:`,
fallback
);
}
}
} else {
// If we can't read the schema, at least ensure strings & dedupe
candidate.fields = [
...new Set(
candidate.fields.filter((f) => typeof f === "string" && f.trim())
),
];
}
return candidate;
}
// ---- Local ODMDB Data Access ----
function loadSeekersData() {
const seekersItemsPath = `${OBJECTS_PATH}/seekers/itm`;
try {
if (!fs.existsSync(seekersItemsPath)) {
console.error(`❌ Seekers data directory not found: ${seekersItemsPath}`);
return [];
}
const files = fs
.readdirSync(seekersItemsPath)
.filter((file) => file.endsWith(".json") && file !== "backup")
.slice(0, 50); // Limit to first 50 files for PoC performance
console.log(
`📁 Loading ${files.length} seeker files from ${seekersItemsPath}`
);
const seekers = [];
for (const file of files) {
try {
const filePath = `${seekersItemsPath}/${file}`;
const data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
seekers.push(data);
} catch (error) {
console.warn(`⚠️ Could not load ${file}:`, error.message);
}
}
return seekers;
} catch (error) {
console.error("❌ Error loading seekers data:", error.message);
return [];
}
}
// ---- Local ODMDB Query Execution ----
async function executeOdmdbQuery(query) {
if (!EXECUTE_QUERY) {
console.log(
"💡 Query execution disabled. Set EXECUTE_QUERY=true to enable."
);
return null;
}
try {
console.log(
`\n🔍 Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/`
);
console.log("Query conditions:", query.condition);
console.log("Requested fields:", query.fields);
// Load all seekers data
const allSeekers = loadSeekersData();
if (allSeekers.length === 0) {
console.log("❌ No seekers data found");
return { data: [] };
}
console.log(`<EFBFBD> Loaded ${allSeekers.length} seekers for filtering`);
// Apply basic filtering (simplified DSL processing)
let filteredSeekers = allSeekers;
for (const condition of query.condition) {
if (condition.includes("prop.dt_create(>=:")) {
// Extract date from condition like "prop.dt_create(>=:2025-10-07)"
const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/);
if (dateMatch) {
const filterDate = new Date(dateMatch[1]);
filteredSeekers = filteredSeekers.filter((seeker) => {
if (!seeker.dt_create) return false;
const seekerDate = new Date(seeker.dt_create);
return seekerDate >= filterDate;
});
console.log(
`🗓️ Filtered by date >= ${dateMatch[1]}: ${filteredSeekers.length} results`
);
}
}
if (condition.includes("idx.seekstatus_alias(")) {
// Extract status from condition like "idx.seekstatus_alias(startasap)"
const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/);
if (statusMatch) {
const status = statusMatch[1];
filteredSeekers = filteredSeekers.filter(
(seeker) => seeker.seekstatus === status
);
console.log(
`👤 Filtered by status ${status}: ${filteredSeekers.length} results`
);
}
}
}
// Select only requested fields
const results = filteredSeekers.map((seeker) => {
const filtered = {};
for (const field of query.fields) {
if (seeker.hasOwnProperty(field)) {
filtered[field] = seeker[field];
}
}
return filtered;
});
console.log(
`✅ Query executed successfully! Found ${results.length} matching seekers`
);
return {
data: results,
meta: {
total: results.length,
source: "local_files",
path: `${OBJECTS_PATH}/seekers/itm/`,
},
};
} catch (error) {
console.error("❌ Local query execution failed:", error.message);
return null;
}
}
// ---- Result Processing with jq ----
async function processResults(results, jqFilter = ".") {
if (!results || !results.data) {
console.log("No results to process.");
return null;
}
try {
// Use jq to filter and format results (pass data directly, not as string)
const processed = await jq.run(jqFilter, results.data, { input: "json" });
// Return the processed result
return processed;
} catch (error) {
console.error("❌ jq processing failed:", error.message);
return JSON.stringify(results.data, null, 2); // Return formatted JSON if jq fails
}
} // ---- Run PoC (generate query and optionally execute) ----
(async () => {
try {
if (!process.env.OPENAI_API_KEY)
throw new Error("Missing OPENAI_API_KEY env var.");
console.log(`🤖 Processing NL query: "${NL_QUERY}"`);
console.log(`🎯 Target object: ${TEST_OBJECT}`);
console.log("=".repeat(60));
// Step 1: Generate ODMDB query from natural language
const out = await inferQuery(NL_QUERY, TEST_OBJECT);
const validated = validateWithOdmdbSchema(out, TEST_OBJECT);
console.log("✅ Generated ODMDB Query:");
const generatedQuery = {
object: validated.object,
condition: validated.condition,
fields: validated.fields,
};
console.log(JSON.stringify(generatedQuery, null, 2));
// Step 2: Execute query if enabled
if (EXECUTE_QUERY) {
console.log("\n" + "=".repeat(60));
const results = await executeOdmdbQuery(generatedQuery);
if (results) {
console.log("✅ Query executed successfully!");
console.log(`📊 Found ${results.data?.length || 0} results`);
// Step 3: Process results with jq
console.log("\n📋 Results Summary:");
const summary = await processResults(
results,
`.[0:3] | map({email, salaryexpectation, salarydevise, salaryunit})`
);
console.log(JSON.stringify(summary, null, 2));
// Optional: Show full results count
if (results.data?.length > 3) {
console.log(`\n... and ${results.data.length - 3} more results`);
}
// Step 4: Export to CSV format
console.log("\n📄 CSV Preview:");
const csvData = await processResults(
results,
`
map([.email // "N/A", (.salaryexpectation | tostring) // "N/A", .salarydevise // "N/A", .salaryunit // "N/A"]) |
["email","salary","currency","unit"] as $header |
[$header] + .[0:5] |
.[] | @csv
`
);
if (csvData) {
console.log(csvData);
}
}
} else {
console.log(
"\n💡 To execute this query against ODMDB, set EXECUTE_QUERY=true"
);
console.log(` Example: EXECUTE_QUERY=true npm start`);
}
} catch (e) {
console.error("❌ PoC failed:", e.message || e);
process.exit(1);
}
})();