feat: Enhance ODMDB query handling with multi-schema support and intelligent routing

- Updated `poc.js` to support queries for multiple object types (seekers, jobads, recruiters, etc.) with intelligent routing based on natural language input.
- Implemented a query validation mechanism to prevent excessive or sensitive requests.
- Introduced a mapping manager for dynamic schema handling and object detection.
- Enhanced the response schema generation to accommodate various object types and their respective fields.
- Added a new script `verify-mapping.js` to verify and display the mapping details for the seekers schema, including available properties, indexes, access rights, and synonyms.
This commit is contained in:
Eliyan
2025-10-15 13:54:24 +02:00
parent 7bccdb711d
commit 663cf45704
4 changed files with 901 additions and 825 deletions

615
demo.js
View File

@@ -1,15 +1,15 @@
#!/usr/bin/env node
// Demo script that actually uses the PoC functionality to demonstrate real query generation
// Demo script with prepared queries for all ODMDB schemas
// ignore
import fs from "node:fs";
import OpenAI from "openai";
import { ODMDBMappingManager } from "./schema-mappings/mapping-manager.js";
// Import PoC components (we'll need to extract them to make them reusable)
const MODEL = process.env.OPENAI_MODEL || "gpt-5";
const MODEL = process.env.OPENAI_MODEL || "gpt-4o";
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
const SCHEMA_PATH = `${ODMDB_BASE_PATH}/schema`;
console.log("🚀 ODMDB NL to Query Demo - Live PoC Testing");
console.log("🚀 ODMDB Multi-Schema NL to Query Demo");
console.log("=".repeat(60));
// Check prerequisites
@@ -19,80 +19,137 @@ if (!process.env.OPENAI_API_KEY) {
process.exit(1);
}
// Load schema (same function as in poc.js)
function loadJsonSafe(path) {
try {
if (fs.existsSync(path)) {
return JSON.parse(fs.readFileSync(path, "utf-8"));
}
} catch (e) {
console.warn(`Warning: Could not load ${path}:`, e.message);
}
return null;
// Initialize mapping manager
const mappingManager = new ODMDBMappingManager();
// Simplified local re-implementations of helpers from poc.js (nothing is imported from it)
/**
 * Screens a natural-language request before it is used for query generation.
 *
 * The check is a case-insensitive *substring* match against a fixed deny-list
 * of bulk-export phrases ("all seekers", "entire database", ...) and
 * sensitive terms ("password", "secret", ...). Because it is a substring
 * match, a word that merely contains a listed term (e.g. "secretary"
 * contains "secret") is rejected as well.
 *
 * @param {string} query - Natural-language request text.
 * @returns {boolean} `true` when no deny-listed term occurs in `query`;
 *   `false` when one does, or when `query` is not a string.
 */
function validateQuery(query) {
  // A non-string request cannot be screened; reject it instead of throwing.
  if (typeof query !== "string") {
    return false;
  }

  // Every entry is already lower-case, so only the query needs normalizing.
  const problematicTerms = [
    "all seekers",
    "every seeker",
    "entire database",
    "all jobads",
    "every job",
    "complete list",
    "all recruiters",
    "every recruiter",
    "full database",
    "password",
    "private",
    "confidential",
    "secret",
  ];

  // Normalize once rather than once per deny-list entry.
  const normalizedQuery = query.toLowerCase();
  return !problematicTerms.some((term) => normalizedQuery.includes(term));
}
// Load actual ODMDB schemas
const SCHEMAS = {
seekers: loadJsonSafe(`${SCHEMA_PATH}/seekers.json`),
main: loadJsonSafe("./main.json"), // Fallback consolidated schema
};
function detectTargetObject(query) {
const objectKeywords = {
seekers: ["seeker", "candidate", "job seeker", "applicant", "talent"],
jobads: ["job", "position", "vacancy", "opening", "role", "jobad"],
recruiters: ["recruiter", "hr", "hiring manager", "employer"],
persons: ["person", "people", "individual", "user", "profile"],
sirets: ["siret", "company", "business", "organization", "enterprise"],
};
// Simplified SchemaMapper for demo
class DemoSchemaMapper {
constructor(schemas) {
this.seekersSchema = schemas.seekers;
console.log(
`📋 Loaded seekers schema with ${
Object.keys(this.seekersSchema?.properties || {}).length
} properties`
);
const queryLower = query.toLowerCase();
const scores = {};
for (const [object, keywords] of Object.entries(objectKeywords)) {
scores[object] = keywords.filter((keyword) =>
queryLower.includes(keyword)
).length;
}
getRecruiterReadableFields() {
if (!this.seekersSchema?.apxaccessrights?.recruiters?.R) {
return ["alias", "email", "seekstatus", "seekworkingyear"];
}
return this.seekersSchema.apxaccessrights.recruiters.R;
}
const maxScore = Math.max(...Object.values(scores));
if (maxScore === 0) return "seekers"; // Default fallback
getAllSeekersFields() {
if (!this.seekersSchema?.properties) return [];
return Object.keys(this.seekersSchema.properties);
}
return Object.keys(scores).find((key) => scores[key] === maxScore);
}
const schemaMapper = new DemoSchemaMapper(SCHEMAS);
/**
 * Fetches the mapping registered for one object type by delegating to the
 * module-level `mappingManager`.
 *
 * @param {string} targetObject - Object type name, e.g. "seekers".
 * @returns {*} Whatever `mappingManager.getMapping` returns for that name.
 */
function getObjectMapping(targetObject) {
  const mapping = mappingManager.getMapping(targetObject);
  return mapping;
}
// Sample queries to demonstrate with actual PoC execution
const demoQueries = [
{
nl: "show me seekers with status startasap and their email and experience",
description: "Status-based filtering with field selection",
},
{
nl: "find seekers looking for jobs urgently with salary expectations",
description: "Status synonym mapping + salary field",
},
{
nl: "get seekers with their contact info and personality types",
description: "Multiple field types (contact + MBTI)",
},
];
/**
 * Lists every property name declared in an object's mapping.
 *
 * @param {string} targetObject - Object type name, e.g. "seekers".
 * @returns {string[]} Keys of `mapping.properties`, or an empty array when
 *   the mapping is missing, not flagged `available`, or has no properties.
 */
function getAllObjectFields(targetObject) {
  const objectMapping = getObjectMapping(targetObject);
  const hasProperties = Boolean(
    objectMapping?.available && objectMapping.properties
  );
  return hasProperties ? Object.keys(objectMapping.properties) : [];
}
console.log("<22> Demo Queries - Testing Live PoC:");
/**
 * Determines which fields of an object type may be returned to the caller.
 *
 * The first access-rights role that carries an array under `R` wins, tried
 * in the order `recruiters`, `seekers`, `owner`. When no role provides such
 * an array, the first ten property names of the mapping are used instead.
 *
 * @param {string} targetObject - Object type name, e.g. "seekers".
 * @returns {string[]} The readable field names; empty when the mapping is
 *   missing or not flagged `available`, or when nothing else applies.
 */
function getReadableFields(targetObject) {
  const objectMapping = getObjectMapping(targetObject);
  if (!objectMapping?.available) {
    return [];
  }

  const rights = objectMapping.accessRights;
  if (rights) {
    // Order matters: recruiters.R takes precedence over seekers.R, then owner.R.
    for (const role of ["recruiters", "seekers", "owner"]) {
      const readable = rights[role]?.R;
      if (Array.isArray(readable)) {
        return readable;
      }
    }
  }

  // No usable access-rights list: cap the fallback at ten property names.
  if (!objectMapping.properties) {
    return [];
  }
  return Object.keys(objectMapping.properties).slice(0, 10);
}
/**
 * Returns the minimal field list to use for an object type when no readable
 * fields could be determined for it.
 *
 * @param {string} objectName - Object type name, e.g. "jobads".
 * @returns {string[]} The object-specific fallback fields, or
 *   `["id", "name"]` for any name without a dedicated entry.
 */
function getObjectFallbackFields(objectName) {
  // Object-specific fallback fields when no readable fields are available
  const fallbacks = {
    seekers: ["alias", "email"],
    jobads: ["jobadid", "jobtitle"],
    recruiters: ["alias", "email"],
    persons: ["alias", "email"],
    sirets: ["alias", "name"],
    jobsteps: ["alias", "name"],
    jobtitles: ["jobtitleid", "name"],
  };

  // Own-property check: a plain `fallbacks[objectName] || ...` lookup would
  // resolve inherited keys such as "constructor" or "toString" to
  // Object.prototype members instead of falling through to the default.
  if (Object.prototype.hasOwnProperty.call(fallbacks, objectName)) {
    return fallbacks[objectName];
  }
  return ["id", "name"];
}
function buildResponseJsonSchema(targetObject) {
const availableObjects = Array.from(mappingManager.mappings.keys());
const readableFields = getReadableFields(targetObject);
// JSON Schema for query generation (same as poc.js)
function buildResponseJsonSchema() {
const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
return {
type: "object",
additionalProperties: false,
properties: {
object: { type: "string", enum: ["seekers"] },
condition: { type: "array", items: { type: "string" }, minItems: 1 },
object: {
type: "string",
enum: availableObjects.length > 0 ? availableObjects : ["seekers"],
},
condition: {
type: "array",
items: { type: "string" },
minItems: 1,
},
fields: {
type: "array",
items: { type: "string", enum: recruiterReadableFields },
items: {
type: "string",
enum:
readableFields.length > 0
? readableFields
: getObjectFallbackFields(targetObject),
},
minItems: 1,
},
},
@@ -100,67 +157,186 @@ function buildResponseJsonSchema() {
};
}
// System prompt (simplified version from poc.js)
function systemPrompt() {
const availableFields = schemaMapper.getAllSeekersFields();
const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
function systemPrompt(targetObject) {
const objectMapping = getObjectMapping(targetObject);
const availableFields = getAllObjectFields(targetObject);
const readableFields = getReadableFields(targetObject);
const availableObjects = Array.from(mappingManager.mappings.keys());
// Get object-specific synonyms from mapping
const synonyms = objectMapping?.synonyms || {};
const synonymList = Object.entries(synonyms)
.slice(0, 10)
.map(([field, syns]) => {
const synArray = Array.isArray(syns) ? syns : [syns];
return `- '${synArray.slice(0, 2).join("', '")}' → ${field}`;
})
.join("\n ");
return [
"You convert a natural language request into an ODMDB search payload.",
"Return ONLY a compact JSON object that matches the provided JSON Schema.",
"",
"ODMDB DSL:",
"- join(remoteObject:localKey:remoteProp:operator:value)",
"- idx.<indexName>(value) - for indexed fields",
"- prop.<field>(operator:value) - for direct property queries",
"",
"Available seekers fields:",
`Available objects: ${availableObjects.join(", ")}`,
`Target object: ${targetObject}`,
"",
`Available ${targetObject} fields:`,
availableFields.slice(0, 15).join(", ") +
(availableFields.length > 15 ? "..." : ""),
"",
"Recruiter-readable fields (use these for field selection):",
recruiterReadableFields.join(", "),
`Readable fields for ${targetObject} (use these for field selection):`,
readableFields.join(", "),
"",
"Field mappings:",
"- 'email', 'contact info' → email",
"- 'experience', 'years of experience' → seekworkingyear",
"- 'status', 'availability' → seekstatus",
"- 'salary', 'pay' → salaryexpectation",
"- 'personality', 'MBTI' → mbti",
"Field mappings for natural language:",
synonymList || "- No specific mappings available",
"",
"Status value mappings:",
"- 'urgent', 'urgently', 'ASAP' → startasap",
"- 'no rush', 'taking time' → norush",
"- 'not looking' → notlooking",
"Date handling:",
"- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
"- 'updated' → dt_update",
"",
"Rules: Object must be 'seekers'. Use idx.seekstatus_alias for status queries.",
"Rules:",
`- Object should be '${targetObject}' unless query clearly indicates another object`,
"- Use indexes when available for better performance",
"- For date filters, use prop.dt_create/dt_update with absolute dates",
"- Only return readable fields in 'fields' array",
`- Default fields if request is generic: ${readableFields
.slice(0, 5)
.join(", ")}`,
"",
"Timezone is Europe/Paris. Today is 2025-10-15.",
"Interpret 'last week' as now minus 7 days → 2025-10-08.",
"Interpret 'yesterday' as → 2025-10-14.",
].join("\n");
}
// OpenAI client and query function
// Prepared demo queries for each schema
// Demo fixtures, grouped by the object type each request is aimed at.
// Every entry has two string fields:
//   nl          - the natural-language request text
//   description - a short label saying what the request is meant to exercise
const preparedQueries = {
// Requests aimed at the `seekers` object.
seekers: [
{
nl: "show me seekers with status startasap and their email and experience",
description: "Status-based filtering with field selection",
},
{
nl: "find seekers looking for jobs urgently with salary expectations",
description: "Status synonym mapping + salary field",
},
{
nl: "get seekers with their contact info and personality types",
description: "Multiple field types (contact + MBTI)",
},
{
nl: "show recent seekers who are actively looking for work",
description: "Date filtering + status combination",
},
],
// Requests aimed at the `jobads` object.
jobads: [
{
nl: "find job postings for software developer positions",
description: "Job title-based search",
},
{
nl: "show recent job ads with salary information",
description: "Date filtering + compensation data",
},
{
nl: "get remote work opportunities published this week",
description: "Remote work filter + recent date range",
},
{
nl: "find full-time positions in Paris with job descriptions",
description: "Location + employment type filtering",
},
],
// Requests aimed at the `recruiters` object.
recruiters: [
{
nl: "show active recruiters with their contact information",
description: "Active status + contact field selection",
},
{
nl: "find recruiters from tech companies",
description: "Industry-based filtering",
},
{
nl: "get recruiters who posted jobs recently",
description: "Activity-based filtering with date range",
},
{
nl: "show recruiter profiles with their specializations",
description: "Profile data + specialization fields",
},
],
// Requests aimed at the `persons` object.
persons: [
{
nl: "find persons with complete profiles",
description: "Profile completeness filtering",
},
{
nl: "show recent person registrations",
description: "Registration date filtering",
},
{
nl: "get persons with verified email addresses",
description: "Verification status filtering",
},
{
nl: "find persons who updated their profiles this month",
description: "Update activity filtering",
},
],
// Requests aimed at the `sirets` object; the request texts say "companies".
sirets: [
{
nl: "show companies in the technology sector",
description: "Industry sector filtering",
},
{
nl: "find companies with more than 100 employees",
description: "Company size filtering",
},
{
nl: "get recently registered companies",
description: "Registration date filtering",
},
{
nl: "show companies located in major French cities",
description: "Geographic location filtering",
},
],
};
// OpenAI client
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
async function generateQuery(nlText) {
async function generateQuery(nlText, targetObject) {
try {
const resp = await client.responses.create({
const resp = await client.chat.completions.create({
model: MODEL,
input: [
{ role: "system", content: systemPrompt() },
messages: [
{ role: "system", content: systemPrompt(targetObject) },
{
role: "user",
content: `Natural language request: "${nlText}"\nReturn ONLY the JSON object.`,
},
],
text: {
format: {
response_format: {
type: "json_schema",
json_schema: {
name: "OdmdbQuery",
type: "json_schema",
schema: buildResponseJsonSchema(),
schema: buildResponseJsonSchema(targetObject),
strict: true,
},
},
});
const jsonText = resp.output_text || resp.output?.[0]?.content?.[0]?.text;
const jsonText = resp.choices[0].message.content;
return JSON.parse(jsonText);
} catch (error) {
console.error(`❌ Query generation failed: ${error.message}`);
@@ -168,181 +344,152 @@ async function generateQuery(nlText) {
}
}
// Simple query execution (simplified from poc.js)
function loadSeekersData() {
const seekersItemsPath = `${ODMDB_BASE_PATH}/objects/seekers/itm`;
try {
const files = fs
.readdirSync(seekersItemsPath)
.filter((file) => file.endsWith(".json") && file !== "backup")
.slice(0, 10); // Just 10 files for demo speed
// Check data availability for each object type
function checkDataAvailability() {
console.log("\n📊 ODMDB Data Availability Check:");
const seekers = [];
for (const file of files) {
try {
const filePath = `${seekersItemsPath}/${file}`;
const data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
seekers.push(data);
} catch (error) {
// Skip invalid files
const objectTypes = ["seekers", "jobads", "recruiters", "persons", "sirets"];
const availability = {};
for (const objectType of objectTypes) {
const itemsPath = `${ODMDB_BASE_PATH}/objects/${objectType}/itm`;
try {
if (fs.existsSync(itemsPath)) {
const files = fs
.readdirSync(itemsPath)
.filter((f) => f.endsWith(".json") && f !== "backup");
availability[objectType] = files.length;
console.log(`${objectType}: ${files.length} records`);
} else {
availability[objectType] = 0;
console.log(`${objectType}: No data directory found`);
}
} catch (error) {
availability[objectType] = 0;
console.log(`${objectType}: Error accessing data (${error.message})`);
}
return seekers;
} catch (error) {
return [];
}
return availability;
}
async function executeQuery(query) {
const allSeekers = loadSeekersData();
if (allSeekers.length === 0) return { data: [] };
// Check schema mappings availability
function checkMappingAvailability() {
console.log("\n🔧 Schema Mappings Availability:");
let filteredSeekers = allSeekers;
const availableObjects = Array.from(mappingManager.mappings.keys());
console.log(`✅ Loaded mappings for: ${availableObjects.join(", ")}`);
// Simple filtering
for (const condition of query.condition) {
if (condition.includes("idx.seekstatus_alias(startasap)")) {
filteredSeekers = filteredSeekers.filter(
(seeker) => seeker.seekstatus === "startasap"
);
}
if (condition.includes("prop.salaryexpectation(exists:true)")) {
filteredSeekers = filteredSeekers.filter(
(seeker) => seeker.salaryexpectation
);
}
if (condition.includes("prop.email(exists:true)")) {
filteredSeekers = filteredSeekers.filter((seeker) => seeker.email);
}
if (condition.includes("prop.mbti(exists:true)")) {
filteredSeekers = filteredSeekers.filter((seeker) => seeker.mbti);
}
for (const objectType of availableObjects) {
const mapping = mappingManager.getMapping(objectType);
const fieldCount = getAllObjectFields(objectType).length;
const readableCount = getReadableFields(objectType).length;
console.log(
` - ${objectType}: ${fieldCount} fields (${readableCount} readable)`
);
}
// Select only requested fields
const results = filteredSeekers.map((seeker) => {
const filtered = {};
for (const field of query.fields) {
if (seeker.hasOwnProperty(field)) {
filtered[field] = seeker[field];
}
}
return filtered;
});
return { data: results };
}
// Main demo execution
async function runDemo() {
const executeQueries = process.env.EXECUTE_DEMO === "true";
for (let i = 0; i < demoQueries.length; i++) {
const query = demoQueries[i];
console.log(`\n${i + 1}. "${query.nl}"`);
console.log(` Purpose: ${query.description}`);
// Check system status
checkMappingAvailability();
const dataAvailability = checkDataAvailability();
console.log(" 🤖 Generating query...");
const generatedQuery = await generateQuery(query.nl);
console.log("\n🚀 Running Multi-Schema Query Generation Demo...");
if (generatedQuery) {
console.log(" ✅ Generated ODMDB Query:");
for (const [objectType, queries] of Object.entries(preparedQueries)) {
console.log(
`\n${"=".repeat(20)} ${objectType.toUpperCase()} QUERIES ${"=".repeat(
20
)}`
);
if (dataAvailability[objectType] === 0) {
console.log(
` ${JSON.stringify(generatedQuery, null, 6).replace(/\n/g, "\n ")}`
`⚠️ No data available for ${objectType} - showing query generation only`
);
if (executeQueries) {
console.log(" 🔍 Executing query...");
const results = await executeQuery(generatedQuery);
console.log(` 📊 Found ${results.data.length} results`);
if (results.data.length > 0) {
console.log(" 📋 Sample result:");
console.log(
` ${JSON.stringify(results.data[0], null, 6).replace(
/\n/g,
"\n "
)}`
);
}
}
} else {
console.log(" ❌ Failed to generate query");
}
if (i < demoQueries.length - 1) {
console.log(" " + "-".repeat(50));
for (let i = 0; i < queries.length; i++) {
const query = queries[i];
console.log(`\n${i + 1}. "${query.nl}"`);
console.log(` Purpose: ${query.description}`);
// Validate query first
if (!validateQuery(query.nl)) {
console.log(" ❌ Query rejected: Contains problematic terms");
continue;
}
// Detect target object (should match our intended object)
const detectedObject = detectTargetObject(query.nl);
console.log(` 🎯 Detected target object: ${detectedObject}`);
if (detectedObject !== objectType) {
console.log(
` ⚠️ Note: Auto-detection suggests '${detectedObject}' but testing with '${objectType}'`
);
}
console.log(" 🤖 Generating query...");
const generatedQuery = await generateQuery(query.nl, objectType);
if (generatedQuery) {
console.log(" ✅ Generated ODMDB Query:");
console.log(
` ${JSON.stringify(generatedQuery, null, 6).replace(
/\n/g,
"\n "
)}`
);
// Show what mapping was used
const mapping = getObjectMapping(objectType);
if (mapping) {
console.log(
` 📋 Available fields: ${mapping.availableFields?.length || 0}`
);
console.log(
` 👁️ Readable fields: ${mapping.readableFields?.length || 0}`
);
}
if (executeQueries && dataAvailability[objectType] > 0) {
console.log(
" 🔍 Query execution would run here with actual ODMDB data..."
);
console.log(
` 💾 Target: ${dataAvailability[objectType]} ${objectType} records`
);
}
} else {
console.log(" ❌ Failed to generate query");
}
if (i < queries.length - 1) {
console.log(" " + "-".repeat(50));
}
}
}
if (!executeQueries) {
console.log(`\n💡 To execute queries and see results, run:`);
console.log(`\n💡 To enable query execution simulation, run:`);
console.log(` EXECUTE_DEMO=true node demo.js`);
}
}
console.log("\n📊 ODMDB Status Check:");
// Check if ODMDB data is accessible
const seekersPath = "../smatchitObjectOdmdb/objects/seekers/itm";
try {
if (fs.existsSync(seekersPath)) {
const files = fs
.readdirSync(seekersPath)
.filter((f) => f.endsWith(".json") && f !== "backup");
console.log(`✅ Found ${files.length} seeker files in ${seekersPath}`);
// Sample a few files to show data types
const sampleFile = files[0];
const sampleData = JSON.parse(
fs.readFileSync(`${seekersPath}/${sampleFile}`, "utf-8")
);
console.log(`📄 Sample seeker data (${sampleFile}):`);
console.log(` - alias: ${sampleData.alias}`);
console.log(` - email: ${sampleData.email}`);
console.log(` - seekstatus: ${sampleData.seekstatus}`);
console.log(` - seekworkingyear: ${sampleData.seekworkingyear}`);
console.log(` - dt_create: ${sampleData.dt_create}`);
} else {
console.log(`❌ ODMDB data not found at ${seekersPath}`);
}
} catch (error) {
console.log(`❌ Error accessing ODMDB data: ${error.message}`);
}
const schemaPath = "../smatchitObjectOdmdb/schema/seekers.json";
try {
if (fs.existsSync(schemaPath)) {
const schema = JSON.parse(fs.readFileSync(schemaPath, "utf-8"));
const fieldCount = Object.keys(schema.properties || {}).length;
console.log(`✅ Loaded seekers schema with ${fieldCount} properties`);
// Show access rights info
if (schema.apxaccessrights?.recruiters?.R) {
console.log(
`📋 Recruiter-readable fields: ${schema.apxaccessrights.recruiters.R.slice(
0,
5
).join(", ")}... (${schema.apxaccessrights.recruiters.R.length} total)`
);
}
// Show available indexes
if (schema.apxidx) {
const indexes = schema.apxidx.map((idx) => idx.name);
console.log(`🔍 Available indexes: ${indexes.join(", ")}`);
}
} else {
console.log(`❌ Schema not found at ${schemaPath}`);
}
} catch (error) {
console.log(`❌ Error loading schema: ${error.message}`);
}
console.log("\n🚀 Running Live PoC Demo...");
console.log("\n📈 Multi-Schema PoC Demo Starting...");
runDemo()
.then(() => {
console.log("\n✅ Demo complete!");
console.log("\n✅ Multi-schema demo complete!");
console.log("\n🎯 Summary:");
console.log("- Demonstrated query generation for all ODMDB object types");
console.log("- Validated query safety and object detection");
console.log("- Showed dynamic schema mapping usage");
console.log("- Prepared queries showcase different use cases per schema");
})
.catch((error) => {
console.error("\n❌ Demo failed:", error.message);