- Updated `poc.js` to support queries for multiple object types (seekers, jobads, recruiters, etc.) with intelligent routing based on natural language input. - Implemented a query validation mechanism to prevent excessive or sensitive requests. - Introduced a mapping manager for dynamic schema handling and object detection. - Enhanced the response schema generation to accommodate various object types and their respective fields. - Added a new script `verify-mapping.js` to verify and display the mapping details for the seekers schema, including available properties, indexes, access rights, and synonyms.
498 lines
15 KiB
JavaScript
498 lines
15 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
// Demo script with prepared queries for all ODMDB schemas
|
|
// ignore
|
|
import fs from "node:fs";
|
|
import OpenAI from "openai";
|
|
import { ODMDBMappingManager } from "./schema-mappings/mapping-manager.js";
|
|
|
|
// Chat model name: the OPENAI_MODEL environment variable when it is set to a
// non-empty value, otherwise "gpt-4o".
const MODEL = process.env.OPENAI_MODEL ? process.env.OPENAI_MODEL : "gpt-4o";

// Root of the ODMDB object tree. The path is relative, so file-system calls
// resolve it against the current working directory.
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";

// Start-up banner.
["🚀 ODMDB Multi-Schema NL to Query Demo", "=".repeat(60)].forEach((line) => {
  console.log(line);
});

// Prerequisite: stop with exit code 1 when no API key is configured
// (an unset or empty OPENAI_API_KEY both count as missing).
if (
  process.env.OPENAI_API_KEY === undefined ||
  process.env.OPENAI_API_KEY === ""
) {
  const hints = [
    "❌ Missing OPENAI_API_KEY environment variable",
    " Set it with: export OPENAI_API_KEY=sk-your-api-key",
  ];
  for (const hint of hints) {
    console.log(hint);
  }
  process.exit(1);
}
|
|
|
|
// Initialize mapping manager
|
|
// Single shared ODMDBMappingManager instance. The helpers below read per-object
// mappings from it via getMapping() and list the known objects via mappings.keys().
const mappingManager = new ODMDBMappingManager();
|
|
|
|
// Import functions from poc.js (simplified versions for demo)
|
|
/**
 * Screens a natural-language request before it is turned into a query.
 *
 * A request is rejected when it contains (case-insensitively, as a substring)
 * any phrase that asks for a bulk dump or for sensitive data.
 *
 * @param {string} query - The natural-language request.
 * @returns {boolean} `true` when no blocked phrase occurs; `false` when one
 *   does, or when `query` is not a string.
 */
function validateQuery(query) {
  // A non-string cannot be screened, so treat it as rejected instead of
  // throwing from toLowerCase().
  if (typeof query !== "string") return false;

  const blockedPhrases = [
    "all seekers",
    "every seeker",
    "entire database",
    "all jobads",
    "every job",
    "complete list",
    "all recruiters",
    "every recruiter",
    "full database",
    "password",
    "private",
    "confidential",
    "secret",
  ];

  // Lower-case once; every blocked phrase is already lower-case.
  const normalized = query.toLowerCase();
  return !blockedPhrases.some((phrase) => normalized.includes(phrase));
}
|
|
|
|
/**
 * Guesses which ODMDB object a natural-language request is about.
 *
 * Each object has a list of keywords; the object whose list has the most
 * keywords present in the request wins. Ties go to the object declared first,
 * and a request that matches nothing falls back to "seekers".
 *
 * Keywords are matched as whole words (case-insensitive) with simple plural
 * tolerance ("job" matches "jobs", "company" matches "companies"). Matching
 * raw substrings would let "hr" fire inside "three" or "person" inside
 * "personality".
 *
 * @param {string} query - The natural-language request.
 * @returns {string} One of "seekers", "jobads", "recruiters", "persons", "sirets".
 */
function detectTargetObject(query) {
  const objectKeywords = {
    seekers: ["seeker", "candidate", "job seeker", "applicant", "talent"],
    jobads: ["job", "position", "vacancy", "opening", "role", "jobad"],
    recruiters: ["recruiter", "hr", "hiring manager", "employer"],
    persons: ["person", "people", "individual", "user", "profile"],
    sirets: ["siret", "company", "business", "organization", "enterprise"],
  };

  // Null/undefined (or any non-string) is treated as text so the default applies.
  const queryLower = String(query ?? "").toLowerCase();

  // True when `keyword` (or its plural) appears in the request as a whole word.
  const mentions = (keyword) => {
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const wordForms = keyword.endsWith("y")
      ? `${escaped.slice(0, -1)}(?:y|ies)`
      : `${escaped}(?:s|es)?`;
    return new RegExp(`\\b${wordForms}\\b`).test(queryLower);
  };

  // Keep the first object with the strictly highest score; "seekers" is the
  // default when every score is zero.
  let bestObject = "seekers";
  let bestScore = 0;
  for (const [object, keywords] of Object.entries(objectKeywords)) {
    const score = keywords.filter((keyword) => mentions(keyword)).length;
    if (score > bestScore) {
      bestObject = object;
      bestScore = score;
    }
  }
  return bestObject;
}
|
|
|
|
/**
 * Looks up the schema mapping for one object type.
 *
 * @param {string} targetObject - Object type name, e.g. "seekers".
 * @returns {*} Whatever `mappingManager.getMapping` returns for that name.
 */
function getObjectMapping(targetObject) {
  const mapping = mappingManager.getMapping(targetObject);
  return mapping;
}
|
|
|
|
/**
 * Lists every property name declared in an object's mapping.
 *
 * @param {string} targetObject - Object type name.
 * @returns {string[]} Keys of `mapping.properties`; an empty array when the
 *   mapping is missing, not flagged `available`, or has no properties.
 */
function getAllObjectFields(targetObject) {
  const mapping = getObjectMapping(targetObject);
  // Properties only count when the mapping is flagged as available.
  const properties = mapping?.available ? mapping.properties : undefined;
  return properties ? Object.keys(properties) : [];
}
|
|
|
|
/**
 * Resolves the fields that may be returned for an object type.
 *
 * The mapping's access rights are consulted in a fixed order, `recruiters.R`,
 * then `seekers.R`, then `owner.R`, and the first one that is an array is
 * returned as-is. The order does not depend on `targetObject`. When no such
 * list exists, the first 10 property names of the mapping are used instead.
 *
 * @param {string} targetObject - Object type name.
 * @returns {string[]} Readable field names; empty when the mapping is missing
 *   or not flagged `available`.
 */
function getReadableFields(targetObject) {
  const mapping = getObjectMapping(targetObject);
  if (!mapping?.available) {
    return [];
  }

  const rights = mapping.accessRights;
  if (rights) {
    // First role that exposes a read list wins.
    for (const role of ["recruiters", "seekers", "owner"]) {
      const readList = rights[role]?.R;
      if (Array.isArray(readList)) {
        return readList;
      }
    }
  }

  // No access-rights list: fall back to a capped slice of all properties.
  if (!mapping.properties) {
    return [];
  }
  return Object.keys(mapping.properties).slice(0, 10);
}
|
|
|
|
/**
 * Returns the default field list for an object type, used when no readable
 * fields could be derived from its mapping.
 *
 * @param {string} objectName - Object type name.
 * @returns {string[]} The object-specific fallback fields, or `["id", "name"]`
 *   for any name without its own entry.
 */
function getObjectFallbackFields(objectName) {
  const fallbacks = {
    seekers: ["alias", "email"],
    jobads: ["jobadid", "jobtitle"],
    recruiters: ["alias", "email"],
    persons: ["alias", "email"],
    sirets: ["alias", "name"],
    jobsteps: ["alias", "name"],
    jobtitles: ["jobtitleid", "name"],
  };

  // Own-property check: a bare `fallbacks[objectName]` would also resolve
  // inherited members such as "constructor" or "toString".
  return Object.hasOwn(fallbacks, objectName)
    ? fallbacks[objectName]
    : ["id", "name"];
}
|
|
|
|
/**
 * Builds the JSON Schema that the model's answer must satisfy.
 *
 * The schema describes an object with three required members:
 * - `object`: one of the object names known to the mapping manager
 *   (`["seekers"]` when none are loaded),
 * - `condition`: a non-empty array of strings,
 * - `fields`: a non-empty array drawn from the target's readable fields, or
 *   from its fallback fields when no readable field is known.
 *
 * @param {string} targetObject - Object type the field list is built for.
 * @returns {object} A JSON Schema object.
 */
function buildResponseJsonSchema(targetObject) {
  const knownObjects = Array.from(mappingManager.mappings.keys());
  const objectChoices = knownObjects.length > 0 ? knownObjects : ["seekers"];

  let fieldChoices = getReadableFields(targetObject);
  if (fieldChoices.length === 0) {
    fieldChoices = getObjectFallbackFields(targetObject);
  }

  const objectSchema = { type: "string", enum: objectChoices };
  const conditionSchema = {
    type: "array",
    items: { type: "string" },
    minItems: 1,
  };
  const fieldsSchema = {
    type: "array",
    items: { type: "string", enum: fieldChoices },
    minItems: 1,
  };

  return {
    type: "object",
    additionalProperties: false,
    properties: {
      object: objectSchema,
      condition: conditionSchema,
      fields: fieldsSchema,
    },
    required: ["object", "condition", "fields"],
  };
}
|
|
|
|
/**
 * Formats the Europe/Paris calendar date of an instant, shifted by whole days.
 *
 * @param {Date} instant - The moment to convert.
 * @param {number} [dayOffset=0] - Calendar days to add (negative for the past).
 * @returns {string} The date as `YYYY-MM-DD`.
 */
function formatParisDate(instant, dayOffset = 0) {
  const parts = new Intl.DateTimeFormat("en-US", {
    timeZone: "Europe/Paris",
    year: "numeric",
    month: "numeric",
    day: "numeric",
  }).formatToParts(instant);
  const pick = (type) =>
    Number(parts.find((part) => part.type === type).value);

  // Shift on the calendar (not by 24 h steps) so DST changes cannot move the
  // result by a day; Date.UTC normalises day overflow/underflow.
  const shifted = new Date(
    Date.UTC(pick("year"), pick("month") - 1, pick("day") + dayOffset)
  );
  return shifted.toISOString().slice(0, 10);
}

/**
 * Builds the system prompt that tells the model how to translate a
 * natural-language request into an ODMDB search payload.
 *
 * The prompt lists the DSL forms, the known objects, up to 15 of the target's
 * fields, the fields that may be selected, up to 10 synonym mappings (two
 * synonyms each), and concrete dates for "today", "yesterday" and "last week"
 * in the Europe/Paris timezone.
 *
 * @param {string} targetObject - Object type the request is routed to.
 * @param {Date} [now=new Date()] - Reference instant for the relative dates.
 * @returns {string} The prompt text, lines joined with "\n".
 */
function systemPrompt(targetObject, now = new Date()) {
  const objectMapping = getObjectMapping(targetObject);
  const availableFields = getAllObjectFields(targetObject);
  const availableObjects = Array.from(mappingManager.mappings.keys());

  // Same fallback as buildResponseJsonSchema, so the prompt never advertises
  // an empty field list while the schema enum is non-empty.
  const readableFields = getReadableFields(targetObject);
  const selectableFields =
    readableFields.length > 0
      ? readableFields
      : getObjectFallbackFields(targetObject);

  // Object-specific synonyms: at most 10 fields, two synonyms per field.
  const synonyms = objectMapping?.synonyms || {};
  const synonymList = Object.entries(synonyms)
    .slice(0, 10)
    .map(([field, syns]) => {
      const synArray = Array.isArray(syns) ? syns : [syns];
      return `- '${synArray.slice(0, 2).join("', '")}' → ${field}`;
    })
    .join("\n ");

  // Relative dates are derived from `now` instead of being hard-coded.
  const today = formatParisDate(now);
  const yesterday = formatParisDate(now, -1);
  const lastWeek = formatParisDate(now, -7);

  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value) - for indexed fields",
    "- prop.<field>(operator:value) - for direct property queries",
    "",
    `Available objects: ${availableObjects.join(", ")}`,
    `Target object: ${targetObject}`,
    "",
    `Available ${targetObject} fields:`,
    availableFields.slice(0, 15).join(", ") +
      (availableFields.length > 15 ? "..." : ""),
    "",
    `Readable fields for ${targetObject} (use these for field selection):`,
    selectableFields.join(", "),
    "",
    "Field mappings for natural language:",
    synonymList || "- No specific mappings available",
    "",
    "Date handling:",
    "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
    "- 'updated' → dt_update",
    "",
    "Rules:",
    `- Object should be '${targetObject}' unless query clearly indicates another object`,
    "- Use indexes when available for better performance",
    "- For date filters, use prop.dt_create/dt_update with absolute dates",
    "- Only return readable fields in 'fields' array",
    `- Default fields if request is generic: ${selectableFields
      .slice(0, 5)
      .join(", ")}`,
    "",
    `Timezone is Europe/Paris. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    `Interpret 'yesterday' as → ${yesterday}.`,
  ].join("\n");
}
|
|
|
|
// Prepared demo queries for each schema
|
|
// Demo requests per object type. Each row is [request text, purpose]; rows are
// expanded into `{ nl, description }` records, keyed by object type in
// declaration order.
const preparedQueries = Object.fromEntries(
  Object.entries({
    seekers: [
      [
        "show me seekers with status startasap and their email and experience",
        "Status-based filtering with field selection",
      ],
      [
        "find seekers looking for jobs urgently with salary expectations",
        "Status synonym mapping + salary field",
      ],
      [
        "get seekers with their contact info and personality types",
        "Multiple field types (contact + MBTI)",
      ],
      [
        "show recent seekers who are actively looking for work",
        "Date filtering + status combination",
      ],
    ],
    jobads: [
      [
        "find job postings for software developer positions",
        "Job title-based search",
      ],
      [
        "show recent job ads with salary information",
        "Date filtering + compensation data",
      ],
      [
        "get remote work opportunities published this week",
        "Remote work filter + recent date range",
      ],
      [
        "find full-time positions in Paris with job descriptions",
        "Location + employment type filtering",
      ],
    ],
    recruiters: [
      [
        "show active recruiters with their contact information",
        "Active status + contact field selection",
      ],
      ["find recruiters from tech companies", "Industry-based filtering"],
      [
        "get recruiters who posted jobs recently",
        "Activity-based filtering with date range",
      ],
      [
        "show recruiter profiles with their specializations",
        "Profile data + specialization fields",
      ],
    ],
    persons: [
      ["find persons with complete profiles", "Profile completeness filtering"],
      ["show recent person registrations", "Registration date filtering"],
      [
        "get persons with verified email addresses",
        "Verification status filtering",
      ],
      [
        "find persons who updated their profiles this month",
        "Update activity filtering",
      ],
    ],
    sirets: [
      ["show companies in the technology sector", "Industry sector filtering"],
      [
        "find companies with more than 100 employees",
        "Company size filtering",
      ],
      ["get recently registered companies", "Registration date filtering"],
      [
        "show companies located in major French cities",
        "Geographic location filtering",
      ],
    ],
  }).map(([objectType, rows]) => [
    objectType,
    rows.map(([nl, description]) => ({ nl, description })),
  ])
);
|
|
|
|
// OpenAI client
|
|
// Shared OpenAI SDK client, authenticated with the OPENAI_API_KEY environment
// variable; generateQuery() uses it for chat completions.
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
|
|
|
|
/**
 * Asks the chat model to translate a natural-language request into an ODMDB
 * query object, constrained by the JSON Schema for `targetObject`.
 *
 * Failures never propagate: any error (API failure, model refusal, missing or
 * unparsable content) is logged to stderr and reported as `null`.
 *
 * @param {string} nlText - The natural-language request.
 * @param {string} targetObject - Object type used for the prompt and schema.
 * @returns {Promise<object|null>} The parsed query, or `null` on failure.
 */
async function generateQuery(nlText, targetObject) {
  try {
    const resp = await client.chat.completions.create({
      model: MODEL,
      messages: [
        { role: "system", content: systemPrompt(targetObject) },
        {
          role: "user",
          content: `Natural language request: "${nlText}"\nReturn ONLY the JSON object.`,
        },
      ],
      response_format: {
        type: "json_schema",
        json_schema: {
          name: "OdmdbQuery",
          schema: buildResponseJsonSchema(targetObject),
          strict: true,
        },
      },
    });

    const message = resp.choices?.[0]?.message;

    // A refusal arrives with null content; JSON.parse(null) would quietly
    // yield null, so surface the reason instead.
    if (message?.refusal) {
      throw new Error(`model refused the request: ${message.refusal}`);
    }
    if (typeof message?.content !== "string" || message.content.trim() === "") {
      throw new Error("model returned no content");
    }

    return JSON.parse(message.content);
  } catch (error) {
    console.error(`❌ Query generation failed: ${error.message}`);
    return null;
  }
}
|
|
|
|
// Check data availability for each object type
|
|
/**
 * Counts the stored records for each demo object type and prints a report.
 *
 * For every type the directory `<ODMDB_BASE_PATH>/objects/<type>/itm` is
 * inspected. A record is any entry whose name ends in ".json" and is not
 * "backup". A missing directory or a read error counts as 0 records.
 *
 * @returns {Object<string, number>} Record count per object type.
 */
function checkDataAvailability() {
  console.log("\n📊 ODMDB Data Availability Check:");

  const availability = {};
  const isRecordFile = (name) => name !== "backup" && name.endsWith(".json");

  ["seekers", "jobads", "recruiters", "persons", "sirets"].forEach(
    (objectType) => {
      const itemsPath = `${ODMDB_BASE_PATH}/objects/${objectType}/itm`;
      let recordCount = 0;
      let report;

      try {
        if (fs.existsSync(itemsPath)) {
          recordCount = fs.readdirSync(itemsPath).filter(isRecordFile).length;
          report = `✅ ${objectType}: ${recordCount} records`;
        } else {
          report = `❌ ${objectType}: No data directory found`;
        }
      } catch (error) {
        // Unreadable directory: report it and keep the count at zero.
        recordCount = 0;
        report = `❌ ${objectType}: Error accessing data (${error.message})`;
      }

      availability[objectType] = recordCount;
      console.log(report);
    }
  );

  return availability;
}
|
|
|
|
// Check schema mappings availability
|
|
/**
 * Prints which object mappings are loaded and, for each one, how many fields
 * it declares and how many of them are readable.
 *
 * @returns {void}
 */
function checkMappingAvailability() {
  console.log("\n🔧 Schema Mappings Availability:");

  const availableObjects = Array.from(mappingManager.mappings.keys());
  console.log(`✅ Loaded mappings for: ${availableObjects.join(", ")}`);

  for (const objectType of availableObjects) {
    // Both helpers look the mapping up themselves, so no separate lookup is
    // needed here.
    const fieldCount = getAllObjectFields(objectType).length;
    const readableCount = getReadableFields(objectType).length;
    console.log(
      ` - ${objectType}: ${fieldCount} fields (${readableCount} readable)`
    );
  }
}
|
|
|
|
// Main demo execution
|
|
/**
 * Runs the whole demo: reports mapping and data availability, then generates
 * an ODMDB query for every prepared request, one request at a time.
 *
 * Each request is first screened with validateQuery(); rejected requests are
 * skipped. The object detected by detectTargetObject() is only reported, and
 * generation always uses the object type the request is filed under. When the
 * EXECUTE_DEMO environment variable is "true" and records exist for the
 * object type, a placeholder "execution" message is printed as well.
 *
 * @returns {Promise<void>}
 */
async function runDemo() {
  const executeQueries = process.env.EXECUTE_DEMO === "true";

  // System status.
  checkMappingAvailability();
  const dataAvailability = checkDataAvailability();

  console.log("\n🚀 Running Multi-Schema Query Generation Demo...");

  for (const [objectType, queries] of Object.entries(preparedQueries)) {
    console.log(
      `\n${"=".repeat(20)} ${objectType.toUpperCase()} QUERIES ${"=".repeat(
        20
      )}`
    );

    if (dataAvailability[objectType] === 0) {
      console.log(
        `⚠️ No data available for ${objectType} - showing query generation only`
      );
    }

    for (let i = 0; i < queries.length; i++) {
      const query = queries[i];
      console.log(`\n${i + 1}. "${query.nl}"`);
      console.log(` Purpose: ${query.description}`);

      // Screen the request before spending an API call on it.
      if (!validateQuery(query.nl)) {
        console.log(" ❌ Query rejected: Contains problematic terms");
        continue;
      }

      // Auto-detection is informational; the prepared object type is used.
      const detectedObject = detectTargetObject(query.nl);
      console.log(` 🎯 Detected target object: ${detectedObject}`);

      if (detectedObject !== objectType) {
        console.log(
          ` ⚠️ Note: Auto-detection suggests '${detectedObject}' but testing with '${objectType}'`
        );
      }

      console.log(" 🤖 Generating query...");
      const generatedQuery = await generateQuery(query.nl, objectType);

      if (generatedQuery) {
        console.log(" ✅ Generated ODMDB Query:");
        console.log(
          ` ${JSON.stringify(generatedQuery, null, 6).replace(
            /\n/g,
            "\n "
          )}`
        );

        // Report the size of the mapping that was used. Counts come from the
        // same helpers as checkMappingAvailability() so both reports agree.
        if (getObjectMapping(objectType)) {
          console.log(
            ` 📋 Available fields: ${getAllObjectFields(objectType).length}`
          );
          console.log(
            ` 👁️ Readable fields: ${getReadableFields(objectType).length}`
          );
        }

        if (executeQueries && dataAvailability[objectType] > 0) {
          console.log(
            " 🔍 Query execution would run here with actual ODMDB data..."
          );
          console.log(
            ` 💾 Target: ${dataAvailability[objectType]} ${objectType} records`
          );
        }
      } else {
        console.log(" ❌ Failed to generate query");
      }

      // Separator between requests, but not after the last one.
      if (i < queries.length - 1) {
        console.log(" " + "-".repeat(50));
      }
    }
  }

  if (!executeQueries) {
    console.log(`\n💡 To enable query execution simulation, run:`);
    console.log(` EXECUTE_DEMO=true node demo.js`);
  }
}
|
|
|
|
// Entry point: announce the run, execute the demo, then print either the
// closing summary or the failure reason (exiting with code 1 on failure).
console.log("\n📈 Multi-Schema PoC Demo Starting...");

runDemo()
  .then(() => {
    const closingLines = [
      "\n✅ Multi-schema demo complete!",
      "\n🎯 Summary:",
      "- Demonstrated query generation for all ODMDB object types",
      "- Validated query safety and object detection",
      "- Showed dynamic schema mapping usage",
      "- Prepared queries showcase different use cases per schema",
    ];
    for (const line of closingLines) {
      console.log(line);
    }
  })
  .catch((failure) => {
    console.error("\n❌ Demo failed:", failure.message);
    process.exit(1);
  });
|