// Source file: Poc-dashboard/poc.js (834 lines, 25 KiB, JavaScript)

// PoC: NL → ODMDB query (seekers), no zod — validate via ODMDB schema
// Usage:
// 1) export OPENAI_API_KEY=sk-...
// 2) node poc.js
import fs from "node:fs";
import OpenAI from "openai";
import axios from "axios";
import jq from "node-jq";
// ---- Config ----
// Reads an environment variable, using the fallback when it is unset or empty.
const envOr = (name, fallback) => process.env[name] || fallback;

// Model name passed to the OpenAI client (override with OPENAI_MODEL).
const MODEL = envOr("OPENAI_MODEL", "gpt-5");

// ODMDB paths - point to actual ODMDB structure
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
const SCHEMA_PATH = ODMDB_BASE_PATH + "/schema";
const OBJECTS_PATH = ODMDB_BASE_PATH + "/objects";

// ODMDB execution config
const ODMDB_BASE_URL = envOr("ODMDB_BASE_URL", "http://localhost:3000");
const ODMDB_TRIBE = envOr("ODMDB_TRIBE", "smatchit");
// Execution is opt-in: only the exact string "true" enables it.
const EXECUTE_QUERY = process.env.EXECUTE_QUERY === "true";

// Hardcoded NL query for the PoC (no multi-turn)
const NL_QUERY =
  "find seekers looking for jobs urgently with their contact info and salary expectations";
// ---- Load schemas (safe) ----
/**
 * Reads and parses a JSON file without ever throwing.
 *
 * @param {string} path - Location of the JSON file.
 * @returns {*} The parsed content, or null when the file does not exist or
 *   cannot be read/parsed (a warning is logged in the failure case).
 */
function loadJsonSafe(path) {
  try {
    if (!fs.existsSync(path)) {
      return null;
    }
    const rawText = fs.readFileSync(path, "utf-8");
    return JSON.parse(rawText);
  } catch (loadError) {
    console.warn(`Warning: Could not load ${path}:`, loadError.message);
    return null;
  }
}
// Load actual ODMDB schemas.
// Each entry is null when its file is missing or unreadable (see loadJsonSafe).
const SCHEMAS = Object.fromEntries([
  // Dedicated seekers schema file
  ["seekers", loadJsonSafe(SCHEMA_PATH + "/seekers.json")],
  // Fallback consolidated schema
  ["main", loadJsonSafe("./main.json")],
]);
// ---- Helpers to read seekers field names from your ODMDB custom schema ----
/**
 * Finds the seekers property names inside a schema document of unknown shape.
 *
 * Shapes tried, in order:
 *  1. `{ objects: { seekers: { properties: {...} } } }` at the root;
 *  2. an array whose entries are searched one by one (first non-empty wins);
 *  3. any nested node shaped like `{ seekers: { properties: {...} } }`.
 *
 * The search remembers every node it has examined, so self-referencing
 * structures terminate instead of looping forever.
 *
 * @param {*} main - Parsed schema document (object, array, or anything else).
 * @returns {string[]} Property names of the seekers schema, or [] when no
 *   matching node is found.
 */
function extractSeekersPropsFromOdmdbSchema(main) {
  const visited = new WeakSet(); // nodes already examined by the deep search
  const expandedArrays = new WeakSet(); // arrays already walked entry by entry

  const isObject = (value) => value !== null && typeof value === "object";

  // Returns the property names when `holder.seekers.properties` is an object.
  const seekersKeys = (holder) => {
    const props = holder?.seekers?.properties;
    return isObject(props) ? Object.keys(props) : null;
  };

  // Iterative depth-first search for a `{ seekers: { properties } }` node.
  const deepSearch = (root) => {
    const stack = [root];
    while (stack.length) {
      const node = stack.pop();
      if (!isObject(node) || visited.has(node)) continue;
      visited.add(node);
      const keys = seekersKeys(node);
      if (keys) return keys;
      for (const child of Object.values(node)) {
        if (isObject(child)) stack.push(child);
      }
    }
    return [];
  };

  const extract = (root) => {
    if (!isObject(root)) return [];
    // 1) { objects: { seekers: { properties: {...} } } }
    const direct = seekersKeys(root.objects);
    if (direct) return direct;
    // 2) Array: return the first entry that yields property names
    if (Array.isArray(root) && !expandedArrays.has(root)) {
      expandedArrays.add(root);
      for (const entry of root) {
        const keys = extract(entry);
        if (keys.length) return keys;
      }
    }
    // 3) Fallback: deep search below this node
    return deepSearch(root);
  };

  return extract(main);
}
// ---- Schema-based mapping system ----
// Natural-language synonyms keyed by seekers schema field name.
// Built once at module load instead of on every generateSynonyms() call.
const SEEKERS_FIELD_SYNONYMS = Object.freeze({
  // Contact & Identity
  email: ["contact", "mail", "contact email", "e-mail"],
  alias: ["id", "identifier", "username", "user id"],
  shortdescription: ["description", "bio", "summary", "about"],
  // Work Experience & Status
  seekworkingyear: [
    "experience",
    "years of experience",
    "work experience",
    "working years",
    "career length",
  ],
  seekjobtitleexperience: [
    "job titles",
    "job experience",
    "positions",
    "roles",
    "previous jobs",
    "work history",
  ],
  seekstatus: [
    "status",
    "availability",
    "looking",
    "job search status",
    "urgency",
  ],
  employmentstatus: [
    "employment",
    "current status",
    "work status",
    "job status",
  ],
  // Location & Geography
  seeklocation: [
    "location",
    "where",
    "place",
    "work location",
    "preferred location",
  ],
  lastlocation: ["last location", "current location", "previous location"],
  countryavailabletowork: [
    "countries",
    "available countries",
    "work countries",
    "country availability",
  ],
  // Salary & Compensation
  salaryexpectation: [
    "salary",
    "pay",
    "compensation",
    "wage",
    "salary expectation",
    "expected salary",
  ],
  salarydevise: ["currency", "salary currency", "pay currency"],
  salaryunit: [
    "salary unit",
    "pay unit",
    "compensation unit",
    "salary period",
  ],
  // Job Preferences
  seekjobtype: [
    "job type",
    "job types",
    "employment type",
    "contract type",
  ],
  lookingforjobtype: [
    "looking for",
    "desired job type",
    "preferred job type",
  ],
  lookingforaction: ["actions", "desired actions", "preferred activities"],
  lookingforother: ["other preferences", "additional requirements"],
  // Skills & Competencies
  skills: ["skills", "competencies", "abilities", "technical skills"],
  languageskills: ["languages", "language skills", "linguistic skills"],
  knowhow: ["knowledge", "expertise", "know-how", "competence"],
  myworkexperience: [
    "work experience",
    "professional experience",
    "career experience",
  ],
  // Personality & Profile
  mbti: ["personality", "type", "profile", "MBTI", "personality type"],
  mywords: ["keywords", "profile words", "descriptive words"],
  thingsilike: ["likes", "preferences", "interests", "things I like"],
  thingsidislike: [
    "dislikes",
    "avoid",
    "not interested",
    "things I dislike",
  ],
  // Availability & Schedule
  preferedworkinghours: [
    "working hours",
    "preferred hours",
    "work schedule",
    "availability",
  ],
  notavailabletowork: [
    "unavailable",
    "not available",
    "blocked times",
    "unavailable days",
  ],
  // Job Search Activity
  myjobradar: [
    "job radar",
    "tracked jobs",
    "job interests",
    "monitored jobs",
  ],
  jobadview: ["viewed jobs", "job views", "seen jobs"],
  jobadnotinterested: ["not interested", "rejected jobs", "dismissed jobs"],
  jobadapply: ["applied jobs", "applications", "job applications"],
  jobadinvitedtoapply: [
    "invitations",
    "invited to apply",
    "job invitations",
  ],
  jobadsaved: ["saved jobs", "bookmarked jobs", "favorite jobs"],
  // Dates & Timestamps
  dt_create: [
    "created",
    "creation date",
    "new",
    "recent",
    "since",
    "registration date",
  ],
  dt_update: ["updated", "last update", "modified", "last modified"],
  matchinglastdate: ["last matching", "matching date", "last match"],
  // Education & Training
  educations: [
    "education",
    "degree",
    "diploma",
    "qualification",
    "studies",
  ],
  tipsadvice: ["tips", "advice", "articles", "guidance"],
  receivecommercialtraining: ["commercial training", "sales training"],
  receivejobandinterviewtips: [
    "interview tips",
    "job tips",
    "career advice",
  ],
  // Notifications & Communication
  notificationformatches: ["match notifications", "matching alerts"],
  notificationforsupermatches: [
    "super match notifications",
    "premium matches",
  ],
  notificationinvitedtoapply: [
    "application invitations",
    "invite notifications",
  ],
  notificationrecruitprocessupdate: [
    "recruitment updates",
    "process updates",
  ],
  notificationupcominginterview: [
    "interview notifications",
    "upcoming interviews",
  ],
  notificationdirectmessage: ["direct messages", "chat notifications"],
  emailactivityreportweekly: ["weekly reports", "weekly emails"],
  emailactivityreportbiweekly: ["biweekly reports", "biweekly emails"],
  emailactivityreportmonthly: ["monthly reports", "monthly emails"],
  emailpersonnalizedcontent: ["personalized content", "custom content"],
  emailnewsletter: ["newsletter", "news updates"],
  // External IDs
  polemploiid: ["pole emploi", "unemployment office", "job center ID"],
  // System Fields
  owner: ["owner", "account owner"],
  activequizz: ["active quiz", "current quiz", "quiz"],
});

/**
 * Maps natural-language terms onto fields of the seekers schema and exposes
 * the schema's field list, index list and recruiter access rights.
 *
 * `fieldMappings` holds two kinds of entries:
 *  - `<fieldName>` → descriptor `{ field, title, description, type, synonyms }`
 *  - `<lower-cased title or synonym>` → field name (string)
 * A real field name is never replaced by an alias of another field.
 */
class SchemaMapper {
  /**
   * @param {{seekers?: object|null, main?: Array|null}} schemas - Loaded
   *   schemas; `seekers` is used when truthy, otherwise `main` is searched
   *   for an entry whose `$id` contains "/seekers".
   */
  constructor(schemas) {
    // Use direct seekers schema if available, otherwise search in consolidated main schema
    this.seekersSchema =
      schemas?.seekers || this.findSchemaByType("seekers", schemas?.main);
    this.fieldMappings = this.buildFieldMappings();
    this.indexMappings = this.buildIndexMappings();
    const propertyCount = Object.keys(
      this.seekersSchema?.properties || {}
    ).length;
    console.log(`📋 Loaded seekers schema with ${propertyCount} properties`);
  }

  /**
   * Finds the schema whose `$id` contains `/<objectType>`.
   *
   * @param {string} objectType - Object type to look for (e.g. "seekers").
   * @param {*} schemas - Candidate list; anything but an array yields null.
   * @returns {object|null} The first matching schema, or null.
   */
  findSchemaByType(objectType, schemas) {
    if (!Array.isArray(schemas)) return null;
    const needle = `/${objectType}`;
    const match = schemas.find(
      // Entries may be null or carry a non-string $id; skip those.
      (schema) => typeof schema?.$id === "string" && schema.$id.includes(needle)
    );
    return match ?? null;
  }

  /**
   * Builds the lookup table used by mapNLToFields().
   *
   * @returns {Object<string, object|string>} See the class description for
   *   the two entry shapes; {} when no schema properties are available.
   */
  buildFieldMappings() {
    const properties = this.seekersSchema?.properties;
    if (!properties || typeof properties !== "object") return {};

    const mappings = {};
    const aliases = []; // [alias, fieldName] pairs, applied after all fields

    for (const [fieldName, rawDef] of Object.entries(properties)) {
      // Tolerate null / non-object property definitions.
      const fieldDef = rawDef && typeof rawDef === "object" ? rawDef : {};
      const title =
        typeof fieldDef.title === "string" ? fieldDef.title : undefined;
      const description =
        typeof fieldDef.description === "string"
          ? fieldDef.description
          : undefined;
      const synonyms = this.generateSynonyms(fieldName, fieldDef);

      mappings[fieldName] = {
        field: fieldName,
        title: title?.toLowerCase(),
        description: description?.toLowerCase(),
        type: fieldDef.type,
        synonyms,
      };

      if (title) aliases.push([title.toLowerCase(), fieldName]);
      for (const synonym of synonyms) {
        aliases.push([synonym.toLowerCase(), fieldName]);
      }
    }

    // Index by title and synonyms. Aliases are applied last and never
    // overwrite a real field name; among aliases the last field wins.
    for (const [alias, fieldName] of aliases) {
      if (!Object.prototype.hasOwnProperty.call(properties, alias)) {
        mappings[alias] = fieldName;
      }
    }
    return mappings;
  }

  /**
   * Indexes the schema's `apxidx` entries by name.
   *
   * @returns {Object<string, {name: *, type: *, keyval: *}>} {} when the
   *   schema has no `apxidx` array; entries without a name are skipped.
   */
  buildIndexMappings() {
    const declaredIndexes = this.seekersSchema?.apxidx;
    if (!Array.isArray(declaredIndexes)) return {};
    const indexes = {};
    for (const idx of declaredIndexes) {
      if (idx?.name == null) continue;
      indexes[idx.name] = {
        name: idx.name,
        type: idx.type,
        keyval: idx.keyval,
      };
    }
    return indexes;
  }

  /**
   * Returns the known natural-language synonyms for a field.
   *
   * @param {string} fieldName - Schema field name.
   * @param {object} fieldDef - Field definition (currently unused; kept for
   *   interface compatibility).
   * @returns {string[]} A fresh array of synonyms, empty for unknown fields.
   */
  generateSynonyms(fieldName, fieldDef) {
    // Own-property check so names such as "constructor" never resolve to
    // something inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(SEEKERS_FIELD_SYNONYMS, fieldName)) {
      return [];
    }
    return [...SEEKERS_FIELD_SYNONYMS[fieldName]];
  }

  /**
   * Resolves natural-language terms to schema field names.
   *
   * @param {string[]} nlTerms - Terms to resolve; matching ignores case and
   *   surrounding whitespace. Non-string entries are skipped.
   * @returns {string[]} Matching field names, in first-seen order, without
   *   duplicates. Unknown terms contribute nothing.
   */
  mapNLToFields(nlTerms) {
    if (!Array.isArray(nlTerms)) return [];
    const matched = new Set();
    for (const term of nlTerms) {
      if (typeof term !== "string") continue;
      const key = term.trim().toLowerCase();
      if (!Object.prototype.hasOwnProperty.call(this.fieldMappings, key)) {
        continue;
      }
      const mapping = this.fieldMappings[key];
      if (typeof mapping === "string") {
        matched.add(mapping);
      } else if (mapping?.field) {
        matched.add(mapping.field);
      }
    }
    return [...matched];
  }

  /**
   * @returns {string[]} A copy of `apxaccessrights.recruiters.R` from the
   *   schema, or a basic fallback list when that list is not available.
   */
  getRecruiterReadableFields() {
    const readable = this.seekersSchema?.apxaccessrights?.recruiters?.R;
    if (!Array.isArray(readable)) {
      // Fallback to basic fields
      return ["alias", "email", "seekstatus", "seekworkingyear"];
    }
    return [...readable];
  }

  /** @returns {string[]} Every property name of the seekers schema ([] if none). */
  getAllSeekersFields() {
    const properties = this.seekersSchema?.properties;
    return properties ? Object.keys(properties) : [];
  }

  /** @returns {string[]} Names of the indexes declared in the schema. */
  getAvailableIndexes() {
    return Object.keys(this.indexMappings);
  }

  /**
   * @param {string} fieldName - Field the index must be keyed on (`keyval`).
   * @returns {string|null} `idx.<indexName>` for the first matching index,
   *   or null when no index uses that field.
   */
  getIndexByField(fieldName) {
    const index = Object.values(this.indexMappings).find(
      (idx) => idx.keyval === fieldName
    );
    return index ? `idx.${index.name}` : null;
  }
}
// Initialize schema mapper
const schemaMapper = new SchemaMapper(SCHEMAS);
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();
{
  // Log at most the first 10 field names, plus the total when more exist.
  const totalFields = SEEKERS_FIELDS_FROM_SCHEMA.length;
  const preview = SEEKERS_FIELDS_FROM_SCHEMA.slice(0, 10).join(", ");
  const overflowNote = totalFields > 10 ? `... (${totalFields} total)` : "";
  console.log(`🔍 Available seekers fields: ${preview}${overflowNote}`);
}
// ---- Minimal mapping config (for prompting + default fields) ----
const firstReadableFields = schemaMapper.getRecruiterReadableFields().slice(0, 5);
const seekersMapping = {
  object: "seekers",
  // First 5 readable fields
  defaultReadableFields: firstReadableFields,
};
// ---- JSON Schema for Structured Outputs (no zod, no oneOf) ----
/**
 * Builds the JSON Schema the model's answer has to satisfy.
 *
 * @returns {object} Schema for `{ object, condition, fields }` where `object`
 *   is always "seekers", `condition` is a non-empty list of strings and
 *   `fields` is a non-empty list restricted to the recruiter-readable fields
 *   reported by the schema mapper.
 */
function buildResponseJsonSchema() {
  const objectSchema = { type: "string", enum: ["seekers"] };
  const conditionSchema = {
    type: "array",
    items: { type: "string" },
    minItems: 1,
  };
  const fieldsSchema = {
    type: "array",
    items: {
      type: "string",
      enum: schemaMapper.getRecruiterReadableFields(),
    },
    minItems: 1,
  };
  return {
    type: "object",
    additionalProperties: false,
    properties: {
      object: objectSchema,
      condition: conditionSchema,
      fields: fieldsSchema,
    },
    required: ["object", "condition", "fields"],
  };
}
// ---- Prompt builders ----
/**
 * Formats an instant as the calendar date (YYYY-MM-DD) in Europe/Paris.
 *
 * @param {Date} date - Instant to format; must be a valid Date.
 * @returns {string} ISO calendar date in the Europe/Paris timezone.
 */
function formatParisIsoDate(date) {
  const formatter = new Intl.DateTimeFormat("en-CA", {
    timeZone: "Europe/Paris",
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
  });
  // Assemble from parts so the result does not depend on locale punctuation.
  const parts = Object.fromEntries(
    formatter.formatToParts(date).map(({ type, value }) => [type, value])
  );
  return `${parts.year}-${parts.month}-${parts.day}`;
}

/**
 * Adds a number of days to a YYYY-MM-DD calendar date.
 *
 * @param {string} isoDate - Calendar date as YYYY-MM-DD.
 * @param {number} days - Days to add (negative to go back).
 * @returns {string} The shifted calendar date as YYYY-MM-DD.
 */
function shiftIsoDate(isoDate, days) {
  const [year, month, day] = isoDate.split("-").map(Number);
  // Pure calendar arithmetic in UTC, so DST changes cannot skew the result.
  return new Date(Date.UTC(year, month - 1, day + days))
    .toISOString()
    .slice(0, 10);
}

/**
 * Builds the system prompt that teaches the model the ODMDB DSL, the
 * available fields/indexes and how to resolve relative dates.
 *
 * @param {Date} [now=new Date()] - Reference instant for "today",
 *   "yesterday" and "last week"; these used to be hard-coded dates.
 * @returns {string} The prompt, one instruction per line.
 * @throws {RangeError} If `now` is not a valid Date.
 */
function systemPrompt(now = new Date()) {
  const availableFields = schemaMapper.getAllSeekersFields();
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  const availableIndexes = schemaMapper.getAvailableIndexes();
  const today = formatParisIsoDate(now);
  const yesterday = shiftIsoDate(today, -1);
  const lastWeek = shiftIsoDate(today, -7);
  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value) - for indexed fields",
    "- prop.<field>(operator:value) - for direct property queries",
    "",
    "Available seekers fields:",
    // Only the first 15 field names are listed to keep the prompt short.
    availableFields.slice(0, 15).join(", ") +
      (availableFields.length > 15 ? "..." : ""),
    "",
    "Available indexes for optimization:",
    availableIndexes.join(", "),
    "",
    "Recruiter-readable fields (use these for field selection):",
    recruiterReadableFields.join(", "),
    "",
    "Field mappings for natural language:",
    "- 'email', 'contact info' → email",
    "- 'experience', 'years of experience' → seekworkingyear",
    "- 'job titles', 'positions', 'roles' → seekjobtitleexperience",
    "- 'status', 'availability' → seekstatus",
    "- 'salary', 'pay', 'compensation' → salaryexpectation",
    "- 'location', 'where' → seeklocation",
    "- 'skills', 'competencies' → skills",
    "- 'languages' → languageskills",
    "- 'personality', 'MBTI' → mbti",
    "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
    "",
    "Status value mappings:",
    "- 'urgent', 'urgently', 'ASAP', 'quickly' → startasap",
    "- 'no rush', 'taking time', 'leisurely' → norush",
    "- 'not looking', 'not active' → notlooking",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- Use indexes when possible (idx.seekstatus_alias for status queries)",
    "- For date filters, use prop.dt_create with absolute dates",
    "- Only return recruiter-readable fields in 'fields' array",
    `- Default fields if request is generic: ${recruiterReadableFields
      .slice(0, 5)
      .join(", ")}`,
    "",
    `Timezone is Europe/Paris. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    `Interpret 'yesterday' as → ${yesterday}.`,
  ].join("\n");
}
/**
 * Wraps the raw natural-language request in the user message sent to the model.
 *
 * @param {string} nl - The natural-language request.
 * @returns {string} Two lines: the quoted request and the output reminder.
 */
function userPrompt(nl) {
  const requestLine = `Natural language request: "${nl}"`;
  const reminderLine = "Return ONLY the JSON object.";
  return [requestLine, reminderLine].join("\n");
}
// ---- OpenAI call using Responses API (text.format) ----
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

/**
 * Pulls the generated text out of a Responses API result.
 *
 * Prefers `output_text`; otherwise scans every output item, because the
 * first item is not necessarily the one that carries the text.
 *
 * @param {object} resp - Result of `client.responses.create`.
 * @returns {string} The first non-empty text found, or "" when there is none.
 * @throws {Error} If the response carries a refusal instead of text.
 */
function extractResponseText(resp) {
  if (typeof resp?.output_text === "string" && resp.output_text) {
    return resp.output_text;
  }
  const items = Array.isArray(resp?.output) ? resp.output : [];
  for (const item of items) {
    const parts = Array.isArray(item?.content) ? item.content : [];
    for (const part of parts) {
      if (part?.type === "refusal") {
        throw new Error(`Model refused the request: ${part.refusal ?? ""}`);
      }
      if (typeof part?.text === "string" && part.text) {
        return part.text;
      }
    }
  }
  return "";
}

/**
 * Asks the model to translate a natural-language request into an ODMDB query.
 *
 * @param {string} nlText - The natural-language request.
 * @returns {Promise<object>} The parsed JSON object produced by the model
 *   (not yet validated against the ODMDB schema).
 * @throws {Error} "Empty model output" when no text is returned, or an error
 *   naming the JSON problem when the text cannot be parsed.
 */
async function inferQuery(nlText) {
  const resp = await client.responses.create({
    model: MODEL,
    input: [
      { role: "system", content: systemPrompt() },
      { role: "user", content: userPrompt(nlText) },
    ],
    text: {
      format: {
        name: "OdmdbQuery",
        type: "json_schema",
        schema: buildResponseJsonSchema(),
        strict: true,
      },
    },
  });
  const jsonText = extractResponseText(resp);
  if (!jsonText) {
    throw new Error("Empty model output");
  }
  try {
    return JSON.parse(jsonText);
  } catch (parseError) {
    throw new Error(`Model output is not valid JSON: ${parseError.message}`, {
      cause: parseError,
    });
  }
}
// ---- Validate using the ODMDB schema (not zod) ----
/**
 * Validates (and lightly normalizes) a model-generated query.
 *
 * Field handling depends on whether the seekers schema could be loaded:
 *  - schema known: every requested field must exist in it, otherwise an
 *    error is thrown;
 *  - schema unknown: fields cannot be checked, so blank and non-string
 *    entries are dropped instead of rejecting the whole query.
 * In both cases duplicate fields are removed, and a warning is logged for
 * fields that are not in the recruiter-readable list.
 *
 * @param {object} candidate - Parsed model output `{ object, condition, fields }`.
 * @returns {object} The same `candidate` object, with `fields` normalized.
 * @throws {Error} On a wrong shape, an unknown field (schema known), a
 *   malformed condition, or when no usable field remains.
 */
function validateWithOdmdbSchema(candidate) {
  // Basic shape checks (already enforced by Structured Outputs, but keep defensive)
  if (!candidate || typeof candidate !== "object") {
    throw new Error("Invalid response (not an object).");
  }
  if (candidate.object !== "seekers") {
    throw new Error("Invalid object; must be 'seekers'.");
  }
  if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) {
    throw new Error(
      "Invalid 'condition'; must be a non-empty array of strings."
    );
  }
  if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  // Validate fields against schema (only possible when the schema was loaded)
  const schemaFields = new Set(schemaMapper.getAllSeekersFields());
  if (schemaFields.size > 0) {
    for (const field of candidate.fields) {
      if (!schemaFields.has(field)) {
        throw new Error(
          `Invalid field '${field}'; not found in seekers schema.`
        );
      }
    }
  }

  // DSL token sanity
  const allowedTokens = ["join(", "idx.", "prop."];
  // Tab, LF, CR and printable ASCII only.
  const asciiOnly = /^[\x09\x0A\x0D\x20-\x7E]+$/;
  for (const c of candidate.condition) {
    if (typeof c !== "string") {
      throw new Error("Condition entries must be strings.");
    }
    const tokenOK = allowedTokens.some((t) => c.includes(t));
    if (!tokenOK || !asciiOnly.test(c)) {
      throw new Error(`Malformed condition: ${c}`);
    }
  }

  // Normalize: keep non-blank strings only, without duplicates.
  const normalizedFields = [
    ...new Set(
      candidate.fields.filter((f) => typeof f === "string" && f.trim())
    ),
  ];
  if (normalizedFields.length === 0) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  const recruiterReadableFields = new Set(
    schemaMapper.getRecruiterReadableFields()
  );
  for (const field of normalizedFields) {
    if (!recruiterReadableFields.has(field)) {
      console.warn(
        `Warning: Field '${field}' may not be readable by recruiters.`
      );
    }
  }

  candidate.fields = normalizedFields;
  return candidate;
}
// ---- Local ODMDB Data Access ----
/**
 * Loads seeker records from the local `<OBJECTS_PATH>/seekers/itm` directory.
 *
 * Only `*.json` entries are read. File names are sorted before the limit is
 * applied so the same files are picked on every run, whatever order the
 * directory listing comes back in. Files that cannot be read or parsed are
 * skipped with a warning.
 *
 * @param {number} [limit=50] - Maximum number of files to read (kept small
 *   for PoC performance); negative values are treated as 0.
 * @returns {Array} Parsed records; [] when the directory is missing or
 *   cannot be listed.
 */
function loadSeekersData(limit = 50) {
  const seekersItemsPath = `${OBJECTS_PATH}/seekers/itm`;
  try {
    if (!fs.existsSync(seekersItemsPath)) {
      console.error(`❌ Seekers data directory not found: ${seekersItemsPath}`);
      return [];
    }
    const files = fs
      .readdirSync(seekersItemsPath)
      .filter((file) => file.endsWith(".json"))
      .sort()
      .slice(0, Math.max(0, limit));
    console.log(
      `📁 Loading ${files.length} seeker files from ${seekersItemsPath}`
    );
    const seekers = [];
    for (const file of files) {
      try {
        const filePath = `${seekersItemsPath}/${file}`;
        seekers.push(JSON.parse(fs.readFileSync(filePath, "utf-8")));
      } catch (error) {
        // One bad file must not prevent the others from loading.
        console.warn(`⚠️ Could not load ${file}:`, error.message);
      }
    }
    return seekers;
  } catch (error) {
    console.error("❌ Error loading seekers data:", error.message);
    return [];
  }
}
// ---- Local ODMDB Query Execution ----
/**
 * Runs a generated query against the locally loaded seekers files.
 *
 * Only two condition forms are evaluated (simplified DSL processing):
 *  - `prop.dt_create(>=:YYYY-MM-DD)` — keeps records created on/after the date;
 *  - `idx.seekstatus_alias(<status>)` — keeps records with that `seekstatus`
 *    (surrounding quotes and whitespace in the value are ignored).
 * Any other condition is reported with a warning and NOT applied, so the
 * result may be broader than the query asked for.
 *
 * @param {{condition: string[], fields: string[]}} query - Validated query.
 * @returns {Promise<{data: object[], meta?: object}|null>} Matching records
 *   reduced to the requested fields; null when execution is disabled or fails.
 */
async function executeOdmdbQuery(query) {
  if (!EXECUTE_QUERY) {
    console.log(
      "💡 Query execution disabled. Set EXECUTE_QUERY=true to enable."
    );
    return null;
  }
  try {
    console.log(
      `\n🔍 Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/`
    );
    console.log("Query conditions:", query.condition);
    console.log("Requested fields:", query.fields);
    // Load all seekers data
    const allSeekers = loadSeekersData();
    if (allSeekers.length === 0) {
      console.log("❌ No seekers data found");
      return { data: [] };
    }
    console.log(`📊 Loaded ${allSeekers.length} seekers for filtering`);
    // A file may contain `null` or a scalar; only objects can be filtered.
    let filteredSeekers = allSeekers.filter(
      (seeker) => seeker !== null && typeof seeker === "object"
    );
    // Apply basic filtering (simplified DSL processing)
    for (const condition of query.condition) {
      let applied = false;
      if (condition.includes("prop.dt_create(>=:")) {
        // Extract date from condition like "prop.dt_create(>=:2025-10-07)"
        const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/);
        if (dateMatch) {
          const filterDate = new Date(dateMatch[1]);
          filteredSeekers = filteredSeekers.filter((seeker) => {
            if (!seeker.dt_create) return false;
            return new Date(seeker.dt_create) >= filterDate;
          });
          console.log(
            `🗓️ Filtered by date >= ${dateMatch[1]}: ${filteredSeekers.length} results`
          );
          applied = true;
        }
      }
      if (condition.includes("idx.seekstatus_alias(")) {
        // Extract status from condition like "idx.seekstatus_alias(startasap)"
        const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/);
        if (statusMatch) {
          const status = statusMatch[1].trim().replace(/^['"]|['"]$/g, "");
          filteredSeekers = filteredSeekers.filter(
            (seeker) => seeker.seekstatus === status
          );
          console.log(
            `👤 Filtered by status ${status}: ${filteredSeekers.length} results`
          );
          applied = true;
        }
      }
      if (!applied) {
        console.warn(
          `⚠️ Condition not supported by the local executor (ignored): ${condition}`
        );
      }
    }
    // Select only requested fields
    const results = filteredSeekers.map((seeker) => {
      const projected = {};
      for (const field of query.fields) {
        // Own properties only; never call a method stored on the record itself.
        if (Object.prototype.hasOwnProperty.call(seeker, field)) {
          projected[field] = seeker[field];
        }
      }
      return projected;
    });
    console.log(
      `✅ Query executed successfully! Found ${results.length} matching seekers`
    );
    return {
      data: results,
      meta: {
        total: results.length,
        source: "local_files",
        path: `${OBJECTS_PATH}/seekers/itm/`,
      },
    };
  } catch (error) {
    console.error("❌ Local query execution failed:", error.message);
    return null;
  }
}
// ---- Result Processing with jq ----
/**
 * Runs a jq filter over the `data` of a query result.
 *
 * @param {{data: *}|null} results - Query result; only `data` is used.
 * @param {string} [jqFilter="."] - jq program to apply.
 * @returns {Promise<*>} Whatever jq produced; the pretty-printed JSON of the
 *   data when jq fails; null when there is nothing to process.
 */
async function processResults(results, jqFilter = ".") {
  const rows = results?.data;
  if (!rows) {
    console.log("No results to process.");
    return null;
  }
  let output;
  try {
    // Hand the data over as a JS value (not as a string)
    output = await jq.run(jqFilter, rows, { input: "json" });
  } catch (jqError) {
    console.error("❌ jq processing failed:", jqError.message);
    // Fall back to formatted JSON if jq fails
    output = JSON.stringify(rows, null, 2);
  }
  return output;
}
// ---- Run PoC (generate query and optionally execute) ----
// Entry point: NL query → model → validation → optional local execution,
// followed by a jq summary and a CSV preview. Any failure is logged and ends
// the process with exit code 1.
(async () => {
  const divider = "=".repeat(60);
  // jq program for the summary: first three rows, contact + salary columns.
  const summaryFilter = `.[0:3] | map({email, salaryexpectation, salarydevise, salaryunit})`;
  // jq program for the CSV preview: header row plus the first five rows.
  const csvFilter = [
    "",
    'map([.email // "N/A", (.salaryexpectation | tostring) // "N/A", .salarydevise // "N/A", .salaryunit // "N/A"]) |',
    '["email","salary","currency","unit"] as $header |',
    "[$header] + .[0:5] |",
    ".[] | @csv",
    "",
  ].join("\n");

  try {
    if (!process.env.OPENAI_API_KEY) {
      throw new Error("Missing OPENAI_API_KEY env var.");
    }
    console.log(`🤖 Processing NL query: "${NL_QUERY}"`);
    console.log(divider);

    // Step 1: Generate ODMDB query from natural language
    const validated = validateWithOdmdbSchema(await inferQuery(NL_QUERY));
    console.log("✅ Generated ODMDB Query:");
    const { object, condition, fields } = validated;
    const generatedQuery = { object, condition, fields };
    console.log(JSON.stringify(generatedQuery, null, 2));

    // Step 2: Execute query if enabled
    if (!EXECUTE_QUERY) {
      console.log(
        "\n💡 To execute this query against ODMDB, set EXECUTE_QUERY=true"
      );
      console.log(` Example: EXECUTE_QUERY=true npm start`);
      return;
    }
    console.log(`\n${divider}`);
    const results = await executeOdmdbQuery(generatedQuery);
    if (!results) {
      return;
    }
    console.log("✅ Query executed successfully!");
    console.log(`📊 Found ${results.data?.length || 0} results`);

    // Step 3: Process results with jq
    console.log("\n📋 Results Summary:");
    const summary = await processResults(results, summaryFilter);
    console.log(JSON.stringify(summary, null, 2));
    // Optional: Show full results count
    if (results.data?.length > 3) {
      console.log(`\n... and ${results.data.length - 3} more results`);
    }

    // Step 4: Export to CSV format
    console.log("\n📄 CSV Preview:");
    const csvData = await processResults(results, csvFilter);
    if (csvData) {
      console.log(csvData);
    }
  } catch (e) {
    console.error("❌ PoC failed:", e.message || e);
    process.exit(1);
  }
})();