// PoC: NL → ODMDB query (seekers), no zod — validate via ODMDB schema
// Usage:
// 1) export OPENAI_API_KEY=sk-...
// 2) node poc.js
|
|
|
|
import fs from "node:fs";
|
|
import OpenAI from "openai";
|
|
|
|
// ---- Config ----
|
|
// Model name: taken from OPENAI_MODEL when that variable is set and non-empty.
const MODEL = process.env.OPENAI_MODEL ? process.env.OPENAI_MODEL : "gpt-5";

const MAIN_SCHEMA_PATH = "./main.json"; // optional context; used for validation
const LG_SCHEMA_PATH = "./lg.json"; // optional context

// Hardcoded NL query for the PoC (no multi-turn)
const NL_QUERY = "give me new seekers since last week with email and experience";
|
|
|
|
// ---- Load schemas (safe) ----
|
|
/**
 * Read and parse a JSON file on a best-effort basis.
 *
 * A missing file is treated as an expected condition and yields `null`
 * silently. Any other failure (unreadable path, malformed JSON) also yields
 * `null`, but is reported with `console.warn` so that a broken file is not
 * mistaken for an absent one.
 *
 * @param {string} path - Location of the JSON file.
 * @returns {*} The parsed JSON value, or `null` when it could not be loaded.
 */
function loadJsonSafe(path) {
  let raw;
  try {
    // Read directly instead of existsSync + readFileSync: one syscall less and
    // no window in which the file can disappear between the check and the read.
    raw = fs.readFileSync(path, "utf-8");
  } catch (err) {
    const isMissing = err?.code === "ENOENT" || err?.code === "ENOTDIR";
    if (!isMissing) {
      console.warn(`Could not read ${String(path)}: ${err?.message ?? err}`);
    }
    return null;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    console.warn(`Ignoring ${String(path)}: invalid JSON (${err.message})`);
    return null;
  }
}
|
|
// Each entry holds whatever loadJsonSafe returned for that path
// (null when the file could not be loaded).
const mainSchemaJson = loadJsonSafe(MAIN_SCHEMA_PATH);
const lgSchemaJson = loadJsonSafe(LG_SCHEMA_PATH);
const SCHEMAS = { main: mainSchemaJson, lg: lgSchemaJson };
|
|
|
|
// ---- Helpers to read seekers field names from your ODMDB custom schema ----
|
|
/**
 * Find the property names of the "seekers" object inside a loaded schema.
 *
 * Shapes tried, in order:
 *   1) `{ objects: { seekers: { properties: {...} } } }`
 *   2) an array whose entries are searched one by one (first match wins)
 *   3) any nested node of the form `{ seekers: { properties: {...} } }`
 *
 * Every object is visited at most once, so input containing reference cycles
 * terminates instead of looping or overflowing the stack.
 *
 * @param {*} main - Parsed schema value (object, array, or anything else).
 * @returns {string[]} Property names, or an empty array when none are found.
 */
function extractSeekersPropsFromOdmdbSchema(main) {
  const entered = new WeakSet(); // roots already handed to search()
  const scanned = new WeakSet(); // nodes already expanded by a deep scan

  // Keys of holder.seekers.properties when that is an object, otherwise null.
  const propertyNames = (holder) => {
    const props = holder?.seekers?.properties;
    return props && typeof props === "object" ? Object.keys(props) : null;
  };

  const search = (root) => {
    if (!root || typeof root !== "object" || entered.has(root)) return [];
    entered.add(root);

    // 1) Conventional location.
    const direct = propertyNames(root.objects);
    if (direct) return direct;

    // 2) Array of schemas: the first entry that yields anything wins.
    if (Array.isArray(root)) {
      for (const entry of root) {
        const keys = search(entry);
        if (keys.length) return keys;
      }
    }

    // 3) Deep scan with an explicit stack.
    const stack = [root];
    while (stack.length) {
      const node = stack.pop();
      if (scanned.has(node)) continue;
      scanned.add(node);

      const found = propertyNames(node);
      if (found) return found;

      for (const child of Object.values(node)) {
        if (child && typeof child === "object") stack.push(child);
      }
    }

    return [];
  };

  return search(main);
}
|
|
|
|
// ---- Schema-based mapping system ----
|
|
/**
 * Lookup tables built from the "seekers" schema: natural-language terms to
 * field names, index definitions, and the recruiter-readable field list.
 *
 * The seekers schema is the first schema whose `$id` contains "/seekers".
 * When no such schema is available every table is empty and
 * `getRecruiterReadableFields()` returns a small built-in fallback list.
 */
class SchemaMapper {
  /**
   * @param {{ main?: * }} schemas - Loaded schema files; only `main` is read.
   *   `main` may be an array of schemas or a single schema object.
   */
  constructor(schemas) {
    this.schemas = schemas?.main || [];
    this.seekersSchema = this.findSchemaByType("seekers");
    this.fieldMappings = this.buildFieldMappings();
    this.indexMappings = this.buildIndexMappings();
  }

  /**
   * Locate the schema whose `$id` contains `/<objectType>`.
   *
   * @param {string} objectType - Object type name, e.g. "seekers".
   * @returns {object|null} The matching schema, or null when there is none.
   */
  findSchemaByType(objectType) {
    if (!this.schemas || typeof this.schemas !== "object") return null;

    const candidates = Array.isArray(this.schemas)
      ? this.schemas
      : [this.schemas];
    const marker = `/${objectType}`;

    // Null entries and non-string $id values are skipped rather than thrown on.
    const match = candidates.find(
      (schema) => typeof schema?.$id === "string" && schema.$id.includes(marker)
    );
    return match ?? null;
  }

  /**
   * Build the term → field table.
   *
   * Keys that are field names map to a metadata object
   * `{ field, title, description, type, synonyms }`; alias keys (lowercased
   * titles, synonyms, and lowercased field names) map to the field name string.
   * An alias never replaces a field's own metadata entry, so a title equal to
   * the field name, or a synonym that happens to be another field's name,
   * cannot shadow the real field.
   *
   * @returns {Object<string, object|string>}
   */
  buildFieldMappings() {
    const properties = this.seekersSchema?.properties;
    if (!properties || typeof properties !== "object") return {};

    const mappings = {};
    const aliases = []; // [aliasKey, fieldName] for titles and synonyms
    const caseAliases = []; // lowercased spellings of mixed-case field names

    // Pass 1: one metadata entry per real field.
    for (const [fieldName, rawDef] of Object.entries(properties)) {
      const fieldDef = rawDef && typeof rawDef === "object" ? rawDef : {};
      const title =
        typeof fieldDef.title === "string"
          ? fieldDef.title.toLowerCase()
          : undefined;
      const description =
        typeof fieldDef.description === "string"
          ? fieldDef.description.toLowerCase()
          : undefined;
      const synonyms = this.generateSynonyms(fieldName, fieldDef);

      mappings[fieldName] = {
        field: fieldName,
        title,
        description,
        type: fieldDef.type,
        synonyms,
      };

      if (title) aliases.push([title, fieldName]);
      for (const synonym of synonyms) {
        aliases.push([synonym.toLowerCase(), fieldName]);
      }
      // Lookups are lowercased, so a mixed-case field name needs an alias too.
      const lowered = fieldName.toLowerCase();
      if (lowered !== fieldName) caseAliases.push([lowered, fieldName]);
    }

    // Pass 2: aliases. Later aliases win over earlier ones, but none may
    // overwrite a metadata entry created in pass 1.
    const isFieldEntry = (key) =>
      Object.prototype.hasOwnProperty.call(mappings, key) &&
      typeof mappings[key] === "object";

    for (const [alias, fieldName] of [...aliases, ...caseAliases]) {
      if (!isFieldEntry(alias)) mappings[alias] = fieldName;
    }

    return mappings;
  }

  /**
   * Build the index table from the schema's `apxidx` array.
   *
   * @returns {Object<string, {name: *, type: *, keyval: *}>} Keyed by index name.
   */
  buildIndexMappings() {
    const declared = this.seekersSchema?.apxidx;
    if (!Array.isArray(declared)) return {};

    const indexes = {};
    for (const idx of declared) {
      if (!idx || typeof idx !== "object") continue;
      indexes[idx.name] = {
        name: idx.name,
        type: idx.type,
        keyval: idx.keyval,
      };
    }
    return indexes;
  }

  /**
   * Return the built-in natural-language synonyms for a field.
   *
   * @param {string} fieldName - Schema property name.
   * @param {object} fieldDef - Property definition (currently unused).
   * @returns {string[]} A fresh array; empty when the field has no synonyms.
   */
  generateSynonyms(fieldName, fieldDef) {
    // Common mappings based on actual schema
    const commonMappings = {
      email: ["contact", "mail", "contact email"],
      seekworkingyear: ["experience", "years of experience", "work experience"],
      seekjobtitleexperience: ["job titles", "job experience", "positions"],
      seekstatus: ["status", "availability", "looking"],
      dt_create: ["created", "creation date", "new", "recent", "since"],
      salaryexpectation: ["salary", "pay", "compensation", "wage"],
      seeklocation: ["location", "where", "place"],
      mbti: ["personality", "type", "profile"],
      alias: ["id", "identifier", "username"],
    };

    // Own-property check: a field called "constructor" or "toString" must not
    // pick up the inherited Object.prototype member.
    const known = Object.prototype.hasOwnProperty.call(
      commonMappings,
      fieldName
    );
    return known ? [...commonMappings[fieldName]] : [];
  }

  /**
   * Translate natural-language terms into schema field names.
   *
   * Terms are trimmed and lowercased before lookup; unknown and non-string
   * terms are ignored.
   *
   * @param {string[]} nlTerms - Terms to translate.
   * @returns {string[]} Distinct field names in first-seen order.
   */
  mapNLToFields(nlTerms) {
    const matched = new Set();

    for (const term of nlTerms ?? []) {
      if (typeof term !== "string") continue;

      const key = term.trim().toLowerCase();
      if (!Object.prototype.hasOwnProperty.call(this.fieldMappings, key)) {
        continue;
      }

      const mapping = this.fieldMappings[key];
      const fieldName = typeof mapping === "string" ? mapping : mapping?.field;
      if (fieldName) matched.add(fieldName);
    }

    return [...matched];
  }

  /**
   * Fields listed under `apxaccessrights.recruiters.R` in the seekers schema.
   *
   * @returns {string[]} The schema's list, or a basic fallback list when the
   *   schema does not provide an array there.
   */
  getRecruiterReadableFields() {
    const readable = this.seekersSchema?.apxaccessrights?.recruiters?.R;
    if (!Array.isArray(readable)) {
      // Fallback to basic fields
      return ["alias", "email", "seekstatus", "seekworkingyear"];
    }
    return readable;
  }

  /**
   * @returns {string[]} All property names of the seekers schema (possibly empty).
   */
  getAllSeekersFields() {
    const properties = this.seekersSchema?.properties;
    if (!properties || typeof properties !== "object") return [];
    return Object.keys(properties);
  }

  /**
   * @returns {string[]} Names of the indexes found in the seekers schema.
   */
  getAvailableIndexes() {
    return Object.keys(this.indexMappings);
  }

  /**
   * Find the index whose `keyval` equals the given field.
   *
   * @param {string} fieldName - Schema field name.
   * @returns {string|null} `idx.<indexName>`, or null when no index matches.
   */
  getIndexByField(fieldName) {
    const index = Object.values(this.indexMappings).find(
      (idx) => idx.keyval === fieldName
    );
    return index ? `idx.${index.name}` : null;
  }
}
|
|
|
|
// Initialize schema mapper
|
|
// Single mapper instance shared by the prompt builders and the validator.
const schemaMapper = new SchemaMapper(SCHEMAS);

// Snapshot of the seekers field names, taken once at start-up.
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();

// ---- Minimal mapping config (for prompting + default fields) ----
const seekersMapping = {
  object: "seekers",
  // Only the first five recruiter-readable fields serve as defaults.
  defaultReadableFields: schemaMapper
    .getRecruiterReadableFields()
    .filter((_, position) => position < 5),
};
|
|
|
|
// ---- JSON Schema for Structured Outputs (no zod, no oneOf) ----
|
|
/**
 * Assemble the JSON Schema handed to the model as its response format.
 *
 * The result describes an object with exactly three required members:
 * `object` (always "seekers"), `condition` (non-empty string array) and
 * `fields` (non-empty array restricted to the recruiter-readable fields
 * reported by `schemaMapper`).
 *
 * @returns {object} A plain JSON Schema object.
 */
function buildResponseJsonSchema() {
  const objectSchema = { type: "string", enum: ["seekers"] };

  const conditionSchema = {
    type: "array",
    items: { type: "string" },
    minItems: 1,
  };

  const fieldsSchema = {
    type: "array",
    items: {
      type: "string",
      enum: schemaMapper.getRecruiterReadableFields(),
    },
    minItems: 1,
  };

  return {
    type: "object",
    additionalProperties: false,
    properties: {
      object: objectSchema,
      condition: conditionSchema,
      fields: fieldsSchema,
    },
    required: ["object", "condition", "fields"],
  };
}
|
|
|
|
// ---- Prompt builders ----
|
|
/**
 * Build the system prompt: DSL summary, field/index lists taken from
 * `schemaMapper`, natural-language hints, rules, and the reference dates the
 * model needs to resolve relative expressions such as "last week".
 *
 * The reference dates are derived from `now` as calendar dates in the
 * Europe/Paris timezone, so the prompt stays correct on whatever day the
 * script runs.
 *
 * @param {Date} [now=new Date()] - Instant regarded as "now".
 * @returns {string} The prompt, one instruction per line.
 */
function systemPrompt(now = new Date()) {
  const availableFields = schemaMapper.getAllSeekersFields();
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  const availableIndexes = schemaMapper.getAvailableIndexes();

  // Calendar date of `now` as seen in Paris.
  const parts = new Intl.DateTimeFormat("en-CA", {
    timeZone: "Europe/Paris",
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
  }).formatToParts(now);
  const part = (type) => Number(parts.find((p) => p.type === type).value);
  const year = part("year");
  const month = part("month");
  const day = part("day");

  // Day arithmetic is done on a UTC calendar date so DST changes cannot shift
  // the result; Date.UTC normalises out-of-range day numbers.
  const isoDay = (offsetDays) =>
    new Date(Date.UTC(year, month - 1, day + offsetDays))
      .toISOString()
      .slice(0, 10);

  const today = isoDay(0);
  const lastWeek = isoDay(-7);
  const yesterday = isoDay(-1);

  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value) - for indexed fields",
    "- prop.<field>(operator:value) - for direct property queries",
    "",
    "Available seekers fields:",
    // Only the first 15 fields are listed; an ellipsis marks the truncation.
    availableFields.slice(0, 15).join(", ") +
      (availableFields.length > 15 ? "..." : ""),
    "",
    "Available indexes for optimization:",
    availableIndexes.join(", "),
    "",
    "Recruiter-readable fields (use these for field selection):",
    recruiterReadableFields.join(", "),
    "",
    "Field mappings for natural language:",
    "- 'email' → email",
    "- 'experience' → seekworkingyear",
    "- 'job titles' → seekjobtitleexperience",
    "- 'status' → seekstatus",
    "- 'salary' → salaryexpectation",
    "- 'location' → seeklocation",
    "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- Use indexes when possible (idx.seekstatus_alias for status queries)",
    "- For date filters, use prop.dt_create with absolute dates",
    "- Only return recruiter-readable fields in 'fields' array",
    `- Default fields if request is generic: ${recruiterReadableFields
      .slice(0, 5)
      .join(", ")}`,
    "",
    `Timezone is Europe/Paris. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    `Interpret 'yesterday' as → ${yesterday}.`,
  ].join("\n");
}
|
|
/**
 * Wrap the natural-language request in the user message sent to the model.
 *
 * @param {string} nl - The request text; inserted verbatim between double quotes.
 * @returns {string} Two lines: the quoted request and the output instruction.
 */
function userPrompt(nl) {
  const requestLine = `Natural language request: "${nl}"`;
  const instructionLine = "Return ONLY the JSON object.";
  return [requestLine, instructionLine].join("\n");
}
|
|
|
|
// ---- OpenAI call using Responses API (text.format) ----
|
|
// The client is created on first use rather than at module load: the OpenAI
// SDK constructor throws when no API key is configured, and at module level
// that exception would escape before the script's own error handling runs.
let client = null;

/**
 * Return the shared OpenAI client, constructing it on first use.
 *
 * @returns {OpenAI}
 */
function getClient() {
  if (client === null) {
    client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  }
  return client;
}

/**
 * Pull the generated text out of a Responses API result.
 *
 * Uses `output_text` when it is a non-empty string; otherwise scans every
 * output item for the first content part carrying non-empty text, since the
 * first output item is not necessarily the message.
 *
 * @param {object} resp - Value returned by `responses.create`.
 * @returns {string|null} The text, or null when the response carries none.
 */
function extractOutputText(resp) {
  if (typeof resp?.output_text === "string" && resp.output_text) {
    return resp.output_text;
  }
  for (const item of resp?.output ?? []) {
    for (const part of item?.content ?? []) {
      if (typeof part?.text === "string" && part.text) return part.text;
    }
  }
  return null;
}

/**
 * Ask the model to turn a natural-language request into a query object.
 *
 * @param {string} nlText - The natural-language request.
 * @returns {Promise<object>} The parsed JSON object produced by the model.
 * @throws {Error} When the response contains no text ("Empty model output")
 *   or the text is not valid JSON; API errors propagate unchanged.
 */
async function inferQuery(nlText) {
  const resp = await getClient().responses.create({
    model: MODEL,
    input: [
      { role: "system", content: systemPrompt() },
      { role: "user", content: userPrompt(nlText) },
    ],
    text: {
      format: {
        name: "OdmdbQuery",
        type: "json_schema",
        schema: buildResponseJsonSchema(),
        strict: true,
      },
    },
  });

  const jsonText = extractOutputText(resp);
  if (!jsonText) throw new Error("Empty model output");

  try {
    return JSON.parse(jsonText);
  } catch (err) {
    throw new Error(`Model output is not valid JSON: ${err.message}`, {
      cause: err,
    });
  }
}
|
|
|
|
// ---- Validate using the ODMDB schema (not zod) ----
|
|
/**
 * Validate a model-produced query and normalise its `fields` list.
 *
 * Checks, in order:
 *   1. basic shape (`object`, non-empty `condition` and `fields` arrays);
 *   2. every condition is a printable-ASCII string containing at least one
 *      DSL token (`join(`, `idx.`, `prop.`);
 *   3. fields:
 *      - when the seekers field list is known, unknown fields are dropped
 *        with a warning; if nothing is left, the default readable fields that
 *        exist in the schema are used instead;
 *      - when it is not known (no schema loaded), fields are only reduced to
 *        distinct non-blank strings, falling back to the default readable
 *        fields if none remain.
 *
 * A warning is logged for each kept field that is not recruiter-readable.
 * `candidate.fields` is replaced in place, and only after every check passed.
 *
 * @param {object} candidate - Parsed model output.
 * @returns {object} The same `candidate` object, with `fields` normalised.
 * @throws {Error} On a malformed shape or condition, or when no usable field
 *   remains.
 */
function validateWithOdmdbSchema(candidate) {
  // Basic shape checks (already enforced by Structured Outputs, but keep defensive)
  if (!candidate || typeof candidate !== "object")
    throw new Error("Invalid response (not an object).");
  if (candidate.object !== "seekers")
    throw new Error("Invalid object; must be 'seekers'.");
  if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) {
    throw new Error(
      "Invalid 'condition'; must be a non-empty array of strings."
    );
  }
  if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  // DSL token sanity
  const allowedTokens = ["join(", "idx.", "prop."];
  const printableAscii = /^[\x09\x0A\x0D\x20-\x7E]+$/;
  for (const c of candidate.condition) {
    if (typeof c !== "string")
      throw new Error("Condition entries must be strings.");
    const tokenOK = allowedTokens.some((t) => c.includes(t));
    if (!tokenOK || !printableAscii.test(c))
      throw new Error(`Malformed condition: ${c}`);
  }

  // Distinct, non-blank string fields in their original order.
  const requested = [
    ...new Set(
      candidate.fields.filter((f) => typeof f === "string" && f.trim())
    ),
  ];

  const knownFields = new Set(SEEKERS_FIELDS_FROM_SCHEMA);
  let fields;

  if (knownFields.size > 0) {
    // Field existence check against ODMDB custom schema (seekers properties)
    const unknown = candidate.fields.filter((f) => !knownFields.has(f));
    if (unknown.length) {
      // Drop unknown but continue (PoC behavior)
      console.warn(
        "⚠️ Dropping unknown fields (not in seekers schema):",
        unknown
      );
    }
    fields = requested.filter((f) => knownFields.has(f));
    if (!fields.length) {
      // If all dropped, fallback to default shortlist intersected with schema
      fields = seekersMapping.defaultReadableFields.filter((f) =>
        knownFields.has(f)
      );
    }
  } else {
    // No schema to check against: keep the sanitised request, or the defaults.
    fields = requested.length
      ? requested
      : [...seekersMapping.defaultReadableFields];
  }

  if (!fields.length) {
    throw new Error(
      "No valid fields remain after validation and no fallback available."
    );
  }

  const recruiterReadable = new Set(schemaMapper.getRecruiterReadableFields());
  for (const field of fields) {
    if (!recruiterReadable.has(field)) {
      console.warn(
        `Warning: Field '${field}' may not be readable by recruiters.`
      );
    }
  }

  candidate.fields = fields;
  return candidate;
}
|
|
|
|
// ---- Run PoC (print only the created query; do not execute) ----
|
|
// Entry point: infer the query for NL_QUERY, validate it, and print it.
// Nothing is executed against ODMDB. Any failure is reported on stderr and
// the process exits with status 1.
(async () => {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error("Missing OPENAI_API_KEY env var.");
  }

  const inferred = await inferQuery(NL_QUERY);
  const { object, condition, fields } = validateWithOdmdbSchema(inferred);

  // Output ONLY the created query (no execution)
  const printable = JSON.stringify({ object, condition, fields }, null, 2);
  console.log(printable);
})().catch((e) => {
  console.error("PoC failed:", e.message || e);
  process.exit(1);
});
|