// PoC: NL → ODMDB query (seekers), no zod — validate via ODMDB schema
// Usage:
//   1) export OPENAI_API_KEY=sk-...
//   2) node poc.js
//      (this file uses ES module imports: run it with "type": "module" in
//       package.json, or rename it to poc.mjs)
import fs from "node:fs";
import OpenAI from "openai";

// ---- Config ----
const MODEL = process.env.OPENAI_MODEL || "gpt-5";
const MAIN_SCHEMA_PATH = "./main.json"; // optional context; used for validation
const LG_SCHEMA_PATH = "./lg.json"; // optional context

// Hardcoded NL query for the PoC (no multi-turn)
const NL_QUERY = "give me new seekers since last week with email and experience";

// Time zone used to resolve relative dates ("last week", "yesterday") in the prompt.
const PROMPT_TIME_ZONE = "Europe/Paris";

// ---- Load schemas (safe) ----

/**
 * Read and parse a JSON file, treating it as optional.
 *
 * @param {string} path - File path to read.
 * @returns {any|null} The parsed JSON, or null when the file is missing,
 *   unreadable, or not valid JSON.
 */
function loadJsonSafe(path) {
  try {
    if (fs.existsSync(path)) {
      return JSON.parse(fs.readFileSync(path, "utf-8"));
    }
  } catch (err) {
    // Best effort: the schema is optional, but a broken file should be visible.
    console.warn(`Could not load JSON from ${path}: ${err?.message ?? err}`);
  }
  return null;
}

const SCHEMAS = {
  main: loadJsonSafe(MAIN_SCHEMA_PATH),
  lg: loadJsonSafe(LG_SCHEMA_PATH),
};

// ---- Helpers to read seekers field names from your ODMDB custom schema ----

/**
 * Find the property names of the "seekers" object in a schema of unknown shape.
 *
 * Tries, in order: `main.objects.seekers.properties`, each entry of an array,
 * then a depth-first search for any node shaped `{ seekers: { properties } }`.
 *
 * @param {any} main - Parsed schema document (or null).
 * @returns {string[]} Property names, or an empty array when none are found.
 */
function extractSeekersPropsFromOdmdbSchema(main) {
  if (!main) return [];

  // 1) { objects: { seekers: { properties: {...} } } }
  const direct = main.objects?.seekers?.properties;
  if (direct && typeof direct === "object") {
    return Object.keys(direct);
  }

  // 2) If main is an array, search for an item that looks like seekers schema
  if (Array.isArray(main)) {
    for (const entry of main) {
      const keys = extractSeekersPropsFromOdmdbSchema(entry);
      if (keys.length) return keys;
    }
  }

  // 3) Fallback: deep search for a { seekers: { properties: {...} } } node
  const visited = new WeakSet(); // guards against revisiting shared/cyclic nodes
  const stack = [main];
  while (stack.length) {
    const node = stack.pop();
    if (!node || typeof node !== "object" || visited.has(node)) continue;
    visited.add(node);

    const props = node.seekers?.properties;
    if (props && typeof props === "object") {
      return Object.keys(props);
    }
    for (const child of Object.values(node)) {
      if (child && typeof child === "object") stack.push(child);
    }
  }
  return [];
}

// ---- Schema-based mapping system ----

// Natural-language synonyms per schema field. A Map (rather than a plain
// object) so that field names such as "constructor" cannot resolve to
// inherited Object.prototype members.
const COMMON_SYNONYMS = new Map([
  ["email", ["contact", "mail", "contact email"]],
  ["seekworkingyear", ["experience", "years of experience", "work experience"]],
  ["seekjobtitleexperience", ["job titles", "job experience", "positions"]],
  ["seekstatus", ["status", "availability", "looking"]],
  ["dt_create", ["created", "creation date", "new", "recent", "since"]],
  ["salaryexpectation", ["salary", "pay", "compensation", "wage"]],
  ["seeklocation", ["location", "where", "place"]],
  ["mbti", ["personality", "type", "profile"]],
  ["alias", ["id", "identifier", "username"]],
]);

/**
 * Derives lookup tables (field synonyms, indexes, access rights) from the
 * "seekers" entry of the main schema array.
 */
class SchemaMapper {
  /**
   * @param {{ main?: any }} schemas - Loaded schema documents; `main` is
   *   expected to be an array of schema objects carrying a `$id`.
   */
  constructor(schemas) {
    this.schemas = schemas?.main || [];
    this.seekersSchema = this.findSchemaByType("seekers");
    this.fieldMappings = this.buildFieldMappings();
    this.indexMappings = this.buildIndexMappings();
  }

  /**
   * @param {string} objectType - Object name searched for in each `$id`.
   * @returns {object|null} First schema whose `$id` contains `/<objectType>`,
   *   or null when there is none or the main schema is not an array.
   */
  findSchemaByType(objectType) {
    if (!Array.isArray(this.schemas)) return null;
    const needle = `/${objectType}`;
    const match = this.schemas.find(
      (schema) => typeof schema?.$id === "string" && schema.$id.includes(needle)
    );
    return match ?? null;
  }

  /**
   * Build the lookup table used by `mapNLToFields`.
   *
   * Keys that are schema field names map to a descriptor object; lower-cased
   * titles and synonyms map to the field name as a string.
   *
   * @returns {Object<string, object|string>} The lookup table (empty when no
   *   seekers schema is available).
   */
  buildFieldMappings() {
    const mappings = Object.create(null); // no inherited keys to trip lookups
    const properties = this.seekersSchema?.properties;
    if (!properties || typeof properties !== "object") return mappings;

    const entries = Object.entries(properties);

    // Pass 1: canonical descriptors, keyed by the real field name.
    for (const [fieldName, rawDef] of entries) {
      const fieldDef = rawDef ?? {};
      mappings[fieldName] = {
        field: fieldName,
        title:
          typeof fieldDef.title === "string"
            ? fieldDef.title.toLowerCase()
            : undefined,
        description:
          typeof fieldDef.description === "string"
            ? fieldDef.description.toLowerCase()
            : undefined,
        type: fieldDef.type,
        synonyms: this.generateSynonyms(fieldName, fieldDef),
      };
    }

    // Pass 2: aliases (title + synonyms). An alias never replaces the
    // descriptor of a real field that happens to share its name.
    for (const [fieldName, rawDef] of entries) {
      const aliases = [...mappings[fieldName].synonyms];
      if (typeof rawDef?.title === "string" && rawDef.title) {
        aliases.unshift(rawDef.title);
      }
      for (const alias of aliases) {
        const key = alias.toLowerCase();
        if (!Object.hasOwn(properties, key)) {
          mappings[key] = fieldName;
        }
      }
    }
    return mappings;
  }

  /**
   * @returns {Object<string, {name: string, type: any, keyval: any}>} Index
   *   descriptors from the schema's `apxidx` list, keyed by index name.
   */
  buildIndexMappings() {
    const apxidx = this.seekersSchema?.apxidx;
    if (!Array.isArray(apxidx)) return {};

    const indexes = {};
    for (const idx of apxidx) {
      if (!idx || idx.name == null) continue; // skip malformed entries
      indexes[idx.name] = {
        name: idx.name,
        type: idx.type,
        keyval: idx.keyval,
      };
    }
    return indexes;
  }

  /**
   * @param {string} fieldName - Schema field name.
   * @param {object} [fieldDef] - Field definition (currently unused).
   * @returns {string[]} A fresh array of known synonyms for the field.
   */
  generateSynonyms(fieldName, fieldDef) {
    return [...(COMMON_SYNONYMS.get(fieldName) ?? [])];
  }

  /**
   * Map natural-language terms to schema field names.
   *
   * @param {string[]} nlTerms - Terms to look up (case-insensitive).
   * @returns {string[]} Matching field names, without duplicates.
   */
  mapNLToFields(nlTerms) {
    const mappedFields = new Set();
    for (const term of nlTerms ?? []) {
      if (typeof term !== "string") continue;
      const mapping = this.fieldMappings[term.toLowerCase().trim()];
      if (typeof mapping === "string") {
        mappedFields.add(mapping);
      } else if (mapping?.field) {
        mappedFields.add(mapping.field);
      }
    }
    return [...mappedFields];
  }

  /**
   * @returns {string[]} The schema's `apxaccessrights.recruiters.R` list, or a
   *   basic fallback list when the schema does not provide one.
   */
  getRecruiterReadableFields() {
    const readable = this.seekersSchema?.apxaccessrights?.recruiters?.R;
    if (!readable) {
      // Fallback to basic fields
      return ["alias", "email", "seekstatus", "seekworkingyear"];
    }
    return readable;
  }

  /** @returns {string[]} All property names of the seekers schema. */
  getAllSeekersFields() {
    const properties = this.seekersSchema?.properties;
    return properties ? Object.keys(properties) : [];
  }

  /** @returns {string[]} Names of the indexes declared by the schema. */
  getAvailableIndexes() {
    return Object.keys(this.indexMappings);
  }

  /**
   * @param {string} fieldName - Field to look up.
   * @returns {string|null} `idx.<name>` for the first index whose `keyval`
   *   equals the field, or null when there is none.
   */
  getIndexByField(fieldName) {
    const index = Object.values(this.indexMappings).find(
      (idx) => idx.keyval === fieldName
    );
    return index ? `idx.${index.name}` : null;
  }
}

// Initialize schema mapper
const schemaMapper = new SchemaMapper(SCHEMAS);
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();

// ---- Minimal mapping config (for prompting + default fields) ----
const seekersMapping = {
  object: "seekers",
  // First 5 readable fields
  defaultReadableFields: schemaMapper.getRecruiterReadableFields().slice(0, 5),
};

// ---- JSON Schema for Structured Outputs (no zod, no oneOf) ----

/**
 * @returns {object} JSON Schema describing the expected model output; the
 *   `fields` items are restricted to the recruiter-readable field names.
 */
function buildResponseJsonSchema() {
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  return {
    type: "object",
    additionalProperties: false,
    properties: {
      object: { type: "string", enum: ["seekers"] },
      condition: { type: "array", items: { type: "string" }, minItems: 1 },
      fields: {
        type: "array",
        items: {
          type: "string",
          enum: recruiterReadableFields,
        },
        minItems: 1,
      },
    },
    required: ["object", "condition", "fields"],
  };
}

// ---- Prompt builders ----

/**
 * Format a calendar date (YYYY-MM-DD) as seen in PROMPT_TIME_ZONE.
 *
 * @param {Date} date - Instant to convert.
 * @param {number} [offsetDays=0] - Whole days to add (negative for the past).
 * @returns {string} The shifted local calendar date as YYYY-MM-DD.
 */
function formatPromptDate(date, offsetDays = 0) {
  const parts = new Intl.DateTimeFormat("en-CA", {
    timeZone: PROMPT_TIME_ZONE,
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
  }).formatToParts(date);
  const part = (type) => Number(parts.find((p) => p.type === type).value);
  // Shift on a UTC calendar so day arithmetic is not affected by DST changes.
  const shifted = new Date(
    Date.UTC(part("year"), part("month") - 1, part("day") + offsetDays)
  );
  return shifted.toISOString().slice(0, 10);
}

/**
 * Build the system prompt describing the ODMDB DSL, the available fields and
 * how to resolve relative dates.
 *
 * @param {Date} [now=new Date()] - Reference instant for "today"; relative
 *   dates in the prompt are computed from it instead of being hardcoded.
 * @returns {string} The prompt text.
 */
function systemPrompt(now = new Date()) {
  const availableFields = schemaMapper.getAllSeekersFields();
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  const availableIndexes = schemaMapper.getAvailableIndexes();

  const today = formatPromptDate(now);
  const lastWeek = formatPromptDate(now, -7);
  const yesterday = formatPromptDate(now, -1);
  const fieldPreview = `${availableFields.slice(0, 15).join(", ")}${
    availableFields.length > 15 ? "..." : ""
  }`;

  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value) - for indexed fields",
    "- prop.<fieldName>(operator:value) - for direct property queries",
    "",
    "Available seekers fields:",
    fieldPreview,
    "",
    "Available indexes for optimization:",
    availableIndexes.join(", "),
    "",
    "Recruiter-readable fields (use these for field selection):",
    recruiterReadableFields.join(", "),
    "",
    "Field mappings for natural language:",
    "- 'email' → email",
    "- 'experience' → seekworkingyear",
    "- 'job titles' → seekjobtitleexperience",
    "- 'status' → seekstatus",
    "- 'salary' → salaryexpectation",
    "- 'location' → seeklocation",
    "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- Use indexes when possible (idx.seekstatus_alias for status queries)",
    "- For date filters, use prop.dt_create with absolute dates",
    "- Only return recruiter-readable fields in 'fields' array",
    `- Default fields if request is generic: ${recruiterReadableFields
      .slice(0, 5)
      .join(", ")}`,
    "",
    `Timezone is ${PROMPT_TIME_ZONE}. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    `Interpret 'yesterday' as → ${yesterday}.`,
  ].join("\n");
}

/**
 * @param {string} nl - The user's natural-language request.
 * @returns {string} The user message; the request is JSON-quoted so embedded
 *   quotes or newlines cannot break out of the quoted span.
 */
function userPrompt(nl) {
  return `Natural language request: ${JSON.stringify(
    String(nl)
  )}\nReturn ONLY the JSON object.`;
}

// ---- OpenAI call using Responses API (text.format) ----

let openaiClient = null;

/**
 * Create the OpenAI client on first use.
 *
 * Deferred so that a missing OPENAI_API_KEY is reported by this script's own
 * check in the entry point rather than failing while the module loads.
 *
 * @returns {OpenAI} The shared client instance.
 */
function getClient() {
  openaiClient ??= new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  return openaiClient;
}

/**
 * Extract the text of the first non-empty message part from a response.
 *
 * @param {any} resp - Object returned by `client.responses.create`.
 * @returns {string|undefined} The text, or undefined when there is none.
 */
function extractOutputText(resp) {
  if (resp?.output_text) return resp.output_text;
  // The first output item is not necessarily the message, so scan them all.
  for (const item of resp?.output ?? []) {
    for (const part of item?.content ?? []) {
      if (typeof part?.text === "string" && part.text) return part.text;
    }
  }
  return undefined;
}

/**
 * Ask the model to translate a natural-language request into a query object.
 *
 * @param {string} nlText - The natural-language request.
 * @returns {Promise<object>} The parsed JSON object produced by the model.
 * @throws {Error} When the model returns no text or text that is not JSON.
 */
async function inferQuery(nlText) {
  const resp = await getClient().responses.create({
    model: MODEL,
    input: [
      { role: "system", content: systemPrompt() },
      { role: "user", content: userPrompt(nlText) },
    ],
    text: {
      format: {
        name: "OdmdbQuery",
        type: "json_schema",
        schema: buildResponseJsonSchema(),
        strict: true,
      },
    },
  });

  const jsonText = extractOutputText(resp);
  if (!jsonText) throw new Error("Empty model output");

  try {
    return JSON.parse(jsonText);
  } catch (err) {
    throw new Error("Model output is not valid JSON", { cause: err });
  }
}

// ---- Validate using the ODMDB schema (not zod) ----

// DSL fragments a condition must contain at least one of.
const ALLOWED_DSL_TOKENS = ["join(", "idx.", "prop."];
// Tab, LF, CR and printable ASCII only.
const PRINTABLE_ASCII = /^[\x09\x0A\x0D\x20-\x7E]+$/;

/**
 * Validate a model-produced query against the loaded seekers schema.
 *
 * Shape and condition problems throw. Field handling depends on whether the
 * seekers schema could be read:
 *  - schema known: fields absent from the schema are dropped with a warning
 *    (PoC behavior); if none remain, the default readable fields that exist in
 *    the schema are used; fields outside the recruiter-readable list only
 *    trigger a warning;
 *  - schema unknown: fields are only reduced to unique non-blank strings.
 *
 * @param {object} candidate - Parsed model output.
 * @returns {object} A copy of `candidate` with the validated `fields` list.
 * @throws {Error} On an invalid shape, an invalid condition, or when no valid
 *   field remains.
 */
function validateWithOdmdbSchema(candidate) {
  // Basic shape checks (already enforced by Structured Outputs, but keep defensive)
  if (!candidate || typeof candidate !== "object") {
    throw new Error("Invalid response (not an object).");
  }
  if (candidate.object !== "seekers") {
    throw new Error("Invalid object; must be 'seekers'.");
  }
  if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) {
    throw new Error(
      "Invalid 'condition'; must be a non-empty array of strings."
    );
  }
  if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  // DSL token sanity
  for (const c of candidate.condition) {
    if (typeof c !== "string") {
      throw new Error("Condition entries must be strings.");
    }
    if (!ALLOWED_DSL_TOKENS.some((token) => c.includes(token))) {
      throw new Error(
        `Invalid condition '${c}'; expected join(...), idx. or prop. syntax.`
      );
    }
    if (!PRINTABLE_ASCII.test(c)) {
      throw new Error(
        `Invalid condition '${c}'; only printable ASCII characters are allowed.`
      );
    }
  }

  // Keep unique, non-blank string fields only.
  let fields = [
    ...new Set(
      candidate.fields.filter((f) => typeof f === "string" && f.trim())
    ),
  ];

  // Schema membership is only enforceable when the seekers schema was readable;
  // main.json is optional, so an empty list means "unknown", not "no fields".
  if (SEEKERS_FIELDS_FROM_SCHEMA.length) {
    const knownFields = new Set(SEEKERS_FIELDS_FROM_SCHEMA);
    const unknown = fields.filter((f) => !knownFields.has(f));
    if (unknown.length) {
      // Drop unknown but continue (PoC behavior)
      console.warn(
        "⚠️ Dropping unknown fields (not in seekers schema):",
        unknown
      );
      fields = fields.filter((f) => knownFields.has(f));
    }

    if (!fields.length) {
      // If all dropped, fallback to default shortlist intersected with schema
      fields = seekersMapping.defaultReadableFields.filter((f) =>
        knownFields.has(f)
      );
      if (!fields.length) {
        throw new Error(
          "No valid fields remain after validation and no fallback available."
        );
      }
    }

    const readableFields = new Set(schemaMapper.getRecruiterReadableFields());
    for (const field of fields) {
      if (!readableFields.has(field)) {
        console.warn(
          `Warning: Field '${field}' may not be readable by recruiters.`
        );
      }
    }
  } else if (!fields.length) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  // Return a copy so the caller's object is not modified.
  return { ...candidate, fields };
}

// ---- Run PoC (print only the created query; do not execute) ----
(async () => {
  try {
    if (!process.env.OPENAI_API_KEY) {
      throw new Error("Missing OPENAI_API_KEY env var.");
    }

    const out = await inferQuery(NL_QUERY);
    const validated = validateWithOdmdbSchema(out);

    // Output ONLY the created query (no execution)
    console.log(
      JSON.stringify(
        {
          object: validated.object,
          condition: validated.condition,
          fields: validated.fields,
        },
        null,
        2
      )
    );
  } catch (e) {
    console.error("PoC failed:", e.message || e);
    process.exit(1);
  }
})();