[INIT] Initial PoC of the concept for QL creation from NL

This commit is contained in:
Eliyan
2025-10-13 12:33:20 +02:00
commit 6dbfe5cb07
6 changed files with 3445 additions and 0 deletions

159
poc.js Normal file
View File

@@ -0,0 +1,159 @@
// PoC: NL → ODMDB query (seekers)
// Usage:
// 1) export OPENAI_API_KEY=sk-...
// 2) node poc.js
import fs from "node:fs";
import OpenAI from "openai";
import { z } from "zod";
// ---- Config ----
// Model id: OPENAI_MODEL wins when it is set to a non-empty value,
// otherwise the PoC falls back to "gpt-5".
const MODEL = process.env.OPENAI_MODEL || "gpt-5";

// Schema files are optional prompt context; a missing file is tolerated.
const MAIN_SCHEMA_PATH = "./main.json";
const LG_SCHEMA_PATH = "./lg.json";

// The PoC is single-shot (no multi-turn), so the request is hardcoded here.
const NL_QUERY = "give me new seekers since last week with email and experience";
// ---- Load schemas if present (not required for output) ----
/**
 * Best-effort JSON file loader for optional context files.
 *
 * Never throws: every failure results in `null`. A file that simply does not
 * exist is the expected "not provided" case and stays silent; any other
 * failure (unreadable path, invalid JSON) is reported on stderr so a broken
 * schema file does not go unnoticed.
 *
 * @param {string} path - Location of the JSON file.
 * @returns {*} The parsed JSON value, or `null` when nothing could be loaded.
 */
function loadJsonSafe(path) {
  let raw;
  try {
    // Read directly instead of existsSync + read: one syscall, no race window.
    raw = fs.readFileSync(path, "utf-8");
  } catch (err) {
    if (err?.code !== "ENOENT") {
      console.warn(`Could not read ${path}: ${err?.message ?? err}`);
    }
    return null;
  }
  try {
    return JSON.parse(raw);
  } catch (err) {
    console.warn(`Ignoring ${path}: invalid JSON (${err.message})`);
    return null;
  }
}
// Optional schema context, loaded once at startup; each entry is either the
// parsed file content or null when loadJsonSafe could not provide it.
const mainSchema = loadJsonSafe(MAIN_SCHEMA_PATH);
const lgSchema = loadJsonSafe(LG_SCHEMA_PATH);
const SCHEMAS = { main: mainSchema, lg: lgSchema };
// ---- Seekers mapping (from our agreement) ----
// `object` is the target object name; `readableFieldsForRecruiters` is the
// default field shortlist that gets injected into the system prompt.
const seekersMapping = {
  object: "seekers",
  readableFieldsForRecruiters: [
    "alias",
    "email",
    "seekstatus",
    "seekworkingyear",
    "seekjobtitleexperience",
  ],
};
// ---- Output contract (strict) ----
// Runtime validation of the model's reply. Kept in step with
// RESPONSE_JSON_SCHEMA: `object` is pinned to "seekers", `condition` is a list
// of DSL strings, and `fields` is a non-empty list of field names.
const OdmdbQueryZ = z.object({
  object: z.literal("seekers"),
  condition: z.array(z.string()),
  // Always an array, and never empty (mirrors `minItems: 1` in the JSON Schema).
  fields: z.array(z.string()).min(1),
});
// JSON Schema for Structured Output.
// This is the shape handed to the model as `text.format.schema`: a closed
// object with three mandatory keys.
const RESPONSE_JSON_SCHEMA = {
  type: "object",
  additionalProperties: false,
  properties: {
    // Only one target object is supported.
    object: { type: "string", enum: ["seekers"] },
    // DSL condition strings; the list may be empty.
    condition: { type: "array", items: { type: "string" } },
    // Field names to return; at least one is required.
    fields: { type: "array", items: { type: "string" }, minItems: 1 },
  },
  required: ["object", "condition", "fields"],
};
// ---- Prompt builders ----
/**
 * Calendar date of `instant` as observed in Europe/Paris, shifted by a whole
 * number of days and rendered as YYYY-MM-DD.
 *
 * The shift is applied to the calendar date (not to the timestamp), so a
 * daylight-saving change inside the interval cannot move the result by a day.
 *
 * @param {Date} instant - Moment to convert.
 * @param {number} [offsetDays=0] - Days to add (negative to go back).
 * @returns {string} ISO calendar date, e.g. "2025-10-06".
 */
function parisIsoDate(instant, offsetDays = 0) {
  const parts = new Intl.DateTimeFormat("en-US", {
    timeZone: "Europe/Paris",
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
  }).formatToParts(instant);
  const numberOf = (type) =>
    Number(parts.find((part) => part.type === type).value);
  // Date.UTC normalises out-of-range days, e.g. day -3 rolls into the previous month.
  const shifted = new Date(
    Date.UTC(numberOf("year"), numberOf("month") - 1, numberOf("day") + offsetDays)
  );
  return shifted.toISOString().slice(0, 10);
}

/**
 * Build the system prompt: task description, DSL summary, mapping rules,
 * resolved reference dates and the optional schema context.
 *
 * @param {Date} [now=new Date()] - Reference instant used to resolve "today"
 *   and "last week"; injectable so the prompt is reproducible in tests.
 * @returns {string} Prompt text, one instruction per line.
 */
function systemPrompt(now = new Date()) {
  // Resolved at call time so the prompt never advertises a stale date.
  const today = parisIsoDate(now);
  const lastWeek = parisIsoDate(now, -7);
  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema. The 'fields' property MUST be an array of strings.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value)",
    "- prop.<field>(operator:value) with dates or scalars.",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- For 'new'/'recent' recency, map to prop.dt_create with a resolved absolute date.",
    "- For 'experience', map to seekworkingyear.",
    "- Prefer recruiter-readable fields if a small set is requested. If the request is generic, return this default shortlist:",
    seekersMapping.readableFieldsForRecruiters.join(", "),
    "",
    `Timezone is Europe/Paris. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    "",
    "Schemas (context only, may be null):",
    JSON.stringify(SCHEMAS, null, 2),
  ].join("\n");
}
/**
 * Build the user message that carries the natural-language request.
 *
 * The request is embedded as a JSON string literal so that quotes, backslashes
 * or line breaks inside it cannot terminate the quoted span early. Plain text
 * renders exactly as a simple double-quoted string.
 *
 * @param {string} nl - The natural-language request.
 * @returns {string} The user prompt text.
 */
function userPrompt(nl) {
  const quoted = JSON.stringify(String(nl));
  return `Natural language request: ${quoted}\nReturn ONLY the JSON object.`;
}
// ---- OpenAI call using Responses API (text.format) ----

// A condition must mention at least one of these DSL markers.
const CONDITION_TOKENS = ["join(", "idx.", "prop."];
// Tab, LF, CR and printable ASCII only.
const CONDITION_CHARSET = /^[\x09\x0A\x0D\x20-\x7E]+$/;

// Created on first use rather than at import time: the SDK constructor rejects
// a missing API key, which would abort the module before the runner's own
// OPENAI_API_KEY check gets a chance to report the problem.
let client;

/**
 * Pull the generated text out of a Responses API result.
 *
 * Prefers the `output_text` convenience field and otherwise scans the output
 * items for the first message text part, so a leading non-message item (for
 * example a reasoning item) does not hide the answer.
 *
 * @param {object} resp - Result of `client.responses.create`.
 * @returns {string} The non-empty generated text.
 * @throws {Error} "Empty model output" when no text is present.
 */
function extractOutputText(resp) {
  if (typeof resp?.output_text === "string" && resp.output_text.length > 0) {
    return resp.output_text;
  }
  for (const item of resp?.output ?? []) {
    if (item?.type !== "message") continue;
    for (const part of item.content ?? []) {
      if (
        part?.type === "output_text" &&
        typeof part.text === "string" &&
        part.text.length > 0
      ) {
        return part.text;
      }
    }
  }
  throw new Error("Empty model output");
}

/**
 * Light safety check on DSL conditions: each one must contain a known DSL
 * marker and consist solely of tab/LF/CR and printable ASCII characters.
 *
 * @param {string[]} conditions - Condition strings to check.
 * @throws {Error} "Malformed condition: …" for the first offending entry.
 */
function assertConditionsWellFormed(conditions) {
  for (const condition of conditions) {
    const hasToken = CONDITION_TOKENS.some((token) => condition.includes(token));
    if (!hasToken || !CONDITION_CHARSET.test(condition)) {
      throw new Error(`Malformed condition: ${condition}`);
    }
  }
}

/**
 * Ask the model to translate a natural-language request into an ODMDB query
 * and validate the reply.
 *
 * @param {string} nlText - The natural-language request.
 * @returns {Promise<{object: string, condition: string[], fields: string[]}>}
 *   The validated query payload.
 * @throws {Error} When the reply is empty, is not valid JSON, fails the
 *   OdmdbQueryZ contract, or contains a malformed condition.
 */
async function inferQuery(nlText) {
  if (!client) {
    client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  }
  const resp = await client.responses.create({
    model: MODEL,
    input: [
      { role: "system", content: systemPrompt() },
      { role: "user", content: userPrompt(nlText) },
    ],
    text: {
      // Structured output format lives under `text.format` in the Responses API.
      format: {
        name: "OdmdbQuery",
        type: "json_schema",
        schema: RESPONSE_JSON_SCHEMA,
        strict: true,
      },
    },
  });

  const jsonText = extractOutputText(resp);
  let parsed;
  try {
    parsed = JSON.parse(jsonText);
  } catch (err) {
    throw new Error("Model output is not valid JSON", { cause: err });
  }
  const validated = OdmdbQueryZ.parse(parsed);
  assertConditionsWellFormed(validated.condition);
  return validated;
}
// ---- Run PoC (print only the created query; do not execute) ----
/**
 * Entry point: checks that an API key is configured, infers the query for the
 * hardcoded request and prints it as pretty-printed JSON. Nothing is executed
 * against the database.
 */
async function main() {
  if (!process.env.OPENAI_API_KEY) {
    throw new Error("Missing OPENAI_API_KEY env var.");
  }
  const { object, condition, fields } = await inferQuery(NL_QUERY);
  // Only the three contract keys are printed, in a fixed order.
  const printable = { object, condition, fields };
  console.log(JSON.stringify(printable, null, 2));
}

// Any failure is reported on stderr and turned into a non-zero exit status.
main().catch((e) => {
  console.error("PoC failed:", e.message || e);
  process.exit(1);
});