[WIP] remove zod and use actual schema
This commit is contained in:
15
README.md
15
README.md
@@ -22,6 +22,7 @@ This is a **Proof of Concept (PoC)** that demonstrates the conversion of natural
|
||||
## Installation
|
||||
|
||||
1. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
@@ -63,14 +64,8 @@ The PoC generates ODMDB queries in this format:
|
||||
```json
|
||||
{
|
||||
"object": "seekers",
|
||||
"condition": [
|
||||
"prop.dt_create(>=:2025-10-06)"
|
||||
],
|
||||
"fields": [
|
||||
"alias",
|
||||
"email",
|
||||
"seekworkingyear"
|
||||
]
|
||||
"condition": ["prop.dt_create(>=:2025-10-06)"],
|
||||
"fields": ["alias", "email", "seekworkingyear"]
|
||||
}
|
||||
```
|
||||
|
||||
@@ -85,6 +80,7 @@ The PoC understands and generates these ODMDB DSL patterns:
|
||||
## Field Mappings
|
||||
|
||||
Currently supports mappings for the `seekers` object:
|
||||
|
||||
- `email` → `email`
|
||||
- `experience` → `seekworkingyear`
|
||||
- `job titles` → `seekjobtitleexperience`
|
||||
@@ -93,6 +89,7 @@ Currently supports mapping for seekers object:
|
||||
## Schema Context
|
||||
|
||||
The PoC can optionally load schema files for context:
|
||||
|
||||
- `main.json` - Combined schema definitions
|
||||
- `lg.json` - Localization/language mappings
|
||||
|
||||
@@ -116,4 +113,4 @@ The PoC can optionally load schema files for context:
|
||||
- `poc.js` - Main PoC implementation
|
||||
- `package.json` - Dependencies and scripts
|
||||
- `main.json` - Optional schema context (if available)
|
||||
- `lg.json` - Optional localization context (if available)
|
||||
- `lg.json` - Optional localization context (if available)
|
||||
|
379
poc.js
379
poc.js
@@ -1,22 +1,21 @@
|
||||
// PoC: NL → ODMDB query (seekers)
|
||||
// PoC: NL → ODMDB query (seekers), no zod — validate via ODMDB schema
|
||||
// Usage:
|
||||
// 1) export OPENAI_API_KEY=sk-...
|
||||
// 2) node poc.js
|
||||
|
||||
import fs from "node:fs";
|
||||
import OpenAI from "openai";
|
||||
import { z } from "zod";
|
||||
|
||||
// ---- Config ----
|
||||
const MODEL = process.env.OPENAI_MODEL || "gpt-5";
|
||||
const MAIN_SCHEMA_PATH = "./main.json"; // optional context; safe if missing
|
||||
const LG_SCHEMA_PATH = "./lg.json"; // optional context; safe if missing
|
||||
const MAIN_SCHEMA_PATH = "./main.json"; // optional context; used for validation
|
||||
const LG_SCHEMA_PATH = "./lg.json"; // optional context
|
||||
|
||||
// Hardcoded NL query for the PoC (no multi-turn)
|
||||
const NL_QUERY =
|
||||
"give me new seekers since last week with email and experience";
|
||||
|
||||
// ---- Load schemas if present (not required for output) ----
|
||||
// ---- Load schemas (safe) ----
|
||||
function loadJsonSafe(path) {
|
||||
try {
|
||||
if (fs.existsSync(path)) {
|
||||
@@ -30,60 +29,255 @@ const SCHEMAS = {
|
||||
lg: loadJsonSafe(LG_SCHEMA_PATH),
|
||||
};
|
||||
|
||||
// ---- Seekers mapping (from our agreement) ----
|
||||
// ---- Helpers to read seekers field names from your ODMDB custom schema ----
|
||||
/**
 * Best-effort lookup of the seekers property names inside a loaded schema value.
 *
 * Shapes are tried in this order:
 *   1. `{ objects: { seekers: { properties: {...} } } }`
 *   2. an array: each entry is searched in order, first non-empty result wins
 *   3. anything else: depth-first search for a node that has `seekers.properties`
 *
 * The search is cycle-safe: a self-referencing object or array yields `[]`
 * instead of looping forever or overflowing the stack.
 *
 * @param {unknown} main - Parsed schema value; may be null/undefined.
 * @returns {string[]} Property names of the first match, or `[]` when none is found.
 */
function extractSeekersPropsFromOdmdbSchema(main) {
  // Arrays already being expanded; guards the array recursion against self-containing arrays.
  const expandedArrays = new WeakSet();

  const isObjectLike = (value) => Boolean(value) && typeof value === "object";

  function search(root) {
    if (!isObjectLike(root)) return [];

    // 1) Canonical shape.
    const direct = root.objects?.seekers?.properties;
    if (isObjectLike(direct)) return Object.keys(direct);

    // 2) Array of schemas: try each entry in order.
    if (Array.isArray(root) && !expandedArrays.has(root)) {
      expandedArrays.add(root);
      for (const entry of root) {
        const keys = search(entry);
        if (keys.length) return keys;
      }
    }

    // 3) Fallback: iterative depth-first search with a visited set.
    try {
      const visited = new WeakSet();
      const stack = [root];
      while (stack.length) {
        const node = stack.pop();
        if (visited.has(node)) continue;
        visited.add(node);

        const props = node.seekers?.properties;
        if (isObjectLike(props)) return Object.keys(props);

        for (const child of Object.values(node)) {
          if (isObjectLike(child)) stack.push(child);
        }
      }
    } catch {
      // Deliberate best-effort: exotic values (throwing getters, proxies) are
      // treated as "no seekers schema found" rather than aborting the caller.
    }

    return [];
  }

  return search(main);
}
|
||||
|
||||
// ---- Schema-based mapping system ----
|
||||
/**
 * Indexes the `seekers` entry of a loaded schema list so natural-language
 * terms can be resolved to property names and indexes.
 *
 * Instance properties:
 * - `schemas`: the `main` schema value passed in (or `[]`)
 * - `seekersSchema`: the schema whose `$id` contains `/seekers`, or `null`
 * - `fieldMappings`: lookup keyed by property name (metadata object) and by
 *   lower-cased title/synonym (string holding the property name)
 * - `indexMappings`: lookup keyed by index name
 */
class SchemaMapper {
  // Natural-language synonyms per seekers property; built once instead of per call.
  static #COMMON_SYNONYMS = Object.freeze({
    email: ["contact", "mail", "contact email"],
    seekworkingyear: ["experience", "years of experience", "work experience"],
    seekjobtitleexperience: ["job titles", "job experience", "positions"],
    seekstatus: ["status", "availability", "looking"],
    dt_create: ["created", "creation date", "new", "recent", "since"],
    salaryexpectation: ["salary", "pay", "compensation", "wage"],
    seeklocation: ["location", "where", "place"],
    mbti: ["personality", "type", "profile"],
    alias: ["id", "identifier", "username"],
  });

  /**
   * @param {{ main?: unknown } | null | undefined} schemas - Loaded schemas;
   *   only `main` is read. A missing value is treated as "no schema".
   */
  constructor(schemas) {
    this.schemas = schemas?.main || [];
    this.seekersSchema = this.findSchemaByType("seekers");
    this.fieldMappings = this.buildFieldMappings();
    this.indexMappings = this.buildIndexMappings();
  }

  /**
   * Finds the first schema whose string `$id` contains `/<objectType>`.
   * Entries that are null or have a non-string `$id` are skipped.
   *
   * @param {string} objectType
   * @returns {object | null}
   */
  findSchemaByType(objectType) {
    if (!Array.isArray(this.schemas)) return null;

    const needle = `/${objectType}`;
    const match = this.schemas.find(
      (schema) => typeof schema?.$id === "string" && schema.$id.includes(needle)
    );
    return match ?? null;
  }

  /**
   * Builds the term lookup. Property names map to a metadata object; titles
   * and synonyms (lower-cased) map to the property name as a string.
   * An alias never replaces the entry of a real property with the same name,
   * so field metadata is not lost when e.g. the title "Email" lower-cases to
   * the property name `email`.
   *
   * @returns {Record<string, string | {field: string, title?: string, description?: string, type?: unknown, synonyms: string[]}>}
   */
  buildFieldMappings() {
    // Null-prototype dictionary: schema-provided keys cannot hit Object.prototype.
    const mappings = Object.create(null);

    const properties = this.seekersSchema?.properties;
    if (!properties || typeof properties !== "object") return mappings;

    const lower = (value) =>
      typeof value === "string" ? value.toLowerCase() : undefined;

    const registerAlias = (alias, fieldName) => {
      const key = lower(alias);
      if (key === undefined || Object.hasOwn(properties, key)) return;
      mappings[key] = fieldName;
    };

    for (const [fieldName, rawDef] of Object.entries(properties)) {
      const fieldDef = rawDef ?? {};
      const synonyms = this.generateSynonyms(fieldName, fieldDef);

      mappings[fieldName] = {
        field: fieldName,
        title: lower(fieldDef.title),
        description: lower(fieldDef.description),
        type: fieldDef.type,
        synonyms,
      };

      // Index by title and synonyms.
      registerAlias(fieldDef.title, fieldName);
      for (const synonym of synonyms) registerAlias(synonym, fieldName);
    }

    return mappings;
  }

  /**
   * Builds a lookup of the schema's `apxidx` entries keyed by index name.
   *
   * @returns {Record<string, {name: unknown, type: unknown, keyval: unknown}>}
   */
  buildIndexMappings() {
    const declared = this.seekersSchema?.apxidx;
    if (!Array.isArray(declared)) return {};

    return Object.fromEntries(
      declared
        .filter((idx) => idx && typeof idx === "object")
        .map(({ name, type, keyval }) => [name, { name, type, keyval }])
    );
  }

  /**
   * Returns the built-in natural-language synonyms for a property name.
   *
   * @param {string} fieldName
   * @param {object} fieldDef - Property definition; currently unused.
   * @returns {string[]} A fresh array (empty when the field has no synonyms).
   */
  generateSynonyms(fieldName, fieldDef) {
    // Own-property check: a schema property named like an Object.prototype
    // member (e.g. "constructor") must not be looked up on the prototype.
    if (!Object.hasOwn(SchemaMapper.#COMMON_SYNONYMS, fieldName)) return [];
    return [...SchemaMapper.#COMMON_SYNONYMS[fieldName]];
  }

  /**
   * Resolves natural-language terms to seekers property names.
   * Matching is case-insensitive and ignores surrounding whitespace; unknown
   * or non-string terms are skipped.
   *
   * @param {Iterable<string> | null | undefined} nlTerms
   * @returns {string[]} Unique property names in first-seen order.
   */
  mapNLToFields(nlTerms) {
    const mapped = new Set();

    for (const term of nlTerms ?? []) {
      if (typeof term !== "string") continue;

      const entry = this.fieldMappings[term.trim().toLowerCase()];
      if (typeof entry === "string") {
        mapped.add(entry);
      } else if (entry?.field) {
        mapped.add(entry.field);
      }
    }

    return [...mapped];
  }

  /**
   * Fields listed under `apxaccessrights.recruiters.R` in the seekers schema.
   *
   * @returns {string[]} A copy of the list, or a basic fallback shortlist when
   *   the schema does not provide an array there.
   */
  getRecruiterReadableFields() {
    const readable = this.seekersSchema?.apxaccessrights?.recruiters?.R;
    if (!Array.isArray(readable)) {
      // Fallback to basic fields.
      return ["alias", "email", "seekstatus", "seekworkingyear"];
    }
    // Copy so callers cannot mutate the loaded schema.
    return [...readable];
  }

  /**
   * @returns {string[]} All property names of the seekers schema (or `[]`).
   */
  getAllSeekersFields() {
    const properties = this.seekersSchema?.properties;
    return properties ? Object.keys(properties) : [];
  }

  /**
   * @returns {string[]} Names of the indexes declared in `apxidx`.
   */
  getAvailableIndexes() {
    return Object.keys(this.indexMappings);
  }

  /**
   * Finds the index whose `keyval` equals the given property name.
   *
   * @param {string} fieldName
   * @returns {string | null} `idx.<name>` for the first match, else `null`.
   */
  getIndexByField(fieldName) {
    const index = Object.values(this.indexMappings).find(
      (idx) => idx.keyval === fieldName
    );
    return index ? `idx.${index.name}` : null;
  }
}
|
||||
|
||||
// Initialize schema mapper
|
||||
const schemaMapper = new SchemaMapper(SCHEMAS);
|
||||
|
||||
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();
|
||||
|
||||
// ---- Minimal mapping config (for prompting + default fields) ----
|
||||
const seekersMapping = {
|
||||
object: "seekers",
|
||||
readableFieldsForRecruiters: [
|
||||
"alias",
|
||||
"email",
|
||||
"seekstatus",
|
||||
"seekworkingyear",
|
||||
"seekjobtitleexperience",
|
||||
],
|
||||
defaultReadableFields: schemaMapper.getRecruiterReadableFields().slice(0, 5), // First 5 readable fields
|
||||
};
|
||||
|
||||
// ---- Output contract (strict) ----
|
||||
const OdmdbQueryZ = z.object({
|
||||
object: z.literal("seekers"),
|
||||
condition: z.array(z.string()),
|
||||
fields: z.array(z.string()), // always an array
|
||||
});
|
||||
// ---- JSON Schema for Structured Outputs (no zod, no oneOf) ----
|
||||
/**
 * Builds the JSON Schema passed to the model as the structured-output format.
 *
 * The payload must be an object with exactly three properties:
 * - `object`: the literal "seekers"
 * - `condition`: non-empty array of strings
 * - `fields`: non-empty array whose items are restricted to the
 *   recruiter-readable field names reported by `schemaMapper`
 *
 * @returns {object} A fresh schema object on every call.
 */
function buildResponseJsonSchema() {
  // Copy and dedupe: JSON Schema `enum` values should be unique, and the
  // schema must not alias the mapper's own array.
  const recruiterReadableFields = [
    ...new Set(schemaMapper.getRecruiterReadableFields()),
  ];

  return {
    type: "object",
    additionalProperties: false,
    properties: {
      object: { type: "string", enum: ["seekers"] },
      condition: { type: "array", items: { type: "string" }, minItems: 1 },
      fields: {
        type: "array",
        items: {
          type: "string",
          enum: recruiterReadableFields,
        },
        minItems: 1,
      },
    },
    required: ["object", "condition", "fields"],
  };
}
|
||||
|
||||
// ---- Prompt builders ----
|
||||
/**
 * Builds the system prompt that instructs the model how to translate a
 * natural-language request into an ODMDB search payload for `seekers`.
 *
 * The prompt lists the DSL patterns, the seekers fields and indexes reported
 * by `schemaMapper`, the natural-language field mappings, the rules, and the
 * date anchors. Each instruction appears exactly once so the model never sees
 * two conflicting "today" dates or two different default field shortlists.
 *
 * @returns {string} Prompt lines joined with "\n".
 */
function systemPrompt() {
  const MAX_LISTED_FIELDS = 15;

  const availableFields = schemaMapper.getAllSeekersFields();
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  const availableIndexes = schemaMapper.getAvailableIndexes();

  // Only a prefix of the field list is shown; "..." marks truncation.
  const listedFields =
    availableFields.slice(0, MAX_LISTED_FIELDS).join(", ") +
    (availableFields.length > MAX_LISTED_FIELDS ? "..." : "");

  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema. The 'fields' property MUST be an array of strings.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value) - for indexed fields",
    "- prop.<field>(operator:value) - for direct property queries",
    "",
    "Available seekers fields:",
    listedFields,
    "",
    "Available indexes for optimization:",
    availableIndexes.join(", "),
    "",
    "Recruiter-readable fields (use these for field selection):",
    recruiterReadableFields.join(", "),
    "",
    "Field mappings for natural language:",
    "- 'email' → email",
    "- 'experience' → seekworkingyear",
    "- 'job titles' → seekjobtitleexperience",
    "- 'status' → seekstatus",
    "- 'salary' → salaryexpectation",
    "- 'location' → seeklocation",
    "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- Use indexes when possible (idx.seekstatus_alias for status queries)",
    "- For date filters, use prop.dt_create with absolute dates",
    "- Only return recruiter-readable fields in 'fields' array",
    `- Default fields if request is generic: ${recruiterReadableFields
      .slice(0, 5)
      .join(", ")}`,
    "",
    // TODO(review): these date anchors are hard-coded and go stale after
    // 2025-10-14; derive them from the current date before real use.
    "Timezone is Europe/Paris. Today is 2025-10-14.",
    "Interpret 'last week' as now minus 7 days → 2025-10-07.",
    "Interpret 'yesterday' as → 2025-10-13.",
  ].join("\n");
}
|
||||
function userPrompt(nl) {
|
||||
@@ -101,11 +295,10 @@ async function inferQuery(nlText) {
|
||||
{ role: "user", content: userPrompt(nlText) },
|
||||
],
|
||||
text: {
|
||||
// <= new location for structured output format
|
||||
format: {
|
||||
name: "OdmdbQuery",
|
||||
type: "json_schema",
|
||||
schema: RESPONSE_JSON_SCHEMA,
|
||||
schema: buildResponseJsonSchema(),
|
||||
strict: true,
|
||||
},
|
||||
},
|
||||
@@ -119,34 +312,104 @@ async function inferQuery(nlText) {
|
||||
})();
|
||||
|
||||
const parsed = JSON.parse(jsonText);
|
||||
const validated = OdmdbQueryZ.parse(parsed);
|
||||
return parsed;
|
||||
}
|
||||
|
||||
// Light safety check on DSL tokens
|
||||
const allowed = ["join(", "idx.", "prop."];
|
||||
for (const c of validated.condition) {
|
||||
const ok = allowed.some((t) => c.includes(t));
|
||||
const ascii = /^[\x09\x0A\x0D\x20-\x7E()_:\[\].,=><!'"-]+$/.test(c);
|
||||
if (!ok || !ascii) throw new Error(`Malformed condition: ${c}`);
|
||||
// ---- Validate using the ODMDB schema (not zod) ----
|
||||
/**
 * Validates a model-produced ODMDB query against the loaded seekers schema.
 *
 * Checks, in order:
 * 1. Shape: an object with `object === "seekers"` and non-empty `condition`
 *    and `fields` arrays.
 * 2. Conditions: every entry is a string, contains one of the DSL tokens
 *    (`join(`, `idx.`, `prop.`) and matches the allowed-character pattern.
 * 3. Fields: blank or non-string entries and duplicates are removed. When the
 *    seekers schema is known, fields not present in it are dropped with a
 *    warning (PoC behavior) rather than rejected. If nothing remains, the
 *    default shortlist from `seekersMapping` is used.
 *
 * A warning is logged for every returned field that is not in the
 * recruiter-readable list. The input object is not mutated.
 *
 * @param {{object: string, condition: string[], fields: string[]}} candidate
 * @returns {{object: string, condition: string[], fields: string[]}} A copy
 *   of `candidate` with the cleaned `fields` array.
 * @throws {Error} On an invalid shape, a malformed condition, or when no
 *   valid field and no fallback field is available.
 */
function validateWithOdmdbSchema(candidate) {
  // Basic shape checks (already enforced by Structured Outputs, but keep defensive).
  if (!candidate || typeof candidate !== "object") {
    throw new Error("Invalid response (not an object).");
  }
  if (candidate.object !== "seekers") {
    throw new Error("Invalid object; must be 'seekers'.");
  }
  if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) {
    throw new Error(
      "Invalid 'condition'; must be a non-empty array of strings."
    );
  }
  if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  // DSL token sanity.
  const allowedTokens = ["join(", "idx.", "prop."];
  const allowedChars = /^[\x09\x0A\x0D\x20-\x7E()_:\[\].,=><!'"-]+$/;
  for (const c of candidate.condition) {
    if (typeof c !== "string") {
      throw new Error("Condition entries must be strings.");
    }
    const tokenOK = allowedTokens.some((t) => c.includes(t));
    if (!tokenOK || !allowedChars.test(c)) {
      throw new Error(`Malformed condition: ${c}`);
    }
  }

  // Field existence check against the ODMDB custom schema (seekers properties).
  // An empty set means the schema could not be read; membership is then not enforced.
  const knownFields = new Set(SEEKERS_FIELDS_FROM_SCHEMA);
  const schemaKnown = knownFields.size > 0;

  const requested = [
    ...new Set(
      candidate.fields.filter((f) => typeof f === "string" && f.trim())
    ),
  ];

  let fields = requested;
  if (schemaKnown) {
    const unknown = requested.filter((f) => !knownFields.has(f));
    if (unknown.length) {
      // Drop unknown but continue (PoC behavior).
      console.warn(
        "⚠️ Dropping unknown fields (not in seekers schema):",
        unknown
      );
      fields = requested.filter((f) => knownFields.has(f));
    }
  }

  if (!fields.length) {
    // If all dropped, fall back to the default shortlist (intersected with
    // the schema when it is known).
    const fallback = seekersMapping.defaultReadableFields.filter(
      (f) => !schemaKnown || knownFields.has(f)
    );
    if (!fallback.length) {
      throw new Error(
        "No valid fields remain after validation and no fallback available."
      );
    }
    fields = fallback;
  }

  const recruiterReadable = new Set(schemaMapper.getRecruiterReadableFields());
  for (const field of fields) {
    if (!recruiterReadable.has(field)) {
      console.warn(
        `Warning: Field '${field}' may not be readable by recruiters.`
      );
    }
  }

  return { ...candidate, fields };
}
|
||||
|
||||
// ---- Run PoC (print only the created query; do not execute) ----
|
||||
(async () => {
|
||||
try {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
if (!process.env.OPENAI_API_KEY)
|
||||
throw new Error("Missing OPENAI_API_KEY env var.");
|
||||
}
|
||||
|
||||
const out = await inferQuery(NL_QUERY);
|
||||
const validated = validateWithOdmdbSchema(out);
|
||||
|
||||
// Just output the created query (no execution)
|
||||
// Output ONLY the created query (no execution)
|
||||
console.log(
|
||||
JSON.stringify(
|
||||
{
|
||||
object: out.object,
|
||||
condition: out.condition,
|
||||
fields: out.fields,
|
||||
object: validated.object,
|
||||
condition: validated.condition,
|
||||
fields: validated.fields,
|
||||
},
|
||||
null,
|
||||
2
|
||||
|
Reference in New Issue
Block a user