[INIT] initial PoC of the concept for ql creation from NL

This commit is contained in:
Eliyan
2025-10-13 12:33:20 +02:00
commit 6dbfe5cb07
6 changed files with 3445 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
node_modules/
dist/
.env
.vscode/
package-lock.json

119
README.md Normal file
View File

@@ -0,0 +1,119 @@
# ODMDB Natural Language Query PoC
This is a **Proof of Concept (PoC)** that demonstrates the conversion of natural language queries into ODMDB search queries using OpenAI's structured output API.
## Current Status
⚠️ **Partial Implementation**: Currently only the **seekers** object mapping is implemented. This PoC focuses on demonstrating the natural language to DSL query conversion for seeker-related searches.
## Features
- Converts natural language requests into ODMDB DSL queries
- Handles temporal queries ("new seekers since last week")
- Maps human-readable field names to schema fields
- Validates output using Zod schema validation
- Uses OpenAI's structured output for reliable JSON generation
## Prerequisites
- Node.js (v16 or higher)
- OpenAI API key
## Installation
1. Install dependencies:
```bash
npm install
```
2. Set your OpenAI API key:
```bash
export OPENAI_API_KEY=sk-your-api-key-here
```
## Usage
### Running the PoC
```bash
npm start
```
This will process the hardcoded natural language query and output the generated ODMDB query in JSON format.
### Changing the Query
To test different natural language queries, edit the `NL_QUERY` constant in `poc.js`:
```javascript
// Line 16 in poc.js
const NL_QUERY = "your natural language query here";
```
### Example Queries
- `"give me new seekers since last week with email and experience"`
- `"find recent seekers with job titles and salary expectations"`
- `"show me seekers from yesterday with their skills"`
## Output Format
The PoC generates ODMDB queries in this format:
```json
{
"object": "seekers",
"condition": [
"prop.dt_create(>=:2025-10-06)"
],
"fields": [
"alias",
"email",
"seekworkingyear"
]
}
```
## ODMDB DSL Support
The PoC understands and generates these ODMDB DSL patterns:
- **Property queries**: `prop.<field>(operator:value)`
- **Index queries**: `idx.<indexName>(value)`
- **Join queries**: `join(remoteObject:localKey:remoteProp:operator:value)`
## Field Mappings
Currently supports mapping for seekers object:
- `email` → `email`
- `experience` → `seekworkingyear`
- `job titles` → `seekjobtitleexperience`
- `status` → `seekstatus`
## Schema Context
The PoC can optionally load schema files for context:
- `main.json` - Combined schema definitions
- `lg.json` - Localization/language mappings
## Limitations
- **Seekers only**: Other ODMDB objects (jobads, recruiters, etc.) are not yet implemented
- **No execution**: Only generates queries, doesn't execute them against ODMDB
- **Hardcoded query**: Single query per run (no interactive mode)
- **Basic validation**: Limited DSL syntax validation
## Next Steps
- [ ] Add support for other ODMDB objects (jobads, recruiters, etc.)
- [ ] Interactive CLI for multiple queries
- [ ] Integration with actual ODMDB backend
- [ ] Enhanced field mapping and validation
- [ ] Multi-turn conversation support
## Files
- `poc.js` - Main PoC implementation
- `package.json` - Dependencies and scripts
- `main.json` - Optional schema context (if available)
- `lg.json` - Optional localization context (if available)

281
lg.json Normal file
View File

@@ -0,0 +1,281 @@
[
{
"comment": "lg/hobbies_fr.json"
},
{
"Lecture": [
"Roman",
"Science-fiction",
"Policier",
"Biographie",
"Essai",
"Poésie",
"Magazine",
"Bandes dessinées"
],
"Jardinage": [
"Potager",
"Fleurs",
"Aménagement paysager",
"Bonsaï",
"Jardinage urbain",
"Jardins d'intérieur"
],
"Cuisine": [
"Pâtisserie",
"Cuisine internationale",
"Cuisine végétarienne",
"Cuisine fusion",
"Cuisine moléculaire",
"Cuisine de rue",
"Confitures et conserves"
],
"Photographie": [
"Paysages",
"Portraits",
"Macro",
"Photographie de rue",
"Photographie animalière",
"Photographie de mode",
"Photographie de mariage"
],
"Peinture": [
"Acrylique",
"Huile",
"Aquarelle",
"Pastel",
"Peinture sur verre",
"Peinture sur toile",
"Peinture abstraite"
],
"Randonnée": [
"Randonnée en montagne",
"Randonnée pédestre",
"Randonnée en forêt",
"Randonnée côtière",
"Randonnée en groupe",
"Randonnée nocturne"
],
"Musique": [
"Jouer d'un instrument",
"Chant",
"Composition",
"Musique classique",
"Musique rock",
"Musique jazz",
"Musique électronique"
],
"Danse": [
"Salsa",
"Bachata",
"Tango",
"Hip-hop",
"Danse contemporaine",
"Danse de salon",
"Danse orientale"
],
"Écriture": [
"Roman",
"Nouvelle",
"Poésie",
"Scénario",
"Blog",
"Journal intime",
"Lettres"
],
"Bricolage": [
"Menuiserie",
"Électricité",
"Plomberie",
"Décoration",
"Peinture",
"Couture",
"Restauration de meubles"
],
"Jeux de société": [
"Jeu de cartes",
"Jeu de plateau",
"Jeu de rôle",
"Jeu de stratégie",
"Jeu de dés",
"Jeu de société coopératif",
"Jeu de société d'ambiance"
],
"Sports": [
"Football",
"Basketball",
"Tennis",
"Natation",
"Course à pied",
"Yoga",
"Cyclisme"
],
"Voyages": [
"Voyages en Europe",
"Voyages en Asie",
"Voyages en Amérique",
"Voyages en Afrique",
"Voyages en Océanie",
"Voyages d'aventure",
"Voyages culturels"
],
"Collection de timbres": [
"Timbres classiques",
"Timbres thématiques",
"Timbres rares",
"Timbres du monde",
"Timbres oblitérés",
"Timbres neufs",
"Timbres anciens"
],
"Couture": [
"Vêtements",
"Accessoires",
"Patchwork",
"Couture pour enfants",
"Broderie",
"Travail du cuir",
"Customisation"
],
"Tricot": [
"Écharpes",
"Pulls",
"Chaussettes",
"Bonnet",
"Gants",
"Couvertures",
"Peluches"
],
"Modélisme": [
"Modélisme ferroviaire",
"Modélisme naval",
"Modélisme aérien",
"Modélisme automobile",
"Modélisme architectural",
"Modélisme spatial",
"Modélisme militaire"
],
"Jeu d'échecs": [
"Parties classiques",
"Parties rapides",
"Variantes",
"Études",
"Problèmes d'échecs",
"Compétitions",
"Analyse de parties"
],
"Sculpture": [
"Argile",
"Pierre",
"Bois",
"Métal",
"Verre",
"Céramique",
"Sculpture sur glace"
],
"Camping": [
"Camping en tente",
"Camping-car",
"Randonnée avec camping",
"Feu de camp",
"Cuisine en plein air",
"Observation des étoiles",
"Activités de plein air"
]
},
{
"comment": "lg/persons_fr.json"
},
{
"title": "Une Personne au niveau d'une tribut avec des informations personnelles",
"description": "Un alias peut se stocker comme un objet Person avec des informations supplémentaires permettant de qualifier son profil",
"properties": {
"alias": {
"title": "Une identité numérique d'apxtri"
},
"owner": {
"title": "Le propriétaire de cet objet (celui qui posséde la clé)"
},
"dt_create": {
"title": "Date de creation"
},
"dt_update": {
"title": "Date de mise à jour"
},
"dt_lastlogin": {
"title": "Date de derniere authentification"
},
"dt_delete": {
"title": "Date de fermeture du compte"
},
"will": {
"title": "Nom du script à lancer lors d'une fermeture de compte"
},
"recoveryauth": {
"title": "Information pour recuperer ses codes d'accès",
"description": "Cet objet garde votre identité numérique, en vue d'une demande de récupération par email.",
"properties": {
"email": {
"title": "email de recuperation"
},
"privatekey": {
"title": "La cle privée associé à l'alias"
},
"passphrase": {
"title": "La passphrase eventuelle"
}
}
},
"firstname": {
"title": "Votre prenom",
"description": "Ce prénom s'affichera pour les membres de smatchit"
},
"lastname": {
"title": "Votre nom de famille",
"description": "Ce nom s'affichera pour les membres de smatchit"
},
"termandcondition": {
"title": "J'accepte les conditions d'utilisation de smatchit",
"description": "Conditions générales d'utilisation et de ventes de Smatchit"
},
"truthfullinformation": {
"title": "I certify all my information",
"description": "I certify all my information is truthful"
},
"contactfromschool": {
"title": "J'accepte de recevoir des offres de formations de nos partenaires",
"description": "Nous pouvons vous recommander des formations correspondant à votre profil"
},
"dt_birth": {
"title": "Votre date anniversaire",
"description": "Date de naissance"
},
"pronom": {
"title": "Pronom",
"description": "La façon dont on doit s'adresser à votre personne"
},
"emailcom": {
"title": "L'email de communication",
"description": "Cet email sera utilisé pour communiquer avec les membres de smatchit"
},
"hobbies": {
"title": "Mes hobbies"
},
"biography": {
"title": "Quelques mots sur moi",
"description": "Ce texte sera partagé pour tous les membres de smatchit, recruteur et chercheur d'emploi"
},
"mbti": {
"title": "Mon profil mbti en tant que personne"
},
"imgavatar": {
"title": "Une image public qui me represente",
"description": "Image qui en dit long sur ma personnalité, attention elle est public"
},
"profils": {
"title": "Liste de mes profils",
"description": "Chaque profils donne des droits particulier à chaque personne sur les objet de smatchit"
}
}
}
]

2868
main.json Normal file

File diff suppressed because it is too large Load Diff

13
package.json Normal file
View File

@@ -0,0 +1,13 @@
{
"name": "nl2odmdb-poc",
"version": "0.1.0",
"type": "module",
"private": true,
"scripts": {
"start": "node poc.js"
},
"dependencies": {
"openai": "^4.60.0",
"zod": "^3.23.8"
}
}

159
poc.js Normal file
View File

@@ -0,0 +1,159 @@
// PoC: NL → ODMDB query (seekers)
// Usage:
// 1) export OPENAI_API_KEY=sk-...
// 2) node poc.js
import fs from "node:fs";
import OpenAI from "openai";
import { z } from "zod";
// ---- Config ----
// Model id: OPENAI_MODEL wins when it is set and non-empty; otherwise "gpt-5".
const MODEL = process.env.OPENAI_MODEL ? process.env.OPENAI_MODEL : "gpt-5";

// Optional context files. A missing file is tolerated by the loader.
const MAIN_SCHEMA_PATH = "./main.json";
const LG_SCHEMA_PATH = "./lg.json";

// The single natural language request this PoC converts (no multi-turn).
const NL_QUERY = "give me new seekers since last week with email and experience";
// ---- Load schemas if present (not required for output) ----
/**
 * Best-effort loader for an optional JSON context file.
 *
 * A file that does not exist is an expected situation and yields `null`
 * silently. Any other failure (unreadable path, directory, malformed JSON)
 * also yields `null`, but is reported on stderr so that a broken schema file
 * is not mistaken for an absent one.
 *
 * @param {string} path - Location of the JSON file to read.
 * @returns {*} The parsed JSON value, or `null` when the file is missing,
 *   unreadable, or not valid JSON.
 */
function loadJsonSafe(path) {
  let raw;
  try {
    // Read directly instead of checking existence first: this avoids a
    // check-then-read race and lets the error code tell us what went wrong.
    raw = fs.readFileSync(path, "utf-8");
  } catch (err) {
    if (err?.code !== "ENOENT") {
      console.warn(
        `Could not read optional schema file ${path}: ${err?.message ?? err}`
      );
    }
    return null;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    console.warn(
      `Ignoring optional schema file ${path}: invalid JSON (${err.message})`
    );
    return null;
  }
}
// Optional schema context keyed by short name; a value is null when the
// corresponding file could not be loaded.
const SCHEMAS = Object.fromEntries(
  [
    ["main", MAIN_SCHEMA_PATH],
    ["lg", LG_SCHEMA_PATH],
  ].map(([key, file]) => [key, loadJsonSafe(file)])
);
// ---- Seekers mapping (from our agreement) ----
// `object` is the ODMDB object name; `readableFieldsForRecruiters` is the
// default shortlist that the system prompt offers for generic requests.
const seekersMapping = {
  object: "seekers",
  readableFieldsForRecruiters: [
    "alias",
    "email",
    "seekstatus",
    "seekworkingyear",
    "seekjobtitleexperience",
  ],
};
// ---- Output contract (strict) ----
// Runtime validation of the model's reply. Kept in step with the JSON Schema
// sent to the API: `fields` must be a non-empty array of strings there
// (minItems: 1), so the same lower bound is enforced here.
const OdmdbQueryZ = z.object({
  object: z.literal("seekers"),
  condition: z.array(z.string()),
  fields: z.array(z.string()).min(1), // always an array, never empty
});
// JSON Schema handed to the model as the structured-output format.
// All three properties are required and no extra properties are allowed;
// `fields` must contain at least one entry.
const RESPONSE_JSON_SCHEMA = {
  type: "object",
  additionalProperties: false,
  properties: {
    object: {
      type: "string",
      enum: ["seekers"],
    },
    condition: {
      type: "array",
      items: { type: "string" },
    },
    fields: {
      type: "array",
      items: { type: "string" },
      minItems: 1,
    },
  },
  required: ["object", "condition", "fields"],
};
// ---- Prompt builders ----

/**
 * Formats the Europe/Paris calendar date of an instant as YYYY-MM-DD,
 * optionally shifted by a whole number of calendar days.
 *
 * The shift is applied to the calendar date (not to the timestamp), so a
 * daylight-saving change between the two dates cannot move the result by a day.
 *
 * @param {Date} date - The instant to convert.
 * @param {number} [offsetDays=0] - Calendar days to add (negative = earlier).
 * @returns {string} The date as YYYY-MM-DD.
 */
function parisCalendarDate(date, offsetDays = 0) {
  const parts = new Intl.DateTimeFormat("en-US", {
    timeZone: "Europe/Paris",
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
  }).formatToParts(date);
  const pick = (type) =>
    Number(parts.find((part) => part.type === type).value);
  // Date.UTC normalises out-of-range days, e.g. day -3 rolls into the previous month.
  const shifted = new Date(
    Date.UTC(pick("year"), pick("month") - 1, pick("day") + offsetDays)
  );
  return shifted.toISOString().slice(0, 10);
}

/**
 * Builds the system message: task description, ODMDB DSL summary, mapping
 * rules, the reference dates used to resolve relative time expressions, and
 * the optional schema context serialized as JSON.
 *
 * @param {Date} [now=new Date()] - Instant treated as "now". "Today" and
 *   "last week" (7 calendar days earlier) are derived from it in Europe/Paris
 *   instead of being frozen to the day the PoC was written.
 * @returns {string} The newline-joined system prompt.
 */
function systemPrompt(now = new Date()) {
  const today = parisCalendarDate(now);
  const lastWeek = parisCalendarDate(now, -7);

  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema. The 'fields' property MUST be an array of strings.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value)",
    "- prop.<field>(operator:value) with dates or scalars.",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- For 'new'/'recent' recency, map to prop.dt_create with a resolved absolute date.",
    "- For 'experience', map to seekworkingyear.",
    "- Prefer recruiter-readable fields if a small set is requested. If the request is generic, return this default shortlist:",
    seekersMapping.readableFieldsForRecruiters.join(", "),
    "",
    `Timezone is Europe/Paris. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    "",
    "Schemas (context only, may be null):",
    JSON.stringify(SCHEMAS, null, 2),
  ].join("\n");
}
/**
 * Builds the user-role message carrying the natural language request.
 *
 * @param {string} nl - Request text; inserted verbatim between double quotes.
 * @returns {string} Two lines: the quoted request, then the JSON-only reminder.
 */
function userPrompt(nl) {
  const requestLine = `Natural language request: "${nl}"`;
  const reminderLine = "Return ONLY the JSON object.";
  return [requestLine, reminderLine].join("\n");
}
// ---- OpenAI call using Responses API (text.format) ----
// The SDK constructor rejects a missing API key, which would abort the module
// at load time before the startup check can print its own
// "Missing OPENAI_API_KEY env var." message. Build the client only when the
// key is present; it stays null otherwise and must not be used in that case.
const client = process.env.OPENAI_API_KEY
  ? new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
  : null;
// Substrings that mark a condition as one of the known DSL forms.
const DSL_TOKENS = ["join(", "idx.", "prop."];
// Tab, LF, CR and printable ASCII only.
const ASCII_ONLY = /^[\x09\x0A\x0D\x20-\x7E]+$/;

/**
 * Extracts the text payload from a Responses API result.
 *
 * Uses the aggregated `output_text` when present; otherwise scans every
 * output item for the first non-empty text part, because the first item is
 * not necessarily the message (e.g. a reasoning item may come first).
 *
 * @param {object} resp - The value returned by `client.responses.create`.
 * @returns {string} The raw text produced by the model.
 * @throws {Error} "Empty model output" when no text can be found.
 */
function extractOutputText(resp) {
  if (typeof resp?.output_text === "string" && resp.output_text !== "") {
    return resp.output_text;
  }
  const items = Array.isArray(resp?.output) ? resp.output : [];
  for (const item of items) {
    const parts = Array.isArray(item?.content) ? item.content : [];
    for (const part of parts) {
      if (typeof part?.text === "string" && part.text !== "") {
        return part.text;
      }
    }
  }
  throw new Error("Empty model output");
}

/**
 * Light safety check on DSL tokens: each condition must contain at least one
 * known DSL marker and consist solely of tab/LF/CR/printable ASCII.
 * This is a sanity filter, not a DSL parser.
 *
 * @param {string[]} conditions - Condition strings from the validated reply.
 * @throws {Error} "Malformed condition: …" for the first offending entry.
 */
function assertWellFormedConditions(conditions) {
  for (const condition of conditions) {
    const hasDslToken = DSL_TOKENS.some((token) => condition.includes(token));
    if (!hasDslToken || !ASCII_ONLY.test(condition)) {
      throw new Error(`Malformed condition: ${condition}`);
    }
  }
}

/**
 * Asks the model to turn a natural language request into an ODMDB query and
 * validates the reply.
 *
 * @param {string} nlText - The natural language request.
 * @returns {Promise<object>} The query as returned by `OdmdbQueryZ.parse`.
 * @throws {Error} When the model returns no text, the text is not JSON, the
 *   JSON fails `OdmdbQueryZ` validation, or a condition fails the DSL check.
 */
async function inferQuery(nlText) {
  const resp = await client.responses.create({
    model: MODEL,
    input: [
      { role: "system", content: systemPrompt() },
      { role: "user", content: userPrompt(nlText) },
    ],
    text: {
      // <= new location for structured output format
      format: {
        name: "OdmdbQuery",
        type: "json_schema",
        schema: RESPONSE_JSON_SCHEMA,
        strict: true,
      },
    },
  });

  const jsonText = extractOutputText(resp);

  let parsed;
  try {
    parsed = JSON.parse(jsonText);
  } catch (err) {
    // Keep the parser's message but say where the bad text came from.
    throw new Error(`Model output is not valid JSON: ${err.message}`, {
      cause: err,
    });
  }

  const validated = OdmdbQueryZ.parse(parsed);
  assertWellFormedConditions(validated.condition);
  return validated;
}
// ---- Run PoC (print only the created query; do not execute) ----
/**
 * Entry point: checks that an API key is configured, converts the hardcoded
 * request, and prints the resulting query as indented JSON on stdout.
 * Any failure is reported on stderr and ends the process with exit code 1.
 */
async function main() {
  try {
    const hasApiKey = Boolean(process.env.OPENAI_API_KEY);
    if (!hasApiKey) {
      throw new Error("Missing OPENAI_API_KEY env var.");
    }

    const out = await inferQuery(NL_QUERY);

    // Just output the created query (no execution)
    const printable = {
      object: out.object,
      condition: out.condition,
      fields: out.fields,
    };
    console.log(JSON.stringify(printable, null, 2));
  } catch (e) {
    console.error("PoC failed:", e.message || e);
    process.exit(1);
  }
}

main();