// PoC: NL → ODMDB query (seekers), no zod — validate via ODMDB schema
// Usage:
//   1) export OPENAI_API_KEY=sk-...
//   2) node poc.js
//
// Optional environment variables read below:
//   OPENAI_MODEL   model name sent to the Responses API (default "gpt-5")
//   EXECUTE_QUERY  set to "true" to run the generated query against local files
import fs from "node:fs";
import OpenAI from "openai";
import axios from "axios";
import jq from "node-jq";

// ---- Config ----
const MODEL = process.env.OPENAI_MODEL || "gpt-5";

// ODMDB paths - point to actual ODMDB structure
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
const SCHEMA_PATH = `${ODMDB_BASE_PATH}/schema`;
const OBJECTS_PATH = `${ODMDB_BASE_PATH}/objects`;

// ODMDB execution config.
// NOTE(review): ODMDB_BASE_URL, ODMDB_TRIBE and the axios import are not
// referenced by the local-file execution path in this script — confirm whether
// a remote execution mode is still planned before removing them.
const ODMDB_BASE_URL = process.env.ODMDB_BASE_URL || "http://localhost:3000";
const ODMDB_TRIBE = process.env.ODMDB_TRIBE || "smatchit";
const EXECUTE_QUERY = process.env.EXECUTE_QUERY === "true"; // Set to "true" to execute queries

// Hardcoded NL query for the PoC (no multi-turn)
const NL_QUERY = "give me new seekers since last week with email and experience";

// Time zone used to resolve relative dates ("last week", "yesterday") in the prompt.
const PROMPT_TIME_ZONE = "Europe/Paris";

// Fields offered to the model when the schema declares no recruiter access list.
const FALLBACK_READABLE_FIELDS = ["alias", "email", "seekstatus", "seekworkingyear"];

// Natural-language synonyms keyed by seekers field name.
const COMMON_SYNONYMS = Object.freeze({
  email: ["contact", "mail", "contact email"],
  seekworkingyear: ["experience", "years of experience", "work experience"],
  seekjobtitleexperience: ["job titles", "job experience", "positions"],
  seekstatus: ["status", "availability", "looking"],
  dt_create: ["created", "creation date", "new", "recent", "since"],
  salaryexpectation: ["salary", "pay", "compensation", "wage"],
  seeklocation: ["location", "where", "place"],
  mbti: ["personality", "type", "profile"],
  alias: ["id", "identifier", "username"],
});

// A condition must contain at least one of these DSL markers.
const ALLOWED_DSL_TOKENS = ["join(", "idx.", "prop."];

// Tab, LF, CR and printable ASCII only. The original character class listed a
// few extra punctuation characters, all of which already fall in \x20-\x7E.
const PRINTABLE_ASCII = /^[\x09\x0A\x0D\x20-\x7E]+$/;

// ---- Load schemas (safe) ----
/**
 * Read and parse a JSON file without throwing.
 *
 * @param {string} path - File path to read.
 * @returns {*} The parsed JSON value, or null when the file is missing,
 *   unreadable, or not valid JSON (a warning is logged in the latter cases).
 */
function loadJsonSafe(path) {
  try {
    if (fs.existsSync(path)) {
      return JSON.parse(fs.readFileSync(path, "utf-8"));
    }
  } catch (e) {
    console.warn(`Warning: Could not load ${path}:`, e.message);
  }
  return null;
}

// Load actual ODMDB schemas
const SCHEMAS = {
  seekers: loadJsonSafe(`${SCHEMA_PATH}/seekers.json`),
  main: loadJsonSafe("./main.json"), // Fallback consolidated schema
};

// ---- Helpers to read seekers field names from your ODMDB custom schema ----
/**
 * Best-effort extraction of the seekers property names from a consolidated
 * schema whose exact shape is not known in advance.
 *
 * Shapes tried, in order:
 *   1) { objects: { seekers: { properties: {...} } } }
 *   2) an array whose entries are searched recursively
 *   3) any nested node of the form { seekers: { properties: {...} } }
 *
 * @param {*} main - Parsed schema value (may be null).
 * @returns {string[]} Property names, or [] when nothing matches.
 */
function extractSeekersPropsFromOdmdbSchema(main) {
  if (!main) return [];

  const direct = main.objects?.seekers?.properties;
  if (direct && typeof direct === "object") {
    return Object.keys(direct);
  }

  if (Array.isArray(main)) {
    for (const entry of main) {
      const keys = extractSeekersPropsFromOdmdbSchema(entry);
      if (keys.length) return keys;
    }
  }

  try {
    // Iterative depth-first search; `seen` guards against cyclic structures.
    const seen = new Set();
    const stack = [main];
    while (stack.length) {
      const node = stack.pop();
      if (!node || typeof node !== "object" || seen.has(node)) continue;
      seen.add(node);

      const props = node.seekers?.properties;
      if (props && typeof props === "object") {
        return Object.keys(props);
      }
      for (const child of Object.values(node)) {
        if (child && typeof child === "object") stack.push(child);
      }
    }
  } catch (e) {
    // Stay best-effort, but do not hide the reason the search stopped.
    console.warn("Warning: seekers schema deep search failed:", e.message);
  }
  return [];
}

// ---- Schema-based mapping system ----
/**
 * Wraps the seekers schema and exposes lookups used for prompting and
 * validation: field names, natural-language synonyms, recruiter-readable
 * fields, and declared indexes.
 */
class SchemaMapper {
  /**
   * @param {{seekers: ?object, main: *}} schemas - Loaded schemas. `seekers`
   *   is used when present; otherwise `main` is searched for an entry whose
   *   `$id` contains "/seekers".
   */
  constructor(schemas) {
    this.seekersSchema =
      schemas.seekers || this.findSchemaByType("seekers", schemas.main);
    this.fieldMappings = this.buildFieldMappings();
    this.indexMappings = this.buildIndexMappings();

    const propertyCount = Object.keys(
      this.seekersSchema?.properties || {}
    ).length;
    console.log(`📋 Loaded seekers schema with ${propertyCount} properties`);
  }

  /**
   * Find a schema in an array by looking for `/<objectType>` in its `$id`.
   *
   * @param {string} objectType - Object type name, e.g. "seekers".
   * @param {*} schemas - Candidate list; anything but an array yields null.
   * @returns {?object} The first matching schema, or null.
   */
  findSchemaByType(objectType, schemas) {
    if (!Array.isArray(schemas)) return null;
    const needle = `/${objectType}`;
    return (
      schemas.find(
        (schema) =>
          typeof schema?.$id === "string" && schema.$id.includes(needle)
      ) ?? null
    );
  }

  /**
   * Build the lookup table used by mapNLToFields.
   *
   * Each field name maps to a descriptor object; each lower-cased title and
   * synonym maps to the field name (a string). An alias never replaces the
   * descriptor of a real field with the same name.
   *
   * @returns {Object<string, (object|string)>} The lookup table ({} without a schema).
   */
  buildFieldMappings() {
    if (!this.seekersSchema) return {};

    const mappings = {};
    const properties = this.seekersSchema.properties || {};
    const addAlias = (alias, fieldName) => {
      if (!Object.hasOwn(properties, alias)) mappings[alias] = fieldName;
    };

    for (const [fieldName, fieldDef] of Object.entries(properties)) {
      const synonyms = this.generateSynonyms(fieldName, fieldDef);
      mappings[fieldName] = {
        field: fieldName,
        title: fieldDef.title?.toLowerCase(),
        description: fieldDef.description?.toLowerCase(),
        type: fieldDef.type,
        synonyms,
      };

      // Index by title and synonyms
      if (fieldDef.title) addAlias(fieldDef.title.toLowerCase(), fieldName);
      for (const synonym of synonyms) addAlias(synonym.toLowerCase(), fieldName);
    }
    return mappings;
  }

  /**
   * Index the schema's `apxidx` entries by index name.
   *
   * @returns {Object<string, {name: *, type: *, keyval: *}>} Index descriptors
   *   ({} when the schema declares no `apxidx` array).
   */
  buildIndexMappings() {
    const declared = this.seekersSchema?.apxidx;
    if (!Array.isArray(declared)) return {};

    const indexes = {};
    for (const { name, type, keyval } of declared) {
      indexes[name] = { name, type, keyval };
    }
    return indexes;
  }

  /**
   * Return the hand-maintained synonyms for a field.
   *
   * @param {string} fieldName - Schema property name.
   * @param {object} fieldDef - Schema property definition (currently unused).
   * @returns {string[]} A fresh array of synonyms (possibly empty).
   */
  generateSynonyms(fieldName, fieldDef) {
    // Own-property check: a field named e.g. "constructor" must not resolve to
    // an inherited Object.prototype member.
    return Object.hasOwn(COMMON_SYNONYMS, fieldName)
      ? [...COMMON_SYNONYMS[fieldName]]
      : [];
  }

  /**
   * Map natural-language terms to schema field names.
   *
   * @param {string[]} nlTerms - Terms to look up (matched case-insensitively).
   * @returns {string[]} Distinct field names, in first-match order.
   */
  mapNLToFields(nlTerms) {
    const mappedFields = [];
    for (const term of nlTerms) {
      const key = String(term).toLowerCase();
      if (!Object.hasOwn(this.fieldMappings, key)) continue;

      const mapping = this.fieldMappings[key];
      if (typeof mapping === "string") {
        mappedFields.push(mapping);
      } else if (mapping?.field) {
        mappedFields.push(mapping.field);
      }
    }
    return [...new Set(mappedFields)]; // Remove duplicates
  }

  /**
   * @returns {string[]} The schema's `apxaccessrights.recruiters.R` list, or a
   *   basic fallback list when the schema does not provide one.
   */
  getRecruiterReadableFields() {
    const readable = this.seekersSchema?.apxaccessrights?.recruiters?.R;
    return Array.isArray(readable) ? readable : [...FALLBACK_READABLE_FIELDS];
  }

  /** @returns {string[]} All property names of the seekers schema ([] without one). */
  getAllSeekersFields() {
    const properties = this.seekersSchema?.properties;
    return properties ? Object.keys(properties) : [];
  }

  /** @returns {string[]} Names of the indexes declared by the schema. */
  getAvailableIndexes() {
    return Object.keys(this.indexMappings);
  }

  /**
   * @param {string} fieldName - Field to look up.
   * @returns {?string} `idx.<name>` for the first index whose `keyval` equals
   *   fieldName, or null.
   */
  getIndexByField(fieldName) {
    const index = Object.values(this.indexMappings).find(
      (idx) => idx.keyval === fieldName
    );
    return index ? `idx.${index.name}` : null;
  }
}

// Initialize schema mapper
const schemaMapper = new SchemaMapper(SCHEMAS);
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();

{
  const preview = SEEKERS_FIELDS_FROM_SCHEMA.slice(0, 10).join(", ");
  const suffix =
    SEEKERS_FIELDS_FROM_SCHEMA.length > 10
      ? `... (${SEEKERS_FIELDS_FROM_SCHEMA.length} total)`
      : "";
  console.log(`🔍 Available seekers fields: ${preview}${suffix}`);
}

// ---- Minimal mapping config (for prompting + default fields) ----
const seekersMapping = {
  object: "seekers",
  defaultReadableFields: schemaMapper.getRecruiterReadableFields().slice(0, 5), // First 5 readable fields
};

// ---- JSON Schema for Structured Outputs (no zod, no oneOf) ----
/**
 * Build the JSON Schema the model output must satisfy: a `seekers` object
 * name, a non-empty list of condition strings, and a non-empty list of fields
 * restricted to the recruiter-readable ones.
 *
 * @returns {object} JSON Schema object.
 */
function buildResponseJsonSchema() {
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  return {
    type: "object",
    additionalProperties: false,
    properties: {
      object: { type: "string", enum: ["seekers"] },
      condition: { type: "array", items: { type: "string" }, minItems: 1 },
      fields: {
        type: "array",
        items: { type: "string", enum: recruiterReadableFields },
        minItems: 1,
      },
    },
    required: ["object", "condition", "fields"],
  };
}

// ---- Date helpers for the prompt ----
/**
 * Format an instant as the YYYY-MM-DD calendar date observed in a time zone.
 *
 * @param {Date} date - Instant to format.
 * @param {string} timeZone - IANA zone name, e.g. "Europe/Paris".
 * @returns {string} ISO calendar date.
 */
function isoDateInTimeZone(date, timeZone) {
  const parts = new Intl.DateTimeFormat("en-US", {
    timeZone,
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
  }).formatToParts(date);
  const pick = (type) => parts.find((part) => part.type === type).value;
  return `${pick("year")}-${pick("month")}-${pick("day")}`;
}

/**
 * Add a (possibly negative) number of days to a YYYY-MM-DD date.
 *
 * @param {string} isoDate - Calendar date, YYYY-MM-DD.
 * @param {number} days - Days to add.
 * @returns {string} Shifted calendar date, YYYY-MM-DD.
 */
function shiftIsoDate(isoDate, days) {
  // Pure calendar arithmetic in UTC, so DST transitions cannot skew the result.
  const shifted = new Date(`${isoDate}T00:00:00Z`);
  shifted.setUTCDate(shifted.getUTCDate() + days);
  return shifted.toISOString().slice(0, 10);
}

// ---- Prompt builders ----
/**
 * Build the system prompt: DSL summary, schema-derived field and index lists,
 * natural-language mappings, and the absolute dates the model should use for
 * relative expressions.
 *
 * @param {Date} [now=new Date()] - Reference instant for "today"; the dates in
 *   the prompt were previously hard-coded and went stale.
 * @returns {string} Prompt text.
 */
function systemPrompt(now = new Date()) {
  const availableFields = schemaMapper.getAllSeekersFields();
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  const availableIndexes = schemaMapper.getAvailableIndexes();

  const today = isoDateInTimeZone(now, PROMPT_TIME_ZONE);
  const lastWeek = shiftIsoDate(today, -7);
  const yesterday = shiftIsoDate(today, -1);

  const fieldPreview = `${availableFields.slice(0, 15).join(", ")}${
    availableFields.length > 15 ? "..." : ""
  }`;
  const defaultFields = recruiterReadableFields.slice(0, 5).join(", ");

  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.(value) - for indexed fields",
    "- prop.(operator:value) - for direct property queries",
    "",
    "Available seekers fields:",
    fieldPreview,
    "",
    "Available indexes for optimization:",
    availableIndexes.join(", "),
    "",
    "Recruiter-readable fields (use these for field selection):",
    recruiterReadableFields.join(", "),
    "",
    "Field mappings for natural language:",
    "- 'email' → email",
    "- 'experience' → seekworkingyear",
    "- 'job titles' → seekjobtitleexperience",
    "- 'status' → seekstatus",
    "- 'salary' → salaryexpectation",
    "- 'location' → seeklocation",
    "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- Use indexes when possible (idx.seekstatus_alias for status queries)",
    "- For date filters, use prop.dt_create with absolute dates",
    "- Only return recruiter-readable fields in 'fields' array",
    `- Default fields if request is generic: ${defaultFields}`,
    "",
    `Timezone is ${PROMPT_TIME_ZONE}. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    `Interpret 'yesterday' as → ${yesterday}.`,
  ].join("\n");
}

/**
 * @param {string} nl - Natural-language request.
 * @returns {string} User message wrapping the request.
 */
function userPrompt(nl) {
  return `Natural language request: "${nl}"\nReturn ONLY the JSON object.`;
}

// ---- OpenAI call using Responses API (text.format) ----
// Created on first use rather than at import time, so the entry point's
// "Missing OPENAI_API_KEY" check runs first.
// NOTE(review): the openai SDK is understood to throw from its constructor
// when no API key is available — confirm against the SDK version in use.
let openAiClient = null;

/** @returns {OpenAI} The shared client, created on first call. */
function getOpenAiClient() {
  openAiClient ??= new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  return openAiClient;
}

/**
 * Ask the model to translate a natural-language request into an ODMDB query.
 *
 * @param {string} nlText - Natural-language request.
 * @returns {Promise<object>} The parsed JSON object returned by the model.
 * @throws {Error} When the model returns no text or text that is not JSON.
 */
async function inferQuery(nlText) {
  const resp = await getOpenAiClient().responses.create({
    model: MODEL,
    input: [
      { role: "system", content: systemPrompt() },
      { role: "user", content: userPrompt(nlText) },
    ],
    text: {
      format: {
        name: "OdmdbQuery",
        type: "json_schema",
        schema: buildResponseJsonSchema(),
        strict: true,
      },
    },
  });

  const jsonText = resp.output_text || resp.output?.[0]?.content?.[0]?.text;
  if (!jsonText) throw new Error("Empty model output");

  try {
    return JSON.parse(jsonText);
  } catch (err) {
    throw new Error("Model output is not valid JSON", { cause: err });
  }
}

// ---- Validate using the ODMDB schema (not zod) ----
/**
 * Validate a model-produced query and normalise its field list.
 *
 * Checks the overall shape, requires every condition to be a printable-ASCII
 * string containing a DSL marker, then cleans `fields`:
 *   - non-string / blank entries and duplicates are removed;
 *   - when the seekers schema is loaded, fields it does not define are dropped
 *     with a warning, falling back to the default readable fields if nothing
 *     is left;
 *   - when no schema is loaded, the cleaned list is accepted as is;
 *   - fields outside the recruiter-readable list only produce a warning.
 *
 * The original code threw on the first unknown field before reaching its own
 * drop/fallback logic, which made that logic unreachable and failed every
 * query when no schema could be loaded.
 *
 * @param {*} candidate - Parsed model output.
 * @returns {object} A copy of `candidate` with the cleaned `fields` array.
 * @throws {Error} On an invalid shape, an invalid condition, or when no usable
 *   field remains.
 */
function validateWithOdmdbSchema(candidate) {
  // Basic shape checks (already enforced by Structured Outputs, but keep defensive)
  if (!candidate || typeof candidate !== "object") {
    throw new Error("Invalid response (not an object).");
  }
  if (candidate.object !== "seekers") {
    throw new Error("Invalid object; must be 'seekers'.");
  }
  if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) {
    throw new Error(
      "Invalid 'condition'; must be a non-empty array of strings."
    );
  }
  if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  // DSL token sanity
  for (const c of candidate.condition) {
    if (typeof c !== "string") {
      throw new Error("Condition entries must be strings.");
    }
    const tokenOK = ALLOWED_DSL_TOKENS.some((t) => c.includes(t));
    const ascii = PRINTABLE_ASCII.test(c);
    if (!tokenOK || !ascii) {
      // NOTE(review): the original message was lost with the truncated source
      // text; this wording is a reconstruction.
      throw new Error(
        `Invalid condition '${c}'; expected printable ASCII containing one of: ${ALLOWED_DSL_TOKENS.join(
          ", "
        )}`
      );
    }
  }

  // Keep non-blank strings only, without duplicates.
  let fields = [
    ...new Set(
      candidate.fields.filter((f) => typeof f === "string" && f.trim())
    ),
  ];

  const schemaFields = new Set(SEEKERS_FIELDS_FROM_SCHEMA);
  if (schemaFields.size > 0) {
    const unknown = fields.filter((f) => !schemaFields.has(f));
    if (unknown.length) {
      // Drop unknown but continue (PoC behavior)
      console.warn(
        "⚠️ Dropping unknown fields (not in seekers schema):",
        unknown
      );
      fields = fields.filter((f) => schemaFields.has(f));
    }
    if (!fields.length) {
      // If all dropped, fallback to default shortlist intersected with schema
      fields = seekersMapping.defaultReadableFields.filter((f) =>
        schemaFields.has(f)
      );
    }
  }
  // else: schema could not be read, so the cleaned list is all we can check.

  if (!fields.length) {
    throw new Error(
      "No valid fields remain after validation and no fallback available."
    );
  }

  const recruiterReadable = new Set(schemaMapper.getRecruiterReadableFields());
  for (const field of fields) {
    if (!recruiterReadable.has(field)) {
      console.warn(
        `Warning: Field '${field}' may not be readable by recruiters.`
      );
    }
  }

  return { ...candidate, fields };
}

// ---- Local ODMDB Data Access ----
/**
 * Load seeker items from `${OBJECTS_PATH}/seekers/itm`.
 *
 * Only the first 50 `.json` files (in directory-listing order) are read; files
 * that fail to read or parse are skipped with a warning.
 *
 * @returns {object[]} Parsed seeker objects ([] when the directory is missing
 *   or cannot be listed).
 */
function loadSeekersData() {
  const seekersItemsPath = `${OBJECTS_PATH}/seekers/itm`;
  try {
    if (!fs.existsSync(seekersItemsPath)) {
      console.error(`❌ Seekers data directory not found: ${seekersItemsPath}`);
      return [];
    }

    const files = fs
      .readdirSync(seekersItemsPath)
      .filter((file) => file.endsWith(".json") && file !== "backup")
      .slice(0, 50); // Limit to first 50 files for PoC performance

    console.log(
      `📁 Loading ${files.length} seeker files from ${seekersItemsPath}`
    );

    const seekers = [];
    for (const file of files) {
      try {
        const filePath = `${seekersItemsPath}/${file}`;
        seekers.push(JSON.parse(fs.readFileSync(filePath, "utf-8")));
      } catch (error) {
        console.warn(`⚠️ Could not load ${file}:`, error.message);
      }
    }
    return seekers;
  } catch (error) {
    console.error("❌ Error loading seekers data:", error.message);
    return [];
  }
}

// ---- Local ODMDB Query Execution ----
/**
 * Run a generated query against the local seeker files.
 *
 * Only two condition forms are interpreted (simplified DSL processing):
 *   - `prop.dt_create(>=:YYYY-MM-DD)` keeps seekers created on/after the date;
 *   - `idx.seekstatus_alias(<status>)` keeps seekers with that `seekstatus`.
 * Any other condition is ignored. Results contain only the requested fields
 * that exist on each seeker.
 *
 * @param {{condition: string[], fields: string[]}} query - Validated query.
 * @returns {Promise<?{data: object[], meta?: object}>} Results, or null when
 *   execution is disabled or fails.
 */
async function executeOdmdbQuery(query) {
  if (!EXECUTE_QUERY) {
    console.log(
      "💡 Query execution disabled. Set EXECUTE_QUERY=true to enable."
    );
    return null;
  }

  try {
    console.log(
      `\n🔍 Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/`
    );
    console.log("Query conditions:", query.condition);
    console.log("Requested fields:", query.fields);

    // Load all seekers data
    const allSeekers = loadSeekersData();
    if (allSeekers.length === 0) {
      console.log("❌ No seekers data found");
      return { data: [] };
    }
    console.log(`📊 Loaded ${allSeekers.length} seekers for filtering`);

    // Apply basic filtering (simplified DSL processing)
    let filteredSeekers = allSeekers;
    for (const condition of query.condition) {
      if (condition.includes("prop.dt_create(>=:")) {
        // Extract date from condition like "prop.dt_create(>=:2025-10-07)"
        const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/);
        if (dateMatch) {
          const filterDate = new Date(dateMatch[1]);
          filteredSeekers = filteredSeekers.filter((seeker) => {
            if (!seeker.dt_create) return false;
            // An unparseable dt_create compares false and is filtered out.
            return new Date(seeker.dt_create) >= filterDate;
          });
          console.log(
            `🗓️ Filtered by date >= ${dateMatch[1]}: ${filteredSeekers.length} results`
          );
        }
      }

      if (condition.includes("idx.seekstatus_alias(")) {
        // Extract status from condition like "idx.seekstatus_alias(startasap)"
        const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/);
        if (statusMatch) {
          const status = statusMatch[1];
          filteredSeekers = filteredSeekers.filter(
            (seeker) => seeker.seekstatus === status
          );
          console.log(
            `👤 Filtered by status ${status}: ${filteredSeekers.length} results`
          );
        }
      }
    }

    // Select only requested fields
    const results = filteredSeekers.map((seeker) => {
      const selected = {};
      for (const field of query.fields) {
        if (Object.hasOwn(seeker, field)) {
          selected[field] = seeker[field];
        }
      }
      return selected;
    });

    console.log(
      `✅ Query executed successfully! Found ${results.length} matching seekers`
    );

    return {
      data: results,
      meta: {
        total: results.length,
        source: "local_files",
        path: `${OBJECTS_PATH}/seekers/itm/`,
      },
    };
  } catch (error) {
    console.error("❌ Local query execution failed:", error.message);
    return null;
  }
}

// ---- Result Processing with jq ----
/**
 * Run a jq filter over `results.data`.
 *
 * @param {?{data: *}} results - Query results.
 * @param {string} [jqFilter="."] - jq program.
 * @returns {Promise<*>} Whatever node-jq returns for the filter; the data as
 *   pretty-printed JSON text if jq fails; null when there is nothing to process.
 */
async function processResults(results, jqFilter = ".") {
  if (!results || !results.data) {
    console.log("No results to process.");
    return null;
  }

  try {
    // Use jq to filter and format results (pass data directly, not as string)
    return await jq.run(jqFilter, results.data, { input: "json" });
  } catch (error) {
    console.error("❌ jq processing failed:", error.message);
    return JSON.stringify(results.data, null, 2); // Return formatted JSON if jq fails
  }
}

// ---- Run PoC (generate query and optionally execute) ----
(async () => {
  try {
    if (!process.env.OPENAI_API_KEY) {
      throw new Error("Missing OPENAI_API_KEY env var.");
    }

    console.log(`🤖 Processing NL query: "${NL_QUERY}"`);
    console.log("=".repeat(60));

    // Step 1: Generate ODMDB query from natural language
    const out = await inferQuery(NL_QUERY);
    const validated = validateWithOdmdbSchema(out);

    console.log("✅ Generated ODMDB Query:");
    const generatedQuery = {
      object: validated.object,
      condition: validated.condition,
      fields: validated.fields,
    };
    console.log(JSON.stringify(generatedQuery, null, 2));

    // Step 2: Execute query if enabled
    if (EXECUTE_QUERY) {
      console.log("\n" + "=".repeat(60));
      const results = await executeOdmdbQuery(generatedQuery);

      if (results) {
        console.log("✅ Query executed successfully!");
        console.log(`📊 Found ${results.data?.length || 0} results`);

        // Step 3: Process results with jq
        console.log("\n📋 Results Summary:");
        const summary = await processResults(
          results,
          `.[0:3] | map({alias, email, seekstatus})`
        );
        // processResults hands back text both from jq and from its fallback;
        // stringifying that again would print an escaped, quoted string.
        console.log(
          typeof summary === "string"
            ? summary
            : JSON.stringify(summary, null, 2)
        );

        // Optional: Show full results count
        if (results.data?.length > 3) {
          console.log(`\n... and ${results.data.length - 3} more results`);
        }

        // Step 4: Export to CSV format
        console.log("\n📄 CSV Preview:");
        const csvData = await processResults(
          results,
          `
          map([.alias // "N/A", .email // "N/A", .seekstatus // "N/A"])
          | ["alias","email","status"] as $header
          | [$header] + .[0:5]
          | .[]
          | @csv
          `
        );
        if (csvData) {
          console.log(csvData);
        }
      }
    } else {
      console.log(
        "\n💡 To execute this query against ODMDB, set EXECUTE_QUERY=true"
      );
      console.log(`   Example: EXECUTE_QUERY=true npm start`);
    }
  } catch (e) {
    console.error("❌ PoC failed:", e.message || e);
    process.exit(1);
  }
})();