Files
Poc-dashboard/poc.js

641 lines
19 KiB
JavaScript
Raw Blame History

// PoC: NL → ODMDB query (seekers), no zod — validate via ODMDB schema
// Usage:
// 1) export OPENAI_API_KEY=sk-...
// 2) node poc.js
import fs from "node:fs";
import OpenAI from "openai";
import axios from "axios";
import jq from "node-jq";
// ---- Config ----
// Model name sent to OpenAI; OPENAI_MODEL wins when it is set and non-empty.
const MODEL = process.env.OPENAI_MODEL ? process.env.OPENAI_MODEL : "gpt-5";

// ODMDB paths - point to actual ODMDB structure (relative to the working directory).
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
const SCHEMA_PATH = [ODMDB_BASE_PATH, "schema"].join("/");
const OBJECTS_PATH = [ODMDB_BASE_PATH, "objects"].join("/");

// ODMDB execution config; each value falls back to a default when the
// corresponding environment variable is unset or empty.
const ODMDB_BASE_URL = process.env.ODMDB_BASE_URL
  ? process.env.ODMDB_BASE_URL
  : "http://localhost:3000";
const ODMDB_TRIBE = process.env.ODMDB_TRIBE
  ? process.env.ODMDB_TRIBE
  : "smatchit";
// Only the exact string "true" enables query execution.
const EXECUTE_QUERY = "true" === process.env.EXECUTE_QUERY;

// Hardcoded NL query for the PoC (no multi-turn)
const NL_QUERY = "show me seekers with status startasap and their email and experience";
// ---- Load schemas (safe) ----
/**
 * Read and parse a JSON file without ever throwing.
 *
 * @param {string} path - Location of the JSON file.
 * @returns {*} The parsed content, or null when the file does not exist or
 *   cannot be read/parsed (a warning is logged in the latter case).
 */
function loadJsonSafe(path) {
  try {
    if (!fs.existsSync(path)) {
      // A missing file is an expected situation: no warning.
      return null;
    }
    const rawText = fs.readFileSync(path, "utf-8");
    return JSON.parse(rawText);
  } catch (e) {
    console.warn(`Warning: Could not load ${path}:`, e.message);
    return null;
  }
}
// Load actual ODMDB schemas
// Both files are read synchronously while this module is being evaluated.
// loadJsonSafe returns null for a missing or unparsable file, so either entry
// may be null and consumers must cope with that.
const SCHEMAS = {
seekers: loadJsonSafe(`${SCHEMA_PATH}/seekers.json`),
main: loadJsonSafe("./main.json"), // Fallback consolidated schema
};
// ---- Helpers to read seekers field names from your ODMDB custom schema ----
/**
 * List the seekers property names found in an ODMDB schema document whose
 * exact shape is not known in advance.
 *
 * Shapes tried, in order:
 *   1) { objects: { seekers: { properties: {...} } } }
 *   2) an array whose entries are themselves searched (first hit wins)
 *   3) any nested node of the form { seekers: { properties: {...} } }
 *
 * @param {*} main - Parsed schema document (object, array, or anything else).
 * @returns {string[]} Property names, or [] when nothing matches.
 */
function extractSeekersPropsFromOdmdbSchema(main) {
  return findSeekersPropsIn(main, new Set());
}

/**
 * Recursive worker for extractSeekersPropsFromOdmdbSchema.
 * `activeArrays` holds the arrays currently being iterated higher up the call
 * chain so that an array (directly or indirectly) containing itself cannot
 * recurse without bound.
 */
function findSeekersPropsIn(main, activeArrays) {
  if (!main) return [];

  // 1) { objects: { seekers: { properties: {...} } } }
  const direct = main.objects?.seekers?.properties;
  if (direct && typeof direct === "object") {
    return Object.keys(direct);
  }

  // 2) If main is an array, search for an item that looks like seekers schema
  if (Array.isArray(main) && !activeArrays.has(main)) {
    activeArrays.add(main);
    try {
      for (const entry of main) {
        const keys = findSeekersPropsIn(entry, activeArrays);
        if (keys.length) return keys;
      }
    } finally {
      activeArrays.delete(main);
    }
  }

  // 3) Fallback: deep search for a { seekers: { properties: {...} } } node
  try {
    // Remember visited nodes so cyclic or shared references terminate.
    const visited = new WeakSet();
    const stack = [main];
    while (stack.length) {
      const node = stack.pop();
      if (!node || typeof node !== "object" || visited.has(node)) continue;
      visited.add(node);

      const props = node.seekers?.properties;
      if (props && typeof props === "object") {
        return Object.keys(props);
      }
      for (const child of Object.values(node)) {
        if (child && typeof child === "object") stack.push(child);
      }
    }
  } catch (err) {
    // Best effort: an exotic object (throwing getter, revoked proxy, ...)
    // must not abort schema loading, but the failure should be visible.
    console.warn(
      "Warning: deep search for seekers properties failed:",
      err.message
    );
  }
  return [];
}
// ---- Schema-based mapping system ----
/**
 * Wraps the seekers schema and exposes lookups used to translate natural
 * language terms into schema field names and index names.
 *
 * Instance properties:
 *   - seekersSchema: the schema object in use (may be null/undefined)
 *   - fieldMappings: plain object; a key that is a schema field name maps to a
 *     descriptor object ({ field, title, description, type, synonyms }), every
 *     other key (lower-cased title or synonym) maps to a field-name string
 *   - indexMappings: plain object keyed by index name
 */
class SchemaMapper {
  /** Natural-language synonyms, keyed by the schema field they refer to. */
  static COMMON_SYNONYMS = Object.freeze({
    email: ["contact", "mail", "contact email"],
    seekworkingyear: ["experience", "years of experience", "work experience"],
    seekjobtitleexperience: ["job titles", "job experience", "positions"],
    seekstatus: ["status", "availability", "looking"],
    dt_create: ["created", "creation date", "new", "recent", "since"],
    salaryexpectation: ["salary", "pay", "compensation", "wage"],
    seeklocation: ["location", "where", "place"],
    mbti: ["personality", "type", "profile"],
    alias: ["id", "identifier", "username"],
  });

  /**
   * @param {{seekers: ?object, main: *}} schemas - The dedicated seekers
   *   schema, plus a consolidated schema list used only when the dedicated
   *   one is missing.
   */
  constructor(schemas) {
    // Use direct seekers schema if available, otherwise search in consolidated main schema
    this.seekersSchema =
      schemas.seekers || this.findSchemaByType("seekers", schemas.main);
    this.fieldMappings = this.buildFieldMappings();
    this.indexMappings = this.buildIndexMappings();
    console.log(
      `📋 Loaded seekers schema with ${
        Object.keys(this.seekersSchema?.properties || {}).length
      } properties`
    );
  }

  /**
   * Find, in an array of schemas, the first one whose `$id` contains
   * `/<objectType>`.
   *
   * @param {string} objectType
   * @param {*} schemas - Anything that is not an array yields null.
   * @returns {?object|undefined} The schema, undefined when the array has no
   *   match, null when `schemas` is not an array.
   */
  findSchemaByType(objectType, schemas) {
    if (!schemas || !Array.isArray(schemas)) return null;
    const needle = `/${objectType}`;
    // Entries may be null or lack a string $id; those simply never match.
    return schemas.find(
      (schema) => typeof schema?.$id === "string" && schema.$id.includes(needle)
    );
  }

  /**
   * Build the term → field lookup.
   *
   * Real schema field names always keep their descriptor entry: a title or
   * synonym that happens to equal a field name is not allowed to replace it
   * (otherwise e.g. the synonym "type" of `mbti` would hijack a field that is
   * actually called `type`). Among aliases themselves, the last one wins.
   *
   * @returns {Object<string, (object|string)>}
   */
  buildFieldMappings() {
    if (!this.seekersSchema) return {};
    const properties = this.seekersSchema.properties || {};
    const mappings = {};
    const aliases = [];

    for (const [fieldName, rawDef] of Object.entries(properties)) {
      const fieldDef = rawDef ?? {};
      const title =
        typeof fieldDef.title === "string" ? fieldDef.title : undefined;
      const synonyms = this.generateSynonyms(fieldName, fieldDef);
      mappings[fieldName] = {
        field: fieldName,
        title: title?.toLowerCase(),
        description:
          typeof fieldDef.description === "string"
            ? fieldDef.description.toLowerCase()
            : undefined,
        type: fieldDef.type,
        synonyms,
      };
      if (title) aliases.push([title, fieldName]);
      for (const synonym of synonyms) aliases.push([synonym, fieldName]);
    }

    // Index by title and synonyms, after all canonical entries exist.
    for (const [alias, fieldName] of aliases) {
      const key = alias.toLowerCase();
      if (Object.hasOwn(properties, key)) continue; // real field names win
      mappings[key] = fieldName;
    }
    return mappings;
  }

  /**
   * Index descriptors from the schema's `apxidx` list, keyed by index name.
   * @returns {Object<string, {name: *, type: *, keyval: *}>} Empty when the
   *   schema has no `apxidx` array.
   */
  buildIndexMappings() {
    const declared = this.seekersSchema?.apxidx;
    if (!Array.isArray(declared)) return {};
    const indexes = {};
    for (const idx of declared) {
      if (!idx) continue;
      indexes[idx.name] = {
        name: idx.name,
        type: idx.type,
        keyval: idx.keyval,
      };
    }
    return indexes;
  }

  /**
   * Synonyms known for a field.
   *
   * @param {string} fieldName
   * @param {object} fieldDef - Currently unused; kept for interface stability.
   * @returns {string[]} A fresh array (empty when the field has no synonyms).
   */
  generateSynonyms(fieldName, fieldDef) {
    // Own-property check: names such as "constructor" must not resolve to
    // members inherited from Object.prototype.
    if (!Object.hasOwn(SchemaMapper.COMMON_SYNONYMS, fieldName)) return [];
    return [...SchemaMapper.COMMON_SYNONYMS[fieldName]];
  }

  /**
   * Translate natural-language terms into schema field names.
   *
   * @param {Iterable<string>} nlTerms - Terms; matching is case-insensitive.
   *   Non-string entries and unknown terms are skipped.
   * @returns {string[]} Distinct field names in first-seen order.
   */
  mapNLToFields(nlTerms) {
    const mappedFields = new Set();
    for (const term of nlTerms) {
      if (typeof term !== "string") continue;
      const key = term.toLowerCase();
      if (!Object.hasOwn(this.fieldMappings, key)) continue;
      const mapping = this.fieldMappings[key];
      if (typeof mapping === "string") {
        mappedFields.add(mapping);
      } else if (mapping?.field) {
        mappedFields.add(mapping.field);
      }
    }
    return [...mappedFields];
  }

  /**
   * Fields listed under `apxaccessrights.recruiters.R` in the schema.
   * @returns {string[]} That list, or a small built-in fallback when the
   *   schema does not provide an array there.
   */
  getRecruiterReadableFields() {
    const readable = this.seekersSchema?.apxaccessrights?.recruiters?.R;
    if (!Array.isArray(readable)) {
      // Fallback to basic fields
      return ["alias", "email", "seekstatus", "seekworkingyear"];
    }
    return readable;
  }

  /** @returns {string[]} Every property name of the seekers schema. */
  getAllSeekersFields() {
    if (!this.seekersSchema?.properties) return [];
    return Object.keys(this.seekersSchema.properties);
  }

  /** @returns {string[]} Names of the indexes declared by the schema. */
  getAvailableIndexes() {
    return Object.keys(this.indexMappings);
  }

  /**
   * DSL reference of the first index whose `keyval` equals the field.
   * @param {string} fieldName
   * @returns {?string} `idx.<name>`, or null when no index matches.
   */
  getIndexByField(fieldName) {
    const index = Object.values(this.indexMappings).find(
      (idx) => idx.keyval === fieldName
    );
    return index ? `idx.${index.name}` : null;
  }
}
// Initialize schema mapper (runs once, when the module is evaluated)
const schemaMapper = new SchemaMapper(SCHEMAS);
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();

// Log a short preview of the schema fields; the block scope keeps the
// temporaries out of the module namespace.
{
  const previewSize = 10;
  const shownFields = SEEKERS_FIELDS_FROM_SCHEMA.slice(0, previewSize).join(", ");
  const totalFields = SEEKERS_FIELDS_FROM_SCHEMA.length;
  const overflowNote =
    totalFields > previewSize ? `... (${totalFields} total)` : "";
  console.log(`🔍 Available seekers fields: ${shownFields}${overflowNote}`);
}

// ---- Minimal mapping config (for prompting + default fields) ----
const recruiterReadableAtStartup = schemaMapper.getRecruiterReadableFields();
const seekersMapping = {
  object: "seekers",
  // First 5 readable fields
  defaultReadableFields: recruiterReadableAtStartup.slice(0, 5),
};
// ---- JSON Schema for Structured Outputs (no zod, no oneOf) ----
/**
 * Build the JSON Schema handed to the model for Structured Outputs.
 *
 * The result describes an object with exactly three required members:
 * `object` (always "seekers"), `condition` (non-empty string array) and
 * `fields` (non-empty array restricted to the recruiter-readable fields
 * reported by the schema mapper at call time).
 *
 * @returns {object} A fresh schema object.
 */
function buildResponseJsonSchema() {
  const objectSchema = { type: "string", enum: ["seekers"] };
  const conditionSchema = {
    type: "array",
    items: { type: "string" },
    minItems: 1,
  };
  const fieldsSchema = {
    type: "array",
    items: {
      type: "string",
      enum: schemaMapper.getRecruiterReadableFields(),
    },
    minItems: 1,
  };

  return {
    type: "object",
    additionalProperties: false,
    properties: {
      object: objectSchema,
      condition: conditionSchema,
      fields: fieldsSchema,
    },
    required: ["object", "condition", "fields"],
  };
}
// ---- Prompt builders ----
/**
 * Calendar date in the Europe/Paris timezone, formatted as YYYY-MM-DD and
 * optionally shifted by a whole number of days.
 *
 * @param {Date} instant - Point in time to convert.
 * @param {number} [offsetDays=0] - Days to add (negative to go back).
 * @returns {string}
 */
function parisDateString(instant, offsetDays = 0) {
  const parts = new Intl.DateTimeFormat("en-CA", {
    timeZone: "Europe/Paris",
    year: "numeric",
    month: "2-digit",
    day: "2-digit",
  }).formatToParts(instant);
  const pick = (type) =>
    Number(parts.find((part) => part.type === type).value);
  // Date.UTC normalises day overflow/underflow, so month and year boundaries
  // are handled without manual arithmetic.
  const shifted = new Date(
    Date.UTC(pick("year"), pick("month") - 1, pick("day") + offsetDays)
  );
  return shifted.toISOString().slice(0, 10);
}

/**
 * Build the system prompt that teaches the model the ODMDB DSL, the available
 * fields/indexes (taken from the schema mapper) and how to resolve relative
 * dates.
 *
 * @param {Date} [now=new Date()] - Reference instant for "today",
 *   "yesterday" and "last week"; resolved in the Europe/Paris timezone.
 * @returns {string} Newline-joined prompt text.
 */
function systemPrompt(now = new Date()) {
  const availableFields = schemaMapper.getAllSeekersFields();
  const recruiterReadableFields = schemaMapper.getRecruiterReadableFields();
  const availableIndexes = schemaMapper.getAvailableIndexes();

  // Relative dates are derived from `now` rather than frozen in the source,
  // so the prompt stays correct on whatever day the script runs.
  const today = parisDateString(now);
  const yesterday = parisDateString(now, -1);
  const lastWeek = parisDateString(now, -7);

  return [
    "You convert a natural language request into an ODMDB search payload.",
    "Return ONLY a compact JSON object that matches the provided JSON Schema.",
    "",
    "ODMDB DSL:",
    "- join(remoteObject:localKey:remoteProp:operator:value)",
    "- idx.<indexName>(value) - for indexed fields",
    "- prop.<field>(operator:value) - for direct property queries",
    "",
    "Available seekers fields:",
    // Only the first 15 fields are listed to keep the prompt short.
    availableFields.slice(0, 15).join(", ") +
      (availableFields.length > 15 ? "..." : ""),
    "",
    "Available indexes for optimization:",
    availableIndexes.join(", "),
    "",
    "Recruiter-readable fields (use these for field selection):",
    recruiterReadableFields.join(", "),
    "",
    "Field mappings for natural language:",
    "- 'email' → email",
    "- 'experience' → seekworkingyear",
    "- 'job titles' → seekjobtitleexperience",
    "- 'status' → seekstatus",
    "- 'salary' → salaryexpectation",
    "- 'location' → seeklocation",
    "- 'new/recent' → dt_create (use prop.dt_create(>=:YYYY-MM-DD))",
    "",
    "Rules:",
    "- Object must be 'seekers'.",
    "- Use indexes when possible (idx.seekstatus_alias for status queries)",
    "- For date filters, use prop.dt_create with absolute dates",
    "- Only return recruiter-readable fields in 'fields' array",
    `- Default fields if request is generic: ${recruiterReadableFields
      .slice(0, 5)
      .join(", ")}`,
    "",
    `Timezone is Europe/Paris. Today is ${today}.`,
    `Interpret 'last week' as now minus 7 days → ${lastWeek}.`,
    `Interpret 'yesterday' as → ${yesterday}.`,
  ].join("\n");
}
/**
 * Build the user message: the request wrapped in double quotes, followed by
 * a reminder to answer with JSON only.
 *
 * @param {string} nl - The natural-language request, inserted verbatim.
 * @returns {string} Two lines separated by a newline.
 */
function userPrompt(nl) {
  const requestLine = `Natural language request: "${nl}"`;
  const reminderLine = "Return ONLY the JSON object.";
  return [requestLine, reminderLine].join("\n");
}
// ---- OpenAI call using Responses API (text.format) ----
// The client is created on first use rather than at module scope.
// NOTE(review): the openai SDK constructor is documented to throw when no API
// key is available; at module scope that would abort the script before the
// entry point can report the missing key itself — confirm against the SDK
// version in use.
let client = null;

/** Return the shared OpenAI client, constructing it on first use. */
function getOpenAIClient() {
  client ??= new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  return client;
}

/**
 * Extract the text payload from a Responses API result.
 *
 * Prefers the `output_text` convenience property; otherwise scans every
 * output item for the first non-empty text part, because the first item is
 * not necessarily the one carrying the message content.
 *
 * @param {object} resp - Value resolved by `responses.create`.
 * @returns {string}
 * @throws {Error} "Empty model output" when no text can be found.
 */
function extractOutputText(resp) {
  if (resp.output_text) return resp.output_text;
  for (const item of resp.output ?? []) {
    for (const part of item?.content ?? []) {
      if (typeof part?.text === "string" && part.text) return part.text;
    }
  }
  throw new Error("Empty model output");
}

/**
 * Ask the model to turn a natural-language request into an ODMDB query.
 *
 * @param {string} nlText - The natural-language request.
 * @returns {Promise<object>} The parsed JSON object produced by the model
 *   (shape constrained by buildResponseJsonSchema, not re-validated here).
 * @throws {Error} When the model returns no text or text that is not JSON;
 *   errors raised by the SDK call propagate unchanged.
 */
async function inferQuery(nlText) {
  const resp = await getOpenAIClient().responses.create({
    model: MODEL,
    input: [
      { role: "system", content: systemPrompt() },
      { role: "user", content: userPrompt(nlText) },
    ],
    text: {
      format: {
        name: "OdmdbQuery",
        type: "json_schema",
        schema: buildResponseJsonSchema(),
        strict: true,
      },
    },
  });

  const jsonText = extractOutputText(resp);
  try {
    return JSON.parse(jsonText);
  } catch (err) {
    // Keep the original SyntaxError reachable through `cause`.
    throw new Error(`Model output is not valid JSON: ${err.message}`, {
      cause: err,
    });
  }
}
// ---- Validate using the ODMDB schema (not zod) ----
/**
 * Validate a model-produced query against the seekers schema.
 *
 * Checks, in order: overall shape, requested fields, then each condition
 * string. Field handling depends on whether a schema is available:
 *   - schema loaded: a field absent from the schema is an error; a field that
 *     is not recruiter-readable only triggers a warning;
 *   - no schema loaded (the mapper reports no fields): existence cannot be
 *     checked, so fields are only reduced to distinct non-blank strings.
 *
 * @param {*} candidate - Parsed model output.
 * @returns {object} The same `candidate` object (its `fields` array is
 *   replaced in the schema-less case).
 * @throws {Error} On any shape, field or condition violation.
 */
function validateWithOdmdbSchema(candidate) {
  // Basic shape checks (already enforced by Structured Outputs, but keep defensive)
  if (!candidate || typeof candidate !== "object")
    throw new Error("Invalid response (not an object).");
  if (candidate.object !== "seekers")
    throw new Error("Invalid object; must be 'seekers'.");
  if (!Array.isArray(candidate.condition) || candidate.condition.length === 0) {
    throw new Error(
      "Invalid 'condition'; must be a non-empty array of strings."
    );
  }
  if (!Array.isArray(candidate.fields) || candidate.fields.length === 0) {
    throw new Error("Invalid 'fields'; must be a non-empty array of strings.");
  }

  // Validate fields against schema
  const schemaFields = new Set(schemaMapper.getAllSeekersFields());
  let validatedFields = candidate.fields;
  if (schemaFields.size > 0) {
    const recruiterReadable = new Set(
      schemaMapper.getRecruiterReadableFields()
    );
    for (const field of candidate.fields) {
      if (!schemaFields.has(field)) {
        throw new Error(
          `Invalid field '${field}'; not found in seekers schema.`
        );
      }
      if (!recruiterReadable.has(field)) {
        console.warn(
          `Warning: Field '${field}' may not be readable by recruiters.`
        );
      }
    }
  } else {
    // If we can't read the schema, at least ensure strings & dedupe
    validatedFields = [
      ...new Set(
        candidate.fields.filter((f) => typeof f === "string" && f.trim())
      ),
    ];
    if (!validatedFields.length) {
      throw new Error(
        "No valid fields remain after validation and no fallback available."
      );
    }
  }

  // DSL token sanity: each condition must mention a known DSL construct and
  // consist only of printable ASCII / common whitespace.
  const allowedTokens = ["join(", "idx.", "prop."];
  for (const c of candidate.condition) {
    if (typeof c !== "string")
      throw new Error("Condition entries must be strings.");
    const tokenOK = allowedTokens.some((t) => c.includes(t));
    const ascii = /^[\x09\x0A\x0D\x20-\x7E()_:\[\].,=><!'"-]+$/.test(c);
    if (!tokenOK || !ascii) throw new Error(`Malformed condition: ${c}`);
  }

  candidate.fields = validatedFields;
  return candidate;
}
// ---- Local ODMDB Data Access ----
/**
 * Load seeker records from the local ODMDB item directory
 * (`<OBJECTS_PATH>/seekers/itm`).
 *
 * Only `*.json` entries are considered. They are sorted by name before the
 * cap is applied so that the selected subset does not depend on the
 * directory enumeration order of the underlying filesystem. Files that
 * cannot be read or parsed are skipped with a warning.
 *
 * @param {number} [limit=50] - Maximum number of files to load (PoC
 *   performance guard); expected to be a non-negative integer.
 * @returns {Array<*>} Parsed file contents; [] when the directory is missing
 *   or cannot be listed.
 */
function loadSeekersData(limit = 50) {
  const seekersItemsPath = `${OBJECTS_PATH}/seekers/itm`;
  try {
    if (!fs.existsSync(seekersItemsPath)) {
      console.error(`❌ Seekers data directory not found: ${seekersItemsPath}`);
      return [];
    }
    const files = fs
      .readdirSync(seekersItemsPath)
      .filter((file) => file.endsWith(".json"))
      .sort()
      .slice(0, limit);
    console.log(
      `📁 Loading ${files.length} seeker files from ${seekersItemsPath}`
    );

    const seekers = [];
    for (const file of files) {
      try {
        const filePath = `${seekersItemsPath}/${file}`;
        seekers.push(JSON.parse(fs.readFileSync(filePath, "utf-8")));
      } catch (error) {
        // One broken item must not prevent the others from loading.
        console.warn(`⚠️ Could not load ${file}:`, error.message);
      }
    }
    return seekers;
  } catch (error) {
    console.error("❌ Error loading seekers data:", error.message);
    return [];
  }
}
// ---- Local ODMDB Query Execution ----
/**
 * Run a generated query against the locally stored seeker files.
 *
 * This is a simplified executor: it understands only
 * `prop.dt_create(>=:YYYY-MM-DD)` and `idx.seekstatus_alias(<status>)`.
 * Any other condition is reported with a warning and does not filter the
 * result set.
 *
 * @param {{condition: string[], fields: string[]}} query - Validated query.
 * @returns {Promise<?{data: object[], meta?: object}>} Projected records,
 *   `{ data: [] }` when no data could be loaded, or null when execution is
 *   disabled (EXECUTE_QUERY is false) or an unexpected error occurred.
 */
async function executeOdmdbQuery(query) {
  if (!EXECUTE_QUERY) {
    console.log(
      "💡 Query execution disabled. Set EXECUTE_QUERY=true to enable."
    );
    return null;
  }
  try {
    console.log(
      `\n🔍 Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/`
    );
    console.log("Query conditions:", query.condition);
    console.log("Requested fields:", query.fields);

    // Load all seekers data
    const allSeekers = loadSeekersData();
    if (allSeekers.length === 0) {
      console.log("❌ No seekers data found");
      return { data: [] };
    }
    console.log(`📊 Loaded ${allSeekers.length} seekers for filtering`);

    // Item files may contain something other than a record (e.g. `null`);
    // such entries can never match and must not break property access below.
    let filteredSeekers = allSeekers.filter(
      (seeker) => seeker !== null && typeof seeker === "object"
    );

    // Apply basic filtering (simplified DSL processing)
    for (const condition of query.condition) {
      let handled = false;

      if (condition.includes("prop.dt_create(>=:")) {
        // Extract date from condition like "prop.dt_create(>=:2025-10-07)"
        const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/);
        if (dateMatch) {
          handled = true;
          const filterDate = new Date(dateMatch[1]);
          filteredSeekers = filteredSeekers.filter((seeker) => {
            if (!seeker.dt_create) return false;
            // An unparsable dt_create yields an Invalid Date, which compares
            // false and therefore excludes the record.
            return new Date(seeker.dt_create) >= filterDate;
          });
          console.log(
            `🗓️ Filtered by date >= ${dateMatch[1]}: ${filteredSeekers.length} results`
          );
        }
      }

      if (condition.includes("idx.seekstatus_alias(")) {
        // Extract status from condition like "idx.seekstatus_alias(startasap)"
        const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/);
        if (statusMatch) {
          handled = true;
          const status = statusMatch[1];
          filteredSeekers = filteredSeekers.filter(
            (seeker) => seeker.seekstatus === status
          );
          console.log(
            `👤 Filtered by status ${status}: ${filteredSeekers.length} results`
          );
        }
      }

      if (!handled) {
        // Make it visible that the result set is broader than requested.
        console.warn(
          `⚠️ Condition not supported by the local executor, ignored: ${condition}`
        );
      }
    }

    // Select only requested fields
    const results = filteredSeekers.map((seeker) => {
      const projected = {};
      for (const field of query.fields) {
        // Object.hasOwn is safe even when a record defines its own
        // "hasOwnProperty" key.
        if (Object.hasOwn(seeker, field)) {
          projected[field] = seeker[field];
        }
      }
      return projected;
    });
    console.log(
      `✅ Query executed successfully! Found ${results.length} matching seekers`
    );
    return {
      data: results,
      meta: {
        total: results.length,
        source: "local_files",
        path: `${OBJECTS_PATH}/seekers/itm/`,
      },
    };
  } catch (error) {
    console.error("❌ Local query execution failed:", error.message);
    return null;
  }
}
// ---- Result Processing with jq ----
/**
 * Run a jq filter over the `data` member of a query result.
 *
 * @param {?{data: *}} results - Query result; nothing is processed when it,
 *   or its `data`, is falsy.
 * @param {string} [jqFilter="."] - jq program to apply.
 * @returns {Promise<*>} Whatever jq.run resolves to; null when there is
 *   nothing to process; the pretty-printed JSON of `results.data` when jq
 *   fails.
 */
async function processResults(results, jqFilter = ".") {
  const rows = results?.data;
  if (!rows) {
    console.log("No results to process.");
    return null;
  }

  let output;
  try {
    // The data is handed over as a value (input: "json"), not as a file path.
    output = await jq.run(jqFilter, rows, { input: "json" });
  } catch (error) {
    console.error("❌ jq processing failed:", error.message);
    // Degrade to plain formatted JSON so callers still get something to show.
    output = JSON.stringify(rows, null, 2);
  }
  return output;
}
// ---- Run PoC (generate query and optionally execute) ----
// Entry point: translate the hard-coded NL request into an ODMDB query,
// validate it and — when EXECUTE_QUERY is enabled — run it against the local
// data and print a summary plus a CSV preview. Any failure is reported and
// terminates the process with exit code 1.
(async () => {
  // NOTE(review): processResults is assumed to resolve to text (jq's
  // formatted output, or the JSON string of its own fallback) — confirm
  // against the node-jq version in use. Text is printed verbatim, since
  // serialising it again would show a quoted, escaped string; any other value
  // is pretty-printed.
  const toPrintable = (value) =>
    typeof value === "string" ? value : JSON.stringify(value, null, 2);

  // NOTE(review): when jq is not run in raw mode, each @csv row is expected to
  // arrive as a JSON string literal on its own line — confirm. Such lines are
  // decoded; anything that is not a JSON string literal is kept unchanged.
  const decodeJsonStringLines = (text) =>
    text
      .split("\n")
      .map((line) => {
        try {
          const decoded = JSON.parse(line);
          return typeof decoded === "string" ? decoded : line;
        } catch {
          return line; // already plain text, nothing to decode
        }
      })
      .join("\n");

  try {
    if (!process.env.OPENAI_API_KEY)
      throw new Error("Missing OPENAI_API_KEY env var.");
    console.log(`🤖 Processing NL query: "${NL_QUERY}"`);
    console.log("=".repeat(60));

    // Step 1: Generate ODMDB query from natural language
    const out = await inferQuery(NL_QUERY);
    const validated = validateWithOdmdbSchema(out);
    console.log("✅ Generated ODMDB Query:");
    const generatedQuery = {
      object: validated.object,
      condition: validated.condition,
      fields: validated.fields,
    };
    console.log(JSON.stringify(generatedQuery, null, 2));

    // Step 2: Execute query if enabled
    if (EXECUTE_QUERY) {
      console.log("\n" + "=".repeat(60));
      const results = await executeOdmdbQuery(generatedQuery);
      if (results) {
        console.log("✅ Query executed successfully!");
        console.log(`📊 Found ${results.data?.length || 0} results`);

        // Step 3: Process results with jq
        console.log("\n📋 Results Summary:");
        const summary = await processResults(
          results,
          `.[0:3] | map({email, seekworkingyear})`
        );
        console.log(toPrintable(summary));

        // Optional: Show full results count
        if (results.data?.length > 3) {
          console.log(`\n... and ${results.data.length - 3} more results`);
        }

        // Step 4: Export to CSV format
        console.log("\n📄 CSV Preview:");
        const csvData = await processResults(
          results,
          `
map([.email // "N/A", .seekworkingyear // "N/A"]) |
["email","experience"] as $header |
[$header] + .[0:5] |
.[] | @csv
`
        );
        if (csvData) {
          console.log(
            typeof csvData === "string"
              ? decodeJsonStringLines(csvData)
              : csvData
          );
        }
      }
    } else {
      console.log(
        "\n💡 To execute this query against ODMDB, set EXECUTE_QUERY=true"
      );
      console.log(` Example: EXECUTE_QUERY=true npm start`);
    }
  } catch (e) {
    console.error("❌ PoC failed:", e.message || e);
    process.exit(1);
  }
})();