[ADD] PoC working with our database. playground jq

This commit is contained in:
Eliyan
2025-10-14 12:57:52 +02:00
parent 0be31357cf
commit 28b0360ddb
4 changed files with 362 additions and 26 deletions

View File

@@ -36,11 +36,25 @@ This is a **Proof of Concept (PoC)** that demonstrates the conversion of natural
### Running the PoC ### Running the PoC
**Query Generation Only (Default):**
```bash ```bash
npm start npm start
``` ```
This will process the hardcoded natural language query and output the generated ODMDB query in JSON format. **Query Generation + Execution:**
```bash
EXECUTE_QUERY=true npm start
```
**With Custom ODMDB Server:**
```bash
EXECUTE_QUERY=true ODMDB_BASE_URL=http://localhost:8080 npm start
```
This will process the hardcoded natural language query and output the generated ODMDB query in JSON format. When `EXECUTE_QUERY=true`, it will also execute the query against the ODMDB server.
### Changing the Query ### Changing the Query
@@ -57,8 +71,32 @@ const NL_QUERY = "your natural language query here";
- `"find recent seekers with job titles and salary expectations"` - `"find recent seekers with job titles and salary expectations"`
- `"show me seekers from yesterday with their skills"` - `"show me seekers from yesterday with their skills"`
### Testing jq Processing
To test the jq processing capabilities with mock data:
```bash
node test-jq.js
```
This demonstrates various jq operations including:
- Basic data formatting and field selection
- CSV conversion from JSON
- Advanced filtering and transformations
- Statistical summaries and aggregations
## Environment Variables
- `OPENAI_API_KEY` - Your OpenAI API key (required)
- `EXECUTE_QUERY` - Set to "true" to execute queries against ODMDB (default: false)
- `ODMDB_BASE_URL` - ODMDB server URL (default: http://localhost:3000)
- `ODMDB_TRIBE` - ODMDB tribe name (default: smatchit)
- `OPENAI_MODEL` - OpenAI model to use (default: gpt-5)
## Output Format ## Output Format
**Query Generation:**
The PoC generates ODMDB queries in this format: The PoC generates ODMDB queries in this format:
```json ```json

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env node
// experimental script to demonstrate jq processing capabilities. playground
// not relevant to the PoC itself — just practice with jq
// --- IGNORE ---
import jq from "node-jq";
// Canned ODMDB-style response used to exercise the jq playground offline.
// Shape mirrors the server payload: { data: [ seekerRecord, ... ] }.
const makeSeeker = (email, seekworkingyear, dt_create, skills) => ({
  email,
  seekworkingyear,
  dt_create,
  skills,
});

const mockOdmdbResponse = {
  data: [
    makeSeeker("john.doe@example.com", 5, "2025-01-10T10:30:00Z", [
      "JavaScript",
      "Node.js",
      "React",
    ]),
    makeSeeker("jane.smith@example.com", 3, "2025-01-11T14:20:00Z", [
      "Python",
      "Django",
      "PostgreSQL",
    ]),
    makeSeeker("bob.wilson@example.com", 8, "2025-01-12T09:15:00Z", [
      "Java",
      "Spring",
      "AWS",
    ]),
  ],
};
/**
 * Demonstrates common jq transformations (via node-jq) on the mock dataset:
 * field projection, CSV export, filtered selection, and aggregation.
 * Logs each result to stdout; rejects if any jq invocation fails.
 */
async function testJqProcessing() {
  const seekerRows = mockOdmdbResponse.data;
  // Run one jq program over the mock rows; node-jq returns a string by default.
  const runFilter = (program) => jq.run(program, seekerRows, { input: "json" });
  const printDivider = () => console.log("\n" + "=".repeat(50) + "\n");

  console.log("🧪 Testing jq processing capabilities...\n");

  // Test 1: project a couple of fields per row.
  console.log("📋 Test 1: Basic data formatting");
  console.log(await runFilter(".[] | {email, experience: .seekworkingyear}"));
  printDivider();

  // Test 2: flatten rows into CSV lines (header printed separately).
  console.log("📊 Test 2: CSV conversion");
  console.log("email,experience");
  console.log(
    await runFilter('map([.email, .seekworkingyear] | @csv) | join("\n")')
  );
  printDivider();

  // Test 3: keep only experienced seekers, then reshape each row.
  console.log("🔍 Test 3: Advanced filtering (experience > 4 years)");
  console.log(
    await runFilter(
      "map(select(.seekworkingyear > 4)) | .[] | {email, years: .seekworkingyear, skills}"
    )
  );
  printDivider();

  // Test 4: aggregate counts and averages across the whole dataset.
  console.log("📈 Test 4: Statistical summary");
  const summaryJson = await runFilter(
    "{ total_seekers: length, avg_experience: (map(.seekworkingyear) | add / length), skill_count: (map(.skills[]) | group_by(.) | map({skill: .[0], count: length})) }"
  );
  console.log(JSON.stringify(JSON.parse(summaryJson), null, 2));
  console.log("\n✅ All jq tests completed successfully!");
}
// Entry point: run the demo; failures are logged to stderr rather than thrown.
testJqProcessing().catch(console.error);

View File

@@ -4,9 +4,12 @@
"type": "module", "type": "module",
"private": true, "private": true,
"scripts": { "scripts": {
"start": "node poc.js" "start": "node poc.js",
"try-jq": "node experiment-jq-playground.js"
}, },
"dependencies": { "dependencies": {
"axios": "^1.12.2",
"node-jq": "^6.3.1",
"openai": "^4.60.0", "openai": "^4.60.0",
"zod": "^3.23.8" "zod": "^3.23.8"
} }

258
poc.js
View File

@@ -5,11 +5,21 @@
import fs from "node:fs"; import fs from "node:fs";
import OpenAI from "openai"; import OpenAI from "openai";
import axios from "axios";
import jq from "node-jq";
// ---- Config ---- // ---- Config ----
const MODEL = process.env.OPENAI_MODEL || "gpt-5"; const MODEL = process.env.OPENAI_MODEL || "gpt-5";
const MAIN_SCHEMA_PATH = "./main.json"; // optional context; used for validation
const LG_SCHEMA_PATH = "./lg.json"; // optional context // ODMDB paths - point to actual ODMDB structure
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
const SCHEMA_PATH = `${ODMDB_BASE_PATH}/schema`;
const OBJECTS_PATH = `${ODMDB_BASE_PATH}/objects`;
// ODMDB execution config
const ODMDB_BASE_URL = process.env.ODMDB_BASE_URL || "http://localhost:3000";
const ODMDB_TRIBE = process.env.ODMDB_TRIBE || "smatchit";
const EXECUTE_QUERY = process.env.EXECUTE_QUERY === "true"; // Set to "true" to execute queries
// Hardcoded NL query for the PoC (no multi-turn) // Hardcoded NL query for the PoC (no multi-turn)
const NL_QUERY = const NL_QUERY =
@@ -21,12 +31,16 @@ function loadJsonSafe(path) {
if (fs.existsSync(path)) { if (fs.existsSync(path)) {
return JSON.parse(fs.readFileSync(path, "utf-8")); return JSON.parse(fs.readFileSync(path, "utf-8"));
} }
} catch {} } catch (e) {
console.warn(`Warning: Could not load ${path}:`, e.message);
}
return null; return null;
} }
// Load actual ODMDB schemas
const SCHEMAS = { const SCHEMAS = {
main: loadJsonSafe(MAIN_SCHEMA_PATH), seekers: loadJsonSafe(`${SCHEMA_PATH}/seekers.json`),
lg: loadJsonSafe(LG_SCHEMA_PATH), main: loadJsonSafe("./main.json"), // Fallback consolidated schema
}; };
// ---- Helpers to read seekers field names from your ODMDB custom schema ---- // ---- Helpers to read seekers field names from your ODMDB custom schema ----
@@ -75,15 +89,22 @@ function extractSeekersPropsFromOdmdbSchema(main) {
// ---- Schema-based mapping system ---- // ---- Schema-based mapping system ----
class SchemaMapper { class SchemaMapper {
constructor(schemas) { constructor(schemas) {
this.schemas = schemas.main || []; // Use direct seekers schema if available, otherwise search in consolidated main schema
this.seekersSchema = this.findSchemaByType("seekers"); this.seekersSchema =
schemas.seekers || this.findSchemaByType("seekers", schemas.main);
this.fieldMappings = this.buildFieldMappings(); this.fieldMappings = this.buildFieldMappings();
this.indexMappings = this.buildIndexMappings(); this.indexMappings = this.buildIndexMappings();
console.log(
`📋 Loaded seekers schema with ${
Object.keys(this.seekersSchema?.properties || {}).length
} properties`
);
} }
findSchemaByType(objectType) { findSchemaByType(objectType, schemas) {
if (!this.schemas || !Array.isArray(this.schemas)) return null; if (!schemas || !Array.isArray(schemas)) return null;
return this.schemas.find( return schemas.find(
(schema) => schema.$id && schema.$id.includes(`/${objectType}`) (schema) => schema.$id && schema.$id.includes(`/${objectType}`)
); );
} }
@@ -203,6 +224,16 @@ const schemaMapper = new SchemaMapper(SCHEMAS);
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields(); const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();
console.log(
`🔍 Available seekers fields: ${SEEKERS_FIELDS_FROM_SCHEMA.slice(0, 10).join(
", "
)}${
SEEKERS_FIELDS_FROM_SCHEMA.length > 10
? `... (${SEEKERS_FIELDS_FROM_SCHEMA.length} total)`
: ""
}`
);
// ---- Minimal mapping config (for prompting + default fields) ---- // ---- Minimal mapping config (for prompting + default fields) ----
const seekersMapping = { const seekersMapping = {
object: "seekers", object: "seekers",
@@ -394,29 +425,216 @@ function validateWithOdmdbSchema(candidate) {
return candidate; return candidate;
} }
// ---- Run PoC (print only the created query; do not execute) ---- // ---- Local ODMDB Data Access ----
// Read seeker item files from the local ODMDB object store.
// Returns an array of parsed seeker records ([] when the directory is
// missing or unreadable); individual unparseable files are skipped with
// a warning so one bad file cannot abort the whole load.
function loadSeekersData() {
  const itemsDir = `${OBJECTS_PATH}/seekers/itm`;
  try {
    if (!fs.existsSync(itemsDir)) {
      console.error(`❌ Seekers data directory not found: ${itemsDir}`);
      return [];
    }
    // Cap at the first 50 files so the PoC stays fast on large datasets.
    const jsonFiles = fs
      .readdirSync(itemsDir)
      .filter((name) => name.endsWith(".json") && name !== "backup")
      .slice(0, 50);
    console.log(`📁 Loading ${jsonFiles.length} seeker files from ${itemsDir}`);
    const records = [];
    for (const name of jsonFiles) {
      try {
        records.push(
          JSON.parse(fs.readFileSync(`${itemsDir}/${name}`, "utf-8"))
        );
      } catch (error) {
        console.warn(`⚠️ Could not load ${name}:`, error.message);
      }
    }
    return records;
  } catch (error) {
    console.error("❌ Error loading seekers data:", error.message);
    return [];
  }
}
// ---- Local ODMDB Query Execution ----
/**
 * Execute a generated ODMDB query against the locally loaded seeker files.
 *
 * Supports a simplified subset of the ODMDB condition DSL:
 *   - "prop.dt_create(>=:YYYY-MM-DD)"  → keep seekers created on/after the date
 *   - "idx.seekstatus_alias(<status>)" → keep seekers whose seekstatus matches
 * Conditions that match neither pattern are ignored. Result rows contain only
 * the fields listed in `query.fields`.
 *
 * @param {{condition: string[], fields: string[]}} query - validated query
 * @returns {Promise<{data: object[], meta?: object}|null>} result set, or
 *   null when execution is disabled (EXECUTE_QUERY unset) or fails
 */
async function executeOdmdbQuery(query) {
  if (!EXECUTE_QUERY) {
    console.log(
      "💡 Query execution disabled. Set EXECUTE_QUERY=true to enable."
    );
    return null;
  }
  try {
    console.log(
      `\n🔍 Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/`
    );
    console.log("Query conditions:", query.condition);
    console.log("Requested fields:", query.fields);

    const allSeekers = loadSeekersData();
    if (allSeekers.length === 0) {
      console.log("❌ No seekers data found");
      return { data: [] };
    }
    // FIX: this log line previously contained a mojibake (U+FFFD) character.
    console.log(`📊 Loaded ${allSeekers.length} seekers for filtering`);

    // Apply basic filtering (simplified DSL processing).
    let filteredSeekers = allSeekers;
    for (const condition of query.condition) {
      if (condition.includes("prop.dt_create(>=:")) {
        // e.g. "prop.dt_create(>=:2025-10-07)"
        const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/);
        if (dateMatch) {
          const filterDate = new Date(dateMatch[1]);
          filteredSeekers = filteredSeekers.filter((seeker) => {
            // Records without a creation date can never satisfy a date filter.
            if (!seeker.dt_create) return false;
            return new Date(seeker.dt_create) >= filterDate;
          });
          console.log(
            `🗓️ Filtered by date >= ${dateMatch[1]}: ${filteredSeekers.length} results`
          );
        }
      }
      if (condition.includes("idx.seekstatus_alias(")) {
        // e.g. "idx.seekstatus_alias(startasap)"
        const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/);
        if (statusMatch) {
          const status = statusMatch[1];
          filteredSeekers = filteredSeekers.filter(
            (seeker) => seeker.seekstatus === status
          );
          console.log(
            `👤 Filtered by status ${status}: ${filteredSeekers.length} results`
          );
        }
      }
    }

    // Project each row down to the requested fields only.
    const results = filteredSeekers.map((seeker) => {
      const projected = {};
      for (const field of query.fields) {
        if (Object.hasOwn(seeker, field)) {
          projected[field] = seeker[field];
        }
      }
      return projected;
    });

    console.log(
      `✅ Query executed successfully! Found ${results.length} matching seekers`
    );
    return {
      data: results,
      meta: {
        total: results.length,
        source: "local_files",
        path: `${OBJECTS_PATH}/seekers/itm/`,
      },
    };
  } catch (error) {
    console.error("❌ Local query execution failed:", error.message);
    return null;
  }
}
// ---- Result Processing with jq ----
async function processResults(results, jqFilter = ".") {
if (!results || !results.data) {
console.log("No results to process.");
return null;
}
try {
// Use jq to filter and format results (pass data directly, not as string)
const processed = await jq.run(jqFilter, results.data, { input: "json" });
// Return the processed result
return processed;
} catch (error) {
console.error("❌ jq processing failed:", error.message);
return JSON.stringify(results.data, null, 2); // Return formatted JSON if jq fails
}
} // ---- Run PoC (generate query and optionally execute) ----
(async () => { (async () => {
try { try {
if (!process.env.OPENAI_API_KEY) if (!process.env.OPENAI_API_KEY)
throw new Error("Missing OPENAI_API_KEY env var."); throw new Error("Missing OPENAI_API_KEY env var.");
console.log(`🤖 Processing NL query: "${NL_QUERY}"`);
console.log("=".repeat(60));
// Step 1: Generate ODMDB query from natural language
const out = await inferQuery(NL_QUERY); const out = await inferQuery(NL_QUERY);
const validated = validateWithOdmdbSchema(out); const validated = validateWithOdmdbSchema(out);
// Output ONLY the created query (no execution) console.log("✅ Generated ODMDB Query:");
console.log( const generatedQuery = {
JSON.stringify(
{
object: validated.object, object: validated.object,
condition: validated.condition, condition: validated.condition,
fields: validated.fields, fields: validated.fields,
}, };
null, console.log(JSON.stringify(generatedQuery, null, 2));
2
) // Step 2: Execute query if enabled
if (EXECUTE_QUERY) {
console.log("\n" + "=".repeat(60));
const results = await executeOdmdbQuery(generatedQuery);
if (results) {
console.log("✅ Query executed successfully!");
console.log(`📊 Found ${results.data?.length || 0} results`);
// Step 3: Process results with jq
console.log("\n📋 Results Summary:");
const summary = await processResults(
results,
`.[0:3] | map({alias, email, seekstatus})`
); );
console.log(JSON.stringify(summary, null, 2));
// Optional: Show full results count
if (results.data?.length > 3) {
console.log(`\n... and ${results.data.length - 3} more results`);
}
// Step 4: Export to CSV format
console.log("\n📄 CSV Preview:");
const csvData = await processResults(
results,
`
map([.alias // "N/A", .email // "N/A", .seekstatus // "N/A"]) |
["alias","email","status"] as $header |
[$header] + .[0:5] |
.[] | @csv
`
);
if (csvData) {
console.log(csvData);
}
}
} else {
console.log(
"\n💡 To execute this query against ODMDB, set EXECUTE_QUERY=true"
);
console.log(` Example: EXECUTE_QUERY=true npm start`);
}
} catch (e) { } catch (e) {
console.error("PoC failed:", e.message || e); console.error("PoC failed:", e.message || e);
process.exit(1); process.exit(1);
} }
})(); })();