From 28b0360ddbad32ed65607c1a31743c21661dfd9f Mon Sep 17 00:00:00 2001
From: Eliyan
Date: Tue, 14 Oct 2025 12:57:52 +0200
Subject: [PATCH] [ADD] PoC working with our database. playground jq

---
 README.md                   |  40 +++++-
 experiment-jq-playground.js |  77 +++++++++++
 package.json                |   5 +-
 poc.js                      | 266 ++++++++++++++++++++++++++++++++----
 4 files changed, 362 insertions(+), 26 deletions(-)
 create mode 100644 experiment-jq-playground.js

diff --git a/README.md b/README.md
index 180f753..a0dfe9e 100644
--- a/README.md
+++ b/README.md
@@ -36,11 +36,25 @@ This is a **Proof of Concept (PoC)** that demonstrates the conversion of natural
 
 ### Running the PoC
 
+**Query Generation Only (Default):**
+
 ```bash
 npm start
 ```
 
-This will process the hardcoded natural language query and output the generated ODMDB query in JSON format.
+**Query Generation + Execution:**
+
+```bash
+EXECUTE_QUERY=true npm start
+```
+
+**With Custom ODMDB Server:**
+
+```bash
+EXECUTE_QUERY=true ODMDB_BASE_URL=http://localhost:8080 npm start
+```
+
+This will process the hardcoded natural language query and output the generated ODMDB query in JSON format. When `EXECUTE_QUERY=true`, it will also execute the generated query against the ODMDB data.
 
 ### Changing the Query
 
@@ -57,8 +71,32 @@ const NL_QUERY = "your natural language query here";
 - `"find recent seekers with job titles and salary expectations"`
 - `"show me seekers from yesterday with their skills"`
 
+### Testing jq Processing
+
+To test the jq processing capabilities with mock data:
+
+```bash
+npm run try-jq
+```
+
+This demonstrates various jq operations including:
+
+- Basic data formatting and field selection
+- CSV conversion from JSON
+- Advanced filtering and transformations
+- Statistical summaries and aggregations
+
+## Environment Variables
+
+- `OPENAI_API_KEY` - Your OpenAI API key (required)
+- `EXECUTE_QUERY` - Set to "true" to execute queries against ODMDB (default: false)
+- `ODMDB_BASE_URL` - ODMDB server URL (default: http://localhost:3000)
+- `ODMDB_TRIBE` - ODMDB tribe name (default: smatchit)
+- `OPENAI_MODEL` - OpenAI model to use (default: gpt-5)
+
 ## Output Format
 
+**Query Generation:**
 The PoC generates ODMDB queries in this format:
 
 ```json
diff --git a/experiment-jq-playground.js b/experiment-jq-playground.js
new file mode 100644
index 0000000..e239b0f
--- /dev/null
+++ b/experiment-jq-playground.js
@@ -0,0 +1,77 @@
+#!/usr/bin/env node
+
+// Experimental jq playground: demonstrates jq processing capabilities.
+// Not relevant to the PoC itself; just practice with jq.
+// --- IGNORE ---
+import jq from "node-jq";
+
+const mockOdmdbResponse = {
+  data: [
+    {
+      email: "john.doe@example.com",
+      seekworkingyear: 5,
+      dt_create: "2025-01-10T10:30:00Z",
+      skills: ["JavaScript", "Node.js", "React"],
+    },
+    {
+      email: "jane.smith@example.com",
+      seekworkingyear: 3,
+      dt_create: "2025-01-11T14:20:00Z",
+      skills: ["Python", "Django", "PostgreSQL"],
+    },
+    {
+      email: "bob.wilson@example.com",
+      seekworkingyear: 8,
+      dt_create: "2025-01-12T09:15:00Z",
+      skills: ["Java", "Spring", "AWS"],
+    },
+  ],
+};
+
+async function testJqProcessing() {
+  console.log("🧪 Testing jq processing capabilities...\n");
+
+  // Test 1: Basic filtering
+  console.log("📋 Test 1: Basic data formatting");
+  const basicFormat = await jq.run(
+    ".[] | {email, experience: .seekworkingyear}",
+    mockOdmdbResponse.data,
+    { input: "json" }
+  );
+  console.log(basicFormat);
+  console.log("\n" + "=".repeat(50) + "\n");
+
+  // Test 2: CSV conversion
+  console.log("📊 Test 2: CSV conversion");
+  const csvData = await jq.run(
+    'map([.email, .seekworkingyear] | @csv) | join("\\n")',
+    mockOdmdbResponse.data,
+    { input: "json" }
+  );
+  console.log("email,experience");
+  console.log(csvData);
+  console.log("\n" + "=".repeat(50) + "\n");
+
+  // Test 3: Advanced filtering
+  console.log("🔍 Test 3: Advanced filtering (experience > 4 years)");
+  const filtered = await jq.run(
+    "map(select(.seekworkingyear > 4)) | .[] | {email, years: .seekworkingyear, skills}",
+    mockOdmdbResponse.data,
+    { input: "json" }
+  );
+  console.log(filtered);
+  console.log("\n" + "=".repeat(50) + "\n");
+
+  // Test 4: Statistical summary
+  console.log("📈 Test 4: Statistical summary");
+  const stats = await jq.run(
+    "{ total_seekers: length, avg_experience: (map(.seekworkingyear) | add / length), skill_count: (map(.skills[]) | group_by(.) | map({skill: .[0], count: length})) }",
+    mockOdmdbResponse.data,
+    { input: "json" }
+  );
+  console.log(JSON.stringify(JSON.parse(stats), null, 2));
+
+  console.log("\n✅ All jq tests completed successfully!");
+}
+
+testJqProcessing().catch(console.error);
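As an aside, the header row in Test 2 could also come from jq itself rather than a separate `console.log`; a minimal sketch under the same node-jq call shape used above, with one illustrative sample row:

```js
import jq from "node-jq";

// Illustrative row only; the playground above feeds mockOdmdbResponse.data instead.
const rows = [{ email: "john.doe@example.com", seekworkingyear: 5 }];

// Emit the header record and one CSV record per row from a single jq program.
// Without jq's -r flag the records come back JSON-quoted (the same caveat applies to Test 2).
const csv = await jq.run(
  '(["email", "experience"] | @csv), (.[] | [.email, .seekworkingyear] | @csv)',
  rows,
  { input: "json" }
);
console.log(csv);
```

Top-level `await` works here because `package.json` declares `"type": "module"`.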
diff --git a/package.json b/package.json
index 62f446b..3675132 100644
--- a/package.json
+++ b/package.json
@@ -4,9 +4,12 @@
   "type": "module",
   "private": true,
   "scripts": {
-    "start": "node poc.js"
+    "start": "node poc.js",
+    "try-jq": "node experiment-jq-playground.js"
   },
   "dependencies": {
+    "axios": "^1.12.2",
+    "node-jq": "^6.3.1",
     "openai": "^4.60.0",
     "zod": "^3.23.8"
   }
diff --git a/poc.js b/poc.js
index f1e514e..de8abaf 100644
--- a/poc.js
+++ b/poc.js
@@ -5,11 +5,21 @@
 import fs from "node:fs";
 import OpenAI from "openai";
+import axios from "axios";
+import jq from "node-jq";
 
 // ---- Config ----
 const MODEL = process.env.OPENAI_MODEL || "gpt-5";
-const MAIN_SCHEMA_PATH = "./main.json"; // optional context; used for validation
-const LG_SCHEMA_PATH = "./lg.json"; // optional context
+
+// ODMDB paths - point to actual ODMDB structure
+const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
+const SCHEMA_PATH = `${ODMDB_BASE_PATH}/schema`;
+const OBJECTS_PATH = `${ODMDB_BASE_PATH}/objects`;
+
+// ODMDB execution config
+const ODMDB_BASE_URL = process.env.ODMDB_BASE_URL || "http://localhost:3000";
+const ODMDB_TRIBE = process.env.ODMDB_TRIBE || "smatchit";
+const EXECUTE_QUERY = process.env.EXECUTE_QUERY === "true"; // Set to "true" to execute queries
 
 // Hardcoded NL query for the PoC (no multi-turn)
 const NL_QUERY =
@@ -21,12 +31,16 @@ function loadJsonSafe(path) {
     if (fs.existsSync(path)) {
       return JSON.parse(fs.readFileSync(path, "utf-8"));
     }
-  } catch {}
+  } catch (e) {
+    console.warn(`Warning: Could not load ${path}:`, e.message);
+  }
   return null;
 }
+
+// Load actual ODMDB schemas
 const SCHEMAS = {
-  main: loadJsonSafe(MAIN_SCHEMA_PATH),
-  lg: loadJsonSafe(LG_SCHEMA_PATH),
+  seekers: loadJsonSafe(`${SCHEMA_PATH}/seekers.json`),
+  main: loadJsonSafe("./main.json"), // Fallback consolidated schema
 };
 
 // ---- Helpers to read seekers field names from your ODMDB custom schema ----
@@ -75,15 +89,22 @@ function extractSeekersPropsFromOdmdbSchema(main) {
 // ---- Schema-based mapping system ----
 class SchemaMapper {
   constructor(schemas) {
-    this.schemas = schemas.main || [];
-    this.seekersSchema = this.findSchemaByType("seekers");
+    // Use direct seekers schema if available, otherwise search in consolidated main schema
+    this.seekersSchema =
+      schemas.seekers || this.findSchemaByType("seekers", schemas.main);
     this.fieldMappings = this.buildFieldMappings();
     this.indexMappings = this.buildIndexMappings();
+
+    console.log(
+      `📋 Loaded seekers schema with ${
+        Object.keys(this.seekersSchema?.properties || {}).length
+      } properties`
+    );
   }
 
-  findSchemaByType(objectType) {
-    if (!this.schemas || !Array.isArray(this.schemas)) return null;
-    return this.schemas.find(
+  findSchemaByType(objectType, schemas) {
+    if (!schemas || !Array.isArray(schemas)) return null;
+    return schemas.find(
       (schema) => schema.$id && schema.$id.includes(`/${objectType}`)
     );
   }
@@ -203,6 +224,16 @@
 const schemaMapper = new SchemaMapper(SCHEMAS);
 const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();
 
+console.log(
+  `🔍 Available seekers fields: ${SEEKERS_FIELDS_FROM_SCHEMA.slice(0, 10).join(
+    ", "
+  )}${
+    SEEKERS_FIELDS_FROM_SCHEMA.length > 10
+      ? `... (${SEEKERS_FIELDS_FROM_SCHEMA.length} total)`
+      : ""
+  }`
+);
+
 // ---- Minimal mapping config (for prompting + default fields) ----
 const seekersMapping = {
   object: "seekers",
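For orientation, the shape `SchemaMapper` works against is sketched below with hypothetical field names; a dedicated `schema/seekers.json` is used directly when present, while the consolidated `main.json` lookup only relies on `$id` containing `/seekers` and on `properties` listing the fields:

```js
// Hypothetical, trimmed-down seekers schema (the real ones live under ../smatchitObjectOdmdb/schema/).
const exampleSeekersSchema = {
  $id: "schema/seekers", // only the "/seekers" substring matters to findSchemaByType
  properties: {
    email: { type: "string" },
    dt_create: { type: "string", format: "date-time" },
    seekstatus: { type: "string" },
  },
};
// With a schema like this, the constructor above would log "Loaded seekers schema with 3 properties".
```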
@@ -394,29 +425,216 @@ function validateWithOdmdbSchema(candidate) {
   return candidate;
 }
 
-// ---- Run PoC (print only the created query; do not execute) ----
+// ---- Local ODMDB Data Access ----
+function loadSeekersData() {
+  const seekersItemsPath = `${OBJECTS_PATH}/seekers/itm`;
+
+  try {
+    if (!fs.existsSync(seekersItemsPath)) {
+      console.error(`❌ Seekers data directory not found: ${seekersItemsPath}`);
+      return [];
+    }
+
+    const files = fs
+      .readdirSync(seekersItemsPath)
+      .filter((file) => file.endsWith(".json") && file !== "backup")
+      .slice(0, 50); // Limit to first 50 files for PoC performance
+
+    console.log(
+      `📁 Loading ${files.length} seeker files from ${seekersItemsPath}`
+    );
+
+    const seekers = [];
+    for (const file of files) {
+      try {
+        const filePath = `${seekersItemsPath}/${file}`;
+        const data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
+        seekers.push(data);
+      } catch (error) {
+        console.warn(`⚠️ Could not load ${file}:`, error.message);
+      }
+    }
+
+    return seekers;
+  } catch (error) {
+    console.error("❌ Error loading seekers data:", error.message);
+    return [];
+  }
+}
+
+// ---- Local ODMDB Query Execution ----
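+// Condition strings handled by this PoC (a small, hand-parsed subset of the ODMDB DSL):
+//   prop.dt_create(>=:YYYY-MM-DD)   -> keep seekers whose dt_create is on or after the date
+//   idx.seekstatus_alias(<status>)  -> keep seekers whose seekstatus equals <status>
+// e.g. ["prop.dt_create(>=:2025-10-07)", "idx.seekstatus_alias(startasap)"]
+// Any other condition string is currently ignored.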
+async function executeOdmdbQuery(query) {
+  if (!EXECUTE_QUERY) {
+    console.log(
+      "💡 Query execution disabled. Set EXECUTE_QUERY=true to enable."
+    );
+    return null;
+  }
+
+  try {
+    console.log(
+      `\n🔍 Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/`
+    );
+    console.log("Query conditions:", query.condition);
+    console.log("Requested fields:", query.fields);
+
+    // Load all seekers data
+    const allSeekers = loadSeekersData();
+
+    if (allSeekers.length === 0) {
+      console.log("❌ No seekers data found");
+      return { data: [] };
+    }
+
+    console.log(`Loaded ${allSeekers.length} seekers for filtering`);
+
+    // Apply basic filtering (simplified DSL processing)
+    let filteredSeekers = allSeekers;
+
+    for (const condition of query.condition) {
+      if (condition.includes("prop.dt_create(>=:")) {
+        // Extract date from condition like "prop.dt_create(>=:2025-10-07)"
+        const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/);
+        if (dateMatch) {
+          const filterDate = new Date(dateMatch[1]);
+          filteredSeekers = filteredSeekers.filter((seeker) => {
+            if (!seeker.dt_create) return false;
+            const seekerDate = new Date(seeker.dt_create);
+            return seekerDate >= filterDate;
+          });
+          console.log(
+            `🗓️ Filtered by date >= ${dateMatch[1]}: ${filteredSeekers.length} results`
+          );
+        }
+      }
+
+      if (condition.includes("idx.seekstatus_alias(")) {
+        // Extract status from condition like "idx.seekstatus_alias(startasap)"
+        const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/);
+        if (statusMatch) {
+          const status = statusMatch[1];
+          filteredSeekers = filteredSeekers.filter(
+            (seeker) => seeker.seekstatus === status
+          );
+          console.log(
+            `👤 Filtered by status ${status}: ${filteredSeekers.length} results`
+          );
+        }
+      }
+    }
+
+    // Select only requested fields
+    const results = filteredSeekers.map((seeker) => {
+      const filtered = {};
+      for (const field of query.fields) {
+        if (seeker.hasOwnProperty(field)) {
+          filtered[field] = seeker[field];
+        }
+      }
+      return filtered;
+    });
+
+    console.log(
+      `✅ Query executed successfully! Found ${results.length} matching seekers`
+    );
+
+    return {
+      data: results,
+      meta: {
+        total: results.length,
+        source: "local_files",
+        path: `${OBJECTS_PATH}/seekers/itm/`,
+      },
+    };
+  } catch (error) {
+    console.error("❌ Local query execution failed:", error.message);
+    return null;
+  }
+}
+
+// ---- Result Processing with jq ----
+async function processResults(results, jqFilter = ".") {
+  if (!results || !results.data) {
+    console.log("No results to process.");
+    return null;
+  }
+
+  try {
+    // Use jq to filter and format results (pass data directly, not as string)
+    const processed = await jq.run(jqFilter, results.data, { input: "json" });
+
+    // Return the processed result
+    return processed;
+  } catch (error) {
+    console.error("❌ jq processing failed:", error.message);
+    return JSON.stringify(results.data, null, 2); // Return formatted JSON if jq fails
+  }
+}
+
+// ---- Run PoC (generate query and optionally execute) ----
 (async () => {
   try {
     if (!process.env.OPENAI_API_KEY)
       throw new Error("Missing OPENAI_API_KEY env var.");
 
+    console.log(`🤖 Processing NL query: "${NL_QUERY}"`);
+    console.log("=".repeat(60));
+
+    // Step 1: Generate ODMDB query from natural language
     const out = await inferQuery(NL_QUERY);
     const validated = validateWithOdmdbSchema(out);
 
-    // Output ONLY the created query (no execution)
-    console.log(
-      JSON.stringify(
-        {
-          object: validated.object,
-          condition: validated.condition,
-          fields: validated.fields,
-        },
-        null,
-        2
-      )
-    );
+    console.log("✅ Generated ODMDB Query:");
+    const generatedQuery = {
+      object: validated.object,
+      condition: validated.condition,
+      fields: validated.fields,
+    };
+    console.log(JSON.stringify(generatedQuery, null, 2));
+
+    // Step 2: Execute query if enabled
+    if (EXECUTE_QUERY) {
+      console.log("\n" + "=".repeat(60));
+      const results = await executeOdmdbQuery(generatedQuery);
+
+      if (results) {
+        console.log("✅ Query executed successfully!");
+        console.log(`📊 Found ${results.data?.length || 0} results`);
+
+        // Step 3: Process results with jq
+        console.log("\n📋 Results Summary:");
+        const summary = await processResults(
+          results,
+          `.[0:3] | map({alias, email, seekstatus})`
+        );
+        console.log(JSON.stringify(summary, null, 2));
+
+        // Optional: Show full results count
+        if (results.data?.length > 3) {
+          console.log(`\n... and ${results.data.length - 3} more results`);
+        }
+
+        // Step 4: Export to CSV format
+        console.log("\n📄 CSV Preview:");
+        const csvData = await processResults(
+          results,
+          `
+          map([.alias // "N/A", .email // "N/A", .seekstatus // "N/A"]) |
+          ["alias","email","status"] as $header |
+          [$header] + .[0:5] |
+          .[] | @csv
+          `
+        );
+        if (csvData) {
+          console.log(csvData);
+        }
+      }
+    } else {
+      console.log(
+        "\n💡 To execute this query against ODMDB, set EXECUTE_QUERY=true"
+      );
+      console.log(`   Example: EXECUTE_QUERY=true npm start`);
+    }
   } catch (e) {
-    console.error("PoC failed:", e.message || e);
+    console.error("❌ PoC failed:", e.message || e);
     process.exit(1);
   }
 })();
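For reference, the CSV preview filter used in Step 4 can be exercised on its own; a minimal sketch with made-up seeker rows, where jq's `//` operator substitutes `"N/A"` for missing fields and `$header` prepends the column names:

```js
import jq from "node-jq";

// Hypothetical rows standing in for ODMDB results (illustrative only).
const rows = [
  { alias: "jdoe", email: "john.doe@example.com", seekstatus: "startasap" },
  { alias: "asmith" }, // missing email/seekstatus fall back to "N/A"
];

const csv = await jq.run(
  `
  map([.alias // "N/A", .email // "N/A", .seekstatus // "N/A"]) |
  ["alias","email","status"] as $header |
  [$header] + .[0:5] |
  .[] | @csv
  `,
  rows,
  { input: "json" }
);
console.log(csv);
```

Each CSV record is printed as a JSON-quoted string here; jq's `-r` flag is what yields bare CSV lines instead.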