[ADD] PoC working with our database. playground jq
This commit is contained in:
40
README.md
40
README.md
@@ -36,11 +36,25 @@ This is a **Proof of Concept (PoC)** that demonstrates the conversion of natural
|
|||||||
|
|
||||||
### Running the PoC
|
### Running the PoC
|
||||||
|
|
||||||
|
**Query Generation Only (Default):**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
npm start
|
npm start
|
||||||
```
|
```
|
||||||
|
|
||||||
This will process the hardcoded natural language query and output the generated ODMDB query in JSON format.
|
**Query Generation + Execution:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
EXECUTE_QUERY=true npm start
|
||||||
|
```
|
||||||
|
|
||||||
|
**With Custom ODMDB Server:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
EXECUTE_QUERY=true ODMDB_BASE_URL=http://localhost:8080 npm start
|
||||||
|
```
|
||||||
|
|
||||||
|
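This will process the hardcoded natural language query and output the generated ODMDB query in JSON format. When `EXECUTE_QUERY=true`, it will also execute the query; in this PoC, execution filters the local ODMDB object files under `../smatchitObjectOdmdb/objects/seekers/itm` rather than calling a remote server.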
|
||||||
|
|
||||||
### Changing the Query
|
### Changing the Query
|
||||||
|
|
||||||
@@ -57,8 +71,32 @@ const NL_QUERY = "your natural language query here";
|
|||||||
- `"find recent seekers with job titles and salary expectations"`
|
- `"find recent seekers with job titles and salary expectations"`
|
||||||
- `"show me seekers from yesterday with their skills"`
|
- `"show me seekers from yesterday with their skills"`
|
||||||
|
|
||||||
|
### Testing jq Processing
|
||||||
|
|
||||||
|
To test the jq processing capabilities with mock data:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node experiment-jq-playground.js
|
||||||
|
```
|
||||||
|
|
||||||
|
This demonstrates various jq operations including:
|
||||||
|
|
||||||
|
- Basic data formatting and field selection
|
||||||
|
- CSV conversion from JSON
|
||||||
|
- Advanced filtering and transformations
|
||||||
|
- Statistical summaries and aggregations
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
- `OPENAI_API_KEY` - Your OpenAI API key (required)
|
||||||
|
- `EXECUTE_QUERY` - Set to "true" to execute queries against ODMDB (default: false)
|
||||||
|
- `ODMDB_BASE_URL` - ODMDB server URL (default: http://localhost:3000)
|
||||||
|
- `ODMDB_TRIBE` - ODMDB tribe name (default: smatchit)
|
||||||
|
- `OPENAI_MODEL` - OpenAI model to use (default: gpt-5)
|
||||||
|
|
||||||
## Output Format
|
## Output Format
|
||||||
|
|
||||||
|
**Query Generation:**
|
||||||
The PoC generates ODMDB queries in this format:
|
The PoC generates ODMDB queries in this format:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
77
experiment-jq-playground.js
Normal file
77
experiment-jq-playground.js
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
// experimental script to demonstrate jq processing capabilities. playground
|
||||||
|
// not relevant to the PoC itself; just practice with jq
|
||||||
|
// --- IGNORE ---
|
||||||
|
import jq from "node-jq";
|
||||||
|
|
||||||
|
// Mock ODMDB response used as jq input by testJqProcessing(): a `data` array of
// three seeker records, each with email, seekworkingyear, dt_create and skills.
const mockOdmdbResponse = {
|
||||||
|
data: [
|
||||||
|
{
|
||||||
|
email: "john.doe@example.com",
|
||||||
|
seekworkingyear: 5,
|
||||||
|
dt_create: "2025-01-10T10:30:00Z",
|
||||||
|
skills: ["JavaScript", "Node.js", "React"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
email: "jane.smith@example.com",
|
||||||
|
seekworkingyear: 3,
|
||||||
|
dt_create: "2025-01-11T14:20:00Z",
|
||||||
|
skills: ["Python", "Django", "PostgreSQL"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
email: "bob.wilson@example.com",
|
||||||
|
seekworkingyear: 8,
|
||||||
|
dt_create: "2025-01-12T09:15:00Z",
|
||||||
|
skills: ["Java", "Spring", "AWS"],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Run four demo jq filters against the mock seeker records and print each
 * result to stdout, separated by a divider line.
 *
 * @returns {Promise<void>} Resolves once all four demos have been printed;
 *   rejects if any jq invocation fails.
 */
async function testJqProcessing() {
  const seekers = mockOdmdbResponse.data;
  // Every demo feeds the same in-memory array to jq as JSON input.
  const runFilter = (filter) => jq.run(filter, seekers, { input: "json" });
  const printDivider = () => console.log("\n" + "=".repeat(50) + "\n");

  console.log("🧪 Testing jq processing capabilities...\n");

  // Test 1: Basic filtering
  console.log("📋 Test 1: Basic data formatting");
  const basicFormat = await runFilter(
    ".[] | {email, experience: .seekworkingyear}"
  );
  console.log(basicFormat);
  printDivider();

  // Test 2: CSV conversion
  console.log("📊 Test 2: CSV conversion");
  const csvData = await runFilter(
    'map([.email, .seekworkingyear] | @csv) | join("\n")'
  );
  console.log("email,experience");
  console.log(csvData);
  printDivider();

  // Test 3: Advanced filtering
  console.log("🔍 Test 3: Advanced filtering (experience > 4 years)");
  const filtered = await runFilter(
    "map(select(.seekworkingyear > 4)) | .[] | {email, years: .seekworkingyear, skills}"
  );
  console.log(filtered);
  printDivider();

  // Test 4: Statistical summary
  console.log("📈 Test 4: Statistical summary");
  const stats = await runFilter(
    "{ total_seekers: length, avg_experience: (map(.seekworkingyear) | add / length), skill_count: (map(.skills[]) | group_by(.) | map({skill: .[0], count: length})) }"
  );
  // jq hands back text here, so parse it before re-indenting.
  console.log(JSON.stringify(JSON.parse(stats), null, 2));

  console.log("\n✅ All jq tests completed successfully!");
}
|
||||||
|
|
||||||
|
testJqProcessing().catch(console.error);
|
@@ -4,9 +4,12 @@
|
|||||||
"type": "module",
|
"type": "module",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node poc.js"
|
"start": "node poc.js",
|
||||||
|
"try-jq": "node experiment-jq-playground.js"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"axios": "^1.12.2",
|
||||||
|
"node-jq": "^6.3.1",
|
||||||
"openai": "^4.60.0",
|
"openai": "^4.60.0",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8"
|
||||||
}
|
}
|
||||||
|
266
poc.js
266
poc.js
@@ -5,11 +5,21 @@
|
|||||||
|
|
||||||
import fs from "node:fs";
|
import fs from "node:fs";
|
||||||
import OpenAI from "openai";
|
import OpenAI from "openai";
|
||||||
|
import axios from "axios";
|
||||||
|
import jq from "node-jq";
|
||||||
|
|
||||||
// ---- Config ----
|
// ---- Config ----
|
||||||
const MODEL = process.env.OPENAI_MODEL || "gpt-5";
|
const MODEL = process.env.OPENAI_MODEL || "gpt-5";
|
||||||
const MAIN_SCHEMA_PATH = "./main.json"; // optional context; used for validation
|
|
||||||
const LG_SCHEMA_PATH = "./lg.json"; // optional context
|
// ODMDB paths - point to actual ODMDB structure
|
||||||
|
const ODMDB_BASE_PATH = "../smatchitObjectOdmdb";
|
||||||
|
const SCHEMA_PATH = `${ODMDB_BASE_PATH}/schema`;
|
||||||
|
const OBJECTS_PATH = `${ODMDB_BASE_PATH}/objects`;
|
||||||
|
|
||||||
|
// ODMDB execution config
|
||||||
|
const ODMDB_BASE_URL = process.env.ODMDB_BASE_URL || "http://localhost:3000";
|
||||||
|
const ODMDB_TRIBE = process.env.ODMDB_TRIBE || "smatchit";
|
||||||
|
const EXECUTE_QUERY = process.env.EXECUTE_QUERY === "true"; // Set to "true" to execute queries
|
||||||
|
|
||||||
// Hardcoded NL query for the PoC (no multi-turn)
|
// Hardcoded NL query for the PoC (no multi-turn)
|
||||||
const NL_QUERY =
|
const NL_QUERY =
|
||||||
@@ -21,12 +31,16 @@ function loadJsonSafe(path) {
|
|||||||
if (fs.existsSync(path)) {
|
if (fs.existsSync(path)) {
|
||||||
return JSON.parse(fs.readFileSync(path, "utf-8"));
|
return JSON.parse(fs.readFileSync(path, "utf-8"));
|
||||||
}
|
}
|
||||||
} catch {}
|
} catch (e) {
|
||||||
|
console.warn(`Warning: Could not load ${path}:`, e.message);
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Load actual ODMDB schemas
|
||||||
const SCHEMAS = {
|
const SCHEMAS = {
|
||||||
main: loadJsonSafe(MAIN_SCHEMA_PATH),
|
seekers: loadJsonSafe(`${SCHEMA_PATH}/seekers.json`),
|
||||||
lg: loadJsonSafe(LG_SCHEMA_PATH),
|
main: loadJsonSafe("./main.json"), // Fallback consolidated schema
|
||||||
};
|
};
|
||||||
|
|
||||||
// ---- Helpers to read seekers field names from your ODMDB custom schema ----
|
// ---- Helpers to read seekers field names from your ODMDB custom schema ----
|
||||||
@@ -75,15 +89,22 @@ function extractSeekersPropsFromOdmdbSchema(main) {
|
|||||||
// ---- Schema-based mapping system ----
|
// ---- Schema-based mapping system ----
|
||||||
class SchemaMapper {
|
class SchemaMapper {
|
||||||
constructor(schemas) {
|
constructor(schemas) {
|
||||||
this.schemas = schemas.main || [];
|
// Use direct seekers schema if available, otherwise search in consolidated main schema
|
||||||
this.seekersSchema = this.findSchemaByType("seekers");
|
this.seekersSchema =
|
||||||
|
schemas.seekers || this.findSchemaByType("seekers", schemas.main);
|
||||||
this.fieldMappings = this.buildFieldMappings();
|
this.fieldMappings = this.buildFieldMappings();
|
||||||
this.indexMappings = this.buildIndexMappings();
|
this.indexMappings = this.buildIndexMappings();
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`📋 Loaded seekers schema with ${
|
||||||
|
Object.keys(this.seekersSchema?.properties || {}).length
|
||||||
|
} properties`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
findSchemaByType(objectType) {
|
findSchemaByType(objectType, schemas) {
|
||||||
if (!this.schemas || !Array.isArray(this.schemas)) return null;
|
if (!schemas || !Array.isArray(schemas)) return null;
|
||||||
return this.schemas.find(
|
return schemas.find(
|
||||||
(schema) => schema.$id && schema.$id.includes(`/${objectType}`)
|
(schema) => schema.$id && schema.$id.includes(`/${objectType}`)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -203,6 +224,16 @@ const schemaMapper = new SchemaMapper(SCHEMAS);
|
|||||||
|
|
||||||
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();
|
const SEEKERS_FIELDS_FROM_SCHEMA = schemaMapper.getAllSeekersFields();
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`🔍 Available seekers fields: ${SEEKERS_FIELDS_FROM_SCHEMA.slice(0, 10).join(
|
||||||
|
", "
|
||||||
|
)}${
|
||||||
|
SEEKERS_FIELDS_FROM_SCHEMA.length > 10
|
||||||
|
? `... (${SEEKERS_FIELDS_FROM_SCHEMA.length} total)`
|
||||||
|
: ""
|
||||||
|
}`
|
||||||
|
);
|
||||||
|
|
||||||
// ---- Minimal mapping config (for prompting + default fields) ----
|
// ---- Minimal mapping config (for prompting + default fields) ----
|
||||||
const seekersMapping = {
|
const seekersMapping = {
|
||||||
object: "seekers",
|
object: "seekers",
|
||||||
@@ -394,29 +425,216 @@ function validateWithOdmdbSchema(candidate) {
|
|||||||
return candidate;
|
return candidate;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- Run PoC (print only the created query; do not execute) ----
|
// ---- Local ODMDB Data Access ----
|
||||||
|
/**
 * Load seeker records from the local ODMDB object store.
 *
 * Reads `${OBJECTS_PATH}/seekers/itm`, keeps non-directory entries whose name
 * ends in ".json", sorts the names so the sample is deterministic, and parses
 * at most `limit` of them. Files that cannot be read or parsed are skipped
 * with a warning instead of aborting the whole load.
 *
 * @param {number} [limit=50] - Maximum number of files to read (PoC performance cap).
 * @returns {object[]} Parsed seeker records; an empty array when the directory
 *   is missing or cannot be listed.
 */
function loadSeekersData(limit = 50) {
  const seekersItemsPath = `${OBJECTS_PATH}/seekers/itm`;

  try {
    if (!fs.existsSync(seekersItemsPath)) {
      console.error(`❌ Seekers data directory not found: ${seekersItemsPath}`);
      return [];
    }

    const files = fs
      .readdirSync(seekersItemsPath, { withFileTypes: true })
      // Skip sub-directories (e.g. a backup folder) rather than comparing names.
      .filter((entry) => !entry.isDirectory() && entry.name.endsWith(".json"))
      .map((entry) => entry.name)
      // readdir order is platform-dependent; sort so "first N" is stable.
      .sort()
      .slice(0, limit);

    console.log(
      `📁 Loading ${files.length} seeker files from ${seekersItemsPath}`
    );

    const seekers = [];
    for (const file of files) {
      try {
        const filePath = `${seekersItemsPath}/${file}`;
        seekers.push(JSON.parse(fs.readFileSync(filePath, "utf-8")));
      } catch (error) {
        // One corrupt record must not prevent the others from loading.
        console.warn(`⚠️ Could not load ${file}:`, error.message);
      }
    }

    return seekers;
  } catch (error) {
    console.error("❌ Error loading seekers data:", error.message);
    return [];
  }
}
|
||||||
|
|
||||||
|
// ---- Local ODMDB Query Execution ----
|
||||||
|
/**
 * Keep seekers created on or after the date named in a
 * `prop.dt_create(>=:YYYY-MM-DD)` condition. Records without `dt_create`
 * are dropped. Returns the input unchanged when no date can be extracted.
 */
function filterSeekersByCreationDate(seekers, condition) {
  const dateMatch = condition.match(/>=:(\d{4}-\d{2}-\d{2})/);
  if (!dateMatch) return seekers;

  const filterDate = new Date(dateMatch[1]);
  const kept = seekers.filter((seeker) => {
    if (!seeker?.dt_create) return false;
    return new Date(seeker.dt_create) >= filterDate;
  });
  console.log(
    `🗓️ Filtered by date >= ${dateMatch[1]}: ${kept.length} results`
  );
  return kept;
}

/**
 * Keep seekers whose `seekstatus` equals the value named in an
 * `idx.seekstatus_alias(<status>)` condition. Returns the input unchanged
 * when no status can be extracted.
 */
function filterSeekersByStatus(seekers, condition) {
  const statusMatch = condition.match(/idx\.seekstatus_alias\(([^)]+)\)/);
  if (!statusMatch) return seekers;

  const status = statusMatch[1];
  const kept = seekers.filter((seeker) => seeker?.seekstatus === status);
  console.log(`👤 Filtered by status ${status}: ${kept.length} results`);
  return kept;
}

/** Copy only the requested own properties of one seeker record. */
function pickSeekerFields(seeker, fields) {
  const picked = {};
  if (seeker === null || typeof seeker !== "object") return picked;
  for (const field of fields) {
    // Records come from JSON files and may carry a key named "hasOwnProperty",
    // so never call the method through the record itself.
    if (Object.prototype.hasOwnProperty.call(seeker, field)) {
      picked[field] = seeker[field];
    }
  }
  return picked;
}

/**
 * Execute a generated ODMDB query against the local seekers files.
 *
 * This is a simplified DSL interpreter: only `prop.dt_create(>=:YYYY-MM-DD)`
 * and `idx.seekstatus_alias(<status>)` conditions are applied; any other
 * condition is ignored. A missing or non-array `condition` / `fields` is
 * treated as an empty list.
 *
 * @param {{condition?: string[], fields?: string[]}} query - Generated query.
 * @returns {Promise<{data: object[], meta?: object} | null>} Matching records
 *   reduced to the requested fields; `{ data: [] }` when no seekers could be
 *   loaded; `null` when execution is disabled or fails.
 */
async function executeOdmdbQuery(query) {
  if (!EXECUTE_QUERY) {
    console.log(
      "💡 Query execution disabled. Set EXECUTE_QUERY=true to enable."
    );
    return null;
  }

  try {
    const conditions = Array.isArray(query?.condition) ? query.condition : [];
    const fields = Array.isArray(query?.fields) ? query.fields : [];

    console.log(
      `\n🔍 Executing query against local ODMDB data: ${OBJECTS_PATH}/seekers/`
    );
    console.log("Query conditions:", query?.condition);
    console.log("Requested fields:", query?.fields);

    // Load all seekers data
    const allSeekers = loadSeekersData();

    if (allSeekers.length === 0) {
      console.log("❌ No seekers data found");
      return { data: [] };
    }

    console.log(`📊 Loaded ${allSeekers.length} seekers for filtering`);

    // Apply basic filtering (simplified DSL processing)
    let filteredSeekers = allSeekers;
    for (const condition of conditions) {
      if (typeof condition !== "string") continue;

      if (condition.includes("prop.dt_create(>=:")) {
        filteredSeekers = filterSeekersByCreationDate(
          filteredSeekers,
          condition
        );
      }
      if (condition.includes("idx.seekstatus_alias(")) {
        filteredSeekers = filterSeekersByStatus(filteredSeekers, condition);
      }
    }

    // Select only requested fields
    const results = filteredSeekers.map((seeker) =>
      pickSeekerFields(seeker, fields)
    );

    console.log(
      `✅ Query executed successfully! Found ${results.length} matching seekers`
    );

    return {
      data: results,
      meta: {
        total: results.length,
        source: "local_files",
        path: `${OBJECTS_PATH}/seekers/itm/`,
      },
    };
  } catch (error) {
    console.error("❌ Local query execution failed:", error.message);
    return null;
  }
}
|
||||||
|
|
||||||
|
// ---- Result Processing with jq ----
|
||||||
|
/**
 * Run a jq filter over the `data` array of a query result.
 *
 * @param {{data?: unknown} | null | undefined} results - Query result wrapper.
 * @param {string} [jqFilter="."] - jq program applied to `results.data`.
 * @returns {Promise<unknown>} Whatever `jq.run` resolves to; `null` when there
 *   is nothing to process; a pretty-printed JSON string of the raw data when
 *   jq fails. NOTE(review): with node-jq's default output mode the success
 *   value is presumably text, not a parsed object — confirm against node-jq.
 */
async function processResults(results, jqFilter = ".") {
  const rows = results?.data;
  if (!rows) {
    console.log("No results to process.");
    return null;
  }

  try {
    // The in-memory data is handed to jq as JSON input (not a file path).
    return await jq.run(jqFilter, rows, { input: "json" });
  } catch (error) {
    console.error("❌ jq processing failed:", error.message);
    // Fall back to the unfiltered data so the caller still has output.
    return JSON.stringify(rows, null, 2);
  }
}

// ---- Run PoC (generate query and optionally execute) ----
|
||||||
/**
 * PoC entry point.
 *
 * 1. Turns the hardcoded natural-language query into an ODMDB query and prints it.
 * 2. When EXECUTE_QUERY is enabled, runs the query via executeOdmdbQuery().
 * 3. Prints a jq-produced summary of the first three results.
 * 4. Prints a jq-produced CSV preview of the first five results.
 *
 * Any failure is reported and the process exits with status 1.
 */
(async () => {
  try {
    if (!process.env.OPENAI_API_KEY)
      throw new Error("Missing OPENAI_API_KEY env var.");

    console.log(`🤖 Processing NL query: "${NL_QUERY}"`);
    console.log("=".repeat(60));

    // Step 1: Generate ODMDB query from natural language
    const out = await inferQuery(NL_QUERY);
    const validated = validateWithOdmdbSchema(out);

    console.log("✅ Generated ODMDB Query:");
    const generatedQuery = {
      object: validated.object,
      condition: validated.condition,
      fields: validated.fields,
    };
    console.log(JSON.stringify(generatedQuery, null, 2));

    // Step 2: Execute query if enabled
    if (EXECUTE_QUERY) {
      console.log("\n" + "=".repeat(60));
      const results = await executeOdmdbQuery(generatedQuery);

      if (results) {
        console.log("✅ Query executed successfully!");
        console.log(`📊 Found ${results.data?.length || 0} results`);

        // Step 3: Process results with jq
        console.log("\n📋 Results Summary:");
        const summary = await processResults(
          results,
          `.[0:3] | map({alias, email, seekstatus})`
        );
        // processResults returns jq's text output; stringifying that again
        // would print a quoted, escaped string instead of readable JSON.
        console.log(
          typeof summary === "string"
            ? summary
            : JSON.stringify(summary, null, 2)
        );

        // Optional: Show full results count
        if (results.data?.length > 3) {
          console.log(`\n... and ${results.data.length - 3} more results`);
        }

        // Step 4: Export to CSV format
        console.log("\n📄 CSV Preview:");
        const csvData = await processResults(
          results,
          `
          map([.alias // "N/A", .email // "N/A", .seekstatus // "N/A"]) |
          ["alias","email","status"] as $header |
          [$header] + .[0:5] |
          .[] | @csv
          `
        );
        if (csvData) {
          console.log(csvData);
        }
      }
    } else {
      console.log(
        "\n💡 To execute this query against ODMDB, set EXECUTE_QUERY=true"
      );
      console.log(` Example: EXECUTE_QUERY=true npm start`);
    }
  } catch (e) {
    console.error("❌ PoC failed:", e.message || e);
    process.exit(1);
  }
})();
|
||||||
|
Reference in New Issue
Block a user