413 lines
12 KiB
JavaScript
413 lines
12 KiB
JavaScript
// Comprehensive ODMDB Schema Mapping Manager
// Handles all objects, detects data availability, and provides intelligent query routing
|
|
|
|
import fs from "node:fs";
|
|
import { seekersMapping } from "./seekers-mapping.js";
|
|
import { jobadsMapping } from "./jobads-mapping.js";
|
|
import { recruitersMapping } from "./recruiters-mapping.js";
|
|
import { personsMapping } from "./persons-mapping.js";
|
|
import { createSchemaMapping } from "./base-mapping.js";
|
|
|
|
// Directory holding one `<objectName>.json` schema file per ODMDB object.
// The path is relative, so Node resolves it against the process working
// directory when it is handed to the `fs` calls below.
const SCHEMA_BASE_PATH = "../smatchitObjectOdmdb/schema";

// Directory holding one sub-directory per ODMDB object; item files are looked
// up in `<objectName>/itm`. Also resolved against the process working directory.
const OBJECTS_BASE_PATH = "../smatchitObjectOdmdb/objects";
|
|
|
|
/**
 * Registry of ODMDB object mappings together with a record of which objects
 * have item files on disk.
 *
 * - `mappings`: Map of object name -> mapping object (the imported hand-written
 *   mappings, or one produced by `createSchemaMapping`).
 * - `dataAvailability`: Map of object name ->
 *   `{ schemaAvailable, dataAvailable, dataPath, fileCount, sampleFiles }`.
 *
 * On top of these two maps the class offers keyword-based detection of the
 * object a natural-language query refers to, feasibility checks and field
 * suggestions.
 */
class ODMDBMappingManager {
  /**
   * Creates both maps and fills them immediately. Both loading steps use
   * synchronous `fs` calls and log a summary line to the console.
   */
  constructor() {
    this.mappings = new Map();
    this.dataAvailability = new Map();
    this.loadAllMappings();
    this.checkDataAvailability();
  }

  /**
   * Registers the four imported mappings, then tries to build a mapping for
   * each remaining schema from `<SCHEMA_BASE_PATH>/<name>.json`.
   *
   * A schema file that does not exist is skipped and nothing is registered for
   * it. A schema that exists but cannot be read, parsed or converted is
   * registered as a placeholder with `available: false` and the error message,
   * and a warning is logged.
   */
  loadAllMappings() {
    // Primary mappings (with custom enhancements)
    this.mappings.set("seekers", seekersMapping);
    this.mappings.set("jobads", jobadsMapping);
    this.mappings.set("recruiters", recruitersMapping);
    this.mappings.set("persons", personsMapping);

    // Remaining schemas are loaded dynamically from their JSON files
    const remainingSchemas = [
      "jobsteps",
      "jobtitles",
      "quizz",
      "screens",
      "sirets",
      "trainingprovider",
      "trainings",
    ];

    for (const schemaName of remainingSchemas) {
      const schemaPath = `${SCHEMA_BASE_PATH}/${schemaName}.json`;
      try {
        if (fs.existsSync(schemaPath)) {
          const schemaData = JSON.parse(fs.readFileSync(schemaPath, "utf-8"));
          this.mappings.set(
            schemaName,
            createSchemaMapping(schemaData, schemaName)
          );
        }
      } catch (error) {
        console.warn(
          `Warning: Could not load ${schemaName} schema: ${error.message}`
        );
        // Keep the object known to the manager so later checks can report
        // "schema not available" instead of "unknown object".
        this.mappings.set(schemaName, {
          objectName: schemaName,
          available: false,
          error: error.message,
          properties: {},
          synonyms: {},
        });
      }
    }

    console.log(`🗺️ Loaded ${this.mappings.size} object mappings`);
  }

  /**
   * For every registered mapping, inspects `<OBJECTS_BASE_PATH>/<name>/itm`
   * and records how many `.json` entries (excluding directories) it contains,
   * plus the first three file names as samples. The result is stored in
   * `this.dataAvailability`; a one-line summary is logged.
   *
   * Any filesystem error while inspecting an object is logged as a warning and
   * leaves that object recorded with `dataAvailable: false`.
   */
  checkDataAvailability() {
    for (const [objectName, mapping] of this.mappings) {
      const itemsPath = `${OBJECTS_BASE_PATH}/${objectName}/itm`;

      const availability = {
        // NOTE(review): relies on every mapping exposing an `available` flag;
        // the imported mappings are not visible here - confirm they set it.
        schemaAvailable: mapping.available,
        dataAvailable: false,
        dataPath: itemsPath,
        fileCount: 0,
        sampleFiles: [],
      };

      try {
        if (fs.existsSync(itemsPath)) {
          // A name ending in ".json" can never equal "backup", so the extension
          // test alone is enough; directories named "*.json" are dropped by
          // the stat check.
          const files = fs
            .readdirSync(itemsPath)
            .filter((f) => f.endsWith(".json"))
            .filter((f) => !fs.statSync(`${itemsPath}/${f}`).isDirectory());

          availability.dataAvailable = files.length > 0;
          availability.fileCount = files.length;
          availability.sampleFiles = files.slice(0, 3); // First 3 files as samples
        }
      } catch (error) {
        console.warn(
          `Warning: Could not check data for ${objectName}: ${error.message}`
        );
      }

      this.dataAvailability.set(objectName, availability);
    }

    // Log availability summary
    const availableObjects = Array.from(this.dataAvailability.entries())
      .filter(([, availability]) => availability.dataAvailable)
      .map(
        ([objectName, availability]) =>
          `${objectName}(${availability.fileCount})`
      )
      .join(", ");

    console.log(`📊 Data available for: ${availableObjects}`);
  }

  /**
   * Guesses which object(s) a natural-language query refers to.
   *
   * Two passes are made over the lower-cased query:
   * 1. direct mention - the query contains a registered object name, or that
   *    name without its last character (a crude singular); confidence 0.9;
   * 2. semantic match - the query contains one or more indicator phrases for
   *    an object; confidence is 0.3 per matched phrase, capped at 0.8.
   *
   * Only the highest-confidence detection per object is kept.
   *
   * @param {string} nlQuery - The user's query.
   * @returns {Array<{object: string, confidence: number, reason: string}>}
   *   Detections sorted by descending confidence; empty when nothing matched
   *   or when `nlQuery` is not a string.
   */
  detectObjectFromQuery(nlQuery) {
    if (typeof nlQuery !== "string") return [];

    const query = nlQuery.toLowerCase();
    const detectedObjects = [];

    // Direct object name mentions (plural name or name minus last character)
    for (const objectName of this.mappings.keys()) {
      if (
        query.includes(objectName) ||
        query.includes(objectName.slice(0, -1))
      ) {
        detectedObjects.push({
          object: objectName,
          confidence: 0.9,
          reason: `Direct mention of '${objectName}'`,
        });
      }
    }

    // Semantic object detection
    const objectIndicators = {
      seekers: [
        "seekers",
        "seeker",
        "job seekers",
        "candidates",
        "applicants",
        "people looking for jobs",
        "job hunters",
        "looking for work",
        "experience",
        "skills",
        "salary expectation",
        "availability",
      ],
      jobads: [
        "jobs",
        "job postings",
        "job ads",
        "positions",
        "openings",
        "vacancies",
        "employment opportunities",
        "job offers",
        "job description",
        "job requirements",
        "salary range",
      ],
      recruiters: [
        "recruiters",
        "recruiter",
        "hiring managers",
        "hr",
        "employers",
        "hiring",
        "recruitment",
        "talent acquisition",
        "headhunters",
      ],
      persons: [
        "people",
        "users",
        "profiles",
        "personal information",
        "contact details",
        "names",
        "demographics",
        "biography",
      ],
      sirets: [
        "companies",
        "businesses",
        "organizations",
        "employers",
        "firms",
        "corporations",
        "enterprises",
      ],
    };

    // Very short indicators such as "hr" occur inside unrelated words
    // ("three", "through"), so they must appear as whole words. Longer
    // indicators keep plain substring matching so inflected forms
    // ("experienced") still count. Indicators are fixed literals made of
    // letters and spaces, so no regex escaping is needed.
    const mentionsIndicator = (indicator) =>
      indicator.length <= 3
        ? new RegExp(`\\b${indicator}\\b`).test(query)
        : query.includes(indicator);

    for (const [objectName, indicators] of Object.entries(objectIndicators)) {
      const matches = indicators.filter(mentionsIndicator);
      if (matches.length > 0) {
        detectedObjects.push({
          object: objectName,
          confidence: Math.min(0.8, matches.length * 0.3),
          reason: `Semantic match: ${matches.join(", ")}`,
        });
      }
    }

    // Keep the best detection per object. A replacement is re-inserted at the
    // end so ties keep the order in which the winning entries were found.
    const bestByObject = new Map();
    for (const detection of detectedObjects) {
      const existing = bestByObject.get(detection.object);
      if (!existing || detection.confidence > existing.confidence) {
        bestByObject.delete(detection.object);
        bestByObject.set(detection.object, detection);
      }
    }

    return [...bestByObject.values()].sort(
      (a, b) => b.confidence - a.confidence
    );
  }

  /**
   * Summarises which objects have data files.
   *
   * Reads `this.dataAvailability` (a Map, so it is iterated directly rather
   * than through `Object.entries`, which yields nothing for a Map).
   *
   * @returns {{availableObjects: string[], objectStats: Object<string, number>,
   *   summary: string, totalObjects: number}} Names of objects with data, their
   *   file counts, a `name(count), ...` summary string and the number of such
   *   objects.
   */
  getDataAvailabilityStats() {
    const availableObjects = [];
    const objectStats = {};

    for (const [objectType, availability] of this.dataAvailability) {
      if (availability.dataAvailable) {
        availableObjects.push(objectType);
        objectStats[objectType] = availability.fileCount;
      }
    }

    const summary = availableObjects
      .map((obj) => `${obj}(${objectStats[obj]})`)
      .join(", ");

    return {
      availableObjects,
      objectStats,
      summary,
      totalObjects: availableObjects.length,
    };
  }

  /**
   * Checks whether a query can be answered with the schemas and data at hand.
   *
   * The target object is `suggestedObject` when given, otherwise the top
   * result of `detectObjectFromQuery`. The query is rejected when no object is
   * detected, the object is unknown, its schema is flagged unavailable, or it
   * has no data files. Otherwise the query words are compared with the
   * mapping's synonyms and property names and unknown words are reported as a
   * warning (they do not make the query infeasible).
   *
   * @param {string} nlQuery - The user's query.
   * @param {?string} [suggestedObject=null] - Object name to use instead of
   *   detection.
   * @returns {Object} Either `{ feasible: false, reason, suggestion, ... }` or
   *   `{ feasible: true, primaryObject, detectedObjects, dataStats,
   *   fieldWarnings }` where `fieldWarnings` is a string or `null`.
   */
  validateQueryFeasibility(nlQuery, suggestedObject = null) {
    const detectedObjects = suggestedObject
      ? [
          {
            object: suggestedObject,
            confidence: 1.0,
            reason: "Explicitly specified",
          },
        ]
      : this.detectObjectFromQuery(nlQuery);

    if (detectedObjects.length === 0) {
      return {
        feasible: false,
        reason: "Cannot determine which object type this query refers to",
        suggestion:
          "Please specify if you're looking for seekers, jobs, recruiters, or companies",
        availableObjects: Array.from(this.dataAvailability.keys()).filter(
          (obj) => this.dataAvailability.get(obj).dataAvailable
        ),
      };
    }

    const primaryObject = detectedObjects[0];
    const availability = this.dataAvailability.get(primaryObject.object);

    if (!availability) {
      const knownObjects = Array.from(this.mappings.keys()).join(", ");
      return {
        feasible: false,
        reason: `Unknown object type: ${primaryObject.object}`,
        suggestion: `Available objects: ${knownObjects}`,
      };
    }

    if (!availability.schemaAvailable) {
      return {
        feasible: false,
        reason: `Schema not available for ${primaryObject.object}`,
        suggestion: `Cannot process queries for ${primaryObject.object} - schema missing`,
      };
    }

    if (!availability.dataAvailable) {
      const objectsWithData = Array.from(this.dataAvailability.entries())
        .filter(([, avail]) => avail.dataAvailable)
        .map(([name, avail]) => `${name}(${avail.fileCount})`)
        .join(", ");
      return {
        feasible: false,
        reason: `No data available for ${primaryObject.object}`,
        suggestion: `${primaryObject.object} schema exists but no data files found. Available data: ${objectsWithData}`,
      };
    }

    // Check if requested fields exist
    const mapping = this.mappings.get(primaryObject.object);
    const synonyms = mapping?.synonyms ?? {};
    const propertyNames = new Set(Object.keys(mapping?.properties ?? {}));
    const stopWords = new Set([
      "show",
      "get",
      "find",
      "with",
      "their",
      "and",
      "the",
      "me",
      "all",
    ]);
    const isKnownTerm = (term) =>
      Boolean(synonyms[term]) || propertyNames.has(term);

    // A non-string query (possible when suggestedObject is supplied) is
    // treated as empty instead of throwing.
    const queryWords = String(nlQuery ?? "")
      .toLowerCase()
      .split(/\s+/);
    const unmappedWords = [];

    for (const rawWord of queryWords) {
      // Strip punctuation stuck to the word ("experience." -> "experience") so
      // it can match a synonym; the raw token is still tried in case a
      // synonym key itself carries punctuation.
      const word = rawWord.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, "");
      if (word.length <= 2 || stopWords.has(word)) continue; // Skip short/filler words
      if (isKnownTerm(word) || isKnownTerm(rawWord)) continue;
      unmappedWords.push(word);
    }

    return {
      feasible: true,
      primaryObject,
      detectedObjects,
      dataStats: {
        fileCount: availability.fileCount,
        sampleFiles: availability.sampleFiles,
      },
      fieldWarnings:
        unmappedWords.length > 0
          ? `Some terms might not map to fields: ${unmappedWords.join(", ")}`
          : null,
    };
  }

  /**
   * Returns the mapping registered for an object.
   *
   * @param {string} objectName - Object name, e.g. "seekers".
   * @returns {Object|undefined} The mapping, or `undefined` if none is
   *   registered under that name.
   */
  getMapping(objectName) {
    return this.mappings.get(objectName);
  }

  /**
   * Lists the objects that have both a usable schema and data files.
   *
   * @returns {Array<{object: string, fileCount: number, propertyCount: number}>}
   *   One entry per such object; `propertyCount` falls back to 0 when the
   *   mapping does not provide one.
   */
  getAvailableObjects() {
    return Array.from(this.dataAvailability.entries())
      .filter(
        ([, availability]) =>
          availability.dataAvailable && availability.schemaAvailable
      )
      .map(([objectName, availability]) => ({
        object: objectName,
        fileCount: availability.fileCount,
        propertyCount: this.mappings.get(objectName)?.propertyCount || 0,
      }));
  }

  /**
   * Resolves query terms to fields of an object through the mapping's synonym
   * table.
   *
   * @param {string} objectName - Object whose mapping is consulted.
   * @param {string[]} [queryTerms=[]] - Terms to look up (case-insensitive).
   * @returns {Array<{field: string, matchedTerm: string, title: *, type: *,
   *   synonyms: Array}>} One suggestion per term that has a synonym entry.
   *   `title`/`type` are `undefined` and `synonyms` is empty when the mapping
   *   has no property details for the resolved field. Returns `[]` when the
   *   mapping is missing or flagged unavailable.
   */
  getFieldSuggestions(objectName, queryTerms = []) {
    const mapping = this.getMapping(objectName);
    if (!mapping || !mapping.available) return [];

    const synonyms = mapping.synonyms ?? {};
    const properties = mapping.properties ?? {};
    const suggestions = [];

    for (const term of queryTerms) {
      const field = synonyms[String(term).toLowerCase()];
      if (!field) continue;

      // The synonym table may point at a field without property details, and
      // a property may lack its own synonym list; neither should throw.
      const fieldInfo = properties[field];
      suggestions.push({
        field,
        matchedTerm: term,
        title: fieldInfo?.title,
        type: fieldInfo?.type,
        synonyms: (fieldInfo?.synonyms ?? []).slice(0, 3), // Top 3 synonyms
      });
    }

    return suggestions;
  }

  /**
   * Builds an error payload for a failed query, preferring an explanation
   * derived from the feasibility check over the raw error.
   *
   * @param {string} nlQuery - The query that failed.
   * @param {Error|string|null|undefined} error - The failure; its `message`
   *   (or the string itself) is used when the query looks feasible.
   * @returns {Object} `{ error, suggestion, availableObjects }` when the query
   *   is not feasible, otherwise `{ error, suggestion, queryAnalysis }`.
   */
  generateErrorMessage(nlQuery, error) {
    const feasibility = this.validateQueryFeasibility(nlQuery);

    if (!feasibility.feasible) {
      return {
        error: feasibility.reason,
        suggestion: feasibility.suggestion,
        availableObjects:
          feasibility.availableObjects || this.getAvailableObjects(),
      };
    }

    // Accept Error-like objects, plain strings, and a missing error.
    const message = typeof error === "string" ? error : error?.message;

    return {
      error: message || "Unknown error",
      suggestion: "Query seems valid but processing failed",
      queryAnalysis: feasibility,
    };
  }
}
|
|
|
|
// Export class and singleton instance
|
|
export { ODMDBMappingManager };
|
|
export const odmdbMappingManager = new ODMDBMappingManager();
|
|
export default ODMDBMappingManager;
|