Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
@@ -0,0 +1,177 @@
|
||||
"use strict";
|
||||
/**
|
||||
* Integration example with @ruvector/agentic-synth
|
||||
*
|
||||
* This example shows how to use both agentic-synth and graph-data-generator
|
||||
* together to create comprehensive synthetic datasets.
|
||||
*/
|
||||
// CommonJS/ES-module interop helper: reuses an `__importDefault` already
// present on `this` when there is one; otherwise defines a function that
// returns modules flagged with `__esModule` unchanged and wraps anything
// else as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
const agentic_synth_1 = require("@ruvector/agentic-synth");
|
||||
const index_js_1 = require("../src/index.js");
|
||||
const fs_1 = __importDefault(require("fs"));
|
||||
/**
 * Runs the hybrid example from start to finish.
 *
 * Creates an agentic-synth generator and a graph-data-generator, awaits five
 * generation calls one after another (user profiles, projects, knowledge
 * graph, temporal events, time-series metrics), then writes the results and
 * a summary document to six files using relative paths. Progress is reported
 * on the console throughout.
 *
 * @returns {Promise<void>} Resolves once every file has been written and the
 *   closing totals have been printed; rejects if any awaited call or file
 *   write throws.
 */
async function main() {
    // Pretty-print a value as JSON into the given file.
    const writeJson = (file, value) => {
        fs_1.default.writeFileSync(file, JSON.stringify(value, null, 2));
    };

    // Initialize both generators
    const synthConfig = {
        provider: 'gemini',
        apiKey: process.env.GEMINI_API_KEY,
        model: 'gemini-2.0-flash-exp'
    };
    const synth = (0, agentic_synth_1.createSynth)(synthConfig);
    const graphConfig = {
        apiKey: process.env.OPENROUTER_API_KEY,
        model: 'moonshot/kimi-k2-instruct'
    };
    const graphGen = (0, index_js_1.createGraphDataGenerator)(graphConfig);
    console.log('=== Hybrid Synthetic Data Generation ===\n');

    // Step 1: Generate structured user data with agentic-synth
    console.log('1. Generating user profiles with agentic-synth...');
    const userSchema = {
        user_id: { type: 'string' },
        name: { type: 'string' },
        email: { type: 'string' },
        role: { type: 'string', enum: ['developer', 'designer', 'manager', 'analyst'] },
        skills: { type: 'array', items: { type: 'string' } },
        experience_years: { type: 'number', minimum: 0, maximum: 30 }
    };
    const userProfiles = await synth.generateStructured({ count: 50, schema: userSchema });
    const userCount = userProfiles.data.length;
    console.log(`✓ Generated ${userCount} user profiles`);

    // Step 2: Generate project data with agentic-synth
    console.log('\n2. Generating project data with agentic-synth...');
    const projectSchema = {
        project_id: { type: 'string' },
        name: { type: 'string' },
        description: { type: 'string' },
        status: { type: 'string', enum: ['active', 'completed', 'on-hold'] },
        start_date: { type: 'string' },
        tech_stack: { type: 'array', items: { type: 'string' } }
    };
    const projects = await synth.generateStructured({ count: 20, schema: projectSchema });
    const projectCount = projects.data.length;
    console.log(`✓ Generated ${projectCount} projects`);

    // Step 3: Generate knowledge graph relationships with graph-data-generator
    console.log('\n3. Generating knowledge graph with relationships...');
    const knowledgeGraph = await graphGen.generateKnowledgeGraph({
        domain: 'software development teams',
        entities: 100,
        relationships: 300,
        entityTypes: ['Person', 'Project', 'Skill', 'Technology', 'Team'],
        relationshipTypes: [
            'WORKS_ON',
            'HAS_SKILL',
            'USES_TECHNOLOGY',
            'MEMBER_OF',
            'DEPENDS_ON',
            'MENTORS'
        ],
        includeEmbeddings: true
    });
    // Each count is read immediately before its own log line.
    const graphNodeCount = knowledgeGraph.data.nodes.length;
    console.log(`✓ Generated ${graphNodeCount} nodes`);
    const graphEdgeCount = knowledgeGraph.data.edges.length;
    console.log(`✓ Generated ${graphEdgeCount} edges`);

    // Step 4: Generate temporal event data
    console.log('\n4. Generating temporal events for user activities...');
    const temporalEvents = await graphGen.generateTemporalEvents({
        startDate: '2024-01-01',
        endDate: '2024-12-31',
        eventTypes: [
            'code_commit',
            'pull_request',
            'code_review',
            'deployment',
            'meeting',
            'task_completed'
        ],
        eventsPerDay: 50,
        entities: 50,
        includeEmbeddings: false
    });
    const eventNodeCount = temporalEvents.data.nodes.length;
    console.log(`✓ Generated ${eventNodeCount} temporal nodes`);
    const eventEdgeCount = temporalEvents.data.edges.length;
    console.log(`✓ Generated ${eventEdgeCount} temporal edges`);

    // Step 5: Generate time-series metrics with agentic-synth
    console.log('\n5. Generating time-series metrics with agentic-synth...');
    const metrics = await synth.generateTimeSeries({
        startDate: '2024-01-01',
        endDate: '2024-12-31',
        interval: '1d',
        metrics: ['code_quality', 'test_coverage', 'deployment_frequency'],
        trend: 'up',
        seasonality: true,
        noise: 0.1
    });
    const metricCount = metrics.data.length;
    console.log(`✓ Generated ${metricCount} time-series data points`);

    // Step 6: Combine and export data
    console.log('\n6. Combining and exporting datasets...');

    // Save structured data as JSON
    writeJson('users.json', userProfiles.data);
    writeJson('projects.json', projects.data);
    writeJson('metrics.json', metrics.data);

    // Save graph data as Cypher
    const knowledgeCypher = graphGen.generateCypher(knowledgeGraph.data, {
        useConstraints: true,
        useIndexes: true,
        useMerge: true
    });
    fs_1.default.writeFileSync('knowledge-graph.cypher', knowledgeCypher);
    const temporalCypher = graphGen.generateCypher(temporalEvents.data, {
        useConstraints: true,
        useIndexes: true
    });
    fs_1.default.writeFileSync('temporal-events.cypher', temporalCypher);

    // Create a combined dataset summary
    const summary = {
        generation_timestamp: new Date().toISOString(),
        datasets: {
            user_profiles: {
                count: userCount,
                provider: 'gemini',
                file: 'users.json'
            },
            projects: {
                count: projectCount,
                provider: 'gemini',
                file: 'projects.json'
            },
            knowledge_graph: {
                nodes: graphNodeCount,
                edges: graphEdgeCount,
                provider: 'openrouter/kimi-k2',
                file: 'knowledge-graph.cypher',
                has_embeddings: true
            },
            temporal_events: {
                nodes: eventNodeCount,
                edges: eventEdgeCount,
                provider: 'openrouter/kimi-k2',
                file: 'temporal-events.cypher'
            },
            time_series_metrics: {
                count: metricCount,
                provider: 'gemini',
                file: 'metrics.json'
            }
        },
        total_generation_time: {
            knowledge_graph: knowledgeGraph.metadata.duration,
            temporal_events: temporalEvents.metadata.duration
        }
    };
    writeJson('dataset-summary.json', summary);

    // Report what was produced.
    console.log('\n✓ All datasets generated and saved!');
    console.log('\nGenerated files:');
    const fileDescriptions = [
        '- users.json (structured user profiles)',
        '- projects.json (structured project data)',
        '- metrics.json (time-series metrics)',
        '- knowledge-graph.cypher (Neo4j graph with embeddings)',
        '- temporal-events.cypher (Neo4j temporal events)',
        '- dataset-summary.json (metadata and summary)'
    ];
    for (const description of fileDescriptions) {
        console.log(description);
    }
    console.log('\n=== Integration Complete ===');
    console.log(`Total nodes in graphs: ${graphNodeCount + eventNodeCount}`);
    console.log(`Total edges in graphs: ${graphEdgeCount + eventEdgeCount}`);
    console.log(`Total structured records: ${userCount + projectCount}`);
    console.log(`Total time-series points: ${metricCount}`);
}
|
||||
// Run the example. On failure, log the error and set a non-zero exit code so
// shells and CI pipelines can detect that generation did not complete
// (logging alone would leave the process exiting with status 0).
main().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});
|
||||
//# sourceMappingURL=integration-with-agentic-synth.js.map
|
||||
Reference in New Issue
Block a user