git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
195 lines
6.5 KiB
TypeScript
195 lines
6.5 KiB
TypeScript
/**
 * Integration example with @ruvector/agentic-synth
 *
 * This example shows how to use both agentic-synth and graph-data-generator
 * together to create comprehensive synthetic datasets.
 */
|
|
|
|
import { createSynth } from '@ruvector/agentic-synth';
|
|
import { createGraphDataGenerator } from '../src/index.js';
|
|
import fs from 'fs';
|
|
|
|
/**
 * Runs the hybrid generation example end to end.
 *
 * Creates an agentic-synth client and a graph-data-generator client, then, in
 * sequence, generates user profiles, projects, a knowledge graph, temporal
 * events and time-series metrics. The results are written to the current
 * working directory (three JSON files, two Cypher files and
 * `dataset-summary.json`) and progress is logged to the console.
 *
 * API keys are read from the `GEMINI_API_KEY` and `OPENROUTER_API_KEY`
 * environment variables and passed to the clients as-is.
 *
 * @returns {Promise<void>} Resolves once every file has been written; rejects
 *   if a generator call or a file write throws.
 */
async function main() {
  // Single source of truth for values that are both passed to the generators
  // and reported in dataset-summary.json, so the summary cannot drift.
  const synthProvider = 'gemini';
  const graphProviderLabel = 'openrouter/kimi-k2';
  const includeGraphEmbeddings = true;

  // Pretty-print a value as JSON into a file in the current working directory.
  const writeJson = (file, value) => {
    fs.writeFileSync(file, JSON.stringify(value, null, 2));
  };

  // Initialize both generators
  const synth = createSynth({
    provider: synthProvider,
    apiKey: process.env.GEMINI_API_KEY,
    model: 'gemini-2.0-flash-exp',
  });

  const graphGen = createGraphDataGenerator({
    apiKey: process.env.OPENROUTER_API_KEY,
    model: 'moonshot/kimi-k2-instruct',
  });

  console.log('=== Hybrid Synthetic Data Generation ===\n');

  // Step 1: Generate structured user data with agentic-synth
  console.log('1. Generating user profiles with agentic-synth...');
  const userProfiles = await synth.generateStructured({
    count: 50,
    schema: {
      user_id: { type: 'string' },
      name: { type: 'string' },
      email: { type: 'string' },
      role: { type: 'string', enum: ['developer', 'designer', 'manager', 'analyst'] },
      skills: { type: 'array', items: { type: 'string' } },
      experience_years: { type: 'number', minimum: 0, maximum: 30 },
    },
  });

  console.log(`✓ Generated ${userProfiles.data.length} user profiles`);

  // Step 2: Generate project data with agentic-synth
  console.log('\n2. Generating project data with agentic-synth...');
  const projects = await synth.generateStructured({
    count: 20,
    schema: {
      project_id: { type: 'string' },
      name: { type: 'string' },
      description: { type: 'string' },
      status: { type: 'string', enum: ['active', 'completed', 'on-hold'] },
      start_date: { type: 'string' },
      tech_stack: { type: 'array', items: { type: 'string' } },
    },
  });

  console.log(`✓ Generated ${projects.data.length} projects`);

  // Step 3: Generate knowledge graph relationships with graph-data-generator
  console.log('\n3. Generating knowledge graph with relationships...');
  const knowledgeGraph = await graphGen.generateKnowledgeGraph({
    domain: 'software development teams',
    entities: 100,
    relationships: 300,
    entityTypes: ['Person', 'Project', 'Skill', 'Technology', 'Team'],
    relationshipTypes: [
      'WORKS_ON',
      'HAS_SKILL',
      'USES_TECHNOLOGY',
      'MEMBER_OF',
      'DEPENDS_ON',
      'MENTORS',
    ],
    includeEmbeddings: includeGraphEmbeddings,
  });

  console.log(`✓ Generated ${knowledgeGraph.data.nodes.length} nodes`);
  console.log(`✓ Generated ${knowledgeGraph.data.edges.length} edges`);

  // Step 4: Generate temporal event data
  console.log('\n4. Generating temporal events for user activities...');
  const temporalEvents = await graphGen.generateTemporalEvents({
    startDate: '2024-01-01',
    endDate: '2024-12-31',
    eventTypes: [
      'code_commit',
      'pull_request',
      'code_review',
      'deployment',
      'meeting',
      'task_completed',
    ],
    eventsPerDay: 50,
    entities: 50,
    includeEmbeddings: false,
  });

  console.log(`✓ Generated ${temporalEvents.data.nodes.length} temporal nodes`);
  console.log(`✓ Generated ${temporalEvents.data.edges.length} temporal edges`);

  // Step 5: Generate time-series metrics with agentic-synth
  console.log('\n5. Generating time-series metrics with agentic-synth...');
  const metrics = await synth.generateTimeSeries({
    startDate: '2024-01-01',
    endDate: '2024-12-31',
    interval: '1d',
    metrics: ['code_quality', 'test_coverage', 'deployment_frequency'],
    trend: 'up',
    seasonality: true,
    noise: 0.1,
  });

  console.log(`✓ Generated ${metrics.data.length} time-series data points`);

  // Step 6: Combine and export data
  console.log('\n6. Combining and exporting datasets...');

  // Save structured data as JSON
  writeJson('users.json', userProfiles.data);
  writeJson('projects.json', projects.data);
  writeJson('metrics.json', metrics.data);

  // Save graph data as Cypher
  const knowledgeCypher = graphGen.generateCypher(knowledgeGraph.data, {
    useConstraints: true,
    useIndexes: true,
    useMerge: true,
  });
  fs.writeFileSync('knowledge-graph.cypher', knowledgeCypher);

  const temporalCypher = graphGen.generateCypher(temporalEvents.data, {
    useConstraints: true,
    useIndexes: true,
  });
  fs.writeFileSync('temporal-events.cypher', temporalCypher);

  // Create a combined dataset summary
  const summary = {
    generation_timestamp: new Date().toISOString(),
    datasets: {
      user_profiles: {
        count: userProfiles.data.length,
        provider: synthProvider,
        file: 'users.json',
      },
      projects: {
        count: projects.data.length,
        provider: synthProvider,
        file: 'projects.json',
      },
      knowledge_graph: {
        nodes: knowledgeGraph.data.nodes.length,
        edges: knowledgeGraph.data.edges.length,
        provider: graphProviderLabel,
        file: 'knowledge-graph.cypher',
        has_embeddings: includeGraphEmbeddings,
      },
      temporal_events: {
        nodes: temporalEvents.data.nodes.length,
        edges: temporalEvents.data.edges.length,
        provider: graphProviderLabel,
        file: 'temporal-events.cypher',
      },
      time_series_metrics: {
        count: metrics.data.length,
        provider: synthProvider,
        file: 'metrics.json',
      },
    },
    // Durations are copied verbatim from each graph result's metadata.
    total_generation_time: {
      knowledge_graph: knowledgeGraph.metadata.duration,
      temporal_events: temporalEvents.metadata.duration,
    },
  };

  writeJson('dataset-summary.json', summary);

  console.log('\n✓ All datasets generated and saved!');
  console.log('\nGenerated files:');
  console.log('- users.json (structured user profiles)');
  console.log('- projects.json (structured project data)');
  console.log('- metrics.json (time-series metrics)');
  console.log('- knowledge-graph.cypher (Neo4j graph with embeddings)');
  console.log('- temporal-events.cypher (Neo4j temporal events)');
  console.log('- dataset-summary.json (metadata and summary)');

  console.log('\n=== Integration Complete ===');
  console.log(`Total nodes in graphs: ${knowledgeGraph.data.nodes.length + temporalEvents.data.nodes.length}`);
  console.log(`Total edges in graphs: ${knowledgeGraph.data.edges.length + temporalEvents.data.edges.length}`);
  console.log(`Total structured records: ${userProfiles.data.length + projects.data.length}`);
  console.log(`Total time-series points: ${metrics.data.length}`);
}
|
|
|
|
// Run the example. On failure, log the error and set a non-zero exit code so
// that shells and CI can tell the generation did not complete.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|