Files
wifi-densepose/npm/packages/graph-data-generator/examples/integration-with-agentic-synth.ts
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

195 lines
6.5 KiB
TypeScript

/**
* Integration example with @ruvector/agentic-synth
*
* This example shows how to use both agentic-synth and graph-data-generator
* together to create comprehensive synthetic datasets.
*/
import { createSynth } from '@ruvector/agentic-synth';
import { createGraphDataGenerator } from '../src/index.js';
import fs from 'fs';
/**
 * End-to-end walkthrough that drives both generators.
 *
 * Builds an agentic-synth client and a graph-data-generator client from the
 * GEMINI_API_KEY and OPENROUTER_API_KEY environment variables, requests five
 * datasets one after another (user profiles, projects, a knowledge graph,
 * temporal events, time-series metrics), then writes three JSON files, two
 * Cypher files and a summary file using relative paths, logging progress to
 * the console along the way.
 *
 * @returns {Promise<void>} Settles after the last log line; rejects if any
 *   generation call or file write throws.
 */
async function main() {
  // Labels recorded in the summary for each backend.
  const synthProvider = 'gemini';
  const graphProvider = 'openrouter/kimi-k2';

  // The temporal events and the time-series metrics cover the same range.
  const dateRange = { startDate: '2024-01-01', endDate: '2024-12-31' };

  // Writes `value` to `file` as 2-space-indented JSON.
  const saveJson = (file, value) => {
    fs.writeFileSync(file, JSON.stringify(value, null, 2));
  };

  // Summary entry for a flat record list produced by agentic-synth.
  const tabularEntry = (rows, file) => ({
    count: rows.length,
    provider: synthProvider,
    file
  });

  // Summary entry for a { nodes, edges } payload produced by the graph generator.
  const graphEntry = (graph, file) => ({
    nodes: graph.nodes.length,
    edges: graph.edges.length,
    provider: graphProvider,
    file
  });

  const synth = createSynth({
    provider: synthProvider,
    apiKey: process.env.GEMINI_API_KEY,
    model: 'gemini-2.0-flash-exp'
  });
  const graphGen = createGraphDataGenerator({
    apiKey: process.env.OPENROUTER_API_KEY,
    model: 'moonshot/kimi-k2-instruct'
  });

  console.log('=== Hybrid Synthetic Data Generation ===\n');

  // Step 1: structured user records (agentic-synth)
  console.log('1. Generating user profiles with agentic-synth...');
  const userSchema = {
    user_id: { type: 'string' },
    name: { type: 'string' },
    email: { type: 'string' },
    role: { type: 'string', enum: ['developer', 'designer', 'manager', 'analyst'] },
    skills: { type: 'array', items: { type: 'string' } },
    experience_years: { type: 'number', minimum: 0, maximum: 30 }
  };
  const users = await synth.generateStructured({ count: 50, schema: userSchema });
  console.log(`✓ Generated ${users.data.length} user profiles`);

  // Step 2: structured project records (agentic-synth)
  console.log('\n2. Generating project data with agentic-synth...');
  const projectSchema = {
    project_id: { type: 'string' },
    name: { type: 'string' },
    description: { type: 'string' },
    status: { type: 'string', enum: ['active', 'completed', 'on-hold'] },
    start_date: { type: 'string' },
    tech_stack: { type: 'array', items: { type: 'string' } }
  };
  const projectSet = await synth.generateStructured({ count: 20, schema: projectSchema });
  console.log(`✓ Generated ${projectSet.data.length} projects`);

  // Step 3: knowledge graph (graph-data-generator)
  console.log('\n3. Generating knowledge graph with relationships...');
  const knowledge = await graphGen.generateKnowledgeGraph({
    domain: 'software development teams',
    entities: 100,
    relationships: 300,
    entityTypes: ['Person', 'Project', 'Skill', 'Technology', 'Team'],
    relationshipTypes: [
      'WORKS_ON',
      'HAS_SKILL',
      'USES_TECHNOLOGY',
      'MEMBER_OF',
      'DEPENDS_ON',
      'MENTORS'
    ],
    includeEmbeddings: true
  });
  console.log(`✓ Generated ${knowledge.data.nodes.length} nodes`);
  console.log(`✓ Generated ${knowledge.data.edges.length} edges`);

  // Step 4: temporal activity events (graph-data-generator)
  console.log('\n4. Generating temporal events for user activities...');
  const temporal = await graphGen.generateTemporalEvents({
    ...dateRange,
    eventTypes: [
      'code_commit',
      'pull_request',
      'code_review',
      'deployment',
      'meeting',
      'task_completed'
    ],
    eventsPerDay: 50,
    entities: 50,
    includeEmbeddings: false
  });
  console.log(`✓ Generated ${temporal.data.nodes.length} temporal nodes`);
  console.log(`✓ Generated ${temporal.data.edges.length} temporal edges`);

  // Step 5: daily time-series metrics (agentic-synth)
  console.log('\n5. Generating time-series metrics with agentic-synth...');
  const series = await synth.generateTimeSeries({
    ...dateRange,
    interval: '1d',
    metrics: ['code_quality', 'test_coverage', 'deployment_frequency'],
    trend: 'up',
    seasonality: true,
    noise: 0.1
  });
  console.log(`✓ Generated ${series.data.length} time-series data points`);

  // Step 6: persist everything
  console.log('\n6. Combining and exporting datasets...');

  // Flat datasets go out as JSON.
  saveJson('users.json', users.data);
  saveJson('projects.json', projectSet.data);
  saveJson('metrics.json', series.data);

  // Graph datasets go out as Cypher scripts.
  fs.writeFileSync(
    'knowledge-graph.cypher',
    graphGen.generateCypher(knowledge.data, {
      useConstraints: true,
      useIndexes: true,
      useMerge: true
    })
  );
  fs.writeFileSync(
    'temporal-events.cypher',
    graphGen.generateCypher(temporal.data, {
      useConstraints: true,
      useIndexes: true
    })
  );

  // Overview of what was produced, by which backend, and where it was saved.
  saveJson('dataset-summary.json', {
    generation_timestamp: new Date().toISOString(),
    datasets: {
      user_profiles: tabularEntry(users.data, 'users.json'),
      projects: tabularEntry(projectSet.data, 'projects.json'),
      knowledge_graph: {
        ...graphEntry(knowledge.data, 'knowledge-graph.cypher'),
        has_embeddings: true
      },
      temporal_events: graphEntry(temporal.data, 'temporal-events.cypher'),
      time_series_metrics: tabularEntry(series.data, 'metrics.json')
    },
    total_generation_time: {
      knowledge_graph: knowledge.metadata.duration,
      temporal_events: temporal.metadata.duration
    }
  });

  console.log('\n✓ All datasets generated and saved!');
  console.log('\nGenerated files:');
  const manifest = [
    ['users.json', 'structured user profiles'],
    ['projects.json', 'structured project data'],
    ['metrics.json', 'time-series metrics'],
    ['knowledge-graph.cypher', 'Neo4j graph with embeddings'],
    ['temporal-events.cypher', 'Neo4j temporal events'],
    ['dataset-summary.json', 'metadata and summary']
  ];
  for (const [file, note] of manifest) {
    console.log(`- ${file} (${note})`);
  }

  console.log('\n=== Integration Complete ===');
  console.log(`Total nodes in graphs: ${knowledge.data.nodes.length + temporal.data.nodes.length}`);
  console.log(`Total edges in graphs: ${knowledge.data.edges.length + temporal.data.edges.length}`);
  console.log(`Total structured records: ${users.data.length + projectSet.data.length}`);
  console.log(`Total time-series points: ${series.data.length}`);
}
// Run the example. A rejection from main() is logged and also marks the
// process as failed, so shells and CI runners see a non-zero exit status
// instead of a run that looks successful.
main().catch((error) => {
  console.error(error);
  // Set the exit code rather than calling process.exit() so pending output
  // can flush before the process ends on its own.
  process.exitCode = 1;
});