git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
240 lines
9.6 KiB
JavaScript
240 lines
9.6 KiB
JavaScript
"use strict";
|
|
/**
 * BEGINNER TUTORIAL: Simple Data Generation
 *
 * Learn how to generate structured synthetic data with agentic-synth.
 * Perfect for creating test data, mock APIs, or prototyping.
 *
 * What you'll learn:
 * - Defining data schemas
 * - Generating structured data
 * - Saving output to files
 * - Working with different formats
 *
 * Prerequisites:
 * - Set GEMINI_API_KEY environment variable
 * - npm install @ruvector/agentic-synth
 *
 * Run: npx tsx examples/beginner/simple-data-generation.ts
 */
|
|
/**
 * Compiler-emitted interop helper: exposes `source[key]` on `target` under
 * `alias` (or under `key` when no alias is given).
 *
 * Where `Object.create` exists the property is installed with
 * `Object.defineProperty`. The source's own descriptor is reused as-is only
 * when it is an accessor on an `__esModule` source, or a data property that is
 * neither writable nor configurable; in every other case a getter that reads
 * `source[key]` on each access is installed so later changes stay visible.
 * Without `Object.create` the current value is simply copied.
 *
 * An already-defined `this.__createBinding` takes precedence.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsLiveGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        target[exportName] = source[key];
    });
|
|
/**
 * Compiler-emitted interop helper: stores `value` as the `default` member of
 * `namespace`.
 *
 * Where `Object.create` exists the member is defined as an enumerable data
 * property (so it is neither writable nor configurable); otherwise it is a
 * plain assignment. An already-defined `this.__setModuleDefault` takes
 * precedence.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (namespace, value) {
        var defaultDescriptor = { enumerable: true, value: value };
        Object.defineProperty(namespace, "default", defaultDescriptor);
    }
    : function (namespace, value) {
        namespace["default"] = value;
    });
|
|
/**
 * Compiler-emitted interop helper: wraps a required module in a namespace
 * object.
 *
 * A truthy module flagged `__esModule` is returned untouched. Otherwise a new
 * object is built: every own property of the module except "default" is bound
 * onto it via `__createBinding`, and the module itself is attached as
 * `default` via `__setModuleDefault`. A `null`/`undefined` module yields a
 * namespace holding only `default`.
 *
 * An already-defined `this.__importStar` takes precedence.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use: replaces itself with Object.getOwnPropertyNames,
    // or with a for-in/hasOwnProperty fallback when that is unavailable.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (source) {
            var names = [];
            for (var name in source) {
                if (Object.prototype.hasOwnProperty.call(source, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, keys[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
// Flag this CommonJS module as transpiled ES-module output so that interop
// helpers hand it back unchanged instead of wrapping it.
Object.defineProperty(exports, "__esModule", { value: true });
// `synth` is a `const` assigned further down, so its export slot is reserved
// here with an undefined placeholder and filled in once the instance exists.
exports.synth = undefined;
// Function declarations are hoisted, so they can be exported before their
// definitions appear in the file.
exports.generateUserData = generateUserData;
exports.generateWithConstraints = generateWithConstraints;
|
|
const agentic_synth_1 = require("@ruvector/agentic-synth");
|
|
const fs_1 = require("fs");
|
|
const path_1 = require("path");
|
|
// Step 1: Define your data schema
// The schema is the blueprint for every record the generator is asked for.
const userSchema = (() => {
    // Small factories keep the repeated field shapes in one place; each call
    // returns a fresh object, exactly as separate literals would.
    const requiredString = () => ({ type: 'string', required: true });
    const plainString = () => ({ type: 'string' });

    return {
        // Basic fields with types
        id: requiredString(),
        name: requiredString(),
        email: requiredString(),
        age: { type: 'number', required: true, minimum: 18, maximum: 80 },

        // Enum field (restricted choices)
        role: {
            type: 'string',
            required: true,
            enum: ['user', 'admin', 'moderator'],
        },

        // Nested object
        address: {
            type: 'object',
            required: false,
            properties: {
                street: plainString(),
                city: plainString(),
                country: plainString(),
                postalCode: plainString(),
            },
        },

        // Array field
        interests: {
            type: 'array',
            required: false,
            items: plainString(),
        },
    };
})();
|
|
// Step 2: Initialize AgenticSynth
// Gemini is used here because it's fast and cost-effective.
const synthConfig = {
    provider: 'gemini',
    // Read from the environment; see the prerequisites in the file header.
    apiKey: process.env.GEMINI_API_KEY,
    model: 'gemini-2.0-flash-exp',
    // Cache results to save API calls
    cacheStrategy: 'memory',
    // Cache for 1 hour
    cacheTTL: 3600,
};
const synth = new agentic_synth_1.AgenticSynth(synthConfig);
// Shared instance, exported so other modules can reuse it.
exports.synth = synth;
|
|
// Step 3: Main generation function
/**
 * Runs the tutorial end to end, logging progress to the console:
 *
 *  1. generates 5 users from `userSchema` (with extra constraints) and prints them,
 *  2. writes them to `<cwd>/examples/output/sample-users.json`; a failure to
 *     save is reported as a warning and does not abort the run,
 *  3. generates a batch of 20 users,
 *  4. generates 3 reduced-schema records in CSV format and prints them,
 *  5. prints session statistics covering all three requests.
 *
 * Any error from a generation request is logged (with a hint when the message
 * mentions "API key" or "schema") and the process exits with status 1.
 *
 * @returns {Promise<void>}
 */
async function generateUserData() {
    console.log('🎯 Simple Data Generation Tutorial\n');
    console.log('='.repeat(60));
    // Step 3a: Generate a small batch first (5 users)
    console.log('\n📊 Generating 5 sample users...\n');
    try {
        const result = await synth.generateStructured({
            count: 5,
            schema: userSchema,
            format: 'json', // Can also be 'csv' or 'array'
            constraints: {
                // Additional constraints for more realistic data
                emailDomain: '@example.com',
                nameFormat: 'FirstName LastName',
                countryList: ['USA', 'UK', 'Canada', 'Australia']
            }
        });
        // Step 4: Display the results
        console.log('✅ Generation Complete!\n');
        console.log(`Generated ${result.metadata.count} users in ${result.metadata.duration}ms`);
        console.log(`Provider: ${result.metadata.provider}`);
        console.log(`Model: ${result.metadata.model}`);
        console.log(`Cached: ${result.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
        // Show the generated data
        console.log('👥 Generated Users:\n');
        result.data.forEach((user, index) => {
            console.log(`${index + 1}. ${user.name} (${user.role})`);
            console.log(` 📧 ${user.email}`);
            console.log(` 🎂 Age: ${user.age}`);
            // address and interests are optional in the schema
            if (user.address) {
                console.log(` 🏠 ${user.address.city}, ${user.address.country}`);
            }
            if (user.interests && user.interests.length > 0) {
                console.log(` ❤️ Interests: ${user.interests.join(', ')}`);
            }
            console.log('');
        });
        // Step 5: Save to file
        const outputDir = (0, path_1.join)(process.cwd(), 'examples', 'output');
        const outputFile = (0, path_1.join)(outputDir, 'sample-users.json');
        try {
            // Create output directory if it doesn't exist. `fs` is already
            // loaded at the top of the file, so no dynamic import is needed.
            (0, fs_1.mkdirSync)(outputDir, { recursive: true });
            // Save the data
            (0, fs_1.writeFileSync)(outputFile, JSON.stringify(result.data, null, 2));
            console.log(`💾 Data saved to: ${outputFile}\n`);
        }
        catch (error) {
            // Saving is best-effort: report the problem and carry on.
            console.warn('⚠️ Could not save file:', error instanceof Error ? error.message : 'Unknown error');
        }
        // Step 6: Generate a larger batch
        console.log('='.repeat(60));
        console.log('\n📈 Now generating 20 users (to demonstrate scaling)...\n');
        const largeResult = await synth.generateStructured({
            count: 20,
            schema: userSchema,
            format: 'json'
        });
        console.log('✅ Large batch complete!');
        console.log(` Generated: ${largeResult.metadata.count} users`);
        console.log(` Time: ${largeResult.metadata.duration}ms`);
        console.log(` Cached: ${largeResult.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
        // Step 7: Demonstrate CSV format
        console.log('='.repeat(60));
        console.log('\n📄 Generating data in CSV format...\n');
        const csvResult = await synth.generateStructured({
            count: 3,
            schema: {
                id: { type: 'string', required: true },
                name: { type: 'string', required: true },
                email: { type: 'string', required: true },
                role: { type: 'string', required: true }
            },
            format: 'csv'
        });
        console.log('CSV Output (first 3 users):');
        console.log('─'.repeat(60));
        // Actually print the rows announced by the header above.
        // NOTE(review): rows are expected to be strings for the CSV format;
        // anything else is serialized so the output stays readable — confirm
        // against the library's CSV result shape.
        csvResult.data.forEach((row) => {
            console.log(typeof row === 'string' ? row : JSON.stringify(row));
        });
        console.log('✅ CSV generation successful\n');
        // Step 8: Show statistics
        // All three requests feed every figure so the totals agree with each other.
        const batches = [result, largeResult, csvResult];
        const totalRecords = batches.reduce((sum, batch) => sum + batch.data.length, 0);
        const totalDuration = batches.reduce((sum, batch) => sum + batch.metadata.duration, 0);
        const cachedCount = batches.filter((batch) => batch.metadata.cached).length;
        console.log('='.repeat(60));
        console.log('\n📊 Session Statistics:');
        console.log(` Total users generated: ${totalRecords}`);
        console.log(` Total API calls: ${batches.length - cachedCount} (${cachedCount} served from cache)`);
        console.log(` Total time: ${totalDuration}ms`);
        // Step 9: Next steps
        console.log('\n💡 What You Can Do Next:');
        console.log(' 1. Modify the schema to match your use case');
        console.log(' 2. Try different data types (timeseries, events)');
        console.log(' 3. Experiment with constraints for more realistic data');
        console.log(' 4. Generate thousands of records for load testing');
        console.log(' 5. Integrate with your test suite or mock API\n');
    }
    catch (error) {
        console.error('❌ Generation failed:', error instanceof Error ? error.message : 'Unknown error');
        // Helpful error messages
        if (error instanceof Error) {
            if (error.message.includes('API key')) {
                console.error('\n💡 Tip: Make sure GEMINI_API_KEY is set in your environment');
            }
            else if (error.message.includes('schema')) {
                console.error('\n💡 Tip: Check your schema definition for errors');
            }
        }
        process.exit(1);
    }
}
|
|
// Additional helper: Generate with custom constraints
/**
 * Requests three product records from the shared `synth` instance, passing
 * free-form generation constraints alongside the schema, and logs the
 * returned data. Errors from the request propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function generateWithConstraints() {
    console.log('\n🎨 Example: Custom Constraints\n');

    const productSchema = {
        productName: { type: 'string', required: true },
        price: { type: 'number', required: true, minimum: 10, maximum: 1000 },
        category: {
            type: 'string',
            enum: ['Electronics', 'Clothing', 'Books', 'Food'],
        },
        inStock: { type: 'boolean', required: true },
    };
    const generationConstraints = {
        priceFormat: 'USD',
        includeDiscounts: true,
        realistic: true,
    };

    const generated = await synth.generateStructured({
        count: 3,
        schema: productSchema,
        constraints: generationConstraints,
    });
    console.log('Generated products:', generated.data);
}
|
|
// Run the example only when this file is executed directly (`node <file>`),
// not when it is loaded by another module.
// This file is CommonJS (it uses `require`/`exports`), where `import.meta` is
// a syntax error that would prevent the module from loading at all, so the
// entry-point check uses the CommonJS idiom `require.main === module`.
if (require.main === module) {
    generateUserData().catch((error) => {
        console.error('Fatal error:', error);
        process.exit(1);
    });
}
|
|
//# sourceMappingURL=simple-data-generation.js.map
|