Files
wifi-densepose/npm/packages/agentic-synth-examples/examples/beginner/simple-data-generation.js
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

240 lines
9.6 KiB
JavaScript

"use strict";
/**
* BEGINNER TUTORIAL: Simple Data Generation
*
* Learn how to generate structured synthetic data with agentic-synth.
* Perfect for creating test data, mock APIs, or prototyping.
*
* What you'll learn:
* - Defining data schemas
* - Generating structured data
* - Saving output to files
* - Working with different formats
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
* - npm install @ruvector/agentic-synth
*
* Run: npx tsx examples/beginner/simple-data-generation.ts
*/
// TypeScript-emitted helper: exposes property `k` of module `m` on object `o`
// under the name `k2` (defaulting to `k`). An existing `this.__createBinding`
// is reused when present; otherwise one of the two implementations below is
// chosen depending on whether `Object.create` exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Fall back to an enumerable getter that reads m[k] at access time when there
// is no own descriptor, when the descriptor is an accessor on a module that is
// not flagged __esModule, or when it is a writable/configurable data property.
// In every other case the original descriptor is reused unchanged.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Variant used when Object.create is unavailable: plain assignment, which
// copies the current value of m[k] once.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// TypeScript-emitted helper: stores `v` as the "default" property of `o`.
// Uses an enumerable defineProperty when Object.create exists, otherwise a
// plain assignment. An existing `this.__setModuleDefault` takes precedence.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// TypeScript-emitted helper: wraps a required module in a namespace-style
// object. An existing `this.__importStar` is reused when present.
var __importStar = (this && this.__importStar) || (function () {
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or
// with a for-in + hasOwnProperty fallback when that function is missing, and
// then delegates to the replacement.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
// Modules already flagged as __esModule are returned as-is.
if (mod && mod.__esModule) return mod;
var result = {};
// Bind every own key except "default" onto the result, then set the module
// value itself as result.default.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// Flag the CommonJS exports object as originating from an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// `synth` is declared as an export up front and receives its real value after
// the client is constructed further down in this file.
exports.synth = void 0;
// Function declarations are hoisted, so they can be exported before their
// definitions appear.
exports.generateUserData = generateUserData;
exports.generateWithConstraints = generateWithConstraints;
// Dependencies: the agentic-synth client plus Node's fs and path modules.
const agentic_synth_1 = require("@ruvector/agentic-synth");
const fs_1 = require("fs");
const path_1 = require("path");
// Step 1: Define your data schema
// The schema is the blueprint handed to the generator. It is assembled here
// from small named pieces so each kind of field is easy to spot.

// Enum values allowed for the `role` field (restricted choices)
const userRoles = ['user', 'admin', 'moderator'];

// Sub-fields of the optional nested `address` object
const addressProperties = {
    street: { type: 'string' },
    city: { type: 'string' },
    country: { type: 'string' },
    postalCode: { type: 'string' }
};

const userSchema = {
    // Basic required fields with types
    id: { type: 'string', required: true },
    name: { type: 'string', required: true },
    email: { type: 'string', required: true },
    age: { type: 'number', required: true, minimum: 18, maximum: 80 },
    // Enum field
    role: { type: 'string', required: true, enum: userRoles },
    // Nested object
    address: { type: 'object', required: false, properties: addressProperties },
    // Array field
    interests: { type: 'array', required: false, items: { type: 'string' } }
};
// Step 2: Initialize AgenticSynth
// We're using Gemini because it's fast and cost-effective
const synthOptions = {
    provider: 'gemini',
    // The key is read from the environment when this module loads
    apiKey: process.env.GEMINI_API_KEY,
    model: 'gemini-2.0-flash-exp',
    // Cache results to save API calls
    cacheStrategy: 'memory',
    // Cache for 1 hour
    cacheTTL: 3600
};
// Build the shared client and publish it as the module's `synth` export.
const synth = new agentic_synth_1.AgenticSynth(synthOptions);
exports.synth = synth;
// Step 3: Main generation function
/**
 * Runs the tutorial end to end.
 *
 * Makes three `synth.generateStructured` requests (5 users as JSON, 20 users
 * as JSON, 3 users as CSV), prints the results, writes the first batch to
 * `<cwd>/examples/output/sample-users.json`, and finishes with session
 * statistics covering all three requests.
 *
 * A failure to write the output file is only reported as a warning. Any other
 * failure is logged with a hint and terminates the process with exit code 1.
 *
 * @returns {Promise<void>} Resolves once the tutorial has printed its summary.
 */
async function generateUserData() {
    console.log('🎯 Simple Data Generation Tutorial\n');
    console.log('='.repeat(60));
    // Step 3a: Generate a small batch first (5 users)
    console.log('\n📊 Generating 5 sample users...\n');
    try {
        const result = await synth.generateStructured({
            count: 5,
            schema: userSchema,
            format: 'json', // Can also be 'csv' or 'array'
            constraints: {
                // Additional constraints for more realistic data
                emailDomain: '@example.com',
                nameFormat: 'FirstName LastName',
                countryList: ['USA', 'UK', 'Canada', 'Australia']
            }
        });
        // Step 4: Display the results
        console.log('✅ Generation Complete!\n');
        console.log(`Generated ${result.metadata.count} users in ${result.metadata.duration}ms`);
        console.log(`Provider: ${result.metadata.provider}`);
        console.log(`Model: ${result.metadata.model}`);
        console.log(`Cached: ${result.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
        // Show the generated data
        console.log('👥 Generated Users:\n');
        result.data.forEach((user, index) => {
            console.log(`${index + 1}. ${user.name} (${user.role})`);
            console.log(` 📧 ${user.email}`);
            console.log(` 🎂 Age: ${user.age}`);
            // address and interests are optional in the schema
            if (user.address) {
                console.log(` 🏠 ${user.address.city}, ${user.address.country}`);
            }
            if (user.interests && user.interests.length > 0) {
                console.log(` ❤️ Interests: ${user.interests.join(', ')}`);
            }
            console.log('');
        });
        // Step 5: Save to file
        const outputDir = (0, path_1.join)(process.cwd(), 'examples', 'output');
        const outputFile = (0, path_1.join)(outputDir, 'sample-users.json');
        try {
            // Create output directory if it doesn't exist. `fs` is already
            // loaded at the top of the file, so no dynamic import is needed.
            (0, fs_1.mkdirSync)(outputDir, { recursive: true });
            // Save the data
            (0, fs_1.writeFileSync)(outputFile, JSON.stringify(result.data, null, 2));
            console.log(`💾 Data saved to: ${outputFile}\n`);
        }
        catch (error) {
            // Saving is best-effort: the tutorial continues without the file.
            console.warn('⚠️ Could not save file:', error instanceof Error ? error.message : 'Unknown error');
        }
        // Step 6: Generate a larger batch
        console.log('='.repeat(60));
        console.log('\n📈 Now generating 20 users (to demonstrate scaling)...\n');
        const largeResult = await synth.generateStructured({
            count: 20,
            schema: userSchema,
            format: 'json'
        });
        console.log('✅ Large batch complete!');
        console.log(` Generated: ${largeResult.metadata.count} users`);
        console.log(` Time: ${largeResult.metadata.duration}ms`);
        console.log(` Cached: ${largeResult.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
        // Step 7: Demonstrate CSV format
        console.log('='.repeat(60));
        console.log('\n📄 Generating data in CSV format...\n');
        const csvResult = await synth.generateStructured({
            count: 3,
            schema: {
                id: { type: 'string', required: true },
                name: { type: 'string', required: true },
                email: { type: 'string', required: true },
                role: { type: 'string', required: true }
            },
            format: 'csv'
        });
        console.log('CSV Output (first 3 users):');
        console.log('─'.repeat(60));
        // Print what came back under the header above.
        // NOTE(review): rows are presumably strings inside the data array;
        // a non-array payload is printed as a single entry and non-string
        // rows are JSON-encoded so nothing is silently dropped.
        const csvRows = Array.isArray(csvResult.data) ? csvResult.data : [csvResult.data];
        csvRows.forEach((row) => {
            console.log(typeof row === 'string' ? row : JSON.stringify(row));
        });
        console.log('✅ CSV generation successful\n');
        // Step 8: Show statistics (covering all three requests made above)
        const batches = [result, largeResult, csvResult];
        const cachedCount = batches.filter((batch) => batch.metadata.cached).length;
        const apiCalls = batches.length - cachedCount;
        const totalDuration = batches.reduce((sum, batch) => sum + batch.metadata.duration, 0);
        const cacheNote = cachedCount > 0 ? ` (${cachedCount} served from cache)` : '';
        console.log('='.repeat(60));
        console.log('\n📊 Session Statistics:');
        console.log(` Total users generated: ${result.data.length + largeResult.data.length + csvResult.data.length}`);
        console.log(` Total API calls: ${apiCalls}${cacheNote}`);
        console.log(` Total time: ${totalDuration}ms`);
        // Step 9: Next steps
        console.log('\n💡 What You Can Do Next:');
        console.log(' 1. Modify the schema to match your use case');
        console.log(' 2. Try different data types (timeseries, events)');
        console.log(' 3. Experiment with constraints for more realistic data');
        console.log(' 4. Generate thousands of records for load testing');
        console.log(' 5. Integrate with your test suite or mock API\n');
    }
    catch (error) {
        console.error('❌ Generation failed:', error instanceof Error ? error.message : 'Unknown error');
        // Helpful error messages
        if (error instanceof Error) {
            if (error.message.includes('API key')) {
                console.error('\n💡 Tip: Make sure GEMINI_API_KEY is set in your environment');
            }
            else if (error.message.includes('schema')) {
                console.error('\n💡 Tip: Check your schema definition for errors');
            }
        }
        process.exit(1);
    }
}
// Additional helper: Generate with custom constraints
/**
 * Requests three product records from `synth.generateStructured`, passing a
 * small product schema together with custom constraints, and logs the
 * returned `data`.
 *
 * @returns {Promise<void>} Resolves after the products have been logged.
 */
async function generateWithConstraints() {
    console.log('\n🎨 Example: Custom Constraints\n');
    // Shape of each product record
    const productSchema = {
        productName: { type: 'string', required: true },
        price: { type: 'number', required: true, minimum: 10, maximum: 1000 },
        category: {
            type: 'string',
            enum: ['Electronics', 'Clothing', 'Books', 'Food']
        },
        inStock: { type: 'boolean', required: true }
    };
    // Extra hints forwarded to the generator alongside the schema
    const productConstraints = {
        priceFormat: 'USD',
        includeDiscounts: true,
        realistic: true
    };
    const request = { count: 3, schema: productSchema, constraints: productConstraints };
    const result = await synth.generateStructured(request);
    console.log('Generated products:', result.data);
}
// Run the example only when this file is executed directly (node <file>),
// not when it is loaded via require() by another module.
// This file is a CommonJS script (it uses `require` and `exports`), where
// `import.meta` is a syntax error that prevents the file from loading at all;
// `require.main === module` is the CommonJS way to detect the entry point.
if (require.main === module) {
    generateUserData().catch((error) => {
        console.error('Fatal error:', error);
        process.exit(1);
    });
}
//# sourceMappingURL=simple-data-generation.js.map