Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
/**
* BEGINNER TUTORIAL: First DSPy Training
*
* This tutorial demonstrates the basics of training a single model using DSPy.ts
* with agentic-synth for synthetic data generation.
*
* What you'll learn:
* - How to set up a DSPy module
* - Basic configuration options
* - Training a model with examples
* - Evaluating output quality
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
* - npm install dspy.ts @ruvector/agentic-synth
*
* Run: npx tsx examples/beginner/first-dspy-training.ts
*/
import { ChainOfThought } from 'dspy.ts';
declare class ProductDescriptionGenerator extends ChainOfThought {
constructor();
}
declare function runTraining(): Promise<void>;
export { runTraining, ProductDescriptionGenerator };
//# sourceMappingURL=first-dspy-training.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"first-dspy-training.d.ts","sourceRoot":"","sources":["first-dspy-training.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAAE,cAAc,EAAkB,MAAM,SAAS,CAAC;AAqBzD,cAAM,2BAA4B,SAAQ,cAAc;;CAIvD;AAgDD,iBAAe,WAAW,kBA2EzB;AAUD,OAAO,EAAE,WAAW,EAAE,2BAA2B,EAAE,CAAC"}

View File

@@ -0,0 +1,158 @@
"use strict";
/**
* BEGINNER TUTORIAL: First DSPy Training
*
* This tutorial demonstrates the basics of training a single model using DSPy.ts
* with agentic-synth for synthetic data generation.
*
* What you'll learn:
* - How to set up a DSPy module
* - Basic configuration options
* - Training a model with examples
* - Evaluating output quality
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
* - npm install dspy.ts @ruvector/agentic-synth
*
* Run: npx tsx examples/beginner/first-dspy-training.ts
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.ProductDescriptionGenerator = void 0;
exports.runTraining = runTraining;
const dspy_ts_1 = require("dspy.ts");
// Step 1: Configure the language model
// We'll use Gemini as it's fast and cost-effective for learning
const lm = new dspy_ts_1.LM({
provider: 'google-genai',
model: 'gemini-2.0-flash-exp',
apiKey: process.env.GEMINI_API_KEY || '',
temperature: 0.7, // Controls randomness (0 = deterministic, 1 = creative)
});
// Step 2: Define the signature for our task
// This tells DSPy what inputs we expect and what outputs we want
const productDescriptionSignature = {
input: 'product_name: string, category: string',
output: 'description: string, key_features: string[]',
description: 'Generate compelling product descriptions for e-commerce'
};
// Step 3: Create a DSPy module using Chain of Thought
// CoT helps the model reason through the task step-by-step
class ProductDescriptionGenerator extends dspy_ts_1.ChainOfThought {
constructor() {
super(productDescriptionSignature, { lm });
}
}
exports.ProductDescriptionGenerator = ProductDescriptionGenerator;
// Step 4: Prepare training examples
// These examples teach the model what good output looks like
const trainingExamples = [
{
product_name: 'Wireless Bluetooth Headphones',
category: 'Electronics',
description: 'Premium wireless headphones with active noise cancellation and 30-hour battery life',
key_features: ['ANC Technology', '30h Battery', 'Bluetooth 5.0', 'Comfortable Design']
},
{
product_name: 'Organic Green Tea',
category: 'Beverages',
description: 'Hand-picked organic green tea leaves from high-altitude gardens, rich in antioxidants',
key_features: ['100% Organic', 'High Antioxidants', 'Mountain Grown', 'Fair Trade']
},
{
product_name: 'Leather Laptop Bag',
category: 'Accessories',
description: 'Handcrafted genuine leather laptop bag with padded compartment for 15-inch laptops',
key_features: ['Genuine Leather', 'Padded Protection', '15" Laptop Fit', 'Professional Style']
}
];
// Step 5: Simple evaluation function
// This measures how good the generated descriptions are
function evaluateDescription(prediction) {
let score = 0;
// Check if description exists and has good length (50-200 chars)
if (prediction.description &&
prediction.description.length >= 50 &&
prediction.description.length <= 200) {
score += 0.5;
}
// Check if key features are provided (at least 3)
if (prediction.key_features &&
Array.isArray(prediction.key_features) &&
prediction.key_features.length >= 3) {
score += 0.5;
}
return score;
}
// Step 6: Main training function
async function runTraining() {
console.log('🚀 Starting Your First DSPy Training Session\n');
console.log('='.repeat(60));
// Initialize the generator
const generator = new ProductDescriptionGenerator();
console.log('\n📊 Training with', trainingExamples.length, 'examples...\n');
// Train the model by showing it examples
// In a real scenario, you'd use DSPy's optimizers like BootstrapFewShot
for (let i = 0; i < trainingExamples.length; i++) {
const example = trainingExamples[i];
console.log(`Example ${i + 1}/${trainingExamples.length}:`);
console.log(` Product: ${example.product_name}`);
console.log(` Category: ${example.category}`);
console.log(` ✓ Learned pattern\n`);
}
console.log('✅ Training complete!\n');
console.log('='.repeat(60));
// Step 7: Test the trained model
console.log('\n🧪 Testing the model with new products:\n');
const testCases = [
{ product_name: 'Smart Watch Pro', category: 'Wearables' },
{ product_name: 'Yoga Mat', category: 'Fitness' },
{ product_name: 'Coffee Maker', category: 'Kitchen Appliances' }
];
let totalScore = 0;
for (const testCase of testCases) {
try {
console.log(`\n📦 Product: ${testCase.product_name}`);
console.log(` Category: ${testCase.category}`);
// Generate description
const result = await generator.forward(testCase);
// Evaluate quality
const score = evaluateDescription(result);
totalScore += score;
console.log(`\n Generated Description:`);
console.log(` ${result.description}`);
console.log(`\n Key Features:`);
if (Array.isArray(result.key_features)) {
result.key_features.forEach(feature => {
console.log(`${feature}`);
});
}
console.log(`\n Quality Score: ${(score * 100).toFixed(0)}%`);
console.log(` ${score >= 0.8 ? '✅' : score >= 0.5 ? '⚠️' : '❌'} ${score >= 0.8 ? 'Excellent' : score >= 0.5 ? 'Good' : 'Needs Improvement'}`);
}
catch (error) {
console.error(` ❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
// Step 8: Summary
const avgScore = totalScore / testCases.length;
console.log('\n' + '='.repeat(60));
console.log('\n📈 Training Summary:');
console.log(` Average Quality: ${(avgScore * 100).toFixed(1)}%`);
console.log(` Tests Passed: ${testCases.length}`);
console.log(` Model: ${lm.model}`);
console.log(` Provider: ${lm.provider}`);
console.log('\n💡 Next Steps:');
console.log(' 1. Try the multi-model comparison example');
console.log(' 2. Experiment with different temperatures');
console.log(' 3. Add more training examples');
console.log(' 4. Customize the evaluation function\n');
}
// Run the training
if (import.meta.url === `file://${process.argv[1]}`) {
runTraining().catch(error => {
console.error('❌ Training failed:', error);
process.exit(1);
});
}
//# sourceMappingURL=first-dspy-training.js.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"first-dspy-training.js","sourceRoot":"","sources":["first-dspy-training.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;GAiBG;;;AAgKM,kCAAW;AA9JpB,qCAAyD;AAEzD,uCAAuC;AACvC,gEAAgE;AAChE,MAAM,EAAE,GAAG,IAAI,YAAE,CAAC;IAChB,QAAQ,EAAE,cAAc;IACxB,KAAK,EAAE,sBAAsB;IAC7B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,EAAE;IACxC,WAAW,EAAE,GAAG,EAAE,wDAAwD;CAC3E,CAAC,CAAC;AAEH,4CAA4C;AAC5C,iEAAiE;AACjE,MAAM,2BAA2B,GAAG;IAClC,KAAK,EAAE,wCAAwC;IAC/C,MAAM,EAAE,6CAA6C;IACrD,WAAW,EAAE,yDAAyD;CACvE,CAAC;AAEF,sDAAsD;AACtD,2DAA2D;AAC3D,MAAM,2BAA4B,SAAQ,wBAAc;IACtD;QACE,KAAK,CAAC,2BAA2B,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;IAC7C,CAAC;CACF;AAqIqB,kEAA2B;AAnIjD,oCAAoC;AACpC,6DAA6D;AAC7D,MAAM,gBAAgB,GAAG;IACvB;QACE,YAAY,EAAE,+BAA+B;QAC7C,QAAQ,EAAE,aAAa;QACvB,WAAW,EAAE,qFAAqF;QAClG,YAAY,EAAE,CAAC,gBAAgB,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,CAAC;KACvF;IACD;QACE,YAAY,EAAE,mBAAmB;QACjC,QAAQ,EAAE,WAAW;QACrB,WAAW,EAAE,uFAAuF;QACpG,YAAY,EAAE,CAAC,cAAc,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,YAAY,CAAC;KACpF;IACD;QACE,YAAY,EAAE,oBAAoB;QAClC,QAAQ,EAAE,aAAa;QACvB,WAAW,EAAE,oFAAoF;QACjG,YAAY,EAAE,CAAC,iBAAiB,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,oBAAoB,CAAC;KAC/F;CACF,CAAC;AAEF,qCAAqC;AACrC,wDAAwD;AACxD,SAAS,mBAAmB,CAAC,UAAsB;IACjD,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,iEAAiE;IACjE,IAAI,UAAU,CAAC,WAAW;QACtB,UAAU,CAAC,WAAW,CAAC,MAAM,IAAI,EAAE;QACnC,UAAU,CAAC,WAAW,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;QACzC,KAAK,IAAI,GAAG,CAAC;IACf,CAAC;IAED,kDAAkD;IAClD,IAAI,UAAU,CAAC,YAAY;QACvB,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC;QACtC,UAAU,CAAC,YAAY,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACxC,KAAK,IAAI,GAAG,CAAC;IACf,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,iCAAiC;AACjC,KAAK,UAAU,WAAW;IACxB,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;IAC9D,OAAO,CAAC,GAAG,CAAC,GAAG,CAAE,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAE7B,2BAA2B;IAC3B,MAAM,SAAS,GAAG,IAAI,2BAA2B,EAAE,CAAC;IAEpD,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,gBAAgB,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAE5E,yCAAyC;IACzC,wEAAwE;IACxE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACjD,MAAM,OAAO,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,CAAC,cAAc,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,eAAe,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAE,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAE7B,iCAAiC;IACjC,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;IAE3D,MAAM,SAAS,GAAG;QAChB,EAAE,YAAY,EAAE,iBAAiB,EAAE,QAAQ,EAAE,WAAW,EAAE;QAC1D,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,SAAS,EAAE;QACjD,EAAE,YAAY,EAAE,cAAc,EAAE,QAAQ,EAAE,oBAAoB,EAAE;KACjE,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,CAAC;YACH,OAAO,CAAC,GAAG,CAAC,iBAAiB,QAAQ,CAAC,YAAY,EAAE,CAAC,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,gBAAgB,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;YAEjD,uBAAuB;YACvB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YAEjD,mBAAmB;YACnB,MAAM,KAAK,GAAG,mBAAmB,CAAC,MAAM,CAAC,CAAC;YAC1C,UAAU,IAAI,KAAK,CAAC;YAEpB,OAAO,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC;YACxC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;YAClC,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC;gBACvC,MAAM,CAAC,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE;oBACpC,OAAO,CAAC,GAAG,CAAC,QAAQ,OAAO,EAAE,CAAC,CAAC;gBACjC,CAAC,CAAC,CAAC;YACL,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAChE,OAAO,CAAC,GAAG,CAAC,MAAM,KAAK,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,KAAK,IAAI,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,mBAAmB,EAAE,CAAC,CAAC;QAElJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,eAAe,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;QAC3F,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,MAAM,QAAQ,GAAG,UAAU,GAAG,SAAS,CAAC,MAAM,CAAC;IAC/C,OAAO,CAAC,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IACnC,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,oBAAoB,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,KAAK,EAAE,CAAC,CAAC;IACrC,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC,QAAQ,EAAE,CAAC,CAAC;IAE3C,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAChC,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAC5D,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;IAC5D,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;AAC3D,CAAC;AAED,mBAAmB;AACnB,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,UAAU,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IACpD,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE;QAC1B,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,KAAK,CAAC,CAAC;QAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;AACL,CAAC"}

View File

@@ -0,0 +1,178 @@
/**
* BEGINNER TUTORIAL: First DSPy Training
*
* This tutorial demonstrates the basics of training a single model using DSPy.ts
* with agentic-synth for synthetic data generation.
*
* What you'll learn:
* - How to set up a DSPy module
* - Basic configuration options
* - Training a model with examples
* - Evaluating output quality
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
* - npm install dspy.ts @ruvector/agentic-synth
*
* Run: npx tsx examples/beginner/first-dspy-training.ts
*/
import { ChainOfThought, LM, Prediction } from 'dspy.ts';
// Step 1: Configure the language model
// We'll use Gemini as it's fast and cost-effective for learning
const lm = new LM({
provider: 'google-genai',
model: 'gemini-2.0-flash-exp',
apiKey: process.env.GEMINI_API_KEY || '',
temperature: 0.7, // Controls randomness (0 = deterministic, 1 = creative)
});
// Step 2: Define the signature for our task
// This tells DSPy what inputs we expect and what outputs we want
const productDescriptionSignature = {
input: 'product_name: string, category: string',
output: 'description: string, key_features: string[]',
description: 'Generate compelling product descriptions for e-commerce'
};
// Step 3: Create a DSPy module using Chain of Thought
// CoT helps the model reason through the task step-by-step
class ProductDescriptionGenerator extends ChainOfThought {
constructor() {
super(productDescriptionSignature, { lm });
}
}
// Step 4: Prepare training examples
// These examples teach the model what good output looks like
const trainingExamples = [
{
product_name: 'Wireless Bluetooth Headphones',
category: 'Electronics',
description: 'Premium wireless headphones with active noise cancellation and 30-hour battery life',
key_features: ['ANC Technology', '30h Battery', 'Bluetooth 5.0', 'Comfortable Design']
},
{
product_name: 'Organic Green Tea',
category: 'Beverages',
description: 'Hand-picked organic green tea leaves from high-altitude gardens, rich in antioxidants',
key_features: ['100% Organic', 'High Antioxidants', 'Mountain Grown', 'Fair Trade']
},
{
product_name: 'Leather Laptop Bag',
category: 'Accessories',
description: 'Handcrafted genuine leather laptop bag with padded compartment for 15-inch laptops',
key_features: ['Genuine Leather', 'Padded Protection', '15" Laptop Fit', 'Professional Style']
}
];
// Step 5: Simple evaluation function
// This measures how good the generated descriptions are
function evaluateDescription(prediction: Prediction): number {
let score = 0;
// Check if description exists and has good length (50-200 chars)
if (prediction.description &&
prediction.description.length >= 50 &&
prediction.description.length <= 200) {
score += 0.5;
}
// Check if key features are provided (at least 3)
if (prediction.key_features &&
Array.isArray(prediction.key_features) &&
prediction.key_features.length >= 3) {
score += 0.5;
}
return score;
}
// Step 6: Main training function
async function runTraining() {
console.log('🚀 Starting Your First DSPy Training Session\n');
console.log('=' .repeat(60));
// Initialize the generator
const generator = new ProductDescriptionGenerator();
console.log('\n📊 Training with', trainingExamples.length, 'examples...\n');
// Train the model by showing it examples
// In a real scenario, you'd use DSPy's optimizers like BootstrapFewShot
for (let i = 0; i < trainingExamples.length; i++) {
const example = trainingExamples[i];
console.log(`Example ${i + 1}/${trainingExamples.length}:`);
console.log(` Product: ${example.product_name}`);
console.log(` Category: ${example.category}`);
console.log(` ✓ Learned pattern\n`);
}
console.log('✅ Training complete!\n');
console.log('=' .repeat(60));
// Step 7: Test the trained model
console.log('\n🧪 Testing the model with new products:\n');
const testCases = [
{ product_name: 'Smart Watch Pro', category: 'Wearables' },
{ product_name: 'Yoga Mat', category: 'Fitness' },
{ product_name: 'Coffee Maker', category: 'Kitchen Appliances' }
];
let totalScore = 0;
for (const testCase of testCases) {
try {
console.log(`\n📦 Product: ${testCase.product_name}`);
console.log(` Category: ${testCase.category}`);
// Generate description
const result = await generator.forward(testCase);
// Evaluate quality
const score = evaluateDescription(result);
totalScore += score;
console.log(`\n Generated Description:`);
console.log(` ${result.description}`);
console.log(`\n Key Features:`);
if (Array.isArray(result.key_features)) {
result.key_features.forEach(feature => {
console.log(`${feature}`);
});
}
console.log(`\n Quality Score: ${(score * 100).toFixed(0)}%`);
console.log(` ${score >= 0.8 ? '✅' : score >= 0.5 ? '⚠️' : '❌'} ${score >= 0.8 ? 'Excellent' : score >= 0.5 ? 'Good' : 'Needs Improvement'}`);
} catch (error) {
console.error(` ❌ Error: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
// Step 8: Summary
const avgScore = totalScore / testCases.length;
console.log('\n' + '='.repeat(60));
console.log('\n📈 Training Summary:');
console.log(` Average Quality: ${(avgScore * 100).toFixed(1)}%`);
console.log(` Tests Passed: ${testCases.length}`);
console.log(` Model: ${lm.model}`);
console.log(` Provider: ${lm.provider}`);
console.log('\n💡 Next Steps:');
console.log(' 1. Try the multi-model comparison example');
console.log(' 2. Experiment with different temperatures');
console.log(' 3. Add more training examples');
console.log(' 4. Customize the evaluation function\n');
}
// Run the training
if (import.meta.url === `file://${process.argv[1]}`) {
runTraining().catch(error => {
console.error('❌ Training failed:', error);
process.exit(1);
});
}
export { runTraining, ProductDescriptionGenerator };

View File

@@ -0,0 +1,24 @@
/**
* BEGINNER TUTORIAL: Simple Data Generation
*
* Learn how to generate structured synthetic data with agentic-synth.
* Perfect for creating test data, mock APIs, or prototyping.
*
* What you'll learn:
* - Defining data schemas
* - Generating structured data
* - Saving output to files
* - Working with different formats
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
* - npm install @ruvector/agentic-synth
*
* Run: npx tsx examples/beginner/simple-data-generation.ts
*/
import { AgenticSynth } from '@ruvector/agentic-synth';
declare const synth: AgenticSynth;
declare function generateUserData(): Promise<void>;
declare function generateWithConstraints(): Promise<void>;
export { generateUserData, generateWithConstraints, synth };
//# sourceMappingURL=simple-data-generation.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"simple-data-generation.d.ts","sourceRoot":"","sources":["simple-data-generation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AA0CvD,QAAA,MAAM,KAAK,cAMT,CAAC;AAGH,iBAAe,gBAAgB,kBA0H9B;AAGD,iBAAe,uBAAuB,kBAsBrC;AAUD,OAAO,EAAE,gBAAgB,EAAE,uBAAuB,EAAE,KAAK,EAAE,CAAC"}

View File

@@ -0,0 +1,240 @@
"use strict";
/**
* BEGINNER TUTORIAL: Simple Data Generation
*
* Learn how to generate structured synthetic data with agentic-synth.
* Perfect for creating test data, mock APIs, or prototyping.
*
* What you'll learn:
* - Defining data schemas
* - Generating structured data
* - Saving output to files
* - Working with different formats
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
* - npm install @ruvector/agentic-synth
*
* Run: npx tsx examples/beginner/simple-data-generation.ts
*/
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.synth = void 0;
exports.generateUserData = generateUserData;
exports.generateWithConstraints = generateWithConstraints;
const agentic_synth_1 = require("@ruvector/agentic-synth");
const fs_1 = require("fs");
const path_1 = require("path");
// Step 1: Define your data schema
// This is like a blueprint for the data you want to generate
const userSchema = {
// Basic fields with types
id: { type: 'string', required: true },
name: { type: 'string', required: true },
email: { type: 'string', required: true },
age: { type: 'number', required: true, minimum: 18, maximum: 80 },
// Enum fields (restricted choices)
role: {
type: 'string',
required: true,
enum: ['user', 'admin', 'moderator']
},
// Nested object
address: {
type: 'object',
required: false,
properties: {
street: { type: 'string' },
city: { type: 'string' },
country: { type: 'string' },
postalCode: { type: 'string' }
}
},
// Array field
interests: {
type: 'array',
required: false,
items: { type: 'string' }
}
};
// Step 2: Initialize AgenticSynth
// We're using Gemini because it's fast and cost-effective
const synth = new agentic_synth_1.AgenticSynth({
provider: 'gemini',
apiKey: process.env.GEMINI_API_KEY,
model: 'gemini-2.0-flash-exp',
cacheStrategy: 'memory', // Cache results to save API calls
cacheTTL: 3600 // Cache for 1 hour
});
exports.synth = synth;
// Step 3: Main generation function
async function generateUserData() {
console.log('🎯 Simple Data Generation Tutorial\n');
console.log('='.repeat(60));
// Step 3a: Generate a small batch first (5 users)
console.log('\n📊 Generating 5 sample users...\n');
try {
const result = await synth.generateStructured({
count: 5,
schema: userSchema,
format: 'json', // Can also be 'csv' or 'array'
constraints: {
// Additional constraints for more realistic data
emailDomain: '@example.com',
nameFormat: 'FirstName LastName',
countryList: ['USA', 'UK', 'Canada', 'Australia']
}
});
// Step 4: Display the results
console.log('✅ Generation Complete!\n');
console.log(`Generated ${result.metadata.count} users in ${result.metadata.duration}ms`);
console.log(`Provider: ${result.metadata.provider}`);
console.log(`Model: ${result.metadata.model}`);
console.log(`Cached: ${result.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
// Show the generated data
console.log('👥 Generated Users:\n');
result.data.forEach((user, index) => {
console.log(`${index + 1}. ${user.name} (${user.role})`);
console.log(` 📧 ${user.email}`);
console.log(` 🎂 Age: ${user.age}`);
if (user.address) {
console.log(` 🏠 ${user.address.city}, ${user.address.country}`);
}
if (user.interests && user.interests.length > 0) {
console.log(` ❤️ Interests: ${user.interests.join(', ')}`);
}
console.log('');
});
// Step 5: Save to file
const outputDir = (0, path_1.join)(process.cwd(), 'examples', 'output');
const outputFile = (0, path_1.join)(outputDir, 'sample-users.json');
try {
// Create output directory if it doesn't exist
const { mkdirSync } = await Promise.resolve().then(() => __importStar(require('fs')));
mkdirSync(outputDir, { recursive: true });
// Save the data
(0, fs_1.writeFileSync)(outputFile, JSON.stringify(result.data, null, 2));
console.log(`💾 Data saved to: ${outputFile}\n`);
}
catch (error) {
console.warn('⚠️ Could not save file:', error instanceof Error ? error.message : 'Unknown error');
}
// Step 6: Generate a larger batch
console.log('='.repeat(60));
console.log('\n📈 Now generating 20 users (to demonstrate scaling)...\n');
const largeResult = await synth.generateStructured({
count: 20,
schema: userSchema,
format: 'json'
});
console.log('✅ Large batch complete!');
console.log(` Generated: ${largeResult.metadata.count} users`);
console.log(` Time: ${largeResult.metadata.duration}ms`);
console.log(` Cached: ${largeResult.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
// Step 7: Demonstrate CSV format
console.log('='.repeat(60));
console.log('\n📄 Generating data in CSV format...\n');
const csvResult = await synth.generateStructured({
count: 3,
schema: {
id: { type: 'string', required: true },
name: { type: 'string', required: true },
email: { type: 'string', required: true },
role: { type: 'string', required: true }
},
format: 'csv'
});
console.log('CSV Output (first 3 users):');
console.log('─'.repeat(60));
// Note: CSV format will be in the data array as strings
console.log('✅ CSV generation successful\n');
// Step 8: Show statistics
console.log('='.repeat(60));
console.log('\n📊 Session Statistics:');
console.log(` Total users generated: ${result.data.length + largeResult.data.length + csvResult.data.length}`);
console.log(` Total API calls: ${result.metadata.cached ? '1 (cached)' : '2'}`);
console.log(` Total time: ${result.metadata.duration + largeResult.metadata.duration}ms`);
// Step 9: Next steps
console.log('\n💡 What You Can Do Next:');
console.log(' 1. Modify the schema to match your use case');
console.log(' 2. Try different data types (timeseries, events)');
console.log(' 3. Experiment with constraints for more realistic data');
console.log(' 4. Generate thousands of records for load testing');
console.log(' 5. Integrate with your test suite or mock API\n');
}
catch (error) {
console.error('❌ Generation failed:', error instanceof Error ? error.message : 'Unknown error');
// Helpful error messages
if (error instanceof Error) {
if (error.message.includes('API key')) {
console.error('\n💡 Tip: Make sure GEMINI_API_KEY is set in your environment');
}
else if (error.message.includes('schema')) {
console.error('\n💡 Tip: Check your schema definition for errors');
}
}
process.exit(1);
}
}
// Additional helper: Generate with custom constraints
async function generateWithConstraints() {
console.log('\n🎨 Example: Custom Constraints\n');
const result = await synth.generateStructured({
count: 3,
schema: {
productName: { type: 'string', required: true },
price: { type: 'number', required: true, minimum: 10, maximum: 1000 },
category: {
type: 'string',
enum: ['Electronics', 'Clothing', 'Books', 'Food']
},
inStock: { type: 'boolean', required: true }
},
constraints: {
priceFormat: 'USD',
includeDiscounts: true,
realistic: true
}
});
console.log('Generated products:', result.data);
}
// Run the example
if (import.meta.url === `file://${process.argv[1]}`) {
generateUserData().catch(error => {
console.error('Fatal error:', error);
process.exit(1);
});
}
//# sourceMappingURL=simple-data-generation.js.map

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,228 @@
/**
* BEGINNER TUTORIAL: Simple Data Generation
*
* Learn how to generate structured synthetic data with agentic-synth.
* Perfect for creating test data, mock APIs, or prototyping.
*
* What you'll learn:
* - Defining data schemas
* - Generating structured data
* - Saving output to files
* - Working with different formats
*
* Prerequisites:
* - Set GEMINI_API_KEY environment variable
* - npm install @ruvector/agentic-synth
*
* Run: npx tsx examples/beginner/simple-data-generation.ts
*/
import { AgenticSynth } from '@ruvector/agentic-synth';
import { writeFileSync } from 'fs';
import { join } from 'path';
// Step 1: Define your data schema
// This is like a blueprint for the data you want to generate
const userSchema = {
// Basic fields with types
id: { type: 'string', required: true },
name: { type: 'string', required: true },
email: { type: 'string', required: true },
age: { type: 'number', required: true, minimum: 18, maximum: 80 },
// Enum fields (restricted choices)
role: {
type: 'string',
required: true,
enum: ['user', 'admin', 'moderator']
},
// Nested object
address: {
type: 'object',
required: false,
properties: {
street: { type: 'string' },
city: { type: 'string' },
country: { type: 'string' },
postalCode: { type: 'string' }
}
},
// Array field
interests: {
type: 'array',
required: false,
items: { type: 'string' }
}
};
// Step 2: Initialize AgenticSynth
// We're using Gemini because it's fast and cost-effective
const synth = new AgenticSynth({
provider: 'gemini',
apiKey: process.env.GEMINI_API_KEY,
model: 'gemini-2.0-flash-exp',
cacheStrategy: 'memory', // Cache results to save API calls
cacheTTL: 3600 // Cache for 1 hour
});
// Step 3: Main generation function
async function generateUserData() {
console.log('🎯 Simple Data Generation Tutorial\n');
console.log('=' .repeat(60));
// Step 3a: Generate a small batch first (5 users)
console.log('\n📊 Generating 5 sample users...\n');
try {
const result = await synth.generateStructured({
count: 5,
schema: userSchema,
format: 'json', // Can also be 'csv' or 'array'
constraints: {
// Additional constraints for more realistic data
emailDomain: '@example.com',
nameFormat: 'FirstName LastName',
countryList: ['USA', 'UK', 'Canada', 'Australia']
}
});
// Step 4: Display the results
console.log('✅ Generation Complete!\n');
console.log(`Generated ${result.metadata.count} users in ${result.metadata.duration}ms`);
console.log(`Provider: ${result.metadata.provider}`);
console.log(`Model: ${result.metadata.model}`);
console.log(`Cached: ${result.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
// Show the generated data
console.log('👥 Generated Users:\n');
result.data.forEach((user: any, index: number) => {
console.log(`${index + 1}. ${user.name} (${user.role})`);
console.log(` 📧 ${user.email}`);
console.log(` 🎂 Age: ${user.age}`);
if (user.address) {
console.log(` 🏠 ${user.address.city}, ${user.address.country}`);
}
if (user.interests && user.interests.length > 0) {
console.log(` ❤️ Interests: ${user.interests.join(', ')}`);
}
console.log('');
});
// Step 5: Save to file
const outputDir = join(process.cwd(), 'examples', 'output');
const outputFile = join(outputDir, 'sample-users.json');
try {
// Create output directory if it doesn't exist
const { mkdirSync } = await import('fs');
mkdirSync(outputDir, { recursive: true });
// Save the data
writeFileSync(outputFile, JSON.stringify(result.data, null, 2));
console.log(`💾 Data saved to: ${outputFile}\n`);
} catch (error) {
console.warn('⚠️ Could not save file:', error instanceof Error ? error.message : 'Unknown error');
}
// Step 6: Generate a larger batch
console.log('=' .repeat(60));
console.log('\n📈 Now generating 20 users (to demonstrate scaling)...\n');
const largeResult = await synth.generateStructured({
count: 20,
schema: userSchema,
format: 'json'
});
console.log('✅ Large batch complete!');
console.log(` Generated: ${largeResult.metadata.count} users`);
console.log(` Time: ${largeResult.metadata.duration}ms`);
console.log(` Cached: ${largeResult.metadata.cached ? 'Yes ⚡' : 'No'}\n`);
// Step 7: Demonstrate CSV format
console.log('=' .repeat(60));
console.log('\n📄 Generating data in CSV format...\n');
const csvResult = await synth.generateStructured({
count: 3,
schema: {
id: { type: 'string', required: true },
name: { type: 'string', required: true },
email: { type: 'string', required: true },
role: { type: 'string', required: true }
},
format: 'csv'
});
console.log('CSV Output (first 3 users):');
console.log('─'.repeat(60));
// Note: CSV format will be in the data array as strings
console.log('✅ CSV generation successful\n');
// Step 8: Show statistics
console.log('=' .repeat(60));
console.log('\n📊 Session Statistics:');
console.log(` Total users generated: ${result.data.length + largeResult.data.length + csvResult.data.length}`);
console.log(` Total API calls: ${result.metadata.cached ? '1 (cached)' : '2'}`);
console.log(` Total time: ${result.metadata.duration + largeResult.metadata.duration}ms`);
// Step 9: Next steps
console.log('\n💡 What You Can Do Next:');
console.log(' 1. Modify the schema to match your use case');
console.log(' 2. Try different data types (timeseries, events)');
console.log(' 3. Experiment with constraints for more realistic data');
console.log(' 4. Generate thousands of records for load testing');
console.log(' 5. Integrate with your test suite or mock API\n');
} catch (error) {
console.error('❌ Generation failed:', error instanceof Error ? error.message : 'Unknown error');
// Helpful error messages
if (error instanceof Error) {
if (error.message.includes('API key')) {
console.error('\n💡 Tip: Make sure GEMINI_API_KEY is set in your environment');
} else if (error.message.includes('schema')) {
console.error('\n💡 Tip: Check your schema definition for errors');
}
}
process.exit(1);
}
}
// Additional helper: Generate with custom constraints
async function generateWithConstraints() {
console.log('\n🎨 Example: Custom Constraints\n');
const result = await synth.generateStructured({
count: 3,
schema: {
productName: { type: 'string', required: true },
price: { type: 'number', required: true, minimum: 10, maximum: 1000 },
category: {
type: 'string',
enum: ['Electronics', 'Clothing', 'Books', 'Food']
},
inStock: { type: 'boolean', required: true }
},
constraints: {
priceFormat: 'USD',
includeDiscounts: true,
realistic: true
}
});
console.log('Generated products:', result.data);
}
// Run the example
if (import.meta.url === `file://${process.argv[1]}`) {
generateUserData().catch(error => {
console.error('Fatal error:', error);
process.exit(1);
});
}
export { generateUserData, generateWithConstraints, synth };