Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
@@ -0,0 +1,453 @@
|
||||
/**
|
||||
* Version Control Integration Example
|
||||
*
|
||||
* Demonstrates how to use agentic-jujutsu for version controlling
|
||||
* synthetic data generation, tracking changes, branching strategies,
|
||||
* and rolling back to previous versions.
|
||||
*/
|
||||
|
||||
import { AgenticSynth } from '../../src/core/synth';
|
||||
import { execSync } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
|
||||
interface DataGenerationMetadata {
|
||||
version: string;
|
||||
timestamp: string;
|
||||
schemaHash: string;
|
||||
recordCount: number;
|
||||
generator: string;
|
||||
quality: number;
|
||||
}
|
||||
|
||||
interface JujutsuCommit {
|
||||
hash: string;
|
||||
message: string;
|
||||
metadata: DataGenerationMetadata;
|
||||
timestamp: Date;
|
||||
}
|
||||
|
||||
class VersionControlledDataGenerator {
|
||||
private synth: AgenticSynth;
|
||||
private repoPath: string;
|
||||
private dataPath: string;
|
||||
|
||||
constructor(repoPath: string) {
|
||||
this.synth = new AgenticSynth();
|
||||
this.repoPath = repoPath;
|
||||
this.dataPath = path.join(repoPath, 'data');
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize jujutsu repository for data versioning
|
||||
*/
|
||||
async initializeRepository(): Promise<void> {
|
||||
try {
|
||||
// Initialize jujutsu repo
|
||||
console.log('🔧 Initializing jujutsu repository...');
|
||||
execSync('npx agentic-jujutsu@latest init', {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
|
||||
// Create data directory
|
||||
if (!fs.existsSync(this.dataPath)) {
|
||||
fs.mkdirSync(this.dataPath, { recursive: true });
|
||||
}
|
||||
|
||||
// Create .gitignore to ignore node_modules but track data
|
||||
const gitignore = `node_modules/
|
||||
*.log
|
||||
.env
|
||||
!data/
|
||||
`;
|
||||
fs.writeFileSync(path.join(this.repoPath, '.gitignore'), gitignore);
|
||||
|
||||
console.log('✅ Repository initialized successfully');
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to initialize repository: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate synthetic data and commit with metadata
|
||||
*/
|
||||
async generateAndCommit(
|
||||
schema: any,
|
||||
count: number,
|
||||
message: string
|
||||
): Promise<JujutsuCommit> {
|
||||
try {
|
||||
console.log(`🎲 Generating ${count} records...`);
|
||||
|
||||
// Generate synthetic data
|
||||
const data = await this.synth.generate(schema, { count });
|
||||
|
||||
// Calculate metadata
|
||||
const metadata: DataGenerationMetadata = {
|
||||
version: '1.0.0',
|
||||
timestamp: new Date().toISOString(),
|
||||
schemaHash: this.hashSchema(schema),
|
||||
recordCount: count,
|
||||
generator: 'agentic-synth',
|
||||
quality: this.calculateQuality(data)
|
||||
};
|
||||
|
||||
// Save data and metadata
|
||||
const timestamp = Date.now();
|
||||
const dataFile = path.join(this.dataPath, `dataset_${timestamp}.json`);
|
||||
const metaFile = path.join(this.dataPath, `dataset_${timestamp}.meta.json`);
|
||||
|
||||
fs.writeFileSync(dataFile, JSON.stringify(data, null, 2));
|
||||
fs.writeFileSync(metaFile, JSON.stringify(metadata, null, 2));
|
||||
|
||||
console.log(`💾 Saved to ${dataFile}`);
|
||||
|
||||
// Add files to jujutsu
|
||||
execSync(`npx agentic-jujutsu@latest add "${dataFile}"`, {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
execSync(`npx agentic-jujutsu@latest add "${metaFile}"`, {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
|
||||
// Commit with metadata
|
||||
const commitMessage = `${message}\n\nMetadata:\n${JSON.stringify(metadata, null, 2)}`;
|
||||
const result = execSync(
|
||||
`npx agentic-jujutsu@latest commit -m "${commitMessage}"`,
|
||||
{ cwd: this.repoPath, encoding: 'utf-8' }
|
||||
);
|
||||
|
||||
// Get commit hash
|
||||
const hash = this.getLatestCommitHash();
|
||||
|
||||
console.log(`✅ Committed: ${hash.substring(0, 8)}`);
|
||||
|
||||
return {
|
||||
hash,
|
||||
message,
|
||||
metadata,
|
||||
timestamp: new Date()
|
||||
};
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to generate and commit: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a branch for experimenting with different generation strategies
|
||||
*/
|
||||
async createGenerationBranch(branchName: string, description: string): Promise<void> {
|
||||
try {
|
||||
console.log(`🌿 Creating branch: ${branchName}`);
|
||||
|
||||
execSync(`npx agentic-jujutsu@latest branch create ${branchName}`, {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
|
||||
// Save branch description
|
||||
const branchesDir = path.join(this.repoPath, '.jj', 'branches');
|
||||
if (!fs.existsSync(branchesDir)) {
|
||||
fs.mkdirSync(branchesDir, { recursive: true });
|
||||
}
|
||||
|
||||
const descFile = path.join(branchesDir, `${branchName}.desc`);
|
||||
fs.writeFileSync(descFile, description);
|
||||
|
||||
console.log(`✅ Branch ${branchName} created`);
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to create branch: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare datasets between two commits or branches
|
||||
*/
|
||||
async compareDatasets(ref1: string, ref2: string): Promise<any> {
|
||||
try {
|
||||
console.log(`📊 Comparing ${ref1} vs ${ref2}...`);
|
||||
|
||||
// Get file lists at each ref
|
||||
const files1 = this.getDataFilesAtRef(ref1);
|
||||
const files2 = this.getDataFilesAtRef(ref2);
|
||||
|
||||
const comparison = {
|
||||
ref1,
|
||||
ref2,
|
||||
filesAdded: files2.filter(f => !files1.includes(f)),
|
||||
filesRemoved: files1.filter(f => !files2.includes(f)),
|
||||
filesModified: [] as string[],
|
||||
statistics: {} as any
|
||||
};
|
||||
|
||||
// Compare common files
|
||||
const commonFiles = files1.filter(f => files2.includes(f));
|
||||
for (const file of commonFiles) {
|
||||
const diff = execSync(
|
||||
`npx agentic-jujutsu@latest diff ${ref1} ${ref2} -- "${file}"`,
|
||||
{ cwd: this.repoPath, encoding: 'utf-8' }
|
||||
);
|
||||
|
||||
if (diff.trim()) {
|
||||
comparison.filesModified.push(file);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`✅ Comparison complete:`);
|
||||
console.log(` Added: ${comparison.filesAdded.length}`);
|
||||
console.log(` Removed: ${comparison.filesRemoved.length}`);
|
||||
console.log(` Modified: ${comparison.filesModified.length}`);
|
||||
|
||||
return comparison;
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to compare datasets: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge data generation branches
|
||||
*/
|
||||
async mergeBranches(sourceBranch: string, targetBranch: string): Promise<void> {
|
||||
try {
|
||||
console.log(`🔀 Merging ${sourceBranch} into ${targetBranch}...`);
|
||||
|
||||
// Switch to target branch
|
||||
execSync(`npx agentic-jujutsu@latest checkout ${targetBranch}`, {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
|
||||
// Merge source branch
|
||||
execSync(`npx agentic-jujutsu@latest merge ${sourceBranch}`, {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
|
||||
console.log(`✅ Merge complete`);
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to merge branches: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Rollback to a previous data version
|
||||
*/
|
||||
async rollbackToVersion(commitHash: string): Promise<void> {
|
||||
try {
|
||||
console.log(`⏮️ Rolling back to ${commitHash.substring(0, 8)}...`);
|
||||
|
||||
// Create a new branch from the target commit
|
||||
const rollbackBranch = `rollback_${Date.now()}`;
|
||||
execSync(
|
||||
`npx agentic-jujutsu@latest branch create ${rollbackBranch} -r ${commitHash}`,
|
||||
{ cwd: this.repoPath, stdio: 'inherit' }
|
||||
);
|
||||
|
||||
// Checkout the rollback branch
|
||||
execSync(`npx agentic-jujutsu@latest checkout ${rollbackBranch}`, {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
|
||||
console.log(`✅ Rolled back to ${commitHash.substring(0, 8)}`);
|
||||
console.log(` New branch: ${rollbackBranch}`);
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to rollback: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get data generation history
|
||||
*/
|
||||
async getHistory(limit: number = 10): Promise<any[]> {
|
||||
try {
|
||||
const log = execSync(
|
||||
`npx agentic-jujutsu@latest log --limit ${limit} --no-graph`,
|
||||
{ cwd: this.repoPath, encoding: 'utf-8' }
|
||||
);
|
||||
|
||||
// Parse log output
|
||||
const commits = this.parseLogOutput(log);
|
||||
|
||||
console.log(`📜 Retrieved ${commits.length} commits`);
|
||||
return commits;
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to get history: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tag a specific data generation
|
||||
*/
|
||||
async tagVersion(tag: string, message: string): Promise<void> {
|
||||
try {
|
||||
console.log(`🏷️ Creating tag: ${tag}`);
|
||||
|
||||
execSync(`npx agentic-jujutsu@latest tag ${tag} -m "${message}"`, {
|
||||
cwd: this.repoPath,
|
||||
stdio: 'inherit'
|
||||
});
|
||||
|
||||
console.log(`✅ Tag created: ${tag}`);
|
||||
} catch (error) {
|
||||
throw new Error(`Failed to create tag: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Helper methods
|
||||
|
||||
private hashSchema(schema: any): string {
|
||||
const crypto = require('crypto');
|
||||
return crypto
|
||||
.createHash('sha256')
|
||||
.update(JSON.stringify(schema))
|
||||
.digest('hex')
|
||||
.substring(0, 16);
|
||||
}
|
||||
|
||||
private calculateQuality(data: any[]): number {
|
||||
// Simple quality metric: completeness of data
|
||||
if (!data.length) return 0;
|
||||
|
||||
let totalFields = 0;
|
||||
let completeFields = 0;
|
||||
|
||||
data.forEach(record => {
|
||||
const fields = Object.keys(record);
|
||||
totalFields += fields.length;
|
||||
fields.forEach(field => {
|
||||
if (record[field] !== null && record[field] !== undefined && record[field] !== '') {
|
||||
completeFields++;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return totalFields > 0 ? completeFields / totalFields : 0;
|
||||
}
|
||||
|
||||
private getLatestCommitHash(): string {
|
||||
const result = execSync(
|
||||
'npx agentic-jujutsu@latest log --limit 1 --no-graph --template "{commit_id}"',
|
||||
{ cwd: this.repoPath, encoding: 'utf-8' }
|
||||
);
|
||||
return result.trim();
|
||||
}
|
||||
|
||||
private getDataFilesAtRef(ref: string): string[] {
|
||||
try {
|
||||
const result = execSync(
|
||||
`npx agentic-jujutsu@latest files --revision ${ref}`,
|
||||
{ cwd: this.repoPath, encoding: 'utf-8' }
|
||||
);
|
||||
return result
|
||||
.split('\n')
|
||||
.filter(line => line.includes('data/dataset_'))
|
||||
.map(line => line.trim());
|
||||
} catch (error) {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
private parseLogOutput(log: string): any[] {
|
||||
// Simple log parser - in production, use structured output
|
||||
const commits: any[] = [];
|
||||
const lines = log.split('\n');
|
||||
|
||||
let currentCommit: any = null;
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('commit ')) {
|
||||
if (currentCommit) commits.push(currentCommit);
|
||||
currentCommit = {
|
||||
hash: line.split(' ')[1],
|
||||
message: '',
|
||||
timestamp: new Date()
|
||||
};
|
||||
} else if (currentCommit && line.trim()) {
|
||||
currentCommit.message += line.trim() + ' ';
|
||||
}
|
||||
}
|
||||
if (currentCommit) commits.push(currentCommit);
|
||||
|
||||
return commits;
|
||||
}
|
||||
}
|
||||
|
||||
// Example usage
|
||||
async function main() {
|
||||
console.log('🚀 Agentic-Jujutsu Version Control Integration Example\n');
|
||||
|
||||
const repoPath = path.join(process.cwd(), 'synthetic-data-repo');
|
||||
const generator = new VersionControlledDataGenerator(repoPath);
|
||||
|
||||
try {
|
||||
// Initialize repository
|
||||
await generator.initializeRepository();
|
||||
|
||||
// Define schema for user data
|
||||
const userSchema = {
|
||||
name: 'string',
|
||||
email: 'email',
|
||||
age: 'number',
|
||||
city: 'string',
|
||||
active: 'boolean'
|
||||
};
|
||||
|
||||
// Generate initial dataset
|
||||
const commit1 = await generator.generateAndCommit(
|
||||
userSchema,
|
||||
1000,
|
||||
'Initial user dataset generation'
|
||||
);
|
||||
console.log(`📝 First commit: ${commit1.hash.substring(0, 8)}\n`);
|
||||
|
||||
// Tag the baseline
|
||||
await generator.tagVersion('v1.0-baseline', 'Production baseline dataset');
|
||||
|
||||
// Create experimental branch
|
||||
await generator.createGenerationBranch(
|
||||
'experiment-large-dataset',
|
||||
'Testing larger dataset generation'
|
||||
);
|
||||
|
||||
// Generate more data on experimental branch
|
||||
const commit2 = await generator.generateAndCommit(
|
||||
userSchema,
|
||||
5000,
|
||||
'Large dataset experiment'
|
||||
);
|
||||
console.log(`📝 Second commit: ${commit2.hash.substring(0, 8)}\n`);
|
||||
|
||||
// Compare datasets
|
||||
const comparison = await generator.compareDatasets(
|
||||
commit1.hash,
|
||||
commit2.hash
|
||||
);
|
||||
console.log('\n📊 Comparison result:', JSON.stringify(comparison, null, 2));
|
||||
|
||||
// Merge if experiment was successful
|
||||
await generator.mergeBranches('experiment-large-dataset', 'main');
|
||||
|
||||
// Get history
|
||||
const history = await generator.getHistory(5);
|
||||
console.log('\n📜 Recent history:', history);
|
||||
|
||||
// Demonstrate rollback
|
||||
console.log('\n⏮️ Demonstrating rollback...');
|
||||
await generator.rollbackToVersion(commit1.hash);
|
||||
|
||||
console.log('\n✅ Example completed successfully!');
|
||||
} catch (error) {
|
||||
console.error('❌ Error:', (error as Error).message);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Run example if executed directly
|
||||
if (require.main === module) {
|
||||
main().catch(console.error);
|
||||
}
|
||||
|
||||
export { VersionControlledDataGenerator, DataGenerationMetadata, JujutsuCommit };
|
||||
Reference in New Issue
Block a user