Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,195 @@
use bincode::{Decode, Encode};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
/// Summary descriptor for a stored snapshot.
///
/// Holds identifying and bookkeeping fields only; the vectors themselves
/// live in [`SnapshotData`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
    /// Identifier that uniquely names this snapshot.
    pub id: String,
    /// Collection that this snapshot represents.
    pub collection_name: String,
    /// UTC instant at which the snapshot was created.
    pub created_at: DateTime<Utc>,
    /// How many vectors the snapshot contains.
    pub vectors_count: usize,
    /// SHA-256 checksum of the snapshot data.
    pub checksum: String,
    /// Snapshot size in bytes (compressed).
    pub size_bytes: u64,
}
/// Full contents of a snapshot: its metadata, the collection configuration
/// and every vector record.
///
/// Field order is significant for the derived encodings; keep it stable.
#[derive(Debug, Serialize, Deserialize, Encode, Decode)]
pub struct SnapshotData {
    /// Identifying information about the snapshot itself.
    pub metadata: SnapshotMetadata,
    /// Configuration of the collection captured by the snapshot.
    pub config: CollectionConfig,
    /// Every vector record of the collection.
    pub vectors: Vec<VectorRecord>,
}
impl SnapshotData {
    /// Build the snapshot contents for the collection `collection_name`.
    ///
    /// The metadata is generated here: a freshly generated v4 UUID as the id,
    /// the current UTC time rendered as RFC 3339 for `created_at`, and this
    /// crate's `CARGO_PKG_VERSION` (captured at compile time) for `version`.
    /// `config` and `vectors` are stored as given.
    pub fn new(
        collection_name: String,
        config: CollectionConfig,
        vectors: Vec<VectorRecord>,
    ) -> Self {
        let id = uuid::Uuid::new_v4().to_string();
        let created_at = Utc::now().to_rfc3339();
        let version = String::from(env!("CARGO_PKG_VERSION"));

        let metadata = SnapshotMetadata {
            id,
            collection_name,
            created_at,
            version,
        };

        Self {
            metadata,
            config,
            vectors,
        }
    }

    /// Number of vector records held by this snapshot.
    pub fn vectors_count(&self) -> usize {
        let records: &[VectorRecord] = &self.vectors;
        records.len()
    }

    /// Identifier of this snapshot, borrowed from its metadata.
    pub fn id(&self) -> &str {
        self.metadata.id.as_str()
    }

    /// Name of the collection, borrowed from the snapshot metadata.
    pub fn collection_name(&self) -> &str {
        self.metadata.collection_name.as_str()
    }
}
/// Identifying metadata embedded in a [`SnapshotData`].
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct SnapshotMetadata {
    /// Identifier that uniquely names the snapshot.
    pub id: String,
    /// Collection the snapshot belongs to.
    pub collection_name: String,
    /// Creation time, stored as an RFC 3339 string.
    pub created_at: String,
    /// Version tag of the snapshot.
    ///
    /// `SnapshotData::new` fills this with the crate's `CARGO_PKG_VERSION`,
    /// so it tracks the producing crate version rather than a separately
    /// maintained format number.
    pub version: String,
}
/// Configuration of a collection as recorded inside a snapshot.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct CollectionConfig {
    /// Dimension of the collection's vectors.
    pub dimension: usize,
    /// Distance metric configured for the collection.
    pub metric: DistanceMetric,
    /// HNSW settings, or `None` when none are recorded.
    pub hnsw_config: Option<HnswConfig>,
}
/// Metric used to compare vectors for similarity.
///
/// NOTE(review): the derived bincode encoding presumably identifies variants
/// by declaration order; confirm before reordering or inserting variants.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub enum DistanceMetric {
    /// Cosine metric.
    Cosine,
    /// Euclidean metric.
    Euclidean,
    /// Dot-product metric.
    DotProduct,
}
/// Parameters of an HNSW index as recorded inside a snapshot.
///
/// NOTE(review): the field meanings below follow the usual HNSW naming; the
/// index implementation is not visible here, so confirm against it.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct HnswConfig {
    /// HNSW `M` parameter (presumably the per-node link limit).
    pub m: usize,
    /// HNSW `ef_construction` parameter (presumably the build-time candidate list size).
    pub ef_construction: usize,
    /// HNSW `ef_search` parameter (presumably the query-time candidate list size).
    pub ef_search: usize,
}
/// One vector of a collection, as stored inside a snapshot.
#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
pub struct VectorRecord {
    /// Identifier that uniquely names this vector.
    pub id: String,
    /// The vector's components.
    pub vector: Vec<f32>,
    /// Optional metadata payload, kept as JSON text so that bincode can
    /// encode it; use `payload()` / `set_payload()` to access it as a value.
    ///
    /// NOTE(review): `#[serde(skip)]` makes serde serialization omit this
    /// field and serde deserialization reset it to `None`, so the payload
    /// appears to survive only the bincode path. Confirm this is intended.
    #[serde(skip)]
    #[bincode(with_serde)]
    payload_json: Option<String>,
}
impl VectorRecord {
    /// Build a record from an id, its vector components and an optional payload.
    ///
    /// The payload is stored as JSON text; if it cannot be serialized the
    /// record is created without a payload.
    pub fn new(id: String, vector: Vec<f32>, payload: Option<Value>) -> Self {
        let mut record = Self {
            id,
            vector,
            payload_json: None,
        };
        // Reuse the setter so the encoding rule lives in one place.
        record.set_payload(payload);
        record
    }

    /// Decode the stored payload into a `serde_json::Value`.
    ///
    /// Returns `None` when no payload is stored or when the stored text does
    /// not parse as JSON.
    pub fn payload(&self) -> Option<Value> {
        let raw = self.payload_json.as_deref()?;
        serde_json::from_str(raw).ok()
    }

    /// Replace the stored payload.
    ///
    /// `None` clears it; a value that fails to serialize also leaves the
    /// record without a payload.
    pub fn set_payload(&mut self, payload: Option<Value>) {
        self.payload_json = match payload {
            Some(value) => serde_json::to_string(&value).ok(),
            None => None,
        };
    }

    /// Number of components in this record's vector.
    pub fn dimension(&self) -> usize {
        let components: &[f32] = &self.vector;
        components.len()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A record built without a payload keeps its id and reports the
    /// length of its vector as the dimension.
    #[test]
    fn test_vector_record_creation() {
        let components: Vec<f32> = vec![1.0, 2.0, 3.0];
        let record = VectorRecord::new(String::from("test-1"), components, None);

        assert_eq!(record.id, "test-1");
        assert_eq!(record.dimension(), 3);
    }

    /// Snapshot data built from two records reports the count, the
    /// collection name and a non-empty generated id.
    #[test]
    fn test_snapshot_data_creation() {
        let first = VectorRecord::new(String::from("v1"), vec![1.0, 0.0, 0.0], None);
        let second = VectorRecord::new(String::from("v2"), vec![0.0, 1.0, 0.0], None);
        let vectors = vec![first, second];

        let config = CollectionConfig {
            dimension: 3,
            metric: DistanceMetric::Cosine,
            hnsw_config: None,
        };

        let data = SnapshotData::new(String::from("test-collection"), config, vectors);

        assert_eq!(data.vectors_count(), 2);
        assert_eq!(data.collection_name(), "test-collection");
        assert!(!data.id().is_empty());
    }
}