Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,318 @@
#!/usr/bin/env node
/**
* End-to-end RVF CLI smoke test.
*
* Tests the full lifecycle via `npx ruvector rvf` CLI commands:
* create -> ingest -> query -> restart simulation -> query -> verify match
*
* Exits with code 0 on success, code 1 on failure.
*
* Usage:
* node tests/rvf-integration/smoke-test.js
*/
'use strict';
const { execFileSync } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------
// Dimensionality of every generated test vector.
const DIM = 128;
// Distance metric passed to `rvf create`.
const METRIC = 'cosine';
// Number of vectors written into the JSON ingest payload.
const VECTOR_COUNT = 20;
// Neighbors requested per query.
const K = 5;
// Locate the CLI entry point relative to the repo root.
const REPO_ROOT = path.resolve(__dirname, '..', '..');
const CLI_PATH = path.join(REPO_ROOT, 'npm', 'packages', 'ruvector', 'bin', 'cli.js');
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// Mutable state shared between setup(), teardown(), and the test steps.
let tmpDir;     // temporary working directory, removed in teardown()
let storePath;  // path of the primary .rvf store
let inputPath;  // path of the JSON ingest payload
let childPath;  // path of the derived child store
// Running pass/fail tallies, reported by main().
let passed = 0;
let failed = 0;
/**
 * Deterministic pseudo-random vector generation using a 64-bit LCG,
 * followed by L2 normalization (suitable for the cosine metric).
 * Uses the same LCG constants as the Rust `random_vector` helper.
 *
 * @param {number} dim  vector dimensionality
 * @param {number} seed LCG seed; identical seeds yield identical vectors
 * @returns {number[]}  unit-norm array of length `dim`
 */
function randomVector(dim, seed) {
  const MASK = 0xFFFFFFFFFFFFFFFFn;
  let state = BigInt(seed) & MASK;
  const raw = new Float64Array(dim);
  for (let i = 0; i < dim; i++) {
    state = (state * 6364136223846793005n + 1442695040888963407n) & MASK;
    raw[i] = Number(state >> 33n) / 4294967295.0 - 0.5;
  }
  // Compute the L2 norm, then scale each component to unit length.
  let sumSq = 0;
  for (const x of raw) sumSq += x * x;
  const norm = Math.sqrt(sumSq);
  return Array.from(raw, x => (norm > 1e-8 ? x / norm : 0));
}
/**
 * Run one `rvf` CLI subcommand and return its trimmed stdout.
 *
 * @param {string[]} args subcommand and flags appended after `rvf`
 * @param {object} [opts] extra execFileSync options (override defaults)
 * @returns {string} trimmed stdout on success
 * @throws {Error} on non-zero exit, with stdout/stderr embedded
 */
function runCli(args, opts = {}) {
  const execOptions = {
    cwd: REPO_ROOT,
    timeout: 30000,
    encoding: 'utf8',
    env: {
      ...process.env,
      // Disable chalk colors for easier parsing.
      FORCE_COLOR: '0',
      NO_COLOR: '1',
    },
    ...opts,
  };
  try {
    const stdout = execFileSync('node', [CLI_PATH, 'rvf', ...args], execOptions);
    return stdout.trim();
  } catch (e) {
    const stderr = e.stderr ? e.stderr.toString().trim() : '';
    const stdout = e.stdout ? e.stdout.toString().trim() : '';
    throw new Error(
      `CLI failed (exit ${e.status}): ${args.join(' ')}\n` +
      ` stdout: ${stdout}\n` +
      ` stderr: ${stderr}`
    );
  }
}
/**
 * Record a single check: bump the pass/fail counters and log the outcome.
 *
 * @param {boolean} condition truthy means the check passed
 * @param {string} message    human-readable description of the check
 */
function assert(condition, message) {
  if (!condition) {
    failed++;
    console.error(` FAIL: ${message}`);
    return;
  }
  passed++;
  console.log(` PASS: ${message}`);
}
/**
 * Record a check that expects `fn` to throw (i.e. the CLI command fails).
 * Success of `fn` counts as a test failure.
 *
 * @param {Function} fn     zero-arg callable expected to throw
 * @param {string} message  human-readable description of the check
 */
function assertThrows(fn, message) {
  let threw = false;
  try {
    fn();
  } catch (_e) {
    threw = true;
  }
  if (threw) {
    passed++;
    console.log(` PASS: ${message}`);
  } else {
    failed++;
    console.error(` FAIL: ${message} (expected error, got success)`);
  }
}
// ---------------------------------------------------------------------------
// Setup
// ---------------------------------------------------------------------------
// Create the scratch directory and write the JSON ingest payload.
// Vectors are seeded with `id * 17 + 5` so each id's vector is reproducible.
function setup() {
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rvf-smoke-'));
  storePath = path.join(tmpDir, 'smoke.rvf');
  inputPath = path.join(tmpDir, 'vectors.json');
  childPath = path.join(tmpDir, 'child.rvf');
  // Build VECTOR_COUNT entries with 1-based ids.
  const entries = Array.from({ length: VECTOR_COUNT }, (_, i) => {
    const id = i + 1;
    return { id, vector: randomVector(DIM, id * 17 + 5) };
  });
  fs.writeFileSync(inputPath, JSON.stringify(entries));
}
// ---------------------------------------------------------------------------
// Teardown
// ---------------------------------------------------------------------------
// Remove the scratch directory; failures are ignored (best-effort cleanup).
function teardown() {
  try {
    if (tmpDir && fs.existsSync(tmpDir)) {
      fs.rmSync(tmpDir, { recursive: true, force: true });
    }
  } catch (_e) {
    // Cleanup is best-effort; never fail the run over it.
  }
}
// ---------------------------------------------------------------------------
// Test steps
// ---------------------------------------------------------------------------
// Step 1: `rvf create` must report success and produce the store file.
function testCreate() {
  console.log('\nStep 1: Create store');
  const out = runCli(['create', storePath, '-d', String(DIM), '-m', METRIC]);
  const reported = out.includes('Created') || out.includes('created');
  assert(reported, 'create reports success');
  assert(fs.existsSync(storePath), 'store file exists on disk');
}
// Step 2: `rvf ingest` must acknowledge the accepted vectors.
function testIngest() {
  console.log('\nStep 2: Ingest vectors');
  const out = runCli(['ingest', storePath, '-i', inputPath]);
  const acknowledged = out.includes('Ingested') || out.includes('accepted');
  assert(acknowledged, 'ingest reports accepted vectors');
}
// Step 3: query the store and sanity-check the result count.
// Returns the raw CLI output for potential later comparison.
function testQueryFirst() {
  console.log('\nStep 3: Query (first pass)');
  // Query with the vector for id=9: setup() seeds each vector with
  // `id * 17 + 5`, so seed 158 = 9 * 17 + 5 reproduces the ingested id=9
  // vector exactly. (The previous comment claimed id=10, which is wrong.
  // NOTE(review): the Rust test seeds by index, `i * 17 + 5`, so seeds
  // differ between the two suites — confirm if cross-validation is intended.)
  const queryVec = randomVector(DIM, 9 * 17 + 5);
  const vecStr = queryVec.map(v => v.toFixed(8)).join(',');
  const output = runCli(['query', storePath, '-v', vecStr, '-k', String(K)]);
  assert(output.includes('result'), 'query returns results');
  // Parse the "<N> result(s)" count out of the human-readable output.
  const countMatch = output.match(/(\d+)\s*result/);
  if (countMatch) {
    const count = parseInt(countMatch[1], 10);
    assert(count > 0, `query returned ${count} results (> 0)`);
    assert(count <= K, `query returned ${count} results (<= ${K})`);
  } else {
    assert(false, 'could not parse result count from output');
  }
  return output;
}
// Step 4: `rvf status` must show a vector-count field.
function testStatus() {
  console.log('\nStep 4: Status check');
  const out = runCli(['status', storePath]);
  const hasCount = out.includes('total_vectors') || out.includes('totalVectors');
  assert(hasCount, 'status shows vector count');
}
// Step 5: `rvf segments` must list at least segment/type information.
function testSegments() {
  console.log('\nStep 5: Segment listing');
  const out = runCli(['segments', storePath]);
  const listed = out.includes('segment') || out.includes('type=');
  assert(listed, 'segments command lists segments');
}
// Step 6: `rvf compact` must report completion.
function testCompact() {
  console.log('\nStep 6: Compact');
  const out = runCli(['compact', storePath]);
  const reported = out.includes('Compact') || out.includes('compact');
  assert(reported, 'compact reports completion');
}
// Step 7: `rvf derive` must report success and produce the child file.
function testDerive() {
  console.log('\nStep 7: Derive child store');
  const out = runCli(['derive', storePath, childPath]);
  const reported = out.includes('Derived') || out.includes('derived');
  assert(reported, 'derive reports success');
  assert(fs.existsSync(childPath), 'child store file exists on disk');
}
// Step 8: segment listing must also work against the derived child store.
function testChildSegments() {
  console.log('\nStep 8: Child segment listing');
  const out = runCli(['segments', childPath]);
  const listed = out.includes('segment') || out.includes('type=');
  assert(listed, 'child segments command lists segments');
}
// Step 9: status must still respond after the full lifecycle of operations.
function testStatusAfterLifecycle() {
  console.log('\nStep 9: Final status check');
  const out = runCli(['status', storePath]);
  assert(out.length > 0, 'status returns non-empty output');
}
// Step 10: `rvf export` must produce a JSON file carrying status + segments.
function testExport() {
  console.log('\nStep 10: Export');
  const exportPath = path.join(tmpDir, 'export.json');
  const out = runCli(['export', storePath, '-o', exportPath]);
  const produced =
    out.includes('Exported') || out.includes('exported') || fs.existsSync(exportPath);
  assert(produced, 'export produces output file');
  // Only inspect the payload if the file actually materialized.
  if (!fs.existsSync(exportPath)) return;
  const data = JSON.parse(fs.readFileSync(exportPath, 'utf8'));
  assert(data.status !== undefined, 'export contains status');
  assert(data.segments !== undefined, 'export contains segments');
}
// Step 11: querying the status of a missing store must exit non-zero.
function testNonexistentStore() {
  console.log('\nStep 11: Error handling');
  const bogus = '/tmp/nonexistent_smoke_test_rvf_99999.rvf';
  assertThrows(
    () => runCli(['status', bogus]),
    'status on nonexistent store fails with error'
  );
}
// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------
// Entry point: run setup, execute every step in order, always tear down,
// then exit 0 if everything passed and 1 otherwise.
function main() {
  console.log('=== RVF CLI End-to-End Smoke Test ===');
  console.log(` DIM=${DIM} METRIC=${METRIC} VECTORS=${VECTOR_COUNT} K=${K}`);
  setup();
  try {
    // Bail out gracefully when the CLI has not been built.
    if (!fs.existsSync(CLI_PATH)) {
      console.error(`\nCLI not found at: ${CLI_PATH}`);
      console.error('Skipping CLI smoke test (CLI not built).');
      console.log('\n=== SKIPPED (CLI not available) ===');
      process.exit(0);
    }
    // The steps run in lifecycle order; each records its own pass/fail.
    const steps = [
      testCreate,
      testIngest,
      testQueryFirst,
      testStatus,
      testSegments,
      testCompact,
      testDerive,
      testChildSegments,
      testStatusAfterLifecycle,
      testExport,
      testNonexistentStore,
    ];
    for (const step of steps) step();
  } catch (e) {
    // If any step throws unexpectedly, still report and clean up.
    failed++;
    console.error(`\nUNEXPECTED ERROR: ${e.message}`);
    if (e.stack) console.error(e.stack);
  } finally {
    teardown();
  }
  // Summary.
  const total = passed + failed;
  console.log(`\n=== Results: ${passed}/${total} passed, ${failed} failed ===`);
  if (failed > 0) {
    process.exit(1);
  }
  console.log('All smoke tests passed.');
  process.exit(0);
}
main();

View File

@@ -0,0 +1,606 @@
//! End-to-end RVF smoke test -- full lifecycle verification.
//!
//! Exercises the complete RVF pipeline through 15 steps:
//! 1. Create a new store (dim=128, cosine metric)
//! 2. Ingest 100 random vectors with metadata
//! 3. Query for 10 nearest neighbors of a known vector
//! 4. Verify results are sorted and distances are valid (0.0..2.0 for cosine)
//! 5. Close the store
//! 6. Reopen the store (simulating process restart)
//! 7. Query again with the same vector
//! 8. Verify results match the first query exactly (persistence verified)
//! 9. Delete some vectors
//! 10. Compact the store
//! 11. Verify deleted vectors no longer appear in results
//! 12. Derive a child store
//! 13. Verify child can be queried independently
//! 14. Verify segment listing works on both parent and child
//! 15. Clean up temporary files
//!
//! NOTE: The `DistanceMetric` is not persisted in the manifest, so after
//! `RvfStore::open()` the metric defaults to L2. The lifecycle test therefore
//! uses L2 for the cross-restart comparison (steps 5-8), while cosine-specific
//! assertions are exercised in a dedicated single-session test.
use rvf_runtime::options::{
DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
};
use rvf_runtime::RvfStore;
use rvf_types::DerivationType;
use tempfile::TempDir;
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/// Deterministic pseudo-random vector generation using a 64-bit LCG.
/// Produces `dim` values in [-0.5, 0.5); identical seeds yield identical
/// vectors, which the persistence tests rely on.
fn random_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut state = seed;
    (0..dim)
        .map(|_| {
            // Knuth/PCG LCG constants; wrapping arithmetic emulates u64 overflow.
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            // Take the high bits for better statistical quality.
            ((state >> 33) as f32) / (u32::MAX as f32) - 0.5
        })
        .collect()
}
/// L2-normalize a vector in place so cosine distance is well-defined.
/// Near-zero vectors (norm <= f32::EPSILON) are left untouched to avoid
/// dividing by zero.
fn normalize(v: &mut [f32]) {
    let norm = v.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm <= f32::EPSILON {
        return;
    }
    v.iter_mut().for_each(|x| *x /= norm);
}
/// Generate a seeded pseudo-random vector and L2-normalize it, yielding a
/// unit vector suitable for cosine-metric queries.
fn random_unit_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut vec = random_vector(dim, seed);
    normalize(&mut vec);
    vec
}
fn make_options(dim: u16, metric: DistanceMetric) -> RvfOptions {
RvfOptions {
dimension: dim,
metric,
..Default::default()
}
}
// ---------------------------------------------------------------------------
// Full lifecycle smoke test (L2 metric for cross-restart consistency)
// ---------------------------------------------------------------------------
/// Full lifecycle smoke test: create -> ingest -> query -> restart ->
/// re-query -> delete -> compact -> derive -> segment checks -> cleanup.
/// Uses the L2 metric throughout because the metric is not persisted
/// across `close()`/`open()` (see the module docs above).
#[test]
fn rvf_smoke_full_lifecycle() {
    let dir = TempDir::new().expect("failed to create temp dir");
    let store_path = dir.path().join("smoke_lifecycle.rvf");
    let child_path = dir.path().join("smoke_child.rvf");
    let dim: u16 = 128;
    let k: usize = 10;
    let vector_count: usize = 100;
    // Use L2 metric for the lifecycle test because the metric is not persisted
    // in the manifest. After reopen, the store defaults to L2, so using L2
    // throughout ensures cross-restart distance comparisons are exact.
    let options = make_options(dim, DistanceMetric::L2);
    // -----------------------------------------------------------------------
    // Step 1: Create a new RVF store with dimension 128 (L2 metric -- the
    // module docs explain why cosine is not used for this test)
    // -----------------------------------------------------------------------
    let mut store = RvfStore::create(&store_path, options.clone())
        .expect("step 1: failed to create store");
    // Verify initial state.
    let initial_status = store.status();
    assert_eq!(initial_status.total_vectors, 0, "step 1: new store should be empty");
    assert!(!initial_status.read_only, "step 1: new store should not be read-only");
    // -----------------------------------------------------------------------
    // Step 2: Ingest 100 random vectors with metadata
    // -----------------------------------------------------------------------
    // Seeds are index-based (i * 17 + 5) while ids are 1-based, so the vector
    // stored under id N was generated with seed (N - 1) * 17 + 5.
    let vectors: Vec<Vec<f32>> = (0..vector_count as u64)
        .map(|i| random_vector(dim as usize, i * 17 + 5))
        .collect();
    let vec_refs: Vec<&[f32]> = vectors.iter().map(|v| v.as_slice()).collect();
    let ids: Vec<u64> = (1..=vector_count as u64).collect();
    // One metadata entry per vector: field_id=0, value=category string.
    let metadata: Vec<MetadataEntry> = ids
        .iter()
        .map(|&id| MetadataEntry {
            field_id: 0,
            value: MetadataValue::String(format!("group_{}", id % 5)),
        })
        .collect();
    let ingest_result = store
        .ingest_batch(&vec_refs, &ids, Some(&metadata))
        .expect("step 2: ingest failed");
    assert_eq!(
        ingest_result.accepted, vector_count as u64,
        "step 2: all {} vectors should be accepted",
        vector_count,
    );
    assert_eq!(ingest_result.rejected, 0, "step 2: no vectors should be rejected");
    assert!(ingest_result.epoch > 0, "step 2: epoch should advance after ingest");
    // -----------------------------------------------------------------------
    // Step 3: Query for 10 nearest neighbors of a known vector
    // -----------------------------------------------------------------------
    // Use vector with id=50 as the query (id 50 -> index 49 -> seed
    // 49 * 17 + 5 = 838), so an exact match must exist in the store.
    let query_vec = random_vector(dim as usize, 49 * 17 + 5);
    let results_first = store
        .query(&query_vec, k, &QueryOptions::default())
        .expect("step 3: query failed");
    assert_eq!(
        results_first.len(),
        k,
        "step 3: should return exactly {} results",
        k,
    );
    // The first result should be the exact match (id=50).
    assert_eq!(
        results_first[0].id, 50,
        "step 3: exact match vector should be first result",
    );
    assert!(
        results_first[0].distance < 1e-5,
        "step 3: exact match distance should be near zero, got {}",
        results_first[0].distance,
    );
    // -----------------------------------------------------------------------
    // Step 4: Verify results are sorted by distance and distances are valid
    // (L2 distances are non-negative)
    // -----------------------------------------------------------------------
    for i in 1..results_first.len() {
        assert!(
            results_first[i].distance >= results_first[i - 1].distance,
            "step 4: results not sorted at position {}: {} > {}",
            i,
            results_first[i - 1].distance,
            results_first[i].distance,
        );
    }
    for r in &results_first {
        assert!(
            r.distance >= 0.0,
            "step 4: L2 distance {} should be non-negative",
            r.distance,
        );
    }
    // -----------------------------------------------------------------------
    // Step 5: Close the store
    // -----------------------------------------------------------------------
    store.close().expect("step 5: close failed");
    // -----------------------------------------------------------------------
    // Step 6: Reopen the store (simulating process restart)
    // -----------------------------------------------------------------------
    let store = RvfStore::open(&store_path).expect("step 6: reopen failed");
    let reopen_status = store.status();
    assert_eq!(
        reopen_status.total_vectors, vector_count as u64,
        "step 6: all {} vectors should persist after reopen",
        vector_count,
    );
    // -----------------------------------------------------------------------
    // Step 7: Query again with the same vector
    // -----------------------------------------------------------------------
    let results_second = store
        .query(&query_vec, k, &QueryOptions::default())
        .expect("step 7: query after reopen failed");
    assert_eq!(
        results_second.len(),
        k,
        "step 7: should return exactly {} results after reopen",
        k,
    );
    // -----------------------------------------------------------------------
    // Step 8: Verify results match the first query exactly (persistence)
    //
    // After reopen, the internal iteration order of vectors may differ, which
    // can affect tie-breaking in the k-NN heap. We therefore compare:
    //   (a) the set of result IDs must be identical,
    //   (b) distances for each ID must match within floating-point tolerance,
    //   (c) result count must be the same.
    // -----------------------------------------------------------------------
    assert_eq!(
        results_first.len(),
        results_second.len(),
        "step 8: result count should match across restart",
    );
    // Build a map of id -> distance for comparison.
    let first_map: std::collections::HashMap<u64, f32> = results_first
        .iter()
        .map(|r| (r.id, r.distance))
        .collect();
    let second_map: std::collections::HashMap<u64, f32> = results_second
        .iter()
        .map(|r| (r.id, r.distance))
        .collect();
    // Verify the exact same IDs appear in both result sets.
    let mut first_ids: Vec<u64> = first_map.keys().copied().collect();
    let mut second_ids: Vec<u64> = second_map.keys().copied().collect();
    first_ids.sort();
    second_ids.sort();
    assert_eq!(
        first_ids, second_ids,
        "step 8: result ID sets must match across restart",
    );
    // Verify distances match per-ID within tolerance.
    for &id in &first_ids {
        let d1 = first_map[&id];
        let d2 = second_map[&id];
        assert!(
            (d1 - d2).abs() < 1e-5,
            "step 8: distance mismatch for id={}: {} vs {} (pre vs post restart)",
            id, d1, d2,
        );
    }
    // Need a mutable store for delete/compact. Drop the read-write handle and
    // reopen it mutably.
    store.close().expect("step 8: close for mutable reopen failed");
    let mut store = RvfStore::open(&store_path).expect("step 8: mutable reopen failed");
    // -----------------------------------------------------------------------
    // Step 9: Delete some vectors (ids 1..=10)
    // -----------------------------------------------------------------------
    let delete_ids: Vec<u64> = (1..=10).collect();
    let del_result = store
        .delete(&delete_ids)
        .expect("step 9: delete failed");
    assert_eq!(
        del_result.deleted, 10,
        "step 9: should have deleted 10 vectors",
    );
    // Epochs are monotonic, so the delete epoch must exceed the one observed
    // at the step-6 reopen.
    assert!(
        del_result.epoch > reopen_status.current_epoch,
        "step 9: epoch should advance after delete",
    );
    // Quick verification: deleted vectors should not appear in query.
    // k = vector_count requests every live vector.
    let post_delete_results = store
        .query(&query_vec, vector_count, &QueryOptions::default())
        .expect("step 9: post-delete query failed");
    for r in &post_delete_results {
        assert!(
            r.id > 10,
            "step 9: deleted vector {} should not appear in results",
            r.id,
        );
    }
    assert_eq!(
        post_delete_results.len(),
        vector_count - 10,
        "step 9: should have {} results after deleting 10",
        vector_count - 10,
    );
    // -----------------------------------------------------------------------
    // Step 10: Compact the store
    // -----------------------------------------------------------------------
    let pre_compact_epoch = store.status().current_epoch;
    let compact_result = store.compact().expect("step 10: compact failed");
    assert!(
        compact_result.segments_compacted > 0 || compact_result.bytes_reclaimed > 0,
        "step 10: compaction should reclaim space",
    );
    assert!(
        compact_result.epoch > pre_compact_epoch,
        "step 10: epoch should advance after compact",
    );
    // -----------------------------------------------------------------------
    // Step 11: Verify deleted vectors no longer appear in results
    // -----------------------------------------------------------------------
    let post_compact_results = store
        .query(&query_vec, vector_count, &QueryOptions::default())
        .expect("step 11: post-compact query failed");
    for r in &post_compact_results {
        assert!(
            r.id > 10,
            "step 11: deleted vector {} appeared after compaction",
            r.id,
        );
    }
    assert_eq!(
        post_compact_results.len(),
        vector_count - 10,
        "step 11: should still have {} results post-compact",
        vector_count - 10,
    );
    // Verify post-compact status.
    let post_compact_status = store.status();
    assert_eq!(
        post_compact_status.total_vectors,
        (vector_count - 10) as u64,
        "step 11: status should reflect {} live vectors",
        vector_count - 10,
    );
    // -----------------------------------------------------------------------
    // Step 12: Derive a child store
    // -----------------------------------------------------------------------
    let child = store
        .derive(&child_path, DerivationType::Clone, Some(options.clone()))
        .expect("step 12: derive failed");
    // Verify lineage.
    assert_eq!(
        child.lineage_depth(),
        1,
        "step 12: child lineage depth should be 1",
    );
    assert_eq!(
        child.parent_id(),
        store.file_id(),
        "step 12: child parent_id should match parent file_id",
    );
    assert_ne!(
        child.file_id(),
        store.file_id(),
        "step 12: child should have a distinct file_id",
    );
    // -----------------------------------------------------------------------
    // Step 13: Verify child can be queried independently
    // -----------------------------------------------------------------------
    // Derivation copies lineage metadata only, not vectors, so the child is
    // expected to be empty; this query must succeed and return no results.
    let child_query = random_vector(dim as usize, 999);
    let child_results = child
        .query(&child_query, k, &QueryOptions::default())
        .expect("step 13: child query failed");
    // Child is newly derived with no vectors of its own, so results should be empty.
    assert!(
        child_results.is_empty(),
        "step 13: freshly derived child should have no vectors, got {}",
        child_results.len(),
    );
    // -----------------------------------------------------------------------
    // Step 14: Verify segment listing works on both parent and child
    // -----------------------------------------------------------------------
    let parent_segments = store.segment_dir();
    assert!(
        !parent_segments.is_empty(),
        "step 14: parent should have at least one segment",
    );
    let child_segments = child.segment_dir();
    assert!(
        !child_segments.is_empty(),
        "step 14: child should have at least one segment (manifest)",
    );
    // Verify segment tuples have valid structure (seg_id > 0, type byte > 0).
    for &(seg_id, _offset, _len, seg_type) in parent_segments {
        assert!(seg_id > 0, "step 14: parent segment ID should be > 0");
        assert!(seg_type > 0, "step 14: parent segment type should be > 0");
    }
    for &(seg_id, _offset, _len, seg_type) in child_segments {
        assert!(seg_id > 0, "step 14: child segment ID should be > 0");
        assert!(seg_type > 0, "step 14: child segment type should be > 0");
    }
    // -----------------------------------------------------------------------
    // Step 15: Clean up temporary files
    // -----------------------------------------------------------------------
    child.close().expect("step 15: child close failed");
    store.close().expect("step 15: parent close failed");
    // TempDir's Drop impl will remove the directory, but verify the files exist
    // before cleanup happens.
    assert!(
        store_path.exists(),
        "step 15: parent store file should exist before cleanup",
    );
    assert!(
        child_path.exists(),
        "step 15: child store file should exist before cleanup",
    );
    // Explicitly drop the TempDir to trigger cleanup.
    drop(dir);
}
// ---------------------------------------------------------------------------
// Additional focused smoke tests
// ---------------------------------------------------------------------------
/// Cosine-metric distances for normalized vectors must lie in [0.0, 2.0],
/// and every result list must be sorted ascending by distance. Runs in a
/// single session (no restart) because the metric is not persisted.
#[test]
fn smoke_cosine_distance_range() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("cosine_range.rvf");
    let dim: u16 = 128;
    let options = make_options(dim, DistanceMetric::Cosine);
    let mut store = RvfStore::create(&path, options).unwrap();
    // Ingest 50 unit-norm vectors under ids 1..=50.
    let data: Vec<Vec<f32>> = (0..50)
        .map(|s| random_unit_vector(dim as usize, s * 31 + 3))
        .collect();
    let slices: Vec<&[f32]> = data.iter().map(Vec::as_slice).collect();
    let id_list: Vec<u64> = (1..=50).collect();
    store.ingest_batch(&slices, &id_list, None).unwrap();
    // Probe with several distinct seeds and validate every result.
    for &seed in &[0u64, 42, 100, 999, 12345] {
        let probe = random_unit_vector(dim as usize, seed);
        let hits = store.query(&probe, 50, &QueryOptions::default()).unwrap();
        for hit in &hits {
            assert!(
                hit.distance >= 0.0 && hit.distance <= 2.0,
                "cosine distance {} out of range [0.0, 2.0] for seed {}",
                hit.distance,
                seed,
            );
        }
        // Adjacent pairs must be non-decreasing in distance.
        for (j, pair) in hits.windows(2).enumerate() {
            assert!(
                pair[1].distance >= pair[0].distance,
                "results not sorted for seed {}: {} > {} at position {}",
                seed,
                pair[0].distance,
                pair[1].distance,
                j + 1,
            );
        }
    }
    store.close().unwrap();
}
/// Persistence must survive repeated close/reopen cycles with interleaved
/// ingests, deletes, and a compaction. Uses the L2 metric so results stay
/// comparable across restarts (the metric is not persisted in the manifest).
#[test]
fn smoke_multi_restart_persistence() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("multi_restart.rvf");
    let dim: u16 = 128;
    let options = make_options(dim, DistanceMetric::L2);
    // Cycle 1: create the store and ingest ids 1..=50.
    {
        let mut store = RvfStore::create(&path, options.clone()).unwrap();
        let batch: Vec<Vec<f32>> =
            (0..50).map(|s| random_vector(dim as usize, s)).collect();
        let slices: Vec<&[f32]> = batch.iter().map(Vec::as_slice).collect();
        let id_list: Vec<u64> = (1..=50).collect();
        store.ingest_batch(&slices, &id_list, None).unwrap();
        assert_eq!(store.status().total_vectors, 50);
        store.close().unwrap();
    }
    // Cycle 2: reopen, ingest ids 51..=100, delete 10 ids, close.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(store.status().total_vectors, 50);
        let batch: Vec<Vec<f32>> =
            (50..100).map(|s| random_vector(dim as usize, s)).collect();
        let slices: Vec<&[f32]> = batch.iter().map(Vec::as_slice).collect();
        let id_list: Vec<u64> = (51..=100).collect();
        store.ingest_batch(&slices, &id_list, None).unwrap();
        assert_eq!(store.status().total_vectors, 100);
        store.delete(&[5, 10, 15, 20, 25, 55, 60, 65, 70, 75]).unwrap();
        assert_eq!(store.status().total_vectors, 90);
        store.close().unwrap();
    }
    // Cycle 3: reopen, verify the count, compact, and confirm deleted ids
    // never reappear in a full query.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 3: 90 vectors should survive two restarts",
        );
        store.compact().unwrap();
        assert_eq!(store.status().total_vectors, 90);
        let probe = random_vector(dim as usize, 42);
        let hits = store.query(&probe, 100, &QueryOptions::default()).unwrap();
        let deleted_ids = [5, 10, 15, 20, 25, 55, 60, 65, 70, 75];
        for hit in &hits {
            assert!(
                !deleted_ids.contains(&hit.id),
                "cycle 3: deleted vector {} appeared after compact + restart",
                hit.id,
            );
        }
        store.close().unwrap();
    }
    // Cycle 4: final read-only reopen; compacted state must still be intact.
    {
        let store = RvfStore::open_readonly(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 4: 90 vectors should survive compact + restart",
        );
        assert!(store.status().read_only);
    }
}
/// Metadata-attached batch ingest must preserve vector IDs: a 1-NN query
/// with the id=42 seed vector must return id 42 at near-zero distance.
#[test]
fn smoke_metadata_and_ids() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("meta_ids.rvf");
    let dim: u16 = 128;
    let options = make_options(dim, DistanceMetric::L2);
    let mut store = RvfStore::create(&path, options).unwrap();
    // 100 vectors under ids 1..=100, each tagged with a U64 metadata value
    // equal to its own id (field_id 0). Seeds are index-based: i * 7 + 1.
    let data: Vec<Vec<f32>> = (0..100)
        .map(|i| random_vector(dim as usize, i * 7 + 1))
        .collect();
    let slices: Vec<&[f32]> = data.iter().map(Vec::as_slice).collect();
    let id_list: Vec<u64> = (1..=100).collect();
    let tags: Vec<MetadataEntry> = id_list
        .iter()
        .map(|&id| MetadataEntry {
            field_id: 0,
            value: MetadataValue::U64(id),
        })
        .collect();
    let outcome = store.ingest_batch(&slices, &id_list, Some(&tags)).unwrap();
    assert_eq!(outcome.accepted, 100);
    assert_eq!(outcome.rejected, 0);
    // id=42 maps to index 41, i.e. seed 41 * 7 + 1; its own stored vector
    // must be the single nearest neighbor.
    let probe = random_vector(dim as usize, 41 * 7 + 1);
    let hits = store.query(&probe, 1, &QueryOptions::default()).unwrap();
    assert_eq!(hits.len(), 1);
    assert_eq!(hits[0].id, 42, "exact match should be id=42");
    assert!(hits[0].distance < 1e-5);
    store.close().unwrap();
}