Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
318
vendor/ruvector/tests/rvf-integration/smoke-test.js
vendored
Normal file
318
vendor/ruvector/tests/rvf-integration/smoke-test.js
vendored
Normal file
@@ -0,0 +1,318 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* End-to-end RVF CLI smoke test.
|
||||
*
|
||||
* Tests the full lifecycle via `npx ruvector rvf` CLI commands:
|
||||
 *   create -> ingest -> query -> status -> segments -> compact -> derive -> export -> error handling
|
||||
*
|
||||
* Exits with code 0 on success, code 1 on failure.
|
||||
*
|
||||
* Usage:
|
||||
* node tests/rvf-integration/smoke-test.js
|
||||
*/
|
||||
|
||||
'use strict';
|
||||
|
||||
const { execFileSync } = require('child_process');
|
||||
const fs = require('fs');
|
||||
const os = require('os');
|
||||
const path = require('path');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const DIM = 128;
|
||||
const METRIC = 'cosine';
|
||||
const VECTOR_COUNT = 20;
|
||||
const K = 5;
|
||||
|
||||
// Locate the CLI entry point relative to the repo root.
|
||||
const REPO_ROOT = path.resolve(__dirname, '..', '..');
|
||||
const CLI_PATH = path.join(REPO_ROOT, 'npm', 'packages', 'ruvector', 'bin', 'cli.js');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let tmpDir;
|
||||
let storePath;
|
||||
let inputPath;
|
||||
let childPath;
|
||||
let passed = 0;
|
||||
let failed = 0;
|
||||
|
||||
/**
 * Build a deterministic, unit-length pseudo-random vector.
 *
 * Components come from a 64-bit LCG that uses the same multiplier and
 * increment as the Rust `random_vector` test helper. The state is kept as a
 * BigInt masked to 64 bits so the wrap-around is exact. The raw components
 * are then scaled to unit Euclidean length for use with the cosine metric.
 *
 * @param {number} dim - Number of components to generate.
 * @param {number} seed - Integer seed for the generator.
 * @returns {number[]} Array of `dim` numbers; all zeros when the raw norm is
 *   not greater than 1e-8.
 */
function randomVector(dim, seed) {
  const MASK_64 = 0xFFFFFFFFFFFFFFFFn;
  const raw = new Float64Array(dim);

  let state = BigInt(seed) & MASK_64;
  for (let idx = 0; idx < dim; idx += 1) {
    state = (state * 6364136223846793005n + 1442695040888963407n) & MASK_64;
    // Keep the top 31 bits of the state, scale, and shift by -0.5.
    raw[idx] = Number(state >> 33n) / 4294967295.0 - 0.5;
  }

  // Accumulate the squared length in index order, then take the root.
  let sumSquares = 0;
  for (const component of raw) sumSquares += component * component;
  const length = Math.sqrt(sumSquares);

  // Scale to unit length; a degenerate (near-zero) vector maps to all zeros.
  const usable = length > 1e-8;
  return Array.from(raw, (component) => (usable ? component / length : 0));
}
|
||||
|
||||
/**
 * Invoke the ruvector CLI's `rvf` subcommand synchronously.
 *
 * The child is started with `process.execPath`, i.e. the same Node.js binary
 * that is running this script, rather than whatever `node` happens to be
 * first on PATH. Colour output is disabled so stdout is easy to match.
 *
 * @param {string[]} args - Arguments placed after `rvf`.
 * @param {object} [opts] - Extra `execFileSync` options. They are spread last,
 *   so they override the defaults below (a supplied `env` replaces the whole
 *   default environment).
 * @returns {string} Trimmed stdout of the child process.
 * @throws {Error} When the child exits non-zero, times out, or cannot be
 *   spawned. The message carries the captured stdout and stderr.
 */
function runCli(args, opts = {}) {
  const cliArgs = [CLI_PATH, 'rvf', ...args];
  try {
    const stdout = execFileSync(process.execPath, cliArgs, {
      cwd: REPO_ROOT,
      timeout: 30000,
      encoding: 'utf8',
      env: {
        ...process.env,
        // Disable chalk colors for easier parsing.
        FORCE_COLOR: '0',
        NO_COLOR: '1',
      },
      ...opts,
    });
    return stdout.trim();
  } catch (e) {
    const stderr = e.stderr ? e.stderr.toString().trim() : '';
    const stdout = e.stdout ? e.stdout.toString().trim() : '';
    // A null status means the child never produced an exit code (timeout,
    // signal, spawn failure); surface the underlying reason in that case.
    const cause =
      typeof e.status === 'number' ? '' : `\n cause: ${e.code || e.signal || e.message}`;
    throw new Error(
      `CLI failed (exit ${e.status}): ${args.join(' ')}\n` +
      ` stdout: ${stdout}\n` +
      ` stderr: ${stderr}` +
      cause
    );
  }
}
|
||||
|
||||
/**
 * Record the outcome of a single check.
 *
 * Increments the module-level `passed` or `failed` counter and prints a
 * PASS line to stdout or a FAIL line to stderr. Never throws.
 *
 * @param {*} condition - Truthy means the check passed.
 * @param {string} message - Description printed with the outcome.
 */
function assert(condition, message) {
  if (!condition) {
    failed++;
    console.error(` FAIL: ${message}`);
    return;
  }
  passed++;
  console.log(` PASS: ${message}`);
}
|
||||
|
||||
/**
 * Record a pass when `fn` throws and a failure when it returns normally.
 *
 * Any thrown value counts as a pass; the error itself is discarded.
 *
 * @param {Function} fn - Zero-argument callback expected to throw.
 * @param {string} message - Description printed with the outcome.
 */
function assertThrows(fn, message) {
  let threw = false;
  try {
    fn();
  } catch (_err) {
    threw = true;
  }

  if (threw) {
    passed++;
    console.log(` PASS: ${message}`);
  } else {
    failed++;
    console.error(` FAIL: ${message} (expected error, got success)`);
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Setup
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Create a fresh temp directory, derive the store/input/child paths inside
 * it, and write the JSON input file of `VECTOR_COUNT` `{ id, vector }`
 * entries. Ids start at 1 and the vector for id N is seeded with N * 17 + 5.
 */
function setup() {
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rvf-smoke-'));
  [storePath, inputPath, childPath] = ['smoke.rvf', 'vectors.json', 'child.rvf'].map(
    (fileName) => path.join(tmpDir, fileName)
  );

  // Generate input vectors as JSON.
  const entries = Array.from({ length: VECTOR_COUNT }, (_unused, index) => {
    const id = index + 1;
    return { id, vector: randomVector(DIM, id * 17 + 5) };
  });
  fs.writeFileSync(inputPath, JSON.stringify(entries));
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Teardown
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Remove the temp directory created by `setup`, if there is one.
 * Cleanup is best-effort: any filesystem error is swallowed.
 */
function teardown() {
  if (!tmpDir) return;
  try {
    if (fs.existsSync(tmpDir)) {
      fs.rmSync(tmpDir, { recursive: true, force: true });
    }
  } catch (_err) {
    // Best-effort cleanup.
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test steps
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Step 1: create the store through the CLI and confirm the file exists. */
function testCreate() {
  console.log('\nStep 1: Create store');
  const cliArgs = ['create', storePath, '-d', String(DIM), '-m', METRIC];
  const stdout = runCli(cliArgs);
  const announced = ['Created', 'created'].some((word) => stdout.includes(word));
  assert(announced, 'create reports success');
  assert(fs.existsSync(storePath), 'store file exists on disk');
}
|
||||
|
||||
/** Step 2: ingest the generated JSON vectors and check the CLI reports it. */
function testIngest() {
  console.log('\nStep 2: Ingest vectors');
  const stdout = runCli(['ingest', storePath, '-i', inputPath]);
  const announced = ['Ingested', 'accepted'].some((word) => stdout.includes(word));
  assert(announced, 'ingest reports accepted vectors');
}
|
||||
|
||||
/**
 * Step 3: run a k-NN query with a vector that is present in the store and
 * check that the CLI reports between 1 and K results.
 *
 * @returns {string} Raw (trimmed) CLI output of the query command.
 */
function testQueryFirst() {
  console.log('\nStep 3: Query (first pass)');
  // setup() seeds the vector for id N with N * 17 + 5, so this reproduces the
  // stored vector with id=9 (seed 158).
  const queryId = 9;
  const queryVec = randomVector(DIM, queryId * 17 + 5);
  const vecStr = queryVec.map((component) => component.toFixed(8)).join(',');
  const output = runCli(['query', storePath, '-v', vecStr, '-k', String(K)]);
  assert(output.includes('result'), 'query returns results');

  // Parse result count.
  const countMatch = output.match(/(\d+)\s*result/);
  if (!countMatch) {
    assert(false, 'could not parse result count from output');
    return output;
  }
  const count = parseInt(countMatch[1], 10);
  assert(count > 0, `query returned ${count} results (> 0)`);
  assert(count <= K, `query returned ${count} results (<= ${K})`);

  return output;
}
|
||||
|
||||
/** Step 4: `status` output must mention the vector count field. */
function testStatus() {
  console.log('\nStep 4: Status check');
  const stdout = runCli(['status', storePath]);
  const hasCount = ['total_vectors', 'totalVectors'].some((key) => stdout.includes(key));
  assert(hasCount, 'status shows vector count');
}
|
||||
|
||||
/** Step 5: `segments` on the parent store must list at least something segment-like. */
function testSegments() {
  console.log('\nStep 5: Segment listing');
  const stdout = runCli(['segments', storePath]);
  const listed = ['segment', 'type='].some((marker) => stdout.includes(marker));
  assert(listed, 'segments command lists segments');
}
|
||||
|
||||
/** Step 6: compact the store and check the CLI mentions compaction. */
function testCompact() {
  console.log('\nStep 6: Compact');
  const stdout = runCli(['compact', storePath]);
  const announced = ['Compact', 'compact'].some((word) => stdout.includes(word));
  assert(announced, 'compact reports completion');
}
|
||||
|
||||
/** Step 7: derive a child store and confirm the child file exists. */
function testDerive() {
  console.log('\nStep 7: Derive child store');
  const stdout = runCli(['derive', storePath, childPath]);
  const announced = ['Derived', 'derived'].some((word) => stdout.includes(word));
  assert(announced, 'derive reports success');
  assert(fs.existsSync(childPath), 'child store file exists on disk');
}
|
||||
|
||||
/** Step 8: `segments` must also work on the derived child store. */
function testChildSegments() {
  console.log('\nStep 8: Child segment listing');
  const stdout = runCli(['segments', childPath]);
  const listed = ['segment', 'type='].some((marker) => stdout.includes(marker));
  assert(listed, 'child segments command lists segments');
}
|
||||
|
||||
/** Step 9: after the whole lifecycle, `status` must still produce output. */
function testStatusAfterLifecycle() {
  console.log('\nStep 9: Final status check');
  const stdout = runCli(['status', storePath]);
  const nonEmpty = stdout.length > 0;
  assert(nonEmpty, 'status returns non-empty output');
}
|
||||
|
||||
/**
 * Step 10: export the store to JSON. The first check passes when the CLI
 * either announces the export or the output file exists. When the file
 * exists it is parsed and must carry `status` and `segments` fields.
 */
function testExport() {
  console.log('\nStep 10: Export');
  const exportPath = path.join(tmpDir, 'export.json');
  const stdout = runCli(['export', storePath, '-o', exportPath]);

  const announced = stdout.includes('Exported') || stdout.includes('exported');
  assert(announced || fs.existsSync(exportPath), 'export produces output file');

  if (!fs.existsSync(exportPath)) return;

  const data = JSON.parse(fs.readFileSync(exportPath, 'utf8'));
  assert(data.status !== undefined, 'export contains status');
  assert(data.segments !== undefined, 'export contains segments');
}
|
||||
|
||||
/**
 * Step 11: `status` on a store that does not exist must fail.
 *
 * The missing path lives inside the temp directory created by `setup`, so it
 * is portable and cannot collide with a pre-existing file. It is built
 * outside the callback so that a problem constructing the path surfaces as
 * an unexpected error instead of being counted as the expected CLI failure.
 */
function testNonexistentStore() {
  console.log('\nStep 11: Error handling');
  const missingPath = path.join(tmpDir, 'nonexistent_smoke_test_rvf_99999.rvf');
  assertThrows(
    () => runCli(['status', missingPath]),
    'status on nonexistent store fails with error'
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Entry point: run every smoke-test step in order and exit with status 0
 * when all checks pass (or the CLI is not built) and 1 otherwise.
 */
function main() {
  console.log('=== RVF CLI End-to-End Smoke Test ===');
  console.log(` DIM=${DIM} METRIC=${METRIC} VECTORS=${VECTOR_COUNT} K=${K}`);

  // Check for the CLI before touching the filesystem. process.exit() ends the
  // process without running `finally` blocks, so skipping after setup() would
  // leave the temp directory behind.
  if (!fs.existsSync(CLI_PATH)) {
    console.error(`\nCLI not found at: ${CLI_PATH}`);
    console.error('Skipping CLI smoke test (CLI not built).');
    console.log('\n=== SKIPPED (CLI not available) ===');
    process.exit(0);
  }

  try {
    // setup() is inside the try so a failure while preparing fixtures is
    // reported like any other failure and still triggers teardown().
    setup();

    testCreate();
    testIngest();
    testQueryFirst();
    testStatus();
    testSegments();
    testCompact();
    testDerive();
    testChildSegments();
    testStatusAfterLifecycle();
    testExport();
    testNonexistentStore();
  } catch (e) {
    // If any step throws unexpectedly, we still want to report and clean up.
    failed++;
    console.error(`\nUNEXPECTED ERROR: ${e.message}`);
    if (e.stack) console.error(e.stack);
  } finally {
    teardown();
  }

  // Summary.
  const total = passed + failed;
  console.log(`\n=== Results: ${passed}/${total} passed, ${failed} failed ===`);

  if (failed > 0) {
    process.exit(1);
  } else {
    console.log('All smoke tests passed.');
    process.exit(0);
  }
}
|
||||
|
||||
main();
|
||||
606
vendor/ruvector/tests/rvf-integration/tests/rvf_smoke_test.rs
vendored
Normal file
606
vendor/ruvector/tests/rvf-integration/tests/rvf_smoke_test.rs
vendored
Normal file
@@ -0,0 +1,606 @@
|
||||
//! End-to-end RVF smoke test -- full lifecycle verification.
|
||||
//!
|
||||
//! Exercises the complete RVF pipeline through 15 steps:
|
||||
//!  1. Create a new store (dim=128, L2 metric -- see NOTE below)
|
||||
//! 2. Ingest 100 random vectors with metadata
|
||||
//! 3. Query for 10 nearest neighbors of a known vector
|
||||
//!  4. Verify results are sorted and distances are valid (non-negative for L2)
|
||||
//! 5. Close the store
|
||||
//! 6. Reopen the store (simulating process restart)
|
||||
//! 7. Query again with the same vector
|
||||
//! 8. Verify results match the first query exactly (persistence verified)
|
||||
//! 9. Delete some vectors
|
||||
//! 10. Compact the store
|
||||
//! 11. Verify deleted vectors no longer appear in results
|
||||
//! 12. Derive a child store
|
||||
//! 13. Verify child can be queried independently
|
||||
//! 14. Verify segment listing works on both parent and child
|
||||
//! 15. Clean up temporary files
|
||||
//!
|
||||
//! NOTE: The `DistanceMetric` is not persisted in the manifest, so after
|
||||
//! `RvfStore::open()` the metric defaults to L2. The lifecycle test therefore
|
||||
//! uses L2 for the cross-restart comparison (steps 5-8), while cosine-specific
|
||||
//! assertions are exercised in a dedicated single-session test.
|
||||
|
||||
use rvf_runtime::options::{
|
||||
DistanceMetric, MetadataEntry, MetadataValue, QueryOptions, RvfOptions,
|
||||
};
|
||||
use rvf_runtime::RvfStore;
|
||||
use rvf_types::DerivationType;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Deterministic pseudo-random vector generation using a 64-bit LCG.
///
/// Returns `dim` components. The same `(dim, seed)` pair always yields the
/// same vector.
///
/// Each component is built from the top 31 bits of the generator state
/// (`state >> 33`) divided by `u32::MAX`, then shifted by `-0.5`. Because only
/// 31 bits are kept, the quotient never exceeds 0.5, so every component lies
/// in `[-0.5, 0.0]` rather than being centred on zero.
fn random_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut state = seed;
    (0..dim)
        .map(|_| {
            // Wrapping arithmetic gives the intended modulo-2^64 recurrence.
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            ((state >> 33) as f32) / (u32::MAX as f32) - 0.5
        })
        .collect()
}
|
||||
|
||||
/// Scale `v` in place to unit Euclidean length.
///
/// Vectors whose length is not greater than `f32::EPSILON` (including the
/// empty slice) are left untouched, which avoids dividing by (near) zero.
fn normalize(v: &mut [f32]) {
    let length = v.iter().map(|c| c * c).sum::<f32>().sqrt();
    if length > f32::EPSILON {
        v.iter_mut().for_each(|c| *c /= length);
    }
}
|
||||
|
||||
/// Generate a normalized random vector suitable for cosine queries.
|
||||
fn random_unit_vector(dim: usize, seed: u64) -> Vec<f32> {
|
||||
let mut v = random_vector(dim, seed);
|
||||
normalize(&mut v);
|
||||
v
|
||||
}
|
||||
|
||||
fn make_options(dim: u16, metric: DistanceMetric) -> RvfOptions {
|
||||
RvfOptions {
|
||||
dimension: dim,
|
||||
metric,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Full lifecycle smoke test (L2 metric for cross-restart consistency)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
fn rvf_smoke_full_lifecycle() {
    use std::collections::BTreeMap;

    let dir = TempDir::new().expect("failed to create temp dir");
    let store_path = dir.path().join("smoke_lifecycle.rvf");
    let child_path = dir.path().join("smoke_child.rvf");

    let dim: u16 = 128;
    let k: usize = 10;
    let vector_count: usize = 100;
    let query_opts = QueryOptions::default();

    // The metric is not persisted in the manifest, so a reopened store falls
    // back to L2. Running the whole lifecycle with L2 keeps distances
    // comparable before and after a restart.
    let options = make_options(dim, DistanceMetric::L2);

    // --- Step 1: create a new store (dimension 128, L2 metric) -------------
    let mut store = RvfStore::create(&store_path, options.clone())
        .expect("step 1: failed to create store");

    let fresh_status = store.status();
    assert_eq!(fresh_status.total_vectors, 0, "step 1: new store should be empty");
    assert!(!fresh_status.read_only, "step 1: new store should not be read-only");

    // --- Step 2: ingest 100 random vectors with metadata --------------------
    // Vector with id N is generated from seed (N - 1) * 17 + 5.
    let ids: Vec<u64> = (1..=vector_count as u64).collect();
    let vectors: Vec<Vec<f32>> = ids
        .iter()
        .map(|&id| random_vector(dim as usize, (id - 1) * 17 + 5))
        .collect();
    let vec_refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();

    // One metadata entry per vector: field 0 holds a "group_<id % 5>" string.
    let metadata: Vec<MetadataEntry> = ids
        .iter()
        .map(|&id| MetadataEntry {
            field_id: 0,
            value: MetadataValue::String(format!("group_{}", id % 5)),
        })
        .collect();

    let ingest_result = store
        .ingest_batch(&vec_refs, &ids, Some(&metadata))
        .expect("step 2: ingest failed");

    assert_eq!(
        ingest_result.accepted, vector_count as u64,
        "step 2: all {} vectors should be accepted",
        vector_count,
    );
    assert_eq!(ingest_result.rejected, 0, "step 2: no vectors should be rejected");
    assert!(ingest_result.epoch > 0, "step 2: epoch should advance after ingest");

    // --- Step 3: query the 10 nearest neighbours of a known vector ----------
    // Seed 49 * 17 + 5 = 838 reproduces the stored vector with id=50.
    let query_vec = random_vector(dim as usize, 49 * 17 + 5);
    let results_first = store
        .query(&query_vec, k, &query_opts)
        .expect("step 3: query failed");

    assert_eq!(
        results_first.len(),
        k,
        "step 3: should return exactly {} results",
        k,
    );

    // The stored copy of the query vector must come back first, at ~0 distance.
    let best = &results_first[0];
    assert_eq!(
        best.id, 50,
        "step 3: exact match vector should be first result",
    );
    assert!(
        best.distance < 1e-5,
        "step 3: exact match distance should be near zero, got {}",
        best.distance,
    );

    // --- Step 4: results are sorted ascending and distances non-negative ----
    for (offset, pair) in results_first.windows(2).enumerate() {
        assert!(
            pair[1].distance >= pair[0].distance,
            "step 4: results not sorted at position {}: {} > {}",
            offset + 1,
            pair[0].distance,
            pair[1].distance,
        );
    }
    results_first.iter().for_each(|hit| {
        assert!(
            hit.distance >= 0.0,
            "step 4: L2 distance {} should be non-negative",
            hit.distance,
        );
    });

    // --- Step 5: close the store --------------------------------------------
    store.close().expect("step 5: close failed");

    // --- Step 6: reopen the store (simulated process restart) ---------------
    let store = RvfStore::open(&store_path).expect("step 6: reopen failed");
    let reopen_status = store.status();
    assert_eq!(
        reopen_status.total_vectors, vector_count as u64,
        "step 6: all {} vectors should persist after reopen",
        vector_count,
    );

    // --- Step 7: repeat the query against the reopened store ----------------
    let results_second = store
        .query(&query_vec, k, &query_opts)
        .expect("step 7: query after reopen failed");

    assert_eq!(
        results_second.len(),
        k,
        "step 7: should return exactly {} results after reopen",
        k,
    );

    // --- Step 8: both queries must agree (persistence) ----------------------
    // Result order is deliberately not compared, because ties may be broken
    // differently after a reopen. Instead the test compares the result count,
    // the set of ids, and the per-id distance within a small tolerance.
    assert_eq!(
        results_first.len(),
        results_second.len(),
        "step 8: result count should match across restart",
    );

    // Ordered maps keyed by id: their key sequences are already sorted.
    let first_map: BTreeMap<u64, f32> = results_first
        .iter()
        .map(|hit| (hit.id, hit.distance))
        .collect();
    let second_map: BTreeMap<u64, f32> = results_second
        .iter()
        .map(|hit| (hit.id, hit.distance))
        .collect();

    let first_ids: Vec<u64> = first_map.keys().copied().collect();
    let second_ids: Vec<u64> = second_map.keys().copied().collect();
    assert_eq!(
        first_ids, second_ids,
        "step 8: result ID sets must match across restart",
    );

    for (&id, &d1) in &first_map {
        let d2 = second_map[&id];
        assert!(
            (d1 - d2).abs() < 1e-5,
            "step 8: distance mismatch for id={}: {} vs {} (pre vs post restart)",
            id, d1, d2,
        );
    }

    // Delete and compact need a mutable binding, so close this handle and
    // open the store once more.
    store.close().expect("step 8: close for mutable reopen failed");
    let mut store = RvfStore::open(&store_path).expect("step 8: mutable reopen failed");

    // --- Step 9: delete vectors with ids 1..=10 -----------------------------
    let delete_ids: Vec<u64> = (1..=10).collect();
    let del_result = store
        .delete(&delete_ids)
        .expect("step 9: delete failed");

    assert_eq!(
        del_result.deleted, 10,
        "step 9: should have deleted 10 vectors",
    );
    assert!(
        del_result.epoch > reopen_status.current_epoch,
        "step 9: epoch should advance after delete",
    );

    // Ask for every vector: none of the deleted ids may come back.
    let post_delete_results = store
        .query(&query_vec, vector_count, &query_opts)
        .expect("step 9: post-delete query failed");

    post_delete_results.iter().for_each(|hit| {
        assert!(
            hit.id > 10,
            "step 9: deleted vector {} should not appear in results",
            hit.id,
        );
    });
    assert_eq!(
        post_delete_results.len(),
        vector_count - 10,
        "step 9: should have {} results after deleting 10",
        vector_count - 10,
    );

    // --- Step 10: compact the store -----------------------------------------
    let pre_compact_epoch = store.status().current_epoch;
    let compact_result = store.compact().expect("step 10: compact failed");

    assert!(
        compact_result.segments_compacted > 0 || compact_result.bytes_reclaimed > 0,
        "step 10: compaction should reclaim space",
    );
    assert!(
        compact_result.epoch > pre_compact_epoch,
        "step 10: epoch should advance after compact",
    );

    // --- Step 11: deleted vectors stay gone after compaction ----------------
    let post_compact_results = store
        .query(&query_vec, vector_count, &query_opts)
        .expect("step 11: post-compact query failed");

    post_compact_results.iter().for_each(|hit| {
        assert!(
            hit.id > 10,
            "step 11: deleted vector {} appeared after compaction",
            hit.id,
        );
    });
    assert_eq!(
        post_compact_results.len(),
        vector_count - 10,
        "step 11: should still have {} results post-compact",
        vector_count - 10,
    );

    let post_compact_status = store.status();
    assert_eq!(
        post_compact_status.total_vectors,
        (vector_count - 10) as u64,
        "step 11: status should reflect {} live vectors",
        vector_count - 10,
    );

    // --- Step 12: derive a child store --------------------------------------
    let child = store
        .derive(&child_path, DerivationType::Clone, Some(options.clone()))
        .expect("step 12: derive failed");

    // Lineage: depth 1, parent id equals the parent's file id, own id differs.
    assert_eq!(
        child.lineage_depth(),
        1,
        "step 12: child lineage depth should be 1",
    );
    assert_eq!(
        child.parent_id(),
        store.file_id(),
        "step 12: child parent_id should match parent file_id",
    );
    assert_ne!(
        child.file_id(),
        store.file_id(),
        "step 12: child should have a distinct file_id",
    );

    // --- Step 13: the child can be queried independently --------------------
    // This test expects a freshly derived child to hold no vectors of its
    // own, so the query must succeed and return nothing.
    let child_query = random_vector(dim as usize, 999);
    let child_results = child
        .query(&child_query, k, &query_opts)
        .expect("step 13: child query failed");

    assert!(
        child_results.is_empty(),
        "step 13: freshly derived child should have no vectors, got {}",
        child_results.len(),
    );

    // --- Step 14: segment listing works on parent and child -----------------
    let parent_segments = store.segment_dir();
    assert!(
        !parent_segments.is_empty(),
        "step 14: parent should have at least one segment",
    );

    let child_segments = child.segment_dir();
    assert!(
        !child_segments.is_empty(),
        "step 14: child should have at least one segment (manifest)",
    );

    // Every listed segment must carry a non-zero id and a non-zero type byte.
    for &(seg_id, _, _, seg_type) in parent_segments.iter() {
        assert!(seg_id > 0, "step 14: parent segment ID should be > 0");
        assert!(seg_type > 0, "step 14: parent segment type should be > 0");
    }
    for &(seg_id, _, _, seg_type) in child_segments.iter() {
        assert!(seg_id > 0, "step 14: child segment ID should be > 0");
        assert!(seg_type > 0, "step 14: child segment type should be > 0");
    }

    // --- Step 15: close both stores and clean up ----------------------------
    child.close().expect("step 15: child close failed");
    store.close().expect("step 15: parent close failed");

    // Both files must still be on disk until the TempDir is dropped.
    assert!(
        store_path.exists(),
        "step 15: parent store file should exist before cleanup",
    );
    assert!(
        child_path.exists(),
        "step 15: child store file should exist before cleanup",
    );

    // Dropping the TempDir removes the directory and everything in it.
    drop(dir);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Additional focused smoke tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Checks that, with the cosine metric and unit-length vectors, every
/// returned distance lies within the closed range `[0.0, 2.0]` and that
/// results come back sorted by ascending distance. Everything happens in a
/// single session (no reopen), which sidesteps the metric-not-persisted issue.
#[test]
fn smoke_cosine_distance_range() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("cosine_range.rvf");

    let dim: u16 = 128;
    let mut store = RvfStore::create(&path, make_options(dim, DistanceMetric::Cosine)).unwrap();

    // Ingest 50 unit vectors; the vector at index i uses seed i * 31 + 3.
    let ids: Vec<u64> = (1..=50).collect();
    let vectors: Vec<Vec<f32>> = (0..50)
        .map(|i| random_unit_vector(dim as usize, i * 31 + 3))
        .collect();
    let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
    store.ingest_batch(&refs, &ids, None).unwrap();

    // Probe with several unrelated query vectors.
    let query_opts = QueryOptions::default();
    for seed in [0, 42, 100, 999, 12345] {
        let probe = random_unit_vector(dim as usize, seed);
        let results = store.query(&probe, 50, &query_opts).unwrap();

        results.iter().for_each(|hit| {
            assert!(
                (0.0..=2.0).contains(&hit.distance),
                "cosine distance {} out of range [0.0, 2.0] for seed {}",
                hit.distance,
                seed,
            );
        });

        // Neighbouring results must be in non-decreasing distance order.
        for (offset, pair) in results.windows(2).enumerate() {
            assert!(
                pair[1].distance >= pair[0].distance,
                "results not sorted for seed {}: {} > {} at position {}",
                seed,
                pair[0].distance,
                pair[1].distance,
                offset + 1,
            );
        }
    }

    store.close().unwrap();
}
|
||||
|
||||
/// Exercises persistence over four open/close cycles with ingests, deletes
/// and a compaction in between. L2 is used throughout so that results stay
/// comparable after each reopen.
#[test]
fn smoke_multi_restart_persistence() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("multi_restart.rvf");
    let dim: u16 = 128;
    let options = make_options(dim, DistanceMetric::L2);

    // Ids removed in cycle 2 and checked for absence in cycle 3.
    let deleted_ids: [u64; 10] = [5, 10, 15, 20, 25, 55, 60, 65, 70, 75];

    // Cycle 1: create the store and ingest ids 1..=50 (seeds 0..50).
    {
        let mut store = RvfStore::create(&path, options.clone()).unwrap();
        let ids: Vec<u64> = (1..=50).collect();
        let vectors: Vec<Vec<f32>> = (0..50)
            .map(|seed| random_vector(dim as usize, seed))
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
        store.ingest_batch(&refs, &ids, None).unwrap();
        assert_eq!(store.status().total_vectors, 50);
        store.close().unwrap();
    }

    // Cycle 2: reopen, ingest ids 51..=100 (seeds 50..100), delete 10, close.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(store.status().total_vectors, 50);

        let ids: Vec<u64> = (51..=100).collect();
        let vectors: Vec<Vec<f32>> = (50..100)
            .map(|seed| random_vector(dim as usize, seed))
            .collect();
        let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
        store.ingest_batch(&refs, &ids, None).unwrap();
        assert_eq!(store.status().total_vectors, 100);

        store.delete(&deleted_ids).unwrap();
        assert_eq!(store.status().total_vectors, 90);

        store.close().unwrap();
    }

    // Cycle 3: reopen, check the count, compact, and query everything.
    {
        let mut store = RvfStore::open(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 3: 90 vectors should survive two restarts",
        );

        store.compact().unwrap();
        assert_eq!(store.status().total_vectors, 90);

        // A query large enough to return every live vector must not contain
        // any of the deleted ids.
        let probe = random_vector(dim as usize, 42);
        let results = store.query(&probe, 100, &QueryOptions::default()).unwrap();
        results.iter().for_each(|hit| {
            assert!(
                !deleted_ids.contains(&hit.id),
                "cycle 3: deleted vector {} appeared after compact + restart",
                hit.id,
            );
        });

        store.close().unwrap();
    }

    // Cycle 4: read-only reopen; the compacted state must still be there.
    {
        let store = RvfStore::open_readonly(&path).unwrap();
        assert_eq!(
            store.status().total_vectors, 90,
            "cycle 4: 90 vectors should survive compact + restart",
        );
        assert!(store.status().read_only);
    }
}
|
||||
|
||||
/// Ingests 100 vectors with one metadata entry each and checks that a
/// nearest-neighbour query for a stored vector returns that vector's id.
#[test]
fn smoke_metadata_and_ids() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("meta_ids.rvf");
    let dim: u16 = 128;

    let mut store = RvfStore::create(&path, make_options(dim, DistanceMetric::L2)).unwrap();

    // Ids 1..=100; the vector at index i uses seed i * 7 + 1, and each
    // vector's metadata (field 0) stores its own id as a U64.
    let ids: Vec<u64> = (1..=100).collect();
    let vectors: Vec<Vec<f32>> = (0..100)
        .map(|i| random_vector(dim as usize, i * 7 + 1))
        .collect();
    let refs: Vec<&[f32]> = vectors.iter().map(Vec::as_slice).collect();
    let metadata: Vec<MetadataEntry> = ids
        .iter()
        .map(|&id| MetadataEntry {
            field_id: 0,
            value: MetadataValue::U64(id),
        })
        .collect();

    let outcome = store.ingest_batch(&refs, &ids, Some(&metadata)).unwrap();
    assert_eq!(outcome.accepted, 100);
    assert_eq!(outcome.rejected, 0);

    // Seed 41 * 7 + 1 reproduces the stored vector with id=42.
    let probe = random_vector(dim as usize, 41 * 7 + 1);
    let results = store.query(&probe, 1, &QueryOptions::default()).unwrap();
    assert_eq!(results.len(), 1);

    let top = &results[0];
    assert_eq!(top.id, 42, "exact match should be id=42");
    assert!(top.distance < 1e-5);

    store.close().unwrap();
}
|
||||
Reference in New Issue
Block a user