Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
34
vendor/ruvector/scripts/patches/hnsw_rs/tests/deallocation_test.rs
vendored
Normal file
34
vendor/ruvector/scripts/patches/hnsw_rs/tests/deallocation_test.rs
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
use env_logger::Builder;
|
||||
|
||||
use anndists::dist::DistL1;
|
||||
use hnsw_rs::hnsw::Hnsw;
|
||||
|
||||
// A test program to see if memory from insertions gets deallocated.
|
||||
// This program sets up a process that iteratively builds a new model and lets it go out of scope.
|
||||
// Since the models go out of scope, the desired behavior is that memory consumption is constant while this program is running.
|
||||
fn main() {
|
||||
//
|
||||
Builder::from_default_env().init();
|
||||
//
|
||||
let mut counter: usize = 0;
|
||||
loop {
|
||||
let hnsw: Hnsw<f32, DistL1> = Hnsw::new(15, 100_000, 20, 500_000, DistL1 {});
|
||||
let s1 = [1.0, 0.0, 0.0, 0.0];
|
||||
hnsw.insert_slice((&s1, 0));
|
||||
let s2 = [0.0, 1.0, 1.0];
|
||||
hnsw.insert_slice((&s2, 1));
|
||||
let s3 = [0.0, 0.0, 1.0];
|
||||
hnsw.insert_slice((&s3, 2));
|
||||
let s4 = [1.0, 0.0, 0.0, 1.0];
|
||||
hnsw.insert_slice((&s4, 3));
|
||||
let s5 = [1.0, 1.0, 1.0];
|
||||
hnsw.insert_slice((&s5, 4));
|
||||
let s6 = [1.0, -1.0, 1.0];
|
||||
hnsw.insert_slice((&s6, 5));
|
||||
|
||||
if counter % 1_000_000 == 0 {
|
||||
println!("counter : {}", counter)
|
||||
}
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
266
vendor/ruvector/scripts/patches/hnsw_rs/tests/filtertest.rs
vendored
Normal file
266
vendor/ruvector/scripts/patches/hnsw_rs/tests/filtertest.rs
vendored
Normal file
@@ -0,0 +1,266 @@
|
||||
#![allow(clippy::needless_range_loop)]
|
||||
#![allow(clippy::range_zip_with_len)]
|
||||
|
||||
use anndists::dist::*;
|
||||
use hnsw_rs::prelude::*;
|
||||
use rand::{Rng, distr::Uniform};
|
||||
use std::iter;
|
||||
|
||||
#[allow(unused)]
|
||||
fn log_init_test() {
|
||||
let _ = env_logger::builder().is_test(true).try_init();
|
||||
}
|
||||
|
||||
// Shows two ways to do filtering, by a sorted vector or with a closure
|
||||
// We define a hnsw-index with 500 entries
|
||||
// Only ids within 300-400 should be in the result-set
|
||||
|
||||
// Used to create a random string
|
||||
fn generate_random_string(len: usize) -> String {
|
||||
const CHARSET: &[u8] = b"abcdefghij";
|
||||
let mut rng = rand::rng();
|
||||
let one_char = || CHARSET[rng.random_range(0..CHARSET.len())] as char;
|
||||
iter::repeat_with(one_char).take(len).collect()
|
||||
}
|
||||
|
||||
// this function uses a sorted vector as a filter
|
||||
fn search_closure_filter(
|
||||
word: &str,
|
||||
hns: &Hnsw<u16, DistLevenshtein>,
|
||||
words: &[String],
|
||||
filter_vector: &[usize],
|
||||
) {
|
||||
// transform string to u16 values
|
||||
let vec: Vec<u16> = word.chars().map(|c| c as u16).collect();
|
||||
// now create a closure using this filter_vector
|
||||
// here we can off course implement more advanced filter logic
|
||||
let filter = |id: &usize| -> bool { filter_vector.binary_search(id).is_ok() };
|
||||
|
||||
// Now let us do the search by using the defined clojure, which in turn uses our vector
|
||||
// ids not in the vector will not be indluced in the search results
|
||||
println!("========== Search with closure filter");
|
||||
let ef_search = 30;
|
||||
let res = hns.search_possible_filter(&vec, 10, ef_search, Some(&filter));
|
||||
for r in res {
|
||||
println!(
|
||||
"Word: {:?} Id: {:?} Distance: {:?}",
|
||||
words[r.d_id], r.d_id, r.distance
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Compares three ways of restricting a Levenshtein-distance search to
/// ids 300..400: a vector filter on the full index, a closure filter on
/// the full index, and an unfiltered search on a second index holding
/// only those 100 words. Filtered results found in both searches must
/// report identical distances.
#[test]
fn filter_levenstein() {
    let nb_elem = 500000; // number of possible words in the dictionary
    let max_nb_connection = 15;
    let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
    let ef_c = 200;
    // Full index over all generated words.
    let hns = Hnsw::<u16, DistLevenshtein>::new(
        max_nb_connection,
        nb_elem,
        nb_layer,
        ef_c,
        DistLevenshtein {},
    );
    // NOTE(review): 1..1000 generates 999 words, far below nb_elem's 500000
    // capacity hint — presumably intentional for test speed.
    let mut words = vec![];
    for _n in 1..1000 {
        let tw = generate_random_string(8);
        words.push(tw);
    }

    // Insert every word, keyed by its position in `words`.
    for (i, w) in words.iter().enumerate() {
        let vec: Vec<u16> = w.chars().map(|c| c as u16).collect();
        hns.insert((&vec, i));
        if i % 1000 == 0 {
            println!("Inserting: {:?}", i);
        }
    }
    // Create a sorted vector of ids; the ids in the vector will be used as
    // a filter. A second index is built holding only those filtered words.
    let filtered_hns = Hnsw::<u16, DistLevenshtein>::new(
        max_nb_connection,
        nb_elem,
        nb_layer,
        ef_c,
        DistLevenshtein {},
    );
    let mut filter_vector: Vec<usize> = Vec::new();
    // Pushing 300..400 in order keeps filter_vector sorted, as required by
    // the binary search done in search_closure_filter.
    for i in 300..400 {
        filter_vector.push(i);
        let v: Vec<u16> = words[i].chars().map(|c| c as u16).collect();
        filtered_hns.insert((&v, i));
    }
    //
    let ef_search = 30;
    let tosearch = "abcdefg";
    let knbn = 10;
    let vec_tosearch: Vec<u16> = tosearch.chars().map(|c| c as u16).collect();
    //
    // 1) vector filter applied to the full index
    println!("========== Search in full hns with filter");
    let vec_res = hns.search_filter(&vec_tosearch, knbn, ef_search, Some(&filter_vector));
    for r in &vec_res {
        println!(
            "Word: {:?} Id: {:?} Distance: {:?}",
            words[r.d_id], r.d_id, r.distance
        );
    }
    //
    // 2) no filter, but searching the index restricted to ids 300..400
    println!("========== Search in restricted_hns but without filter");
    //
    let vec: Vec<u16> = tosearch.chars().map(|c| c as u16).collect();
    let res: Vec<Neighbour> = filtered_hns.search(&vec, knbn, ef_search);
    for r in &res {
        println!(
            "Word: {:?} Id: {:?} Distance: {:?}",
            words[r.d_id], r.d_id, r.distance
        );
    }
    //
    // 3) search with filter, first with closure
    println!("========== Search in full hns with closure filter");
    search_closure_filter(tosearch, &hns, &words, &filter_vector);
    //
    // now with vector filter and estimate recall
    //
    println!("========== Search in full hns with vector filter");
    let filter_vec_res = hns.search_filter(&vec_tosearch, knbn, ef_search, Some(&filter_vector));
    for r in &filter_vec_res {
        println!(
            "Word: {:?} Id: {:?} Distance: {:?}",
            words[r.d_id], r.d_id, r.distance
        );
    }
    // how many neighbours in res are in filter_vec_res; any id found in
    // both searches must carry the exact same distance.
    let mut nb_found: usize = 0;
    for n in &res {
        let found = filter_vec_res.iter().find(|&&m| m.d_id == n.d_id);
        if found.is_some() {
            nb_found += 1;
            assert_eq!(n.distance, found.unwrap().distance);
        }
    }
    println!(" recall : {}", nb_found as f32 / res.len() as f32);
    println!(
        " last distances ratio : {} ",
        res.last().unwrap().distance / filter_vec_res.last().unwrap().distance
    );
}
|
||||
|
||||
// A test with random uniform data vectors and L2 distance
|
||||
// We compare a search of a random vector in hnsw structure with a filter to a filtered_hnsw
|
||||
// containing only the data fitting the filter
|
||||
// A test with random uniform data vectors and L2 distance.
// Compares a filtered search in the full hnsw against an unfiltered search
// in a second index containing only the data matching the filter; matching
// ids must report (nearly) identical distances.
#[test]
fn filter_l2() {
    let nb_elem = 5000;
    let dim = 25;
    // generate nb_elem column vectors of dimension dim
    let mut rng = rand::rng();
    let unif = Uniform::<f32>::new(0., 1.).unwrap();
    let mut data = Vec::with_capacity(nb_elem);
    for _ in 0..nb_elem {
        let column = (0..dim).map(|_| rng.sample(unif)).collect::<Vec<f32>>();
        data.push(column);
    }
    // give an id to each data
    let data_with_id = data.iter().zip(0..data.len()).collect::<Vec<_>>();

    let ef_c = 200;
    let max_nb_connection = 15;
    let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
    let hnsw = Hnsw::<f32, DistL2>::new(max_nb_connection, nb_elem, nb_layer, ef_c, DistL2 {});
    hnsw.parallel_insert(&data_with_id);

    //
    let ef_search = 30;
    let knbn = 10;
    // random query vector, same distribution as the data
    let vec_tosearch = (0..dim).map(|_| rng.sample(unif)).collect::<Vec<f32>>();
    //
    // Create a sorted vector of ids; the ids in the vector will be used as
    // a filter. A second index holds only the data for those ids.
    let filtered_hns =
        Hnsw::<f32, DistL2>::new(max_nb_connection, nb_elem, nb_layer, ef_c, DistL2 {});
    let mut filter_vector: Vec<usize> = Vec::new();
    // Pushing 300..400 in order keeps filter_vector sorted.
    for i in 300..400 {
        filter_vector.push(i);
        filtered_hns.insert((&data[i], i));
    }
    //
    println!("========== Search in full hnsw with filter");
    let filter_vec_res = hnsw.search_filter(&vec_tosearch, knbn, ef_search, Some(&filter_vector));
    for r in &filter_vec_res {
        println!("Id: {:?} Distance: {:?}", r.d_id, r.distance);
    }
    //
    println!("========== Search in restricted_hns but without filter");
    let res: Vec<Neighbour> = filtered_hns.search(&vec_tosearch, knbn, ef_search);
    for r in &res {
        println!("Id: {:?} Distance: {:?}", r.d_id, r.distance);
    }
    // how many neighbours in res are in filter_vec_res and what is the distance gap;
    // distances from the two indexes should agree to within float round-off.
    let mut nb_found: usize = 0;
    for n in &res {
        let found = filter_vec_res.iter().find(|&&m| m.d_id == n.d_id);
        if found.is_some() {
            nb_found += 1;
            assert!((1. - n.distance / found.unwrap().distance).abs() < 1.0e-5);
        }
    }
    println!(" recall : {}", nb_found as f32 / res.len() as f32);
    println!(
        " last distances ratio : {} ",
        res.last().unwrap().distance / filter_vec_res.last().unwrap().distance
    );
} // end of filter_l2
|
||||
|
||||
//
|
||||
|
||||
use std::collections::HashMap;
|
||||
/// Regression test on a 100x100 grid of 2-D points: a very selective
/// geometric filter must yield at most one hit, and a filter that rejects
/// everything must yield zero hits.
#[test]
fn filter_villsnow() {
    println!("\n\n in test villsnow");
    log_init_test();
    //
    let grid_size = 100;
    let mut hnsw = Hnsw::<f64, DistL2>::new(4, grid_size * grid_size, 16, 100, DistL2::default());
    // id -> coordinates, so the filter closures can look points up by id.
    let mut points = HashMap::new();

    {
        // Insert the cell centres of a grid_size x grid_size grid over [0,1]^2.
        for (id, (i, j)) in itertools::iproduct!(0..grid_size, 0..grid_size,).enumerate() {
            let data = [
                (i as f64 + 0.5) / grid_size as f64,
                (j as f64 + 0.5) / grid_size as f64,
            ];
            hnsw.insert((&data, id));
            points.insert(id, data);
        }

        hnsw.set_searching_mode(true);
    }
    {
        println!("first case");
        // first case: only the single grid point very close to (1,1) passes.
        let filter = |id: &usize| DistL2::default().eval(&points[id], &[1.0, 1.0]) < 1e-2;
        dbg!(points.keys().filter(|x| filter(x)).count()); // -> 1

        // Query from the opposite corner with a small ef, so the filtered
        // point is hard to reach.
        let hit = hnsw.search_filter(&[0.0, 0.0], 10, 4, Some(&filter));
        if !hit.is_empty() {
            log::info!("got point : {:?}", points.get(&hit[0].d_id));
            log::info!("got {:?}, must be true", filter(&hit[0].d_id)); // -> sometimes false
        } else {
            log::info!("found no point");
        }
        // At most the one admissible point may be returned.
        assert!(hit.len() <= 1);
    }
    {
        println!("second case");
        // second case: a filter that rejects every id must produce no hits.
        let filter = |_id: &usize| false;
        dbg!(points.keys().filter(|x| filter(x)).count()); // -> 0, obviously

        let hit = hnsw.search_filter(&[0.0, 0.0], 10, 64, Some(&filter));
        println!("villsnow , {:?}", hit.len());
        log::info!("got {:?}, must be 0", hit.len()); // -> 1
        assert_eq!(hit.len(), 0);
    }
}
|
||||
328
vendor/ruvector/scripts/patches/hnsw_rs/tests/serpar.rs
vendored
Normal file
328
vendor/ruvector/scripts/patches/hnsw_rs/tests/serpar.rs
vendored
Normal file
@@ -0,0 +1,328 @@
|
||||
#![allow(clippy::range_zip_with_len)]
|
||||
|
||||
//! some testing utilities.
|
||||
//! run with to get output statistics : cargo test --release -- --nocapture --test test_parallel.
|
||||
//! serial test corresponds to random-10nn-euclidean(k=10)
|
||||
//! parallel test corresponds to random data in 25 dimensions k = 10, dist Cosine
|
||||
|
||||
use rand::distr::Uniform;
|
||||
use rand::prelude::*;
|
||||
|
||||
use skiplist::OrderedSkipList;
|
||||
|
||||
use anndists::dist;
|
||||
use hnsw_rs::prelude::*;
|
||||
use serde::{de::DeserializeOwned, Serialize};
|
||||
|
||||
pub fn gen_random_vector_f32(nbrow: usize) -> Vec<f32> {
|
||||
let mut rng = rand::rng();
|
||||
let unif = Uniform::<f32>::new(0., 1.).unwrap();
|
||||
(0..nbrow).map(|_| rng.sample(unif)).collect::<Vec<f32>>()
|
||||
}
|
||||
|
||||
/// return nbcolumn vectors of dimension nbrow
|
||||
pub fn gen_random_matrix_f32(nbrow: usize, nbcolumn: usize) -> Vec<Vec<f32>> {
|
||||
let mut rng = rand::rng();
|
||||
let unif = Uniform::<f32>::new(0., 1.).unwrap();
|
||||
let mut data = Vec::with_capacity(nbcolumn);
|
||||
for _ in 0..nbcolumn {
|
||||
let column = (0..nbrow).map(|_| rng.sample(unif)).collect::<Vec<f32>>();
|
||||
data.push(column);
|
||||
}
|
||||
data
|
||||
}
|
||||
|
||||
fn brute_force_neighbours<T: Serialize + DeserializeOwned + Copy + Send + Sync>(
|
||||
nb_neighbours: usize,
|
||||
refdata: &PointIndexation<T>,
|
||||
distance: PointDistance<T>,
|
||||
data: &[T],
|
||||
) -> OrderedSkipList<PointIdWithOrder> {
|
||||
let mut neighbours = OrderedSkipList::<PointIdWithOrder>::with_capacity(refdata.get_nb_point());
|
||||
|
||||
let mut ptiter = refdata.into_iter();
|
||||
let mut more = true;
|
||||
while more {
|
||||
if let Some(point) = ptiter.next() {
|
||||
let dist_p = distance.eval(data, point.get_v());
|
||||
let ordered_point = PointIdWithOrder::new(point.get_point_id(), dist_p);
|
||||
// log::debug!(" brute force inserting {:?}", ordered_point);
|
||||
if neighbours.len() < nb_neighbours {
|
||||
neighbours.insert(ordered_point);
|
||||
} else {
|
||||
neighbours.insert(ordered_point);
|
||||
neighbours.pop_back();
|
||||
}
|
||||
} else {
|
||||
more = false;
|
||||
}
|
||||
} // end while
|
||||
neighbours
|
||||
} // end of brute_force_2
|
||||
|
||||
//================================================================================================
|
||||
|
||||
mod tests {
|
||||
use cpu_time::ProcessTime;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::*;
|
||||
use dist::l2_normalize;
|
||||
|
||||
/// Builds an L1 index over random data, then measures recall and search
/// speed of hnsw against the brute-force ground truth on `nbtest` random
/// queries. Recall is counted by distance: a returned neighbour counts if
/// its distance does not exceed the knbn-th brute-force distance.
#[test]
fn test_serial() {
    //
    //
    let nb_elem = 1000;
    let dim = 10;
    let knbn = 10;
    let ef = 20;
    // toggle between parallel and serial insertion of the same data
    let parallel = true;
    //
    println!("\n\n test_serial nb_elem {:?}", nb_elem);
    //
    let data = gen_random_matrix_f32(dim, nb_elem);
    let data_with_id = data.iter().zip(0..data.len()).collect::<Vec<_>>();

    let ef_c = 400;
    let max_nb_connection = 32;
    let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
    let mut hns = Hnsw::<f32, dist::DistL1>::new(
        max_nb_connection,
        nb_elem,
        nb_layer,
        ef_c,
        dist::DistL1 {},
    );
    hns.set_extend_candidates(true);
    hns.set_keeping_pruned(true);
    let mut start = ProcessTime::now();
    if parallel {
        println!("parallel insertion");
        hns.parallel_insert(&data_with_id);
    } else {
        println!("serial insertion");
        for (i, d) in data.iter().enumerate() {
            hns.insert((d, i));
        }
    }
    let mut cpu_time: Duration = start.elapsed();
    println!(" hnsw serial data insertion {:?}", cpu_time);
    hns.dump_layer_info();
    println!(" hnsw data nb point inserted {:?}", hns.get_nb_point());
    //

    let nbtest = 300;
    let mut recalls = Vec::<usize>::with_capacity(nbtest);
    // fixed: capacity was nb_elem (1000) although only nbtest (300) entries
    // are ever pushed
    let mut nb_returned = Vec::<usize>::with_capacity(nbtest);
    let mut search_times = Vec::<f32>::with_capacity(nbtest);
    for _itest in 0..nbtest {
        //
        // random query vector; uses the shared helper instead of an inline
        // rng/push loop (identical draws)
        let r_vec = gen_random_vector_f32(dim);
        start = ProcessTime::now();
        let brute_neighbours = brute_force_neighbours(
            knbn,
            hns.get_point_indexation(),
            Box::new(dist::DistL1 {}),
            &r_vec,
        );
        cpu_time = start.elapsed();
        // verbose per-query dump only when few queries are run
        if nbtest <= 100 {
            println!("\n\n **************** test {:?}", _itest);
            println!("\n brute force neighbours :");
            println!("======================");
            println!(" brute force computing {:?} \n ", cpu_time);
            for i in 0..brute_neighbours.len() {
                let p = brute_neighbours[i].point_id;
                println!(" {:?} {:?} ", p, brute_neighbours[i].dist_to_ref);
            }
        }
        //
        hns.set_searching_mode(true);
        start = ProcessTime::now();
        let knn_neighbours = hns.search(&r_vec, knbn, ef);
        cpu_time = start.elapsed();
        search_times.push(cpu_time.as_micros() as f32);
        if nbtest <= 100 {
            println!("\n\n hnsw searching {:?} \n", cpu_time);
            println!("\n knn neighbours");
            println!("======================");
            for n in &knn_neighbours {
                println!(" {:?} {:?} {:?} ", n.d_id, n.p_id, n.distance);
            }
        }
        // compute recall: count returned distances within the knbn-th
        // brute-force distance
        let knn_neighbours_dist: Vec<f32> = knn_neighbours.iter().map(|p| p.distance).collect();
        let max_dist = brute_neighbours[knbn - 1].dist_to_ref;
        let recall = knn_neighbours_dist
            .iter()
            .filter(|d| *d <= &max_dist)
            .count();
        if nbtest <= 100 {
            println!("recall {:?}", (recall as f32) / (knbn as f32));
        }
        recalls.push(recall);
        nb_returned.push(knn_neighbours.len());
    } // end on nbtest
    //
    // compute mean recall and search throughput over all queries
    //

    let mean_recall = (recalls.iter().sum::<usize>() as f32) / ((knbn * recalls.len()) as f32);
    let mean_search_time = (search_times.iter().sum::<f32>()) / (search_times.len() as f32);
    println!(
        "\n mean fraction (of knbn) returned by search {:?} ",
        (nb_returned.iter().sum::<usize>() as f32) / ((nb_returned.len() * knbn) as f32)
    );
    println!(
        "\n nb element {:?} nb search : {:?} recall rate is {:?} search time inverse {:?} ",
        nb_elem,
        nbtest,
        mean_recall,
        1.0e+6_f32 / mean_search_time
    );
} // end test1
|
||||
|
||||
/// Builds a Dot-distance index over l2-normalized random data with
/// parallel insertion, then estimates recall two ways on `nbtest` random
/// queries: by distance threshold and by matching point ids against the
/// brute-force ground truth.
#[test]
fn test_parallel() {
    //
    let nb_elem = 1000;
    let dim = 25;
    let knbn = 10;
    let ef_c = 800;
    let max_nb_connection = 48;
    let ef = 20;
    //
    //
    // l2-normalize so that DistDot behaves like a cosine distance
    let mut data = gen_random_matrix_f32(dim, nb_elem);
    for v in &mut data {
        l2_normalize(v);
    }
    let data_with_id = data.iter().zip(0..data.len()).collect::<Vec<_>>();
    let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
    let mut hns = Hnsw::<f32, dist::DistDot>::new(
        max_nb_connection,
        nb_elem,
        nb_layer,
        ef_c,
        dist::DistDot {},
    );
    // !
    // hns.set_extend_candidates(true);
    let mut start = ProcessTime::now();
    let now = std::time::SystemTime::now();
    // parallel insertion
    hns.parallel_insert(&data_with_id);
    let mut cpu_time: Duration = start.elapsed();
    println!(
        "\n hnsw data parallel insertion cpu time {:?} , system time {:?}",
        cpu_time,
        now.elapsed()
    );
    // one serial insertion more, to check mixing serial and parallel paths
    let mut v = gen_random_vector_f32(dim);
    l2_normalize(&mut v);
    hns.insert((&v, hns.get_nb_point() + 1));
    //
    hns.dump_layer_info();
    println!(" hnsw data nb point inserted {:?}", hns.get_nb_point());
    //
    println!("\n hnsw testing requests ...");
    let nbtest = 100;
    let mut recalls = Vec::<usize>::with_capacity(nbtest);
    let mut recalls_id = Vec::<usize>::with_capacity(nbtest);

    let mut search_times = Vec::<f32>::with_capacity(nbtest);
    for _itest in 0..nbtest {
        // random normalized query vector; uses the shared helper instead of
        // an inline rng/push loop (identical draws)
        let mut r_vec = gen_random_vector_f32(dim);
        l2_normalize(&mut r_vec);

        start = ProcessTime::now();
        let brute_neighbours = brute_force_neighbours(
            knbn,
            hns.get_point_indexation(),
            Box::new(dist::DistDot),
            &r_vec,
        );
        cpu_time = start.elapsed();
        // verbose per-query dump only when few queries are run
        if nbtest <= 100 {
            println!("\n\n test_par nb_elem {:?}", nb_elem);
            println!("\n brute force neighbours :");
            println!("======================");
            println!(" brute force computing {:?} \n", cpu_time);
            for i in 0..brute_neighbours.len() {
                println!(
                    " {:?} {:?} ",
                    brute_neighbours[i].point_id, brute_neighbours[i].dist_to_ref
                );
            }
        }
        //
        hns.set_searching_mode(true);
        start = ProcessTime::now();
        let knn_neighbours = hns.search(&r_vec, knbn, ef);
        cpu_time = start.elapsed();
        search_times.push(cpu_time.as_micros() as f32);
        if nbtest <= 100 {
            println!("\n knn neighbours");
            println!("======================");
            println!(" hnsw searching {:?} \n", cpu_time);
            for n in &knn_neighbours {
                println!(" {:?} \t {:?} \t {:?}", n.d_id, n.p_id, n.distance);
            }
        }
        // compute recall with balls: count returned distances within the
        // knbn-th brute-force distance
        let knn_neighbours_dist: Vec<f32> = knn_neighbours.iter().map(|p| p.distance).collect();
        let max_dist = brute_neighbours[knbn - 1].dist_to_ref;
        let recall = knn_neighbours_dist
            .iter()
            .filter(|d| *d <= &max_dist)
            .count();
        if nbtest <= 100 {
            println!("recall {:?}", (recall as f32) / (knbn as f32));
        }
        recalls.push(recall);
        // compute recall with id: count brute-force neighbours whose point
        // id appears among the returned ids
        let mut recall_id = 0;
        let mut knn_neighbours_id: Vec<PointId> =
            knn_neighbours.iter().map(|p| p.p_id).collect();
        knn_neighbours_id.sort_unstable();
        let snbn = knbn.min(brute_neighbours.len());
        for j in 0..snbn {
            let to_search = brute_neighbours[j].point_id;
            if knn_neighbours_id.binary_search(&to_search).is_ok() {
                recall_id += 1;
            }
        }
        recalls_id.push(recall_id);
    } // end on nbtest
    //
    // compute mean recalls and search throughput
    //

    let mean_recall = (recalls.iter().sum::<usize>() as f32) / ((knbn * recalls.len()) as f32);
    let mean_search_time = (search_times.iter().sum::<f32>()) / (search_times.len() as f32);
    println!(
        "\n nb search {:?} recall rate is {:?} search time inverse {:?} ",
        nbtest,
        mean_recall,
        1.0e+6_f32 / mean_search_time
    );
    // fixed: this average previously summed `recalls` (distance-based)
    // instead of `recalls_id`, so the id-based recall printout was wrong
    let mean_recall_id =
        (recalls_id.iter().sum::<usize>() as f32) / ((knbn * recalls_id.len()) as f32);
    println!("mean recall rate with point ids {:?}", mean_recall_id);
    //
    // assert!(1==0);
} // end test_par
|
||||
}
|
||||
Reference in New Issue
Block a user