Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
63
scripts/patches/hnsw_rs/examples/levensthein.rs
Normal file
63
scripts/patches/hnsw_rs/examples/levensthein.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
use anndists::dist::*;
|
||||
|
||||
use hnsw_rs::prelude::*;
|
||||
use rand::Rng;
|
||||
use std::iter;
|
||||
|
||||
fn generate(len: usize) -> String {
|
||||
const CHARSET: &[u8] = b"abcdefghij";
|
||||
let mut rng = rand::rng();
|
||||
let one_char = || CHARSET[rng.random_range(0..CHARSET.len())] as char;
|
||||
iter::repeat_with(one_char).take(len).collect()
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let nb_elem = 500000; // number of possible words in the dictionary
|
||||
let max_nb_connection = 15;
|
||||
let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
|
||||
let ef_c = 200;
|
||||
let nb_words = 1000;
|
||||
let hns = Hnsw::<u16, DistLevenshtein>::new(
|
||||
max_nb_connection,
|
||||
nb_elem,
|
||||
nb_layer,
|
||||
ef_c,
|
||||
DistLevenshtein {},
|
||||
);
|
||||
let mut words = vec![];
|
||||
for _n in 1..nb_words {
|
||||
let tw = generate(5);
|
||||
words.push(tw);
|
||||
}
|
||||
words.push(String::from("abcdj"));
|
||||
//
|
||||
for (i, w) in words.iter().enumerate() {
|
||||
let vec: Vec<u16> = w.chars().map(|c| c as u16).collect();
|
||||
hns.insert((&vec, i));
|
||||
}
|
||||
// create a filter
|
||||
let mut filter: Vec<usize> = Vec::new();
|
||||
for i in 1..100 {
|
||||
filter.push(i);
|
||||
}
|
||||
//
|
||||
let ef_search: usize = 30;
|
||||
let tosearch: Vec<u16> = "abcde".chars().map(|c| c as u16).collect();
|
||||
//
|
||||
println!("========== search with filter ");
|
||||
let res = hns.search_filter(&tosearch, 10, ef_search, Some(&filter));
|
||||
for r in res {
|
||||
println!(
|
||||
"Word: {:?} Id: {:?} Distance: {:?}",
|
||||
words[r.d_id], r.d_id, r.distance
|
||||
);
|
||||
}
|
||||
println!("========== search without filter ");
|
||||
let res3 = hns.search(&tosearch, 10, ef_search);
|
||||
for r in res3 {
|
||||
println!(
|
||||
"Word: {:?} Id: {:?} Distance: {:?}",
|
||||
words[r.d_id], r.d_id, r.distance
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user