Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
413
vendor/ruvector/crates/rvlite/src/cypher/ast.rs
vendored
Normal file
413
vendor/ruvector/crates/rvlite/src/cypher/ast.rs
vendored
Normal file
@@ -0,0 +1,413 @@
|
||||
//! Abstract Syntax Tree definitions for Cypher query language
|
||||
//!
|
||||
//! Represents the parsed structure of Cypher queries including:
|
||||
//! - Pattern matching (MATCH, OPTIONAL MATCH)
|
||||
//! - Filtering (WHERE)
|
||||
//! - Projections (RETURN, WITH)
|
||||
//! - Mutations (CREATE, MERGE, DELETE, SET)
|
||||
//! - Aggregations and ordering
|
||||
//! - Hyperedge support for N-ary relationships
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Top-level query representation
///
/// A parsed Cypher query is an ordered list of statements that are
/// executed sequentially (e.g. MATCH ... WHERE ... RETURN ...).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Query {
    // Statements in source order; execution order matters (e.g. MATCH before RETURN).
    pub statements: Vec<Statement>,
}
|
||||
|
||||
/// Individual query statement
///
/// One clause of a Cypher query; each variant wraps the clause-specific
/// payload type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Statement {
    /// MATCH / OPTIONAL MATCH pattern search
    Match(MatchClause),
    /// CREATE nodes and relationships
    Create(CreateClause),
    /// MERGE (create-or-match)
    Merge(MergeClause),
    /// DELETE / DETACH DELETE
    Delete(DeleteClause),
    /// SET properties, variables, or labels
    Set(SetClause),
    /// REMOVE properties or labels
    Remove(RemoveClause),
    /// RETURN projection
    Return(ReturnClause),
    /// WITH projection for query chaining
    With(WithClause),
}
|
||||
|
||||
/// MATCH clause for pattern matching
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MatchClause {
    /// True for OPTIONAL MATCH (non-matches yield nulls instead of dropping rows).
    pub optional: bool,
    /// Comma-separated patterns to match.
    pub patterns: Vec<Pattern>,
    /// Optional WHERE filter attached to this MATCH.
    pub where_clause: Option<WhereClause>,
}
|
||||
|
||||
/// Pattern matching expressions
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Pattern {
    /// Simple node pattern: (n:Label {props})
    Node(NodePattern),
    /// Relationship pattern: (a)-[r:TYPE]->(b)
    Relationship(RelationshipPattern),
    /// Path pattern: p = (a)-[*1..5]->(b)
    Path(PathPattern),
    /// Hyperedge pattern for N-ary relationships: (a)-[r:TYPE]->(b,c,d)
    Hyperedge(HyperedgePattern),
}
|
||||
|
||||
/// Node pattern: (variable:Label {property: value})
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodePattern {
    /// Binding name, if the node is named (e.g. `n` in `(n:Person)`); None for anonymous nodes.
    pub variable: Option<String>,
    /// Zero or more labels; empty means "any label".
    pub labels: Vec<String>,
    /// Optional inline property constraints `{key: value}`.
    pub properties: Option<PropertyMap>,
}
|
||||
|
||||
/// Relationship pattern: [variable:Type {properties}]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RelationshipPattern {
    /// Binding name for the relationship, if named.
    pub variable: Option<String>,
    /// Relationship type; None matches any type.
    pub rel_type: Option<String>,
    /// Optional inline property constraints on the edge.
    pub properties: Option<PropertyMap>,
    /// Arrow direction relative to `from`.
    pub direction: Direction,
    /// Variable-length hop range `[*min..max]`, when present.
    pub range: Option<RelationshipRange>,
    /// Source node pattern
    pub from: Box<NodePattern>,
    /// Target - can be a NodePattern or another Pattern for chained relationships
    /// For simple relationships like (a)-[r]->(b), this is just the node
    /// For chained patterns like (a)-[r]->(b)<-[s]-(c), the target is nested
    pub to: Box<Pattern>,
}
|
||||
|
||||
/// Hyperedge pattern for N-ary relationships
/// Example: (person)-[r:TRANSACTION]->(account1, account2, merchant)
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct HyperedgePattern {
    /// Binding name for the hyperedge, if named.
    pub variable: Option<String>,
    /// Relationship type; required for hyperedges (unlike binary relationships).
    pub rel_type: String,
    /// Optional inline property constraints.
    pub properties: Option<PropertyMap>,
    /// The single source node of the hyperedge.
    pub from: Box<NodePattern>,
    pub to: Vec<NodePattern>, // Multiple target nodes for N-ary relationships
    pub arity: usize, // Number of participating nodes (including source)
}
|
||||
|
||||
/// Relationship direction
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Direction {
    Outgoing, // ->
    Incoming, // <-
    Undirected, // -
}
|
||||
|
||||
/// Relationship range for path queries: [*min..max]
///
/// `None` on either bound means that side of the range is unbounded
/// (e.g. `[*..5]` or `[*2..]`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RelationshipRange {
    /// Minimum hop count, if specified.
    pub min: Option<usize>,
    /// Maximum hop count, if specified.
    pub max: Option<usize>,
}
|
||||
|
||||
/// Path pattern: p = (a)-[*]->(b)
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PathPattern {
    /// Name the whole path is bound to (`p` above). Unlike node/relationship
    /// variables this one is mandatory — a path pattern exists to bind a name.
    pub variable: String,
    /// The underlying pattern the path wraps.
    pub pattern: Box<Pattern>,
}
|
||||
|
||||
/// Property map: {key: value, ...}
///
/// Values are full expressions so literals, variables, and computed values
/// can all appear on the right-hand side of a property.
pub type PropertyMap = HashMap<String, Expression>;
|
||||
|
||||
/// WHERE clause for filtering
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WhereClause {
    // A single boolean expression; compound filters are encoded as
    // nested BinaryOp { op: And/Or, .. } expressions.
    pub condition: Expression,
}
|
||||
|
||||
/// CREATE clause for creating nodes and relationships
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CreateClause {
    // Each pattern describes a node/relationship structure to materialize.
    pub patterns: Vec<Pattern>,
}
|
||||
|
||||
/// MERGE clause for create-or-match
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MergeClause {
    /// Pattern to match, created if absent.
    pub pattern: Pattern,
    /// ON CREATE SET actions, applied only when the pattern was created.
    pub on_create: Option<SetClause>,
    /// ON MATCH SET actions, applied only when the pattern already existed.
    pub on_match: Option<SetClause>,
}
|
||||
|
||||
/// DELETE clause
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DeleteClause {
    /// True for DETACH DELETE (also removes connected relationships).
    pub detach: bool,
    /// Expressions (typically variables) naming what to delete.
    pub expressions: Vec<Expression>,
}
|
||||
|
||||
/// SET clause for updating properties
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SetClause {
    // Individual assignments; see SetItem for the supported forms.
    pub items: Vec<SetItem>,
}
|
||||
|
||||
/// One assignment inside a SET clause.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SetItem {
    /// SET n.property = value
    Property {
        variable: String,
        property: String,
        value: Expression,
    },
    /// SET n = value (replace the variable's value wholesale)
    Variable {
        variable: String,
        value: Expression,
    },
    /// SET n:Label1:Label2 (add labels)
    Labels {
        variable: String,
        labels: Vec<String>,
    },
}
|
||||
|
||||
/// REMOVE clause for removing properties or labels
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RemoveClause {
    // Individual removals; see RemoveItem for the supported forms.
    pub items: Vec<RemoveItem>,
}
|
||||
|
||||
/// One removal inside a REMOVE clause.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RemoveItem {
    /// Remove a property: REMOVE n.property
    Property { variable: String, property: String },
    /// Remove labels: REMOVE n:Label1:Label2
    Labels {
        variable: String,
        labels: Vec<String>,
    },
}
|
||||
|
||||
/// RETURN clause for projection
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ReturnClause {
    /// True for RETURN DISTINCT.
    pub distinct: bool,
    /// Projected expressions with optional aliases.
    pub items: Vec<ReturnItem>,
    /// Optional ORDER BY applied to the projected rows.
    pub order_by: Option<OrderBy>,
    /// Optional SKIP expression (row offset).
    pub skip: Option<Expression>,
    /// Optional LIMIT expression (row cap).
    pub limit: Option<Expression>,
}
|
||||
|
||||
/// WITH clause for chaining queries
///
/// Structurally a RETURN plus an optional WHERE: it projects intermediate
/// results that feed the next part of the query.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WithClause {
    /// True for WITH DISTINCT.
    pub distinct: bool,
    /// Projected expressions with optional aliases.
    pub items: Vec<ReturnItem>,
    /// Optional WHERE filter applied after projection.
    pub where_clause: Option<WhereClause>,
    /// Optional ORDER BY on the projected rows.
    pub order_by: Option<OrderBy>,
    /// Optional SKIP expression.
    pub skip: Option<Expression>,
    /// Optional LIMIT expression.
    pub limit: Option<Expression>,
}
|
||||
|
||||
/// Return item: expression AS alias
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ReturnItem {
    /// The projected expression.
    pub expression: Expression,
    /// Column alias from `AS`; None means the column is named after the expression.
    pub alias: Option<String>,
}
|
||||
|
||||
/// ORDER BY clause
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OrderBy {
    // Sort keys in priority order (first item is the primary key).
    pub items: Vec<OrderByItem>,
}
|
||||
|
||||
/// One sort key of an ORDER BY clause.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OrderByItem {
    /// Expression to sort by.
    pub expression: Expression,
    /// True for ASC (the default), false for DESC.
    pub ascending: bool,
}
|
||||
|
||||
/// Expression tree
///
/// The value language of Cypher: literals, variable/property references,
/// collections, operators, function calls, aggregations, pattern
/// predicates, and CASE expressions.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Expression {
    // Literals
    Integer(i64),
    Float(f64),
    String(String),
    Boolean(bool),
    Null,

    // Variables and properties
    /// A bound variable, e.g. `n`.
    Variable(String),
    /// Property access, e.g. `n.name` (object is usually a Variable).
    Property {
        object: Box<Expression>,
        property: String,
    },

    // Collections
    List(Vec<Expression>),
    Map(HashMap<String, Expression>),

    // Operators
    /// `left op right`, e.g. `a.age > 30`.
    BinaryOp {
        left: Box<Expression>,
        op: BinaryOperator,
        right: Box<Expression>,
    },
    /// `op operand`, e.g. `NOT expr` or `-x`.
    UnaryOp {
        op: UnaryOperator,
        operand: Box<Expression>,
    },

    // Functions and aggregations
    /// Scalar function call, e.g. `toUpper(n.name)`.
    FunctionCall {
        name: String,
        args: Vec<Expression>,
    },
    /// Aggregation, e.g. `count(DISTINCT n)`.
    Aggregation {
        function: AggregationFunction,
        expression: Box<Expression>,
        distinct: bool,
    },

    // Pattern predicates
    /// Pattern used as a boolean predicate, e.g. `WHERE (n)-[:KNOWS]->(m)`.
    PatternPredicate(Box<Pattern>),

    // Case expressions
    /// CASE [expr] WHEN ... THEN ... [ELSE default] END.
    Case {
        expression: Option<Box<Expression>>,
        alternatives: Vec<(Expression, Expression)>,
        default: Option<Box<Expression>>,
    },
}
|
||||
|
||||
/// Binary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryOperator {
    // Arithmetic
    Add,
    Subtract,
    Multiply,
    Divide,
    Modulo,
    Power,

    // Comparison
    Equal,
    NotEqual,
    LessThan,
    LessThanOrEqual,
    GreaterThan,
    GreaterThanOrEqual,

    // Logical
    And,
    Or,
    Xor,

    // String
    Contains,
    StartsWith,
    EndsWith,
    Matches, // Regex

    // Collection
    In,

    // Null checking
    Is,
    IsNot,
}
|
||||
|
||||
/// Unary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnaryOperator {
    /// Logical NOT.
    Not,
    /// Arithmetic negation.
    Minus,
    /// Unary plus (identity).
    Plus,
    /// `IS NULL` postfix test.
    IsNull,
    /// `IS NOT NULL` postfix test.
    IsNotNull,
}
|
||||
|
||||
/// Aggregation functions
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AggregationFunction {
    Count,
    Sum,
    Avg,
    Min,
    Max,
    /// collect(): gather values into a list.
    Collect,
    /// Sample standard deviation.
    StdDev,
    /// Population standard deviation.
    StdDevP,
    Percentile,
}
|
||||
|
||||
impl Query {
|
||||
pub fn new(statements: Vec<Statement>) -> Self {
|
||||
Self { statements }
|
||||
}
|
||||
|
||||
/// Check if query contains only read operations
|
||||
pub fn is_read_only(&self) -> bool {
|
||||
self.statements.iter().all(|stmt| {
|
||||
matches!(
|
||||
stmt,
|
||||
Statement::Match(_) | Statement::Return(_) | Statement::With(_)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Check if query contains hyperedges
|
||||
pub fn has_hyperedges(&self) -> bool {
|
||||
self.statements.iter().any(|stmt| match stmt {
|
||||
Statement::Match(m) => m
|
||||
.patterns
|
||||
.iter()
|
||||
.any(|p| matches!(p, Pattern::Hyperedge(_))),
|
||||
Statement::Create(c) => c
|
||||
.patterns
|
||||
.iter()
|
||||
.any(|p| matches!(p, Pattern::Hyperedge(_))),
|
||||
Statement::Merge(m) => matches!(&m.pattern, Pattern::Hyperedge(_)),
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Pattern {
|
||||
/// Get the arity of the pattern (number of nodes involved)
|
||||
pub fn arity(&self) -> usize {
|
||||
match self {
|
||||
Pattern::Node(_) => 1,
|
||||
Pattern::Relationship(_) => 2,
|
||||
Pattern::Path(_) => 2, // Simplified, could be variable
|
||||
Pattern::Hyperedge(h) => h.arity,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
/// Check if expression is constant (no variables)
|
||||
pub fn is_constant(&self) -> bool {
|
||||
match self {
|
||||
Expression::Integer(_)
|
||||
| Expression::Float(_)
|
||||
| Expression::String(_)
|
||||
| Expression::Boolean(_)
|
||||
| Expression::Null => true,
|
||||
Expression::List(items) => items.iter().all(|e| e.is_constant()),
|
||||
Expression::Map(map) => map.values().all(|e| e.is_constant()),
|
||||
Expression::BinaryOp { left, right, .. } => left.is_constant() && right.is_constant(),
|
||||
Expression::UnaryOp { operand, .. } => operand.is_constant(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if expression contains aggregation
|
||||
pub fn has_aggregation(&self) -> bool {
|
||||
match self {
|
||||
Expression::Aggregation { .. } => true,
|
||||
Expression::BinaryOp { left, right, .. } => {
|
||||
left.has_aggregation() || right.has_aggregation()
|
||||
}
|
||||
Expression::UnaryOp { operand, .. } => operand.has_aggregation(),
|
||||
Expression::FunctionCall { args, .. } => args.iter().any(|e| e.has_aggregation()),
|
||||
Expression::List(items) => items.iter().any(|e| e.has_aggregation()),
|
||||
Expression::Property { object, .. } => object.has_aggregation(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
616
vendor/ruvector/crates/rvlite/src/cypher/executor.rs
vendored
Normal file
616
vendor/ruvector/crates/rvlite/src/cypher/executor.rs
vendored
Normal file
@@ -0,0 +1,616 @@
|
||||
//! Cypher query executor for in-memory property graph
|
||||
|
||||
use super::ast::*;
|
||||
use super::graph_store::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Errors produced while executing a Cypher query.
#[derive(Debug, Error)]
pub enum ExecutionError {
    /// Underlying graph-store failure (auto-converted via `#[from]`).
    #[error("Graph error: {0}")]
    GraphError(#[from] GraphError),
    /// A referenced variable has no binding in the execution context.
    #[error("Variable not found: {0}")]
    VariableNotFound(String),
    /// A value had the wrong runtime type for the operation.
    #[error("Type error: {0}")]
    TypeError(String),
    /// The query uses a feature this executor does not implement yet.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),
    /// Generic execution failure.
    #[error("Execution error: {0}")]
    ExecutionError(String),
}
|
||||
|
||||
/// Execution context holding variable bindings
///
/// Maps variable names (e.g. `n`, `r`) to the nodes, edges, or values they
/// were bound to by earlier clauses.
#[derive(Debug, Clone)]
pub struct ExecutionContext {
    // Name -> bound value; later binds overwrite earlier ones.
    pub variables: HashMap<String, ContextValue>,
}
|
||||
|
||||
impl ExecutionContext {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
variables: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bind(&mut self, name: String, value: ContextValue) {
|
||||
self.variables.insert(name, value);
|
||||
}
|
||||
|
||||
pub fn get(&self, name: &str) -> Option<&ContextValue> {
|
||||
self.variables.get(name)
|
||||
}
|
||||
}
|
||||
|
||||
// Default delegates to `new()` so `ExecutionContext::default()` works in
// derive chains and generic code.
impl Default for ExecutionContext {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
/// Value in execution context (node, edge, or property value)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ContextValue {
    /// A matched or created graph node.
    Node(Node),
    /// A matched or created graph edge.
    Edge(Edge),
    /// A plain property value (literal or computed).
    Value(Value),
    /// A list of context values (e.g. collect() results).
    List(Vec<ContextValue>),
    /// A keyed map of context values.
    Map(HashMap<String, ContextValue>),
}
|
||||
|
||||
impl ContextValue {
|
||||
pub fn as_node(&self) -> Option<&Node> {
|
||||
match self {
|
||||
ContextValue::Node(n) => Some(n),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_value(&self) -> Option<&Value> {
|
||||
match self {
|
||||
ContextValue::Value(v) => Some(v),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Query execution result
///
/// A simple tabular result: named columns plus one map per row keyed by
/// column name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionResult {
    /// Column names in projection order.
    pub columns: Vec<String>,
    /// Result rows; each row maps column name -> value.
    pub rows: Vec<HashMap<String, ContextValue>>,
}
|
||||
|
||||
impl ExecutionResult {
|
||||
pub fn new(columns: Vec<String>) -> Self {
|
||||
Self {
|
||||
columns,
|
||||
rows: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_row(&mut self, row: HashMap<String, ContextValue>) {
|
||||
self.rows.push(row);
|
||||
}
|
||||
}
|
||||
|
||||
/// Cypher query executor
///
/// Borrows the property graph mutably for its lifetime so write statements
/// (CREATE/SET/DELETE) can modify it in place.
pub struct Executor<'a> {
    // Exclusive access to the graph being queried/mutated.
    graph: &'a mut PropertyGraph,
}
|
||||
|
||||
impl<'a> Executor<'a> {
    /// Wrap a mutable graph reference for executing one or more queries.
    pub fn new(graph: &'a mut PropertyGraph) -> Self {
        Self { graph }
    }

    /// Execute a parsed Cypher query
    ///
    /// Statements run in order against one shared context; the result of the
    /// LAST statement is returned. An empty statement list is an error.
    pub fn execute(&mut self, query: &Query) -> Result<ExecutionResult, ExecutionError> {
        let mut context = ExecutionContext::new();
        let mut result = None;

        for statement in &query.statements {
            result = Some(self.execute_statement(statement, &mut context)?);
        }

        result.ok_or_else(|| ExecutionError::ExecutionError("No statements to execute".to_string()))
    }

    /// Dispatch one statement to its handler.
    /// MERGE, WITH, and REMOVE are not implemented and return UnsupportedOperation.
    fn execute_statement(
        &mut self,
        statement: &Statement,
        context: &mut ExecutionContext,
    ) -> Result<ExecutionResult, ExecutionError> {
        match statement {
            Statement::Create(clause) => self.execute_create(clause, context),
            Statement::Match(clause) => self.execute_match(clause, context),
            Statement::Return(clause) => self.execute_return(clause, context),
            Statement::Set(clause) => self.execute_set(clause, context),
            Statement::Delete(clause) => self.execute_delete(clause, context),
            _ => Err(ExecutionError::UnsupportedOperation(format!(
                "Statement {:?} not yet implemented",
                statement
            ))),
        }
    }

    /// CREATE: materialize each pattern in the graph.
    /// Returns an empty (zero-column) result.
    fn execute_create(
        &mut self,
        clause: &CreateClause,
        context: &mut ExecutionContext,
    ) -> Result<ExecutionResult, ExecutionError> {
        for pattern in &clause.patterns {
            self.create_pattern(pattern, context)?;
        }

        Ok(ExecutionResult::new(vec![]))
    }

    /// Create one pattern; only bare nodes and simple relationships are
    /// supported (paths and hyperedges are rejected).
    fn create_pattern(
        &mut self,
        pattern: &Pattern,
        context: &mut ExecutionContext,
    ) -> Result<(), ExecutionError> {
        match pattern {
            Pattern::Node(node_pattern) => {
                let node = self.create_node(node_pattern)?;
                // Bind the freshly created node so later clauses can refer to it.
                if let Some(var) = &node_pattern.variable {
                    context.bind(var.clone(), ContextValue::Node(node));
                }
                Ok(())
            }
            Pattern::Relationship(rel_pattern) => {
                self.create_relationship(rel_pattern, context)?;
                Ok(())
            }
            _ => Err(ExecutionError::UnsupportedOperation(
                "Only simple node and relationship patterns supported in CREATE".to_string(),
            )),
        }
    }

    /// Create a node from a pattern and insert it into the graph.
    ///
    /// NOTE(review): property expressions are evaluated against a FRESH empty
    /// context, so they must be constant — they cannot reference variables
    /// bound by earlier clauses. Confirm this limitation is intentional.
    fn create_node(&mut self, pattern: &NodePattern) -> Result<Node, ExecutionError> {
        let id = self.graph.generate_node_id();
        let mut node = Node::new(id).with_labels(pattern.labels.clone());

        // Set properties
        if let Some(props) = &pattern.properties {
            for (key, expr) in props {
                let value = self.evaluate_expression(expr, &ExecutionContext::new())?;
                node.set_property(key.clone(), value);
            }
        }

        // add_node may assign/normalize the id; mirror it on the local copy
        // so the returned Node matches what is stored.
        let node_id = self.graph.add_node(node.clone());
        node.id = node_id;
        Ok(node)
    }

    /// Create a relationship, reusing already-bound endpoint nodes when the
    /// pattern's variables are present in the context, creating them otherwise.
    fn create_relationship(
        &mut self,
        pattern: &RelationshipPattern,
        context: &mut ExecutionContext,
    ) -> Result<(), ExecutionError> {
        // Get or create source node
        let from_node = if let Some(var) = &pattern.from.variable {
            if let Some(ContextValue::Node(n)) = context.get(var) {
                n.clone()
            } else {
                self.create_node(&pattern.from)?
            }
        } else {
            self.create_node(&pattern.from)?
        };

        // Get or create target node (only handle simple node targets for now)
        let to_node = match &*pattern.to {
            Pattern::Node(node_pattern) => {
                if let Some(var) = &node_pattern.variable {
                    if let Some(ContextValue::Node(n)) = context.get(var) {
                        n.clone()
                    } else {
                        self.create_node(node_pattern)?
                    }
                } else {
                    self.create_node(node_pattern)?
                }
            }
            _ => {
                return Err(ExecutionError::UnsupportedOperation(
                    "Complex relationship targets not yet supported".to_string(),
                ))
            }
        };

        // Create the edge
        // Untyped relationships default to RELATED_TO.
        let edge_type = pattern
            .rel_type
            .clone()
            .unwrap_or_else(|| "RELATED_TO".to_string());
        let edge_id = self.graph.generate_edge_id();
        let mut edge = Edge::new(edge_id, from_node.id.clone(), to_node.id.clone(), edge_type);

        // Set properties
        if let Some(props) = &pattern.properties {
            for (key, expr) in props {
                let value = self.evaluate_expression(expr, context)?;
                edge.set_property(key.clone(), value);
            }
        }

        // add_edge may assign/normalize the id; rebind with the stored id.
        let edge_id = self.graph.add_edge(edge.clone())?;
        if let Some(var) = &pattern.variable {
            edge.id = edge_id;
            context.bind(var.clone(), ContextValue::Edge(edge));
        }

        Ok(())
    }

    /// MATCH: find all pattern matches, apply the WHERE filter, then merge
    /// the surviving bindings into the shared context.
    ///
    /// NOTE(review): merging flattens all match rows into ONE context, so
    /// when multiple rows bind the same variable the last match wins —
    /// verify this is the intended (single-row) semantics.
    fn execute_match(
        &mut self,
        clause: &MatchClause,
        context: &mut ExecutionContext,
    ) -> Result<ExecutionResult, ExecutionError> {
        let mut matches = Vec::new();

        for pattern in &clause.patterns {
            let pattern_matches = self.match_pattern(pattern)?;
            matches.extend(pattern_matches);
        }

        // Apply WHERE filter if present
        if let Some(where_clause) = &clause.where_clause {
            matches.retain(|ctx| {
                // Evaluation errors are treated as "does not match".
                self.evaluate_condition(&where_clause.condition, ctx)
                    .unwrap_or(false)
            });
        }

        // Merge matches into context
        for match_ctx in matches {
            for (var, val) in match_ctx.variables {
                context.bind(var, val);
            }
        }

        Ok(ExecutionResult::new(vec![]))
    }

    /// Produce one candidate context per match of the pattern.
    /// Paths and hyperedges are not supported in MATCH yet.
    fn match_pattern(&self, pattern: &Pattern) -> Result<Vec<ExecutionContext>, ExecutionError> {
        match pattern {
            Pattern::Node(node_pattern) => self.match_node_pattern(node_pattern),
            Pattern::Relationship(rel_pattern) => self.match_relationship_pattern(rel_pattern),
            _ => Err(ExecutionError::UnsupportedOperation(
                "Pattern type not yet supported in MATCH".to_string(),
            )),
        }
    }

    /// Match a node pattern: filter candidates by label, then by exact
    /// property equality; each hit yields a context binding the variable.
    ///
    /// NOTE(review): with multiple labels, candidates are gathered per label
    /// and concatenated — a node carrying several of the requested labels
    /// could be returned more than once. Confirm whether dedup is needed.
    fn match_node_pattern(
        &self,
        pattern: &NodePattern,
    ) -> Result<Vec<ExecutionContext>, ExecutionError> {
        let mut contexts = Vec::new();

        // Find nodes matching labels
        let candidates: Vec<&Node> = if pattern.labels.is_empty() {
            self.graph.find_nodes(|_| true)
        } else {
            let mut nodes = Vec::new();
            for label in &pattern.labels {
                nodes.extend(self.graph.find_nodes_by_label(label));
            }
            nodes
        };

        // Filter by properties
        for node in candidates {
            if let Some(props) = &pattern.properties {
                let mut matches = true;
                for (key, expr) in props {
                    // Property constraints are constant expressions (empty context).
                    let expected_value =
                        self.evaluate_expression(expr, &ExecutionContext::new())?;
                    if node.get_property(key) != Some(&expected_value) {
                        matches = false;
                        break;
                    }
                }
                if !matches {
                    continue;
                }
            }

            let mut ctx = ExecutionContext::new();
            if let Some(var) = &pattern.variable {
                ctx.bind(var.clone(), ContextValue::Node(node.clone()));
            }
            contexts.push(ctx);
        }

        Ok(contexts)
    }

    /// Match a relationship pattern: for each source-node match, walk edges
    /// in the requested direction, filter by type/properties, and bind the
    /// edge and target node.
    ///
    /// NOTE(review): anonymous source nodes (no variable) are skipped via
    /// `continue`, so `()-[r]->(b)` yields no matches — confirm intended.
    fn match_relationship_pattern(
        &self,
        pattern: &RelationshipPattern,
    ) -> Result<Vec<ExecutionContext>, ExecutionError> {
        let mut contexts = Vec::new();

        // Match source nodes
        let from_contexts = self.match_node_pattern(&pattern.from)?;

        for from_ctx in from_contexts {
            // Get the source node
            let from_node = if let Some(var) = &pattern.from.variable {
                from_ctx
                    .get(var)
                    .and_then(|v| v.as_node())
                    .ok_or_else(|| ExecutionError::VariableNotFound(var.clone()))?
            } else {
                continue;
            };

            // Find matching edges
            let edges = match pattern.direction {
                Direction::Outgoing => self.graph.get_outgoing_edges(&from_node.id),
                Direction::Incoming => self.graph.get_incoming_edges(&from_node.id),
                Direction::Undirected => {
                    // Undirected matches edges in either orientation.
                    let mut all = self.graph.get_outgoing_edges(&from_node.id);
                    all.extend(self.graph.get_incoming_edges(&from_node.id));
                    all
                }
            };

            for edge in edges {
                // Filter by type
                if let Some(rel_type) = &pattern.rel_type {
                    if &edge.edge_type != rel_type {
                        continue;
                    }
                }

                // Filter by properties
                if let Some(props) = &pattern.properties {
                    let mut matches = true;
                    for (key, expr) in props {
                        let expected_value =
                            self.evaluate_expression(expr, &ExecutionContext::new())?;
                        if edge.get_property(key) != Some(&expected_value) {
                            matches = false;
                            break;
                        }
                    }
                    if !matches {
                        continue;
                    }
                }

                // Get target node
                // For incoming edges the "other end" is the edge's source.
                let to_node_id = if pattern.direction == Direction::Incoming {
                    &edge.from
                } else {
                    &edge.to
                };

                if let Some(to_node) = self.graph.get_node(to_node_id) {
                    let mut ctx = from_ctx.clone();
                    if let Some(var) = &pattern.variable {
                        ctx.bind(var.clone(), ContextValue::Edge(edge.clone()));
                    }

                    // Bind target node if it's a simple node pattern
                    if let Pattern::Node(to_pattern) = &*pattern.to {
                        if let Some(var) = &to_pattern.variable {
                            ctx.bind(var.clone(), ContextValue::Node(to_node.clone()));
                        }
                    }

                    contexts.push(ctx);
                }
            }
        }

        Ok(contexts)
    }

    /// RETURN: project the requested items from the current context into a
    /// single-row result. Column name comes from the alias, else the variable
    /// name, else the placeholder "?column?".
    fn execute_return(
        &self,
        clause: &ReturnClause,
        context: &ExecutionContext,
    ) -> Result<ExecutionResult, ExecutionError> {
        let mut columns = Vec::new();
        let mut row = HashMap::new();

        for item in &clause.items {
            let col_name = item
                .alias
                .clone()
                .unwrap_or_else(|| match &item.expression {
                    Expression::Variable(var) => var.clone(),
                    _ => "?column?".to_string(),
                });

            columns.push(col_name.clone());

            let value = self.evaluate_expression_ctx(&item.expression, context)?;
            row.insert(col_name, value);
        }

        let mut result = ExecutionResult::new(columns);
        result.add_row(row);

        Ok(result)
    }

    /// SET: apply property assignments to nodes bound in the context.
    /// Only `SET n.prop = value` is supported; variable/label items error.
    /// Non-node bindings and missing nodes are silently skipped.
    fn execute_set(
        &mut self,
        clause: &SetClause,
        context: &ExecutionContext,
    ) -> Result<ExecutionResult, ExecutionError> {
        for item in &clause.items {
            match item {
                SetItem::Property {
                    variable,
                    property,
                    value,
                } => {
                    let val = self.evaluate_expression(value, context)?;
                    if let Some(ContextValue::Node(node)) = context.get(variable) {
                        // Mutate the stored node, not the context's stale clone.
                        if let Some(node_mut) = self.graph.get_node_mut(&node.id) {
                            node_mut.set_property(property.clone(), val);
                        }
                    }
                }
                _ => {
                    return Err(ExecutionError::UnsupportedOperation(
                        "Only property SET supported".to_string(),
                    ))
                }
            }
        }

        Ok(ExecutionResult::new(vec![]))
    }

    /// DELETE: remove bound nodes (requires DETACH) and edges named by
    /// variable expressions. Non-variable expressions and unbound variables
    /// are ignored.
    fn execute_delete(
        &mut self,
        clause: &DeleteClause,
        context: &ExecutionContext,
    ) -> Result<ExecutionResult, ExecutionError> {
        for expr in &clause.expressions {
            if let Expression::Variable(var) = expr {
                if let Some(ctx_val) = context.get(var) {
                    match ctx_val {
                        ContextValue::Node(node) => {
                            if clause.detach {
                                self.graph.delete_node(&node.id)?;
                            } else {
                                // Plain DELETE on a node is rejected outright;
                                // relationship presence is not actually checked here.
                                return Err(ExecutionError::ExecutionError(
                                    "Cannot delete node with relationships without DETACH"
                                        .to_string(),
                                ));
                            }
                        }
                        ContextValue::Edge(edge) => {
                            self.graph.delete_edge(&edge.id)?;
                        }
                        _ => {}
                    }
                }
            }
        }

        Ok(ExecutionResult::new(vec![]))
    }

    /// Evaluate an expression to a plain property `Value`.
    /// Supports literals, value-typed variables, and single-level
    /// `variable.property` access on nodes; everything else errors.
    fn evaluate_expression(
        &self,
        expr: &Expression,
        context: &ExecutionContext,
    ) -> Result<Value, ExecutionError> {
        match expr {
            Expression::Integer(n) => Ok(Value::Integer(*n)),
            Expression::Float(f) => Ok(Value::Float(*f)),
            Expression::String(s) => Ok(Value::String(s.clone())),
            Expression::Boolean(b) => Ok(Value::Boolean(*b)),
            Expression::Null => Ok(Value::Null),
            Expression::Variable(var) => {
                // Only Value-typed bindings can be used as scalar values here;
                // a node/edge-typed binding reports VariableNotFound.
                if let Some(ContextValue::Value(v)) = context.get(var) {
                    Ok(v.clone())
                } else {
                    Err(ExecutionError::VariableNotFound(var.clone()))
                }
            }
            Expression::Property { object, property } => {
                if let Expression::Variable(var) = &**object {
                    if let Some(ContextValue::Node(node)) = context.get(var) {
                        // Missing property evaluates to Null, matching Cypher semantics.
                        Ok(node.get_property(property).cloned().unwrap_or(Value::Null))
                    } else {
                        Err(ExecutionError::VariableNotFound(var.clone()))
                    }
                } else {
                    Err(ExecutionError::UnsupportedOperation(
                        "Nested property access not supported".to_string(),
                    ))
                }
            }
            _ => Err(ExecutionError::UnsupportedOperation(format!(
                "Expression {:?} not yet implemented",
                expr
            ))),
        }
    }

    /// Evaluate an expression to a `ContextValue`, preserving node/edge
    /// bindings for RETURN (unlike `evaluate_expression`, which only yields
    /// scalar values).
    fn evaluate_expression_ctx(
        &self,
        expr: &Expression,
        context: &ExecutionContext,
    ) -> Result<ContextValue, ExecutionError> {
        match expr {
            Expression::Variable(var) => context
                .get(var)
                .cloned()
                .ok_or_else(|| ExecutionError::VariableNotFound(var.clone())),
            Expression::Property { object, property } => {
                if let Expression::Variable(var) = &**object {
                    if let Some(ContextValue::Node(node)) = context.get(var) {
                        Ok(ContextValue::Value(
                            node.get_property(property).cloned().unwrap_or(Value::Null),
                        ))
                    } else {
                        Err(ExecutionError::VariableNotFound(var.clone()))
                    }
                } else {
                    Err(ExecutionError::UnsupportedOperation(
                        "Nested property access not supported".to_string(),
                    ))
                }
            }
            _ => {
                // Fall back to scalar evaluation and wrap the result.
                let val = self.evaluate_expression(expr, context)?;
                Ok(ContextValue::Value(val))
            }
        }
    }

    /// Evaluate a WHERE condition to a boolean.
    /// Supports boolean literals and =, <>, >, < comparisons; ordered
    /// comparisons only work on integers and are false for other types.
    fn evaluate_condition(
        &self,
        expr: &Expression,
        context: &ExecutionContext,
    ) -> Result<bool, ExecutionError> {
        match expr {
            Expression::Boolean(b) => Ok(*b),
            Expression::BinaryOp { left, op, right } => {
                let left_val = self.evaluate_expression(left, context)?;
                let right_val = self.evaluate_expression(right, context)?;

                match op {
                    BinaryOperator::Equal => Ok(left_val == right_val),
                    BinaryOperator::NotEqual => Ok(left_val != right_val),
                    BinaryOperator::GreaterThan => {
                        // Only integer comparisons supported; others compare false.
                        if let (Some(l), Some(r)) = (left_val.as_i64(), right_val.as_i64()) {
                            Ok(l > r)
                        } else {
                            Ok(false)
                        }
                    }
                    BinaryOperator::LessThan => {
                        if let (Some(l), Some(r)) = (left_val.as_i64(), right_val.as_i64()) {
                            Ok(l < r)
                        } else {
                            Ok(false)
                        }
                    }
                    _ => Err(ExecutionError::UnsupportedOperation(format!(
                        "Operator {:?} not implemented",
                        op
                    ))),
                }
            }
            _ => Err(ExecutionError::UnsupportedOperation(
                "Complex conditions not yet supported".to_string(),
            )),
        }
    }
}
|
||||
438
vendor/ruvector/crates/rvlite/src/cypher/graph_store.rs
vendored
Normal file
438
vendor/ruvector/crates/rvlite/src/cypher/graph_store.rs
vendored
Normal file
@@ -0,0 +1,438 @@
|
||||
//! In-memory property graph storage for WASM-compatible Cypher execution
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use thiserror::Error;
|
||||
|
||||
/// Identifier of a node in the property graph.
pub type NodeId = String;
/// Identifier of an edge in the property graph.
pub type EdgeId = String;
|
||||
|
||||
/// Errors produced by the in-memory graph store.
#[derive(Debug, Error)]
pub enum GraphError {
    /// Lookup or mutation referenced a node id that does not exist.
    #[error("Node not found: {0}")]
    NodeNotFound(NodeId),
    /// Lookup or mutation referenced an edge id that does not exist.
    #[error("Edge not found: {0}")]
    EdgeNotFound(EdgeId),
    /// The requested operation is invalid for the current graph state.
    #[error("Invalid operation: {0}")]
    InvalidOperation(String),
}
|
||||
|
||||
/// Property value that can be stored in nodes/edges
///
/// Mirrors the Cypher/JSON value model; `List` and `Map` nest arbitrarily.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Value {
    Null,
    Boolean(bool),
    Integer(i64),
    Float(f64),
    String(String),
    List(Vec<Value>),
    Map(HashMap<String, Value>),
}
|
||||
|
||||
impl Value {
|
||||
pub fn as_i64(&self) -> Option<i64> {
|
||||
match self {
|
||||
Value::Integer(n) => Some(*n),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_f64(&self) -> Option<f64> {
|
||||
match self {
|
||||
Value::Float(f) => Some(*f),
|
||||
Value::Integer(i) => Some(*i as f64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> Option<&str> {
|
||||
match self {
|
||||
Value::String(s) => Some(s),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_bool(&self) -> Option<bool> {
|
||||
match self {
|
||||
Value::Boolean(b) => Some(*b),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ergonomic conversions so callers can write e.g.
// `node.with_property("age".to_string(), 42.into())`.

impl From<bool> for Value {
    fn from(b: bool) -> Self {
        Value::Boolean(b)
    }
}

impl From<i64> for Value {
    fn from(n: i64) -> Self {
        Value::Integer(n)
    }
}

impl From<f64> for Value {
    fn from(f: f64) -> Self {
        Value::Float(f)
    }
}

impl From<String> for Value {
    fn from(s: String) -> Self {
        Value::String(s)
    }
}

// Allocates an owned copy; convenient for string literals.
impl From<&str> for Value {
    fn from(s: &str) -> Self {
        Value::String(s.to_string())
    }
}
|
||||
|
||||
/// Node in the property graph
///
/// Plain data holder; all indexing lives in [`PropertyGraph`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    // Unique id; typically produced by `PropertyGraph::generate_node_id`.
    pub id: NodeId,
    // Zero or more labels (e.g. "Person"); order is insertion order.
    pub labels: Vec<String>,
    // Arbitrary key/value properties.
    pub properties: HashMap<String, Value>,
}
|
||||
|
||||
impl Node {
|
||||
pub fn new(id: NodeId) -> Self {
|
||||
Self {
|
||||
id,
|
||||
labels: Vec::new(),
|
||||
properties: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_label(mut self, label: String) -> Self {
|
||||
self.labels.push(label);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_labels(mut self, labels: Vec<String>) -> Self {
|
||||
self.labels = labels;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_property(mut self, key: String, value: Value) -> Self {
|
||||
self.properties.insert(key, value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn has_label(&self, label: &str) -> bool {
|
||||
self.labels.iter().any(|l| l == label)
|
||||
}
|
||||
|
||||
pub fn get_property(&self, key: &str) -> Option<&Value> {
|
||||
self.properties.get(key)
|
||||
}
|
||||
|
||||
pub fn set_property(&mut self, key: String, value: Value) {
|
||||
self.properties.insert(key, value);
|
||||
}
|
||||
|
||||
pub fn remove_property(&mut self, key: &str) -> Option<Value> {
|
||||
self.properties.remove(key)
|
||||
}
|
||||
|
||||
pub fn add_label(&mut self, label: String) {
|
||||
if !self.has_label(&label) {
|
||||
self.labels.push(label);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_label(&mut self, label: &str) {
|
||||
self.labels.retain(|l| l != label);
|
||||
}
|
||||
}
|
||||
|
||||
/// Edge/Relationship in the property graph
///
/// Directed `from -> to`; both endpoints must exist when the edge is added
/// via `PropertyGraph::add_edge`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    // Unique id; typically produced by `PropertyGraph::generate_edge_id`.
    pub id: EdgeId,
    // Source node id.
    pub from: NodeId,
    // Target node id.
    pub to: NodeId,
    // Relationship type (e.g. "KNOWS").
    pub edge_type: String,
    // Arbitrary key/value properties.
    pub properties: HashMap<String, Value>,
}
|
||||
|
||||
impl Edge {
|
||||
pub fn new(id: EdgeId, from: NodeId, to: NodeId, edge_type: String) -> Self {
|
||||
Self {
|
||||
id,
|
||||
from,
|
||||
to,
|
||||
edge_type,
|
||||
properties: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_property(mut self, key: String, value: Value) -> Self {
|
||||
self.properties.insert(key, value);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn get_property(&self, key: &str) -> Option<&Value> {
|
||||
self.properties.get(key)
|
||||
}
|
||||
|
||||
pub fn set_property(&mut self, key: String, value: Value) {
|
||||
self.properties.insert(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
/// In-memory property graph store
///
/// `nodes`/`edges` are the source of truth; the remaining maps are secondary
/// indexes that must be kept in sync by every mutation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PropertyGraph {
    nodes: HashMap<NodeId, Node>,
    edges: HashMap<EdgeId, Edge>,
    // Indexes for faster lookups
    // label -> ids of nodes carrying that label
    label_index: HashMap<String, Vec<NodeId>>,
    // edge type -> ids of edges of that type
    edge_type_index: HashMap<String, Vec<EdgeId>>,
    // node id -> ids of edges leaving it
    outgoing_edges: HashMap<NodeId, Vec<EdgeId>>,
    // node id -> ids of edges arriving at it
    incoming_edges: HashMap<NodeId, Vec<EdgeId>>,
    // Monotonic counters backing generate_node_id / generate_edge_id.
    next_node_id: usize,
    next_edge_id: usize,
}
|
||||
|
||||
impl PropertyGraph {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
nodes: HashMap::new(),
|
||||
edges: HashMap::new(),
|
||||
label_index: HashMap::new(),
|
||||
edge_type_index: HashMap::new(),
|
||||
outgoing_edges: HashMap::new(),
|
||||
incoming_edges: HashMap::new(),
|
||||
next_node_id: 0,
|
||||
next_edge_id: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a unique node ID
|
||||
pub fn generate_node_id(&mut self) -> NodeId {
|
||||
let id = format!("n{}", self.next_node_id);
|
||||
self.next_node_id += 1;
|
||||
id
|
||||
}
|
||||
|
||||
/// Generate a unique edge ID
|
||||
pub fn generate_edge_id(&mut self) -> EdgeId {
|
||||
let id = format!("e{}", self.next_edge_id);
|
||||
self.next_edge_id += 1;
|
||||
id
|
||||
}
|
||||
|
||||
/// Add a node to the graph
|
||||
pub fn add_node(&mut self, node: Node) -> NodeId {
|
||||
let id = node.id.clone();
|
||||
|
||||
// Update label index
|
||||
for label in &node.labels {
|
||||
self.label_index
|
||||
.entry(label.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(id.clone());
|
||||
}
|
||||
|
||||
self.nodes.insert(id.clone(), node);
|
||||
id
|
||||
}
|
||||
|
||||
/// Get a node by ID
|
||||
pub fn get_node(&self, id: &NodeId) -> Option<&Node> {
|
||||
self.nodes.get(id)
|
||||
}
|
||||
|
||||
/// Get a mutable reference to a node
|
||||
pub fn get_node_mut(&mut self, id: &NodeId) -> Option<&mut Node> {
|
||||
self.nodes.get_mut(id)
|
||||
}
|
||||
|
||||
/// Find nodes by label
|
||||
pub fn find_nodes_by_label(&self, label: &str) -> Vec<&Node> {
|
||||
if let Some(node_ids) = self.label_index.get(label) {
|
||||
node_ids
|
||||
.iter()
|
||||
.filter_map(|id| self.nodes.get(id))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Find all nodes matching a predicate
|
||||
pub fn find_nodes<F>(&self, predicate: F) -> Vec<&Node>
|
||||
where
|
||||
F: Fn(&Node) -> bool,
|
||||
{
|
||||
self.nodes.values().filter(|n| predicate(n)).collect()
|
||||
}
|
||||
|
||||
/// Add an edge to the graph
|
||||
pub fn add_edge(&mut self, edge: Edge) -> Result<EdgeId, GraphError> {
|
||||
// Verify nodes exist
|
||||
if !self.nodes.contains_key(&edge.from) {
|
||||
return Err(GraphError::NodeNotFound(edge.from.clone()));
|
||||
}
|
||||
if !self.nodes.contains_key(&edge.to) {
|
||||
return Err(GraphError::NodeNotFound(edge.to.clone()));
|
||||
}
|
||||
|
||||
let id = edge.id.clone();
|
||||
let from = edge.from.clone();
|
||||
let to = edge.to.clone();
|
||||
let edge_type = edge.edge_type.clone();
|
||||
|
||||
// Update indexes
|
||||
self.edge_type_index
|
||||
.entry(edge_type)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(id.clone());
|
||||
|
||||
self.outgoing_edges
|
||||
.entry(from)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(id.clone());
|
||||
|
||||
self.incoming_edges
|
||||
.entry(to)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(id.clone());
|
||||
|
||||
self.edges.insert(id.clone(), edge);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Get an edge by ID
|
||||
pub fn get_edge(&self, id: &EdgeId) -> Option<&Edge> {
|
||||
self.edges.get(id)
|
||||
}
|
||||
|
||||
/// Get outgoing edges from a node
|
||||
pub fn get_outgoing_edges(&self, node_id: &NodeId) -> Vec<&Edge> {
|
||||
if let Some(edge_ids) = self.outgoing_edges.get(node_id) {
|
||||
edge_ids
|
||||
.iter()
|
||||
.filter_map(|id| self.edges.get(id))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get incoming edges to a node
|
||||
pub fn get_incoming_edges(&self, node_id: &NodeId) -> Vec<&Edge> {
|
||||
if let Some(edge_ids) = self.incoming_edges.get(node_id) {
|
||||
edge_ids
|
||||
.iter()
|
||||
.filter_map(|id| self.edges.get(id))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get all edges of a specific type
|
||||
pub fn find_edges_by_type(&self, edge_type: &str) -> Vec<&Edge> {
|
||||
if let Some(edge_ids) = self.edge_type_index.get(edge_type) {
|
||||
edge_ids
|
||||
.iter()
|
||||
.filter_map(|id| self.edges.get(id))
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Delete a node and its connected edges
|
||||
pub fn delete_node(&mut self, id: &NodeId) -> Result<(), GraphError> {
|
||||
let node = self
|
||||
.nodes
|
||||
.remove(id)
|
||||
.ok_or_else(|| GraphError::NodeNotFound(id.clone()))?;
|
||||
|
||||
// Remove from label index
|
||||
for label in &node.labels {
|
||||
if let Some(ids) = self.label_index.get_mut(label) {
|
||||
ids.retain(|nid| nid != id);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove connected edges
|
||||
if let Some(edge_ids) = self.outgoing_edges.remove(id) {
|
||||
for edge_id in edge_ids {
|
||||
self.edges.remove(&edge_id);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(edge_ids) = self.incoming_edges.remove(id) {
|
||||
for edge_id in edge_ids {
|
||||
self.edges.remove(&edge_id);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete an edge
|
||||
pub fn delete_edge(&mut self, id: &EdgeId) -> Result<(), GraphError> {
|
||||
let edge = self
|
||||
.edges
|
||||
.remove(id)
|
||||
.ok_or_else(|| GraphError::EdgeNotFound(id.clone()))?;
|
||||
|
||||
// Remove from type index
|
||||
if let Some(ids) = self.edge_type_index.get_mut(&edge.edge_type) {
|
||||
ids.retain(|eid| eid != id);
|
||||
}
|
||||
|
||||
// Remove from node edge lists
|
||||
if let Some(ids) = self.outgoing_edges.get_mut(&edge.from) {
|
||||
ids.retain(|eid| eid != id);
|
||||
}
|
||||
|
||||
if let Some(ids) = self.incoming_edges.get_mut(&edge.to) {
|
||||
ids.retain(|eid| eid != id);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get statistics about the graph
|
||||
pub fn stats(&self) -> GraphStats {
|
||||
GraphStats {
|
||||
node_count: self.nodes.len(),
|
||||
edge_count: self.edges.len(),
|
||||
label_count: self.label_index.len(),
|
||||
edge_type_count: self.edge_type_index.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get all nodes in the graph
|
||||
pub fn all_nodes(&self) -> Vec<&Node> {
|
||||
self.nodes.values().collect()
|
||||
}
|
||||
|
||||
/// Get all edges in the graph
|
||||
pub fn all_edges(&self) -> Vec<&Edge> {
|
||||
self.edges.values().collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PropertyGraph {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Snapshot of graph sizes as reported by `PropertyGraph::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphStats {
    // Number of live nodes.
    pub node_count: usize,
    // Number of live edges.
    pub edge_count: usize,
    // Number of distinct labels seen (index key count).
    pub label_count: usize,
    // Number of distinct edge types seen (index key count).
    pub edge_type_count: usize,
}
|
||||
607
vendor/ruvector/crates/rvlite/src/cypher/lexer.rs
vendored
Normal file
607
vendor/ruvector/crates/rvlite/src/cypher/lexer.rs
vendored
Normal file
@@ -0,0 +1,607 @@
|
||||
//! Lexical analyzer (tokenizer) for Cypher query language
|
||||
//!
|
||||
//! Hand-rolled lexer for WASM compatibility - no external dependencies.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
use std::iter::Peekable;
|
||||
use std::str::Chars;
|
||||
|
||||
/// Token with kind and location information
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Token {
    // What kind of token this is (keyword, literal, operator, ...).
    pub kind: TokenKind,
    // Source text of the token. For string literals this is the *unescaped*
    // value, not the raw quoted source.
    pub lexeme: String,
    // Position of the token's first character.
    pub position: Position,
}
|
||||
|
||||
/// Source position for error reporting
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Position {
    // 1-based line number.
    pub line: usize,
    // 1-based column number.
    pub column: usize,
    // Byte offset into the input string.
    pub offset: usize,
}
|
||||
|
||||
/// Token kinds
///
/// Compound keywords (OPTIONAL MATCH, DETACH DELETE, ORDER BY, ON CREATE,
/// ON MATCH) are fused into single variants by the lexer.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TokenKind {
    // Keywords
    Match,
    OptionalMatch,
    Where,
    Return,
    Create,
    Merge,
    Delete,
    DetachDelete,
    Set,
    Remove,
    With,
    OrderBy,
    Limit,
    Skip,
    Distinct,
    As,
    Asc,
    Desc,
    Case,
    When,
    Then,
    Else,
    End,
    And,
    Or,
    Xor,
    Not,
    In,
    Is,
    Null,
    True,
    False,
    OnCreate,
    OnMatch,

    // Identifiers and literals
    Identifier(String),
    Integer(i64),
    Float(f64),
    String(String),

    // Operators
    Plus,
    // NOTE(review): the lexer emits `Dash` (or `Arrow`) for '-', never
    // `Minus` — confirm whether the parser distinguishes them.
    Minus,
    Star,
    Slash,
    Percent,
    Caret,
    Equal,
    NotEqual,
    LessThan,
    LessThanOrEqual,
    GreaterThan,
    GreaterThanOrEqual,
    Arrow,     // ->
    LeftArrow, // <-
    Dash,      // -

    // Delimiters
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    LeftBrace,
    RightBrace,
    Comma,
    Dot,
    Colon,
    Semicolon,
    Pipe,

    // Special
    DotDot, // ..
    Eof,
}
|
||||
|
||||
impl fmt::Display for TokenKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
TokenKind::Identifier(s) => write!(f, "identifier '{}'", s),
|
||||
TokenKind::Integer(n) => write!(f, "integer {}", n),
|
||||
TokenKind::Float(n) => write!(f, "float {}", n),
|
||||
TokenKind::String(s) => write!(f, "string \"{}\"", s),
|
||||
_ => write!(f, "{:?}", self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexer error
///
/// Raised for unterminated strings and unexpected characters.
#[derive(Debug, Clone)]
pub struct LexerError {
    // What went wrong, in plain English.
    pub message: String,
    // Where in the input it went wrong.
    pub position: Position,
}
|
||||
|
||||
impl fmt::Display for LexerError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Lexer error at {}:{}: {}",
|
||||
self.position.line, self.position.column, self.message
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for LexerError {}
|
||||
|
||||
/// Hand-rolled Cypher lexer
///
/// Walks `input` once. `chars` is the live iterator and `current_offset`
/// mirrors its byte position so lexemes can be sliced out of `input`.
pub struct Lexer<'a> {
    // Full source text, kept for slicing lexemes and for lookahead.
    input: &'a str,
    // Char iterator over `input`; always positioned at `current_offset`.
    chars: Peekable<Chars<'a>>,
    // Line/column/offset of the next character to be consumed.
    position: Position,
    // Byte offset into `input` matching the progress of `chars`.
    current_offset: usize,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(input: &'a str) -> Self {
|
||||
Self {
|
||||
input,
|
||||
chars: input.chars().peekable(),
|
||||
position: Position {
|
||||
line: 1,
|
||||
column: 1,
|
||||
offset: 0,
|
||||
},
|
||||
current_offset: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().copied()
|
||||
}
|
||||
|
||||
fn advance(&mut self) -> Option<char> {
|
||||
let ch = self.chars.next()?;
|
||||
self.current_offset += ch.len_utf8();
|
||||
if ch == '\n' {
|
||||
self.position.line += 1;
|
||||
self.position.column = 1;
|
||||
} else {
|
||||
self.position.column += 1;
|
||||
}
|
||||
self.position.offset = self.current_offset;
|
||||
Some(ch)
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(ch) = self.peek() {
|
||||
if ch.is_whitespace() {
|
||||
self.advance();
|
||||
} else if ch == '/' && self.lookahead(1) == Some('/') {
|
||||
// Skip line comments
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '\n' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn lookahead(&self, n: usize) -> Option<char> {
|
||||
self.input[self.current_offset..].chars().nth(n)
|
||||
}
|
||||
|
||||
fn make_token(&self, kind: TokenKind, lexeme: &str, start_pos: Position) -> Token {
|
||||
Token {
|
||||
kind,
|
||||
lexeme: lexeme.to_string(),
|
||||
position: start_pos,
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_string(&mut self, quote: char) -> Result<Token, LexerError> {
|
||||
let start = self.position;
|
||||
self.advance(); // consume opening quote
|
||||
let mut value = String::new();
|
||||
|
||||
while let Some(ch) = self.peek() {
|
||||
if ch == quote {
|
||||
self.advance(); // consume closing quote
|
||||
return Ok(self.make_token(TokenKind::String(value.clone()), &value, start));
|
||||
} else if ch == '\\' {
|
||||
self.advance();
|
||||
match self.peek() {
|
||||
Some('n') => {
|
||||
value.push('\n');
|
||||
self.advance();
|
||||
}
|
||||
Some('t') => {
|
||||
value.push('\t');
|
||||
self.advance();
|
||||
}
|
||||
Some('r') => {
|
||||
value.push('\r');
|
||||
self.advance();
|
||||
}
|
||||
Some('\\') => {
|
||||
value.push('\\');
|
||||
self.advance();
|
||||
}
|
||||
Some(c) if c == quote => {
|
||||
value.push(c);
|
||||
self.advance();
|
||||
}
|
||||
_ => value.push('\\'),
|
||||
}
|
||||
} else {
|
||||
value.push(ch);
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
Err(LexerError {
|
||||
message: "Unterminated string".to_string(),
|
||||
position: start,
|
||||
})
|
||||
}
|
||||
|
||||
fn scan_number(&mut self) -> Token {
|
||||
let start = self.position;
|
||||
let start_offset = self.current_offset;
|
||||
|
||||
while let Some(ch) = self.peek() {
|
||||
if ch.is_ascii_digit() {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for decimal
|
||||
if self.peek() == Some('.')
|
||||
&& self
|
||||
.lookahead(1)
|
||||
.map(|c| c.is_ascii_digit())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
self.advance(); // consume '.'
|
||||
while let Some(ch) = self.peek() {
|
||||
if ch.is_ascii_digit() {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let lexeme = &self.input[start_offset..self.current_offset];
|
||||
let value: f64 = lexeme.parse().unwrap_or(0.0);
|
||||
return self.make_token(TokenKind::Float(value), lexeme, start);
|
||||
}
|
||||
|
||||
// Check for exponent
|
||||
if matches!(self.peek(), Some('e') | Some('E')) {
|
||||
self.advance();
|
||||
if matches!(self.peek(), Some('+') | Some('-')) {
|
||||
self.advance();
|
||||
}
|
||||
while let Some(ch) = self.peek() {
|
||||
if ch.is_ascii_digit() {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let lexeme = &self.input[start_offset..self.current_offset];
|
||||
let value: f64 = lexeme.parse().unwrap_or(0.0);
|
||||
return self.make_token(TokenKind::Float(value), lexeme, start);
|
||||
}
|
||||
|
||||
let lexeme = &self.input[start_offset..self.current_offset];
|
||||
let value: i64 = lexeme.parse().unwrap_or(0);
|
||||
self.make_token(TokenKind::Integer(value), lexeme, start)
|
||||
}
|
||||
|
||||
fn scan_identifier(&mut self) -> Token {
|
||||
let start = self.position;
|
||||
let start_offset = self.current_offset;
|
||||
|
||||
while let Some(ch) = self.peek() {
|
||||
if ch.is_ascii_alphanumeric() || ch == '_' {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let lexeme = &self.input[start_offset..self.current_offset];
|
||||
let kind = match lexeme.to_uppercase().as_str() {
|
||||
"MATCH" => TokenKind::Match,
|
||||
"OPTIONAL" if self.peek_keyword("MATCH") => {
|
||||
self.skip_whitespace();
|
||||
self.scan_keyword("MATCH");
|
||||
TokenKind::OptionalMatch
|
||||
}
|
||||
"WHERE" => TokenKind::Where,
|
||||
"RETURN" => TokenKind::Return,
|
||||
"CREATE" => TokenKind::Create,
|
||||
"MERGE" => TokenKind::Merge,
|
||||
"DELETE" => TokenKind::Delete,
|
||||
"DETACH" if self.peek_keyword("DELETE") => {
|
||||
self.skip_whitespace();
|
||||
self.scan_keyword("DELETE");
|
||||
TokenKind::DetachDelete
|
||||
}
|
||||
"SET" => TokenKind::Set,
|
||||
"REMOVE" => TokenKind::Remove,
|
||||
"WITH" => TokenKind::With,
|
||||
"ORDER" if self.peek_keyword("BY") => {
|
||||
self.skip_whitespace();
|
||||
self.scan_keyword("BY");
|
||||
TokenKind::OrderBy
|
||||
}
|
||||
"LIMIT" => TokenKind::Limit,
|
||||
"SKIP" => TokenKind::Skip,
|
||||
"DISTINCT" => TokenKind::Distinct,
|
||||
"AS" => TokenKind::As,
|
||||
"ASC" => TokenKind::Asc,
|
||||
"DESC" => TokenKind::Desc,
|
||||
"CASE" => TokenKind::Case,
|
||||
"WHEN" => TokenKind::When,
|
||||
"THEN" => TokenKind::Then,
|
||||
"ELSE" => TokenKind::Else,
|
||||
"END" => TokenKind::End,
|
||||
"AND" => TokenKind::And,
|
||||
"OR" => TokenKind::Or,
|
||||
"XOR" => TokenKind::Xor,
|
||||
"NOT" => TokenKind::Not,
|
||||
"IN" => TokenKind::In,
|
||||
"IS" => TokenKind::Is,
|
||||
"NULL" => TokenKind::Null,
|
||||
"TRUE" => TokenKind::True,
|
||||
"FALSE" => TokenKind::False,
|
||||
"ON" if self.peek_keyword("CREATE") => {
|
||||
self.skip_whitespace();
|
||||
self.scan_keyword("CREATE");
|
||||
TokenKind::OnCreate
|
||||
}
|
||||
_ if lexeme.to_uppercase() == "ON" && self.peek_keyword("MATCH") => {
|
||||
self.skip_whitespace();
|
||||
self.scan_keyword("MATCH");
|
||||
TokenKind::OnMatch
|
||||
}
|
||||
_ => TokenKind::Identifier(lexeme.to_string()),
|
||||
};
|
||||
|
||||
self.make_token(kind, lexeme, start)
|
||||
}
|
||||
|
||||
fn peek_keyword(&mut self, keyword: &str) -> bool {
|
||||
let saved_offset = self.current_offset;
|
||||
self.skip_whitespace();
|
||||
let remaining = &self.input[self.current_offset..];
|
||||
let matches = remaining.to_uppercase().starts_with(keyword)
|
||||
&& remaining
|
||||
.chars()
|
||||
.nth(keyword.len())
|
||||
.map(|c| !c.is_ascii_alphanumeric() && c != '_')
|
||||
.unwrap_or(true);
|
||||
// Reset position if not consuming
|
||||
if !matches {
|
||||
self.current_offset = saved_offset;
|
||||
self.chars = self.input[saved_offset..].chars().peekable();
|
||||
}
|
||||
matches
|
||||
}
|
||||
|
||||
fn scan_keyword(&mut self, keyword: &str) {
|
||||
for _ in 0..keyword.len() {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Result<Token, LexerError> {
|
||||
self.skip_whitespace();
|
||||
|
||||
let start = self.position;
|
||||
|
||||
match self.peek() {
|
||||
None => Ok(self.make_token(TokenKind::Eof, "", start)),
|
||||
Some(ch) => {
|
||||
match ch {
|
||||
// Strings
|
||||
'"' | '\'' => self.scan_string(ch),
|
||||
|
||||
// Numbers
|
||||
'0'..='9' => Ok(self.scan_number()),
|
||||
|
||||
// Identifiers
|
||||
'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.scan_identifier()),
|
||||
|
||||
// Backtick-quoted identifiers
|
||||
'`' => {
|
||||
self.advance();
|
||||
let id_start = self.current_offset;
|
||||
while let Some(c) = self.peek() {
|
||||
if c == '`' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
let id = self.input[id_start..self.current_offset].to_string();
|
||||
self.advance(); // consume closing backtick
|
||||
Ok(self.make_token(TokenKind::Identifier(id.clone()), &id, start))
|
||||
}
|
||||
|
||||
// Two-character operators
|
||||
'<' => {
|
||||
self.advance();
|
||||
match self.peek() {
|
||||
Some('=') => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::LessThanOrEqual, "<=", start))
|
||||
}
|
||||
Some('>') => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::NotEqual, "<>", start))
|
||||
}
|
||||
Some('-') => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::LeftArrow, "<-", start))
|
||||
}
|
||||
_ => Ok(self.make_token(TokenKind::LessThan, "<", start)),
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
self.advance();
|
||||
if self.peek() == Some('=') {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::GreaterThanOrEqual, ">=", start))
|
||||
} else {
|
||||
Ok(self.make_token(TokenKind::GreaterThan, ">", start))
|
||||
}
|
||||
}
|
||||
'-' => {
|
||||
self.advance();
|
||||
if self.peek() == Some('>') {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Arrow, "->", start))
|
||||
} else {
|
||||
Ok(self.make_token(TokenKind::Dash, "-", start))
|
||||
}
|
||||
}
|
||||
'.' => {
|
||||
self.advance();
|
||||
if self.peek() == Some('.') {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::DotDot, "..", start))
|
||||
} else {
|
||||
Ok(self.make_token(TokenKind::Dot, ".", start))
|
||||
}
|
||||
}
|
||||
'=' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Equal, "=", start))
|
||||
}
|
||||
|
||||
// Single-character tokens
|
||||
'(' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::LeftParen, "(", start))
|
||||
}
|
||||
')' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::RightParen, ")", start))
|
||||
}
|
||||
'[' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::LeftBracket, "[", start))
|
||||
}
|
||||
']' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::RightBracket, "]", start))
|
||||
}
|
||||
'{' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::LeftBrace, "{", start))
|
||||
}
|
||||
'}' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::RightBrace, "}", start))
|
||||
}
|
||||
',' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Comma, ",", start))
|
||||
}
|
||||
':' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Colon, ":", start))
|
||||
}
|
||||
';' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Semicolon, ";", start))
|
||||
}
|
||||
'|' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Pipe, "|", start))
|
||||
}
|
||||
'+' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Plus, "+", start))
|
||||
}
|
||||
'*' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Star, "*", start))
|
||||
}
|
||||
'/' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Slash, "/", start))
|
||||
}
|
||||
'%' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Percent, "%", start))
|
||||
}
|
||||
'^' => {
|
||||
self.advance();
|
||||
Ok(self.make_token(TokenKind::Caret, "^", start))
|
||||
}
|
||||
|
||||
_ => Err(LexerError {
|
||||
message: format!("Unexpected character: '{}'", ch),
|
||||
position: start,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tokenize a Cypher query string
|
||||
pub fn tokenize(input: &str) -> Result<Vec<Token>, LexerError> {
|
||||
let mut lexer = Lexer::new(input);
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
loop {
|
||||
let token = lexer.next_token()?;
|
||||
let is_eof = token.kind == TokenKind::Eof;
|
||||
tokens.push(token);
|
||||
if is_eof {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Keywords and delimiters come out in source order.
    #[test]
    fn test_simple_tokens() {
        let tokens = tokenize("MATCH (n) RETURN n").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Match);
        assert_eq!(tokens[1].kind, TokenKind::LeftParen);
    }

    // Single-quoted strings produce the unescaped value.
    #[test]
    fn test_string() {
        let tokens = tokenize("'hello world'").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::String("hello world".to_string()));
    }

    // Integers and decimal floats are distinguished.
    #[test]
    fn test_number() {
        let tokens = tokenize("42 3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Integer(42));
        assert_eq!(tokens[1].kind, TokenKind::Float(3.14));
    }

    // Relationship arrows lex as a single Arrow token.
    #[test]
    fn test_relationship() {
        let tokens = tokenize("(a)-[:KNOWS]->(b)").unwrap();
        assert!(tokens.iter().any(|t| t.kind == TokenKind::Arrow));
    }
}
|
||||
266
vendor/ruvector/crates/rvlite/src/cypher/mod.rs
vendored
Normal file
266
vendor/ruvector/crates/rvlite/src/cypher/mod.rs
vendored
Normal file
@@ -0,0 +1,266 @@
|
||||
//! Cypher query language parser and execution engine for WASM
|
||||
//!
|
||||
//! This module provides a WASM-compatible Cypher implementation including:
|
||||
//! - Lexical analysis (tokenization)
|
||||
//! - Syntax parsing (AST generation)
|
||||
//! - In-memory property graph storage
|
||||
//! - Query execution engine
|
||||
//!
|
||||
//! Supported operations:
|
||||
//! - CREATE: Create nodes and relationships
|
||||
//! - MATCH: Pattern matching
|
||||
//! - WHERE: Filtering
|
||||
//! - RETURN: Projection
|
||||
//! - SET: Update properties
|
||||
//! - DELETE/DETACH DELETE: Remove nodes and edges
|
||||
|
||||
pub mod ast;
|
||||
pub mod executor;
|
||||
pub mod graph_store;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
|
||||
pub use ast::{Expression, Pattern, Query, Statement};
|
||||
pub use executor::{ContextValue, ExecutionError, ExecutionResult, Executor};
|
||||
pub use graph_store::{Edge, EdgeId, Node, NodeId, PropertyGraph, Value};
|
||||
pub use lexer::{tokenize, Token, TokenKind};
|
||||
pub use parser::{parse_cypher, ParseError};
|
||||
|
||||
use crate::storage::state::{EdgeState, GraphState, NodeState, PropertyValue};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
/// WASM-compatible Cypher engine
///
/// Owns the in-memory property graph that queries execute against.
#[wasm_bindgen]
pub struct CypherEngine {
    // Entire graph state; replaced wholesale by `clear` / `import_state`.
    graph: PropertyGraph,
}
|
||||
|
||||
#[wasm_bindgen]
impl CypherEngine {
    /// Create a new Cypher engine with empty graph
    #[wasm_bindgen(constructor)]
    pub fn new() -> Self {
        Self {
            graph: PropertyGraph::new(),
        }
    }

    /// Execute a Cypher query and return JSON results
    ///
    /// Pipeline: parse -> execute against the owned graph -> serialize for
    /// JS. Each stage's failure is surfaced as a `JsValue` string prefixed
    /// with the stage name.
    pub fn execute(&mut self, query: &str) -> Result<JsValue, JsValue> {
        // Parse the query
        let ast =
            parse_cypher(query).map_err(|e| JsValue::from_str(&format!("Parse error: {}", e)))?;

        // Execute the query
        let mut executor = Executor::new(&mut self.graph);
        let result = executor
            .execute(&ast)
            .map_err(|e| JsValue::from_str(&format!("Execution error: {}", e)))?;

        // Convert to JS value
        serde_wasm_bindgen::to_value(&result)
            .map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
    }

    /// Get graph statistics
    pub fn stats(&self) -> Result<JsValue, JsValue> {
        let stats = self.graph.stats();
        serde_wasm_bindgen::to_value(&stats)
            .map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
    }

    /// Clear the graph
    ///
    /// Drops all nodes, edges, and indexes by replacing the graph outright.
    pub fn clear(&mut self) {
        self.graph = PropertyGraph::new();
    }
}
|
||||
|
||||
impl CypherEngine {
    /// Export graph state for persistence
    ///
    /// Snapshots every node and edge into the serializable `GraphState`
    /// representation, converting each `Value` into a `PropertyValue`.
    ///
    /// NOTE(review): `next_node_id`/`next_edge_id` are filled from the
    /// current node/edge *counts*, not from the graph's internal counters.
    /// After deletions, a restored engine could regenerate an id that is
    /// still in use (e.g. create n0..n2, delete n1, restore -> counter 2
    /// collides with the live "n2") — confirm and consider exposing the
    /// real counters from `PropertyGraph`.
    pub fn export_state(&self) -> GraphState {
        let nodes: Vec<NodeState> = self
            .graph
            .all_nodes()
            .into_iter()
            .map(|n| NodeState {
                id: n.id.clone(),
                labels: n.labels.clone(),
                properties: n
                    .properties
                    .iter()
                    .map(|(k, v)| (k.clone(), value_to_property(v)))
                    .collect(),
            })
            .collect();

        let edges: Vec<EdgeState> = self
            .graph
            .all_edges()
            .into_iter()
            .map(|e| EdgeState {
                id: e.id.clone(),
                from: e.from.clone(),
                to: e.to.clone(),
                edge_type: e.edge_type.clone(),
                properties: e
                    .properties
                    .iter()
                    .map(|(k, v)| (k.clone(), value_to_property(v)))
                    .collect(),
            })
            .collect();

        let stats = self.graph.stats();

        GraphState {
            nodes,
            edges,
            next_node_id: stats.node_count,
            next_edge_id: stats.edge_count,
        }
    }

    /// Import state to restore the graph
    ///
    /// Replaces the current graph entirely. Nodes are inserted before edges
    /// so that `add_edge`'s endpoint checks succeed; the first failing edge
    /// aborts the import (earlier items remain imported).
    pub fn import_state(&mut self, state: &GraphState) -> Result<(), JsValue> {
        self.graph = PropertyGraph::new();

        // Import nodes first
        for node_state in &state.nodes {
            let mut node = Node::new(node_state.id.clone());
            for label in &node_state.labels {
                node = node.with_label(label.clone());
            }
            for (key, value) in &node_state.properties {
                node = node.with_property(key.clone(), property_to_value(value));
            }
            self.graph.add_node(node);
        }

        // Then import edges
        for edge_state in &state.edges {
            let mut edge = Edge::new(
                edge_state.id.clone(),
                edge_state.from.clone(),
                edge_state.to.clone(),
                edge_state.edge_type.clone(),
            );
            for (key, value) in &edge_state.properties {
                edge = edge.with_property(key.clone(), property_to_value(value));
            }
            if let Err(e) = self.graph.add_edge(edge) {
                return Err(JsValue::from_str(&format!("Failed to import edge: {}", e)));
            }
        }

        Ok(())
    }
}
|
||||
|
||||
impl Default for CypherEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert graph Value to serializable PropertyValue
|
||||
fn value_to_property(v: &Value) -> PropertyValue {
|
||||
match v {
|
||||
Value::Null => PropertyValue::Null,
|
||||
Value::Boolean(b) => PropertyValue::Boolean(*b),
|
||||
Value::Integer(i) => PropertyValue::Integer(*i),
|
||||
Value::Float(f) => PropertyValue::Float(*f),
|
||||
Value::String(s) => PropertyValue::String(s.clone()),
|
||||
Value::List(list) => PropertyValue::List(list.iter().map(value_to_property).collect()),
|
||||
Value::Map(map) => PropertyValue::Map(
|
||||
map.iter()
|
||||
.map(|(k, v)| (k.clone(), value_to_property(v)))
|
||||
.collect(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert PropertyValue back to graph Value
|
||||
fn property_to_value(p: &PropertyValue) -> Value {
|
||||
match p {
|
||||
PropertyValue::Null => Value::Null,
|
||||
PropertyValue::Boolean(b) => Value::Boolean(*b),
|
||||
PropertyValue::Integer(i) => Value::Integer(*i),
|
||||
PropertyValue::Float(f) => Value::Float(*f),
|
||||
PropertyValue::String(s) => Value::String(s.clone()),
|
||||
PropertyValue::List(list) => Value::List(list.iter().map(property_to_value).collect()),
|
||||
PropertyValue::Map(map) => Value::Map(
|
||||
map.iter()
|
||||
.map(|(k, v)| (k.clone(), property_to_value(v)))
|
||||
.collect(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // CREATE with a single node should add exactly one node to the graph.
    #[test]
    fn test_create_node() {
        let mut engine = CypherEngine::new();
        let query = "CREATE (n:Person {name: 'Alice', age: 30})";

        let ast = parse_cypher(query).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        let result = executor.execute(&ast);

        assert!(result.is_ok());
        assert_eq!(engine.graph.stats().node_count, 1);
    }

    // A relationship CREATE should materialize both endpoint nodes and the
    // connecting edge.
    #[test]
    fn test_create_relationship() {
        let mut engine = CypherEngine::new();
        let query = "CREATE (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'})";

        let ast = parse_cypher(query).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        let result = executor.execute(&ast);

        assert!(result.is_ok());
        let stats = engine.graph.stats();
        assert_eq!(stats.node_count, 2);
        assert_eq!(stats.edge_count, 1);
    }

    // MATCH over previously created nodes should execute without error.
    // (Only success is asserted; the returned rows are not inspected.)
    #[test]
    fn test_match_nodes() {
        let mut engine = CypherEngine::new();

        // Create data
        let create = "CREATE (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'})";
        let ast = parse_cypher(create).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        executor.execute(&ast).unwrap();

        // Match nodes
        let match_query = "MATCH (n:Person) RETURN n";
        let ast = parse_cypher(match_query).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        let result = executor.execute(&ast);

        assert!(result.is_ok());
    }

    // A representative set of query shapes must all be accepted by the
    // parser; only parse success is checked, not the resulting AST.
    #[test]
    fn test_parser() {
        let queries = vec![
            "MATCH (n:Person) RETURN n",
            "CREATE (n:Person {name: 'Alice'})",
            "MATCH (a)-[r:KNOWS]->(b) RETURN a, r, b",
            "CREATE (a:Person)-[r:KNOWS]->(b:Person)",
        ];

        for query in queries {
            let result = parse_cypher(query);
            assert!(result.is_ok(), "Failed to parse: {}", query);
        }
    }
}
|
||||
1295
vendor/ruvector/crates/rvlite/src/cypher/parser.rs
vendored
Normal file
1295
vendor/ruvector/crates/rvlite/src/cypher/parser.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
888
vendor/ruvector/crates/rvlite/src/lib.rs
vendored
Normal file
888
vendor/ruvector/crates/rvlite/src/lib.rs
vendored
Normal file
@@ -0,0 +1,888 @@
|
||||
//! RvLite - Standalone vector database with SQL, SPARQL, and Cypher
|
||||
//!
|
||||
//! A WASM-compatible vector database powered by RuVector.
|
||||
//!
|
||||
//! # Features
|
||||
//! - Vector storage and similarity search
|
||||
//! - SQL queries with pgvector-compatible syntax
|
||||
//! - SPARQL queries for RDF data
|
||||
//! - Cypher queries for property graphs
|
||||
//! - IndexedDB persistence for browsers
|
||||
//!
|
||||
//! # Example (JavaScript)
|
||||
//! ```javascript
|
||||
//! import init, { RvLite, RvLiteConfig } from './rvlite.js';
|
||||
//!
|
||||
//! await init();
|
||||
//! const config = new RvLiteConfig(384);
|
||||
//! const db = new RvLite(config);
|
||||
//!
|
||||
//! // Insert vectors
|
||||
//! db.insert([0.1, 0.2, 0.3, ...], { label: "test" });
|
||||
//!
|
||||
//! // Search
|
||||
//! const results = db.search([0.1, 0.2, 0.3, ...], 10);
|
||||
//!
|
||||
//! // Cypher queries
|
||||
//! db.cypher("CREATE (n:Person {name: 'Alice'})");
|
||||
//!
|
||||
//! // SPARQL queries
|
||||
//! db.add_triple("<http://example.org/a>", "<http://example.org/knows>", "<http://example.org/b>");
|
||||
//! db.sparql("SELECT ?s WHERE { ?s <http://example.org/knows> ?o }");
|
||||
//!
|
||||
//! // Persistence
|
||||
//! await db.save(); // Save to IndexedDB
|
||||
//! const db2 = await RvLite.load(config); // Load from IndexedDB
|
||||
//! ```
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use wasm_bindgen::prelude::*;
|
||||
use wasm_bindgen_futures::future_to_promise;
|
||||
|
||||
// Import ruvector-core
|
||||
use ruvector_core::types::DbOptions;
|
||||
use ruvector_core::{DistanceMetric, SearchQuery, VectorDB, VectorEntry};
|
||||
|
||||
// Query language modules
|
||||
pub mod cypher;
|
||||
pub mod sparql;
|
||||
pub mod sql;
|
||||
pub mod storage;
|
||||
|
||||
// Re-export storage types
|
||||
pub use storage::{GraphState, RvLiteState, TripleStoreState, VectorState};
|
||||
|
||||
/// Module initializer, run automatically when the WASM module is
/// instantiated (via `#[wasm_bindgen(start)]`).
#[wasm_bindgen(start)]
pub fn init() {
    // Route Rust panic messages to the browser console instead of an opaque
    // wasm trap; set_once makes repeated initialization harmless.
    console_error_panic_hook::set_once();
    web_sys::console::log_1(&"RvLite v0.2.0 - SQL, SPARQL, Cypher + Persistence".into());
}
|
||||
|
||||
/// Error type for RvLite
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RvLiteError {
|
||||
pub message: String,
|
||||
pub kind: ErrorKind,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for RvLiteError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}: {}", self.kind, self.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for RvLiteError {}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum ErrorKind {
|
||||
VectorError,
|
||||
SqlError,
|
||||
CypherError,
|
||||
SparqlError,
|
||||
StorageError,
|
||||
WasmError,
|
||||
NotImplemented,
|
||||
}
|
||||
|
||||
impl From<ruvector_core::RuvectorError> for RvLiteError {
|
||||
fn from(e: ruvector_core::RuvectorError) -> Self {
|
||||
RvLiteError {
|
||||
message: e.to_string(),
|
||||
kind: ErrorKind::VectorError,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<RvLiteError> for JsValue {
|
||||
fn from(e: RvLiteError) -> Self {
|
||||
serde_wasm_bindgen::to_value(&e).unwrap_or_else(|_| JsValue::from_str(&e.message))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<sparql::SparqlError> for RvLiteError {
|
||||
fn from(e: sparql::SparqlError) -> Self {
|
||||
RvLiteError {
|
||||
message: e.to_string(),
|
||||
kind: ErrorKind::SparqlError,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<sql::ParseError> for RvLiteError {
|
||||
fn from(e: sql::ParseError) -> Self {
|
||||
RvLiteError {
|
||||
message: e.to_string(),
|
||||
kind: ErrorKind::SqlError,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for RvLite database
#[wasm_bindgen]
#[derive(Clone, Serialize, Deserialize)]
pub struct RvLiteConfig {
    /// Vector dimensions
    dimensions: usize,
    /// Distance metric (euclidean, cosine, dotproduct, manhattan)
    // Stored as a free-form string; unrecognized names fall back to cosine
    // when converted in `to_db_options`.
    distance_metric: String,
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl RvLiteConfig {
|
||||
#[wasm_bindgen(constructor)]
|
||||
pub fn new(dimensions: usize) -> Self {
|
||||
RvLiteConfig {
|
||||
dimensions,
|
||||
distance_metric: "cosine".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set distance metric (euclidean, cosine, dotproduct, manhattan)
|
||||
pub fn with_distance_metric(mut self, metric: String) -> Self {
|
||||
self.distance_metric = metric;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get dimensions
|
||||
pub fn get_dimensions(&self) -> usize {
|
||||
self.dimensions
|
||||
}
|
||||
|
||||
/// Get distance metric name
|
||||
pub fn get_distance_metric(&self) -> String {
|
||||
self.distance_metric.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl RvLiteConfig {
|
||||
fn to_db_options(&self) -> DbOptions {
|
||||
let metric = match self.distance_metric.to_lowercase().as_str() {
|
||||
"euclidean" => DistanceMetric::Euclidean,
|
||||
"cosine" => DistanceMetric::Cosine,
|
||||
"dotproduct" => DistanceMetric::DotProduct,
|
||||
"manhattan" => DistanceMetric::Manhattan,
|
||||
_ => DistanceMetric::Cosine,
|
||||
};
|
||||
|
||||
DbOptions {
|
||||
dimensions: self.dimensions,
|
||||
distance_metric: metric,
|
||||
storage_path: "memory://".to_string(),
|
||||
hnsw_config: None,
|
||||
quantization: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Main RvLite database
///
/// Bundles the vector store with the three query engines (SQL, Cypher,
/// SPARQL) behind a single wasm-bindgen facade.
#[wasm_bindgen]
pub struct RvLite {
    // Vector store backing insert/search/get/delete.
    db: VectorDB,
    // Retained so export_state can record dimensions and metric.
    config: RvLiteConfig,
    cypher_engine: cypher::CypherEngine,
    sql_engine: sql::SqlEngine,
    triple_store: sparql::TripleStore,
    // NOTE(review): never assigned after construction — init_storage() and
    // save()/load() each build a fresh IndexedDBStorage instead of storing
    // it here. TODO confirm whether this field is still intended.
    storage: Option<storage::IndexedDBStorage>,
}
|
||||
|
||||
#[wasm_bindgen]
impl RvLite {
    /// Create a new RvLite database
    ///
    /// # Errors
    /// Fails when the underlying `VectorDB` rejects the derived options.
    #[wasm_bindgen(constructor)]
    pub fn new(config: RvLiteConfig) -> Result<RvLite, JsValue> {
        let db = VectorDB::new(config.to_db_options()).map_err(|e| RvLiteError::from(e))?;

        Ok(RvLite {
            db,
            config,
            cypher_engine: cypher::CypherEngine::new(),
            sql_engine: sql::SqlEngine::new(),
            triple_store: sparql::TripleStore::new(),
            storage: None,
        })
    }

    /// Create with default configuration (384 dimensions, cosine similarity)
    pub fn default() -> Result<RvLite, JsValue> {
        Self::new(RvLiteConfig::new(384))
    }

    /// Check if database is ready
    // Always true in the current synchronous implementation.
    pub fn is_ready(&self) -> bool {
        true
    }

    /// Get version string
    pub fn get_version(&self) -> String {
        "0.2.0".to_string()
    }

    /// Get enabled features
    // Returned to JS as an array of strings.
    pub fn get_features(&self) -> Result<JsValue, JsValue> {
        let features = vec![
            "core",
            "vectors",
            "search",
            "sql",
            "sparql",
            "cypher",
            "memory-storage",
            "indexeddb-persistence",
        ];
        serde_wasm_bindgen::to_value(&features).map_err(|e| JsValue::from_str(&e.to_string()))
    }

    // ===== Persistence Methods =====

    /// Initialize IndexedDB storage for persistence
    /// Must be called before save() or load()
    // NOTE(review): the storage handle is moved into the promise and then
    // dropped; `self.storage` is never assigned, and save()/load() open
    // their own connections — confirm this method is still required.
    pub fn init_storage(&mut self) -> js_sys::Promise {
        let mut storage = storage::IndexedDBStorage::new();

        future_to_promise(async move {
            storage.init().await?;
            Ok(JsValue::TRUE)
        })
    }

    /// Check if IndexedDB is available in the browser
    pub fn is_storage_available() -> bool {
        storage::IndexedDBStorage::is_available()
    }

    /// Save database state to IndexedDB
    /// Returns a Promise that resolves when save is complete
    // The state snapshot is taken synchronously before the future runs; the
    // async part only covers opening IndexedDB and writing.
    pub fn save(&self) -> js_sys::Promise {
        let state = self.export_state();
        let mut storage = storage::IndexedDBStorage::new();

        future_to_promise(async move {
            storage.init().await?;
            storage.save(&state).await?;
            Ok(JsValue::TRUE)
        })
    }

    /// Load database from IndexedDB
    /// Resolves to the string "loaded" when saved state was found and
    /// imported, or to null when no saved state exists.
    // NOTE(review): the restored `RvLite` instance is dropped here rather
    // than returned to JS, so callers cannot obtain the loaded database
    // from this promise — TODO confirm intended behavior.
    pub fn load(config: RvLiteConfig) -> js_sys::Promise {
        future_to_promise(async move {
            let mut storage = storage::IndexedDBStorage::new();
            storage.init().await?;

            let state = storage.load().await?;

            if let Some(state) = state {
                // Create new database with restored state
                let mut rvlite = RvLite::new(config)?;
                rvlite.import_state(&state)?;
                Ok(serde_wasm_bindgen::to_value(&"loaded").unwrap())
            } else {
                Ok(JsValue::NULL)
            }
        })
    }

    /// Check if saved state exists in IndexedDB
    pub fn has_saved_state() -> js_sys::Promise {
        future_to_promise(async move {
            let mut storage = storage::IndexedDBStorage::new();
            storage.init().await?;
            let exists = storage.exists().await?;
            Ok(JsValue::from_bool(exists))
        })
    }

    /// Clear saved state from IndexedDB
    pub fn clear_storage() -> js_sys::Promise {
        future_to_promise(async move {
            let mut storage = storage::IndexedDBStorage::new();
            storage.init().await?;
            storage.clear().await?;
            Ok(JsValue::TRUE)
        })
    }

    /// Export database state as JSON (for manual backup)
    pub fn export_json(&self) -> Result<JsValue, JsValue> {
        let state = self.export_state();
        serde_wasm_bindgen::to_value(&state)
            .map_err(|e| JsValue::from_str(&format!("Export failed: {}", e)))
    }

    /// Import database state from JSON
    pub fn import_json(&mut self, json: JsValue) -> Result<(), JsValue> {
        let state: RvLiteState = serde_wasm_bindgen::from_value(json)
            .map_err(|e| JsValue::from_str(&format!("Import failed: {}", e)))?;
        self.import_state(&state)
    }

    // ===== Vector Operations =====

    /// Insert a vector with optional metadata
    /// Returns the vector ID
    // `metadata` may be null/undefined; otherwise it must deserialize to a
    // string-keyed JSON object.
    pub fn insert(&self, vector: Vec<f32>, metadata: JsValue) -> Result<String, JsValue> {
        let metadata_map = if metadata.is_null() || metadata.is_undefined() {
            None
        } else {
            Some(
                serde_wasm_bindgen::from_value::<HashMap<String, serde_json::Value>>(metadata)
                    .map_err(|e| RvLiteError {
                        message: format!("Invalid metadata: {}", e),
                        kind: ErrorKind::WasmError,
                    })?,
            )
        };

        let entry = VectorEntry {
            id: None,
            vector,
            metadata: metadata_map,
        };

        self.db
            .insert(entry)
            .map_err(|e| RvLiteError::from(e).into())
    }

    /// Insert a vector with a specific ID
    pub fn insert_with_id(
        &self,
        id: String,
        vector: Vec<f32>,
        metadata: JsValue,
    ) -> Result<(), JsValue> {
        let metadata_map = if metadata.is_null() || metadata.is_undefined() {
            None
        } else {
            Some(
                serde_wasm_bindgen::from_value::<HashMap<String, serde_json::Value>>(metadata)
                    .map_err(|e| RvLiteError {
                        message: format!("Invalid metadata: {}", e),
                        kind: ErrorKind::WasmError,
                    })?,
            )
        };

        let entry = VectorEntry {
            id: Some(id),
            vector,
            metadata: metadata_map,
        };

        self.db.insert(entry).map_err(|e| RvLiteError::from(e))?;

        Ok(())
    }

    /// Search for similar vectors
    // Returns up to `k` nearest entries under the configured metric.
    pub fn search(&self, query_vector: Vec<f32>, k: usize) -> Result<JsValue, JsValue> {
        let query = SearchQuery {
            vector: query_vector,
            k,
            filter: None,
            ef_search: None,
        };

        let results = self.db.search(query).map_err(|e| RvLiteError::from(e))?;

        serde_wasm_bindgen::to_value(&results).map_err(|e| {
            RvLiteError {
                message: format!("Failed to serialize results: {}", e),
                kind: ErrorKind::WasmError,
            }
            .into()
        })
    }

    /// Search with metadata filter
    // Unlike insert(), a null/undefined filter is an error here: the value
    // must deserialize to a string-keyed JSON object.
    pub fn search_with_filter(
        &self,
        query_vector: Vec<f32>,
        k: usize,
        filter: JsValue,
    ) -> Result<JsValue, JsValue> {
        let filter_map = serde_wasm_bindgen::from_value::<HashMap<String, serde_json::Value>>(
            filter,
        )
        .map_err(|e| RvLiteError {
            message: format!("Invalid filter: {}", e),
            kind: ErrorKind::WasmError,
        })?;

        let query = SearchQuery {
            vector: query_vector,
            k,
            filter: Some(filter_map),
            ef_search: None,
        };

        let results = self.db.search(query).map_err(|e| RvLiteError::from(e))?;

        serde_wasm_bindgen::to_value(&results).map_err(|e| {
            RvLiteError {
                message: format!("Failed to serialize results: {}", e),
                kind: ErrorKind::WasmError,
            }
            .into()
        })
    }

    /// Get a vector by ID
    pub fn get(&self, id: String) -> Result<JsValue, JsValue> {
        let entry = self.db.get(&id).map_err(|e| RvLiteError::from(e))?;

        serde_wasm_bindgen::to_value(&entry).map_err(|e| {
            RvLiteError {
                message: format!("Failed to serialize entry: {}", e),
                kind: ErrorKind::WasmError,
            }
            .into()
        })
    }

    /// Delete a vector by ID
    pub fn delete(&self, id: String) -> Result<bool, JsValue> {
        self.db.delete(&id).map_err(|e| RvLiteError::from(e).into())
    }

    /// Get the number of vectors in the database
    pub fn len(&self) -> Result<usize, JsValue> {
        self.db.len().map_err(|e| RvLiteError::from(e).into())
    }

    /// Check if database is empty
    pub fn is_empty(&self) -> Result<bool, JsValue> {
        self.db.is_empty().map_err(|e| RvLiteError::from(e).into())
    }

    /// Get configuration
    pub fn get_config(&self) -> Result<JsValue, JsValue> {
        serde_wasm_bindgen::to_value(&self.config).map_err(|e| {
            RvLiteError {
                message: format!("Failed to serialize config: {}", e),
                kind: ErrorKind::WasmError,
            }
            .into()
        })
    }

    // ===== SQL Query Methods =====

    /// Execute SQL query
    ///
    /// Supported syntax:
    /// - CREATE TABLE vectors (id TEXT PRIMARY KEY, vector VECTOR(384))
    /// - SELECT * FROM vectors WHERE id = 'x'
    /// - SELECT id, vector <-> '[...]' AS distance FROM vectors ORDER BY distance LIMIT 10
    /// - INSERT INTO vectors (id, vector) VALUES ('x', '[...]')
    /// - DELETE FROM vectors WHERE id = 'x'
    pub fn sql(&self, query: String) -> Result<JsValue, JsValue> {
        // Parse SQL
        let mut parser = sql::SqlParser::new(&query).map_err(|e| RvLiteError {
            message: e.to_string(),
            kind: ErrorKind::SqlError,
        })?;
        let statement = parser.parse().map_err(|e| RvLiteError {
            message: e.to_string(),
            kind: ErrorKind::SqlError,
        })?;

        // Execute
        let result = self
            .sql_engine
            .execute(statement)
            .map_err(|e| RvLiteError {
                message: e.to_string(),
                kind: ErrorKind::SqlError,
            })?;

        // Use serde_json + js_sys::JSON::parse for proper serialization
        // (serde_wasm_bindgen can fail silently on complex enum types)
        let json_str = serde_json::to_string(&result).map_err(|e| RvLiteError {
            message: format!("Failed to serialize result: {}", e),
            kind: ErrorKind::WasmError,
        })?;

        js_sys::JSON::parse(&json_str).map_err(|e| {
            RvLiteError {
                message: format!("Failed to parse JSON: {:?}", e),
                kind: ErrorKind::WasmError,
            }
            .into()
        })
    }

    // ===== Cypher Query Methods =====

    /// Execute Cypher query
    ///
    /// Supported operations:
    /// - CREATE (n:Label {prop: value})
    /// - MATCH (n:Label) WHERE n.prop = value RETURN n
    /// - CREATE (a)-[r:REL]->(b)
    /// - DELETE n
    pub fn cypher(&mut self, query: String) -> Result<JsValue, JsValue> {
        self.cypher_engine.execute(&query)
    }

    /// Get Cypher graph statistics
    pub fn cypher_stats(&self) -> Result<JsValue, JsValue> {
        self.cypher_engine.stats()
    }

    /// Clear the Cypher graph
    pub fn cypher_clear(&mut self) {
        self.cypher_engine.clear();
    }

    // ===== SPARQL Query Methods =====

    /// Execute SPARQL query
    ///
    /// Supported operations:
    /// - SELECT ?s ?p ?o WHERE { ?s ?p ?o }
    /// - SELECT ?s WHERE { ?s <predicate> ?o }
    /// - ASK { ?s ?p ?o }
    pub fn sparql(&self, query: String) -> Result<JsValue, JsValue> {
        let parsed = sparql::parse_sparql(&query).map_err(|e| RvLiteError {
            message: format!("SPARQL parse error: {}", e),
            kind: ErrorKind::SparqlError,
        })?;

        let result = sparql::execute_sparql(&self.triple_store, &parsed)
            .map_err(|e| RvLiteError::from(e))?;

        // Convert result to serializable format
        let serializable = convert_sparql_result(&result);

        // Convert JSON to string and then parse in JS for proper object conversion
        let json_string = serializable.to_string();
        let js_obj = js_sys::JSON::parse(&json_string).map_err(|e| RvLiteError {
            message: format!("Failed to parse JSON: {:?}", e),
            kind: ErrorKind::WasmError,
        })?;

        Ok(js_obj)
    }

    /// Add an RDF triple
    ///
    /// # Arguments
    /// * `subject` - Subject IRI or blank node (e.g., "<http://example.org/s>" or "_:b1")
    /// * `predicate` - Predicate IRI (e.g., "<http://example.org/p>")
    /// * `object` - Object IRI, blank node, or literal (e.g., "<http://example.org/o>" or '"value"')
    pub fn add_triple(
        &self,
        subject: String,
        predicate: String,
        object: String,
    ) -> Result<(), JsValue> {
        let subj = parse_rdf_term(&subject)?;
        let pred = parse_iri(&predicate)?;
        let obj = parse_rdf_term(&object)?;

        let triple = sparql::Triple::new(subj, pred, obj);
        self.triple_store.insert(triple);
        Ok(())
    }

    /// Get the number of triples in the store
    pub fn triple_count(&self) -> usize {
        self.triple_store.count()
    }

    /// Clear all triples
    // Takes &self; TripleStore mutation apparently relies on interior
    // mutability — TODO confirm.
    pub fn clear_triples(&self) {
        self.triple_store.clear();
    }
}
|
||||
|
||||
// Private impl block for state export/import
impl RvLite {
    /// Export the complete database state
    // Gathers vectors, graph, triples and (placeholder) SQL schemas into a
    // single serializable snapshot.
    fn export_state(&self) -> RvLiteState {
        use storage::state::*;

        // Get current timestamp
        let saved_at = js_sys::Date::now() as u64;

        // Export vector state
        // Entries whose lookup fails or returns None are silently skipped.
        let vector_entries = self
            .db
            .keys()
            .unwrap_or_default()
            .iter()
            .filter_map(|id| {
                self.db
                    .get(id)
                    .ok()
                    .flatten()
                    .map(|entry| storage::state::VectorEntry {
                        id: entry.id.unwrap_or_default(),
                        vector: entry.vector,
                        metadata: entry.metadata,
                    })
            })
            .collect();

        let vectors = VectorState {
            entries: vector_entries,
            dimensions: self.config.dimensions,
            distance_metric: self.config.distance_metric.clone(),
            next_id: 0, // Will be recalculated on load
        };

        // Export graph state
        let graph = self.cypher_engine.export_state();

        // Export triple store state
        let triples = self.export_triple_state();

        // Export SQL schemas (not fully implemented yet)
        let sql_schemas = Vec::new();

        RvLiteState {
            version: 1,
            saved_at,
            vectors,
            graph,
            triples,
            sql_schemas,
        }
    }

    /// Import state into the database
    // Order: vectors, then graph, then triples. SQL schemas are not
    // restored (export is a placeholder). Note: existing vectors are not
    // cleared first — callers import into a freshly constructed instance.
    fn import_state(&mut self, state: &RvLiteState) -> Result<(), JsValue> {
        // Import vectors
        for entry in &state.vectors.entries {
            let vector_entry = VectorEntry {
                id: Some(entry.id.clone()),
                vector: entry.vector.clone(),
                metadata: entry.metadata.clone(),
            };
            self.db
                .insert(vector_entry)
                .map_err(|e| RvLiteError::from(e))?;
        }

        // Import graph
        self.cypher_engine.import_state(&state.graph)?;

        // Import triples
        self.import_triple_state(&state.triples)?;

        Ok(())
    }

    /// Export triple store state
    // Triple ids are synthesized from enumeration order; named graphs are
    // not persisted (empty map / empty default graph).
    fn export_triple_state(&self) -> storage::state::TripleStoreState {
        use storage::state::*;

        let triples: Vec<TripleState> = self
            .triple_store
            .all_triples()
            .into_iter()
            .enumerate()
            .map(|(id, t)| TripleState {
                id: id as u64,
                subject: rdf_term_to_state(&t.subject),
                predicate: t.predicate.0.clone(),
                object: rdf_term_to_state(&t.object),
            })
            .collect();

        TripleStoreState {
            triples,
            named_graphs: HashMap::new(),
            default_graph: Vec::new(),
            next_id: 0,
        }
    }

    /// Import triple store state
    // Replaces the current contents entirely. Takes &self; TripleStore
    // mutation apparently relies on interior mutability — TODO confirm.
    fn import_triple_state(&self, state: &storage::state::TripleStoreState) -> Result<(), JsValue> {
        self.triple_store.clear();

        for triple_state in &state.triples {
            let subject = state_to_rdf_term(&triple_state.subject)?;
            let predicate = sparql::Iri::new(&triple_state.predicate);
            let object = state_to_rdf_term(&triple_state.object)?;

            let triple = sparql::Triple::new(subject, predicate, object);
            self.triple_store.insert(triple);
        }

        Ok(())
    }
}
|
||||
|
||||
// Helper function to convert RdfTerm to clean JSON value
|
||||
fn term_to_json(term: &sparql::ast::RdfTerm) -> serde_json::Value {
|
||||
use sparql::ast::RdfTerm;
|
||||
match term {
|
||||
RdfTerm::Iri(iri) => serde_json::json!({
|
||||
"type": "iri",
|
||||
"value": iri.as_str()
|
||||
}),
|
||||
RdfTerm::Literal(lit) => {
|
||||
let mut obj = serde_json::Map::new();
|
||||
obj.insert("type".to_string(), serde_json::json!("literal"));
|
||||
obj.insert("value".to_string(), serde_json::json!(lit.value.clone()));
|
||||
if let Some(lang) = &lit.language {
|
||||
obj.insert("language".to_string(), serde_json::json!(lang));
|
||||
}
|
||||
obj.insert(
|
||||
"datatype".to_string(),
|
||||
serde_json::json!(lit.datatype.as_str()),
|
||||
);
|
||||
serde_json::Value::Object(obj)
|
||||
}
|
||||
RdfTerm::BlankNode(id) => serde_json::json!({
|
||||
"type": "bnode",
|
||||
"value": id
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to convert SPARQL result to serializable format
|
||||
fn convert_sparql_result(result: &sparql::executor::QueryResult) -> serde_json::Value {
|
||||
use sparql::executor::QueryResult;
|
||||
|
||||
match result {
|
||||
QueryResult::Select(select_result) => {
|
||||
let bindings: Vec<serde_json::Value> = select_result
|
||||
.bindings
|
||||
.iter()
|
||||
.map(|binding| {
|
||||
let mut obj = serde_json::Map::new();
|
||||
for (var, term) in binding {
|
||||
obj.insert(var.clone(), term_to_json(term));
|
||||
}
|
||||
serde_json::Value::Object(obj)
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::json!({
|
||||
"type": "select",
|
||||
"variables": select_result.variables,
|
||||
"bindings": bindings
|
||||
})
|
||||
}
|
||||
QueryResult::Ask(result) => {
|
||||
serde_json::json!({
|
||||
"type": "ask",
|
||||
"result": result
|
||||
})
|
||||
}
|
||||
QueryResult::Construct(triples) => {
|
||||
let triple_json: Vec<serde_json::Value> = triples
|
||||
.iter()
|
||||
.map(|t| {
|
||||
serde_json::json!({
|
||||
"subject": term_to_json(&t.subject),
|
||||
"predicate": t.predicate.0.clone(),
|
||||
"object": term_to_json(&t.object)
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::json!({
|
||||
"type": "construct",
|
||||
"triples": triple_json
|
||||
})
|
||||
}
|
||||
QueryResult::Describe(triples) => {
|
||||
let triple_json: Vec<serde_json::Value> = triples
|
||||
.iter()
|
||||
.map(|t| {
|
||||
serde_json::json!({
|
||||
"subject": term_to_json(&t.subject),
|
||||
"predicate": t.predicate.0.clone(),
|
||||
"object": term_to_json(&t.object)
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
serde_json::json!({
|
||||
"type": "describe",
|
||||
"triples": triple_json
|
||||
})
|
||||
}
|
||||
QueryResult::Update => {
|
||||
serde_json::json!({
|
||||
"type": "update",
|
||||
"success": true
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions for parsing RDF terms
|
||||
fn parse_rdf_term(s: &str) -> Result<sparql::RdfTerm, JsValue> {
|
||||
let s = s.trim();
|
||||
if s.starts_with('<') && s.ends_with('>') {
|
||||
Ok(sparql::RdfTerm::iri(&s[1..s.len() - 1]))
|
||||
} else if s.starts_with("_:") {
|
||||
Ok(sparql::RdfTerm::blank(&s[2..]))
|
||||
} else if s.starts_with('"') {
|
||||
let end = s.rfind('"').unwrap_or(s.len() - 1);
|
||||
let value = &s[1..end];
|
||||
Ok(sparql::RdfTerm::literal(value))
|
||||
} else {
|
||||
Ok(sparql::RdfTerm::literal(s))
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse an IRI string, accepting both angle-bracketed (`<iri>`) and bare
/// forms; the bare form is wrapped as-is.
fn parse_iri(s: &str) -> Result<sparql::Iri, JsValue> {
    let s = s.trim();
    // Only strip when BOTH brackets are present, matching the original
    // starts_with/ends_with pair.
    match s.strip_prefix('<').and_then(|rest| rest.strip_suffix('>')) {
        Some(inner) => Ok(sparql::Iri::new(inner)),
        None => Ok(sparql::Iri::new(s)),
    }
}
|
||||
|
||||
// Helper functions for RDF term state conversion
|
||||
fn rdf_term_to_state(term: &sparql::RdfTerm) -> storage::state::RdfTermState {
|
||||
use storage::state::RdfTermState;
|
||||
|
||||
match term {
|
||||
sparql::RdfTerm::Iri(iri) => RdfTermState::Iri {
|
||||
value: iri.0.clone(),
|
||||
},
|
||||
sparql::RdfTerm::Literal(lit) => RdfTermState::Literal {
|
||||
value: lit.value.clone(),
|
||||
datatype: lit.datatype.0.clone(),
|
||||
language: lit.language.clone(),
|
||||
},
|
||||
sparql::RdfTerm::BlankNode(id) => RdfTermState::BlankNode { id: id.clone() },
|
||||
}
|
||||
}
|
||||
|
||||
/// Rehydrate an `RdfTerm` from its persisted `RdfTermState`.
///
/// Infallible in practice; the `Result` return mirrors the other state
/// conversion helpers.
fn state_to_rdf_term(state: &storage::state::RdfTermState) -> Result<sparql::RdfTerm, JsValue> {
    use storage::state::RdfTermState;

    let term = match state {
        RdfTermState::Iri { value } => sparql::RdfTerm::iri(value),
        // NOTE(review): the persisted datatype and language tag are dropped
        // here, so typed/tagged literals round-trip as plain literals —
        // TODO confirm this loss is intentional.
        RdfTermState::Literal { value, .. } => sparql::RdfTerm::literal(value),
        RdfTermState::BlankNode { id } => sparql::RdfTerm::blank(id),
    };
    Ok(term)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Sanity check: the constructor stores the requested dimension count
    // and the documented "cosine" default metric.
    #[test]
    fn test_config_creation() {
        let config = RvLiteConfig::new(384);
        assert_eq!(config.dimensions, 384);
        assert_eq!(config.distance_metric, "cosine");
    }
}
|
||||
52
vendor/ruvector/crates/rvlite/src/lib_sql.rs
vendored
Normal file
52
vendor/ruvector/crates/rvlite/src/lib_sql.rs
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
// Integration of SQL module into RvLite
|
||||
// This shows the minimal changes needed to lib.rs
|
||||
|
||||
use wasm_bindgen::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// Import ruvector-core
|
||||
use ruvector_core::{
|
||||
VectorDB, VectorEntry, SearchQuery,
|
||||
DistanceMetric,
|
||||
};
|
||||
use ruvector_core::types::DbOptions;
|
||||
|
||||
// SQL module
|
||||
pub mod sql;
|
||||
|
||||
// RvLite struct needs to include sql_engine field:
|
||||
// sql_engine: sql::SqlEngine,
|
||||
|
||||
// In RvLite::new(), initialize the SQL engine:
|
||||
// sql_engine: sql::SqlEngine::new(),
|
||||
|
||||
// Replace the sql() method with this implementation:
|
||||
/*
|
||||
/// Execute SQL query
|
||||
pub async fn sql(&self, query: String) -> Result<JsValue, JsValue> {
|
||||
// Parse SQL
|
||||
let mut parser = sql::SqlParser::new(&query)
|
||||
.map_err(|e| RvLiteError {
|
||||
message: format!("SQL parse error: {}", e),
|
||||
kind: ErrorKind::SqlError,
|
||||
})?;
|
||||
|
||||
let statement = parser.parse()
|
||||
.map_err(|e| RvLiteError {
|
||||
message: format!("SQL parse error: {}", e),
|
||||
kind: ErrorKind::SqlError,
|
||||
})?;
|
||||
|
||||
// Execute statement
|
||||
let result = self.sql_engine.execute(statement)
|
||||
.map_err(|e| JsValue::from(e))?;
|
||||
|
||||
// Serialize result
|
||||
serde_wasm_bindgen::to_value(&result)
|
||||
.map_err(|e| RvLiteError {
|
||||
message: format!("Failed to serialize result: {}", e),
|
||||
kind: ErrorKind::WasmError,
|
||||
}.into())
|
||||
}
|
||||
*/
|
||||
907
vendor/ruvector/crates/rvlite/src/sparql/ast.rs
vendored
Normal file
907
vendor/ruvector/crates/rvlite/src/sparql/ast.rs
vendored
Normal file
@@ -0,0 +1,907 @@
|
||||
// SPARQL Abstract Syntax Tree (AST) types
|
||||
//
|
||||
// Provides type-safe representation of SPARQL 1.1 queries following
|
||||
// the W3C specification: https://www.w3.org/TR/sparql11-query/
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Complete SPARQL query or update
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SparqlQuery {
|
||||
/// Base IRI for relative IRI resolution
|
||||
pub base: Option<Iri>,
|
||||
/// PREFIX declarations
|
||||
pub prefixes: HashMap<String, Iri>,
|
||||
/// The query form (SELECT, CONSTRUCT, ASK, DESCRIBE) or update operation
|
||||
pub body: QueryBody,
|
||||
}
|
||||
|
||||
impl SparqlQuery {
|
||||
pub fn new(body: QueryBody) -> Self {
|
||||
Self {
|
||||
base: None,
|
||||
prefixes: HashMap::new(),
|
||||
body,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_base(mut self, base: Iri) -> Self {
|
||||
self.base = Some(base);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_prefix(mut self, prefix: impl Into<String>, iri: Iri) -> Self {
|
||||
self.prefixes.insert(prefix.into(), iri);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SparqlQuery {
|
||||
fn default() -> Self {
|
||||
Self::new(QueryBody::Select(SelectQuery::default()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Query body - either a query form or update operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum QueryBody {
    /// SELECT: variable-binding results
    Select(SelectQuery),
    /// CONSTRUCT: triples built from a template
    Construct(ConstructQuery),
    /// ASK: boolean result
    Ask(AskQuery),
    /// DESCRIBE: triples describing resources
    Describe(DescribeQuery),
    /// Update request: operations are executed in sequence
    Update(Vec<UpdateOperation>),
}

/// Query form type
///
/// NOTE(review): not referenced by the other AST types in this file;
/// presumably used by the parser or external callers — confirm before removal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QueryForm {
    Select,
    Construct,
    Ask,
    Describe,
}
|
||||
|
||||
/// SELECT query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SelectQuery {
    /// Result variables or expressions
    pub projection: Projection,
    /// Dataset clauses (FROM, FROM NAMED)
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause graph pattern
    pub where_clause: GraphPattern,
    /// Solution modifiers (ORDER BY, LIMIT, OFFSET, HAVING)
    pub modifier: SolutionModifier,
    /// VALUES clause for inline data
    pub values: Option<ValuesClause>,
}

impl Default for SelectQuery {
    /// An empty `SELECT *` over an empty WHERE clause.
    fn default() -> Self {
        Self {
            projection: Projection::All,
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
            modifier: SolutionModifier::default(),
            values: None,
        }
    }
}

/// Projection in SELECT clause
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Projection {
    /// SELECT * - all variables
    All,
    /// SELECT DISTINCT ... - duplicate solutions removed
    Distinct(Vec<ProjectionVar>),
    /// SELECT REDUCED ... - duplicates MAY be removed (spec permits either)
    Reduced(Vec<ProjectionVar>),
    /// SELECT var1 var2 ...
    Variables(Vec<ProjectionVar>),
}
|
||||
|
||||
/// Variable or expression in projection
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ProjectionVar {
|
||||
pub expression: Expression,
|
||||
pub alias: Option<String>,
|
||||
}
|
||||
|
||||
impl ProjectionVar {
|
||||
pub fn variable(name: impl Into<String>) -> Self {
|
||||
Self {
|
||||
expression: Expression::Variable(name.into()),
|
||||
alias: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expr_as(expr: Expression, alias: impl Into<String>) -> Self {
|
||||
Self {
|
||||
expression: expr,
|
||||
alias: Some(alias.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// CONSTRUCT query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstructQuery {
    /// Template for constructing triples
    pub template: Vec<TriplePattern>,
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause
    pub where_clause: GraphPattern,
    /// Solution modifiers
    pub modifier: SolutionModifier,
}

impl Default for ConstructQuery {
    /// Empty template over an empty WHERE clause.
    fn default() -> Self {
        Self {
            template: Vec::new(),
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
            modifier: SolutionModifier::default(),
        }
    }
}

/// ASK query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AskQuery {
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause
    pub where_clause: GraphPattern,
}

impl Default for AskQuery {
    /// Empty WHERE clause.
    fn default() -> Self {
        Self {
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
        }
    }
}

/// DESCRIBE query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DescribeQuery {
    /// Resources to describe (variables or IRIs)
    pub resources: Vec<VarOrIri>,
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// Optional WHERE clause
    pub where_clause: Option<GraphPattern>,
}

impl Default for DescribeQuery {
    /// No resources, no WHERE clause.
    fn default() -> Self {
        Self {
            resources: Vec::new(),
            dataset: Vec::new(),
            where_clause: None,
        }
    }
}
|
||||
|
||||
/// Dataset clause (FROM / FROM NAMED)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetClause {
    /// Graph IRI
    pub iri: Iri,
    // presumably true for FROM NAMED, false for plain FROM — confirm in parser
    pub named: bool,
}

/// VALUES clause for inline data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValuesClause {
    /// Variables bound by each row
    pub variables: Vec<String>,
    /// Rows of terms, positionally parallel to `variables`;
    /// `None` presumably encodes UNDEF — confirm in parser
    pub bindings: Vec<Vec<Option<RdfTerm>>>,
}
|
||||
|
||||
/// Graph pattern - the WHERE clause body
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GraphPattern {
    /// Empty pattern (evaluates to a single empty solution)
    Empty,
    /// Basic Graph Pattern - set of triple patterns
    Bgp(Vec<TriplePattern>),
    /// Join of patterns (implicit AND)
    Join(Box<GraphPattern>, Box<GraphPattern>),
    /// Left outer join (OPTIONAL); the expression is the optional join filter
    LeftJoin(Box<GraphPattern>, Box<GraphPattern>, Option<Expression>),
    /// Union of patterns (UNION)
    Union(Box<GraphPattern>, Box<GraphPattern>),
    /// Filter (FILTER) applied to the inner pattern's solutions
    Filter(Box<GraphPattern>, Expression),
    /// Named graph (GRAPH)
    Graph(VarOrIri, Box<GraphPattern>),
    /// Service (FEDERATED query); bool is presumably the SILENT flag — TODO confirm
    Service(Iri, Box<GraphPattern>, bool),
    /// MINUS pattern
    Minus(Box<GraphPattern>, Box<GraphPattern>),
    /// EXISTS or NOT EXISTS; bool presumably distinguishes the two — confirm in parser
    Exists(Box<GraphPattern>, bool),
    /// BIND assignment: (expression, target variable name, continuation pattern)
    Bind(Expression, String, Box<GraphPattern>),
    /// GROUP BY aggregation:
    /// (inner pattern, grouping keys, aggregates paired with output variable names)
    Group(
        Box<GraphPattern>,
        Vec<GroupCondition>,
        Vec<(Aggregate, String)>,
    ),
    /// Subquery
    SubSelect(Box<SelectQuery>),
    /// VALUES inline data
    Values(ValuesClause),
}
|
||||
|
||||
/// Triple pattern
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TriplePattern {
|
||||
pub subject: TermOrVariable,
|
||||
pub predicate: PropertyPath,
|
||||
pub object: TermOrVariable,
|
||||
}
|
||||
|
||||
impl TriplePattern {
|
||||
pub fn new(subject: TermOrVariable, predicate: PropertyPath, object: TermOrVariable) -> Self {
|
||||
Self {
|
||||
subject,
|
||||
predicate,
|
||||
object,
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple triple pattern with IRI predicate
|
||||
pub fn simple(subject: TermOrVariable, predicate: Iri, object: TermOrVariable) -> Self {
|
||||
Self {
|
||||
subject,
|
||||
predicate: PropertyPath::Iri(predicate),
|
||||
object,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Term or variable in triple pattern
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum TermOrVariable {
|
||||
Term(RdfTerm),
|
||||
Variable(String),
|
||||
BlankNode(String),
|
||||
}
|
||||
|
||||
impl TermOrVariable {
|
||||
pub fn var(name: impl Into<String>) -> Self {
|
||||
Self::Variable(name.into())
|
||||
}
|
||||
|
||||
pub fn iri(iri: Iri) -> Self {
|
||||
Self::Term(RdfTerm::Iri(iri))
|
||||
}
|
||||
|
||||
pub fn literal(value: impl Into<String>) -> Self {
|
||||
Self::Term(RdfTerm::Literal(Literal::simple(value)))
|
||||
}
|
||||
|
||||
pub fn blank(id: impl Into<String>) -> Self {
|
||||
Self::BlankNode(id.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Variable or IRI
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VarOrIri {
    Variable(String),
    Iri(Iri),
}

/// Property path expression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PropertyPath {
    /// Simple IRI predicate
    Iri(Iri),
    /// Variable predicate
    Variable(String),
    /// Inverse path (^path)
    Inverse(Box<PropertyPath>),
    /// Sequence path (path1/path2)
    Sequence(Box<PropertyPath>, Box<PropertyPath>),
    /// Alternative path (path1|path2)
    Alternative(Box<PropertyPath>, Box<PropertyPath>),
    /// Zero or more (path*)
    ZeroOrMore(Box<PropertyPath>),
    /// One or more (path+)
    OneOrMore(Box<PropertyPath>),
    /// Zero or one (path?)
    ZeroOrOne(Box<PropertyPath>),
    /// Negated property set (!(iri1|iri2))
    NegatedPropertySet(Vec<Iri>),
    /// Fixed length path {n} — extension beyond SPARQL 1.1 paths
    FixedLength(Box<PropertyPath>, usize),
    /// Range length path {n,m}; `None` upper bound presumably means unbounded
    RangeLength(Box<PropertyPath>, usize, Option<usize>),
}
|
||||
|
||||
/// RDF term
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub enum RdfTerm {
|
||||
/// IRI reference
|
||||
Iri(Iri),
|
||||
/// Literal value
|
||||
Literal(Literal),
|
||||
/// Blank node
|
||||
BlankNode(String),
|
||||
}
|
||||
|
||||
impl RdfTerm {
|
||||
pub fn iri(value: impl Into<String>) -> Self {
|
||||
Self::Iri(Iri::new(value))
|
||||
}
|
||||
|
||||
pub fn literal(value: impl Into<String>) -> Self {
|
||||
Self::Literal(Literal::simple(value))
|
||||
}
|
||||
|
||||
pub fn typed_literal(value: impl Into<String>, datatype: Iri) -> Self {
|
||||
Self::Literal(Literal::typed(value, datatype))
|
||||
}
|
||||
|
||||
pub fn lang_literal(value: impl Into<String>, lang: impl Into<String>) -> Self {
|
||||
Self::Literal(Literal::language(value, lang))
|
||||
}
|
||||
|
||||
pub fn blank(id: impl Into<String>) -> Self {
|
||||
Self::BlankNode(id.into())
|
||||
}
|
||||
|
||||
/// Check if this is an IRI
|
||||
pub fn is_iri(&self) -> bool {
|
||||
matches!(self, Self::Iri(_))
|
||||
}
|
||||
|
||||
/// Check if this is a literal
|
||||
pub fn is_literal(&self) -> bool {
|
||||
matches!(self, Self::Literal(_))
|
||||
}
|
||||
|
||||
/// Check if this is a blank node
|
||||
pub fn is_blank_node(&self) -> bool {
|
||||
matches!(self, Self::BlankNode(_))
|
||||
}
|
||||
}
|
||||
|
||||
/// IRI (Internationalized Resource Identifier)
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct Iri(pub String);
|
||||
|
||||
impl Iri {
|
||||
pub fn new(value: impl Into<String>) -> Self {
|
||||
Self(value.into())
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Common RDF namespace IRIs
|
||||
pub fn rdf_type() -> Self {
|
||||
Self::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
|
||||
}
|
||||
|
||||
pub fn rdfs_label() -> Self {
|
||||
Self::new("http://www.w3.org/2000/01/rdf-schema#label")
|
||||
}
|
||||
|
||||
pub fn rdfs_comment() -> Self {
|
||||
Self::new("http://www.w3.org/2000/01/rdf-schema#comment")
|
||||
}
|
||||
|
||||
pub fn xsd_string() -> Self {
|
||||
Self::new("http://www.w3.org/2001/XMLSchema#string")
|
||||
}
|
||||
|
||||
pub fn xsd_integer() -> Self {
|
||||
Self::new("http://www.w3.org/2001/XMLSchema#integer")
|
||||
}
|
||||
|
||||
pub fn xsd_decimal() -> Self {
|
||||
Self::new("http://www.w3.org/2001/XMLSchema#decimal")
|
||||
}
|
||||
|
||||
pub fn xsd_double() -> Self {
|
||||
Self::new("http://www.w3.org/2001/XMLSchema#double")
|
||||
}
|
||||
|
||||
pub fn xsd_boolean() -> Self {
|
||||
Self::new("http://www.w3.org/2001/XMLSchema#boolean")
|
||||
}
|
||||
|
||||
pub fn xsd_date() -> Self {
|
||||
Self::new("http://www.w3.org/2001/XMLSchema#date")
|
||||
}
|
||||
|
||||
pub fn xsd_datetime() -> Self {
|
||||
Self::new("http://www.w3.org/2001/XMLSchema#dateTime")
|
||||
}
|
||||
}
|
||||
|
||||
/// RDF Literal
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct Literal {
|
||||
/// Lexical form (string value)
|
||||
pub value: String,
|
||||
/// Optional language tag
|
||||
pub language: Option<String>,
|
||||
/// Datatype IRI (defaults to xsd:string)
|
||||
pub datatype: Iri,
|
||||
}
|
||||
|
||||
impl Literal {
|
||||
/// Simple string literal
|
||||
pub fn simple(value: impl Into<String>) -> Self {
|
||||
Self {
|
||||
value: value.into(),
|
||||
language: None,
|
||||
datatype: Iri::xsd_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Typed literal
|
||||
pub fn typed(value: impl Into<String>, datatype: Iri) -> Self {
|
||||
Self {
|
||||
value: value.into(),
|
||||
language: None,
|
||||
datatype,
|
||||
}
|
||||
}
|
||||
|
||||
/// Language-tagged literal
|
||||
pub fn language(value: impl Into<String>, lang: impl Into<String>) -> Self {
|
||||
Self {
|
||||
value: value.into(),
|
||||
language: Some(lang.into()),
|
||||
datatype: Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Integer literal
|
||||
pub fn integer(value: i64) -> Self {
|
||||
Self::typed(value.to_string(), Iri::xsd_integer())
|
||||
}
|
||||
|
||||
/// Decimal literal
|
||||
pub fn decimal(value: f64) -> Self {
|
||||
Self::typed(value.to_string(), Iri::xsd_decimal())
|
||||
}
|
||||
|
||||
/// Double literal
|
||||
pub fn double(value: f64) -> Self {
|
||||
Self::typed(value.to_string(), Iri::xsd_double())
|
||||
}
|
||||
|
||||
/// Boolean literal
|
||||
pub fn boolean(value: bool) -> Self {
|
||||
Self::typed(if value { "true" } else { "false" }, Iri::xsd_boolean())
|
||||
}
|
||||
|
||||
/// Try to parse as integer
|
||||
pub fn as_integer(&self) -> Option<i64> {
|
||||
self.value.parse().ok()
|
||||
}
|
||||
|
||||
/// Try to parse as double
|
||||
pub fn as_double(&self) -> Option<f64> {
|
||||
self.value.parse().ok()
|
||||
}
|
||||
|
||||
/// Try to parse as boolean
|
||||
pub fn as_boolean(&self) -> Option<bool> {
|
||||
match self.value.as_str() {
|
||||
"true" | "1" => Some(true),
|
||||
"false" | "0" => Some(false),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// SPARQL expression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Expression {
    /// Variable reference
    Variable(String),
    /// Constant term
    Term(RdfTerm),
    /// Binary operation
    Binary(Box<Expression>, BinaryOp, Box<Expression>),
    /// Unary operation
    Unary(UnaryOp, Box<Expression>),
    /// Function call
    Function(FunctionCall),
    /// Aggregate function
    Aggregate(Aggregate),
    /// IN expression: lhs IN (e1, e2, ...)
    In(Box<Expression>, Vec<Expression>),
    /// NOT IN expression
    NotIn(Box<Expression>, Vec<Expression>),
    /// EXISTS subquery
    Exists(Box<GraphPattern>),
    /// NOT EXISTS subquery
    NotExists(Box<GraphPattern>),
    /// Conditional: IF(condition, then, else)
    If(Box<Expression>, Box<Expression>, Box<Expression>),
    /// COALESCE over its arguments
    Coalesce(Vec<Expression>),
    /// BOUND test on a variable name
    Bound(String),
    /// isIRI test
    IsIri(Box<Expression>),
    /// isBlank test
    IsBlank(Box<Expression>),
    /// isLiteral test
    IsLiteral(Box<Expression>),
    /// isNumeric test
    IsNumeric(Box<Expression>),
    /// REGEX(text, pattern, flags?)
    Regex(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
    /// LANG function
    Lang(Box<Expression>),
    /// DATATYPE function
    Datatype(Box<Expression>),
    /// STR function
    Str(Box<Expression>),
    /// IRI constructor
    Iri(Box<Expression>),
}
|
||||
|
||||
impl Expression {
|
||||
pub fn var(name: impl Into<String>) -> Self {
|
||||
Self::Variable(name.into())
|
||||
}
|
||||
|
||||
pub fn term(t: RdfTerm) -> Self {
|
||||
Self::Term(t)
|
||||
}
|
||||
|
||||
pub fn literal(value: impl Into<String>) -> Self {
|
||||
Self::Term(RdfTerm::literal(value))
|
||||
}
|
||||
|
||||
pub fn integer(value: i64) -> Self {
|
||||
Self::Term(RdfTerm::Literal(Literal::integer(value)))
|
||||
}
|
||||
|
||||
pub fn binary(left: Expression, op: BinaryOp, right: Expression) -> Self {
|
||||
Self::Binary(Box::new(left), op, Box::new(right))
|
||||
}
|
||||
|
||||
pub fn unary(op: UnaryOp, expr: Expression) -> Self {
|
||||
Self::Unary(op, Box::new(expr))
|
||||
}
|
||||
|
||||
pub fn and(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::And, right)
|
||||
}
|
||||
|
||||
pub fn or(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Or, right)
|
||||
}
|
||||
|
||||
pub fn eq(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Eq, right)
|
||||
}
|
||||
|
||||
pub fn neq(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::NotEq, right)
|
||||
}
|
||||
|
||||
pub fn lt(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Lt, right)
|
||||
}
|
||||
|
||||
pub fn gt(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Gt, right)
|
||||
}
|
||||
}
|
||||
|
||||
/// Binary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryOp {
    // Logical
    And,
    Or,
    // Comparison
    Eq,
    NotEq,
    Lt,
    LtEq,
    Gt,
    GtEq,
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    // Term / language-tag tests (sameTerm, langMatches)
    SameTerm,
    LangMatches,
}

/// Unary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnaryOp {
    /// Logical negation (!)
    Not,
    /// Unary plus (+)
    Plus,
    /// Arithmetic negation (-)
    Minus,
}
|
||||
|
||||
/// Function call
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FunctionCall {
|
||||
pub name: String,
|
||||
pub args: Vec<Expression>,
|
||||
}
|
||||
|
||||
impl FunctionCall {
|
||||
pub fn new(name: impl Into<String>, args: Vec<Expression>) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
args,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregate function
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Aggregate {
    /// COUNT(expr), or COUNT(*) when `expr` is None
    Count {
        expr: Option<Box<Expression>>,
        distinct: bool,
    },
    /// SUM(expr)
    Sum {
        expr: Box<Expression>,
        distinct: bool,
    },
    /// AVG(expr)
    Avg {
        expr: Box<Expression>,
        distinct: bool,
    },
    /// MIN(expr) — DISTINCT has no effect on MIN, so no flag is stored
    Min {
        expr: Box<Expression>,
    },
    /// MAX(expr)
    Max {
        expr: Box<Expression>,
    },
    /// GROUP_CONCAT(expr; SEPARATOR="...") — spec default separator is " "
    GroupConcat {
        expr: Box<Expression>,
        separator: Option<String>,
        distinct: bool,
    },
    /// SAMPLE(expr) — an arbitrary value from the group
    Sample {
        expr: Box<Expression>,
    },
}
|
||||
|
||||
/// Filter expression
///
/// NOTE(review): `GraphPattern::Filter` stores the `Expression` directly,
/// so this wrapper appears to exist for external callers — confirm usage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Filter {
    pub expression: Expression,
}

impl Filter {
    /// Wrap a boolean expression as a FILTER.
    pub fn new(expression: Expression) -> Self {
        Self { expression }
    }
}
|
||||
|
||||
/// Solution modifier
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct SolutionModifier {
|
||||
pub order_by: Vec<OrderCondition>,
|
||||
pub limit: Option<usize>,
|
||||
pub offset: Option<usize>,
|
||||
pub having: Option<Expression>,
|
||||
}
|
||||
|
||||
impl SolutionModifier {
|
||||
pub fn with_limit(mut self, limit: usize) -> Self {
|
||||
self.limit = Some(limit);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_offset(mut self, offset: usize) -> Self {
|
||||
self.offset = Some(offset);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_order(mut self, conditions: Vec<OrderCondition>) -> Self {
|
||||
self.order_by = conditions;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_having(mut self, expr: Expression) -> Self {
|
||||
self.having = Some(expr);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// ORDER BY condition
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct OrderCondition {
|
||||
pub expression: Expression,
|
||||
pub ascending: bool,
|
||||
}
|
||||
|
||||
impl OrderCondition {
|
||||
pub fn asc(expr: Expression) -> Self {
|
||||
Self {
|
||||
expression: expr,
|
||||
ascending: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn desc(expr: Expression) -> Self {
|
||||
Self {
|
||||
expression: expr,
|
||||
ascending: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GROUP BY condition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GroupCondition {
    /// Group by a variable: `GROUP BY ?x`
    Variable(String),
    /// Group by an expression, optionally bound to a variable:
    /// `GROUP BY (expr AS ?x)`
    Expression(Expression, Option<String>),
}
|
||||
|
||||
// ============================================================================
|
||||
// SPARQL Update Operations
|
||||
// ============================================================================
|
||||
|
||||
/// SPARQL Update operation
///
/// `silent` fields correspond to the SILENT keyword (failures are to be
/// ignored by the executor).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateOperation {
    /// INSERT DATA { triples }
    InsertData(InsertData),
    /// DELETE DATA { triples }
    DeleteData(DeleteData),
    /// DELETE { pattern } INSERT { pattern } WHERE { pattern }
    Modify(Modify),
    /// LOAD <iri> INTO GRAPH <iri>; `destination` None presumably targets
    /// the default graph — confirm in executor
    Load {
        source: Iri,
        destination: Option<Iri>,
        silent: bool,
    },
    /// CLEAR GRAPH <iri>
    Clear { target: GraphTarget, silent: bool },
    /// CREATE GRAPH <iri>
    Create { graph: Iri, silent: bool },
    /// DROP GRAPH <iri>
    Drop { target: GraphTarget, silent: bool },
    /// COPY source TO destination
    Copy {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// MOVE source TO destination
    Move {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// ADD source TO destination
    Add {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
}
|
||||
|
||||
/// INSERT DATA operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InsertData {
    /// Ground quads to insert (no variables)
    pub quads: Vec<Quad>,
}

/// DELETE DATA operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeleteData {
    /// Ground quads to delete (no variables)
    pub quads: Vec<Quad>,
}

/// DELETE/INSERT with WHERE
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Modify {
    /// WITH <iri>: default graph for the whole operation
    pub with_graph: Option<Iri>,
    /// DELETE template (None when only inserting)
    pub delete_pattern: Option<Vec<QuadPattern>>,
    /// INSERT template (None when only deleting)
    pub insert_pattern: Option<Vec<QuadPattern>>,
    /// USING / USING NAMED dataset clauses
    pub using: Vec<DatasetClause>,
    /// WHERE pattern producing bindings for the templates
    pub where_pattern: GraphPattern,
}

/// Quad (triple with optional graph)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Quad {
    pub subject: RdfTerm,
    pub predicate: Iri,
    pub object: RdfTerm,
    /// None = default graph
    pub graph: Option<Iri>,
}

/// Quad pattern (for DELETE/INSERT templates)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuadPattern {
    pub subject: TermOrVariable,
    pub predicate: VarOrIri,
    pub object: TermOrVariable,
    /// None = default graph
    pub graph: Option<VarOrIri>,
}

/// Graph target for management operations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GraphTarget {
    /// DEFAULT graph
    Default,
    /// GRAPH <iri>
    Named(Iri),
    /// ALL graphs
    All,
    /// NAMED graphs only
    AllNamed,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The three `RdfTerm` constructors produce the matching variant.
    #[test]
    fn test_rdf_term_creation() {
        let iri = RdfTerm::iri("http://example.org/resource");
        assert!(iri.is_iri());

        let lit = RdfTerm::literal("hello");
        assert!(lit.is_literal());

        let blank = RdfTerm::blank("b0");
        assert!(blank.is_blank_node());
    }

    /// Typed literals round-trip through the `as_*` lexical parsers.
    #[test]
    fn test_literal_parsing() {
        let int_lit = Literal::integer(42);
        assert_eq!(int_lit.as_integer(), Some(42));

        let double_lit = Literal::double(3.14);
        assert!((double_lit.as_double().unwrap() - 3.14).abs() < 0.001);

        let bool_lit = Literal::boolean(true);
        assert_eq!(bool_lit.as_boolean(), Some(true));
    }

    /// The builder helpers nest into the expected `Binary` shape.
    #[test]
    fn test_expression_builder() {
        let expr = Expression::and(
            Expression::eq(Expression::var("x"), Expression::integer(10)),
            Expression::gt(Expression::var("y"), Expression::integer(5)),
        );

        match expr {
            Expression::Binary(_, BinaryOp::And, _) => (),
            _ => panic!("Expected AND expression"),
        }
    }

    /// `TriplePattern::simple` wraps the predicate IRI in `PropertyPath::Iri`.
    #[test]
    fn test_triple_pattern() {
        let pattern = TriplePattern::simple(
            TermOrVariable::var("s"),
            Iri::rdf_type(),
            TermOrVariable::iri(Iri::new("http://example.org/Person")),
        );

        assert!(matches!(pattern.subject, TermOrVariable::Variable(_)));
        assert!(matches!(pattern.predicate, PropertyPath::Iri(_)));
    }
}
|
||||
928
vendor/ruvector/crates/rvlite/src/sparql/executor.rs
vendored
Normal file
928
vendor/ruvector/crates/rvlite/src/sparql/executor.rs
vendored
Normal file
@@ -0,0 +1,928 @@
|
||||
// SPARQL Query Executor for WASM
|
||||
//
|
||||
// Executes parsed SPARQL queries against an in-memory triple store.
|
||||
// Simplified version for WASM environments (no async, no complex aggregates).
|
||||
|
||||
use super::ast::*;
|
||||
use super::triple_store::{Triple, TripleStore};
|
||||
use super::{SparqlError, SparqlResult};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Static empty HashMap for default prefixes.
/// Shared so `SparqlContext::new` can hand out a long-lived reference
/// without allocating a fresh map per context.
static EMPTY_PREFIXES: once_cell::sync::Lazy<HashMap<String, Iri>> =
    once_cell::sync::Lazy::new(HashMap::new);

/// Solution binding - maps variables to RDF terms
pub type Binding = HashMap<String, RdfTerm>;

/// Solution sequence - list of bindings, one per query solution
pub type Solutions = Vec<Binding>;
|
||||
|
||||
/// Execution context for SPARQL queries
|
||||
pub struct SparqlContext<'a> {
|
||||
pub store: &'a TripleStore,
|
||||
pub base: Option<&'a Iri>,
|
||||
pub prefixes: &'a HashMap<String, Iri>,
|
||||
}
|
||||
|
||||
impl<'a> SparqlContext<'a> {
|
||||
pub fn new(store: &'a TripleStore) -> Self {
|
||||
Self {
|
||||
store,
|
||||
base: None,
|
||||
prefixes: &EMPTY_PREFIXES,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_base(mut self, base: Option<&'a Iri>) -> Self {
|
||||
self.base = base;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_prefixes(mut self, prefixes: &'a HashMap<String, Iri>) -> Self {
|
||||
self.prefixes = prefixes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a SPARQL query
|
||||
pub fn execute_sparql(store: &TripleStore, query: &SparqlQuery) -> SparqlResult<QueryResult> {
|
||||
let mut ctx = SparqlContext::new(store)
|
||||
.with_base(query.base.as_ref())
|
||||
.with_prefixes(&query.prefixes);
|
||||
|
||||
match &query.body {
|
||||
QueryBody::Select(select) => {
|
||||
let solutions = execute_select(&mut ctx, select)?;
|
||||
Ok(QueryResult::Select(solutions))
|
||||
}
|
||||
QueryBody::Construct(construct) => {
|
||||
let triples = execute_construct(&mut ctx, construct)?;
|
||||
Ok(QueryResult::Construct(triples))
|
||||
}
|
||||
QueryBody::Ask(ask) => {
|
||||
let result = execute_ask(&mut ctx, ask)?;
|
||||
Ok(QueryResult::Ask(result))
|
||||
}
|
||||
QueryBody::Describe(describe) => {
|
||||
let triples = execute_describe(&mut ctx, describe)?;
|
||||
Ok(QueryResult::Describe(triples))
|
||||
}
|
||||
QueryBody::Update(ops) => {
|
||||
for op in ops {
|
||||
execute_update(&mut ctx, op)?;
|
||||
}
|
||||
Ok(QueryResult::Update)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Query result types
#[derive(Debug, Clone)]
pub enum QueryResult {
    /// SELECT: projected variable bindings
    Select(SelectResult),
    /// CONSTRUCT: instantiated triples
    Construct(Vec<Triple>),
    /// ASK: whether the pattern matched
    Ask(bool),
    /// DESCRIBE: triples describing the requested resources
    Describe(Vec<Triple>),
    /// Update operations completed (no payload)
    Update,
}

/// SELECT query result
#[derive(Debug, Clone)]
pub struct SelectResult {
    /// Projected variable names (column order)
    pub variables: Vec<String>,
    /// One binding per solution
    pub bindings: Solutions,
}

impl SelectResult {
    /// Bundle projected variable names with their bindings.
    pub fn new(variables: Vec<String>, bindings: Solutions) -> Self {
        Self {
            variables,
            bindings,
        }
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// SELECT Query Execution
|
||||
// ============================================================================
|
||||
|
||||
fn execute_select(ctx: &mut SparqlContext, query: &SelectQuery) -> SparqlResult<SelectResult> {
|
||||
// Evaluate WHERE clause
|
||||
let mut solutions = evaluate_graph_pattern(ctx, &query.where_clause)?;
|
||||
|
||||
// Apply solution modifiers
|
||||
solutions = apply_modifiers(solutions, &query.modifier)?;
|
||||
|
||||
// Project variables
|
||||
let (variables, bindings) = project_solutions(&query.projection, solutions)?;
|
||||
|
||||
Ok(SelectResult {
|
||||
variables,
|
||||
bindings,
|
||||
})
|
||||
}
|
||||
|
||||
fn project_solutions(
|
||||
projection: &Projection,
|
||||
solutions: Solutions,
|
||||
) -> SparqlResult<(Vec<String>, Solutions)> {
|
||||
match projection {
|
||||
Projection::All => {
|
||||
// Get all unique variables
|
||||
let mut vars: Vec<String> = Vec::new();
|
||||
for binding in &solutions {
|
||||
for var in binding.keys() {
|
||||
if !vars.contains(var) {
|
||||
vars.push(var.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
vars.sort();
|
||||
Ok((vars, solutions))
|
||||
}
|
||||
Projection::Variables(vars) | Projection::Distinct(vars) | Projection::Reduced(vars) => {
|
||||
let var_names: Vec<String> = vars
|
||||
.iter()
|
||||
.map(|v| {
|
||||
v.alias.clone().unwrap_or_else(|| {
|
||||
if let Expression::Variable(name) = &v.expression {
|
||||
name.clone()
|
||||
} else {
|
||||
"_expr".to_string()
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut projected: Solutions = Vec::new();
|
||||
|
||||
for binding in solutions {
|
||||
let mut new_binding = Binding::new();
|
||||
|
||||
for (i, pv) in vars.iter().enumerate() {
|
||||
if let Some(value) = evaluate_expression(&pv.expression, &binding)? {
|
||||
new_binding.insert(var_names[i].clone(), value);
|
||||
}
|
||||
}
|
||||
|
||||
// For DISTINCT, check if this binding already exists
|
||||
if matches!(projection, Projection::Distinct(_)) {
|
||||
if !projected.iter().any(|b| bindings_equal(b, &new_binding)) {
|
||||
projected.push(new_binding);
|
||||
}
|
||||
} else {
|
||||
projected.push(new_binding);
|
||||
}
|
||||
}
|
||||
|
||||
Ok((var_names, projected))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// True when two bindings map exactly the same variables to equal terms.
fn bindings_equal(a: &Binding, b: &Binding) -> bool {
    a.len() == b.len() && a.iter().all(|(k, v)| b.get(k) == Some(v))
}
|
||||
|
||||
// ============================================================================
|
||||
// Graph Pattern Evaluation
|
||||
// ============================================================================
|
||||
|
||||
fn evaluate_graph_pattern(ctx: &SparqlContext, pattern: &GraphPattern) -> SparqlResult<Solutions> {
|
||||
match pattern {
|
||||
GraphPattern::Empty => Ok(vec![Binding::new()]),
|
||||
|
||||
GraphPattern::Bgp(triples) => evaluate_bgp(ctx, triples),
|
||||
|
||||
GraphPattern::Join(left, right) => {
|
||||
let left_solutions = evaluate_graph_pattern(ctx, left)?;
|
||||
let right_solutions = evaluate_graph_pattern(ctx, right)?;
|
||||
join_solutions(left_solutions, right_solutions)
|
||||
}
|
||||
|
||||
GraphPattern::LeftJoin(left, right, condition) => {
|
||||
let left_solutions = evaluate_graph_pattern(ctx, left)?;
|
||||
let right_solutions = evaluate_graph_pattern(ctx, right)?;
|
||||
left_join_solutions(left_solutions, right_solutions, condition.as_ref())
|
||||
}
|
||||
|
||||
GraphPattern::Union(left, right) => {
|
||||
let mut left_solutions = evaluate_graph_pattern(ctx, left)?;
|
||||
let right_solutions = evaluate_graph_pattern(ctx, right)?;
|
||||
left_solutions.extend(right_solutions);
|
||||
Ok(left_solutions)
|
||||
}
|
||||
|
||||
GraphPattern::Filter(inner, condition) => {
|
||||
let solutions = evaluate_graph_pattern(ctx, inner)?;
|
||||
filter_solutions(solutions, condition)
|
||||
}
|
||||
|
||||
GraphPattern::Minus(left, right) => {
|
||||
let left_solutions = evaluate_graph_pattern(ctx, left)?;
|
||||
let right_solutions = evaluate_graph_pattern(ctx, right)?;
|
||||
minus_solutions(left_solutions, right_solutions)
|
||||
}
|
||||
|
||||
GraphPattern::Bind(expr, var, inner) => {
|
||||
let mut solutions = evaluate_graph_pattern(ctx, inner)?;
|
||||
for binding in &mut solutions {
|
||||
if let Some(value) = evaluate_expression(expr, binding)? {
|
||||
binding.insert(var.clone(), value);
|
||||
}
|
||||
}
|
||||
Ok(solutions)
|
||||
}
|
||||
|
||||
GraphPattern::Values(values) => {
|
||||
let mut solutions = Vec::new();
|
||||
for row in &values.bindings {
|
||||
let mut binding = Binding::new();
|
||||
for (i, var) in values.variables.iter().enumerate() {
|
||||
if let Some(Some(term)) = row.get(i) {
|
||||
binding.insert(var.clone(), term.clone());
|
||||
}
|
||||
}
|
||||
solutions.push(binding);
|
||||
}
|
||||
Ok(solutions)
|
||||
}
|
||||
|
||||
_ => Err(SparqlError::UnsupportedOperation(format!(
|
||||
"Graph pattern not supported in WASM build: {:?}",
|
||||
pattern
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate_bgp(ctx: &SparqlContext, patterns: &[TriplePattern]) -> SparqlResult<Solutions> {
|
||||
let mut solutions = vec![Binding::new()];
|
||||
|
||||
for pattern in patterns {
|
||||
let mut new_solutions = Vec::new();
|
||||
|
||||
for binding in &solutions {
|
||||
let matches = match_triple_pattern(ctx, pattern, binding)?;
|
||||
new_solutions.extend(matches);
|
||||
}
|
||||
|
||||
solutions = new_solutions;
|
||||
|
||||
if solutions.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(solutions)
|
||||
}
|
||||
|
||||
fn match_triple_pattern(
|
||||
ctx: &SparqlContext,
|
||||
pattern: &TriplePattern,
|
||||
binding: &Binding,
|
||||
) -> SparqlResult<Solutions> {
|
||||
// Resolve pattern components
|
||||
let subject = resolve_term_or_var(&pattern.subject, binding);
|
||||
let object = resolve_term_or_var(&pattern.object, binding);
|
||||
|
||||
// Handle simple IRI predicate (most common case)
|
||||
if let PropertyPath::Iri(iri) = &pattern.predicate {
|
||||
return match_simple_triple(
|
||||
ctx,
|
||||
subject,
|
||||
Some(iri),
|
||||
object,
|
||||
&pattern.subject,
|
||||
&pattern.object,
|
||||
binding,
|
||||
);
|
||||
}
|
||||
|
||||
// For now, only support simple IRI predicates in WASM
|
||||
Err(SparqlError::PropertyPathError(
|
||||
"Complex property paths not yet supported in WASM build".to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
fn resolve_term_or_var(tov: &TermOrVariable, binding: &Binding) -> Option<RdfTerm> {
|
||||
match tov {
|
||||
TermOrVariable::Term(t) => Some(t.clone()),
|
||||
TermOrVariable::Variable(v) => binding.get(v).cloned(),
|
||||
TermOrVariable::BlankNode(id) => Some(RdfTerm::BlankNode(id.clone())),
|
||||
}
|
||||
}
|
||||
|
||||
fn match_simple_triple(
|
||||
ctx: &SparqlContext,
|
||||
subject: Option<RdfTerm>,
|
||||
predicate: Option<&Iri>,
|
||||
object: Option<RdfTerm>,
|
||||
subj_pattern: &TermOrVariable,
|
||||
obj_pattern: &TermOrVariable,
|
||||
binding: &Binding,
|
||||
) -> SparqlResult<Solutions> {
|
||||
let triples = ctx
|
||||
.store
|
||||
.query(subject.as_ref(), predicate, object.as_ref());
|
||||
|
||||
let mut solutions = Vec::new();
|
||||
|
||||
for triple in triples {
|
||||
let mut new_binding = binding.clone();
|
||||
let mut matches = true;
|
||||
|
||||
// Bind subject variable
|
||||
if let TermOrVariable::Variable(var) = subj_pattern {
|
||||
if let Some(existing) = new_binding.get(var) {
|
||||
if existing != &triple.subject {
|
||||
matches = false;
|
||||
}
|
||||
} else {
|
||||
new_binding.insert(var.clone(), triple.subject.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Bind object variable
|
||||
if matches {
|
||||
if let TermOrVariable::Variable(var) = obj_pattern {
|
||||
if let Some(existing) = new_binding.get(var) {
|
||||
if existing != &triple.object {
|
||||
matches = false;
|
||||
}
|
||||
} else {
|
||||
new_binding.insert(var.clone(), triple.object.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if matches {
|
||||
solutions.push(new_binding);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(solutions)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Solution Operations
|
||||
// ============================================================================
|
||||
|
||||
fn join_solutions(left: Solutions, right: Solutions) -> SparqlResult<Solutions> {
|
||||
if left.is_empty() || right.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let mut result = Vec::new();
|
||||
|
||||
for l in &left {
|
||||
for r in &right {
|
||||
if let Some(merged) = merge_bindings(l, r) {
|
||||
result.push(merged);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn left_join_solutions(
|
||||
left: Solutions,
|
||||
right: Solutions,
|
||||
condition: Option<&Expression>,
|
||||
) -> SparqlResult<Solutions> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
for l in &left {
|
||||
let mut found_match = false;
|
||||
|
||||
for r in &right {
|
||||
if let Some(merged) = merge_bindings(l, r) {
|
||||
// Check condition if present
|
||||
let include = if let Some(cond) = condition {
|
||||
evaluate_expression_as_bool(cond, &merged)?
|
||||
} else {
|
||||
true
|
||||
};
|
||||
|
||||
if include {
|
||||
result.push(merged);
|
||||
found_match = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !found_match {
|
||||
result.push(l.clone());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn minus_solutions(left: Solutions, right: Solutions) -> SparqlResult<Solutions> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
for l in &left {
|
||||
let mut has_compatible = false;
|
||||
|
||||
for r in &right {
|
||||
if bindings_compatible(l, r) {
|
||||
has_compatible = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !has_compatible {
|
||||
result.push(l.clone());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Merge two bindings into one, or return `None` when they disagree
/// on any shared variable.
fn merge_bindings(a: &Binding, b: &Binding) -> Option<Binding> {
    let mut merged = a.clone();

    for (key, value) in b {
        match merged.get(key) {
            // Conflict: same variable, different terms.
            Some(existing) if existing != value => return None,
            Some(_) => {}
            None => {
                merged.insert(key.clone(), value.clone());
            }
        }
    }

    Some(merged)
}
|
||||
|
||||
/// Two bindings are compatible when every variable they share is bound
/// to the same term (variables bound on only one side are ignored).
fn bindings_compatible(a: &Binding, b: &Binding) -> bool {
    a.iter().all(|(k, v)| b.get(k).map_or(true, |bv| bv == v))
}
|
||||
|
||||
fn filter_solutions(solutions: Solutions, condition: &Expression) -> SparqlResult<Solutions> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
for binding in solutions {
|
||||
if evaluate_expression_as_bool(condition, &binding)? {
|
||||
result.push(binding);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Solution Modifiers
|
||||
// ============================================================================
|
||||
|
||||
fn apply_modifiers(
|
||||
mut solutions: Solutions,
|
||||
modifier: &SolutionModifier,
|
||||
) -> SparqlResult<Solutions> {
|
||||
// ORDER BY
|
||||
if !modifier.order_by.is_empty() {
|
||||
solutions.sort_by(|a, b| {
|
||||
for cond in &modifier.order_by {
|
||||
let va = evaluate_expression(&cond.expression, a).ok().flatten();
|
||||
let vb = evaluate_expression(&cond.expression, b).ok().flatten();
|
||||
|
||||
let ord = match (va, vb) {
|
||||
(Some(ta), Some(tb)) => compare_terms(&ta, &tb),
|
||||
(Some(_), None) => std::cmp::Ordering::Less,
|
||||
(None, Some(_)) => std::cmp::Ordering::Greater,
|
||||
(None, None) => std::cmp::Ordering::Equal,
|
||||
};
|
||||
|
||||
let ord = if cond.ascending { ord } else { ord.reverse() };
|
||||
|
||||
if ord != std::cmp::Ordering::Equal {
|
||||
return ord;
|
||||
}
|
||||
}
|
||||
std::cmp::Ordering::Equal
|
||||
});
|
||||
}
|
||||
|
||||
// OFFSET
|
||||
if let Some(offset) = modifier.offset {
|
||||
if offset < solutions.len() {
|
||||
solutions = solutions.into_iter().skip(offset).collect();
|
||||
} else {
|
||||
solutions.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// LIMIT
|
||||
if let Some(limit) = modifier.limit {
|
||||
solutions.truncate(limit);
|
||||
}
|
||||
|
||||
Ok(solutions)
|
||||
}
|
||||
|
||||
fn compare_terms(a: &RdfTerm, b: &RdfTerm) -> std::cmp::Ordering {
|
||||
match (a, b) {
|
||||
(RdfTerm::Literal(la), RdfTerm::Literal(lb)) => {
|
||||
if let (Some(na), Some(nb)) = (la.as_double(), lb.as_double()) {
|
||||
na.partial_cmp(&nb).unwrap_or(std::cmp::Ordering::Equal)
|
||||
} else {
|
||||
la.value.cmp(&lb.value)
|
||||
}
|
||||
}
|
||||
(RdfTerm::Iri(ia), RdfTerm::Iri(ib)) => ia.as_str().cmp(ib.as_str()),
|
||||
_ => std::cmp::Ordering::Equal,
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Expression Evaluation
|
||||
// ============================================================================
|
||||
|
||||
fn evaluate_expression(expr: &Expression, binding: &Binding) -> SparqlResult<Option<RdfTerm>> {
|
||||
match expr {
|
||||
Expression::Variable(var) => Ok(binding.get(var).cloned()),
|
||||
|
||||
Expression::Term(term) => Ok(Some(term.clone())),
|
||||
|
||||
Expression::Binary(left, op, right) => {
|
||||
let lv = evaluate_expression(left, binding)?;
|
||||
let rv = evaluate_expression(right, binding)?;
|
||||
evaluate_binary_op(lv, *op, rv)
|
||||
}
|
||||
|
||||
Expression::Unary(op, inner) => {
|
||||
let v = evaluate_expression(inner, binding)?;
|
||||
evaluate_unary_op(*op, v)
|
||||
}
|
||||
|
||||
Expression::Bound(var) => Ok(Some(RdfTerm::Literal(Literal::boolean(
|
||||
binding.contains_key(var),
|
||||
)))),
|
||||
|
||||
Expression::If(cond, then_expr, else_expr) => {
|
||||
if evaluate_expression_as_bool(cond, binding)? {
|
||||
evaluate_expression(then_expr, binding)
|
||||
} else {
|
||||
evaluate_expression(else_expr, binding)
|
||||
}
|
||||
}
|
||||
|
||||
Expression::Coalesce(exprs) => {
|
||||
for e in exprs {
|
||||
if let Some(v) = evaluate_expression(e, binding)? {
|
||||
return Ok(Some(v));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
Expression::IsIri(e) => {
|
||||
let v = evaluate_expression(e, binding)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(
|
||||
v.map(|t| t.is_iri()).unwrap_or(false),
|
||||
))))
|
||||
}
|
||||
|
||||
Expression::IsBlank(e) => {
|
||||
let v = evaluate_expression(e, binding)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(
|
||||
v.map(|t| t.is_blank_node()).unwrap_or(false),
|
||||
))))
|
||||
}
|
||||
|
||||
Expression::IsLiteral(e) => {
|
||||
let v = evaluate_expression(e, binding)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(
|
||||
v.map(|t| t.is_literal()).unwrap_or(false),
|
||||
))))
|
||||
}
|
||||
|
||||
Expression::Str(e) => {
|
||||
let v = evaluate_expression(e, binding)?;
|
||||
Ok(v.map(|t| RdfTerm::literal(term_to_string(&t))))
|
||||
}
|
||||
|
||||
Expression::Lang(e) => {
|
||||
let v = evaluate_expression(e, binding)?;
|
||||
Ok(v.and_then(|t| {
|
||||
if let RdfTerm::Literal(lit) = t {
|
||||
Some(RdfTerm::literal(lit.language.unwrap_or_default()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
Expression::Datatype(e) => {
|
||||
let v = evaluate_expression(e, binding)?;
|
||||
Ok(v.and_then(|t| {
|
||||
if let RdfTerm::Literal(lit) = t {
|
||||
Some(RdfTerm::Iri(lit.datatype))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
_ => Err(SparqlError::UnsupportedOperation(
|
||||
"Complex expressions not yet supported in WASM build".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate_expression_as_bool(expr: &Expression, binding: &Binding) -> SparqlResult<bool> {
|
||||
let value = evaluate_expression(expr, binding)?;
|
||||
|
||||
Ok(match value {
|
||||
None => false,
|
||||
Some(RdfTerm::Literal(lit)) => {
|
||||
if let Some(b) = lit.as_boolean() {
|
||||
b
|
||||
} else if let Some(n) = lit.as_double() {
|
||||
n != 0.0
|
||||
} else {
|
||||
!lit.value.is_empty()
|
||||
}
|
||||
}
|
||||
Some(_) => true,
|
||||
})
|
||||
}
|
||||
|
||||
fn evaluate_binary_op(
|
||||
left: Option<RdfTerm>,
|
||||
op: BinaryOp,
|
||||
right: Option<RdfTerm>,
|
||||
) -> SparqlResult<Option<RdfTerm>> {
|
||||
match op {
|
||||
BinaryOp::And => {
|
||||
let lb = left.map(|t| term_to_bool(&t)).unwrap_or(false);
|
||||
let rb = right.map(|t| term_to_bool(&t)).unwrap_or(false);
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(lb && rb))))
|
||||
}
|
||||
|
||||
BinaryOp::Or => {
|
||||
let lb = left.map(|t| term_to_bool(&t)).unwrap_or(false);
|
||||
let rb = right.map(|t| term_to_bool(&t)).unwrap_or(false);
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(lb || rb))))
|
||||
}
|
||||
|
||||
BinaryOp::Eq => Ok(Some(RdfTerm::Literal(Literal::boolean(left == right)))),
|
||||
|
||||
BinaryOp::NotEq => Ok(Some(RdfTerm::Literal(Literal::boolean(left != right)))),
|
||||
|
||||
BinaryOp::Lt | BinaryOp::LtEq | BinaryOp::Gt | BinaryOp::GtEq => {
|
||||
let cmp = match (&left, &right) {
|
||||
(Some(l), Some(r)) => compare_terms(l, r),
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
let result = match op {
|
||||
BinaryOp::Lt => cmp == std::cmp::Ordering::Less,
|
||||
BinaryOp::LtEq => cmp != std::cmp::Ordering::Greater,
|
||||
BinaryOp::Gt => cmp == std::cmp::Ordering::Greater,
|
||||
BinaryOp::GtEq => cmp != std::cmp::Ordering::Less,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(result))))
|
||||
}
|
||||
|
||||
BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => {
|
||||
let ln = left.and_then(|t| term_to_number(&t));
|
||||
let rn = right.and_then(|t| term_to_number(&t));
|
||||
|
||||
match (ln, rn) {
|
||||
(Some(l), Some(r)) => {
|
||||
let result = match op {
|
||||
BinaryOp::Add => l + r,
|
||||
BinaryOp::Sub => l - r,
|
||||
BinaryOp::Mul => l * r,
|
||||
BinaryOp::Div => {
|
||||
if r == 0.0 {
|
||||
return Ok(None);
|
||||
}
|
||||
l / r
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Ok(Some(RdfTerm::Literal(Literal::decimal(result))))
|
||||
}
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
_ => Err(SparqlError::UnsupportedOperation(format!(
|
||||
"Binary operator not supported: {:?}",
|
||||
op
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn evaluate_unary_op(op: UnaryOp, value: Option<RdfTerm>) -> SparqlResult<Option<RdfTerm>> {
|
||||
match op {
|
||||
UnaryOp::Not => {
|
||||
let b = value.map(|t| term_to_bool(&t)).unwrap_or(false);
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(!b))))
|
||||
}
|
||||
|
||||
UnaryOp::Plus => Ok(value),
|
||||
|
||||
UnaryOp::Minus => {
|
||||
let n = value.and_then(|t| term_to_number(&t));
|
||||
Ok(n.map(|v| RdfTerm::Literal(Literal::decimal(-v))))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn term_to_string(term: &RdfTerm) -> String {
|
||||
match term {
|
||||
RdfTerm::Iri(iri) => iri.as_str().to_string(),
|
||||
RdfTerm::Literal(lit) => lit.value.clone(),
|
||||
RdfTerm::BlankNode(id) => format!("_:{}", id),
|
||||
}
|
||||
}
|
||||
|
||||
fn term_to_number(term: &RdfTerm) -> Option<f64> {
|
||||
match term {
|
||||
RdfTerm::Literal(lit) => lit.as_double(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn term_to_bool(term: &RdfTerm) -> bool {
|
||||
match term {
|
||||
RdfTerm::Literal(lit) => {
|
||||
if let Some(b) = lit.as_boolean() {
|
||||
b
|
||||
} else if let Some(n) = lit.as_double() {
|
||||
n != 0.0
|
||||
} else {
|
||||
!lit.value.is_empty()
|
||||
}
|
||||
}
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Other Query Forms
|
||||
// ============================================================================
|
||||
|
||||
fn execute_construct(ctx: &SparqlContext, query: &ConstructQuery) -> SparqlResult<Vec<Triple>> {
|
||||
let solutions = evaluate_graph_pattern(ctx, &query.where_clause)?;
|
||||
let solutions = apply_modifiers(solutions, &query.modifier)?;
|
||||
|
||||
let mut triples = Vec::new();
|
||||
|
||||
for binding in solutions {
|
||||
for pattern in &query.template {
|
||||
if let (Some(s), Some(o)) = (
|
||||
resolve_term_or_var(&pattern.subject, &binding),
|
||||
resolve_term_or_var(&pattern.object, &binding),
|
||||
) {
|
||||
if let PropertyPath::Iri(p) = &pattern.predicate {
|
||||
triples.push(Triple::new(s, p.clone(), o));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(triples)
|
||||
}
|
||||
|
||||
fn execute_ask(ctx: &SparqlContext, query: &AskQuery) -> SparqlResult<bool> {
|
||||
let solutions = evaluate_graph_pattern(ctx, &query.where_clause)?;
|
||||
Ok(!solutions.is_empty())
|
||||
}
|
||||
|
||||
fn execute_describe(ctx: &SparqlContext, query: &DescribeQuery) -> SparqlResult<Vec<Triple>> {
|
||||
let mut resources: Vec<RdfTerm> = Vec::new();
|
||||
|
||||
// Get resources from query
|
||||
for r in &query.resources {
|
||||
match r {
|
||||
VarOrIri::Iri(iri) => resources.push(RdfTerm::Iri(iri.clone())),
|
||||
VarOrIri::Variable(var) => {
|
||||
if let Some(pattern) = &query.where_clause {
|
||||
let solutions = evaluate_graph_pattern(ctx, pattern)?;
|
||||
for binding in solutions {
|
||||
if let Some(term) = binding.get(var) {
|
||||
if !resources.contains(term) {
|
||||
resources.push(term.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get all triples about each resource
|
||||
let mut triples = Vec::new();
|
||||
for resource in resources {
|
||||
// Triples where resource is subject
|
||||
triples.extend(ctx.store.query(Some(&resource), None, None));
|
||||
// Triples where resource is object
|
||||
triples.extend(ctx.store.query(None, None, Some(&resource)));
|
||||
}
|
||||
|
||||
Ok(triples)
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Update Operations (Simplified)
|
||||
// ============================================================================
|
||||
|
||||
/// Execute a SPARQL Update operation.
///
/// Updates are not implemented in the WASM build; every operation is
/// rejected with `UnsupportedOperation`. Arguments are intentionally
/// unused until update support lands.
fn execute_update(_ctx: &SparqlContext, _op: &UpdateOperation) -> SparqlResult<()> {
    // Simplified: Updates not fully implemented in WASM build
    Err(SparqlError::UnsupportedOperation(
        "Update operations not yet supported in WASM build".to_string(),
    ))
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sparql::parser::parse_sparql;

    // Builds a small store describing one person: rdf:type, name, age.
    fn setup_test_store() -> TripleStore {
        let store = TripleStore::new();

        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/name"),
            RdfTerm::literal("Alice"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/age"),
            RdfTerm::Literal(Literal::integer(30)),
        ));

        store
    }

    #[test]
    fn test_simple_select() {
        // A fully-variable pattern must match every triple in the store.
        let store = setup_test_store();
        let query = parse_sparql("SELECT ?s ?p ?o WHERE { ?s ?p ?o }").unwrap();
        let result = execute_sparql(&store, &query).unwrap();

        if let QueryResult::Select(select) = result {
            assert!(!select.bindings.is_empty());
        } else {
            panic!("Expected SELECT result");
        }
    }

    #[test]
    fn test_select_with_filter() {
        // An equality FILTER on the name literal should leave exactly one row.
        let store = setup_test_store();
        let query = parse_sparql(
            r#"
            SELECT ?name WHERE {
                ?s <http://example.org/name> ?name .
                FILTER(?name = "Alice")
            }
        "#,
        )
        .unwrap();
        let result = execute_sparql(&store, &query).unwrap();

        if let QueryResult::Select(select) = result {
            assert_eq!(select.bindings.len(), 1);
        }
    }

    #[test]
    fn test_ask_query() {
        // ASK over a ground triple present in the store must answer true.
        let store = setup_test_store();

        let query = parse_sparql(
            r#"
            ASK { <http://example.org/person/1> <http://example.org/name> "Alice" }
        "#,
        )
        .unwrap();
        let result = execute_sparql(&store, &query).unwrap();

        assert!(matches!(result, QueryResult::Ask(true)));
    }
}
|
||||
124
vendor/ruvector/crates/rvlite/src/sparql/mod.rs
vendored
Normal file
124
vendor/ruvector/crates/rvlite/src/sparql/mod.rs
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
// SPARQL (SPARQL Protocol and RDF Query Language) module for rvlite
|
||||
//
|
||||
// Provides W3C-compliant SPARQL 1.1 query support for RDF data with
|
||||
// in-memory storage for WASM environments.
|
||||
//
|
||||
// Features:
|
||||
// - SPARQL 1.1 Query Language (SELECT, CONSTRUCT, ASK, DESCRIBE)
|
||||
// - Basic Update Language (INSERT DATA, DELETE DATA)
|
||||
// - In-memory RDF triple store with efficient indexing
|
||||
// - Property paths (basic support)
|
||||
// - FILTER expressions and built-in functions
|
||||
// - WASM-compatible implementation
|
||||
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_variables)]
|
||||
#![allow(unused_mut)]
|
||||
|
||||
pub mod ast;
|
||||
pub mod executor;
|
||||
pub mod parser;
|
||||
pub mod triple_store;
|
||||
|
||||
pub use ast::{
|
||||
Aggregate, AskQuery, ConstructQuery, DeleteData, DescribeQuery, Expression, GraphPattern,
|
||||
InsertData, Iri, Literal, OrderCondition, QueryBody, RdfTerm, SelectQuery, SolutionModifier,
|
||||
SparqlQuery, TriplePattern, UpdateOperation,
|
||||
};
|
||||
pub use executor::{execute_sparql, SparqlContext};
|
||||
pub use parser::parse_sparql;
|
||||
pub use triple_store::{Triple, TripleStore};
|
||||
|
||||
/// SPARQL error type
///
/// Covers parsing, evaluation, and feature-support failures raised by
/// the parser, executor, and triple store.
#[derive(Debug, Clone)]
pub enum SparqlError {
    /// The query text could not be parsed.
    ParseError(String),
    /// A variable was used where a bound value was required.
    UnboundVariable(String),
    /// A value's type did not match what the operation required.
    TypeMismatch { expected: String, actual: String },
    /// The named store/graph does not exist.
    StoreNotFound(String),
    /// A malformed IRI was encountered.
    InvalidIri(String),
    /// A malformed literal was encountered.
    InvalidLiteral(String),
    /// Valid SPARQL that this build does not implement.
    UnsupportedOperation(String),
    /// A runtime failure during query evaluation.
    ExecutionError(String),
    /// An aggregate function failed to evaluate.
    AggregateError(String),
    /// A property-path expression failed or is unsupported.
    PropertyPathError(String),
}
|
||||
|
||||
impl std::fmt::Display for SparqlError {
    /// Render the error as a short, human-readable message with a
    /// category prefix followed by the variant's detail string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
            Self::UnboundVariable(var) => write!(f, "Variable not bound: {}", var),
            Self::TypeMismatch { expected, actual } => {
                write!(f, "Type mismatch: expected {}, got {}", expected, actual)
            }
            Self::StoreNotFound(name) => write!(f, "Store not found: {}", name),
            Self::InvalidIri(iri) => write!(f, "Invalid IRI: {}", iri),
            Self::InvalidLiteral(lit) => write!(f, "Invalid literal: {}", lit),
            Self::UnsupportedOperation(op) => write!(f, "Unsupported operation: {}", op),
            Self::ExecutionError(msg) => write!(f, "Execution error: {}", msg),
            Self::AggregateError(msg) => write!(f, "Aggregate error: {}", msg),
            Self::PropertyPathError(msg) => write!(f, "Property path error: {}", msg),
        }
    }
}

// Marker impl: SparqlError participates in the std error ecosystem
// (e.g. boxing as `Box<dyn Error>`); no source/backtrace to expose.
impl std::error::Error for SparqlError {}

/// Result type for SPARQL operations
pub type SparqlResult<T> = Result<T, SparqlError>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_simple_select() {
        // The parser should accept a minimal SELECT and classify its body.
        let query = "SELECT ?s ?p ?o WHERE { ?s ?p ?o }";
        let result = parse_sparql(query);
        assert!(result.is_ok());

        let parsed = result.unwrap();
        assert!(matches!(parsed.body, QueryBody::Select(_)));
    }

    #[test]
    fn test_triple_store_basic() {
        // Insert a single triple and read it back with a wildcard query.
        let store = TripleStore::new();

        let triple = Triple::new(
            RdfTerm::iri("http://example.org/subject"),
            Iri::new("http://example.org/predicate"),
            RdfTerm::literal("object"),
        );

        store.insert(triple.clone());
        assert_eq!(store.count(), 1);

        let results = store.query(None, None, None);
        assert_eq!(results.len(), 1);
    }

    #[test]
    fn test_sparql_execution() {
        // End-to-end: parse a SELECT and execute it against a populated store.
        let store = TripleStore::new();

        // Add test data
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/name"),
            RdfTerm::literal("Alice"),
        ));

        let query =
            parse_sparql("SELECT ?name WHERE { ?person <http://example.org/name> ?name }").unwrap();

        let result = execute_sparql(&store, &query);
        assert!(result.is_ok());
    }
}
|
||||
2502
vendor/ruvector/crates/rvlite/src/sparql/parser.rs
vendored
Normal file
2502
vendor/ruvector/crates/rvlite/src/sparql/parser.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
583
vendor/ruvector/crates/rvlite/src/sparql/triple_store.rs
vendored
Normal file
583
vendor/ruvector/crates/rvlite/src/sparql/triple_store.rs
vendored
Normal file
@@ -0,0 +1,583 @@
|
||||
// RDF Triple Store with efficient in-memory indexing for WASM
|
||||
//
|
||||
// Provides in-memory storage for RDF triples with multiple indexes
|
||||
// for efficient query patterns (SPO, POS, OSP).
|
||||
|
||||
use super::ast::{Iri, RdfTerm};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::RwLock;
|
||||
|
||||
/// RDF Triple
///
/// A single (subject, predicate, object) statement. Subjects and
/// objects are arbitrary [`RdfTerm`]s; predicates are always IRIs.
/// `Eq + Hash` allow triples to be used as set/map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Triple {
    pub subject: RdfTerm,
    pub predicate: Iri,
    pub object: RdfTerm,
}

impl Triple {
    /// Construct a triple from its three components.
    pub fn new(subject: RdfTerm, predicate: Iri, object: RdfTerm) -> Self {
        Self {
            subject,
            predicate,
            object,
        }
    }
}
|
||||
|
||||
/// Triple store statistics
///
/// A snapshot of aggregate counters about a [`TripleStore`]'s contents.
#[derive(Debug, Clone)]
pub struct StoreStats {
    /// Total number of stored triples.
    pub triple_count: u64,
    /// Number of distinct subject terms seen.
    pub subject_count: usize,
    /// Number of distinct predicate IRIs seen.
    pub predicate_count: usize,
    /// Number of distinct object terms seen.
    pub object_count: usize,
    /// Number of named graphs.
    pub graph_count: usize,
}
|
||||
|
||||
/// RDF Triple Store (WASM-compatible, thread-safe via RwLock)
///
/// Triples are stored once (keyed by an internal u64 ID) and indexed in
/// three permutations (SPO, POS, OSP) so any query pattern can be
/// answered from an index. Each index maps two string keys to a set of
/// triple IDs. Every field is individually locked; operations take the
/// locks they need in a fixed order rather than one global lock.
pub struct TripleStore {
    /// All triples stored by internal ID
    triples: RwLock<HashMap<u64, Triple>>,

    /// SPO index: subject -> predicate -> object IDs
    spo_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,

    /// POS index: predicate -> object -> subject IDs
    pos_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,

    /// OSP index: object -> subject -> predicate IDs
    osp_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,

    /// Named graphs: graph IRI -> triple IDs
    graphs: RwLock<HashMap<String, HashSet<u64>>>,

    /// Default graph triple IDs
    default_graph: RwLock<HashSet<u64>>,

    /// Triple ID counter (monotonically increasing, starts at 1)
    next_id: AtomicU64,

    /// Unique subjects for statistics
    subjects: RwLock<HashSet<String>>,

    /// Unique predicates for statistics
    predicates: RwLock<HashSet<String>>,

    /// Unique objects for statistics
    objects: RwLock<HashSet<String>>,
}
|
||||
|
||||
impl TripleStore {
    /// Create an empty store. Triple IDs start at 1 (0 is never issued).
    pub fn new() -> Self {
        Self {
            triples: RwLock::new(HashMap::new()),
            spo_index: RwLock::new(HashMap::new()),
            pos_index: RwLock::new(HashMap::new()),
            osp_index: RwLock::new(HashMap::new()),
            graphs: RwLock::new(HashMap::new()),
            default_graph: RwLock::new(HashSet::new()),
            next_id: AtomicU64::new(1),
            subjects: RwLock::new(HashSet::new()),
            predicates: RwLock::new(HashSet::new()),
            objects: RwLock::new(HashSet::new()),
        }
    }

    /// Insert a triple into the default graph
    pub fn insert(&self, triple: Triple) -> u64 {
        self.insert_into_graph(triple, None)
    }

    /// Insert a triple into a specific graph
    ///
    /// Returns the freshly allocated triple ID. Updates the three
    /// permutation indexes (SPO/POS/OSP), the statistics sets, and the
    /// graph-membership set. Each lock is taken in its own scope so no
    /// two locks are held at once; a concurrent reader may observe a
    /// partially indexed triple mid-insert.
    pub fn insert_into_graph(&self, triple: Triple, graph: Option<&str>) -> u64 {
        let id = self.next_id.fetch_add(1, Ordering::SeqCst);

        // Get string representations for indexing
        let subject_key = term_to_key(&triple.subject);
        let predicate_key = triple.predicate.as_str().to_string();
        let object_key = term_to_key(&triple.object);

        // Update statistics
        // NOTE(review): these sets only ever grow — `remove()` below does not
        // shrink them, so stats over-count after deletions. Confirm intended.
        {
            let mut subjects = self.subjects.write().unwrap();
            subjects.insert(subject_key.clone());
        }
        {
            let mut predicates = self.predicates.write().unwrap();
            predicates.insert(predicate_key.clone());
        }
        {
            let mut objects = self.objects.write().unwrap();
            objects.insert(object_key.clone());
        }

        // Update SPO index
        {
            let mut spo_index = self.spo_index.write().unwrap();
            spo_index
                .entry(subject_key.clone())
                .or_insert_with(HashMap::new)
                .entry(predicate_key.clone())
                .or_insert_with(HashSet::new)
                .insert(id);
        }

        // Update POS index
        {
            let mut pos_index = self.pos_index.write().unwrap();
            pos_index
                .entry(predicate_key.clone())
                .or_insert_with(HashMap::new)
                .entry(object_key.clone())
                .or_insert_with(HashSet::new)
                .insert(id);
        }

        // Update OSP index
        {
            let mut osp_index = self.osp_index.write().unwrap();
            osp_index
                .entry(object_key)
                .or_insert_with(HashMap::new)
                .entry(subject_key)
                .or_insert_with(HashSet::new)
                .insert(id);
        }

        // Update graph membership: a triple belongs to exactly one graph
        // (named or default) depending on the `graph` argument.
        if let Some(graph_iri) = graph {
            let mut graphs = self.graphs.write().unwrap();
            graphs
                .entry(graph_iri.to_string())
                .or_insert_with(HashSet::new)
                .insert(id);
        } else {
            let mut default_graph = self.default_graph.write().unwrap();
            default_graph.insert(id);
        }

        // Store the triple
        {
            let mut triples = self.triples.write().unwrap();
            triples.insert(id, triple);
        }

        id
    }

    /// Get a triple by ID
    pub fn get(&self, id: u64) -> Option<Triple> {
        let triples = self.triples.read().unwrap();
        triples.get(&id).cloned()
    }

    /// Query triples matching a pattern (None means any value)
    pub fn query(
        &self,
        subject: Option<&RdfTerm>,
        predicate: Option<&Iri>,
        object: Option<&RdfTerm>,
    ) -> Vec<Triple> {
        self.query_with_graph(subject, predicate, object, None)
    }

    /// Query triples matching a pattern in a specific graph
    ///
    /// `None` for a term position matches any value. The best permutation
    /// index is chosen from which positions are bound.
    ///
    /// NOTE(review): when `graph` is `None` no graph filtering is applied at
    /// all — results include named-graph triples, not just the default
    /// graph. Confirm this is the intended "union" semantics.
    pub fn query_with_graph(
        &self,
        subject: Option<&RdfTerm>,
        predicate: Option<&Iri>,
        object: Option<&RdfTerm>,
        graph: Option<&str>,
    ) -> Vec<Triple> {
        // Filter by graph if specified (unknown graph name -> empty filter,
        // so the query returns nothing).
        let graph_filter: Option<HashSet<u64>> = graph.map(|g| {
            let graphs = self.graphs.read().unwrap();
            graphs.get(g).cloned().unwrap_or_default()
        });

        // Hold all read locks for the duration of the query so index and
        // triple views stay mutually consistent.
        let spo_index = self.spo_index.read().unwrap();
        let pos_index = self.pos_index.read().unwrap();
        let osp_index = self.osp_index.read().unwrap();
        let triples = self.triples.read().unwrap();

        // Choose the best index based on bound variables
        let ids = match (subject, predicate, object) {
            // All bound - direct lookup via SPO, then verify the object
            (Some(s), Some(p), Some(o)) => {
                let s_key = term_to_key(s);
                let p_key = p.as_str();
                let o_key = term_to_key(o);

                spo_index
                    .get(&s_key)
                    .and_then(|pred_map| pred_map.get(p_key))
                    .map(|ids| ids.iter().copied().collect::<Vec<_>>())
                    .unwrap_or_default()
                    .into_iter()
                    .filter(|id| {
                        triples
                            .get(id)
                            .map(|t| term_to_key(&t.object) == o_key)
                            .unwrap_or(false)
                    })
                    .collect::<Vec<_>>()
            }

            // Subject and predicate bound - use SPO
            (Some(s), Some(p), None) => {
                let s_key = term_to_key(s);
                let p_key = p.as_str();

                spo_index
                    .get(&s_key)
                    .and_then(|pred_map| pred_map.get(p_key))
                    .map(|ids| ids.iter().copied().collect())
                    .unwrap_or_default()
            }

            // Subject only - use SPO
            (Some(s), None, None) => {
                let s_key = term_to_key(s);

                spo_index
                    .get(&s_key)
                    .map(|pred_map| {
                        pred_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }

            // Predicate and object bound - use POS
            (None, Some(p), Some(o)) => {
                let p_key = p.as_str();
                let o_key = term_to_key(o);

                pos_index
                    .get(p_key)
                    .and_then(|obj_map| obj_map.get(&o_key))
                    .map(|ids| ids.iter().copied().collect())
                    .unwrap_or_default()
            }

            // Predicate only - use POS
            (None, Some(p), None) => {
                let p_key = p.as_str();

                pos_index
                    .get(p_key)
                    .map(|obj_map| {
                        obj_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }

            // Object only - use OSP
            (None, None, Some(o)) => {
                let o_key = term_to_key(o);

                osp_index
                    .get(&o_key)
                    .map(|subj_map| {
                        subj_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }

            // Subject and object bound - use SPO then filter on object
            (Some(s), None, Some(o)) => {
                let s_key = term_to_key(s);
                let o_key = term_to_key(o);

                spo_index
                    .get(&s_key)
                    .map(|pred_map| {
                        pred_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .filter(|id| {
                                triples
                                    .get(id)
                                    .map(|t| term_to_key(&t.object) == o_key)
                                    .unwrap_or(false)
                            })
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }

            // Nothing bound - return all
            (None, None, None) => triples.keys().copied().collect(),
        };

        // Apply graph filter and collect results
        ids.into_iter()
            .filter(|id| {
                graph_filter
                    .as_ref()
                    .map(|filter| filter.contains(id))
                    .unwrap_or(true)
            })
            .filter_map(|id| triples.get(&id).cloned())
            .collect()
    }

    /// Get all triples in the store
    pub fn all_triples(&self) -> Vec<Triple> {
        let triples = self.triples.read().unwrap();
        triples.values().cloned().collect()
    }

    /// Get triple count
    pub fn count(&self) -> usize {
        let triples = self.triples.read().unwrap();
        triples.len()
    }

    /// Check if store is empty
    pub fn is_empty(&self) -> bool {
        let triples = self.triples.read().unwrap();
        triples.is_empty()
    }

    /// Clear all triples, indexes, graph sets, and statistics.
    /// The ID counter is NOT reset, so IDs keep increasing across clears.
    pub fn clear(&self) {
        self.triples.write().unwrap().clear();
        self.spo_index.write().unwrap().clear();
        self.pos_index.write().unwrap().clear();
        self.osp_index.write().unwrap().clear();
        self.graphs.write().unwrap().clear();
        self.default_graph.write().unwrap().clear();
        self.subjects.write().unwrap().clear();
        self.predicates.write().unwrap().clear();
        self.objects.write().unwrap().clear();
    }

    /// Clear a specific graph (None clears the default graph).
    /// Each member triple is removed from the store entirely via `remove`.
    pub fn clear_graph(&self, graph: Option<&str>) {
        // Collect IDs first so no lock is held while calling `remove`.
        let ids_to_remove: Vec<u64> = if let Some(graph_iri) = graph {
            let graphs = self.graphs.read().unwrap();
            graphs
                .get(graph_iri)
                .cloned()
                .unwrap_or_default()
                .into_iter()
                .collect()
        } else {
            let default_graph = self.default_graph.read().unwrap();
            default_graph.iter().copied().collect()
        };

        for id in ids_to_remove {
            self.remove(id);
        }
    }

    /// Remove a triple by ID
    ///
    /// Returns the removed triple, or `None` if the ID is unknown.
    /// NOTE(review): emptied index entries (inner sets/maps) are left in
    /// place rather than pruned, and the statistics sets are not updated.
    pub fn remove(&self, id: u64) -> Option<Triple> {
        // Take the triple out first; `?` returns early when absent.
        let triple = {
            let mut triples = self.triples.write().unwrap();
            triples.remove(&id)
        }?;

        // Recompute the index keys from the removed triple itself.
        let subject_key = term_to_key(&triple.subject);
        let predicate_key = triple.predicate.as_str().to_string();
        let object_key = term_to_key(&triple.object);

        // Remove from SPO index
        {
            let mut spo_index = self.spo_index.write().unwrap();
            if let Some(pred_map) = spo_index.get_mut(&subject_key) {
                if let Some(ids) = pred_map.get_mut(&predicate_key) {
                    ids.remove(&id);
                }
            }
        }

        // Remove from POS index
        {
            let mut pos_index = self.pos_index.write().unwrap();
            if let Some(obj_map) = pos_index.get_mut(&predicate_key) {
                if let Some(ids) = obj_map.get_mut(&object_key) {
                    ids.remove(&id);
                }
            }
        }

        // Remove from OSP index
        {
            let mut osp_index = self.osp_index.write().unwrap();
            if let Some(subj_map) = osp_index.get_mut(&object_key) {
                if let Some(ids) = subj_map.get_mut(&subject_key) {
                    ids.remove(&id);
                }
            }
        }

        // Remove from graphs (membership is not tracked per triple, so
        // every graph set is scanned).
        {
            let mut default_graph = self.default_graph.write().unwrap();
            default_graph.remove(&id);
        }
        {
            let mut graphs = self.graphs.write().unwrap();
            for (_, ids) in graphs.iter_mut() {
                ids.remove(&id);
            }
        }

        Some(triple)
    }

    /// Get statistics about the store
    ///
    /// Subject/predicate/object counts come from the monotone statistics
    /// sets, so they reflect everything ever inserted, not the current
    /// contents (see note in `insert_into_graph`).
    pub fn stats(&self) -> StoreStats {
        let triples = self.triples.read().unwrap();
        let subjects = self.subjects.read().unwrap();
        let predicates = self.predicates.read().unwrap();
        let objects = self.objects.read().unwrap();
        let graphs = self.graphs.read().unwrap();

        StoreStats {
            triple_count: triples.len() as u64,
            subject_count: subjects.len(),
            predicate_count: predicates.len(),
            object_count: objects.len(),
            graph_count: graphs.len() + 1, // +1 for default graph
        }
    }

    /// List all named graphs
    pub fn list_graphs(&self) -> Vec<String> {
        let graphs = self.graphs.read().unwrap();
        graphs.keys().cloned().collect()
    }

    /// Get triples from a specific graph
    pub fn get_graph(&self, graph: &str) -> Vec<Triple> {
        let graphs = self.graphs.read().unwrap();
        let triples = self.triples.read().unwrap();

        graphs
            .get(graph)
            .map(|ids| {
                ids.iter()
                    .filter_map(|id| triples.get(id).cloned())
                    .collect()
            })
            .unwrap_or_default()
    }

    /// Get triples from the default graph
    pub fn get_default_graph(&self) -> Vec<Triple> {
        let default_graph = self.default_graph.read().unwrap();
        let triples = self.triples.read().unwrap();

        default_graph
            .iter()
            .filter_map(|id| triples.get(id).cloned())
            .collect()
    }
}
|
||||
|
||||
impl Default for TripleStore {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an RDF term to a string key for indexing
|
||||
fn term_to_key(term: &RdfTerm) -> String {
|
||||
match term {
|
||||
RdfTerm::Iri(iri) => format!("<{}>", iri.as_str()),
|
||||
RdfTerm::Literal(lit) => {
|
||||
if let Some(ref lang) = lit.language {
|
||||
format!("\"{}\"@{}", lit.value, lang)
|
||||
} else if lit.datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string" {
|
||||
format!("\"{}\"^^<{}>", lit.value, lit.datatype.as_str())
|
||||
} else {
|
||||
format!("\"{}\"", lit.value)
|
||||
}
|
||||
}
|
||||
RdfTerm::BlankNode(id) => format!("_:{}", id),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trip: an inserted triple can be retrieved by its returned ID.
    #[test]
    fn test_insert_and_query() {
        let store = TripleStore::new();

        let triple = Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            RdfTerm::iri("http://example.org/Person"),
        );

        let id = store.insert(triple.clone());
        // IDs start at 1, so 0 is never returned.
        assert!(id > 0);

        let retrieved = store.get(id);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap(), triple);
    }

    // Subject-only query uses the SPO index and must return exactly the
    // two triples sharing the subject, not the third with a different one.
    #[test]
    fn test_query_by_subject() {
        let store = TripleStore::new();

        let subject = RdfTerm::iri("http://example.org/person/1");
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdfs_label(),
            RdfTerm::literal("Alice"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/2"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));

        let results = store.query(Some(&subject), None, None);
        assert_eq!(results.len(), 2);
    }

    // Statistics deduplicate: two triples sharing one predicate yield
    // predicate_count == 1 but distinct subject/object counts of 2.
    #[test]
    fn test_statistics() {
        let store = TripleStore::new();

        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s1"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o1"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s2"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o2"),
        ));

        let stats = store.stats();
        assert_eq!(stats.triple_count, 2);
        assert_eq!(stats.subject_count, 2);
        assert_eq!(stats.predicate_count, 1);
        assert_eq!(stats.object_count, 2);
    }
}
|
||||
220
vendor/ruvector/crates/rvlite/src/sql/ast.rs
vendored
Normal file
220
vendor/ruvector/crates/rvlite/src/sql/ast.rs
vendored
Normal file
@@ -0,0 +1,220 @@
|
||||
// AST types for SQL statements
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// SQL statement types
///
/// Top-level result of parsing one SQL statement; consumed by the executor.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SqlStatement {
    /// CREATE TABLE name (columns)
    CreateTable { name: String, columns: Vec<Column> },
    /// INSERT INTO table (columns) VALUES (values)
    /// `columns` and `values` are positional pairs and must be equal length.
    Insert {
        table: String,
        columns: Vec<String>,
        values: Vec<Value>,
    },
    /// SELECT columns FROM table WHERE condition ORDER BY ... LIMIT k
    Select {
        columns: Vec<SelectColumn>,
        from: String,
        where_clause: Option<Expression>,
        order_by: Option<OrderBy>,
        limit: Option<usize>,
    },
    /// DROP TABLE name
    Drop { table: String },
}
|
||||
|
||||
/// Column definition for CREATE TABLE
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Column {
    /// Column name as written in the DDL
    pub name: String,
    /// Declared type; see [`DataType`]
    pub data_type: DataType,
}
|
||||
|
||||
/// Data types supported in SQL
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DataType {
    /// TEXT type for strings
    Text,
    /// INTEGER type
    Integer,
    /// REAL/FLOAT type
    Real,
    /// VECTOR(dimensions) type for vector data; the payload is the
    /// declared dimensionality
    Vector(usize),
}
|
||||
|
||||
/// Column selector in SELECT
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SelectColumn {
    /// SELECT *
    Wildcard,
    /// SELECT column_name
    Name(String),
    /// SELECT expression AS alias (alias optional)
    Expression {
        expr: Expression,
        alias: Option<String>,
    },
}
|
||||
|
||||
/// SQL expressions
///
/// Used both in WHERE clauses and in ORDER BY (where [`Expression::Distance`]
/// drives vector similarity search).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Expression {
    /// Column reference
    Column(String),
    /// Literal value
    Literal(Value),
    /// Binary operation (e.g., a = b, a > b)
    BinaryOp {
        left: Box<Expression>,
        op: BinaryOperator,
        right: Box<Expression>,
    },
    /// Logical AND
    And(Box<Expression>, Box<Expression>),
    /// Logical OR
    Or(Box<Expression>, Box<Expression>),
    /// NOT expression
    Not(Box<Expression>),
    /// Function call
    Function { name: String, args: Vec<Expression> },
    /// Vector literal [1.0, 2.0, 3.0]
    VectorLiteral(Vec<f32>),
    /// Distance operation: column <-> vector
    /// Used for ORDER BY embedding <-> $vector
    Distance {
        column: String,
        metric: DistanceMetric,
        vector: Vec<f32>,
    },
}
|
||||
|
||||
/// Binary operators for comparison expressions
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum BinaryOperator {
    /// =
    Eq,
    /// !=
    NotEq,
    /// >
    Gt,
    /// >=
    GtEq,
    /// <
    Lt,
    /// <=
    LtEq,
    /// LIKE
    Like,
}
|
||||
|
||||
/// Distance metrics for vector similarity, keyed by pgvector-style operators
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DistanceMetric {
    /// L2 distance: <->
    L2,
    /// Cosine distance: <=>
    Cosine,
    /// Dot product: <#>
    DotProduct,
}
|
||||
|
||||
/// ORDER BY clause: a single sort expression plus direction
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OrderBy {
    /// Expression to sort on (a Distance expression triggers vector search)
    pub expression: Expression,
    /// ASC or DESC
    pub direction: OrderDirection,
}
|
||||
|
||||
/// Sort direction for ORDER BY
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum OrderDirection {
    Asc,
    Desc,
}
|
||||
|
||||
/// SQL values — the runtime value space shared by literals, inserted
/// rows, and query results
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Value {
    Null,
    Text(String),
    Integer(i64),
    Real(f64),
    Vector(Vec<f32>),
    Boolean(bool),
}
|
||||
|
||||
impl Value {
|
||||
/// Convert to JSON value for metadata storage
|
||||
pub fn to_json(&self) -> serde_json::Value {
|
||||
match self {
|
||||
Value::Null => serde_json::Value::Null,
|
||||
Value::Text(s) => serde_json::Value::String(s.clone()),
|
||||
Value::Integer(i) => serde_json::Value::Number((*i).into()),
|
||||
Value::Real(f) => {
|
||||
serde_json::Value::Number(serde_json::Number::from_f64(*f).unwrap_or(0.into()))
|
||||
}
|
||||
Value::Vector(v) => serde_json::Value::Array(
|
||||
v.iter()
|
||||
.map(|f| {
|
||||
serde_json::Value::Number(
|
||||
serde_json::Number::from_f64(*f as f64).unwrap_or(0.into()),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
),
|
||||
Value::Boolean(b) => serde_json::Value::Bool(*b),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse from JSON value
|
||||
pub fn from_json(json: &serde_json::Value) -> Self {
|
||||
match json {
|
||||
serde_json::Value::Null => Value::Null,
|
||||
serde_json::Value::Bool(b) => Value::Boolean(*b),
|
||||
serde_json::Value::Number(n) => {
|
||||
if let Some(i) = n.as_i64() {
|
||||
Value::Integer(i)
|
||||
} else if let Some(f) = n.as_f64() {
|
||||
Value::Real(f)
|
||||
} else {
|
||||
Value::Null
|
||||
}
|
||||
}
|
||||
serde_json::Value::String(s) => Value::Text(s.clone()),
|
||||
serde_json::Value::Array(arr) => {
|
||||
// Try to parse as vector
|
||||
let floats: Option<Vec<f32>> =
|
||||
arr.iter().map(|v| v.as_f64().map(|f| f as f32)).collect();
|
||||
|
||||
if let Some(vec) = floats {
|
||||
Value::Vector(vec)
|
||||
} else {
|
||||
Value::Null
|
||||
}
|
||||
}
|
||||
serde_json::Value::Object(_) => Value::Null,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Value {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Value::Null => write!(f, "NULL"),
|
||||
Value::Text(s) => write!(f, "'{}'", s),
|
||||
Value::Integer(i) => write!(f, "{}", i),
|
||||
Value::Real(r) => write!(f, "{}", r),
|
||||
Value::Vector(v) => write!(
|
||||
f,
|
||||
"[{}]",
|
||||
v.iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
),
|
||||
Value::Boolean(b) => write!(f, "{}", b),
|
||||
}
|
||||
}
|
||||
}
|
||||
561
vendor/ruvector/crates/rvlite/src/sql/executor.rs
vendored
Normal file
561
vendor/ruvector/crates/rvlite/src/sql/executor.rs
vendored
Normal file
@@ -0,0 +1,561 @@
|
||||
// SQL executor that integrates with ruvector-core VectorDB
|
||||
use super::ast::*;
|
||||
use crate::{ErrorKind, RvLiteError};
|
||||
use parking_lot::RwLock;
|
||||
use ruvector_core::{SearchQuery, VectorDB, VectorEntry};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Table schema definition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableSchema {
    /// Table name
    pub name: String,
    /// All declared columns, in DDL order
    pub columns: Vec<Column>,
    /// Name of the (single) VECTOR column, cached at CREATE TABLE time
    pub vector_column: Option<String>,
    /// Declared dimensionality of the vector column
    pub vector_dimensions: Option<usize>,
}
|
||||
|
||||
impl TableSchema {
    /// Find the vector column in the schema
    ///
    /// Returns the first `(name, dimensions)` pair declared as VECTOR.
    /// NOTE(review): not called anywhere visible here (`create_table` does
    /// its own find_map) — confirm whether this is dead code.
    fn find_vector_column(&self) -> Option<(String, usize)> {
        for col in &self.columns {
            if let DataType::Vector(dims) = col.data_type {
                return Some((col.name.clone(), dims));
            }
        }
        None
    }

    /// Validate that columns match the schema
    ///
    /// Errors on the first name not declared in `self.columns`.
    fn validate_columns(&self, columns: &[String]) -> Result<(), RvLiteError> {
        for col in columns {
            if !self.columns.iter().any(|c| &c.name == col) {
                return Err(RvLiteError {
                    message: format!("Column '{}' not found in table '{}'", col, self.name),
                    kind: ErrorKind::SqlError,
                });
            }
        }
        Ok(())
    }

    /// Get column data type by column name (None if unknown)
    fn get_column_type(&self, name: &str) -> Option<&DataType> {
        self.columns
            .iter()
            .find(|c| c.name == name)
            .map(|c| &c.data_type)
    }
}
|
||||
|
||||
/// SQL execution result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionResult {
    /// Result rows (column name -> value); empty for DDL/DML statements
    pub rows: Vec<HashMap<String, Value>>,
    /// Number of rows written (INSERT); 0 for reads and DDL
    pub rows_affected: usize,
}
|
||||
|
||||
/// SQL Engine that manages tables and executes queries
///
/// Each table is backed by its own in-memory VectorDB instance; schemas
/// and databases are kept in parallel maps keyed by table name.
pub struct SqlEngine {
    /// Table schemas
    schemas: RwLock<HashMap<String, TableSchema>>,
    /// Vector databases (one per table)
    databases: RwLock<HashMap<String, VectorDB>>,
}
|
||||
|
||||
impl SqlEngine {
    /// Create a new SQL engine
    pub fn new() -> Self {
        SqlEngine {
            schemas: RwLock::new(HashMap::new()),
            databases: RwLock::new(HashMap::new()),
        }
    }

    /// Execute a SQL statement
    ///
    /// Dispatches to the per-statement handlers below.
    pub fn execute(&self, statement: SqlStatement) -> Result<ExecutionResult, RvLiteError> {
        match statement {
            SqlStatement::CreateTable { name, columns } => self.create_table(name, columns),
            SqlStatement::Insert {
                table,
                columns,
                values,
            } => self.insert(table, columns, values),
            SqlStatement::Select {
                columns,
                from,
                where_clause,
                order_by,
                limit,
            } => self.select(columns, from, where_clause, order_by, limit),
            SqlStatement::Drop { table } => self.drop_table(table),
        }
    }

    /// Create a table. Requires exactly one VECTOR column (the first one
    /// found is used); backs the table with a fresh in-memory VectorDB
    /// using the cosine metric.
    fn create_table(
        &self,
        name: String,
        columns: Vec<Column>,
    ) -> Result<ExecutionResult, RvLiteError> {
        let mut schemas = self.schemas.write();

        if schemas.contains_key(&name) {
            return Err(RvLiteError {
                message: format!("Table '{}' already exists", name),
                kind: ErrorKind::SqlError,
            });
        }

        // Find vector column
        let (vector_column, vector_dimensions) = columns
            .iter()
            .find_map(|col| {
                if let DataType::Vector(dims) = col.data_type {
                    Some((col.name.clone(), dims))
                } else {
                    None
                }
            })
            .ok_or_else(|| RvLiteError {
                message: "Table must have at least one VECTOR column".to_string(),
                kind: ErrorKind::SqlError,
            })?;

        let schema = TableSchema {
            name: name.clone(),
            columns,
            vector_column: Some(vector_column),
            vector_dimensions: Some(vector_dimensions),
        };

        // Create vector database for this table
        let db_options = ruvector_core::types::DbOptions {
            dimensions: vector_dimensions,
            distance_metric: ruvector_core::DistanceMetric::Cosine,
            storage_path: "memory://".to_string(),
            hnsw_config: None,
            quantization: None,
        };

        let db = VectorDB::new(db_options).map_err(|e| RvLiteError {
            message: format!("Failed to create vector database: {}", e),
            kind: ErrorKind::VectorError,
        })?;

        let mut databases = self.databases.write();
        databases.insert(name.clone(), db);
        schemas.insert(name, schema);

        Ok(ExecutionResult {
            rows: Vec::new(),
            rows_affected: 0,
        })
    }

    /// Insert one row: the VECTOR column becomes the stored vector, every
    /// other column becomes JSON metadata. A TEXT 'id' column, if present,
    /// is used as the vector's ID.
    fn insert(
        &self,
        table: String,
        columns: Vec<String>,
        values: Vec<Value>,
    ) -> Result<ExecutionResult, RvLiteError> {
        let schemas = self.schemas.read();
        let schema = schemas.get(&table).ok_or_else(|| RvLiteError {
            message: format!("Table '{}' not found", table),
            kind: ErrorKind::SqlError,
        })?;

        // Validate columns
        schema.validate_columns(&columns)?;

        if columns.len() != values.len() {
            return Err(RvLiteError {
                message: format!(
                    "Column count ({}) does not match value count ({})",
                    columns.len(),
                    values.len()
                ),
                kind: ErrorKind::SqlError,
            });
        }

        // Extract vector and metadata
        let mut vector: Option<Vec<f32>> = None;
        let mut metadata = HashMap::new();
        let mut id: Option<String> = None;

        for (col, val) in columns.iter().zip(values.iter()) {
            if let Some(DataType::Vector(_)) = schema.get_column_type(col) {
                if let Value::Vector(v) = val {
                    vector = Some(v.clone());
                } else {
                    return Err(RvLiteError {
                        message: format!("Expected vector value for column '{}'", col),
                        kind: ErrorKind::SqlError,
                    });
                }
            } else {
                // Store as metadata
                metadata.insert(col.clone(), val.to_json());

                // Use 'id' column as vector ID if present
                // (non-Text 'id' values are ignored and a generated ID is used)
                if col == "id" {
                    if let Value::Text(s) = val {
                        id = Some(s.clone());
                    }
                }
            }
        }

        let vector = vector.ok_or_else(|| RvLiteError {
            message: "No vector value provided".to_string(),
            kind: ErrorKind::SqlError,
        })?;

        // Validate vector dimensions
        if let Some(expected_dims) = schema.vector_dimensions {
            if vector.len() != expected_dims {
                return Err(RvLiteError {
                    message: format!(
                        "Vector dimension mismatch: expected {}, got {}",
                        expected_dims,
                        vector.len()
                    ),
                    kind: ErrorKind::SqlError,
                });
            }
        }

        // Insert into vector database
        let entry = VectorEntry {
            id,
            vector,
            metadata: Some(metadata),
        };

        let databases = self.databases.read();
        let db = databases.get(&table).ok_or_else(|| RvLiteError {
            message: format!("Database for table '{}' not found", table),
            kind: ErrorKind::SqlError,
        })?;

        db.insert(entry).map_err(|e| RvLiteError {
            message: format!("Failed to insert: {}", e),
            kind: ErrorKind::VectorError,
        })?;

        Ok(ExecutionResult {
            rows: Vec::new(),
            rows_affected: 1,
        })
    }

    /// SELECT handler. With an ORDER BY Distance expression this is a k-NN
    /// search (k = LIMIT, default 10); otherwise it degrades to a pseudo
    /// table scan implemented as a search from the zero vector.
    ///
    /// NOTE(review): `_columns` is ignored — projection (SELECT a, b) is
    /// not applied; all metadata keys are returned.
    /// NOTE(review): the ORDER BY metric is discarded (`metric: _`); the
    /// table's configured metric (cosine) is always used. Also, the
    /// zero-vector "table scan" under cosine distance may behave poorly —
    /// confirm against ruvector-core's handling of zero-norm queries.
    fn select(
        &self,
        _columns: Vec<SelectColumn>,
        from: String,
        where_clause: Option<Expression>,
        order_by: Option<OrderBy>,
        limit: Option<usize>,
    ) -> Result<ExecutionResult, RvLiteError> {
        let schemas = self.schemas.read();
        let schema = schemas.get(&from).ok_or_else(|| RvLiteError {
            message: format!("Table '{}' not found", from),
            kind: ErrorKind::SqlError,
        })?;

        let databases = self.databases.read();
        let db = databases.get(&from).ok_or_else(|| RvLiteError {
            message: format!("Database for table '{}' not found", from),
            kind: ErrorKind::SqlError,
        })?;

        // Handle vector similarity search
        if let Some(order_by) = order_by {
            if let Expression::Distance {
                column: _,
                metric: _,
                vector,
            } = order_by.expression
            {
                let k = limit.unwrap_or(10);

                // Build filter from WHERE clause
                let filter = if let Some(where_expr) = where_clause {
                    Some(self.build_filter(where_expr)?)
                } else {
                    None
                };

                let query = SearchQuery {
                    vector,
                    k,
                    filter,
                    ef_search: None,
                };

                let results = db.search(query).map_err(|e| RvLiteError {
                    message: format!("Search failed: {}", e),
                    kind: ErrorKind::VectorError,
                })?;

                // Convert results to rows
                let rows: Vec<HashMap<String, Value>> = results
                    .into_iter()
                    .map(|result| {
                        let mut row = HashMap::new();

                        // Add vector if present
                        if let Some(vec_col) = &schema.vector_column {
                            if let Some(vector) = result.vector {
                                row.insert(vec_col.clone(), Value::Vector(vector));
                            }
                        }

                        // Add metadata
                        if let Some(metadata) = result.metadata {
                            for (key, val) in metadata {
                                row.insert(key, Value::from_json(&val));
                            }
                        }

                        // Add distance score under the reserved "_distance" key
                        row.insert("_distance".to_string(), Value::Real(result.score as f64));

                        row
                    })
                    .collect();

                return Ok(ExecutionResult {
                    rows,
                    rows_affected: 0,
                });
            }
        }

        // Non-vector query - return all rows (scan all vectors)
        // This is essentially a table scan through the vector database
        let k = limit.unwrap_or(1000); // Default to 1000 rows max

        // Create a zero vector for exhaustive search
        let dims = schema.vector_dimensions.unwrap_or(3);
        let query_vector = vec![0.0f32; dims];

        // Build filter from WHERE clause
        let filter = if let Some(where_expr) = where_clause {
            Some(self.build_filter(where_expr)?)
        } else {
            None
        };

        let query = SearchQuery {
            vector: query_vector,
            k,
            filter,
            ef_search: None,
        };

        let results = db.search(query).map_err(|e| RvLiteError {
            message: format!("Search failed: {}", e),
            kind: ErrorKind::VectorError,
        })?;

        // Convert results to rows (same shape as above, minus "_distance")
        let rows: Vec<HashMap<String, Value>> = results
            .into_iter()
            .map(|result| {
                let mut row = HashMap::new();

                // Add vector if present
                if let Some(vec_col) = &schema.vector_column {
                    if let Some(vector) = result.vector {
                        row.insert(vec_col.clone(), Value::Vector(vector));
                    }
                }

                // Add metadata
                if let Some(metadata) = result.metadata {
                    for (key, val) in metadata {
                        row.insert(key, Value::from_json(&val));
                    }
                }

                row
            })
            .collect();

        Ok(ExecutionResult {
            rows,
            rows_affected: 0,
        })
    }

    /// Drop a table: removes both the schema and its backing database.
    fn drop_table(&self, table: String) -> Result<ExecutionResult, RvLiteError> {
        let mut schemas = self.schemas.write();
        let mut databases = self.databases.write();

        schemas.remove(&table).ok_or_else(|| RvLiteError {
            message: format!("Table '{}' not found", table),
            kind: ErrorKind::SqlError,
        })?;

        databases.remove(&table);

        Ok(ExecutionResult {
            rows: Vec::new(),
            rows_affected: 0,
        })
    }

    /// Build metadata filter from WHERE expression
    ///
    /// Supports only `col = literal` and ANDs of such terms; AND merges the
    /// two sides into one map (a repeated column keeps the right-hand value).
    /// NOTE(review): a BinaryOp whose operands are not (Column, Literal)
    /// falls through silently and yields an empty filter — confirm whether
    /// that should be an error instead.
    fn build_filter(
        &self,
        expr: Expression,
    ) -> Result<HashMap<String, serde_json::Value>, RvLiteError> {
        let mut filter = HashMap::new();

        match expr {
            Expression::BinaryOp { left, op, right } => {
                if let (Expression::Column(col), Expression::Literal(val)) = (*left, *right) {
                    if op == BinaryOperator::Eq {
                        filter.insert(col, val.to_json());
                    } else {
                        return Err(RvLiteError {
                            message: "Only equality filters supported in WHERE clause".to_string(),
                            kind: ErrorKind::NotImplemented,
                        });
                    }
                }
            }
            Expression::And(left, right) => {
                let left_filter = self.build_filter(*left)?;
                let right_filter = self.build_filter(*right)?;
                filter.extend(left_filter);
                filter.extend(right_filter);
            }
            _ => {
                return Err(RvLiteError {
                    message: "Unsupported WHERE clause expression".to_string(),
                    kind: ErrorKind::NotImplemented,
                });
            }
        }

        Ok(filter)
    }

    /// List all tables
    pub fn list_tables(&self) -> Vec<String> {
        self.schemas.read().keys().cloned().collect()
    }

    /// Get table schema (cloned), or None if the table does not exist
    pub fn get_schema(&self, table: &str) -> Option<TableSchema> {
        self.schemas.read().get(table).cloned()
    }
}
|
||||
|
||||
impl Default for SqlEngine {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_create_and_insert() {
        let engine = SqlEngine::new();

        // Table with an id, a content column, and a 3-dimensional vector.
        let create = SqlStatement::CreateTable {
            name: "docs".to_string(),
            columns: vec![
                Column {
                    name: "id".to_string(),
                    data_type: DataType::Text,
                },
                Column {
                    name: "content".to_string(),
                    data_type: DataType::Text,
                },
                Column {
                    name: "embedding".to_string(),
                    data_type: DataType::Vector(3),
                },
            ],
        };
        engine.execute(create).unwrap();

        // Inserting a single row should report exactly one affected row.
        let insert = SqlStatement::Insert {
            table: "docs".to_string(),
            columns: vec![
                "id".to_string(),
                "content".to_string(),
                "embedding".to_string(),
            ],
            values: vec![
                Value::Text("1".to_string()),
                Value::Text("hello".to_string()),
                Value::Vector(vec![1.0, 2.0, 3.0]),
            ],
        };
        let outcome = engine.execute(insert).unwrap();
        assert_eq!(outcome.rows_affected, 1);
    }

    #[test]
    fn test_vector_search() {
        let engine = SqlEngine::new();

        let create = SqlStatement::CreateTable {
            name: "docs".to_string(),
            columns: vec![
                Column {
                    name: "id".to_string(),
                    data_type: DataType::Text,
                },
                Column {
                    name: "embedding".to_string(),
                    data_type: DataType::Vector(3),
                },
            ],
        };
        engine.execute(create).unwrap();

        // Five rows whose embeddings are scalar multiples of (1, 2, 3).
        for i in 0..5 {
            let row = SqlStatement::Insert {
                table: "docs".to_string(),
                columns: vec!["id".to_string(), "embedding".to_string()],
                values: vec![
                    Value::Text(i.to_string()),
                    Value::Vector(vec![i as f32, i as f32 * 2.0, i as f32 * 3.0]),
                ],
            };
            engine.execute(row).unwrap();
        }

        // K-nearest-neighbour query: the 3 rows closest to (2, 4, 6) by L2.
        let select = SqlStatement::Select {
            columns: vec![SelectColumn::Wildcard],
            from: "docs".to_string(),
            where_clause: None,
            order_by: Some(OrderBy {
                expression: Expression::Distance {
                    column: "embedding".to_string(),
                    metric: DistanceMetric::L2,
                    vector: vec![2.0, 4.0, 6.0],
                },
                direction: OrderDirection::Asc,
            }),
            limit: Some(3),
        };
        let outcome = engine.execute(select).unwrap();
        assert_eq!(outcome.rows.len(), 3);
    }
}
|
||||
13
vendor/ruvector/crates/rvlite/src/sql/mod.rs
vendored
Normal file
13
vendor/ruvector/crates/rvlite/src/sql/mod.rs
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
// SQL query engine module for rvlite
|
||||
// Provides SQL interface for vector database operations with WASM compatibility
|
||||
|
||||
mod ast;
|
||||
mod executor;
|
||||
mod parser;
|
||||
|
||||
pub use ast::*;
|
||||
pub use executor::{ExecutionResult, SqlEngine};
|
||||
pub use parser::{ParseError, SqlParser};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
823
vendor/ruvector/crates/rvlite/src/sql/parser.rs
vendored
Normal file
823
vendor/ruvector/crates/rvlite/src/sql/parser.rs
vendored
Normal file
@@ -0,0 +1,823 @@
|
||||
// Hand-rolled SQL parser for WASM compatibility
|
||||
// Implements recursive descent parsing for vector-specific SQL
|
||||
|
||||
use super::ast::*;
|
||||
use std::fmt;
|
||||
|
||||
/// Error produced while tokenizing or parsing a SQL string.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Character/token offset in the input where the error was detected.
    pub position: usize,
}
|
||||
|
||||
impl fmt::Display for ParseError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Parse error at position {}: {}",
|
||||
self.position, self.message
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for ParseError {}
|
||||
|
||||
/// Lexical tokens produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    // Keywords
    Select,
    From,
    Where,
    Insert,
    Into,
    Values,
    Create,
    Table,
    Drop,
    OrderBy,
    Limit,
    And,
    Or,
    Not,
    As,

    // Data type names
    Text,
    Integer,
    Real,
    Vector,

    // Comparison operators
    Eq,
    NotEq,
    Gt,
    GtEq,
    Lt,
    LtEq,
    Like,

    // Vector distance operators (pgvector-style)
    L2Distance,     // <->
    CosineDistance, // <=>
    DotProduct,     // <#>

    // Punctuation
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    Comma,
    Semicolon,
    Asterisk,

    // Literals and identifiers
    Identifier(String),
    StringLiteral(String),
    NumberLiteral(String),

    // End of input
    Eof,
}
|
||||
|
||||
/// Character-level lexer over the raw SQL input.
struct Tokenizer {
    /// Input decomposed into chars for simple positional access.
    input: Vec<char>,
    /// Index of the next unread character.
    position: usize,
}
|
||||
|
||||
impl Tokenizer {
|
||||
fn new(input: &str) -> Self {
|
||||
Tokenizer {
|
||||
input: input.chars().collect(),
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn current(&self) -> Option<char> {
|
||||
self.input.get(self.position).copied()
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
self.position += 1;
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some(ch) = self.current() {
|
||||
if ch.is_whitespace() {
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_identifier(&mut self) -> String {
|
||||
let mut result = String::new();
|
||||
while let Some(ch) = self.current() {
|
||||
if ch.is_alphanumeric() || ch == '_' {
|
||||
result.push(ch);
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn read_string(&mut self) -> Result<String, ParseError> {
|
||||
let mut result = String::new();
|
||||
self.advance(); // Skip opening quote
|
||||
|
||||
while let Some(ch) = self.current() {
|
||||
if ch == '\'' {
|
||||
self.advance();
|
||||
return Ok(result);
|
||||
} else {
|
||||
result.push(ch);
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
Err(ParseError {
|
||||
message: "Unterminated string literal".to_string(),
|
||||
position: self.position,
|
||||
})
|
||||
}
|
||||
|
||||
fn read_number(&mut self) -> String {
|
||||
let mut result = String::new();
|
||||
let mut has_dot = false;
|
||||
|
||||
while let Some(ch) = self.current() {
|
||||
if ch.is_numeric() {
|
||||
result.push(ch);
|
||||
self.advance();
|
||||
} else if ch == '.' && !has_dot {
|
||||
has_dot = true;
|
||||
result.push(ch);
|
||||
self.advance();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn next_token(&mut self) -> Result<Token, ParseError> {
|
||||
self.skip_whitespace();
|
||||
|
||||
let ch = match self.current() {
|
||||
Some(c) => c,
|
||||
None => return Ok(Token::Eof),
|
||||
};
|
||||
|
||||
match ch {
|
||||
'(' => {
|
||||
self.advance();
|
||||
Ok(Token::LeftParen)
|
||||
}
|
||||
')' => {
|
||||
self.advance();
|
||||
Ok(Token::RightParen)
|
||||
}
|
||||
'[' => {
|
||||
self.advance();
|
||||
Ok(Token::LeftBracket)
|
||||
}
|
||||
']' => {
|
||||
self.advance();
|
||||
Ok(Token::RightBracket)
|
||||
}
|
||||
',' => {
|
||||
self.advance();
|
||||
Ok(Token::Comma)
|
||||
}
|
||||
';' => {
|
||||
self.advance();
|
||||
Ok(Token::Semicolon)
|
||||
}
|
||||
'*' => {
|
||||
self.advance();
|
||||
Ok(Token::Asterisk)
|
||||
}
|
||||
'=' => {
|
||||
self.advance();
|
||||
Ok(Token::Eq)
|
||||
}
|
||||
'!' => {
|
||||
self.advance();
|
||||
if self.current() == Some('=') {
|
||||
self.advance();
|
||||
Ok(Token::NotEq)
|
||||
} else {
|
||||
Err(ParseError {
|
||||
message: "Expected '=' after '!'".to_string(),
|
||||
position: self.position,
|
||||
})
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
self.advance();
|
||||
if self.current() == Some('=') {
|
||||
self.advance();
|
||||
Ok(Token::GtEq)
|
||||
} else {
|
||||
Ok(Token::Gt)
|
||||
}
|
||||
}
|
||||
'<' => {
|
||||
self.advance();
|
||||
match self.current() {
|
||||
Some('=') => {
|
||||
self.advance();
|
||||
if self.current() == Some('>') {
|
||||
self.advance();
|
||||
Ok(Token::CosineDistance)
|
||||
} else {
|
||||
Ok(Token::LtEq)
|
||||
}
|
||||
}
|
||||
Some('-') => {
|
||||
self.advance();
|
||||
if self.current() == Some('>') {
|
||||
self.advance();
|
||||
Ok(Token::L2Distance)
|
||||
} else {
|
||||
Err(ParseError {
|
||||
message: "Expected '>' after '<-'".to_string(),
|
||||
position: self.position,
|
||||
})
|
||||
}
|
||||
}
|
||||
Some('#') => {
|
||||
self.advance();
|
||||
if self.current() == Some('>') {
|
||||
self.advance();
|
||||
Ok(Token::DotProduct)
|
||||
} else {
|
||||
Err(ParseError {
|
||||
message: "Expected '>' after '<#'".to_string(),
|
||||
position: self.position,
|
||||
})
|
||||
}
|
||||
}
|
||||
_ => Ok(Token::Lt),
|
||||
}
|
||||
}
|
||||
'\'' => Ok(Token::StringLiteral(self.read_string()?)),
|
||||
_ if ch.is_numeric() => Ok(Token::NumberLiteral(self.read_number())),
|
||||
_ if ch.is_alphabetic() || ch == '_' => {
|
||||
let ident = self.read_identifier();
|
||||
Ok(match ident.to_uppercase().as_str() {
|
||||
"SELECT" => Token::Select,
|
||||
"FROM" => Token::From,
|
||||
"WHERE" => Token::Where,
|
||||
"INSERT" => Token::Insert,
|
||||
"INTO" => Token::Into,
|
||||
"VALUES" => Token::Values,
|
||||
"CREATE" => Token::Create,
|
||||
"TABLE" => Token::Table,
|
||||
"DROP" => Token::Drop,
|
||||
"ORDER" => {
|
||||
self.skip_whitespace();
|
||||
if self.read_identifier().to_uppercase() == "BY" {
|
||||
Token::OrderBy
|
||||
} else {
|
||||
Token::Identifier(ident)
|
||||
}
|
||||
}
|
||||
"LIMIT" => Token::Limit,
|
||||
"AND" => Token::And,
|
||||
"OR" => Token::Or,
|
||||
"NOT" => Token::Not,
|
||||
"AS" => Token::As,
|
||||
"TEXT" => Token::Text,
|
||||
"INTEGER" => Token::Integer,
|
||||
"REAL" => Token::Real,
|
||||
"VECTOR" => Token::Vector,
|
||||
"LIKE" => Token::Like,
|
||||
_ => Token::Identifier(ident),
|
||||
})
|
||||
}
|
||||
_ => Err(ParseError {
|
||||
message: format!("Unexpected character: {}", ch),
|
||||
position: self.position,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// SQL Parser
|
||||
pub struct SqlParser {
|
||||
tokens: Vec<Token>,
|
||||
position: usize,
|
||||
}
|
||||
|
||||
impl SqlParser {
|
||||
/// Create a new parser from SQL string
|
||||
pub fn new(input: &str) -> Result<Self, ParseError> {
|
||||
let mut tokenizer = Tokenizer::new(input);
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
loop {
|
||||
let token = tokenizer.next_token()?;
|
||||
if token == Token::Eof {
|
||||
tokens.push(token);
|
||||
break;
|
||||
}
|
||||
tokens.push(token);
|
||||
}
|
||||
|
||||
Ok(SqlParser {
|
||||
tokens,
|
||||
position: 0,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse SQL statement
|
||||
pub fn parse(&mut self) -> Result<SqlStatement, ParseError> {
|
||||
let token = self.current().clone();
|
||||
|
||||
match token {
|
||||
Token::Select => self.parse_select(),
|
||||
Token::Insert => self.parse_insert(),
|
||||
Token::Create => self.parse_create(),
|
||||
Token::Drop => self.parse_drop(),
|
||||
_ => Err(ParseError {
|
||||
message: format!("Expected SELECT, INSERT, CREATE, or DROP, got {:?}", token),
|
||||
position: self.position,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn current(&self) -> &Token {
|
||||
self.tokens.get(self.position).unwrap_or(&Token::Eof)
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
if self.position < self.tokens.len() {
|
||||
self.position += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn expect(&mut self, expected: Token) -> Result<(), ParseError> {
|
||||
let current = self.current().clone();
|
||||
if current == expected {
|
||||
self.advance();
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ParseError {
|
||||
message: format!("Expected {:?}, got {:?}", expected, current),
|
||||
position: self.position,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_select(&mut self) -> Result<SqlStatement, ParseError> {
|
||||
self.expect(Token::Select)?;
|
||||
|
||||
let columns = self.parse_select_columns()?;
|
||||
|
||||
self.expect(Token::From)?;
|
||||
let from = self.parse_identifier()?;
|
||||
|
||||
let where_clause = if matches!(self.current(), Token::Where) {
|
||||
self.advance();
|
||||
Some(self.parse_expression()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let order_by = if matches!(self.current(), Token::OrderBy) {
|
||||
self.advance();
|
||||
Some(self.parse_order_by()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let limit = if matches!(self.current(), Token::Limit) {
|
||||
self.advance();
|
||||
Some(self.parse_number()? as usize)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(SqlStatement::Select {
|
||||
columns,
|
||||
from,
|
||||
where_clause,
|
||||
order_by,
|
||||
limit,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_select_columns(&mut self) -> Result<Vec<SelectColumn>, ParseError> {
|
||||
if matches!(self.current(), Token::Asterisk) {
|
||||
self.advance();
|
||||
return Ok(vec![SelectColumn::Wildcard]);
|
||||
}
|
||||
|
||||
let mut columns = Vec::new();
|
||||
loop {
|
||||
let name = self.parse_identifier()?;
|
||||
columns.push(SelectColumn::Name(name));
|
||||
|
||||
if !matches!(self.current(), Token::Comma) {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Ok(columns)
|
||||
}
|
||||
|
||||
fn parse_insert(&mut self) -> Result<SqlStatement, ParseError> {
|
||||
self.expect(Token::Insert)?;
|
||||
self.expect(Token::Into)?;
|
||||
|
||||
let table = self.parse_identifier()?;
|
||||
|
||||
self.expect(Token::LeftParen)?;
|
||||
let columns = self.parse_identifier_list()?;
|
||||
self.expect(Token::RightParen)?;
|
||||
|
||||
self.expect(Token::Values)?;
|
||||
self.expect(Token::LeftParen)?;
|
||||
let values = self.parse_value_list()?;
|
||||
self.expect(Token::RightParen)?;
|
||||
|
||||
Ok(SqlStatement::Insert {
|
||||
table,
|
||||
columns,
|
||||
values,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_create(&mut self) -> Result<SqlStatement, ParseError> {
|
||||
self.expect(Token::Create)?;
|
||||
self.expect(Token::Table)?;
|
||||
|
||||
let name = self.parse_identifier()?;
|
||||
|
||||
self.expect(Token::LeftParen)?;
|
||||
let columns = self.parse_column_definitions()?;
|
||||
self.expect(Token::RightParen)?;
|
||||
|
||||
Ok(SqlStatement::CreateTable { name, columns })
|
||||
}
|
||||
|
||||
fn parse_drop(&mut self) -> Result<SqlStatement, ParseError> {
|
||||
self.expect(Token::Drop)?;
|
||||
self.expect(Token::Table)?;
|
||||
|
||||
let table = self.parse_identifier()?;
|
||||
|
||||
Ok(SqlStatement::Drop { table })
|
||||
}
|
||||
|
||||
fn parse_column_definitions(&mut self) -> Result<Vec<Column>, ParseError> {
|
||||
let mut columns = Vec::new();
|
||||
|
||||
loop {
|
||||
let name = self.parse_identifier()?;
|
||||
let data_type = self.parse_data_type()?;
|
||||
|
||||
columns.push(Column { name, data_type });
|
||||
|
||||
if !matches!(self.current(), Token::Comma) {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Ok(columns)
|
||||
}
|
||||
|
||||
fn parse_data_type(&mut self) -> Result<DataType, ParseError> {
|
||||
match self.current().clone() {
|
||||
Token::Text => {
|
||||
self.advance();
|
||||
Ok(DataType::Text)
|
||||
}
|
||||
Token::Integer => {
|
||||
self.advance();
|
||||
Ok(DataType::Integer)
|
||||
}
|
||||
Token::Real => {
|
||||
self.advance();
|
||||
Ok(DataType::Real)
|
||||
}
|
||||
Token::Vector => {
|
||||
self.advance();
|
||||
self.expect(Token::LeftParen)?;
|
||||
let dims = self.parse_number()? as usize;
|
||||
self.expect(Token::RightParen)?;
|
||||
Ok(DataType::Vector(dims))
|
||||
}
|
||||
_ => Err(ParseError {
|
||||
message: "Expected data type (TEXT, INTEGER, REAL, or VECTOR)".to_string(),
|
||||
position: self.position,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_expression(&mut self) -> Result<Expression, ParseError> {
|
||||
self.parse_or_expression()
|
||||
}
|
||||
|
||||
fn parse_or_expression(&mut self) -> Result<Expression, ParseError> {
|
||||
let mut left = self.parse_and_expression()?;
|
||||
|
||||
while matches!(self.current(), Token::Or) {
|
||||
self.advance();
|
||||
let right = self.parse_and_expression()?;
|
||||
left = Expression::Or(Box::new(left), Box::new(right));
|
||||
}
|
||||
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_and_expression(&mut self) -> Result<Expression, ParseError> {
|
||||
let mut left = self.parse_comparison_expression()?;
|
||||
|
||||
while matches!(self.current(), Token::And) {
|
||||
self.advance();
|
||||
let right = self.parse_comparison_expression()?;
|
||||
left = Expression::And(Box::new(left), Box::new(right));
|
||||
}
|
||||
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_comparison_expression(&mut self) -> Result<Expression, ParseError> {
|
||||
let left = self.parse_primary_expression()?;
|
||||
|
||||
let op = match self.current() {
|
||||
Token::Eq => BinaryOperator::Eq,
|
||||
Token::NotEq => BinaryOperator::NotEq,
|
||||
Token::Gt => BinaryOperator::Gt,
|
||||
Token::GtEq => BinaryOperator::GtEq,
|
||||
Token::Lt => BinaryOperator::Lt,
|
||||
Token::LtEq => BinaryOperator::LtEq,
|
||||
Token::Like => BinaryOperator::Like,
|
||||
_ => return Ok(left),
|
||||
};
|
||||
|
||||
self.advance();
|
||||
let right = self.parse_primary_expression()?;
|
||||
|
||||
Ok(Expression::BinaryOp {
|
||||
left: Box::new(left),
|
||||
op,
|
||||
right: Box::new(right),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_primary_expression(&mut self) -> Result<Expression, ParseError> {
|
||||
match self.current().clone() {
|
||||
Token::Identifier(name) => {
|
||||
self.advance();
|
||||
Ok(Expression::Column(name))
|
||||
}
|
||||
Token::StringLiteral(s) => {
|
||||
self.advance();
|
||||
Ok(Expression::Literal(Value::Text(s)))
|
||||
}
|
||||
Token::NumberLiteral(n) => {
|
||||
self.advance();
|
||||
let value = if n.contains('.') {
|
||||
Value::Real(n.parse().unwrap())
|
||||
} else {
|
||||
Value::Integer(n.parse().unwrap())
|
||||
};
|
||||
Ok(Expression::Literal(value))
|
||||
}
|
||||
Token::LeftBracket => {
|
||||
self.advance();
|
||||
let vec = self.parse_vector_literal()?;
|
||||
self.expect(Token::RightBracket)?;
|
||||
Ok(Expression::VectorLiteral(vec))
|
||||
}
|
||||
Token::Not => {
|
||||
self.advance();
|
||||
let expr = self.parse_primary_expression()?;
|
||||
Ok(Expression::Not(Box::new(expr)))
|
||||
}
|
||||
_ => Err(ParseError {
|
||||
message: format!("Unexpected token in expression: {:?}", self.current()),
|
||||
position: self.position,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_order_by(&mut self) -> Result<OrderBy, ParseError> {
|
||||
// Parse column <-> vector or column <=> vector
|
||||
let column = self.parse_identifier()?;
|
||||
|
||||
let metric = match self.current() {
|
||||
Token::L2Distance => {
|
||||
self.advance();
|
||||
DistanceMetric::L2
|
||||
}
|
||||
Token::CosineDistance => {
|
||||
self.advance();
|
||||
DistanceMetric::Cosine
|
||||
}
|
||||
Token::DotProduct => {
|
||||
self.advance();
|
||||
DistanceMetric::DotProduct
|
||||
}
|
||||
_ => {
|
||||
return Err(ParseError {
|
||||
message: "Expected distance operator (<->, <=>, or <#>)".to_string(),
|
||||
position: self.position,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let vector = if matches!(self.current(), Token::LeftBracket) {
|
||||
self.advance();
|
||||
let vec = self.parse_vector_literal()?;
|
||||
self.expect(Token::RightBracket)?;
|
||||
vec
|
||||
} else {
|
||||
return Err(ParseError {
|
||||
message: "Expected vector literal after distance operator".to_string(),
|
||||
position: self.position,
|
||||
});
|
||||
};
|
||||
|
||||
Ok(OrderBy {
|
||||
expression: Expression::Distance {
|
||||
column,
|
||||
metric,
|
||||
vector,
|
||||
},
|
||||
direction: OrderDirection::Asc,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_identifier(&mut self) -> Result<String, ParseError> {
|
||||
match self.current().clone() {
|
||||
Token::Identifier(name) => {
|
||||
self.advance();
|
||||
Ok(name)
|
||||
}
|
||||
_ => Err(ParseError {
|
||||
message: "Expected identifier".to_string(),
|
||||
position: self.position,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_identifier_list(&mut self) -> Result<Vec<String>, ParseError> {
|
||||
let mut identifiers = Vec::new();
|
||||
|
||||
loop {
|
||||
identifiers.push(self.parse_identifier()?);
|
||||
|
||||
if !matches!(self.current(), Token::Comma) {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Ok(identifiers)
|
||||
}
|
||||
|
||||
fn parse_value_list(&mut self) -> Result<Vec<Value>, ParseError> {
|
||||
let mut values = Vec::new();
|
||||
|
||||
loop {
|
||||
values.push(self.parse_value()?);
|
||||
|
||||
if !matches!(self.current(), Token::Comma) {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
fn parse_value(&mut self) -> Result<Value, ParseError> {
|
||||
match self.current().clone() {
|
||||
Token::StringLiteral(s) => {
|
||||
self.advance();
|
||||
Ok(Value::Text(s))
|
||||
}
|
||||
Token::NumberLiteral(n) => {
|
||||
self.advance();
|
||||
if n.contains('.') {
|
||||
Ok(Value::Real(n.parse().unwrap()))
|
||||
} else {
|
||||
Ok(Value::Integer(n.parse().unwrap()))
|
||||
}
|
||||
}
|
||||
Token::LeftBracket => {
|
||||
self.advance();
|
||||
let vec = self.parse_vector_literal()?;
|
||||
self.expect(Token::RightBracket)?;
|
||||
Ok(Value::Vector(vec))
|
||||
}
|
||||
_ => Err(ParseError {
|
||||
message: format!("Expected value, got {:?}", self.current()),
|
||||
position: self.position,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_vector_literal(&mut self) -> Result<Vec<f32>, ParseError> {
|
||||
let mut values = Vec::new();
|
||||
|
||||
loop {
|
||||
let n = self.parse_number()?;
|
||||
values.push(n as f32);
|
||||
|
||||
if !matches!(self.current(), Token::Comma) {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
fn parse_number(&mut self) -> Result<f64, ParseError> {
|
||||
match self.current().clone() {
|
||||
Token::NumberLiteral(n) => {
|
||||
self.advance();
|
||||
n.parse().map_err(|_| ParseError {
|
||||
message: format!("Invalid number: {}", n),
|
||||
position: self.position,
|
||||
})
|
||||
}
|
||||
_ => Err(ParseError {
|
||||
message: "Expected number".to_string(),
|
||||
position: self.position,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_create_table() {
        let sql = "CREATE TABLE documents (id TEXT, content TEXT, embedding VECTOR(384))";
        let stmt = SqlParser::new(sql).unwrap().parse().unwrap();

        if let SqlStatement::CreateTable { name, columns } = stmt {
            assert_eq!(name, "documents");
            assert_eq!(columns.len(), 3);
            assert_eq!(columns[2].data_type, DataType::Vector(384));
        } else {
            panic!("Expected CreateTable");
        }
    }

    #[test]
    fn test_parse_insert() {
        let sql =
            "INSERT INTO documents (id, content, embedding) VALUES ('1', 'hello', [1.0, 2.0, 3.0])";
        let stmt = SqlParser::new(sql).unwrap().parse().unwrap();

        if let SqlStatement::Insert {
            table,
            columns,
            values,
        } = stmt
        {
            assert_eq!(table, "documents");
            assert_eq!(columns.len(), 3);
            assert_eq!(values.len(), 3);
        } else {
            panic!("Expected Insert");
        }
    }

    #[test]
    fn test_parse_select_with_vector_search() {
        let sql = "SELECT * FROM documents ORDER BY embedding <-> [1.0, 2.0, 3.0] LIMIT 5";
        let stmt = SqlParser::new(sql).unwrap().parse().unwrap();

        if let SqlStatement::Select {
            order_by, limit, ..
        } = stmt
        {
            assert!(order_by.is_some());
            assert_eq!(limit, Some(5));
        } else {
            panic!("Expected Select");
        }
    }
}
|
||||
147
vendor/ruvector/crates/rvlite/src/sql/tests.rs
vendored
Normal file
147
vendor/ruvector/crates/rvlite/src/sql/tests.rs
vendored
Normal file
@@ -0,0 +1,147 @@
|
||||
// Integration tests for the SQL engine.
#[cfg(test)]
mod tests {
    use crate::sql::{ExecutionResult, SqlEngine, SqlParser};

    /// Parse a single statement and execute it, panicking on any failure.
    fn exec(engine: &SqlEngine, sql: &str) -> ExecutionResult {
        let stmt = SqlParser::new(sql).unwrap().parse().unwrap();
        engine.execute(stmt).unwrap()
    }

    #[test]
    fn test_full_workflow() {
        let engine = SqlEngine::new();

        exec(
            &engine,
            "CREATE TABLE documents (id TEXT, content TEXT, embedding VECTOR(384))",
        );

        // A 3-dim literal into a 384-dim column: execution must fail with a
        // dimension mismatch, which exercises the whole parse/execute flow.
        let insert_sql = "INSERT INTO documents (id, content, embedding) VALUES ('doc1', 'hello world', [1.0, 2.0, 3.0])";
        let stmt = SqlParser::new(insert_sql).unwrap().parse().unwrap();
        assert!(engine.execute(stmt).is_err());
    }

    #[test]
    fn test_vector_similarity_search() {
        let engine = SqlEngine::new();

        // Small dimensionality keeps the test cheap.
        exec(&engine, "CREATE TABLE docs (id TEXT, embedding VECTOR(3))");

        for i in 0..10 {
            let insert_sql = format!(
                "INSERT INTO docs (id, embedding) VALUES ('doc{}', [{}, {}, {}])",
                i,
                i,
                i * 2,
                i * 3
            );
            exec(&engine, &insert_sql);
        }

        let found = exec(
            &engine,
            "SELECT * FROM docs ORDER BY embedding <-> [5.0, 10.0, 15.0] LIMIT 3",
        );

        assert_eq!(found.rows.len(), 3);
        // The closest vector should be [5, 10, 15]; at minimum an id is present.
        assert!(found.rows[0].get("id").is_some());
    }

    #[test]
    fn test_metadata_filtering() {
        let engine = SqlEngine::new();

        exec(
            &engine,
            "CREATE TABLE docs (id TEXT, category TEXT, embedding VECTOR(3))",
        );

        let categories = ["tech", "sports", "tech", "news", "sports"];
        for (i, cat) in categories.iter().enumerate() {
            let insert_sql = format!(
                "INSERT INTO docs (id, category, embedding) VALUES ('doc{}', '{}', [{}, {}, {}])",
                i,
                cat,
                i,
                i * 2,
                i * 3
            );
            exec(&engine, &insert_sql);
        }

        let found = exec(
            &engine,
            "SELECT * FROM docs WHERE category = 'tech' ORDER BY embedding <-> [2.0, 4.0, 6.0] LIMIT 2",
        );

        // VectorDB filtering may not be fully precise; accept 1 or 2 hits.
        assert!((1..=2).contains(&found.rows.len()));
        // Every returned row must carry the requested category.
        for row in &found.rows {
            if let Some(category) = row.get("category") {
                assert_eq!(category.to_string(), "'tech'");
            }
        }
    }

    #[test]
    fn test_drop_table() {
        let engine = SqlEngine::new();

        exec(&engine, "CREATE TABLE temp (id TEXT, embedding VECTOR(3))");
        assert_eq!(engine.list_tables().len(), 1);

        exec(&engine, "DROP TABLE temp");
        assert_eq!(engine.list_tables().len(), 0);
    }

    #[test]
    fn test_cosine_distance() {
        let engine = SqlEngine::new();

        exec(&engine, "CREATE TABLE docs (id TEXT, embedding VECTOR(3))");

        // Two orthogonal unit vectors.
        exec(
            &engine,
            "INSERT INTO docs (id, embedding) VALUES ('doc1', [1.0, 0.0, 0.0])",
        );
        exec(
            &engine,
            "INSERT INTO docs (id, embedding) VALUES ('doc2', [0.0, 1.0, 0.0])",
        );

        // doc1 is the nearest neighbour of [0.9, 0.1, 0.0] under cosine distance.
        let found = exec(
            &engine,
            "SELECT * FROM docs ORDER BY embedding <=> [0.9, 0.1, 0.0] LIMIT 1",
        );
        assert_eq!(found.rows.len(), 1);
    }
}
|
||||
429
vendor/ruvector/crates/rvlite/src/storage/epoch.rs
vendored
Normal file
429
vendor/ruvector/crates/rvlite/src/storage/epoch.rs
vendored
Normal file
@@ -0,0 +1,429 @@
|
||||
//! Epoch-based reconciliation for hybrid RVF + IndexedDB persistence.
|
||||
//!
|
||||
//! RVF is the source of truth for vectors. IndexedDB is a rebuildable
|
||||
//! cache for metadata. Both stores share a monotonic epoch counter.
|
||||
//!
|
||||
//! Write order:
|
||||
//! 1. Write vectors to RVF (append-only, crash-safe)
|
||||
//! 2. Write metadata to IndexedDB
|
||||
//! 3. Commit shared epoch in both stores
|
||||
//!
|
||||
//! On startup: compare epochs and rebuild the lagging side.
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
/// Monotonic epoch counter shared between RVF and metadata stores.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Epoch(pub u64);

impl Epoch {
    /// The initial epoch.
    pub const ZERO: Self = Self(0);

    /// The successor epoch. Panics on `u64` overflow, which is unreachable
    /// in practice (2^64 commits).
    pub fn next(self) -> Self {
        let bumped = self.0.checked_add(1).expect("epoch overflow");
        Epoch(bumped)
    }

    /// Raw counter value.
    pub fn value(self) -> u64 {
        self.0
    }
}
|
||||
|
||||
/// State describing the relationship between RVF and metadata epochs.
///
/// Produced by `compare_epochs`; the `u64` payloads are the absolute
/// difference between the two epoch counters.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EpochState {
    /// Both stores agree on the current epoch.
    Synchronized,
    /// RVF store is ahead of metadata by the given delta.
    /// Expected after a crash between the RVF write and the metadata write.
    RvfAhead(u64),
    /// Metadata store is ahead of RVF by the given delta (anomalous).
    MetadataAhead(u64),
}
|
||||
|
||||
/// Action to take after comparing epochs.
///
/// Mapped from an [`EpochState`] by `reconcile_action`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReconcileAction {
    /// No reconciliation needed -- both stores are in sync.
    None,
    /// Metadata is stale; rebuild it from the authoritative RVF store.
    RebuildMetadata,
    /// RVF is somehow behind metadata; rebuild vectors from RVF file.
    /// This should not normally happen and indicates a prior incomplete write.
    RebuildFromRvf,
    /// Metadata is ahead which should never happen under correct operation.
    /// Log a warning and trust RVF as the source of truth.
    LogWarningTrustRvf,
}
|
||||
|
||||
/// Result of comparing epochs between RVF and metadata stores.
///
/// Kept for backward compatibility with existing callers.
/// Unlike [`ReconcileAction`], the variants carry the two [`Epoch`] values
/// that triggered the decision, so callers can log or replay the gap.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReconciliationAction {
    /// Both stores are in sync -- no action needed.
    InSync,
    /// RVF is ahead -- rebuild metadata from RVF vectors.
    RebuildMetadata {
        rvf_epoch: Epoch,
        metadata_epoch: Epoch,
    },
    /// Metadata is ahead (should not happen) -- log warning, trust RVF.
    TrustRvf {
        rvf_epoch: Epoch,
        metadata_epoch: Epoch,
    },
}
|
||||
|
||||
/// Compare raw epoch values and return the relationship state.
|
||||
pub fn compare_epochs(rvf_epoch: u64, metadata_epoch: u64) -> EpochState {
|
||||
if rvf_epoch == metadata_epoch {
|
||||
EpochState::Synchronized
|
||||
} else if rvf_epoch > metadata_epoch {
|
||||
EpochState::RvfAhead(rvf_epoch - metadata_epoch)
|
||||
} else {
|
||||
EpochState::MetadataAhead(metadata_epoch - rvf_epoch)
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine the reconciliation action for a given epoch state.
|
||||
pub fn reconcile_action(state: &EpochState) -> ReconcileAction {
|
||||
match state {
|
||||
EpochState::Synchronized => ReconcileAction::None,
|
||||
EpochState::RvfAhead(delta) => {
|
||||
if *delta == 1 {
|
||||
// Common case: a single write committed to RVF but metadata
|
||||
// update was lost (e.g. crash between step 1 and step 2).
|
||||
ReconcileAction::RebuildMetadata
|
||||
} else {
|
||||
// Multiple epochs behind -- still rebuild metadata, but the
|
||||
// gap is larger so more data must be replayed.
|
||||
ReconcileAction::RebuildMetadata
|
||||
}
|
||||
}
|
||||
EpochState::MetadataAhead(delta) => {
|
||||
if *delta == 1 {
|
||||
// Metadata committed but RVF write was lost. This means the
|
||||
// RVF file is still valid at its own epoch -- rebuild from it.
|
||||
ReconcileAction::RebuildFromRvf
|
||||
} else {
|
||||
// Large gap with metadata ahead is anomalous. Trust RVF.
|
||||
ReconcileAction::LogWarningTrustRvf
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compare epochs and determine reconciliation action (legacy API).
|
||||
pub fn reconcile(rvf_epoch: Epoch, metadata_epoch: Epoch) -> ReconciliationAction {
|
||||
match rvf_epoch.cmp(&metadata_epoch) {
|
||||
std::cmp::Ordering::Equal => ReconciliationAction::InSync,
|
||||
std::cmp::Ordering::Greater => ReconciliationAction::RebuildMetadata {
|
||||
rvf_epoch,
|
||||
metadata_epoch,
|
||||
},
|
||||
std::cmp::Ordering::Less => ReconciliationAction::TrustRvf {
|
||||
rvf_epoch,
|
||||
metadata_epoch,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Thread-safe monotonic epoch tracker.
///
/// Uses `AtomicU64` internally so it can be shared across threads without
/// a mutex. The counter is strictly monotonic: it can only move forward
/// (except via `force_set`, which may rewind during recovery).
///
/// # Write protocol
///
/// Callers must follow the three-phase commit:
/// 1. Call `begin_write()` to get the next epoch value.
/// 2. Write vectors to RVF with that epoch.
/// 3. Write metadata to IndexedDB with that epoch.
/// 4. Call `commit(epoch)` to advance the tracker.
///
/// If step 2 or 3 fails, do NOT call `commit` -- the tracker stays at the
/// previous epoch so that the next startup triggers reconciliation.
pub struct EpochTracker {
    /// Current committed epoch.
    // Advanced only by `commit` (CAS) or `force_set` (recovery), so a
    // `current()` read always observes a value that was fully committed.
    current: AtomicU64,
}
|
||||
|
||||
impl EpochTracker {
|
||||
/// Create a new tracker starting at the given epoch.
|
||||
pub fn new(initial: u64) -> Self {
|
||||
Self {
|
||||
current: AtomicU64::new(initial),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a tracker starting at epoch zero.
|
||||
pub fn zero() -> Self {
|
||||
Self::new(0)
|
||||
}
|
||||
|
||||
/// Read the current committed epoch.
|
||||
pub fn current(&self) -> u64 {
|
||||
self.current.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// Return the next epoch value for a pending write.
|
||||
///
|
||||
/// This does NOT advance the tracker. The caller must call `commit`
|
||||
/// after both RVF and metadata writes succeed.
|
||||
pub fn begin_write(&self) -> u64 {
|
||||
self.current
|
||||
.load(Ordering::Acquire)
|
||||
.checked_add(1)
|
||||
.expect("epoch overflow")
|
||||
}
|
||||
|
||||
/// Commit the given epoch, advancing the tracker.
|
||||
///
|
||||
/// Returns `true` if the commit succeeded (epoch was exactly current + 1).
|
||||
/// Returns `false` if the epoch was stale or out of order, which means
|
||||
/// another writer committed first or the caller passed a wrong value.
|
||||
pub fn commit(&self, epoch: u64) -> bool {
|
||||
let expected = epoch.checked_sub(1).unwrap_or(0);
|
||||
self.current
|
||||
.compare_exchange(expected, epoch, Ordering::AcqRel, Ordering::Acquire)
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
/// Force-set the epoch to a specific value.
|
||||
///
|
||||
/// Used during recovery/reconciliation when we need to align the
|
||||
/// tracker with a known-good state read from disk.
|
||||
pub fn force_set(&self, epoch: u64) {
|
||||
self.current.store(epoch, Ordering::Release);
|
||||
}
|
||||
|
||||
/// Check the relationship between the RVF epoch stored on disk and the
|
||||
/// metadata epoch, then return the appropriate reconciliation action.
|
||||
pub fn check_and_reconcile(&self, rvf_epoch: u64, metadata_epoch: u64) -> ReconcileAction {
|
||||
let state = compare_epochs(rvf_epoch, metadata_epoch);
|
||||
let action = reconcile_action(&state);
|
||||
|
||||
// After reconciliation, align the tracker to the authoritative epoch.
|
||||
match &action {
|
||||
ReconcileAction::None => {
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
ReconcileAction::RebuildMetadata | ReconcileAction::RebuildFromRvf => {
|
||||
// After rebuild, both sides will match the RVF epoch.
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
ReconcileAction::LogWarningTrustRvf => {
|
||||
// Trust RVF -- set tracker to RVF epoch.
|
||||
self.force_set(rvf_epoch);
|
||||
}
|
||||
}
|
||||
|
||||
action
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for EpochTracker {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("EpochTracker")
|
||||
.field("current", &self.current.load(Ordering::Relaxed))
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Legacy API tests (preserved) ----
    // Exercise the `reconcile` / `ReconciliationAction` API kept for
    // backward compatibility.

    #[test]
    fn in_sync() {
        let e = Epoch(5);
        assert_eq!(reconcile(e, e), ReconciliationAction::InSync);
    }

    #[test]
    fn rvf_ahead_rebuilds_metadata() {
        let action = reconcile(Epoch(3), Epoch(2));
        assert_eq!(
            action,
            ReconciliationAction::RebuildMetadata {
                rvf_epoch: Epoch(3),
                metadata_epoch: Epoch(2),
            }
        );
    }

    #[test]
    fn metadata_ahead_trusts_rvf() {
        let action = reconcile(Epoch(1), Epoch(3));
        assert_eq!(
            action,
            ReconciliationAction::TrustRvf {
                rvf_epoch: Epoch(1),
                metadata_epoch: Epoch(3),
            }
        );
    }

    #[test]
    fn epoch_increment() {
        assert_eq!(Epoch::ZERO.next(), Epoch(1));
        assert_eq!(Epoch(99).next(), Epoch(100));
    }

    // ---- New epoch state / reconcile tests ----
    // Cover `compare_epochs` and `reconcile_action` over all three states.

    #[test]
    fn compare_epochs_synchronized() {
        assert_eq!(compare_epochs(5, 5), EpochState::Synchronized);
        assert_eq!(compare_epochs(0, 0), EpochState::Synchronized);
    }

    #[test]
    fn compare_epochs_rvf_ahead() {
        assert_eq!(compare_epochs(10, 7), EpochState::RvfAhead(3));
        assert_eq!(compare_epochs(1, 0), EpochState::RvfAhead(1));
    }

    #[test]
    fn compare_epochs_metadata_ahead() {
        assert_eq!(compare_epochs(3, 8), EpochState::MetadataAhead(5));
        assert_eq!(compare_epochs(0, 1), EpochState::MetadataAhead(1));
    }

    #[test]
    fn reconcile_action_none_when_synchronized() {
        let state = EpochState::Synchronized;
        assert_eq!(reconcile_action(&state), ReconcileAction::None);
    }

    #[test]
    fn reconcile_action_rebuild_metadata_when_rvf_ahead() {
        // Same action regardless of how far ahead RVF is.
        assert_eq!(
            reconcile_action(&EpochState::RvfAhead(1)),
            ReconcileAction::RebuildMetadata
        );
        assert_eq!(
            reconcile_action(&EpochState::RvfAhead(5)),
            ReconcileAction::RebuildMetadata
        );
    }

    #[test]
    fn reconcile_action_rebuild_from_rvf_when_metadata_ahead_by_one() {
        assert_eq!(
            reconcile_action(&EpochState::MetadataAhead(1)),
            ReconcileAction::RebuildFromRvf
        );
    }

    #[test]
    fn reconcile_action_log_warning_when_metadata_far_ahead() {
        assert_eq!(
            reconcile_action(&EpochState::MetadataAhead(3)),
            ReconcileAction::LogWarningTrustRvf
        );
    }

    // ---- EpochTracker tests ----

    #[test]
    fn tracker_zero_starts_at_zero() {
        let tracker = EpochTracker::zero();
        assert_eq!(tracker.current(), 0);
    }

    #[test]
    fn tracker_new_starts_at_initial() {
        let tracker = EpochTracker::new(42);
        assert_eq!(tracker.current(), 42);
    }

    #[test]
    fn tracker_begin_write_returns_next() {
        let tracker = EpochTracker::new(10);
        assert_eq!(tracker.begin_write(), 11);
        // begin_write is idempotent until commit
        assert_eq!(tracker.begin_write(), 11);
    }

    #[test]
    fn tracker_commit_advances_epoch() {
        let tracker = EpochTracker::zero();
        let next = tracker.begin_write();
        assert_eq!(next, 1);
        assert!(tracker.commit(next));
        assert_eq!(tracker.current(), 1);

        let next2 = tracker.begin_write();
        assert_eq!(next2, 2);
        assert!(tracker.commit(next2));
        assert_eq!(tracker.current(), 2);
    }

    #[test]
    fn tracker_commit_rejects_stale_epoch() {
        let tracker = EpochTracker::new(5);
        // Try to commit epoch 3 which is behind current
        assert!(!tracker.commit(3));
        assert_eq!(tracker.current(), 5);
    }

    #[test]
    fn tracker_commit_rejects_skip() {
        let tracker = EpochTracker::new(5);
        // Try to commit epoch 8, skipping 6 and 7
        assert!(!tracker.commit(8));
        assert_eq!(tracker.current(), 5);
    }

    #[test]
    fn tracker_force_set() {
        let tracker = EpochTracker::new(10);
        tracker.force_set(100);
        assert_eq!(tracker.current(), 100);
        // Can also go backward with force_set (recovery scenario)
        tracker.force_set(5);
        assert_eq!(tracker.current(), 5);
    }

    // `check_and_reconcile` must both return the right action and align
    // the tracker to the RVF epoch (RVF is the source of truth).

    #[test]
    fn tracker_check_and_reconcile_in_sync() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(7, 7);
        assert_eq!(action, ReconcileAction::None);
        assert_eq!(tracker.current(), 7);
    }

    #[test]
    fn tracker_check_and_reconcile_rvf_ahead() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(10, 8);
        assert_eq!(action, ReconcileAction::RebuildMetadata);
        assert_eq!(tracker.current(), 10);
    }

    #[test]
    fn tracker_check_and_reconcile_metadata_far_ahead() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(3, 8);
        assert_eq!(action, ReconcileAction::LogWarningTrustRvf);
        assert_eq!(tracker.current(), 3);
    }

    #[test]
    fn tracker_debug_format() {
        let tracker = EpochTracker::new(42);
        let debug = format!("{:?}", tracker);
        assert!(debug.contains("EpochTracker"));
        assert!(debug.contains("42"));
    }

    // ---- Thread safety (basic) ----
    // Compile-time proof that the tracker can be shared across threads.

    #[test]
    fn tracker_is_send_and_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<EpochTracker>();
    }
}
|
||||
296
vendor/ruvector/crates/rvlite/src/storage/id_map.rs
vendored
Normal file
296
vendor/ruvector/crates/rvlite/src/storage/id_map.rs
vendored
Normal file
@@ -0,0 +1,296 @@
|
||||
//! Direct mapping between RVF vector IDs and SQL primary keys.
|
||||
//!
|
||||
//! In rvlite the mapping is identity: RVF u64 IDs are the same as SQL
|
||||
//! primary keys. This zero-cost design avoids an extra lookup table and
|
||||
//! keeps memory usage minimal.
|
||||
//!
|
||||
//! The [`IdMapping`] trait exists for future extensibility -- if a
|
||||
//! non-identity mapping is ever needed (e.g. hashed IDs, composite keys),
|
||||
//! a new implementation can be swapped in without changing call sites.
|
||||
|
||||
/// Trait for converting between RVF vector IDs and SQL primary keys.
///
/// Implementors define how the two `u64` ID spaces relate to each other.
/// The default implementation ([`DirectIdMap`]) uses identity mapping.
pub trait IdMapping {
    /// Convert a SQL primary key to an RVF vector ID.
    fn to_rvf_id(&self, sql_pk: u64) -> u64;

    /// Convert an RVF vector ID back to a SQL primary key.
    fn to_sql_pk(&self, rvf_id: u64) -> u64;

    /// Validate that the two slices describe the same set of records:
    /// every RVF ID must correspond to a SQL PK and vice versa (order
    /// does not matter) for the mapping to be considered valid.
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool;
}

/// Zero-cost identity mapping where RVF u64 IDs equal SQL primary keys.
///
/// This is the default and recommended mapping for rvlite. Because both
/// ID spaces use `u64`, no conversion is needed and the mapping functions
/// compile down to no-ops.
///
/// # Example
///
/// ```
/// # use rvlite::storage::id_map::{DirectIdMap, IdMapping};
/// let map = DirectIdMap;
/// assert_eq!(map.to_rvf_id(42), 42);
/// assert_eq!(map.to_sql_pk(42), 42);
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct DirectIdMap;

impl DirectIdMap {
    /// Create a new direct (identity) ID map.
    pub fn new() -> Self {
        DirectIdMap
    }

    /// Identity conversion from SQL primary key to RVF vector ID.
    ///
    /// Free-function alternative to the trait method, useful when the
    /// concrete type is known and dynamic dispatch is unwanted.
    #[inline(always)]
    pub fn to_rvf_id(sql_pk: u64) -> u64 {
        sql_pk
    }

    /// Identity conversion from RVF vector ID to SQL primary key.
    #[inline(always)]
    pub fn to_sql_pk(rvf_id: u64) -> u64 {
        rvf_id
    }

    /// Validate that the two slices contain the same multiset of IDs.
    ///
    /// Under identity mapping, `rvf_ids` and `sql_pks` must hold the same
    /// elements (duplicates included), possibly in a different order.
    pub fn validate_mapping(rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        if rvf_ids.len() != sql_pks.len() {
            return false;
        }
        let sorted = |ids: &[u64]| -> Vec<u64> {
            let mut copy = ids.to_vec();
            copy.sort_unstable();
            copy
        };
        sorted(rvf_ids) == sorted(sql_pks)
    }
}

impl IdMapping for DirectIdMap {
    #[inline(always)]
    fn to_rvf_id(&self, sql_pk: u64) -> u64 {
        sql_pk
    }

    #[inline(always)]
    fn to_sql_pk(&self, rvf_id: u64) -> u64 {
        rvf_id
    }

    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        // Delegate to the inherent associated function.
        Self::validate_mapping(rvf_ids, sql_pks)
    }
}
|
||||
|
||||
/// An offset-based ID mapping where SQL PKs start from a different base.
|
||||
///
|
||||
/// Useful when the SQL table uses auto-increment starting at 1 but
|
||||
/// the RVF store is zero-indexed (or vice versa).
|
||||
///
|
||||
/// `rvf_id = sql_pk + offset`
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct OffsetIdMap {
|
||||
/// Offset added to SQL PK to produce the RVF ID.
|
||||
/// Can be negative via wrapping arithmetic on u64.
|
||||
offset: i64,
|
||||
}
|
||||
|
||||
impl OffsetIdMap {
|
||||
/// Create an offset mapping.
|
||||
///
|
||||
/// `offset` is added to SQL PKs to produce RVF IDs.
|
||||
/// Use a negative offset if RVF IDs are smaller than SQL PKs.
|
||||
pub fn new(offset: i64) -> Self {
|
||||
Self { offset }
|
||||
}
|
||||
}
|
||||
|
||||
impl IdMapping for OffsetIdMap {
|
||||
#[inline]
|
||||
fn to_rvf_id(&self, sql_pk: u64) -> u64 {
|
||||
(sql_pk as i64).wrapping_add(self.offset) as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn to_sql_pk(&self, rvf_id: u64) -> u64 {
|
||||
(rvf_id as i64).wrapping_sub(self.offset) as u64
|
||||
}
|
||||
|
||||
fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
|
||||
if rvf_ids.len() != sql_pks.len() {
|
||||
return false;
|
||||
}
|
||||
let mut expected: Vec<u64> = sql_pks.iter().map(|&pk| self.to_rvf_id(pk)).collect();
|
||||
let mut actual: Vec<u64> = rvf_ids.to_vec();
|
||||
expected.sort_unstable();
|
||||
actual.sort_unstable();
|
||||
expected == actual
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ---- DirectIdMap tests ----
    // These call the inherent associated functions (no receiver).

    #[test]
    fn direct_to_rvf_id_is_identity() {
        assert_eq!(DirectIdMap::to_rvf_id(0), 0);
        assert_eq!(DirectIdMap::to_rvf_id(42), 42);
        assert_eq!(DirectIdMap::to_rvf_id(u64::MAX), u64::MAX);
    }

    #[test]
    fn direct_to_sql_pk_is_identity() {
        assert_eq!(DirectIdMap::to_sql_pk(0), 0);
        assert_eq!(DirectIdMap::to_sql_pk(42), 42);
        assert_eq!(DirectIdMap::to_sql_pk(u64::MAX), u64::MAX);
    }

    #[test]
    fn direct_roundtrip() {
        for id in [0, 1, 100, u64::MAX / 2, u64::MAX] {
            assert_eq!(DirectIdMap::to_sql_pk(DirectIdMap::to_rvf_id(id)), id);
            assert_eq!(DirectIdMap::to_rvf_id(DirectIdMap::to_sql_pk(id)), id);
        }
    }

    #[test]
    fn direct_validate_same_elements() {
        let rvf = vec![1, 2, 3];
        let sql = vec![3, 1, 2];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_empty() {
        assert!(DirectIdMap::validate_mapping(&[], &[]));
    }

    #[test]
    fn direct_validate_different_length_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_different_elements_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2, 4];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    // Validation is a multiset comparison, so duplicate counts must match.

    #[test]
    fn direct_validate_duplicates_match() {
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 1];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }

    #[test]
    fn direct_validate_duplicates_mismatch() {
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }

    // ---- IdMapping trait via DirectIdMap ----
    // Fully-qualified calls disambiguate from the inherent functions.

    #[test]
    fn trait_direct_to_rvf_id() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_rvf_id(&map, 99), 99);
    }

    #[test]
    fn trait_direct_to_sql_pk() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_sql_pk(&map, 99), 99);
    }

    #[test]
    fn trait_direct_validate() {
        let map = DirectIdMap;
        assert!(IdMapping::validate_mapping(&map, &[1, 2], &[2, 1]));
        assert!(!IdMapping::validate_mapping(&map, &[1, 2], &[2, 3]));
    }

    // ---- OffsetIdMap tests ----

    #[test]
    fn offset_positive() {
        let map = OffsetIdMap::new(10);
        assert_eq!(map.to_rvf_id(0), 10);
        assert_eq!(map.to_rvf_id(5), 15);
        assert_eq!(map.to_sql_pk(10), 0);
        assert_eq!(map.to_sql_pk(15), 5);
    }

    #[test]
    fn offset_negative() {
        let map = OffsetIdMap::new(-1);
        // SQL PK 1 -> RVF ID 0
        assert_eq!(map.to_rvf_id(1), 0);
        assert_eq!(map.to_sql_pk(0), 1);
    }

    #[test]
    fn offset_zero_is_identity() {
        let map = OffsetIdMap::new(0);
        for id in [0, 1, 42, 1000] {
            assert_eq!(map.to_rvf_id(id), id);
            assert_eq!(map.to_sql_pk(id), id);
        }
    }

    #[test]
    fn offset_roundtrip() {
        let map = OffsetIdMap::new(7);
        for pk in [0, 1, 100, 999] {
            assert_eq!(map.to_sql_pk(map.to_rvf_id(pk)), pk);
        }
    }

    #[test]
    fn offset_validate() {
        let map = OffsetIdMap::new(10);
        // SQL PKs [0, 1, 2] -> RVF IDs [10, 11, 12]
        assert!(map.validate_mapping(&[12, 10, 11], &[2, 0, 1]));
        assert!(!map.validate_mapping(&[10, 11, 12], &[0, 1, 3]));
    }

    // ---- Dynamic dispatch ----
    // Both implementations must be usable behind `dyn IdMapping`.

    #[test]
    fn trait_object_works() {
        let direct: Box<dyn IdMapping> = Box::new(DirectIdMap);
        assert_eq!(direct.to_rvf_id(5), 5);

        let offset: Box<dyn IdMapping> = Box::new(OffsetIdMap::new(100));
        assert_eq!(offset.to_rvf_id(5), 105);
    }

    // ---- Default impl ----

    #[test]
    fn direct_default() {
        let map: DirectIdMap = Default::default();
        assert_eq!(map.to_rvf_id(7), 7);
    }
}
|
||||
243
vendor/ruvector/crates/rvlite/src/storage/indexeddb.rs
vendored
Normal file
243
vendor/ruvector/crates/rvlite/src/storage/indexeddb.rs
vendored
Normal file
@@ -0,0 +1,243 @@
|
||||
//! IndexedDB storage implementation for WASM
|
||||
//!
|
||||
//! Uses web-sys bindings to interact with the browser's IndexedDB API
|
||||
//! for persistent storage of RvLite state.
|
||||
|
||||
use super::state::RvLiteState;
|
||||
use js_sys::{Object, Reflect};
|
||||
use wasm_bindgen::prelude::*;
|
||||
use wasm_bindgen::JsCast;
|
||||
use wasm_bindgen_futures::JsFuture;
|
||||
use web_sys::{IdbDatabase, IdbObjectStore, IdbRequest, IdbTransaction, IdbTransactionMode};
|
||||
|
||||
// A single IndexedDB database with one object store, holding the entire
// serialized state under one well-known key.
const DB_NAME: &str = "rvlite_db";
// Bump DB_VERSION to trigger `onupgradeneeded` for schema changes.
const DB_VERSION: u32 = 1;
const STORE_NAME: &str = "state";
const STATE_KEY: &str = "main";
|
||||
|
||||
/// IndexedDB storage backend for RvLite persistence
///
/// Wraps a browser `IdbDatabase` handle; all async methods require a
/// prior successful call to `init()`.
pub struct IndexedDBStorage {
    /// Open database handle; `None` until `init()` succeeds or after `close()`.
    db: Option<IdbDatabase>,
}
|
||||
|
||||
impl IndexedDBStorage {
    /// Create a new IndexedDB storage instance (no connection opened yet).
    pub fn new() -> Self {
        Self { db: None }
    }

    /// Initialize and open the IndexedDB database
    ///
    /// Opens `DB_NAME` at `DB_VERSION`, creating the `STORE_NAME` object
    /// store on first run (or version bump) via `onupgradeneeded`.
    pub async fn init(&mut self) -> Result<(), JsValue> {
        let window = web_sys::window().ok_or_else(|| JsValue::from_str("No window"))?;
        let indexed_db = window
            .indexed_db()?
            .ok_or_else(|| JsValue::from_str("IndexedDB not available"))?;

        let open_request = indexed_db.open_with_u32(DB_NAME, DB_VERSION)?;

        // Handle database upgrade (create object store if needed)
        let onupgradeneeded = Closure::once(Box::new(move |event: web_sys::Event| {
            let target = event.target().unwrap();
            let request: IdbRequest = target.unchecked_into();
            let db: IdbDatabase = request.result().unwrap().unchecked_into();

            // Create object store if it doesn't exist
            if !db.object_store_names().contains(STORE_NAME) {
                db.create_object_store(STORE_NAME).unwrap();
            }
        }) as Box<dyn FnOnce(_)>);

        open_request.set_onupgradeneeded(Some(onupgradeneeded.as_ref().unchecked_ref()));
        // NOTE(review): `forget()` leaks the closure; acceptable here since
        // init() runs once per page, but worth confirming.
        onupgradeneeded.forget(); // Prevent closure from being dropped

        // Wait for database to open using JsFuture
        let db_result = wait_for_request(&open_request).await?;
        let db: IdbDatabase = db_result.unchecked_into();

        self.db = Some(db);
        Ok(())
    }

    /// Check if IndexedDB is available
    ///
    /// Returns `false` outside a browser window context or when the
    /// `indexedDB` global is missing/blocked.
    pub fn is_available() -> bool {
        web_sys::window()
            .and_then(|w| w.indexed_db().ok().flatten())
            .is_some()
    }

    /// Save state to IndexedDB
    ///
    /// Serializes `state` with `serde_wasm_bindgen` and stores it under
    /// `STATE_KEY`, overwriting any previous snapshot.
    ///
    /// # Errors
    /// Fails if `init()` was not called, serialization fails, or any
    /// IndexedDB operation rejects.
    pub async fn save(&self, state: &RvLiteState) -> Result<(), JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        // Convert state to JsValue
        let js_state = serde_wasm_bindgen::to_value(state)?;

        // Start transaction
        let store_names = js_sys::Array::new();
        store_names.push(&JsValue::from_str(STORE_NAME));

        let transaction =
            db.transaction_with_str_sequence_and_mode(&store_names, IdbTransactionMode::Readwrite)?;

        let store = transaction.object_store(STORE_NAME)?;

        // Put state with key
        let request = store.put_with_key(&js_state, &JsValue::from_str(STATE_KEY))?;

        // Wait for completion
        // NOTE(review): this awaits the put request's success, not the
        // transaction's `complete` event -- confirm durability expectations.
        wait_for_request(&request).await?;

        Ok(())
    }

    /// Load state from IndexedDB
    ///
    /// Returns `Ok(None)` when no snapshot has ever been saved.
    pub async fn load(&self) -> Result<Option<RvLiteState>, JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        // Start read transaction
        let transaction = db.transaction_with_str(STORE_NAME)?;
        let store = transaction.object_store(STORE_NAME)?;

        // Get state by key
        let request = store.get(&JsValue::from_str(STATE_KEY))?;

        // Wait for result
        let result = wait_for_request(&request).await?;

        // IndexedDB yields `undefined` for a missing key.
        if result.is_undefined() || result.is_null() {
            return Ok(None);
        }

        // Deserialize state
        let state: RvLiteState = serde_wasm_bindgen::from_value(result)?;
        Ok(Some(state))
    }

    /// Delete all stored state
    pub async fn clear(&self) -> Result<(), JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        let store_names = js_sys::Array::new();
        store_names.push(&JsValue::from_str(STORE_NAME));

        let transaction =
            db.transaction_with_str_sequence_and_mode(&store_names, IdbTransactionMode::Readwrite)?;

        let store = transaction.object_store(STORE_NAME)?;
        let request = store.clear()?;

        wait_for_request(&request).await?;
        Ok(())
    }

    /// Check if state exists in storage
    ///
    /// Counts entries under `STATE_KEY`; true when at least one exists.
    pub async fn exists(&self) -> Result<bool, JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        let transaction = db.transaction_with_str(STORE_NAME)?;
        let store = transaction.object_store(STORE_NAME)?;

        let request = store.count_with_key(&JsValue::from_str(STATE_KEY))?;
        let result = wait_for_request(&request).await?;

        // The count arrives as a JS number; non-numeric results fall back to 0.
        let count = result.as_f64().unwrap_or(0.0) as u32;
        Ok(count > 0)
    }

    /// Get storage info (for debugging)
    ///
    /// Returns a plain JS object `{ database, store, entries }`.
    pub async fn get_info(&self) -> Result<JsValue, JsValue> {
        let db = self
            .db
            .as_ref()
            .ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;

        let transaction = db.transaction_with_str(STORE_NAME)?;
        let store = transaction.object_store(STORE_NAME)?;

        let count_request = store.count()?;
        let count = wait_for_request(&count_request).await?;

        let info = Object::new();
        Reflect::set(&info, &"database".into(), &DB_NAME.into())?;
        Reflect::set(&info, &"store".into(), &STORE_NAME.into())?;
        Reflect::set(&info, &"entries".into(), &count)?;

        Ok(info.into())
    }

    /// Close the database connection
    ///
    /// Safe to call repeatedly; a handle can only be closed once because
    /// `take()` empties the slot.
    pub fn close(&mut self) {
        if let Some(db) = self.db.take() {
            db.close();
        }
    }
}
|
||||
|
||||
impl Default for IndexedDBStorage {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for IndexedDBStorage {
    fn drop(&mut self) {
        // Release the underlying IdbDatabase handle (no-op if never opened
        // or already closed).
        self.close();
    }
}
|
||||
|
||||
/// Wait for an IdbRequest to complete and return the result
///
/// Bridges the callback-based IndexedDB request into a future by wrapping
/// `onsuccess`/`onerror` in a JS `Promise`, awaiting it, and only then
/// reading `request.result()`.
async fn wait_for_request(request: &IdbRequest) -> Result<JsValue, JsValue> {
    let promise = js_sys::Promise::new(&mut |resolve, reject| {
        // Success handler
        let resolve_clone = resolve.clone();
        let onsuccess = Closure::once(Box::new(move |_event: web_sys::Event| {
            // Note: We can't access request here due to lifetime issues
            // The result will be passed through the event
            resolve_clone.call0(&JsValue::NULL).unwrap();
        }) as Box<dyn FnOnce(_)>);

        // Error handler
        // NOTE(review): the concrete DOMException is discarded here and
        // replaced by a generic message -- consider surfacing
        // `request.error()` for diagnosability.
        let onerror = Closure::once(Box::new(move |_event: web_sys::Event| {
            reject
                .call1(&JsValue::NULL, &JsValue::from_str("IndexedDB error"))
                .unwrap();
        }) as Box<dyn FnOnce(_)>);

        request.set_onsuccess(Some(onsuccess.as_ref().unchecked_ref()));
        request.set_onerror(Some(onerror.as_ref().unchecked_ref()));

        // NOTE(review): `forget()` leaks both closures on every call;
        // one pair leaks per request (the unfired handler in particular).
        onsuccess.forget();
        onerror.forget();
    });

    JsFuture::from(promise).await?;

    // Get the result after the request completes
    request.result()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Note: IndexedDB tests require a browser environment
    // These are placeholder tests for compilation verification

    #[test]
    fn test_storage_new() {
        // A fresh instance holds no database handle until init() runs.
        let storage = IndexedDBStorage::new();
        assert!(storage.db.is_none());
    }
}
|
||||
21
vendor/ruvector/crates/rvlite/src/storage/mod.rs
vendored
Normal file
21
vendor/ruvector/crates/rvlite/src/storage/mod.rs
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
//! IndexedDB storage backend for WASM persistence
//!
//! Provides async-compatible persistence using IndexedDB for:
//! - Vector database state
//! - Cypher graph state
//! - SPARQL triple store state

pub mod indexeddb;
pub mod state;

// The following submodules support the hybrid RVF + IndexedDB design and
// are only compiled when the `rvf-backend` feature is enabled.

#[cfg(feature = "rvf-backend")]
pub mod epoch;

#[cfg(feature = "rvf-backend")]
pub mod writer_lease;

#[cfg(feature = "rvf-backend")]
pub mod id_map;

// Convenience re-exports of the always-available storage types.
pub use indexeddb::IndexedDBStorage;
pub use state::{GraphState, RvLiteState, TripleStoreState, VectorState};
|
||||
158
vendor/ruvector/crates/rvlite/src/storage/state.rs
vendored
Normal file
158
vendor/ruvector/crates/rvlite/src/storage/state.rs
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
//! Serializable state structures for RvLite persistence
|
||||
//!
|
||||
//! These structures represent the complete state of the RvLite database
|
||||
//! in a format that can be serialized to/from IndexedDB.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Complete serializable state for RvLite
///
/// Top-level snapshot persisted to IndexedDB; aggregates the state of every
/// engine (vectors, graph, triples, SQL schemas).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RvLiteState {
    /// Version for schema migration
    pub version: u32,
    /// Timestamp of last save
    pub saved_at: u64,
    /// Vector database state
    pub vectors: VectorState,
    /// Cypher graph state
    pub graph: GraphState,
    /// SPARQL triple store state
    pub triples: TripleStoreState,
    /// SQL engine schemas
    pub sql_schemas: Vec<SqlTableState>,
}

impl Default for RvLiteState {
    // Hand-written (not derived) so `version` can start at 1; bump this
    // value whenever the serialized layout changes.
    fn default() -> Self {
        Self {
            version: 1,
            saved_at: 0,
            vectors: VectorState::default(),
            graph: GraphState::default(),
            triples: TripleStoreState::default(),
            sql_schemas: Vec::new(),
        }
    }
}
|
||||
|
||||
/// Serializable vector database state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VectorState {
    /// Vector entries: id -> (vector, metadata)
    pub entries: Vec<VectorEntry>,
    /// Database dimensions
    pub dimensions: usize,
    /// Distance metric name
    pub distance_metric: String,
    /// Next auto-generated ID counter
    pub next_id: u64,
}

/// Single vector entry for serialization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorEntry {
    /// Stable string identifier of the vector.
    pub id: String,
    /// Raw embedding components.
    pub vector: Vec<f32>,
    /// Optional free-form metadata attached to the vector.
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
|
||||
|
||||
/// Serializable Cypher graph state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphState {
    /// All nodes
    pub nodes: Vec<NodeState>,
    /// All edges
    pub edges: Vec<EdgeState>,
    /// Next node ID counter
    pub next_node_id: usize,
    /// Next edge ID counter
    pub next_edge_id: usize,
}

/// Serializable node
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeState {
    /// Node identifier.
    pub id: String,
    /// Cypher labels attached to the node.
    pub labels: Vec<String>,
    /// Node property map.
    pub properties: HashMap<String, PropertyValue>,
}

/// Serializable edge
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeState {
    /// Edge identifier.
    pub id: String,
    /// Source node id.
    pub from: String,
    /// Target node id.
    pub to: String,
    /// Relationship type name.
    pub edge_type: String,
    /// Edge property map.
    pub properties: HashMap<String, PropertyValue>,
}
|
||||
|
||||
/// Property value for serialization (mirrors cypher::Value)
///
/// Serialized in adjacently-tagged form, e.g.
/// `{"type": "Integer", "value": 42}` — do not rename variants without a
/// version bump.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
pub enum PropertyValue {
    Null,
    Boolean(bool),
    Integer(i64),
    Float(f64),
    String(String),
    List(Vec<PropertyValue>),
    Map(HashMap<String, PropertyValue>),
}
|
||||
|
||||
/// Serializable SPARQL triple store state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TripleStoreState {
    /// All triples
    pub triples: Vec<TripleState>,
    /// Named graphs
    pub named_graphs: HashMap<String, Vec<u64>>,
    /// Default graph triple IDs
    pub default_graph: Vec<u64>,
    /// Next triple ID counter
    pub next_id: u64,
}

/// Serializable RDF triple
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TripleState {
    /// Triple id, referenced by the graph membership lists above.
    pub id: u64,
    /// Subject term (IRI or blank node).
    pub subject: RdfTermState,
    /// Predicate IRI.
    pub predicate: String,
    /// Object term (IRI, literal, or blank node).
    pub object: RdfTermState,
}

/// Serializable RDF term
///
/// Internally tagged on `"type"` with variant names `Iri`, `Literal`,
/// `BlankNode`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RdfTermState {
    Iri {
        value: String,
    },
    Literal {
        value: String,
        /// Datatype IRI of the literal.
        datatype: String,
        /// Optional language tag (for language-tagged strings).
        language: Option<String>,
    },
    BlankNode {
        id: String,
    },
}
|
||||
|
||||
/// Serializable SQL table schema state
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqlTableState {
    /// Table name.
    pub name: String,
    /// Ordered column definitions.
    pub columns: Vec<SqlColumnState>,
    /// Name of the column holding vectors, if this table has one.
    pub vector_column: Option<String>,
    /// Dimensionality of the vector column, if present.
    pub vector_dimensions: Option<usize>,
}

/// Serializable SQL column
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqlColumnState {
    /// Column name.
    pub name: String,
    /// SQL data type name as a string.
    pub data_type: String,
    /// Vector dimensionality, for vector-typed columns.
    pub dimensions: Option<usize>,
}
|
||||
555
vendor/ruvector/crates/rvlite/src/storage/writer_lease.rs
vendored
Normal file
555
vendor/ruvector/crates/rvlite/src/storage/writer_lease.rs
vendored
Normal file
@@ -0,0 +1,555 @@
|
||||
//! File-based writer lease for single-writer concurrency in rvlite.
|
||||
//!
|
||||
//! Provides a cooperative lock mechanism using a lock file with PID and
|
||||
//! timestamp. Only one writer may hold the lease at a time. The lease
|
||||
//! includes a heartbeat timestamp that is checked for staleness so that
|
||||
//! crashed processes do not permanently block new writers.
|
||||
//!
|
||||
//! Lock file location: `{store_path}.lock`
|
||||
//! Lock file contents: JSON with `pid`, `timestamp_secs`, `hostname`.
|
||||
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Default staleness threshold -- if the heartbeat is older than this
|
||||
/// duration, the lease is considered abandoned and may be force-acquired.
|
||||
const DEFAULT_STALE_THRESHOLD: Duration = Duration::from_secs(30);
|
||||
|
||||
/// Contents written to the lock file.
///
/// Serialized as JSON; the field names below are the on-disk schema, so
/// renaming them breaks compatibility with locks written by older builds.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LeaseMeta {
    /// Process ID of the lock holder.
    pid: u32,
    /// Unix timestamp in seconds when the lease was last refreshed.
    timestamp_secs: u64,
    /// Hostname of the lock holder.
    hostname: String,
}
|
||||
|
||||
/// A writer lease backed by a lock file on disk.
///
/// While this struct is alive, the lease is held. Dropping it releases
/// the lock file automatically via the `Drop` implementation.
///
/// # Example
///
/// ```no_run
/// use std::path::Path;
/// use std::time::Duration;
/// # // This is a doc-test stub; actual usage requires the rvf-backend feature.
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
/// // let lease = WriterLease::acquire(Path::new("/data/store.rvf"), Duration::from_secs(5))?;
/// // ... perform writes ...
/// // lease.release()?; // or just let it drop
/// # Ok(())
/// # }
/// ```
pub struct WriterLease {
    /// Path to the lock file.
    lock_path: PathBuf,
    /// Our PID, used to verify ownership on release.
    pid: u32,
    /// Whether the lease has been explicitly released.
    /// Checked by `Drop` to keep release idempotent.
    released: bool,
}
|
||||
|
||||
impl WriterLease {
    /// Attempt to acquire the writer lease for the given store path.
    ///
    /// The lock file is created at `{path}.lock`. If another process holds
    /// the lease, this function will retry until `timeout` elapses. If the
    /// existing lease is stale per [`Self::is_stale`] (heartbeat older than
    /// 30 seconds, or the holder PID on this host is no longer alive), the
    /// stale lock is broken and acquisition proceeds.
    ///
    /// # Errors
    ///
    /// Returns `io::Error` with `WouldBlock` if the timeout expires without
    /// acquiring the lease, or propagates any underlying I/O errors.
    pub fn acquire(path: &Path, timeout: Duration) -> io::Result<Self> {
        let lock_path = lock_path_for(path);
        let pid = std::process::id();
        let deadline = Instant::now() + timeout;

        loop {
            // Try to create the lock file exclusively.
            match try_create_lock(&lock_path, pid) {
                Ok(()) => {
                    return Ok(WriterLease {
                        lock_path,
                        pid,
                        released: false,
                    });
                }
                Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
                    // Lock file exists -- check if it is stale.
                    if Self::is_stale(&lock_path, DEFAULT_STALE_THRESHOLD) {
                        // Force-remove the stale lock and retry.
                        // NOTE(review): the stale-check + remove pair is not
                        // atomic — another process may break the same stale
                        // lock and create a fresh one between our check and
                        // the remove, which we would then delete. Narrow
                        // window; confirm this is acceptable.
                        let _ = fs::remove_file(&lock_path);
                        continue;
                    }

                    // Lock is active. Check timeout.
                    if Instant::now() >= deadline {
                        return Err(io::Error::new(
                            io::ErrorKind::WouldBlock,
                            format!(
                                "writer lease acquisition timed out after {:?} for {:?}",
                                timeout, lock_path
                            ),
                        ));
                    }

                    // Brief sleep before retrying.
                    std::thread::sleep(Duration::from_millis(50));
                }
                Err(e) => return Err(e),
            }
        }
    }

    /// Explicitly release the writer lease.
    ///
    /// Verifies that the lock file still belongs to this process before
    /// removing it to avoid deleting a lock acquired by another process
    /// after a stale break. Idempotent: calling it twice is a no-op.
    pub fn release(&mut self) -> io::Result<()> {
        if self.released {
            return Ok(());
        }
        self.do_release();
        self.released = true;
        Ok(())
    }

    /// Refresh the heartbeat timestamp in the lock file.
    ///
    /// Writers performing long operations should call this periodically
    /// (e.g. every 10 seconds) to prevent the lease from appearing stale.
    ///
    /// # Errors
    ///
    /// Fails if the lease was already released, or if the lock file no
    /// longer belongs to this process (taken over after a stale break).
    pub fn refresh_heartbeat(&self) -> io::Result<()> {
        if self.released {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "cannot refresh a released lease",
            ));
        }
        // Verify we still own the lock.
        if !self.owns_lock() {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "lease was taken over by another process",
            ));
        }
        write_lock_file(&self.lock_path, self.pid)
    }

    /// Check whether the lock file at the given path is stale.
    ///
    /// `path` may be either the store path or the lock path itself (any
    /// path already ending in `.lock` is used verbatim).
    ///
    /// A lock is stale if:
    /// - The lock file does not exist (vacuously stale).
    /// - The lock file cannot be parsed.
    /// - The heartbeat timestamp is older than `threshold`.
    /// - The PID in the lock file is not alive on the current host.
    pub fn is_stale(path: &Path, threshold: Duration) -> bool {
        let lock_path = if path.extension().map_or(false, |e| e == "lock") {
            path.to_path_buf()
        } else {
            lock_path_for(path)
        };

        let content = match fs::read_to_string(&lock_path) {
            Ok(c) => c,
            Err(_) => return true, // Missing or unreadable = stale.
        };

        let meta: LeaseMeta = match serde_json::from_str(&content) {
            Ok(m) => m,
            Err(_) => return true, // Corrupt = stale.
        };

        // Check age.
        let now_secs = current_unix_secs();
        let age_secs = now_secs.saturating_sub(meta.timestamp_secs);
        if age_secs > threshold.as_secs() {
            return true;
        }

        // Check if PID is alive (only meaningful on same host).
        let our_hostname = get_hostname();
        if meta.hostname == our_hostname && !is_pid_alive(meta.pid) {
            return true;
        }

        false
    }

    /// Return the path to the lock file.
    pub fn lock_path(&self) -> &Path {
        &self.lock_path
    }

    /// Check whether this lease still owns the lock file.
    ///
    /// Returns `false` if the file is missing, unreadable, corrupt, or
    /// records a different PID.
    fn owns_lock(&self) -> bool {
        let content = match fs::read_to_string(&self.lock_path) {
            Ok(c) => c,
            Err(_) => return false,
        };
        let meta: LeaseMeta = match serde_json::from_str(&content) {
            Ok(m) => m,
            Err(_) => return false,
        };
        meta.pid == self.pid
    }

    /// Internal release logic.
    ///
    /// Best-effort: only removes the lock file if we still own it, and
    /// ignores removal errors.
    fn do_release(&self) {
        if self.owns_lock() {
            let _ = fs::remove_file(&self.lock_path);
        }
    }
}
|
||||
|
||||
impl Drop for WriterLease {
|
||||
fn drop(&mut self) {
|
||||
if !self.released {
|
||||
self.do_release();
|
||||
self.released = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for WriterLease {
    /// Manual `Debug` that lists all three fields, matching the output a
    /// derived implementation would produce.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("WriterLease")
            .field("lock_path", &self.lock_path)
            .field("pid", &self.pid)
            .field("released", &self.released)
            .finish()
    }
}
|
||||
|
||||
// ---- Helper functions ----
|
||||
|
||||
/// Compute the lock file path for a store path.
///
/// Appends `.lock` to the complete path (keeping any existing extension),
/// e.g. `store.rvf` becomes `store.rvf.lock`. Works on the raw `OsStr`
/// so non-UTF-8 paths are preserved.
fn lock_path_for(store_path: &Path) -> PathBuf {
    let mut full_name = store_path.as_os_str().to_owned();
    full_name.push(".lock");
    full_name.into()
}
|
||||
|
||||
/// Try to atomically create the lock file. Fails with `AlreadyExists` if
/// another process holds the lock.
///
/// Writes a fresh `LeaseMeta` (current PID, current timestamp, hostname)
/// as JSON and syncs it to disk.
///
/// NOTE(review): if `write_all` fails after `create_new` succeeds, an empty
/// lock file is left behind; it fails to parse and is therefore treated as
/// stale by `is_stale`, so the state self-heals on the next acquire.
fn try_create_lock(lock_path: &Path, pid: u32) -> io::Result<()> {
    // Ensure parent directory exists.
    if let Some(parent) = lock_path.parent() {
        fs::create_dir_all(parent)?;
    }

    // Use create_new for O_CREAT | O_EXCL semantics.
    let meta = LeaseMeta {
        pid,
        timestamp_secs: current_unix_secs(),
        hostname: get_hostname(),
    };
    let content = serde_json::to_string(&meta)
        .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}")))?;

    let mut file = fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(lock_path)?;
    file.write_all(content.as_bytes())?;
    // Sync so a crash immediately after acquisition still leaves a
    // parseable lock file on disk.
    file.sync_all()?;
    Ok(())
}
|
||||
|
||||
/// Overwrite an existing lock file with a fresh timestamp.
|
||||
fn write_lock_file(lock_path: &Path, pid: u32) -> io::Result<()> {
|
||||
let meta = LeaseMeta {
|
||||
pid,
|
||||
timestamp_secs: current_unix_secs(),
|
||||
hostname: get_hostname(),
|
||||
};
|
||||
let content = serde_json::to_string(&meta)
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}")))?;
|
||||
fs::write(lock_path, content.as_bytes())
|
||||
}
|
||||
|
||||
/// Get the current Unix timestamp in seconds.
///
/// Clamps to 0 if the system clock reads earlier than the Unix epoch.
fn current_unix_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
|
||||
|
||||
/// Best-effort hostname retrieval.
///
/// Prefers the `HOSTNAME` environment variable (returned verbatim);
/// otherwise reads and trims `/etc/hostname`; otherwise falls back to
/// `"unknown"`.
fn get_hostname() -> String {
    if let Ok(name) = std::env::var("HOSTNAME") {
        return name;
    }
    match fs::read_to_string("/etc/hostname") {
        Ok(contents) => contents.trim().to_string(),
        Err(_) => "unknown".to_string(),
    }
}
|
||||
|
||||
/// Check whether a process with the given PID is alive.
///
/// On Unix this probes with `kill(pid, 0)`, which performs the permission
/// and existence checks without delivering any signal. On non-Unix targets
/// it conservatively reports `true` so a live holder is never misjudged as
/// dead (at the cost of never breaking locks by PID there).
fn is_pid_alive(pid: u32) -> bool {
    #[cfg(unix)]
    {
        // kill(pid, 0) checks existence without sending a signal.
        // SAFETY: signal 0 performs no action beyond the existence check;
        // only plain integers cross the FFI boundary.
        let ret = unsafe { libc_kill(pid as i32, 0) };
        if ret == 0 {
            return true;
        }
        // EPERM means the process exists but belongs to another user.
        // NOTE(review): errno value 1 (EPERM) is hard-coded here; it is 1
        // on Linux/macOS/FreeBSD, but a libc constant would be safer.
        let errno = unsafe { *errno_location() };
        errno == 1 // EPERM
    }
    #[cfg(not(unix))]
    {
        let _ = pid;
        true // Conservatively assume alive on non-Unix.
    }
}
|
||||
|
||||
// Minimal hand-rolled libc bindings, declared locally to avoid depending on
// the `libc` crate. Unix-family targets only.
//
// NOTE(review): `errno_location` is only defined for Linux/Android and
// macOS/iOS/FreeBSD, but `is_pid_alive` calls it under the broader
// `cfg(unix)` — other Unix targets (e.g. NetBSD) would fail to compile.
// TODO confirm the supported target set.

#[cfg(unix)]
extern "C" {
    /// POSIX `kill(2)`; with signal 0 it only checks process existence.
    fn kill(pid: i32, sig: i32) -> i32;
}

#[cfg(any(target_os = "linux", target_os = "android"))]
extern "C" {
    /// glibc/bionic accessor for the thread-local `errno`.
    fn __errno_location() -> *mut i32;
}

#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
extern "C" {
    /// BSD-family accessor for the thread-local `errno`.
    fn __error() -> *mut i32;
}

/// Thin safe-to-name wrapper over raw `kill`; caller upholds FFI contract.
#[cfg(unix)]
unsafe fn libc_kill(pid: i32, sig: i32) -> i32 {
    unsafe { kill(pid, sig) }
}

/// Platform-dispatching wrapper returning the address of `errno`.
#[cfg(any(target_os = "linux", target_os = "android"))]
unsafe fn errno_location() -> *mut i32 {
    unsafe { __errno_location() }
}

/// Platform-dispatching wrapper returning the address of `errno`.
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
unsafe fn errno_location() -> *mut i32 {
    unsafe { __error() }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering};

    /// Counter to generate unique directory names for each test, avoiding
    /// cross-test interference when running in parallel.
    static TEST_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Create a fresh temp directory namespaced by PID, counter, and test
    /// name so parallel test runs never share lock files.
    fn unique_dir(name: &str) -> PathBuf {
        let id = TEST_COUNTER.fetch_add(1, AtomicOrdering::Relaxed);
        let dir = std::env::temp_dir().join(format!(
            "rvlite_lease_{}_{}_{}",
            std::process::id(),
            id,
            name
        ));
        let _ = fs::create_dir_all(&dir);
        dir
    }

    /// Best-effort removal of a test directory.
    fn cleanup(dir: &Path) {
        let _ = fs::remove_dir_all(dir);
    }

    #[test]
    fn lock_path_computation() {
        let p = Path::new("/tmp/store.rvf");
        assert_eq!(lock_path_for(p), PathBuf::from("/tmp/store.rvf.lock"));
    }

    #[test]
    fn acquire_and_release() {
        let dir = unique_dir("acquire_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert!(lease.lock_path().exists());

        lease.release().unwrap();
        assert!(!lease.lock_path().exists());

        cleanup(&dir);
    }

    #[test]
    fn double_acquire_fails_within_timeout() {
        let dir = unique_dir("double_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // Second acquire should time out quickly. The lock is held by our own
        // PID and is fresh, so it cannot be broken as stale.
        let result = WriterLease::acquire(&store_path, Duration::from_millis(150));
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), io::ErrorKind::WouldBlock);

        cleanup(&dir);
    }

    #[test]
    fn drop_releases_lease() {
        let dir = unique_dir("drop_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lock_file = lock_path_for(&store_path);

        {
            let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
            assert!(lock_file.exists());
        }
        // After drop, lock file should be gone.
        assert!(!lock_file.exists());

        cleanup(&dir);
    }

    #[test]
    fn stale_lease_is_detected() {
        let dir = unique_dir("stale_detect");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);

        // Write a lock file with a very old timestamp and dead PID.
        let meta = LeaseMeta {
            pid: 999_999_999, // Almost certainly not alive.
            timestamp_secs: current_unix_secs().saturating_sub(120),
            hostname: get_hostname(),
        };
        let content = serde_json::to_string(&meta).unwrap();
        fs::write(&lock_path, content).unwrap();

        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn fresh_lease_is_not_stale() {
        let dir = unique_dir("fresh_lease");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        assert!(!WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn missing_lock_file_is_stale() {
        // A nonexistent lock file is "vacuously stale" by design.
        let path = Path::new("/tmp/nonexistent_rvlite_test_12345.rvf");
        assert!(WriterLease::is_stale(path, DEFAULT_STALE_THRESHOLD));
    }

    #[test]
    fn corrupt_lock_file_is_stale() {
        let dir = unique_dir("corrupt");
        let store_path = dir.join("test.rvf");
        let lock_path = lock_path_for(&store_path);

        let _ = fs::create_dir_all(&dir);
        fs::write(&lock_path, b"not json").unwrap();
        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));

        cleanup(&dir);
    }

    #[test]
    fn refresh_heartbeat_updates_timestamp() {
        let dir = unique_dir("heartbeat");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();

        // refresh_heartbeat overwrites the lock file with a new timestamp.
        lease.refresh_heartbeat().unwrap();

        // Read back and verify timestamp is recent.
        let content = fs::read_to_string(lease.lock_path()).unwrap();
        let meta: LeaseMeta = serde_json::from_str(&content).unwrap();
        let age = current_unix_secs().saturating_sub(meta.timestamp_secs);
        assert!(age < 5, "heartbeat should be very recent, got age={age}s");

        cleanup(&dir);
    }

    #[test]
    fn stale_lease_force_acquire() {
        let dir = unique_dir("force_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);

        // Simulate a stale lock from a dead process.
        let meta = LeaseMeta {
            pid: 999_999_999,
            timestamp_secs: current_unix_secs().saturating_sub(60),
            hostname: get_hostname(),
        };
        fs::write(&lock_path, serde_json::to_string(&meta).unwrap()).unwrap();

        // Should succeed because the existing lock is stale.
        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert_eq!(lease.pid, std::process::id());

        lease.release().unwrap();
        cleanup(&dir);
    }

    #[test]
    fn release_is_idempotent() {
        let dir = unique_dir("idempotent");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        lease.release().unwrap();
        // Second release should be a no-op.
        lease.release().unwrap();

        cleanup(&dir);
    }

    #[test]
    fn debug_format() {
        let dir = unique_dir("debug_fmt");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");

        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        let debug = format!("{:?}", lease);
        assert!(debug.contains("WriterLease"));
        assert!(debug.contains("lock_path"));

        cleanup(&dir);
    }
}
|
||||
Reference in New Issue
Block a user