Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,413 @@
//! Abstract Syntax Tree definitions for Cypher query language
//!
//! Represents the parsed structure of Cypher queries including:
//! - Pattern matching (MATCH, OPTIONAL MATCH)
//! - Filtering (WHERE)
//! - Projections (RETURN, WITH)
//! - Mutations (CREATE, MERGE, DELETE, SET)
//! - Aggregations and ordering
//! - Hyperedge support for N-ary relationships
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Top-level query representation: an ordered list of clauses that share
/// one variable scope during execution.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Query {
    pub statements: Vec<Statement>,
}
/// Individual query statement (one clause of a Cypher query).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Statement {
    Match(MatchClause),
    Create(CreateClause),
    Merge(MergeClause),
    Delete(DeleteClause),
    Set(SetClause),
    Remove(RemoveClause),
    Return(ReturnClause),
    With(WithClause),
}
/// MATCH clause for pattern matching
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MatchClause {
    /// True when the clause was written as OPTIONAL MATCH.
    pub optional: bool,
    /// Comma-separated patterns to match.
    pub patterns: Vec<Pattern>,
    /// Inline WHERE filter, if present.
    pub where_clause: Option<WhereClause>,
}
/// Pattern matching expressions
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Pattern {
    /// Simple node pattern: (n:Label {props})
    Node(NodePattern),
    /// Relationship pattern: (a)-[r:TYPE]->(b)
    Relationship(RelationshipPattern),
    /// Path pattern: p = (a)-[*1..5]->(b)
    Path(PathPattern),
    /// Hyperedge pattern for N-ary relationships: (a)-[r:TYPE]->(b,c,d)
    Hyperedge(HyperedgePattern),
}
/// Node pattern: (variable:Label {property: value})
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodePattern {
    /// Binding name (`n` in `(n:Person)`), if the node is named.
    pub variable: Option<String>,
    /// Labels the node must carry; empty means "any node".
    pub labels: Vec<String>,
    /// Literal property constraints, if any.
    pub properties: Option<PropertyMap>,
}
/// Relationship pattern: [variable:Type {properties}]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RelationshipPattern {
    /// Binding name for the relationship, if named.
    pub variable: Option<String>,
    /// Relationship type; `None` matches any type.
    pub rel_type: Option<String>,
    /// Literal property constraints, if any.
    pub properties: Option<PropertyMap>,
    /// Traversal direction relative to `from`.
    pub direction: Direction,
    /// Variable-length range (`[*min..max]`), if present.
    pub range: Option<RelationshipRange>,
    /// Source node pattern
    pub from: Box<NodePattern>,
    /// Target - can be a NodePattern or another Pattern for chained relationships
    /// For simple relationships like (a)-[r]->(b), this is just the node
    /// For chained patterns like (a)-[r]->(b)<-[s]-(c), the target is nested
    pub to: Box<Pattern>,
}
/// Hyperedge pattern for N-ary relationships
/// Example: (person)-[r:TRANSACTION]->(account1, account2, merchant)
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct HyperedgePattern {
    pub variable: Option<String>,
    /// Hyperedges always carry an explicit type (unlike binary relationships).
    pub rel_type: String,
    pub properties: Option<PropertyMap>,
    pub from: Box<NodePattern>,
    pub to: Vec<NodePattern>, // Multiple target nodes for N-ary relationships
    pub arity: usize,         // Number of participating nodes (including source)
}
/// Relationship direction
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Direction {
    Outgoing,   // ->
    Incoming,   // <-
    Undirected, // -
}
/// Relationship range for path queries: [*min..max]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RelationshipRange {
    pub min: Option<usize>,
    pub max: Option<usize>,
}
/// Path pattern: p = (a)-[*]->(b)
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PathPattern {
    /// The path binding name (`p` in `p = (a)-[*]->(b)`); always present.
    pub variable: String,
    /// The underlying relationship pattern the path covers.
    pub pattern: Box<Pattern>,
}
/// Property map: {key: value, ...} — values are unevaluated expressions.
pub type PropertyMap = HashMap<String, Expression>;
/// WHERE clause for filtering
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WhereClause {
    pub condition: Expression,
}
/// CREATE clause for creating nodes and relationships
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CreateClause {
    pub patterns: Vec<Pattern>,
}
/// MERGE clause for create-or-match
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MergeClause {
    pub pattern: Pattern,
    /// SET actions applied only when MERGE created the pattern.
    pub on_create: Option<SetClause>,
    /// SET actions applied only when MERGE matched an existing pattern.
    pub on_match: Option<SetClause>,
}
/// DELETE clause
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DeleteClause {
    /// True for DETACH DELETE (also removes connected relationships).
    pub detach: bool,
    /// Expressions naming what to delete (typically bound variables).
    pub expressions: Vec<Expression>,
}
/// SET clause for updating properties
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SetClause {
    pub items: Vec<SetItem>,
}
/// One assignment inside a SET clause.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SetItem {
    /// SET n.prop = value
    Property {
        variable: String,
        property: String,
        value: Expression,
    },
    /// SET n = value (replace the whole entity's properties)
    Variable {
        variable: String,
        value: Expression,
    },
    /// SET n:Label1:Label2
    Labels {
        variable: String,
        labels: Vec<String>,
    },
}
/// REMOVE clause for removing properties or labels
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RemoveClause {
    pub items: Vec<RemoveItem>,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum RemoveItem {
    /// Remove a property: REMOVE n.property
    Property { variable: String, property: String },
    /// Remove labels: REMOVE n:Label1:Label2
    Labels {
        variable: String,
        labels: Vec<String>,
    },
}
/// RETURN clause for projection
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ReturnClause {
    /// True for RETURN DISTINCT.
    pub distinct: bool,
    pub items: Vec<ReturnItem>,
    pub order_by: Option<OrderBy>,
    /// SKIP count as an expression (evaluated at run time).
    pub skip: Option<Expression>,
    /// LIMIT count as an expression (evaluated at run time).
    pub limit: Option<Expression>,
}
/// WITH clause for chaining queries — mirrors RETURN but also allows WHERE.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WithClause {
    pub distinct: bool,
    pub items: Vec<ReturnItem>,
    pub where_clause: Option<WhereClause>,
    pub order_by: Option<OrderBy>,
    pub skip: Option<Expression>,
    pub limit: Option<Expression>,
}
/// Return item: expression AS alias
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ReturnItem {
    pub expression: Expression,
    /// Output column name when an AS alias was given.
    pub alias: Option<String>,
}
/// ORDER BY clause
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OrderBy {
    pub items: Vec<OrderByItem>,
}
/// One sort key of an ORDER BY.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OrderByItem {
    pub expression: Expression,
    /// True for ASC (the default), false for DESC.
    pub ascending: bool,
}
/// Expression tree
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Expression {
    // Literals
    Integer(i64),
    Float(f64),
    String(String),
    Boolean(bool),
    Null,
    // Variables and properties
    Variable(String),
    /// Property access: `object.property` (object is usually a Variable).
    Property {
        object: Box<Expression>,
        property: String,
    },
    // Collections
    List(Vec<Expression>),
    Map(HashMap<String, Expression>),
    // Operators
    BinaryOp {
        left: Box<Expression>,
        op: BinaryOperator,
        right: Box<Expression>,
    },
    UnaryOp {
        op: UnaryOperator,
        operand: Box<Expression>,
    },
    // Functions and aggregations
    FunctionCall {
        name: String,
        args: Vec<Expression>,
    },
    Aggregation {
        function: AggregationFunction,
        expression: Box<Expression>,
        /// True for e.g. count(DISTINCT x).
        distinct: bool,
    },
    // Pattern predicates (a pattern used as a boolean test, e.g. in WHERE)
    PatternPredicate(Box<Pattern>),
    // Case expressions
    Case {
        /// Subject of a "simple" CASE; None for the "searched" form.
        expression: Option<Box<Expression>>,
        /// (WHEN condition/value, THEN result) pairs, in order.
        alternatives: Vec<(Expression, Expression)>,
        /// ELSE result, if present.
        default: Option<Box<Expression>>,
    },
}
/// Binary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryOperator {
    // Arithmetic
    Add,
    Subtract,
    Multiply,
    Divide,
    Modulo,
    Power,
    // Comparison
    Equal,
    NotEqual,
    LessThan,
    LessThanOrEqual,
    GreaterThan,
    GreaterThanOrEqual,
    // Logical
    And,
    Or,
    Xor,
    // String
    Contains,
    StartsWith,
    EndsWith,
    Matches, // Regex
    // Collection
    In,
    // Null checking
    Is,
    IsNot,
}
/// Unary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnaryOperator {
    Not,
    Minus,
    Plus,
    IsNull,
    IsNotNull,
}
/// Aggregation functions
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum AggregationFunction {
    Count,
    Sum,
    Avg,
    Min,
    Max,
    Collect,
    StdDev,
    StdDevP,
    Percentile,
}
impl Query {
pub fn new(statements: Vec<Statement>) -> Self {
Self { statements }
}
/// Check if query contains only read operations
pub fn is_read_only(&self) -> bool {
self.statements.iter().all(|stmt| {
matches!(
stmt,
Statement::Match(_) | Statement::Return(_) | Statement::With(_)
)
})
}
/// Check if query contains hyperedges
pub fn has_hyperedges(&self) -> bool {
self.statements.iter().any(|stmt| match stmt {
Statement::Match(m) => m
.patterns
.iter()
.any(|p| matches!(p, Pattern::Hyperedge(_))),
Statement::Create(c) => c
.patterns
.iter()
.any(|p| matches!(p, Pattern::Hyperedge(_))),
Statement::Merge(m) => matches!(&m.pattern, Pattern::Hyperedge(_)),
_ => false,
})
}
}
impl Pattern {
    /// Number of nodes a successful match of this pattern binds.
    ///
    /// The target of a relationship may itself be a nested pattern
    /// (chained form like `(a)-[r]->(b)<-[s]-(c)`), so the relationship
    /// case recurses: one source node plus the arity of the target.
    /// The previous hard-coded `2` undercounted chained patterns.
    pub fn arity(&self) -> usize {
        match self {
            Pattern::Node(_) => 1,
            // Simple (a)-[r]->(b) still yields 1 + 1 = 2.
            Pattern::Relationship(rel) => 1 + rel.to.arity(),
            Pattern::Path(_) => 2, // Simplified, could be variable
            Pattern::Hyperedge(h) => h.arity,
        }
    }
}
impl Expression {
    /// Check if expression is constant (no variables, no data access).
    ///
    /// Function calls, aggregations and pattern predicates are treated as
    /// non-constant conservatively, since they may depend on graph state.
    pub fn is_constant(&self) -> bool {
        match self {
            Expression::Integer(_)
            | Expression::Float(_)
            | Expression::String(_)
            | Expression::Boolean(_)
            | Expression::Null => true,
            Expression::List(items) => items.iter().all(|e| e.is_constant()),
            Expression::Map(map) => map.values().all(|e| e.is_constant()),
            Expression::BinaryOp { left, right, .. } => left.is_constant() && right.is_constant(),
            Expression::UnaryOp { operand, .. } => operand.is_constant(),
            // A CASE is constant when every branch it could take is constant
            // (previously CASE was always treated as non-constant).
            Expression::Case {
                expression,
                alternatives,
                default,
            } => {
                expression.as_ref().map_or(true, |e| e.is_constant())
                    && alternatives
                        .iter()
                        .all(|(when, then)| when.is_constant() && then.is_constant())
                    && default.as_ref().map_or(true, |e| e.is_constant())
            }
            _ => false,
        }
    }
    /// Check if expression contains an aggregation anywhere in its tree.
    pub fn has_aggregation(&self) -> bool {
        match self {
            Expression::Aggregation { .. } => true,
            Expression::BinaryOp { left, right, .. } => {
                left.has_aggregation() || right.has_aggregation()
            }
            Expression::UnaryOp { operand, .. } => operand.has_aggregation(),
            Expression::FunctionCall { args, .. } => args.iter().any(|e| e.has_aggregation()),
            Expression::List(items) => items.iter().any(|e| e.has_aggregation()),
            // Previously missed: aggregations can nest inside map literals
            // and CASE branches.
            Expression::Map(map) => map.values().any(|e| e.has_aggregation()),
            Expression::Case {
                expression,
                alternatives,
                default,
            } => {
                expression.as_ref().map_or(false, |e| e.has_aggregation())
                    || alternatives
                        .iter()
                        .any(|(when, then)| when.has_aggregation() || then.has_aggregation())
                    || default.as_ref().map_or(false, |e| e.has_aggregation())
            }
            Expression::Property { object, .. } => object.has_aggregation(),
            _ => false,
        }
    }
}

View File

@@ -0,0 +1,616 @@
//! Cypher query executor for in-memory property graph
use super::ast::*;
use super::graph_store::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use thiserror::Error;
/// Errors produced while executing a Cypher query.
#[derive(Debug, Error)]
pub enum ExecutionError {
    #[error("Graph error: {0}")]
    GraphError(#[from] GraphError),
    #[error("Variable not found: {0}")]
    VariableNotFound(String),
    #[error("Type error: {0}")]
    TypeError(String),
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),
    #[error("Execution error: {0}")]
    ExecutionError(String),
}
/// Execution context holding variable bindings
#[derive(Debug, Clone)]
pub struct ExecutionContext {
    pub variables: HashMap<String, ContextValue>,
}
impl ExecutionContext {
    /// Create an empty binding scope.
    pub fn new() -> Self {
        Self {
            variables: HashMap::new(),
        }
    }
    /// Bind `name` to `value`, overwriting any previous binding.
    pub fn bind(&mut self, name: String, value: ContextValue) {
        self.variables.insert(name, value);
    }
    /// Look up a binding by name.
    pub fn get(&self, name: &str) -> Option<&ContextValue> {
        self.variables.get(name)
    }
}
impl Default for ExecutionContext {
    fn default() -> Self {
        Self::new()
    }
}
/// Value in execution context (node, edge, or property value)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ContextValue {
    Node(Node),
    Edge(Edge),
    Value(Value),
    List(Vec<ContextValue>),
    Map(HashMap<String, ContextValue>),
}
impl ContextValue {
    /// Borrow the inner node, if this binding is a node.
    pub fn as_node(&self) -> Option<&Node> {
        match self {
            ContextValue::Node(n) => Some(n),
            _ => None,
        }
    }
    /// Borrow the inner scalar value, if this binding is a plain value.
    pub fn as_value(&self) -> Option<&Value> {
        match self {
            ContextValue::Value(v) => Some(v),
            _ => None,
        }
    }
}
/// Query execution result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionResult {
    /// Output column names, in projection order.
    pub columns: Vec<String>,
    /// Result rows keyed by column name.
    pub rows: Vec<HashMap<String, ContextValue>>,
}
impl ExecutionResult {
    /// Create an empty result with the given columns.
    pub fn new(columns: Vec<String>) -> Self {
        Self {
            columns,
            rows: Vec::new(),
        }
    }
    /// Append a row.
    pub fn add_row(&mut self, row: HashMap<String, ContextValue>) {
        self.rows.push(row);
    }
}
/// Cypher query executor; holds a mutable borrow of the graph so
/// CREATE/SET/DELETE can modify it in place.
pub struct Executor<'a> {
    graph: &'a mut PropertyGraph,
}
impl<'a> Executor<'a> {
/// Create an executor that borrows the graph mutably for its lifetime.
pub fn new(graph: &'a mut PropertyGraph) -> Self {
    Self { graph }
}
/// Execute a parsed Cypher query.
///
/// Statements run in order against a single shared binding context;
/// the result of the final statement is returned. An empty statement
/// list is an error.
pub fn execute(&mut self, query: &Query) -> Result<ExecutionResult, ExecutionError> {
    let mut context = ExecutionContext::new();
    let mut last_result: Option<ExecutionResult> = None;
    for statement in &query.statements {
        last_result = Some(self.execute_statement(statement, &mut context)?);
    }
    match last_result {
        Some(result) => Ok(result),
        None => Err(ExecutionError::ExecutionError(
            "No statements to execute".to_string(),
        )),
    }
}
/// Route a single statement to its handler, sharing `context` so later
/// statements see bindings made by earlier ones.
///
/// MERGE, REMOVE and WITH are currently rejected as unsupported.
fn execute_statement(
    &mut self,
    statement: &Statement,
    context: &mut ExecutionContext,
) -> Result<ExecutionResult, ExecutionError> {
    match statement {
        Statement::Create(clause) => self.execute_create(clause, context),
        Statement::Match(clause) => self.execute_match(clause, context),
        Statement::Return(clause) => self.execute_return(clause, context),
        Statement::Set(clause) => self.execute_set(clause, context),
        Statement::Delete(clause) => self.execute_delete(clause, context),
        _ => Err(ExecutionError::UnsupportedOperation(format!(
            "Statement {:?} not yet implemented",
            statement
        ))),
    }
}
/// CREATE: materialize every pattern in the clause. Yields an empty
/// result set; bindings go into `context`.
fn execute_create(
    &mut self,
    clause: &CreateClause,
    context: &mut ExecutionContext,
) -> Result<ExecutionResult, ExecutionError> {
    for pattern in &clause.patterns {
        self.create_pattern(pattern, context)?;
    }
    Ok(ExecutionResult::new(vec![]))
}
/// Materialize one CREATE pattern. Supports bare node patterns and
/// single relationship patterns; paths and hyperedges are rejected.
fn create_pattern(
    &mut self,
    pattern: &Pattern,
    context: &mut ExecutionContext,
) -> Result<(), ExecutionError> {
    match pattern {
        Pattern::Node(node_pattern) => {
            let node = self.create_node(node_pattern)?;
            // Bind the new node so later clauses can reference it.
            if let Some(var) = &node_pattern.variable {
                context.bind(var.clone(), ContextValue::Node(node));
            }
            Ok(())
        }
        Pattern::Relationship(rel_pattern) => {
            self.create_relationship(rel_pattern, context)?;
            Ok(())
        }
        _ => Err(ExecutionError::UnsupportedOperation(
            "Only simple node and relationship patterns supported in CREATE".to_string(),
        )),
    }
}
/// Create a node from a pattern: fresh id, labels, literal properties.
///
/// NOTE(review): property expressions are evaluated against an *empty*
/// context, so CREATE properties cannot reference previously bound
/// variables — confirm whether that is intended.
fn create_node(&mut self, pattern: &NodePattern) -> Result<Node, ExecutionError> {
    let id = self.graph.generate_node_id();
    let mut node = Node::new(id).with_labels(pattern.labels.clone());
    // Set properties
    if let Some(props) = &pattern.properties {
        for (key, expr) in props {
            let value = self.evaluate_expression(expr, &ExecutionContext::new())?;
            node.set_property(key.clone(), value);
        }
    }
    // add_node returns the node's own id, so this reassignment is a no-op
    // kept for symmetry with edge creation.
    let node_id = self.graph.add_node(node.clone());
    node.id = node_id;
    Ok(node)
}
/// Create an edge and, when its endpoints are unbound, the endpoint nodes.
///
/// Endpoint resolution: a pattern variable already bound to a node in
/// `context` is reused; otherwise a fresh node is created from the
/// endpoint pattern. Untyped relationships default to "RELATED_TO".
fn create_relationship(
    &mut self,
    pattern: &RelationshipPattern,
    context: &mut ExecutionContext,
) -> Result<(), ExecutionError> {
    // Get or create source node
    let from_node = if let Some(var) = &pattern.from.variable {
        if let Some(ContextValue::Node(n)) = context.get(var) {
            n.clone()
        } else {
            self.create_node(&pattern.from)?
        }
    } else {
        self.create_node(&pattern.from)?
    };
    // Get or create target node (only handle simple node targets for now)
    let to_node = match &*pattern.to {
        Pattern::Node(node_pattern) => {
            if let Some(var) = &node_pattern.variable {
                if let Some(ContextValue::Node(n)) = context.get(var) {
                    n.clone()
                } else {
                    self.create_node(node_pattern)?
                }
            } else {
                self.create_node(node_pattern)?
            }
        }
        _ => {
            return Err(ExecutionError::UnsupportedOperation(
                "Complex relationship targets not yet supported".to_string(),
            ))
        }
    };
    // Create the edge
    let edge_type = pattern
        .rel_type
        .clone()
        .unwrap_or_else(|| "RELATED_TO".to_string());
    let edge_id = self.graph.generate_edge_id();
    let mut edge = Edge::new(edge_id, from_node.id.clone(), to_node.id.clone(), edge_type);
    // Set properties — note these use the live context, unlike node
    // creation which uses an empty one.
    if let Some(props) = &pattern.properties {
        for (key, expr) in props {
            let value = self.evaluate_expression(expr, context)?;
            edge.set_property(key.clone(), value);
        }
    }
    let edge_id = self.graph.add_edge(edge.clone())?;
    if let Some(var) = &pattern.variable {
        edge.id = edge_id;
        context.bind(var.clone(), ContextValue::Edge(edge));
    }
    Ok(())
}
/// MATCH: collect bindings for each pattern, filter with WHERE, and fold
/// the survivors into the shared context.
///
/// NOTE(review): matches from multiple patterns are concatenated, not
/// joined, and later bindings overwrite earlier ones for the same
/// variable — effectively only the last match per variable survives.
/// A full implementation would emit one row per match combination.
fn execute_match(
    &mut self,
    clause: &MatchClause,
    context: &mut ExecutionContext,
) -> Result<ExecutionResult, ExecutionError> {
    let mut matches = Vec::new();
    for pattern in &clause.patterns {
        let pattern_matches = self.match_pattern(pattern)?;
        matches.extend(pattern_matches);
    }
    // Apply WHERE filter if present; a condition that fails to evaluate
    // counts as no match rather than aborting the query.
    if let Some(where_clause) = &clause.where_clause {
        matches.retain(|ctx| {
            self.evaluate_condition(&where_clause.condition, ctx)
                .unwrap_or(false)
        });
    }
    // Merge matches into context
    for match_ctx in matches {
        for (var, val) in match_ctx.variables {
            context.bind(var, val);
        }
    }
    Ok(ExecutionResult::new(vec![]))
}
/// Dispatch a MATCH pattern to the matcher for its kind.
///
/// Path and hyperedge patterns are not implemented yet.
fn match_pattern(&self, pattern: &Pattern) -> Result<Vec<ExecutionContext>, ExecutionError> {
    if let Pattern::Node(node_pattern) = pattern {
        return self.match_node_pattern(node_pattern);
    }
    if let Pattern::Relationship(rel_pattern) = pattern {
        return self.match_relationship_pattern(rel_pattern);
    }
    Err(ExecutionError::UnsupportedOperation(
        "Pattern type not yet supported in MATCH".to_string(),
    ))
}
/// Find every node satisfying a node pattern; each hit becomes a fresh
/// context binding the pattern variable (if any) to the node.
///
/// Label semantics: Cypher's `(n:A:B)` requires *all* listed labels.
/// The previous implementation unioned the per-label index lookups,
/// which implemented OR and produced duplicate bindings for nodes
/// carrying more than one of the requested labels.
fn match_node_pattern(
    &self,
    pattern: &NodePattern,
) -> Result<Vec<ExecutionContext>, ExecutionError> {
    let mut contexts = Vec::new();
    // Seed candidates from the first label's index, then require the
    // remaining labels too (conjunctive match, no duplicates).
    let candidates: Vec<&Node> = match pattern.labels.first() {
        None => self.graph.find_nodes(|_| true),
        Some(first) => self
            .graph
            .find_nodes_by_label(first)
            .into_iter()
            .filter(|node| pattern.labels.iter().all(|label| node.has_label(label)))
            .collect(),
    };
    // Filter by literal property equality.
    for node in candidates {
        if let Some(props) = &pattern.properties {
            let mut matches = true;
            for (key, expr) in props {
                let expected_value =
                    self.evaluate_expression(expr, &ExecutionContext::new())?;
                if node.get_property(key) != Some(&expected_value) {
                    matches = false;
                    break;
                }
            }
            if !matches {
                continue;
            }
        }
        let mut ctx = ExecutionContext::new();
        if let Some(var) = &pattern.variable {
            ctx.bind(var.clone(), ContextValue::Node(node.clone()));
        }
        contexts.push(ctx);
    }
    Ok(contexts)
}
/// Match a single-hop relationship pattern: for every source node match,
/// scan its edges, filter by type/properties, and bind edge + target.
///
/// NOTE(review): source patterns without a variable are skipped entirely
/// (the `continue` below), so an anonymous source like `()-[r]->(b)`
/// matches nothing — confirm whether that is intended.
fn match_relationship_pattern(
    &self,
    pattern: &RelationshipPattern,
) -> Result<Vec<ExecutionContext>, ExecutionError> {
    let mut contexts = Vec::new();
    // Match source nodes
    let from_contexts = self.match_node_pattern(&pattern.from)?;
    for from_ctx in from_contexts {
        // Get the source node
        let from_node = if let Some(var) = &pattern.from.variable {
            from_ctx
                .get(var)
                .and_then(|v| v.as_node())
                .ok_or_else(|| ExecutionError::VariableNotFound(var.clone()))?
        } else {
            continue;
        };
        // Find matching edges; Undirected considers both directions.
        let edges = match pattern.direction {
            Direction::Outgoing => self.graph.get_outgoing_edges(&from_node.id),
            Direction::Incoming => self.graph.get_incoming_edges(&from_node.id),
            Direction::Undirected => {
                let mut all = self.graph.get_outgoing_edges(&from_node.id);
                all.extend(self.graph.get_incoming_edges(&from_node.id));
                all
            }
        };
        for edge in edges {
            // Filter by type (None matches any type)
            if let Some(rel_type) = &pattern.rel_type {
                if &edge.edge_type != rel_type {
                    continue;
                }
            }
            // Filter by properties
            if let Some(props) = &pattern.properties {
                let mut matches = true;
                for (key, expr) in props {
                    let expected_value =
                        self.evaluate_expression(expr, &ExecutionContext::new())?;
                    if edge.get_property(key) != Some(&expected_value) {
                        matches = false;
                        break;
                    }
                }
                if !matches {
                    continue;
                }
            }
            // Get target node: when traversing an incoming edge the "far"
            // endpoint is the edge's source.
            let to_node_id = if pattern.direction == Direction::Incoming {
                &edge.from
            } else {
                &edge.to
            };
            if let Some(to_node) = self.graph.get_node(to_node_id) {
                let mut ctx = from_ctx.clone();
                if let Some(var) = &pattern.variable {
                    ctx.bind(var.clone(), ContextValue::Edge(edge.clone()));
                }
                // Bind target node if it's a simple node pattern
                if let Pattern::Node(to_pattern) = &*pattern.to {
                    if let Some(var) = &to_pattern.variable {
                        ctx.bind(var.clone(), ContextValue::Node(to_node.clone()));
                    }
                }
                contexts.push(ctx);
            }
        }
    }
    Ok(contexts)
}
/// RETURN: project expressions from the current context into one row.
///
/// NOTE(review): `distinct`, `order_by`, `skip` and `limit` are ignored,
/// and exactly one row is produced from the final context state.
fn execute_return(
    &self,
    clause: &ReturnClause,
    context: &ExecutionContext,
) -> Result<ExecutionResult, ExecutionError> {
    let mut columns = Vec::new();
    let mut row = HashMap::new();
    for item in &clause.items {
        // Column name: explicit alias, else the bare variable name,
        // else a placeholder.
        let col_name = item
            .alias
            .clone()
            .unwrap_or_else(|| match &item.expression {
                Expression::Variable(var) => var.clone(),
                _ => "?column?".to_string(),
            });
        columns.push(col_name.clone());
        let value = self.evaluate_expression_ctx(&item.expression, context)?;
        row.insert(col_name, value);
    }
    let mut result = ExecutionResult::new(columns);
    result.add_row(row);
    Ok(result)
}
/// SET: update node properties for variables bound in the context.
///
/// Only the `variable.property = value` form is supported; whole-variable
/// and label assignments are rejected. Unbound variables and non-node
/// bindings are silently skipped.
fn execute_set(
    &mut self,
    clause: &SetClause,
    context: &ExecutionContext,
) -> Result<ExecutionResult, ExecutionError> {
    for item in &clause.items {
        match item {
            SetItem::Property {
                variable,
                property,
                value,
            } => {
                let val = self.evaluate_expression(value, context)?;
                if let Some(ContextValue::Node(node)) = context.get(variable) {
                    if let Some(node_mut) = self.graph.get_node_mut(&node.id) {
                        node_mut.set_property(property.clone(), val);
                    }
                }
            }
            _ => {
                return Err(ExecutionError::UnsupportedOperation(
                    "Only property SET supported".to_string(),
                ))
            }
        }
    }
    Ok(ExecutionResult::new(vec![]))
}
/// DELETE / DETACH DELETE for variables bound to nodes or edges.
///
/// Cypher allows a plain DELETE on a node that has no relationships;
/// only nodes that still have connected edges require DETACH. The
/// previous version rejected plain DELETE even for isolated nodes.
/// Unbound variables and non-entity bindings are silently skipped.
fn execute_delete(
    &mut self,
    clause: &DeleteClause,
    context: &ExecutionContext,
) -> Result<ExecutionResult, ExecutionError> {
    for expr in &clause.expressions {
        if let Expression::Variable(var) = expr {
            if let Some(ctx_val) = context.get(var) {
                match ctx_val {
                    ContextValue::Node(node) => {
                        let has_edges = !self.graph.get_outgoing_edges(&node.id).is_empty()
                            || !self.graph.get_incoming_edges(&node.id).is_empty();
                        if clause.detach || !has_edges {
                            self.graph.delete_node(&node.id)?;
                        } else {
                            return Err(ExecutionError::ExecutionError(
                                "Cannot delete node with relationships without DETACH"
                                    .to_string(),
                            ));
                        }
                    }
                    ContextValue::Edge(edge) => {
                        self.graph.delete_edge(&edge.id)?;
                    }
                    _ => {}
                }
            }
        }
    }
    Ok(ExecutionResult::new(vec![]))
}
fn evaluate_expression(
&self,
expr: &Expression,
context: &ExecutionContext,
) -> Result<Value, ExecutionError> {
match expr {
Expression::Integer(n) => Ok(Value::Integer(*n)),
Expression::Float(f) => Ok(Value::Float(*f)),
Expression::String(s) => Ok(Value::String(s.clone())),
Expression::Boolean(b) => Ok(Value::Boolean(*b)),
Expression::Null => Ok(Value::Null),
Expression::Variable(var) => {
if let Some(ContextValue::Value(v)) = context.get(var) {
Ok(v.clone())
} else {
Err(ExecutionError::VariableNotFound(var.clone()))
}
}
Expression::Property { object, property } => {
if let Expression::Variable(var) = &**object {
if let Some(ContextValue::Node(node)) = context.get(var) {
Ok(node.get_property(property).cloned().unwrap_or(Value::Null))
} else {
Err(ExecutionError::VariableNotFound(var.clone()))
}
} else {
Err(ExecutionError::UnsupportedOperation(
"Nested property access not supported".to_string(),
))
}
}
_ => Err(ExecutionError::UnsupportedOperation(format!(
"Expression {:?} not yet implemented",
expr
))),
}
}
/// Evaluate an expression for projection, preserving entity bindings.
///
/// Unlike `evaluate_expression`, a variable may resolve to a whole node
/// or edge here, so the result is a `ContextValue` rather than a scalar.
fn evaluate_expression_ctx(
    &self,
    expr: &Expression,
    context: &ExecutionContext,
) -> Result<ContextValue, ExecutionError> {
    match expr {
        // Bare variable: hand back whatever is bound (node, edge, value).
        Expression::Variable(name) => context
            .get(name)
            .cloned()
            .ok_or_else(|| ExecutionError::VariableNotFound(name.clone())),
        // Property access on a node variable; missing property -> NULL.
        Expression::Property { object, property } => match &**object {
            Expression::Variable(name) => match context.get(name) {
                Some(ContextValue::Node(node)) => Ok(ContextValue::Value(
                    node.get_property(property).cloned().unwrap_or(Value::Null),
                )),
                _ => Err(ExecutionError::VariableNotFound(name.clone())),
            },
            _ => Err(ExecutionError::UnsupportedOperation(
                "Nested property access not supported".to_string(),
            )),
        },
        // Everything else is a scalar expression wrapped as a value.
        _ => self
            .evaluate_expression(expr, context)
            .map(ContextValue::Value),
    }
}
fn evaluate_condition(
&self,
expr: &Expression,
context: &ExecutionContext,
) -> Result<bool, ExecutionError> {
match expr {
Expression::Boolean(b) => Ok(*b),
Expression::BinaryOp { left, op, right } => {
let left_val = self.evaluate_expression(left, context)?;
let right_val = self.evaluate_expression(right, context)?;
match op {
BinaryOperator::Equal => Ok(left_val == right_val),
BinaryOperator::NotEqual => Ok(left_val != right_val),
BinaryOperator::GreaterThan => {
if let (Some(l), Some(r)) = (left_val.as_i64(), right_val.as_i64()) {
Ok(l > r)
} else {
Ok(false)
}
}
BinaryOperator::LessThan => {
if let (Some(l), Some(r)) = (left_val.as_i64(), right_val.as_i64()) {
Ok(l < r)
} else {
Ok(false)
}
}
_ => Err(ExecutionError::UnsupportedOperation(format!(
"Operator {:?} not implemented",
op
))),
}
}
_ => Err(ExecutionError::UnsupportedOperation(
"Complex conditions not yet supported".to_string(),
)),
}
}
}

View File

@@ -0,0 +1,438 @@
//! In-memory property graph storage for WASM-compatible Cypher execution
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use thiserror::Error;
/// Node identifiers are plain strings ("n0", "n1", … when generated).
pub type NodeId = String;
/// Edge identifiers are plain strings ("e0", "e1", … when generated).
pub type EdgeId = String;
/// Errors produced by graph-store operations.
#[derive(Debug, Error)]
pub enum GraphError {
    #[error("Node not found: {0}")]
    NodeNotFound(NodeId),
    #[error("Edge not found: {0}")]
    EdgeNotFound(EdgeId),
    #[error("Invalid operation: {0}")]
    InvalidOperation(String),
}
/// Property value that can be stored in nodes/edges
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Value {
    Null,
    Boolean(bool),
    Integer(i64),
    Float(f64),
    String(String),
    List(Vec<Value>),
    Map(HashMap<String, Value>),
}
impl Value {
    /// The integer payload, if this is `Value::Integer`.
    pub fn as_i64(&self) -> Option<i64> {
        if let Value::Integer(n) = self {
            Some(*n)
        } else {
            None
        }
    }
    /// Numeric payload widened to f64; integers are converted via `as`.
    pub fn as_f64(&self) -> Option<f64> {
        match self {
            Value::Integer(i) => Some(*i as f64),
            Value::Float(f) => Some(*f),
            _ => None,
        }
    }
    /// Borrow the string payload, if this is `Value::String`.
    pub fn as_str(&self) -> Option<&str> {
        match self {
            Value::String(s) => Some(s.as_str()),
            _ => None,
        }
    }
    /// The boolean payload, if this is `Value::Boolean`.
    pub fn as_bool(&self) -> Option<bool> {
        if let Value::Boolean(b) = self {
            Some(*b)
        } else {
            None
        }
    }
}
// Ergonomic conversions so callers can write `node.set_property(k, 42.into())`.
impl From<bool> for Value {
    fn from(b: bool) -> Self {
        Value::Boolean(b)
    }
}
impl From<i64> for Value {
    fn from(n: i64) -> Self {
        Value::Integer(n)
    }
}
impl From<f64> for Value {
    fn from(f: f64) -> Self {
        Value::Float(f)
    }
}
impl From<String> for Value {
    fn from(s: String) -> Self {
        Value::String(s)
    }
}
impl From<&str> for Value {
    fn from(s: &str) -> Self {
        Value::String(s.to_string())
    }
}
/// Node in the property graph
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    pub id: NodeId,
    /// Labels attached to the node (kept unique by `add_label`).
    pub labels: Vec<String>,
    /// Arbitrary key/value properties.
    pub properties: HashMap<String, Value>,
}
impl Node {
    /// Construct a node with the given id and empty labels/properties.
    pub fn new(id: NodeId) -> Self {
        Self {
            id,
            labels: Vec::new(),
            properties: HashMap::new(),
        }
    }
    /// Builder: append a single label.
    pub fn with_label(mut self, label: String) -> Self {
        self.labels.push(label);
        self
    }
    /// Builder: replace the full label set.
    pub fn with_labels(mut self, labels: Vec<String>) -> Self {
        self.labels = labels;
        self
    }
    /// Builder: insert one property.
    pub fn with_property(mut self, key: String, value: Value) -> Self {
        self.set_property(key, value);
        self
    }
    /// Whether the node carries `label`.
    pub fn has_label(&self, label: &str) -> bool {
        self.labels.iter().any(|candidate| candidate == label)
    }
    /// Borrow a property value by key.
    pub fn get_property(&self, key: &str) -> Option<&Value> {
        self.properties.get(key)
    }
    /// Insert or overwrite a property.
    pub fn set_property(&mut self, key: String, value: Value) {
        self.properties.insert(key, value);
    }
    /// Remove and return a property, if present.
    pub fn remove_property(&mut self, key: &str) -> Option<Value> {
        self.properties.remove(key)
    }
    /// Append a label unless it is already present (labels stay unique).
    pub fn add_label(&mut self, label: String) {
        if !self.has_label(&label) {
            self.labels.push(label);
        }
    }
    /// Drop every occurrence of `label`.
    pub fn remove_label(&mut self, label: &str) {
        self.labels.retain(|existing| existing != label);
    }
}
/// Edge/Relationship in the property graph (directed: `from` -> `to`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    pub id: EdgeId,
    /// Source node id.
    pub from: NodeId,
    /// Target node id.
    pub to: NodeId,
    /// Relationship type (e.g. "KNOWS").
    pub edge_type: String,
    /// Arbitrary key/value properties.
    pub properties: HashMap<String, Value>,
}
impl Edge {
    /// Construct an edge of `edge_type` from `from` to `to`, with no properties.
    pub fn new(id: EdgeId, from: NodeId, to: NodeId, edge_type: String) -> Self {
        Self {
            id,
            from,
            to,
            edge_type,
            properties: HashMap::new(),
        }
    }
    /// Builder: insert one property, consuming and returning the edge.
    pub fn with_property(mut self, key: String, value: Value) -> Self {
        self.set_property(key, value);
        self
    }
    /// Borrow a property value by key.
    pub fn get_property(&self, key: &str) -> Option<&Value> {
        self.properties.get(key)
    }
    /// Insert or overwrite a property.
    pub fn set_property(&mut self, key: String, value: Value) {
        self.properties.insert(key, value);
    }
}
/// In-memory property graph store
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PropertyGraph {
    nodes: HashMap<NodeId, Node>,
    edges: HashMap<EdgeId, Edge>,
    // Indexes for faster lookups; these hold ids and must be kept in sync
    // with `nodes`/`edges` by every mutating method.
    label_index: HashMap<String, Vec<NodeId>>,
    edge_type_index: HashMap<String, Vec<EdgeId>>,
    outgoing_edges: HashMap<NodeId, Vec<EdgeId>>,
    incoming_edges: HashMap<NodeId, Vec<EdgeId>>,
    // Monotonic counters backing generate_node_id / generate_edge_id.
    next_node_id: usize,
    next_edge_id: usize,
}
impl PropertyGraph {
pub fn new() -> Self {
Self {
nodes: HashMap::new(),
edges: HashMap::new(),
label_index: HashMap::new(),
edge_type_index: HashMap::new(),
outgoing_edges: HashMap::new(),
incoming_edges: HashMap::new(),
next_node_id: 0,
next_edge_id: 0,
}
}
/// Generate a unique node ID
pub fn generate_node_id(&mut self) -> NodeId {
let id = format!("n{}", self.next_node_id);
self.next_node_id += 1;
id
}
/// Generate a unique edge ID
pub fn generate_edge_id(&mut self) -> EdgeId {
let id = format!("e{}", self.next_edge_id);
self.next_edge_id += 1;
id
}
/// Add a node to the graph
pub fn add_node(&mut self, node: Node) -> NodeId {
let id = node.id.clone();
// Update label index
for label in &node.labels {
self.label_index
.entry(label.clone())
.or_insert_with(Vec::new)
.push(id.clone());
}
self.nodes.insert(id.clone(), node);
id
}
/// Get a node by ID
pub fn get_node(&self, id: &NodeId) -> Option<&Node> {
self.nodes.get(id)
}
/// Get a mutable reference to a node
pub fn get_node_mut(&mut self, id: &NodeId) -> Option<&mut Node> {
self.nodes.get_mut(id)
}
/// Find nodes by label
pub fn find_nodes_by_label(&self, label: &str) -> Vec<&Node> {
if let Some(node_ids) = self.label_index.get(label) {
node_ids
.iter()
.filter_map(|id| self.nodes.get(id))
.collect()
} else {
Vec::new()
}
}
/// Find all nodes matching a predicate
pub fn find_nodes<F>(&self, predicate: F) -> Vec<&Node>
where
F: Fn(&Node) -> bool,
{
self.nodes.values().filter(|n| predicate(n)).collect()
}
/// Add an edge to the graph
pub fn add_edge(&mut self, edge: Edge) -> Result<EdgeId, GraphError> {
// Verify nodes exist
if !self.nodes.contains_key(&edge.from) {
return Err(GraphError::NodeNotFound(edge.from.clone()));
}
if !self.nodes.contains_key(&edge.to) {
return Err(GraphError::NodeNotFound(edge.to.clone()));
}
let id = edge.id.clone();
let from = edge.from.clone();
let to = edge.to.clone();
let edge_type = edge.edge_type.clone();
// Update indexes
self.edge_type_index
.entry(edge_type)
.or_insert_with(Vec::new)
.push(id.clone());
self.outgoing_edges
.entry(from)
.or_insert_with(Vec::new)
.push(id.clone());
self.incoming_edges
.entry(to)
.or_insert_with(Vec::new)
.push(id.clone());
self.edges.insert(id.clone(), edge);
Ok(id)
}
/// Get an edge by ID
pub fn get_edge(&self, id: &EdgeId) -> Option<&Edge> {
self.edges.get(id)
}
/// Get outgoing edges from a node
pub fn get_outgoing_edges(&self, node_id: &NodeId) -> Vec<&Edge> {
if let Some(edge_ids) = self.outgoing_edges.get(node_id) {
edge_ids
.iter()
.filter_map(|id| self.edges.get(id))
.collect()
} else {
Vec::new()
}
}
/// Get incoming edges to a node
pub fn get_incoming_edges(&self, node_id: &NodeId) -> Vec<&Edge> {
if let Some(edge_ids) = self.incoming_edges.get(node_id) {
edge_ids
.iter()
.filter_map(|id| self.edges.get(id))
.collect()
} else {
Vec::new()
}
}
/// Get all edges of a specific type
pub fn find_edges_by_type(&self, edge_type: &str) -> Vec<&Edge> {
if let Some(edge_ids) = self.edge_type_index.get(edge_type) {
edge_ids
.iter()
.filter_map(|id| self.edges.get(id))
.collect()
} else {
Vec::new()
}
}
/// Delete a node and its connected edges
pub fn delete_node(&mut self, id: &NodeId) -> Result<(), GraphError> {
let node = self
.nodes
.remove(id)
.ok_or_else(|| GraphError::NodeNotFound(id.clone()))?;
// Remove from label index
for label in &node.labels {
if let Some(ids) = self.label_index.get_mut(label) {
ids.retain(|nid| nid != id);
}
}
// Remove connected edges
if let Some(edge_ids) = self.outgoing_edges.remove(id) {
for edge_id in edge_ids {
self.edges.remove(&edge_id);
}
}
if let Some(edge_ids) = self.incoming_edges.remove(id) {
for edge_id in edge_ids {
self.edges.remove(&edge_id);
}
}
Ok(())
}
/// Delete an edge
pub fn delete_edge(&mut self, id: &EdgeId) -> Result<(), GraphError> {
let edge = self
.edges
.remove(id)
.ok_or_else(|| GraphError::EdgeNotFound(id.clone()))?;
// Remove from type index
if let Some(ids) = self.edge_type_index.get_mut(&edge.edge_type) {
ids.retain(|eid| eid != id);
}
// Remove from node edge lists
if let Some(ids) = self.outgoing_edges.get_mut(&edge.from) {
ids.retain(|eid| eid != id);
}
if let Some(ids) = self.incoming_edges.get_mut(&edge.to) {
ids.retain(|eid| eid != id);
}
Ok(())
}
/// Get statistics about the graph
pub fn stats(&self) -> GraphStats {
GraphStats {
node_count: self.nodes.len(),
edge_count: self.edges.len(),
label_count: self.label_index.len(),
edge_type_count: self.edge_type_index.len(),
}
}
/// Get all nodes in the graph
pub fn all_nodes(&self) -> Vec<&Node> {
self.nodes.values().collect()
}
/// Get all edges in the graph
pub fn all_edges(&self) -> Vec<&Edge> {
self.edges.values().collect()
}
}
impl Default for PropertyGraph {
fn default() -> Self {
Self::new()
}
}
/// Aggregate counters describing the current size of a property graph,
/// as produced by `PropertyGraph::stats`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphStats {
    /// Number of nodes currently stored.
    pub node_count: usize,
    /// Number of edges currently stored.
    pub edge_count: usize,
    /// Number of distinct node labels in the label index.
    pub label_count: usize,
    /// Number of distinct edge types in the edge-type index.
    pub edge_type_count: usize,
}

View File

@@ -0,0 +1,607 @@
//! Lexical analyzer (tokenizer) for Cypher query language
//!
//! Hand-rolled lexer for WASM compatibility - no external dependencies.
use serde::{Deserialize, Serialize};
use std::fmt;
use std::iter::Peekable;
use std::str::Chars;
/// Token with kind and location information
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Token {
    /// Classified kind (keyword, literal, operator, delimiter, …).
    pub kind: TokenKind,
    /// Token text; for string literals this is the decoded value (escapes
    /// resolved), not the raw source slice.
    pub lexeme: String,
    /// Position of the token's first character in the source.
    pub position: Position,
}
/// Source position for error reporting
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Position {
    /// 1-based line number.
    pub line: usize,
    /// 1-based column number (counted in characters, not bytes).
    pub column: usize,
    /// Byte offset from the start of the input.
    pub offset: usize,
}
/// Token kinds
///
/// Multi-word keywords (OPTIONAL MATCH, DETACH DELETE, ORDER BY, ON CREATE,
/// ON MATCH) are fused into single variants by the lexer.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum TokenKind {
    // Keywords
    Match,
    OptionalMatch,
    Where,
    Return,
    Create,
    Merge,
    Delete,
    DetachDelete,
    Set,
    Remove,
    With,
    OrderBy,
    Limit,
    Skip,
    Distinct,
    As,
    Asc,
    Desc,
    Case,
    When,
    Then,
    Else,
    End,
    And,
    Or,
    Xor,
    Not,
    In,
    Is,
    Null,
    True,
    False,
    OnCreate,
    OnMatch,
    // Identifiers and literals
    Identifier(String),
    Integer(i64),
    Float(f64),
    String(String),
    // Operators
    Plus,
    // NOTE(review): the lexer emits `Dash` for '-', never `Minus`; confirm
    // whether the parser maps Dash to subtraction or Minus is dead.
    Minus,
    Star,
    Slash,
    Percent,
    Caret,
    Equal,
    NotEqual,
    LessThan,
    LessThanOrEqual,
    GreaterThan,
    GreaterThanOrEqual,
    Arrow, // ->
    LeftArrow, // <-
    Dash, // -
    // Delimiters
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    LeftBrace,
    RightBrace,
    Comma,
    Dot,
    Colon,
    Semicolon,
    Pipe,
    // Special
    DotDot, // ..
    Eof,
}
impl fmt::Display for TokenKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
TokenKind::Identifier(s) => write!(f, "identifier '{}'", s),
TokenKind::Integer(n) => write!(f, "integer {}", n),
TokenKind::Float(n) => write!(f, "float {}", n),
TokenKind::String(s) => write!(f, "string \"{}\"", s),
_ => write!(f, "{:?}", self),
}
}
}
/// Lexer error
#[derive(Debug, Clone)]
pub struct LexerError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Source position where the error was detected.
    pub position: Position,
}
impl fmt::Display for LexerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Lexer error at {}:{}: {}",
self.position.line, self.position.column, self.message
)
}
}
impl std::error::Error for LexerError {}
/// Hand-rolled Cypher lexer
pub struct Lexer<'a> {
    // Full source text, kept for slicing lexemes and string lookahead.
    input: &'a str,
    // Char iterator over `input`, kept in sync with `current_offset`.
    chars: Peekable<Chars<'a>>,
    // Line/column/offset of the next character to be consumed.
    position: Position,
    // Byte offset of the next character to be consumed.
    current_offset: usize,
}
impl<'a> Lexer<'a> {
    /// Create a lexer over `input`, positioned at line 1, column 1.
    pub fn new(input: &'a str) -> Self {
        Self {
            input,
            chars: input.chars().peekable(),
            position: Position {
                line: 1,
                column: 1,
                offset: 0,
            },
            current_offset: 0,
        }
    }
    /// Look at the next character without consuming it.
    fn peek(&mut self) -> Option<char> {
        self.chars.peek().copied()
    }
    /// Consume one character, keeping byte offset and line/column in sync.
    fn advance(&mut self) -> Option<char> {
        let ch = self.chars.next()?;
        self.current_offset += ch.len_utf8();
        if ch == '\n' {
            self.position.line += 1;
            self.position.column = 1;
        } else {
            self.position.column += 1;
        }
        self.position.offset = self.current_offset;
        Some(ch)
    }
    /// Skip whitespace and `//` line comments.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.peek() {
            if ch.is_whitespace() {
                self.advance();
            } else if ch == '/' && self.lookahead(1) == Some('/') {
                // Consume up to (not including) the newline; the '\n' is
                // then eaten by the whitespace branch on the next pass.
                while let Some(c) = self.peek() {
                    if c == '\n' {
                        break;
                    }
                    self.advance();
                }
            } else {
                break;
            }
        }
    }
    /// Peek the n-th character ahead of the cursor (0 = next character).
    fn lookahead(&self, n: usize) -> Option<char> {
        self.input[self.current_offset..].chars().nth(n)
    }
    /// Build a token positioned at the location of its first character.
    fn make_token(&self, kind: TokenKind, lexeme: &str, start_pos: Position) -> Token {
        Token {
            kind,
            lexeme: lexeme.to_string(),
            position: start_pos,
        }
    }
    /// Scan a quoted string (single- or double-quoted), decoding the escape
    /// sequences `\n`, `\t`, `\r`, `\\` and an escaped quote character.
    /// Unknown escapes keep the backslash literally. Errors if the closing
    /// quote is never found.
    fn scan_string(&mut self, quote: char) -> Result<Token, LexerError> {
        let start = self.position;
        self.advance(); // consume opening quote
        let mut value = String::new();
        while let Some(ch) = self.peek() {
            if ch == quote {
                self.advance(); // consume closing quote
                return Ok(self.make_token(TokenKind::String(value.clone()), &value, start));
            } else if ch == '\\' {
                self.advance();
                match self.peek() {
                    Some('n') => {
                        value.push('\n');
                        self.advance();
                    }
                    Some('t') => {
                        value.push('\t');
                        self.advance();
                    }
                    Some('r') => {
                        value.push('\r');
                        self.advance();
                    }
                    Some('\\') => {
                        value.push('\\');
                        self.advance();
                    }
                    Some(c) if c == quote => {
                        value.push(c);
                        self.advance();
                    }
                    // Unknown escape: keep the backslash, leave the next
                    // character to be scanned normally.
                    _ => value.push('\\'),
                }
            } else {
                value.push(ch);
                self.advance();
            }
        }
        Err(LexerError {
            message: "Unterminated string".to_string(),
            position: start,
        })
    }
    /// Scan an integer or float literal (optional fraction and exponent).
    ///
    /// Values that fail to parse (e.g. an integer overflowing `i64`) fall
    /// back to 0 / 0.0 silently, mirroring the original behavior.
    fn scan_number(&mut self) -> Token {
        let start = self.position;
        let start_offset = self.current_offset;
        while let Some(ch) = self.peek() {
            if ch.is_ascii_digit() {
                self.advance();
            } else {
                break;
            }
        }
        // Check for decimal: only treat '.' as a fraction when a digit
        // follows, so `1..2` still lexes as Integer DotDot Integer.
        if self.peek() == Some('.')
            && self
                .lookahead(1)
                .map(|c| c.is_ascii_digit())
                .unwrap_or(false)
        {
            self.advance(); // consume '.'
            while let Some(ch) = self.peek() {
                if ch.is_ascii_digit() {
                    self.advance();
                } else {
                    break;
                }
            }
            let lexeme = &self.input[start_offset..self.current_offset];
            let value: f64 = lexeme.parse().unwrap_or(0.0);
            return self.make_token(TokenKind::Float(value), lexeme, start);
        }
        // Check for exponent
        if matches!(self.peek(), Some('e') | Some('E')) {
            self.advance();
            if matches!(self.peek(), Some('+') | Some('-')) {
                self.advance();
            }
            while let Some(ch) = self.peek() {
                if ch.is_ascii_digit() {
                    self.advance();
                } else {
                    break;
                }
            }
            let lexeme = &self.input[start_offset..self.current_offset];
            let value: f64 = lexeme.parse().unwrap_or(0.0);
            return self.make_token(TokenKind::Float(value), lexeme, start);
        }
        let lexeme = &self.input[start_offset..self.current_offset];
        let value: i64 = lexeme.parse().unwrap_or(0);
        self.make_token(TokenKind::Integer(value), lexeme, start)
    }
    /// Scan an identifier, `$parameter`, or keyword starting at the cursor.
    ///
    /// The first character is consumed unconditionally: `next_token` only
    /// dispatches here for a valid identifier-start character. This also
    /// covers `'$'`, which the continuation test below rejects — previously
    /// a leading `'$'` was never consumed, so `tokenize("$x")` produced an
    /// empty lexeme and looped forever.
    fn scan_identifier(&mut self) -> Token {
        let start = self.position;
        let start_offset = self.current_offset;
        self.advance(); // first char already validated by next_token
        while let Some(ch) = self.peek() {
            if ch.is_ascii_alphanumeric() || ch == '_' {
                self.advance();
            } else {
                break;
            }
        }
        let lexeme = &self.input[start_offset..self.current_offset];
        // Keyword recognition is case-insensitive; multi-word keywords are
        // fused into a single token by peeking the following word.
        let kind = match lexeme.to_uppercase().as_str() {
            "MATCH" => TokenKind::Match,
            "OPTIONAL" if self.peek_keyword("MATCH") => {
                self.skip_whitespace();
                self.scan_keyword("MATCH");
                TokenKind::OptionalMatch
            }
            "WHERE" => TokenKind::Where,
            "RETURN" => TokenKind::Return,
            "CREATE" => TokenKind::Create,
            "MERGE" => TokenKind::Merge,
            "DELETE" => TokenKind::Delete,
            "DETACH" if self.peek_keyword("DELETE") => {
                self.skip_whitespace();
                self.scan_keyword("DELETE");
                TokenKind::DetachDelete
            }
            "SET" => TokenKind::Set,
            "REMOVE" => TokenKind::Remove,
            "WITH" => TokenKind::With,
            "ORDER" if self.peek_keyword("BY") => {
                self.skip_whitespace();
                self.scan_keyword("BY");
                TokenKind::OrderBy
            }
            "LIMIT" => TokenKind::Limit,
            "SKIP" => TokenKind::Skip,
            "DISTINCT" => TokenKind::Distinct,
            "AS" => TokenKind::As,
            "ASC" => TokenKind::Asc,
            "DESC" => TokenKind::Desc,
            "CASE" => TokenKind::Case,
            "WHEN" => TokenKind::When,
            "THEN" => TokenKind::Then,
            "ELSE" => TokenKind::Else,
            "END" => TokenKind::End,
            "AND" => TokenKind::And,
            "OR" => TokenKind::Or,
            "XOR" => TokenKind::Xor,
            "NOT" => TokenKind::Not,
            "IN" => TokenKind::In,
            "IS" => TokenKind::Is,
            "NULL" => TokenKind::Null,
            "TRUE" => TokenKind::True,
            "FALSE" => TokenKind::False,
            "ON" if self.peek_keyword("CREATE") => {
                self.skip_whitespace();
                self.scan_keyword("CREATE");
                TokenKind::OnCreate
            }
            "ON" if self.peek_keyword("MATCH") => {
                self.skip_whitespace();
                self.scan_keyword("MATCH");
                TokenKind::OnMatch
            }
            _ => TokenKind::Identifier(lexeme.to_string()),
        };
        self.make_token(kind, lexeme, start)
    }
    /// Check — without consuming any input — whether the next word after
    /// optional whitespace is `keyword` (ASCII, case-insensitive) followed
    /// by a non-identifier character or end of input.
    ///
    /// The previous implementation consumed whitespace via `advance` and,
    /// on mismatch, restored `current_offset`/`chars` but not `position`,
    /// so line/column drifted after every failed lookahead. Being purely
    /// read-only avoids that entirely; callers consume the whitespace and
    /// keyword themselves via `skip_whitespace` + `scan_keyword`.
    fn peek_keyword(&self, keyword: &str) -> bool {
        let rest = self.input[self.current_offset..].trim_start();
        let bytes = rest.as_bytes();
        if bytes.len() < keyword.len()
            || !bytes[..keyword.len()].eq_ignore_ascii_case(keyword.as_bytes())
        {
            return false;
        }
        // Safe slice: the matched prefix is pure ASCII, so `keyword.len()`
        // is a char boundary.
        rest[keyword.len()..]
            .chars()
            .next()
            .map(|c| !c.is_ascii_alphanumeric() && c != '_')
            .unwrap_or(true)
    }
    /// Consume exactly `keyword.len()` characters. Only valid for ASCII
    /// keywords sitting immediately at the cursor (guaranteed by a prior
    /// `peek_keyword` + `skip_whitespace`).
    fn scan_keyword(&mut self, keyword: &str) {
        for _ in 0..keyword.len() {
            self.advance();
        }
    }
    /// Produce the next token, or `Eof` at end of input.
    pub fn next_token(&mut self) -> Result<Token, LexerError> {
        self.skip_whitespace();
        let start = self.position;
        match self.peek() {
            None => Ok(self.make_token(TokenKind::Eof, "", start)),
            Some(ch) => {
                match ch {
                    // Strings
                    '"' | '\'' => self.scan_string(ch),
                    // Numbers
                    '0'..='9' => Ok(self.scan_number()),
                    // Identifiers; '$' admits Cypher-style `$param` names
                    'a'..='z' | 'A'..='Z' | '_' | '$' => Ok(self.scan_identifier()),
                    // Backtick-quoted identifiers
                    '`' => {
                        self.advance();
                        let id_start = self.current_offset;
                        while let Some(c) = self.peek() {
                            if c == '`' {
                                break;
                            }
                            self.advance();
                        }
                        let id = self.input[id_start..self.current_offset].to_string();
                        // NOTE(review): an unterminated backtick identifier
                        // is accepted silently (advance() at EOF is a
                        // no-op) — confirm whether it should error instead.
                        self.advance(); // consume closing backtick
                        Ok(self.make_token(TokenKind::Identifier(id.clone()), &id, start))
                    }
                    // Two-character operators
                    '<' => {
                        self.advance();
                        match self.peek() {
                            Some('=') => {
                                self.advance();
                                Ok(self.make_token(TokenKind::LessThanOrEqual, "<=", start))
                            }
                            Some('>') => {
                                self.advance();
                                Ok(self.make_token(TokenKind::NotEqual, "<>", start))
                            }
                            Some('-') => {
                                self.advance();
                                Ok(self.make_token(TokenKind::LeftArrow, "<-", start))
                            }
                            _ => Ok(self.make_token(TokenKind::LessThan, "<", start)),
                        }
                    }
                    '>' => {
                        self.advance();
                        if self.peek() == Some('=') {
                            self.advance();
                            Ok(self.make_token(TokenKind::GreaterThanOrEqual, ">=", start))
                        } else {
                            Ok(self.make_token(TokenKind::GreaterThan, ">", start))
                        }
                    }
                    '-' => {
                        self.advance();
                        if self.peek() == Some('>') {
                            self.advance();
                            Ok(self.make_token(TokenKind::Arrow, "->", start))
                        } else {
                            Ok(self.make_token(TokenKind::Dash, "-", start))
                        }
                    }
                    '.' => {
                        self.advance();
                        if self.peek() == Some('.') {
                            self.advance();
                            Ok(self.make_token(TokenKind::DotDot, "..", start))
                        } else {
                            Ok(self.make_token(TokenKind::Dot, ".", start))
                        }
                    }
                    '=' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Equal, "=", start))
                    }
                    // Single-character tokens
                    '(' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::LeftParen, "(", start))
                    }
                    ')' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::RightParen, ")", start))
                    }
                    '[' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::LeftBracket, "[", start))
                    }
                    ']' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::RightBracket, "]", start))
                    }
                    '{' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::LeftBrace, "{", start))
                    }
                    '}' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::RightBrace, "}", start))
                    }
                    ',' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Comma, ",", start))
                    }
                    ':' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Colon, ":", start))
                    }
                    ';' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Semicolon, ";", start))
                    }
                    '|' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Pipe, "|", start))
                    }
                    '+' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Plus, "+", start))
                    }
                    '*' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Star, "*", start))
                    }
                    '/' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Slash, "/", start))
                    }
                    '%' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Percent, "%", start))
                    }
                    '^' => {
                        self.advance();
                        Ok(self.make_token(TokenKind::Caret, "^", start))
                    }
                    _ => Err(LexerError {
                        message: format!("Unexpected character: '{}'", ch),
                        position: start,
                    }),
                }
            }
        }
    }
}
/// Tokenize a Cypher query string
///
/// Runs the lexer to exhaustion and returns every token; the final element
/// is always `TokenKind::Eof`. Stops with the first lexer error.
pub fn tokenize(input: &str) -> Result<Vec<Token>, LexerError> {
    let mut lexer = Lexer::new(input);
    let mut tokens = Vec::new();
    loop {
        let token = lexer.next_token()?;
        let done = matches!(token.kind, TokenKind::Eof);
        tokens.push(token);
        if done {
            return Ok(tokens);
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Keywords and delimiters come out in source order.
    #[test]
    fn test_simple_tokens() {
        let tokens = tokenize("MATCH (n) RETURN n").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Match);
        assert_eq!(tokens[1].kind, TokenKind::LeftParen);
    }
    // String literals carry the decoded value, without the quotes.
    #[test]
    fn test_string() {
        let tokens = tokenize("'hello world'").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::String("hello world".to_string()));
    }
    // Digit-only lexemes become Integer; a fractional part makes a Float.
    #[test]
    fn test_number() {
        let tokens = tokenize("42 3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Integer(42));
        assert_eq!(tokens[1].kind, TokenKind::Float(3.14));
    }
    // '-' followed by '>' fuses into a single Arrow token.
    #[test]
    fn test_relationship() {
        let tokens = tokenize("(a)-[:KNOWS]->(b)").unwrap();
        assert!(tokens.iter().any(|t| t.kind == TokenKind::Arrow));
    }
}

View File

@@ -0,0 +1,266 @@
//! Cypher query language parser and execution engine for WASM
//!
//! This module provides a WASM-compatible Cypher implementation including:
//! - Lexical analysis (tokenization)
//! - Syntax parsing (AST generation)
//! - In-memory property graph storage
//! - Query execution engine
//!
//! Supported operations:
//! - CREATE: Create nodes and relationships
//! - MATCH: Pattern matching
//! - WHERE: Filtering
//! - RETURN: Projection
//! - SET: Update properties
//! - DELETE/DETACH DELETE: Remove nodes and edges
pub mod ast;
pub mod executor;
pub mod graph_store;
pub mod lexer;
pub mod parser;
pub use ast::{Expression, Pattern, Query, Statement};
pub use executor::{ContextValue, ExecutionError, ExecutionResult, Executor};
pub use graph_store::{Edge, EdgeId, Node, NodeId, PropertyGraph, Value};
pub use lexer::{tokenize, Token, TokenKind};
pub use parser::{parse_cypher, ParseError};
use crate::storage::state::{EdgeState, GraphState, NodeState, PropertyValue};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use wasm_bindgen::prelude::*;
/// WASM-compatible Cypher engine
#[wasm_bindgen]
pub struct CypherEngine {
    // In-memory property graph that every query reads and mutates.
    graph: PropertyGraph,
}
#[wasm_bindgen]
impl CypherEngine {
/// Create a new Cypher engine with empty graph
#[wasm_bindgen(constructor)]
pub fn new() -> Self {
Self {
graph: PropertyGraph::new(),
}
}
/// Execute a Cypher query and return JSON results
pub fn execute(&mut self, query: &str) -> Result<JsValue, JsValue> {
// Parse the query
let ast =
parse_cypher(query).map_err(|e| JsValue::from_str(&format!("Parse error: {}", e)))?;
// Execute the query
let mut executor = Executor::new(&mut self.graph);
let result = executor
.execute(&ast)
.map_err(|e| JsValue::from_str(&format!("Execution error: {}", e)))?;
// Convert to JS value
serde_wasm_bindgen::to_value(&result)
.map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
}
/// Get graph statistics
pub fn stats(&self) -> Result<JsValue, JsValue> {
let stats = self.graph.stats();
serde_wasm_bindgen::to_value(&stats)
.map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e)))
}
/// Clear the graph
pub fn clear(&mut self) {
self.graph = PropertyGraph::new();
}
}
impl CypherEngine {
    /// Snapshot the whole graph into a serializable [`GraphState`].
    ///
    /// NOTE(review): `next_node_id`/`next_edge_id` are derived from the
    /// current element counts; if ids are not dense sequential integers
    /// this could collide after a restore — confirm against the id
    /// allocator.
    pub fn export_state(&self) -> GraphState {
        let mut nodes = Vec::new();
        for n in self.graph.all_nodes() {
            let properties = n
                .properties
                .iter()
                .map(|(k, v)| (k.clone(), value_to_property(v)))
                .collect();
            nodes.push(NodeState {
                id: n.id.clone(),
                labels: n.labels.clone(),
                properties,
            });
        }
        let mut edges = Vec::new();
        for e in self.graph.all_edges() {
            let properties = e
                .properties
                .iter()
                .map(|(k, v)| (k.clone(), value_to_property(v)))
                .collect();
            edges.push(EdgeState {
                id: e.id.clone(),
                from: e.from.clone(),
                to: e.to.clone(),
                edge_type: e.edge_type.clone(),
                properties,
            });
        }
        let stats = self.graph.stats();
        GraphState {
            nodes,
            edges,
            next_node_id: stats.node_count,
            next_edge_id: stats.edge_count,
        }
    }
    /// Replace the current graph with the contents of `state`.
    ///
    /// Nodes are imported before edges so both endpoints exist when each
    /// edge is added; the first failing edge insert aborts the import.
    pub fn import_state(&mut self, state: &GraphState) -> Result<(), JsValue> {
        self.graph = PropertyGraph::new();
        for node_state in &state.nodes {
            let mut node = Node::new(node_state.id.clone());
            for label in &node_state.labels {
                node = node.with_label(label.clone());
            }
            for (key, value) in &node_state.properties {
                node = node.with_property(key.clone(), property_to_value(value));
            }
            self.graph.add_node(node);
        }
        for edge_state in &state.edges {
            let mut edge = Edge::new(
                edge_state.id.clone(),
                edge_state.from.clone(),
                edge_state.to.clone(),
                edge_state.edge_type.clone(),
            );
            for (key, value) in &edge_state.properties {
                edge = edge.with_property(key.clone(), property_to_value(value));
            }
            self.graph
                .add_edge(edge)
                .map_err(|e| JsValue::from_str(&format!("Failed to import edge: {}", e)))?;
        }
        Ok(())
    }
}
impl Default for CypherEngine {
fn default() -> Self {
Self::new()
}
}
/// Convert graph Value to serializable PropertyValue
///
/// Recursively mirrors the value tree; lists and maps convert element-wise.
fn value_to_property(v: &Value) -> PropertyValue {
    match v {
        Value::Null => PropertyValue::Null,
        Value::Boolean(flag) => PropertyValue::Boolean(*flag),
        Value::Integer(num) => PropertyValue::Integer(*num),
        Value::Float(num) => PropertyValue::Float(*num),
        Value::String(text) => PropertyValue::String(text.clone()),
        Value::List(items) => {
            let converted = items.iter().map(value_to_property).collect();
            PropertyValue::List(converted)
        }
        Value::Map(entries) => {
            let converted = entries
                .iter()
                .map(|(key, val)| (key.clone(), value_to_property(val)))
                .collect();
            PropertyValue::Map(converted)
        }
    }
}
/// Convert PropertyValue back to graph Value
///
/// Exact inverse of `value_to_property`; recurses through lists and maps.
fn property_to_value(p: &PropertyValue) -> Value {
    match p {
        PropertyValue::Null => Value::Null,
        PropertyValue::Boolean(flag) => Value::Boolean(*flag),
        PropertyValue::Integer(num) => Value::Integer(*num),
        PropertyValue::Float(num) => Value::Float(*num),
        PropertyValue::String(text) => Value::String(text.clone()),
        PropertyValue::List(items) => {
            let converted = items.iter().map(property_to_value).collect();
            Value::List(converted)
        }
        PropertyValue::Map(entries) => {
            let converted = entries
                .iter()
                .map(|(key, val)| (key.clone(), property_to_value(val)))
                .collect();
            Value::Map(converted)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // CREATE of a single labeled node bumps the node count to 1.
    #[test]
    fn test_create_node() {
        let mut engine = CypherEngine::new();
        let query = "CREATE (n:Person {name: 'Alice', age: 30})";
        let ast = parse_cypher(query).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        let result = executor.execute(&ast);
        assert!(result.is_ok());
        assert_eq!(engine.graph.stats().node_count, 1);
    }
    // A single CREATE with a relationship pattern makes two nodes + one edge.
    #[test]
    fn test_create_relationship() {
        let mut engine = CypherEngine::new();
        let query = "CREATE (a:Person {name: 'Alice'})-[r:KNOWS]->(b:Person {name: 'Bob'})";
        let ast = parse_cypher(query).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        let result = executor.execute(&ast);
        assert!(result.is_ok());
        let stats = engine.graph.stats();
        assert_eq!(stats.node_count, 2);
        assert_eq!(stats.edge_count, 1);
    }
    // MATCH over previously created nodes executes without error.
    #[test]
    fn test_match_nodes() {
        let mut engine = CypherEngine::new();
        // Create data
        let create = "CREATE (a:Person {name: 'Alice'}), (b:Person {name: 'Bob'})";
        let ast = parse_cypher(create).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        executor.execute(&ast).unwrap();
        // Match nodes
        let match_query = "MATCH (n:Person) RETURN n";
        let ast = parse_cypher(match_query).unwrap();
        let mut executor = Executor::new(&mut engine.graph);
        let result = executor.execute(&ast);
        assert!(result.is_ok());
    }
    // Smoke-parse a representative set of supported query shapes.
    #[test]
    fn test_parser() {
        let queries = vec![
            "MATCH (n:Person) RETURN n",
            "CREATE (n:Person {name: 'Alice'})",
            "MATCH (a)-[r:KNOWS]->(b) RETURN a, r, b",
            "CREATE (a:Person)-[r:KNOWS]->(b:Person)",
        ];
        for query in queries {
            let result = parse_cypher(query);
            assert!(result.is_ok(), "Failed to parse: {}", query);
        }
    }
}

File diff suppressed because it is too large Load Diff

888
vendor/ruvector/crates/rvlite/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,888 @@
//! RvLite - Standalone vector database with SQL, SPARQL, and Cypher
//!
//! A WASM-compatible vector database powered by RuVector.
//!
//! # Features
//! - Vector storage and similarity search
//! - SQL queries with pgvector-compatible syntax
//! - SPARQL queries for RDF data
//! - Cypher queries for property graphs
//! - IndexedDB persistence for browsers
//!
//! # Example (JavaScript)
//! ```javascript
//! import init, { RvLite, RvLiteConfig } from './rvlite.js';
//!
//! await init();
//! const config = new RvLiteConfig(384);
//! const db = new RvLite(config);
//!
//! // Insert vectors
//! db.insert([0.1, 0.2, 0.3, ...], { label: "test" });
//!
//! // Search
//! const results = db.search([0.1, 0.2, 0.3, ...], 10);
//!
//! // Cypher queries
//! db.cypher("CREATE (n:Person {name: 'Alice'})");
//!
//! // SPARQL queries
//! db.add_triple("<http://example.org/a>", "<http://example.org/knows>", "<http://example.org/b>");
//! db.sparql("SELECT ?s WHERE { ?s <http://example.org/knows> ?o }");
//!
//! // Persistence
//! await db.save(); // Save to IndexedDB
//! const db2 = await RvLite.load(config); // Load from IndexedDB
//! ```
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use wasm_bindgen::prelude::*;
use wasm_bindgen_futures::future_to_promise;
// Import ruvector-core
use ruvector_core::types::DbOptions;
use ruvector_core::{DistanceMetric, SearchQuery, VectorDB, VectorEntry};
// Query language modules
pub mod cypher;
pub mod sparql;
pub mod sql;
pub mod storage;
// Re-export storage types
pub use storage::{GraphState, RvLiteState, TripleStoreState, VectorState};
// WASM entry hook: installs the panic-to-console hook once and logs the
// startup banner to the browser console.
#[wasm_bindgen(start)]
pub fn init() {
    console_error_panic_hook::set_once();
    web_sys::console::log_1(&"RvLite v0.2.0 - SQL, SPARQL, Cypher + Persistence".into());
}
/// Error type for RvLite
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RvLiteError {
    /// Human-readable error description.
    pub message: String,
    /// Broad category of the failure (see [`ErrorKind`]).
    pub kind: ErrorKind,
}
impl std::fmt::Display for RvLiteError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}: {}", self.kind, self.message)
}
}
impl std::error::Error for RvLiteError {}
/// Broad failure categories attached to `RvLiteError`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ErrorKind {
    // Errors from the underlying ruvector-core vector database.
    VectorError,
    // SQL parse/execution failures.
    SqlError,
    // Cypher parse/execution failures.
    CypherError,
    // SPARQL parse/execution failures.
    SparqlError,
    // Persistence (IndexedDB) failures.
    StorageError,
    // JS interop / serialization failures.
    WasmError,
    // Requested feature is not implemented.
    NotImplemented,
}
/// Error conversions: each foreign error type is stringified and tagged
/// with the matching [`ErrorKind`]; `RvLiteError` itself converts to a
/// structured `JsValue` for JS callers.
impl From<ruvector_core::RuvectorError> for RvLiteError {
    fn from(e: ruvector_core::RuvectorError) -> Self {
        Self {
            message: e.to_string(),
            kind: ErrorKind::VectorError,
        }
    }
}
impl From<RvLiteError> for JsValue {
    fn from(e: RvLiteError) -> Self {
        // Prefer the structured form; fall back to the bare message string
        // when serde conversion fails.
        serde_wasm_bindgen::to_value(&e).unwrap_or_else(|_| JsValue::from_str(&e.message))
    }
}
impl From<sparql::SparqlError> for RvLiteError {
    fn from(e: sparql::SparqlError) -> Self {
        Self {
            message: e.to_string(),
            kind: ErrorKind::SparqlError,
        }
    }
}
impl From<sql::ParseError> for RvLiteError {
    fn from(e: sql::ParseError) -> Self {
        Self {
            message: e.to_string(),
            kind: ErrorKind::SqlError,
        }
    }
}
/// Configuration for RvLite database
#[wasm_bindgen]
#[derive(Clone, Serialize, Deserialize)]
pub struct RvLiteConfig {
    /// Vector dimensions
    dimensions: usize,
    /// Distance metric (euclidean, cosine, dotproduct, manhattan).
    /// Defaults to "cosine"; unrecognized names also resolve to cosine in
    /// `to_db_options`.
    distance_metric: String,
}
#[wasm_bindgen]
impl RvLiteConfig {
#[wasm_bindgen(constructor)]
pub fn new(dimensions: usize) -> Self {
RvLiteConfig {
dimensions,
distance_metric: "cosine".to_string(),
}
}
/// Set distance metric (euclidean, cosine, dotproduct, manhattan)
pub fn with_distance_metric(mut self, metric: String) -> Self {
self.distance_metric = metric;
self
}
/// Get dimensions
pub fn get_dimensions(&self) -> usize {
self.dimensions
}
/// Get distance metric name
pub fn get_distance_metric(&self) -> String {
self.distance_metric.clone()
}
}
impl RvLiteConfig {
    /// Translate this config into ruvector-core [`DbOptions`].
    ///
    /// Metric names are matched case-insensitively; "cosine" and any
    /// unrecognized name both resolve to cosine (the constructor default).
    fn to_db_options(&self) -> DbOptions {
        let metric = match self.distance_metric.to_lowercase().as_str() {
            "euclidean" => DistanceMetric::Euclidean,
            "dotproduct" => DistanceMetric::DotProduct,
            "manhattan" => DistanceMetric::Manhattan,
            _ => DistanceMetric::Cosine,
        };
        DbOptions {
            dimensions: self.dimensions,
            distance_metric: metric,
            storage_path: String::from("memory://"),
            hnsw_config: None,
            quantization: None,
        }
    }
}
/// Main RvLite database
#[wasm_bindgen]
pub struct RvLite {
    // Vector storage/search backend (ruvector-core).
    db: VectorDB,
    // Construction-time configuration (dimensions, distance metric).
    config: RvLiteConfig,
    // Property-graph engine backing `cypher()` queries.
    cypher_engine: cypher::CypherEngine,
    // SQL engine backing `sql()` queries.
    sql_engine: sql::SqlEngine,
    // RDF triple store backing `sparql()` queries.
    triple_store: sparql::TripleStore,
    // NOTE(review): set to `None` in `new` and never assigned in the
    // visible methods — each persistence call builds its own
    // IndexedDBStorage instead; confirm whether this field is still needed.
    storage: Option<storage::IndexedDBStorage>,
}
#[wasm_bindgen]
impl RvLite {
/// Create a new RvLite database
#[wasm_bindgen(constructor)]
pub fn new(config: RvLiteConfig) -> Result<RvLite, JsValue> {
let db = VectorDB::new(config.to_db_options()).map_err(|e| RvLiteError::from(e))?;
Ok(RvLite {
db,
config,
cypher_engine: cypher::CypherEngine::new(),
sql_engine: sql::SqlEngine::new(),
triple_store: sparql::TripleStore::new(),
storage: None,
})
}
/// Create with default configuration (384 dimensions, cosine similarity)
pub fn default() -> Result<RvLite, JsValue> {
Self::new(RvLiteConfig::new(384))
}
/// Check if database is ready
pub fn is_ready(&self) -> bool {
true
}
/// Get version string
pub fn get_version(&self) -> String {
"0.2.0".to_string()
}
/// Get enabled features
pub fn get_features(&self) -> Result<JsValue, JsValue> {
let features = vec![
"core",
"vectors",
"search",
"sql",
"sparql",
"cypher",
"memory-storage",
"indexeddb-persistence",
];
serde_wasm_bindgen::to_value(&features).map_err(|e| JsValue::from_str(&e.to_string()))
}
// ===== Persistence Methods =====
/// Initialize IndexedDB storage for persistence
/// Must be called before save() or load()
pub fn init_storage(&mut self) -> js_sys::Promise {
let mut storage = storage::IndexedDBStorage::new();
future_to_promise(async move {
storage.init().await?;
Ok(JsValue::TRUE)
})
}
/// Check if IndexedDB is available in the browser
pub fn is_storage_available() -> bool {
storage::IndexedDBStorage::is_available()
}
/// Save database state to IndexedDB
/// Returns a Promise that resolves when save is complete
pub fn save(&self) -> js_sys::Promise {
let state = self.export_state();
let mut storage = storage::IndexedDBStorage::new();
future_to_promise(async move {
storage.init().await?;
storage.save(&state).await?;
Ok(JsValue::TRUE)
})
}
/// Load database from IndexedDB
/// Returns a Promise<RvLite> with the restored database
pub fn load(config: RvLiteConfig) -> js_sys::Promise {
future_to_promise(async move {
let mut storage = storage::IndexedDBStorage::new();
storage.init().await?;
let state = storage.load().await?;
if let Some(state) = state {
// Create new database with restored state
let mut rvlite = RvLite::new(config)?;
rvlite.import_state(&state)?;
Ok(serde_wasm_bindgen::to_value(&"loaded").unwrap())
} else {
Ok(JsValue::NULL)
}
})
}
/// Check if saved state exists in IndexedDB
pub fn has_saved_state() -> js_sys::Promise {
future_to_promise(async move {
let mut storage = storage::IndexedDBStorage::new();
storage.init().await?;
let exists = storage.exists().await?;
Ok(JsValue::from_bool(exists))
})
}
/// Clear saved state from IndexedDB
pub fn clear_storage() -> js_sys::Promise {
future_to_promise(async move {
let mut storage = storage::IndexedDBStorage::new();
storage.init().await?;
storage.clear().await?;
Ok(JsValue::TRUE)
})
}
/// Export database state as JSON (for manual backup)
pub fn export_json(&self) -> Result<JsValue, JsValue> {
let state = self.export_state();
serde_wasm_bindgen::to_value(&state)
.map_err(|e| JsValue::from_str(&format!("Export failed: {}", e)))
}
/// Import database state from JSON
pub fn import_json(&mut self, json: JsValue) -> Result<(), JsValue> {
let state: RvLiteState = serde_wasm_bindgen::from_value(json)
.map_err(|e| JsValue::from_str(&format!("Import failed: {}", e)))?;
self.import_state(&state)
}
// ===== Vector Operations =====
/// Insert a vector with optional metadata
/// Returns the vector ID
pub fn insert(&self, vector: Vec<f32>, metadata: JsValue) -> Result<String, JsValue> {
let metadata_map = if metadata.is_null() || metadata.is_undefined() {
None
} else {
Some(
serde_wasm_bindgen::from_value::<HashMap<String, serde_json::Value>>(metadata)
.map_err(|e| RvLiteError {
message: format!("Invalid metadata: {}", e),
kind: ErrorKind::WasmError,
})?,
)
};
let entry = VectorEntry {
id: None,
vector,
metadata: metadata_map,
};
self.db
.insert(entry)
.map_err(|e| RvLiteError::from(e).into())
}
/// Insert a vector with a specific ID
pub fn insert_with_id(
&self,
id: String,
vector: Vec<f32>,
metadata: JsValue,
) -> Result<(), JsValue> {
let metadata_map = if metadata.is_null() || metadata.is_undefined() {
None
} else {
Some(
serde_wasm_bindgen::from_value::<HashMap<String, serde_json::Value>>(metadata)
.map_err(|e| RvLiteError {
message: format!("Invalid metadata: {}", e),
kind: ErrorKind::WasmError,
})?,
)
};
let entry = VectorEntry {
id: Some(id),
vector,
metadata: metadata_map,
};
self.db.insert(entry).map_err(|e| RvLiteError::from(e))?;
Ok(())
}
/// Search for similar vectors
pub fn search(&self, query_vector: Vec<f32>, k: usize) -> Result<JsValue, JsValue> {
let query = SearchQuery {
vector: query_vector,
k,
filter: None,
ef_search: None,
};
let results = self.db.search(query).map_err(|e| RvLiteError::from(e))?;
serde_wasm_bindgen::to_value(&results).map_err(|e| {
RvLiteError {
message: format!("Failed to serialize results: {}", e),
kind: ErrorKind::WasmError,
}
.into()
})
}
/// Search with metadata filter
pub fn search_with_filter(
&self,
query_vector: Vec<f32>,
k: usize,
filter: JsValue,
) -> Result<JsValue, JsValue> {
let filter_map = serde_wasm_bindgen::from_value::<HashMap<String, serde_json::Value>>(
filter,
)
.map_err(|e| RvLiteError {
message: format!("Invalid filter: {}", e),
kind: ErrorKind::WasmError,
})?;
let query = SearchQuery {
vector: query_vector,
k,
filter: Some(filter_map),
ef_search: None,
};
let results = self.db.search(query).map_err(|e| RvLiteError::from(e))?;
serde_wasm_bindgen::to_value(&results).map_err(|e| {
RvLiteError {
message: format!("Failed to serialize results: {}", e),
kind: ErrorKind::WasmError,
}
.into()
})
}
/// Get a vector by ID
pub fn get(&self, id: String) -> Result<JsValue, JsValue> {
let entry = self.db.get(&id).map_err(|e| RvLiteError::from(e))?;
serde_wasm_bindgen::to_value(&entry).map_err(|e| {
RvLiteError {
message: format!("Failed to serialize entry: {}", e),
kind: ErrorKind::WasmError,
}
.into()
})
}
/// Delete a vector by ID
pub fn delete(&self, id: String) -> Result<bool, JsValue> {
self.db.delete(&id).map_err(|e| RvLiteError::from(e).into())
}
/// Get the number of vectors in the database
pub fn len(&self) -> Result<usize, JsValue> {
self.db.len().map_err(|e| RvLiteError::from(e).into())
}
/// Check if database is empty
pub fn is_empty(&self) -> Result<bool, JsValue> {
self.db.is_empty().map_err(|e| RvLiteError::from(e).into())
}
/// Get configuration
pub fn get_config(&self) -> Result<JsValue, JsValue> {
serde_wasm_bindgen::to_value(&self.config).map_err(|e| {
RvLiteError {
message: format!("Failed to serialize config: {}", e),
kind: ErrorKind::WasmError,
}
.into()
})
}
// ===== SQL Query Methods =====
/// Execute SQL query
///
/// Supported syntax:
/// - CREATE TABLE vectors (id TEXT PRIMARY KEY, vector VECTOR(384))
/// - SELECT * FROM vectors WHERE id = 'x'
/// - SELECT id, vector <-> '[...]' AS distance FROM vectors ORDER BY distance LIMIT 10
/// - INSERT INTO vectors (id, vector) VALUES ('x', '[...]')
/// - DELETE FROM vectors WHERE id = 'x'
pub fn sql(&self, query: String) -> Result<JsValue, JsValue> {
// Parse SQL
let mut parser = sql::SqlParser::new(&query).map_err(|e| RvLiteError {
message: e.to_string(),
kind: ErrorKind::SqlError,
})?;
let statement = parser.parse().map_err(|e| RvLiteError {
message: e.to_string(),
kind: ErrorKind::SqlError,
})?;
// Execute
let result = self
.sql_engine
.execute(statement)
.map_err(|e| RvLiteError {
message: e.to_string(),
kind: ErrorKind::SqlError,
})?;
// Use serde_json + js_sys::JSON::parse for proper serialization
// (serde_wasm_bindgen can fail silently on complex enum types)
let json_str = serde_json::to_string(&result).map_err(|e| RvLiteError {
message: format!("Failed to serialize result: {}", e),
kind: ErrorKind::WasmError,
})?;
js_sys::JSON::parse(&json_str).map_err(|e| {
RvLiteError {
message: format!("Failed to parse JSON: {:?}", e),
kind: ErrorKind::WasmError,
}
.into()
})
}
// ===== Cypher Query Methods =====
/// Execute Cypher query
///
/// Supported operations:
/// - CREATE (n:Label {prop: value})
/// - MATCH (n:Label) WHERE n.prop = value RETURN n
/// - CREATE (a)-[r:REL]->(b)
/// - DELETE n
pub fn cypher(&mut self, query: String) -> Result<JsValue, JsValue> {
self.cypher_engine.execute(&query)
}
/// Get Cypher graph statistics
pub fn cypher_stats(&self) -> Result<JsValue, JsValue> {
self.cypher_engine.stats()
}
/// Clear the Cypher graph
pub fn cypher_clear(&mut self) {
self.cypher_engine.clear();
}
// ===== SPARQL Query Methods =====
/// Execute SPARQL query
///
/// Supported operations:
/// - SELECT ?s ?p ?o WHERE { ?s ?p ?o }
/// - SELECT ?s WHERE { ?s <predicate> ?o }
/// - ASK { ?s ?p ?o }
///
/// Results are converted to a plain JSON object (see
/// `convert_sparql_result`) and handed to `js_sys::JSON::parse` so
/// JS callers receive a real object rather than a serde proxy.
pub fn sparql(&self, query: String) -> Result<JsValue, JsValue> {
    let parsed = sparql::parse_sparql(&query).map_err(|e| RvLiteError {
        message: format!("SPARQL parse error: {}", e),
        kind: ErrorKind::SparqlError,
    })?;
    let result = sparql::execute_sparql(&self.triple_store, &parsed)
        .map_err(|e| RvLiteError::from(e))?;
    // Convert result to serializable format
    let serializable = convert_sparql_result(&result);
    // Convert JSON to string and then parse in JS for proper object conversion
    let json_string = serializable.to_string();
    let js_obj = js_sys::JSON::parse(&json_string).map_err(|e| RvLiteError {
        message: format!("Failed to parse JSON: {:?}", e),
        kind: ErrorKind::WasmError,
    })?;
    Ok(js_obj)
}
/// Add an RDF triple
///
/// # Arguments
/// * `subject` - Subject IRI or blank node (e.g., "<http://example.org/s>" or "_:b1")
/// * `predicate` - Predicate IRI (e.g., "<http://example.org/p>")
/// * `object` - Object IRI, blank node, or literal (e.g., "<http://example.org/o>" or '"value"')
///
/// Terms are parsed leniently: unbracketed/unquoted strings become
/// plain literals (see `parse_rdf_term` / `parse_iri`).
pub fn add_triple(
    &self,
    subject: String,
    predicate: String,
    object: String,
) -> Result<(), JsValue> {
    let subj = parse_rdf_term(&subject)?;
    let pred = parse_iri(&predicate)?;
    let obj = parse_rdf_term(&object)?;
    let triple = sparql::Triple::new(subj, pred, obj);
    self.triple_store.insert(triple);
    Ok(())
}
/// Get the number of triples in the store
pub fn triple_count(&self) -> usize {
    self.triple_store.count()
}
/// Clear all triples from the RDF store (vectors and graph are untouched)
pub fn clear_triples(&self) {
    self.triple_store.clear();
}
}
// Private impl block for state export/import
impl RvLite {
    /// Export the complete database state
    ///
    /// Snapshots vectors, the Cypher graph, and the triple store into a
    /// single serializable `RvLiteState`. SQL schemas are not exported
    /// yet (empty vec).
    fn export_state(&self) -> RvLiteState {
        use storage::state::*;
        // Get current timestamp
        let saved_at = js_sys::Date::now() as u64;
        // Export vector state
        // NOTE(review): `keys()` errors are swallowed by unwrap_or_default
        // and per-entry `get` errors by filter_map, so a failing backend
        // silently yields a partial/empty snapshot — confirm intended.
        let vector_entries = self
            .db
            .keys()
            .unwrap_or_default()
            .iter()
            .filter_map(|id| {
                self.db
                    .get(id)
                    .ok()
                    .flatten()
                    .map(|entry| storage::state::VectorEntry {
                        id: entry.id.unwrap_or_default(),
                        vector: entry.vector,
                        metadata: entry.metadata,
                    })
            })
            .collect();
        let vectors = VectorState {
            entries: vector_entries,
            dimensions: self.config.dimensions,
            distance_metric: self.config.distance_metric.clone(),
            next_id: 0, // Will be recalculated on load
        };
        // Export graph state
        let graph = self.cypher_engine.export_state();
        // Export triple store state
        let triples = self.export_triple_state();
        // Export SQL schemas (not fully implemented yet)
        let sql_schemas = Vec::new();
        RvLiteState {
            version: 1,
            saved_at,
            vectors,
            graph,
            triples,
            sql_schemas,
        }
    }
    /// Import state into the database
    ///
    /// NOTE(review): vectors are inserted on top of any existing entries
    /// (no clear first), while triples are replaced wholesale by
    /// `import_triple_state` — confirm this asymmetry is intended.
    fn import_state(&mut self, state: &RvLiteState) -> Result<(), JsValue> {
        // Import vectors
        for entry in &state.vectors.entries {
            let vector_entry = VectorEntry {
                id: Some(entry.id.clone()),
                vector: entry.vector.clone(),
                metadata: entry.metadata.clone(),
            };
            self.db
                .insert(vector_entry)
                .map_err(|e| RvLiteError::from(e))?;
        }
        // Import graph
        self.cypher_engine.import_state(&state.graph)?;
        // Import triples
        self.import_triple_state(&state.triples)?;
        Ok(())
    }
    /// Export triple store state
    ///
    /// Triple ids are synthesized from enumeration order; named graphs
    /// are not persisted (empty map / vec).
    fn export_triple_state(&self) -> storage::state::TripleStoreState {
        use storage::state::*;
        let triples: Vec<TripleState> = self
            .triple_store
            .all_triples()
            .into_iter()
            .enumerate()
            .map(|(id, t)| TripleState {
                id: id as u64,
                subject: rdf_term_to_state(&t.subject),
                predicate: t.predicate.0.clone(),
                object: rdf_term_to_state(&t.object),
            })
            .collect();
        TripleStoreState {
            triples,
            named_graphs: HashMap::new(),
            default_graph: Vec::new(),
            next_id: 0,
        }
    }
    /// Import triple store state
    ///
    /// Clears the store first, then re-inserts every persisted triple.
    fn import_triple_state(&self, state: &storage::state::TripleStoreState) -> Result<(), JsValue> {
        self.triple_store.clear();
        for triple_state in &state.triples {
            let subject = state_to_rdf_term(&triple_state.subject)?;
            let predicate = sparql::Iri::new(&triple_state.predicate);
            let object = state_to_rdf_term(&triple_state.object)?;
            let triple = sparql::Triple::new(subject, predicate, object);
            self.triple_store.insert(triple);
        }
        Ok(())
    }
}
// Convert an `RdfTerm` into the JSON shape used in query results:
// {"type": "iri"|"literal"|"bnode", "value": ...}, with optional
// "language" and always "datatype" for literals.
fn term_to_json(term: &sparql::ast::RdfTerm) -> serde_json::Value {
    use sparql::ast::RdfTerm;
    match term {
        RdfTerm::Iri(iri) => {
            serde_json::json!({ "type": "iri", "value": iri.as_str() })
        }
        RdfTerm::BlankNode(id) => {
            serde_json::json!({ "type": "bnode", "value": id })
        }
        RdfTerm::Literal(lit) => {
            let mut fields = serde_json::Map::new();
            fields.insert("type".to_string(), serde_json::json!("literal"));
            fields.insert("value".to_string(), serde_json::json!(lit.value.clone()));
            // Language tag is only emitted when present.
            if let Some(lang) = &lit.language {
                fields.insert("language".to_string(), serde_json::json!(lang));
            }
            fields.insert(
                "datatype".to_string(),
                serde_json::json!(lit.datatype.as_str()),
            );
            serde_json::Value::Object(fields)
        }
    }
}
// Helper function to convert SPARQL result to serializable format
//
// Produces a tagged JSON object ({"type": "select"|"ask"|"construct"|
// "describe"|"update", ...}) that `RvLite::sparql` stringifies and
// re-parses on the JS side.
fn convert_sparql_result(result: &sparql::executor::QueryResult) -> serde_json::Value {
    use sparql::executor::QueryResult;
    match result {
        QueryResult::Select(select_result) => {
            // Each binding becomes an object mapping variable name -> term JSON.
            let bindings: Vec<serde_json::Value> = select_result
                .bindings
                .iter()
                .map(|binding| {
                    let mut obj = serde_json::Map::new();
                    for (var, term) in binding {
                        obj.insert(var.clone(), term_to_json(term));
                    }
                    serde_json::Value::Object(obj)
                })
                .collect();
            serde_json::json!({
                "type": "select",
                "variables": select_result.variables,
                "bindings": bindings
            })
        }
        QueryResult::Ask(result) => {
            serde_json::json!({
                "type": "ask",
                "result": result
            })
        }
        // CONSTRUCT and DESCRIBE both serialize their triples identically;
        // only the "type" tag differs.
        QueryResult::Construct(triples) => {
            let triple_json: Vec<serde_json::Value> = triples
                .iter()
                .map(|t| {
                    serde_json::json!({
                        "subject": term_to_json(&t.subject),
                        "predicate": t.predicate.0.clone(),
                        "object": term_to_json(&t.object)
                    })
                })
                .collect();
            serde_json::json!({
                "type": "construct",
                "triples": triple_json
            })
        }
        QueryResult::Describe(triples) => {
            let triple_json: Vec<serde_json::Value> = triples
                .iter()
                .map(|t| {
                    serde_json::json!({
                        "subject": term_to_json(&t.subject),
                        "predicate": t.predicate.0.clone(),
                        "object": term_to_json(&t.object)
                    })
                })
                .collect();
            serde_json::json!({
                "type": "describe",
                "triples": triple_json
            })
        }
        QueryResult::Update => {
            serde_json::json!({
                "type": "update",
                "success": true
            })
        }
    }
}
// Helper functions for parsing RDF terms
/// Parse a textual RDF term: `<iri>`, `_:blank`, `"literal"`, or a bare
/// string (treated as a plain literal).
///
/// Unterminated quoted literals (e.g. `"abc` or a lone `"`) previously
/// panicked: `rfind('"')` matched the *opening* quote, producing
/// `end == 0` and the invalid slice `&s[1..0]`. They now fall back to
/// taking everything after the opening quote as the literal value.
fn parse_rdf_term(s: &str) -> Result<sparql::RdfTerm, JsValue> {
    let s = s.trim();
    if s.starts_with('<') && s.ends_with('>') {
        Ok(sparql::RdfTerm::iri(&s[1..s.len() - 1]))
    } else if s.starts_with("_:") {
        Ok(sparql::RdfTerm::blank(&s[2..]))
    } else if s.starts_with('"') {
        // Only accept a closing quote strictly after the opening one;
        // otherwise treat the literal as unterminated.
        let end = match s.rfind('"') {
            Some(i) if i > 0 => i,
            _ => s.len(),
        };
        let value = &s[1..end];
        Ok(sparql::RdfTerm::literal(value))
    } else {
        Ok(sparql::RdfTerm::literal(s))
    }
}
/// Parse an IRI, stripping surrounding angle brackets when both are
/// present; anything else is used verbatim (after trimming).
fn parse_iri(s: &str) -> Result<sparql::Iri, JsValue> {
    let trimmed = s.trim();
    // Only strip when the string is fully bracketed: `<...>`.
    let inner = trimmed
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'))
        .unwrap_or(trimmed);
    Ok(sparql::Iri::new(inner))
}
// Helper functions for RDF term state conversion
/// Convert an in-memory RDF term into its persistable state form,
/// preserving literal datatype and language tag.
fn rdf_term_to_state(term: &sparql::RdfTerm) -> storage::state::RdfTermState {
    use storage::state::RdfTermState;
    match term {
        sparql::RdfTerm::Iri(iri) => RdfTermState::Iri {
            value: iri.0.clone(),
        },
        sparql::RdfTerm::Literal(lit) => RdfTermState::Literal {
            value: lit.value.clone(),
            datatype: lit.datatype.0.clone(),
            language: lit.language.clone(),
        },
        sparql::RdfTerm::BlankNode(id) => RdfTermState::BlankNode { id: id.clone() },
    }
}
/// Reconstruct an RDF term from its persisted state.
///
/// Literals now keep their language tag and datatype, so a save/load
/// round-trip no longer downgrades typed or language-tagged literals to
/// plain `xsd:string` (the previous implementation discarded both
/// fields that `rdf_term_to_state` had carefully preserved).
fn state_to_rdf_term(state: &storage::state::RdfTermState) -> Result<sparql::RdfTerm, JsValue> {
    use storage::state::RdfTermState;
    match state {
        RdfTermState::Iri { value } => Ok(sparql::RdfTerm::iri(value)),
        RdfTermState::Literal {
            value,
            datatype,
            language,
        } => Ok(match language {
            // Language-tagged literal (rdf:langString datatype is implied).
            Some(lang) => sparql::RdfTerm::lang_literal(value, lang),
            // Typed literal; plain strings round-trip as xsd:string.
            None => sparql::RdfTerm::typed_literal(value, sparql::Iri::new(datatype)),
        }),
        RdfTermState::BlankNode { id } => Ok(sparql::RdfTerm::blank(id)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Sanity check that the config constructor records the requested
    // dimension count and defaults the distance metric to "cosine".
    #[test]
    fn test_config_creation() {
        let config = RvLiteConfig::new(384);
        assert_eq!(config.dimensions, 384);
        assert_eq!(config.distance_metric, "cosine");
    }
}

View File

@@ -0,0 +1,52 @@
// Integration of SQL module into RvLite
// This shows the minimal changes needed to lib.rs
use wasm_bindgen::prelude::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
// Import ruvector-core
use ruvector_core::{
VectorDB, VectorEntry, SearchQuery,
DistanceMetric,
};
use ruvector_core::types::DbOptions;
// SQL module
pub mod sql;
// RvLite struct needs to include sql_engine field:
// sql_engine: sql::SqlEngine,
// In RvLite::new(), initialize the SQL engine:
// sql_engine: sql::SqlEngine::new(),
// Replace the sql() method with this implementation:
/*
/// Execute SQL query
pub async fn sql(&self, query: String) -> Result<JsValue, JsValue> {
// Parse SQL
let mut parser = sql::SqlParser::new(&query)
.map_err(|e| RvLiteError {
message: format!("SQL parse error: {}", e),
kind: ErrorKind::SqlError,
})?;
let statement = parser.parse()
.map_err(|e| RvLiteError {
message: format!("SQL parse error: {}", e),
kind: ErrorKind::SqlError,
})?;
// Execute statement
let result = self.sql_engine.execute(statement)
.map_err(|e| JsValue::from(e))?;
// Serialize result
serde_wasm_bindgen::to_value(&result)
.map_err(|e| RvLiteError {
message: format!("Failed to serialize result: {}", e),
kind: ErrorKind::WasmError,
}.into())
}
*/

View File

@@ -0,0 +1,907 @@
// SPARQL Abstract Syntax Tree (AST) types
//
// Provides type-safe representation of SPARQL 1.1 queries following
// the W3C specification: https://www.w3.org/TR/sparql11-query/
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Complete SPARQL query or update
///
/// Top-level container produced by the parser: the prologue
/// (BASE / PREFIX declarations) plus the query or update body.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SparqlQuery {
    /// Base IRI for relative IRI resolution
    pub base: Option<Iri>,
    /// PREFIX declarations
    pub prefixes: HashMap<String, Iri>,
    /// The query form (SELECT, CONSTRUCT, ASK, DESCRIBE) or update operation
    pub body: QueryBody,
}
impl SparqlQuery {
    /// Query with the given body and an empty prologue.
    pub fn new(body: QueryBody) -> Self {
        Self {
            base: None,
            prefixes: HashMap::new(),
            body,
        }
    }
    /// Builder-style setter for the BASE IRI.
    pub fn with_base(mut self, base: Iri) -> Self {
        self.base = Some(base);
        self
    }
    /// Builder-style registration of one PREFIX declaration.
    pub fn with_prefix(mut self, prefix: impl Into<String>, iri: Iri) -> Self {
        self.prefixes.insert(prefix.into(), iri);
        self
    }
}
impl Default for SparqlQuery {
    // Defaults to an empty `SELECT *` query.
    fn default() -> Self {
        Self::new(QueryBody::Select(SelectQuery::default()))
    }
}
/// Query body - either a query form or update operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum QueryBody {
    Select(SelectQuery),
    Construct(ConstructQuery),
    Ask(AskQuery),
    Describe(DescribeQuery),
    /// Sequence of update operations, executed in order.
    Update(Vec<UpdateOperation>),
}
/// Query form type
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QueryForm {
    Select,
    Construct,
    Ask,
    Describe,
}
/// SELECT query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SelectQuery {
    /// Result variables or expressions
    pub projection: Projection,
    /// Dataset clauses (FROM, FROM NAMED)
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause graph pattern
    pub where_clause: GraphPattern,
    /// Solution modifiers
    pub modifier: SolutionModifier,
    /// VALUES clause for inline data
    pub values: Option<ValuesClause>,
}
impl Default for SelectQuery {
    // `SELECT *` over an empty pattern with no modifiers.
    fn default() -> Self {
        Self {
            projection: Projection::All,
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
            modifier: SolutionModifier::default(),
            values: None,
        }
    }
}
/// Projection in SELECT clause
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Projection {
    /// SELECT * - all variables
    All,
    /// SELECT DISTINCT ...
    Distinct(Vec<ProjectionVar>),
    /// SELECT REDUCED ...
    Reduced(Vec<ProjectionVar>),
    /// SELECT var1 var2 ...
    Variables(Vec<ProjectionVar>),
}
/// Variable or expression in projection
///
/// `alias` carries the `AS name` part of `(expr AS ?name)` when present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectionVar {
    pub expression: Expression,
    pub alias: Option<String>,
}
impl ProjectionVar {
    /// Plain `?name` projection (no alias).
    pub fn variable(name: impl Into<String>) -> Self {
        Self {
            expression: Expression::Variable(name.into()),
            alias: None,
        }
    }
    /// `(expr AS ?alias)` projection.
    pub fn expr_as(expr: Expression, alias: impl Into<String>) -> Self {
        Self {
            expression: expr,
            alias: Some(alias.into()),
        }
    }
}
/// CONSTRUCT query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstructQuery {
    /// Template for constructing triples
    pub template: Vec<TriplePattern>,
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause
    pub where_clause: GraphPattern,
    /// Solution modifiers
    pub modifier: SolutionModifier,
}
impl Default for ConstructQuery {
    fn default() -> Self {
        Self {
            template: Vec::new(),
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
            modifier: SolutionModifier::default(),
        }
    }
}
/// ASK query
///
/// Returns a boolean: does the WHERE pattern have at least one solution?
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AskQuery {
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause
    pub where_clause: GraphPattern,
}
impl Default for AskQuery {
    fn default() -> Self {
        Self {
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
        }
    }
}
/// DESCRIBE query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DescribeQuery {
    /// Resources to describe
    pub resources: Vec<VarOrIri>,
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// Optional WHERE clause
    pub where_clause: Option<GraphPattern>,
}
impl Default for DescribeQuery {
    fn default() -> Self {
        Self {
            resources: Vec::new(),
            dataset: Vec::new(),
            where_clause: None,
        }
    }
}
/// Dataset clause (FROM / FROM NAMED)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetClause {
    pub iri: Iri,
    // true for FROM NAMED, false for plain FROM
    pub named: bool,
}
/// VALUES clause for inline data
///
/// Each inner vec is one row, positionally aligned with `variables`;
/// `None` represents UNDEF.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValuesClause {
    pub variables: Vec<String>,
    pub bindings: Vec<Vec<Option<RdfTerm>>>,
}
/// Graph pattern - the WHERE clause body
///
/// Mirrors the SPARQL algebra operators; patterns compose recursively
/// via `Box`ed children.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GraphPattern {
    /// Empty pattern
    Empty,
    /// Basic Graph Pattern - set of triple patterns
    Bgp(Vec<TriplePattern>),
    /// Join of patterns (implicit AND)
    Join(Box<GraphPattern>, Box<GraphPattern>),
    /// Left outer join (OPTIONAL); third field is the optional join filter
    LeftJoin(Box<GraphPattern>, Box<GraphPattern>, Option<Expression>),
    /// Union of patterns (UNION)
    Union(Box<GraphPattern>, Box<GraphPattern>),
    /// Filter (FILTER)
    Filter(Box<GraphPattern>, Expression),
    /// Named graph (GRAPH)
    Graph(VarOrIri, Box<GraphPattern>),
    /// Service (FEDERATED query); bool is the SILENT flag
    Service(Iri, Box<GraphPattern>, bool),
    /// MINUS pattern
    Minus(Box<GraphPattern>, Box<GraphPattern>),
    /// EXISTS or NOT EXISTS; bool is true for EXISTS, false for NOT EXISTS
    Exists(Box<GraphPattern>, bool),
    /// BIND assignment: (expression, target variable, inner pattern)
    Bind(Expression, String, Box<GraphPattern>),
    /// GROUP BY aggregation: inner pattern, grouping keys, and
    /// (aggregate, output variable) pairs
    Group(
        Box<GraphPattern>,
        Vec<GroupCondition>,
        Vec<(Aggregate, String)>,
    ),
    /// Subquery
    SubSelect(Box<SelectQuery>),
    /// VALUES inline data
    Values(ValuesClause),
}
/// Triple pattern
///
/// Predicate is a full `PropertyPath`, so path expressions (`a/b`,
/// `a|b`, `a*`, ...) are representable in the same type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TriplePattern {
    pub subject: TermOrVariable,
    pub predicate: PropertyPath,
    pub object: TermOrVariable,
}
impl TriplePattern {
    pub fn new(subject: TermOrVariable, predicate: PropertyPath, object: TermOrVariable) -> Self {
        Self {
            subject,
            predicate,
            object,
        }
    }
    /// Simple triple pattern with IRI predicate
    pub fn simple(subject: TermOrVariable, predicate: Iri, object: TermOrVariable) -> Self {
        Self {
            subject,
            predicate: PropertyPath::Iri(predicate),
            object,
        }
    }
}
/// Term or variable in triple pattern
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TermOrVariable {
    Term(RdfTerm),
    Variable(String),
    BlankNode(String),
}
impl TermOrVariable {
    /// `?name` variable position.
    pub fn var(name: impl Into<String>) -> Self {
        Self::Variable(name.into())
    }
    /// Concrete IRI term.
    pub fn iri(iri: Iri) -> Self {
        Self::Term(RdfTerm::Iri(iri))
    }
    /// Plain string literal term.
    pub fn literal(value: impl Into<String>) -> Self {
        Self::Term(RdfTerm::Literal(Literal::simple(value)))
    }
    /// Blank-node position with the given label.
    pub fn blank(id: impl Into<String>) -> Self {
        Self::BlankNode(id.into())
    }
}
/// Variable or IRI
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VarOrIri {
    Variable(String),
    Iri(Iri),
}
/// Property path expression
///
/// SPARQL 1.1 property paths, section 9 of the spec. Combinators nest
/// via `Box`ed sub-paths.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PropertyPath {
    /// Simple IRI predicate
    Iri(Iri),
    /// Variable predicate
    Variable(String),
    /// Inverse path (^path)
    Inverse(Box<PropertyPath>),
    /// Sequence path (path1/path2)
    Sequence(Box<PropertyPath>, Box<PropertyPath>),
    /// Alternative path (path1|path2)
    Alternative(Box<PropertyPath>, Box<PropertyPath>),
    /// Zero or more (*path)
    ZeroOrMore(Box<PropertyPath>),
    /// One or more (+path)
    OneOrMore(Box<PropertyPath>),
    /// Zero or one (?path)
    ZeroOrOne(Box<PropertyPath>),
    /// Negated property set (!(path1|path2))
    NegatedPropertySet(Vec<Iri>),
    /// Fixed length path {n}
    FixedLength(Box<PropertyPath>, usize),
    /// Range length path {n,m}; None upper bound means unbounded
    RangeLength(Box<PropertyPath>, usize, Option<usize>),
}
/// RDF term
///
/// `Eq + Hash` so terms can be used directly as map/set keys (e.g. in
/// solution bindings and the triple store).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RdfTerm {
    /// IRI reference
    Iri(Iri),
    /// Literal value
    Literal(Literal),
    /// Blank node
    BlankNode(String),
}
impl RdfTerm {
    /// IRI term from any string-like value.
    pub fn iri(value: impl Into<String>) -> Self {
        Self::Iri(Iri::new(value))
    }
    /// Plain `xsd:string` literal.
    pub fn literal(value: impl Into<String>) -> Self {
        Self::Literal(Literal::simple(value))
    }
    /// Literal with an explicit datatype.
    pub fn typed_literal(value: impl Into<String>, datatype: Iri) -> Self {
        Self::Literal(Literal::typed(value, datatype))
    }
    /// Language-tagged literal.
    pub fn lang_literal(value: impl Into<String>, lang: impl Into<String>) -> Self {
        Self::Literal(Literal::language(value, lang))
    }
    /// Blank node with the given label.
    pub fn blank(id: impl Into<String>) -> Self {
        Self::BlankNode(id.into())
    }
    /// Check if this is an IRI
    pub fn is_iri(&self) -> bool {
        matches!(self, Self::Iri(_))
    }
    /// Check if this is a literal
    pub fn is_literal(&self) -> bool {
        matches!(self, Self::Literal(_))
    }
    /// Check if this is a blank node
    pub fn is_blank_node(&self) -> bool {
        matches!(self, Self::BlankNode(_))
    }
}
/// IRI (Internationalized Resource Identifier)
///
/// Thin newtype over the raw IRI string; no syntactic validation is
/// performed here.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Iri(pub String);
impl Iri {
    pub fn new(value: impl Into<String>) -> Self {
        Self(value.into())
    }
    pub fn as_str(&self) -> &str {
        &self.0
    }
    /// Common RDF namespace IRIs
    pub fn rdf_type() -> Self {
        Self::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    }
    pub fn rdfs_label() -> Self {
        Self::new("http://www.w3.org/2000/01/rdf-schema#label")
    }
    pub fn rdfs_comment() -> Self {
        Self::new("http://www.w3.org/2000/01/rdf-schema#comment")
    }
    pub fn xsd_string() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#string")
    }
    pub fn xsd_integer() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#integer")
    }
    pub fn xsd_decimal() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#decimal")
    }
    pub fn xsd_double() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#double")
    }
    pub fn xsd_boolean() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#boolean")
    }
    pub fn xsd_date() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#date")
    }
    pub fn xsd_datetime() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#dateTime")
    }
}
/// RDF Literal
///
/// Per RDF 1.1, every literal has a datatype; plain strings use
/// `xsd:string` and language-tagged strings use `rdf:langString`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Literal {
    /// Lexical form (string value)
    pub value: String,
    /// Optional language tag
    pub language: Option<String>,
    /// Datatype IRI (defaults to xsd:string)
    pub datatype: Iri,
}
impl Literal {
    /// Plain `xsd:string` literal with no language tag.
    pub fn simple(value: impl Into<String>) -> Self {
        Self::typed(value, Iri::xsd_string())
    }
    /// Literal with an explicit datatype IRI.
    pub fn typed(value: impl Into<String>, datatype: Iri) -> Self {
        Self {
            value: value.into(),
            language: None,
            datatype,
        }
    }
    /// Language-tagged literal; the datatype is always `rdf:langString`.
    pub fn language(value: impl Into<String>, lang: impl Into<String>) -> Self {
        Self {
            value: value.into(),
            language: Some(lang.into()),
            datatype: Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"),
        }
    }
    /// `xsd:integer` literal.
    pub fn integer(value: i64) -> Self {
        Self::typed(value.to_string(), Iri::xsd_integer())
    }
    /// `xsd:decimal` literal.
    pub fn decimal(value: f64) -> Self {
        Self::typed(value.to_string(), Iri::xsd_decimal())
    }
    /// `xsd:double` literal.
    pub fn double(value: f64) -> Self {
        Self::typed(value.to_string(), Iri::xsd_double())
    }
    /// `xsd:boolean` literal with lexical form "true"/"false".
    pub fn boolean(value: bool) -> Self {
        let lexical = if value { "true" } else { "false" };
        Self::typed(lexical, Iri::xsd_boolean())
    }
    /// Lexical form parsed as `i64`, when possible.
    pub fn as_integer(&self) -> Option<i64> {
        self.value.parse::<i64>().ok()
    }
    /// Lexical form parsed as `f64`, when possible.
    pub fn as_double(&self) -> Option<f64> {
        self.value.parse::<f64>().ok()
    }
    /// Lexical form interpreted as a boolean; accepts the XSD
    /// canonical forms "true"/"false" plus "1"/"0".
    pub fn as_boolean(&self) -> Option<bool> {
        match self.value.as_str() {
            "true" | "1" => Some(true),
            "false" | "0" => Some(false),
            _ => None,
        }
    }
}
/// SPARQL expression
///
/// Used in FILTER, BIND, SELECT expressions and HAVING. Built-in
/// functions with fixed arity get dedicated variants; everything else
/// goes through `Function`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Expression {
    /// Variable reference
    Variable(String),
    /// Constant term
    Term(RdfTerm),
    /// Binary operation
    Binary(Box<Expression>, BinaryOp, Box<Expression>),
    /// Unary operation
    Unary(UnaryOp, Box<Expression>),
    /// Function call
    Function(FunctionCall),
    /// Aggregate function
    Aggregate(Aggregate),
    /// IN expression
    In(Box<Expression>, Vec<Expression>),
    /// NOT IN expression
    NotIn(Box<Expression>, Vec<Expression>),
    /// EXISTS subquery
    Exists(Box<GraphPattern>),
    /// NOT EXISTS subquery
    NotExists(Box<GraphPattern>),
    /// Conditional (IF): condition, then-value, else-value
    If(Box<Expression>, Box<Expression>, Box<Expression>),
    /// COALESCE - first argument that evaluates without error
    Coalesce(Vec<Expression>),
    /// BOUND test on a variable name
    Bound(String),
    /// isIRI test
    IsIri(Box<Expression>),
    /// isBlank test
    IsBlank(Box<Expression>),
    /// isLiteral test
    IsLiteral(Box<Expression>),
    /// isNumeric test
    IsNumeric(Box<Expression>),
    /// REGEX pattern matching: text, pattern, optional flags
    Regex(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
    /// LANG function
    Lang(Box<Expression>),
    /// DATATYPE function
    Datatype(Box<Expression>),
    /// STR function
    Str(Box<Expression>),
    /// IRI constructor
    Iri(Box<Expression>),
}
impl Expression {
    /// Variable reference expression.
    pub fn var(name: impl Into<String>) -> Self {
        Self::Variable(name.into())
    }
    /// Constant-term expression.
    pub fn term(t: RdfTerm) -> Self {
        Self::Term(t)
    }
    /// Plain string-literal expression.
    pub fn literal(value: impl Into<String>) -> Self {
        Self::term(RdfTerm::literal(value))
    }
    /// `xsd:integer` literal expression.
    pub fn integer(value: i64) -> Self {
        Self::term(RdfTerm::Literal(Literal::integer(value)))
    }
    /// Generic binary operation.
    pub fn binary(left: Expression, op: BinaryOp, right: Expression) -> Self {
        Self::Binary(left.into(), op, right.into())
    }
    /// Generic unary operation.
    pub fn unary(op: UnaryOp, expr: Expression) -> Self {
        Self::Unary(op, expr.into())
    }
    /// Logical `&&`.
    pub fn and(left: Expression, right: Expression) -> Self {
        Self::binary(left, BinaryOp::And, right)
    }
    /// Logical `||`.
    pub fn or(left: Expression, right: Expression) -> Self {
        Self::binary(left, BinaryOp::Or, right)
    }
    /// Equality comparison.
    pub fn eq(left: Expression, right: Expression) -> Self {
        Self::binary(left, BinaryOp::Eq, right)
    }
    /// Inequality comparison.
    pub fn neq(left: Expression, right: Expression) -> Self {
        Self::binary(left, BinaryOp::NotEq, right)
    }
    /// Less-than comparison.
    pub fn lt(left: Expression, right: Expression) -> Self {
        Self::binary(left, BinaryOp::Lt, right)
    }
    /// Greater-than comparison.
    pub fn gt(left: Expression, right: Expression) -> Self {
        Self::binary(left, BinaryOp::Gt, right)
    }
}
/// Binary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryOp {
    // Logical
    And,
    Or,
    // Comparison
    Eq,
    NotEq,
    Lt,
    LtEq,
    Gt,
    GtEq,
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    // String
    SameTerm,
    LangMatches,
}
/// Unary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnaryOp {
    Not,
    Plus,
    Minus,
}
/// Function call
///
/// Catch-all for named built-ins and extension functions not covered by
/// dedicated `Expression` variants.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionCall {
    pub name: String,
    pub args: Vec<Expression>,
}
impl FunctionCall {
    pub fn new(name: impl Into<String>, args: Vec<Expression>) -> Self {
        Self {
            name: name.into(),
            args,
        }
    }
}
/// Aggregate function
///
/// COUNT's expression is optional to represent `COUNT(*)`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Aggregate {
    Count {
        expr: Option<Box<Expression>>,
        distinct: bool,
    },
    Sum {
        expr: Box<Expression>,
        distinct: bool,
    },
    Avg {
        expr: Box<Expression>,
        distinct: bool,
    },
    Min {
        expr: Box<Expression>,
    },
    Max {
        expr: Box<Expression>,
    },
    GroupConcat {
        expr: Box<Expression>,
        /// SEPARATOR = "..." option
        separator: Option<String>,
        distinct: bool,
    },
    Sample {
        expr: Box<Expression>,
    },
}
/// Filter expression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Filter {
    pub expression: Expression,
}
impl Filter {
    pub fn new(expression: Expression) -> Self {
        Self { expression }
    }
}
/// Solution modifier
///
/// ORDER BY / LIMIT / OFFSET / HAVING; `Default` means "no modifiers".
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SolutionModifier {
    pub order_by: Vec<OrderCondition>,
    pub limit: Option<usize>,
    pub offset: Option<usize>,
    pub having: Option<Expression>,
}
impl SolutionModifier {
    /// Builder-style LIMIT.
    pub fn with_limit(mut self, limit: usize) -> Self {
        self.limit = Some(limit);
        self
    }
    /// Builder-style OFFSET.
    pub fn with_offset(mut self, offset: usize) -> Self {
        self.offset = Some(offset);
        self
    }
    /// Builder-style ORDER BY (replaces any existing conditions).
    pub fn with_order(mut self, conditions: Vec<OrderCondition>) -> Self {
        self.order_by = conditions;
        self
    }
    /// Builder-style HAVING.
    pub fn with_having(mut self, expr: Expression) -> Self {
        self.having = Some(expr);
        self
    }
}
/// ORDER BY condition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrderCondition {
    pub expression: Expression,
    pub ascending: bool,
}
impl OrderCondition {
    /// ASC(expr)
    pub fn asc(expr: Expression) -> Self {
        Self {
            expression: expr,
            ascending: true,
        }
    }
    /// DESC(expr)
    pub fn desc(expr: Expression) -> Self {
        Self {
            expression: expr,
            ascending: false,
        }
    }
}
/// GROUP BY condition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GroupCondition {
    Variable(String),
    /// (expr AS ?alias) grouping key; alias optional
    Expression(Expression, Option<String>),
}
// ============================================================================
// SPARQL Update Operations
// ============================================================================
/// SPARQL Update operation
///
/// The `silent` flags correspond to the SILENT keyword: failures are
/// suppressed when set.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateOperation {
    /// INSERT DATA { triples }
    InsertData(InsertData),
    /// DELETE DATA { triples }
    DeleteData(DeleteData),
    /// DELETE { pattern } INSERT { pattern } WHERE { pattern }
    Modify(Modify),
    /// LOAD <iri> INTO GRAPH <iri>
    Load {
        source: Iri,
        /// None means the default graph
        destination: Option<Iri>,
        silent: bool,
    },
    /// CLEAR GRAPH <iri>
    Clear { target: GraphTarget, silent: bool },
    /// CREATE GRAPH <iri>
    Create { graph: Iri, silent: bool },
    /// DROP GRAPH <iri>
    Drop { target: GraphTarget, silent: bool },
    /// COPY source TO destination
    Copy {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// MOVE source TO destination
    Move {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// ADD source TO destination
    Add {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
}
/// INSERT DATA operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InsertData {
    pub quads: Vec<Quad>,
}
/// DELETE DATA operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeleteData {
    pub quads: Vec<Quad>,
}
/// DELETE/INSERT with WHERE
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Modify {
    /// WITH <iri> target graph
    pub with_graph: Option<Iri>,
    pub delete_pattern: Option<Vec<QuadPattern>>,
    pub insert_pattern: Option<Vec<QuadPattern>>,
    /// USING / USING NAMED clauses
    pub using: Vec<DatasetClause>,
    pub where_pattern: GraphPattern,
}
/// Quad (triple with optional graph)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Quad {
    pub subject: RdfTerm,
    pub predicate: Iri,
    pub object: RdfTerm,
    /// None means the default graph
    pub graph: Option<Iri>,
}
/// Quad pattern (for DELETE/INSERT templates)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuadPattern {
    pub subject: TermOrVariable,
    pub predicate: VarOrIri,
    pub object: TermOrVariable,
    pub graph: Option<VarOrIri>,
}
/// Graph target for management operations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GraphTarget {
    Default,
    Named(Iri),
    All,
    AllNamed,
}
#[cfg(test)]
mod tests {
    use super::*;
    // Constructors produce the expected variant for each term kind.
    #[test]
    fn test_rdf_term_creation() {
        let iri = RdfTerm::iri("http://example.org/resource");
        assert!(iri.is_iri());
        let lit = RdfTerm::literal("hello");
        assert!(lit.is_literal());
        let blank = RdfTerm::blank("b0");
        assert!(blank.is_blank_node());
    }
    // Typed literals round-trip through their lexical form.
    #[test]
    fn test_literal_parsing() {
        let int_lit = Literal::integer(42);
        assert_eq!(int_lit.as_integer(), Some(42));
        let double_lit = Literal::double(3.14);
        assert!((double_lit.as_double().unwrap() - 3.14).abs() < 0.001);
        let bool_lit = Literal::boolean(true);
        assert_eq!(bool_lit.as_boolean(), Some(true));
    }
    // Builder helpers compose into the expected expression tree.
    #[test]
    fn test_expression_builder() {
        let expr = Expression::and(
            Expression::eq(Expression::var("x"), Expression::integer(10)),
            Expression::gt(Expression::var("y"), Expression::integer(5)),
        );
        match expr {
            Expression::Binary(_, BinaryOp::And, _) => (),
            _ => panic!("Expected AND expression"),
        }
    }
    // `simple` wraps an IRI predicate in PropertyPath::Iri.
    #[test]
    fn test_triple_pattern() {
        let pattern = TriplePattern::simple(
            TermOrVariable::var("s"),
            Iri::rdf_type(),
            TermOrVariable::iri(Iri::new("http://example.org/Person")),
        );
        assert!(matches!(pattern.subject, TermOrVariable::Variable(_)));
        assert!(matches!(pattern.predicate, PropertyPath::Iri(_)));
    }
}

View File

@@ -0,0 +1,928 @@
// SPARQL Query Executor for WASM
//
// Executes parsed SPARQL queries against an in-memory triple store.
// Simplified version for WASM environments (no async, no complex aggregates).
use super::ast::*;
use super::triple_store::{Triple, TripleStore};
use super::{SparqlError, SparqlResult};
use std::collections::HashMap;
/// Static empty HashMap for default prefixes
// Shared so `SparqlContext::new` can hand out a 'static reference
// without allocating per query. (std::sync::LazyLock could replace
// once_cell on newer toolchains.)
static EMPTY_PREFIXES: once_cell::sync::Lazy<HashMap<String, Iri>> =
    once_cell::sync::Lazy::new(HashMap::new);
/// Solution binding - maps variables to RDF terms
pub type Binding = HashMap<String, RdfTerm>;
/// Solution sequence - list of bindings
pub type Solutions = Vec<Binding>;
/// Execution context for SPARQL queries
///
/// Borrows the store plus the query prologue (BASE / PREFIX) for the
/// duration of one execution.
pub struct SparqlContext<'a> {
    pub store: &'a TripleStore,
    pub base: Option<&'a Iri>,
    pub prefixes: &'a HashMap<String, Iri>,
}
impl<'a> SparqlContext<'a> {
    /// Context with no BASE and no PREFIX declarations.
    pub fn new(store: &'a TripleStore) -> Self {
        Self {
            store,
            base: None,
            prefixes: &EMPTY_PREFIXES,
        }
    }
    /// Builder-style setter for the BASE IRI.
    pub fn with_base(mut self, base: Option<&'a Iri>) -> Self {
        self.base = base;
        self
    }
    /// Builder-style setter for the PREFIX map.
    pub fn with_prefixes(mut self, prefixes: &'a HashMap<String, Iri>) -> Self {
        self.prefixes = prefixes;
        self
    }
}
/// Execute a SPARQL query
///
/// Dispatches on the query form; update operations run sequentially and
/// collapse to a single `QueryResult::Update`. Updates go through the
/// shared `&TripleStore` reference, so the store's mutation methods
/// must accept `&self` (the lib.rs callers use `insert`/`clear` on
/// `&self`, which is consistent with that).
pub fn execute_sparql(store: &TripleStore, query: &SparqlQuery) -> SparqlResult<QueryResult> {
    let mut ctx = SparqlContext::new(store)
        .with_base(query.base.as_ref())
        .with_prefixes(&query.prefixes);
    match &query.body {
        QueryBody::Select(select) => {
            let solutions = execute_select(&mut ctx, select)?;
            Ok(QueryResult::Select(solutions))
        }
        QueryBody::Construct(construct) => {
            let triples = execute_construct(&mut ctx, construct)?;
            Ok(QueryResult::Construct(triples))
        }
        QueryBody::Ask(ask) => {
            let result = execute_ask(&mut ctx, ask)?;
            Ok(QueryResult::Ask(result))
        }
        QueryBody::Describe(describe) => {
            let triples = execute_describe(&mut ctx, describe)?;
            Ok(QueryResult::Describe(triples))
        }
        QueryBody::Update(ops) => {
            // Updates are applied in declaration order; the first error aborts.
            for op in ops {
                execute_update(&mut ctx, op)?;
            }
            Ok(QueryResult::Update)
        }
    }
}
/// Query result types
#[derive(Debug, Clone)]
pub enum QueryResult {
    Select(SelectResult),
    Construct(Vec<Triple>),
    Ask(bool),
    Describe(Vec<Triple>),
    /// All update operations completed (no payload).
    Update,
}
/// SELECT query result
///
/// `variables` gives the projected column order; `bindings` maps each
/// variable name to its term per solution (unbound variables are simply
/// absent from the map).
#[derive(Debug, Clone)]
pub struct SelectResult {
    pub variables: Vec<String>,
    pub bindings: Solutions,
}
impl SelectResult {
    pub fn new(variables: Vec<String>, bindings: Solutions) -> Self {
        Self {
            variables,
            bindings,
        }
    }
}
// ============================================================================
// SELECT Query Execution
// ============================================================================
/// Run a SELECT query: evaluate WHERE, apply ORDER BY/LIMIT/OFFSET,
/// then project the requested variables/expressions.
fn execute_select(ctx: &mut SparqlContext, query: &SelectQuery) -> SparqlResult<SelectResult> {
    // Evaluate WHERE clause
    let mut solutions = evaluate_graph_pattern(ctx, &query.where_clause)?;
    // Apply solution modifiers
    solutions = apply_modifiers(solutions, &query.modifier)?;
    // Project variables
    let (variables, bindings) = project_solutions(&query.projection, solutions)?;
    Ok(SelectResult {
        variables,
        bindings,
    })
}
/// Apply the SELECT projection to a solution sequence.
///
/// Returns the projected variable names (column order) plus the
/// projected bindings. Notes:
/// - `SELECT *` collects every variable seen across all bindings,
///   sorted alphabetically.
/// - REDUCED is treated the same as a plain variable list (no
///   duplicate elimination), which the SPARQL spec permits.
/// - An expression that evaluates to `None` simply leaves its variable
///   unbound in that row rather than erroring.
/// - DISTINCT de-duplicates by linear scan, O(n^2) in the number of
///   results — fine for the small WASM-side result sets this targets.
fn project_solutions(
    projection: &Projection,
    solutions: Solutions,
) -> SparqlResult<(Vec<String>, Solutions)> {
    match projection {
        Projection::All => {
            // Get all unique variables
            let mut vars: Vec<String> = Vec::new();
            for binding in &solutions {
                for var in binding.keys() {
                    if !vars.contains(var) {
                        vars.push(var.clone());
                    }
                }
            }
            vars.sort();
            Ok((vars, solutions))
        }
        Projection::Variables(vars) | Projection::Distinct(vars) | Projection::Reduced(vars) => {
            // Output column name: explicit alias, the variable's own name,
            // or the placeholder "_expr" for unaliased expressions.
            let var_names: Vec<String> = vars
                .iter()
                .map(|v| {
                    v.alias.clone().unwrap_or_else(|| {
                        if let Expression::Variable(name) = &v.expression {
                            name.clone()
                        } else {
                            "_expr".to_string()
                        }
                    })
                })
                .collect();
            let mut projected: Solutions = Vec::new();
            for binding in solutions {
                let mut new_binding = Binding::new();
                for (i, pv) in vars.iter().enumerate() {
                    if let Some(value) = evaluate_expression(&pv.expression, &binding)? {
                        new_binding.insert(var_names[i].clone(), value);
                    }
                }
                // For DISTINCT, check if this binding already exists
                if matches!(projection, Projection::Distinct(_)) {
                    if !projected.iter().any(|b| bindings_equal(b, &new_binding)) {
                        projected.push(new_binding);
                    }
                } else {
                    projected.push(new_binding);
                }
            }
            Ok((var_names, projected))
        }
    }
}
/// True when two bindings map exactly the same variables to equal terms.
fn bindings_equal(a: &Binding, b: &Binding) -> bool {
    a.len() == b.len() && a.iter().all(|(k, v)| b.get(k) == Some(v))
}
// ============================================================================
// Graph Pattern Evaluation
// ============================================================================
/// Recursively evaluate a SPARQL algebra graph pattern to a set of
/// solutions (each solution is a `Binding`: variable -> term map).
///
/// Pattern kinds not listed here are rejected with
/// `UnsupportedOperation`.
fn evaluate_graph_pattern(ctx: &SparqlContext, pattern: &GraphPattern) -> SparqlResult<Solutions> {
    match pattern {
        // The empty pattern has exactly one solution: the empty binding.
        GraphPattern::Empty => Ok(vec![Binding::new()]),
        GraphPattern::Bgp(triples) => evaluate_bgp(ctx, triples),
        GraphPattern::Join(left, right) => {
            let left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            join_solutions(left_solutions, right_solutions)
        }
        // OPTIONAL: left rows survive even without a matching right row.
        GraphPattern::LeftJoin(left, right, condition) => {
            let left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            left_join_solutions(left_solutions, right_solutions, condition.as_ref())
        }
        // UNION: concatenation of both sides; duplicates are kept.
        GraphPattern::Union(left, right) => {
            let mut left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            left_solutions.extend(right_solutions);
            Ok(left_solutions)
        }
        GraphPattern::Filter(inner, condition) => {
            let solutions = evaluate_graph_pattern(ctx, inner)?;
            filter_solutions(solutions, condition)
        }
        GraphPattern::Minus(left, right) => {
            let left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            minus_solutions(left_solutions, right_solutions)
        }
        // BIND: evaluate the expression per row; an expression that
        // yields no value leaves the variable unbound for that row.
        GraphPattern::Bind(expr, var, inner) => {
            let mut solutions = evaluate_graph_pattern(ctx, inner)?;
            for binding in &mut solutions {
                if let Some(value) = evaluate_expression(expr, binding)? {
                    binding.insert(var.clone(), value);
                }
            }
            Ok(solutions)
        }
        // VALUES: each inline data row becomes one binding; UNDEF cells
        // (None) leave that variable unbound.
        GraphPattern::Values(values) => {
            let mut solutions = Vec::new();
            for row in &values.bindings {
                let mut binding = Binding::new();
                for (i, var) in values.variables.iter().enumerate() {
                    if let Some(Some(term)) = row.get(i) {
                        binding.insert(var.clone(), term.clone());
                    }
                }
                solutions.push(binding);
            }
            Ok(solutions)
        }
        _ => Err(SparqlError::UnsupportedOperation(format!(
            "Graph pattern not supported in WASM build: {:?}",
            pattern
        ))),
    }
}
/// Evaluate a basic graph pattern by extending the solution set one
/// triple pattern at a time (nested-loop style). Stops early as soon as
/// some pattern yields no matches.
fn evaluate_bgp(ctx: &SparqlContext, patterns: &[TriplePattern]) -> SparqlResult<Solutions> {
    let mut current = vec![Binding::new()];
    for triple_pattern in patterns {
        let mut expanded = Vec::new();
        for partial in &current {
            expanded.extend(match_triple_pattern(ctx, triple_pattern, partial)?);
        }
        if expanded.is_empty() {
            return Ok(expanded);
        }
        current = expanded;
    }
    Ok(current)
}
/// Match one triple pattern against the store under an existing binding.
///
/// Only plain IRI predicates are supported in this build; any other
/// property path is rejected.
fn match_triple_pattern(
    ctx: &SparqlContext,
    pattern: &TriplePattern,
    binding: &Binding,
) -> SparqlResult<Solutions> {
    match &pattern.predicate {
        // Simple IRI predicate (most common case).
        PropertyPath::Iri(iri) => {
            let subject = resolve_term_or_var(&pattern.subject, binding);
            let object = resolve_term_or_var(&pattern.object, binding);
            match_simple_triple(
                ctx,
                subject,
                Some(iri),
                object,
                &pattern.subject,
                &pattern.object,
                binding,
            )
        }
        _ => Err(SparqlError::PropertyPathError(
            "Complex property paths not yet supported in WASM build".to_string(),
        )),
    }
}
/// Resolve a pattern position to a concrete term where possible.
///
/// Terms and blank nodes resolve to themselves; a variable resolves to
/// its current binding, or `None` when unbound.
fn resolve_term_or_var(tov: &TermOrVariable, binding: &Binding) -> Option<RdfTerm> {
    match tov {
        TermOrVariable::Variable(name) => binding.get(name).cloned(),
        TermOrVariable::Term(term) => Some(term.clone()),
        TermOrVariable::BlankNode(label) => Some(RdfTerm::BlankNode(label.clone())),
    }
}
/// Match an (s, p, o) lookup against the store and extend `binding` with
/// any newly bound variables, producing one solution per matching triple.
///
/// The already-resolved positions were used for the index lookup; this
/// function re-checks variables against existing bindings so a variable
/// that occurs in several positions must match the same term everywhere.
fn match_simple_triple(
    ctx: &SparqlContext,
    subject: Option<RdfTerm>,
    predicate: Option<&Iri>,
    object: Option<RdfTerm>,
    subj_pattern: &TermOrVariable,
    obj_pattern: &TermOrVariable,
    binding: &Binding,
) -> SparqlResult<Solutions> {
    // `None` positions act as wildcards in the store query.
    let triples = ctx
        .store
        .query(subject.as_ref(), predicate, object.as_ref());
    let mut solutions = Vec::new();
    for triple in triples {
        let mut new_binding = binding.clone();
        let mut matches = true;
        // Bind subject variable
        if let TermOrVariable::Variable(var) = subj_pattern {
            if let Some(existing) = new_binding.get(var) {
                // Already bound: must agree with this triple's subject.
                if existing != &triple.subject {
                    matches = false;
                }
            } else {
                new_binding.insert(var.clone(), triple.subject.clone());
            }
        }
        // Bind object variable
        if matches {
            if let TermOrVariable::Variable(var) = obj_pattern {
                if let Some(existing) = new_binding.get(var) {
                    // Already bound (possibly by the subject position
                    // just above): must agree with this triple's object.
                    if existing != &triple.object {
                        matches = false;
                    }
                } else {
                    new_binding.insert(var.clone(), triple.object.clone());
                }
            }
        }
        if matches {
            solutions.push(new_binding);
        }
    }
    Ok(solutions)
}
// ============================================================================
// Solution Operations
// ============================================================================
/// Inner join: emit every compatible merge of a left and a right binding.
fn join_solutions(left: Solutions, right: Solutions) -> SparqlResult<Solutions> {
    if left.is_empty() || right.is_empty() {
        return Ok(Vec::new());
    }
    let joined: Solutions = left
        .iter()
        .flat_map(|l| right.iter().filter_map(move |r| merge_bindings(l, r)))
        .collect();
    Ok(joined)
}
/// Left (OPTIONAL) join: every compatible merge that passes `condition`
/// is emitted; a left row with no passing merge is emitted unchanged.
fn left_join_solutions(
    left: Solutions,
    right: Solutions,
    condition: Option<&Expression>,
) -> SparqlResult<Solutions> {
    let mut result = Vec::new();
    for l in &left {
        let mut found_match = false;
        for r in &right {
            if let Some(merged) = merge_bindings(l, r) {
                // Check condition if present; absent condition accepts all.
                let include = if let Some(cond) = condition {
                    evaluate_expression_as_bool(cond, &merged)?
                } else {
                    true
                };
                if include {
                    result.push(merged);
                    found_match = true;
                }
            }
        }
        // No compatible right row passed the filter: keep the bare left
        // row (OPTIONAL semantics).
        if !found_match {
            result.push(l.clone());
        }
    }
    Ok(result)
}
/// MINUS: keep only left rows that have no compatible row on the right.
fn minus_solutions(left: Solutions, right: Solutions) -> SparqlResult<Solutions> {
    let surviving: Solutions = left
        .iter()
        .filter(|l| !right.iter().any(|r| bindings_compatible(l, r)))
        .cloned()
        .collect();
    Ok(surviving)
}
/// Merge two bindings into one, or return `None` when they disagree on
/// any shared variable.
fn merge_bindings(a: &Binding, b: &Binding) -> Option<Binding> {
    let mut merged = a.clone();
    for (key, value) in b {
        match merged.get(key) {
            // Shared variable with a different term: incompatible.
            Some(existing) if existing != value => return None,
            Some(_) => {}
            None => {
                merged.insert(key.clone(), value.clone());
            }
        }
    }
    Some(merged)
}
/// Two bindings are compatible when every variable they share maps to
/// the same term (variables bound on only one side are ignored).
fn bindings_compatible(a: &Binding, b: &Binding) -> bool {
    a.iter().all(|(k, v)| b.get(k).map_or(true, |bv| bv == v))
}
/// FILTER: keep only solutions whose condition evaluates to a true
/// effective boolean value. Expression errors abort the whole filter.
fn filter_solutions(solutions: Solutions, condition: &Expression) -> SparqlResult<Solutions> {
    let mut kept = Vec::with_capacity(solutions.len());
    for candidate in solutions {
        if evaluate_expression_as_bool(condition, &candidate)? {
            kept.push(candidate);
        }
    }
    Ok(kept)
}
// ============================================================================
// Solution Modifiers
// ============================================================================
/// Apply ORDER BY, then OFFSET, then LIMIT to a solution set.
fn apply_modifiers(
    mut solutions: Solutions,
    modifier: &SolutionModifier,
) -> SparqlResult<Solutions> {
    // ORDER BY
    if !modifier.order_by.is_empty() {
        solutions.sort_by(|a, b| {
            // Compare on each sort key in turn; the first non-equal key
            // decides. Expression errors are folded into "unbound".
            for cond in &modifier.order_by {
                let va = evaluate_expression(&cond.expression, a).ok().flatten();
                let vb = evaluate_expression(&cond.expression, b).ok().flatten();
                // Bound values sort before unbound ones (ascending).
                let ord = match (va, vb) {
                    (Some(ta), Some(tb)) => compare_terms(&ta, &tb),
                    (Some(_), None) => std::cmp::Ordering::Less,
                    (None, Some(_)) => std::cmp::Ordering::Greater,
                    (None, None) => std::cmp::Ordering::Equal,
                };
                let ord = if cond.ascending { ord } else { ord.reverse() };
                if ord != std::cmp::Ordering::Equal {
                    return ord;
                }
            }
            std::cmp::Ordering::Equal
        });
    }
    // OFFSET
    if let Some(offset) = modifier.offset {
        if offset < solutions.len() {
            solutions = solutions.into_iter().skip(offset).collect();
        } else {
            // Offset at or beyond the result size yields an empty result.
            solutions.clear();
        }
    }
    // LIMIT
    if let Some(limit) = modifier.limit {
        solutions.truncate(limit);
    }
    Ok(solutions)
}
/// Ordering used by ORDER BY and the comparison operators.
///
/// Literals compare numerically when both parse as doubles, otherwise by
/// lexical value; IRIs compare by their string form; every other
/// combination compares as equal.
fn compare_terms(a: &RdfTerm, b: &RdfTerm) -> std::cmp::Ordering {
    use std::cmp::Ordering;
    match (a, b) {
        (RdfTerm::Literal(la), RdfTerm::Literal(lb)) => match (la.as_double(), lb.as_double()) {
            (Some(x), Some(y)) => x.partial_cmp(&y).unwrap_or(Ordering::Equal),
            _ => la.value.cmp(&lb.value),
        },
        (RdfTerm::Iri(x), RdfTerm::Iri(y)) => x.as_str().cmp(y.as_str()),
        _ => Ordering::Equal,
    }
}
// ============================================================================
// Expression Evaluation
// ============================================================================
/// Evaluate a SPARQL expression against one binding.
///
/// Returns `Ok(None)` when the expression simply has no value (e.g. an
/// unbound variable, or LANG/DATATYPE applied to a non-literal); errors
/// are reserved for unsupported expression kinds.
fn evaluate_expression(expr: &Expression, binding: &Binding) -> SparqlResult<Option<RdfTerm>> {
    match expr {
        // An unbound variable evaluates to None, not an error.
        Expression::Variable(var) => Ok(binding.get(var).cloned()),
        Expression::Term(term) => Ok(Some(term.clone())),
        Expression::Binary(left, op, right) => {
            let lv = evaluate_expression(left, binding)?;
            let rv = evaluate_expression(right, binding)?;
            evaluate_binary_op(lv, *op, rv)
        }
        Expression::Unary(op, inner) => {
            let v = evaluate_expression(inner, binding)?;
            evaluate_unary_op(*op, v)
        }
        // BOUND(?var): true iff the variable currently has a binding.
        Expression::Bound(var) => Ok(Some(RdfTerm::Literal(Literal::boolean(
            binding.contains_key(var),
        )))),
        Expression::If(cond, then_expr, else_expr) => {
            if evaluate_expression_as_bool(cond, binding)? {
                evaluate_expression(then_expr, binding)
            } else {
                evaluate_expression(else_expr, binding)
            }
        }
        // COALESCE: first sub-expression that yields a value wins.
        Expression::Coalesce(exprs) => {
            for e in exprs {
                if let Some(v) = evaluate_expression(e, binding)? {
                    return Ok(Some(v));
                }
            }
            Ok(None)
        }
        // The three type tests return false (not None) for an absent value.
        Expression::IsIri(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(Some(RdfTerm::Literal(Literal::boolean(
                v.map(|t| t.is_iri()).unwrap_or(false),
            ))))
        }
        Expression::IsBlank(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(Some(RdfTerm::Literal(Literal::boolean(
                v.map(|t| t.is_blank_node()).unwrap_or(false),
            ))))
        }
        Expression::IsLiteral(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(Some(RdfTerm::Literal(Literal::boolean(
                v.map(|t| t.is_literal()).unwrap_or(false),
            ))))
        }
        // STR(): plain-string form of any term.
        Expression::Str(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(v.map(|t| RdfTerm::literal(term_to_string(&t))))
        }
        // LANG(): language tag of a literal ("" when absent); None for
        // non-literals.
        Expression::Lang(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(v.and_then(|t| {
                if let RdfTerm::Literal(lit) = t {
                    Some(RdfTerm::literal(lit.language.unwrap_or_default()))
                } else {
                    None
                }
            }))
        }
        // DATATYPE(): datatype IRI of a literal; None for non-literals.
        Expression::Datatype(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(v.and_then(|t| {
                if let RdfTerm::Literal(lit) = t {
                    Some(RdfTerm::Iri(lit.datatype))
                } else {
                    None
                }
            }))
        }
        _ => Err(SparqlError::UnsupportedOperation(
            "Complex expressions not yet supported in WASM build".to_string(),
        )),
    }
}
fn evaluate_expression_as_bool(expr: &Expression, binding: &Binding) -> SparqlResult<bool> {
let value = evaluate_expression(expr, binding)?;
Ok(match value {
None => false,
Some(RdfTerm::Literal(lit)) => {
if let Some(b) = lit.as_boolean() {
b
} else if let Some(n) = lit.as_double() {
n != 0.0
} else {
!lit.value.is_empty()
}
}
Some(_) => true,
})
}
/// Apply a binary operator to two (possibly absent) operand values.
///
/// Logical operators coerce an absent operand to `false`; comparisons
/// and arithmetic yield `Ok(None)` when an operand is absent or
/// non-numeric. Operators outside these groups are rejected.
fn evaluate_binary_op(
    left: Option<RdfTerm>,
    op: BinaryOp,
    right: Option<RdfTerm>,
) -> SparqlResult<Option<RdfTerm>> {
    match op {
        BinaryOp::And => {
            let lb = left.map(|t| term_to_bool(&t)).unwrap_or(false);
            let rb = right.map(|t| term_to_bool(&t)).unwrap_or(false);
            Ok(Some(RdfTerm::Literal(Literal::boolean(lb && rb))))
        }
        BinaryOp::Or => {
            let lb = left.map(|t| term_to_bool(&t)).unwrap_or(false);
            let rb = right.map(|t| term_to_bool(&t)).unwrap_or(false);
            Ok(Some(RdfTerm::Literal(Literal::boolean(lb || rb))))
        }
        // Structural term equality; two absent operands compare equal.
        BinaryOp::Eq => Ok(Some(RdfTerm::Literal(Literal::boolean(left == right)))),
        BinaryOp::NotEq => Ok(Some(RdfTerm::Literal(Literal::boolean(left != right)))),
        BinaryOp::Lt | BinaryOp::LtEq | BinaryOp::Gt | BinaryOp::GtEq => {
            let cmp = match (&left, &right) {
                (Some(l), Some(r)) => compare_terms(l, r),
                // Comparing against an absent operand has no value.
                _ => return Ok(None),
            };
            let result = match op {
                BinaryOp::Lt => cmp == std::cmp::Ordering::Less,
                BinaryOp::LtEq => cmp != std::cmp::Ordering::Greater,
                BinaryOp::Gt => cmp == std::cmp::Ordering::Greater,
                BinaryOp::GtEq => cmp != std::cmp::Ordering::Less,
                _ => unreachable!(),
            };
            Ok(Some(RdfTerm::Literal(Literal::boolean(result))))
        }
        BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => {
            let ln = left.and_then(|t| term_to_number(&t));
            let rn = right.and_then(|t| term_to_number(&t));
            match (ln, rn) {
                (Some(l), Some(r)) => {
                    let result = match op {
                        BinaryOp::Add => l + r,
                        BinaryOp::Sub => l - r,
                        BinaryOp::Mul => l * r,
                        BinaryOp::Div => {
                            // Division by zero yields no value rather
                            // than an error in this build.
                            if r == 0.0 {
                                return Ok(None);
                            }
                            l / r
                        }
                        _ => unreachable!(),
                    };
                    Ok(Some(RdfTerm::Literal(Literal::decimal(result))))
                }
                // Non-numeric operand: no value.
                _ => Ok(None),
            }
        }
        _ => Err(SparqlError::UnsupportedOperation(format!(
            "Binary operator not supported: {:?}",
            op
        ))),
    }
}
/// Apply a unary operator (logical NOT, numeric plus/minus) to an
/// optional term. NOT coerces an absent value to `false`; MINUS yields
/// no value for non-numeric input; PLUS is the identity.
fn evaluate_unary_op(op: UnaryOp, value: Option<RdfTerm>) -> SparqlResult<Option<RdfTerm>> {
    let result = match op {
        UnaryOp::Not => {
            let truthy = value.map_or(false, |t| term_to_bool(&t));
            Some(RdfTerm::Literal(Literal::boolean(!truthy)))
        }
        UnaryOp::Plus => value,
        UnaryOp::Minus => value
            .and_then(|t| term_to_number(&t))
            .map(|n| RdfTerm::Literal(Literal::decimal(-n))),
    };
    Ok(result)
}
/// Plain-string form of a term: the IRI text, a literal's lexical value,
/// or a `_:`-prefixed blank node label.
fn term_to_string(term: &RdfTerm) -> String {
    match term {
        RdfTerm::Literal(lit) => lit.value.clone(),
        RdfTerm::Iri(iri) => iri.as_str().to_string(),
        RdfTerm::BlankNode(label) => format!("_:{}", label),
    }
}
/// Numeric value of a term: literals via `as_double`, everything else `None`.
fn term_to_number(term: &RdfTerm) -> Option<f64> {
    if let RdfTerm::Literal(lit) = term {
        lit.as_double()
    } else {
        None
    }
}
/// Effective boolean value of a term.
///
/// Literals: boolean value if present, else non-zero numeric, else
/// non-empty lexical string. IRIs and blank nodes are always `true`.
fn term_to_bool(term: &RdfTerm) -> bool {
    match term {
        RdfTerm::Literal(lit) => lit
            .as_boolean()
            .or_else(|| lit.as_double().map(|n| n != 0.0))
            .unwrap_or_else(|| !lit.value.is_empty()),
        _ => true,
    }
}
// ============================================================================
// Other Query Forms
// ============================================================================
/// CONSTRUCT: instantiate the template once per solution row.
///
/// Template triples whose subject or object stay unresolved, or whose
/// predicate is not a plain IRI, are silently skipped for that row.
/// NOTE(review): duplicate triples produced across rows are not
/// deduplicated.
fn execute_construct(ctx: &SparqlContext, query: &ConstructQuery) -> SparqlResult<Vec<Triple>> {
    let solutions = evaluate_graph_pattern(ctx, &query.where_clause)?;
    let solutions = apply_modifiers(solutions, &query.modifier)?;
    let mut triples = Vec::new();
    for binding in solutions {
        for pattern in &query.template {
            if let (Some(s), Some(o)) = (
                resolve_term_or_var(&pattern.subject, &binding),
                resolve_term_or_var(&pattern.object, &binding),
            ) {
                if let PropertyPath::Iri(p) = &pattern.predicate {
                    triples.push(Triple::new(s, p.clone(), o));
                }
            }
        }
    }
    Ok(triples)
}
/// ASK: true exactly when the WHERE pattern has at least one solution.
fn execute_ask(ctx: &SparqlContext, query: &AskQuery) -> SparqlResult<bool> {
    evaluate_graph_pattern(ctx, &query.where_clause).map(|solutions| !solutions.is_empty())
}
/// DESCRIBE: collect every triple that mentions each requested resource.
///
/// Resources come from explicit IRIs in the query, or from bindings of
/// the listed variables found by evaluating the optional WHERE pattern.
/// NOTE(review): the WHERE pattern is re-evaluated once per variable
/// resource, and a triple whose subject AND object are both described
/// resources (or the same resource twice) can appear more than once in
/// the output — consider deduplicating.
fn execute_describe(ctx: &SparqlContext, query: &DescribeQuery) -> SparqlResult<Vec<Triple>> {
    let mut resources: Vec<RdfTerm> = Vec::new();
    // Get resources from query
    for r in &query.resources {
        match r {
            VarOrIri::Iri(iri) => resources.push(RdfTerm::Iri(iri.clone())),
            VarOrIri::Variable(var) => {
                if let Some(pattern) = &query.where_clause {
                    let solutions = evaluate_graph_pattern(ctx, pattern)?;
                    for binding in solutions {
                        if let Some(term) = binding.get(var) {
                            if !resources.contains(term) {
                                resources.push(term.clone());
                            }
                        }
                    }
                }
            }
        }
    }
    // Get all triples about each resource
    let mut triples = Vec::new();
    for resource in resources {
        // Triples where resource is subject
        triples.extend(ctx.store.query(Some(&resource), None, None));
        // Triples where resource is object
        triples.extend(ctx.store.query(None, None, Some(&resource)));
    }
    Ok(triples)
}
// ============================================================================
// Update Operations (Simplified)
// ============================================================================
/// Stub: every update operation is rejected in this build.
///
/// Callers (the `QueryBody::Update` arm of `execute_sparql`) therefore
/// fail on the first operation of any update request.
fn execute_update(_ctx: &SparqlContext, _op: &UpdateOperation) -> SparqlResult<()> {
    // Simplified: Updates not fully implemented in WASM build
    Err(SparqlError::UnsupportedOperation(
        "Update operations not yet supported in WASM build".to_string(),
    ))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sparql::parser::parse_sparql;
    /// Build a store with one person (type, name, age) shared by the tests.
    fn setup_test_store() -> TripleStore {
        let store = TripleStore::new();
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/name"),
            RdfTerm::literal("Alice"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/age"),
            RdfTerm::Literal(Literal::integer(30)),
        ));
        store
    }
    /// A wildcard SELECT over a non-empty store yields at least one row.
    #[test]
    fn test_simple_select() {
        let store = setup_test_store();
        let query = parse_sparql("SELECT ?s ?p ?o WHERE { ?s ?p ?o }").unwrap();
        let result = execute_sparql(&store, &query).unwrap();
        if let QueryResult::Select(select) = result {
            assert!(!select.bindings.is_empty());
        } else {
            panic!("Expected SELECT result");
        }
    }
    /// FILTER on string equality narrows the result to the single match.
    #[test]
    fn test_select_with_filter() {
        let store = setup_test_store();
        let query = parse_sparql(
            r#"
            SELECT ?name WHERE {
                ?s <http://example.org/name> ?name .
                FILTER(?name = "Alice")
            }
        "#,
        )
        .unwrap();
        let result = execute_sparql(&store, &query).unwrap();
        if let QueryResult::Select(select) = result {
            assert_eq!(select.bindings.len(), 1);
        }
    }
    /// ASK returns true for a triple that exists in the store.
    #[test]
    fn test_ask_query() {
        let store = setup_test_store();
        let query = parse_sparql(
            r#"
            ASK { <http://example.org/person/1> <http://example.org/name> "Alice" }
        "#,
        )
        .unwrap();
        let result = execute_sparql(&store, &query).unwrap();
        assert!(matches!(result, QueryResult::Ask(true)));
    }
}

View File

@@ -0,0 +1,124 @@
// SPARQL (SPARQL Protocol and RDF Query Language) module for rvlite
//
// Provides W3C-compliant SPARQL 1.1 query support for RDF data with
// in-memory storage for WASM environments.
//
// Features:
// - SPARQL 1.1 Query Language (SELECT, CONSTRUCT, ASK, DESCRIBE)
// - Basic Update Language (INSERT DATA, DELETE DATA)
// - In-memory RDF triple store with efficient indexing
// - Property paths (basic support)
// - FILTER expressions and built-in functions
// - WASM-compatible implementation
#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(unused_mut)]
pub mod ast;
pub mod executor;
pub mod parser;
pub mod triple_store;
pub use ast::{
Aggregate, AskQuery, ConstructQuery, DeleteData, DescribeQuery, Expression, GraphPattern,
InsertData, Iri, Literal, OrderCondition, QueryBody, RdfTerm, SelectQuery, SolutionModifier,
SparqlQuery, TriplePattern, UpdateOperation,
};
pub use executor::{execute_sparql, SparqlContext};
pub use parser::parse_sparql;
pub use triple_store::{Triple, TripleStore};
/// SPARQL error type
///
/// Each variant carries its detail inline as a `String`, keeping the
/// type `Clone` and WASM-friendly.
#[derive(Debug, Clone)]
pub enum SparqlError {
    /// Query text could not be parsed
    ParseError(String),
    /// A variable was used where a bound value was required
    UnboundVariable(String),
    /// A value had the wrong type for the requested operation
    TypeMismatch { expected: String, actual: String },
    /// The named store/graph does not exist
    StoreNotFound(String),
    /// Malformed IRI
    InvalidIri(String),
    /// Malformed literal
    InvalidLiteral(String),
    /// Feature not available in this (WASM) build
    UnsupportedOperation(String),
    /// Generic runtime failure during query execution
    ExecutionError(String),
    /// Aggregate function failure
    AggregateError(String),
    /// Property path evaluation failure
    PropertyPathError(String),
}
impl std::fmt::Display for SparqlError {
    // One human-readable line per variant; the variant payload is the
    // detail message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ParseError(msg) => write!(f, "Parse error: {}", msg),
            Self::UnboundVariable(var) => write!(f, "Variable not bound: {}", var),
            Self::TypeMismatch { expected, actual } => {
                write!(f, "Type mismatch: expected {}, got {}", expected, actual)
            }
            Self::StoreNotFound(name) => write!(f, "Store not found: {}", name),
            Self::InvalidIri(iri) => write!(f, "Invalid IRI: {}", iri),
            Self::InvalidLiteral(lit) => write!(f, "Invalid literal: {}", lit),
            Self::UnsupportedOperation(op) => write!(f, "Unsupported operation: {}", op),
            Self::ExecutionError(msg) => write!(f, "Execution error: {}", msg),
            Self::AggregateError(msg) => write!(f, "Aggregate error: {}", msg),
            Self::PropertyPathError(msg) => write!(f, "Property path error: {}", msg),
        }
    }
}
// Marker impl: no `source()` override — variants carry their detail
// inline, and Display/Debug provide the full message.
impl std::error::Error for SparqlError {}
/// Result type for SPARQL operations
pub type SparqlResult<T> = Result<T, SparqlError>;
#[cfg(test)]
mod tests {
    use super::*;
    /// Smoke test: a minimal SELECT parses into a `Select` query body.
    #[test]
    fn test_parse_simple_select() {
        let query = "SELECT ?s ?p ?o WHERE { ?s ?p ?o }";
        let result = parse_sparql(query);
        assert!(result.is_ok());
        let parsed = result.unwrap();
        assert!(matches!(parsed.body, QueryBody::Select(_)));
    }
    /// Insert one triple and read it back via a wildcard query.
    #[test]
    fn test_triple_store_basic() {
        let store = TripleStore::new();
        let triple = Triple::new(
            RdfTerm::iri("http://example.org/subject"),
            Iri::new("http://example.org/predicate"),
            RdfTerm::literal("object"),
        );
        store.insert(triple.clone());
        assert_eq!(store.count(), 1);
        let results = store.query(None, None, None);
        assert_eq!(results.len(), 1);
    }
    /// End-to-end: parse and execute a SELECT against a small store.
    #[test]
    fn test_sparql_execution() {
        let store = TripleStore::new();
        // Add test data
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/name"),
            RdfTerm::literal("Alice"),
        ));
        let query =
            parse_sparql("SELECT ?name WHERE { ?person <http://example.org/name> ?name }").unwrap();
        let result = execute_sparql(&store, &query);
        assert!(result.is_ok());
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,583 @@
// RDF Triple Store with efficient in-memory indexing for WASM
//
// Provides in-memory storage for RDF triples with multiple indexes
// for efficient query patterns (SPO, POS, OSP).
use super::ast::{Iri, RdfTerm};
use std::collections::{HashMap, HashSet};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
/// RDF Triple
///
/// Subject and object may be any RDF term; the predicate is always an
/// IRI, mirroring the RDF data model.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Triple {
    pub subject: RdfTerm,
    pub predicate: Iri,
    pub object: RdfTerm,
}
impl Triple {
pub fn new(subject: RdfTerm, predicate: Iri, object: RdfTerm) -> Self {
Self {
subject,
predicate,
object,
}
}
}
/// Triple store statistics
#[derive(Debug, Clone)]
pub struct StoreStats {
    /// Total number of triples currently stored
    pub triple_count: u64,
    /// Distinct subjects ever inserted (not decremented on removal)
    pub subject_count: usize,
    /// Distinct predicates ever inserted (not decremented on removal)
    pub predicate_count: usize,
    /// Distinct objects ever inserted (not decremented on removal)
    pub object_count: usize,
    /// Number of named graphs plus one for the default graph
    pub graph_count: usize,
}
/// RDF Triple Store (WASM-compatible, thread-safe via RwLock)
///
/// Each triple is stored once under a numeric ID; the SPO/POS/OSP maps
/// index string keys (see `term_to_key`) to sets of those IDs, so any
/// combination of bound pattern positions has an efficient lookup path.
pub struct TripleStore {
    /// All triples stored by internal ID
    triples: RwLock<HashMap<u64, Triple>>,
    /// SPO index: subject key -> predicate key -> triple IDs
    spo_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,
    /// POS index: predicate key -> object key -> triple IDs
    pos_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,
    /// OSP index: object key -> subject key -> triple IDs
    osp_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,
    /// Named graphs: graph IRI -> triple IDs
    graphs: RwLock<HashMap<String, HashSet<u64>>>,
    /// Default graph triple IDs
    default_graph: RwLock<HashSet<u64>>,
    /// Triple ID counter (starts at 1, so 0 is never a valid ID)
    next_id: AtomicU64,
    /// Unique subjects for statistics
    subjects: RwLock<HashSet<String>>,
    /// Unique predicates for statistics
    predicates: RwLock<HashSet<String>>,
    /// Unique objects for statistics
    objects: RwLock<HashSet<String>>,
}
impl TripleStore {
    /// Create an empty store with all indexes initialized.
    pub fn new() -> Self {
        Self {
            triples: RwLock::new(HashMap::new()),
            spo_index: RwLock::new(HashMap::new()),
            pos_index: RwLock::new(HashMap::new()),
            osp_index: RwLock::new(HashMap::new()),
            graphs: RwLock::new(HashMap::new()),
            default_graph: RwLock::new(HashSet::new()),
            next_id: AtomicU64::new(1),
            subjects: RwLock::new(HashSet::new()),
            predicates: RwLock::new(HashSet::new()),
            objects: RwLock::new(HashSet::new()),
        }
    }
    /// Insert a triple into the default graph
    pub fn insert(&self, triple: Triple) -> u64 {
        self.insert_into_graph(triple, None)
    }
    /// Insert a triple into a specific graph
    ///
    /// Returns the new triple's internal ID. Each lock is taken in its
    /// own scope, so no two locks are ever held at the same time.
    pub fn insert_into_graph(&self, triple: Triple, graph: Option<&str>) -> u64 {
        let id = self.next_id.fetch_add(1, Ordering::SeqCst);
        // Get string representations for indexing
        let subject_key = term_to_key(&triple.subject);
        let predicate_key = triple.predicate.as_str().to_string();
        let object_key = term_to_key(&triple.object);
        // Update statistics
        {
            let mut subjects = self.subjects.write().unwrap();
            subjects.insert(subject_key.clone());
        }
        {
            let mut predicates = self.predicates.write().unwrap();
            predicates.insert(predicate_key.clone());
        }
        {
            let mut objects = self.objects.write().unwrap();
            objects.insert(object_key.clone());
        }
        // Update SPO index
        {
            let mut spo_index = self.spo_index.write().unwrap();
            spo_index
                .entry(subject_key.clone())
                .or_insert_with(HashMap::new)
                .entry(predicate_key.clone())
                .or_insert_with(HashSet::new)
                .insert(id);
        }
        // Update POS index
        {
            let mut pos_index = self.pos_index.write().unwrap();
            pos_index
                .entry(predicate_key.clone())
                .or_insert_with(HashMap::new)
                .entry(object_key.clone())
                .or_insert_with(HashSet::new)
                .insert(id);
        }
        // Update OSP index
        {
            let mut osp_index = self.osp_index.write().unwrap();
            osp_index
                .entry(object_key)
                .or_insert_with(HashMap::new)
                .entry(subject_key)
                .or_insert_with(HashSet::new)
                .insert(id);
        }
        // Update graph membership
        if let Some(graph_iri) = graph {
            let mut graphs = self.graphs.write().unwrap();
            graphs
                .entry(graph_iri.to_string())
                .or_insert_with(HashSet::new)
                .insert(id);
        } else {
            let mut default_graph = self.default_graph.write().unwrap();
            default_graph.insert(id);
        }
        // Store the triple
        {
            let mut triples = self.triples.write().unwrap();
            triples.insert(id, triple);
        }
        id
    }
    /// Get a triple by ID
    pub fn get(&self, id: u64) -> Option<Triple> {
        let triples = self.triples.read().unwrap();
        triples.get(&id).cloned()
    }
    /// Query triples matching a pattern (None means any value)
    pub fn query(
        &self,
        subject: Option<&RdfTerm>,
        predicate: Option<&Iri>,
        object: Option<&RdfTerm>,
    ) -> Vec<Triple> {
        self.query_with_graph(subject, predicate, object, None)
    }
    /// Query triples matching a pattern in a specific graph
    ///
    /// Picks the most selective index (SPO/POS/OSP) for the bound
    /// positions, then filters by graph membership when `graph` is set.
    /// NOTE(review): `graph = None` searches ALL triples (default and
    /// named graphs alike), not just the default graph.
    pub fn query_with_graph(
        &self,
        subject: Option<&RdfTerm>,
        predicate: Option<&Iri>,
        object: Option<&RdfTerm>,
        graph: Option<&str>,
    ) -> Vec<Triple> {
        // Filter by graph if specified
        let graph_filter: Option<HashSet<u64>> = graph.map(|g| {
            let graphs = self.graphs.read().unwrap();
            graphs.get(g).cloned().unwrap_or_default()
        });
        let spo_index = self.spo_index.read().unwrap();
        let pos_index = self.pos_index.read().unwrap();
        let osp_index = self.osp_index.read().unwrap();
        let triples = self.triples.read().unwrap();
        // Choose the best index based on bound variables
        let ids = match (subject, predicate, object) {
            // All bound - direct lookup via SPO, object checked per triple
            (Some(s), Some(p), Some(o)) => {
                let s_key = term_to_key(s);
                let p_key = p.as_str();
                let o_key = term_to_key(o);
                spo_index
                    .get(&s_key)
                    .and_then(|pred_map| pred_map.get(p_key))
                    .map(|ids| ids.iter().copied().collect::<Vec<_>>())
                    .unwrap_or_default()
                    .into_iter()
                    .filter(|id| {
                        triples
                            .get(id)
                            .map(|t| term_to_key(&t.object) == o_key)
                            .unwrap_or(false)
                    })
                    .collect::<Vec<_>>()
            }
            // Subject and predicate bound - use SPO
            (Some(s), Some(p), None) => {
                let s_key = term_to_key(s);
                let p_key = p.as_str();
                spo_index
                    .get(&s_key)
                    .and_then(|pred_map| pred_map.get(p_key))
                    .map(|ids| ids.iter().copied().collect())
                    .unwrap_or_default()
            }
            // Subject only - use SPO
            (Some(s), None, None) => {
                let s_key = term_to_key(s);
                spo_index
                    .get(&s_key)
                    .map(|pred_map| {
                        pred_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Predicate and object bound - use POS
            (None, Some(p), Some(o)) => {
                let p_key = p.as_str();
                let o_key = term_to_key(o);
                pos_index
                    .get(p_key)
                    .and_then(|obj_map| obj_map.get(&o_key))
                    .map(|ids| ids.iter().copied().collect())
                    .unwrap_or_default()
            }
            // Predicate only - use POS
            (None, Some(p), None) => {
                let p_key = p.as_str();
                pos_index
                    .get(p_key)
                    .map(|obj_map| {
                        obj_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Object only - use OSP
            (None, None, Some(o)) => {
                let o_key = term_to_key(o);
                osp_index
                    .get(&o_key)
                    .map(|subj_map| {
                        subj_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Subject and object bound - use SPO then filter
            (Some(s), None, Some(o)) => {
                let s_key = term_to_key(s);
                let o_key = term_to_key(o);
                spo_index
                    .get(&s_key)
                    .map(|pred_map| {
                        pred_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .filter(|id| {
                                triples
                                    .get(id)
                                    .map(|t| term_to_key(&t.object) == o_key)
                                    .unwrap_or(false)
                            })
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Nothing bound - return all
            (None, None, None) => triples.keys().copied().collect(),
        };
        // Apply graph filter and collect results
        ids.into_iter()
            .filter(|id| {
                graph_filter
                    .as_ref()
                    .map(|filter| filter.contains(id))
                    .unwrap_or(true)
            })
            .filter_map(|id| triples.get(&id).cloned())
            .collect()
    }
    /// Get all triples in the store
    pub fn all_triples(&self) -> Vec<Triple> {
        let triples = self.triples.read().unwrap();
        triples.values().cloned().collect()
    }
    /// Get triple count
    pub fn count(&self) -> usize {
        let triples = self.triples.read().unwrap();
        triples.len()
    }
    /// Check if store is empty
    pub fn is_empty(&self) -> bool {
        let triples = self.triples.read().unwrap();
        triples.is_empty()
    }
    /// Clear all triples
    ///
    /// Resets every index, graph, and statistics set. The ID counter is
    /// intentionally left untouched, so IDs are never reused.
    pub fn clear(&self) {
        self.triples.write().unwrap().clear();
        self.spo_index.write().unwrap().clear();
        self.pos_index.write().unwrap().clear();
        self.osp_index.write().unwrap().clear();
        self.graphs.write().unwrap().clear();
        self.default_graph.write().unwrap().clear();
        self.subjects.write().unwrap().clear();
        self.predicates.write().unwrap().clear();
        self.objects.write().unwrap().clear();
    }
    /// Clear a specific graph
    ///
    /// `None` clears the default graph; `Some(iri)` clears that named
    /// graph. IDs are snapshotted first so no lock is held while
    /// `remove` takes its own locks.
    pub fn clear_graph(&self, graph: Option<&str>) {
        let ids_to_remove: Vec<u64> = if let Some(graph_iri) = graph {
            let graphs = self.graphs.read().unwrap();
            graphs
                .get(graph_iri)
                .cloned()
                .unwrap_or_default()
                .into_iter()
                .collect()
        } else {
            let default_graph = self.default_graph.read().unwrap();
            default_graph.iter().copied().collect()
        };
        for id in ids_to_remove {
            self.remove(id);
        }
    }
    /// Remove a triple by ID
    ///
    /// Returns the removed triple, or `None` if the ID is unknown.
    /// NOTE(review): emptied inner index maps/sets are left in place
    /// (they accumulate over many removals), and the
    /// subjects/predicates/objects statistics sets are never shrunk.
    pub fn remove(&self, id: u64) -> Option<Triple> {
        let triple = {
            let mut triples = self.triples.write().unwrap();
            triples.remove(&id)
        }?;
        let subject_key = term_to_key(&triple.subject);
        let predicate_key = triple.predicate.as_str().to_string();
        let object_key = term_to_key(&triple.object);
        // Remove from SPO index
        {
            let mut spo_index = self.spo_index.write().unwrap();
            if let Some(pred_map) = spo_index.get_mut(&subject_key) {
                if let Some(ids) = pred_map.get_mut(&predicate_key) {
                    ids.remove(&id);
                }
            }
        }
        // Remove from POS index
        {
            let mut pos_index = self.pos_index.write().unwrap();
            if let Some(obj_map) = pos_index.get_mut(&predicate_key) {
                if let Some(ids) = obj_map.get_mut(&object_key) {
                    ids.remove(&id);
                }
            }
        }
        // Remove from OSP index
        {
            let mut osp_index = self.osp_index.write().unwrap();
            if let Some(subj_map) = osp_index.get_mut(&object_key) {
                if let Some(ids) = subj_map.get_mut(&subject_key) {
                    ids.remove(&id);
                }
            }
        }
        // Remove from graphs
        {
            let mut default_graph = self.default_graph.write().unwrap();
            default_graph.remove(&id);
        }
        {
            // Membership is not tracked per graph, so every named graph
            // is scanned.
            let mut graphs = self.graphs.write().unwrap();
            for (_, ids) in graphs.iter_mut() {
                ids.remove(&id);
            }
        }
        Some(triple)
    }
    /// Get statistics about the store
    ///
    /// Subject/predicate/object counts reflect every distinct term ever
    /// inserted; removals do not decrement them.
    pub fn stats(&self) -> StoreStats {
        let triples = self.triples.read().unwrap();
        let subjects = self.subjects.read().unwrap();
        let predicates = self.predicates.read().unwrap();
        let objects = self.objects.read().unwrap();
        let graphs = self.graphs.read().unwrap();
        StoreStats {
            triple_count: triples.len() as u64,
            subject_count: subjects.len(),
            predicate_count: predicates.len(),
            object_count: objects.len(),
            graph_count: graphs.len() + 1, // +1 for default graph
        }
    }
    /// List all named graphs
    pub fn list_graphs(&self) -> Vec<String> {
        let graphs = self.graphs.read().unwrap();
        graphs.keys().cloned().collect()
    }
    /// Get triples from a specific graph
    ///
    /// Unknown graph names return an empty vector.
    pub fn get_graph(&self, graph: &str) -> Vec<Triple> {
        let graphs = self.graphs.read().unwrap();
        let triples = self.triples.read().unwrap();
        graphs
            .get(graph)
            .map(|ids| {
                ids.iter()
                    .filter_map(|id| triples.get(id).cloned())
                    .collect()
            })
            .unwrap_or_default()
    }
    /// Get triples from the default graph
    pub fn get_default_graph(&self) -> Vec<Triple> {
        let default_graph = self.default_graph.read().unwrap();
        let triples = self.triples.read().unwrap();
        default_graph
            .iter()
            .filter_map(|id| triples.get(id).cloned())
            .collect()
    }
}
impl Default for TripleStore {
fn default() -> Self {
Self::new()
}
}
/// Serialize an RDF term into an N-Triples-style string, used as a key in
/// the SPO/POS/OSP index maps.
///
/// * IRIs become `<iri>`.
/// * Language-tagged literals become `"value"@lang`.
/// * Typed literals other than `xsd:string` become `"value"^^<datatype>`.
/// * Plain string literals become `"value"`.
/// * Blank nodes become `_:id`.
fn term_to_key(term: &RdfTerm) -> String {
    const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";
    match term {
        RdfTerm::BlankNode(id) => format!("_:{}", id),
        RdfTerm::Iri(iri) => format!("<{}>", iri.as_str()),
        RdfTerm::Literal(lit) => match &lit.language {
            Some(lang) => format!("\"{}\"@{}", lit.value, lang),
            None if lit.datatype.as_str() != XSD_STRING => {
                format!("\"{}\"^^<{}>", lit.value, lit.datatype.as_str())
            }
            None => format!("\"{}\"", lit.value),
        },
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Round-trip: an inserted triple is retrievable by the returned id.
    #[test]
    fn test_insert_and_query() {
        let store = TripleStore::new();
        let triple = Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            RdfTerm::iri("http://example.org/Person"),
        );
        let id = store.insert(triple.clone());
        // Ids are positive, non-zero handles.
        assert!(id > 0);
        let retrieved = store.get(id);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap(), triple);
    }
    // Subject-bound query returns exactly the triples sharing that subject
    // and excludes triples with other subjects.
    #[test]
    fn test_query_by_subject() {
        let store = TripleStore::new();
        let subject = RdfTerm::iri("http://example.org/person/1");
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdfs_label(),
            RdfTerm::literal("Alice"),
        ));
        // Different subject — must NOT be returned by the query below.
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/2"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        let results = store.query(Some(&subject), None, None);
        assert_eq!(results.len(), 2);
    }
    // stats() counts distinct subjects/predicates/objects, not raw triples.
    #[test]
    fn test_statistics() {
        let store = TripleStore::new();
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s1"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o1"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s2"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o2"),
        ));
        let stats = store.stats();
        assert_eq!(stats.triple_count, 2);
        assert_eq!(stats.subject_count, 2);
        // p1 is shared, so only one distinct predicate.
        assert_eq!(stats.predicate_count, 1);
        assert_eq!(stats.object_count, 2);
    }
}

View File

@@ -0,0 +1,220 @@
// AST types for SQL statements
use serde::{Deserialize, Serialize};
/// SQL statement types
///
/// Top-level parse result produced by `SqlParser::parse` and consumed by
/// `SqlEngine::execute`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SqlStatement {
    /// CREATE TABLE name (columns)
    CreateTable { name: String, columns: Vec<Column> },
    /// INSERT INTO table (columns) VALUES (values)
    Insert {
        table: String,
        // Column names, positionally matched against `values`.
        columns: Vec<String>,
        values: Vec<Value>,
    },
    /// SELECT columns FROM table WHERE condition ORDER BY ... LIMIT k
    Select {
        columns: Vec<SelectColumn>,
        from: String,
        // Optional WHERE filter expression.
        where_clause: Option<Expression>,
        // Optional ORDER BY; a Distance expression here triggers k-NN search.
        order_by: Option<OrderBy>,
        limit: Option<usize>,
    },
    /// DROP TABLE name
    Drop { table: String },
}
/// Column definition for CREATE TABLE
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Column {
    // Column name as written in the CREATE TABLE statement.
    pub name: String,
    // Declared type: TEXT, INTEGER, REAL, or VECTOR(n).
    pub data_type: DataType,
}
/// Data types supported in SQL
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DataType {
    /// TEXT type for strings
    Text,
    /// INTEGER type
    Integer,
    /// REAL/FLOAT type
    Real,
    /// VECTOR(dimensions) type for vector data.
    /// The payload is the fixed dimensionality declared in CREATE TABLE.
    Vector(usize),
}
/// Column selector in SELECT
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SelectColumn {
    /// SELECT *
    Wildcard,
    /// SELECT column_name
    Name(String),
    /// SELECT expression AS alias
    // NOTE(review): the hand-rolled parser currently emits only Wildcard
    // and Name; this variant exists for future expression projections.
    Expression {
        expr: Expression,
        alias: Option<String>,
    },
}
/// SQL expressions
///
/// Used both for WHERE clauses and for the vector-distance ORDER BY form.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Expression {
    /// Column reference
    Column(String),
    /// Literal value
    Literal(Value),
    /// Binary operation (e.g., a = b, a > b)
    BinaryOp {
        left: Box<Expression>,
        op: BinaryOperator,
        right: Box<Expression>,
    },
    /// Logical AND
    And(Box<Expression>, Box<Expression>),
    /// Logical OR
    Or(Box<Expression>, Box<Expression>),
    /// NOT expression
    Not(Box<Expression>),
    /// Function call
    Function { name: String, args: Vec<Expression> },
    /// Vector literal [1.0, 2.0, 3.0]
    VectorLiteral(Vec<f32>),
    /// Distance operation: column <-> vector
    /// Used for ORDER BY embedding <-> $vector
    Distance {
        column: String,
        metric: DistanceMetric,
        vector: Vec<f32>,
    },
}
/// Binary operators
///
/// Comparison operators usable in WHERE clauses; the executor currently
/// accepts only `Eq` when building metadata filters.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum BinaryOperator {
    /// =
    Eq,
    /// !=
    NotEq,
    /// >
    Gt,
    /// >=
    GtEq,
    /// <
    Lt,
    /// <=
    LtEq,
    /// LIKE
    Like,
}
/// Distance metrics for vector similarity
///
/// Parsed from the pgvector-style operators shown on each variant.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum DistanceMetric {
    /// L2 distance: <->
    L2,
    /// Cosine distance: <=>
    Cosine,
    /// Dot product: <#>
    DotProduct,
}
/// ORDER BY clause
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OrderBy {
    // Typically an `Expression::Distance` for vector k-NN ordering.
    pub expression: Expression,
    // Requested sort direction.
    pub direction: OrderDirection,
}
/// Sort direction
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum OrderDirection {
    /// Ascending (SQL default).
    Asc,
    /// Descending.
    Desc,
}
/// SQL values
///
/// Runtime value for literals, INSERT payloads, and result rows.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Value {
    /// SQL NULL.
    Null,
    /// String value.
    Text(String),
    /// 64-bit signed integer.
    Integer(i64),
    /// 64-bit float.
    Real(f64),
    /// Embedding vector.
    Vector(Vec<f32>),
    /// Boolean value.
    Boolean(bool),
}
impl Value {
/// Convert to JSON value for metadata storage
pub fn to_json(&self) -> serde_json::Value {
match self {
Value::Null => serde_json::Value::Null,
Value::Text(s) => serde_json::Value::String(s.clone()),
Value::Integer(i) => serde_json::Value::Number((*i).into()),
Value::Real(f) => {
serde_json::Value::Number(serde_json::Number::from_f64(*f).unwrap_or(0.into()))
}
Value::Vector(v) => serde_json::Value::Array(
v.iter()
.map(|f| {
serde_json::Value::Number(
serde_json::Number::from_f64(*f as f64).unwrap_or(0.into()),
)
})
.collect(),
),
Value::Boolean(b) => serde_json::Value::Bool(*b),
}
}
/// Parse from JSON value
pub fn from_json(json: &serde_json::Value) -> Self {
match json {
serde_json::Value::Null => Value::Null,
serde_json::Value::Bool(b) => Value::Boolean(*b),
serde_json::Value::Number(n) => {
if let Some(i) = n.as_i64() {
Value::Integer(i)
} else if let Some(f) = n.as_f64() {
Value::Real(f)
} else {
Value::Null
}
}
serde_json::Value::String(s) => Value::Text(s.clone()),
serde_json::Value::Array(arr) => {
// Try to parse as vector
let floats: Option<Vec<f32>> =
arr.iter().map(|v| v.as_f64().map(|f| f as f32)).collect();
if let Some(vec) = floats {
Value::Vector(vec)
} else {
Value::Null
}
}
serde_json::Value::Object(_) => Value::Null,
}
}
}
/// Human-readable, SQL-flavoured rendering of a value.
///
/// NOTE(review): `Text` is wrapped in single quotes without escaping any
/// embedded quotes — this output is for display, not for re-parsing as SQL.
impl std::fmt::Display for Value {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Value::Null => f.write_str("NULL"),
            Value::Text(s) => write!(f, "'{}'", s),
            Value::Integer(i) => write!(f, "{}", i),
            Value::Real(r) => write!(f, "{}", r),
            Value::Boolean(b) => write!(f, "{}", b),
            Value::Vector(v) => {
                let parts: Vec<String> = v.iter().map(f32::to_string).collect();
                write!(f, "[{}]", parts.join(", "))
            }
        }
    }
}

View File

@@ -0,0 +1,561 @@
// SQL executor that integrates with ruvector-core VectorDB
use super::ast::*;
use crate::{ErrorKind, RvLiteError};
use parking_lot::RwLock;
use ruvector_core::{SearchQuery, VectorDB, VectorEntry};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Table schema definition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TableSchema {
    // Table name as given in CREATE TABLE.
    pub name: String,
    // All declared columns, including the vector column.
    pub columns: Vec<Column>,
    // Name of the VECTOR column; populated by `SqlEngine::create_table`.
    pub vector_column: Option<String>,
    // Dimensionality of the VECTOR column; populated alongside it.
    pub vector_dimensions: Option<usize>,
}
impl TableSchema {
    /// Name and dimensionality of the first VECTOR-typed column, if any.
    fn find_vector_column(&self) -> Option<(String, usize)> {
        self.columns.iter().find_map(|col| match col.data_type {
            DataType::Vector(dims) => Some((col.name.clone(), dims)),
            _ => None,
        })
    }
    /// Check that every requested column exists in this schema.
    fn validate_columns(&self, columns: &[String]) -> Result<(), RvLiteError> {
        for requested in columns {
            let known = self.columns.iter().any(|c| &c.name == requested);
            if !known {
                return Err(RvLiteError {
                    message: format!(
                        "Column '{}' not found in table '{}'",
                        requested, self.name
                    ),
                    kind: ErrorKind::SqlError,
                });
            }
        }
        Ok(())
    }
    /// Declared data type of the named column, if it exists.
    fn get_column_type(&self, name: &str) -> Option<&DataType> {
        for col in &self.columns {
            if col.name == name {
                return Some(&col.data_type);
            }
        }
        None
    }
}
/// SQL execution result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionResult {
    // Result rows keyed by column name; empty for DDL and INSERT.
    pub rows: Vec<HashMap<String, Value>>,
    // Number of rows written: 1 per INSERT, 0 otherwise.
    pub rows_affected: usize,
}
/// SQL Engine that manages tables and executes queries
///
/// Each table is backed by its own in-memory `VectorDB`; the two maps are
/// kept keyed by table name and guarded by independent `RwLock`s.
pub struct SqlEngine {
    /// Table schemas
    schemas: RwLock<HashMap<String, TableSchema>>,
    /// Vector databases (one per table)
    databases: RwLock<HashMap<String, VectorDB>>,
}
impl SqlEngine {
/// Create a new SQL engine
pub fn new() -> Self {
SqlEngine {
schemas: RwLock::new(HashMap::new()),
databases: RwLock::new(HashMap::new()),
}
}
/// Execute a SQL statement
pub fn execute(&self, statement: SqlStatement) -> Result<ExecutionResult, RvLiteError> {
match statement {
SqlStatement::CreateTable { name, columns } => self.create_table(name, columns),
SqlStatement::Insert {
table,
columns,
values,
} => self.insert(table, columns, values),
SqlStatement::Select {
columns,
from,
where_clause,
order_by,
limit,
} => self.select(columns, from, where_clause, order_by, limit),
SqlStatement::Drop { table } => self.drop_table(table),
}
}
fn create_table(
&self,
name: String,
columns: Vec<Column>,
) -> Result<ExecutionResult, RvLiteError> {
let mut schemas = self.schemas.write();
if schemas.contains_key(&name) {
return Err(RvLiteError {
message: format!("Table '{}' already exists", name),
kind: ErrorKind::SqlError,
});
}
// Find vector column
let (vector_column, vector_dimensions) = columns
.iter()
.find_map(|col| {
if let DataType::Vector(dims) = col.data_type {
Some((col.name.clone(), dims))
} else {
None
}
})
.ok_or_else(|| RvLiteError {
message: "Table must have at least one VECTOR column".to_string(),
kind: ErrorKind::SqlError,
})?;
let schema = TableSchema {
name: name.clone(),
columns,
vector_column: Some(vector_column),
vector_dimensions: Some(vector_dimensions),
};
// Create vector database for this table
let db_options = ruvector_core::types::DbOptions {
dimensions: vector_dimensions,
distance_metric: ruvector_core::DistanceMetric::Cosine,
storage_path: "memory://".to_string(),
hnsw_config: None,
quantization: None,
};
let db = VectorDB::new(db_options).map_err(|e| RvLiteError {
message: format!("Failed to create vector database: {}", e),
kind: ErrorKind::VectorError,
})?;
let mut databases = self.databases.write();
databases.insert(name.clone(), db);
schemas.insert(name, schema);
Ok(ExecutionResult {
rows: Vec::new(),
rows_affected: 0,
})
}
fn insert(
&self,
table: String,
columns: Vec<String>,
values: Vec<Value>,
) -> Result<ExecutionResult, RvLiteError> {
let schemas = self.schemas.read();
let schema = schemas.get(&table).ok_or_else(|| RvLiteError {
message: format!("Table '{}' not found", table),
kind: ErrorKind::SqlError,
})?;
// Validate columns
schema.validate_columns(&columns)?;
if columns.len() != values.len() {
return Err(RvLiteError {
message: format!(
"Column count ({}) does not match value count ({})",
columns.len(),
values.len()
),
kind: ErrorKind::SqlError,
});
}
// Extract vector and metadata
let mut vector: Option<Vec<f32>> = None;
let mut metadata = HashMap::new();
let mut id: Option<String> = None;
for (col, val) in columns.iter().zip(values.iter()) {
if let Some(DataType::Vector(_)) = schema.get_column_type(col) {
if let Value::Vector(v) = val {
vector = Some(v.clone());
} else {
return Err(RvLiteError {
message: format!("Expected vector value for column '{}'", col),
kind: ErrorKind::SqlError,
});
}
} else {
// Store as metadata
metadata.insert(col.clone(), val.to_json());
// Use 'id' column as vector ID if present
if col == "id" {
if let Value::Text(s) = val {
id = Some(s.clone());
}
}
}
}
let vector = vector.ok_or_else(|| RvLiteError {
message: "No vector value provided".to_string(),
kind: ErrorKind::SqlError,
})?;
// Validate vector dimensions
if let Some(expected_dims) = schema.vector_dimensions {
if vector.len() != expected_dims {
return Err(RvLiteError {
message: format!(
"Vector dimension mismatch: expected {}, got {}",
expected_dims,
vector.len()
),
kind: ErrorKind::SqlError,
});
}
}
// Insert into vector database
let entry = VectorEntry {
id,
vector,
metadata: Some(metadata),
};
let databases = self.databases.read();
let db = databases.get(&table).ok_or_else(|| RvLiteError {
message: format!("Database for table '{}' not found", table),
kind: ErrorKind::SqlError,
})?;
db.insert(entry).map_err(|e| RvLiteError {
message: format!("Failed to insert: {}", e),
kind: ErrorKind::VectorError,
})?;
Ok(ExecutionResult {
rows: Vec::new(),
rows_affected: 1,
})
}
fn select(
&self,
_columns: Vec<SelectColumn>,
from: String,
where_clause: Option<Expression>,
order_by: Option<OrderBy>,
limit: Option<usize>,
) -> Result<ExecutionResult, RvLiteError> {
let schemas = self.schemas.read();
let schema = schemas.get(&from).ok_or_else(|| RvLiteError {
message: format!("Table '{}' not found", from),
kind: ErrorKind::SqlError,
})?;
let databases = self.databases.read();
let db = databases.get(&from).ok_or_else(|| RvLiteError {
message: format!("Database for table '{}' not found", from),
kind: ErrorKind::SqlError,
})?;
// Handle vector similarity search
if let Some(order_by) = order_by {
if let Expression::Distance {
column: _,
metric: _,
vector,
} = order_by.expression
{
let k = limit.unwrap_or(10);
// Build filter from WHERE clause
let filter = if let Some(where_expr) = where_clause {
Some(self.build_filter(where_expr)?)
} else {
None
};
let query = SearchQuery {
vector,
k,
filter,
ef_search: None,
};
let results = db.search(query).map_err(|e| RvLiteError {
message: format!("Search failed: {}", e),
kind: ErrorKind::VectorError,
})?;
// Convert results to rows
let rows: Vec<HashMap<String, Value>> = results
.into_iter()
.map(|result| {
let mut row = HashMap::new();
// Add vector if present
if let Some(vec_col) = &schema.vector_column {
if let Some(vector) = result.vector {
row.insert(vec_col.clone(), Value::Vector(vector));
}
}
// Add metadata
if let Some(metadata) = result.metadata {
for (key, val) in metadata {
row.insert(key, Value::from_json(&val));
}
}
// Add distance score
row.insert("_distance".to_string(), Value::Real(result.score as f64));
row
})
.collect();
return Ok(ExecutionResult {
rows,
rows_affected: 0,
});
}
}
// Non-vector query - return all rows (scan all vectors)
// This is essentially a table scan through the vector database
let k = limit.unwrap_or(1000); // Default to 1000 rows max
// Create a zero vector for exhaustive search
let dims = schema.vector_dimensions.unwrap_or(3);
let query_vector = vec![0.0f32; dims];
// Build filter from WHERE clause
let filter = if let Some(where_expr) = where_clause {
Some(self.build_filter(where_expr)?)
} else {
None
};
let query = SearchQuery {
vector: query_vector,
k,
filter,
ef_search: None,
};
let results = db.search(query).map_err(|e| RvLiteError {
message: format!("Search failed: {}", e),
kind: ErrorKind::VectorError,
})?;
// Convert results to rows
let rows: Vec<HashMap<String, Value>> = results
.into_iter()
.map(|result| {
let mut row = HashMap::new();
// Add vector if present
if let Some(vec_col) = &schema.vector_column {
if let Some(vector) = result.vector {
row.insert(vec_col.clone(), Value::Vector(vector));
}
}
// Add metadata
if let Some(metadata) = result.metadata {
for (key, val) in metadata {
row.insert(key, Value::from_json(&val));
}
}
row
})
.collect();
Ok(ExecutionResult {
rows,
rows_affected: 0,
})
}
fn drop_table(&self, table: String) -> Result<ExecutionResult, RvLiteError> {
let mut schemas = self.schemas.write();
let mut databases = self.databases.write();
schemas.remove(&table).ok_or_else(|| RvLiteError {
message: format!("Table '{}' not found", table),
kind: ErrorKind::SqlError,
})?;
databases.remove(&table);
Ok(ExecutionResult {
rows: Vec::new(),
rows_affected: 0,
})
}
/// Build metadata filter from WHERE expression
fn build_filter(
&self,
expr: Expression,
) -> Result<HashMap<String, serde_json::Value>, RvLiteError> {
let mut filter = HashMap::new();
match expr {
Expression::BinaryOp { left, op, right } => {
if let (Expression::Column(col), Expression::Literal(val)) = (*left, *right) {
if op == BinaryOperator::Eq {
filter.insert(col, val.to_json());
} else {
return Err(RvLiteError {
message: "Only equality filters supported in WHERE clause".to_string(),
kind: ErrorKind::NotImplemented,
});
}
}
}
Expression::And(left, right) => {
let left_filter = self.build_filter(*left)?;
let right_filter = self.build_filter(*right)?;
filter.extend(left_filter);
filter.extend(right_filter);
}
_ => {
return Err(RvLiteError {
message: "Unsupported WHERE clause expression".to_string(),
kind: ErrorKind::NotImplemented,
});
}
}
Ok(filter)
}
/// List all tables
pub fn list_tables(&self) -> Vec<String> {
self.schemas.read().keys().cloned().collect()
}
/// Get table schema
pub fn get_schema(&self, table: &str) -> Option<TableSchema> {
self.schemas.read().get(table).cloned()
}
}
impl Default for SqlEngine {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    // CREATE TABLE followed by one INSERT should report one affected row.
    #[test]
    fn test_create_and_insert() {
        let engine = SqlEngine::new();
        // Create table
        let create = SqlStatement::CreateTable {
            name: "docs".to_string(),
            columns: vec![
                Column {
                    name: "id".to_string(),
                    data_type: DataType::Text,
                },
                Column {
                    name: "content".to_string(),
                    data_type: DataType::Text,
                },
                Column {
                    name: "embedding".to_string(),
                    data_type: DataType::Vector(3),
                },
            ],
        };
        engine.execute(create).unwrap();
        // Insert row
        let insert = SqlStatement::Insert {
            table: "docs".to_string(),
            columns: vec![
                "id".to_string(),
                "content".to_string(),
                "embedding".to_string(),
            ],
            values: vec![
                Value::Text("1".to_string()),
                Value::Text("hello".to_string()),
                Value::Vector(vec![1.0, 2.0, 3.0]),
            ],
        };
        let result = engine.execute(insert).unwrap();
        assert_eq!(result.rows_affected, 1);
    }
    // k-NN SELECT with ORDER BY <-> and LIMIT 3 must return exactly 3 rows.
    #[test]
    fn test_vector_search() {
        let engine = SqlEngine::new();
        // Create table
        let create = SqlStatement::CreateTable {
            name: "docs".to_string(),
            columns: vec![
                Column {
                    name: "id".to_string(),
                    data_type: DataType::Text,
                },
                Column {
                    name: "embedding".to_string(),
                    data_type: DataType::Vector(3),
                },
            ],
        };
        engine.execute(create).unwrap();
        // Insert rows: embeddings are (i, 2i, 3i) for i in 0..5.
        for i in 0..5 {
            let insert = SqlStatement::Insert {
                table: "docs".to_string(),
                columns: vec!["id".to_string(), "embedding".to_string()],
                values: vec![
                    Value::Text(format!("{}", i)),
                    Value::Vector(vec![i as f32, i as f32 * 2.0, i as f32 * 3.0]),
                ],
            };
            engine.execute(insert).unwrap();
        }
        // Search near the i == 2 embedding.
        let select = SqlStatement::Select {
            columns: vec![SelectColumn::Wildcard],
            from: "docs".to_string(),
            where_clause: None,
            order_by: Some(OrderBy {
                expression: Expression::Distance {
                    column: "embedding".to_string(),
                    metric: DistanceMetric::L2,
                    vector: vec![2.0, 4.0, 6.0],
                },
                direction: OrderDirection::Asc,
            }),
            limit: Some(3),
        };
        let result = engine.execute(select).unwrap();
        assert_eq!(result.rows.len(), 3);
    }
}

View File

@@ -0,0 +1,13 @@
// SQL query engine module for rvlite
// Provides SQL interface for vector database operations with WASM compatibility
//
// Layout:
// - `ast`: statement/expression types shared by parser and executor
// - `parser`: hand-rolled tokenizer + recursive-descent parser (no deps)
// - `executor`: `SqlEngine`, which maps each table onto a VectorDB
mod ast;
mod executor;
mod parser;
pub use ast::*;
pub use executor::{ExecutionResult, SqlEngine};
pub use parser::{ParseError, SqlParser};
#[cfg(test)]
mod tests;

View File

@@ -0,0 +1,823 @@
// Hand-rolled SQL parser for WASM compatibility
// Implements recursive descent parsing for vector-specific SQL
use super::ast::*;
use std::fmt;
/// Parse error type
#[derive(Debug, Clone, PartialEq)]
pub struct ParseError {
    // Human-readable description of what went wrong.
    pub message: String,
    // Offset where the error was detected: a character index while
    // tokenizing, a token index while parsing.
    pub position: usize,
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Parse error at position {}: {}",
self.position, self.message
)
}
}
// Marker impl so ParseError can be used as a `dyn std::error::Error`.
impl std::error::Error for ParseError {}
/// Token types
///
/// Produced by the tokenizer; the stream is always terminated by `Eof`.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    // Keywords
    Select,
    From,
    Where,
    Insert,
    Into,
    Values,
    Create,
    Table,
    Drop,
    // Lexed from the two consecutive words "ORDER BY".
    OrderBy,
    Limit,
    And,
    Or,
    Not,
    As,
    // Data types
    Text,
    Integer,
    Real,
    Vector,
    // Operators
    Eq,
    NotEq,
    Gt,
    GtEq,
    Lt,
    LtEq,
    Like,
    // Distance operators
    L2Distance,     // <->
    CosineDistance, // <=>
    DotProduct,     // <#>
    // Delimiters
    LeftParen,
    RightParen,
    LeftBracket,
    RightBracket,
    Comma,
    Semicolon,
    Asterisk,
    // Values
    Identifier(String),
    StringLiteral(String),
    // Raw digit/decimal text; converted to Integer/Real by the parser.
    NumberLiteral(String),
    // End
    Eof,
}
/// Tokenizer (lexer)
struct Tokenizer {
    // Input SQL buffered as chars for O(1) indexed access.
    input: Vec<char>,
    // Index of the next unread char in `input`.
    position: usize,
}
impl Tokenizer {
    /// Wrap the input SQL text.
    fn new(input: &str) -> Self {
        Tokenizer {
            input: input.chars().collect(),
            position: 0,
        }
    }
    /// Current (unconsumed) character, or `None` at end of input.
    fn current(&self) -> Option<char> {
        self.input.get(self.position).copied()
    }
    /// Consume one character.
    fn advance(&mut self) {
        self.position += 1;
    }
    /// Skip over a run of whitespace characters.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.current() {
            if ch.is_whitespace() {
                self.advance();
            } else {
                break;
            }
        }
    }
    /// Read an identifier: a run of alphanumerics and underscores.
    fn read_identifier(&mut self) -> String {
        let mut result = String::new();
        while let Some(ch) = self.current() {
            if ch.is_alphanumeric() || ch == '_' {
                result.push(ch);
                self.advance();
            } else {
                break;
            }
        }
        result
    }
    /// Read a single-quoted string literal (opening quote at `position`).
    ///
    /// No escape sequences are supported: the first `'` always terminates
    /// the literal.
    fn read_string(&mut self) -> Result<String, ParseError> {
        let mut result = String::new();
        self.advance(); // Skip opening quote
        while let Some(ch) = self.current() {
            if ch == '\'' {
                self.advance();
                return Ok(result);
            } else {
                result.push(ch);
                self.advance();
            }
        }
        Err(ParseError {
            message: "Unterminated string literal".to_string(),
            position: self.position,
        })
    }
    /// Read an unsigned numeric literal with at most one decimal point.
    fn read_number(&mut self) -> String {
        let mut result = String::new();
        let mut has_dot = false;
        while let Some(ch) = self.current() {
            if ch.is_numeric() {
                result.push(ch);
                self.advance();
            } else if ch == '.' && !has_dot {
                has_dot = true;
                result.push(ch);
                self.advance();
            } else {
                break;
            }
        }
        result
    }
    /// Produce the next token, or `Token::Eof` at end of input.
    ///
    /// # Errors
    /// Returns a `ParseError` for unterminated strings, a lone `!`, an
    /// incomplete `<-`/`<#` operator, or any unrecognized character.
    fn next_token(&mut self) -> Result<Token, ParseError> {
        self.skip_whitespace();
        let ch = match self.current() {
            Some(c) => c,
            None => return Ok(Token::Eof),
        };
        match ch {
            '(' => {
                self.advance();
                Ok(Token::LeftParen)
            }
            ')' => {
                self.advance();
                Ok(Token::RightParen)
            }
            '[' => {
                self.advance();
                Ok(Token::LeftBracket)
            }
            ']' => {
                self.advance();
                Ok(Token::RightBracket)
            }
            ',' => {
                self.advance();
                Ok(Token::Comma)
            }
            ';' => {
                self.advance();
                Ok(Token::Semicolon)
            }
            '*' => {
                self.advance();
                Ok(Token::Asterisk)
            }
            '=' => {
                self.advance();
                Ok(Token::Eq)
            }
            '!' => {
                self.advance();
                if self.current() == Some('=') {
                    self.advance();
                    Ok(Token::NotEq)
                } else {
                    Err(ParseError {
                        message: "Expected '=' after '!'".to_string(),
                        position: self.position,
                    })
                }
            }
            '>' => {
                self.advance();
                if self.current() == Some('=') {
                    self.advance();
                    Ok(Token::GtEq)
                } else {
                    Ok(Token::Gt)
                }
            }
            // '<' starts Lt, LtEq, or one of the three distance operators.
            '<' => {
                self.advance();
                match self.current() {
                    Some('=') => {
                        self.advance();
                        if self.current() == Some('>') {
                            self.advance();
                            Ok(Token::CosineDistance)
                        } else {
                            Ok(Token::LtEq)
                        }
                    }
                    Some('-') => {
                        self.advance();
                        if self.current() == Some('>') {
                            self.advance();
                            Ok(Token::L2Distance)
                        } else {
                            Err(ParseError {
                                message: "Expected '>' after '<-'".to_string(),
                                position: self.position,
                            })
                        }
                    }
                    Some('#') => {
                        self.advance();
                        if self.current() == Some('>') {
                            self.advance();
                            Ok(Token::DotProduct)
                        } else {
                            Err(ParseError {
                                message: "Expected '>' after '<#'".to_string(),
                                position: self.position,
                            })
                        }
                    }
                    _ => Ok(Token::Lt),
                }
            }
            '\'' => Ok(Token::StringLiteral(self.read_string()?)),
            _ if ch.is_numeric() => Ok(Token::NumberLiteral(self.read_number())),
            _ if ch.is_alphabetic() || ch == '_' => {
                let ident = self.read_identifier();
                Ok(match ident.to_uppercase().as_str() {
                    "SELECT" => Token::Select,
                    "FROM" => Token::From,
                    "WHERE" => Token::Where,
                    "INSERT" => Token::Insert,
                    "INTO" => Token::Into,
                    "VALUES" => Token::Values,
                    "CREATE" => Token::Create,
                    "TABLE" => Token::Table,
                    "DROP" => Token::Drop,
                    "ORDER" => {
                        // Two-word keyword "ORDER BY": look ahead one word.
                        // BUGFIX: previously the lookahead identifier was
                        // consumed and dropped even when it was not "BY",
                        // losing a token after an identifier named `order`.
                        // Save the position so we can rewind.
                        let saved = self.position;
                        self.skip_whitespace();
                        if self.read_identifier().to_uppercase() == "BY" {
                            Token::OrderBy
                        } else {
                            self.position = saved;
                            Token::Identifier(ident)
                        }
                    }
                    "LIMIT" => Token::Limit,
                    "AND" => Token::And,
                    "OR" => Token::Or,
                    "NOT" => Token::Not,
                    "AS" => Token::As,
                    "TEXT" => Token::Text,
                    "INTEGER" => Token::Integer,
                    "REAL" => Token::Real,
                    "VECTOR" => Token::Vector,
                    "LIKE" => Token::Like,
                    _ => Token::Identifier(ident),
                })
            }
            _ => Err(ParseError {
                message: format!("Unexpected character: {}", ch),
                position: self.position,
            }),
        }
    }
}
/// SQL Parser
///
/// Recursive-descent parser over the token stream produced by `Tokenizer`.
pub struct SqlParser {
    // Fully tokenized input, always terminated by `Token::Eof`.
    tokens: Vec<Token>,
    // Index of the current token in `tokens`.
    position: usize,
}
impl SqlParser {
    /// Create a new parser from SQL string
    ///
    /// Tokenizes the whole input eagerly; lexing errors surface here.
    pub fn new(input: &str) -> Result<Self, ParseError> {
        let mut tokenizer = Tokenizer::new(input);
        let mut tokens = Vec::new();
        loop {
            let token = tokenizer.next_token()?;
            if token == Token::Eof {
                tokens.push(token);
                break;
            }
            tokens.push(token);
        }
        Ok(SqlParser {
            tokens,
            position: 0,
        })
    }
    /// Parse SQL statement
    ///
    /// Dispatches on the leading keyword (SELECT / INSERT / CREATE / DROP).
    pub fn parse(&mut self) -> Result<SqlStatement, ParseError> {
        let token = self.current().clone();
        match token {
            Token::Select => self.parse_select(),
            Token::Insert => self.parse_insert(),
            Token::Create => self.parse_create(),
            Token::Drop => self.parse_drop(),
            _ => Err(ParseError {
                message: format!("Expected SELECT, INSERT, CREATE, or DROP, got {:?}", token),
                position: self.position,
            }),
        }
    }
    /// Current token (Eof once the stream is exhausted).
    fn current(&self) -> &Token {
        self.tokens.get(self.position).unwrap_or(&Token::Eof)
    }
    /// Consume the current token.
    fn advance(&mut self) {
        if self.position < self.tokens.len() {
            self.position += 1;
        }
    }
    /// Consume the current token if it equals `expected`, else error.
    fn expect(&mut self, expected: Token) -> Result<(), ParseError> {
        let current = self.current().clone();
        if current == expected {
            self.advance();
            Ok(())
        } else {
            Err(ParseError {
                message: format!("Expected {:?}, got {:?}", expected, current),
                position: self.position,
            })
        }
    }
    /// SELECT cols FROM table [WHERE expr] [ORDER BY dist] [LIMIT n]
    fn parse_select(&mut self) -> Result<SqlStatement, ParseError> {
        self.expect(Token::Select)?;
        let columns = self.parse_select_columns()?;
        self.expect(Token::From)?;
        let from = self.parse_identifier()?;
        let where_clause = if matches!(self.current(), Token::Where) {
            self.advance();
            Some(self.parse_expression()?)
        } else {
            None
        };
        let order_by = if matches!(self.current(), Token::OrderBy) {
            self.advance();
            Some(self.parse_order_by()?)
        } else {
            None
        };
        let limit = if matches!(self.current(), Token::Limit) {
            self.advance();
            Some(self.parse_number()? as usize)
        } else {
            None
        };
        Ok(SqlStatement::Select {
            columns,
            from,
            where_clause,
            order_by,
            limit,
        })
    }
    /// `*` or a comma-separated list of bare column names.
    fn parse_select_columns(&mut self) -> Result<Vec<SelectColumn>, ParseError> {
        if matches!(self.current(), Token::Asterisk) {
            self.advance();
            return Ok(vec![SelectColumn::Wildcard]);
        }
        let mut columns = Vec::new();
        loop {
            let name = self.parse_identifier()?;
            columns.push(SelectColumn::Name(name));
            if !matches!(self.current(), Token::Comma) {
                break;
            }
            self.advance();
        }
        Ok(columns)
    }
    /// INSERT INTO table (cols) VALUES (vals)
    fn parse_insert(&mut self) -> Result<SqlStatement, ParseError> {
        self.expect(Token::Insert)?;
        self.expect(Token::Into)?;
        let table = self.parse_identifier()?;
        self.expect(Token::LeftParen)?;
        let columns = self.parse_identifier_list()?;
        self.expect(Token::RightParen)?;
        self.expect(Token::Values)?;
        self.expect(Token::LeftParen)?;
        let values = self.parse_value_list()?;
        self.expect(Token::RightParen)?;
        Ok(SqlStatement::Insert {
            table,
            columns,
            values,
        })
    }
    /// CREATE TABLE name (col type, ...)
    fn parse_create(&mut self) -> Result<SqlStatement, ParseError> {
        self.expect(Token::Create)?;
        self.expect(Token::Table)?;
        let name = self.parse_identifier()?;
        self.expect(Token::LeftParen)?;
        let columns = self.parse_column_definitions()?;
        self.expect(Token::RightParen)?;
        Ok(SqlStatement::CreateTable { name, columns })
    }
    /// DROP TABLE name
    fn parse_drop(&mut self) -> Result<SqlStatement, ParseError> {
        self.expect(Token::Drop)?;
        self.expect(Token::Table)?;
        let table = self.parse_identifier()?;
        Ok(SqlStatement::Drop { table })
    }
    /// Comma-separated `name TYPE` pairs inside CREATE TABLE.
    fn parse_column_definitions(&mut self) -> Result<Vec<Column>, ParseError> {
        let mut columns = Vec::new();
        loop {
            let name = self.parse_identifier()?;
            let data_type = self.parse_data_type()?;
            columns.push(Column { name, data_type });
            if !matches!(self.current(), Token::Comma) {
                break;
            }
            self.advance();
        }
        Ok(columns)
    }
    /// TEXT | INTEGER | REAL | VECTOR(dims)
    fn parse_data_type(&mut self) -> Result<DataType, ParseError> {
        match self.current().clone() {
            Token::Text => {
                self.advance();
                Ok(DataType::Text)
            }
            Token::Integer => {
                self.advance();
                Ok(DataType::Integer)
            }
            Token::Real => {
                self.advance();
                Ok(DataType::Real)
            }
            Token::Vector => {
                self.advance();
                self.expect(Token::LeftParen)?;
                let dims = self.parse_number()? as usize;
                self.expect(Token::RightParen)?;
                Ok(DataType::Vector(dims))
            }
            _ => Err(ParseError {
                message: "Expected data type (TEXT, INTEGER, REAL, or VECTOR)".to_string(),
                position: self.position,
            }),
        }
    }
    /// Entry point for WHERE expressions (lowest precedence: OR).
    fn parse_expression(&mut self) -> Result<Expression, ParseError> {
        self.parse_or_expression()
    }
    /// Left-associative OR chain.
    fn parse_or_expression(&mut self) -> Result<Expression, ParseError> {
        let mut left = self.parse_and_expression()?;
        while matches!(self.current(), Token::Or) {
            self.advance();
            let right = self.parse_and_expression()?;
            left = Expression::Or(Box::new(left), Box::new(right));
        }
        Ok(left)
    }
    /// Left-associative AND chain (binds tighter than OR).
    fn parse_and_expression(&mut self) -> Result<Expression, ParseError> {
        let mut left = self.parse_comparison_expression()?;
        while matches!(self.current(), Token::And) {
            self.advance();
            let right = self.parse_comparison_expression()?;
            left = Expression::And(Box::new(left), Box::new(right));
        }
        Ok(left)
    }
    /// `primary [op primary]` — with no operator it is just the primary.
    fn parse_comparison_expression(&mut self) -> Result<Expression, ParseError> {
        let left = self.parse_primary_expression()?;
        let op = match self.current() {
            Token::Eq => BinaryOperator::Eq,
            Token::NotEq => BinaryOperator::NotEq,
            Token::Gt => BinaryOperator::Gt,
            Token::GtEq => BinaryOperator::GtEq,
            Token::Lt => BinaryOperator::Lt,
            Token::LtEq => BinaryOperator::LtEq,
            Token::Like => BinaryOperator::Like,
            _ => return Ok(left),
        };
        self.advance();
        let right = self.parse_primary_expression()?;
        Ok(Expression::BinaryOp {
            left: Box::new(left),
            op,
            right: Box::new(right),
        })
    }
    /// Identifier, literal, vector literal, or NOT-prefixed primary.
    fn parse_primary_expression(&mut self) -> Result<Expression, ParseError> {
        match self.current().clone() {
            Token::Identifier(name) => {
                self.advance();
                Ok(Expression::Column(name))
            }
            Token::StringLiteral(s) => {
                self.advance();
                Ok(Expression::Literal(Value::Text(s)))
            }
            Token::NumberLiteral(n) => {
                self.advance();
                // BUGFIX: was `.parse().unwrap()`, which panicked on
                // integer literals overflowing i64.
                let value = self.number_value(&n)?;
                Ok(Expression::Literal(value))
            }
            Token::LeftBracket => {
                self.advance();
                let vec = self.parse_vector_literal()?;
                self.expect(Token::RightBracket)?;
                Ok(Expression::VectorLiteral(vec))
            }
            Token::Not => {
                self.advance();
                let expr = self.parse_primary_expression()?;
                Ok(Expression::Not(Box::new(expr)))
            }
            _ => Err(ParseError {
                message: format!("Unexpected token in expression: {:?}", self.current()),
                position: self.position,
            }),
        }
    }
    /// ORDER BY `column <-> [v, ...]` (or `<=>` / `<#>`).
    ///
    /// NOTE(review): no ASC/DESC keyword is lexed, so direction is always
    /// `Asc` here — confirm whether DESC support is intended.
    fn parse_order_by(&mut self) -> Result<OrderBy, ParseError> {
        // Parse column <-> vector or column <=> vector
        let column = self.parse_identifier()?;
        let metric = match self.current() {
            Token::L2Distance => {
                self.advance();
                DistanceMetric::L2
            }
            Token::CosineDistance => {
                self.advance();
                DistanceMetric::Cosine
            }
            Token::DotProduct => {
                self.advance();
                DistanceMetric::DotProduct
            }
            _ => {
                return Err(ParseError {
                    message: "Expected distance operator (<->, <=>, or <#>)".to_string(),
                    position: self.position,
                });
            }
        };
        let vector = if matches!(self.current(), Token::LeftBracket) {
            self.advance();
            let vec = self.parse_vector_literal()?;
            self.expect(Token::RightBracket)?;
            vec
        } else {
            return Err(ParseError {
                message: "Expected vector literal after distance operator".to_string(),
                position: self.position,
            });
        };
        Ok(OrderBy {
            expression: Expression::Distance {
                column,
                metric,
                vector,
            },
            direction: OrderDirection::Asc,
        })
    }
    /// A single identifier token.
    fn parse_identifier(&mut self) -> Result<String, ParseError> {
        match self.current().clone() {
            Token::Identifier(name) => {
                self.advance();
                Ok(name)
            }
            _ => Err(ParseError {
                message: "Expected identifier".to_string(),
                position: self.position,
            }),
        }
    }
    /// Comma-separated identifiers.
    fn parse_identifier_list(&mut self) -> Result<Vec<String>, ParseError> {
        let mut identifiers = Vec::new();
        loop {
            identifiers.push(self.parse_identifier()?);
            if !matches!(self.current(), Token::Comma) {
                break;
            }
            self.advance();
        }
        Ok(identifiers)
    }
    /// Comma-separated literal values.
    fn parse_value_list(&mut self) -> Result<Vec<Value>, ParseError> {
        let mut values = Vec::new();
        loop {
            values.push(self.parse_value()?);
            if !matches!(self.current(), Token::Comma) {
                break;
            }
            self.advance();
        }
        Ok(values)
    }
    /// String, number, or vector literal.
    fn parse_value(&mut self) -> Result<Value, ParseError> {
        match self.current().clone() {
            Token::StringLiteral(s) => {
                self.advance();
                Ok(Value::Text(s))
            }
            Token::NumberLiteral(n) => {
                self.advance();
                // BUGFIX: was `.parse().unwrap()` — see `number_value`.
                self.number_value(&n)
            }
            Token::LeftBracket => {
                self.advance();
                let vec = self.parse_vector_literal()?;
                self.expect(Token::RightBracket)?;
                Ok(Value::Vector(vec))
            }
            _ => Err(ParseError {
                message: format!("Expected value, got {:?}", self.current()),
                position: self.position,
            }),
        }
    }
    /// Convert a NumberLiteral's raw text into a `Value` without panicking.
    ///
    /// Literals containing '.' become `Real`, otherwise `Integer`.
    /// BUGFIX: replaces the previous `.parse().unwrap()` calls, which
    /// panicked on integer literals that overflow `i64`; such input now
    /// yields a `ParseError` instead.
    fn number_value(&self, n: &str) -> Result<Value, ParseError> {
        let parsed = if n.contains('.') {
            n.parse::<f64>().ok().map(Value::Real)
        } else {
            n.parse::<i64>().ok().map(Value::Integer)
        };
        parsed.ok_or_else(|| ParseError {
            message: format!("Invalid number: {}", n),
            position: self.position,
        })
    }
    /// Bare numbers inside `[...]` (the tokenizer emits no sign tokens,
    /// so components are non-negative).
    fn parse_vector_literal(&mut self) -> Result<Vec<f32>, ParseError> {
        let mut values = Vec::new();
        loop {
            let n = self.parse_number()?;
            values.push(n as f32);
            if !matches!(self.current(), Token::Comma) {
                break;
            }
            self.advance();
        }
        Ok(values)
    }
    /// A single numeric token as f64 (used for LIMIT, dims, vectors).
    fn parse_number(&mut self) -> Result<f64, ParseError> {
        match self.current().clone() {
            Token::NumberLiteral(n) => {
                self.advance();
                n.parse().map_err(|_| ParseError {
                    message: format!("Invalid number: {}", n),
                    position: self.position,
                })
            }
            _ => Err(ParseError {
                message: "Expected number".to_string(),
                position: self.position,
            }),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // CREATE TABLE with a typed VECTOR column yields the expected schema.
    #[test]
    fn test_parse_create_table() {
        let sql = "CREATE TABLE documents (id TEXT, content TEXT, embedding VECTOR(384))";
        let stmt = SqlParser::new(sql).unwrap().parse().unwrap();
        if let SqlStatement::CreateTable { name, columns } = stmt {
            assert_eq!(name, "documents");
            assert_eq!(columns.len(), 3);
            assert_eq!(columns[2].data_type, DataType::Vector(384));
        } else {
            panic!("Expected CreateTable");
        }
    }

    // INSERT with a vector literal parses table, column list, and values.
    #[test]
    fn test_parse_insert() {
        let sql =
            "INSERT INTO documents (id, content, embedding) VALUES ('1', 'hello', [1.0, 2.0, 3.0])";
        let stmt = SqlParser::new(sql).unwrap().parse().unwrap();
        if let SqlStatement::Insert {
            table,
            columns,
            values,
        } = stmt
        {
            assert_eq!(table, "documents");
            assert_eq!(columns.len(), 3);
            assert_eq!(values.len(), 3);
        } else {
            panic!("Expected Insert");
        }
    }

    // ORDER BY <-> plus LIMIT produces a Select carrying both pieces.
    #[test]
    fn test_parse_select_with_vector_search() {
        let sql = "SELECT * FROM documents ORDER BY embedding <-> [1.0, 2.0, 3.0] LIMIT 5";
        let stmt = SqlParser::new(sql).unwrap().parse().unwrap();
        if let SqlStatement::Select {
            order_by, limit, ..
        } = stmt
        {
            assert!(order_by.is_some());
            assert_eq!(limit, Some(5));
        } else {
            panic!("Expected Select");
        }
    }
}

View File

@@ -0,0 +1,147 @@
// Integration tests for SQL engine
//
// These tests drive the full parse -> execute pipeline against a live
// `SqlEngine`, so expectations here encode engine runtime behavior
// (dimension checks, filter precision), not just parser output.
#[cfg(test)]
mod tests {
    use crate::sql::{SqlEngine, SqlParser};
    // End-to-end flow: CREATE succeeds, then an INSERT whose vector length
    // (3) disagrees with the declared dimension (384) must be rejected.
    #[test]
    fn test_full_workflow() {
        let engine = SqlEngine::new();
        // Create table
        let create_sql = "CREATE TABLE documents (id TEXT, content TEXT, embedding VECTOR(384))";
        let mut parser = SqlParser::new(create_sql).unwrap();
        let stmt = parser.parse().unwrap();
        engine.execute(stmt).unwrap();
        // Insert data
        let insert_sql = "INSERT INTO documents (id, content, embedding) VALUES ('doc1', 'hello world', [1.0, 2.0, 3.0])";
        let mut parser = SqlParser::new(insert_sql).unwrap();
        let stmt = parser.parse().unwrap();
        // This will fail due to dimension mismatch (3 vs 384), but tests the flow
        let result = engine.execute(stmt);
        assert!(result.is_err()); // Expected error due to dimension mismatch
    }
    // KNN search: LIMIT 3 over 10 rows must return exactly 3 rows, each
    // carrying an `id` column in the result set.
    #[test]
    fn test_vector_similarity_search() {
        let engine = SqlEngine::new();
        // Create table with small dimensions for testing
        let create_sql = "CREATE TABLE docs (id TEXT, embedding VECTOR(3))";
        let mut parser = SqlParser::new(create_sql).unwrap();
        let stmt = parser.parse().unwrap();
        engine.execute(stmt).unwrap();
        // Insert test data
        for i in 0..10 {
            let insert_sql = format!(
                "INSERT INTO docs (id, embedding) VALUES ('doc{}', [{}, {}, {}])",
                i,
                i,
                i * 2,
                i * 3
            );
            let mut parser = SqlParser::new(&insert_sql).unwrap();
            let stmt = parser.parse().unwrap();
            engine.execute(stmt).unwrap();
        }
        // Search for similar vectors
        let search_sql = "SELECT * FROM docs ORDER BY embedding <-> [5.0, 10.0, 15.0] LIMIT 3";
        let mut parser = SqlParser::new(search_sql).unwrap();
        let stmt = parser.parse().unwrap();
        let result = engine.execute(stmt).unwrap();
        assert_eq!(result.rows.len(), 3);
        // The closest vector should be [5, 10, 15]
        assert!(result.rows[0].get("id").is_some());
    }
    // WHERE + ORDER BY: the engine's filter is acknowledged below as not
    // fully precise, so the row count is bounded rather than pinned.
    #[test]
    fn test_metadata_filtering() {
        let engine = SqlEngine::new();
        // Create table
        let create_sql = "CREATE TABLE docs (id TEXT, category TEXT, embedding VECTOR(3))";
        let mut parser = SqlParser::new(create_sql).unwrap();
        let stmt = parser.parse().unwrap();
        engine.execute(stmt).unwrap();
        // Insert data with categories
        let categories = vec!["tech", "sports", "tech", "news", "sports"];
        for (i, cat) in categories.iter().enumerate() {
            let insert_sql =
                format!(
                "INSERT INTO docs (id, category, embedding) VALUES ('doc{}', '{}', [{}, {}, {}])",
                i, cat, i, i * 2, i * 3
            );
            let mut parser = SqlParser::new(&insert_sql).unwrap();
            let stmt = parser.parse().unwrap();
            engine.execute(stmt).unwrap();
        }
        // Search with filter
        let search_sql = "SELECT * FROM docs WHERE category = 'tech' ORDER BY embedding <-> [2.0, 4.0, 6.0] LIMIT 2";
        let mut parser = SqlParser::new(search_sql).unwrap();
        let stmt = parser.parse().unwrap();
        let result = engine.execute(stmt).unwrap();
        // VectorDB filtering may not be fully precise, so we check for at least 1 result
        assert!(result.rows.len() >= 1);
        assert!(result.rows.len() <= 2);
        // All results should have category = 'tech'
        // NOTE(review): the expected string includes quotes ("'tech'"),
        // i.e. Display for text values keeps SQL quoting -- confirm intended.
        for row in &result.rows {
            if let Some(category) = row.get("category") {
                assert_eq!(category.to_string(), "'tech'");
            }
        }
    }
    // DROP TABLE removes the table from the engine's catalog.
    #[test]
    fn test_drop_table() {
        let engine = SqlEngine::new();
        // Create table
        let create_sql = "CREATE TABLE temp (id TEXT, embedding VECTOR(3))";
        let mut parser = SqlParser::new(create_sql).unwrap();
        let stmt = parser.parse().unwrap();
        engine.execute(stmt).unwrap();
        assert_eq!(engine.list_tables().len(), 1);
        // Drop table
        let drop_sql = "DROP TABLE temp";
        let mut parser = SqlParser::new(drop_sql).unwrap();
        let stmt = parser.parse().unwrap();
        engine.execute(stmt).unwrap();
        assert_eq!(engine.list_tables().len(), 0);
    }
    // <=> operator: only asserts the row count; the nearest-neighbour
    // identity is stated in the trailing comment, not checked.
    #[test]
    fn test_cosine_distance() {
        let engine = SqlEngine::new();
        let create_sql = "CREATE TABLE docs (id TEXT, embedding VECTOR(3))";
        let mut parser = SqlParser::new(create_sql).unwrap();
        engine.execute(parser.parse().unwrap()).unwrap();
        // Insert normalized vectors for cosine similarity
        let insert_sql = "INSERT INTO docs (id, embedding) VALUES ('doc1', [1.0, 0.0, 0.0])";
        let mut parser = SqlParser::new(insert_sql).unwrap();
        engine.execute(parser.parse().unwrap()).unwrap();
        let insert_sql = "INSERT INTO docs (id, embedding) VALUES ('doc2', [0.0, 1.0, 0.0])";
        let mut parser = SqlParser::new(insert_sql).unwrap();
        engine.execute(parser.parse().unwrap()).unwrap();
        // Search using cosine distance
        let search_sql = "SELECT * FROM docs ORDER BY embedding <=> [0.9, 0.1, 0.0] LIMIT 1";
        let mut parser = SqlParser::new(search_sql).unwrap();
        let result = engine.execute(parser.parse().unwrap()).unwrap();
        assert_eq!(result.rows.len(), 1);
        // Should return doc1 as it's more similar to [0.9, 0.1, 0.0]
    }
}

View File

@@ -0,0 +1,429 @@
//! Epoch-based reconciliation for hybrid RVF + IndexedDB persistence.
//!
//! RVF is the source of truth for vectors. IndexedDB is a rebuildable
//! cache for metadata. Both stores share a monotonic epoch counter.
//!
//! Write order:
//! 1. Write vectors to RVF (append-only, crash-safe)
//! 2. Write metadata to IndexedDB
//! 3. Commit shared epoch in both stores
//!
//! On startup: compare epochs and rebuild the lagging side.
use std::sync::atomic::{AtomicU64, Ordering};
/// Monotonic epoch counter shared between RVF and metadata stores.
///
/// Epochs only ever move forward; `next` panics on u64 overflow rather
/// than wrapping, since a wrapped epoch would silently reorder history.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Epoch(pub u64);
impl Epoch {
    /// Initial epoch before any write has been committed.
    pub const ZERO: Self = Self(0);
    /// Return the following epoch. Panics on overflow (practically unreachable).
    pub fn next(self) -> Self {
        Self(self.0.checked_add(1).expect("epoch overflow"))
    }
    /// Raw counter value.
    pub fn value(self) -> u64 {
        self.0
    }
}
/// State describing the relationship between RVF and metadata epochs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EpochState {
    /// Both stores agree on the current epoch.
    Synchronized,
    /// RVF store is ahead of metadata by the given delta.
    RvfAhead(u64),
    /// Metadata store is ahead of RVF by the given delta (anomalous).
    MetadataAhead(u64),
}
/// Action to take after comparing epochs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReconcileAction {
    /// No reconciliation needed -- both stores are in sync.
    None,
    /// Metadata is stale; rebuild it from the authoritative RVF store.
    RebuildMetadata,
    /// RVF is somehow behind metadata; rebuild vectors from RVF file.
    /// This should not normally happen and indicates a prior incomplete write.
    RebuildFromRvf,
    /// Metadata is ahead which should never happen under correct operation.
    /// Log a warning and trust RVF as the source of truth.
    LogWarningTrustRvf,
}
/// Result of comparing epochs between RVF and metadata stores.
///
/// Kept for backward compatibility with existing callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReconciliationAction {
    /// Both stores are in sync -- no action needed.
    InSync,
    /// RVF is ahead -- rebuild metadata from RVF vectors.
    RebuildMetadata {
        rvf_epoch: Epoch,
        metadata_epoch: Epoch,
    },
    /// Metadata is ahead (should not happen) -- log warning, trust RVF.
    TrustRvf {
        rvf_epoch: Epoch,
        metadata_epoch: Epoch,
    },
}
/// Compare raw epoch values and return the relationship state.
pub fn compare_epochs(rvf_epoch: u64, metadata_epoch: u64) -> EpochState {
    if rvf_epoch == metadata_epoch {
        EpochState::Synchronized
    } else if rvf_epoch > metadata_epoch {
        EpochState::RvfAhead(rvf_epoch - metadata_epoch)
    } else {
        EpochState::MetadataAhead(metadata_epoch - rvf_epoch)
    }
}
/// Determine the reconciliation action for a given epoch state.
pub fn reconcile_action(state: &EpochState) -> ReconcileAction {
    match state {
        EpochState::Synchronized => ReconcileAction::None,
        // A lagging metadata store is always rebuilt from the
        // authoritative RVF data; a larger delta only means more data to
        // replay, not a different action. (The previous version branched
        // on the delta here and returned the same action from both arms.)
        EpochState::RvfAhead(_) => ReconcileAction::RebuildMetadata,
        EpochState::MetadataAhead(delta) => {
            if *delta == 1 {
                // Metadata committed but the RVF write was lost. The RVF
                // file is still valid at its own epoch -- rebuild from it.
                ReconcileAction::RebuildFromRvf
            } else {
                // A large gap with metadata ahead is anomalous. Trust RVF.
                ReconcileAction::LogWarningTrustRvf
            }
        }
    }
}
/// Compare epochs and determine reconciliation action (legacy API).
pub fn reconcile(rvf_epoch: Epoch, metadata_epoch: Epoch) -> ReconciliationAction {
    match rvf_epoch.cmp(&metadata_epoch) {
        std::cmp::Ordering::Equal => ReconciliationAction::InSync,
        std::cmp::Ordering::Greater => ReconciliationAction::RebuildMetadata {
            rvf_epoch,
            metadata_epoch,
        },
        std::cmp::Ordering::Less => ReconciliationAction::TrustRvf {
            rvf_epoch,
            metadata_epoch,
        },
    }
}
/// Thread-safe monotonic epoch tracker.
///
/// Uses `AtomicU64` internally so it can be shared across threads without
/// a mutex. The counter is strictly monotonic: it can only move forward
/// (except via `force_set`, reserved for recovery).
///
/// # Write protocol
///
/// Callers must follow the three-phase commit:
/// 1. Call `begin_write()` to get the next epoch value.
/// 2. Write vectors to RVF with that epoch.
/// 3. Write metadata to IndexedDB with that epoch.
/// 4. Call `commit(epoch)` to advance the tracker.
///
/// If step 2 or 3 fails, do NOT call `commit` -- the tracker stays at the
/// previous epoch so that the next startup triggers reconciliation.
pub struct EpochTracker {
    /// Current committed epoch.
    current: AtomicU64,
}
impl EpochTracker {
    /// Create a new tracker starting at the given epoch.
    pub fn new(initial: u64) -> Self {
        Self {
            current: AtomicU64::new(initial),
        }
    }
    /// Create a tracker starting at epoch zero.
    pub fn zero() -> Self {
        Self::new(0)
    }
    /// Read the current committed epoch.
    pub fn current(&self) -> u64 {
        self.current.load(Ordering::Acquire)
    }
    /// Return the next epoch value for a pending write.
    ///
    /// This does NOT advance the tracker. The caller must call `commit`
    /// after both RVF and metadata writes succeed.
    pub fn begin_write(&self) -> u64 {
        self.current
            .load(Ordering::Acquire)
            .checked_add(1)
            .expect("epoch overflow")
    }
    /// Commit the given epoch, advancing the tracker.
    ///
    /// Returns `true` if the commit succeeded (epoch was exactly current + 1).
    /// Returns `false` if the epoch was stale or out of order, which means
    /// another writer committed first or the caller passed a wrong value.
    pub fn commit(&self, epoch: u64) -> bool {
        // Epoch 0 can never equal `current + 1` for any u64 `current`, so
        // it is always an invalid commit. The previous implementation
        // mapped it to `expected = 0` via `unwrap_or(0)`, which made
        // `commit(0)` spuriously report success while the tracker was
        // still at zero.
        let expected = match epoch.checked_sub(1) {
            Some(prev) => prev,
            None => return false,
        };
        self.current
            .compare_exchange(expected, epoch, Ordering::AcqRel, Ordering::Acquire)
            .is_ok()
    }
    /// Force-set the epoch to a specific value.
    ///
    /// Used during recovery/reconciliation when we need to align the
    /// tracker with a known-good state read from disk. This is the only
    /// operation allowed to move the counter backward.
    pub fn force_set(&self, epoch: u64) {
        self.current.store(epoch, Ordering::Release);
    }
    /// Check the relationship between the RVF epoch stored on disk and the
    /// metadata epoch, then return the appropriate reconciliation action.
    ///
    /// In every outcome RVF is treated as the source of truth, so the
    /// tracker is aligned to `rvf_epoch` regardless of the action taken.
    pub fn check_and_reconcile(&self, rvf_epoch: u64, metadata_epoch: u64) -> ReconcileAction {
        let state = compare_epochs(rvf_epoch, metadata_epoch);
        let action = reconcile_action(&state);
        // After reconciliation, align the tracker to the authoritative epoch.
        self.force_set(rvf_epoch);
        action
    }
}
impl std::fmt::Debug for EpochTracker {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("EpochTracker")
            .field("current", &self.current.load(Ordering::Relaxed))
            .finish()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // ---- Legacy API tests (preserved) ----
    // `reconcile` on equal epochs reports no work.
    #[test]
    fn in_sync() {
        let e = Epoch(5);
        assert_eq!(reconcile(e, e), ReconciliationAction::InSync);
    }
    #[test]
    fn rvf_ahead_rebuilds_metadata() {
        let action = reconcile(Epoch(3), Epoch(2));
        assert_eq!(
            action,
            ReconciliationAction::RebuildMetadata {
                rvf_epoch: Epoch(3),
                metadata_epoch: Epoch(2),
            }
        );
    }
    #[test]
    fn metadata_ahead_trusts_rvf() {
        let action = reconcile(Epoch(1), Epoch(3));
        assert_eq!(
            action,
            ReconciliationAction::TrustRvf {
                rvf_epoch: Epoch(1),
                metadata_epoch: Epoch(3),
            }
        );
    }
    #[test]
    fn epoch_increment() {
        assert_eq!(Epoch::ZERO.next(), Epoch(1));
        assert_eq!(Epoch(99).next(), Epoch(100));
    }
    // ---- New epoch state / reconcile tests ----
    #[test]
    fn compare_epochs_synchronized() {
        assert_eq!(compare_epochs(5, 5), EpochState::Synchronized);
        assert_eq!(compare_epochs(0, 0), EpochState::Synchronized);
    }
    #[test]
    fn compare_epochs_rvf_ahead() {
        assert_eq!(compare_epochs(10, 7), EpochState::RvfAhead(3));
        assert_eq!(compare_epochs(1, 0), EpochState::RvfAhead(1));
    }
    #[test]
    fn compare_epochs_metadata_ahead() {
        assert_eq!(compare_epochs(3, 8), EpochState::MetadataAhead(5));
        assert_eq!(compare_epochs(0, 1), EpochState::MetadataAhead(1));
    }
    #[test]
    fn reconcile_action_none_when_synchronized() {
        let state = EpochState::Synchronized;
        assert_eq!(reconcile_action(&state), ReconcileAction::None);
    }
    // Delta of 1 and delta > 1 must yield the same rebuild action.
    #[test]
    fn reconcile_action_rebuild_metadata_when_rvf_ahead() {
        assert_eq!(
            reconcile_action(&EpochState::RvfAhead(1)),
            ReconcileAction::RebuildMetadata
        );
        assert_eq!(
            reconcile_action(&EpochState::RvfAhead(5)),
            ReconcileAction::RebuildMetadata
        );
    }
    #[test]
    fn reconcile_action_rebuild_from_rvf_when_metadata_ahead_by_one() {
        assert_eq!(
            reconcile_action(&EpochState::MetadataAhead(1)),
            ReconcileAction::RebuildFromRvf
        );
    }
    #[test]
    fn reconcile_action_log_warning_when_metadata_far_ahead() {
        assert_eq!(
            reconcile_action(&EpochState::MetadataAhead(3)),
            ReconcileAction::LogWarningTrustRvf
        );
    }
    // ---- EpochTracker tests ----
    #[test]
    fn tracker_zero_starts_at_zero() {
        let tracker = EpochTracker::zero();
        assert_eq!(tracker.current(), 0);
    }
    #[test]
    fn tracker_new_starts_at_initial() {
        let tracker = EpochTracker::new(42);
        assert_eq!(tracker.current(), 42);
    }
    #[test]
    fn tracker_begin_write_returns_next() {
        let tracker = EpochTracker::new(10);
        assert_eq!(tracker.begin_write(), 11);
        // begin_write is idempotent until commit
        assert_eq!(tracker.begin_write(), 11);
    }
    #[test]
    fn tracker_commit_advances_epoch() {
        let tracker = EpochTracker::zero();
        let next = tracker.begin_write();
        assert_eq!(next, 1);
        assert!(tracker.commit(next));
        assert_eq!(tracker.current(), 1);
        let next2 = tracker.begin_write();
        assert_eq!(next2, 2);
        assert!(tracker.commit(next2));
        assert_eq!(tracker.current(), 2);
    }
    #[test]
    fn tracker_commit_rejects_stale_epoch() {
        let tracker = EpochTracker::new(5);
        // Try to commit epoch 3 which is behind current
        assert!(!tracker.commit(3));
        assert_eq!(tracker.current(), 5);
    }
    #[test]
    fn tracker_commit_rejects_skip() {
        let tracker = EpochTracker::new(5);
        // Try to commit epoch 8, skipping 6 and 7
        assert!(!tracker.commit(8));
        assert_eq!(tracker.current(), 5);
    }
    #[test]
    fn tracker_force_set() {
        let tracker = EpochTracker::new(10);
        tracker.force_set(100);
        assert_eq!(tracker.current(), 100);
        // Can also go backward with force_set (recovery scenario)
        tracker.force_set(5);
        assert_eq!(tracker.current(), 5);
    }
    // check_and_reconcile always aligns the tracker to the RVF epoch.
    #[test]
    fn tracker_check_and_reconcile_in_sync() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(7, 7);
        assert_eq!(action, ReconcileAction::None);
        assert_eq!(tracker.current(), 7);
    }
    #[test]
    fn tracker_check_and_reconcile_rvf_ahead() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(10, 8);
        assert_eq!(action, ReconcileAction::RebuildMetadata);
        assert_eq!(tracker.current(), 10);
    }
    #[test]
    fn tracker_check_and_reconcile_metadata_far_ahead() {
        let tracker = EpochTracker::zero();
        let action = tracker.check_and_reconcile(3, 8);
        assert_eq!(action, ReconcileAction::LogWarningTrustRvf);
        assert_eq!(tracker.current(), 3);
    }
    #[test]
    fn tracker_debug_format() {
        let tracker = EpochTracker::new(42);
        let debug = format!("{:?}", tracker);
        assert!(debug.contains("EpochTracker"));
        assert!(debug.contains("42"));
    }
    // ---- Thread safety (basic) ----
    // Compile-time check that the tracker can cross thread boundaries.
    #[test]
    fn tracker_is_send_and_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<EpochTracker>();
    }
}

View File

@@ -0,0 +1,296 @@
//! Direct mapping between RVF vector IDs and SQL primary keys.
//!
//! In rvlite the mapping is identity: RVF u64 IDs are the same as SQL
//! primary keys. This zero-cost design avoids an extra lookup table and
//! keeps memory usage minimal.
//!
//! The [`IdMapping`] trait exists for future extensibility -- if a
//! non-identity mapping is ever needed (e.g. hashed IDs, composite keys),
//! a new implementation can be swapped in without changing call sites.
/// Trait for converting between RVF vector IDs and SQL primary keys.
///
/// Implementors define how the two ID spaces relate to each other.
/// The default implementation ([`DirectIdMap`]) uses identity mapping.
pub trait IdMapping {
    /// Convert a SQL primary key to an RVF vector ID.
    fn to_rvf_id(&self, sql_pk: u64) -> u64;
    /// Convert an RVF vector ID back to a SQL primary key.
    fn to_sql_pk(&self, rvf_id: u64) -> u64;
    /// Check that the two slices describe the same mapping: every RVF ID
    /// must correspond to some SQL PK and vice versa (order-insensitive).
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool;
}
/// Zero-cost identity mapping where RVF u64 IDs equal SQL primary keys.
///
/// This is the default and recommended mapping for rvlite: both ID
/// spaces are `u64`, so the conversion functions compile to no-ops.
///
/// # Example
///
/// ```
/// # use rvlite::storage::id_map::{DirectIdMap, IdMapping};
/// let map = DirectIdMap;
/// assert_eq!(map.to_rvf_id(42), 42);
/// assert_eq!(map.to_sql_pk(42), 42);
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct DirectIdMap;
impl DirectIdMap {
    /// Create a new direct (identity) ID map.
    pub fn new() -> Self {
        Self
    }
    /// Identity conversion from SQL primary key to RVF vector ID.
    ///
    /// Free-function alternative to the trait method for callers that
    /// know the concrete type and want to skip dynamic dispatch.
    #[inline(always)]
    pub fn to_rvf_id(sql_pk: u64) -> u64 {
        sql_pk
    }
    /// Identity conversion from RVF vector ID to SQL primary key.
    #[inline(always)]
    pub fn to_sql_pk(rvf_id: u64) -> u64 {
        rvf_id
    }
    /// Under identity mapping the two slices are valid exactly when they
    /// are equal as multisets (same elements, any order).
    pub fn validate_mapping(rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        if rvf_ids.len() != sql_pks.len() {
            return false;
        }
        let mut lhs: Vec<u64> = rvf_ids.to_vec();
        let mut rhs: Vec<u64> = sql_pks.to_vec();
        lhs.sort_unstable();
        rhs.sort_unstable();
        lhs == rhs
    }
}
impl IdMapping for DirectIdMap {
    #[inline(always)]
    fn to_rvf_id(&self, sql_pk: u64) -> u64 {
        sql_pk
    }
    #[inline(always)]
    fn to_sql_pk(&self, rvf_id: u64) -> u64 {
        rvf_id
    }
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        // Delegate to the inherent (static) implementation.
        Self::validate_mapping(rvf_ids, sql_pks)
    }
}
/// An offset-based ID mapping where SQL PKs start from a different base.
///
/// Useful when the SQL table uses auto-increment starting at 1 but the
/// RVF store is zero-indexed (or vice versa).
///
/// `rvf_id = sql_pk + offset`
#[derive(Debug, Clone, Copy)]
pub struct OffsetIdMap {
    /// Offset added to SQL PK to produce the RVF ID.
    /// Can be negative via wrapping arithmetic on u64.
    offset: i64,
}
impl OffsetIdMap {
    /// Create an offset mapping.
    ///
    /// `offset` is added to SQL PKs to produce RVF IDs.
    /// Use a negative offset if RVF IDs are smaller than SQL PKs.
    pub fn new(offset: i64) -> Self {
        Self { offset }
    }
}
impl IdMapping for OffsetIdMap {
    #[inline]
    fn to_rvf_id(&self, sql_pk: u64) -> u64 {
        (sql_pk as i64).wrapping_add(self.offset) as u64
    }
    #[inline]
    fn to_sql_pk(&self, rvf_id: u64) -> u64 {
        (rvf_id as i64).wrapping_sub(self.offset) as u64
    }
    fn validate_mapping(&self, rvf_ids: &[u64], sql_pks: &[u64]) -> bool {
        if rvf_ids.len() != sql_pks.len() {
            return false;
        }
        // Map the SQL side through the offset, then compare as multisets.
        let mut mapped: Vec<u64> = sql_pks.iter().map(|&pk| self.to_rvf_id(pk)).collect();
        let mut given: Vec<u64> = rvf_ids.to_vec();
        mapped.sort_unstable();
        given.sort_unstable();
        mapped == given
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // ---- DirectIdMap tests ----
    // Inherent (static) conversions are the identity across the full range.
    #[test]
    fn direct_to_rvf_id_is_identity() {
        assert_eq!(DirectIdMap::to_rvf_id(0), 0);
        assert_eq!(DirectIdMap::to_rvf_id(42), 42);
        assert_eq!(DirectIdMap::to_rvf_id(u64::MAX), u64::MAX);
    }
    #[test]
    fn direct_to_sql_pk_is_identity() {
        assert_eq!(DirectIdMap::to_sql_pk(0), 0);
        assert_eq!(DirectIdMap::to_sql_pk(42), 42);
        assert_eq!(DirectIdMap::to_sql_pk(u64::MAX), u64::MAX);
    }
    #[test]
    fn direct_roundtrip() {
        for id in [0, 1, 100, u64::MAX / 2, u64::MAX] {
            assert_eq!(DirectIdMap::to_sql_pk(DirectIdMap::to_rvf_id(id)), id);
            assert_eq!(DirectIdMap::to_rvf_id(DirectIdMap::to_sql_pk(id)), id);
        }
    }
    // validate_mapping is order-insensitive (multiset equality).
    #[test]
    fn direct_validate_same_elements() {
        let rvf = vec![1, 2, 3];
        let sql = vec![3, 1, 2];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }
    #[test]
    fn direct_validate_empty() {
        assert!(DirectIdMap::validate_mapping(&[], &[]));
    }
    #[test]
    fn direct_validate_different_length_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }
    #[test]
    fn direct_validate_different_elements_fails() {
        let rvf = vec![1, 2, 3];
        let sql = vec![1, 2, 4];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }
    // Duplicate counts must match on both sides, not just the value sets.
    #[test]
    fn direct_validate_duplicates_match() {
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 1];
        assert!(DirectIdMap::validate_mapping(&rvf, &sql));
    }
    #[test]
    fn direct_validate_duplicates_mismatch() {
        let rvf = vec![1, 1, 2];
        let sql = vec![1, 2, 2];
        assert!(!DirectIdMap::validate_mapping(&rvf, &sql));
    }
    // ---- IdMapping trait via DirectIdMap ----
    #[test]
    fn trait_direct_to_rvf_id() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_rvf_id(&map, 99), 99);
    }
    #[test]
    fn trait_direct_to_sql_pk() {
        let map = DirectIdMap;
        assert_eq!(IdMapping::to_sql_pk(&map, 99), 99);
    }
    #[test]
    fn trait_direct_validate() {
        let map = DirectIdMap;
        assert!(IdMapping::validate_mapping(&map, &[1, 2], &[2, 1]));
        assert!(!IdMapping::validate_mapping(&map, &[1, 2], &[2, 3]));
    }
    // ---- OffsetIdMap tests ----
    #[test]
    fn offset_positive() {
        let map = OffsetIdMap::new(10);
        assert_eq!(map.to_rvf_id(0), 10);
        assert_eq!(map.to_rvf_id(5), 15);
        assert_eq!(map.to_sql_pk(10), 0);
        assert_eq!(map.to_sql_pk(15), 5);
    }
    #[test]
    fn offset_negative() {
        let map = OffsetIdMap::new(-1);
        // SQL PK 1 -> RVF ID 0
        assert_eq!(map.to_rvf_id(1), 0);
        assert_eq!(map.to_sql_pk(0), 1);
    }
    #[test]
    fn offset_zero_is_identity() {
        let map = OffsetIdMap::new(0);
        for id in [0, 1, 42, 1000] {
            assert_eq!(map.to_rvf_id(id), id);
            assert_eq!(map.to_sql_pk(id), id);
        }
    }
    #[test]
    fn offset_roundtrip() {
        let map = OffsetIdMap::new(7);
        for pk in [0, 1, 100, 999] {
            assert_eq!(map.to_sql_pk(map.to_rvf_id(pk)), pk);
        }
    }
    #[test]
    fn offset_validate() {
        let map = OffsetIdMap::new(10);
        // SQL PKs [0, 1, 2] -> RVF IDs [10, 11, 12]
        assert!(map.validate_mapping(&[12, 10, 11], &[2, 0, 1]));
        assert!(!map.validate_mapping(&[10, 11, 12], &[0, 1, 3]));
    }
    // ---- Dynamic dispatch ----
    // The trait is object-safe; both implementors work behind Box<dyn>.
    #[test]
    fn trait_object_works() {
        let direct: Box<dyn IdMapping> = Box::new(DirectIdMap);
        assert_eq!(direct.to_rvf_id(5), 5);
        let offset: Box<dyn IdMapping> = Box::new(OffsetIdMap::new(100));
        assert_eq!(offset.to_rvf_id(5), 105);
    }
    // ---- Default impl ----
    #[test]
    fn direct_default() {
        let map: DirectIdMap = Default::default();
        assert_eq!(map.to_rvf_id(7), 7);
    }
}

View File

@@ -0,0 +1,243 @@
//! IndexedDB storage implementation for WASM
//!
//! Uses web-sys bindings to interact with the browser's IndexedDB API
//! for persistent storage of RvLite state.
use super::state::RvLiteState;
use js_sys::{Object, Reflect};
use wasm_bindgen::prelude::*;
use wasm_bindgen::JsCast;
use wasm_bindgen_futures::JsFuture;
use web_sys::{IdbDatabase, IdbObjectStore, IdbRequest, IdbTransaction, IdbTransactionMode};
const DB_NAME: &str = "rvlite_db";
const DB_VERSION: u32 = 1;
const STORE_NAME: &str = "state";
const STATE_KEY: &str = "main";
/// IndexedDB storage backend for RvLite persistence
pub struct IndexedDBStorage {
db: Option<IdbDatabase>,
}
impl IndexedDBStorage {
/// Create a new IndexedDB storage instance
pub fn new() -> Self {
Self { db: None }
}
/// Initialize and open the IndexedDB database
pub async fn init(&mut self) -> Result<(), JsValue> {
let window = web_sys::window().ok_or_else(|| JsValue::from_str("No window"))?;
let indexed_db = window
.indexed_db()?
.ok_or_else(|| JsValue::from_str("IndexedDB not available"))?;
let open_request = indexed_db.open_with_u32(DB_NAME, DB_VERSION)?;
// Handle database upgrade (create object store if needed)
let onupgradeneeded = Closure::once(Box::new(move |event: web_sys::Event| {
let target = event.target().unwrap();
let request: IdbRequest = target.unchecked_into();
let db: IdbDatabase = request.result().unwrap().unchecked_into();
// Create object store if it doesn't exist
if !db.object_store_names().contains(STORE_NAME) {
db.create_object_store(STORE_NAME).unwrap();
}
}) as Box<dyn FnOnce(_)>);
open_request.set_onupgradeneeded(Some(onupgradeneeded.as_ref().unchecked_ref()));
onupgradeneeded.forget(); // Prevent closure from being dropped
// Wait for database to open using JsFuture
let db_result = wait_for_request(&open_request).await?;
let db: IdbDatabase = db_result.unchecked_into();
self.db = Some(db);
Ok(())
}
/// Check if IndexedDB is available
pub fn is_available() -> bool {
web_sys::window()
.and_then(|w| w.indexed_db().ok().flatten())
.is_some()
}
/// Save state to IndexedDB
pub async fn save(&self, state: &RvLiteState) -> Result<(), JsValue> {
let db = self
.db
.as_ref()
.ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;
// Convert state to JsValue
let js_state = serde_wasm_bindgen::to_value(state)?;
// Start transaction
let store_names = js_sys::Array::new();
store_names.push(&JsValue::from_str(STORE_NAME));
let transaction =
db.transaction_with_str_sequence_and_mode(&store_names, IdbTransactionMode::Readwrite)?;
let store = transaction.object_store(STORE_NAME)?;
// Put state with key
let request = store.put_with_key(&js_state, &JsValue::from_str(STATE_KEY))?;
// Wait for completion
wait_for_request(&request).await?;
Ok(())
}
/// Load state from IndexedDB
pub async fn load(&self) -> Result<Option<RvLiteState>, JsValue> {
let db = self
.db
.as_ref()
.ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;
// Start read transaction
let transaction = db.transaction_with_str(STORE_NAME)?;
let store = transaction.object_store(STORE_NAME)?;
// Get state by key
let request = store.get(&JsValue::from_str(STATE_KEY))?;
// Wait for result
let result = wait_for_request(&request).await?;
if result.is_undefined() || result.is_null() {
return Ok(None);
}
// Deserialize state
let state: RvLiteState = serde_wasm_bindgen::from_value(result)?;
Ok(Some(state))
}
/// Delete all stored state
pub async fn clear(&self) -> Result<(), JsValue> {
let db = self
.db
.as_ref()
.ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;
let store_names = js_sys::Array::new();
store_names.push(&JsValue::from_str(STORE_NAME));
let transaction =
db.transaction_with_str_sequence_and_mode(&store_names, IdbTransactionMode::Readwrite)?;
let store = transaction.object_store(STORE_NAME)?;
let request = store.clear()?;
wait_for_request(&request).await?;
Ok(())
}
/// Check if state exists in storage
pub async fn exists(&self) -> Result<bool, JsValue> {
let db = self
.db
.as_ref()
.ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;
let transaction = db.transaction_with_str(STORE_NAME)?;
let store = transaction.object_store(STORE_NAME)?;
let request = store.count_with_key(&JsValue::from_str(STATE_KEY))?;
let result = wait_for_request(&request).await?;
let count = result.as_f64().unwrap_or(0.0) as u32;
Ok(count > 0)
}
/// Get storage info (for debugging)
pub async fn get_info(&self) -> Result<JsValue, JsValue> {
let db = self
.db
.as_ref()
.ok_or_else(|| JsValue::from_str("Database not initialized. Call init() first."))?;
let transaction = db.transaction_with_str(STORE_NAME)?;
let store = transaction.object_store(STORE_NAME)?;
let count_request = store.count()?;
let count = wait_for_request(&count_request).await?;
let info = Object::new();
Reflect::set(&info, &"database".into(), &DB_NAME.into())?;
Reflect::set(&info, &"store".into(), &STORE_NAME.into())?;
Reflect::set(&info, &"entries".into(), &count)?;
Ok(info.into())
}
/// Close the database connection
pub fn close(&mut self) {
if let Some(db) = self.db.take() {
db.close();
}
}
}
impl Default for IndexedDBStorage {
fn default() -> Self {
Self::new()
}
}
impl Drop for IndexedDBStorage {
fn drop(&mut self) {
self.close();
}
}
/// Wait for an IdbRequest to complete and return the result
///
/// Bridges IndexedDB's callback API into async Rust: wraps the request's
/// `onsuccess`/`onerror` callbacks in a JS `Promise`, awaits it, then reads
/// the request's `result` property.
///
/// # Errors
///
/// Resolves to `Err(JsValue)` with a generic "IndexedDB error" message when
/// the request fires `onerror`, or propagates a failure from `result()`.
async fn wait_for_request(request: &IdbRequest) -> Result<JsValue, JsValue> {
    let promise = js_sys::Promise::new(&mut |resolve, reject| {
        // Success handler
        let resolve_clone = resolve.clone();
        let onsuccess = Closure::once(Box::new(move |_event: web_sys::Event| {
            // Note: We can't access request here due to lifetime issues
            // The result will be passed through the event
            resolve_clone.call0(&JsValue::NULL).unwrap();
        }) as Box<dyn FnOnce(_)>);
        // Error handler
        let onerror = Closure::once(Box::new(move |_event: web_sys::Event| {
            reject
                .call1(&JsValue::NULL, &JsValue::from_str("IndexedDB error"))
                .unwrap();
        }) as Box<dyn FnOnce(_)>);
        request.set_onsuccess(Some(onsuccess.as_ref().unchecked_ref()));
        request.set_onerror(Some(onerror.as_ref().unchecked_ref()));
        // `forget()` keeps the closures alive until the JS callbacks fire.
        // NOTE(review): this leaks a small allocation per call (both closures
        // are forgotten, only one ever runs) -- acceptable for one-shot
        // requests, but worth confirming for high-frequency use.
        onsuccess.forget();
        onerror.forget();
    });
    JsFuture::from(promise).await?;
    // Get the result after the request completes
    request.result()
}
#[cfg(test)]
mod tests {
    use super::*;
    // IndexedDB itself only exists in a browser environment, so native tests
    // are limited to checking construction state.
    #[test]
    fn test_storage_new() {
        let storage = IndexedDBStorage::new();
        assert!(matches!(storage.db, None));
    }
}

View File

@@ -0,0 +1,21 @@
//! IndexedDB storage backend for WASM persistence
//!
//! Provides async-compatible persistence using IndexedDB for:
//! - Vector database state
//! - Cypher graph state
//! - SPARQL triple store state
// Core persistence modules, always compiled.
pub mod indexeddb;
pub mod state;
// The following modules are only built with the `rvf-backend` feature.
#[cfg(feature = "rvf-backend")]
pub mod epoch;
#[cfg(feature = "rvf-backend")]
pub mod writer_lease;
#[cfg(feature = "rvf-backend")]
pub mod id_map;
// Re-export the most commonly used types at the module root.
pub use indexeddb::IndexedDBStorage;
pub use state::{GraphState, RvLiteState, TripleStoreState, VectorState};

View File

@@ -0,0 +1,158 @@
//! Serializable state structures for RvLite persistence
//!
//! These structures represent the complete state of the RvLite database
//! in a format that can be serialized to/from IndexedDB.
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Complete serializable state for RvLite
///
/// Root object serialized for persistence; every sub-state is a plain-data
/// mirror of the corresponding live engine structure.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RvLiteState {
    /// Version for schema migration
    pub version: u32,
    /// Timestamp of last save
    /// (0 means "never saved" -- see the `Default` impl)
    pub saved_at: u64,
    /// Vector database state
    pub vectors: VectorState,
    /// Cypher graph state
    pub graph: GraphState,
    /// SPARQL triple store state
    pub triples: TripleStoreState,
    /// SQL engine schemas
    pub sql_schemas: Vec<SqlTableState>,
}
impl Default for RvLiteState {
fn default() -> Self {
Self {
version: 1,
saved_at: 0,
vectors: VectorState::default(),
graph: GraphState::default(),
triples: TripleStoreState::default(),
sql_schemas: Vec::new(),
}
}
}
/// Serializable vector database state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VectorState {
    /// Vector entries: id -> (vector, metadata)
    pub entries: Vec<VectorEntry>,
    /// Database dimensions
    /// (assumed to be the expected length of each entry's `vector` -- TODO confirm)
    pub dimensions: usize,
    /// Distance metric name
    pub distance_metric: String,
    /// Next auto-generated ID counter
    pub next_id: u64,
}
/// Single vector entry for serialization
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorEntry {
    /// Unique vector identifier.
    pub id: String,
    /// Embedding components.
    pub vector: Vec<f32>,
    /// Optional JSON metadata attached to the vector.
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
/// Serializable Cypher graph state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphState {
    /// All nodes
    pub nodes: Vec<NodeState>,
    /// All edges
    /// (each references node ids via `EdgeState::from` / `EdgeState::to`)
    pub edges: Vec<EdgeState>,
    /// Next node ID counter
    pub next_node_id: usize,
    /// Next edge ID counter
    pub next_edge_id: usize,
}
/// Serializable node
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeState {
    /// Unique node identifier.
    pub id: String,
    /// Cypher labels attached to the node.
    pub labels: Vec<String>,
    /// Node properties keyed by name.
    pub properties: HashMap<String, PropertyValue>,
}
/// Serializable edge
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeState {
    /// Unique edge identifier.
    pub id: String,
    /// Id of the source node.
    pub from: String,
    /// Id of the target node.
    pub to: String,
    /// Relationship type name.
    pub edge_type: String,
    /// Edge properties keyed by name.
    pub properties: HashMap<String, PropertyValue>,
}
/// Property value for serialization (mirrors cypher::Value)
///
/// Adjacently tagged (`{"type": ..., "value": ...}`) so JSON round-trips
/// are unambiguous between, e.g., integers and floats.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
pub enum PropertyValue {
    /// Absent / null value.
    Null,
    Boolean(bool),
    Integer(i64),
    Float(f64),
    String(String),
    /// Ordered list of nested values.
    List(Vec<PropertyValue>),
    /// String-keyed map of nested values.
    Map(HashMap<String, PropertyValue>),
}
/// Serializable SPARQL triple store state
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TripleStoreState {
    /// All triples
    pub triples: Vec<TripleState>,
    /// Named graphs
    /// (graph name -> ids of member triples in `triples`)
    pub named_graphs: HashMap<String, Vec<u64>>,
    /// Default graph triple IDs
    pub default_graph: Vec<u64>,
    /// Next triple ID counter
    pub next_id: u64,
}
/// Serializable RDF triple
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TripleState {
    /// Store-assigned id, referenced by the graph membership lists.
    pub id: u64,
    /// Subject term.
    pub subject: RdfTermState,
    /// Predicate, stored as a plain string
    /// (RDF predicates are always IRIs, so no tagged term is needed).
    pub predicate: String,
    /// Object term.
    pub object: RdfTermState,
}
/// Serializable RDF term
///
/// Internally tagged (`"type"` discriminator) for stable JSON output.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RdfTermState {
    /// An IRI reference.
    Iri {
        value: String,
    },
    /// A literal with a datatype and optional language tag.
    Literal {
        value: String,
        datatype: String,
        language: Option<String>,
    },
    /// A blank node identified by a document-local id.
    BlankNode {
        id: String,
    },
}
/// Serializable SQL table schema state
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqlTableState {
    /// Table name.
    pub name: String,
    /// Column definitions, in stored order.
    pub columns: Vec<SqlColumnState>,
    /// Name of the vector-typed column, if the table has one.
    pub vector_column: Option<String>,
    /// Dimensionality of that vector column, if known.
    pub vector_dimensions: Option<usize>,
}
/// Serializable SQL column
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqlColumnState {
    /// Column name.
    pub name: String,
    /// Data type name as a string.
    pub data_type: String,
    /// Vector dimensionality for vector-typed columns; `None` otherwise.
    pub dimensions: Option<usize>,
}

View File

@@ -0,0 +1,555 @@
//! File-based writer lease for single-writer concurrency in rvlite.
//!
//! Provides a cooperative lock mechanism using a lock file with PID and
//! timestamp. Only one writer may hold the lease at a time. The lease
//! includes a heartbeat timestamp that is checked for staleness so that
//! crashed processes do not permanently block new writers.
//!
//! Lock file location: `{store_path}.lock`
//! Lock file contents: JSON with `pid`, `timestamp_secs`, `hostname`.
use std::fs;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use serde::{Deserialize, Serialize};
/// Default staleness threshold -- if the heartbeat is older than this
/// duration, the lease is considered abandoned and may be force-acquired.
///
/// 30s gives a writer that refreshes every ~10s (as suggested in
/// `refresh_heartbeat`) a 3x safety margin.
const DEFAULT_STALE_THRESHOLD: Duration = Duration::from_secs(30);
/// Contents written to the lock file.
///
/// Serialized as JSON so any process that can read the file can identify
/// the holder and judge staleness.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LeaseMeta {
    /// Process ID of the lock holder.
    pid: u32,
    /// Unix timestamp in seconds when the lease was last refreshed.
    timestamp_secs: u64,
    /// Hostname of the lock holder.
    /// Used to decide whether the PID liveness check is meaningful.
    hostname: String,
}
/// A writer lease backed by a lock file on disk.
///
/// While this struct is alive, the lease is held. Dropping it releases
/// the lock file automatically via the `Drop` implementation.
///
/// # Example
///
/// ```no_run
/// use std::path::Path;
/// use std::time::Duration;
/// # // This is a doc-test stub; actual usage requires the rvf-backend feature.
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
/// // let lease = WriterLease::acquire(Path::new("/data/store.rvf"), Duration::from_secs(5))?;
/// // ... perform writes ...
/// // lease.release()?; // or just let it drop
/// # Ok(())
/// # }
/// ```
pub struct WriterLease {
    /// Path to the lock file.
    lock_path: PathBuf,
    /// Our PID, used to verify ownership on release.
    pid: u32,
    /// Whether the lease has been explicitly released.
    /// Guards against double removal from `release()` followed by `Drop`.
    released: bool,
}
impl WriterLease {
    /// Attempt to acquire the writer lease for the given store path.
    ///
    /// The lock file is created at `{path}.lock`. If another process holds
    /// the lease, this function will retry until `timeout` elapses. If the
    /// existing lease is stale (heartbeat older than 30 seconds, or the
    /// holder PID is dead on this host), the stale lock is broken and
    /// acquisition proceeds.
    ///
    /// # Errors
    ///
    /// Returns `io::Error` with `WouldBlock` if the timeout expires without
    /// acquiring the lease, or propagates any underlying I/O errors.
    pub fn acquire(path: &Path, timeout: Duration) -> io::Result<Self> {
        let lock_path = lock_path_for(path);
        let pid = std::process::id();
        let deadline = Instant::now() + timeout;
        loop {
            // Try to create the lock file exclusively.
            match try_create_lock(&lock_path, pid) {
                Ok(()) => {
                    return Ok(WriterLease {
                        lock_path,
                        pid,
                        released: false,
                    });
                }
                Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
                    // Lock file exists -- check if it is stale.
                    if Self::is_stale(&lock_path, DEFAULT_STALE_THRESHOLD) {
                        // Force-remove the stale lock and retry immediately.
                        // If removal fails (permissions, or another process
                        // already broke the lock), fall through to the
                        // deadline check below instead of spinning forever
                        // in the stale branch with no timeout enforcement.
                        if fs::remove_file(&lock_path).is_ok() {
                            continue;
                        }
                    }
                    // Lock is active (or could not be broken). Check timeout.
                    if Instant::now() >= deadline {
                        return Err(io::Error::new(
                            io::ErrorKind::WouldBlock,
                            format!(
                                "writer lease acquisition timed out after {:?} for {:?}",
                                timeout, lock_path
                            ),
                        ));
                    }
                    // Brief sleep before retrying.
                    std::thread::sleep(Duration::from_millis(50));
                }
                Err(e) => return Err(e),
            }
        }
    }
    /// Explicitly release the writer lease.
    ///
    /// Verifies that the lock file still belongs to this process before
    /// removing it to avoid deleting a lock acquired by another process
    /// after a stale break. Idempotent: a second call is a no-op.
    pub fn release(&mut self) -> io::Result<()> {
        if self.released {
            return Ok(());
        }
        self.do_release();
        self.released = true;
        Ok(())
    }
    /// Refresh the heartbeat timestamp in the lock file.
    ///
    /// Writers performing long operations should call this periodically
    /// (e.g. every 10 seconds) to prevent the lease from appearing stale.
    ///
    /// # Errors
    ///
    /// Fails if the lease was already released, or if the lock file no
    /// longer records our PID (i.e. it was broken as stale and re-acquired
    /// by another process).
    pub fn refresh_heartbeat(&self) -> io::Result<()> {
        if self.released {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "cannot refresh a released lease",
            ));
        }
        // Verify we still own the lock.
        if !self.owns_lock() {
            return Err(io::Error::new(
                io::ErrorKind::Other,
                "lease was taken over by another process",
            ));
        }
        write_lock_file(&self.lock_path, self.pid)
    }
    /// Check whether the lock file at the given path is stale.
    ///
    /// A lock is stale if:
    /// - The lock file does not exist (vacuously stale).
    /// - The lock file cannot be parsed.
    /// - The heartbeat timestamp is older than `threshold`.
    /// - The PID in the lock file is not alive on the current host.
    pub fn is_stale(path: &Path, threshold: Duration) -> bool {
        // Accept either the store path or the lock path itself.
        let lock_path = if path.extension().map_or(false, |e| e == "lock") {
            path.to_path_buf()
        } else {
            lock_path_for(path)
        };
        let content = match fs::read_to_string(&lock_path) {
            Ok(c) => c,
            Err(_) => return true, // Missing or unreadable = stale.
        };
        let meta: LeaseMeta = match serde_json::from_str(&content) {
            Ok(m) => m,
            Err(_) => return true, // Corrupt = stale.
        };
        // Check age.
        let now_secs = current_unix_secs();
        let age_secs = now_secs.saturating_sub(meta.timestamp_secs);
        if age_secs > threshold.as_secs() {
            return true;
        }
        // Check if PID is alive (only meaningful on same host).
        let our_hostname = get_hostname();
        if meta.hostname == our_hostname && !is_pid_alive(meta.pid) {
            return true;
        }
        false
    }
    /// Return the path to the lock file.
    pub fn lock_path(&self) -> &Path {
        &self.lock_path
    }
    /// Check whether this lease still owns the lock file.
    ///
    /// Reads the lock file back and compares the recorded PID with ours;
    /// any read or parse failure is treated as "not owned".
    fn owns_lock(&self) -> bool {
        let content = match fs::read_to_string(&self.lock_path) {
            Ok(c) => c,
            Err(_) => return false,
        };
        let meta: LeaseMeta = match serde_json::from_str(&content) {
            Ok(m) => m,
            Err(_) => return false,
        };
        meta.pid == self.pid
    }
    /// Internal release logic: remove the lock file only if we still own it.
    fn do_release(&self) {
        if self.owns_lock() {
            let _ = fs::remove_file(&self.lock_path);
        }
    }
}
impl Drop for WriterLease {
    /// Best-effort release on scope exit. Explicit `release()` is preferred,
    /// but this guarantees the lock file is cleaned up either way.
    fn drop(&mut self) {
        if self.released {
            return;
        }
        self.do_release();
        self.released = true;
    }
}
impl std::fmt::Debug for WriterLease {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("WriterLease")
.field("lock_path", &self.lock_path)
.field("pid", &self.pid)
.field("released", &self.released)
.finish()
}
}
// ---- Helper functions ----
/// Compute the lock file path for a store path.
///
/// Appends a literal `.lock` to the full file name (so `store.rvf` becomes
/// `store.rvf.lock`) rather than replacing the extension.
fn lock_path_for(store_path: &Path) -> PathBuf {
    let mut name = store_path.as_os_str().to_os_string();
    name.push(".lock");
    name.into()
}
/// Try to atomically create the lock file. Fails with `AlreadyExists` if
/// another process holds the lock.
fn try_create_lock(lock_path: &Path, pid: u32) -> io::Result<()> {
    // Ensure parent directory exists.
    if let Some(parent) = lock_path.parent() {
        fs::create_dir_all(parent)?;
    }
    let meta = LeaseMeta {
        pid,
        timestamp_secs: current_unix_secs(),
        hostname: get_hostname(),
    };
    let payload = serde_json::to_string(&meta)
        .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}")))?;
    // `create_new` gives O_CREAT | O_EXCL semantics: creation is atomic and
    // fails with `AlreadyExists` when another process wins the race.
    let mut file = fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(lock_path)?;
    file.write_all(payload.as_bytes())?;
    // Flush to stable storage so a crash cannot leave an empty lock file.
    file.sync_all()?;
    Ok(())
}
/// Overwrite an existing lock file with a fresh timestamp.
fn write_lock_file(lock_path: &Path, pid: u32) -> io::Result<()> {
    let payload = serde_json::to_string(&LeaseMeta {
        pid,
        timestamp_secs: current_unix_secs(),
        hostname: get_hostname(),
    })
    .map_err(|e| io::Error::new(io::ErrorKind::Other, format!("serialize lease meta: {e}")))?;
    fs::write(lock_path, payload.as_bytes())
}
/// Get the current Unix timestamp in seconds.
///
/// Returns 0 if the system clock reports a time before the Unix epoch.
fn current_unix_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
/// Best-effort hostname retrieval.
///
/// Prefers the `HOSTNAME` environment variable, then `/etc/hostname`,
/// falling back to `"unknown"`. The result is always whitespace-trimmed:
/// the previous version trimmed only the file-based path, so a `HOSTNAME`
/// env value with a stray newline could produce a hostname that never
/// compared equal to a trimmed one, defeating the same-host PID check in
/// `WriterLease::is_stale`.
fn get_hostname() -> String {
    std::env::var("HOSTNAME")
        .ok()
        .or_else(|| fs::read_to_string("/etc/hostname").ok())
        .map(|raw| raw.trim().to_string())
        .unwrap_or_else(|| "unknown".to_string())
}
/// Check whether a process with the given PID is alive.
///
/// On Unix this uses `kill(pid, 0)`, which performs existence and
/// permission checks without delivering a signal. On non-Unix targets the
/// check is unimplemented and conservatively reports "alive" so a lease is
/// never broken on a guess.
fn is_pid_alive(pid: u32) -> bool {
    #[cfg(unix)]
    {
        // kill(pid, 0) checks existence without sending a signal.
        let ret = unsafe { libc_kill(pid as i32, 0) };
        if ret == 0 {
            return true;
        }
        // EPERM means the process exists but belongs to another user.
        // SAFETY: errno_location returns the thread-local errno pointer,
        // read immediately after the failed call so no other libc call can
        // clobber it.
        let errno = unsafe { *errno_location() };
        errno == 1 // EPERM
    }
    #[cfg(not(unix))]
    {
        let _ = pid;
        true // Conservatively assume alive on non-Unix.
    }
}
// Minimal hand-rolled libc bindings: `kill` for liveness probing plus the
// platform-specific errno accessor. Declared here to avoid a dependency on
// the `libc` crate.
#[cfg(unix)]
extern "C" {
    fn kill(pid: i32, sig: i32) -> i32;
}
#[cfg(any(target_os = "linux", target_os = "android"))]
extern "C" {
    fn __errno_location() -> *mut i32;
}
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
extern "C" {
    fn __error() -> *mut i32;
}
/// Thin wrapper so call sites need not name the raw extern directly.
///
/// # Safety
/// `pid` and `sig` must form a valid `kill(2)` request; `sig == 0` performs
/// a liveness/permission probe only.
#[cfg(unix)]
unsafe fn libc_kill(pid: i32, sig: i32) -> i32 {
    unsafe { kill(pid, sig) }
}
/// Platform-specific pointer to the thread-local `errno`.
///
/// # Safety
/// The returned pointer is only valid for the calling thread.
#[cfg(any(target_os = "linux", target_os = "android"))]
unsafe fn errno_location() -> *mut i32 {
    unsafe { __errno_location() }
}
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd"))]
unsafe fn errno_location() -> *mut i32 {
    unsafe { __error() }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering};
    /// Counter to generate unique directory names for each test, avoiding
    /// cross-test interference when running in parallel.
    static TEST_COUNTER: AtomicU64 = AtomicU64::new(0);
    /// Create a fresh, uniquely named temp directory for one test.
    fn unique_dir(name: &str) -> PathBuf {
        let id = TEST_COUNTER.fetch_add(1, AtomicOrdering::Relaxed);
        let dir = std::env::temp_dir().join(format!(
            "rvlite_lease_{}_{}_{}",
            std::process::id(),
            id,
            name
        ));
        let _ = fs::create_dir_all(&dir);
        dir
    }
    /// Best-effort removal of a test directory.
    fn cleanup(dir: &Path) {
        let _ = fs::remove_dir_all(dir);
    }
    #[test]
    fn lock_path_computation() {
        let p = Path::new("/tmp/store.rvf");
        assert_eq!(lock_path_for(p), PathBuf::from("/tmp/store.rvf.lock"));
    }
    #[test]
    fn acquire_and_release() {
        let dir = unique_dir("acquire_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert!(lease.lock_path().exists());
        lease.release().unwrap();
        assert!(!lease.lock_path().exists());
        cleanup(&dir);
    }
    #[test]
    fn double_acquire_fails_within_timeout() {
        let dir = unique_dir("double_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        // Second acquire should time out quickly. The lock is held by our own
        // PID and is fresh, so it cannot be broken as stale.
        let result = WriterLease::acquire(&store_path, Duration::from_millis(150));
        assert!(result.is_err());
        assert_eq!(result.unwrap_err().kind(), io::ErrorKind::WouldBlock);
        cleanup(&dir);
    }
    #[test]
    fn drop_releases_lease() {
        let dir = unique_dir("drop_release");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_file = lock_path_for(&store_path);
        // Scope the lease so Drop runs before the assertion below.
        {
            let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
            assert!(lock_file.exists());
        }
        // After drop, lock file should be gone.
        assert!(!lock_file.exists());
        cleanup(&dir);
    }
    #[test]
    fn stale_lease_is_detected() {
        let dir = unique_dir("stale_detect");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);
        // Write a lock file with a very old timestamp and dead PID.
        let meta = LeaseMeta {
            pid: 999_999_999, // Almost certainly not alive.
            timestamp_secs: current_unix_secs().saturating_sub(120),
            hostname: get_hostname(),
        };
        let content = serde_json::to_string(&meta).unwrap();
        fs::write(&lock_path, content).unwrap();
        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));
        cleanup(&dir);
    }
    #[test]
    fn fresh_lease_is_not_stale() {
        let dir = unique_dir("fresh_lease");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let _lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert!(!WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));
        cleanup(&dir);
    }
    #[test]
    fn missing_lock_file_is_stale() {
        // A non-existent lock file is vacuously stale.
        let path = Path::new("/tmp/nonexistent_rvlite_test_12345.rvf");
        assert!(WriterLease::is_stale(path, DEFAULT_STALE_THRESHOLD));
    }
    #[test]
    fn corrupt_lock_file_is_stale() {
        let dir = unique_dir("corrupt");
        let store_path = dir.join("test.rvf");
        let lock_path = lock_path_for(&store_path);
        let _ = fs::create_dir_all(&dir);
        fs::write(&lock_path, b"not json").unwrap();
        assert!(WriterLease::is_stale(&store_path, DEFAULT_STALE_THRESHOLD));
        cleanup(&dir);
    }
    #[test]
    fn refresh_heartbeat_updates_timestamp() {
        let dir = unique_dir("heartbeat");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        // refresh_heartbeat overwrites the lock file with a new timestamp.
        lease.refresh_heartbeat().unwrap();
        // Read back and verify timestamp is recent.
        let content = fs::read_to_string(lease.lock_path()).unwrap();
        let meta: LeaseMeta = serde_json::from_str(&content).unwrap();
        let age = current_unix_secs().saturating_sub(meta.timestamp_secs);
        assert!(age < 5, "heartbeat should be very recent, got age={age}s");
        cleanup(&dir);
    }
    #[test]
    fn stale_lease_force_acquire() {
        let dir = unique_dir("force_acquire");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lock_path = lock_path_for(&store_path);
        // Simulate a stale lock from a dead process.
        let meta = LeaseMeta {
            pid: 999_999_999,
            timestamp_secs: current_unix_secs().saturating_sub(60),
            hostname: get_hostname(),
        };
        fs::write(&lock_path, serde_json::to_string(&meta).unwrap()).unwrap();
        // Should succeed because the existing lock is stale.
        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        assert_eq!(lease.pid, std::process::id());
        lease.release().unwrap();
        cleanup(&dir);
    }
    #[test]
    fn release_is_idempotent() {
        let dir = unique_dir("idempotent");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let mut lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        lease.release().unwrap();
        // Second release should be a no-op.
        lease.release().unwrap();
        cleanup(&dir);
    }
    #[test]
    fn debug_format() {
        let dir = unique_dir("debug_fmt");
        let store_path = dir.join("test.rvf");
        let _ = fs::write(&store_path, b"");
        let lease = WriterLease::acquire(&store_path, Duration::from_secs(1)).unwrap();
        let debug = format!("{:?}", lease);
        assert!(debug.contains("WriterLease"));
        assert!(debug.contains("lock_path"));
        cleanup(&dir);
    }
}