Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,907 @@
// SPARQL Abstract Syntax Tree (AST) types
//
// Provides type-safe representation of SPARQL 1.1 queries following
// the W3C specification: https://www.w3.org/TR/sparql11-query/
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Complete SPARQL query or update
///
/// Top-level AST node: the prologue (BASE / PREFIX declarations) plus the
/// query form or update operations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SparqlQuery {
    /// Base IRI for relative IRI resolution
    pub base: Option<Iri>,
    /// PREFIX declarations
    /// NOTE(review): presumably keyed by the prefix label without the
    /// trailing ':' — confirm against the parser.
    pub prefixes: HashMap<String, Iri>,
    /// The query form (SELECT, CONSTRUCT, ASK, DESCRIBE) or update operation
    pub body: QueryBody,
}
impl SparqlQuery {
    /// Wrap `body` in a query with an empty prologue (no BASE, no prefixes).
    pub fn new(body: QueryBody) -> Self {
        Self {
            base: None,
            prefixes: HashMap::default(),
            body,
        }
    }

    /// Builder-style setter for the BASE IRI.
    pub fn with_base(self, base: Iri) -> Self {
        Self {
            base: Some(base),
            ..self
        }
    }

    /// Builder-style registration of a single PREFIX declaration.
    pub fn with_prefix(mut self, prefix: impl Into<String>, iri: Iri) -> Self {
        self.prefixes.insert(prefix.into(), iri);
        self
    }
}

impl Default for SparqlQuery {
    /// An empty `SELECT *` query over an empty pattern.
    fn default() -> Self {
        Self::new(QueryBody::Select(SelectQuery::default()))
    }
}
/// Query body - either a query form or update operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum QueryBody {
    /// SELECT query form.
    Select(SelectQuery),
    /// CONSTRUCT query form.
    Construct(ConstructQuery),
    /// ASK query form.
    Ask(AskQuery),
    /// DESCRIBE query form.
    Describe(DescribeQuery),
    /// One or more update operations, executed in order.
    Update(Vec<UpdateOperation>),
}
/// Query form type
///
/// Payload-free discriminant mirroring the four query forms above.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QueryForm {
    Select,
    Construct,
    Ask,
    Describe,
}
/// SELECT query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SelectQuery {
    /// Result variables or expressions
    pub projection: Projection,
    /// Dataset clauses (FROM, FROM NAMED)
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause graph pattern
    pub where_clause: GraphPattern,
    /// Solution modifiers (ORDER BY / LIMIT / OFFSET / HAVING)
    pub modifier: SolutionModifier,
    /// VALUES clause for inline data
    pub values: Option<ValuesClause>,
}
impl Default for SelectQuery {
fn default() -> Self {
Self {
projection: Projection::All,
dataset: Vec::new(),
where_clause: GraphPattern::Empty,
modifier: SolutionModifier::default(),
values: None,
}
}
}
/// Projection in SELECT clause
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Projection {
    /// SELECT * - all variables
    All,
    /// SELECT DISTINCT ... (duplicate solutions removed)
    Distinct(Vec<ProjectionVar>),
    /// SELECT REDUCED ... (duplicates MAY be removed)
    Reduced(Vec<ProjectionVar>),
    /// SELECT var1 var2 ... (plain projection)
    Variables(Vec<ProjectionVar>),
}
/// Variable or expression in projection
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectionVar {
    /// Projected expression; often just `Expression::Variable`.
    pub expression: Expression,
    /// Output name from `(expr AS ?alias)`; None for plain variables.
    pub alias: Option<String>,
}
impl ProjectionVar {
    /// Plain `?name` projection entry (no alias).
    pub fn variable(name: impl Into<String>) -> Self {
        let expression = Expression::Variable(name.into());
        Self {
            expression,
            alias: None,
        }
    }

    /// `(expr AS ?alias)` projection entry.
    pub fn expr_as(expr: Expression, alias: impl Into<String>) -> Self {
        Self {
            expression: expr,
            alias: Some(alias.into()),
        }
    }
}
/// CONSTRUCT query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstructQuery {
    /// Template for constructing triples
    pub template: Vec<TriplePattern>,
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause
    pub where_clause: GraphPattern,
    /// Solution modifiers
    pub modifier: SolutionModifier,
}
impl Default for ConstructQuery {
    // Manual impl: GraphPattern has no Default, so derive is not possible.
    fn default() -> Self {
        Self {
            template: Vec::new(),
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
            modifier: SolutionModifier::default(),
        }
    }
}
/// ASK query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AskQuery {
    /// Dataset clauses
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause
    pub where_clause: GraphPattern,
}
impl Default for AskQuery {
    // Manual impl: GraphPattern has no Default, so derive is not possible.
    fn default() -> Self {
        Self {
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
        }
    }
}
/// DESCRIBE query
///
/// `Default` is derived: empty resources, empty dataset, no WHERE clause —
/// exactly what the previous hand-written impl produced (all fields are
/// `Vec`/`Option`, so the derive is available here, unlike the other query
/// forms that contain a `GraphPattern` directly).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DescribeQuery {
    /// Resources to describe (variables or IRIs).
    pub resources: Vec<VarOrIri>,
    /// Dataset clauses (FROM / FROM NAMED).
    pub dataset: Vec<DatasetClause>,
    /// Optional WHERE clause constraining the described resources.
    pub where_clause: Option<GraphPattern>,
}
/// Dataset clause (FROM / FROM NAMED)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetClause {
    /// Graph IRI named by the clause.
    pub iri: Iri,
    /// Presumably true for FROM NAMED, false for plain FROM — confirm
    /// against the parser.
    pub named: bool,
}
/// VALUES clause for inline data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValuesClause {
    /// Variables listed in the VALUES header, in order.
    pub variables: Vec<String>,
    /// One row per inline solution; a `None` cell (UNDEF) leaves the
    /// corresponding variable unbound (see the executor's Values handling).
    pub bindings: Vec<Vec<Option<RdfTerm>>>,
}
/// Graph pattern - the WHERE clause body
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GraphPattern {
    /// Empty pattern
    Empty,
    /// Basic Graph Pattern - set of triple patterns
    Bgp(Vec<TriplePattern>),
    /// Join of patterns (implicit AND)
    Join(Box<GraphPattern>, Box<GraphPattern>),
    /// Left outer join (OPTIONAL); the expression is the optional FILTER
    /// applied to the joined part.
    LeftJoin(Box<GraphPattern>, Box<GraphPattern>, Option<Expression>),
    /// Union of patterns (UNION)
    Union(Box<GraphPattern>, Box<GraphPattern>),
    /// Filter (FILTER)
    Filter(Box<GraphPattern>, Expression),
    /// Named graph (GRAPH)
    Graph(VarOrIri, Box<GraphPattern>),
    /// Service (FEDERATED query).
    /// NOTE(review): the bool presumably flags SILENT — confirm.
    Service(Iri, Box<GraphPattern>, bool),
    /// MINUS pattern
    Minus(Box<GraphPattern>, Box<GraphPattern>),
    /// EXISTS or NOT EXISTS.
    /// NOTE(review): which polarity the bool encodes is not visible here —
    /// confirm against the parser.
    Exists(Box<GraphPattern>, bool),
    /// BIND assignment: (expression, target variable, inner pattern); the
    /// executor evaluates the expression per solution of the inner pattern.
    Bind(Expression, String, Box<GraphPattern>),
    /// GROUP BY aggregation: (inner pattern, group conditions, aggregates
    /// paired with their output names).
    Group(
        Box<GraphPattern>,
        Vec<GroupCondition>,
        Vec<(Aggregate, String)>,
    ),
    /// Subquery
    SubSelect(Box<SelectQuery>),
    /// VALUES inline data
    Values(ValuesClause),
}
/// Triple pattern
///
/// Subject/object may be terms, variables, or blank nodes; the predicate is
/// a full property-path expression (`PropertyPath::Iri` in the simple case).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TriplePattern {
    pub subject: TermOrVariable,
    pub predicate: PropertyPath,
    pub object: TermOrVariable,
}
impl TriplePattern {
    /// Triple pattern with an arbitrary property-path predicate.
    pub fn new(subject: TermOrVariable, predicate: PropertyPath, object: TermOrVariable) -> Self {
        Self {
            subject,
            predicate,
            object,
        }
    }

    /// Triple pattern whose predicate is a single IRI (the common case).
    pub fn simple(subject: TermOrVariable, predicate: Iri, object: TermOrVariable) -> Self {
        Self::new(subject, PropertyPath::Iri(predicate), object)
    }
}
/// Term or variable in triple pattern
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TermOrVariable {
    /// Concrete RDF term (IRI, literal, or blank-node term).
    Term(RdfTerm),
    /// Query variable name.
    /// NOTE(review): presumably stored without the leading '?'/'$' sigil —
    /// confirm against the parser.
    Variable(String),
    /// Blank-node label appearing in the pattern.
    BlankNode(String),
}
impl TermOrVariable {
pub fn var(name: impl Into<String>) -> Self {
Self::Variable(name.into())
}
pub fn iri(iri: Iri) -> Self {
Self::Term(RdfTerm::Iri(iri))
}
pub fn literal(value: impl Into<String>) -> Self {
Self::Term(RdfTerm::Literal(Literal::simple(value)))
}
pub fn blank(id: impl Into<String>) -> Self {
Self::BlankNode(id.into())
}
}
/// Variable or IRI
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VarOrIri {
    /// Query variable name.
    Variable(String),
    /// Concrete IRI.
    Iri(Iri),
}
/// Property path expression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PropertyPath {
    /// Simple IRI predicate
    Iri(Iri),
    /// Variable predicate
    Variable(String),
    /// Inverse path (^path)
    Inverse(Box<PropertyPath>),
    /// Sequence path (path1/path2)
    Sequence(Box<PropertyPath>, Box<PropertyPath>),
    /// Alternative path (path1|path2)
    Alternative(Box<PropertyPath>, Box<PropertyPath>),
    /// Zero or more (*path)
    ZeroOrMore(Box<PropertyPath>),
    /// One or more (+path)
    OneOrMore(Box<PropertyPath>),
    /// Zero or one (?path)
    ZeroOrOne(Box<PropertyPath>),
    /// Negated property set (!(path1|path2))
    NegatedPropertySet(Vec<Iri>),
    /// Fixed length path {n}
    FixedLength(Box<PropertyPath>, usize),
    /// Range length path {n,m}; None upper bound means unbounded.
    RangeLength(Box<PropertyPath>, usize, Option<usize>),
}
/// RDF term
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum RdfTerm {
    /// IRI reference
    Iri(Iri),
    /// Literal value
    Literal(Literal),
    /// Blank node
    BlankNode(String),
}
impl RdfTerm {
pub fn iri(value: impl Into<String>) -> Self {
Self::Iri(Iri::new(value))
}
pub fn literal(value: impl Into<String>) -> Self {
Self::Literal(Literal::simple(value))
}
pub fn typed_literal(value: impl Into<String>, datatype: Iri) -> Self {
Self::Literal(Literal::typed(value, datatype))
}
pub fn lang_literal(value: impl Into<String>, lang: impl Into<String>) -> Self {
Self::Literal(Literal::language(value, lang))
}
pub fn blank(id: impl Into<String>) -> Self {
Self::BlankNode(id.into())
}
/// Check if this is an IRI
pub fn is_iri(&self) -> bool {
matches!(self, Self::Iri(_))
}
/// Check if this is a literal
pub fn is_literal(&self) -> bool {
matches!(self, Self::Literal(_))
}
/// Check if this is a blank node
pub fn is_blank_node(&self) -> bool {
matches!(self, Self::BlankNode(_))
}
}
/// IRI (Internationalized Resource Identifier)
///
/// Thin newtype over the raw IRI string; no validation or normalization is
/// performed here.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Iri(pub String);
impl Iri {
    /// RDF syntax namespace.
    const RDF_NS: &'static str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
    /// RDF Schema namespace.
    const RDFS_NS: &'static str = "http://www.w3.org/2000/01/rdf-schema#";
    /// XML Schema datatype namespace.
    const XSD_NS: &'static str = "http://www.w3.org/2001/XMLSchema#";

    /// Wrap a raw IRI string (no validation performed).
    pub fn new(value: impl Into<String>) -> Self {
        Self(value.into())
    }

    /// Borrow the IRI as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Build an IRI in the XSD namespace.
    fn xsd(local: &str) -> Self {
        Self::new(format!("{}{}", Self::XSD_NS, local))
    }

    /// rdf:type
    pub fn rdf_type() -> Self {
        Self::new(format!("{}{}", Self::RDF_NS, "type"))
    }

    /// rdfs:label
    pub fn rdfs_label() -> Self {
        Self::new(format!("{}{}", Self::RDFS_NS, "label"))
    }

    /// rdfs:comment
    pub fn rdfs_comment() -> Self {
        Self::new(format!("{}{}", Self::RDFS_NS, "comment"))
    }

    /// xsd:string
    pub fn xsd_string() -> Self {
        Self::xsd("string")
    }

    /// xsd:integer
    pub fn xsd_integer() -> Self {
        Self::xsd("integer")
    }

    /// xsd:decimal
    pub fn xsd_decimal() -> Self {
        Self::xsd("decimal")
    }

    /// xsd:double
    pub fn xsd_double() -> Self {
        Self::xsd("double")
    }

    /// xsd:boolean
    pub fn xsd_boolean() -> Self {
        Self::xsd("boolean")
    }

    /// xsd:date
    pub fn xsd_date() -> Self {
        Self::xsd("date")
    }

    /// xsd:dateTime
    pub fn xsd_datetime() -> Self {
        Self::xsd("dateTime")
    }
}
/// RDF Literal
///
/// A lexical form plus an optional language tag and a datatype IRI.
/// `Literal::language` fixes the datatype to rdf:langString; the other
/// constructors use a concrete XSD type.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Literal {
    /// Lexical form (string value)
    pub value: String,
    /// Optional language tag (Some only for language-tagged strings)
    pub language: Option<String>,
    /// Datatype IRI (defaults to xsd:string)
    pub datatype: Iri,
}
impl Literal {
/// Simple string literal
pub fn simple(value: impl Into<String>) -> Self {
Self {
value: value.into(),
language: None,
datatype: Iri::xsd_string(),
}
}
/// Typed literal
pub fn typed(value: impl Into<String>, datatype: Iri) -> Self {
Self {
value: value.into(),
language: None,
datatype,
}
}
/// Language-tagged literal
pub fn language(value: impl Into<String>, lang: impl Into<String>) -> Self {
Self {
value: value.into(),
language: Some(lang.into()),
datatype: Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"),
}
}
/// Integer literal
pub fn integer(value: i64) -> Self {
Self::typed(value.to_string(), Iri::xsd_integer())
}
/// Decimal literal
pub fn decimal(value: f64) -> Self {
Self::typed(value.to_string(), Iri::xsd_decimal())
}
/// Double literal
pub fn double(value: f64) -> Self {
Self::typed(value.to_string(), Iri::xsd_double())
}
/// Boolean literal
pub fn boolean(value: bool) -> Self {
Self::typed(if value { "true" } else { "false" }, Iri::xsd_boolean())
}
/// Try to parse as integer
pub fn as_integer(&self) -> Option<i64> {
self.value.parse().ok()
}
/// Try to parse as double
pub fn as_double(&self) -> Option<f64> {
self.value.parse().ok()
}
/// Try to parse as boolean
pub fn as_boolean(&self) -> Option<bool> {
match self.value.as_str() {
"true" | "1" => Some(true),
"false" | "0" => Some(false),
_ => None,
}
}
}
/// SPARQL expression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Expression {
    /// Variable reference
    Variable(String),
    /// Constant term
    Term(RdfTerm),
    /// Binary operation
    Binary(Box<Expression>, BinaryOp, Box<Expression>),
    /// Unary operation
    Unary(UnaryOp, Box<Expression>),
    /// Function call
    Function(FunctionCall),
    /// Aggregate function
    Aggregate(Aggregate),
    /// IN expression: (needle, list of candidates)
    In(Box<Expression>, Vec<Expression>),
    /// NOT IN expression
    NotIn(Box<Expression>, Vec<Expression>),
    /// EXISTS subquery
    Exists(Box<GraphPattern>),
    /// NOT EXISTS subquery
    NotExists(Box<GraphPattern>),
    /// Conditional (IF): (condition, then-branch, else-branch)
    If(Box<Expression>, Box<Expression>, Box<Expression>),
    /// COALESCE: first argument that evaluates to a bound value
    Coalesce(Vec<Expression>),
    /// BOUND test on a variable name
    Bound(String),
    /// isIRI test
    IsIri(Box<Expression>),
    /// isBlank test
    IsBlank(Box<Expression>),
    /// isLiteral test
    IsLiteral(Box<Expression>),
    /// isNumeric test
    IsNumeric(Box<Expression>),
    /// REGEX pattern matching: (text, pattern, optional flags)
    Regex(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
    /// LANG function
    Lang(Box<Expression>),
    /// DATATYPE function
    Datatype(Box<Expression>),
    /// STR function
    Str(Box<Expression>),
    /// IRI constructor
    Iri(Box<Expression>),
}
impl Expression {
pub fn var(name: impl Into<String>) -> Self {
Self::Variable(name.into())
}
pub fn term(t: RdfTerm) -> Self {
Self::Term(t)
}
pub fn literal(value: impl Into<String>) -> Self {
Self::Term(RdfTerm::literal(value))
}
pub fn integer(value: i64) -> Self {
Self::Term(RdfTerm::Literal(Literal::integer(value)))
}
pub fn binary(left: Expression, op: BinaryOp, right: Expression) -> Self {
Self::Binary(Box::new(left), op, Box::new(right))
}
pub fn unary(op: UnaryOp, expr: Expression) -> Self {
Self::Unary(op, Box::new(expr))
}
pub fn and(left: Expression, right: Expression) -> Self {
Self::binary(left, BinaryOp::And, right)
}
pub fn or(left: Expression, right: Expression) -> Self {
Self::binary(left, BinaryOp::Or, right)
}
pub fn eq(left: Expression, right: Expression) -> Self {
Self::binary(left, BinaryOp::Eq, right)
}
pub fn neq(left: Expression, right: Expression) -> Self {
Self::binary(left, BinaryOp::NotEq, right)
}
pub fn lt(left: Expression, right: Expression) -> Self {
Self::binary(left, BinaryOp::Lt, right)
}
pub fn gt(left: Expression, right: Expression) -> Self {
Self::binary(left, BinaryOp::Gt, right)
}
}
/// Binary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryOp {
    // Logical
    And,
    Or,
    // Comparison
    Eq,
    NotEq,
    Lt,
    LtEq,
    Gt,
    GtEq,
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    // Term / language tests (sameTerm, langMatches)
    SameTerm,
    LangMatches,
}
/// Unary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnaryOp {
    /// Logical negation (!)
    Not,
    /// Numeric unary plus (+)
    Plus,
    /// Numeric unary minus (-)
    Minus,
}
/// Function call
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionCall {
    /// Function name (built-in name or extension-function IRI).
    pub name: String,
    /// Positional arguments.
    pub args: Vec<Expression>,
}
impl FunctionCall {
pub fn new(name: impl Into<String>, args: Vec<Expression>) -> Self {
Self {
name: name.into(),
args,
}
}
}
/// Aggregate function
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Aggregate {
    /// COUNT(expr) or COUNT(*) when `expr` is None.
    Count {
        expr: Option<Box<Expression>>,
        distinct: bool,
    },
    /// SUM(expr)
    Sum {
        expr: Box<Expression>,
        distinct: bool,
    },
    /// AVG(expr)
    Avg {
        expr: Box<Expression>,
        distinct: bool,
    },
    /// MIN(expr)
    Min {
        expr: Box<Expression>,
    },
    /// MAX(expr)
    Max {
        expr: Box<Expression>,
    },
    /// GROUP_CONCAT(expr; SEPARATOR=...)
    GroupConcat {
        expr: Box<Expression>,
        separator: Option<String>,
        distinct: bool,
    },
    /// SAMPLE(expr) - an arbitrary value from the group.
    Sample {
        expr: Box<Expression>,
    },
}
/// Filter expression
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Filter {
    pub expression: Expression,
}
impl Filter {
    /// Wrap an expression as a FILTER.
    pub fn new(expression: Expression) -> Self {
        Self { expression }
    }
}
/// Solution modifier
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SolutionModifier {
    /// ORDER BY conditions, outermost key first.
    pub order_by: Vec<OrderCondition>,
    /// LIMIT: maximum number of solutions.
    pub limit: Option<usize>,
    /// OFFSET: number of leading solutions to skip.
    pub offset: Option<usize>,
    /// HAVING expression over grouped solutions.
    pub having: Option<Expression>,
}
impl SolutionModifier {
    /// Builder-style setter for LIMIT.
    pub fn with_limit(self, limit: usize) -> Self {
        Self {
            limit: Some(limit),
            ..self
        }
    }

    /// Builder-style setter for OFFSET.
    pub fn with_offset(self, offset: usize) -> Self {
        Self {
            offset: Some(offset),
            ..self
        }
    }

    /// Builder-style setter replacing all ORDER BY conditions.
    pub fn with_order(self, conditions: Vec<OrderCondition>) -> Self {
        Self {
            order_by: conditions,
            ..self
        }
    }

    /// Builder-style setter for HAVING.
    pub fn with_having(self, expr: Expression) -> Self {
        Self {
            having: Some(expr),
            ..self
        }
    }
}
/// ORDER BY condition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrderCondition {
    /// Sort key expression.
    pub expression: Expression,
    /// true = ASC, false = DESC.
    pub ascending: bool,
}
impl OrderCondition {
    /// Shared constructor for both directions.
    fn directed(expression: Expression, ascending: bool) -> Self {
        Self {
            expression,
            ascending,
        }
    }

    /// ASC(expr)
    pub fn asc(expr: Expression) -> Self {
        Self::directed(expr, true)
    }

    /// DESC(expr)
    pub fn desc(expr: Expression) -> Self {
        Self::directed(expr, false)
    }
}
/// GROUP BY condition
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GroupCondition {
    /// GROUP BY ?var
    Variable(String),
    /// GROUP BY (expr) or (expr AS ?var)
    Expression(Expression, Option<String>),
}
// ============================================================================
// SPARQL Update Operations
// ============================================================================
/// SPARQL Update operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateOperation {
    /// INSERT DATA { triples }
    InsertData(InsertData),
    /// DELETE DATA { triples }
    DeleteData(DeleteData),
    /// DELETE { pattern } INSERT { pattern } WHERE { pattern }
    Modify(Modify),
    /// LOAD <iri> INTO GRAPH <iri>; `destination` None loads into the
    /// default graph.
    Load {
        source: Iri,
        destination: Option<Iri>,
        silent: bool,
    },
    /// CLEAR GRAPH <iri>
    Clear { target: GraphTarget, silent: bool },
    /// CREATE GRAPH <iri>
    Create { graph: Iri, silent: bool },
    /// DROP GRAPH <iri>
    Drop { target: GraphTarget, silent: bool },
    /// COPY source TO destination
    Copy {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// MOVE source TO destination
    Move {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// ADD source TO destination
    Add {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
}
/// INSERT DATA operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InsertData {
    /// Ground quads to insert (no variables allowed in INSERT DATA).
    pub quads: Vec<Quad>,
}
/// DELETE DATA operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeleteData {
    /// Ground quads to delete (no variables allowed in DELETE DATA).
    pub quads: Vec<Quad>,
}
/// DELETE/INSERT with WHERE
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Modify {
    /// WITH <iri> graph the templates default to, if any.
    pub with_graph: Option<Iri>,
    /// DELETE template (None if the DELETE clause is absent).
    pub delete_pattern: Option<Vec<QuadPattern>>,
    /// INSERT template (None if the INSERT clause is absent).
    pub insert_pattern: Option<Vec<QuadPattern>>,
    /// USING / USING NAMED dataset clauses.
    pub using: Vec<DatasetClause>,
    /// WHERE pattern producing the bindings for the templates.
    pub where_pattern: GraphPattern,
}
/// Quad (triple with optional graph)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Quad {
    pub subject: RdfTerm,
    pub predicate: Iri,
    pub object: RdfTerm,
    /// None = default graph.
    pub graph: Option<Iri>,
}
/// Quad pattern (for DELETE/INSERT templates)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuadPattern {
    pub subject: TermOrVariable,
    pub predicate: VarOrIri,
    pub object: TermOrVariable,
    /// None = default graph (or the WITH graph, per Modify).
    pub graph: Option<VarOrIri>,
}
/// Graph target for management operations
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GraphTarget {
    /// DEFAULT graph.
    Default,
    /// GRAPH <iri>.
    Named(Iri),
    /// ALL graphs.
    All,
    /// NAMED graphs only.
    AllNamed,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructors produce the expected term kinds.
    #[test]
    fn test_rdf_term_creation() {
        assert!(RdfTerm::iri("http://example.org/resource").is_iri());
        assert!(RdfTerm::literal("hello").is_literal());
        assert!(RdfTerm::blank("b0").is_blank_node());
    }

    /// Lexical forms round-trip through the typed accessors.
    #[test]
    fn test_literal_parsing() {
        assert_eq!(Literal::integer(42).as_integer(), Some(42));
        let parsed = Literal::double(3.14).as_double().unwrap();
        assert!((parsed - 3.14).abs() < 0.001);
        assert_eq!(Literal::boolean(true).as_boolean(), Some(true));
    }

    /// The builder helpers assemble the expected expression tree.
    #[test]
    fn test_expression_builder() {
        let expr = Expression::and(
            Expression::eq(Expression::var("x"), Expression::integer(10)),
            Expression::gt(Expression::var("y"), Expression::integer(5)),
        );
        assert!(matches!(expr, Expression::Binary(_, BinaryOp::And, _)));
    }

    /// `simple` wraps the predicate IRI in PropertyPath::Iri.
    #[test]
    fn test_triple_pattern() {
        let pattern = TriplePattern::simple(
            TermOrVariable::var("s"),
            Iri::rdf_type(),
            TermOrVariable::iri(Iri::new("http://example.org/Person")),
        );
        assert!(matches!(pattern.subject, TermOrVariable::Variable(_)));
        assert!(matches!(pattern.predicate, PropertyPath::Iri(_)));
    }
}

View File

@@ -0,0 +1,928 @@
// SPARQL Query Executor for WASM
//
// Executes parsed SPARQL queries against an in-memory triple store.
// Simplified version for WASM environments (no async, no complex aggregates).
use super::ast::*;
use super::triple_store::{Triple, TripleStore};
use super::{SparqlError, SparqlResult};
use std::collections::HashMap;
/// Static empty HashMap for default prefixes
///
/// Shared fallback so `SparqlContext::new` can hand out a `'static`
/// reference without allocating per context.
/// NOTE(review): `std::sync::LazyLock` could replace `once_cell` if the
/// crate's MSRV is >= 1.80 — confirm before switching.
static EMPTY_PREFIXES: once_cell::sync::Lazy<HashMap<String, Iri>> =
    once_cell::sync::Lazy::new(HashMap::new);
/// Solution binding - maps variables to RDF terms
pub type Binding = HashMap<String, RdfTerm>;
/// Solution sequence - list of bindings
pub type Solutions = Vec<Binding>;
/// Execution context for SPARQL queries
pub struct SparqlContext<'a> {
    /// Triple store the query runs against.
    pub store: &'a TripleStore,
    /// BASE IRI from the query prologue, if any.
    pub base: Option<&'a Iri>,
    /// PREFIX declarations from the query prologue.
    pub prefixes: &'a HashMap<String, Iri>,
}
impl<'a> SparqlContext<'a> {
pub fn new(store: &'a TripleStore) -> Self {
Self {
store,
base: None,
prefixes: &EMPTY_PREFIXES,
}
}
pub fn with_base(mut self, base: Option<&'a Iri>) -> Self {
self.base = base;
self
}
pub fn with_prefixes(mut self, prefixes: &'a HashMap<String, Iri>) -> Self {
self.prefixes = prefixes;
self
}
}
/// Execute a parsed SPARQL query or update against `store`.
///
/// Builds an execution context from the query prologue, then dispatches on
/// the query form; update operations are applied in declaration order.
pub fn execute_sparql(store: &TripleStore, query: &SparqlQuery) -> SparqlResult<QueryResult> {
    let mut ctx = SparqlContext::new(store)
        .with_base(query.base.as_ref())
        .with_prefixes(&query.prefixes);
    match &query.body {
        QueryBody::Select(q) => execute_select(&mut ctx, q).map(QueryResult::Select),
        QueryBody::Construct(q) => execute_construct(&mut ctx, q).map(QueryResult::Construct),
        QueryBody::Ask(q) => execute_ask(&mut ctx, q).map(QueryResult::Ask),
        QueryBody::Describe(q) => execute_describe(&mut ctx, q).map(QueryResult::Describe),
        QueryBody::Update(ops) => {
            // Apply each update in order; abort on the first failure.
            for op in ops {
                execute_update(&mut ctx, op)?;
            }
            Ok(QueryResult::Update)
        }
    }
}
/// Query result types
#[derive(Debug, Clone)]
pub enum QueryResult {
    /// SELECT: table of variable bindings.
    Select(SelectResult),
    /// CONSTRUCT: materialized triples.
    Construct(Vec<Triple>),
    /// ASK: whether any solution exists.
    Ask(bool),
    /// DESCRIBE: triples describing the requested resources.
    Describe(Vec<Triple>),
    /// Update operations completed (no payload).
    Update,
}
/// SELECT query result
#[derive(Debug, Clone)]
pub struct SelectResult {
    /// Result column names, in projection order.
    pub variables: Vec<String>,
    /// One binding per solution row; a column may be absent in a row
    /// (unbound variable).
    pub bindings: Solutions,
}
impl SelectResult {
    /// Bundle column names with their solution rows.
    pub fn new(variables: Vec<String>, bindings: Solutions) -> Self {
        Self {
            variables,
            bindings,
        }
    }
}
// ============================================================================
// SELECT Query Execution
// ============================================================================
fn execute_select(ctx: &mut SparqlContext, query: &SelectQuery) -> SparqlResult<SelectResult> {
// Evaluate WHERE clause
let mut solutions = evaluate_graph_pattern(ctx, &query.where_clause)?;
// Apply solution modifiers
solutions = apply_modifiers(solutions, &query.modifier)?;
// Project variables
let (variables, bindings) = project_solutions(&query.projection, solutions)?;
Ok(SelectResult {
variables,
bindings,
})
}
/// Apply a SELECT projection, returning the result column names and the
/// projected bindings.
fn project_solutions(
    projection: &Projection,
    solutions: Solutions,
) -> SparqlResult<(Vec<String>, Solutions)> {
    match projection {
        Projection::All => {
            // SELECT *: expose every variable bound in any solution,
            // sorted for deterministic column order.
            let mut vars: Vec<String> = Vec::new();
            for binding in &solutions {
                for var in binding.keys() {
                    if !vars.contains(var) {
                        vars.push(var.clone());
                    }
                }
            }
            vars.sort();
            Ok((vars, solutions))
        }
        Projection::Variables(vars) | Projection::Distinct(vars) | Projection::Reduced(vars) => {
            // Result column name: explicit alias > the variable's own name >
            // a placeholder for unaliased expressions. Previously EVERY
            // unaliased expression was named "_expr", so two of them
            // silently clobbered each other in the binding map; the first
            // keeps "_expr" for backward compatibility, later ones get
            // "_expr2", "_expr3", ...
            let mut expr_count = 0usize;
            let var_names: Vec<String> = vars
                .iter()
                .map(|v| {
                    if let Some(alias) = &v.alias {
                        return alias.clone();
                    }
                    if let Expression::Variable(name) = &v.expression {
                        return name.clone();
                    }
                    expr_count += 1;
                    if expr_count == 1 {
                        "_expr".to_string()
                    } else {
                        format!("_expr{}", expr_count)
                    }
                })
                .collect();
            let mut projected: Solutions = Vec::new();
            for binding in solutions {
                let mut new_binding = Binding::new();
                for (i, pv) in vars.iter().enumerate() {
                    // An expression evaluating to unbound leaves the column
                    // absent in this row.
                    if let Some(value) = evaluate_expression(&pv.expression, &binding)? {
                        new_binding.insert(var_names[i].clone(), value);
                    }
                }
                if matches!(projection, Projection::Distinct(_)) {
                    // DISTINCT: linear-scan dedup — O(n^2), acceptable for
                    // the small result sets this WASM build targets.
                    if !projected.iter().any(|b| bindings_equal(b, &new_binding)) {
                        projected.push(new_binding);
                    }
                } else {
                    // REDUCED is treated as plain projection (duplicates
                    // permitted, which the spec allows).
                    projected.push(new_binding);
                }
            }
            Ok((var_names, projected))
        }
    }
}
/// True when two bindings map exactly the same variables to equal terms.
fn bindings_equal(a: &Binding, b: &Binding) -> bool {
    a.len() == b.len() && a.iter().all(|(k, v)| b.get(k) == Some(v))
}
// ============================================================================
// Graph Pattern Evaluation
// ============================================================================
/// Recursively evaluate a graph pattern into a solution sequence.
///
/// Supported forms: Empty, BGP, Join, LeftJoin (OPTIONAL), Union, Filter,
/// Minus, Bind, Values. Everything else (GRAPH, SERVICE, EXISTS, GROUP BY,
/// subselects) returns UnsupportedOperation in this WASM build.
fn evaluate_graph_pattern(ctx: &SparqlContext, pattern: &GraphPattern) -> SparqlResult<Solutions> {
    match pattern {
        // One empty binding, not zero: the identity element for joins.
        GraphPattern::Empty => Ok(vec![Binding::new()]),
        GraphPattern::Bgp(triples) => evaluate_bgp(ctx, triples),
        GraphPattern::Join(left, right) => {
            let left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            join_solutions(left_solutions, right_solutions)
        }
        GraphPattern::LeftJoin(left, right, condition) => {
            // OPTIONAL: left solutions survive even without a right match.
            let left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            left_join_solutions(left_solutions, right_solutions, condition.as_ref())
        }
        GraphPattern::Union(left, right) => {
            // UNION is plain concatenation; duplicates are preserved.
            let mut left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            left_solutions.extend(right_solutions);
            Ok(left_solutions)
        }
        GraphPattern::Filter(inner, condition) => {
            let solutions = evaluate_graph_pattern(ctx, inner)?;
            filter_solutions(solutions, condition)
        }
        GraphPattern::Minus(left, right) => {
            let left_solutions = evaluate_graph_pattern(ctx, left)?;
            let right_solutions = evaluate_graph_pattern(ctx, right)?;
            minus_solutions(left_solutions, right_solutions)
        }
        GraphPattern::Bind(expr, var, inner) => {
            // BIND: evaluate the expression under each solution of the
            // inner pattern; evaluation to unbound leaves the variable unset.
            let mut solutions = evaluate_graph_pattern(ctx, inner)?;
            for binding in &mut solutions {
                if let Some(value) = evaluate_expression(expr, binding)? {
                    binding.insert(var.clone(), value);
                }
            }
            Ok(solutions)
        }
        GraphPattern::Values(values) => {
            // Inline VALUES rows; None cells (UNDEF) leave the variable
            // unbound in that row.
            let mut solutions = Vec::new();
            for row in &values.bindings {
                let mut binding = Binding::new();
                for (i, var) in values.variables.iter().enumerate() {
                    if let Some(Some(term)) = row.get(i) {
                        binding.insert(var.clone(), term.clone());
                    }
                }
                solutions.push(binding);
            }
            Ok(solutions)
        }
        _ => Err(SparqlError::UnsupportedOperation(format!(
            "Graph pattern not supported in WASM build: {:?}",
            pattern
        ))),
    }
}
/// Evaluate a basic graph pattern by matching each triple pattern in turn,
/// threading the accumulated bindings through every subsequent match.
fn evaluate_bgp(ctx: &SparqlContext, patterns: &[TriplePattern]) -> SparqlResult<Solutions> {
    // Start from the single empty binding (the join identity).
    let mut current = vec![Binding::new()];
    for pattern in patterns {
        let mut extended = Vec::new();
        for binding in &current {
            extended.extend(match_triple_pattern(ctx, pattern, binding)?);
        }
        if extended.is_empty() {
            // No binding can satisfy the remaining patterns either.
            return Ok(extended);
        }
        current = extended;
    }
    Ok(current)
}
fn match_triple_pattern(
ctx: &SparqlContext,
pattern: &TriplePattern,
binding: &Binding,
) -> SparqlResult<Solutions> {
// Resolve pattern components
let subject = resolve_term_or_var(&pattern.subject, binding);
let object = resolve_term_or_var(&pattern.object, binding);
// Handle simple IRI predicate (most common case)
if let PropertyPath::Iri(iri) = &pattern.predicate {
return match_simple_triple(
ctx,
subject,
Some(iri),
object,
&pattern.subject,
&pattern.object,
binding,
);
}
// For now, only support simple IRI predicates in WASM
Err(SparqlError::PropertyPathError(
"Complex property paths not yet supported in WASM build".to_string(),
))
}
/// Resolve a pattern position to a concrete term, if it has one: constants
/// stay as-is, variables look up the current binding (None when unbound),
/// and blank-node labels are treated as concrete blank-node terms.
fn resolve_term_or_var(tov: &TermOrVariable, binding: &Binding) -> Option<RdfTerm> {
    match tov {
        TermOrVariable::Variable(name) => binding.get(name).cloned(),
        TermOrVariable::Term(term) => Some(term.clone()),
        TermOrVariable::BlankNode(label) => Some(RdfTerm::BlankNode(label.clone())),
    }
}
/// Scan the store for triples matching (subject?, predicate, object?) and
/// extend `binding` with subject/object variable bindings for each hit.
///
/// `subject`/`object` are the already-resolved concrete terms (None when
/// that position is an unbound variable); `subj_pattern`/`obj_pattern` are
/// consulted again to learn WHICH variable to bind.
///
/// NOTE(review): blank nodes in the pattern arrive here as concrete terms
/// (see resolve_term_or_var), whereas SPARQL treats query blank nodes like
/// variables — confirm this simplification is intended.
fn match_simple_triple(
    ctx: &SparqlContext,
    subject: Option<RdfTerm>,
    predicate: Option<&Iri>,
    object: Option<RdfTerm>,
    subj_pattern: &TermOrVariable,
    obj_pattern: &TermOrVariable,
    binding: &Binding,
) -> SparqlResult<Solutions> {
    let triples = ctx
        .store
        .query(subject.as_ref(), predicate, object.as_ref());
    let mut solutions = Vec::new();
    for triple in triples {
        let mut new_binding = binding.clone();
        let mut matches = true;
        // Bind subject variable
        if let TermOrVariable::Variable(var) = subj_pattern {
            if let Some(existing) = new_binding.get(var) {
                // Defensive re-check: when the variable was already bound
                // the store query was filtered on it, so this rarely fires.
                if existing != &triple.subject {
                    matches = false;
                }
            } else {
                new_binding.insert(var.clone(), triple.subject.clone());
            }
        }
        // Bind object variable
        if matches {
            if let TermOrVariable::Variable(var) = obj_pattern {
                if let Some(existing) = new_binding.get(var) {
                    if existing != &triple.object {
                        matches = false;
                    }
                } else {
                    new_binding.insert(var.clone(), triple.object.clone());
                }
            }
        }
        if matches {
            solutions.push(new_binding);
        }
    }
    Ok(solutions)
}
// ============================================================================
// Solution Operations
// ============================================================================
/// Inner join: merge every compatible pair of left/right bindings.
fn join_solutions(left: Solutions, right: Solutions) -> SparqlResult<Solutions> {
    // Either side empty -> empty join; skip the nested loop entirely.
    if left.is_empty() || right.is_empty() {
        return Ok(Vec::new());
    }
    let mut joined = Vec::new();
    for l in &left {
        joined.extend(right.iter().filter_map(|r| merge_bindings(l, r)));
    }
    Ok(joined)
}
/// OPTIONAL (left outer join): merged solutions that pass the optional
/// FILTER condition are kept; a left solution with no surviving merge is
/// kept unextended (this is what makes the join "left outer").
fn left_join_solutions(
    left: Solutions,
    right: Solutions,
    condition: Option<&Expression>,
) -> SparqlResult<Solutions> {
    let mut result = Vec::new();
    for l in &left {
        let mut found_match = false;
        for r in &right {
            if let Some(merged) = merge_bindings(l, r) {
                // Check condition if present; absent condition accepts all.
                let include = if let Some(cond) = condition {
                    evaluate_expression_as_bool(cond, &merged)?
                } else {
                    true
                };
                if include {
                    result.push(merged);
                    found_match = true;
                }
            }
        }
        // No compatible right solution passed the filter: keep the bare
        // left solution.
        if !found_match {
            result.push(l.clone());
        }
    }
    Ok(result)
}
/// SPARQL MINUS: keep a left solution unless some right solution is
/// compatible with it AND shares at least one variable.
///
/// The shared-variable requirement comes from the SPARQL 1.1 definition of
/// Minus(Ω1, Ω2): a right mapping with a disjoint domain is trivially
/// "compatible" but must NOT remove the left solution. The previous code
/// omitted that check, so e.g. `MINUS { ?a ?b ?c }` with variables unrelated
/// to the outer pattern wrongly deleted every solution.
fn minus_solutions(left: Solutions, right: Solutions) -> SparqlResult<Solutions> {
    let mut result = Vec::new();
    for l in &left {
        let excluded = right
            .iter()
            .any(|r| l.keys().any(|k| r.contains_key(k)) && bindings_compatible(l, r));
        if !excluded {
            result.push(l.clone());
        }
    }
    Ok(result)
}
/// Merge two bindings into one; None if they disagree on a shared variable.
fn merge_bindings(a: &Binding, b: &Binding) -> Option<Binding> {
    let mut merged = a.clone();
    for (var, term) in b {
        match merged.get(var) {
            // Conflicting value for a shared variable: not mergeable.
            Some(existing) if existing != term => return None,
            // Same value already present: nothing to do.
            Some(_) => {}
            // New variable: adopt it.
            None => {
                merged.insert(var.clone(), term.clone());
            }
        }
    }
    Some(merged)
}
/// Compatible = the two bindings agree on every variable bound in both
/// (variables bound on only one side never conflict).
fn bindings_compatible(a: &Binding, b: &Binding) -> bool {
    a.iter()
        .all(|(var, term)| b.get(var).map_or(true, |other| other == term))
}
/// Keep only solutions for which the FILTER condition evaluates to true;
/// expression-evaluation errors propagate to the caller.
fn filter_solutions(solutions: Solutions, condition: &Expression) -> SparqlResult<Solutions> {
    let mut kept = Vec::with_capacity(solutions.len());
    for binding in solutions {
        if evaluate_expression_as_bool(condition, &binding)? {
            kept.push(binding);
        }
    }
    Ok(kept)
}
// ============================================================================
// Solution Modifiers
// ============================================================================
fn apply_modifiers(
mut solutions: Solutions,
modifier: &SolutionModifier,
) -> SparqlResult<Solutions> {
// ORDER BY
if !modifier.order_by.is_empty() {
solutions.sort_by(|a, b| {
for cond in &modifier.order_by {
let va = evaluate_expression(&cond.expression, a).ok().flatten();
let vb = evaluate_expression(&cond.expression, b).ok().flatten();
let ord = match (va, vb) {
(Some(ta), Some(tb)) => compare_terms(&ta, &tb),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => std::cmp::Ordering::Equal,
};
let ord = if cond.ascending { ord } else { ord.reverse() };
if ord != std::cmp::Ordering::Equal {
return ord;
}
}
std::cmp::Ordering::Equal
});
}
// OFFSET
if let Some(offset) = modifier.offset {
if offset < solutions.len() {
solutions = solutions.into_iter().skip(offset).collect();
} else {
solutions.clear();
}
}
// LIMIT
if let Some(limit) = modifier.limit {
solutions.truncate(limit);
}
Ok(solutions)
}
/// Ordering over terms for ORDER BY.
///
/// Literal pairs compare numerically when both lexical forms parse as f64,
/// otherwise lexicographically; IRIs compare by string. Mixed-kind pairs
/// (e.g. IRI vs literal) compare Equal, which deviates from the SPARQL
/// ordering (blank nodes < IRIs < literals) — NOTE(review): confirm this is
/// acceptable for this build.
fn compare_terms(a: &RdfTerm, b: &RdfTerm) -> std::cmp::Ordering {
    match (a, b) {
        (RdfTerm::Literal(la), RdfTerm::Literal(lb)) => {
            if let (Some(na), Some(nb)) = (la.as_double(), lb.as_double()) {
                // partial_cmp is None only when a NaN is involved (a "NaN"
                // lexical form does parse); treat that as Equal.
                na.partial_cmp(&nb).unwrap_or(std::cmp::Ordering::Equal)
            } else {
                la.value.cmp(&lb.value)
            }
        }
        (RdfTerm::Iri(ia), RdfTerm::Iri(ib)) => ia.as_str().cmp(ib.as_str()),
        _ => std::cmp::Ordering::Equal,
    }
}
// ============================================================================
// Expression Evaluation
// ============================================================================
/// Evaluate a SPARQL expression against a single solution binding.
///
/// Returns `Ok(None)` for "unbound" results (e.g. an unbound variable,
/// or LANG/DATATYPE applied to a non-literal) and `Err` only for
/// expression forms this simplified build does not implement.
fn evaluate_expression(expr: &Expression, binding: &Binding) -> SparqlResult<Option<RdfTerm>> {
    match expr {
        // A variable evaluates to its bound term, if any.
        Expression::Variable(var) => Ok(binding.get(var).cloned()),
        // A constant RDF term evaluates to itself.
        Expression::Term(term) => Ok(Some(term.clone())),
        Expression::Binary(left, op, right) => {
            let lv = evaluate_expression(left, binding)?;
            let rv = evaluate_expression(right, binding)?;
            evaluate_binary_op(lv, *op, rv)
        }
        Expression::Unary(op, inner) => {
            let v = evaluate_expression(inner, binding)?;
            evaluate_unary_op(*op, v)
        }
        // BOUND(?x): true iff the variable has a binding.
        Expression::Bound(var) => Ok(Some(RdfTerm::Literal(Literal::boolean(
            binding.contains_key(var),
        )))),
        // IF(cond, a, b): only the selected branch is evaluated.
        Expression::If(cond, then_expr, else_expr) => {
            if evaluate_expression_as_bool(cond, binding)? {
                evaluate_expression(then_expr, binding)
            } else {
                evaluate_expression(else_expr, binding)
            }
        }
        // COALESCE: first sub-expression producing a value wins.
        Expression::Coalesce(exprs) => {
            for e in exprs {
                if let Some(v) = evaluate_expression(e, binding)? {
                    return Ok(Some(v));
                }
            }
            Ok(None)
        }
        // isIRI/isBlank/isLiteral: false when the argument is unbound.
        Expression::IsIri(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(Some(RdfTerm::Literal(Literal::boolean(
                v.map(|t| t.is_iri()).unwrap_or(false),
            ))))
        }
        Expression::IsBlank(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(Some(RdfTerm::Literal(Literal::boolean(
                v.map(|t| t.is_blank_node()).unwrap_or(false),
            ))))
        }
        Expression::IsLiteral(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(Some(RdfTerm::Literal(Literal::boolean(
                v.map(|t| t.is_literal()).unwrap_or(false),
            ))))
        }
        // STR(): lexical form of the term as a plain literal.
        Expression::Str(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(v.map(|t| RdfTerm::literal(term_to_string(&t))))
        }
        // LANG(): language tag ("" when absent); unbound for non-literals.
        Expression::Lang(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(v.and_then(|t| {
                if let RdfTerm::Literal(lit) = t {
                    Some(RdfTerm::literal(lit.language.unwrap_or_default()))
                } else {
                    None
                }
            }))
        }
        // DATATYPE(): the literal's datatype IRI; unbound for non-literals.
        Expression::Datatype(e) => {
            let v = evaluate_expression(e, binding)?;
            Ok(v.and_then(|t| {
                if let RdfTerm::Literal(lit) = t {
                    Some(RdfTerm::Iri(lit.datatype))
                } else {
                    None
                }
            }))
        }
        // Anything else (aggregates, EXISTS, string functions, ...) is
        // out of scope for this build.
        _ => Err(SparqlError::UnsupportedOperation(
            "Complex expressions not yet supported in WASM build".to_string(),
        )),
    }
}
fn evaluate_expression_as_bool(expr: &Expression, binding: &Binding) -> SparqlResult<bool> {
let value = evaluate_expression(expr, binding)?;
Ok(match value {
None => false,
Some(RdfTerm::Literal(lit)) => {
if let Some(b) = lit.as_boolean() {
b
} else if let Some(n) = lit.as_double() {
n != 0.0
} else {
!lit.value.is_empty()
}
}
Some(_) => true,
})
}
/// Apply a binary operator to two (possibly unbound) evaluated operands.
///
/// Logical ops coerce unbound to false; relational ops yield unbound if
/// either side is unbound; arithmetic yields unbound on non-numeric
/// operands or division by zero. Anything else is unsupported here.
fn evaluate_binary_op(
    left: Option<RdfTerm>,
    op: BinaryOp,
    right: Option<RdfTerm>,
) -> SparqlResult<Option<RdfTerm>> {
    use std::cmp::Ordering;
    match op {
        BinaryOp::And | BinaryOp::Or => {
            let lb = left.as_ref().map(|t| term_to_bool(t)).unwrap_or(false);
            let rb = right.as_ref().map(|t| term_to_bool(t)).unwrap_or(false);
            let v = if matches!(op, BinaryOp::And) {
                lb && rb
            } else {
                lb || rb
            };
            Ok(Some(RdfTerm::Literal(Literal::boolean(v))))
        }
        // Structural (term) equality, including both-unbound == both-unbound.
        BinaryOp::Eq => Ok(Some(RdfTerm::Literal(Literal::boolean(left == right)))),
        BinaryOp::NotEq => Ok(Some(RdfTerm::Literal(Literal::boolean(left != right)))),
        BinaryOp::Lt | BinaryOp::LtEq | BinaryOp::Gt | BinaryOp::GtEq => {
            let (l, r) = match (&left, &right) {
                (Some(l), Some(r)) => (l, r),
                _ => return Ok(None),
            };
            let cmp = compare_terms(l, r);
            let v = match op {
                BinaryOp::Lt => cmp == Ordering::Less,
                BinaryOp::LtEq => cmp != Ordering::Greater,
                BinaryOp::Gt => cmp == Ordering::Greater,
                BinaryOp::GtEq => cmp != Ordering::Less,
                _ => unreachable!(),
            };
            Ok(Some(RdfTerm::Literal(Literal::boolean(v))))
        }
        BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => {
            let ln = left.and_then(|t| term_to_number(&t));
            let rn = right.and_then(|t| term_to_number(&t));
            let (l, r) = match ln.zip(rn) {
                Some(pair) => pair,
                None => return Ok(None),
            };
            let v = match op {
                BinaryOp::Add => l + r,
                BinaryOp::Sub => l - r,
                BinaryOp::Mul => l * r,
                // Division by zero produces an unbound result, not a panic.
                BinaryOp::Div if r == 0.0 => return Ok(None),
                BinaryOp::Div => l / r,
                _ => unreachable!(),
            };
            Ok(Some(RdfTerm::Literal(Literal::decimal(v))))
        }
        _ => Err(SparqlError::UnsupportedOperation(format!(
            "Binary operator not supported: {:?}",
            op
        ))),
    }
}
/// Apply a unary operator: `!` coerces (unbound => false) and negates,
/// unary plus is the identity, unary minus negates a numeric literal
/// (unbound for non-numeric input).
fn evaluate_unary_op(op: UnaryOp, value: Option<RdfTerm>) -> SparqlResult<Option<RdfTerm>> {
    Ok(match op {
        UnaryOp::Not => {
            let truthy = value.as_ref().map(|t| term_to_bool(t)).unwrap_or(false);
            Some(RdfTerm::Literal(Literal::boolean(!truthy)))
        }
        UnaryOp::Plus => value,
        UnaryOp::Minus => value
            .and_then(|t| term_to_number(&t))
            .map(|n| RdfTerm::Literal(Literal::decimal(-n))),
    })
}
/// Lexical form of a term, as used by STR(): the raw IRI string, the
/// literal's value without quoting, or `_:id` for blank nodes.
fn term_to_string(term: &RdfTerm) -> String {
    match term {
        RdfTerm::Iri(iri) => iri.as_str().to_owned(),
        RdfTerm::Literal(lit) => lit.value.to_owned(),
        RdfTerm::BlankNode(id) => format!("_:{}", id),
    }
}
/// Numeric value of a term: literals parse via `as_double`; IRIs and
/// blank nodes have no numeric value.
fn term_to_number(term: &RdfTerm) -> Option<f64> {
    if let RdfTerm::Literal(lit) = term {
        lit.as_double()
    } else {
        None
    }
}
/// Boolean coercion of a term: boolean literals use their value, numeric
/// literals are true when non-zero, other literals are true when
/// non-empty; IRIs and blank nodes are always true.
fn term_to_bool(term: &RdfTerm) -> bool {
    match term {
        RdfTerm::Literal(lit) => lit
            .as_boolean()
            .or_else(|| lit.as_double().map(|n| n != 0.0))
            .unwrap_or_else(|| !lit.value.is_empty()),
        _ => true,
    }
}
// ============================================================================
// Other Query Forms
// ============================================================================
/// Execute a CONSTRUCT query: solve the WHERE clause, apply solution
/// modifiers, then instantiate the template once per solution. Template
/// triples whose subject or object fails to resolve, or whose predicate
/// is a complex property path rather than a plain IRI, are skipped.
fn execute_construct(ctx: &SparqlContext, query: &ConstructQuery) -> SparqlResult<Vec<Triple>> {
    let solutions = apply_modifiers(
        evaluate_graph_pattern(ctx, &query.where_clause)?,
        &query.modifier,
    )?;
    let mut constructed = Vec::new();
    for binding in solutions {
        for pattern in &query.template {
            if let PropertyPath::Iri(p) = &pattern.predicate {
                let s = resolve_term_or_var(&pattern.subject, &binding);
                let o = resolve_term_or_var(&pattern.object, &binding);
                if let (Some(s), Some(o)) = (s, o) {
                    constructed.push(Triple::new(s, p.clone(), o));
                }
            }
        }
    }
    Ok(constructed)
}
/// Execute an ASK query: true iff the WHERE clause has any solution.
fn execute_ask(ctx: &SparqlContext, query: &AskQuery) -> SparqlResult<bool> {
    evaluate_graph_pattern(ctx, &query.where_clause).map(|solutions| !solutions.is_empty())
}
fn execute_describe(ctx: &SparqlContext, query: &DescribeQuery) -> SparqlResult<Vec<Triple>> {
let mut resources: Vec<RdfTerm> = Vec::new();
// Get resources from query
for r in &query.resources {
match r {
VarOrIri::Iri(iri) => resources.push(RdfTerm::Iri(iri.clone())),
VarOrIri::Variable(var) => {
if let Some(pattern) = &query.where_clause {
let solutions = evaluate_graph_pattern(ctx, pattern)?;
for binding in solutions {
if let Some(term) = binding.get(var) {
if !resources.contains(term) {
resources.push(term.clone());
}
}
}
}
}
}
}
// Get all triples about each resource
let mut triples = Vec::new();
for resource in resources {
// Triples where resource is subject
triples.extend(ctx.store.query(Some(&resource), None, None));
// Triples where resource is object
triples.extend(ctx.store.query(None, None, Some(&resource)));
}
Ok(triples)
}
// ============================================================================
// Update Operations (Simplified)
// ============================================================================
/// Update operations are intentionally unimplemented in the simplified
/// WASM engine; every call reports an unsupported-operation error.
fn execute_update(_ctx: &SparqlContext, _op: &UpdateOperation) -> SparqlResult<()> {
    let reason = "Update operations not yet supported in WASM build".to_string();
    Err(SparqlError::UnsupportedOperation(reason))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sparql::parser::parse_sparql;
    /// Build a small fixture store: one person with an rdf:type, a name
    /// literal, and an integer age.
    fn setup_test_store() -> TripleStore {
        let store = TripleStore::new();
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/name"),
            RdfTerm::literal("Alice"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/age"),
            RdfTerm::Literal(Literal::integer(30)),
        ));
        store
    }
    // A wildcard SELECT over the fixture must yield at least one row.
    #[test]
    fn test_simple_select() {
        let store = setup_test_store();
        let query = parse_sparql("SELECT ?s ?p ?o WHERE { ?s ?p ?o }").unwrap();
        let result = execute_sparql(&store, &query).unwrap();
        if let QueryResult::Select(select) = result {
            assert!(!select.bindings.is_empty());
        } else {
            panic!("Expected SELECT result");
        }
    }
    // FILTER on string equality narrows the result to the single match.
    #[test]
    fn test_select_with_filter() {
        let store = setup_test_store();
        let query = parse_sparql(
            r#"
            SELECT ?name WHERE {
                ?s <http://example.org/name> ?name .
                FILTER(?name = "Alice")
            }
        "#,
        )
        .unwrap();
        let result = execute_sparql(&store, &query).unwrap();
        if let QueryResult::Select(select) = result {
            assert_eq!(select.bindings.len(), 1);
        }
    }
    // ASK with a fully ground triple that exists must return true.
    #[test]
    fn test_ask_query() {
        let store = setup_test_store();
        let query = parse_sparql(
            r#"
            ASK { <http://example.org/person/1> <http://example.org/name> "Alice" }
        "#,
        )
        .unwrap();
        let result = execute_sparql(&store, &query).unwrap();
        assert!(matches!(result, QueryResult::Ask(true)));
    }
}

View File

@@ -0,0 +1,124 @@
// SPARQL (SPARQL Protocol and RDF Query Language) module for rvlite
//
// Provides W3C-compliant SPARQL 1.1 query support for RDF data with
// in-memory storage for WASM environments.
//
// Features:
// - SPARQL 1.1 Query Language (SELECT, CONSTRUCT, ASK, DESCRIBE)
// - Basic Update Language (INSERT DATA, DELETE DATA)
// - In-memory RDF triple store with efficient indexing
// - Property paths (basic support)
// - FILTER expressions and built-in functions
// - WASM-compatible implementation
#![allow(dead_code)]
#![allow(unused_variables)]
#![allow(unused_mut)]
pub mod ast;
pub mod executor;
pub mod parser;
pub mod triple_store;
pub use ast::{
Aggregate, AskQuery, ConstructQuery, DeleteData, DescribeQuery, Expression, GraphPattern,
InsertData, Iri, Literal, OrderCondition, QueryBody, RdfTerm, SelectQuery, SolutionModifier,
SparqlQuery, TriplePattern, UpdateOperation,
};
pub use executor::{execute_sparql, SparqlContext};
pub use parser::parse_sparql;
pub use triple_store::{Triple, TripleStore};
/// Errors produced while parsing or executing SPARQL queries.
#[derive(Debug, Clone)]
pub enum SparqlError {
    /// The query text could not be parsed.
    ParseError(String),
    /// A variable was referenced but has no binding.
    UnboundVariable(String),
    /// A value's type did not match what the operation required.
    TypeMismatch { expected: String, actual: String },
    /// The named triple store does not exist.
    StoreNotFound(String),
    /// An IRI was syntactically invalid.
    InvalidIri(String),
    /// A literal could not be interpreted.
    InvalidLiteral(String),
    /// The requested feature is not implemented in this build.
    UnsupportedOperation(String),
    /// A general failure during query execution.
    ExecutionError(String),
    /// An aggregate function failed to evaluate.
    AggregateError(String),
    /// A property-path expression failed to evaluate.
    PropertyPathError(String),
}
impl std::fmt::Display for SparqlError {
    /// Render the error as "<category>: <detail>".
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Self::ParseError(msg) => format!("Parse error: {}", msg),
            Self::UnboundVariable(var) => format!("Variable not bound: {}", var),
            Self::TypeMismatch { expected, actual } => {
                format!("Type mismatch: expected {}, got {}", expected, actual)
            }
            Self::StoreNotFound(name) => format!("Store not found: {}", name),
            Self::InvalidIri(iri) => format!("Invalid IRI: {}", iri),
            Self::InvalidLiteral(lit) => format!("Invalid literal: {}", lit),
            Self::UnsupportedOperation(op) => format!("Unsupported operation: {}", op),
            Self::ExecutionError(msg) => format!("Execution error: {}", msg),
            Self::AggregateError(msg) => format!("Aggregate error: {}", msg),
            Self::PropertyPathError(msg) => format!("Property path error: {}", msg),
        };
        f.write_str(&text)
    }
}
// Display + Debug are already provided, which is all std::error::Error
// requires; no methods are overridden.
impl std::error::Error for SparqlError {}
/// Convenience result alias used throughout the SPARQL module.
pub type SparqlResult<T> = Result<T, SparqlError>;
#[cfg(test)]
mod tests {
    use super::*;
    // The parser accepts a minimal SELECT and produces a Select body.
    #[test]
    fn test_parse_simple_select() {
        let query = "SELECT ?s ?p ?o WHERE { ?s ?p ?o }";
        let result = parse_sparql(query);
        assert!(result.is_ok());
        let parsed = result.unwrap();
        assert!(matches!(parsed.body, QueryBody::Select(_)));
    }
    // Insert one triple, then a wildcard query returns exactly it.
    #[test]
    fn test_triple_store_basic() {
        let store = TripleStore::new();
        let triple = Triple::new(
            RdfTerm::iri("http://example.org/subject"),
            Iri::new("http://example.org/predicate"),
            RdfTerm::literal("object"),
        );
        store.insert(triple.clone());
        assert_eq!(store.count(), 1);
        let results = store.query(None, None, None);
        assert_eq!(results.len(), 1);
    }
    // End-to-end smoke test: parse + execute a SELECT against a store.
    #[test]
    fn test_sparql_execution() {
        let store = TripleStore::new();
        // Add test data
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://example.org/name"),
            RdfTerm::literal("Alice"),
        ));
        let query =
            parse_sparql("SELECT ?name WHERE { ?person <http://example.org/name> ?name }").unwrap();
        let result = execute_sparql(&store, &query);
        assert!(result.is_ok());
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,583 @@
// RDF Triple Store with efficient in-memory indexing for WASM
//
// Provides in-memory storage for RDF triples with multiple indexes
// for efficient query patterns (SPO, POS, OSP).
use super::ast::{Iri, RdfTerm};
use std::collections::{HashMap, HashSet};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::RwLock;
/// RDF Triple: a single (subject, predicate, object) statement.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Triple {
    /// Subject of the statement (any RDF term in this representation).
    pub subject: RdfTerm,
    /// Predicate — always an IRI.
    pub predicate: Iri,
    /// Object of the statement.
    pub object: RdfTerm,
}
impl Triple {
    /// Construct a triple from its three components.
    pub fn new(subject: RdfTerm, predicate: Iri, object: RdfTerm) -> Self {
        Self {
            subject,
            predicate,
            object,
        }
    }
}
/// Triple store statistics snapshot.
///
/// NOTE(review): distinct-term counts are only ever incremented on
/// insert (never decremented on remove), so they can overcount after
/// deletions.
#[derive(Debug, Clone)]
pub struct StoreStats {
    /// Number of triples currently stored.
    pub triple_count: u64,
    /// Number of distinct subject terms ever inserted.
    pub subject_count: usize,
    /// Number of distinct predicate IRIs ever inserted.
    pub predicate_count: usize,
    /// Number of distinct object terms ever inserted.
    pub object_count: usize,
    /// Number of graphs, counting the default graph.
    pub graph_count: usize,
}
/// RDF Triple Store (WASM-compatible, thread-safe via per-field RwLocks)
///
/// Triples get a monotonically increasing internal ID and are indexed
/// three ways (SPO, POS, OSP) so that any bound/unbound query pattern
/// can be answered from an index lookup.
pub struct TripleStore {
    /// All triples stored by internal ID
    triples: RwLock<HashMap<u64, Triple>>,
    /// SPO index: subject -> predicate -> object IDs
    spo_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,
    /// POS index: predicate -> object -> subject IDs
    pos_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,
    /// OSP index: object -> subject -> predicate IDs
    osp_index: RwLock<HashMap<String, HashMap<String, HashSet<u64>>>>,
    /// Named graphs: graph IRI -> triple IDs
    graphs: RwLock<HashMap<String, HashSet<u64>>>,
    /// Default graph triple IDs
    default_graph: RwLock<HashSet<u64>>,
    /// Triple ID counter (first assigned ID is 1)
    next_id: AtomicU64,
    /// Unique subjects for statistics
    subjects: RwLock<HashSet<String>>,
    /// Unique predicates for statistics
    predicates: RwLock<HashSet<String>>,
    /// Unique objects for statistics
    objects: RwLock<HashSet<String>>,
}
impl TripleStore {
pub fn new() -> Self {
Self {
triples: RwLock::new(HashMap::new()),
spo_index: RwLock::new(HashMap::new()),
pos_index: RwLock::new(HashMap::new()),
osp_index: RwLock::new(HashMap::new()),
graphs: RwLock::new(HashMap::new()),
default_graph: RwLock::new(HashSet::new()),
next_id: AtomicU64::new(1),
subjects: RwLock::new(HashSet::new()),
predicates: RwLock::new(HashSet::new()),
objects: RwLock::new(HashSet::new()),
}
}
/// Insert a triple into the default graph
pub fn insert(&self, triple: Triple) -> u64 {
self.insert_into_graph(triple, None)
}
/// Insert a triple into a specific graph
pub fn insert_into_graph(&self, triple: Triple, graph: Option<&str>) -> u64 {
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
// Get string representations for indexing
let subject_key = term_to_key(&triple.subject);
let predicate_key = triple.predicate.as_str().to_string();
let object_key = term_to_key(&triple.object);
// Update statistics
{
let mut subjects = self.subjects.write().unwrap();
subjects.insert(subject_key.clone());
}
{
let mut predicates = self.predicates.write().unwrap();
predicates.insert(predicate_key.clone());
}
{
let mut objects = self.objects.write().unwrap();
objects.insert(object_key.clone());
}
// Update SPO index
{
let mut spo_index = self.spo_index.write().unwrap();
spo_index
.entry(subject_key.clone())
.or_insert_with(HashMap::new)
.entry(predicate_key.clone())
.or_insert_with(HashSet::new)
.insert(id);
}
// Update POS index
{
let mut pos_index = self.pos_index.write().unwrap();
pos_index
.entry(predicate_key.clone())
.or_insert_with(HashMap::new)
.entry(object_key.clone())
.or_insert_with(HashSet::new)
.insert(id);
}
// Update OSP index
{
let mut osp_index = self.osp_index.write().unwrap();
osp_index
.entry(object_key)
.or_insert_with(HashMap::new)
.entry(subject_key)
.or_insert_with(HashSet::new)
.insert(id);
}
// Update graph membership
if let Some(graph_iri) = graph {
let mut graphs = self.graphs.write().unwrap();
graphs
.entry(graph_iri.to_string())
.or_insert_with(HashSet::new)
.insert(id);
} else {
let mut default_graph = self.default_graph.write().unwrap();
default_graph.insert(id);
}
// Store the triple
{
let mut triples = self.triples.write().unwrap();
triples.insert(id, triple);
}
id
}
/// Get a triple by ID
pub fn get(&self, id: u64) -> Option<Triple> {
let triples = self.triples.read().unwrap();
triples.get(&id).cloned()
}
/// Query triples matching a pattern (None means any value)
pub fn query(
&self,
subject: Option<&RdfTerm>,
predicate: Option<&Iri>,
object: Option<&RdfTerm>,
) -> Vec<Triple> {
self.query_with_graph(subject, predicate, object, None)
}
    /// Query triples matching a pattern in a specific graph
    ///
    /// `None` for subject/predicate/object matches any value. When
    /// `graph` is `Some`, only triples inserted into that named graph are
    /// returned; when `graph` is `None`, no graph filter is applied, so
    /// candidates come from all graphs. NOTE(review): the `None` case
    /// returns the union of all graphs rather than just the default
    /// graph — confirm this matches intended SPARQL dataset semantics.
    pub fn query_with_graph(
        &self,
        subject: Option<&RdfTerm>,
        predicate: Option<&Iri>,
        object: Option<&RdfTerm>,
        graph: Option<&str>,
    ) -> Vec<Triple> {
        // Filter by graph if specified
        let graph_filter: Option<HashSet<u64>> = graph.map(|g| {
            let graphs = self.graphs.read().unwrap();
            graphs.get(g).cloned().unwrap_or_default()
        });
        let spo_index = self.spo_index.read().unwrap();
        let pos_index = self.pos_index.read().unwrap();
        let osp_index = self.osp_index.read().unwrap();
        let triples = self.triples.read().unwrap();
        // Choose the best index based on bound variables
        let ids = match (subject, predicate, object) {
            // All bound - direct lookup, then confirm the object matches
            (Some(s), Some(p), Some(o)) => {
                let s_key = term_to_key(s);
                let p_key = p.as_str();
                let o_key = term_to_key(o);
                spo_index
                    .get(&s_key)
                    .and_then(|pred_map| pred_map.get(p_key))
                    .map(|ids| ids.iter().copied().collect::<Vec<_>>())
                    .unwrap_or_default()
                    .into_iter()
                    .filter(|id| {
                        triples
                            .get(id)
                            .map(|t| term_to_key(&t.object) == o_key)
                            .unwrap_or(false)
                    })
                    .collect::<Vec<_>>()
            }
            // Subject and predicate bound - use SPO
            (Some(s), Some(p), None) => {
                let s_key = term_to_key(s);
                let p_key = p.as_str();
                spo_index
                    .get(&s_key)
                    .and_then(|pred_map| pred_map.get(p_key))
                    .map(|ids| ids.iter().copied().collect())
                    .unwrap_or_default()
            }
            // Subject only - use SPO, flattening across all predicates
            (Some(s), None, None) => {
                let s_key = term_to_key(s);
                spo_index
                    .get(&s_key)
                    .map(|pred_map| {
                        pred_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Predicate and object bound - use POS
            (None, Some(p), Some(o)) => {
                let p_key = p.as_str();
                let o_key = term_to_key(o);
                pos_index
                    .get(p_key)
                    .and_then(|obj_map| obj_map.get(&o_key))
                    .map(|ids| ids.iter().copied().collect())
                    .unwrap_or_default()
            }
            // Predicate only - use POS, flattening across all objects
            (None, Some(p), None) => {
                let p_key = p.as_str();
                pos_index
                    .get(p_key)
                    .map(|obj_map| {
                        obj_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Object only - use OSP, flattening across all subjects
            (None, None, Some(o)) => {
                let o_key = term_to_key(o);
                osp_index
                    .get(&o_key)
                    .map(|subj_map| {
                        subj_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Subject and object bound - use SPO then filter on the object
            (Some(s), None, Some(o)) => {
                let s_key = term_to_key(s);
                let o_key = term_to_key(o);
                spo_index
                    .get(&s_key)
                    .map(|pred_map| {
                        pred_map
                            .values()
                            .flat_map(|ids| ids.iter().copied())
                            .filter(|id| {
                                triples
                                    .get(id)
                                    .map(|t| term_to_key(&t.object) == o_key)
                                    .unwrap_or(false)
                            })
                            .collect::<Vec<_>>()
                    })
                    .unwrap_or_default()
            }
            // Nothing bound - return all
            (None, None, None) => triples.keys().copied().collect(),
        };
        // Apply graph filter and collect results; IDs missing from the
        // triples map (mid-insert) are silently skipped.
        ids.into_iter()
            .filter(|id| {
                graph_filter
                    .as_ref()
                    .map(|filter| filter.contains(id))
                    .unwrap_or(true)
            })
            .filter_map(|id| triples.get(&id).cloned())
            .collect()
    }
/// Get all triples in the store
pub fn all_triples(&self) -> Vec<Triple> {
let triples = self.triples.read().unwrap();
triples.values().cloned().collect()
}
/// Get triple count
pub fn count(&self) -> usize {
let triples = self.triples.read().unwrap();
triples.len()
}
/// Check if store is empty
pub fn is_empty(&self) -> bool {
let triples = self.triples.read().unwrap();
triples.is_empty()
}
/// Clear all triples
pub fn clear(&self) {
self.triples.write().unwrap().clear();
self.spo_index.write().unwrap().clear();
self.pos_index.write().unwrap().clear();
self.osp_index.write().unwrap().clear();
self.graphs.write().unwrap().clear();
self.default_graph.write().unwrap().clear();
self.subjects.write().unwrap().clear();
self.predicates.write().unwrap().clear();
self.objects.write().unwrap().clear();
}
/// Clear a specific graph
pub fn clear_graph(&self, graph: Option<&str>) {
let ids_to_remove: Vec<u64> = if let Some(graph_iri) = graph {
let graphs = self.graphs.read().unwrap();
graphs
.get(graph_iri)
.cloned()
.unwrap_or_default()
.into_iter()
.collect()
} else {
let default_graph = self.default_graph.read().unwrap();
default_graph.iter().copied().collect()
};
for id in ids_to_remove {
self.remove(id);
}
}
/// Remove a triple by ID
pub fn remove(&self, id: u64) -> Option<Triple> {
let triple = {
let mut triples = self.triples.write().unwrap();
triples.remove(&id)
}?;
let subject_key = term_to_key(&triple.subject);
let predicate_key = triple.predicate.as_str().to_string();
let object_key = term_to_key(&triple.object);
// Remove from SPO index
{
let mut spo_index = self.spo_index.write().unwrap();
if let Some(pred_map) = spo_index.get_mut(&subject_key) {
if let Some(ids) = pred_map.get_mut(&predicate_key) {
ids.remove(&id);
}
}
}
// Remove from POS index
{
let mut pos_index = self.pos_index.write().unwrap();
if let Some(obj_map) = pos_index.get_mut(&predicate_key) {
if let Some(ids) = obj_map.get_mut(&object_key) {
ids.remove(&id);
}
}
}
// Remove from OSP index
{
let mut osp_index = self.osp_index.write().unwrap();
if let Some(subj_map) = osp_index.get_mut(&object_key) {
if let Some(ids) = subj_map.get_mut(&subject_key) {
ids.remove(&id);
}
}
}
// Remove from graphs
{
let mut default_graph = self.default_graph.write().unwrap();
default_graph.remove(&id);
}
{
let mut graphs = self.graphs.write().unwrap();
for (_, ids) in graphs.iter_mut() {
ids.remove(&id);
}
}
Some(triple)
}
/// Get statistics about the store
pub fn stats(&self) -> StoreStats {
let triples = self.triples.read().unwrap();
let subjects = self.subjects.read().unwrap();
let predicates = self.predicates.read().unwrap();
let objects = self.objects.read().unwrap();
let graphs = self.graphs.read().unwrap();
StoreStats {
triple_count: triples.len() as u64,
subject_count: subjects.len(),
predicate_count: predicates.len(),
object_count: objects.len(),
graph_count: graphs.len() + 1, // +1 for default graph
}
}
/// List all named graphs
pub fn list_graphs(&self) -> Vec<String> {
let graphs = self.graphs.read().unwrap();
graphs.keys().cloned().collect()
}
/// Get triples from a specific graph
pub fn get_graph(&self, graph: &str) -> Vec<Triple> {
let graphs = self.graphs.read().unwrap();
let triples = self.triples.read().unwrap();
graphs
.get(graph)
.map(|ids| {
ids.iter()
.filter_map(|id| triples.get(id).cloned())
.collect()
})
.unwrap_or_default()
}
/// Get triples from the default graph
pub fn get_default_graph(&self) -> Vec<Triple> {
let default_graph = self.default_graph.read().unwrap();
let triples = self.triples.read().unwrap();
default_graph
.iter()
.filter_map(|id| triples.get(id).cloned())
.collect()
}
}
// A default store is just an empty store.
impl Default for TripleStore {
    fn default() -> Self {
        Self::new()
    }
}
/// Convert an RDF term to the canonical string key used by the indexes.
///
/// IRIs render as `<iri>`, blank nodes as `_:id`, and literals in an
/// N-Triples-like form: a language tag when present, an explicit
/// datatype suffix for non-string datatypes, and bare quotes otherwise.
fn term_to_key(term: &RdfTerm) -> String {
    const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";
    match term {
        RdfTerm::Iri(iri) => format!("<{}>", iri.as_str()),
        RdfTerm::BlankNode(id) => format!("_:{}", id),
        RdfTerm::Literal(lit) => match &lit.language {
            Some(lang) => format!("\"{}\"@{}", lit.value, lang),
            None if lit.datatype.as_str() != XSD_STRING => {
                format!("\"{}\"^^<{}>", lit.value, lit.datatype.as_str())
            }
            None => format!("\"{}\"", lit.value),
        },
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // An inserted triple gets a positive ID and can be fetched back intact.
    #[test]
    fn test_insert_and_query() {
        let store = TripleStore::new();
        let triple = Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            RdfTerm::iri("http://example.org/Person"),
        );
        let id = store.insert(triple.clone());
        assert!(id > 0);
        let retrieved = store.get(id);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap(), triple);
    }
    // A subject-only pattern returns exactly that subject's triples.
    #[test]
    fn test_query_by_subject() {
        let store = TripleStore::new();
        let subject = RdfTerm::iri("http://example.org/person/1");
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdfs_label(),
            RdfTerm::literal("Alice"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/2"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        let results = store.query(Some(&subject), None, None);
        assert_eq!(results.len(), 2);
    }
    // Distinct-term statistics: 2 subjects, 1 shared predicate, 2 objects.
    #[test]
    fn test_statistics() {
        let store = TripleStore::new();
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s1"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o1"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s2"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o2"),
        ));
        let stats = store.stats();
        assert_eq!(stats.triple_count, 2);
        assert_eq!(stats.subject_count, 2);
        assert_eq!(stats.predicate_count, 1);
        assert_eq!(stats.object_count, 2);
    }
}