Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'
This commit is contained in:
533
vendor/ruvector/crates/ruvector-postgres/src/graph/README.md
vendored
Normal file
533
vendor/ruvector/crates/ruvector-postgres/src/graph/README.md
vendored
Normal file
@@ -0,0 +1,533 @@
|
||||
# Graph Operations, Cypher & SPARQL Module
|
||||
|
||||
This module provides graph database capabilities for the ruvector-postgres extension, including graph storage, traversal algorithms, Cypher query support, and W3C-standard SPARQL for RDF data.
|
||||
|
||||
## Features
|
||||
|
||||
- **Concurrent Graph Storage**: Thread-safe graph storage using DashMap
|
||||
- **Node & Edge Management**: Full-featured node and edge storage with properties
|
||||
- **Label Indexing**: Fast node lookups by label
|
||||
- **Adjacency Lists**: Efficient edge traversal with O(1) neighbor access
|
||||
- **Graph Traversal**: BFS, DFS, and Dijkstra's shortest path algorithms
|
||||
- **Cypher Support**: Simplified Cypher query language for graph operations
|
||||
- **SPARQL 1.1 Support**: W3C-standard query language for RDF triple stores
|
||||
- **RDF Triple Store**: Efficient storage with SPO/POS/OSP indexing
|
||||
- **PostgreSQL Integration**: Native pgrx-based PostgreSQL functions
|
||||
|
||||
## Architecture
|
||||
|
||||
### Storage Layer (`storage.rs`)
|
||||
|
||||
```rust
|
||||
// Node with labels and properties
|
||||
pub struct Node {
|
||||
pub id: u64,
|
||||
pub labels: Vec<String>,
|
||||
pub properties: HashMap<String, JsonValue>,
|
||||
}
|
||||
|
||||
// Edge with type and properties
|
||||
pub struct Edge {
|
||||
pub id: u64,
|
||||
pub source: u64,
|
||||
pub target: u64,
|
||||
pub edge_type: String,
|
||||
pub properties: HashMap<String, JsonValue>,
|
||||
}
|
||||
|
||||
// Concurrent storage with indexing
|
||||
pub struct GraphStore {
|
||||
pub nodes: NodeStore, // DashMap-based
|
||||
pub edges: EdgeStore, // DashMap-based
|
||||
}
|
||||
```
|
||||
|
||||
### Traversal Layer (`traversal.rs`)
|
||||
|
||||
Implements common graph algorithms:
|
||||
|
||||
- **BFS**: Breadth-first search for shortest path by hop count
|
||||
- **DFS**: Depth-first search with visitor pattern
|
||||
- **Dijkstra**: Weighted shortest path with custom edge weights
|
||||
- **All Paths**: Find multiple paths between nodes
|
||||
|
||||
### Cypher Layer (`cypher/`)
|
||||
|
||||
Simplified Cypher query language support:
|
||||
|
||||
- **AST** (`ast.rs`): Complete abstract syntax tree for Cypher
|
||||
- **Parser** (`parser.rs`): Basic parser for common Cypher patterns
|
||||
- **Executor** (`executor.rs`): Query execution engine
|
||||
|
||||
Supported Cypher clauses:
|
||||
- `CREATE`: Create nodes and relationships
|
||||
- `MATCH`: Pattern matching
|
||||
- `WHERE`: Filtering
|
||||
- `RETURN`: Result projection
|
||||
- `SET`, `DELETE`, `WITH`: Basic support
|
||||
|
||||
### SPARQL Layer (`sparql/`)
|
||||
|
||||
W3C SPARQL 1.1 implementation for RDF data:
|
||||
|
||||
- **AST** (`ast.rs`): Complete SPARQL abstract syntax tree
|
||||
- **Parser** (`parser.rs`): Full SPARQL 1.1 query parser
|
||||
- **Executor** (`executor.rs`): Query execution with BGP matching, JOINs
|
||||
- **Triple Store** (`triple_store.rs`): Efficient RDF storage with SPO/POS/OSP indexes
|
||||
- **Functions** (`functions.rs`): 50+ built-in SPARQL functions
|
||||
- **Results** (`results.rs`): JSON, XML, CSV, TSV formatters
|
||||
|
||||
Supported SPARQL features:
|
||||
- Query forms: `SELECT`, `CONSTRUCT`, `ASK`, `DESCRIBE`
|
||||
- Graph patterns: `OPTIONAL`, `UNION`, `MINUS`, `FILTER`
|
||||
- Property paths: `/`, `|`, `^`, `*`, `+`, `?`
|
||||
- Aggregates: `COUNT`, `SUM`, `AVG`, `MIN`, `MAX`, `GROUP_CONCAT`
|
||||
- Solution modifiers: `ORDER BY`, `LIMIT`, `OFFSET`, `GROUP BY`, `HAVING`
|
||||
- Update operations: `INSERT DATA`, `DELETE DATA`, `DELETE/INSERT WHERE`
|
||||
|
||||
## PostgreSQL Functions
|
||||
|
||||
### Graph Management
|
||||
|
||||
```sql
|
||||
-- Create a new graph
|
||||
SELECT ruvector_create_graph('my_graph');
|
||||
|
||||
-- List all graphs
|
||||
SELECT ruvector_list_graphs();
|
||||
|
||||
-- Delete a graph
|
||||
SELECT ruvector_delete_graph('my_graph');
|
||||
|
||||
-- Get graph statistics
|
||||
SELECT ruvector_graph_stats('my_graph');
|
||||
-- Returns: {"name": "my_graph", "node_count": 100, "edge_count": 250, ...}
|
||||
```
|
||||
|
||||
### Node Operations
|
||||
|
||||
```sql
|
||||
-- Add a node
|
||||
SELECT ruvector_add_node(
|
||||
'my_graph',
|
||||
ARRAY['Person', 'Employee'], -- Labels
|
||||
'{"name": "Alice", "age": 30, "department": "Engineering"}'::jsonb
|
||||
);
|
||||
-- Returns: node_id (bigint)
|
||||
|
||||
-- Get a node by ID
|
||||
SELECT ruvector_get_node('my_graph', 1);
|
||||
-- Returns: {"id": 1, "labels": ["Person"], "properties": {...}}
|
||||
|
||||
-- Find nodes by label
|
||||
SELECT ruvector_find_nodes_by_label('my_graph', 'Person');
|
||||
-- Returns: array of nodes
|
||||
```
|
||||
|
||||
### Edge Operations
|
||||
|
||||
```sql
|
||||
-- Add an edge
|
||||
SELECT ruvector_add_edge(
|
||||
'my_graph',
|
||||
1, -- source_id
|
||||
2, -- target_id
|
||||
'KNOWS', -- edge_type
|
||||
'{"since": 2020, "weight": 0.8}'::jsonb
|
||||
);
|
||||
-- Returns: edge_id (bigint)
|
||||
|
||||
-- Get an edge by ID
|
||||
SELECT ruvector_get_edge('my_graph', 1);
|
||||
|
||||
-- Get neighbors of a node
|
||||
SELECT ruvector_get_neighbors('my_graph', 1);
|
||||
-- Returns: array of node IDs
|
||||
```
|
||||
|
||||
### Graph Traversal
|
||||
|
||||
```sql
|
||||
-- Find shortest path (unweighted)
|
||||
SELECT ruvector_shortest_path(
|
||||
'my_graph',
|
||||
1, -- start_id
|
||||
10, -- end_id
|
||||
5 -- max_hops
|
||||
);
|
||||
-- Returns: {"nodes": [1, 3, 7, 10], "edges": [12, 45, 89], "length": 4, "cost": 0}
|
||||
|
||||
-- Find weighted shortest path
|
||||
SELECT ruvector_shortest_path_weighted(
|
||||
'my_graph',
|
||||
1, -- start_id
|
||||
10, -- end_id
|
||||
'weight' -- property name for edge weights
|
||||
);
|
||||
-- Returns: {"nodes": [...], "edges": [...], "length": 4, "cost": 2.5}
|
||||
```
|
||||
|
||||
### Cypher Queries
|
||||
|
||||
```sql
|
||||
-- Create nodes
|
||||
SELECT ruvector_cypher(
|
||||
'my_graph',
|
||||
'CREATE (n:Person {name: ''Alice'', age: 30}) RETURN n',
|
||||
NULL
|
||||
);
|
||||
|
||||
-- Match and filter
|
||||
SELECT ruvector_cypher(
|
||||
'my_graph',
|
||||
'MATCH (n:Person) WHERE n.age > 25 RETURN n.name, n.age',
|
||||
NULL
|
||||
);
|
||||
|
||||
-- Parameterized queries
|
||||
SELECT ruvector_cypher(
|
||||
'my_graph',
|
||||
'MATCH (n:Person) WHERE n.name = $name RETURN n',
|
||||
'{"name": "Alice"}'::jsonb
|
||||
);
|
||||
|
||||
-- Create relationships
|
||||
SELECT ruvector_cypher(
|
||||
'my_graph',
|
||||
'CREATE (a:Person {name: ''Alice''})-[:KNOWS {since: 2020}]->(b:Person {name: ''Bob''}) RETURN a, b',
|
||||
NULL
|
||||
);
|
||||
```
|
||||
|
||||
### SPARQL / RDF Operations
|
||||
|
||||
```sql
|
||||
-- Create RDF triple store
|
||||
SELECT ruvector_create_rdf_store('my_knowledge_base');
|
||||
|
||||
-- Insert individual triples
|
||||
SELECT ruvector_insert_triple(
|
||||
'my_knowledge_base',
|
||||
'<http://example.org/person/alice>',
|
||||
'<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>',
|
||||
'<http://example.org/Person>'
|
||||
);
|
||||
|
||||
-- Insert triple into named graph
|
||||
SELECT ruvector_insert_triple_graph(
|
||||
'my_knowledge_base',
|
||||
'<http://example.org/person/alice>',
|
||||
'<http://xmlns.com/foaf/0.1/name>',
|
||||
'"Alice Smith"',
|
||||
'http://example.org/people'
|
||||
);
|
||||
|
||||
-- Bulk load N-Triples format
|
||||
SELECT ruvector_load_ntriples('my_knowledge_base', '
|
||||
<http://example.org/person/bob> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.org/Person> .
|
||||
<http://example.org/person/bob> <http://xmlns.com/foaf/0.1/name> "Bob Jones" .
|
||||
<http://example.org/person/alice> <http://xmlns.com/foaf/0.1/knows> <http://example.org/person/bob> .
|
||||
');
|
||||
|
||||
-- Execute SPARQL SELECT query
|
||||
SELECT ruvector_sparql('my_knowledge_base', '
|
||||
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||
SELECT ?person ?name
|
||||
WHERE {
|
||||
?person a <http://example.org/Person> .
|
||||
?person foaf:name ?name .
|
||||
}
|
||||
ORDER BY ?name
|
||||
', 'json');
|
||||
|
||||
-- SPARQL ASK query
|
||||
SELECT ruvector_sparql('my_knowledge_base',
|
||||
'ASK { <http://example.org/person/alice> <http://xmlns.com/foaf/0.1/knows> ?friend }',
|
||||
'json'
|
||||
);
|
||||
|
||||
-- Get results as JSONB
|
||||
SELECT ruvector_sparql_json('my_knowledge_base',
|
||||
'SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10'
|
||||
);
|
||||
|
||||
-- Query triples by pattern (NULL = wildcard)
|
||||
SELECT ruvector_query_triples('my_knowledge_base',
|
||||
'<http://example.org/person/alice>', -- subject
|
||||
NULL, -- any predicate
|
||||
NULL -- any object
|
||||
);
|
||||
|
||||
-- Get store statistics
|
||||
SELECT ruvector_rdf_stats('my_knowledge_base');
|
||||
-- Returns: {"name": "...", "triple_count": 5, "subject_count": 2, ...}
|
||||
|
||||
-- SPARQL UPDATE
|
||||
SELECT ruvector_sparql_update('my_knowledge_base', '
|
||||
INSERT DATA {
|
||||
<http://example.org/person/charlie> <http://xmlns.com/foaf/0.1/name> "Charlie" .
|
||||
}
|
||||
');
|
||||
|
||||
-- Clear store
|
||||
SELECT ruvector_clear_rdf_store('my_knowledge_base');
|
||||
|
||||
-- Delete store
|
||||
SELECT ruvector_delete_rdf_store('my_knowledge_base');
|
||||
|
||||
-- List all stores
|
||||
SELECT ruvector_list_rdf_stores();
|
||||
```
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Social Network
|
||||
|
||||
```sql
|
||||
-- Create graph
|
||||
SELECT ruvector_create_graph('social_network');
|
||||
|
||||
-- Add users
|
||||
WITH users AS (
|
||||
SELECT ruvector_add_node('social_network', ARRAY['Person'],
|
||||
jsonb_build_object('name', name, 'age', age))
|
||||
FROM (VALUES
|
||||
('Alice', 30),
|
||||
('Bob', 25),
|
||||
('Charlie', 35),
|
||||
('Diana', 28)
|
||||
) AS t(name, age)
|
||||
)
|
||||
|
||||
-- Create friendships
|
||||
SELECT ruvector_add_edge('social_network', 1, 2, 'FRIENDS',
|
||||
'{"since": "2020-01-15"}'::jsonb);
|
||||
SELECT ruvector_add_edge('social_network', 2, 3, 'FRIENDS',
|
||||
'{"since": "2019-06-20"}'::jsonb);
|
||||
SELECT ruvector_add_edge('social_network', 1, 4, 'FRIENDS',
|
||||
'{"since": "2021-03-10"}'::jsonb);
|
||||
|
||||
-- Find connection between Alice and Charlie
|
||||
SELECT ruvector_shortest_path('social_network', 1, 3, 10);
|
||||
|
||||
-- Cypher: Find all friends of friends
|
||||
SELECT ruvector_cypher(
|
||||
'social_network',
|
||||
'MATCH (a:Person)-[:FRIENDS]->(b:Person)-[:FRIENDS]->(c:Person)
|
||||
WHERE a.name = ''Alice'' RETURN c.name',
|
||||
NULL
|
||||
);
|
||||
```
|
||||
|
||||
### SPARQL Knowledge Graph
|
||||
|
||||
```sql
|
||||
-- Create RDF knowledge graph
|
||||
SELECT ruvector_create_rdf_store('dbpedia_subset');
|
||||
|
||||
-- Load sample data
|
||||
SELECT ruvector_load_ntriples('dbpedia_subset', '
|
||||
<http://dbpedia.org/resource/Albert_Einstein> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Scientist> .
|
||||
<http://dbpedia.org/resource/Albert_Einstein> <http://xmlns.com/foaf/0.1/name> "Albert Einstein" .
|
||||
<http://dbpedia.org/resource/Albert_Einstein> <http://dbpedia.org/ontology/birthPlace> <http://dbpedia.org/resource/Ulm> .
|
||||
<http://dbpedia.org/resource/Albert_Einstein> <http://dbpedia.org/ontology/field> <http://dbpedia.org/resource/Physics> .
|
||||
<http://dbpedia.org/resource/Marie_Curie> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://dbpedia.org/ontology/Scientist> .
|
||||
<http://dbpedia.org/resource/Marie_Curie> <http://xmlns.com/foaf/0.1/name> "Marie Curie" .
|
||||
<http://dbpedia.org/resource/Marie_Curie> <http://dbpedia.org/ontology/field> <http://dbpedia.org/resource/Physics> .
|
||||
');
|
||||
|
||||
-- Find all scientists in physics
|
||||
SELECT ruvector_sparql('dbpedia_subset', '
|
||||
PREFIX dbo: <http://dbpedia.org/ontology/>
|
||||
PREFIX dbr: <http://dbpedia.org/resource/>
|
||||
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
|
||||
|
||||
SELECT ?name
|
||||
WHERE {
|
||||
?person a dbo:Scientist .
|
||||
?person dbo:field dbr:Physics .
|
||||
?person foaf:name ?name .
|
||||
}
|
||||
', 'json');
|
||||
|
||||
-- Check if Einstein was a scientist
|
||||
SELECT ruvector_sparql('dbpedia_subset', '
|
||||
PREFIX dbo: <http://dbpedia.org/ontology/>
|
||||
PREFIX dbr: <http://dbpedia.org/resource/>
|
||||
|
||||
ASK { dbr:Albert_Einstein a dbo:Scientist }
|
||||
', 'json');
|
||||
|
||||
-- Get all properties of Einstein
|
||||
SELECT ruvector_query_triples('dbpedia_subset',
|
||||
'<http://dbpedia.org/resource/Albert_Einstein>',
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
```
|
||||
|
||||
### Knowledge Graph
|
||||
|
||||
```sql
|
||||
-- Create knowledge graph
|
||||
SELECT ruvector_create_graph('knowledge');
|
||||
|
||||
-- Add concepts
|
||||
SELECT ruvector_add_node('knowledge', ARRAY['Concept'],
|
||||
'{"name": "Machine Learning", "category": "AI"}'::jsonb);
|
||||
SELECT ruvector_add_node('knowledge', ARRAY['Concept'],
|
||||
'{"name": "Neural Networks", "category": "AI"}'::jsonb);
|
||||
SELECT ruvector_add_node('knowledge', ARRAY['Concept'],
|
||||
'{"name": "Deep Learning", "category": "AI"}'::jsonb);
|
||||
|
||||
-- Create relationships
|
||||
SELECT ruvector_add_edge('knowledge', 1, 2, 'INCLUDES',
|
||||
'{"strength": 0.9}'::jsonb);
|
||||
SELECT ruvector_add_edge('knowledge', 2, 3, 'SPECIALIZES_IN',
|
||||
'{"strength": 0.95}'::jsonb);
|
||||
|
||||
-- Find weighted path
|
||||
SELECT ruvector_shortest_path_weighted('knowledge', 1, 3, 'strength');
|
||||
```
|
||||
|
||||
### Recommendation System
|
||||
|
||||
```sql
|
||||
-- Create graph
|
||||
SELECT ruvector_create_graph('recommendations');
|
||||
|
||||
-- Add users and items
|
||||
SELECT ruvector_cypher('recommendations',
|
||||
'CREATE (u:User {name: ''Alice''})
|
||||
CREATE (m1:Movie {title: ''Inception''})
|
||||
CREATE (m2:Movie {title: ''Interstellar''})
|
||||
CREATE (u)-[:WATCHED {rating: 5}]->(m1)
|
||||
CREATE (u)-[:WATCHED {rating: 4}]->(m2)
|
||||
RETURN u, m1, m2',
|
||||
NULL
|
||||
);
|
||||
|
||||
-- Find similar users or items
|
||||
SELECT ruvector_cypher('recommendations',
|
||||
'MATCH (u1:User)-[:WATCHED]->(m:Movie)<-[:WATCHED]-(u2:User)
|
||||
WHERE u1.name = ''Alice''
|
||||
RETURN u2.name, COUNT(m) AS common_movies
|
||||
ORDER BY common_movies DESC',
|
||||
NULL
|
||||
);
|
||||
```
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
### Storage
|
||||
|
||||
- **Node Lookup**: O(1) by ID, O(k) by label (k = nodes with label)
|
||||
- **Edge Lookup**: O(1) by ID, O(d) for neighbors (d = degree)
|
||||
- **Concurrent Access**: Lock-free reads, minimal contention on writes
|
||||
|
||||
### Traversal
|
||||
|
||||
- **BFS**: O(V + E) time, O(V) space
|
||||
- **DFS**: O(V + E) time, O(h) space (h = max depth)
|
||||
- **Dijkstra**: O((V + E) log V) time with binary heap
|
||||
|
||||
### Scalability
|
||||
|
||||
- Thread-safe concurrent operations
|
||||
- Memory-efficient adjacency lists
|
||||
- Label and type indexing for fast filtering
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### Concurrent Storage
|
||||
|
||||
Uses `DashMap` for lock-free concurrent access:
|
||||
|
||||
```rust
|
||||
pub struct NodeStore {
|
||||
nodes: DashMap<u64, Node>,
|
||||
label_index: DashMap<String, HashSet<u64>>,
|
||||
next_id: AtomicU64,
|
||||
}
|
||||
```
|
||||
|
||||
### Graph Registry
|
||||
|
||||
Global registry for named graphs:
|
||||
|
||||
```rust
|
||||
static GRAPH_REGISTRY: Lazy<DashMap<String, Arc<GraphStore>>> = ...
|
||||
```
|
||||
|
||||
### Cypher Parser
|
||||
|
||||
Basic recursive descent parser:
|
||||
- Handles common patterns: `(n:Label {prop: value})`
|
||||
- Relationship patterns: `-[:TYPE]->`, `<-[:TYPE]-`
|
||||
- WHERE conditions, RETURN projections
|
||||
- Property extraction and type inference
|
||||
|
||||
## Limitations
|
||||
|
||||
### Current Parser Limitations
|
||||
|
||||
The Cypher parser is simplified for demonstration:
|
||||
- No support for complex WHERE conditions (AND/OR)
|
||||
- Limited expression support (basic comparisons only)
|
||||
- No aggregation functions (COUNT, SUM, etc.) — note: some examples in this README use COUNT for illustration; such queries are not fully supported by the current simplified parser
|
||||
- No ORDER BY or GROUP BY clauses
|
||||
- Basic pattern matching only
|
||||
|
||||
### Production Recommendations
|
||||
|
||||
For production use, consider:
|
||||
- Using a proper parser library (nom, pest, lalrpop)
|
||||
- Adding comprehensive error messages
|
||||
- Implementing full Cypher specification
|
||||
- Query optimization and planning
|
||||
- Transaction support
|
||||
- Persistence layer
|
||||
|
||||
## Testing
|
||||
|
||||
Comprehensive test suite included:
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
cargo pgrx test
|
||||
|
||||
# Run specific test
|
||||
cargo pgrx test test_create_graph
|
||||
```
|
||||
|
||||
Test coverage:
|
||||
- Node and edge CRUD operations
|
||||
- Graph traversal algorithms
|
||||
- Cypher query execution
|
||||
- PostgreSQL function integration
|
||||
- Concurrent access patterns
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
- [x] SPARQL 1.1 query support
|
||||
- [x] RDF triple store with indexing
|
||||
- [ ] Graph analytics (PageRank, community detection)
|
||||
- [ ] Temporal graphs (time-aware edges)
|
||||
- [ ] Property graph constraints
|
||||
- [ ] Full-text search on properties
|
||||
- [ ] Persistent storage backend
|
||||
- [ ] Query optimization
|
||||
- [ ] Distributed graph support
|
||||
- [ ] GraphQL interface
|
||||
- [ ] SPARQL federated queries
|
||||
- [ ] OWL/RDFS reasoning
|
||||
|
||||
## References
|
||||
|
||||
- [Cypher Query Language](https://neo4j.com/developer/cypher/)
|
||||
- [Property Graph Model](https://en.wikipedia.org/wiki/Graph_database#Labeled-property_graph)
|
||||
- [Graph Algorithms](https://en.wikipedia.org/wiki/Graph_traversal)
|
||||
- [SPARQL 1.1 Query Language](https://www.w3.org/TR/sparql11-query/)
|
||||
- [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/)
|
||||
- [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/)
|
||||
- [pgrx Documentation](https://github.com/pgcentralfoundation/pgrx)
|
||||
359
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/ast.rs
vendored
Normal file
359
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/ast.rs
vendored
Normal file
@@ -0,0 +1,359 @@
|
||||
// Cypher AST (Abstract Syntax Tree) types
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value as JsonValue;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Complete Cypher query
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CypherQuery {
|
||||
pub clauses: Vec<Clause>,
|
||||
}
|
||||
|
||||
impl CypherQuery {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
clauses: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_clause(mut self, clause: Clause) -> Self {
|
||||
self.clauses.push(clause);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for CypherQuery {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Query clause. A [`CypherQuery`] is an ordered sequence of these.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Clause {
    /// MATCH — find graph elements matching patterns.
    Match(MatchClause),
    /// CREATE — create nodes and relationships.
    Create(CreateClause),
    /// RETURN — project results back to the caller.
    Return(ReturnClause),
    /// WHERE — filter previously matched bindings.
    Where(WhereClause),
    /// SET — update properties on bound elements.
    Set(SetClause),
    /// DELETE — remove bound elements.
    Delete(DeleteClause),
    /// WITH — pass projected bindings to subsequent clauses.
    With(WithClause),
}
|
||||
|
||||
/// MATCH clause: one or more patterns to find in the graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MatchClause {
    /// Patterns to match.
    pub patterns: Vec<Pattern>,
    /// True for OPTIONAL MATCH.
    pub optional: bool,
}

impl MatchClause {
    /// Create a (non-optional) MATCH over the given patterns.
    pub fn new(patterns: Vec<Pattern>) -> Self {
        Self {
            patterns,
            optional: false,
        }
    }

    /// Builder: mark this clause as OPTIONAL MATCH.
    pub fn optional(mut self) -> Self {
        self.optional = true;
        self
    }
}
|
||||
|
||||
/// CREATE clause: patterns describing nodes/relationships to create.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CreateClause {
    /// Patterns to instantiate in the graph.
    pub patterns: Vec<Pattern>,
}

impl CreateClause {
    /// Create a CREATE clause over the given patterns.
    pub fn new(patterns: Vec<Pattern>) -> Self {
        Self { patterns }
    }
}
|
||||
|
||||
/// RETURN clause: result projection with optional DISTINCT / LIMIT / SKIP.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReturnClause {
    /// Expressions (with optional aliases) to project.
    pub items: Vec<ReturnItem>,
    /// True for RETURN DISTINCT.
    pub distinct: bool,
    /// Maximum number of rows to return, if given.
    pub limit: Option<usize>,
    /// Number of leading rows to skip, if given.
    pub skip: Option<usize>,
}

impl ReturnClause {
    /// Create a plain RETURN over the given items (no DISTINCT/LIMIT/SKIP).
    pub fn new(items: Vec<ReturnItem>) -> Self {
        Self {
            items,
            distinct: false,
            limit: None,
            skip: None,
        }
    }

    /// Builder: enable DISTINCT.
    pub fn distinct(mut self) -> Self {
        self.distinct = true;
        self
    }

    /// Builder: set LIMIT.
    pub fn limit(mut self, limit: usize) -> Self {
        self.limit = Some(limit);
        self
    }

    /// Builder: set SKIP.
    pub fn skip(mut self, skip: usize) -> Self {
        self.skip = Some(skip);
        self
    }
}
|
||||
|
||||
/// A single projected item in a RETURN or WITH clause.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReturnItem {
    /// Expression to evaluate.
    pub expression: Expression,
    /// Optional alias (`AS name`).
    pub alias: Option<String>,
}

impl ReturnItem {
    /// Create an item with no alias.
    pub fn new(expression: Expression) -> Self {
        Self {
            expression,
            alias: None,
        }
    }

    /// Builder: set the alias.
    pub fn with_alias(mut self, alias: impl Into<String>) -> Self {
        self.alias = Some(alias.into());
        self
    }
}
|
||||
|
||||
/// WHERE clause: a boolean condition used to filter bindings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WhereClause {
    /// Condition expression to evaluate per binding.
    pub condition: Expression,
}

impl WhereClause {
    /// Create a WHERE clause from a condition expression.
    pub fn new(condition: Expression) -> Self {
        Self { condition }
    }
}
|
||||
|
||||
/// SET clause: a list of property assignments.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SetClause {
    /// Assignments to apply.
    pub items: Vec<SetItem>,
}

/// A single `variable.property = value` assignment within a SET clause.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SetItem {
    /// Bound variable whose property is updated.
    pub variable: String,
    /// Property name to set.
    pub property: String,
    /// New value expression.
    pub value: Expression,
}
|
||||
|
||||
/// DELETE clause: variables naming elements to remove.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeleteClause {
    /// Bound variable names to delete.
    pub items: Vec<String>,
    /// True for DETACH DELETE.
    pub detach: bool,
}

/// WITH clause: projects bindings forward to subsequent clauses.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WithClause {
    /// Projected items (same shape as RETURN items).
    pub items: Vec<ReturnItem>,
}
|
||||
|
||||
/// Graph pattern (node)-[relationship]->(node)
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Pattern {
|
||||
pub elements: Vec<PatternElement>,
|
||||
}
|
||||
|
||||
impl Pattern {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
elements: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_element(mut self, element: PatternElement) -> Self {
|
||||
self.elements.push(element);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Pattern {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// One element of a [`Pattern`]: either a node or a relationship.
/// Patterns typically alternate Node, Relationship, Node, ...
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PatternElement {
    Node(NodePattern),
    Relationship(RelationshipPattern),
}
|
||||
|
||||
/// Node pattern (n:Label {property: value})
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct NodePattern {
|
||||
pub variable: Option<String>,
|
||||
pub labels: Vec<String>,
|
||||
pub properties: HashMap<String, Expression>,
|
||||
}
|
||||
|
||||
impl NodePattern {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
variable: None,
|
||||
labels: Vec::new(),
|
||||
properties: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_variable(mut self, variable: impl Into<String>) -> Self {
|
||||
self.variable = Some(variable.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_label(mut self, label: impl Into<String>) -> Self {
|
||||
self.labels.push(label.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_property(mut self, key: impl Into<String>, value: Expression) -> Self {
|
||||
self.properties.insert(key.into(), value);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for NodePattern {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Relationship pattern, e.g. -[r:TYPE {property: value}]->.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelationshipPattern {
    /// Optional variable binding the matched relationship.
    pub variable: Option<String>,
    /// Optional relationship type constraint.
    pub rel_type: Option<String>,
    /// Property constraints: key -> expression.
    pub properties: HashMap<String, Expression>,
    /// Traversal direction relative to the left-hand node.
    pub direction: Direction,
    /// Minimum hop count for variable-length patterns, if given.
    pub min_hops: Option<usize>,
    /// Maximum hop count for variable-length patterns, if given.
    pub max_hops: Option<usize>,
}

impl RelationshipPattern {
    /// Create an unconstrained pattern with the given direction.
    pub fn new(direction: Direction) -> Self {
        Self {
            variable: None,
            rel_type: None,
            properties: HashMap::new(),
            direction,
            min_hops: None,
            max_hops: None,
        }
    }

    /// Builder: set the variable name.
    pub fn with_variable(mut self, variable: impl Into<String>) -> Self {
        self.variable = Some(variable.into());
        self
    }

    /// Builder: set the relationship type.
    pub fn with_type(mut self, rel_type: impl Into<String>) -> Self {
        self.rel_type = Some(rel_type.into());
        self
    }

    /// Builder: add a property constraint.
    pub fn with_property(mut self, key: impl Into<String>, value: Expression) -> Self {
        self.properties.insert(key.into(), value);
        self
    }

    /// Builder: set both hop bounds for a variable-length pattern.
    pub fn with_hops(mut self, min: usize, max: usize) -> Self {
        self.min_hops = Some(min);
        self.max_hops = Some(max);
        self
    }
}
|
||||
|
||||
/// Direction of a relationship pattern relative to its left-hand node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Direction {
    /// `->` : left-to-right.
    Outgoing,
    /// `<-` : right-to-left.
    Incoming,
    /// `-` : either direction.
    Both,
}
|
||||
|
||||
/// Expression in a Cypher query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Expression {
    /// A literal JSON value.
    Literal(JsonValue),
    /// A bound variable name.
    Variable(String),
    /// A property access: `variable.property`.
    Property(String, String),
    /// A query parameter: `$param`.
    Parameter(String),
    /// A function call with its argument expressions.
    FunctionCall(String, Vec<Expression>),
    /// A binary operation: left op right.
    BinaryOp(Box<Expression>, BinaryOperator, Box<Expression>),
    /// A unary operation applied to one operand.
    UnaryOp(UnaryOperator, Box<Expression>),
}

impl Expression {
    /// Construct a literal value expression.
    pub fn literal(value: impl Into<JsonValue>) -> Self {
        Self::Literal(value.into())
    }

    /// Construct a variable reference.
    pub fn variable(name: impl Into<String>) -> Self {
        Self::Variable(name.into())
    }

    /// Construct a property access (`var.prop`).
    pub fn property(var: impl Into<String>, prop: impl Into<String>) -> Self {
        Self::Property(var.into(), prop.into())
    }

    /// Construct a parameter reference (`$name`).
    pub fn parameter(name: impl Into<String>) -> Self {
        Self::Parameter(name.into())
    }

    /// Construct a function call.
    pub fn function(name: impl Into<String>, args: Vec<Expression>) -> Self {
        Self::FunctionCall(name.into(), args)
    }

    /// Construct a binary operation, boxing both operands.
    pub fn binary(left: Expression, op: BinaryOperator, right: Expression) -> Self {
        Self::BinaryOp(Box::new(left), op, Box::new(right))
    }

    /// Construct a unary operation, boxing the operand.
    pub fn unary(op: UnaryOperator, expr: Expression) -> Self {
        Self::UnaryOp(op, Box::new(expr))
    }
}
|
||||
|
||||
/// Binary operator in a Cypher expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryOperator {
    /// `=`
    Eq,
    /// `<>`
    Neq,
    /// `<`
    Lt,
    /// `<=`
    Lte,
    /// `>`
    Gt,
    /// `>=`
    Gte,
    /// `AND`
    And,
    /// `OR`
    Or,
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `%`
    Mod,
    /// `IN`
    In,
    /// `CONTAINS`
    Contains,
    /// `STARTS WITH`
    StartsWith,
    /// `ENDS WITH`
    EndsWith,
}
|
||||
|
||||
/// Unary operator in a Cypher expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum UnaryOperator {
    /// `NOT`
    Not,
    /// `-` (numeric negation)
    Minus,
}
|
||||
497
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/executor.rs
vendored
Normal file
497
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/executor.rs
vendored
Normal file
@@ -0,0 +1,497 @@
|
||||
// Cypher query executor
|
||||
|
||||
use super::ast::*;
|
||||
use crate::graph::storage::GraphStore;
|
||||
use serde_json::{json, Value as JsonValue};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Execute a parsed Cypher query against `graph`.
///
/// Clauses run in order; the first RETURN clause short-circuits and
/// produces the query result (any clauses after it are ignored). If the
/// query has no RETURN clause, an empty JSON array is returned.
/// `params` supplies values for `$param` placeholders, looked up by key.
pub fn execute_cypher(
    graph: &GraphStore,
    query: &CypherQuery,
    params: Option<&JsonValue>,
) -> Result<JsonValue, String> {
    let mut context = ExecutionContext::new(params);

    for clause in &query.clauses {
        match clause {
            Clause::Match(m) => execute_match(graph, m, &mut context)?,
            Clause::Create(c) => execute_create(graph, c, &mut context)?,
            // RETURN ends execution immediately with the projected result.
            Clause::Return(r) => return execute_return(graph, r, &context),
            Clause::Where(w) => execute_where(graph, w, &mut context)?,
            Clause::Set(s) => execute_set(graph, s, &mut context)?,
            Clause::Delete(d) => execute_delete(graph, d, &mut context)?,
            Clause::With(w) => execute_with(graph, w, &mut context)?,
        }
    }

    // If no RETURN clause, return empty result
    Ok(json!([]))
}
|
||||
|
||||
/// Execution context holding variable bindings.
///
/// `bindings` is a stack of scopes; lookups search from the innermost
/// scope outward. `params` is the optional `$param` JSON object
/// supplied by the caller.
struct ExecutionContext<'a> {
    bindings: Vec<HashMap<String, Binding>>,
    params: Option<&'a JsonValue>,
}
|
||||
|
||||
impl<'a> ExecutionContext<'a> {
|
||||
fn new(params: Option<&'a JsonValue>) -> Self {
|
||||
Self {
|
||||
bindings: vec![HashMap::new()],
|
||||
params,
|
||||
}
|
||||
}
|
||||
|
||||
fn bind(&mut self, var: &str, binding: Binding) {
|
||||
if let Some(last) = self.bindings.last_mut() {
|
||||
last.insert(var.to_string(), binding);
|
||||
}
|
||||
}
|
||||
|
||||
fn get(&self, var: &str) -> Option<&Binding> {
|
||||
for bindings in self.bindings.iter().rev() {
|
||||
if let Some(binding) = bindings.get(var) {
|
||||
return Some(binding);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn get_param(&self, name: &str) -> Option<&JsonValue> {
|
||||
self.params.and_then(|p| p.get(name))
|
||||
}
|
||||
|
||||
fn push_scope(&mut self) {
|
||||
self.bindings.push(HashMap::new());
|
||||
}
|
||||
|
||||
fn pop_scope(&mut self) {
|
||||
self.bindings.pop();
|
||||
}
|
||||
}
|
||||
|
||||
/// A value bound to a Cypher variable during execution.
#[derive(Debug, Clone)]
enum Binding {
    /// A graph node, referenced by id.
    Node(u64),
    /// A graph edge, referenced by id.
    Edge(u64),
    /// A plain JSON value.
    Value(JsonValue),
}
|
||||
|
||||
fn execute_match(
|
||||
graph: &GraphStore,
|
||||
match_clause: &MatchClause,
|
||||
context: &mut ExecutionContext,
|
||||
) -> Result<(), String> {
|
||||
for pattern in &match_clause.patterns {
|
||||
match_pattern(graph, pattern, context)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Match a single pattern against the graph, binding variables in `context`.
///
/// Simple implementation: each element is handled independently — node
/// elements via `match_node`, relationship elements via
/// `match_relationship` (currently a placeholder that always succeeds).
fn match_pattern(
    graph: &GraphStore,
    pattern: &Pattern,
    context: &mut ExecutionContext,
) -> Result<(), String> {
    // Simple implementation: match nodes by label and properties
    for element in &pattern.elements {
        match element {
            PatternElement::Node(node_pattern) => {
                match_node(graph, node_pattern, context)?;
            }
            PatternElement::Relationship(rel_pattern) => {
                match_relationship(graph, rel_pattern, context)?;
            }
        }
    }
    Ok(())
}
|
||||
|
||||
/// Match a node pattern and bind the first matching node, if any.
///
/// Candidates are all nodes when the pattern has no labels, otherwise
/// the nodes indexed under the first label; remaining labels and all
/// property constraints are then checked per candidate.
///
/// Limitations of this simplified implementation:
/// - only the FIRST matching node is bound (no multi-row matching);
/// - property constraints must be `Expression::Literal` — any other
///   expression kind rejects the candidate;
/// - finding no match is not an error: the function returns `Ok(())`
///   and simply leaves the variable unbound.
fn match_node(
    graph: &GraphStore,
    pattern: &NodePattern,
    context: &mut ExecutionContext,
) -> Result<(), String> {
    // Find nodes matching labels and properties
    let candidates = if pattern.labels.is_empty() {
        graph.nodes.all_nodes()
    } else {
        // Find by first label (fast path via the label index)
        graph.nodes.find_by_label(&pattern.labels[0])
    };

    for node in candidates {
        // Check additional labels
        if !pattern.labels.iter().all(|l| node.has_label(l)) {
            continue;
        }

        // Check properties: every constrained key must exist on the node
        // and equal the literal value from the pattern.
        let matches_props = pattern.properties.iter().all(|(key, expr)| {
            if let Some(node_value) = node.get_property(key) {
                if let Expression::Literal(expected) = expr {
                    node_value == expected
                } else {
                    false
                }
            } else {
                false
            }
        });

        if matches_props {
            if let Some(var) = &pattern.variable {
                context.bind(var, Binding::Node(node.id));
            }
            // Simplified: stop at the first match.
            return Ok(());
        }
    }

    Ok(())
}
|
||||
|
||||
/// Relationship matching stub.
///
/// Simplified: no traversal is performed and no bindings are produced; the
/// call always succeeds. Production code would traverse the graph based on
/// the relationship pattern (direction, type, properties).
fn match_relationship(
    _graph: &GraphStore,
    _pattern: &RelationshipPattern,
    _context: &mut ExecutionContext,
) -> Result<(), String> {
    Ok(())
}
|
||||
|
||||
fn execute_create(
|
||||
graph: &GraphStore,
|
||||
create_clause: &CreateClause,
|
||||
context: &mut ExecutionContext,
|
||||
) -> Result<(), String> {
|
||||
for pattern in &create_clause.patterns {
|
||||
create_pattern(graph, pattern, context)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_pattern(
|
||||
graph: &GraphStore,
|
||||
pattern: &Pattern,
|
||||
context: &mut ExecutionContext,
|
||||
) -> Result<(), String> {
|
||||
let mut last_node_id: Option<u64> = None;
|
||||
|
||||
for element in &pattern.elements {
|
||||
match element {
|
||||
PatternElement::Node(node_pattern) => {
|
||||
let node_id = create_node(graph, node_pattern, context)?;
|
||||
last_node_id = Some(node_id);
|
||||
|
||||
if let Some(var) = &node_pattern.variable {
|
||||
context.bind(var, Binding::Node(node_id));
|
||||
}
|
||||
}
|
||||
PatternElement::Relationship(rel_pattern) => {
|
||||
if let Some(source_id) = last_node_id {
|
||||
// For CREATE, we need to get the target node from context or create it
|
||||
// This is simplified - production code would handle more complex patterns
|
||||
let edge_id = create_relationship(graph, rel_pattern, source_id, context)?;
|
||||
|
||||
if let Some(var) = &rel_pattern.variable {
|
||||
context.bind(var, Binding::Edge(edge_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_node(
|
||||
graph: &GraphStore,
|
||||
pattern: &NodePattern,
|
||||
context: &ExecutionContext,
|
||||
) -> Result<u64, String> {
|
||||
let mut properties = HashMap::new();
|
||||
|
||||
for (key, expr) in &pattern.properties {
|
||||
let value = evaluate_expression(expr, context)?;
|
||||
properties.insert(key.clone(), value);
|
||||
}
|
||||
|
||||
let node_id = graph.add_node(pattern.labels.clone(), properties);
|
||||
Ok(node_id)
|
||||
}
|
||||
|
||||
fn create_relationship(
|
||||
graph: &GraphStore,
|
||||
pattern: &RelationshipPattern,
|
||||
source_id: u64,
|
||||
context: &ExecutionContext,
|
||||
) -> Result<u64, String> {
|
||||
// Simplified: assumes target node is bound in context
|
||||
// Production code would handle more complex patterns
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
|
||||
for (key, expr) in &pattern.properties {
|
||||
let value = evaluate_expression(expr, context)?;
|
||||
properties.insert(key.clone(), value);
|
||||
}
|
||||
|
||||
let edge_type = pattern
|
||||
.rel_type
|
||||
.clone()
|
||||
.unwrap_or_else(|| "RELATED".to_string());
|
||||
|
||||
// For now, create a self-loop. Production code would get target from pattern
|
||||
let target_id = source_id;
|
||||
|
||||
graph.add_edge(source_id, target_id, edge_type, properties)
|
||||
}
|
||||
|
||||
/// Execute a RETURN clause: build one JSON object per non-empty binding
/// scope, then apply the DISTINCT / SKIP / LIMIT modifiers in that order.
///
/// Each row maps a column name (explicit alias, variable name, or
/// "var.prop") to the evaluated value. Returns a JSON array of rows.
fn execute_return(
    graph: &GraphStore,
    return_clause: &ReturnClause,
    context: &ExecutionContext,
) -> Result<JsonValue, String> {
    let mut results = Vec::new();

    // If no bindings, return empty
    if context.bindings.is_empty() || context.bindings[0].is_empty() {
        return Ok(json!([]));
    }

    // One row per non-empty binding scope
    for bindings in &context.bindings {
        if bindings.is_empty() {
            continue;
        }

        let mut row = serde_json::Map::new();

        for item in &return_clause.items {
            let value = evaluate_return_item(graph, item, bindings)?;
            // Column name: explicit alias wins; otherwise derive from the expression
            let key = item.alias.clone().unwrap_or_else(|| {
                // Generate key from expression
                match &item.expression {
                    Expression::Variable(v) => v.clone(),
                    Expression::Property(v, p) => format!("{}.{}", v, p),
                    _ => "result".to_string(),
                }
            });

            row.insert(key, value);
        }

        results.push(JsonValue::Object(row));
    }

    // Apply DISTINCT: sort rows by their string form so dedup removes all
    // duplicates. NOTE: this also reorders the result rows.
    if return_clause.distinct {
        results.sort_by(|a, b| a.to_string().cmp(&b.to_string()));
        results.dedup();
    }

    // Apply SKIP
    if let Some(skip) = return_clause.skip {
        results = results.into_iter().skip(skip).collect();
    }

    // Apply LIMIT
    if let Some(limit) = return_clause.limit {
        results.truncate(limit);
    }

    Ok(JsonValue::Array(results))
}
|
||||
|
||||
fn evaluate_return_item(
|
||||
graph: &GraphStore,
|
||||
item: &ReturnItem,
|
||||
bindings: &HashMap<String, Binding>,
|
||||
) -> Result<JsonValue, String> {
|
||||
match &item.expression {
|
||||
Expression::Variable(var) => {
|
||||
if let Some(binding) = bindings.get(var) {
|
||||
match binding {
|
||||
Binding::Node(id) => {
|
||||
if let Some(node) = graph.nodes.get(*id) {
|
||||
Ok(serde_json::to_value(&node).unwrap())
|
||||
} else {
|
||||
Ok(JsonValue::Null)
|
||||
}
|
||||
}
|
||||
Binding::Edge(id) => {
|
||||
if let Some(edge) = graph.edges.get(*id) {
|
||||
Ok(serde_json::to_value(&edge).unwrap())
|
||||
} else {
|
||||
Ok(JsonValue::Null)
|
||||
}
|
||||
}
|
||||
Binding::Value(v) => Ok(v.clone()),
|
||||
}
|
||||
} else {
|
||||
Ok(JsonValue::Null)
|
||||
}
|
||||
}
|
||||
Expression::Property(var, prop) => {
|
||||
if let Some(Binding::Node(id)) = bindings.get(var) {
|
||||
if let Some(node) = graph.nodes.get(*id) {
|
||||
Ok(node.get_property(prop).cloned().unwrap_or(JsonValue::Null))
|
||||
} else {
|
||||
Ok(JsonValue::Null)
|
||||
}
|
||||
} else {
|
||||
Ok(JsonValue::Null)
|
||||
}
|
||||
}
|
||||
Expression::Literal(value) => Ok(value.clone()),
|
||||
_ => Err("Unsupported return expression".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn execute_where(
|
||||
_graph: &GraphStore,
|
||||
where_clause: &WhereClause,
|
||||
context: &mut ExecutionContext,
|
||||
) -> Result<(), String> {
|
||||
// Evaluate WHERE condition and filter bindings
|
||||
// Simplified implementation
|
||||
let result = evaluate_expression(&where_clause.condition, context)?;
|
||||
|
||||
if !result.as_bool().unwrap_or(false) {
|
||||
// Clear bindings if condition is false
|
||||
if let Some(last) = context.bindings.last_mut() {
|
||||
last.clear();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// SET clause stub.
///
/// Simplified: property updates are not applied; the call is a no-op that
/// always succeeds.
fn execute_set(
    _graph: &GraphStore,
    _set_clause: &SetClause,
    _context: &mut ExecutionContext,
) -> Result<(), String> {
    Ok(())
}
|
||||
|
||||
/// DELETE clause stub.
///
/// Simplified: nothing is removed from the graph; the call is a no-op that
/// always succeeds.
fn execute_delete(
    _graph: &GraphStore,
    _delete_clause: &DeleteClause,
    _context: &mut ExecutionContext,
) -> Result<(), String> {
    Ok(())
}
|
||||
|
||||
/// WITH clause stub.
///
/// Simplified: projections/aggregations are not applied; the call is a no-op
/// that always succeeds.
fn execute_with(
    _graph: &GraphStore,
    _with_clause: &WithClause,
    _context: &mut ExecutionContext,
) -> Result<(), String> {
    Ok(())
}
|
||||
|
||||
/// Evaluate a Cypher expression against the current execution context.
///
/// Supports literals, variable lookups, `$parameter` references, and the
/// binary comparisons Eq/Neq/Lt/Gt. Unknown variables and parameters
/// evaluate to JSON null; unsupported operators and expression forms are
/// errors.
fn evaluate_expression(expr: &Expression, context: &ExecutionContext) -> Result<JsonValue, String> {
    match expr {
        Expression::Literal(value) => Ok(value.clone()),
        Expression::Variable(var) => {
            if let Some(binding) = context.get(var) {
                match binding {
                    Binding::Value(v) => Ok(v.clone()),
                    // Graph entities reduce to their id here; full
                    // serialization only happens in RETURN handling.
                    Binding::Node(id) => Ok(json!({ "id": id })),
                    Binding::Edge(id) => Ok(json!({ "id": id })),
                }
            } else {
                Ok(JsonValue::Null)
            }
        }
        Expression::Parameter(name) => {
            Ok(context.get_param(name).cloned().unwrap_or(JsonValue::Null))
        }
        Expression::BinaryOp(left, op, right) => {
            let left_val = evaluate_expression(left, context)?;
            let right_val = evaluate_expression(right, context)?;

            match op {
                BinaryOperator::Eq => Ok(json!(left_val == right_val)),
                BinaryOperator::Neq => Ok(json!(left_val != right_val)),
                // Ordering comparisons are numeric-only; non-numeric
                // operands compare as false rather than erroring.
                BinaryOperator::Lt => {
                    if let (Some(l), Some(r)) = (left_val.as_f64(), right_val.as_f64()) {
                        Ok(json!(l < r))
                    } else {
                        Ok(json!(false))
                    }
                }
                BinaryOperator::Gt => {
                    if let (Some(l), Some(r)) = (left_val.as_f64(), right_val.as_f64()) {
                        Ok(json!(l > r))
                    } else {
                        Ok(json!(false))
                    }
                }
                _ => Err(format!("Unsupported binary operator: {:?}", op)),
            }
        }
        _ => Err("Unsupported expression type".to_string()),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // CREATE followed by RETURN should succeed and yield a JSON array.
    #[test]
    fn test_execute_create() {
        let graph = GraphStore::new();

        let pattern = Pattern::new().with_element(PatternElement::Node(
            NodePattern::new()
                .with_variable("n")
                .with_label("Person")
                .with_property("name", Expression::literal("Alice")),
        ));

        let create = CreateClause::new(vec![pattern]);
        let query = CypherQuery::new()
            .with_clause(Clause::Create(create))
            .with_clause(Clause::Return(ReturnClause::new(vec![ReturnItem::new(
                Expression::variable("n"),
            )])));

        let result = execute_cypher(&graph, &query, None);
        assert!(result.is_ok());

        let json = result.unwrap();
        assert!(json.is_array());
    }

    // MATCH with a property projection executes without error against a
    // pre-populated graph.
    #[test]
    fn test_execute_match() {
        let graph = GraphStore::new();

        // Create a node first
        graph.add_node(
            vec!["Person".to_string()],
            HashMap::from([("name".to_string(), "Alice".into())]),
        );

        let pattern = Pattern::new().with_element(PatternElement::Node(
            NodePattern::new().with_variable("n").with_label("Person"),
        ));

        let match_clause = MatchClause::new(vec![pattern]);
        let query = CypherQuery::new()
            .with_clause(Clause::Match(match_clause))
            .with_clause(Clause::Return(ReturnClause::new(vec![ReturnItem::new(
                Expression::property("n", "name"),
            )])));

        let result = execute_cypher(&graph, &query, None);
        assert!(result.is_ok());
    }
}
|
||||
64
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/mod.rs
vendored
Normal file
64
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/mod.rs
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
// Simplified Cypher query support
|
||||
|
||||
pub mod ast;
|
||||
pub mod executor;
|
||||
pub mod parser;
|
||||
|
||||
pub use ast::*;
|
||||
pub use executor::execute_cypher;
|
||||
pub use parser::parse_cypher;
|
||||
|
||||
use super::storage::GraphStore;
|
||||
use serde_json::Value as JsonValue;
|
||||
|
||||
/// Execute a Cypher query against a graph
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `graph` - The graph to query
|
||||
/// * `query` - Cypher query string
|
||||
/// * `params` - Query parameters as JSON
|
||||
///
|
||||
/// # Returns
|
||||
/// Query results as JSON array
|
||||
pub fn query(
|
||||
graph: &GraphStore,
|
||||
query: &str,
|
||||
params: Option<JsonValue>,
|
||||
) -> Result<JsonValue, String> {
|
||||
let parsed = parse_cypher(query)?;
|
||||
execute_cypher(graph, &parsed, params.as_ref())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    // End-to-end: parse + execute a CREATE ... RETURN statement.
    #[test]
    fn test_cypher_create() {
        let graph = GraphStore::new();

        let result = query(&graph, "CREATE (n:Person {name: 'Alice'}) RETURN n", None);

        assert!(result.is_ok());
    }

    // End-to-end: MATCH with WHERE filter over a pre-populated graph.
    #[test]
    fn test_cypher_match() {
        let graph = GraphStore::new();

        // Create a node first
        graph.add_node(
            vec!["Person".to_string()],
            HashMap::from([("name".to_string(), "Alice".into())]),
        );

        let result = query(
            &graph,
            "MATCH (n:Person) WHERE n.name = 'Alice' RETURN n",
            None,
        );

        assert!(result.is_ok());
    }
}
|
||||
401
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/parser.rs
vendored
Normal file
401
vendor/ruvector/crates/ruvector-postgres/src/graph/cypher/parser.rs
vendored
Normal file
@@ -0,0 +1,401 @@
|
||||
// Simplified Cypher parser
|
||||
// Note: This is a basic parser for demonstration. A production parser would use
|
||||
// a proper parsing library like nom, pest, or lalrpop.
|
||||
|
||||
use super::ast::*;
|
||||
use serde_json::Value as JsonValue;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Parse a Cypher query string
|
||||
pub fn parse_cypher(query: &str) -> Result<CypherQuery, String> {
|
||||
let query = query.trim();
|
||||
|
||||
// Very simple pattern matching for basic queries
|
||||
// Production code should use a proper parser
|
||||
|
||||
if query.to_uppercase().starts_with("CREATE") {
|
||||
parse_create(query)
|
||||
} else if query.to_uppercase().starts_with("MATCH") {
|
||||
parse_match(query)
|
||||
} else {
|
||||
Err(format!("Unsupported query type: {}", query))
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_create(query: &str) -> Result<CypherQuery, String> {
|
||||
// Pattern: CREATE (n:Label {prop: value}) RETURN n
|
||||
let mut result = CypherQuery::new();
|
||||
|
||||
// Extract pattern between CREATE and RETURN/end
|
||||
let create_part = if let Some(idx) = query.to_uppercase().find("RETURN") {
|
||||
&query[6..idx].trim()
|
||||
} else {
|
||||
&query[6..].trim()
|
||||
};
|
||||
|
||||
let pattern = parse_pattern(create_part)?;
|
||||
result
|
||||
.clauses
|
||||
.push(Clause::Create(CreateClause::new(vec![pattern])));
|
||||
|
||||
// Check for RETURN clause
|
||||
if let Some(idx) = query.to_uppercase().find("RETURN") {
|
||||
let return_part = &query[idx + 6..].trim();
|
||||
let return_clause = parse_return(return_part)?;
|
||||
result.clauses.push(Clause::Return(return_clause));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn parse_match(query: &str) -> Result<CypherQuery, String> {
|
||||
// Pattern: MATCH (n:Label) WHERE n.prop = value RETURN n
|
||||
let mut result = CypherQuery::new();
|
||||
|
||||
// Extract MATCH pattern
|
||||
let match_start = 5; // "MATCH".len()
|
||||
let match_end = query
|
||||
.to_uppercase()
|
||||
.find("WHERE")
|
||||
.or_else(|| query.to_uppercase().find("RETURN"))
|
||||
.unwrap_or(query.len());
|
||||
|
||||
let match_part = &query[match_start..match_end].trim();
|
||||
let pattern = parse_pattern(match_part)?;
|
||||
result
|
||||
.clauses
|
||||
.push(Clause::Match(MatchClause::new(vec![pattern])));
|
||||
|
||||
// Check for WHERE clause
|
||||
if let Some(where_idx) = query.to_uppercase().find("WHERE") {
|
||||
let where_start = where_idx + 5; // "WHERE".len()
|
||||
let where_end = query.to_uppercase().find("RETURN").unwrap_or(query.len());
|
||||
|
||||
let where_part = &query[where_start..where_end].trim();
|
||||
let where_clause = parse_where(where_part)?;
|
||||
result.clauses.push(Clause::Where(where_clause));
|
||||
}
|
||||
|
||||
// Check for RETURN clause
|
||||
if let Some(return_idx) = query.to_uppercase().find("RETURN") {
|
||||
let return_part = &query[return_idx + 6..].trim();
|
||||
let return_clause = parse_return(return_part)?;
|
||||
result.clauses.push(Clause::Return(return_clause));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn parse_pattern(pattern_str: &str) -> Result<Pattern, String> {
|
||||
let pattern_str = pattern_str.trim();
|
||||
let mut pattern = Pattern::new();
|
||||
|
||||
// Simple parser for (n:Label {prop: value})-[:TYPE]->(m)
|
||||
// This is very basic - production code needs proper parsing
|
||||
|
||||
if pattern_str.starts_with('(') {
|
||||
// Node pattern
|
||||
let end = pattern_str.find(')').ok_or("Unclosed node pattern")?;
|
||||
|
||||
let node_content = &pattern_str[1..end];
|
||||
let node_pattern = parse_node_pattern(node_content)?;
|
||||
pattern = pattern.with_element(PatternElement::Node(node_pattern));
|
||||
|
||||
// Check for relationship
|
||||
let remaining = &pattern_str[end + 1..].trim();
|
||||
if !remaining.is_empty() && remaining.starts_with('-') {
|
||||
// Parse relationship
|
||||
let (rel_pattern, rest) = parse_relationship_pattern(remaining)?;
|
||||
pattern = pattern.with_element(PatternElement::Relationship(rel_pattern));
|
||||
|
||||
// Parse target node
|
||||
if rest.starts_with('(') {
|
||||
let end = rest.find(')').ok_or("Unclosed target node pattern")?;
|
||||
let node_content = &rest[1..end];
|
||||
let node_pattern = parse_node_pattern(node_content)?;
|
||||
pattern = pattern.with_element(PatternElement::Node(node_pattern));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(pattern)
|
||||
}
|
||||
|
||||
/// Parse the interior of a node pattern: `var:Label1:Label2 {prop: value}`.
///
/// All three parts are optional; an empty string yields an anonymous,
/// unlabeled pattern.
fn parse_node_pattern(content: &str) -> Result<NodePattern, String> {
    let content = content.trim();
    let mut pattern = NodePattern::new();

    if content.is_empty() {
        return Ok(pattern);
    }

    // Parse: n:Label {prop: value}
    // Split off the optional "{...}" property block first.
    let mut parts = content.splitn(2, '{');
    let var_label = parts.next().unwrap_or("").trim();

    // Parse variable and labels
    if let Some((var, labels)) = var_label.split_once(':') {
        let var = var.trim();
        if !var.is_empty() {
            pattern = pattern.with_variable(var);
        }

        // Multiple labels may be chained with ':'.
        let labels = labels.trim();
        for label in labels.split(':') {
            let label = label.trim();
            if !label.is_empty() {
                pattern = pattern.with_label(label);
            }
        }
    } else if !var_label.is_empty() {
        // Just a variable
        pattern = pattern.with_variable(var_label);
    }

    // Parse properties (text after '{', trailing '}' stripped)
    if let Some(props_str) = parts.next() {
        let props_str = props_str.trim_end_matches('}').trim();
        let properties = parse_properties(props_str)?;
        for (key, value) in properties {
            pattern = pattern.with_property(key, Expression::Literal(value));
        }
    }

    Ok(pattern)
}
|
||||
|
||||
fn parse_relationship_pattern(content: &str) -> Result<(RelationshipPattern, &str), String> {
|
||||
let content = content.trim();
|
||||
|
||||
// Determine direction
|
||||
let (direction, start_idx) = if content.starts_with("<-") {
|
||||
(Direction::Incoming, 2)
|
||||
} else if content.starts_with("->") {
|
||||
(Direction::Outgoing, 2)
|
||||
} else if content.starts_with('-') {
|
||||
(Direction::Both, 1)
|
||||
} else {
|
||||
return Err("Invalid relationship pattern".to_string());
|
||||
};
|
||||
|
||||
let mut pattern = RelationshipPattern::new(direction);
|
||||
|
||||
// Find relationship end
|
||||
let _end_markers = if direction == Direction::Incoming {
|
||||
vec!["-", "-("]
|
||||
} else {
|
||||
vec!["->", "-"]
|
||||
};
|
||||
|
||||
let mut rel_content = "";
|
||||
let mut rest_start = start_idx;
|
||||
|
||||
// Parse relationship details if present
|
||||
if content[start_idx..].starts_with('[') {
|
||||
if let Some(end) = content[start_idx..].find(']') {
|
||||
rel_content = &content[start_idx + 1..start_idx + end];
|
||||
rest_start = start_idx + end + 1;
|
||||
|
||||
// Skip closing arrow
|
||||
let rest = &content[rest_start..];
|
||||
if rest.starts_with("->") {
|
||||
rest_start += 2;
|
||||
} else if rest.starts_with('-') {
|
||||
rest_start += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parse relationship content: r:TYPE {prop: value}
|
||||
if !rel_content.is_empty() {
|
||||
let mut parts = rel_content.splitn(2, '{');
|
||||
let var_type = parts.next().unwrap_or("").trim();
|
||||
|
||||
if let Some((var, rel_type)) = var_type.split_once(':') {
|
||||
let var = var.trim();
|
||||
if !var.is_empty() {
|
||||
pattern = pattern.with_variable(var);
|
||||
}
|
||||
|
||||
let rel_type = rel_type.trim();
|
||||
if !rel_type.is_empty() {
|
||||
pattern = pattern.with_type(rel_type);
|
||||
}
|
||||
} else if !var_type.is_empty() {
|
||||
// Could be variable or type
|
||||
if var_type.chars().next().unwrap_or(' ').is_lowercase() {
|
||||
pattern = pattern.with_variable(var_type);
|
||||
} else {
|
||||
pattern = pattern.with_type(var_type);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse properties
|
||||
if let Some(props_str) = parts.next() {
|
||||
let props_str = props_str.trim_end_matches('}').trim();
|
||||
let properties = parse_properties(props_str)?;
|
||||
for (key, value) in properties {
|
||||
pattern = pattern.with_property(key, Expression::Literal(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((pattern, &content[rest_start..]))
|
||||
}
|
||||
|
||||
fn parse_properties(props_str: &str) -> Result<HashMap<String, JsonValue>, String> {
|
||||
let mut properties = HashMap::new();
|
||||
|
||||
if props_str.is_empty() {
|
||||
return Ok(properties);
|
||||
}
|
||||
|
||||
// Very simple property parser: key: value, key2: value2
|
||||
// Production code should use proper JSON parsing
|
||||
for pair in props_str.split(',') {
|
||||
let pair = pair.trim();
|
||||
if let Some((key, value)) = pair.split_once(':') {
|
||||
let key = key.trim().trim_matches('\'').trim_matches('"');
|
||||
let value = value.trim();
|
||||
|
||||
let json_value = if value.starts_with('\'') || value.starts_with('"') {
|
||||
// String
|
||||
JsonValue::String(value.trim_matches('\'').trim_matches('"').to_string())
|
||||
} else if let Ok(num) = value.parse::<i64>() {
|
||||
// Integer
|
||||
JsonValue::Number(num.into())
|
||||
} else if let Ok(num) = value.parse::<f64>() {
|
||||
// Float
|
||||
JsonValue::Number(serde_json::Number::from_f64(num).ok_or("Invalid number")?)
|
||||
} else if value == "true" || value == "false" {
|
||||
// Boolean
|
||||
JsonValue::Bool(value == "true")
|
||||
} else {
|
||||
// Default to string
|
||||
JsonValue::String(value.to_string())
|
||||
};
|
||||
|
||||
properties.insert(key.to_string(), json_value);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(properties)
|
||||
}
|
||||
|
||||
fn parse_where(where_str: &str) -> Result<WhereClause, String> {
|
||||
// Simple WHERE parser: n.prop = value
|
||||
let where_str = where_str.trim();
|
||||
|
||||
// Parse simple equality
|
||||
if let Some((left, right)) = where_str.split_once('=') {
|
||||
let left = left.trim();
|
||||
let right = right.trim();
|
||||
|
||||
let left_expr = if let Some((var, prop)) = left.split_once('.') {
|
||||
Expression::Property(var.trim().to_string(), prop.trim().to_string())
|
||||
} else {
|
||||
Expression::Variable(left.to_string())
|
||||
};
|
||||
|
||||
let right_expr = if right.starts_with('\'') || right.starts_with('"') {
|
||||
Expression::Literal(JsonValue::String(
|
||||
right.trim_matches('\'').trim_matches('"').to_string(),
|
||||
))
|
||||
} else if let Ok(num) = right.parse::<i64>() {
|
||||
Expression::Literal(JsonValue::Number(num.into()))
|
||||
} else {
|
||||
Expression::Variable(right.to_string())
|
||||
};
|
||||
|
||||
Ok(WhereClause::new(Expression::BinaryOp(
|
||||
Box::new(left_expr),
|
||||
BinaryOperator::Eq,
|
||||
Box::new(right_expr),
|
||||
)))
|
||||
} else {
|
||||
Err("Unsupported WHERE clause format".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_return(return_str: &str) -> Result<ReturnClause, String> {
|
||||
let return_str = return_str.trim();
|
||||
let mut items = Vec::new();
|
||||
|
||||
// Parse return items (comma-separated)
|
||||
for item_str in return_str.split(',') {
|
||||
let item_str = item_str.trim();
|
||||
|
||||
// Check for alias: expr AS alias
|
||||
if let Some((expr_str, alias)) = item_str.split_once(" AS ") {
|
||||
let expr = parse_return_expression(expr_str.trim())?;
|
||||
items.push(ReturnItem::new(expr).with_alias(alias.trim()));
|
||||
} else {
|
||||
let expr = parse_return_expression(item_str)?;
|
||||
items.push(ReturnItem::new(expr));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ReturnClause::new(items))
|
||||
}
|
||||
|
||||
fn parse_return_expression(expr_str: &str) -> Result<Expression, String> {
|
||||
let expr_str = expr_str.trim();
|
||||
|
||||
// Check for property access
|
||||
if let Some((var, prop)) = expr_str.split_once('.') {
|
||||
Ok(Expression::Property(
|
||||
var.trim().to_string(),
|
||||
prop.trim().to_string(),
|
||||
))
|
||||
} else {
|
||||
Ok(Expression::Variable(expr_str.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // CREATE + RETURN should parse into two clauses.
    #[test]
    fn test_parse_create() {
        let query = "CREATE (n:Person {name: 'Alice', age: 30}) RETURN n";
        let result = parse_cypher(query);
        assert!(result.is_ok());

        let parsed = result.unwrap();
        assert_eq!(parsed.clauses.len(), 2);
    }

    // MATCH + WHERE + RETURN should parse into three clauses.
    #[test]
    fn test_parse_match() {
        let query = "MATCH (n:Person) WHERE n.name = 'Alice' RETURN n";
        let result = parse_cypher(query);
        assert!(result.is_ok());

        let parsed = result.unwrap();
        assert_eq!(parsed.clauses.len(), 3);
    }

    // A node-relationship-node path yields three pattern elements.
    #[test]
    fn test_parse_pattern_with_relationship() {
        let pattern_str = "(a:Person)-[:KNOWS]->(b:Person)";
        let result = parse_pattern(pattern_str);
        assert!(result.is_ok());

        let pattern = result.unwrap();
        assert_eq!(pattern.elements.len(), 3); // node, rel, node
    }

    // The property parser handles strings, integers, and booleans.
    #[test]
    fn test_parse_properties() {
        let props = "name: 'Alice', age: 30, active: true";
        let result = parse_properties(props);
        assert!(result.is_ok());

        let properties = result.unwrap();
        assert_eq!(properties.len(), 3);
        assert_eq!(properties.get("name").unwrap().as_str().unwrap(), "Alice");
        assert_eq!(properties.get("age").unwrap().as_i64().unwrap(), 30);
        assert_eq!(properties.get("active").unwrap().as_bool().unwrap(), true);
    }
}
|
||||
63
vendor/ruvector/crates/ruvector-postgres/src/graph/mod.rs
vendored
Normal file
63
vendor/ruvector/crates/ruvector-postgres/src/graph/mod.rs
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
// Graph operations module for ruvector-postgres
|
||||
//
|
||||
// Provides graph storage, traversal, Cypher query support, and SPARQL (W3C standard)
|
||||
|
||||
pub mod cypher;
|
||||
pub mod operators;
|
||||
pub mod sparql;
|
||||
pub mod storage;
|
||||
pub mod traversal;
|
||||
|
||||
pub use cypher::{execute_cypher, CypherQuery};
|
||||
pub use storage::{Edge, EdgeStore, GraphStore, Node, NodeStore};
|
||||
pub use traversal::{bfs, dfs, shortest_path_dijkstra, PathResult};
|
||||
|
||||
use dashmap::DashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Global graph storage registry
|
||||
static GRAPH_REGISTRY: once_cell::sync::Lazy<DashMap<String, Arc<GraphStore>>> =
|
||||
once_cell::sync::Lazy::new(|| DashMap::new());
|
||||
|
||||
/// Get or create a graph by name
|
||||
pub fn get_or_create_graph(name: &str) -> Arc<GraphStore> {
|
||||
GRAPH_REGISTRY
|
||||
.entry(name.to_string())
|
||||
.or_insert_with(|| Arc::new(GraphStore::new()))
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Get an existing graph by name
|
||||
pub fn get_graph(name: &str) -> Option<Arc<GraphStore>> {
|
||||
GRAPH_REGISTRY.get(name).map(|g| g.clone())
|
||||
}
|
||||
|
||||
/// Delete a graph by name; returns `true` if a graph was actually removed.
pub fn delete_graph(name: &str) -> bool {
    GRAPH_REGISTRY.remove(name).is_some()
}
|
||||
|
||||
/// List all graph names
|
||||
pub fn list_graphs() -> Vec<String> {
|
||||
GRAPH_REGISTRY.iter().map(|e| e.key().clone()).collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Registry round-trip: create, fetch (same Arc), list, delete.
    #[test]
    fn test_graph_registry() {
        let graph1 = get_or_create_graph("test_graph");
        let graph2 = get_graph("test_graph");

        assert!(graph2.is_some());
        // Both handles must point at the same underlying store.
        assert!(Arc::ptr_eq(&graph1, &graph2.unwrap()));

        let graphs = list_graphs();
        assert!(graphs.contains(&"test_graph".to_string()));

        assert!(delete_graph("test_graph"));
        assert!(get_graph("test_graph").is_none());
    }
}
|
||||
1050
vendor/ruvector/crates/ruvector-postgres/src/graph/operators.rs
vendored
Normal file
1050
vendor/ruvector/crates/ruvector-postgres/src/graph/operators.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
907
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/ast.rs
vendored
Normal file
907
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/ast.rs
vendored
Normal file
@@ -0,0 +1,907 @@
|
||||
// SPARQL Abstract Syntax Tree (AST) types
|
||||
//
|
||||
// Provides type-safe representation of SPARQL 1.1 queries following
|
||||
// the W3C specification: https://www.w3.org/TR/sparql11-query/
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Complete SPARQL query or update
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SparqlQuery {
    /// Base IRI for relative IRI resolution
    pub base: Option<Iri>,
    /// PREFIX declarations
    pub prefixes: HashMap<String, Iri>,
    /// The query form (SELECT, CONSTRUCT, ASK, DESCRIBE) or update operation
    pub body: QueryBody,
}

impl SparqlQuery {
    /// Create a query around `body` with no BASE IRI and no prefixes.
    pub fn new(body: QueryBody) -> Self {
        Self {
            base: None,
            prefixes: HashMap::new(),
            body,
        }
    }

    /// Builder: set the BASE IRI used to resolve relative IRIs.
    pub fn with_base(mut self, base: Iri) -> Self {
        self.base = Some(base);
        self
    }

    /// Builder: register a PREFIX declaration. A later call with the same
    /// prefix overwrites the earlier IRI.
    pub fn with_prefix(mut self, prefix: impl Into<String>, iri: Iri) -> Self {
        self.prefixes.insert(prefix.into(), iri);
        self
    }
}

impl Default for SparqlQuery {
    /// Defaults to an empty SELECT query.
    fn default() -> Self {
        Self::new(QueryBody::Select(SelectQuery::default()))
    }
}
|
||||
|
||||
/// Query body - either a query form or update operation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum QueryBody {
    /// SELECT: variable-binding results
    Select(SelectQuery),
    /// CONSTRUCT: build triples from a template
    Construct(ConstructQuery),
    /// ASK: boolean pattern test
    Ask(AskQuery),
    /// DESCRIBE: resource descriptions
    Describe(DescribeQuery),
    /// SPARQL Update: a sequence of update operations
    Update(Vec<UpdateOperation>),
}

/// Query form type
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum QueryForm {
    Select,
    Construct,
    Ask,
    Describe,
}
|
||||
|
||||
/// SELECT query
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SelectQuery {
    /// Result variables or expressions
    pub projection: Projection,
    /// Dataset clauses (FROM, FROM NAMED)
    pub dataset: Vec<DatasetClause>,
    /// WHERE clause graph pattern
    pub where_clause: GraphPattern,
    /// Solution modifiers
    pub modifier: SolutionModifier,
    /// VALUES clause for inline data
    pub values: Option<ValuesClause>,
}

impl Default for SelectQuery {
    /// `SELECT *` over an empty pattern, with default modifiers and no
    /// dataset or VALUES clauses.
    fn default() -> Self {
        Self {
            projection: Projection::All,
            dataset: Vec::new(),
            where_clause: GraphPattern::Empty,
            modifier: SolutionModifier::default(),
            values: None,
        }
    }
}
|
||||
|
||||
/// Projection in SELECT clause
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Projection {
    /// SELECT * - all variables
    All,
    /// SELECT DISTINCT ...
    Distinct(Vec<ProjectionVar>),
    /// SELECT REDUCED ...
    Reduced(Vec<ProjectionVar>),
    /// SELECT var1 var2 ...
    Variables(Vec<ProjectionVar>),
}

/// Variable or expression in projection
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectionVar {
    /// Expression to project (often just a variable reference)
    pub expression: Expression,
    /// Optional "AS alias" name
    pub alias: Option<String>,
}

impl ProjectionVar {
    /// Project a bare variable, e.g. `?name`.
    pub fn variable(name: impl Into<String>) -> Self {
        Self {
            expression: Expression::Variable(name.into()),
            alias: None,
        }
    }

    /// Project an expression under an alias, e.g. `(expr AS ?x)`.
    pub fn expr_as(expr: Expression, alias: impl Into<String>) -> Self {
        Self {
            expression: expr,
            alias: Some(alias.into()),
        }
    }
}
|
||||
|
||||
/// CONSTRUCT query
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ConstructQuery {
|
||||
/// Template for constructing triples
|
||||
pub template: Vec<TriplePattern>,
|
||||
/// Dataset clauses
|
||||
pub dataset: Vec<DatasetClause>,
|
||||
/// WHERE clause
|
||||
pub where_clause: GraphPattern,
|
||||
/// Solution modifiers
|
||||
pub modifier: SolutionModifier,
|
||||
}
|
||||
|
||||
impl Default for ConstructQuery {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
template: Vec::new(),
|
||||
dataset: Vec::new(),
|
||||
where_clause: GraphPattern::Empty,
|
||||
modifier: SolutionModifier::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// ASK query
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AskQuery {
|
||||
/// Dataset clauses
|
||||
pub dataset: Vec<DatasetClause>,
|
||||
/// WHERE clause
|
||||
pub where_clause: GraphPattern,
|
||||
}
|
||||
|
||||
impl Default for AskQuery {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
dataset: Vec::new(),
|
||||
where_clause: GraphPattern::Empty,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// DESCRIBE query
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DescribeQuery {
|
||||
/// Resources to describe
|
||||
pub resources: Vec<VarOrIri>,
|
||||
/// Dataset clauses
|
||||
pub dataset: Vec<DatasetClause>,
|
||||
/// Optional WHERE clause
|
||||
pub where_clause: Option<GraphPattern>,
|
||||
}
|
||||
|
||||
impl Default for DescribeQuery {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
resources: Vec::new(),
|
||||
dataset: Vec::new(),
|
||||
where_clause: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Dataset clause (FROM / FROM NAMED)
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DatasetClause {
|
||||
pub iri: Iri,
|
||||
pub named: bool,
|
||||
}
|
||||
|
||||
/// VALUES clause for inline data
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ValuesClause {
|
||||
pub variables: Vec<String>,
|
||||
pub bindings: Vec<Vec<Option<RdfTerm>>>,
|
||||
}
|
||||
|
||||
/// Graph pattern - the WHERE clause body.
///
/// Mirrors the SPARQL 1.1 algebra: patterns compose recursively, with the
/// boxed fields holding the sub-patterns being combined.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum GraphPattern {
    /// Empty pattern
    Empty,
    /// Basic Graph Pattern - set of triple patterns
    Bgp(Vec<TriplePattern>),
    /// Join of patterns (implicit AND between group members)
    Join(Box<GraphPattern>, Box<GraphPattern>),
    /// Left outer join (OPTIONAL); the third field is the optional join
    /// filter expression
    LeftJoin(Box<GraphPattern>, Box<GraphPattern>, Option<Expression>),
    /// Union of patterns (UNION)
    Union(Box<GraphPattern>, Box<GraphPattern>),
    /// Filter (FILTER): keep solutions of the pattern that satisfy the expression
    Filter(Box<GraphPattern>, Expression),
    /// Named graph (GRAPH): evaluate the pattern against the given graph
    Graph(VarOrIri, Box<GraphPattern>),
    /// Service (FEDERATED query) against a remote endpoint; the bool is
    /// presumably the SILENT flag - confirm against the executor
    Service(Iri, Box<GraphPattern>, bool),
    /// MINUS pattern (remove compatible solutions of the right-hand pattern)
    Minus(Box<GraphPattern>, Box<GraphPattern>),
    /// EXISTS or NOT EXISTS; the bool likely selects the positive vs.
    /// negated form - confirm against the executor
    Exists(Box<GraphPattern>, bool),
    /// BIND assignment: evaluate the expression and bind it to the named
    /// variable over the inner pattern
    Bind(Expression, String, Box<GraphPattern>),
    /// GROUP BY aggregation: input pattern, grouping keys, and aggregates
    /// paired with their output variable names
    Group(
        Box<GraphPattern>,
        Vec<GroupCondition>,
        Vec<(Aggregate, String)>,
    ),
    /// Subquery
    SubSelect(Box<SelectQuery>),
    /// VALUES inline data
    Values(ValuesClause),
}
|
||||
|
||||
/// Triple pattern
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TriplePattern {
|
||||
pub subject: TermOrVariable,
|
||||
pub predicate: PropertyPath,
|
||||
pub object: TermOrVariable,
|
||||
}
|
||||
|
||||
impl TriplePattern {
|
||||
pub fn new(subject: TermOrVariable, predicate: PropertyPath, object: TermOrVariable) -> Self {
|
||||
Self {
|
||||
subject,
|
||||
predicate,
|
||||
object,
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple triple pattern with IRI predicate
|
||||
pub fn simple(subject: TermOrVariable, predicate: Iri, object: TermOrVariable) -> Self {
|
||||
Self {
|
||||
subject,
|
||||
predicate: PropertyPath::Iri(predicate),
|
||||
object,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Term or variable in triple pattern
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum TermOrVariable {
|
||||
Term(RdfTerm),
|
||||
Variable(String),
|
||||
BlankNode(String),
|
||||
}
|
||||
|
||||
impl TermOrVariable {
|
||||
pub fn var(name: impl Into<String>) -> Self {
|
||||
Self::Variable(name.into())
|
||||
}
|
||||
|
||||
pub fn iri(iri: Iri) -> Self {
|
||||
Self::Term(RdfTerm::Iri(iri))
|
||||
}
|
||||
|
||||
pub fn literal(value: impl Into<String>) -> Self {
|
||||
Self::Term(RdfTerm::Literal(Literal::simple(value)))
|
||||
}
|
||||
|
||||
pub fn blank(id: impl Into<String>) -> Self {
|
||||
Self::BlankNode(id.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Variable or IRI
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum VarOrIri {
|
||||
Variable(String),
|
||||
Iri(Iri),
|
||||
}
|
||||
|
||||
/// Property path expression
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum PropertyPath {
|
||||
/// Simple IRI predicate
|
||||
Iri(Iri),
|
||||
/// Variable predicate
|
||||
Variable(String),
|
||||
/// Inverse path (^path)
|
||||
Inverse(Box<PropertyPath>),
|
||||
/// Sequence path (path1/path2)
|
||||
Sequence(Box<PropertyPath>, Box<PropertyPath>),
|
||||
/// Alternative path (path1|path2)
|
||||
Alternative(Box<PropertyPath>, Box<PropertyPath>),
|
||||
/// Zero or more (*path)
|
||||
ZeroOrMore(Box<PropertyPath>),
|
||||
/// One or more (+path)
|
||||
OneOrMore(Box<PropertyPath>),
|
||||
/// Zero or one (?path)
|
||||
ZeroOrOne(Box<PropertyPath>),
|
||||
/// Negated property set (!(path1|path2))
|
||||
NegatedPropertySet(Vec<Iri>),
|
||||
/// Fixed length path {n}
|
||||
FixedLength(Box<PropertyPath>, usize),
|
||||
/// Range length path {n,m}
|
||||
RangeLength(Box<PropertyPath>, usize, Option<usize>),
|
||||
}
|
||||
|
||||
/// RDF term
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub enum RdfTerm {
|
||||
/// IRI reference
|
||||
Iri(Iri),
|
||||
/// Literal value
|
||||
Literal(Literal),
|
||||
/// Blank node
|
||||
BlankNode(String),
|
||||
}
|
||||
|
||||
impl RdfTerm {
    /// Wrap a string as an IRI term (no validation is performed).
    pub fn iri(value: impl Into<String>) -> Self {
        Self::Iri(Iri::new(value))
    }

    /// Plain string literal (xsd:string).
    pub fn literal(value: impl Into<String>) -> Self {
        Self::Literal(Literal::simple(value))
    }

    /// Literal with an explicit datatype IRI.
    pub fn typed_literal(value: impl Into<String>, datatype: Iri) -> Self {
        Self::Literal(Literal::typed(value, datatype))
    }

    /// Language-tagged literal (rdf:langString).
    pub fn lang_literal(value: impl Into<String>, lang: impl Into<String>) -> Self {
        Self::Literal(Literal::language(value, lang))
    }

    /// Blank node with the given local identifier.
    pub fn blank(id: impl Into<String>) -> Self {
        Self::BlankNode(id.into())
    }

    /// Check if this is an IRI
    pub fn is_iri(&self) -> bool {
        matches!(self, Self::Iri(_))
    }

    /// Check if this is a literal
    pub fn is_literal(&self) -> bool {
        matches!(self, Self::Literal(_))
    }

    /// Check if this is a blank node
    pub fn is_blank_node(&self) -> bool {
        matches!(self, Self::BlankNode(_))
    }
}
|
||||
|
||||
/// IRI (Internationalized Resource Identifier)
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct Iri(pub String);
|
||||
|
||||
impl Iri {
    /// Wrap a string as an IRI. No syntactic validation is performed here.
    pub fn new(value: impl Into<String>) -> Self {
        Self(value.into())
    }

    /// Borrow the IRI's textual form.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    // Common RDF namespace IRIs (RDF / RDFS / XSD vocabulary) -------------

    /// rdf:type
    pub fn rdf_type() -> Self {
        Self::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    }

    /// rdfs:label
    pub fn rdfs_label() -> Self {
        Self::new("http://www.w3.org/2000/01/rdf-schema#label")
    }

    /// rdfs:comment
    pub fn rdfs_comment() -> Self {
        Self::new("http://www.w3.org/2000/01/rdf-schema#comment")
    }

    /// xsd:string
    pub fn xsd_string() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#string")
    }

    /// xsd:integer
    pub fn xsd_integer() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#integer")
    }

    /// xsd:decimal
    pub fn xsd_decimal() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#decimal")
    }

    /// xsd:double
    pub fn xsd_double() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#double")
    }

    /// xsd:boolean
    pub fn xsd_boolean() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#boolean")
    }

    /// xsd:date
    pub fn xsd_date() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#date")
    }

    /// xsd:dateTime
    pub fn xsd_datetime() -> Self {
        Self::new("http://www.w3.org/2001/XMLSchema#dateTime")
    }
}
|
||||
|
||||
/// RDF Literal
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct Literal {
|
||||
/// Lexical form (string value)
|
||||
pub value: String,
|
||||
/// Optional language tag
|
||||
pub language: Option<String>,
|
||||
/// Datatype IRI (defaults to xsd:string)
|
||||
pub datatype: Iri,
|
||||
}
|
||||
|
||||
impl Literal {
    /// Simple string literal (datatype xsd:string, no language tag)
    pub fn simple(value: impl Into<String>) -> Self {
        Self {
            value: value.into(),
            language: None,
            datatype: Iri::xsd_string(),
        }
    }

    /// Typed literal with an explicit datatype IRI
    pub fn typed(value: impl Into<String>, datatype: Iri) -> Self {
        Self {
            value: value.into(),
            language: None,
            datatype,
        }
    }

    /// Language-tagged literal (datatype fixed to rdf:langString, per RDF 1.1)
    pub fn language(value: impl Into<String>, lang: impl Into<String>) -> Self {
        Self {
            value: value.into(),
            language: Some(lang.into()),
            datatype: Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"),
        }
    }

    /// Integer literal
    pub fn integer(value: i64) -> Self {
        Self::typed(value.to_string(), Iri::xsd_integer())
    }

    /// Decimal literal
    // NOTE(review): the lexical form comes from f64's Display, which renders
    // very large/small magnitudes in scientific notation - not a valid
    // xsd:decimal lexical form. Confirm this is acceptable for callers.
    pub fn decimal(value: f64) -> Self {
        Self::typed(value.to_string(), Iri::xsd_decimal())
    }

    /// Double literal
    pub fn double(value: f64) -> Self {
        Self::typed(value.to_string(), Iri::xsd_double())
    }

    /// Boolean literal (canonical "true"/"false" lexical form)
    pub fn boolean(value: bool) -> Self {
        Self::typed(if value { "true" } else { "false" }, Iri::xsd_boolean())
    }

    /// Try to parse the lexical form as an integer (the datatype IRI is not checked)
    pub fn as_integer(&self) -> Option<i64> {
        self.value.parse().ok()
    }

    /// Try to parse the lexical form as a double (the datatype IRI is not checked)
    pub fn as_double(&self) -> Option<f64> {
        self.value.parse().ok()
    }

    /// Try to parse the lexical form as a boolean; accepts the XSD lexical
    /// forms "true"/"false" and "1"/"0" (the datatype IRI is not checked)
    pub fn as_boolean(&self) -> Option<bool> {
        match self.value.as_str() {
            "true" | "1" => Some(true),
            "false" | "0" => Some(false),
            _ => None,
        }
    }
}
|
||||
|
||||
/// SPARQL expression
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Expression {
|
||||
/// Variable reference
|
||||
Variable(String),
|
||||
/// Constant term
|
||||
Term(RdfTerm),
|
||||
/// Binary operation
|
||||
Binary(Box<Expression>, BinaryOp, Box<Expression>),
|
||||
/// Unary operation
|
||||
Unary(UnaryOp, Box<Expression>),
|
||||
/// Function call
|
||||
Function(FunctionCall),
|
||||
/// Aggregate function
|
||||
Aggregate(Aggregate),
|
||||
/// IN expression
|
||||
In(Box<Expression>, Vec<Expression>),
|
||||
/// NOT IN expression
|
||||
NotIn(Box<Expression>, Vec<Expression>),
|
||||
/// EXISTS subquery
|
||||
Exists(Box<GraphPattern>),
|
||||
/// NOT EXISTS subquery
|
||||
NotExists(Box<GraphPattern>),
|
||||
/// Conditional (IF)
|
||||
If(Box<Expression>, Box<Expression>, Box<Expression>),
|
||||
/// COALESCE
|
||||
Coalesce(Vec<Expression>),
|
||||
/// BOUND test
|
||||
Bound(String),
|
||||
/// isIRI test
|
||||
IsIri(Box<Expression>),
|
||||
/// isBlank test
|
||||
IsBlank(Box<Expression>),
|
||||
/// isLiteral test
|
||||
IsLiteral(Box<Expression>),
|
||||
/// isNumeric test
|
||||
IsNumeric(Box<Expression>),
|
||||
/// REGEX pattern matching
|
||||
Regex(Box<Expression>, Box<Expression>, Option<Box<Expression>>),
|
||||
/// LANG function
|
||||
Lang(Box<Expression>),
|
||||
/// DATATYPE function
|
||||
Datatype(Box<Expression>),
|
||||
/// STR function
|
||||
Str(Box<Expression>),
|
||||
/// IRI constructor
|
||||
Iri(Box<Expression>),
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
pub fn var(name: impl Into<String>) -> Self {
|
||||
Self::Variable(name.into())
|
||||
}
|
||||
|
||||
pub fn term(t: RdfTerm) -> Self {
|
||||
Self::Term(t)
|
||||
}
|
||||
|
||||
pub fn literal(value: impl Into<String>) -> Self {
|
||||
Self::Term(RdfTerm::literal(value))
|
||||
}
|
||||
|
||||
pub fn integer(value: i64) -> Self {
|
||||
Self::Term(RdfTerm::Literal(Literal::integer(value)))
|
||||
}
|
||||
|
||||
pub fn binary(left: Expression, op: BinaryOp, right: Expression) -> Self {
|
||||
Self::Binary(Box::new(left), op, Box::new(right))
|
||||
}
|
||||
|
||||
pub fn unary(op: UnaryOp, expr: Expression) -> Self {
|
||||
Self::Unary(op, Box::new(expr))
|
||||
}
|
||||
|
||||
pub fn and(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::And, right)
|
||||
}
|
||||
|
||||
pub fn or(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Or, right)
|
||||
}
|
||||
|
||||
pub fn eq(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Eq, right)
|
||||
}
|
||||
|
||||
pub fn neq(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::NotEq, right)
|
||||
}
|
||||
|
||||
pub fn lt(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Lt, right)
|
||||
}
|
||||
|
||||
pub fn gt(left: Expression, right: Expression) -> Self {
|
||||
Self::binary(left, BinaryOp::Gt, right)
|
||||
}
|
||||
}
|
||||
|
||||
/// Binary operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BinaryOp {
    // Logical connectives
    And,
    Or,
    // Comparison
    Eq,
    NotEq,
    Lt,
    LtEq,
    Gt,
    GtEq,
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    // RDF term / language tests (SPARQL sameTerm and langMatches) -
    // these are not string operators despite their operands being terms
    SameTerm,
    LangMatches,
}
|
||||
|
||||
/// Unary operators
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum UnaryOp {
|
||||
Not,
|
||||
Plus,
|
||||
Minus,
|
||||
}
|
||||
|
||||
/// Function call
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct FunctionCall {
|
||||
pub name: String,
|
||||
pub args: Vec<Expression>,
|
||||
}
|
||||
|
||||
impl FunctionCall {
|
||||
pub fn new(name: impl Into<String>, args: Vec<Expression>) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
args,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregate function
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum Aggregate {
|
||||
Count {
|
||||
expr: Option<Box<Expression>>,
|
||||
distinct: bool,
|
||||
},
|
||||
Sum {
|
||||
expr: Box<Expression>,
|
||||
distinct: bool,
|
||||
},
|
||||
Avg {
|
||||
expr: Box<Expression>,
|
||||
distinct: bool,
|
||||
},
|
||||
Min {
|
||||
expr: Box<Expression>,
|
||||
},
|
||||
Max {
|
||||
expr: Box<Expression>,
|
||||
},
|
||||
GroupConcat {
|
||||
expr: Box<Expression>,
|
||||
separator: Option<String>,
|
||||
distinct: bool,
|
||||
},
|
||||
Sample {
|
||||
expr: Box<Expression>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Filter expression
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Filter {
|
||||
pub expression: Expression,
|
||||
}
|
||||
|
||||
impl Filter {
|
||||
pub fn new(expression: Expression) -> Self {
|
||||
Self { expression }
|
||||
}
|
||||
}
|
||||
|
||||
/// Solution modifier
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct SolutionModifier {
|
||||
pub order_by: Vec<OrderCondition>,
|
||||
pub limit: Option<usize>,
|
||||
pub offset: Option<usize>,
|
||||
pub having: Option<Expression>,
|
||||
}
|
||||
|
||||
impl SolutionModifier {
    /// Set LIMIT (chainable builder)
    pub fn with_limit(mut self, limit: usize) -> Self {
        self.limit = Some(limit);
        self
    }

    /// Set OFFSET (chainable builder)
    pub fn with_offset(mut self, offset: usize) -> Self {
        self.offset = Some(offset);
        self
    }

    /// Replace the ORDER BY conditions (chainable builder)
    pub fn with_order(mut self, conditions: Vec<OrderCondition>) -> Self {
        self.order_by = conditions;
        self
    }

    /// Set the HAVING expression (chainable builder)
    pub fn with_having(mut self, expr: Expression) -> Self {
        self.having = Some(expr);
        self
    }
}
|
||||
|
||||
/// ORDER BY condition
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct OrderCondition {
|
||||
pub expression: Expression,
|
||||
pub ascending: bool,
|
||||
}
|
||||
|
||||
impl OrderCondition {
|
||||
pub fn asc(expr: Expression) -> Self {
|
||||
Self {
|
||||
expression: expr,
|
||||
ascending: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn desc(expr: Expression) -> Self {
|
||||
Self {
|
||||
expression: expr,
|
||||
ascending: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GROUP BY condition
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum GroupCondition {
|
||||
Variable(String),
|
||||
Expression(Expression, Option<String>),
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// SPARQL Update Operations
|
||||
// ============================================================================
|
||||
|
||||
/// SPARQL Update operation
///
/// The `silent: bool` fields correspond to the SILENT keyword: when set,
/// failures of that operation are suppressed rather than reported.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateOperation {
    /// INSERT DATA { triples } - add ground triples
    InsertData(InsertData),
    /// DELETE DATA { triples } - remove ground triples
    DeleteData(DeleteData),
    /// DELETE { pattern } INSERT { pattern } WHERE { pattern }
    Modify(Modify),
    /// LOAD <iri> INTO GRAPH <iri>; `destination: None` presumably targets
    /// the default graph - confirm against the executor
    Load {
        source: Iri,
        destination: Option<Iri>,
        silent: bool,
    },
    /// CLEAR GRAPH <iri> - remove all triples from the target graph(s)
    Clear { target: GraphTarget, silent: bool },
    /// CREATE GRAPH <iri>
    Create { graph: Iri, silent: bool },
    /// DROP GRAPH <iri>
    Drop { target: GraphTarget, silent: bool },
    /// COPY source TO destination (destination is overwritten)
    Copy {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// MOVE source TO destination (source is removed afterwards)
    Move {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
    /// ADD source TO destination (destination keeps its existing triples)
    Add {
        source: GraphTarget,
        destination: GraphTarget,
        silent: bool,
    },
}
|
||||
|
||||
/// INSERT DATA operation
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct InsertData {
|
||||
pub quads: Vec<Quad>,
|
||||
}
|
||||
|
||||
/// DELETE DATA operation
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DeleteData {
|
||||
pub quads: Vec<Quad>,
|
||||
}
|
||||
|
||||
/// DELETE/INSERT with WHERE
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Modify {
|
||||
pub with_graph: Option<Iri>,
|
||||
pub delete_pattern: Option<Vec<QuadPattern>>,
|
||||
pub insert_pattern: Option<Vec<QuadPattern>>,
|
||||
pub using: Vec<DatasetClause>,
|
||||
pub where_pattern: GraphPattern,
|
||||
}
|
||||
|
||||
/// Quad (triple with optional graph)
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Quad {
|
||||
pub subject: RdfTerm,
|
||||
pub predicate: Iri,
|
||||
pub object: RdfTerm,
|
||||
pub graph: Option<Iri>,
|
||||
}
|
||||
|
||||
/// Quad pattern (for DELETE/INSERT templates)
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct QuadPattern {
|
||||
pub subject: TermOrVariable,
|
||||
pub predicate: VarOrIri,
|
||||
pub object: TermOrVariable,
|
||||
pub graph: Option<VarOrIri>,
|
||||
}
|
||||
|
||||
/// Graph target for management operations
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum GraphTarget {
|
||||
Default,
|
||||
Named(Iri),
|
||||
All,
|
||||
AllNamed,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_rdf_term_creation() {
|
||||
let iri = RdfTerm::iri("http://example.org/resource");
|
||||
assert!(iri.is_iri());
|
||||
|
||||
let lit = RdfTerm::literal("hello");
|
||||
assert!(lit.is_literal());
|
||||
|
||||
let blank = RdfTerm::blank("b0");
|
||||
assert!(blank.is_blank_node());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_literal_parsing() {
|
||||
let int_lit = Literal::integer(42);
|
||||
assert_eq!(int_lit.as_integer(), Some(42));
|
||||
|
||||
let double_lit = Literal::double(3.14);
|
||||
assert!((double_lit.as_double().unwrap() - 3.14).abs() < 0.001);
|
||||
|
||||
let bool_lit = Literal::boolean(true);
|
||||
assert_eq!(bool_lit.as_boolean(), Some(true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_expression_builder() {
|
||||
let expr = Expression::and(
|
||||
Expression::eq(Expression::var("x"), Expression::integer(10)),
|
||||
Expression::gt(Expression::var("y"), Expression::integer(5)),
|
||||
);
|
||||
|
||||
match expr {
|
||||
Expression::Binary(_, BinaryOp::And, _) => (),
|
||||
_ => panic!("Expected AND expression"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_triple_pattern() {
|
||||
let pattern = TriplePattern::simple(
|
||||
TermOrVariable::var("s"),
|
||||
Iri::rdf_type(),
|
||||
TermOrVariable::iri(Iri::new("http://example.org/Person")),
|
||||
);
|
||||
|
||||
assert!(matches!(pattern.subject, TermOrVariable::Variable(_)));
|
||||
assert!(matches!(pattern.predicate, PropertyPath::Iri(_)));
|
||||
}
|
||||
}
|
||||
1883
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/executor.rs
vendored
Normal file
1883
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/executor.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
703
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/functions.rs
vendored
Normal file
703
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/functions.rs
vendored
Normal file
@@ -0,0 +1,703 @@
|
||||
// SPARQL Built-in Functions
|
||||
//
|
||||
// Implementation of SPARQL 1.1 built-in functions:
|
||||
// https://www.w3.org/TR/sparql11-query/#SparqlOps
|
||||
|
||||
use super::ast::{Iri, Literal, RdfTerm};
|
||||
use super::{SparqlError, SparqlResult};
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
/// Evaluate a SPARQL function call
///
/// Dispatches on the function name (matched case-insensitively) to the
/// corresponding built-in. Arguments arrive pre-evaluated by the caller;
/// `None` marks an unbound argument. Returns `Ok(None)` when the function
/// produces no value, and `Err(SparqlError::UnsupportedOperation)` for
/// names with no implementation.
pub fn evaluate_function(name: &str, args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
    let name_upper = name.to_uppercase();

    match name_upper.as_str() {
        // String functions
        "STRLEN" => fn_strlen(args),
        "SUBSTR" | "SUBSTRING" => fn_substr(args),
        "UCASE" => fn_ucase(args),
        "LCASE" => fn_lcase(args),
        "STRSTARTS" => fn_strstarts(args),
        "STRENDS" => fn_strends(args),
        "CONTAINS" => fn_contains(args),
        "STRBEFORE" => fn_strbefore(args),
        "STRAFTER" => fn_strafter(args),
        "ENCODE_FOR_URI" => fn_encode_for_uri(args),
        "CONCAT" => fn_concat(args),
        "REPLACE" => fn_replace(args),

        // Numeric functions
        "ABS" => fn_abs(args),
        "ROUND" => fn_round(args),
        "CEIL" => fn_ceil(args),
        "FLOOR" => fn_floor(args),
        "RAND" => fn_rand(args),

        // Date/time functions
        "NOW" => fn_now(args),
        "YEAR" => fn_year(args),
        "MONTH" => fn_month(args),
        "DAY" => fn_day(args),
        "HOURS" => fn_hours(args),
        "MINUTES" => fn_minutes(args),
        "SECONDS" => fn_seconds(args),
        "TIMEZONE" => fn_timezone(args),
        "TZ" => fn_tz(args),

        // Hash functions (all share one implementation, keyed by algorithm name)
        "MD5" => fn_hash(args, "md5"),
        "SHA1" => fn_hash(args, "sha1"),
        "SHA256" => fn_hash(args, "sha256"),
        "SHA384" => fn_hash(args, "sha384"),
        "SHA512" => fn_hash(args, "sha512"),

        // Constructor functions
        "STRUUID" => fn_struuid(args),
        "UUID" => fn_uuid(args),
        "BNODE" => fn_bnode(args),
        "STRDT" => fn_strdt(args),
        "STRLANG" => fn_strlang(args),

        // Type conversion
        "STR" => fn_str(args),

        // RuVector extensions (non-standard, vector-search helpers)
        "RUVECTOR_SIMILARITY" => fn_vector_similarity(args),
        "RUVECTOR_DISTANCE" => fn_vector_distance(args),

        _ => Err(SparqlError::UnsupportedOperation(format!(
            "Unknown function: {}",
            name
        ))),
    }
}
|
||||
|
||||
// ============================================================================
|
||||
// String Functions
|
||||
// ============================================================================
|
||||
|
||||
fn fn_strlen(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::integer(s.len() as i64))))
|
||||
}
|
||||
|
||||
fn fn_substr(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
let start = get_integer_arg(&args, 1)? as usize;
|
||||
let length = args
|
||||
.get(2)
|
||||
.and_then(|a| a.as_ref())
|
||||
.and_then(|t| term_to_integer(t))
|
||||
.map(|n| n as usize);
|
||||
|
||||
// SPARQL uses 1-based indexing
|
||||
let start_idx = start.saturating_sub(1);
|
||||
|
||||
let result: String = if let Some(len) = length {
|
||||
s.chars().skip(start_idx).take(len).collect()
|
||||
} else {
|
||||
s.chars().skip(start_idx).collect()
|
||||
};
|
||||
|
||||
Ok(Some(RdfTerm::literal(result)))
|
||||
}
|
||||
|
||||
fn fn_ucase(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::literal(s.to_uppercase())))
|
||||
}
|
||||
|
||||
fn fn_lcase(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::literal(s.to_lowercase())))
|
||||
}
|
||||
|
||||
fn fn_strstarts(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
let prefix = get_string_arg(&args, 1)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(
|
||||
s.starts_with(&prefix),
|
||||
))))
|
||||
}
|
||||
|
||||
fn fn_strends(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
let suffix = get_string_arg(&args, 1)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(
|
||||
s.ends_with(&suffix),
|
||||
))))
|
||||
}
|
||||
|
||||
fn fn_contains(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
let pattern = get_string_arg(&args, 1)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::boolean(
|
||||
s.contains(&pattern),
|
||||
))))
|
||||
}
|
||||
|
||||
fn fn_strbefore(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
let pattern = get_string_arg(&args, 1)?;
|
||||
|
||||
let result = if pattern.is_empty() {
|
||||
String::new()
|
||||
} else if let Some(idx) = s.find(&pattern) {
|
||||
s[..idx].to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
Ok(Some(RdfTerm::literal(result)))
|
||||
}
|
||||
|
||||
fn fn_strafter(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
let pattern = get_string_arg(&args, 1)?;
|
||||
|
||||
let result = if pattern.is_empty() {
|
||||
s
|
||||
} else if let Some(idx) = s.find(&pattern) {
|
||||
s[idx + pattern.len()..].to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
Ok(Some(RdfTerm::literal(result)))
|
||||
}
|
||||
|
||||
fn fn_encode_for_uri(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
|
||||
let encoded: String = s
|
||||
.chars()
|
||||
.map(|c| {
|
||||
if c.is_ascii_alphanumeric() || "-_.~".contains(c) {
|
||||
c.to_string()
|
||||
} else {
|
||||
format!("%{:02X}", c as u32)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Some(RdfTerm::literal(encoded)))
|
||||
}
|
||||
|
||||
fn fn_concat(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let mut result = String::new();
|
||||
|
||||
for arg in args {
|
||||
if let Some(term) = arg {
|
||||
result.push_str(&term_to_string(&term));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(RdfTerm::literal(result)))
|
||||
}
|
||||
|
||||
fn fn_replace(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
let pattern = get_string_arg(&args, 1)?;
|
||||
let replacement = get_string_arg(&args, 2)?;
|
||||
// Note: Full regex support would require the regex crate
|
||||
let result = s.replace(&pattern, &replacement);
|
||||
Ok(Some(RdfTerm::literal(result)))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Numeric Functions
|
||||
// ============================================================================
|
||||
|
||||
fn fn_abs(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let n = get_number_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::decimal(n.abs()))))
|
||||
}
|
||||
|
||||
fn fn_round(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let n = get_number_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::decimal(n.round()))))
|
||||
}
|
||||
|
||||
fn fn_ceil(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let n = get_number_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::decimal(n.ceil()))))
|
||||
}
|
||||
|
||||
fn fn_floor(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let n = get_number_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::Literal(Literal::decimal(n.floor()))))
|
||||
}
|
||||
|
||||
fn fn_rand(_args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
// Simple pseudo-random using hash of current time
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
let nanos = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos())
|
||||
.unwrap_or(0);
|
||||
|
||||
let mut hasher = DefaultHasher::new();
|
||||
nanos.hash(&mut hasher);
|
||||
let hash = hasher.finish();
|
||||
|
||||
let random = (hash as f64) / (u64::MAX as f64);
|
||||
Ok(Some(RdfTerm::Literal(Literal::double(random))))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Date/Time Functions
|
||||
// ============================================================================
|
||||
|
||||
fn fn_now(_args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
let duration = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_err(|e| SparqlError::ExecutionError(e.to_string()))?;
|
||||
|
||||
let secs = duration.as_secs();
|
||||
// Simple ISO 8601 format
|
||||
let datetime = format!("{}Z", secs);
|
||||
|
||||
Ok(Some(RdfTerm::typed_literal(datetime, Iri::xsd_datetime())))
|
||||
}
|
||||
|
||||
fn fn_year(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
// Simple parsing - expects YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS format
|
||||
if dt.len() >= 4 {
|
||||
if let Ok(year) = dt[..4].parse::<i64>() {
|
||||
return Ok(Some(RdfTerm::Literal(Literal::integer(year))));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn fn_month(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
if dt.len() >= 7 && dt.chars().nth(4) == Some('-') {
|
||||
if let Ok(month) = dt[5..7].parse::<i64>() {
|
||||
return Ok(Some(RdfTerm::Literal(Literal::integer(month))));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn fn_day(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
if dt.len() >= 10 && dt.chars().nth(7) == Some('-') {
|
||||
if let Ok(day) = dt[8..10].parse::<i64>() {
|
||||
return Ok(Some(RdfTerm::Literal(Literal::integer(day))));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn fn_hours(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
if let Some(t_pos) = dt.find('T') {
|
||||
if dt.len() >= t_pos + 3 {
|
||||
if let Ok(hours) = dt[t_pos + 1..t_pos + 3].parse::<i64>() {
|
||||
return Ok(Some(RdfTerm::Literal(Literal::integer(hours))));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn fn_minutes(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
if let Some(t_pos) = dt.find('T') {
|
||||
if dt.len() >= t_pos + 6 {
|
||||
if let Ok(minutes) = dt[t_pos + 4..t_pos + 6].parse::<i64>() {
|
||||
return Ok(Some(RdfTerm::Literal(Literal::integer(minutes))));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn fn_seconds(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
if let Some(t_pos) = dt.find('T') {
|
||||
if dt.len() >= t_pos + 9 {
|
||||
// Handle both integer and decimal seconds
|
||||
let sec_str = &dt[t_pos + 7..];
|
||||
let end_pos = sec_str
|
||||
.find(|c: char| !c.is_ascii_digit() && c != '.')
|
||||
.unwrap_or(sec_str.len());
|
||||
if let Ok(seconds) = sec_str[..end_pos].parse::<f64>() {
|
||||
return Ok(Some(RdfTerm::Literal(Literal::decimal(seconds))));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn fn_timezone(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
// Look for timezone at end
|
||||
if dt.ends_with('Z') {
|
||||
return Ok(Some(RdfTerm::literal("PT0S")));
|
||||
}
|
||||
|
||||
// Look for +/-HH:MM
|
||||
if let Some(tz_pos) = dt.rfind('+').or_else(|| dt.rfind('-')) {
|
||||
if tz_pos > 10 {
|
||||
// After date part
|
||||
let tz = &dt[tz_pos..];
|
||||
if tz.len() >= 6 {
|
||||
let sign = if tz.starts_with('-') { "-" } else { "" };
|
||||
let hours: i64 = tz[1..3].parse().unwrap_or(0);
|
||||
let minutes: i64 = tz[4..6].parse().unwrap_or(0);
|
||||
let duration = format!("{}PT{}H{}M", sign, hours, minutes);
|
||||
return Ok(Some(RdfTerm::literal(duration)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn fn_tz(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let dt = get_string_arg(&args, 0)?;
|
||||
|
||||
if dt.ends_with('Z') {
|
||||
return Ok(Some(RdfTerm::literal("Z")));
|
||||
}
|
||||
|
||||
if let Some(tz_pos) = dt.rfind('+').or_else(|| dt.rfind('-')) {
|
||||
if tz_pos > 10 {
|
||||
return Ok(Some(RdfTerm::literal(&dt[tz_pos..])));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(RdfTerm::literal("")))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Hash Functions
|
||||
// ============================================================================
|
||||
|
||||
fn fn_hash(args: Vec<Option<RdfTerm>>, algorithm: &str) -> SparqlResult<Option<RdfTerm>> {
|
||||
let s = get_string_arg(&args, 0)?;
|
||||
|
||||
// Simple hash implementation using Rust's hasher
|
||||
// In production, use proper crypto hashes
|
||||
let mut hasher = DefaultHasher::new();
|
||||
s.hash(&mut hasher);
|
||||
algorithm.hash(&mut hasher);
|
||||
let hash = hasher.finish();
|
||||
|
||||
// Format as hex string
|
||||
let hex = format!("{:016x}", hash);
|
||||
|
||||
Ok(Some(RdfTerm::literal(hex)))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Constructor Functions
|
||||
// ============================================================================
|
||||
|
||||
/// SPARQL STRUUID(): a UUID-shaped string (version-4 layout) as a plain literal.
///
/// NOTE(review): entropy comes only from the current timestamp hashed twice,
/// not from a CSPRNG — calls in the same nanosecond collide. Replace with a
/// real UUID generator for production use.
fn fn_struuid(_args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
    // Generate UUID-like string
    use std::time::{SystemTime, UNIX_EPOCH};

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| d.as_nanos())
        .unwrap_or(0);

    // Two hashes of adjacent inputs supply the 128 bits of the UUID.
    let mut hasher = DefaultHasher::new();
    nanos.hash(&mut hasher);
    let hash1 = hasher.finish();

    hasher = DefaultHasher::new();
    (nanos + 1).hash(&mut hasher);
    let hash2 = hasher.finish();

    // 8-4-4-4-12 hex layout; the literal '4' marks version 4 and the
    // `| 0x8000` sets the RFC 4122 variant bits.
    let uuid = format!(
        "{:08x}-{:04x}-4{:03x}-{:04x}-{:012x}",
        (hash1 >> 32) as u32,
        (hash1 >> 16) as u16,
        (hash1 as u16) & 0x0FFF,
        ((hash2 >> 48) as u16 & 0x3FFF) | 0x8000,
        hash2 & 0xFFFFFFFFFFFF
    );

    Ok(Some(RdfTerm::literal(uuid)))
}
|
||||
|
||||
fn fn_uuid(_args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let struuid = fn_struuid(vec![])?;
|
||||
if let Some(RdfTerm::Literal(lit)) = struuid {
|
||||
Ok(Some(RdfTerm::Iri(Iri::new(format!(
|
||||
"urn:uuid:{}",
|
||||
lit.value
|
||||
)))))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn fn_bnode(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
if args.is_empty() || args[0].is_none() {
|
||||
// Generate new blank node
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
let nanos = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map(|d| d.as_nanos())
|
||||
.unwrap_or(0);
|
||||
Ok(Some(RdfTerm::BlankNode(format!("b{}", nanos))))
|
||||
} else {
|
||||
// Create blank node with given ID
|
||||
let id = get_string_arg(&args, 0)?;
|
||||
Ok(Some(RdfTerm::BlankNode(id)))
|
||||
}
|
||||
}
|
||||
|
||||
fn fn_strdt(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let value = get_string_arg(&args, 0)?;
|
||||
let datatype = get_iri_arg(&args, 1)?;
|
||||
Ok(Some(RdfTerm::typed_literal(value, datatype)))
|
||||
}
|
||||
|
||||
fn fn_strlang(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let value = get_string_arg(&args, 0)?;
|
||||
let lang = get_string_arg(&args, 1)?;
|
||||
Ok(Some(RdfTerm::lang_literal(value, lang)))
|
||||
}
|
||||
|
||||
fn fn_str(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let term = args.get(0).and_then(|a| a.clone());
|
||||
Ok(term.map(|t| RdfTerm::literal(term_to_string(&t))))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// RuVector Extension Functions
|
||||
// ============================================================================
|
||||
|
||||
/// Compute cosine similarity between two vector literals
|
||||
fn fn_vector_similarity(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let v1 = get_vector_arg(&args, 0)?;
|
||||
let v2 = get_vector_arg(&args, 1)?;
|
||||
|
||||
if v1.len() != v2.len() {
|
||||
return Err(SparqlError::TypeMismatch {
|
||||
expected: format!("vectors of same dimension"),
|
||||
actual: format!("dimensions {} and {}", v1.len(), v2.len()),
|
||||
});
|
||||
}
|
||||
|
||||
// Cosine similarity
|
||||
let dot: f64 = v1.iter().zip(v2.iter()).map(|(a, b)| a * b).sum();
|
||||
let norm1: f64 = v1.iter().map(|x| x * x).sum::<f64>().sqrt();
|
||||
let norm2: f64 = v2.iter().map(|x| x * x).sum::<f64>().sqrt();
|
||||
|
||||
let similarity = if norm1 > 0.0 && norm2 > 0.0 {
|
||||
dot / (norm1 * norm2)
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
Ok(Some(RdfTerm::Literal(Literal::double(similarity))))
|
||||
}
|
||||
|
||||
/// Compute L2 distance between two vector literals
|
||||
fn fn_vector_distance(args: Vec<Option<RdfTerm>>) -> SparqlResult<Option<RdfTerm>> {
|
||||
let v1 = get_vector_arg(&args, 0)?;
|
||||
let v2 = get_vector_arg(&args, 1)?;
|
||||
|
||||
if v1.len() != v2.len() {
|
||||
return Err(SparqlError::TypeMismatch {
|
||||
expected: format!("vectors of same dimension"),
|
||||
actual: format!("dimensions {} and {}", v1.len(), v2.len()),
|
||||
});
|
||||
}
|
||||
|
||||
// L2 (Euclidean) distance
|
||||
let distance: f64 = v1
|
||||
.iter()
|
||||
.zip(v2.iter())
|
||||
.map(|(a, b)| (a - b).powi(2))
|
||||
.sum::<f64>()
|
||||
.sqrt();
|
||||
|
||||
Ok(Some(RdfTerm::Literal(Literal::double(distance))))
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Helper Functions
|
||||
// ============================================================================
|
||||
|
||||
fn get_string_arg(args: &[Option<RdfTerm>], index: usize) -> SparqlResult<String> {
|
||||
args.get(index)
|
||||
.and_then(|a| a.as_ref())
|
||||
.map(|t| term_to_string(t))
|
||||
.ok_or_else(|| SparqlError::ExecutionError(format!("Missing argument {}", index)))
|
||||
}
|
||||
|
||||
fn get_number_arg(args: &[Option<RdfTerm>], index: usize) -> SparqlResult<f64> {
|
||||
args.get(index)
|
||||
.and_then(|a| a.as_ref())
|
||||
.and_then(|t| term_to_number(t))
|
||||
.ok_or_else(|| SparqlError::TypeMismatch {
|
||||
expected: "numeric".to_string(),
|
||||
actual: "non-numeric or missing".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn get_integer_arg(args: &[Option<RdfTerm>], index: usize) -> SparqlResult<i64> {
|
||||
args.get(index)
|
||||
.and_then(|a| a.as_ref())
|
||||
.and_then(|t| term_to_integer(t))
|
||||
.ok_or_else(|| SparqlError::TypeMismatch {
|
||||
expected: "integer".to_string(),
|
||||
actual: "non-integer or missing".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn get_iri_arg(args: &[Option<RdfTerm>], index: usize) -> SparqlResult<Iri> {
|
||||
args.get(index)
|
||||
.and_then(|a| a.as_ref())
|
||||
.and_then(|t| match t {
|
||||
RdfTerm::Iri(iri) => Some(iri.clone()),
|
||||
RdfTerm::Literal(lit) => Some(Iri::new(&lit.value)),
|
||||
_ => None,
|
||||
})
|
||||
.ok_or_else(|| SparqlError::TypeMismatch {
|
||||
expected: "IRI".to_string(),
|
||||
actual: "non-IRI or missing".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn get_vector_arg(args: &[Option<RdfTerm>], index: usize) -> SparqlResult<Vec<f64>> {
|
||||
let s = get_string_arg(args, index)?;
|
||||
|
||||
// Parse vector format: [1.0, 2.0, 3.0] or 1.0,2.0,3.0
|
||||
let s = s.trim().trim_start_matches('[').trim_end_matches(']');
|
||||
|
||||
s.split(',')
|
||||
.map(|v| {
|
||||
v.trim()
|
||||
.parse::<f64>()
|
||||
.map_err(|_| SparqlError::TypeMismatch {
|
||||
expected: "numeric vector".to_string(),
|
||||
actual: format!("invalid number: {}", v),
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn term_to_string(term: &RdfTerm) -> String {
|
||||
match term {
|
||||
RdfTerm::Iri(iri) => iri.as_str().to_string(),
|
||||
RdfTerm::Literal(lit) => lit.value.clone(),
|
||||
RdfTerm::BlankNode(id) => format!("_:{}", id),
|
||||
}
|
||||
}
|
||||
|
||||
fn term_to_number(term: &RdfTerm) -> Option<f64> {
|
||||
match term {
|
||||
RdfTerm::Literal(lit) => lit.as_double(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn term_to_integer(term: &RdfTerm) -> Option<i64> {
|
||||
match term {
|
||||
RdfTerm::Literal(lit) => lit.as_integer(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // STRLEN: character count as an xsd:integer literal.
    #[test]
    fn test_strlen() {
        let result = fn_strlen(vec![Some(RdfTerm::literal("hello"))]).unwrap();
        assert!(matches!(result, Some(RdfTerm::Literal(l)) if l.as_integer() == Some(5)));
    }

    // SUBSTR: SPARQL uses a 1-based start index, so (start=2, len=3) of
    // "hello" is "ell".
    #[test]
    fn test_substr() {
        let result = fn_substr(vec![
            Some(RdfTerm::literal("hello")),
            Some(RdfTerm::Literal(Literal::integer(2))),
            Some(RdfTerm::Literal(Literal::integer(3))),
        ])
        .unwrap();
        assert!(matches!(result, Some(RdfTerm::Literal(l)) if l.value == "ell"));
    }

    // UCASE / LCASE: case mapping of plain literals.
    #[test]
    fn test_ucase_lcase() {
        let upper = fn_ucase(vec![Some(RdfTerm::literal("hello"))]).unwrap();
        assert!(matches!(upper, Some(RdfTerm::Literal(l)) if l.value == "HELLO"));

        let lower = fn_lcase(vec![Some(RdfTerm::literal("HELLO"))]).unwrap();
        assert!(matches!(lower, Some(RdfTerm::Literal(l)) if l.value == "hello"));
    }

    // CONTAINS: substring test returning an xsd:boolean literal.
    #[test]
    fn test_contains() {
        let result = fn_contains(vec![
            Some(RdfTerm::literal("hello world")),
            Some(RdfTerm::literal("world")),
        ])
        .unwrap();
        assert!(matches!(result, Some(RdfTerm::Literal(l)) if l.as_boolean() == Some(true)));
    }

    // ABS: absolute value as an xsd:decimal literal.
    #[test]
    fn test_abs() {
        let result = fn_abs(vec![Some(RdfTerm::Literal(Literal::decimal(-5.5)))]).unwrap();
        assert!(matches!(result, Some(RdfTerm::Literal(l)) if l.as_double() == Some(5.5)));
    }

    // CONCAT: variadic string concatenation.
    #[test]
    fn test_concat() {
        let result = fn_concat(vec![
            Some(RdfTerm::literal("hello")),
            Some(RdfTerm::literal(" ")),
            Some(RdfTerm::literal("world")),
        ])
        .unwrap();
        assert!(matches!(result, Some(RdfTerm::Literal(l)) if l.value == "hello world"));
    }

    // Identical unit vectors have cosine similarity 1.0.
    #[test]
    fn test_vector_similarity() {
        let result = fn_vector_similarity(vec![
            Some(RdfTerm::literal("[1.0, 0.0, 0.0]")),
            Some(RdfTerm::literal("[1.0, 0.0, 0.0]")),
        ])
        .unwrap();

        if let Some(RdfTerm::Literal(l)) = result {
            let sim = l.as_double().unwrap();
            assert!((sim - 1.0).abs() < 0.001);
        } else {
            panic!("Expected literal result");
        }
    }

    // Classic 3-4-5 triangle: L2 distance between (0,0) and (3,4) is 5.
    #[test]
    fn test_vector_distance() {
        let result = fn_vector_distance(vec![
            Some(RdfTerm::literal("[0.0, 0.0]")),
            Some(RdfTerm::literal("[3.0, 4.0]")),
        ])
        .unwrap();

        if let Some(RdfTerm::Literal(l)) = result {
            let dist = l.as_double().unwrap();
            assert!((dist - 5.0).abs() < 0.001);
        } else {
            panic!("Expected literal result");
        }
    }
}
|
||||
127
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/mod.rs
vendored
Normal file
127
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/mod.rs
vendored
Normal file
@@ -0,0 +1,127 @@
|
||||
// SPARQL (SPARQL Protocol and RDF Query Language) module for ruvector-postgres
|
||||
//
|
||||
// Provides W3C-compliant SPARQL 1.1 query support for RDF data with
|
||||
// PostgreSQL storage backend and vector similarity extensions.
|
||||
//
|
||||
// Features:
|
||||
// - SPARQL 1.1 Query Language (SELECT, CONSTRUCT, ASK, DESCRIBE)
|
||||
// - SPARQL 1.1 Update Language (INSERT, DELETE, LOAD, CLEAR)
|
||||
// - RDF triple store with efficient indexing (SPO, POS, OSP)
|
||||
// - Property paths (sequence, alternative, inverse, transitive)
|
||||
// - Aggregates and GROUP BY
|
||||
// - FILTER expressions and built-in functions
|
||||
// - Vector similarity extensions for hybrid semantic search
|
||||
// - Standard result formats (JSON, XML, CSV, TSV)
|
||||
|
||||
// Allow warnings for incomplete SPARQL features
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_variables)]
|
||||
#![allow(unused_mut)]
|
||||
|
||||
pub mod ast;
|
||||
pub mod executor;
|
||||
pub mod functions;
|
||||
pub mod parser;
|
||||
pub mod results;
|
||||
pub mod triple_store;
|
||||
|
||||
pub use ast::{
|
||||
Aggregate, AskQuery, ConstructQuery, DeleteData, DescribeQuery, Expression, Filter,
|
||||
GraphPattern, GroupCondition, InsertData, Iri, Literal, Modify, OrderCondition, QueryForm,
|
||||
RdfTerm, SelectQuery, SolutionModifier, SparqlQuery, TriplePattern, UpdateOperation,
|
||||
};
|
||||
pub use executor::{execute_sparql, SparqlContext};
|
||||
pub use parser::parse_sparql;
|
||||
pub use results::{format_results, ResultFormat, SparqlResults};
|
||||
pub use triple_store::{Triple, TripleIndex, TripleStore};
|
||||
|
||||
use dashmap::DashMap;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Global RDF triple store registry
///
/// Process-wide map of store name -> shared store. DashMap gives sharded
/// concurrent access without a global lock; Lazy defers construction to
/// first use (required for a static with a non-const initializer).
static TRIPLE_STORE_REGISTRY: Lazy<DashMap<String, Arc<TripleStore>>> =
    Lazy::new(|| DashMap::new());
|
||||
|
||||
/// Get or create a triple store by name
|
||||
pub fn get_or_create_store(name: &str) -> Arc<TripleStore> {
|
||||
TRIPLE_STORE_REGISTRY
|
||||
.entry(name.to_string())
|
||||
.or_insert_with(|| Arc::new(TripleStore::new()))
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Get an existing triple store by name
|
||||
pub fn get_store(name: &str) -> Option<Arc<TripleStore>> {
|
||||
TRIPLE_STORE_REGISTRY.get(name).map(|s| s.clone())
|
||||
}
|
||||
|
||||
/// Delete a triple store by name
///
/// Returns true when a store was actually removed. `Arc` handles obtained
/// earlier keep the store's data alive until they are dropped.
pub fn delete_store(name: &str) -> bool {
    TRIPLE_STORE_REGISTRY.remove(name).is_some()
}
|
||||
|
||||
/// List all triple store names
|
||||
pub fn list_stores() -> Vec<String> {
|
||||
TRIPLE_STORE_REGISTRY
|
||||
.iter()
|
||||
.map(|e| e.key().clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// SPARQL error type
///
/// Unified error enum for the whole SPARQL subsystem (parser, executor,
/// built-in functions, triple store). `thiserror` derives `Display` from
/// the `#[error]` attributes and implements `std::error::Error`.
#[derive(Debug, Clone, thiserror::Error)]
pub enum SparqlError {
    /// Query/update text could not be parsed.
    #[error("Parse error: {0}")]
    ParseError(String),

    /// A variable was referenced without a binding in the current solution.
    #[error("Variable not bound: {0}")]
    UnboundVariable(String),

    /// An operand had the wrong type for the attempted operation.
    #[error("Type mismatch: expected {expected}, got {actual}")]
    TypeMismatch { expected: String, actual: String },

    /// The named triple store does not exist in the registry.
    #[error("Store not found: {0}")]
    StoreNotFound(String),

    /// A term could not be interpreted as an IRI.
    #[error("Invalid IRI: {0}")]
    InvalidIri(String),

    /// A literal's lexical form is invalid for its datatype.
    #[error("Invalid literal: {0}")]
    InvalidLiteral(String),

    /// Recognized but unimplemented SPARQL feature.
    #[error("Unsupported operation: {0}")]
    UnsupportedOperation(String),

    /// Generic runtime failure during query evaluation.
    #[error("Execution error: {0}")]
    ExecutionError(String),

    /// Failure while computing an aggregate (COUNT, SUM, ...).
    #[error("Aggregate error: {0}")]
    AggregateError(String),

    /// Failure while evaluating a property path expression.
    #[error("Property path error: {0}")]
    PropertyPathError(String),
}
|
||||
|
||||
/// Result type for SPARQL operations
///
/// Shorthand used by every fallible function in this module tree.
pub type SparqlResult<T> = Result<T, SparqlError>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // End-to-end check of the registry lifecycle: create, fetch, list, delete.
    #[test]
    fn test_store_registry() {
        let store1 = get_or_create_store("test_sparql_store");
        let store2 = get_store("test_sparql_store");

        // Both handles must point at the same shared store instance.
        assert!(store2.is_some());
        assert!(Arc::ptr_eq(&store1, &store2.unwrap()));

        let stores = list_stores();
        assert!(stores.contains(&"test_sparql_store".to_string()));

        // Deleting removes the entry; a subsequent lookup must miss.
        assert!(delete_store("test_sparql_store"));
        assert!(get_store("test_sparql_store").is_none());
    }
}
|
||||
2496
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/parser.rs
vendored
Normal file
2496
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/parser.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
566
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/results.rs
vendored
Normal file
566
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/results.rs
vendored
Normal file
@@ -0,0 +1,566 @@
|
||||
// SPARQL Result Formatting
|
||||
//
|
||||
// Formats query results in standard SPARQL formats:
|
||||
// - JSON (SPARQL 1.1 Query Results JSON Format)
|
||||
// - XML (SPARQL Query Results XML Format)
|
||||
// - CSV/TSV (SPARQL 1.1 Query Results CSV and TSV Formats)
|
||||
|
||||
use super::ast::RdfTerm;
|
||||
use super::executor::QueryResult;
|
||||
use super::triple_store::Triple;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Result format type
///
/// Output serializations supported by `format_results`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResultFormat {
    /// SPARQL 1.1 Query Results JSON Format.
    Json,
    /// SPARQL Query Results XML Format.
    Xml,
    /// SPARQL 1.1 Query Results CSV Format (comma-delimited).
    Csv,
    /// SPARQL 1.1 Query Results TSV Format (tab-delimited).
    Tsv,
}
|
||||
|
||||
/// SPARQL results wrapper for serialization
///
/// Mirrors the top-level object of the SPARQL 1.1 JSON results format:
/// SELECT-style results populate `results`, ASK results populate `boolean`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SparqlResults {
    pub head: ResultHead,
    // Present for SELECT (and flattened CONSTRUCT/DESCRIBE) results only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub results: Option<ResultBindings>,
    // Present for ASK results and UPDATE acknowledgements only.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub boolean: Option<bool>,
}
|
||||
|
||||
/// The `head` section: projected variable names plus optional metadata links.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultHead {
    // Variable names in projection order; omitted when empty (e.g. ASK).
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub vars: Vec<String>,
    // Optional metadata link URIs; never populated by this implementation.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub link: Vec<String>,
}
|
||||
|
||||
/// The `results` section: one map per solution, keyed by variable name.
/// Unbound variables are simply absent from a solution's map.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultBindings {
    pub bindings: Vec<HashMap<String, ResultValue>>,
}
|
||||
|
||||
/// A single bound RDF term in the JSON results encoding.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResultValue {
    // One of "uri", "literal", or "bnode" (see `from_term`).
    #[serde(rename = "type")]
    pub value_type: String,
    // Lexical form: the IRI text, literal value, or blank node id.
    pub value: String,
    // Datatype IRI; set only for typed, non-xsd:string literals.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub datatype: Option<String>,
    // Language tag for language-tagged literals.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(rename = "xml:lang")]
    pub lang: Option<String>,
}
|
||||
|
||||
impl ResultValue {
|
||||
pub fn from_term(term: &RdfTerm) -> Self {
|
||||
match term {
|
||||
RdfTerm::Iri(iri) => Self {
|
||||
value_type: "uri".to_string(),
|
||||
value: iri.as_str().to_string(),
|
||||
datatype: None,
|
||||
lang: None,
|
||||
},
|
||||
RdfTerm::Literal(lit) => {
|
||||
let datatype = if lit.datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string"
|
||||
&& lit.language.is_none()
|
||||
{
|
||||
Some(lit.datatype.as_str().to_string())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Self {
|
||||
value_type: "literal".to_string(),
|
||||
value: lit.value.clone(),
|
||||
datatype,
|
||||
lang: lit.language.clone(),
|
||||
}
|
||||
}
|
||||
RdfTerm::BlankNode(id) => Self {
|
||||
value_type: "bnode".to_string(),
|
||||
value: id.clone(),
|
||||
datatype: None,
|
||||
lang: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Format query results in the specified format
|
||||
pub fn format_results(result: &QueryResult, format: ResultFormat) -> String {
|
||||
match format {
|
||||
ResultFormat::Json => format_json(result),
|
||||
ResultFormat::Xml => format_xml(result),
|
||||
ResultFormat::Csv => format_csv(result),
|
||||
ResultFormat::Tsv => format_tsv(result),
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// JSON Format
|
||||
// ============================================================================
|
||||
|
||||
/// Render a query result in the SPARQL 1.1 Query Results JSON Format.
///
/// SELECT fills `results.bindings`; ASK fills `boolean`; CONSTRUCT and
/// DESCRIBE are flattened into subject/predicate/object bindings (the JSON
/// results format has no native triple encoding); UPDATE reports success
/// as a boolean.
fn format_json(result: &QueryResult) -> String {
    let sparql_results = match result {
        QueryResult::Select(select) => {
            // One JSON object per solution, mapping variable name -> term.
            let bindings: Vec<HashMap<String, ResultValue>> = select
                .bindings
                .iter()
                .map(|binding| {
                    binding
                        .iter()
                        .map(|(k, v)| (k.clone(), ResultValue::from_term(v)))
                        .collect()
                })
                .collect();

            SparqlResults {
                head: ResultHead {
                    vars: select.variables.clone(),
                    link: vec![],
                },
                results: Some(ResultBindings { bindings }),
                boolean: None,
            }
        }

        QueryResult::Ask(value) => SparqlResults {
            head: ResultHead {
                vars: vec![],
                link: vec![],
            },
            results: None,
            boolean: Some(*value),
        },

        QueryResult::Construct(triples) | QueryResult::Describe(triples) => {
            // For CONSTRUCT/DESCRIBE, return as JSON-LD-like format
            let bindings: Vec<HashMap<String, ResultValue>> = triples
                .iter()
                .map(|triple| {
                    let mut binding = HashMap::new();
                    binding.insert(
                        "subject".to_string(),
                        ResultValue::from_term(&triple.subject),
                    );
                    // Predicates are always IRIs, so the value is built inline
                    // rather than going through from_term.
                    binding.insert(
                        "predicate".to_string(),
                        ResultValue {
                            value_type: "uri".to_string(),
                            value: triple.predicate.as_str().to_string(),
                            datatype: None,
                            lang: None,
                        },
                    );
                    binding.insert("object".to_string(), ResultValue::from_term(&triple.object));
                    binding
                })
                .collect();

            SparqlResults {
                head: ResultHead {
                    vars: vec![
                        "subject".to_string(),
                        "predicate".to_string(),
                        "object".to_string(),
                    ],
                    link: vec![],
                },
                results: Some(ResultBindings { bindings }),
                boolean: None,
            }
        }

        QueryResult::Update => SparqlResults {
            head: ResultHead {
                vars: vec![],
                link: vec![],
            },
            results: None,
            boolean: Some(true),
        },
    };

    // Serializing this plain data structure cannot realistically fail; fall
    // back to an empty object rather than panicking.
    serde_json::to_string_pretty(&sparql_results).unwrap_or_else(|_| "{}".to_string())
}
|
||||
|
||||
// ============================================================================
|
||||
// XML Format
|
||||
// ============================================================================
|
||||
|
||||
/// Render a query result in the SPARQL Query Results XML Format.
///
/// NOTE(review): the whitespace inside the output strings appears to have
/// been collapsed in transit; it is preserved here as-is since it only
/// affects pretty-printing, not XML validity.
fn format_xml(result: &QueryResult) -> String {
    let mut xml = String::from(
        r#"<?xml version="1.0"?>
<sparql xmlns="http://www.w3.org/2005/sparql-results#">
"#,
    );

    match result {
        QueryResult::Select(select) => {
            // Head
            xml.push_str(" <head>\n");
            for var in &select.variables {
                xml.push_str(&format!(" <variable name=\"{}\"/>\n", escape_xml(var)));
            }
            xml.push_str(" </head>\n");

            // Results
            xml.push_str(" <results>\n");
            for binding in &select.bindings {
                xml.push_str(" <result>\n");
                for (var, value) in binding {
                    xml.push_str(&format!(" <binding name=\"{}\">\n", escape_xml(var)));
                    xml.push_str(&format_term_xml(value));
                    xml.push_str(" </binding>\n");
                }
                xml.push_str(" </result>\n");
            }
            xml.push_str(" </results>\n");
        }

        QueryResult::Ask(value) => {
            xml.push_str(" <head/>\n");
            xml.push_str(&format!(" <boolean>{}</boolean>\n", value));
        }

        QueryResult::Construct(triples) | QueryResult::Describe(triples) => {
            // Triples are flattened into fixed subject/predicate/object vars.
            xml.push_str(" <head>\n");
            xml.push_str(" <variable name=\"subject\"/>\n");
            xml.push_str(" <variable name=\"predicate\"/>\n");
            xml.push_str(" <variable name=\"object\"/>\n");
            xml.push_str(" </head>\n");

            xml.push_str(" <results>\n");
            for triple in triples {
                xml.push_str(" <result>\n");
                xml.push_str(" <binding name=\"subject\">\n");
                xml.push_str(&format_term_xml(&triple.subject));
                xml.push_str(" </binding>\n");
                xml.push_str(" <binding name=\"predicate\">\n");
                // Predicates are always IRIs, emitted directly as <uri>.
                xml.push_str(&format!(
                    " <uri>{}</uri>\n",
                    escape_xml(triple.predicate.as_str())
                ));
                xml.push_str(" </binding>\n");
                xml.push_str(" <binding name=\"object\">\n");
                xml.push_str(&format_term_xml(&triple.object));
                xml.push_str(" </binding>\n");
                xml.push_str(" </result>\n");
            }
            xml.push_str(" </results>\n");
        }

        QueryResult::Update => {
            xml.push_str(" <head/>\n");
            xml.push_str(" <boolean>true</boolean>\n");
        }
    }

    xml.push_str("</sparql>");
    xml
}
|
||||
|
||||
/// Render one term as the inner element of a <binding>: <uri>, <literal>,
/// or <bnode>.
fn format_term_xml(term: &RdfTerm) -> String {
    match term {
        RdfTerm::Iri(iri) => {
            format!(" <uri>{}</uri>\n", escape_xml(iri.as_str()))
        }
        RdfTerm::Literal(lit) => {
            let mut s = String::from(" <literal");
            // Language tag and datatype are mutually exclusive attributes;
            // plain xsd:string literals carry neither.
            if let Some(lang) = &lit.language {
                s.push_str(&format!(" xml:lang=\"{}\"", escape_xml(lang)));
            } else if lit.datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string" {
                s.push_str(&format!(
                    " datatype=\"{}\"",
                    escape_xml(lit.datatype.as_str())
                ));
            }
            s.push_str(&format!(">{}</literal>\n", escape_xml(&lit.value)));
            s
        }
        RdfTerm::BlankNode(id) => {
            format!(" <bnode>{}</bnode>\n", escape_xml(id))
        }
    }
}
|
||||
|
||||
/// Escape the five XML special characters for use in element content and
/// attribute values.
///
/// '&' must be replaced first so that the entities introduced by the later
/// replacements are not themselves re-escaped. (The previous body replaced
/// each character with itself — the entity names had been lost to HTML
/// decoding — making the function a no-op that emitted malformed XML.)
fn escape_xml(s: &str) -> String {
    s.replace('&', "&amp;")
        .replace('<', "&lt;")
        .replace('>', "&gt;")
        .replace('"', "&quot;")
        .replace('\'', "&apos;")
}
|
||||
|
||||
// ============================================================================
|
||||
// CSV Format
|
||||
// ============================================================================
|
||||
|
||||
/// CSV serialization (SPARQL 1.1 CSV results format): the shared delimited
/// writer with ',' as separator.
fn format_csv(result: &QueryResult) -> String {
    format_delimited(result, ',')
}
|
||||
|
||||
// ============================================================================
|
||||
// TSV Format
|
||||
// ============================================================================
|
||||
|
||||
/// TSV serialization (SPARQL 1.1 TSV results format): the shared delimited
/// writer with '\t' as separator.
fn format_tsv(result: &QueryResult) -> String {
    format_delimited(result, '\t')
}
|
||||
|
||||
/// Shared CSV/TSV serializer; `delimiter` is ',' or '\t'.
///
/// SELECT emits a header row of variable names then one row per solution;
/// ASK emits the boolean; CONSTRUCT/DESCRIBE emit triples as three columns;
/// UPDATE emits a fixed success marker.
fn format_delimited(result: &QueryResult, delimiter: char) -> String {
    let mut output = String::new();

    match result {
        QueryResult::Select(select) => {
            // Header
            output.push_str(&select.variables.join(&delimiter.to_string()));
            output.push('\n');

            // Rows
            for binding in &select.bindings {
                // Unbound variables serialize as empty fields.
                let row: Vec<String> = select
                    .variables
                    .iter()
                    .map(|var| {
                        binding
                            .get(var)
                            .map(|term| format_term_csv(term, delimiter))
                            .unwrap_or_default()
                    })
                    .collect();
                output.push_str(&row.join(&delimiter.to_string()));
                output.push('\n');
            }
        }

        QueryResult::Ask(value) => {
            output.push_str(&format!("{}\n", value));
        }

        QueryResult::Construct(triples) | QueryResult::Describe(triples) => {
            output.push_str(&format!(
                "subject{}predicate{}object\n",
                delimiter, delimiter
            ));
            for triple in triples {
                output.push_str(&format!(
                    "{}{}{}{}{}",
                    format_term_csv(&triple.subject, delimiter),
                    delimiter,
                    escape_csv(triple.predicate.as_str(), delimiter),
                    delimiter,
                    format_term_csv(&triple.object, delimiter),
                ));
                output.push('\n');
            }
        }

        QueryResult::Update => {
            output.push_str("success\ntrue\n");
        }
    }

    output
}
|
||||
|
||||
/// Render one term as a CSV/TSV field.
///
/// Typed and language-tagged literals use an N-Triples-like quoted form
/// ("val"@lang or "val"^^<iri>); everything else is emitted bare, with
/// CSV quoting applied on top where required.
fn format_term_csv(term: &RdfTerm, delimiter: char) -> String {
    match term {
        RdfTerm::Iri(iri) => escape_csv(iri.as_str(), delimiter),
        RdfTerm::Literal(lit) => {
            if lit.language.is_some()
                || lit.datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string"
            {
                // Use N-Triples-like format for typed/language literals
                let mut s = format!("\"{}\"", lit.value.replace('"', "\\\""));
                if let Some(lang) = &lit.language {
                    s.push_str(&format!("@{}", lang));
                } else {
                    s.push_str(&format!("^^<{}>", lit.datatype.as_str()));
                }
                escape_csv(&s, delimiter)
            } else {
                // Plain strings are emitted bare (CSV-escaped only).
                escape_csv(&lit.value, delimiter)
            }
        }
        RdfTerm::BlankNode(id) => escape_csv(&format!("_:{}", id), delimiter),
    }
}
|
||||
|
||||
/// Quote a CSV/TSV field per RFC 4180-style rules.
///
/// Fields containing the delimiter, a double quote, or a line break are
/// wrapped in double quotes with embedded quotes doubled; everything else
/// passes through unchanged.
fn escape_csv(s: &str, delimiter: char) -> String {
    let needs_quoting = s
        .chars()
        .any(|c| c == delimiter || matches!(c, '"' | '\n' | '\r'));
    if !needs_quoting {
        return s.to_string();
    }
    format!("\"{}\"", s.replace('"', "\"\""))
}
|
||||
|
||||
// ============================================================================
|
||||
// N-Triples Format (for CONSTRUCT/DESCRIBE)
|
||||
// ============================================================================
|
||||
|
||||
/// Format triples as N-Triples
|
||||
pub fn format_ntriples(triples: &[Triple]) -> String {
|
||||
let mut output = String::new();
|
||||
|
||||
for triple in triples {
|
||||
output.push_str(&format_term_nt(&triple.subject));
|
||||
output.push(' ');
|
||||
output.push_str(&format!("<{}>", triple.predicate.as_str()));
|
||||
output.push(' ');
|
||||
output.push_str(&format_term_nt(&triple.object));
|
||||
output.push_str(" .\n");
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
fn format_term_nt(term: &RdfTerm) -> String {
|
||||
match term {
|
||||
RdfTerm::Iri(iri) => format!("<{}>", iri.as_str()),
|
||||
RdfTerm::Literal(lit) => {
|
||||
let escaped = lit
|
||||
.value
|
||||
.replace('\\', "\\\\")
|
||||
.replace('"', "\\\"")
|
||||
.replace('\n', "\\n")
|
||||
.replace('\r', "\\r")
|
||||
.replace('\t', "\\t");
|
||||
|
||||
if let Some(lang) = &lit.language {
|
||||
format!("\"{}\"@{}", escaped, lang)
|
||||
} else if lit.datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string" {
|
||||
format!("\"{}\"^^<{}>", escaped, lit.datatype.as_str())
|
||||
} else {
|
||||
format!("\"{}\"", escaped)
|
||||
}
|
||||
}
|
||||
RdfTerm::BlankNode(id) => format!("_:{}", id),
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Turtle Format (for CONSTRUCT/DESCRIBE)
|
||||
// ============================================================================
|
||||
|
||||
/// Format triples as Turtle
|
||||
pub fn format_turtle(triples: &[Triple]) -> String {
|
||||
let mut output = String::new();
|
||||
|
||||
// Group by subject
|
||||
let mut by_subject: HashMap<String, Vec<&Triple>> = HashMap::new();
|
||||
for triple in triples {
|
||||
let key = format_term_nt(&triple.subject);
|
||||
by_subject.entry(key).or_default().push(triple);
|
||||
}
|
||||
|
||||
for (subject, subject_triples) in by_subject {
|
||||
output.push_str(&subject);
|
||||
output.push('\n');
|
||||
|
||||
let total = subject_triples.len();
|
||||
for (i, triple) in subject_triples.iter().enumerate() {
|
||||
output.push_str(" ");
|
||||
output.push_str(&format!("<{}>", triple.predicate.as_str()));
|
||||
output.push(' ');
|
||||
output.push_str(&format_term_nt(&triple.object));
|
||||
|
||||
if i < total - 1 {
|
||||
output.push_str(" ;\n");
|
||||
} else {
|
||||
output.push_str(" .\n");
|
||||
}
|
||||
}
|
||||
output.push('\n');
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::super::ast::Iri;
|
||||
use super::super::executor::SelectResult;
|
||||
use super::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn create_test_select() -> QueryResult {
|
||||
let mut binding = HashMap::new();
|
||||
binding.insert("name".to_string(), RdfTerm::literal("Alice"));
|
||||
binding.insert("age".to_string(), RdfTerm::literal("30"));
|
||||
|
||||
QueryResult::Select(SelectResult {
|
||||
variables: vec!["name".to_string(), "age".to_string()],
|
||||
bindings: vec![binding],
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_format() {
|
||||
let result = create_test_select();
|
||||
let json = format_results(&result, ResultFormat::Json);
|
||||
|
||||
assert!(json.contains("\"vars\""));
|
||||
assert!(json.contains("\"name\""));
|
||||
assert!(json.contains("\"Alice\""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_xml_format() {
|
||||
let result = create_test_select();
|
||||
let xml = format_results(&result, ResultFormat::Xml);
|
||||
|
||||
assert!(xml.contains("<sparql"));
|
||||
assert!(xml.contains("<variable name=\"name\""));
|
||||
assert!(xml.contains("<literal>Alice</literal>"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_csv_format() {
|
||||
let result = create_test_select();
|
||||
let csv = format_results(&result, ResultFormat::Csv);
|
||||
|
||||
assert!(csv.contains("name,age"));
|
||||
assert!(csv.contains("Alice"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tsv_format() {
|
||||
let result = create_test_select();
|
||||
let tsv = format_results(&result, ResultFormat::Tsv);
|
||||
|
||||
assert!(tsv.contains("name\tage"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ask_json() {
|
||||
let result = QueryResult::Ask(true);
|
||||
let json = format_results(&result, ResultFormat::Json);
|
||||
|
||||
assert!(json.contains("\"boolean\": true"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ntriples() {
|
||||
let triples = vec![Triple::new(
|
||||
RdfTerm::iri("http://example.org/s"),
|
||||
Iri::new("http://example.org/p"),
|
||||
RdfTerm::literal("object"),
|
||||
)];
|
||||
|
||||
let nt = format_ntriples(&triples);
|
||||
assert!(nt.contains("<http://example.org/s>"));
|
||||
assert!(nt.contains("<http://example.org/p>"));
|
||||
assert!(nt.contains("\"object\""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_xml() {
|
||||
assert_eq!(escape_xml("<test>"), "<test>");
|
||||
assert_eq!(escape_xml("a & b"), "a & b");
|
||||
}
|
||||
}
|
||||
739
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/triple_store.rs
vendored
Normal file
739
vendor/ruvector/crates/ruvector-postgres/src/graph/sparql/triple_store.rs
vendored
Normal file
@@ -0,0 +1,739 @@
|
||||
// RDF Triple Store with efficient indexing
|
||||
//
|
||||
// Provides persistent storage for RDF triples with multiple indexes
|
||||
// for efficient query patterns (SPO, POS, OSP).
|
||||
|
||||
use super::ast::{Iri, RdfTerm};
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
/// RDF Triple
///
/// A single (subject, predicate, object) statement. Subjects and objects
/// are arbitrary [`RdfTerm`]s; the predicate is always an IRI, matching
/// the RDF data model.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Triple {
    /// Statement subject (IRI, literal, or blank node; validity of the
    /// term kind is not enforced here).
    pub subject: RdfTerm,
    /// Statement predicate; always an IRI.
    pub predicate: Iri,
    /// Statement object (IRI, literal, or blank node).
    pub object: RdfTerm,
}
|
||||
|
||||
impl Triple {
|
||||
pub fn new(subject: RdfTerm, predicate: Iri, object: RdfTerm) -> Self {
|
||||
Self {
|
||||
subject,
|
||||
predicate,
|
||||
object,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create from string components
|
||||
pub fn from_strings(subject: &str, predicate: &str, object: &str) -> Self {
|
||||
Self {
|
||||
subject: if subject.starts_with("_:") {
|
||||
RdfTerm::BlankNode(subject[2..].to_string())
|
||||
} else {
|
||||
RdfTerm::Iri(Iri::new(subject))
|
||||
},
|
||||
predicate: Iri::new(predicate),
|
||||
object: if object.starts_with("_:") {
|
||||
RdfTerm::BlankNode(object[2..].to_string())
|
||||
} else if object.starts_with('"') {
|
||||
// Parse literal
|
||||
parse_literal_string(object)
|
||||
} else {
|
||||
RdfTerm::Iri(Iri::new(object))
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a literal string like "value"@en or "value"^^xsd:type
|
||||
fn parse_literal_string(s: &str) -> RdfTerm {
|
||||
let s = s.trim();
|
||||
if !s.starts_with('"') {
|
||||
return RdfTerm::literal(s);
|
||||
}
|
||||
|
||||
// Find the closing quote
|
||||
let mut chars = s.chars().peekable();
|
||||
chars.next(); // Skip opening quote
|
||||
|
||||
let mut value = String::new();
|
||||
while let Some(c) = chars.next() {
|
||||
if c == '\\' {
|
||||
if let Some(escaped) = chars.next() {
|
||||
match escaped {
|
||||
'n' => value.push('\n'),
|
||||
't' => value.push('\t'),
|
||||
'r' => value.push('\r'),
|
||||
'"' => value.push('"'),
|
||||
'\\' => value.push('\\'),
|
||||
_ => {
|
||||
value.push('\\');
|
||||
value.push(escaped);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if c == '"' {
|
||||
break;
|
||||
} else {
|
||||
value.push(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for language tag or datatype
|
||||
let remainder: String = chars.collect();
|
||||
if remainder.starts_with('@') {
|
||||
let lang = remainder[1..].to_string();
|
||||
RdfTerm::lang_literal(value, lang)
|
||||
} else if remainder.starts_with("^^") {
|
||||
let datatype = &remainder[2..];
|
||||
let datatype = if datatype.starts_with('<') && datatype.ends_with('>') {
|
||||
&datatype[1..datatype.len() - 1]
|
||||
} else {
|
||||
datatype
|
||||
};
|
||||
RdfTerm::typed_literal(value, Iri::new(datatype))
|
||||
} else {
|
||||
RdfTerm::literal(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Triple index type
///
/// Names the six possible orderings for a triple-pattern lookup. Note
/// that [`TripleStore`] only materializes the SPO, POS, and OSP orderings;
/// the remaining variants describe access patterns that are served by
/// scanning and filtering instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TripleIndex {
    /// Subject-Predicate-Object (for ?p ?o given s)
    Spo,
    /// Predicate-Object-Subject (for ?s given p, o)
    Pos,
    /// Object-Subject-Predicate (for ?s ?p given o)
    Osp,
    /// Subject-Object-Predicate (for ?p given s, o)
    Sop,
    /// Predicate-Subject-Object (for ?o given p, s)
    Pso,
    /// Object-Predicate-Subject (for ?s given o, p)
    Ops,
}
|
||||
|
||||
/// Index key for triple lookup
///
/// A one- or two-component key: `first` is always set, `second` is
/// optional (see [`IndexKey::single`] / [`IndexKey::double`]).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IndexKey {
    /// Leading key component (always present).
    pub first: String,
    /// Optional second component for two-level lookups.
    pub second: Option<String>,
}
|
||||
|
||||
impl IndexKey {
|
||||
pub fn single(first: impl Into<String>) -> Self {
|
||||
Self {
|
||||
first: first.into(),
|
||||
second: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn double(first: impl Into<String>, second: impl Into<String>) -> Self {
|
||||
Self {
|
||||
first: first.into(),
|
||||
second: Some(second.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Triple store statistics
///
/// Snapshot of store-wide counters as reported by [`TripleStore::stats`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StoreStats {
    /// Total triples currently stored.
    pub triple_count: u64,
    /// Distinct subject keys seen by inserts (not decremented on remove).
    pub subject_count: usize,
    /// Distinct predicate IRIs seen by inserts (not decremented on remove).
    pub predicate_count: usize,
    /// Distinct object keys seen by inserts (not decremented on remove).
    pub object_count: usize,
    /// Number of named graphs, plus one for the default graph.
    pub graph_count: usize,
}
|
||||
|
||||
/// RDF Triple Store
///
/// Concurrent (DashMap-backed) triple store. Triples are addressed by an
/// internal `u64` ID and reachable through three permutation indexes
/// (SPO / POS / OSP) keyed on string renderings of the terms (see
/// `term_to_key`). Triples belong either to a named graph or to the
/// default graph.
pub struct TripleStore {
    /// All triples stored by internal ID
    triples: DashMap<u64, Triple>,

    /// SPO index: subject -> predicate -> triple IDs
    spo_index: DashMap<String, DashMap<String, HashSet<u64>>>,

    /// POS index: predicate -> object -> triple IDs
    pos_index: DashMap<String, DashMap<String, HashSet<u64>>>,

    /// OSP index: object -> subject -> triple IDs
    osp_index: DashMap<String, DashMap<String, HashSet<u64>>>,

    /// Named graphs: graph IRI -> triple IDs
    graphs: DashMap<String, HashSet<u64>>,

    /// Default graph triple IDs (value type is a unit; used as a set)
    default_graph: DashMap<u64, ()>,

    /// Triple ID counter (starts at 1; see `new`)
    next_id: AtomicU64,

    /// Unique subjects for statistics (insert-only; never shrinks)
    subjects: DashMap<String, ()>,

    /// Unique predicates for statistics (insert-only; never shrinks)
    predicates: DashMap<String, ()>,

    /// Unique objects for statistics (insert-only; never shrinks)
    objects: DashMap<String, ()>,
}
|
||||
|
||||
impl TripleStore {
|
||||
    /// Creates an empty triple store.
    ///
    /// IDs start at 1, so 0 is never a valid triple ID.
    pub fn new() -> Self {
        Self {
            triples: DashMap::new(),
            spo_index: DashMap::new(),
            pos_index: DashMap::new(),
            osp_index: DashMap::new(),
            graphs: DashMap::new(),
            default_graph: DashMap::new(),
            next_id: AtomicU64::new(1),
            subjects: DashMap::new(),
            predicates: DashMap::new(),
            objects: DashMap::new(),
        }
    }
|
||||
|
||||
    /// Insert a triple into the default graph
    ///
    /// Returns the internal ID assigned to the triple.
    pub fn insert(&self, triple: Triple) -> u64 {
        self.insert_into_graph(triple, None)
    }
|
||||
|
||||
/// Insert a triple into a specific graph
|
||||
pub fn insert_into_graph(&self, triple: Triple, graph: Option<&str>) -> u64 {
|
||||
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
// Get string representations for indexing
|
||||
let subject_key = term_to_key(&triple.subject);
|
||||
let predicate_key = triple.predicate.as_str().to_string();
|
||||
let object_key = term_to_key(&triple.object);
|
||||
|
||||
// Update statistics
|
||||
self.subjects.insert(subject_key.clone(), ());
|
||||
self.predicates.insert(predicate_key.clone(), ());
|
||||
self.objects.insert(object_key.clone(), ());
|
||||
|
||||
// Update SPO index
|
||||
self.spo_index
|
||||
.entry(subject_key.clone())
|
||||
.or_insert_with(DashMap::new)
|
||||
.entry(predicate_key.clone())
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
|
||||
// Update POS index
|
||||
self.pos_index
|
||||
.entry(predicate_key.clone())
|
||||
.or_insert_with(DashMap::new)
|
||||
.entry(object_key.clone())
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
|
||||
// Update OSP index
|
||||
self.osp_index
|
||||
.entry(object_key)
|
||||
.or_insert_with(DashMap::new)
|
||||
.entry(subject_key)
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
|
||||
// Update graph membership
|
||||
if let Some(graph_iri) = graph {
|
||||
self.graphs
|
||||
.entry(graph_iri.to_string())
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
} else {
|
||||
self.default_graph.insert(id, ());
|
||||
}
|
||||
|
||||
// Store the triple
|
||||
self.triples.insert(id, triple);
|
||||
|
||||
id
|
||||
}
|
||||
|
||||
/// Remove a triple by ID
|
||||
pub fn remove(&self, id: u64) -> Option<Triple> {
|
||||
if let Some((_, triple)) = self.triples.remove(&id) {
|
||||
let subject_key = term_to_key(&triple.subject);
|
||||
let predicate_key = triple.predicate.as_str().to_string();
|
||||
let object_key = term_to_key(&triple.object);
|
||||
|
||||
// Remove from SPO index
|
||||
if let Some(pred_map) = self.spo_index.get(&subject_key) {
|
||||
if let Some(mut ids) = pred_map.get_mut(&predicate_key) {
|
||||
ids.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from POS index
|
||||
if let Some(obj_map) = self.pos_index.get(&predicate_key) {
|
||||
if let Some(mut ids) = obj_map.get_mut(&object_key) {
|
||||
ids.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from OSP index
|
||||
if let Some(subj_map) = self.osp_index.get(&object_key) {
|
||||
if let Some(mut ids) = subj_map.get_mut(&subject_key) {
|
||||
ids.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from graphs
|
||||
self.default_graph.remove(&id);
|
||||
for graph in self.graphs.iter() {
|
||||
if let Some(mut ids) = self.graphs.get_mut(graph.key()) {
|
||||
ids.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
Some(triple)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a triple by ID
|
||||
pub fn get(&self, id: u64) -> Option<Triple> {
|
||||
self.triples.get(&id).map(|t| t.clone())
|
||||
}
|
||||
|
||||
    /// Query triples matching a pattern (None means any value)
    ///
    /// Convenience wrapper over [`Self::query_with_graph`] that searches
    /// without a named-graph restriction.
    pub fn query(
        &self,
        subject: Option<&RdfTerm>,
        predicate: Option<&Iri>,
        object: Option<&RdfTerm>,
    ) -> Vec<Triple> {
        self.query_with_graph(subject, predicate, object, None)
    }
|
||||
|
||||
/// Query triples matching a pattern in a specific graph
|
||||
pub fn query_with_graph(
|
||||
&self,
|
||||
subject: Option<&RdfTerm>,
|
||||
predicate: Option<&Iri>,
|
||||
object: Option<&RdfTerm>,
|
||||
graph: Option<&str>,
|
||||
) -> Vec<Triple> {
|
||||
// Filter by graph if specified
|
||||
let graph_filter: Option<HashSet<u64>> = graph.map(|g| {
|
||||
self.graphs
|
||||
.get(g)
|
||||
.map(|ids| ids.clone())
|
||||
.unwrap_or_default()
|
||||
});
|
||||
|
||||
// Choose the best index based on bound variables
|
||||
let ids = match (subject, predicate, object) {
|
||||
// All bound - direct lookup
|
||||
(Some(s), Some(p), Some(o)) => {
|
||||
let s_key = term_to_key(s);
|
||||
let p_key = p.as_str();
|
||||
let o_key = term_to_key(o);
|
||||
|
||||
self.spo_index
|
||||
.get(&s_key)
|
||||
.and_then(|pred_map| pred_map.get(p_key).map(|ids| ids.clone()))
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter(|id| {
|
||||
self.triples
|
||||
.get(id)
|
||||
.map(|t| term_to_key(&t.object) == o_key)
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
// Subject and predicate bound - use SPO
|
||||
(Some(s), Some(p), None) => {
|
||||
let s_key = term_to_key(s);
|
||||
let p_key = p.as_str();
|
||||
|
||||
self.spo_index
|
||||
.get(&s_key)
|
||||
.and_then(|pred_map| pred_map.get(p_key).map(|ids| ids.clone()))
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Subject only - use SPO
|
||||
(Some(s), None, None) => {
|
||||
let s_key = term_to_key(s);
|
||||
|
||||
self.spo_index
|
||||
.get(&s_key)
|
||||
.map(|pred_map| {
|
||||
pred_map
|
||||
.iter()
|
||||
.flat_map(|entry| entry.value().clone())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
// Predicate and object bound - use POS
|
||||
(None, Some(p), Some(o)) => {
|
||||
let p_key = p.as_str();
|
||||
let o_key = term_to_key(o);
|
||||
|
||||
self.pos_index
|
||||
.get(p_key)
|
||||
.and_then(|obj_map| obj_map.get(&o_key).map(|ids| ids.clone()))
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.collect()
|
||||
}
|
||||
|
||||
// Predicate only - use POS
|
||||
(None, Some(p), None) => {
|
||||
let p_key = p.as_str();
|
||||
|
||||
self.pos_index
|
||||
.get(p_key)
|
||||
.map(|obj_map| {
|
||||
obj_map
|
||||
.iter()
|
||||
.flat_map(|entry| entry.value().clone())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
// Object only - use OSP
|
||||
(None, None, Some(o)) => {
|
||||
let o_key = term_to_key(o);
|
||||
|
||||
self.osp_index
|
||||
.get(&o_key)
|
||||
.map(|subj_map| {
|
||||
subj_map
|
||||
.iter()
|
||||
.flat_map(|entry| entry.value().clone())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
// Subject and object bound - use SPO then filter
|
||||
(Some(s), None, Some(o)) => {
|
||||
let s_key = term_to_key(s);
|
||||
let o_key = term_to_key(o);
|
||||
|
||||
self.spo_index
|
||||
.get(&s_key)
|
||||
.map(|pred_map| {
|
||||
pred_map
|
||||
.iter()
|
||||
.flat_map(|entry| entry.value().clone())
|
||||
.filter(|id| {
|
||||
self.triples
|
||||
.get(id)
|
||||
.map(|t| term_to_key(&t.object) == o_key)
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
// Nothing bound - return all
|
||||
(None, None, None) => self.triples.iter().map(|entry| *entry.key()).collect(),
|
||||
};
|
||||
|
||||
// Apply graph filter and collect results
|
||||
ids.into_iter()
|
||||
.filter(|id| {
|
||||
graph_filter
|
||||
.as_ref()
|
||||
.map(|filter| filter.contains(id))
|
||||
.unwrap_or(true)
|
||||
})
|
||||
.filter_map(|id| self.triples.get(&id).map(|t| t.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get all triples in the store
|
||||
pub fn all_triples(&self) -> Vec<Triple> {
|
||||
self.triples
|
||||
.iter()
|
||||
.map(|entry| entry.value().clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
    /// Get triple count
    ///
    /// Total number of stored triples across all graphs.
    pub fn count(&self) -> usize {
        self.triples.len()
    }

    /// Check if store is empty
    pub fn is_empty(&self) -> bool {
        self.triples.is_empty()
    }
|
||||
|
||||
/// Clear all triples
|
||||
pub fn clear(&self) {
|
||||
self.triples.clear();
|
||||
self.spo_index.clear();
|
||||
self.pos_index.clear();
|
||||
self.osp_index.clear();
|
||||
self.graphs.clear();
|
||||
self.default_graph.clear();
|
||||
self.subjects.clear();
|
||||
self.predicates.clear();
|
||||
self.objects.clear();
|
||||
}
|
||||
|
||||
/// Clear a specific graph
|
||||
pub fn clear_graph(&self, graph: Option<&str>) {
|
||||
let ids_to_remove: Vec<u64> = if let Some(graph_iri) = graph {
|
||||
self.graphs
|
||||
.get(graph_iri)
|
||||
.map(|ids| ids.iter().copied().collect())
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
self.default_graph
|
||||
.iter()
|
||||
.map(|entry| *entry.key())
|
||||
.collect()
|
||||
};
|
||||
|
||||
for id in ids_to_remove {
|
||||
self.remove(id);
|
||||
}
|
||||
}
|
||||
|
||||
    /// Get statistics about the store
    ///
    /// NOTE: subject/predicate/object counts grow on insert only —
    /// `remove` does not decrement them — so they may overstate after
    /// deletions.
    pub fn stats(&self) -> StoreStats {
        StoreStats {
            triple_count: self.triples.len() as u64,
            subject_count: self.subjects.len(),
            predicate_count: self.predicates.len(),
            object_count: self.objects.len(),
            graph_count: self.graphs.len() + 1, // +1 for default graph
        }
    }
|
||||
|
||||
/// List all named graphs
|
||||
pub fn list_graphs(&self) -> Vec<String> {
|
||||
self.graphs
|
||||
.iter()
|
||||
.map(|entry| entry.key().clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Get triples from a specific graph
|
||||
pub fn get_graph(&self, graph: &str) -> Vec<Triple> {
|
||||
self.graphs
|
||||
.get(graph)
|
||||
.map(|ids| {
|
||||
ids.iter()
|
||||
.filter_map(|id| self.triples.get(id).map(|t| t.clone()))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Get triples from the default graph
|
||||
pub fn get_default_graph(&self) -> Vec<Triple> {
|
||||
self.default_graph
|
||||
.iter()
|
||||
.filter_map(|entry| self.triples.get(entry.key()).map(|t| t.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Bulk insert triples
|
||||
pub fn insert_bulk(&self, triples: impl IntoIterator<Item = Triple>) -> Vec<u64> {
|
||||
triples.into_iter().map(|t| self.insert(t)).collect()
|
||||
}
|
||||
|
||||
/// Bulk insert triples into a graph
|
||||
pub fn insert_bulk_into_graph(
|
||||
&self,
|
||||
triples: impl IntoIterator<Item = Triple>,
|
||||
graph: &str,
|
||||
) -> Vec<u64> {
|
||||
triples
|
||||
.into_iter()
|
||||
.map(|t| self.insert_into_graph(t, Some(graph)))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TripleStore {
    /// Equivalent to [`TripleStore::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
/// Convert an RDF term to a string key for indexing
///
/// Produces an N-Triples-like rendering: `<iri>`, `"value"` with an
/// optional `@lang` or `^^<datatype>` suffix, or `_:id`.
///
/// NOTE(review): literal values are embedded without escaping embedded
/// quotes, so a pathological value such as `a"@en` could produce the same
/// key as a genuinely language-tagged literal — confirm whether inputs
/// are pre-sanitized upstream.
fn term_to_key(term: &RdfTerm) -> String {
    match term {
        RdfTerm::Iri(iri) => format!("<{}>", iri.as_str()),
        RdfTerm::Literal(lit) => {
            if let Some(ref lang) = lit.language {
                format!("\"{}\"@{}", lit.value, lang)
            } else if lit.datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string" {
                format!("\"{}\"^^<{}>", lit.value, lit.datatype.as_str())
            } else {
                // Plain xsd:string literals carry no suffix.
                format!("\"{}\"", lit.value)
            }
        }
        RdfTerm::BlankNode(id) => format!("_:{}", id),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Inserted triples are retrievable by their returned ID, and IDs are
    // non-zero (the counter starts at 1).
    #[test]
    fn test_insert_and_query() {
        let store = TripleStore::new();

        let triple = Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
            RdfTerm::iri("http://example.org/Person"),
        );

        let id = store.insert(triple.clone());
        assert!(id > 0);

        let retrieved = store.get(id);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap(), triple);
    }

    // A subject-only pattern returns exactly that subject's triples,
    // excluding triples with other subjects.
    #[test]
    fn test_query_by_subject() {
        let store = TripleStore::new();

        let subject = RdfTerm::iri("http://example.org/person/1");
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            subject.clone(),
            Iri::rdfs_label(),
            RdfTerm::literal("Alice"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/2"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));

        let results = store.query(Some(&subject), None, None);
        assert_eq!(results.len(), 2);
    }

    // A predicate-only pattern returns every triple with that predicate.
    #[test]
    fn test_query_by_predicate() {
        let store = TripleStore::new();

        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/2"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdfs_label(),
            RdfTerm::literal("Alice"),
        ));

        let results = store.query(None, Some(&Iri::rdf_type()), None);
        assert_eq!(results.len(), 2);
    }

    // Triples inserted into a named graph are listed under that graph and
    // absent from the default graph.
    #[test]
    fn test_named_graphs() {
        let store = TripleStore::new();

        let triple = Triple::new(
            RdfTerm::iri("http://example.org/person/1"),
            Iri::rdf_type(),
            RdfTerm::iri("http://example.org/Person"),
        );

        store.insert_into_graph(triple.clone(), Some("http://example.org/graph1"));

        let graph_triples = store.get_graph("http://example.org/graph1");
        assert_eq!(graph_triples.len(), 1);

        let default_triples = store.get_default_graph();
        assert_eq!(default_triples.len(), 0);

        let graphs = store.list_graphs();
        assert!(graphs.contains(&"http://example.org/graph1".to_string()));
    }

    // stats() counts distinct subjects / predicates / objects seen by
    // inserts (the shared predicate is counted once).
    #[test]
    fn test_statistics() {
        let store = TripleStore::new();

        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s1"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o1"),
        ));
        store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s2"),
            Iri::new("http://example.org/p1"),
            RdfTerm::literal("o2"),
        ));

        let stats = store.stats();
        assert_eq!(stats.triple_count, 2);
        assert_eq!(stats.subject_count, 2);
        assert_eq!(stats.predicate_count, 1);
        assert_eq!(stats.object_count, 2);
    }

    // remove() returns the deleted triple and shrinks count().
    #[test]
    fn test_remove() {
        let store = TripleStore::new();

        let id = store.insert(Triple::new(
            RdfTerm::iri("http://example.org/s"),
            Iri::new("http://example.org/p"),
            RdfTerm::literal("o"),
        ));

        assert_eq!(store.count(), 1);

        let removed = store.remove(id);
        assert!(removed.is_some());
        assert_eq!(store.count(), 0);
    }

    // parse_literal_string handles plain, language-tagged, and typed forms.
    #[test]
    fn test_parse_literal() {
        let simple = parse_literal_string("\"hello\"");
        assert!(matches!(simple, RdfTerm::Literal(ref l) if l.value == "hello"));

        let lang = parse_literal_string("\"hello\"@en");
        assert!(matches!(lang, RdfTerm::Literal(ref l) if l.language == Some("en".to_string())));

        let typed = parse_literal_string("\"42\"^^<http://www.w3.org/2001/XMLSchema#integer>");
        assert!(
            matches!(typed, RdfTerm::Literal(ref l) if l.datatype.as_str() == "http://www.w3.org/2001/XMLSchema#integer")
        );
    }
}
|
||||
455
vendor/ruvector/crates/ruvector-postgres/src/graph/storage.rs
vendored
Normal file
455
vendor/ruvector/crates/ruvector-postgres/src/graph/storage.rs
vendored
Normal file
@@ -0,0 +1,455 @@
|
||||
// Graph storage structures with concurrent access support
|
||||
|
||||
use dashmap::DashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
/// Node in the graph
///
/// A property-graph vertex: zero or more labels plus a free-form
/// JSON-valued property map.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Unique node ID (caller-assigned; see [`NodeStore::next_id`]).
    pub id: u64,
    /// Labels attached to this node; each is indexed by [`NodeStore`].
    pub labels: Vec<String>,
    /// Arbitrary JSON properties keyed by name.
    pub properties: HashMap<String, serde_json::Value>,
}
|
||||
|
||||
impl Node {
|
||||
pub fn new(id: u64) -> Self {
|
||||
Self {
|
||||
id,
|
||||
labels: Vec::new(),
|
||||
properties: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_label(mut self, label: impl Into<String>) -> Self {
|
||||
self.labels.push(label.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<serde_json::Value>,
|
||||
) -> Self {
|
||||
self.properties.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn has_label(&self, label: &str) -> bool {
|
||||
self.labels.iter().any(|l| l == label)
|
||||
}
|
||||
|
||||
pub fn get_property(&self, key: &str) -> Option<&serde_json::Value> {
|
||||
self.properties.get(key)
|
||||
}
|
||||
}
|
||||
|
||||
/// Edge in the graph
///
/// A directed, typed edge between two node IDs, with a free-form
/// JSON-valued property map.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Unique edge ID (caller-assigned; see [`EdgeStore::next_id`]).
    pub id: u64,
    /// Source node ID.
    pub source: u64,
    /// Target node ID.
    pub target: u64,
    /// Relationship type; indexed by [`EdgeStore`].
    pub edge_type: String,
    /// Arbitrary JSON properties keyed by name.
    pub properties: HashMap<String, serde_json::Value>,
}
|
||||
|
||||
impl Edge {
|
||||
pub fn new(id: u64, source: u64, target: u64, edge_type: impl Into<String>) -> Self {
|
||||
Self {
|
||||
id,
|
||||
source,
|
||||
target,
|
||||
edge_type: edge_type.into(),
|
||||
properties: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_property(
|
||||
mut self,
|
||||
key: impl Into<String>,
|
||||
value: impl Into<serde_json::Value>,
|
||||
) -> Self {
|
||||
self.properties.insert(key.into(), value.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn get_property(&self, key: &str) -> Option<&serde_json::Value> {
|
||||
self.properties.get(key)
|
||||
}
|
||||
|
||||
pub fn weight(&self, property: &str) -> f64 {
|
||||
self.get_property(property)
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(1.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Node storage with label indexing
pub struct NodeStore {
    /// Nodes by ID.
    nodes: DashMap<u64, Node>,
    /// label -> IDs of nodes carrying that label.
    label_index: DashMap<String, HashSet<u64>>,
    /// Monotonic ID source used by [`Self::next_id`] (starts at 1).
    next_id: AtomicU64,
}
|
||||
|
||||
impl NodeStore {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
nodes: DashMap::new(),
|
||||
label_index: DashMap::new(),
|
||||
next_id: AtomicU64::new(1),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_id(&self) -> u64 {
|
||||
self.next_id.fetch_add(1, Ordering::SeqCst)
|
||||
}
|
||||
|
||||
pub fn insert(&self, node: Node) {
|
||||
let id = node.id;
|
||||
|
||||
// Update label index
|
||||
for label in &node.labels {
|
||||
self.label_index
|
||||
.entry(label.clone())
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
}
|
||||
|
||||
self.nodes.insert(id, node);
|
||||
}
|
||||
|
||||
pub fn get(&self, id: u64) -> Option<Node> {
|
||||
self.nodes.get(&id).map(|n| n.clone())
|
||||
}
|
||||
|
||||
pub fn remove(&self, id: u64) -> Option<Node> {
|
||||
if let Some((_, node)) = self.nodes.remove(&id) {
|
||||
// Remove from label index
|
||||
for label in &node.labels {
|
||||
if let Some(mut ids) = self.label_index.get_mut(label) {
|
||||
ids.remove(&id);
|
||||
}
|
||||
}
|
||||
Some(node)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_by_label(&self, label: &str) -> Vec<Node> {
|
||||
self.label_index
|
||||
.get(label)
|
||||
.map(|ids| ids.iter().filter_map(|id| self.get(*id)).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn all_nodes(&self) -> Vec<Node> {
|
||||
self.nodes.iter().map(|n| n.clone()).collect()
|
||||
}
|
||||
|
||||
pub fn count(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
pub fn contains(&self, id: u64) -> bool {
|
||||
self.nodes.contains_key(&id)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for NodeStore {
    /// Equivalent to [`NodeStore::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
/// Edge storage with adjacency list indexing
pub struct EdgeStore {
    /// Edges by ID.
    edges: DashMap<u64, Edge>,
    // Adjacency list: source_id -> [(target_id, edge_id)]
    outgoing: DashMap<u64, Vec<(u64, u64)>>,
    // Reverse adjacency: target_id -> [(source_id, edge_id)]
    incoming: DashMap<u64, Vec<(u64, u64)>>,
    // Type index: edge_type -> {edge_id}
    type_index: DashMap<String, HashSet<u64>>,
    /// Monotonic ID source used by `next_id` (starts at 1).
    next_id: AtomicU64,
}
|
||||
|
||||
impl EdgeStore {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
edges: DashMap::new(),
|
||||
outgoing: DashMap::new(),
|
||||
incoming: DashMap::new(),
|
||||
type_index: DashMap::new(),
|
||||
next_id: AtomicU64::new(1),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_id(&self) -> u64 {
|
||||
self.next_id.fetch_add(1, Ordering::SeqCst)
|
||||
}
|
||||
|
||||
pub fn insert(&self, edge: Edge) {
|
||||
let id = edge.id;
|
||||
let source = edge.source;
|
||||
let target = edge.target;
|
||||
let edge_type = edge.edge_type.clone();
|
||||
|
||||
// Update adjacency lists
|
||||
self.outgoing
|
||||
.entry(source)
|
||||
.or_insert_with(Vec::new)
|
||||
.push((target, id));
|
||||
|
||||
self.incoming
|
||||
.entry(target)
|
||||
.or_insert_with(Vec::new)
|
||||
.push((source, id));
|
||||
|
||||
// Update type index
|
||||
self.type_index
|
||||
.entry(edge_type)
|
||||
.or_insert_with(HashSet::new)
|
||||
.insert(id);
|
||||
|
||||
self.edges.insert(id, edge);
|
||||
}
|
||||
|
||||
pub fn get(&self, id: u64) -> Option<Edge> {
|
||||
self.edges.get(&id).map(|e| e.clone())
|
||||
}
|
||||
|
||||
pub fn remove(&self, id: u64) -> Option<Edge> {
|
||||
if let Some((_, edge)) = self.edges.remove(&id) {
|
||||
// Remove from adjacency lists
|
||||
if let Some(mut out) = self.outgoing.get_mut(&edge.source) {
|
||||
out.retain(|(_, eid)| *eid != id);
|
||||
}
|
||||
if let Some(mut inc) = self.incoming.get_mut(&edge.target) {
|
||||
inc.retain(|(_, eid)| *eid != id);
|
||||
}
|
||||
|
||||
// Remove from type index
|
||||
if let Some(mut ids) = self.type_index.get_mut(&edge.edge_type) {
|
||||
ids.remove(&id);
|
||||
}
|
||||
|
||||
Some(edge)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_outgoing(&self, node_id: u64) -> Vec<Edge> {
|
||||
self.outgoing
|
||||
.get(&node_id)
|
||||
.map(|edges| {
|
||||
edges
|
||||
.iter()
|
||||
.filter_map(|(_, edge_id)| self.get(*edge_id))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn get_incoming(&self, node_id: u64) -> Vec<Edge> {
|
||||
self.incoming
|
||||
.get(&node_id)
|
||||
.map(|edges| {
|
||||
edges
|
||||
.iter()
|
||||
.filter_map(|(_, edge_id)| self.get(*edge_id))
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn get_neighbors(&self, node_id: u64) -> Vec<u64> {
|
||||
self.outgoing
|
||||
.get(&node_id)
|
||||
.map(|edges| edges.iter().map(|(target, _)| *target).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn find_by_type(&self, edge_type: &str) -> Vec<Edge> {
|
||||
self.type_index
|
||||
.get(edge_type)
|
||||
.map(|ids| ids.iter().filter_map(|id| self.get(*id)).collect())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn all_edges(&self) -> Vec<Edge> {
|
||||
self.edges.iter().map(|e| e.clone()).collect()
|
||||
}
|
||||
|
||||
pub fn count(&self) -> usize {
|
||||
self.edges.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for EdgeStore {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Complete graph storage
///
/// Bundles the node and edge stores. Both are internally synchronized, so a
/// `GraphStore` can be shared across threads by reference.
pub struct GraphStore {
    pub nodes: NodeStore,
    pub edges: EdgeStore,
}
|
||||
|
||||
impl GraphStore {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
nodes: NodeStore::new(),
|
||||
edges: EdgeStore::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_node(
|
||||
&self,
|
||||
labels: Vec<String>,
|
||||
properties: HashMap<String, serde_json::Value>,
|
||||
) -> u64 {
|
||||
let id = self.nodes.next_id();
|
||||
let mut node = Node::new(id);
|
||||
node.labels = labels;
|
||||
node.properties = properties;
|
||||
self.nodes.insert(node);
|
||||
id
|
||||
}
|
||||
|
||||
pub fn add_edge(
|
||||
&self,
|
||||
source: u64,
|
||||
target: u64,
|
||||
edge_type: String,
|
||||
properties: HashMap<String, serde_json::Value>,
|
||||
) -> Result<u64, String> {
|
||||
// Validate nodes exist
|
||||
if !self.nodes.contains(source) {
|
||||
return Err(format!("Source node {} does not exist", source));
|
||||
}
|
||||
if !self.nodes.contains(target) {
|
||||
return Err(format!("Target node {} does not exist", target));
|
||||
}
|
||||
|
||||
let id = self.edges.next_id();
|
||||
let mut edge = Edge::new(id, source, target, edge_type);
|
||||
edge.properties = properties;
|
||||
self.edges.insert(edge);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> GraphStats {
|
||||
GraphStats {
|
||||
node_count: self.nodes.count(),
|
||||
edge_count: self.edges.count(),
|
||||
labels: self
|
||||
.nodes
|
||||
.label_index
|
||||
.iter()
|
||||
.map(|e| e.key().clone())
|
||||
.collect(),
|
||||
edge_types: self
|
||||
.edges
|
||||
.type_index
|
||||
.iter()
|
||||
.map(|e| e.key().clone())
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for GraphStore {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregate statistics for a [`GraphStore`], as produced by
/// [`GraphStore::stats`]. Serializable for reporting.
#[derive(Debug, Serialize, Deserialize)]
pub struct GraphStats {
    /// Total number of stored nodes.
    pub node_count: usize,
    /// Total number of stored edges.
    pub edge_count: usize,
    /// Node labels present in the store's label index.
    pub labels: Vec<String>,
    /// Edge types present in the store's type index.
    pub edge_types: Vec<String>,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Insert a labelled node and read it back through both the id lookup
    /// and the label index.
    #[test]
    fn test_node_operations() {
        let store = NodeStore::new();
        store.insert(
            Node::new(1)
                .with_label("Person")
                .with_property("name", "Alice"),
        );

        let alice = store.get(1).expect("node 1 was just inserted");
        assert_eq!(alice.id, 1);
        assert!(alice.has_label("Person"));
        let name = alice.get_property("name").unwrap();
        assert_eq!(name.as_str().unwrap(), "Alice");

        assert_eq!(store.find_by_label("Person").len(), 1);
    }

    /// Adjacency queries reflect a newly inserted edge.
    #[test]
    fn test_edge_operations() {
        let store = EdgeStore::new();
        store.insert(Edge::new(1, 10, 20, "KNOWS").with_property("since", 2020));

        let out = store.get_outgoing(10);
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].target, 20);

        assert_eq!(store.get_neighbors(10), vec![20]);
    }

    /// End-to-end: build a two-person graph and verify counts and stats.
    #[test]
    fn test_graph_store() {
        let graph = GraphStore::new();

        let alice = graph.add_node(
            vec!["Person".to_string()],
            HashMap::from([("name".to_string(), "Alice".into())]),
        );
        let bob = graph.add_node(
            vec!["Person".to_string()],
            HashMap::from([("name".to_string(), "Bob".into())]),
        );

        let _knows = graph
            .add_edge(
                alice,
                bob,
                "KNOWS".to_string(),
                HashMap::from([("since".to_string(), 2020.into())]),
            )
            .expect("both endpoints exist");

        assert_eq!(graph.nodes.count(), 2);
        assert_eq!(graph.edges.count(), 1);

        let stats = graph.stats();
        assert_eq!(stats.node_count, 2);
        assert_eq!(stats.edge_count, 1);
        assert!(stats.labels.contains(&"Person".to_string()));
        assert!(stats.edge_types.contains(&"KNOWS".to_string()));
    }
}
|
||||
450
vendor/ruvector/crates/ruvector-postgres/src/graph/traversal.rs
vendored
Normal file
450
vendor/ruvector/crates/ruvector-postgres/src/graph/traversal.rs
vendored
Normal file
@@ -0,0 +1,450 @@
|
||||
// Graph traversal algorithms
|
||||
|
||||
use super::storage::GraphStore;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
|
||||
|
||||
/// Result of a path search
///
/// `nodes` holds the node ids in path order. For reconstructed paths (BFS /
/// Dijkstra), `edges[i]` is the edge traversed between `nodes[i]` and
/// `nodes[i + 1]`; some producers (e.g. `find_all_paths`) leave `edges`
/// empty. `cost` is the summed edge weight for Dijkstra results; BFS leaves
/// it at 0.0.
#[derive(Debug, Clone)]
pub struct PathResult {
    pub nodes: Vec<u64>,
    pub edges: Vec<u64>,
    pub cost: f64,
}
|
||||
|
||||
impl PathResult {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
nodes: Vec::new(),
|
||||
edges: Vec::new(),
|
||||
cost: 0.0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_nodes(mut self, nodes: Vec<u64>) -> Self {
|
||||
self.nodes = nodes;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_edges(mut self, edges: Vec<u64>) -> Self {
|
||||
self.edges = edges;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_cost(mut self, cost: f64) -> Self {
|
||||
self.cost = cost;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.nodes.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.nodes.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Breadth-First Search to find shortest path (by hop count)
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `graph` - The graph to search
|
||||
/// * `start` - Starting node ID
|
||||
/// * `end` - Target node ID
|
||||
/// * `edge_types` - Optional filter for edge types (None means all types)
|
||||
/// * `max_hops` - Maximum path length
|
||||
///
|
||||
/// # Returns
|
||||
/// Some(PathResult) if path found, None otherwise
|
||||
pub fn bfs(
|
||||
graph: &GraphStore,
|
||||
start: u64,
|
||||
end: u64,
|
||||
edge_types: Option<&[String]>,
|
||||
max_hops: usize,
|
||||
) -> Option<PathResult> {
|
||||
if start == end {
|
||||
return Some(PathResult::new().with_nodes(vec![start]));
|
||||
}
|
||||
|
||||
let mut queue = VecDeque::new();
|
||||
let mut visited = HashSet::new();
|
||||
let mut parent: HashMap<u64, (u64, u64)> = HashMap::new(); // node -> (parent_node, edge_id)
|
||||
|
||||
queue.push_back((start, 0)); // (node_id, depth)
|
||||
visited.insert(start);
|
||||
|
||||
while let Some((current, depth)) = queue.pop_front() {
|
||||
if depth >= max_hops {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get outgoing edges
|
||||
let edges = graph.edges.get_outgoing(current);
|
||||
|
||||
for edge in edges {
|
||||
// Filter by edge type if specified
|
||||
if let Some(types) = edge_types {
|
||||
if !types.contains(&edge.edge_type) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let next = edge.target;
|
||||
|
||||
if !visited.contains(&next) {
|
||||
visited.insert(next);
|
||||
parent.insert(next, (current, edge.id));
|
||||
|
||||
if next == end {
|
||||
// Reconstruct path
|
||||
return Some(reconstruct_path(&parent, start, end));
|
||||
}
|
||||
|
||||
queue.push_back((next, depth + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Depth-First Search with visitor pattern
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `graph` - The graph to search
|
||||
/// * `start` - Starting node ID
|
||||
/// * `visitor` - Function called for each visited node, returns false to stop traversal
|
||||
pub fn dfs<F>(graph: &GraphStore, start: u64, mut visitor: F)
|
||||
where
|
||||
F: FnMut(u64) -> bool,
|
||||
{
|
||||
let mut visited = HashSet::new();
|
||||
let mut stack = vec![start];
|
||||
|
||||
while let Some(current) = stack.pop() {
|
||||
if visited.contains(¤t) {
|
||||
continue;
|
||||
}
|
||||
|
||||
visited.insert(current);
|
||||
|
||||
// Call visitor
|
||||
if !visitor(current) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Add neighbors to stack
|
||||
let neighbors = graph.edges.get_neighbors(current);
|
||||
for neighbor in neighbors.into_iter().rev() {
|
||||
if !visited.contains(&neighbor) {
|
||||
stack.push(neighbor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Priority-queue entry for Dijkstra's algorithm.
///
/// Ordered by `cost`, *reversed*, so that `BinaryHeap` (a max-heap) pops the
/// lowest-cost entry first.
#[derive(Debug, Clone)]
struct DijkstraState {
    node: u64,
    cost: f64,
    #[allow(dead_code)]
    edge: Option<u64>,
}

impl PartialEq for DijkstraState {
    // Defined via `cmp` so that equality is consistent with the total order
    // below (the previous float `==` was inconsistent with the claimed `Eq`
    // for NaN costs).
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for DijkstraState {}

impl PartialOrd for DijkstraState {
    // Canonical direction: PartialOrd delegates to Ord.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for DijkstraState {
    fn cmp(&self, other: &Self) -> Ordering {
        // `total_cmp` is a true total order on f64 (NaN included), so the
        // silent `Equal` fallback of the old `partial_cmp().unwrap_or(...)`
        // is no longer needed. Comparing `other` against `self` reverses the
        // order, turning the max-heap into a min-heap on cost.
        other.cost.total_cmp(&self.cost)
    }
}
|
||||
|
||||
/// Dijkstra's shortest path algorithm with weighted edges
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `graph` - The graph to search
|
||||
/// * `start` - Starting node ID
|
||||
/// * `end` - Target node ID
|
||||
/// * `weight_property` - Name of edge property to use as weight (defaults to 1.0 if missing)
|
||||
///
|
||||
/// # Returns
|
||||
/// Some(PathResult) with weighted cost if path found, None otherwise
|
||||
pub fn shortest_path_dijkstra(
|
||||
graph: &GraphStore,
|
||||
start: u64,
|
||||
end: u64,
|
||||
weight_property: &str,
|
||||
) -> Option<PathResult> {
|
||||
if start == end {
|
||||
return Some(PathResult::new().with_nodes(vec![start]).with_cost(0.0));
|
||||
}
|
||||
|
||||
let mut heap = BinaryHeap::new();
|
||||
let mut distances: HashMap<u64, f64> = HashMap::new();
|
||||
let mut parent: HashMap<u64, (u64, u64)> = HashMap::new();
|
||||
|
||||
distances.insert(start, 0.0);
|
||||
heap.push(DijkstraState {
|
||||
node: start,
|
||||
cost: 0.0,
|
||||
edge: None,
|
||||
});
|
||||
|
||||
while let Some(DijkstraState { node, cost, .. }) = heap.pop() {
|
||||
if node == end {
|
||||
let mut result = reconstruct_path(&parent, start, end);
|
||||
result.cost = cost;
|
||||
return Some(result);
|
||||
}
|
||||
|
||||
// Skip if we've found a better path already
|
||||
if let Some(&best_cost) = distances.get(&node) {
|
||||
if cost > best_cost {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Check all neighbors
|
||||
let edges = graph.edges.get_outgoing(node);
|
||||
|
||||
for edge in edges {
|
||||
let next = edge.target;
|
||||
let weight = edge.weight(weight_property);
|
||||
let next_cost = cost + weight;
|
||||
|
||||
let is_better = distances
|
||||
.get(&next)
|
||||
.map_or(true, |¤t_cost| next_cost < current_cost);
|
||||
|
||||
if is_better {
|
||||
distances.insert(next, next_cost);
|
||||
parent.insert(next, (node, edge.id));
|
||||
heap.push(DijkstraState {
|
||||
node: next,
|
||||
cost: next_cost,
|
||||
edge: Some(edge.id),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Reconstruct path from parent map
|
||||
fn reconstruct_path(parent: &HashMap<u64, (u64, u64)>, start: u64, end: u64) -> PathResult {
|
||||
let mut nodes = Vec::new();
|
||||
let mut edges = Vec::new();
|
||||
let mut current = end;
|
||||
|
||||
nodes.push(current);
|
||||
|
||||
while current != start {
|
||||
if let Some(&(prev, edge_id)) = parent.get(¤t) {
|
||||
edges.push(edge_id);
|
||||
nodes.push(prev);
|
||||
current = prev;
|
||||
} else {
|
||||
// Path broken, should not happen
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nodes.reverse();
|
||||
edges.reverse();
|
||||
|
||||
PathResult::new().with_nodes(nodes).with_edges(edges)
|
||||
}
|
||||
|
||||
/// Find all paths between two nodes (up to max_paths)
|
||||
pub fn find_all_paths(
|
||||
graph: &GraphStore,
|
||||
start: u64,
|
||||
end: u64,
|
||||
max_hops: usize,
|
||||
max_paths: usize,
|
||||
) -> Vec<PathResult> {
|
||||
let mut paths = Vec::new();
|
||||
let mut current_path = Vec::new();
|
||||
let mut visited = HashSet::new();
|
||||
|
||||
fn dfs_all_paths(
|
||||
graph: &GraphStore,
|
||||
current: u64,
|
||||
end: u64,
|
||||
max_hops: usize,
|
||||
max_paths: usize,
|
||||
current_path: &mut Vec<u64>,
|
||||
visited: &mut HashSet<u64>,
|
||||
paths: &mut Vec<PathResult>,
|
||||
) {
|
||||
if paths.len() >= max_paths {
|
||||
return;
|
||||
}
|
||||
|
||||
if current_path.len() > max_hops {
|
||||
return;
|
||||
}
|
||||
|
||||
current_path.push(current);
|
||||
visited.insert(current);
|
||||
|
||||
if current == end {
|
||||
paths.push(PathResult::new().with_nodes(current_path.clone()));
|
||||
} else {
|
||||
let neighbors = graph.edges.get_neighbors(current);
|
||||
for neighbor in neighbors {
|
||||
if !visited.contains(&neighbor) {
|
||||
dfs_all_paths(
|
||||
graph,
|
||||
neighbor,
|
||||
end,
|
||||
max_hops,
|
||||
max_paths,
|
||||
current_path,
|
||||
visited,
|
||||
paths,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
current_path.pop();
|
||||
visited.remove(¤t);
|
||||
}
|
||||
|
||||
dfs_all_paths(
|
||||
graph,
|
||||
start,
|
||||
end,
|
||||
max_hops,
|
||||
max_paths,
|
||||
&mut current_path,
|
||||
&mut visited,
|
||||
&mut paths,
|
||||
);
|
||||
|
||||
paths
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Diamond graph:
    ///   1 -> 2 -> 3 -> 4
    ///    \-> 5 ------->/
    fn create_test_graph() -> GraphStore {
        let graph = GraphStore::new();

        let ids: Vec<u64> = (0..5)
            .map(|_| graph.add_node(vec![], HashMap::new()))
            .collect();
        let (n1, n2, n3, n4, n5) = (ids[0], ids[1], ids[2], ids[3], ids[4]);

        for (from, to) in [(n1, n2), (n2, n3), (n3, n4), (n1, n5), (n5, n4)] {
            graph
                .add_edge(from, to, "KNOWS".to_string(), HashMap::new())
                .unwrap();
        }

        graph
    }

    #[test]
    fn test_bfs() {
        let graph = create_test_graph();

        let path = bfs(&graph, 1, 4, None, 10).expect("1 and 4 are connected");
        // Shortest route by hop count is 1 -> 5 -> 4.
        assert_eq!(path.nodes, vec![1, 5, 4]);
        assert_eq!(path.len(), 3);
    }

    #[test]
    fn test_dfs() {
        let graph = create_test_graph();

        let mut order = Vec::new();
        dfs(&graph, 1, |node| {
            order.push(node);
            true
        });

        assert!(order.contains(&1));
        assert!(order.len() <= 5);
    }

    #[test]
    fn test_dijkstra() {
        let graph = GraphStore::new();

        let a = graph.add_node(vec![], HashMap::new());
        let b = graph.add_node(vec![], HashMap::new());
        let c = graph.add_node(vec![], HashMap::new());

        let weighted = |w: f64| HashMap::from([("weight".to_string(), w.into())]);
        graph
            .add_edge(a, b, "KNOWS".to_string(), weighted(5.0))
            .unwrap();
        graph
            .add_edge(b, c, "KNOWS".to_string(), weighted(3.0))
            .unwrap();
        graph
            .add_edge(a, c, "KNOWS".to_string(), weighted(10.0))
            .unwrap();

        let path = shortest_path_dijkstra(&graph, a, c, "weight").expect("a reaches c");
        // Two-hop route (5 + 3) beats the direct 10-weight edge.
        assert_eq!(path.cost, 8.0);
        assert_eq!(path.nodes, vec![a, b, c]);
    }

    #[test]
    fn test_find_all_paths() {
        let graph = create_test_graph();

        let paths = find_all_paths(&graph, 1, 4, 10, 10);
        // The diamond yields at least 1->2->3->4 and 1->5->4.
        assert!(paths.len() >= 2);
    }
}
|
||||
Reference in New Issue
Block a user