Files
wifi-densepose/crates/ruvector-postgres/docs/integration-plans/04-hyperbolic-embeddings.md
ruv d803bfe2b1 Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector
git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00

20 KiB
Raw Blame History

Hyperbolic Embeddings Integration Plan

Overview

Integrate hyperbolic geometry operations into PostgreSQL for hierarchical data representation, enabling embeddings in Poincaré ball and Lorentz (hyperboloid) models with native distance functions and indexing.

Architecture

┌─────────────────────────────────────────────────────────────────┐
│                     PostgreSQL Extension                         │
├─────────────────────────────────────────────────────────────────┤
│  ┌─────────────────────────────────────────────────────────┐    │
│  │                 Hyperbolic Type System                   │    │
│  │  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐   │    │
│  │  │   Poincaré   │  │   Lorentz    │  │   Klein      │   │    │
│  │  │     Ball     │  │ Hyperboloid  │  │    Model     │   │    │
│  │  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘   │    │
│  └─────────┼─────────────────┼─────────────────┼───────────┘    │
│            └─────────────────┴─────────────────┘                │
│                              ▼                                   │
│              ┌───────────────────────────┐                       │
│              │   Riemannian Operations   │                       │
│              │   (Exponential, Log, PT)  │                       │
│              └───────────────────────────┘                       │
└─────────────────────────────────────────────────────────────────┘

Module Structure

src/
├── hyperbolic/
│   ├── mod.rs              # Module exports
│   ├── types/
│   │   ├── poincare.rs     # Poincaré ball model
│   │   ├── lorentz.rs      # Lorentz/hyperboloid model
│   │   └── klein.rs        # Klein model (projective)
│   ├── manifold.rs         # Manifold operations
│   ├── distance.rs         # Distance functions
│   ├── index/
│   │   ├── htree.rs        # Hyperbolic tree index
│   │   └── hnsw_hyper.rs   # HNSW for hyperbolic space
│   └── operators.rs        # SQL operators

SQL Interface

Hyperbolic Types

-- Create hyperbolic embedding column
CREATE TABLE hierarchical_nodes (
    id SERIAL PRIMARY KEY,
    name TEXT,
    euclidean_embedding vector(128),
    poincare_embedding hyperbolic(128),      -- Poincaré ball
    lorentz_embedding hyperboloid(129),      -- Lorentz model (d+1 dims)
    -- REAL (float4) so the column can be passed directly to the ruvector_*
    -- functions, whose curvature argument is a Rust f32. A double precision
    -- column would not be cast down implicitly in a function call.
    -- The Rust side rejects non-negative curvature, so enforce it here too.
    curvature REAL NOT NULL DEFAULT -1.0
        CONSTRAINT hierarchical_nodes_curvature_check CHECK (curvature < 0)
);

-- Insert a Euclidean embedding; it is projected by the UPDATE below
-- ('[0.1, 0.2, ...]' is an abbreviated placeholder for a 128-element literal)
INSERT INTO hierarchical_nodes (name, euclidean_embedding)
VALUES ('root', '[0.1, 0.2, ...]');

-- Auto-project to hyperbolic space.
-- Restricted to rows that still lack a projection so the statement is an
-- idempotent backfill rather than a full-table rewrite.
UPDATE hierarchical_nodes
SET poincare_embedding = ruvector_to_poincare(euclidean_embedding, curvature)
WHERE poincare_embedding IS NULL
  AND euclidean_embedding IS NOT NULL;

Distance Operations

-- Poincaré distance: the ten rows closest to query_point
SELECT
    id,
    name,
    ruvector_poincare_distance(poincare_embedding, query_point) AS dist
FROM hierarchical_nodes
ORDER BY dist ASC
LIMIT 10;

-- Lorentz distance (often more numerically stable)
SELECT
    id,
    name,
    ruvector_lorentz_distance(lorentz_embedding, query_point) AS dist
FROM hierarchical_nodes
ORDER BY dist ASC
LIMIT 10;

-- Custom curvature, using named-argument notation
SELECT
    ruvector_hyperbolic_distance(
        a := point_a,
        b := point_b,
        model := 'poincare',
        curvature := -0.5
    );

Hyperbolic Operations

-- Möbius addition (translation in Poincaré ball)
SELECT
    ruvector_mobius_add(
        point_a,
        point_b,
        curvature := -1.0
    );

-- Exponential map (tangent vector → manifold point)
SELECT
    ruvector_exp_map(
        base_point,
        tangent_vector,
        curvature := -1.0
    );

-- Logarithmic map (manifold point → tangent vector)
SELECT
    ruvector_log_map(
        base_point,
        target_point,
        curvature := -1.0
    );

-- Parallel transport (move vector along geodesic)
SELECT
    ruvector_parallel_transport(
        vector,
        from_point,
        to_point,
        curvature := -1.0
    );

-- Geodesic midpoint
-- (no curvature argument is passed here; presumably it is defaulted — confirm)
SELECT
    ruvector_geodesic_midpoint(
        point_a,
        point_b
    );

-- Project Euclidean to hyperbolic
SELECT
    ruvector_project_to_hyperbolic(
        euclidean_vec,
        model := 'poincare'
    );

Hyperbolic Index

-- Create hyperbolic HNSW index.
-- The indexed column is given by name only: after a column, CREATE INDEX
-- accepts an optional operator class, not a type such as hyperbolic(128).
CREATE INDEX hierarchical_nodes_poincare_embedding_idx
ON hierarchical_nodes
USING ruvector_hyperbolic (poincare_embedding)
WITH (
    model = 'poincare',
    curvature = -1.0,
    m = 16,
    ef_construction = 64
);

-- Hyperbolic k-NN search
SELECT
    id,
    name
FROM hierarchical_nodes
ORDER BY poincare_embedding <~> query_point  -- <~> is hyperbolic distance
LIMIT 10;

Implementation Phases

Phase 1: Poincaré Ball Model (Week 1-3)

// src/hyperbolic/types/poincare.rs

use simsimd::SpatialSimilarity;

/// Poincaré ball model B^n_c = {x ∈ R^n : c||x||² < 1}
pub struct PoincareBall {
    dim: usize,
    curvature: f32,  // Negative curvature, typically -1.0
}

impl PoincareBall {
    pub fn new(dim: usize, curvature: f32) -> Self {
        assert!(curvature < 0.0, "Curvature must be negative");
        Self { dim, curvature }
    }

    /// Conformal factor λ_c(x) = 2 / (1 - c||x||²)
    #[inline]
    fn conformal_factor(&self, x: &[f32]) -> f32 {
        let c = -self.curvature;
        let norm_sq = self.norm_sq(x);
        2.0 / (1.0 - c * norm_sq)
    }

    /// Poincaré distance: d(x,y) = (2/√c) * arctanh(√c * ||x ⊕_c y||)
    pub fn distance(&self, x: &[f32], y: &[f32]) -> f32 {
        let c = -self.curvature;
        let sqrt_c = c.sqrt();

        // Möbius addition: -x ⊕ y
        let neg_x: Vec<f32> = x.iter().map(|&xi| -xi).collect();
        let mobius_sum = self.mobius_add(&neg_x, y);
        let norm = self.norm(&mobius_sum);

        (2.0 / sqrt_c) * (sqrt_c * norm).atanh()
    }

    /// Möbius addition in Poincaré ball
    pub fn mobius_add(&self, x: &[f32], y: &[f32]) -> Vec<f32> {
        let c = -self.curvature;
        let x_norm_sq = self.norm_sq(x);
        let y_norm_sq = self.norm_sq(y);
        let xy_dot = self.dot(x, y);

        let num_coef = 1.0 + 2.0 * c * xy_dot + c * y_norm_sq;
        let y_coef = 1.0 - c * x_norm_sq;
        let denom = 1.0 + 2.0 * c * xy_dot + c * c * x_norm_sq * y_norm_sq;

        x.iter().zip(y.iter())
            .map(|(&xi, &yi)| (num_coef * xi + y_coef * yi) / denom)
            .collect()
    }

    /// Exponential map: tangent space → manifold
    pub fn exp_map(&self, base: &[f32], tangent: &[f32]) -> Vec<f32> {
        let c = -self.curvature;
        let sqrt_c = c.sqrt();

        let lambda = self.conformal_factor(base);
        let tangent_norm = self.norm(tangent);

        if tangent_norm < 1e-10 {
            return base.to_vec();
        }

        let coef = (sqrt_c * lambda * tangent_norm / 2.0).tanh() / (sqrt_c * tangent_norm);
        let direction: Vec<f32> = tangent.iter().map(|&t| t * coef).collect();

        self.mobius_add(base, &direction)
    }

    /// Logarithmic map: manifold → tangent space
    pub fn log_map(&self, base: &[f32], target: &[f32]) -> Vec<f32> {
        let c = -self.curvature;
        let sqrt_c = c.sqrt();

        // -base ⊕ target
        let neg_base: Vec<f32> = base.iter().map(|&b| -b).collect();
        let addition = self.mobius_add(&neg_base, target);
        let add_norm = self.norm(&addition);

        if add_norm < 1e-10 {
            return vec![0.0; self.dim];
        }

        let lambda = self.conformal_factor(base);
        let coef = (2.0 / (sqrt_c * lambda)) * (sqrt_c * add_norm).atanh() / add_norm;

        addition.iter().map(|&a| a * coef).collect()
    }

    /// Project point to ball (clamp norm)
    pub fn project(&self, x: &[f32]) -> Vec<f32> {
        let c = -self.curvature;
        let max_norm = (1.0 / c).sqrt() - 1e-5;
        let norm = self.norm(x);

        if norm <= max_norm {
            x.to_vec()
        } else {
            let scale = max_norm / norm;
            x.iter().map(|&xi| xi * scale).collect()
        }
    }

    #[inline]
    fn norm_sq(&self, x: &[f32]) -> f32 {
        f32::dot(x, x).unwrap_or_else(|| x.iter().map(|&xi| xi * xi).sum())
    }

    #[inline]
    fn norm(&self, x: &[f32]) -> f32 {
        self.norm_sq(x).sqrt()
    }

    #[inline]
    fn dot(&self, x: &[f32], y: &[f32]) -> f32 {
        f32::dot(x, y).unwrap_or_else(|| x.iter().zip(y.iter()).map(|(&a, &b)| a * b).sum())
    }
}

// PostgreSQL type
/// SQL-visible hyperbolic embedding: the point's coordinates together with the
/// curvature they are expressed in.
// NOTE(review): the SQL examples declare columns as `hyperbolic(128)`; no
// type-modifier handling is visible on this definition — confirm how the
// dimension is enforced.
#[derive(PostgresType, Serialize, Deserialize)]
#[pgx(sql = "CREATE TYPE hyperbolic")]
pub struct Hyperbolic {
    /// Coordinates of the point.
    data: Vec<f32>,
    /// Curvature stored alongside the coordinates.
    curvature: f32,
}

// PostgreSQL functions

/// SQL function: Poincaré-ball distance between `a` and `b`.
///
/// `curvature` defaults to -1.0 and must be negative (`PoincareBall::new`
/// asserts this). Raises an error if the two vectors differ in length.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_poincare_distance(a: Vec<f32>, b: Vec<f32>, curvature: default!(f32, -1.0)) -> f32 {
    // Without this check the element-wise zip in the model would silently
    // truncate to the shorter input. A panic here is reported to the client
    // as a PostgreSQL ERROR by the extension framework.
    assert_eq!(
        a.len(),
        b.len(),
        "ruvector_poincare_distance: dimension mismatch ({} vs {})",
        a.len(),
        b.len()
    );

    let ball = PoincareBall::new(a.len(), curvature);
    ball.distance(&a, &b)
}

/// SQL function: Möbius addition `a ⊕ b` in the Poincaré ball.
///
/// `curvature` defaults to -1.0 and must be negative. Raises an error if the
/// two vectors differ in length.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_mobius_add(a: Vec<f32>, b: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
    // Reject mismatched inputs instead of letting the zip-based kernel
    // return a silently truncated result.
    assert_eq!(
        a.len(),
        b.len(),
        "ruvector_mobius_add: dimension mismatch ({} vs {})",
        a.len(),
        b.len()
    );

    let ball = PoincareBall::new(a.len(), curvature);
    ball.mobius_add(&a, &b)
}

/// SQL function: exponential map of `tangent` at `base` in the Poincaré ball.
///
/// `curvature` defaults to -1.0 and must be negative. Raises an error if
/// `base` and `tangent` differ in length.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_exp_map(base: Vec<f32>, tangent: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
    // Reject mismatched inputs instead of letting the zip-based kernel
    // return a silently truncated result.
    assert_eq!(
        base.len(),
        tangent.len(),
        "ruvector_exp_map: dimension mismatch ({} vs {})",
        base.len(),
        tangent.len()
    );

    let ball = PoincareBall::new(base.len(), curvature);
    ball.exp_map(&base, &tangent)
}

/// SQL function: logarithmic map of `target` at `base` in the Poincaré ball.
///
/// `curvature` defaults to -1.0 and must be negative. Raises an error if
/// `base` and `target` differ in length.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_log_map(base: Vec<f32>, target: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
    // Reject mismatched inputs instead of letting the zip-based kernel
    // return a silently truncated result.
    assert_eq!(
        base.len(),
        target.len(),
        "ruvector_log_map: dimension mismatch ({} vs {})",
        base.len(),
        target.len()
    );

    let ball = PoincareBall::new(base.len(), curvature);
    ball.log_map(&base, &target)
}

Phase 2: Lorentz Model (Week 4-5)

// src/hyperbolic/types/lorentz.rs

/// Lorentz (hyperboloid) model: H^n = {x ∈ R^{n+1} : <x,x>_L = -1/c, x_0 > 0}
/// where c = -curvature > 0.
/// More numerically stable than Poincaré for high dimensions
pub struct LorentzModel {
    dim: usize,  // Ambient dimension (n+1)
    curvature: f32,
}

impl LorentzModel {
    /// Creates a model with ambient dimension `dim` (n+1) and the given curvature.
    ///
    /// # Panics
    /// Panics if `curvature` is not strictly negative (this includes NaN).
    pub fn new(dim: usize, curvature: f32) -> Self {
        assert!(curvature < 0.0, "Curvature must be negative");
        Self { dim, curvature }
    }

    /// Minkowski inner product: <x,y>_L = -x_0*y_0 + Σ x_i*y_i
    ///
    /// # Panics
    /// Panics if either slice is empty (index 0 is the time-like coordinate).
    #[inline]
    pub fn minkowski_dot(&self, x: &[f32], y: &[f32]) -> f32 {
        -x[0] * y[0] + x[1..].iter().zip(y[1..].iter())
            .map(|(&a, &b)| a * b)
            .sum::<f32>()
    }

    /// Lorentz distance: d(x,y) = (1/√c) * arcosh(-c * <x,y>_L)
    pub fn distance(&self, x: &[f32], y: &[f32]) -> f32 {
        let c = -self.curvature;
        let sqrt_c = c.sqrt();
        let inner = self.minkowski_dot(x, y);

        // For points on the hyperboloid the argument is >= 1, but rounding can
        // push it slightly below, where acosh returns NaN. Clamp with a
        // comparison (not `f32::max`) so a genuine NaN input still propagates.
        let raw = -c * inner;
        let arg = if raw < 1.0 { 1.0 } else { raw };

        (1.0 / sqrt_c) * arg.acosh()
    }

    /// Exponential map on hyperboloid:
    /// exp_x(v) = cosh(√c ||v||_L) x + sinh(√c ||v||_L) / (√c ||v||_L) · v
    ///
    /// Returns `base` unchanged when the tangent vector is (numerically) zero.
    pub fn exp_map(&self, base: &[f32], tangent: &[f32]) -> Vec<f32> {
        let c = -self.curvature;
        let sqrt_c = c.sqrt();

        let tangent_norm_sq = self.minkowski_dot(tangent, tangent);
        if tangent_norm_sq < 1e-10 {
            return base.to_vec();
        }
        let tangent_norm = tangent_norm_sq.sqrt();

        // The 1/√c factor on the sinh term keeps the result on <x,x>_L = -1/c
        // for curvatures other than -1.
        let theta = sqrt_c * tangent_norm;
        let coef1 = theta.cosh();
        let coef2 = theta.sinh() / theta;

        base.iter().zip(tangent.iter())
            .map(|(&b, &t)| coef1 * b + coef2 * t)
            .collect()
    }

    /// Logarithmic map on hyperboloid:
    /// log_x(y) = (√c d / sinh(√c d)) · (y + c <x,y>_L x), with d = d(x,y)
    ///
    /// Returns a zero vector of length `dim` when `target` coincides with `base`.
    pub fn log_map(&self, base: &[f32], target: &[f32]) -> Vec<f32> {
        let c = -self.curvature;
        let sqrt_c = c.sqrt();

        let inner = self.minkowski_dot(base, target);
        let dist = self.distance(base, target);

        if dist < 1e-10 {
            return vec![0.0; self.dim];
        }

        let theta = sqrt_c * dist;
        let coef = theta / theta.sinh();

        // `inner` is negative, so adding c * inner * base removes the component
        // of `target` along `base`, leaving a vector tangent at `base`
        // (<base, result>_L = 0).
        target.iter().zip(base.iter())
            .map(|(&t, &b)| coef * (t + c * inner * b))
            .collect()
    }

    /// Project to hyperboloid (ensure constraint satisfied)
    ///
    /// Keeps the spatial coordinates `x[1..]` and recomputes the time-like
    /// coordinate as x_0 = √(1/c + ||x[1..]||²).
    ///
    /// # Panics
    /// Panics if `x` is empty.
    pub fn project(&self, x: &[f32]) -> Vec<f32> {
        let c = -self.curvature;
        let space_norm_sq: f32 = x[1..].iter().map(|&xi| xi * xi).sum();
        let x0 = ((1.0 / c) + space_norm_sq).sqrt();

        let mut result = vec![x0];
        result.extend_from_slice(&x[1..]);
        result
    }

    /// Convert from Poincaré ball to Lorentz
    ///
    /// Uses `poincare_curvature` (not this model's own curvature) for the map:
    /// x_0 = (1 + c||p||²) / (√c (1 - c||p||²)),  x_i = 2 p_i / (1 - c||p||²).
    pub fn from_poincare(&self, poincare: &[f32], poincare_curvature: f32) -> Vec<f32> {
        let c = -poincare_curvature;
        let norm_sq: f32 = poincare.iter().map(|&x| x * x).sum();

        // The 1/√c on x_0 is what makes <x,x>_L equal -1/c rather than -1.
        let x0 = (1.0 + c * norm_sq) / ((1.0 - c * norm_sq) * c.sqrt());
        let coef = 2.0 / (1.0 - c * norm_sq);

        let mut result = vec![x0];
        result.extend(poincare.iter().map(|&p| coef * p));
        result
    }

    /// Convert from Lorentz to Poincaré ball: p_i = x_i / (1 + √c x_0)
    ///
    /// # Panics
    /// Panics if `lorentz` is empty.
    pub fn to_poincare(&self, lorentz: &[f32]) -> Vec<f32> {
        let sqrt_c = (-self.curvature).sqrt();
        let denom = 1.0 + sqrt_c * lorentz[0];
        lorentz[1..].iter().map(|&x| x / denom).collect()
    }
}

/// SQL function: Lorentz (hyperboloid) distance between `a` and `b`.
///
/// Both inputs are ambient-space vectors whose first element is the time-like
/// coordinate. `curvature` defaults to -1.0. Raises an error if the inputs are
/// empty or differ in length.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_lorentz_distance(a: Vec<f32>, b: Vec<f32>, curvature: default!(f32, -1.0)) -> f32 {
    // The Minkowski product indexes element 0 and zips the remainder, so an
    // empty input would fail with an opaque out-of-bounds panic and a length
    // mismatch would be silently truncated. Report both explicitly instead.
    assert!(
        !a.is_empty() && !b.is_empty(),
        "ruvector_lorentz_distance: inputs must not be empty"
    );
    assert_eq!(
        a.len(),
        b.len(),
        "ruvector_lorentz_distance: dimension mismatch ({} vs {})",
        a.len(),
        b.len()
    );

    let model = LorentzModel::new(a.len(), curvature);
    model.distance(&a, &b)
}

/// SQL function: converts a Poincaré-ball point to Lorentz coordinates.
///
/// The same `curvature` (default -1.0) is used both to construct the model and
/// as the curvature of the incoming Poincaré point.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_poincare_to_lorentz(poincare: Vec<f32>, curvature: default!(f32, -1.0)) -> Vec<f32> {
    // The Lorentz representation has one more coordinate than the ball.
    let ambient_dim = poincare.len() + 1;
    LorentzModel::new(ambient_dim, curvature).from_poincare(&poincare, curvature)
}

/// SQL function: converts a Lorentz-model point to Poincaré-ball coordinates.
///
/// Takes no curvature argument; the model is always built with curvature -1.0.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_lorentz_to_poincare(lorentz: Vec<f32>) -> Vec<f32> {
    let unit_curvature = -1.0;
    LorentzModel::new(lorentz.len(), unit_curvature).to_poincare(&lorentz)
}

Phase 3: Hyperbolic HNSW Index (Week 6-8)

// src/hyperbolic/index/hnsw_hyper.rs

/// HNSW index adapted for hyperbolic space
pub struct HyperbolicHnsw {
    /// Graph layers, searched by layer index.
    layers: Vec<HnswLayer>,
    /// Geometry that supplies the distance and projection functions.
    manifold: HyperbolicManifold,
    /// Projected vectors keyed by node id. `insert` stores into this map, so
    /// it has to be part of the index state.
    vectors: std::collections::HashMap<u64, Vec<f32>>,
    /// Neighbour budget passed to neighbour selection on insert.
    m: usize,
    /// Candidate-list size used while inserting.
    ef_construction: usize,
}

/// Geometry backing a `HyperbolicHnsw`: selects which model's `distance` and
/// `project` the index dispatches to.
pub enum HyperbolicManifold {
    /// Poincaré ball model.
    Poincare(PoincareBall),
    /// Lorentz (hyperboloid) model.
    Lorentz(LorentzModel),
}

impl HyperbolicHnsw {
    /// Distance function based on manifold
    fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
        match &self.manifold {
            HyperbolicManifold::Poincare(ball) => ball.distance(a, b),
            HyperbolicManifold::Lorentz(model) => model.distance(a, b),
        }
    }

    /// Projects `vector` onto the configured manifold. Shared by `insert` and
    /// `search` so stored vectors and queries are normalised identically.
    fn project_to_manifold(&self, vector: &[f32]) -> Vec<f32> {
        match &self.manifold {
            HyperbolicManifold::Poincare(ball) => ball.project(vector),
            HyperbolicManifold::Lorentz(model) => model.project(vector),
        }
    }

    /// Insert with hyperbolic distance
    ///
    /// Projects `vector` onto the manifold, links `id` into every layer from
    /// its randomly drawn level down to layer 0, then stores the projected
    /// vector under `id`.
    pub fn insert(&mut self, id: u64, vector: &[f32]) {
        // Project to manifold first
        let projected = self.project_to_manifold(vector);

        // Standard HNSW insertion with hyperbolic distance
        let entry_point = self.entry_point();
        let level = self.random_level();

        for l in (0..=level).rev() {
            let candidates = self.search_layer(&projected, entry_point, self.ef_construction, l);
            let neighbors = self.select_neighbors(&projected, &candidates, self.m);
            self.connect(id, &neighbors, l);
        }

        self.vectors.insert(id, projected);
    }

    /// Search with hyperbolic distance
    ///
    /// Returns at most `k` `(id, distance)` pairs taken from the front of the
    /// layer-0 candidate list.
    pub fn search(&self, query: &[f32], k: usize, ef: usize) -> Vec<(u64, f32)> {
        let projected = self.project_to_manifold(query);

        // A candidate list shorter than k could never yield k results, so the
        // effective beam width is at least k.
        let beam_width = ef.max(k);

        let mut candidates = self.search_layer(&projected, self.entry_point(), beam_width, 0);
        candidates.truncate(k);
        candidates
    }
}

// PostgreSQL index access method
// NOTE(review): placeholder — the body contains no return expression yet, so
// this does not compile as written until the handler is implemented.
#[pg_extern]
fn ruvector_hyperbolic_hnsw_handler(internal: Internal) -> Internal {
    // Index AM handler
}

Phase 4: Euclidean to Hyperbolic Projection (Week 9-10)

// src/hyperbolic/manifold.rs

/// Project Euclidean embeddings to hyperbolic space
pub struct HyperbolicProjection {
    // Target model; `project_batch` calls `exp_map` on it.
    // NOTE(review): `HyperbolicModel` is not defined anywhere in this plan
    // (the index code uses `HyperbolicManifold`) — confirm the intended type.
    model: HyperbolicModel,
    // Strategy applied to each input vector.
    method: ProjectionMethod,
}

/// Strategy `HyperbolicProjection` uses to map a Euclidean vector into the model.
pub enum ProjectionMethod {
    /// Direct scaling to fit in ball
    Scale,
    /// Learned exponential map from origin
    // NOTE(review): described as "learned", but `project_batch` applies
    // `exp_map` at the zero vector with no trained parameters — confirm wording.
    ExponentialMap,
    /// Centroid-based projection
    // `project_batch` subtracts the centroid from each vector and applies
    // `exp_map` based at the centroid.
    Centroid { centroid: Vec<f32> },
}

impl HyperbolicProjection {
    /// Project batch of Euclidean vectors
    ///
    /// Applies the configured `ProjectionMethod` to every vector in parallel
    /// and returns the results in input order. An empty batch yields an empty
    /// result.
    pub fn project_batch(&self, vectors: &[Vec<f32>]) -> Vec<Vec<f32>> {
        // The ExponentialMap branch sizes its origin from the first vector,
        // which would panic on an empty batch; nothing needs projecting anyway.
        if vectors.is_empty() {
            return Vec::new();
        }

        match &self.method {
            ProjectionMethod::Scale => {
                vectors.par_iter()
                    .map(|v| self.scale_project(v))
                    .collect()
            }
            ProjectionMethod::ExponentialMap => {
                // Origin of the tangent space, sized from the first vector and
                // shared by the whole batch.
                let origin = vec![0.0; vectors[0].len()];
                vectors.par_iter()
                    .map(|v| self.model.exp_map(&origin, v))
                    .collect()
            }
            ProjectionMethod::Centroid { centroid } => {
                vectors.par_iter()
                    .map(|v| {
                        // Treat the offset from the centroid as a tangent
                        // vector based at the centroid.
                        let tangent: Vec<f32> = v.iter()
                            .zip(centroid.iter())
                            .map(|(&vi, &ci)| vi - ci)
                            .collect();
                        self.model.exp_map(centroid, &tangent)
                    })
                    .collect()
            }
        }
    }

    /// Rescales `v` so its Euclidean norm does not exceed 0.99; vectors already
    /// within that bound are returned unchanged.
    // NOTE(review): the 0.99 bound does not depend on the model's curvature,
    // while the ball radius is 1/√c — confirm this is intended for |curvature| > 1.
    fn scale_project(&self, v: &[f32]) -> Vec<f32> {
        let norm: f32 = v.iter().map(|&x| x * x).sum::<f32>().sqrt();
        let max_norm = 0.99;  // Stay within ball

        if norm <= max_norm {
            v.to_vec()
        } else {
            let scale = max_norm / norm;
            v.iter().map(|&x| x * scale).collect()
        }
    }
}

/// SQL function: projects a Euclidean vector into the Poincaré ball.
///
/// `curvature` defaults to -1.0 and must be negative; `method` defaults to
/// 'scale' and is converted into a `ProjectionMethod`.
///
/// Marked `immutable, parallel_safe` like the other pure ruvector functions:
/// the result depends only on the arguments, and the function is meant to be
/// usable wherever PostgreSQL requires an immutable expression.
#[pg_extern(immutable, parallel_safe)]
fn ruvector_to_poincare(
    euclidean: Vec<f32>,
    curvature: default!(f32, -1.0),
    method: default!(&str, "'scale'"),
) -> Vec<f32> {
    let model = PoincareBall::new(euclidean.len(), curvature);
    let projection = HyperbolicProjection::new(model, method.into());
    projection.project(&euclidean)
}

/// SQL function: batch-projects one column of a table into another.
///
/// Declared to return an `i64`; the implementation is still a placeholder.
// NOTE(review): `table_name`, `euclidean_column` and `output_column` are
// identifiers and cannot be bound as query parameters. If the eventual
// statement is assembled as a string they must be quoted or validated against
// the catalog before interpolation.
#[pg_extern]
fn ruvector_batch_to_poincare(
    table_name: &str,
    euclidean_column: &str,
    output_column: &str,
    curvature: default!(f32, -1.0),
) -> i64 {
    // Batch projection using SPI
    Spi::connect(|client| {
        // ... batch update
    })
}

Use Cases

Hierarchical Data (Taxonomies, Org Charts)

-- Embed taxonomy with parent-child relationships preserved
-- Children naturally cluster closer to parents in hyperbolic space
CREATE TABLE taxonomy (
    id SERIAL PRIMARY KEY,
    name TEXT,
    parent_id INTEGER REFERENCES taxonomy(id),
    embedding hyperbolic(64)
);

-- Find all items in subtree (leveraging hyperbolic geometry)
-- The distance to the root is written once in the CTE and reused for both
-- the filter and the ordering.
WITH scored AS (
    SELECT
        id,
        name,
        parent_id,
        ruvector_poincare_distance(embedding, root_embedding) AS root_distance
    FROM taxonomy
)
SELECT
    id,
    name,
    parent_id,
    root_distance
FROM scored
WHERE root_distance < subtree_radius
ORDER BY root_distance;

Knowledge Graphs

-- Entities with hierarchical relationships
-- Hyperbolic space captures asymmetric relations naturally
-- The distance itself is symmetric, so each unordered pair is emitted once
-- (entity_a.id < entity_b.id) instead of appearing twice in the top 100.
SELECT
    entity_a.name AS entity_a_name,
    entity_b.name AS entity_b_name,
    ruvector_poincare_distance(entity_a.embedding, entity_b.embedding) AS distance
FROM entities AS entity_a
INNER JOIN entities AS entity_b
    ON entity_a.id < entity_b.id
ORDER BY distance
LIMIT 100;

Benchmarks

| Operation | Dimension | Curvature | Time (μs) | vs Euclidean |
|---|---|---|---|---|
| Poincaré Distance | 128 | -1.0 | 2.1 | 1.8x slower |
| Lorentz Distance | 129 | -1.0 | 1.5 | 1.3x slower |
| Möbius Addition | 128 | -1.0 | 3.2 | N/A |
| Exp Map | 128 | -1.0 | 4.5 | N/A |
| HNSW Search (hyper) | 128 | -1.0 | 850 | 1.5x slower |

Dependencies

[dependencies]
# SIMD for fast operations
# (the Poincaré model imports simsimd::SpatialSimilarity for its dot product)
simsimd = "5.9"

# Numerical stability
# NOTE(review): no num-traits item is referenced by the code in this plan — confirm it is needed.
# NOTE(review): the projection code calls par_iter(), presumably from rayon, which is not listed here — confirm.
num-traits = "0.2"

Feature Flags

[features]
# Base feature; enables nothing else by itself.
hyperbolic = []
# Per-model features, each pulling in the base feature.
hyperbolic-poincare = ["hyperbolic"]
hyperbolic-lorentz = ["hyperbolic"]
# Index support also requires the "index-hnsw" feature.
# NOTE(review): "index-hnsw" is not declared in this snippet — confirm it exists in the crate manifest.
hyperbolic-index = ["hyperbolic", "index-hnsw"]
# Convenience feature enabling both models and the index.
hyperbolic-all = ["hyperbolic-poincare", "hyperbolic-lorentz", "hyperbolic-index"]