Merge commit 'd803bfe2b1fe7f5e219e50ac20d6801a0a58ac75' as 'vendor/ruvector'

This commit is contained in:
ruv
2026-02-28 14:39:40 -05:00
7854 changed files with 3522914 additions and 0 deletions

View File

@@ -0,0 +1,199 @@
# RuVector-Postgres Development & Testing Dockerfile
# Multi-stage build with PostgreSQL version support (14-17)
# Default: PostgreSQL 17 (latest with pgrx 0.12 support)
# Note: PostgreSQL 18 requires pgrx 0.15.0+ (planned for future release)
ARG PG_VERSION=17
ARG RUST_VERSION=1.85
# ============================================================================
# Stage 1: Base Builder with Rust and PostgreSQL dev dependencies
# ============================================================================
FROM rust:${RUST_VERSION}-bookworm AS base-builder
# Re-declared so the global build argument is visible inside this stage
ARG PG_VERSION
# Add PostgreSQL APT repository.
# The signing key is saved to its own keyring file and referenced with
# `signed-by` instead of the deprecated `apt-key add`. Downloading straight to
# a file also removes the `wget | apt-key` pipe, whose upstream failure the
# default /bin/sh would not report.
# NOTE: wget is used before the package install below, so (as before) this
# step relies on wget already being present in the base image.
RUN wget --quiet -O /usr/share/keyrings/pgdg-archive-keyring.asc \
        https://www.postgresql.org/media/keys/ACCC4CF8.asc && \
    echo "deb [signed-by=/usr/share/keyrings/pgdg-archive-keyring.asc] http://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list
# Install PostgreSQL development dependencies for specified version
RUN apt-get update && apt-get install -y --no-install-recommends \
        postgresql-${PG_VERSION} \
        postgresql-server-dev-${PG_VERSION} \
        libclang-dev \
        clang \
        pkg-config \
        libssl-dev \
        cmake \
        wget \
        git \
    && rm -rf /var/lib/apt/lists/*
# Install pgrx (compatible with pgrx = "0.12" in Cargo.toml)
RUN cargo install cargo-pgrx --version 0.12.6 --locked
# Initialize pgrx against the pg_config of the PostgreSQL version installed above
RUN cargo pgrx init --pg${PG_VERSION} /usr/lib/postgresql/${PG_VERSION}/bin/pg_config
# Set PGRX environment for consistent builds (inherited by the later builder stages)
ENV PGRX_PG_CONFIG_PATH=/usr/lib/postgresql/${PG_VERSION}/bin/pg_config
ENV PGRX_HOME=/root/.pgrx
ENV PG_VERSION=${PG_VERSION}
# ============================================================================
# Stage 2: Dependency Cache Builder
# ============================================================================
FROM base-builder AS deps-builder
ARG PG_VERSION
# /build is the workspace root; the crate sits at its usual workspace-relative path
WORKDIR /build/crates/ruvector-postgres
# Only the manifest and build script are copied here, so this layer is not
# invalidated by source-only changes
COPY crates/ruvector-postgres/Cargo.toml crates/ruvector-postgres/build.rs ./
# Placeholder sources so cargo has something to compile
RUN mkdir -p src && \
    printf '%s\n' 'fn main() {}' > src/main.rs && \
    printf '%s\n' '#[no_mangle] pub extern "C" fn pg_finfo_dummy() {}' > src/lib.rs
# Warm the dependency cache; a failure is deliberately tolerated (|| true)
RUN cargo build --release --features pg${PG_VERSION} || true
# Drop the placeholders before the real sources are copied in a later stage
RUN rm -rf src
# ============================================================================
# Stage 3: Extension Builder
# ============================================================================
FROM deps-builder AS extension-builder
ARG PG_VERSION
# Write a minimal workspace manifest at /build so the member crates can resolve
# workspace inheritance (edition.workspace, version.workspace, etc.)
RUN cat > /build/Cargo.toml << 'WORKSPACE_EOF'
[workspace]
members = [
"crates/ruvector-postgres",
"crates/ruvector-solver",
"crates/ruvector-math",
"crates/ruvector-attention",
"crates/sona",
"crates/ruvector-domain-expansion",
"crates/ruvector-mincut-gated-transformer",
]
resolver = "2"
[workspace.package]
version = "2.0.4"
edition = "2021"
rust-version = "1.77"
license = "MIT"
authors = ["Ruvector Team"]
repository = "https://github.com/ruvnet/ruvector"
[workspace.dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
rand = "0.8"
rand_distr = "0.4"
tracing = "0.1"
rayon = "1.10"
crossbeam = "0.8"
dashmap = "6.1"
parking_lot = "0.12"
once_cell = "1.20"
criterion = { version = "0.5", features = ["html_reports"] }
proptest = "1.5"
nalgebra = { version = "0.33", default-features = false, features = ["std"] }
ndarray = "0.16"
chrono = "0.4"
anyhow = "1.0"
[profile.release]
opt-level = 3
lto = "fat"
codegen-units = 1
strip = true
panic = "unwind"
WORKSPACE_EOF
# Sibling crate needed by the gated-transformer feature
COPY crates/ruvector-mincut-gated-transformer /build/crates/ruvector-mincut-gated-transformer/
# v0.3 sibling crates, placed at their workspace-relative paths so that
# workspace inheritance keeps resolving
COPY crates/ruvector-solver /build/crates/ruvector-solver/
COPY crates/ruvector-math /build/crates/ruvector-math/
COPY crates/ruvector-attention /build/crates/ruvector-attention/
COPY crates/sona /build/crates/sona/
COPY crates/ruvector-domain-expansion /build/crates/ruvector-domain-expansion/
# rvf crates: optional path dependencies of ruvector-domain-expansion; Cargo
# still validates that the paths exist
COPY crates/rvf/rvf-types /build/crates/rvf/rvf-types/
COPY crates/rvf/rvf-wire /build/crates/rvf/rvf-wire/
COPY crates/rvf/rvf-crypto /build/crates/rvf/rvf-crypto/
# The real ruvector-postgres crate (WORKDIR is inherited from deps-builder)
COPY crates/ruvector-postgres/Cargo.toml \
     crates/ruvector-postgres/build.rs \
     crates/ruvector-postgres/ruvector.control \
     ./
COPY crates/ruvector-postgres/src ./src/
COPY crates/ruvector-postgres/sql ./sql/
COPY crates/ruvector-postgres/benches ./benches/
# Package the extension with every feature set, including the v0.3 modules
RUN cargo pgrx package \
        --pg-config /usr/lib/postgresql/${PG_VERSION}/bin/pg_config \
        --features pg${PG_VERSION},graph-complete,gated-transformer,analytics-complete,attention-extended,sona-learning,domain-expansion
# pgrx emits the .control and .so files but not the SQL, so the hand-written
# SQL scripts are added to the package tree. Inside a workspace, target/ lives
# at the workspace root (/build/target/). Each copy is best-effort: a missing
# script is silently skipped.
RUN for sql_file in \
        ruvector--0.3.0.sql \
        ruvector--2.0.0.sql \
        ruvector--2.0.0--0.3.0.sql; \
    do \
        cp "sql/${sql_file}" /build/target/release/ruvector-pg${PG_VERSION}/usr/share/postgresql/${PG_VERSION}/extension/ 2>/dev/null || true; \
    done
# ============================================================================
# Stage 4: Runtime (Production)
# ============================================================================
FROM postgres:${PG_VERSION}-bookworm AS runtime
ARG PG_VERSION
# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        libssl3 \
    && rm -rf /var/lib/apt/lists/*
# Copy built extension from builder (workspace target is at /build/target/)
COPY --from=extension-builder /build/target/release/ruvector-pg${PG_VERSION}/usr/share/postgresql/${PG_VERSION}/extension/* /usr/share/postgresql/${PG_VERSION}/extension/
COPY --from=extension-builder /build/target/release/ruvector-pg${PG_VERSION}/usr/lib/postgresql/${PG_VERSION}/lib/* /usr/lib/postgresql/${PG_VERSION}/lib/
# Copy initialization script with proper permissions
COPY --chmod=644 crates/ruvector-postgres/docker/init.sql /docker-entrypoint-initdb.d/
# Default credentials and database. These are baked into the image and visible
# to anyone who can inspect it, so treat them as development/test defaults and
# override them with `-e` at run time for anything else.
ENV POSTGRES_USER=ruvector
ENV POSTGRES_PASSWORD=ruvector
ENV POSTGRES_DB=ruvector_test
ENV PG_VERSION=${PG_VERSION}
# Enable data checksums when the cluster is first initialised
ENV POSTGRES_INITDB_ARGS="--data-checksums"
# Labels for version tracking
LABEL org.opencontainers.image.title="RuVector PostgreSQL Extension v0.3"
LABEL org.opencontainers.image.description="High-performance vector database extension for PostgreSQL with 143 SQL functions, Solver, Math, TDA, Extended Attention, Sona, and Domain Expansion"
LABEL org.opencontainers.image.version="0.3.0"
LABEL org.opencontainers.image.vendor="ruv.io"
LABEL org.opencontainers.image.source="https://github.com/ruvnet/ruvector"
LABEL ruvector.pg.version="${PG_VERSION}"
LABEL ruvector.features="attention,gnn,hybrid,tenancy,healing,learning,hyperbolic,graph,solver,math,tda,sona,domain-expansion"
# Health check.
# Probes over TCP loopback (-h 127.0.0.1) rather than the default Unix socket:
# during first-run initialisation the postgres entrypoint runs a temporary
# server that is reachable only through the socket, and a socket probe would
# report "healthy" before the init scripts have finished and the real server
# has started. Variables are quoted so unusual user/database names survive.
HEALTHCHECK --interval=5s --timeout=5s --start-period=10s --retries=5 \
    CMD pg_isready -h 127.0.0.1 -U "$POSTGRES_USER" -d "$POSTGRES_DB" || exit 1
EXPOSE 5432

View File

@@ -0,0 +1,59 @@
# Integration Test Runner Dockerfile for RuVector-Postgres
# Provides full Rust toolchain and test dependencies
FROM rust:1.83-bookworm
# PostgreSQL major version whose client tools are installed
ARG PG_VERSION=17
# Add PostgreSQL APT repository for client tools.
# The signing key is stored as a dedicated keyring file referenced through
# `signed-by` (replacing the deprecated `apt-key add`), and is downloaded
# straight to disk so no failure can hide behind a shell pipe.
# NOTE: relies on wget already being available in the base image, as before.
RUN wget --quiet -O /usr/share/keyrings/pgdg-archive-keyring.asc \
        https://www.postgresql.org/media/keys/ACCC4CF8.asc && \
    echo "deb [signed-by=/usr/share/keyrings/pgdg-archive-keyring.asc] http://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list
# Install dependencies (--no-install-recommends keeps the layer small and
# matches the main and benchmark Dockerfiles)
RUN apt-get update && apt-get install -y --no-install-recommends \
        # PostgreSQL client
        postgresql-client-${PG_VERSION} \
        # Build dependencies
        libclang-dev \
        clang \
        pkg-config \
        libssl-dev \
        cmake \
        # Test utilities
        jq \
        curl \
        netcat-openbsd \
        # Performance analysis
        linux-perf \
    && rm -rf /var/lib/apt/lists/*
# Install pgrx for PostgreSQL extension testing
RUN cargo install cargo-pgrx --version 0.12.6 --locked
# Install additional Rust tools for testing
# NOTE(review): these are unpinned, so the newest releases must still build
# with the Rust version of the base image - consider pinning versions.
RUN cargo install cargo-nextest --locked && \
    cargo install cargo-criterion --locked && \
    cargo install cargo-llvm-cov --locked
# Create app directory
WORKDIR /app
# Pre-download common dependencies (speeds up subsequent builds):
# a throw-away crate is created, a few dependencies are appended to its
# manifest, `cargo fetch` downloads them, and the crate is removed again.
RUN cargo new --lib dummy && \
    cd dummy && \
    echo 'pgrx = "0.12"' >> Cargo.toml && \
    echo 'serde = { version = "1.0", features = ["derive"] }' >> Cargo.toml && \
    echo 'serde_json = "1.0"' >> Cargo.toml && \
    echo 'rand = "0.8"' >> Cargo.toml && \
    cargo fetch && \
    cd .. && \
    rm -rf dummy
# Environment setup
ENV RUST_BACKTRACE=1
ENV RUST_LOG=info
ENV CARGO_HOME=/usr/local/cargo
ENV PATH="${CARGO_HOME}/bin:${PATH}"
# Default command runs tests
CMD ["cargo", "test", "--release", "--features", "pg_test"]

View File

@@ -0,0 +1,24 @@
# Test Runner Dockerfile for RuVector-Postgres
# NOTE(review): the sibling Dockerfiles use Rust 1.83/1.85; confirm that the
# unpinned tools installed below still build with Rust 1.75.
FROM rust:1.75-bookworm
# PostgreSQL major version of the client tools (default matches the
# previously hard-coded version 16)
ARG PG_VERSION=16
# Add the PostgreSQL (PGDG) APT repository.
# Debian bookworm's own archive does not carry postgresql-client-16, so
# without this repository the install step below cannot find the package.
# The key is stored as a keyring file referenced via `signed-by`.
# NOTE: relies on wget being available in the base image, like the sibling
# Dockerfiles do.
RUN wget --quiet -O /usr/share/keyrings/pgdg-archive-keyring.asc \
        https://www.postgresql.org/media/keys/ACCC4CF8.asc && \
    echo "deb [signed-by=/usr/share/keyrings/pgdg-archive-keyring.asc] http://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list
# Install dependencies
RUN apt-get update && apt-get install -y \
        postgresql-client-${PG_VERSION} \
        libclang-dev \
        clang \
        pkg-config \
        libssl-dev \
        cmake \
    && rm -rf /var/lib/apt/lists/*
# Install pgrx
RUN cargo install cargo-pgrx --version 0.12.6 --locked
# Install additional test tools
RUN cargo install cargo-nextest --locked
RUN cargo install cargo-criterion --locked
WORKDIR /app
# Default command
CMD ["cargo", "test", "--features", "pg_test"]

View File

@@ -0,0 +1,392 @@
# RuVector-Postgres v2 Docker
High-performance PostgreSQL vector database extension with 230+ SQL functions, SIMD acceleration, Flash Attention, GNN layers, hybrid search, multi-tenancy, and self-healing capabilities.
## Docker Hub
```bash
# Pull the latest image
docker pull ruvector/ruvector-postgres:latest
# Or specific PostgreSQL version
docker pull ruvector/ruvector-postgres:2.0.0-pg17
docker pull ruvector/ruvector-postgres:2.0.0-pg16
docker pull ruvector/ruvector-postgres:2.0.0-pg15
docker pull ruvector/ruvector-postgres:2.0.0-pg14
# Run container
docker run -d \
--name ruvector \
-p 5432:5432 \
-e POSTGRES_PASSWORD=secret \
ruvector/ruvector-postgres:latest
# Connect and test
psql -h localhost -U ruvector -d ruvector_test -c "SELECT ruvector_version();"
```
## v2 Features
| Feature | Description | SQL Functions |
|---------|-------------|---------------|
| **Core Vectors** | pgvector-compatible vector type | `vector`, `<->`, `<=>`, `<#>` |
| **SIMD** | AVX2/AVX512/NEON acceleration | `ruvector_simd_info()` |
| **HNSW Index** | Approximate nearest neighbor | `CREATE INDEX ... USING hnsw` |
| **IVFFlat Index** | Inverted file index | `CREATE INDEX ... USING ivfflat` |
| **Quantization** | Binary, scalar, product quantization | 6 functions |
| **Flash Attention** | Memory-efficient attention | `ruvector_flash_attention()` |
| **Multi-Head Attention** | Transformer attention | `ruvector_multi_head_attention()` |
| **GNN Layers** | GCN, GraphSAGE, GAT | 5+ functions |
| **Hybrid Search** | BM25 + vector fusion | 7 functions |
| **Multi-Tenancy** | Tenant isolation, quotas | 17 functions |
| **Self-Healing** | Automatic recovery | 23 functions |
| **Self-Learning** | Adaptive optimization | 10 functions |
| **Hyperbolic** | Poincaré/Lorentz embeddings | 8+ functions |
| **Graph** | Cypher-style queries | 25+ functions |
## Quick Start
### Development Environment
```bash
# Start development environment
./dev.sh start
# Open psql shell
./dev.sh psql
# Watch for changes and auto-reload
./dev.sh watch
# Stop environment
./dev.sh stop
```
### Running Tests
```bash
# Run full test suite
./run-tests.sh
# Run integration tests only
./run-tests.sh --integration
# Keep container running for debugging
./run-tests.sh --keep-running
# Clean rebuild
./run-tests.sh --clean
```
## Scripts Overview
### `dev.sh` - Development Environment
Manages a PostgreSQL development environment with hot-reload support.
**Commands:**
- `start` - Start development environment (default)
- `stop` - Stop development environment
- `restart` - Restart development environment
- `logs` - Show PostgreSQL logs
- `psql` - Open psql shell
- `watch` - Start file watcher for hot-reload (requires cargo-watch)
- `rebuild` - Rebuild and reload extension
- `status` - Show container status
**Options:**
- `-p, --port PORT` - PostgreSQL port (default: 5432)
- `-u, --user USER` - PostgreSQL user (default: postgres)
- `-d, --database DB` - PostgreSQL database (default: ruvector_dev)
- `-b, --background` - Start in background (default)
- `-f, --foreground` - Start in foreground with logs
- `-h, --help` - Show help message
**Examples:**
```bash
# Start on custom port
./dev.sh --port 5433 start
# View logs
./dev.sh logs
# Rebuild extension
./dev.sh rebuild
```
### `run-tests.sh` - Test Runner
Builds Docker image, runs tests, and manages test infrastructure.
**Options:**
- `-b, --build-only` - Build Docker image only, don't run tests
- `-t, --test-only` - Run tests only (skip build)
- `-i, --integration` - Run integration tests only
- `-k, --keep-running` - Keep container running after tests
- `-c, --clean` - Clean up before starting
- `-v, --keep-volumes` - Keep volumes after cleanup
- `-p, --port PORT` - PostgreSQL port (default: 5433)
- `-h, --help` - Show help message
**Examples:**
```bash
# Build and test
./run-tests.sh
# Integration tests with container kept running
./run-tests.sh --integration --keep-running
# Clean rebuild
./run-tests.sh --clean --build-only
```
## Docker Files
### `Dockerfile` - Main Build File
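Multi-stage Docker build for PostgreSQL 14–17 (default: 17, selected with the `PG_VERSION` build argument) with pgrx 0.12.6 support.
**Features:**
- Rust 1.85 (default `RUST_VERSION` build argument) with Bookworm base
- PostgreSQL server and development headers for the selected version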
- cargo-pgrx 0.12.6 pre-installed
- Optimized layer caching for dependencies
- Health checks built-in
### `docker-compose.yml` - Orchestration
Complete development stack with PostgreSQL and pgAdmin.
**Services:**
- `postgres` - PostgreSQL 16 with ruvector extension
- `pgadmin` - Web-based database management (port 5050)
**Usage:**
```bash
# Start all services
docker-compose up -d
# View logs
docker-compose logs -f
# Stop services
docker-compose down
# Access pgAdmin
# URL: http://localhost:5050
# Email: admin@ruvector.dev
# Password: admin
```
### `init.sql` - Database Initialization
SQL script for automatic database setup with:
- Extension creation
- Sample tables and indexes
- Test data
- Performance monitoring views
## Development Workflow
### 1. Initial Setup
```bash
# Start development environment
./dev.sh start
# This will:
# - Pull PostgreSQL 16 image
# - Create development database
# - Expose on localhost:5432
# - Show connection string
```
### 2. Build Extension
```bash
cd /workspaces/ruvector/crates/ruvector-postgres
# Build and install extension
cargo pgrx install --release
```
### 3. Test Changes
```bash
# Quick test in psql
./dev.sh psql
# In psql:
# CREATE EXTENSION ruvector_postgres;
# SELECT '[1,2,3]'::vector;
```
### 4. Hot-Reload Development
```bash
# Install cargo-watch (one time)
cargo install cargo-watch
# Start watching for changes
./dev.sh watch
# Now edit code - extension auto-reloads on save!
```
### 5. Run Full Test Suite
```bash
# Run all tests
./run-tests.sh
# Or run just integration tests
./run-tests.sh --integration
```
## Environment Variables
### Development (`dev.sh`)
```bash
POSTGRES_PORT=5432 # PostgreSQL port
POSTGRES_USER=postgres # PostgreSQL user
POSTGRES_PASSWORD=postgres # PostgreSQL password
POSTGRES_DB=ruvector_dev # Database name
```
### Testing (`run-tests.sh`)
```bash
POSTGRES_PORT=5433 # PostgreSQL port (different from dev)
POSTGRES_USER=ruvector # PostgreSQL user
POSTGRES_PASSWORD=ruvector # PostgreSQL password
POSTGRES_DB=ruvector_test # Test database name
KEEP_VOLUMES=false # Keep volumes after cleanup
EXPORT_DB=false # Export database dump
```
## Platform Support
Both scripts support:
- ✅ Linux (Ubuntu, Debian, RHEL, etc.)
- ✅ macOS (Intel and Apple Silicon)
- ✅ Windows (via WSL2)
The scripts automatically detect the platform and adjust behavior accordingly.
## Troubleshooting
### Port Already in Use
```bash
# Check what's using the port
lsof -i :5432
# Use a different port
./dev.sh --port 5433 start
```
### Extension Not Loading
```bash
# Rebuild extension
./dev.sh rebuild
# Or manually:
cd /workspaces/ruvector/crates/ruvector-postgres
cargo pgrx install --release
# Then reload in database
./dev.sh psql
# DROP EXTENSION ruvector_postgres CASCADE;
# CREATE EXTENSION ruvector_postgres;
```
### Docker Build Fails
```bash
# Clean build
docker system prune -a
./run-tests.sh --clean --build-only
# Check Docker resources
docker info
```
### Tests Fail
```bash
# Keep container running to debug
./run-tests.sh --keep-running
# Connect to inspect
./dev.sh psql
# View logs
docker logs ruvector-postgres-test
```
## Performance Tips
### Build Optimization
```bash
# Use BuildKit for faster builds
export DOCKER_BUILDKIT=1
./run-tests.sh
# Parallel builds
docker build --build-arg MAKEFLAGS="-j$(nproc)" ...
```
### Development Speed
```bash
# Use cargo-watch for instant feedback
./dev.sh watch
# Or use cargo-pgrx run for interactive development
cd /workspaces/ruvector/crates/ruvector-postgres
cargo pgrx run pg16
```
## CI/CD Integration
### GitHub Actions Example
```yaml
name: Test RuVector-Postgres
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Run tests
run: |
cd crates/ruvector-postgres/docker
./run-tests.sh
```
### GitLab CI Example
```yaml
test:
image: docker:latest
services:
- docker:dind
script:
- cd crates/ruvector-postgres/docker
- ./run-tests.sh
```
## Resources
- [pgrx Documentation](https://github.com/pgcentralfoundation/pgrx)
- [PostgreSQL Docker Hub](https://hub.docker.com/_/postgres)
- [RuVector Repository](https://github.com/ruvnet/ruvector)
## License
MIT License - See project root for details

View File

@@ -0,0 +1,60 @@
# Benchmark Dockerfile for RuVector-Postgres
# Runs performance benchmarks and generates reports
#
# Usage (run from the repository root, which must be the build context
# because the COPY below uses a repository-relative path):
#   docker build -f crates/ruvector-postgres/docker/benchmark/Dockerfile -t ruvector-benchmark .
#   docker run --rm -v "$(pwd)":/app -v "$(pwd)/results":/benchmark-results ruvector-benchmark
# The runner script expects the source tree to be mounted at /app.
ARG PG_VERSION=17
ARG RUST_VERSION=1.83
# ============================================================================
# Stage 1: Benchmark Runner
# ============================================================================
FROM rust:${RUST_VERSION}-bookworm AS benchmark-runner
# Re-declared so the global build argument is visible inside this stage
ARG PG_VERSION
# Add PostgreSQL APT repository.
# The signing key is stored as a keyring file referenced via `signed-by`
# (replacing the deprecated `apt-key add`) and is downloaded directly to disk,
# so a failed download can no longer hide behind a shell pipe.
# NOTE: relies on wget already being available in the base image, as before.
RUN wget --quiet -O /usr/share/keyrings/pgdg-archive-keyring.asc \
        https://www.postgresql.org/media/keys/ACCC4CF8.asc && \
    echo "deb [signed-by=/usr/share/keyrings/pgdg-archive-keyring.asc] http://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list
# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        postgresql-${PG_VERSION} \
        postgresql-server-dev-${PG_VERSION} \
        postgresql-client-${PG_VERSION} \
        libclang-dev \
        clang \
        pkg-config \
        libssl-dev \
        cmake \
        git \
        jq \
        gnuplot \
    && rm -rf /var/lib/apt/lists/*
# Install pgrx and benchmarking tools
RUN cargo install cargo-pgrx --version 0.12.6 --locked && \
    cargo install cargo-criterion --locked && \
    cargo install hyperfine --locked
# Initialize pgrx for the specified PostgreSQL version
RUN cargo pgrx init --pg${PG_VERSION} /usr/lib/postgresql/${PG_VERSION}/bin/pg_config
# Set environment variables (PG_VERSION is read by the runner script)
ENV PGRX_PG_CONFIG_PATH=/usr/lib/postgresql/${PG_VERSION}/bin/pg_config
ENV PGRX_HOME=/root/.pgrx
ENV PG_VERSION=${PG_VERSION}
ENV RUST_LOG=info
WORKDIR /app
# Create directories for benchmark results and the optional baseline
RUN mkdir -p /benchmark-results /baseline
# Copy benchmark runner script (executable)
COPY --chmod=755 crates/ruvector-postgres/docker/benchmark/run-benchmarks.sh /usr/local/bin/run-benchmarks.sh
# Default command runs benchmarks
CMD ["/usr/local/bin/run-benchmarks.sh"]

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env bash
# RuVector-Postgres Benchmark Runner Script
# Runs performance benchmarks and generates reports

# Abort on the first failing command
set -e

# ANSI colour escape sequences used by the log helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Log helpers: each prints its first argument with a coloured tag.
# `echo -e` interprets the colour escapes (and any escapes in the message).
log_info() {
    local msg="$1"
    echo -e "${BLUE}[INFO]${NC} ${msg}"
}

log_success() {
    local msg="$1"
    echo -e "${GREEN}[SUCCESS]${NC} ${msg}"
}

log_warn() {
    local msg="$1"
    echo -e "${YELLOW}[WARN]${NC} ${msg}"
}

log_error() {
    local msg="$1"
    echo -e "${RED}[ERROR]${NC} ${msg}"
}

# Section banner: "=== title ===" in cyan
log_header() {
    local title="$1"
    echo -e "${CYAN}=== ${title} ===${NC}"
}
# Configuration: every setting can be overridden from the environment
: "${PG_VERSION:=17}"
: "${RESULTS_DIR:=/benchmark-results}"
: "${BASELINE_DIR:=/baseline}"
: "${COMPARE_BASELINE:=false}"
BENCHMARK_FILTER="${BENCHMARK_FILTER:-}"

# The results directory must exist before anything is written into it
mkdir -p "${RESULTS_DIR}"

log_header "RuVector-Postgres Benchmark Runner"
log_info "PostgreSQL Version: ${PG_VERSION}"
log_info "Results Directory: ${RESULTS_DIR}"
log_info "Compare Baseline: ${COMPARE_BASELINE}"

# Prefer the crate directory inside a mounted workspace; otherwise use /app itself
if ! cd /app/crates/ruvector-postgres 2>/dev/null; then
    cd /app
fi

# Without a manifest there is nothing to build
[ -f "Cargo.toml" ] || {
    log_error "Cargo.toml not found. Mount the source code to /app"
    exit 1
}

# Each run gets its own timestamped report directory
START_TIME=$(date +%s)
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
REPORT_DIR="${RESULTS_DIR}/${TIMESTAMP}"
mkdir -p "${REPORT_DIR}"

# Release build for the selected PostgreSQL version
log_info "Building with release optimizations..."
cargo build --release --features pg${PG_VERSION}
# Run Criterion benchmarks
log_header "Running Criterion Benchmarks"

# The command is assembled as an array and expanded quoted, so a filter that
# contains spaces or glob characters reaches cargo as one literal argument
# instead of being word-split and glob-expanded by the shell.
bench_cmd=(cargo bench --features "pg${PG_VERSION}")
if [ -n "${BENCHMARK_FILTER}" ]; then
    bench_cmd+=(-- "${BENCHMARK_FILTER}")
fi

# Run benchmarks and capture output.
# errexit is suspended so a failing run is recorded rather than aborting the
# script; PIPESTATUS[0] is cargo's own status, not tee's.
log_info "Executing: ${bench_cmd[*]}"
set +e
"${bench_cmd[@]}" 2>&1 | tee "${REPORT_DIR}/benchmark.log"
BENCH_EXIT_CODE=${PIPESTATUS[0]}
set -e

# Copy Criterion report if it exists
if [ -d "target/criterion" ]; then
    log_info "Copying Criterion HTML reports..."
    cp -r target/criterion "${REPORT_DIR}/"
fi

# Run individual benchmark suites with detailed output.
# Each suite writes bencher-format output to <suite>.txt in the report
# directory; a failing suite is tolerated (|| true) so the rest still run.
log_header "Running Detailed Benchmark Suites"
for suite in distance_bench quantization_bench index_bench quantized_distance_bench; do
    log_info "Running ${suite}..."
    cargo bench --features "pg${PG_VERSION}" --bench "${suite}" -- --output-format bencher 2>&1 \
        | tee "${REPORT_DIR}/${suite}.txt" || true
done
# Total wall-clock time of the run, in seconds
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))

# Optional baseline comparison: a plain unified diff per suite, written next
# to the results as diff_<suite>.txt. diff's non-zero status on differences
# is ignored.
if [ "${COMPARE_BASELINE}" == "true" ] && [ -d "${BASELINE_DIR}" ]; then
    log_header "Comparing with Baseline"
    for bench_file in distance_bench.txt quantization_bench.txt index_bench.txt quantized_distance_bench.txt; do
        baseline_file="${BASELINE_DIR}/${bench_file}"
        current_file="${REPORT_DIR}/${bench_file}"
        # Only compare suites that exist on both sides
        if [ ! -f "${baseline_file}" ] || [ ! -f "${current_file}" ]; then
            continue
        fi
        log_info "Comparing ${bench_file}..."
        diff -u "${baseline_file}" "${current_file}" \
            > "${REPORT_DIR}/diff_${bench_file}" 2>&1 || true
    done
fi
# Generate summary report
log_header "Generating Summary Report"

# bench_excerpt FILE
# Prints the first 50 lines of FILE (relative to the report directory), or
# "No results" when the file is missing or empty. The earlier
# `cat ... | head -50 || echo` form could never reach the fallback, because a
# pipeline's status is that of its last command (head), which succeeds even
# when cat fails.
bench_excerpt() {
    local file="${REPORT_DIR}/$1"
    if [ -s "${file}" ]; then
        head -n 50 "${file}"
    else
        echo "No results"
    fi
}

# Machine-readable summary
cat > "${REPORT_DIR}/summary.json" << EOF
{
"timestamp": "$(date -Iseconds)",
"pg_version": "${PG_VERSION}",
"duration_seconds": ${DURATION},
"benchmark_exit_code": ${BENCH_EXIT_CODE},
"benchmarks_run": [
"distance_bench",
"quantization_bench",
"index_bench",
"quantized_distance_bench"
],
"report_directory": "${REPORT_DIR}"
}
EOF

# Generate markdown report (backticks are escaped so the shell does not treat
# them as command substitution inside the unquoted heredoc)
cat > "${REPORT_DIR}/REPORT.md" << EOF
# RuVector-Postgres Benchmark Report
**Date**: $(date)
**PostgreSQL Version**: ${PG_VERSION}
**Duration**: ${DURATION}s
## Benchmark Results
### Distance Benchmarks
\`\`\`
$(bench_excerpt distance_bench.txt)
\`\`\`
### Quantization Benchmarks
\`\`\`
$(bench_excerpt quantization_bench.txt)
\`\`\`
### Index Benchmarks
\`\`\`
$(bench_excerpt index_bench.txt)
\`\`\`
### Quantized Distance Benchmarks
\`\`\`
$(bench_excerpt quantized_distance_bench.txt)
\`\`\`
## Full Reports
See the \`criterion/\` directory for detailed HTML reports.
EOF
# Point "latest" at this run (-n replaces an existing link instead of descending into it)
ln -sfn "${REPORT_DIR}" "${RESULTS_DIR}/latest"

# Human-readable summary on stdout
cat << SUMMARY

==========================================
 BENCHMARK SUMMARY
==========================================
PostgreSQL Version: ${PG_VERSION}
Duration: ${DURATION}s
Exit Code: ${BENCH_EXIT_CODE}
Report: ${REPORT_DIR}/REPORT.md
HTML Reports: ${REPORT_DIR}/criterion/
==========================================
SUMMARY

# Propagate the status of the main Criterion run to the caller
if [ "${BENCH_EXIT_CODE}" == "0" ]; then
    log_success "Benchmarks completed successfully!"
    exit 0
fi
log_warn "Some benchmarks may have failed"
exit ${BENCH_EXIT_CODE}

View File

@@ -0,0 +1,385 @@
#!/usr/bin/env bash
# RuVector-Postgres Development Environment
# Starts PostgreSQL with hot-reload support for extension development

# Strict mode: exit on error (-e), on use of an undefined variable (-u) and
# on a failure anywhere in a pipeline (pipefail)
set -euo pipefail

# ANSI colour escape sequences for log output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Locations: this script's directory, and the project root three levels above it
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Container settings
CONTAINER_NAME="ruvector-postgres-dev"
IMAGE_NAME="ruvector-postgres:dev"

# Connection settings; values already present in the environment win
: "${POSTGRES_PORT:=5432}"
: "${POSTGRES_USER:=postgres}"
: "${POSTGRES_PASSWORD:=postgres}"
: "${POSTGRES_DB:=ruvector_dev}"

# Map the kernel name reported by uname onto a short platform label
OS_TYPE="$(uname -s)"
if [[ "${OS_TYPE}" == Linux* ]]; then
    PLATFORM="linux"
elif [[ "${OS_TYPE}" == Darwin* ]]; then
    PLATFORM="macos"
else
    PLATFORM="unknown"
fi
# Logging helpers: each prints its first argument behind a coloured tag.
# `echo -e` interprets the colour escapes (and any escapes in the message).
log_info() {
    local msg="$1"
    echo -e "${BLUE}[INFO]${NC} ${msg}"
}

log_success() {
    local msg="$1"
    echo -e "${GREEN}[✓]${NC} ${msg}"
}

log_warn() {
    local msg="$1"
    echo -e "${YELLOW}[⚠]${NC} ${msg}"
}

log_error() {
    local msg="$1"
    echo -e "${RED}[✗]${NC} ${msg}"
}

# Prints a command suggestion behind a "[$]" prompt tag
log_cmd() {
    local msg="$1"
    echo -e "${CYAN}[$]${NC} ${msg}"
}
# Verifies the host tooling the script needs.
# Docker is mandatory (the script exits with status 1 without it);
# cargo-pgrx is installed on demand when it is missing.
check_dependencies() {
    log_info "Checking dependencies..."

    # Docker cannot be installed from here, so its absence is fatal
    command -v docker &> /dev/null || {
        log_error "Docker is not installed. Please install Docker first."
        exit 1
    }
    log_success "Docker found"

    # cargo-pgrx: install the pinned version if it is not on PATH
    command -v cargo-pgrx &> /dev/null || {
        log_warn "cargo-pgrx not found. Installing..."
        cargo install cargo-pgrx --version 0.12.6 --locked
    }
    log_success "cargo-pgrx found"
}
# Stops and removes the development container.
# Both steps are best-effort: errors (for example, no such container) are ignored.
cleanup() {
    log_info "Stopping development environment..."
    local docker_action
    for docker_action in stop rm; do
        docker "${docker_action}" "${CONTAINER_NAME}" 2>/dev/null || true
    done
}
# Polls pg_isready inside the container once per second, up to 30 times.
# Returns 0 as soon as PostgreSQL accepts connections; otherwise dumps the
# container logs and returns 1.
wait_for_postgres() {
    log_info "Waiting for PostgreSQL to be ready..."
    local max_attempts=30
    local attempt

    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        if docker exec "${CONTAINER_NAME}" pg_isready -U "${POSTGRES_USER}" &>/dev/null; then
            log_success "PostgreSQL is ready!"
            return 0
        fi
        # Progress dot for every unsuccessful probe
        echo -n "."
        sleep 1
    done

    log_error "PostgreSQL failed to become ready"
    docker logs "${CONTAINER_NAME}"
    return 1
}
# Builds the extension in release mode and installs it with pgrx, using
# whichever pg_config is first on PATH.
build_extension() {
    log_info "Building ruvector-postgres extension..."
    cd "${PROJECT_ROOT}/crates/ruvector-postgres"

    local pg_config_path
    pg_config_path="$(which pg_config)"
    cargo pgrx install --pg-config "${pg_config_path}" --release

    log_success "Extension built and installed"
}
# Creates the data volume (if needed) and starts the detached PostgreSQL
# container with the configured port, credentials and health check.
start_dev_container() {
    log_info "Starting development PostgreSQL container..."

    # Named volume so the data survives container re-creation
    docker volume create "${CONTAINER_NAME}_data" || true

    local run_args=(
        -d
        --name "${CONTAINER_NAME}"
        -p "${POSTGRES_PORT}:5432"
        -e "POSTGRES_USER=${POSTGRES_USER}"
        -e "POSTGRES_PASSWORD=${POSTGRES_PASSWORD}"
        -e "POSTGRES_DB=${POSTGRES_DB}"
        -v "${CONTAINER_NAME}_data:/var/lib/postgresql/data"
        # Host pgrx directory, mounted read-only
        -v "${HOME}/.pgrx:/home/postgres/.pgrx:ro"
        --health-cmd="pg_isready -U ${POSTGRES_USER}"
        --health-interval=5s
        --health-timeout=5s
        --health-retries=5
    )
    docker run "${run_args[@]}" postgres:16-bookworm

    log_success "Container started: ${CONTAINER_NAME}"
}
# Creates the ruvector_postgres extension in the development database.
# Returns 1 (after a warning) when the CREATE EXTENSION statement fails,
# e.g. because the extension files are not installed yet.
setup_extension() {
    log_info "Setting up extension in database..."

    # No -it here: the statement is passed with -c and needs neither stdin nor
    # a terminal. Requesting a TTY makes `docker exec` fail outright when the
    # script runs without one (CI, pipes), which was then misreported as
    # "extension not yet installed".
    if ! docker exec "${CONTAINER_NAME}" psql -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c "CREATE EXTENSION IF NOT EXISTS ruvector_postgres CASCADE;"; then
        log_warn "Extension not yet installed. Run 'cargo pgrx install' first."
        return 1
    fi

    log_success "Extension loaded successfully"
}
# Prints the "environment ready" banner: connection string, quick-connect
# commands, the edit/rebuild/reload workflow and the container settings.
# Note that the connection string includes the password in clear text.
show_connection_info() {
    local connection_string="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@localhost:${POSTGRES_PORT}/${POSTGRES_DB}"
    local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    local psql_in_container="psql -U ${POSTGRES_USER} -d ${POSTGRES_DB}"

    echo ""
    echo -e "${GREEN}${rule}${NC}"
    echo -e "${GREEN} RuVector-Postgres Development Environment Ready!${NC}"
    echo -e "${GREEN}${rule}${NC}"
    echo ""

    echo -e "${CYAN}Connection String:${NC}"
    echo -e " ${connection_string}"
    echo ""

    echo -e "${CYAN}Quick Connect Commands:${NC}"
    log_cmd "psql ${connection_string}"
    log_cmd "docker exec -it ${CONTAINER_NAME} ${psql_in_container}"
    echo ""

    echo -e "${CYAN}Development Workflow:${NC}"
    echo -e " 1. Make changes to extension code"
    echo -e " 2. Rebuild: ${YELLOW}cargo pgrx install${NC}"
    echo -e " 3. Reload: ${YELLOW}docker exec ${CONTAINER_NAME} ${psql_in_container} -c 'DROP EXTENSION ruvector_postgres CASCADE; CREATE EXTENSION ruvector_postgres;'${NC}"
    echo ""

    echo -e "${CYAN}Useful Commands:${NC}"
    log_cmd "cargo pgrx test pg16 # Run tests"
    log_cmd "cargo pgrx package # Create distributable package"
    log_cmd "docker logs -f ${CONTAINER_NAME} # View PostgreSQL logs"
    log_cmd "docker stop ${CONTAINER_NAME} # Stop development environment"
    echo ""

    echo -e "${CYAN}Container Info:${NC}"
    echo -e " Name: ${CONTAINER_NAME}"
    echo -e " Port: ${POSTGRES_PORT}"
    echo -e " User: ${POSTGRES_USER}"
    echo -e " Database: ${POSTGRES_DB}"
    echo -e " Platform: ${PLATFORM}"
    echo ""

    echo -e "${GREEN}${rule}${NC}"
    echo ""
}
# Runs cargo-watch in the crate directory: on every change it executes
# `cargo pgrx install` and then drops and re-creates the extension inside
# the development container.
watch_and_reload() {
    log_info "Starting file watcher for hot-reload..."
    log_warn "File watching requires 'cargo-watch'. Install with: cargo install cargo-watch"

    cd "${PROJECT_ROOT}/crates/ruvector-postgres"

    # Shell command cargo-watch runs after a successful install
    local reload_cmd="docker exec ${CONTAINER_NAME} psql -U ${POSTGRES_USER} -d ${POSTGRES_DB} -c 'DROP EXTENSION IF EXISTS ruvector_postgres CASCADE; CREATE EXTENSION ruvector_postgres;'"
    cargo watch -x "pgrx install" -s "${reload_cmd}"
}
# Prints the command-line help text (commands, options, environment
# variables and examples) to stdout.
show_usage() {
    # Name the script was invoked as, shown in the usage and example lines
    local script="$0"
    cat << USAGE
RuVector-Postgres Development Environment
Usage: ${script} [OPTIONS] [COMMAND]
Commands:
start Start development environment (default)
stop Stop development environment
restart Restart development environment
logs Show PostgreSQL logs
psql Open psql shell
watch Start file watcher for hot-reload
rebuild Rebuild and reload extension
status Show container status
Options:
-p, --port PORT PostgreSQL port (default: 5432)
-u, --user USER PostgreSQL user (default: postgres)
-d, --database DB PostgreSQL database (default: ruvector_dev)
-b, --background Start in background (default)
-f, --foreground Start in foreground with logs
-h, --help Show this help message
Environment Variables:
POSTGRES_PORT PostgreSQL port (default: 5432)
POSTGRES_USER PostgreSQL user (default: postgres)
POSTGRES_PASSWORD PostgreSQL password (default: postgres)
POSTGRES_DB PostgreSQL database (default: ruvector_dev)
Examples:
# Start development environment
${script} start
# Start with custom port
${script} --port 5433 start
# Open psql shell
${script} psql
# Watch for changes and auto-reload
${script} watch
# View logs
${script} logs
USAGE
}
# `start` command: checks the tooling, replaces any existing container,
# waits for PostgreSQL and then tries to load the extension.
cmd_start() {
    check_dependencies

    # Remove a leftover container from a previous run (errors are ignored)
    local docker_action
    for docker_action in stop rm; do
        docker "${docker_action}" "${CONTAINER_NAME}" 2>/dev/null || true
    done

    start_dev_container
    wait_for_postgres

    # The extension may not be built yet; that is a warning, not an error
    if ! setup_extension; then
        log_warn "Run 'cargo pgrx install' to build and install the extension"
    fi

    show_connection_info
}
# `stop` command: stops and removes the development container.
cmd_stop() {
    cleanup
    log_success "Development environment stopped"
}

# `restart` command: stop, wait two seconds, then start again.
cmd_restart() {
    cmd_stop
    sleep 2
    cmd_start
}

# `logs` command: follows the container's log output until interrupted.
cmd_logs() {
    docker logs -f "${CONTAINER_NAME}"
}

# `psql` command: opens an interactive psql session inside the container
# (-it allocates the terminal the interactive shell needs).
cmd_psql() {
    local psql_args=(-U "${POSTGRES_USER}" -d "${POSTGRES_DB}")
    docker exec -it "${CONTAINER_NAME}" psql "${psql_args[@]}"
}
# `rebuild` command: rebuilds and installs the extension in release mode,
# then drops and re-creates it in the development database and prints the
# installed extension version.
cmd_rebuild() {
    log_info "Rebuilding extension..."
    cd "${PROJECT_ROOT}/crates/ruvector-postgres"
    cargo pgrx install --release

    log_info "Reloading extension in database..."
    # -i keeps stdin attached: without it `docker exec` does not forward the
    # heredoc, psql reads nothing, and the reload silently never happens.
    docker exec -i "${CONTAINER_NAME}" psql -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" << 'EOF'
DROP EXTENSION IF EXISTS ruvector_postgres CASCADE;
CREATE EXTENSION ruvector_postgres;
SELECT extname, extversion FROM pg_extension WHERE extname = 'ruvector_postgres';
EOF

    log_success "Extension rebuilt and reloaded!"
}
# `status` command: reports whether the development container is running
# and, if it is, shows its `docker ps` entry and the connection details.
cmd_status() {
    # Not running: say so and point at the start command
    if ! docker ps --filter "name=${CONTAINER_NAME}" --format "{{.Names}}" | grep -q "${CONTAINER_NAME}"; then
        log_warn "Container ${CONTAINER_NAME} is not running"
        echo "Start with: $0 start"
        return 0
    fi

    log_success "Container ${CONTAINER_NAME} is running"
    docker ps --filter "name=${CONTAINER_NAME}"
    echo ""
    show_connection_info
}
# _require_value OPTION REMAINING_ARG_COUNT
# Exits with a usage error when OPTION is the last argument and therefore has
# no value. Without this check, `set -u` aborts on the unbound "$2" with a
# message that does not tell the user what went wrong.
_require_value() {
    if [[ "$2" -lt 2 ]]; then
        log_error "Option $1 requires a value"
        show_usage
        exit 1
    fi
}

# Entry point: parses options and the command, then dispatches to the
# matching cmd_* function. The default command is `start`; if several
# commands are given, the last one wins.
main() {
    local command="start"
    local foreground=false

    # Parse arguments
    while [[ $# -gt 0 ]]; do
        case "$1" in
            start|stop|restart|logs|psql|watch|rebuild|status)
                command="$1"
                shift
                ;;
            -p|--port)
                _require_value "$1" "$#"
                POSTGRES_PORT="$2"
                shift 2
                ;;
            -u|--user)
                _require_value "$1" "$#"
                POSTGRES_USER="$2"
                shift 2
                ;;
            -d|--database)
                _require_value "$1" "$#"
                POSTGRES_DB="$2"
                shift 2
                ;;
            -b|--background)
                foreground=false
                shift
                ;;
            -f|--foreground)
                foreground=true
                shift
                ;;
            -h|--help)
                show_usage
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                show_usage
                exit 1
                ;;
        esac
    done

    # Execute command
    case "${command}" in
        start)
            cmd_start
            # Foreground mode keeps the script attached to the container logs
            if [ "${foreground}" == "true" ]; then
                cmd_logs
            fi
            ;;
        stop)
            cmd_stop
            ;;
        restart)
            cmd_restart
            ;;
        logs)
            cmd_logs
            ;;
        psql)
            cmd_psql
            ;;
        watch)
            watch_and_reload
            ;;
        rebuild)
            cmd_rebuild
            ;;
        status)
            cmd_status
            ;;
        *)
            log_error "Unknown command: ${command}"
            show_usage
            exit 1
            ;;
    esac
}

# Run main function
main "$@"

View File

@@ -0,0 +1,137 @@
version: '3.8'

# Docker Compose configuration for RuVector Postgres integration tests.
# Provides an isolated environment for the full test run:
#   postgres            - database built from the project Dockerfile
#   test-runner         - runs `cargo test --test integration`
#   benchmark           - runs `cargo bench`          (profile: benchmark)
#   pgvector-reference  - upstream pgvector image     (profile: compatibility)
services:
  # PostgreSQL with RuVector extension
  postgres:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/Dockerfile
      args:
        PG_VERSION: ${PG_VERSION:-17}
    container_name: ruvector-postgres-integration
    ports:
      - "${POSTGRES_PORT:-5433}:5432"
    environment:
      POSTGRES_USER: ruvector
      POSTGRES_PASSWORD: ruvector
      POSTGRES_DB: ruvector_test
      POSTGRES_INITDB_ARGS: "--data-checksums"
      # Performance tuning for tests.
      # NOTE(review): the stock postgres entrypoint does not read these four
      # variables; presumably the custom image consumes them - confirm.
      POSTGRES_SHARED_BUFFERS: 256MB
      POSTGRES_EFFECTIVE_CACHE_SIZE: 512MB
      POSTGRES_WORK_MEM: 64MB
      POSTGRES_MAINTENANCE_WORK_MEM: 128MB
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # Init script is only read at first start; mount it read-only like test_sql.
      - ./init-integration.sql:/docker-entrypoint-initdb.d/01-init.sql:ro
      - ./test_sql:/test_sql:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ruvector -d ruvector_test"]
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 30s
    networks:
      - integration-network
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 512M

  # Test runner container with Rust toolchain
  test-runner:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/Dockerfile.integration-test
    container_name: ruvector-integration-runner
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: postgres://ruvector:ruvector@postgres:5432/ruvector_test
      RUST_LOG: ${RUST_LOG:-info}
      RUST_BACKTRACE: 1
      TEST_TIMEOUT: ${TEST_TIMEOUT:-600}
    volumes:
      # Source tree is read-only; build output and results go to named volumes.
      - ../../..:/app:ro
      - cargo_cache:/usr/local/cargo/registry
      - target_cache:/app/target
      - test_results:/app/test-results
    networks:
      - integration-network
    working_dir: /app/crates/ruvector-postgres
    # Tests run on a single thread with output shown (--test-threads=1 --nocapture).
    command: >
      cargo test
      --release
      --features pg${PG_VERSION:-17},graph-complete
      --test integration
      --
      --test-threads=1
      --nocapture

  # Performance benchmark runner
  benchmark:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/Dockerfile.integration-test
    container_name: ruvector-benchmark-runner
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      DATABASE_URL: postgres://ruvector:ruvector@postgres:5432/ruvector_test
      RUST_LOG: info
      RUST_BACKTRACE: 1
    volumes:
      - ../../..:/app:ro
      - cargo_cache:/usr/local/cargo/registry
      - target_cache:/app/target
      - test_results:/app/test-results
    networks:
      - integration-network
    working_dir: /app/crates/ruvector-postgres
    command: >
      cargo bench
      --features pg${PG_VERSION:-17},graph-complete
    profiles:
      - benchmark

  # pgvector reference container for compatibility testing
  pgvector-reference:
    image: pgvector/pgvector:pg${PG_VERSION:-17}
    container_name: pgvector-reference
    ports:
      - "5434:5432"
    environment:
      POSTGRES_USER: pgvector
      POSTGRES_PASSWORD: pgvector
      POSTGRES_DB: pgvector_test
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U pgvector -d pgvector_test"]
      interval: 5s
      timeout: 5s
      retries: 5
    networks:
      - integration-network
    profiles:
      - compatibility
# Named volumes: explicit names keep them stable regardless of the Compose
# project name.
volumes:
  postgres_data:
    name: ruvector-integration-pg-data
  cargo_cache:
    name: ruvector-integration-cargo-cache
  target_cache:
    name: ruvector-integration-target-cache
  test_results:
    name: ruvector-integration-results

# Single bridge network shared by every service in this file.
networks:
  integration-network:
    driver: bridge
    name: ruvector-integration-network

View File

@@ -0,0 +1,291 @@
# RuVector-Postgres Docker Compose Configuration
# Provides development, testing, and benchmarking services
#
# Usage:
#   docker-compose up postgres                          # Start PostgreSQL with extension
#   docker-compose up test-runner                       # Run tests
#   docker-compose --profile benchmark up benchmark     # Run benchmarks
#
# Build for specific PostgreSQL version:
#   PG_VERSION=16 docker-compose build
version: '3.8'

# Build arguments shared across services.
# The RUST_VERSION default matches the Dockerfile's own `ARG RUST_VERSION=1.85`,
# so building through Compose uses the toolchain the Dockerfile was written for.
x-build-args: &build-args
  PG_VERSION: ${PG_VERSION:-17}
  RUST_VERSION: ${RUST_VERSION:-1.85}

# Common environment for test containers
x-test-env: &test-env
  DATABASE_URL: postgres://ruvector:ruvector@postgres:5432/ruvector_test
  RUST_LOG: ${RUST_LOG:-info}
  RUST_BACKTRACE: ${RUST_BACKTRACE:-1}
  PG_VERSION: ${PG_VERSION:-17}

# Common volume mounts for development: the source tree plus cargo
# registry/git caches and a shared target directory.
x-dev-volumes: &dev-volumes
  - ../../..:/app:cached
  - cargo_cache:/usr/local/cargo/registry
  - cargo_git:/usr/local/cargo/git
  - target_cache:/app/target
services:
  # ===========================================================================
  # PostgreSQL with RuVector Extension
  # ===========================================================================
  postgres:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/Dockerfile
      args:
        <<: *build-args
    container_name: ruvector-postgres
    hostname: postgres
    ports:
      - "${POSTGRES_PORT:-5432}:5432"
    environment:
      POSTGRES_USER: ${POSTGRES_USER:-ruvector}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-ruvector}
      POSTGRES_DB: ${POSTGRES_DB:-ruvector_test}
      POSTGRES_INITDB_ARGS: "--data-checksums"
      # Authentication: use SCRAM password auth for host connections
      POSTGRES_HOST_AUTH_METHOD: scram-sha-256
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init.sql:/docker-entrypoint-initdb.d/01-init.sql:ro
      # NOTE(review): nothing in this file points the server at this config
      # (no `-c config_file=...`); presumably the image does - confirm.
      - ./postgresql.conf:/etc/postgresql/postgresql.conf:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-ruvector} -d ${POSTGRES_DB:-ruvector_test}"]
      interval: 5s
      timeout: 5s
      retries: 10
      start_period: 10s
    networks:
      - ruvector-network
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 512M
    restart: unless-stopped

  # ===========================================================================
  # Test Runner Container
  # ===========================================================================
  test-runner:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/test-runner/Dockerfile
      args:
        <<: *build-args
    container_name: ruvector-test-runner
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *test-env
      TEST_RESULTS_DIR: /test-results
      JUNIT_OUTPUT: /test-results/junit.xml
    # Same mounts as x-dev-volumes plus the results volume (YAML cannot merge lists).
    volumes:
      - ../../..:/app:cached
      - cargo_cache:/usr/local/cargo/registry
      - cargo_git:/usr/local/cargo/git
      - target_cache:/app/target
      - test_results:/test-results
    networks:
      - ruvector-network
    working_dir: /app/crates/ruvector-postgres
    command: ["/usr/local/bin/run-tests.sh"]
    deploy:
      resources:
        limits:
          memory: 4G
        reservations:
          memory: 1G

  # ===========================================================================
  # Benchmark Runner Container
  # ===========================================================================
  benchmark:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/benchmark/Dockerfile
      args:
        <<: *build-args
    container_name: ruvector-benchmark
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *test-env
      RESULTS_DIR: /benchmark-results
      COMPARE_BASELINE: ${COMPARE_BASELINE:-false}
      # In-container path; the host side is chosen by ${BASELINE_DIR} in volumes.
      BASELINE_DIR: /baseline
      BENCHMARK_FILTER: ${BENCHMARK_FILTER:-}
    volumes:
      - ../../..:/app:cached
      - cargo_cache:/usr/local/cargo/registry
      - cargo_git:/usr/local/cargo/git
      - target_cache:/app/target
      - benchmark_results:/benchmark-results
      - ${BASELINE_DIR:-./baseline}:/baseline:ro
    networks:
      - ruvector-network
    working_dir: /app/crates/ruvector-postgres
    command: ["/usr/local/bin/run-benchmarks.sh"]
    profiles:
      - benchmark
    deploy:
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 2G

  # ===========================================================================
  # Development Shell Container
  # ===========================================================================
  dev:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/test-runner/Dockerfile
      args:
        <<: *build-args
    container_name: ruvector-dev
    depends_on:
      postgres:
        condition: service_healthy
    environment:
      <<: *test-env
    volumes: *dev-volumes
    networks:
      - ruvector-network
    working_dir: /app/crates/ruvector-postgres
    command: ["bash"]
    stdin_open: true
    tty: true
    profiles:
      - dev

  # ===========================================================================
  # PostgreSQL Versions for Matrix Testing
  # (profile "matrix"; PostgreSQL 17 is covered by the default `postgres` service)
  # RUST_VERSION default matches the Dockerfile's `ARG RUST_VERSION=1.85`.
  # ===========================================================================
  postgres-pg14:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/Dockerfile
      args:
        PG_VERSION: 14
        RUST_VERSION: ${RUST_VERSION:-1.85}
    container_name: ruvector-postgres-pg14
    ports:
      - "5414:5432"
    environment:
      POSTGRES_USER: ruvector
      POSTGRES_PASSWORD: ruvector
      POSTGRES_DB: ruvector_test
    volumes:
      - postgres_data_pg14:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ruvector -d ruvector_test"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      - ruvector-network
    profiles:
      - matrix

  postgres-pg15:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/Dockerfile
      args:
        PG_VERSION: 15
        RUST_VERSION: ${RUST_VERSION:-1.85}
    container_name: ruvector-postgres-pg15
    ports:
      - "5415:5432"
    environment:
      POSTGRES_USER: ruvector
      POSTGRES_PASSWORD: ruvector
      POSTGRES_DB: ruvector_test
    volumes:
      - postgres_data_pg15:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ruvector -d ruvector_test"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      - ruvector-network
    profiles:
      - matrix

  postgres-pg16:
    build:
      context: ../../..
      dockerfile: crates/ruvector-postgres/docker/Dockerfile
      args:
        PG_VERSION: 16
        RUST_VERSION: ${RUST_VERSION:-1.85}
    container_name: ruvector-postgres-pg16
    ports:
      - "5416:5432"
    environment:
      POSTGRES_USER: ruvector
      POSTGRES_PASSWORD: ruvector
      POSTGRES_DB: ruvector_test
    volumes:
      - postgres_data_pg16:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ruvector -d ruvector_test"]
      interval: 5s
      timeout: 5s
      retries: 10
    networks:
      - ruvector-network
    profiles:
      - matrix
# ===========================================================================
# Volumes (explicitly named, so names do not depend on the Compose project)
# ===========================================================================
volumes:
  # PostgreSQL data: one volume per server version
  postgres_data:
    name: ruvector-postgres-data
  postgres_data_pg14:
    name: ruvector-postgres-data-pg14
  postgres_data_pg15:
    name: ruvector-postgres-data-pg15
  postgres_data_pg16:
    name: ruvector-postgres-data-pg16

  # Cargo registry / git checkouts / build output, shared across containers
  cargo_cache:
    name: ruvector-cargo-cache
  cargo_git:
    name: ruvector-cargo-git
  target_cache:
    name: ruvector-target-cache

  # Output of the test and benchmark runners
  test_results:
    name: ruvector-test-results
  benchmark_results:
    name: ruvector-benchmark-results

# ===========================================================================
# Networks
# ===========================================================================
networks:
  # Bridge network with a fixed subnet.
  ruvector-network:
    name: ruvector-network
    driver: bridge
    ipam:
      config:
        - subnet: 172.28.0.0/16

View File

@@ -0,0 +1,326 @@
-- RuVector-Postgres Integration Test Initialization
-- Builds the integration-test environment: extension, one schema per test
-- area, tables, indexes and seed data.

-- The extension must exist before anything below is created.
CREATE EXTENSION IF NOT EXISTS ruvector;

-- Banner so the start of initialization is visible in the server log
DO $$
BEGIN
    RAISE NOTICE '========================================';
    RAISE NOTICE 'RuVector Integration Test Initialization';
    RAISE NOTICE '========================================';
END $$;

-- ============================================================================
-- Test Schemas (one per test area, each with a descriptive comment)
-- ============================================================================

-- pgvector SQL compatibility
CREATE SCHEMA IF NOT EXISTS test_pgvector;
COMMENT ON SCHEMA test_pgvector IS 'pgvector SQL compatibility tests';

-- Integrity system
CREATE SCHEMA IF NOT EXISTS test_integrity;
COMMENT ON SCHEMA test_integrity IS 'Integrity and mincut tests';

-- Hybrid search
CREATE SCHEMA IF NOT EXISTS test_hybrid;
COMMENT ON SCHEMA test_hybrid IS 'Hybrid BM25+vector search tests';

-- Multi-tenancy
CREATE SCHEMA IF NOT EXISTS test_tenancy;
COMMENT ON SCHEMA test_tenancy IS 'Multi-tenant isolation tests';

-- Self-healing
CREATE SCHEMA IF NOT EXISTS test_healing;
COMMENT ON SCHEMA test_healing IS 'Self-healing and recovery tests';

-- Performance
CREATE SCHEMA IF NOT EXISTS test_perf;
COMMENT ON SCHEMA test_perf IS 'Performance benchmarks';
-- ============================================================================
-- Test Tables
-- ============================================================================

-- ---- pgvector compatibility -------------------------------------------------

-- General-purpose 128-dimensional vectors with metadata and a category label
CREATE TABLE test_pgvector.vectors (
    id         SERIAL PRIMARY KEY,
    embedding  vector(128),
    metadata   JSONB,
    category   TEXT,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Target table for the HNSW index
CREATE TABLE test_pgvector.hnsw_vectors (
    id        SERIAL PRIMARY KEY,
    embedding vector(128),
    label     TEXT
);

-- Target table for the IVFFlat index
CREATE TABLE test_pgvector.ivfflat_vectors (
    id        SERIAL PRIMARY KEY,
    embedding vector(128),
    label     TEXT
);

-- ---- Integrity --------------------------------------------------------------

-- Graph nodes: 64-dimensional embedding, a layer number and a connection list
CREATE TABLE test_integrity.graph_nodes (
    id          SERIAL PRIMARY KEY,
    embedding   vector(64),
    layer       INTEGER DEFAULT 0,
    connections INTEGER[]
);

-- Time-stamped integrity measurements
CREATE TABLE test_integrity.metrics (
    id           SERIAL PRIMARY KEY,
    timestamp    TIMESTAMP DEFAULT NOW(),
    mincut_value INTEGER,
    load_factor  FLOAT,
    error_rate   FLOAT,
    state        TEXT
);

-- ---- Hybrid search ----------------------------------------------------------

-- Documents carrying both text and a 384-dimensional embedding
CREATE TABLE test_hybrid.documents (
    id         SERIAL PRIMARY KEY,
    title      TEXT NOT NULL,
    content    TEXT NOT NULL,
    embedding  vector(384),
    created_at TIMESTAMP DEFAULT NOW()
);

-- Per-query result rows with vector, text and fused scores
CREATE TABLE test_hybrid.search_results (
    id           SERIAL PRIMARY KEY,
    query_id     INTEGER,
    doc_id       INTEGER,
    vector_score FLOAT,
    text_score   FLOAT,
    fused_score  FLOAT,
    rank         INTEGER
);

-- ---- Multi-tenancy ----------------------------------------------------------

-- Tenant registry with per-tenant limits (default storage limit is 1 GiB)
CREATE TABLE test_tenancy.tenant_config (
    tenant_id         UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    name              TEXT NOT NULL,
    max_vectors       BIGINT DEFAULT 100000,
    max_storage_bytes BIGINT DEFAULT 1073741824,
    created_at        TIMESTAMP DEFAULT NOW()
);

-- Tenant-owned vectors; the primary key leads with tenant_id
CREATE TABLE test_tenancy.tenant_vectors (
    id         SERIAL,
    tenant_id  UUID NOT NULL,
    embedding  vector(128),
    metadata   JSONB,
    created_at TIMESTAMP DEFAULT NOW(),
    PRIMARY KEY (tenant_id, id)
);

-- Usage counters, one row per tenant
CREATE TABLE test_tenancy.tenant_usage (
    tenant_id     UUID PRIMARY KEY,
    vector_count  BIGINT DEFAULT 0,
    storage_bytes BIGINT DEFAULT 0,
    query_count   BIGINT DEFAULT 0,
    last_updated  TIMESTAMP DEFAULT NOW()
);

-- ---- Self-healing -----------------------------------------------------------

-- Observed health metrics with an optional threshold and status
CREATE TABLE test_healing.health_metrics (
    id           SERIAL PRIMARY KEY,
    timestamp    TIMESTAMP DEFAULT NOW(),
    metric_name  TEXT NOT NULL,
    metric_value FLOAT NOT NULL,
    threshold    FLOAT,
    status       TEXT
);

-- Log of remediation actions and their outcome
CREATE TABLE test_healing.remediation_log (
    id               SERIAL PRIMARY KEY,
    timestamp        TIMESTAMP DEFAULT NOW(),
    problem_type     TEXT NOT NULL,
    action_taken     TEXT NOT NULL,
    success          BOOLEAN,
    recovery_time_ms INTEGER,
    notes            TEXT
);

-- Problem/action/outcome records with a confidence value (default 0.5)
CREATE TABLE test_healing.learning_records (
    id              SERIAL PRIMARY KEY,
    timestamp       TIMESTAMP DEFAULT NOW(),
    problem_context JSONB,
    action          TEXT,
    outcome         JSONB,
    confidence      FLOAT DEFAULT 0.5
);

-- ---- Performance ------------------------------------------------------------

-- Vectors used by benchmarks (no seed data is inserted by this script)
CREATE TABLE test_perf.benchmark_vectors (
    id         SERIAL PRIMARY KEY,
    embedding  vector(128),
    metadata   JSONB,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Benchmark outcomes: timings, percentiles and throughput
CREATE TABLE test_perf.benchmark_results (
    id             SERIAL PRIMARY KEY,
    benchmark_name TEXT NOT NULL,
    timestamp      TIMESTAMP DEFAULT NOW(),
    iterations     INTEGER,
    total_time_ms  FLOAT,
    avg_time_ms    FLOAT,
    p50_time_ms    FLOAT,
    p95_time_ms    FLOAT,
    p99_time_ms    FLOAT,
    throughput     FLOAT,
    notes          TEXT
);
-- ============================================================================
-- Indexes
-- All HNSW indexes use L2 distance with m = 16, ef_construction = 64.
-- ============================================================================

-- pgvector compatibility: HNSW
CREATE INDEX test_pgvector_vectors_hnsw
    ON test_pgvector.vectors
    USING hnsw (embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);

CREATE INDEX test_pgvector_hnsw_idx
    ON test_pgvector.hnsw_vectors
    USING hnsw (embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);

-- pgvector compatibility: IVFFlat with 100 lists
CREATE INDEX test_pgvector_ivfflat_idx
    ON test_pgvector.ivfflat_vectors
    USING ivfflat (embedding vector_l2_ops)
    WITH (lists = 100);

-- Performance benchmarks
CREATE INDEX test_perf_benchmark_hnsw
    ON test_perf.benchmark_vectors
    USING hnsw (embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);

-- Hybrid search: vector side
CREATE INDEX test_hybrid_docs_embedding
    ON test_hybrid.documents
    USING hnsw (embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);

-- Hybrid search: full-text side (GIN over the English tsvector of content)
CREATE INDEX test_hybrid_docs_content
    ON test_hybrid.documents
    USING gin (to_tsvector('english', content));

-- Multi-tenancy: tenant lookup plus vector search
CREATE INDEX test_tenancy_vectors_tenant
    ON test_tenancy.tenant_vectors (tenant_id);

CREATE INDEX test_tenancy_vectors_hnsw
    ON test_tenancy.tenant_vectors
    USING hnsw (embedding vector_l2_ops)
    WITH (m = 16, ef_construction = 64);
-- ============================================================================
-- Test Data
-- ============================================================================
-- Random embeddings in the INSERT ... SELECT statements are built by a scalar
-- subquery that references the outer row (`WHERE i IS NOT NULL`). Without that
-- reference PostgreSQL evaluates the subquery only once and every row would
-- receive the same vector.

-- pgvector compatibility: 1000 vectors spread over categories A/B/C
INSERT INTO test_pgvector.vectors (embedding, metadata, category)
SELECT
    (SELECT array_agg(random()::real)
       FROM generate_series(1, 128)
      WHERE i IS NOT NULL)::vector,
    jsonb_build_object('idx', i, 'batch', 'init'),
    CASE WHEN i % 3 = 0 THEN 'A' WHEN i % 3 = 1 THEN 'B' ELSE 'C' END
FROM generate_series(1, 1000) i;

-- HNSW test data: 500 labelled vectors
INSERT INTO test_pgvector.hnsw_vectors (embedding, label)
SELECT
    (SELECT array_agg(random()::real)
       FROM generate_series(1, 128)
      WHERE i IS NOT NULL)::vector,
    'hnsw_' || i
FROM generate_series(1, 500) i;

-- IVFFlat test data: 500 labelled vectors
INSERT INTO test_pgvector.ivfflat_vectors (embedding, label)
SELECT
    (SELECT array_agg(random()::real)
       FROM generate_series(1, 128)
      WHERE i IS NOT NULL)::vector,
    'ivf_' || i
FROM generate_series(1, 500) i;

-- Hybrid search: five documents, each with its own 384-dimensional embedding
-- (every subquery in a VALUES list is a separate expression, evaluated separately)
INSERT INTO test_hybrid.documents (title, content, embedding)
VALUES
    ('Machine Learning Basics', 'Introduction to supervised and unsupervised learning algorithms.',
     (SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
    ('Deep Learning', 'Neural networks and deep learning architectures for complex pattern recognition.',
     (SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
    ('Natural Language Processing', 'Text processing and understanding using transformer models.',
     (SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
    ('Computer Vision', 'Image recognition and object detection with convolutional networks.',
     (SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector),
    ('Reinforcement Learning', 'Agent-based learning through reward optimization.',
     (SELECT array_agg(random()::real) FROM generate_series(1, 384))::vector);

-- Multi-tenancy: three tenants with fixed, predictable UUIDs
INSERT INTO test_tenancy.tenant_config (tenant_id, name, max_vectors, max_storage_bytes)
VALUES
    ('00000000-0000-0000-0000-000000000001', 'Tenant A', 100000, 1073741824),
    ('00000000-0000-0000-0000-000000000002', 'Tenant B', 50000, 536870912),
    ('00000000-0000-0000-0000-000000000003', 'Tenant C', 200000, 2147483648);

-- 300 vectors distributed round-robin over the three tenants.
-- The concatenation yields text, and text has no assignment cast to uuid, so
-- the explicit ::uuid cast is required for the INSERT to be accepted.
INSERT INTO test_tenancy.tenant_vectors (tenant_id, embedding, metadata)
SELECT
    ('00000000-0000-0000-0000-00000000000' || ((i % 3) + 1)::text)::uuid,
    (SELECT array_agg(random()::real)
       FROM generate_series(1, 128)
      WHERE i IS NOT NULL)::vector,
    jsonb_build_object('idx', i)
FROM generate_series(1, 300) i;

-- Seed usage tracking from the rows just inserted
INSERT INTO test_tenancy.tenant_usage (tenant_id, vector_count, storage_bytes)
SELECT
    tenant_id,
    COUNT(*),
    COUNT(*) * 512  -- Approximate bytes per vector
FROM test_tenancy.tenant_vectors
GROUP BY tenant_id;
-- ============================================================================
-- Row-Level Security Setup
-- ============================================================================

-- Turn RLS on for the tenant-owned vectors
ALTER TABLE test_tenancy.tenant_vectors ENABLE ROW LEVEL SECURITY;

-- Tenant isolation: when the session setting app.tenant_id is non-empty, only
-- rows of that tenant match. When it is unset or empty the COALESCE falls back
-- to the row's own tenant_id, so every row matches.
CREATE POLICY tenant_isolation
    ON test_tenancy.tenant_vectors
    USING (
        tenant_id = COALESCE(
            NULLIF(current_setting('app.tenant_id', true), '')::uuid,
            tenant_id
        )
    );
-- ============================================================================
-- Statistics and Verification
-- ============================================================================

-- Refresh planner statistics for the vector tables
ANALYZE test_pgvector.vectors;
ANALYZE test_pgvector.hnsw_vectors;
ANALYZE test_pgvector.ivfflat_vectors;
ANALYZE test_hybrid.documents;
ANALYZE test_tenancy.tenant_vectors;
ANALYZE test_perf.benchmark_vectors;

-- Summarise what was created and print extension details as NOTICE lines
DO $$
DECLARE
    vec_count    INTEGER;
    idx_count    INTEGER;
    schema_count INTEGER;
BEGIN
    vec_count    := (SELECT COUNT(*) FROM test_pgvector.vectors);
    idx_count    := (SELECT COUNT(*) FROM pg_indexes WHERE schemaname LIKE 'test_%');
    schema_count := (SELECT COUNT(*) FROM information_schema.schemata WHERE schema_name LIKE 'test_%');

    RAISE NOTICE '========================================';
    RAISE NOTICE 'Integration Test Setup Complete';
    RAISE NOTICE '========================================';
    RAISE NOTICE 'Test schemas created: %', schema_count;
    RAISE NOTICE 'Test vectors inserted: %', vec_count;
    RAISE NOTICE 'Test indexes created: %', idx_count;
    RAISE NOTICE '';
    RAISE NOTICE 'Extension version: %', ruvector_version();
    RAISE NOTICE 'SIMD info: %', ruvector_simd_info();
    RAISE NOTICE '========================================';
END $$;

View File

@@ -0,0 +1,91 @@
-- RuVector-Postgres Initialization Script
-- Installs the extension, creates a small test schema and grants access to it.

-- Extension first: everything else depends on it
CREATE EXTENSION IF NOT EXISTS ruvector;

-- Schema holding the basic-usage test objects
CREATE SCHEMA IF NOT EXISTS ruvector_test;

-- Minimal table for basic usage tests
CREATE TABLE ruvector_test.test_basic (
    id         SERIAL PRIMARY KEY,
    name       TEXT NOT NULL,
    category   TEXT,
    metadata   JSONB,
    created_at TIMESTAMP DEFAULT NOW()
);

-- Optional application role: created only when no role named ruvector exists
DO $$
BEGIN
    IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'ruvector') THEN
        CREATE ROLE ruvector WITH LOGIN PASSWORD 'ruvector';
    END IF;
END $$;

-- Everyone may use the schema; the ruvector role gets full rights on the
-- schema and on the tables and sequences that exist in it at this point
GRANT USAGE ON SCHEMA ruvector_test TO PUBLIC;
GRANT ALL ON SCHEMA ruvector_test TO ruvector;
GRANT ALL ON ALL TABLES IN SCHEMA ruvector_test TO ruvector;
GRANT ALL ON ALL SEQUENCES IN SCHEMA ruvector_test TO ruvector;
-- Smoke test: call one function from each module and print the result as a
-- NOTICE. Any function that raises aborts this block.
DO $$
DECLARE
    version_info TEXT;
    simd_info    TEXT;
BEGIN
    -- Version function
    version_info := ruvector_version();
    RAISE NOTICE 'RuVector-Postgres initialized successfully';
    RAISE NOTICE 'Extension version: %', version_info;

    -- SIMD info function
    simd_info := ruvector_simd_info();
    RAISE NOTICE 'SIMD info: %', simd_info;

    -- Distance functions on plain real[] arrays
    RAISE NOTICE 'Testing distance functions...';
    RAISE NOTICE 'Inner product: %', inner_product_arr(ARRAY[1.0, 2.0, 3.0]::real[], ARRAY[1.0, 2.0, 3.0]::real[]);
    RAISE NOTICE 'Cosine distance: %', cosine_distance_arr(ARRAY[1.0, 0.0, 0.0]::real[], ARRAY[0.0, 1.0, 0.0]::real[]);
    RAISE NOTICE 'All basic tests passed!';

    -- ================================================================
    -- v0.3 Module Tests
    -- ================================================================
    RAISE NOTICE '--- v0.3 Module Tests ---';

    -- Solver: PageRank on a three-node cycle
    RAISE NOTICE 'Solver PageRank: %', ruvector_pagerank('{"edges":[[0,1],[1,2],[2,0]]}'::jsonb);

    -- Solver: info (static message only; no function is called here)
    RAISE NOTICE 'Solver algorithms available';

    -- Solver: analysis of a 3x3 matrix given as sparse entries
    RAISE NOTICE 'Matrix analyze: %', ruvector_matrix_analyze('{"rows":3,"cols":3,"entries":[[0,0,4],[0,1,-1],[1,0,-1],[1,1,4],[2,2,2]]}'::jsonb);

    -- Math: distances/divergences between two 2-bin distributions
    RAISE NOTICE 'Wasserstein distance: %', ruvector_wasserstein_distance(ARRAY[0.5,0.5]::real[], ARRAY[0.3,0.7]::real[]);
    RAISE NOTICE 'KL divergence: %', ruvector_kl_divergence(ARRAY[0.5,0.5]::real[], ARRAY[0.3,0.7]::real[]);
    RAISE NOTICE 'Jensen-Shannon: %', ruvector_jensen_shannon(ARRAY[0.5,0.5]::real[], ARRAY[0.3,0.7]::real[]);

    -- TDA: persistent homology and Betti numbers on small 2-D point sets
    RAISE NOTICE 'Persistent homology: %', ruvector_persistent_homology('[[1,0],[0,1],[-1,0],[0,-1]]'::jsonb, 1, 3.0);
    RAISE NOTICE 'Betti numbers: %', ruvector_betti_numbers('[[0,0],[1,0],[0,1]]'::jsonb, 1.5);

    -- Attention: linear attention and the built-in benchmark
    RAISE NOTICE 'Linear attention: %', ruvector_linear_attention(ARRAY[1,0,0,0]::real[], '[[1,0,0,0],[0,1,0,0]]'::jsonb, '[[5,10],[15,20]]'::jsonb);
    RAISE NOTICE 'Attention benchmark: %', ruvector_attention_benchmark(64, 128, 'scaled_dot');

    RAISE NOTICE 'All v0.3 tests passed!';
END $$;

View File

@@ -0,0 +1,81 @@
# PostgreSQL Configuration for RuVector Development/Testing
# Tuned for vector workloads and a development workflow; not a production profile.
# =============================================================================
# Memory Settings
# =============================================================================
# work_mem applies per sort/hash operation, so total usage can be a multiple of it.
shared_buffers = 256MB
effective_cache_size = 1GB
work_mem = 64MB
maintenance_work_mem = 256MB
# =============================================================================
# Checkpoint / WAL Settings
# =============================================================================
checkpoint_completion_target = 0.9
wal_buffers = 16MB
min_wal_size = 1GB
max_wal_size = 4GB
# =============================================================================
# Connection Settings
# =============================================================================
# Listen on all interfaces.
max_connections = 100
listen_addresses = '*'
# =============================================================================
# Logging
# =============================================================================
# With logging_collector on, stderr output is captured into files under
# log_directory, rotated daily or at 100MB.
# NOTE(review): this presumably leaves `docker logs` nearly empty when this file
# is in effect - confirm that is intended for the dev container.
log_destination = 'stderr'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_rotation_age = 1d
log_rotation_size = 100MB
log_min_messages = info
log_min_error_statement = error
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
log_checkpoints = on
log_connections = on
log_disconnections = on
log_lock_waits = on
# Log DDL statements only; log_temp_files = 0 logs every temporary file.
log_statement = 'ddl'
log_temp_files = 0
# =============================================================================
# Development Settings
# =============================================================================
# Send NOTICE-level messages and above to clients; the debug_print_* tree dumps
# stay off.
client_min_messages = notice
debug_print_parse = off
debug_print_rewritten = off
debug_print_plan = off
debug_pretty_print = on
# =============================================================================
# Performance Settings
# =============================================================================
# NOTE(review): random_page_cost and effective_io_concurrency look chosen for
# SSD-class storage - confirm against the actual volume backing.
random_page_cost = 1.1
effective_io_concurrency = 200
default_statistics_target = 100
# Parallel query: up to 2 workers per Gather node, 4 parallel workers in total
max_parallel_workers_per_gather = 2
max_parallel_workers = 4
max_parallel_maintenance_workers = 2
parallel_setup_cost = 100
parallel_tuple_cost = 0.01
# =============================================================================
# Extension Settings
# =============================================================================
# Preload shared libraries (if needed); currently disabled
# shared_preload_libraries = ''
# =============================================================================
# JIT Settings
# =============================================================================
# JIT is enabled; the *_above_cost values are the plan-cost thresholds at which
# JIT compilation, inlining and optimization start.
jit = on
jit_above_cost = 100000
jit_inline_above_cost = 500000
jit_optimize_above_cost = 500000

View File

@@ -0,0 +1,227 @@
#!/usr/bin/env bash
# RuVector-Postgres Docker Hub Publication Script
# Builds and publishes multi-arch Docker images to Docker Hub
#
# Usage:
# ./publish-dockerhub.sh # Build and push v2.0.0
# ./publish-dockerhub.sh --dry-run # Build only, don't push
# ./publish-dockerhub.sh --pg-version 16 # Build for specific PG version
# ./publish-dockerhub.sh --all-versions # Build for all PG versions
# Strict mode: abort on errors, on unset variables, and on failures inside pipelines
set -euo pipefail

# Configuration (registry, image name and Rust toolchain can be overridden via
# the environment)
DOCKER_REGISTRY="${DOCKER_REGISTRY:-ruvector}"
IMAGE_NAME="${IMAGE_NAME:-ruvector-postgres}"
VERSION="2.0.0"
# Default matches the Dockerfile's own `ARG RUST_VERSION=1.85`, so published
# images are built with the toolchain the Dockerfile was written for.
RUST_VERSION="${RUST_VERSION:-1.85}"

# Supported PostgreSQL versions
PG_VERSIONS=(14 15 16 17)
DEFAULT_PG_VERSION=17

# Colors (ANSI escape sequences, interpreted by the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Flags (defaults; changed by command-line options)
DRY_RUN=false
ALL_VERSIONS=false
SINGLE_PG_VERSION=""
PUSH_LATEST=true
# Log helpers: print "<color>[LEVEL]<reset> message" to stdout, interpreting the
# escape sequences stored in the color variables.
log_info()    { printf '%b\n' "${BLUE}[INFO]${NC} $1"; }
log_success() { printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"; }
log_warn()    { printf '%b\n' "${YELLOW}[WARN]${NC} $1"; }
log_error()   { printf '%b\n' "${RED}[ERROR]${NC} $1"; }
# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --pg-version)
            # Under `set -u` a missing value would otherwise die with a bare
            # "unbound variable" error; fail with a usable message instead.
            if [[ $# -lt 2 ]]; then
                log_error "--pg-version requires a value (one of: ${PG_VERSIONS[*]})"
                exit 1
            fi
            SINGLE_PG_VERSION="$2"
            # Reject versions the image is not built for before any build starts.
            if [[ " ${PG_VERSIONS[*]} " != *" ${SINGLE_PG_VERSION} "* ]]; then
                log_error "Unsupported PostgreSQL version: ${SINGLE_PG_VERSION} (supported: ${PG_VERSIONS[*]})"
                exit 1
            fi
            shift 2
            ;;
        --all-versions)
            ALL_VERSIONS=true
            shift
            ;;
        --no-latest)
            PUSH_LATEST=false
            shift
            ;;
        --help)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo " --dry-run Build only, don't push to Docker Hub"
            echo " --pg-version N Build for specific PostgreSQL version (14-17)"
            echo " --all-versions Build for all supported PostgreSQL versions"
            echo " --no-latest Don't tag as 'latest'"
            echo " --help Show this help"
            exit 0
            ;;
        *)
            log_error "Unknown option: $1"
            exit 1
            ;;
    esac
done
# Versions to build: an explicit --pg-version wins, then --all-versions,
# otherwise only the default version.
VERSIONS_TO_BUILD=("$DEFAULT_PG_VERSION")
if [[ -n "$SINGLE_PG_VERSION" ]]; then
    VERSIONS_TO_BUILD=("$SINGLE_PG_VERSION")
elif [[ "$ALL_VERSIONS" == "true" ]]; then
    VERSIONS_TO_BUILD=("${PG_VERSIONS[@]}")
fi

# Resolve the script directory and the project root three levels above it
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

log_info "=== RuVector-Postgres Docker Hub Publication ==="
log_info "Version: ${VERSION}"
log_info "Registry: ${DOCKER_REGISTRY}/${IMAGE_NAME}"
log_info "PostgreSQL versions: ${VERSIONS_TO_BUILD[*]}"
log_info "Dry run: ${DRY_RUN}"

# Docker itself is mandatory
if ! command -v docker > /dev/null 2>&1; then
    log_error "Docker is not installed"
    exit 1
fi

# buildx is optional: without it only single-arch builds are done
MULTI_ARCH=false
if docker buildx version > /dev/null 2>&1; then
    log_info "Docker buildx available for multi-arch builds"
    MULTI_ARCH=true
else
    log_warn "Docker buildx not available, multi-arch builds disabled"
fi

# Login check (skipped for dry runs): if `docker info` shows no "Username"
# line, fall back to a build-only run instead of failing at push time.
if [[ "$DRY_RUN" == "false" ]] && ! docker info 2>/dev/null | grep -q "Username"; then
    log_warn "Not logged into Docker Hub. Please run: docker login"
    log_warn "Continuing with build only..."
    DRY_RUN=true
fi

# Ensure the dedicated buildx builder exists, then select it
if [[ "$MULTI_ARCH" == "true" ]]; then
    BUILDER_NAME="ruvector-builder"
    if ! docker buildx inspect "$BUILDER_NAME" > /dev/null 2>&1; then
        log_info "Creating buildx builder: ${BUILDER_NAME}"
        docker buildx create --name "$BUILDER_NAME" --driver docker-container --bootstrap
    fi
    docker buildx use "$BUILDER_NAME"
fi
# Build (and, unless DRY_RUN is true, push) the image for one PostgreSQL version.
#
# Arguments:
#   $1  PostgreSQL major version, passed to the Dockerfile as PG_VERSION
#
# Reads: DOCKER_REGISTRY, IMAGE_NAME, VERSION, RUST_VERSION, DEFAULT_PG_VERSION,
#        PUSH_LATEST, MULTI_ARCH, DRY_RUN, PROJECT_ROOT.
# Optional: PLATFORMS - comma-separated buildx platform list
#           (default: linux/amd64,linux/arm64).
build_image() {
    local pg_version=$1
    local image="${DOCKER_REGISTRY}/${IMAGE_NAME}"
    local dockerfile="${PROJECT_ROOT}/crates/ruvector-postgres/docker/Dockerfile"
    local platforms="${PLATFORMS:-linux/amd64,linux/arm64}"
    local tag

    # Version tags
    local -a tags=(
        "${image}:${VERSION}-pg${pg_version}"
        "${image}:v${VERSION}-pg${pg_version}"
        "${image}:pg${pg_version}"
    )

    # Latest tag for default PG version
    if [[ "$pg_version" == "$DEFAULT_PG_VERSION" && "$PUSH_LATEST" == "true" ]]; then
        tags+=(
            "${image}:latest"
            "${image}:${VERSION}"
            "${image}:v${VERSION}"
        )
    fi

    log_info "Building image for PostgreSQL ${pg_version}..."
    log_info "Tags: ${tags[*]}"

    # Collect "-t <tag>" pairs in an array so each tag stays one argument.
    local -a tag_args=()
    for tag in "${tags[@]}"; do
        tag_args+=(-t "${tag}")
    done

    if [[ "$MULTI_ARCH" == "true" ]]; then
        # Multi-arch build (amd64 + arm64 by default)
        local -a output_args=()
        if [[ "$DRY_RUN" == "false" ]]; then
            output_args=(--push)
        elif [[ "${platforms}" != *,* ]]; then
            output_args=(--load)
        else
            # `--load` cannot export a multi-platform result to the local image
            # store with the default docker exporter, so a dry run only
            # validates the build and leaves the result in the build cache.
            log_warn "Dry run: multi-platform image is built but not loaded into the local image store"
        fi

        docker buildx build \
            --platform "${platforms}" \
            -f "${dockerfile}" \
            --build-arg PG_VERSION="${pg_version}" \
            --build-arg RUST_VERSION="${RUST_VERSION}" \
            "${tag_args[@]}" \
            ${output_args[@]+"${output_args[@]}"} \
            "${PROJECT_ROOT}"
    else
        # Single-arch build
        docker build \
            -f "${dockerfile}" \
            --build-arg PG_VERSION="${pg_version}" \
            --build-arg RUST_VERSION="${RUST_VERSION}" \
            "${tag_args[@]}" \
            "${PROJECT_ROOT}"

        # Push if not dry run
        if [[ "$DRY_RUN" == "false" ]]; then
            for tag in "${tags[@]}"; do
                docker push "$tag"
            done
        fi
    fi

    log_success "Built image for PostgreSQL ${pg_version}"
}
# Build every requested PostgreSQL version in turn
for pg_ver in "${VERSIONS_TO_BUILD[@]}"; do
    build_image "$pg_ver"
done

# Summary: one line per image that was built
echo
log_success "=== Publication Complete ==="
log_info "Images built:"
for pg_ver in "${VERSIONS_TO_BUILD[@]}"; do
    printf '%s\n' " - ${DOCKER_REGISTRY}/${IMAGE_NAME}:${VERSION}-pg${pg_ver}"
done

if [[ "$DRY_RUN" != "true" ]]; then
    log_success "Images pushed to Docker Hub!"
    log_info "Pull with: docker pull ${DOCKER_REGISTRY}/${IMAGE_NAME}:${VERSION}"
else
    log_warn "Dry run mode - images were NOT pushed to Docker Hub"
    log_info "To push, run without --dry-run flag"
fi

# Usage examples for the published tags
echo
log_info "=== Usage Examples ==="
printf '%s\n' \
    " docker pull ${DOCKER_REGISTRY}/${IMAGE_NAME}:latest" \
    " docker pull ${DOCKER_REGISTRY}/${IMAGE_NAME}:${VERSION}" \
    " docker pull ${DOCKER_REGISTRY}/${IMAGE_NAME}:${VERSION}-pg17" \
    " docker pull ${DOCKER_REGISTRY}/${IMAGE_NAME}:pg16" \
    "" \
    " docker run -d -p 5432:5432 ${DOCKER_REGISTRY}/${IMAGE_NAME}:latest"

View File

@@ -0,0 +1,560 @@
#!/usr/bin/env bash
# RuVector-Postgres Integration Test Runner
# Builds Docker environment, runs comprehensive integration tests, and reports results
# Strict mode: stop on a failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# ANSI color escapes used by the log_* helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Locations, all derived from where this script lives
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
COMPOSE_FILE="${SCRIPT_DIR}/docker-compose.integration.yml"
TEST_RESULTS_DIR="${PROJECT_ROOT}/test-results/integration"

# Container names used by this run
POSTGRES_CONTAINER="ruvector-postgres-integration"
TEST_RUNNER_CONTAINER="ruvector-integration-runner"

# Tunables: a non-empty value already present in the environment wins
: "${PG_VERSION:=17}"
: "${RUST_LOG:=info}"
: "${TEST_TIMEOUT:=600}"
: "${KEEP_RUNNING:=false}"

# Test categories; each name is passed to `cargo test` as a filter and is also
# the basename of that category's log file
TEST_CATEGORIES=(
    "pgvector_compat"
    "integrity_tests"
    "hybrid_search_tests"
    "tenancy_tests"
    "healing_tests"
    "perf_tests"
)
# Functions
# Logging helpers. Each prints its first argument to stdout behind a colored
# severity tag; backslash escapes in the tag and in the message are interpreted.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

log_success() {
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
}

# Section banner: the title is wrapped in "=== ... ===" with a blank line on
# either side.
log_section() {
    printf '%b\n' "\n${CYAN}=== $1 ===${NC}\n"
}
# Tear down the Docker resources used by this run.
# When KEEP_RUNNING is "true" nothing is removed so the environment can be
# inspected. Every removal is best-effort: errors are silenced and ignored.
cleanup() {
    if [ "${KEEP_RUNNING}" = "true" ]; then
        log_info "Keeping containers running for debugging"
        return 0
    fi

    log_info "Cleaning up Docker containers..."
    docker-compose -f "${COMPOSE_FILE}" down -v 2>/dev/null || true

    local leftover
    for leftover in "${POSTGRES_CONTAINER}" "${TEST_RUNNER_CONTAINER}"; do
        docker rm -f "${leftover}" 2>/dev/null || true
    done
}
# Poll the PostgreSQL container with pg_isready, once per second, up to 60 times.
# Returns 0 as soon as the server accepts connections; otherwise prints the last
# 50 lines of the container log and returns 1.
wait_for_postgres() {
    log_info "Waiting for PostgreSQL to be ready..."

    local max_attempts=60
    local attempt
    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        if docker exec "${POSTGRES_CONTAINER}" pg_isready -U ruvector -d ruvector_test &>/dev/null; then
            log_success "PostgreSQL is ready!"
            return 0
        fi
        # One progress dot per failed probe
        printf '.'
        sleep 1
    done

    log_error "PostgreSQL failed to start after ${max_attempts} seconds"
    docker logs "${POSTGRES_CONTAINER}" 2>&1 | tail -50
    return 1
}
# Check that the RuVector extension answers inside the running container by
# calling ruvector_version() and ruvector_simd_info() through psql.
# Returns 1 (after logging an error) when the psql call fails.
verify_extension() {
    log_info "Verifying RuVector extension..."

    if ! docker exec "${POSTGRES_CONTAINER}" psql -U ruvector -d ruvector_test -c "
SELECT ruvector_version();
SELECT ruvector_simd_info();
"; then
        log_error "Failed to verify RuVector extension"
        return 1
    fi

    log_success "RuVector extension verified"
}
# Build the extension image from the repository root with BuildKit enabled and
# tag it ruvector-postgres:pg<PG_VERSION>-test.
# Side effect: changes the working directory to PROJECT_ROOT.
# Returns 1 (after logging an error) when the docker build fails.
build_extension() {
    log_section "Building RuVector Extension"
    cd "${PROJECT_ROOT}"

    local -a build_cmd=(
        docker build
        -f crates/ruvector-postgres/docker/Dockerfile
        -t "ruvector-postgres:pg${PG_VERSION}-test"
        --build-arg PG_VERSION="${PG_VERSION}"
        --progress=plain
        .
    )

    if ! DOCKER_BUILDKIT=1 "${build_cmd[@]}"; then
        log_error "Failed to build extension"
        return 1
    fi

    log_success "Extension built successfully"
}
# Start the PostgreSQL test container from the image built by build_extension,
# publish it on host port 5433, then wait for it to accept connections and
# verify the extension.
#
# A container left over from an earlier run (for example one started with
# --keep-running) is removed first: `docker run --name` refuses to start while
# another container already owns the name.
start_postgres() {
    log_section "Starting PostgreSQL Container"

    # Best-effort: succeeds silently when no such container exists
    docker rm -f "${POSTGRES_CONTAINER}" >/dev/null 2>&1 || true

    docker run -d \
        --name "${POSTGRES_CONTAINER}" \
        -e POSTGRES_USER=ruvector \
        -e POSTGRES_PASSWORD=ruvector \
        -e POSTGRES_DB=ruvector_test \
        -p 5433:5432 \
        --health-cmd="pg_isready -U ruvector -d ruvector_test" \
        --health-interval=5s \
        --health-timeout=5s \
        --health-retries=10 \
        "ruvector-postgres:pg${PG_VERSION}-test"

    wait_for_postgres
    verify_extension
}
# Create the per-category test schemas, tables, HNSW indexes and seed data
# inside the test database.
#
# Returns 1 (after logging an error) when psql reports a failure.
setup_test_schema() {
    log_info "Setting up test schema..."

    # -i keeps stdin open; without it `docker exec` does not forward the
    # here-document and psql would exit successfully having run nothing.
    # ON_ERROR_STOP makes psql stop and exit non-zero on the first failing
    # statement instead of carrying on.
    if ! docker exec -i "${POSTGRES_CONTAINER}" psql -v ON_ERROR_STOP=1 -U ruvector -d ruvector_test << 'EOF'
-- Create test schemas for each category
CREATE SCHEMA IF NOT EXISTS test_pgvector;
CREATE SCHEMA IF NOT EXISTS test_integrity;
CREATE SCHEMA IF NOT EXISTS test_hybrid;
CREATE SCHEMA IF NOT EXISTS test_tenancy;
CREATE SCHEMA IF NOT EXISTS test_healing;
CREATE SCHEMA IF NOT EXISTS test_perf;
-- Grant permissions (GRANT has no "ALL SCHEMAS IN DATABASE" form, so the
-- schemas are listed explicitly)
GRANT ALL ON SCHEMA
    test_pgvector, test_integrity, test_hybrid,
    test_tenancy, test_healing, test_perf
TO ruvector;
-- Create test tables
CREATE TABLE IF NOT EXISTS test_pgvector.vectors (
    id SERIAL PRIMARY KEY,
    embedding vector(128),
    metadata JSONB,
    created_at TIMESTAMP DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS test_perf.benchmark_vectors (
    id SERIAL PRIMARY KEY,
    embedding vector(128),
    metadata JSONB,
    created_at TIMESTAMP DEFAULT NOW()
);
-- Create indexes
CREATE INDEX IF NOT EXISTS test_pgvector_hnsw ON test_pgvector.vectors
    USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
CREATE INDEX IF NOT EXISTS test_perf_hnsw ON test_perf.benchmark_vectors
    USING hnsw (embedding vector_l2_ops) WITH (m = 16, ef_construction = 64);
-- Insert test data
-- The inner query references the outer row (i) on purpose: an uncorrelated
-- scalar subquery is evaluated only once, which would store the same
-- "random" vector in all 1000 rows.
INSERT INTO test_pgvector.vectors (embedding, metadata)
SELECT
    (SELECT array_agg(random()::real) FROM generate_series(1, 128) WHERE i IS NOT NULL)::vector,
    jsonb_build_object('idx', i)
FROM generate_series(1, 1000) i;
ANALYZE test_pgvector.vectors;
\echo 'Test schema setup complete'
EOF
    then
        log_error "Failed to set up test schema"
        return 1
    fi

    log_success "Test schema created"
}
# Run the crate's library unit tests in release mode, mirroring all output to
# unit_tests.log in the results directory.
# Side effect: changes the working directory to the ruvector-postgres crate.
# Returns the exit status of the `cargo test | tee` pipeline.
run_unit_tests() {
    log_section "Running Unit Tests"
    cd "${PROJECT_ROOT}/crates/ruvector-postgres"

    local log_file="${TEST_RESULTS_DIR}/unit_tests.log"
    # Release mode for performance; 4 test threads
    local -a cargo_args=(
        test
        --release
        --features "pg${PG_VERSION},graph-complete"
        --lib
        --
        --test-threads=4
    )

    cargo "${cargo_args[@]}" 2>&1 | tee "${log_file}"
    local exit_code=$?

    case ${exit_code} in
        0) log_success "Unit tests passed" ;;
        *) log_error "Unit tests failed" ;;
    esac
    return ${exit_code}
}
# Run the `integration` test target once per entry in TEST_CATEGORIES, using
# the category name as the test filter and writing each run to <category>.log.
# All categories are attempted even after a failure.
# Side effects: changes directory to the crate and exports DATABASE_URL,
# RUST_LOG and RUST_BACKTRACE.
# Returns 1 when at least one category failed, 0 otherwise.
run_integration_tests() {
    log_section "Running Integration Tests"
    cd "${PROJECT_ROOT}/crates/ruvector-postgres"

    export DATABASE_URL="postgresql://ruvector:ruvector@localhost:5433/ruvector_test"
    export RUST_LOG="${RUST_LOG}"
    export RUST_BACKTRACE=1

    local failed_categories=()
    local cargo_status

    for category in "${TEST_CATEGORIES[@]}"; do
        log_info "Running ${category} tests..."

        cargo test \
            --release \
            --features "pg${PG_VERSION},graph-complete" \
            --test integration \
            "${category}" \
            -- \
            --test-threads=1 \
            2>&1 | tee "${TEST_RESULTS_DIR}/${category}.log"
        # Status of cargo itself, not of tee
        cargo_status=${PIPESTATUS[0]}

        if ((cargo_status == 0)); then
            log_success "${category} tests passed"
        else
            log_error "${category} tests failed"
            failed_categories+=("${category}")
        fi
    done

    if ((${#failed_categories[@]} > 0)); then
        log_error "Failed test categories: ${failed_categories[*]}"
        return 1
    fi

    log_success "All integration tests passed"
    return 0
}
# Generate the pgvector-compatibility SQL script under SCRIPT_DIR/test_sql and
# execute it with psql inside the PostgreSQL container, mirroring the output
# to sql_tests.log.
#
# Returns 1 (after logging an error) when psql reports a failure.
run_sql_tests() {
    log_section "Running SQL Integration Tests"
    local test_sql_dir="${SCRIPT_DIR}/test_sql"
    local sql_file="${test_sql_dir}/pgvector_compat.sql"
    mkdir -p "${test_sql_dir}"

    # Generate and run SQL tests
    cat > "${sql_file}" << 'EOF'
-- pgvector compatibility tests
\echo 'Testing pgvector compatibility...'
-- Test vector type
SELECT '[1,2,3]'::vector AS test_vector;
-- Test operators
SELECT '[1,2,3]'::vector <-> '[4,5,6]'::vector AS l2_distance;
SELECT '[1,2,3]'::vector <=> '[4,5,6]'::vector AS cosine_distance;
SELECT '[1,2,3]'::vector <#> '[4,5,6]'::vector AS inner_product;
-- Test nearest neighbor search
SELECT id, embedding <-> '[0.5, 0.5, 0.5]'::vector(3) AS distance
FROM (VALUES (1, '[1,2,3]'::vector), (2, '[2,3,4]'::vector)) AS t(id, embedding)
ORDER BY embedding <-> '[0.5, 0.5, 0.5]'::vector(3)
LIMIT 2;
\echo 'pgvector compatibility tests passed!'
EOF

    # -i keeps stdin open so the redirected script actually reaches psql;
    # without it `docker exec` gives psql an empty stdin and nothing is run.
    # ON_ERROR_STOP turns the first SQL error into a non-zero psql exit status.
    docker exec -i "${POSTGRES_CONTAINER}" psql -v ON_ERROR_STOP=1 -U ruvector -d ruvector_test \
        -f /dev/stdin < "${sql_file}" \
        2>&1 | tee "${TEST_RESULTS_DIR}/sql_tests.log"
    # Status of psql (via docker exec), not of tee
    local psql_status=${PIPESTATUS[0]}

    if [ "${psql_status}" -ne 0 ]; then
        log_error "SQL integration tests failed (psql exit code ${psql_status})"
        return 1
    fi

    log_success "SQL integration tests completed"
}
# Run a small insert and nearest-neighbour query benchmark inside the
# PostgreSQL container; timings are printed by psql (\timing) and by the
# RAISE NOTICE statements.
#
# Returns 1 (after logging an error) when psql reports a failure.
run_performance_benchmark() {
    log_section "Running Performance Benchmark"

    # -i keeps stdin open; without it `docker exec` does not forward the
    # here-document and the benchmark would silently not run.
    # ON_ERROR_STOP makes psql exit non-zero on the first failing statement.
    if ! docker exec -i "${POSTGRES_CONTAINER}" psql -v ON_ERROR_STOP=1 -U ruvector -d ruvector_test << 'EOF'
\timing on
-- Insert benchmark
\echo 'Insert benchmark (1000 vectors)...'
-- The inner query references the outer row (i) on purpose: an uncorrelated
-- scalar subquery is evaluated only once, which would insert 1000 copies of
-- a single vector.
INSERT INTO test_perf.benchmark_vectors (embedding, metadata)
SELECT
    (SELECT array_agg(random()::real) FROM generate_series(1, 128) WHERE i IS NOT NULL)::vector,
    jsonb_build_object('idx', i)
FROM generate_series(1, 1000) i;
-- Query benchmark
\echo 'Query benchmark (100 queries)...'
DO $$
DECLARE
    query_vec vector;
    start_time timestamp;
    total_time interval := '0'::interval;
    i integer;
BEGIN
    FOR i IN 1..100 LOOP
        query_vec := (SELECT array_agg(random()::real) FROM generate_series(1, 128))::vector;
        start_time := clock_timestamp();
        PERFORM id FROM test_perf.benchmark_vectors
        ORDER BY embedding <-> query_vec
        LIMIT 10;
        total_time := total_time + (clock_timestamp() - start_time);
    END LOOP;
    RAISE NOTICE 'Total time for 100 queries: %', total_time;
    RAISE NOTICE 'Average query time: %', total_time / 100;
END;
$$;
\echo 'Performance benchmark complete!'
EOF
    then
        log_error "Performance benchmark failed"
        return 1
    fi

    log_success "Performance benchmark completed"
}
# Write a Markdown summary of the run to report.md in the results directory.
#
# One table row is emitted per entry in TEST_CATEGORIES:
#   NOT RUN - no <category>.log exists (the category was not executed)
#   FAIL    - the log contains "FAILED" or a cargo/rustc "error" line
#   PASS    - a log exists and contains neither
generate_report() {
    log_section "Generating Test Report"
    local report_file="${TEST_RESULTS_DIR}/report.md"
    local category_log status

    cat > "${report_file}" << EOF
# RuVector Postgres Integration Test Report
Generated: $(date -Iseconds)
PostgreSQL Version: ${PG_VERSION}
## Test Results Summary
| Category | Status |
|----------|--------|
EOF

    for category in "${TEST_CATEGORIES[@]}"; do
        category_log="${TEST_RESULTS_DIR}/${category}.log"
        if [ ! -f "${category_log}" ]; then
            # A missing log means the category never ran; do not claim a pass
            status="NOT RUN"
        elif grep -Eq 'FAILED|^error(\[|:)' "${category_log}"; then
            # "^error" also catches build failures, which print no "FAILED"
            status="FAIL"
        else
            status="PASS"
        fi
        echo "| ${category} | ${status} |" >> "${report_file}"
    done

    cat >> "${report_file}" << EOF
## Test Categories
### pgvector Compatibility
- Vector type creation and operators
- HNSW and IVFFlat index creation
- Basic CRUD operations
### Integrity System
- Contracted graph construction
- Mincut computation
- State transitions
### Hybrid Search
- BM25 scoring accuracy
- RRF fusion
- Linear fusion
### Multi-Tenancy
- Schema isolation
- RLS policies
- Quota enforcement
### Self-Healing
- Problem detection
- Remediation strategies
- Recovery from failures
### Performance
- Insert throughput
- Query latency (p50, p95, p99)
- SIMD acceleration
- Concurrent scaling
## Logs
Test logs are available in: ${TEST_RESULTS_DIR}/
## Environment
- Docker: $(docker --version)
- Rust: $(rustc --version)
- PostgreSQL: ${PG_VERSION}
EOF

    log_success "Report generated: ${report_file}"
}
# Print the command-line help to stdout. $0 is expanded, so the examples show
# the name the script was invoked with.
show_usage() {
    cat << USAGE
RuVector-Postgres Integration Test Runner
Usage: $0 [OPTIONS]
Options:
-b, --build-only Build Docker image only
-t, --tests-only Run tests only (skip build)
-c, --category CAT Run specific test category
-s, --sql-only Run SQL tests only
-p, --perf Run performance benchmarks
-k, --keep-running Keep containers after tests
--pg-version VER PostgreSQL version (default: 17)
-h, --help Show this help
Test Categories:
pgvector_compat pgvector SQL compatibility
integrity_tests Integrity system tests
hybrid_search_tests Hybrid search tests
tenancy_tests Multi-tenancy tests
healing_tests Self-healing tests
perf_tests Performance tests
Examples:
# Run all tests
$0
# Run specific category
$0 -c pgvector_compat
# Run performance benchmark only
$0 -p
# Keep containers for debugging
$0 -k
USAGE
}
# Entry point: parse the command line, build the image (unless --tests-only),
# start PostgreSQL, run the selected tests, write the report and exit with the
# status of the last failing test phase (0 when everything passed).
# cleanup is registered as an EXIT trap, so it also runs on early exits that
# happen after argument parsing.
main() {
    local build_only=false tests_only=false sql_only=false perf_only=false
    local specific_category=""

    # Consume flags; options that take a value read it from $2
    while (($# > 0)); do
        case "$1" in
            -b | --build-only) build_only=true; shift ;;
            -t | --tests-only) tests_only=true; shift ;;
            -c | --category) specific_category="$2"; shift 2 ;;
            -s | --sql-only) sql_only=true; shift ;;
            -p | --perf) perf_only=true; shift ;;
            -k | --keep-running) KEEP_RUNNING=true; shift ;;
            --pg-version) PG_VERSION="$2"; shift 2 ;;
            -h | --help)
                show_usage
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                show_usage
                exit 1
                ;;
        esac
    done

    trap cleanup EXIT
    mkdir -p "${TEST_RESULTS_DIR}"

    log_section "RuVector Integration Test Suite"
    log_info "PostgreSQL Version: ${PG_VERSION}"
    log_info "Results Directory: ${TEST_RESULTS_DIR}"

    # Build phase
    if [[ "${tests_only}" != "true" ]]; then
        build_extension
    fi
    if [[ "${build_only}" == "true" ]]; then
        log_success "Build complete!"
        exit 0
    fi

    # Database phase
    start_postgres
    setup_test_schema

    # Test phase: a failing step records its status but does not stop the run
    local test_result=0
    if [[ "${sql_only}" == "true" ]]; then
        run_sql_tests || test_result=$?
    elif [[ "${perf_only}" == "true" ]]; then
        run_performance_benchmark || test_result=$?
    elif [[ -n "${specific_category}" ]]; then
        # Narrow the category list to the single requested entry
        TEST_CATEGORIES=("${specific_category}")
        run_integration_tests || test_result=$?
    else
        run_unit_tests || test_result=$?
        run_integration_tests || test_result=$?
        run_sql_tests || test_result=$?
        run_performance_benchmark || test_result=$?
    fi

    generate_report

    if ((test_result == 0)); then
        log_success "All tests completed successfully!"
    else
        log_error "Some tests failed. Check logs in ${TEST_RESULTS_DIR}/"
    fi
    exit "${test_result}"
}
# Run main function, forwarding every command-line argument unchanged.
# main always terminates the script itself via `exit`.
main "$@"

View File

@@ -0,0 +1,363 @@
#!/usr/bin/env bash
# RuVector-Postgres Test Runner
# Builds Docker image, runs tests, and cleans up
# Strict mode: stop on a failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# ANSI color escapes used by the log_* helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Locations derived from where this script lives
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

# Fixed container and image names
CONTAINER_NAME="ruvector-postgres-test"
IMAGE_NAME="ruvector-postgres:test"

# Connection settings: a non-empty value from the environment wins
: "${POSTGRES_PORT:=5433}"
: "${POSTGRES_USER:=ruvector}"
: "${POSTGRES_PASSWORD:=ruvector}"
: "${POSTGRES_DB:=ruvector_test}"

# Map the kernel name reported by uname onto a short platform label
OS_TYPE="$(uname -s)"
if [[ "${OS_TYPE}" == Linux* ]]; then
    PLATFORM="linux"
elif [[ "${OS_TYPE}" == Darwin* ]]; then
    PLATFORM="macos"
else
    PLATFORM="unknown"
fi
# Functions
# Logging helpers. Each prints its first argument to stdout behind a colored
# severity tag; backslash escapes in the tag and in the message are interpreted.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

log_success() {
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Stop and remove the test container, then remove its data volume unless
# KEEP_VOLUMES is "true". Every step is best-effort: errors are silenced and
# ignored.
cleanup() {
    log_info "Cleaning up containers and volumes..."

    local action
    for action in stop rm; do
        docker "${action}" "${CONTAINER_NAME}" 2>/dev/null || true
    done

    [ "${KEEP_VOLUMES:-false}" = "true" ] \
        || docker volume rm "${CONTAINER_NAME}_data" 2>/dev/null \
        || true
}
# Poll the test container with pg_isready, once per second, up to 30 times.
# Returns 0 as soon as the server accepts connections; otherwise dumps the
# container log and returns 1.
wait_for_postgres() {
    log_info "Waiting for PostgreSQL to be healthy..."

    local max_attempts=30
    local attempt
    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        if docker exec "${CONTAINER_NAME}" pg_isready -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" &>/dev/null; then
            log_success "PostgreSQL is ready!"
            return 0
        fi
        # One progress dot per failed probe
        printf '.'
        sleep 1
    done

    log_error "PostgreSQL failed to become ready after ${max_attempts} seconds"
    docker logs "${CONTAINER_NAME}"
    return 1
}
# Build the test image from the repository root with BuildKit enabled and
# inline cache metadata.
# Side effect: changes the working directory to PROJECT_ROOT.
build_image() {
    log_info "Building Docker image: ${IMAGE_NAME}"
    log_info "Platform: ${PLATFORM}"
    cd "${PROJECT_ROOT}"

    # BuildKit gives better layer caching
    local -a build_args=(
        -f crates/ruvector-postgres/docker/Dockerfile
        -t "${IMAGE_NAME}"
        --build-arg BUILDKIT_INLINE_CACHE=1
        --progress=plain
        .
    )
    DOCKER_BUILDKIT=1 docker build "${build_args[@]}"

    log_success "Docker image built successfully"
}
# Create the data volume (ignoring failure) and start the PostgreSQL test
# container in the background with a pg_isready health check.
start_container() {
    log_info "Starting PostgreSQL container: ${CONTAINER_NAME}"

    # Named volume so the data directory persists across container restarts
    local data_volume="${CONTAINER_NAME}_data"
    docker volume create "${data_volume}" || true

    local -a run_opts=(
        -d
        --name "${CONTAINER_NAME}"
        -p "${POSTGRES_PORT}:5432"
        -e POSTGRES_USER="${POSTGRES_USER}"
        -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}"
        -e POSTGRES_DB="${POSTGRES_DB}"
        -v "${data_volume}:/var/lib/postgresql/data"
        --health-cmd="pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
        --health-interval=5s
        --health-timeout=5s
        --health-retries=5
    )
    docker run "${run_opts[@]}" "${IMAGE_NAME}"

    log_success "Container started"
}
# Run the pgrx test suite from the ruvector-postgres crate.
#
# Environment:
#   PG_VERSION  PostgreSQL major version handed to `cargo pgrx test`
#               (default: 16, the value that used to be hard-coded here)
#
# Side effects: exports DATABASE_URL and changes the working directory to the
# crate. Returns 0 when the tests pass, 1 otherwise.
run_tests() {
    log_info "Running test suite..."

    # Export connection string for tests
    export DATABASE_URL="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@localhost:${POSTGRES_PORT}/${POSTGRES_DB}"
    # Log the target without the password so it does not end up in CI logs
    log_info "Connection string: postgresql://${POSTGRES_USER}:****@localhost:${POSTGRES_PORT}/${POSTGRES_DB}"

    # Run pgrx tests
    cd "${PROJECT_ROOT}/crates/ruvector-postgres"
    local pg_target="pg${PG_VERSION:-16}"
    log_info "Running pgrx tests (${pg_target})..."

    if cargo pgrx test "${pg_target}"; then
        log_success "All tests passed!"
        return 0
    else
        log_error "Tests failed!"
        return 1
    fi
}
# Run a SQL smoke test against the running container: create the extension,
# then exercise the vector type, the distance functions and a similarity
# search.
#
# Returns 0 when every statement succeeds, 1 otherwise.
run_integration_tests() {
    log_info "Running integration tests via SQL..."

    # Wait a bit more for full initialization
    sleep 2

    # Test extension loading.
    # No -t here: allocating a TTY fails when the script itself has none
    # (CI, cron, redirected input).
    log_info "Testing extension installation..."
    if ! docker exec "${CONTAINER_NAME}" psql -v ON_ERROR_STOP=1 -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c "CREATE EXTENSION IF NOT EXISTS ruvector_postgres;"; then
        log_error "Failed to create extension"
        return 1
    fi

    # Test basic vector operations.
    # -i (without -t) forwards the here-document; `-it` aborts with
    # "the input device is not a TTY" because stdin is not a terminal.
    # ON_ERROR_STOP makes psql exit non-zero on the first failing statement,
    # otherwise SQL errors would still be reported as a pass.
    log_info "Testing basic vector operations..."
    if docker exec -i "${CONTAINER_NAME}" psql -v ON_ERROR_STOP=1 -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" << 'EOF'
-- Test vector creation
SELECT '[1,2,3]'::vector;
-- Test distance functions
SELECT vector_l2_distance('[1,2,3]'::vector, '[4,5,6]'::vector);
SELECT vector_cosine_distance('[1,2,3]'::vector, '[4,5,6]'::vector);
SELECT vector_inner_product('[1,2,3]'::vector, '[4,5,6]'::vector);
-- Test table creation with vector column
CREATE TABLE IF NOT EXISTS test_vectors (
    id SERIAL PRIMARY KEY,
    embedding vector(3)
);
-- Insert test data
INSERT INTO test_vectors (embedding) VALUES
    ('[1,2,3]'::vector),
    ('[4,5,6]'::vector),
    ('[7,8,9]'::vector);
-- Test similarity search
SELECT * FROM test_vectors ORDER BY embedding <-> '[1,2,3]'::vector LIMIT 3;
-- Cleanup
DROP TABLE test_vectors;
EOF
    then
        log_success "Integration tests passed!"
        return 0
    else
        log_error "Integration tests failed!"
        return 1
    fi
}
# Save the container log, and optionally a pg_dump of the test database, under
# PROJECT_ROOT/test-results. The dump is written only when EXPORT_DB is "true".
collect_results() {
    log_info "Collecting test results..."

    local out_dir="${PROJECT_ROOT}/test-results"
    mkdir -p "${out_dir}"

    # Container log: stdout and stderr combined
    docker logs "${CONTAINER_NAME}" &> "${out_dir}/postgres.log"

    # Database dump on request
    if [[ "${EXPORT_DB:-false}" == "true" ]]; then
        log_info "Exporting database dump..."
        docker exec "${CONTAINER_NAME}" pg_dump -U "${POSTGRES_USER}" "${POSTGRES_DB}" > "${out_dir}/test_db_dump.sql"
    fi

    log_success "Results collected in ${out_dir}"
}
# Print the command-line help to stdout. $0 is expanded, so the examples show
# the name the script was invoked with.
show_usage() {
    cat << USAGE
RuVector-Postgres Test Runner
Usage: $0 [OPTIONS]
Options:
-b, --build-only Build Docker image only, don't run tests
-t, --test-only Run tests only (skip build)
-i, --integration Run integration tests only
-k, --keep-running Keep container running after tests
-c, --clean Clean up before starting
-v, --keep-volumes Keep volumes after cleanup
-p, --port PORT PostgreSQL port (default: 5433)
-h, --help Show this help message
Environment Variables:
POSTGRES_PORT PostgreSQL port (default: 5433)
POSTGRES_USER PostgreSQL user (default: ruvector)
POSTGRES_PASSWORD PostgreSQL password (default: ruvector)
POSTGRES_DB PostgreSQL database (default: ruvector_test)
KEEP_VOLUMES Keep volumes after cleanup (default: false)
EXPORT_DB Export database dump (default: false)
Examples:
# Run full test suite
$0
# Build and keep container running for debugging
$0 --keep-running
# Run integration tests only
$0 --integration --test-only
# Clean rebuild
$0 --clean --build-only
USAGE
}
# Entry point: parse the command line, optionally clean and build, start the
# container, run the selected tests, collect results and exit with the status
# of the failing test phase (0 when everything passed).
# cleanup is registered as an EXIT trap unless --keep-running was given.
main() {
    local build_only=false test_only=false integration_only=false
    local keep_running=false clean_first=false

    # Consume flags; --port reads its value from $2
    while (($# > 0)); do
        case "$1" in
            -b | --build-only) build_only=true; shift ;;
            -t | --test-only) test_only=true; shift ;;
            -i | --integration) integration_only=true; shift ;;
            -k | --keep-running) keep_running=true; shift ;;
            -c | --clean) clean_first=true; shift ;;
            -v | --keep-volumes) KEEP_VOLUMES=true; shift ;;
            -p | --port) POSTGRES_PORT="$2"; shift 2 ;;
            -h | --help)
                show_usage
                exit 0
                ;;
            *)
                log_error "Unknown option: $1"
                show_usage
                exit 1
                ;;
        esac
    done

    # Tear everything down on exit unless the caller wants to keep the container
    if [[ "${keep_running}" != "true" ]]; then
        trap cleanup EXIT
    fi

    log_info "RuVector-Postgres Test Runner"
    log_info "Platform: ${PLATFORM}"
    log_info "PostgreSQL Port: ${POSTGRES_PORT}"

    # Optional pre-clean
    if [[ "${clean_first}" == "true" ]]; then
        cleanup
    fi

    # Build phase
    if [[ "${test_only}" != "true" ]]; then
        build_image
    fi
    if [[ "${build_only}" == "true" ]]; then
        log_success "Build complete!"
        exit 0
    fi

    # Test phase
    start_container
    wait_for_postgres

    # The SQL integration tests always run first; the pgrx suite follows only
    # when it was not excluded with --integration and the SQL tests passed.
    local test_result=0
    run_integration_tests || test_result=$?
    if [[ "${integration_only}" != "true" ]] && ((test_result == 0)); then
        run_tests || test_result=$?
    fi

    collect_results

    if [[ "${keep_running}" == "true" ]]; then
        log_info "Container is still running: ${CONTAINER_NAME}"
        log_info "Connection: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@localhost:${POSTGRES_PORT}/${POSTGRES_DB}"
        log_info "To stop: docker stop ${CONTAINER_NAME}"
        trap - EXIT # Disable cleanup trap
    fi

    if ((test_result != 0)); then
        log_error "Tests failed with exit code ${test_result}"
        exit "${test_result}"
    fi
    log_success "All tests completed successfully!"
    exit 0
}
# Run main function, forwarding every command-line argument unchanged.
# main always terminates the script itself via `exit`.
main "$@"

View File

@@ -0,0 +1,60 @@
# syntax=docker/dockerfile:1
# Test Runner Dockerfile for RuVector-Postgres
# Single-stage image providing the Rust toolchain, PostgreSQL development
# packages, cargo-pgrx and JUnit tooling. The project source is NOT baked into
# the image: run-tests.sh expects the repository to be mounted at /app.
#
# Usage (run from the repository root; the build context must be the repo root
# because the COPY below uses a repo-relative path):
#   docker build -f crates/ruvector-postgres/docker/test-runner/Dockerfile -t ruvector-test-runner .
#   docker run --rm -v "$(pwd):/app" -v "$(pwd)/test-results:/test-results" ruvector-test-runner

ARG PG_VERSION=17
# Kept in line with the RUST_VERSION default of the main RuVector-Postgres
# Dockerfile (1.85) so both images compile with the same toolchain.
ARG RUST_VERSION=1.85

# ============================================================================
# Stage 1: Test Runner Base
# ============================================================================
FROM rust:${RUST_VERSION}-bookworm AS test-runner

ARG PG_VERSION

# Add the PostgreSQL APT repository.
# The signing key is stored in a dedicated keyring referenced via signed-by
# instead of the deprecated `apt-key add`; downloading to a file (rather than
# piping) also makes a failed download fail the build.
RUN install -d /usr/share/postgresql-common/pgdg && \
    wget --quiet -O /usr/share/postgresql-common/pgdg/apt.postgresql.org.asc \
        https://www.postgresql.org/media/keys/ACCC4CF8.asc && \
    echo "deb [signed-by=/usr/share/postgresql-common/pgdg/apt.postgresql.org.asc] https://apt.postgresql.org/pub/repos/apt bookworm-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list

# Install dependencies (sorted alphabetically for easier diffs)
RUN apt-get update && apt-get install -y --no-install-recommends \
        clang \
        cmake \
        git \
        jq \
        libclang-dev \
        libssl-dev \
        pkg-config \
        postgresql-${PG_VERSION} \
        postgresql-client-${PG_VERSION} \
        postgresql-server-dev-${PG_VERSION} \
    && rm -rf /var/lib/apt/lists/*

# Install pgrx and testing tools.
# NOTE(review): cargo-nextest and cargo2junit are not version-pinned, so a new
# upstream release may require a newer Rust than RUST_VERSION - consider
# pinning them the same way cargo-pgrx is pinned.
RUN cargo install cargo-pgrx --version 0.12.6 --locked && \
    cargo install cargo-nextest --locked && \
    cargo install cargo2junit --locked

# Initialize pgrx for the specified PostgreSQL version
RUN cargo pgrx init --pg${PG_VERSION} /usr/lib/postgresql/${PG_VERSION}/bin/pg_config

# Set environment variables
ENV PGRX_PG_CONFIG_PATH=/usr/lib/postgresql/${PG_VERSION}/bin/pg_config \
    PGRX_HOME=/root/.pgrx \
    PG_VERSION=${PG_VERSION} \
    RUST_LOG=info \
    RUST_BACKTRACE=1

WORKDIR /app

# Create directories for test results
RUN mkdir -p /test-results /coverage

# Copy test runner script (path is relative to the repository root)
COPY --chmod=755 crates/ruvector-postgres/docker/test-runner/run-tests.sh /usr/local/bin/run-tests.sh

# NOTE(review): the container runs as root, and PostgreSQL server binaries
# refuse to start as root - confirm that `cargo pgrx test` works in this image
# or switch to a non-root USER.

# Default command runs pgrx tests and outputs JUnit XML
CMD ["/usr/local/bin/run-tests.sh"]

View File

@@ -0,0 +1,119 @@
#!/usr/bin/env bash
# RuVector-Postgres Test Runner Script
# Runs pgrx tests and outputs JUnit XML for CI integration
# Abort on the first failing command
set -e

# ANSI color escapes used by the log_* helpers
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers. Each prints its first argument to stdout behind a colored
# severity tag; backslash escapes in the tag and in the message are interpreted.
log_info() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

log_success() {
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}

log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Configuration: a non-empty value already present in the environment wins
: "${PG_VERSION:=17}"
: "${TEST_RESULTS_DIR:=/test-results}"
: "${JUNIT_OUTPUT:=${TEST_RESULTS_DIR}/junit.xml}"
TEST_LOG="${TEST_RESULTS_DIR}/test.log"

# Make sure there is somewhere to write results
mkdir -p "${TEST_RESULTS_DIR}"

log_info "RuVector-Postgres Test Runner"
log_info "PostgreSQL Version: ${PG_VERSION}"
log_info "Test Results Directory: ${TEST_RESULTS_DIR}"

# Prefer the crate directory; fall back to /app when it does not exist
cd /app/crates/ruvector-postgres 2>/dev/null || cd /app

# Refuse to continue without a Cargo manifest in the working directory
if [[ ! -f Cargo.toml ]]; then
    log_error "Cargo.toml not found. Mount the source code to /app"
    exit 1
fi
# Run pgrx tests with JSON output for conversion to JUnit
log_info "Running pgrx tests for pg${PG_VERSION}..."

# Start test execution timestamp
START_TIME=$(date +%s)

# Run cargo test with JSON output and capture result.
# `-Z unstable-options --format json` is an unstable libtest flag: on a stable
# toolchain the test binary rejects it unless RUSTC_BOOTSTRAP is set. A value
# already present in the environment is respected.
# NOTE(review): RUSTC_BOOTSTRAP is also visible to the compile step - confirm
# no dependency changes behavior when it is set.
# errexit is suspended so a failing test run is recorded instead of aborting.
set +e
RUSTC_BOOTSTRAP="${RUSTC_BOOTSTRAP:-1}" cargo test --features "pg${PG_VERSION}" --no-fail-fast -- -Z unstable-options --format json 2>&1 | tee "${TEST_LOG}.json"
# Status of cargo, not of tee
TEST_EXIT_CODE=${PIPESTATUS[0]}
set -e

END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
log_info "Test execution completed in ${DURATION}s"
# Convert JSON output to JUnit XML
if command -v cargo2junit &> /dev/null; then
    log_info "Converting test results to JUnit XML..."
    # Conversion stays best-effort, but a failure is reported instead of being
    # swallowed silently.
    cargo2junit < "${TEST_LOG}.json" > "${JUNIT_OUTPUT}" 2>/dev/null \
        || log_warn "cargo2junit conversion failed; ${JUNIT_OUTPUT} may be incomplete"
else
    log_warn "cargo2junit not found, generating basic JUnit XML..."
    # Generate basic JUnit XML.
    # grep -c prints "0" AND exits non-zero when nothing matches, so the usual
    # `|| echo "0"` fallback would produce "0<newline>0" and break the
    # arithmetic below; use `|| true` and default empty values afterwards.
    # The patterns tolerate optional whitespace after the colons (libtest
    # writes `"type": "test"`) and count only finished tests, so the "started"
    # event of each test and suite-level events are not counted.
    TESTS_RUN=$(grep -cE '"type":[[:space:]]*"test".*"event":[[:space:]]*"(ok|failed|ignored)"' "${TEST_LOG}.json" 2>/dev/null || true)
    TESTS_FAILED=$(grep -cE '"type":[[:space:]]*"test".*"event":[[:space:]]*"failed"' "${TEST_LOG}.json" 2>/dev/null || true)
    TESTS_RUN=${TESTS_RUN:-0}
    TESTS_FAILED=${TESTS_FAILED:-0}
    TESTS_PASSED=$((TESTS_RUN - TESTS_FAILED))
    cat > "${JUNIT_OUTPUT}" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<testsuites name="ruvector-postgres" tests="${TESTS_RUN}" failures="${TESTS_FAILED}" time="${DURATION}">
<testsuite name="cargo-test" tests="${TESTS_RUN}" failures="${TESTS_FAILED}" time="${DURATION}">
<testcase name="pgrx-tests" classname="ruvector_postgres" time="${DURATION}">
$([ "${TEST_EXIT_CODE}" != "0" ] && echo "<failure message=\"Tests failed with exit code ${TEST_EXIT_CODE}\"/>" || true)
</testcase>
</testsuite>
</testsuites>
EOF
fi
# Run pgrx-specific tests if available
log_info "Running pgrx integration tests..."
# errexit is suspended so a failing run is recorded instead of aborting
set +e
cargo pgrx test "pg${PG_VERSION}" 2>&1 | tee -a "${TEST_LOG}"
# $? would be the status of tee, which is practically always 0 and would hide
# every pgrx failure; PIPESTATUS[0] is the status of cargo itself.
PGRX_EXIT_CODE=${PIPESTATUS[0]}
set -e
# Overall verdict: success only when both test phases exited with 0
if [ "${TEST_EXIT_CODE}" == "0" ] && [ "${PGRX_EXIT_CODE}" == "0" ]; then
    overall_success="true"
else
    overall_success="false"
fi

# Machine-readable summary
log_info "Generating test summary..."
cat > "${TEST_RESULTS_DIR}/summary.json" << EOF
{
"timestamp": "$(date -Iseconds)",
"pg_version": "${PG_VERSION}",
"duration_seconds": ${DURATION},
"cargo_test_exit_code": ${TEST_EXIT_CODE},
"pgrx_test_exit_code": ${PGRX_EXIT_CODE},
"success": ${overall_success}
}
EOF

# Human-readable summary
divider="=========================================="
printf '%s\n' \
    "" \
    "${divider}" \
    " TEST SUMMARY" \
    "${divider}" \
    "PostgreSQL Version: ${PG_VERSION}" \
    "Duration: ${DURATION}s" \
    "Cargo Test Exit Code: ${TEST_EXIT_CODE}" \
    "PGRX Test Exit Code: ${PGRX_EXIT_CODE}" \
    "JUnit XML: ${JUNIT_OUTPUT}" \
    "${divider}"

# Exit status mirrors the verdict
if [ "${overall_success}" != "true" ]; then
    log_error "Tests failed!"
    exit 1
fi
log_success "All tests passed!"
exit 0