Squashed 'vendor/ruvector/' content from commit b64c2172

git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
2026-02-28 14:39:40 -05:00
commit d803bfe2b1
7854 changed files with 3522914 additions and 0 deletions
--- a/examples/google-cloud/cloudrun.yaml
+++ b/examples/google-cloud/cloudrun.yaml
@@ -0,0 +1,277 @@
+# =============================================================================
+# RuVector Cloud Run Service Configuration
+# Multi-service deployment with GPU, Raft, and Replication support
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# Benchmark Service (GPU-enabled)
+# -----------------------------------------------------------------------------
+# Knative Service for the GPU benchmark workload: one NVIDIA L4 per instance,
+# scales between 0 and 10 instances.
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: ruvector-benchmark
+  labels:
+    app: ruvector
+    component: benchmark
+  annotations:
+    run.googleapis.com/description: "RuVector GPU Benchmark Service"
+    run.googleapis.com/launch-stage: BETA
+spec:
+  template:
+    metadata:
+      annotations:
+        # Execution environment (the GPU itself is requested through the
+        # nvidia.com/gpu limit and the nodeSelector further down).
+        run.googleapis.com/execution-environment: gen2
+
+        # Scaling Configuration
+        autoscaling.knative.dev/minScale: "0"
+        autoscaling.knative.dev/maxScale: "10"
+
+        # Performance Configuration
+        # GPU instances need CPU to stay allocated, so throttling is disabled.
+        run.googleapis.com/cpu-throttling: "false"
+        run.googleapis.com/startup-cpu-boost: "true"
+    spec:
+      containerConcurrency: 80
+      timeoutSeconds: 3600
+      serviceAccountName: ruvector-sa
+      containers:
+        - name: ruvector
+          # PROJECT_ID is a placeholder; substitute the real project before deploying.
+          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
+          ports:
+            - containerPort: 8080
+          resources:
+            limits:
+              # Cloud Run requires at least 4 CPU and 16Gi of memory when an
+              # L4 GPU is attached; 8Gi is rejected at deploy time.
+              cpu: "4"
+              memory: "16Gi"
+              nvidia.com/gpu: "1"
+          env:
+            - name: RUVECTOR_GPU_ENABLED
+              value: "true"
+            - name: RUST_LOG
+              value: "info"
+            - name: RUVECTOR_MODE
+              value: "benchmark"
+          # All three probes hit the same /health endpoint on the serving port.
+          startupProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            failureThreshold: 3
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            periodSeconds: 30
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 8080
+            periodSeconds: 10
+      # GPU type is selected with the accelerator node selector, which is the
+      # form the Cloud Run service YAML schema documents.
+      nodeSelector:
+        run.googleapis.com/accelerator: nvidia-l4
+
+---
+# -----------------------------------------------------------------------------
+# Attention/GNN Service (High Memory GPU)
+# -----------------------------------------------------------------------------
+# Knative Service for attention/GNN inference: one NVIDIA L4 per instance,
+# always at least one warm instance, at most five.
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: ruvector-attention
+  labels:
+    app: ruvector
+    component: attention
+  annotations:
+    run.googleapis.com/description: "RuVector Attention/GNN Inference Service"
+spec:
+  template:
+    metadata:
+      annotations:
+        run.googleapis.com/execution-environment: gen2
+        autoscaling.knative.dev/minScale: "1"
+        autoscaling.knative.dev/maxScale: "5"
+        # GPU instances need CPU to stay allocated, so throttling is disabled.
+        run.googleapis.com/cpu-throttling: "false"
+    spec:
+      containerConcurrency: 20
+      timeoutSeconds: 3600
+      containers:
+        - name: ruvector
+          # PROJECT_ID is a placeholder; substitute the real project before deploying.
+          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
+          ports:
+            - containerPort: 8080
+          resources:
+            limits:
+              cpu: "8"
+              memory: "16Gi"
+              nvidia.com/gpu: "1"
+          env:
+            - name: RUVECTOR_MODE
+              value: "attention"
+            - name: RUVECTOR_GNN_LAYERS
+              value: "3"
+            - name: RUVECTOR_GNN_HEADS
+              value: "8"
+            - name: RUVECTOR_GNN_HIDDEN_DIM
+              value: "512"
+            - name: RUST_LOG
+              value: "info"
+      # GPU type is selected with the accelerator node selector, which is the
+      # form the Cloud Run service YAML schema documents.
+      nodeSelector:
+        run.googleapis.com/accelerator: nvidia-l4
+
+---
+# -----------------------------------------------------------------------------
+# Raft Consensus Node (stateful role; its log volume is an ephemeral emptyDir)
+# -----------------------------------------------------------------------------
+# Knative Service for one Raft consensus member, pinned to exactly one
+# instance (minScale == maxScale == 1).
+# NOTE(review): the service is named "...-node-1" while the label and
+# RUVECTOR_NODE_ID below are "0"; confirm whether ids are zero-based by design.
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: ruvector-raft-node-1
+  labels:
+    app: ruvector
+    component: raft
+    raft-node-id: "0"
+  annotations:
+    run.googleapis.com/description: "RuVector Raft Consensus Node"
+spec:
+  template:
+    metadata:
+      annotations:
+        autoscaling.knative.dev/minScale: "1"
+        autoscaling.knative.dev/maxScale: "1"
+        run.googleapis.com/cpu-throttling: "false"
+    spec:
+      containerConcurrency: 100
+      timeoutSeconds: 3600
+      containers:
+        - name: ruvector
+          # PROJECT_ID is a placeholder; substitute the real project before deploying.
+          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
+          ports:
+            - containerPort: 8080
+          resources:
+            limits:
+              cpu: "2"
+              memory: "4Gi"
+          env:
+            - name: RUVECTOR_MODE
+              value: "raft"
+            - name: RUVECTOR_NODE_ID
+              value: "0"
+            - name: RUVECTOR_CLUSTER_SIZE
+              value: "3"
+            # NOTE(review): timeout/heartbeat units are presumably
+            # milliseconds; verify against the application.
+            - name: RUVECTOR_RAFT_ELECTION_TIMEOUT
+              value: "150"
+            - name: RUVECTOR_RAFT_HEARTBEAT_INTERVAL
+              value: "50"
+            - name: RUST_LOG
+              value: "info,raft=debug"
+          volumeMounts:
+            - name: raft-data
+              mountPath: /data/raft
+      volumes:
+        # Cloud Run emptyDir volumes are in-memory and count against the
+        # container memory limit (4Gi above), so the size limit has to stay
+        # below it; the previous 10Gi could never be honoured.
+        # NOTE(review): contents are lost whenever the instance is replaced;
+        # durable Raft state would need a persistent volume type instead.
+        - name: raft-data
+          emptyDir:
+            medium: Memory
+            sizeLimit: "2Gi"
+
+---
+# -----------------------------------------------------------------------------
+# Replication Primary Node
+# -----------------------------------------------------------------------------
+# Knative Service for the replication primary: one NVIDIA L4, pinned to
+# exactly one instance (minScale == maxScale == 1).
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: ruvector-primary
+  labels:
+    app: ruvector
+    component: replication
+    role: primary
+  annotations:
+    run.googleapis.com/description: "RuVector Primary Node (Replication)"
+spec:
+  template:
+    metadata:
+      annotations:
+        run.googleapis.com/execution-environment: gen2
+        autoscaling.knative.dev/minScale: "1"
+        autoscaling.knative.dev/maxScale: "1"
+        # GPU instances need CPU to stay allocated, so throttling is disabled.
+        run.googleapis.com/cpu-throttling: "false"
+    spec:
+      containerConcurrency: 100
+      timeoutSeconds: 3600
+      containers:
+        - name: ruvector
+          # PROJECT_ID is a placeholder; substitute the real project before deploying.
+          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
+          ports:
+            - containerPort: 8080
+          resources:
+            limits:
+              # Cloud Run requires at least 4 CPU and 16Gi of memory when an
+              # L4 GPU is attached; 8Gi is rejected at deploy time.
+              cpu: "4"
+              memory: "16Gi"
+              nvidia.com/gpu: "1"
+          env:
+            - name: RUVECTOR_MODE
+              value: "primary"
+            - name: RUVECTOR_REPLICATION_FACTOR
+              value: "3"
+            - name: RUVECTOR_SYNC_MODE
+              value: "async"
+            - name: RUST_LOG
+              value: "info"
+      # GPU type is selected with the accelerator node selector, which is the
+      # form the Cloud Run service YAML schema documents.
+      nodeSelector:
+        run.googleapis.com/accelerator: nvidia-l4
+
+---
+# -----------------------------------------------------------------------------
+# Replication Replica Node
+# -----------------------------------------------------------------------------
+# Knative Service for replication replicas: one NVIDIA L4 per instance,
+# between two and five instances.
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: ruvector-replica
+  labels:
+    app: ruvector
+    component: replication
+    role: replica
+  annotations:
+    run.googleapis.com/description: "RuVector Replica Node (Replication)"
+spec:
+  template:
+    metadata:
+      annotations:
+        run.googleapis.com/execution-environment: gen2
+        autoscaling.knative.dev/minScale: "2"
+        autoscaling.knative.dev/maxScale: "5"
+        # GPU instances need CPU to stay allocated, so throttling is disabled.
+        run.googleapis.com/cpu-throttling: "false"
+    spec:
+      containerConcurrency: 100
+      timeoutSeconds: 3600
+      containers:
+        - name: ruvector
+          # PROJECT_ID is a placeholder; substitute the real project before deploying.
+          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
+          ports:
+            - containerPort: 8080
+          resources:
+            limits:
+              # Cloud Run requires at least 4 CPU and 16Gi of memory when an
+              # L4 GPU is attached; 8Gi is rejected at deploy time.
+              cpu: "4"
+              memory: "16Gi"
+              nvidia.com/gpu: "1"
+          env:
+            - name: RUVECTOR_MODE
+              value: "replica"
+            # HASH is a placeholder; replace with the deployed primary's URL.
+            - name: RUVECTOR_PRIMARY_URL
+              value: "https://ruvector-primary-HASH.run.app"
+            - name: RUST_LOG
+              value: "info"
+      # GPU type is selected with the accelerator node selector, which is the
+      # form the Cloud Run service YAML schema documents.
+      nodeSelector:
+        run.googleapis.com/accelerator: nvidia-l4
+
+---
+# -----------------------------------------------------------------------------
+# Service Account
+# -----------------------------------------------------------------------------
+# Declares the IAM service account "ruvector-sa", the name the benchmark
+# service refers to through serviceAccountName.
+# NOTE(review): the iam.cnrm.cloud.google.com API group looks like a Config
+# Connector resource rather than a Cloud Run one; confirm which tool applies it.
+apiVersion: iam.cnrm.cloud.google.com/v1beta1
+kind: IAMServiceAccount
+metadata:
+  name: ruvector-sa
+spec:
+  displayName: 'RuVector Cloud Run Service Account'