git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
278 lines
8.2 KiB
YAML
278 lines
8.2 KiB
YAML
# =============================================================================
|
|
# RuVector Cloud Run Service Configuration
|
|
# Multi-service deployment with GPU, Raft, and Replication support
|
|
# =============================================================================
|
|
|
|
# -----------------------------------------------------------------------------
# Benchmark Service (GPU-enabled)
# -----------------------------------------------------------------------------
# Knative Service running the ruvector image in "benchmark" mode with one
# NVIDIA L4 GPU. PROJECT_ID in the image path is a placeholder that must be
# substituted before deployment.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-benchmark
  labels:
    app: ruvector
    component: benchmark
  annotations:
    run.googleapis.com/description: "RuVector GPU Benchmark Service"
    run.googleapis.com/launch-stage: BETA
spec:
  template:
    metadata:
      annotations:
        # GPU Configuration
        # NOTE(review): the gpu-type/gpu-count annotations are kept as-is; the
        # documented Cloud Run mechanism is the nodeSelector plus the
        # nvidia.com/gpu limit below - confirm these annotations are accepted.
        run.googleapis.com/execution-environment: gen2
        run.googleapis.com/gpu-type: nvidia-l4
        run.googleapis.com/gpu-count: "1"

        # Scaling Configuration
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "10"

        # Performance Configuration
        run.googleapis.com/cpu-throttling: "false"
        run.googleapis.com/startup-cpu-boost: "true"
    spec:
      containerConcurrency: 80
      timeoutSeconds: 3600
      serviceAccountName: ruvector-sa
      # Selects the GPU model for the nvidia.com/gpu limit requested below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "4"
              # Raised from 8Gi: Cloud Run L4 GPU instances require at least
              # 4 CPU and 16 GiB of memory.
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_GPU_ENABLED
              value: "true"
            - name: RUST_LOG
              value: "info"
            - name: RUVECTOR_MODE
              value: "benchmark"
          # All three probes hit the same /health endpoint on the serving port.
          startupProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 10

---
# -----------------------------------------------------------------------------
# Attention/GNN Service (High Memory GPU)
# -----------------------------------------------------------------------------
# Knative Service running the ruvector image in "attention" mode with one
# NVIDIA L4 GPU. PROJECT_ID in the image path is a placeholder that must be
# substituted before deployment.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-attention
  labels:
    app: ruvector
    component: attention
  annotations:
    run.googleapis.com/description: "RuVector Attention/GNN Inference Service"
spec:
  template:
    metadata:
      annotations:
        # NOTE(review): the gpu-type/gpu-count annotations are kept as-is; the
        # documented Cloud Run mechanism is the nodeSelector plus the
        # nvidia.com/gpu limit below - confirm these annotations are accepted.
        run.googleapis.com/execution-environment: gen2
        run.googleapis.com/gpu-type: nvidia-l4
        run.googleapis.com/gpu-count: "1"
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "5"
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 20
      timeoutSeconds: 3600
      # Selects the GPU model for the nvidia.com/gpu limit requested below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "8"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "attention"
            - name: RUVECTOR_GNN_LAYERS
              value: "3"
            - name: RUVECTOR_GNN_HEADS
              value: "8"
            - name: RUVECTOR_GNN_HIDDEN_DIM
              value: "512"
            - name: RUST_LOG
              value: "info"

---
# -----------------------------------------------------------------------------
# Raft Consensus Node (Stateful)
# -----------------------------------------------------------------------------
# Knative Service running the ruvector image in "raft" mode, pinned to exactly
# one instance (minScale == maxScale == 1). PROJECT_ID in the image path is a
# placeholder that must be substituted before deployment.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  # NOTE(review): the service name ends in "-1" while raft-node-id and
  # RUVECTOR_NODE_ID are "0" - confirm node ids are intentionally zero-based.
  name: ruvector-raft-node-1
  labels:
    app: ruvector
    component: raft
    raft-node-id: "0"
  annotations:
    run.googleapis.com/description: "RuVector Raft Consensus Node"
spec:
  template:
    metadata:
      annotations:
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      containers:
        - name: ruvector
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "2"
              memory: "4Gi"
          env:
            - name: RUVECTOR_MODE
              value: "raft"
            - name: RUVECTOR_NODE_ID
              value: "0"
            - name: RUVECTOR_CLUSTER_SIZE
              value: "3"
            - name: RUVECTOR_RAFT_ELECTION_TIMEOUT
              value: "150"
            - name: RUVECTOR_RAFT_HEARTBEAT_INTERVAL
              value: "50"
            - name: RUST_LOG
              value: "info,raft=debug"
          volumeMounts:
            - name: raft-data
              mountPath: /data/raft
      volumes:
        - name: raft-data
          # Cloud Run emptyDir volumes are in-memory and do not survive an
          # instance restart.
          # NOTE(review): sizeLimit (10Gi) exceeds the container memory limit
          # (4Gi); in-memory volume usage presumably counts against that
          # limit - confirm the intended size and whether Raft state needs
          # durable storage instead.
          emptyDir:
            medium: Memory
            sizeLimit: "10Gi"

---
# -----------------------------------------------------------------------------
# Replication Primary Node
# -----------------------------------------------------------------------------
# Knative Service running the ruvector image in "primary" mode with one NVIDIA
# L4 GPU, pinned to exactly one instance (minScale == maxScale == 1).
# PROJECT_ID in the image path is a placeholder that must be substituted
# before deployment.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-primary
  labels:
    app: ruvector
    component: replication
    role: primary
  annotations:
    run.googleapis.com/description: "RuVector Primary Node (Replication)"
spec:
  template:
    metadata:
      annotations:
        # NOTE(review): the gpu-type/gpu-count annotations are kept as-is; the
        # documented Cloud Run mechanism is the nodeSelector plus the
        # nvidia.com/gpu limit below - confirm these annotations are accepted.
        run.googleapis.com/execution-environment: gen2
        run.googleapis.com/gpu-type: nvidia-l4
        run.googleapis.com/gpu-count: "1"
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      # Selects the GPU model for the nvidia.com/gpu limit requested below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "4"
              # Raised from 8Gi: Cloud Run L4 GPU instances require at least
              # 4 CPU and 16 GiB of memory.
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "primary"
            - name: RUVECTOR_REPLICATION_FACTOR
              value: "3"
            - name: RUVECTOR_SYNC_MODE
              value: "async"
            - name: RUST_LOG
              value: "info"

---
# -----------------------------------------------------------------------------
# Replication Replica Node
# -----------------------------------------------------------------------------
# Knative Service running the ruvector image in "replica" mode with one NVIDIA
# L4 GPU, scaling between 2 and 5 instances. PROJECT_ID in the image path is a
# placeholder that must be substituted before deployment.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-replica
  labels:
    app: ruvector
    component: replication
    role: replica
  annotations:
    run.googleapis.com/description: "RuVector Replica Node (Replication)"
spec:
  template:
    metadata:
      annotations:
        # NOTE(review): the gpu-type/gpu-count annotations are kept as-is; the
        # documented Cloud Run mechanism is the nodeSelector plus the
        # nvidia.com/gpu limit below - confirm these annotations are accepted.
        run.googleapis.com/execution-environment: gen2
        run.googleapis.com/gpu-type: nvidia-l4
        run.googleapis.com/gpu-count: "1"
        autoscaling.knative.dev/minScale: "2"
        autoscaling.knative.dev/maxScale: "5"
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      # Selects the GPU model for the nvidia.com/gpu limit requested below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "4"
              # Raised from 8Gi: Cloud Run L4 GPU instances require at least
              # 4 CPU and 16 GiB of memory.
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "replica"
            # HASH is a placeholder; replace with the deployed URL of the
            # ruvector-primary service.
            - name: RUVECTOR_PRIMARY_URL
              value: "https://ruvector-primary-HASH.run.app"
            - name: RUST_LOG
              value: "info"

---
# -----------------------------------------------------------------------------
# Service Account
# -----------------------------------------------------------------------------
# Config Connector (cnrm) IAMServiceAccount resource named ruvector-sa; unlike
# the other documents in this file it is not a Knative Service.
apiVersion: iam.cnrm.cloud.google.com/v1beta1
kind: IAMServiceAccount
metadata:
  name: ruvector-sa
spec:
  displayName: "RuVector Cloud Run Service Account"