Squashed 'vendor/ruvector/' content from commit b64c2172
git-subtree-dir: vendor/ruvector git-subtree-split: b64c21726f2bb37286d9ee36a7869fef60cc6900
This commit is contained in:
277
examples/google-cloud/cloudrun.yaml
Normal file
277
examples/google-cloud/cloudrun.yaml
Normal file
@@ -0,0 +1,277 @@
|
||||
# =============================================================================
|
||||
# RuVector Cloud Run Service Configuration
|
||||
# Multi-service deployment with GPU, Raft, and Replication support
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Benchmark Service (GPU-enabled)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Cloud Run (Knative Serving) service that runs RuVector in "benchmark" mode
# with one NVIDIA L4 GPU attached. Minimum instances is 0 and maximum is 10.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-benchmark
  labels:
    app: ruvector
    component: benchmark
  annotations:
    run.googleapis.com/description: "RuVector GPU Benchmark Service"
    run.googleapis.com/launch-stage: BETA
spec:
  template:
    metadata:
      annotations:
        # Execution environment
        run.googleapis.com/execution-environment: gen2

        # Scaling Configuration
        autoscaling.knative.dev/minScale: "0"
        autoscaling.knative.dev/maxScale: "10"

        # Performance Configuration
        # CPU stays allocated outside of requests (required for GPU instances).
        run.googleapis.com/cpu-throttling: "false"
        run.googleapis.com/startup-cpu-boost: "true"
    spec:
      containerConcurrency: 80
      timeoutSeconds: 3600
      # NOTE(review): Cloud Run documents this field as the service account's
      # email address; confirm that the short name resolves in your project.
      serviceAccountName: ruvector-sa
      # GPU Configuration
      # Cloud Run selects the GPU type with this node selector; the GPU count
      # is the nvidia.com/gpu resource limit on the container below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # PROJECT_ID appears to be a placeholder; substitute the real project.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              # Cloud Run requires at least 4 CPU and 16 GiB of memory for an
              # instance with an L4 GPU; 8Gi is rejected at deploy time.
              cpu: "4"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_GPU_ENABLED
              value: "true"
            - name: RUST_LOG
              value: "info"
            - name: RUVECTOR_MODE
              value: "benchmark"
          # The container gets about 10s + 3 x 10s to answer /health before
          # startup is considered failed.
          startupProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            periodSeconds: 10
|
||||
|
||||
---
|
||||
# -----------------------------------------------------------------------------
|
||||
# Attention/GNN Service (High Memory GPU)
|
||||
# -----------------------------------------------------------------------------
|
||||
# Cloud Run (Knative Serving) service that runs RuVector in "attention" mode
# with one NVIDIA L4 GPU attached. Keeps 1 to 5 instances running.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-attention
  labels:
    app: ruvector
    component: attention
  annotations:
    run.googleapis.com/description: "RuVector Attention/GNN Inference Service"
spec:
  template:
    metadata:
      annotations:
        run.googleapis.com/execution-environment: gen2
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "5"
        # CPU stays allocated outside of requests (required for GPU instances).
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 20
      timeoutSeconds: 3600
      # Cloud Run selects the GPU type with this node selector; the GPU count
      # is the nvidia.com/gpu resource limit on the container below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # Uses the ruvector-benchmark image; presumably RUVECTOR_MODE selects
          # the role at startup. PROJECT_ID appears to be a placeholder.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              # Meets Cloud Run's L4 minimum of 4 CPU and 16 GiB of memory.
              cpu: "8"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "attention"
            - name: RUVECTOR_GNN_LAYERS
              value: "3"
            - name: RUVECTOR_GNN_HEADS
              value: "8"
            - name: RUVECTOR_GNN_HIDDEN_DIM
              value: "512"
            - name: RUST_LOG
              value: "info"
|
||||
|
||||
---
|
||||
# -----------------------------------------------------------------------------
|
||||
# Raft Consensus Node (Stateful) - node 0 of a 3-node cluster; only this node is defined here
|
||||
# -----------------------------------------------------------------------------
|
||||
# Cloud Run (Knative Serving) service that runs RuVector in "raft" mode.
# Pinned to exactly one instance (minScale == maxScale == 1), no GPU.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  # NOTE(review): the service name says "node-1" while the node ID below is
  # "0"; presumably IDs are zero-based. Confirm before adding more nodes.
  name: ruvector-raft-node-1
  labels:
    app: ruvector
    component: raft
    raft-node-id: "0"
  annotations:
    run.googleapis.com/description: "RuVector Raft Consensus Node"
spec:
  template:
    metadata:
      annotations:
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      containers:
        - name: ruvector
          # Uses the ruvector-benchmark image; presumably RUVECTOR_MODE selects
          # the role at startup. PROJECT_ID appears to be a placeholder.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              cpu: "2"
              memory: "4Gi"
          env:
            - name: RUVECTOR_MODE
              value: "raft"
            - name: RUVECTOR_NODE_ID
              value: "0"
            - name: RUVECTOR_CLUSTER_SIZE
              value: "3"
            # NOTE(review): units are not stated here; presumably milliseconds.
            - name: RUVECTOR_RAFT_ELECTION_TIMEOUT
              value: "150"
            - name: RUVECTOR_RAFT_HEARTBEAT_INTERVAL
              value: "50"
            - name: RUST_LOG
              value: "info,raft=debug"
          volumeMounts:
            - name: raft-data
              mountPath: /data/raft
      volumes:
        - name: raft-data
          # Cloud Run emptyDir volumes are in-memory: their contents count
          # against the container memory limit and are lost when the instance
          # stops. The size limit is therefore kept below the 4Gi memory limit
          # (the previous 10Gi could never be honoured). Use a persistent
          # volume type if the Raft log must survive restarts.
          emptyDir:
            medium: Memory
            sizeLimit: "2Gi"
|
||||
|
||||
---
|
||||
# -----------------------------------------------------------------------------
|
||||
# Replication Primary Node
|
||||
# -----------------------------------------------------------------------------
|
||||
# Cloud Run (Knative Serving) service that runs RuVector in "primary" mode
# with one NVIDIA L4 GPU attached. Pinned to exactly one instance.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-primary
  labels:
    app: ruvector
    component: replication
    role: primary
  annotations:
    run.googleapis.com/description: "RuVector Primary Node (Replication)"
spec:
  template:
    metadata:
      annotations:
        run.googleapis.com/execution-environment: gen2
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        # CPU stays allocated outside of requests (required for GPU instances).
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      # Cloud Run selects the GPU type with this node selector; the GPU count
      # is the nvidia.com/gpu resource limit on the container below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # Uses the ruvector-benchmark image; presumably RUVECTOR_MODE selects
          # the role at startup. PROJECT_ID appears to be a placeholder.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              # Cloud Run requires at least 4 CPU and 16 GiB of memory for an
              # instance with an L4 GPU; 8Gi is rejected at deploy time.
              cpu: "4"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "primary"
            - name: RUVECTOR_REPLICATION_FACTOR
              value: "3"
            - name: RUVECTOR_SYNC_MODE
              value: "async"
            - name: RUST_LOG
              value: "info"
|
||||
|
||||
---
|
||||
# -----------------------------------------------------------------------------
|
||||
# Replication Replica Node
|
||||
# -----------------------------------------------------------------------------
|
||||
# Cloud Run (Knative Serving) service that runs RuVector in "replica" mode
# with one NVIDIA L4 GPU attached. Keeps 2 to 5 instances running.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: ruvector-replica
  labels:
    app: ruvector
    component: replication
    role: replica
  annotations:
    run.googleapis.com/description: "RuVector Replica Node (Replication)"
spec:
  template:
    metadata:
      annotations:
        run.googleapis.com/execution-environment: gen2
        autoscaling.knative.dev/minScale: "2"
        autoscaling.knative.dev/maxScale: "5"
        # CPU stays allocated outside of requests (required for GPU instances).
        run.googleapis.com/cpu-throttling: "false"
    spec:
      containerConcurrency: 100
      timeoutSeconds: 3600
      # Cloud Run selects the GPU type with this node selector; the GPU count
      # is the nvidia.com/gpu resource limit on the container below.
      nodeSelector:
        run.googleapis.com/accelerator: nvidia-l4
      containers:
        - name: ruvector
          # Uses the ruvector-benchmark image; presumably RUVECTOR_MODE selects
          # the role at startup. PROJECT_ID appears to be a placeholder.
          image: gcr.io/PROJECT_ID/ruvector-benchmark:latest
          ports:
            - containerPort: 8080
          resources:
            limits:
              # Cloud Run requires at least 4 CPU and 16 GiB of memory for an
              # instance with an L4 GPU; 8Gi is rejected at deploy time.
              cpu: "4"
              memory: "16Gi"
              nvidia.com/gpu: "1"
          env:
            - name: RUVECTOR_MODE
              value: "replica"
            # HASH appears to be a placeholder; replace this value with the
            # URL Cloud Run assigns to the ruvector-primary service.
            - name: RUVECTOR_PRIMARY_URL
              value: "https://ruvector-primary-HASH.run.app"
            - name: RUST_LOG
              value: "info"
|
||||
|
||||
---
|
||||
# -----------------------------------------------------------------------------
|
||||
# Service Account
|
||||
# -----------------------------------------------------------------------------
|
||||
# Config Connector IAMServiceAccount named "ruvector-sa", the same name the
# ruvector-benchmark service sets as its serviceAccountName.
# NOTE(review): no namespace or project is specified here; confirm how the
# target project is resolved when this resource is applied.
apiVersion: iam.cnrm.cloud.google.com/v1beta1
kind: IAMServiceAccount
metadata:
  name: ruvector-sa
spec:
  displayName: 'RuVector Cloud Run Service Account'
|
||||
Reference in New Issue
Block a user