v1.11.0 had a critical proxy connection leak in the instance-manager (longhorn/longhorn#12575) that consumed 38.8GB on apollo, pushing the server to 92% memory with swap exhausted. v1.11.1 fixes the leak. Also adds a 2Gi per-container LimitRange in longhorn-system as a safety net against future regressions.
---
# Longhorn distributed block storage
# Single-node production deployment — RAID1 hardware already provides redundancy.

defaultSettings:
  # 1 replica: RAID1 mirrors the disk, so software replication adds no benefit
  # on a single node and would halve available capacity.
  defaultReplicaCount: 1

  # Orphan auto-deletion: clean up node/instance orphaned resources automatically
  orphanAutoDeletion: true

  # Allow volumes to be scheduled on the only available node even when disk
  # pressure is detected (single-node: no other node to reschedule to).
  allowVolumeCreationWithDegradedAvailability: true

  # Reduce reserved percentage to 10% — RAID1 hardware provides physical
  # redundancy, so Longhorn doesn't need to hold back 30% for software replicas.
  # With 937 GiB disk: 843 GiB schedulable (600 SW + 100 PG + ~143 headroom).
  storageReservedPercentageForDefaultDisk: 10

# Set Longhorn as the default StorageClass.
persistence:
  defaultClass: true
  defaultClassReplicaCount: 1

# Cap instance-manager memory as a safety net against future leaks.
# v1.11.0 had a proxy connection leak (longhorn/longhorn#12575) that let
# instance-manager grow to 38 GB+. Fixed in v1.11.1, but the LimitRange
# ensures any regression is OOM-killed at 2 Gi instead of eating all RAM.
extraObjects:
  - apiVersion: v1
    kind: LimitRange
    metadata:
      name: instance-manager-limits
      namespace: longhorn-system
    spec:
      limits:
        - type: Container
          default:
            memory: 2Gi
          defaultRequest:
            memory: 128Mi