From 38e3e12ba320209abd2b126bef670e2700df7cde Mon Sep 17 00:00:00 2001 From: Saadi Myftija Date: Mon, 19 Jan 2026 10:47:04 +0000 Subject: [PATCH] feat(supervisor): add per-machine-preset resource request ratios Adds support to configure CPU/memory request ratios per machine preset. Falls back to the global request ratio configs if no specific override is specified. Runs across different machine presets have different usage patterns, so this enables us to manage the available capacity better. --- apps/supervisor/src/env.ts | 19 +++++++++++ .../src/workloadManager/kubernetes.ts | 34 +++++++++++++++++-- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 447991de64..9ef0cff253 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -89,6 +89,25 @@ const Env = z.object({ KUBERNETES_CPU_REQUEST_RATIO: z.coerce.number().min(0).max(1).default(0.75), // Ratio of CPU limit, so 0.75 = 75% of CPU limit KUBERNETES_MEMORY_REQUEST_MIN_GB: z.coerce.number().min(0).default(0), KUBERNETES_MEMORY_REQUEST_RATIO: z.coerce.number().min(0).max(1).default(1), // Ratio of memory limit, so 1 = 100% of memory limit + + // Per-preset overrides of the global KUBERNETES_CPU_REQUEST_RATIO + KUBERNETES_CPU_REQUEST_RATIO_MICRO: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_SMALL_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_SMALL_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_LARGE_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_LARGE_2X: z.coerce.number().min(0).max(1).optional(), + + // Per-preset overrides of the global KUBERNETES_MEMORY_REQUEST_RATIO + KUBERNETES_MEMORY_REQUEST_RATIO_MICRO: 
z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_OVERHEAD_GB: z.coerce.number().min(0).optional(), // Optional memory overhead to add to the limit in GB KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods KUBERNETES_LARGE_MACHINE_POOL_LABEL: z.string().optional(), // if set, large-* presets affinity for machinepool= diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 90d6b3985b..a725971a84 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -4,7 +4,12 @@ import { type WorkloadManagerCreateOptions, type WorkloadManagerOptions, } from "./types.js"; -import type { EnvironmentType, MachinePreset, PlacementTag } from "@trigger.dev/core/v3"; +import type { + EnvironmentType, + MachinePreset, + MachinePresetName, + PlacementTag, +} from "@trigger.dev/core/v3"; import { PlacementTagProcessor } from "@trigger.dev/core/v3/serverOnly"; import { env } from "../env.js"; import { type K8sApi, createK8sApi, type k8s } from "../clients/kubernetes.js"; @@ -14,6 +19,26 @@ type ResourceQuantities = { [K in "cpu" | "memory" | "ephemeral-storage"]?: string; }; +const cpuRequestRatioByMachinePreset: Record = { + micro: env.KUBERNETES_CPU_REQUEST_RATIO_MICRO, + "small-1x": env.KUBERNETES_CPU_REQUEST_RATIO_SMALL_1X, + "small-2x": env.KUBERNETES_CPU_REQUEST_RATIO_SMALL_2X, + "medium-1x": 
env.KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_1X, + "medium-2x": env.KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_2X, + "large-1x": env.KUBERNETES_CPU_REQUEST_RATIO_LARGE_1X, + "large-2x": env.KUBERNETES_CPU_REQUEST_RATIO_LARGE_2X, +}; + +const memoryRequestRatioByMachinePreset: Record = { + micro: env.KUBERNETES_MEMORY_REQUEST_RATIO_MICRO, + "small-1x": env.KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_1X, + "small-2x": env.KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_2X, + "medium-1x": env.KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_1X, + "medium-2x": env.KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_2X, + "large-1x": env.KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_1X, + "large-2x": env.KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_2X, +}; + export class KubernetesWorkloadManager implements WorkloadManager { private readonly logger = new SimpleStructuredLogger("kubernetes-workload-provider"); private k8s: K8sApi; @@ -321,8 +346,11 @@ export class KubernetesWorkloadManager implements WorkloadManager { } #getResourceRequestsForMachine(preset: MachinePreset): ResourceQuantities { - const cpuRequest = preset.cpu * this.cpuRequestRatio; - const memoryRequest = preset.memory * this.memoryRequestRatio; + const cpuRatio = cpuRequestRatioByMachinePreset[preset.name] ?? this.cpuRequestRatio; + const memoryRatio = memoryRequestRatioByMachinePreset[preset.name] ?? this.memoryRequestRatio; + + const cpuRequest = preset.cpu * cpuRatio; + const memoryRequest = preset.memory * memoryRatio; // Clamp between min and max const clampedCpu = this.clamp(cpuRequest, this.cpuRequestMinCores, preset.cpu);