diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 447991de64..9ef0cff253 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -89,6 +89,25 @@ const Env = z.object({ KUBERNETES_CPU_REQUEST_RATIO: z.coerce.number().min(0).max(1).default(0.75), // Ratio of CPU limit, so 0.75 = 75% of CPU limit KUBERNETES_MEMORY_REQUEST_MIN_GB: z.coerce.number().min(0).default(0), KUBERNETES_MEMORY_REQUEST_RATIO: z.coerce.number().min(0).max(1).default(1), // Ratio of memory limit, so 1 = 100% of memory limit + + // Per-preset overrides of the global KUBERNETES_CPU_REQUEST_RATIO + KUBERNETES_CPU_REQUEST_RATIO_MICRO: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_SMALL_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_SMALL_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_LARGE_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_CPU_REQUEST_RATIO_LARGE_2X: z.coerce.number().min(0).max(1).optional(), + + // Per-preset overrides of the global KUBERNETES_MEMORY_REQUEST_RATIO + KUBERNETES_MEMORY_REQUEST_RATIO_MICRO: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_1X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_2X: z.coerce.number().min(0).max(1).optional(), + KUBERNETES_MEMORY_OVERHEAD_GB: z.coerce.number().min(0).optional(), // Optional memory overhead to add to the limit in GB KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods KUBERNETES_LARGE_MACHINE_POOL_LABEL: z.string().optional(), // if set, large-* presets affinity for machinepool= diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index 90d6b3985b..a725971a84 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -4,7 +4,12 @@ import { type WorkloadManagerCreateOptions, type WorkloadManagerOptions, } from "./types.js"; -import type { EnvironmentType, MachinePreset, PlacementTag } from "@trigger.dev/core/v3"; +import type { + EnvironmentType, + MachinePreset, + MachinePresetName, + PlacementTag, +} from "@trigger.dev/core/v3"; import { PlacementTagProcessor } from "@trigger.dev/core/v3/serverOnly"; import { env } from "../env.js"; import { type K8sApi, createK8sApi, type k8s } from "../clients/kubernetes.js"; @@ -14,6 +19,26 @@ type ResourceQuantities = { [K in "cpu" | "memory" | "ephemeral-storage"]?: string; }; +const cpuRequestRatioByMachinePreset: Record = { + micro: env.KUBERNETES_CPU_REQUEST_RATIO_MICRO, + "small-1x": env.KUBERNETES_CPU_REQUEST_RATIO_SMALL_1X, + "small-2x": env.KUBERNETES_CPU_REQUEST_RATIO_SMALL_2X, + "medium-1x": env.KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_1X, + "medium-2x": env.KUBERNETES_CPU_REQUEST_RATIO_MEDIUM_2X, + "large-1x": env.KUBERNETES_CPU_REQUEST_RATIO_LARGE_1X, + "large-2x": env.KUBERNETES_CPU_REQUEST_RATIO_LARGE_2X, +}; + +const memoryRequestRatioByMachinePreset: Record = { + micro: env.KUBERNETES_MEMORY_REQUEST_RATIO_MICRO, + "small-1x": env.KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_1X, + "small-2x": env.KUBERNETES_MEMORY_REQUEST_RATIO_SMALL_2X, + "medium-1x": env.KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_1X, + "medium-2x": env.KUBERNETES_MEMORY_REQUEST_RATIO_MEDIUM_2X, + "large-1x": env.KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_1X, + "large-2x": env.KUBERNETES_MEMORY_REQUEST_RATIO_LARGE_2X, +}; + export class KubernetesWorkloadManager implements WorkloadManager { private readonly logger = new SimpleStructuredLogger("kubernetes-workload-provider"); private k8s: K8sApi; @@ -321,8 +346,11 @@ export class KubernetesWorkloadManager implements WorkloadManager { } #getResourceRequestsForMachine(preset: MachinePreset): ResourceQuantities { - const cpuRequest = preset.cpu * this.cpuRequestRatio; - const memoryRequest = preset.memory * this.memoryRequestRatio; + const cpuRatio = cpuRequestRatioByMachinePreset[preset.name] ?? this.cpuRequestRatio; + const memoryRatio = memoryRequestRatioByMachinePreset[preset.name] ?? this.memoryRequestRatio; + + const cpuRequest = preset.cpu * cpuRatio; + const memoryRequest = preset.memory * memoryRatio; // Clamp between min and max const clampedCpu = this.clamp(cpuRequest, this.cpuRequestMinCores, preset.cpu);