From 854485a3a4d504192f0d928dd4a9d88c072b47ba Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 21 Jan 2026 14:18:41 +0100 Subject: [PATCH] add docs and add dag-processor role to check changed DAG --- .../airflow/pages/troubleshooting/index.adoc | 16 ++++++++++++++++ .../30-install-airflow-cluster.yaml.j2 | 10 ++++++++++ .../30-install-airflow-cluster.yaml.j2 | 9 +++++++++ 3 files changed, 35 insertions(+) diff --git a/docs/modules/airflow/pages/troubleshooting/index.adoc b/docs/modules/airflow/pages/troubleshooting/index.adoc index f89d5438..8403872f 100644 --- a/docs/modules/airflow/pages/troubleshooting/index.adoc +++ b/docs/modules/airflow/pages/troubleshooting/index.adoc @@ -145,3 +145,19 @@ When a custom DAG folder (e.g. `/dags`) is defined with `envOverrides` and some ---- NOTE: Generally speaking it is https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#configuration-reference[recommended] by Airflow to have the same config everywhere across all components. + +== GitSync race condition + +A race condition can occasionally arise when the long-running Python process started by the Airflow dag-processor caches Python submodules before the git-sync fetch is complete. This happens non-deterministically, but it can be avoided by adding the following lines to the top of each DAG file that references submodules: + +[source,python] +---- +import importlib +import site + +# invalidate cache due to race condition when using dag-processor +importlib.reload(site) +importlib.invalidate_caches() +---- + +This adds some overhead, which should be minimal compared to parsing the DAG as a whole. 
diff --git a/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 b/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 index bebf7a2a..1e3620dd 100644 --- a/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 +++ b/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 @@ -144,3 +144,13 @@ spec: envOverrides: AIRFLOW_CONN_KUBERNETES_IN_CLUSTER: "kubernetes://?__extra__=%7B%22extra__kubernetes__in_cluster%22%3A+true%2C+%22extra__kubernetes__kube_config%22%3A+%22%22%2C+%22extra__kubernetes__kube_config_path%22%3A+%22%22%2C+%22extra__kubernetes__namespace%22%3A+%22%22%7D" replicas: 1 + dagProcessors: + config: + gracefulShutdownTimeout: 10s + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + envOverrides: + AIRFLOW_CONN_KUBERNETES_IN_CLUSTER: "kubernetes://?__extra__=%7B%22extra__kubernetes__in_cluster%22%3A+true%2C+%22extra__kubernetes__kube_config%22%3A+%22%22%2C+%22extra__kubernetes__kube_config_path%22%3A+%22%22%2C+%22extra__kubernetes__namespace%22%3A+%22%22%7D" + replicas: 1 diff --git a/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 b/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 index 682fe765..21c05198 100644 --- a/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 +++ b/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 @@ -79,3 +79,12 @@ spec: default: envOverrides: *envOverrides replicas: 1 + dagProcessors: + config: + gracefulShutdownTimeout: 10s + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + envOverrides: *envOverrides + replicas: 1