diff --git a/docs/modules/airflow/pages/troubleshooting/index.adoc b/docs/modules/airflow/pages/troubleshooting/index.adoc index f89d5438..8403872f 100644 --- a/docs/modules/airflow/pages/troubleshooting/index.adoc +++ b/docs/modules/airflow/pages/troubleshooting/index.adoc @@ -145,3 +145,19 @@ When a custom DAG folder (e.g. `/dags`) is defined with `envOverrides` and some ---- NOTE: Generally speaking it is https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#configuration-reference[recommended] by Airflow to have the same config everywhere across all components. + +== GitSync race condition + +Sometimes a race condition can arise when the long-running Python process started by the Airflow dag-processor caches Python submodules before the git-sync fetch is complete. This race occurs non-deterministically, but it can be avoided by adding the following lines to the top of each DAG file that references submodules: + +[source,python] +---- +import importlib +import site + +# invalidate cache due to race condition when using dag-processor +importlib.reload(site) +importlib.invalidate_caches() +---- + +This adds some overhead, which should be minimal compared to parsing the DAG as a whole. 
diff --git a/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 b/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 index bebf7a2a..1e3620dd 100644 --- a/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 +++ b/tests/templates/kuttl/mount-dags-gitsync/30-install-airflow-cluster.yaml.j2 @@ -144,3 +144,13 @@ spec: envOverrides: AIRFLOW_CONN_KUBERNETES_IN_CLUSTER: "kubernetes://?__extra__=%7B%22extra__kubernetes__in_cluster%22%3A+true%2C+%22extra__kubernetes__kube_config%22%3A+%22%22%2C+%22extra__kubernetes__kube_config_path%22%3A+%22%22%2C+%22extra__kubernetes__namespace%22%3A+%22%22%7D" replicas: 1 + dagProcessors: + config: + gracefulShutdownTimeout: 10s + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + envOverrides: + AIRFLOW_CONN_KUBERNETES_IN_CLUSTER: "kubernetes://?__extra__=%7B%22extra__kubernetes__in_cluster%22%3A+true%2C+%22extra__kubernetes__kube_config%22%3A+%22%22%2C+%22extra__kubernetes__kube_config_path%22%3A+%22%22%2C+%22extra__kubernetes__namespace%22%3A+%22%22%7D" + replicas: 1 diff --git a/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 b/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 index 682fe765..21c05198 100644 --- a/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 +++ b/tests/templates/kuttl/versioning/30-install-airflow-cluster.yaml.j2 @@ -79,3 +79,12 @@ spec: default: envOverrides: *envOverrides replicas: 1 + dagProcessors: + config: + gracefulShutdownTimeout: 10s + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + envOverrides: *envOverrides + replicas: 1