-
-
Notifications
You must be signed in to change notification settings - Fork 3
feat: Support multiple git-syncs for multiple repositories #729
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f939934
d6b4c95
49848cf
fe99463
65e39a9
2cbae70
3ea363a
26c11a7
7c7f25f
ffa249a
984d7c1
5021b33
6538c67
0e0b09f
04ec315
3e2c438
010bfd3
50427cb
a54f9e5
8b2f4b6
61dc8ce
7e80e9a
d9558ea
c5befaf
c2b6e44
cf8b245
1c188c5
db8ecbd
8db36d6
c7052a8
1b43f0d
bd8d7a4
ff8bb20
e6a3e29
9d85772
3982793
158b801
7fcad63
e9d4567
58b8977
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,7 +42,7 @@ For multiple DAGs, it is easier to expose them via `gitsync`, as shown below. | |
|
|
||
| == Via `git-sync` | ||
|
|
||
| {git-sync}[git-sync] is a command that pulls a git repository into a local directory and is supplied as a sidecar container for use within Kubernetes. | ||
| {git-sync}[git-sync] is a command that pulls a git repository into a local directory and is supplied as a sidecar container for use within Kubernetes. The synchronized folders are provided at `/stackable/app/allDAGs/current-i`, with `i` in 0, 1, ..., n-1 for n configured git-sync entries. | ||
| The Stackable Airflow images already ship with git-sync included, and the operator takes care of calling the tool and mounting volumes, so that only the repository and synchronization details are required: | ||
|
|
||
| .git-sync usage example: https | ||
|
|
@@ -82,3 +82,24 @@ include::example$example-airflow-gitsync-ssh.yaml[] | |
|
|
||
| NOTE: git-sync can be used with DAGs that make use of Python modules, as Python is configured to use the git-sync target folder as the "root" location when looking for referenced files. | ||
| See the xref:usage-guide/applying-custom-resources.adoc[] example for more details. | ||
|
|
||
| === Multiple repositories via git-sync | ||
|
|
||
| If you want to access multiple branches of a repository or load DAGs from multiple repositories, you can extend the `dagsGitSync` list shown above with additional entries. | ||
|
|
||
| The default mount path for git-sync resources is `/stackable/app/allDAGs/current-{i}`, where `i` is the zero-based position of the entry in the list: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think the user needs to know this as it is (or should be?) an internal detail.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tend to say yes, but there would still be the corner case if you want to deploy / run files from airflow you need to know where this is mounted. Then it becomes valuable information I believe.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The path needs updating due to the image changes (and I think it is now only an internal detail?) |
||
|
|
||
| [source,yaml] | ||
| ---- | ||
| dagsGitSync: | ||
| - repo: ssh://git@github.com/stackable-airflow/dags.git # <-- current-0 | ||
| branch: test/git-sync-0 | ||
| credentials: | ||
| sshPrivateKeySecretName: git-sync-ssh | ||
| - repo: ssh://git@github.com/stackable-airflow/dags.git # <-- current-1 | ||
| branch: test/git-sync-1 | ||
| credentials: | ||
| sshPrivateKeySecretName: git-sync-ssh | ||
| ---- | ||
|
|
||
| NOTE: Using DAGs from ConfigMaps will require you to either use celeryExecutors and mount them under `/stackable/app/allDAGs/<name>` or use the kubernetesExecutor and mount them somewhere else, changing the PYTHONPATH (if using submodules) and `AIRFLOW\__CORE__DAGS_FOLDER` accordingly. | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,6 +71,7 @@ pub const STACKABLE_LOG_DIR: &str = "/stackable/log"; | |
| pub const LOG_CONFIG_DIR: &str = "/stackable/app/log_config"; | ||
| pub const AIRFLOW_HOME: &str = "/stackable/airflow"; | ||
| pub const AIRFLOW_CONFIG_FILENAME: &str = "webserver_config.py"; | ||
| pub const AIRFLOW_DAGS_FOLDER: &str = "/stackable/app/allDAGs"; | ||
|
|
||
| pub const TEMPLATE_VOLUME_NAME: &str = "airflow-executor-pod-template"; | ||
| pub const TEMPLATE_LOCATION: &str = "/templates"; | ||
|
|
@@ -439,6 +440,38 @@ impl v1alpha2::AirflowCluster { | |
| fragment::validate(conf_rolegroup).context(FragmentValidationFailureSnafu) | ||
| } | ||
|
|
||
| // Softlink from each single folder git-{i} into {AIRFLOW_DAGS_FOLDER}/. | ||
| pub fn get_multi_gitsync_commands(&self) -> Vec<String> { | ||
| let mut symlinks = Vec::<String>::new(); | ||
| for (i, _) in self.spec.cluster_config.dags_git_sync.iter().enumerate() { | ||
| symlinks | ||
| .push(format!("ln -s /stackable/app/git-{i} {AIRFLOW_DAGS_FOLDER}/").to_string()) | ||
| } | ||
| symlinks | ||
| } | ||
|
|
||
| // kubernetesExecuter needs to copy into an empty volume since git-{0}/current | ||
| // is a softlink and thus can't be shared via a volume mount (volume remains empty). | ||
| pub fn get_kubernetes_executer_multi_gitsync_commands(&self) -> Vec<String> { | ||
| let mut cp_commands = Vec::<String>::new(); | ||
| for (i, _) in self.spec.cluster_config.dags_git_sync.iter().enumerate() { | ||
| cp_commands | ||
| .push(format!("cp -r /stackable/app/git-{i} {AIRFLOW_DAGS_FOLDER}/").to_string()); | ||
| } | ||
| // init-container seems to only accept one command line. | ||
| vec![cp_commands.join(" && ")] | ||
| } | ||
|
|
||
| // PYTHONPATH contains folder-name provided in CRD. | ||
| pub fn get_gitsync_absolute_paths(&self) -> Vec<String> { | ||
| let mut python_path = Vec::<String>::new(); | ||
| for (i, git_sync) in self.spec.cluster_config.dags_git_sync.iter().enumerate() { | ||
| let folder = &git_sync.git_folder.display(); | ||
| python_path.push(format!("{AIRFLOW_DAGS_FOLDER}/git-{i}/current/{folder}").to_string()) | ||
| } | ||
| python_path | ||
| } | ||
|
|
||
| /// Retrieve and merge resource configs for the executor template | ||
| pub fn merged_executor_config( | ||
| &self, | ||
|
|
@@ -576,10 +609,13 @@ impl AirflowRole { | |
| format!( | ||
| "cp -RL {CONFIG_PATH}/{AIRFLOW_CONFIG_FILENAME} {AIRFLOW_HOME}/{AIRFLOW_CONFIG_FILENAME}" | ||
| ), | ||
| // Adding ConfigMaps as DAGs within the same AIRFLOW_DAGS_FOLDER may lead to problems, so `mkdir -p` only creates the folder if it does not exist yet. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does mean that we can't simply have, say, 2 DAGs in a single ConfigMap, which are mounted to |
||
| format!("mkdir -p {AIRFLOW_DAGS_FOLDER}"), | ||
| // graceful shutdown part | ||
| COMMON_BASH_TRAP_FUNCTIONS.to_string(), | ||
| remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), | ||
| ]; | ||
| command.extend(airflow.get_multi_gitsync_commands()); | ||
|
|
||
| if resolved_product_image.product_version.starts_with("3.") { | ||
| // Start-up commands have changed in 3.x. | ||
|
|
@@ -639,6 +675,8 @@ impl AirflowRole { | |
| container_debug_command(), | ||
| "airflow triggerer &".to_string(), | ||
| ]), | ||
| // The KubernetesExecutor is intentionally not covered here: it requires a | ||
| // command generated by Airflow and written into a pod template. | ||
| AirflowRole::Worker => command.extend(vec![ | ||
| "prepare_signal_handlers".to_string(), | ||
| container_debug_command(), | ||
|
|
@@ -691,6 +729,8 @@ impl AirflowRole { | |
| container_debug_command(), | ||
| "airflow triggerer &".to_string(), | ||
| ]), | ||
| // The KubernetesExecutor is intentionally not covered here: it requires a | ||
| // command generated by Airflow and written into a pod template. | ||
| AirflowRole::Worker => command.extend(vec![ | ||
| "prepare_signal_handlers".to_string(), | ||
| container_debug_command(), | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This path needs updating to be in line with stackabletech/docker-images#1444