diff --git a/docs/guide/container_component.md b/docs/guide/container_component.md index 67449cc7b9..04fcab136e 100644 --- a/docs/guide/container_component.md +++ b/docs/guide/container_component.md @@ -70,8 +70,8 @@ grep_component = tfx.dsl.components.create_container_component( parameters={ 'pattern': str, }, - # The component code uses gsutil to upload the data to Google Cloud Storage, so the - # container image needs to have gsutil installed and configured. + # The component code uses gcloud storage to upload the data to Google Cloud Storage, so the + # container image needs to have the gcloud CLI installed and configured. image='google/cloud-sdk:278.0.0', command=[ 'sh', '-exc', @@ -83,13 +83,13 @@ grep_component = tfx.dsl.components.create_container_component( filtered_text_path=$(mktemp) # Getting data into the container - gsutil cp "$text_uri" "$text_path" + gcloud storage cp "$text_uri" "$text_path" # Running the main code grep "$pattern" "$text_path" >"$filtered_text_path" # Getting data out of the container - gsutil cp "$filtered_text_path" "$filtered_text_uri" + gcloud storage cp "$filtered_text_path" "$filtered_text_uri" ''', '--pattern', tfx.dsl.placeholders.InputValuePlaceholder('pattern'), '--text', tfx.dsl.placeholders.InputUriPlaceholder('text'), diff --git a/docs/tutorials/tfx/cloud-ai-platform-pipelines.md b/docs/tutorials/tfx/cloud-ai-platform-pipelines.md index 40977a0d05..94979aab69 100644 --- a/docs/tutorials/tfx/cloud-ai-platform-pipelines.md +++ b/docs/tutorials/tfx/cloud-ai-platform-pipelines.md @@ -359,7 +359,7 @@ The notebook will upload our sample data to GCS bucket so that we can use it in our pipeline later. ```python -!gsutil cp data/data.csv gs://{GOOGLE_CLOUD_PROJECT}-kubeflowpipelines-default/tfx-template/data/taxi/data.csv +!gcloud storage cp data/data.csv gs://{GOOGLE_CLOUD_PROJECT}-kubeflowpipelines-default/tfx-template/data/taxi/data.csv ``` The notebook then uses the `tfx pipeline create` command to create the pipeline. 
diff --git a/docs/tutorials/tfx/gcp/vertex_pipelines_bq.ipynb b/docs/tutorials/tfx/gcp/vertex_pipelines_bq.ipynb index bc35bdb777..0c4dbccc6a 100644 --- a/docs/tutorials/tfx/gcp/vertex_pipelines_bq.ipynb +++ b/docs/tutorials/tfx/gcp/vertex_pipelines_bq.ipynb @@ -637,7 +637,7 @@ }, "outputs": [], "source": [ - "!gsutil cp {_trainer_module_file} {MODULE_ROOT}/" + "!gcloud storage cp {_trainer_module_file} {MODULE_ROOT}/" ] }, { diff --git a/docs/tutorials/tfx/gcp/vertex_pipelines_simple.ipynb b/docs/tutorials/tfx/gcp/vertex_pipelines_simple.ipynb index 3c63483712..576bf8107f 100644 --- a/docs/tutorials/tfx/gcp/vertex_pipelines_simple.ipynb +++ b/docs/tutorials/tfx/gcp/vertex_pipelines_simple.ipynb @@ -387,7 +387,7 @@ }, "outputs": [], "source": [ - "!gsutil cp gs://download.tensorflow.org/data/palmer_penguins/penguins_processed.csv {DATA_ROOT}/" + "!gcloud storage cp gs://download.tensorflow.org/data/palmer_penguins/penguins_processed.csv {DATA_ROOT}/" ] }, { @@ -407,7 +407,7 @@ }, "outputs": [], "source": [ - "!gsutil cat {DATA_ROOT}/penguins_processed.csv | head" + "!gcloud storage cat {DATA_ROOT}/penguins_processed.csv | head" ] }, { @@ -607,7 +607,7 @@ }, "outputs": [], "source": [ - "!gsutil cp {_trainer_module_file} {MODULE_ROOT}/" + "!gcloud storage cp {_trainer_module_file} {MODULE_ROOT}/" ] }, { diff --git a/docs/tutorials/tfx/stub_template.md b/docs/tutorials/tfx/stub_template.md index d99fa455dd..59e5334dd0 100644 --- a/docs/tutorials/tfx/stub_template.md +++ b/docs/tutorials/tfx/stub_template.md @@ -143,7 +143,7 @@ tfx run create --pipeline-name $pipeline_name --endpoint=$endpoint \ Use command `fg` to access the port-forwarding in the background then ctrl-C to terminate. You can delete the directory with recorded pipeline outputs using -`gsutil -m rm -R $output_dir`. +`gcloud storage rm --recursive $output_dir`. 
To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) diff --git a/docs/tutorials/tfx/template.ipynb b/docs/tutorials/tfx/template.ipynb index bf9592cbd4..16adb681ea 100644 --- a/docs/tutorials/tfx/template.ipynb +++ b/docs/tutorials/tfx/template.ipynb @@ -408,7 +408,7 @@ }, "outputs": [], "source": [ - "!gsutil cp data/data.csv gs://{GOOGLE_CLOUD_PROJECT}-kubeflowpipelines-default/tfx-template/data/taxi/data.csv" + "!gcloud storage cp data/data.csv gs://{GOOGLE_CLOUD_PROJECT}-kubeflowpipelines-default/tfx-template/data/taxi/data.csv" ] }, { diff --git a/tfx/examples/chicago_taxi_pipeline/serving/start_model_server_aiplatform.sh b/tfx/examples/chicago_taxi_pipeline/serving/start_model_server_aiplatform.sh index b8648b547f..b10ee68105 100644 --- a/tfx/examples/chicago_taxi_pipeline/serving/start_model_server_aiplatform.sh +++ b/tfx/examples/chicago_taxi_pipeline/serving/start_model_server_aiplatform.sh @@ -20,10 +20,10 @@ echo Running cloud serving... # Dir for model exported for serving, e.g., gs:///serving_model/chicago_taxi_pipeline_kubeflow CLOUD_MODEL_DIR=$1 -gsutil ls $CLOUD_MODEL_DIR +gcloud storage ls $CLOUD_MODEL_DIR # Pick out the directory containing the last trained model. 
-MODEL_BINARIES=$(gsutil ls $CLOUD_MODEL_DIR \ +MODEL_BINARIES=$(gcloud storage ls $CLOUD_MODEL_DIR \ | sort | grep '\/[0-9]*\/$' | tail -n1) echo latest model: $MODEL_BINARIES diff --git a/tfx/examples/custom_components/container_components/download_grep_print_pipeline.py b/tfx/examples/custom_components/container_components/download_grep_print_pipeline.py index e55cc1f20f..32f306876f 100644 --- a/tfx/examples/custom_components/container_components/download_grep_print_pipeline.py +++ b/tfx/examples/custom_components/container_components/download_grep_print_pipeline.py @@ -41,7 +41,7 @@ wget "$0" -O "$output_data_path" || curl "$0" > "$output_data_path" # Getting data out of the container - gsutil cp "$output_data_path" "$output_data_uri" + gcloud storage cp "$output_data_path" "$output_data_uri" ''', placeholders.InputValuePlaceholder('url'), placeholders.OutputUriPlaceholder('data'), @@ -74,13 +74,13 @@ filtered_text_path=$(mktemp) # Getting data into the container - gsutil cp "$text_uri" "$text_path" + gcloud storage cp "$text_uri" "$text_path" # Running the main code grep "$pattern" "$text_path" >"$filtered_text_path" # Getting data out of the container - gsutil cp "$filtered_text_path" "$filtered_text_uri" + gcloud storage cp "$filtered_text_path" "$filtered_text_uri" ''', placeholders.InputValuePlaceholder('pattern'), placeholders.InputUriPlaceholder('text'), @@ -105,7 +105,7 @@ text_path=$(mktemp) # Getting data into the container - gsutil cp "$text_uri" "$text_path" + gcloud storage cp "$text_uri" "$text_path" # Running the main code cat "$text_path" diff --git a/tfx/examples/custom_components/slack/README.md b/tfx/examples/custom_components/slack/README.md index 7d5e621407..e3d6cd9d1b 100644 --- a/tfx/examples/custom_components/slack/README.md +++ b/tfx/examples/custom_components/slack/README.md @@ -114,7 +114,7 @@ pipeline in Google Cloud Platform for production. 
First prepare a gcs bucket for the pipeline run root: ```bash -gsutil mb -p ${PROJECT_ID} gs://${BUCKET_NAME} +gcloud storage buckets create gs://${BUCKET_NAME} --project=${PROJECT_ID} ``` Let's copy the dataset CSV to the GCS where TFX ExampleGen will ingest it diff --git a/tfx/examples/penguin/experimental/README.md b/tfx/examples/penguin/experimental/README.md index 93b280e185..02402552ec 100644 --- a/tfx/examples/penguin/experimental/README.md +++ b/tfx/examples/penguin/experimental/README.md @@ -67,8 +67,8 @@ pipeline python file. Output can be found at `[BUCKET]/tfx`.
 vi ~/penguin/experimental/penguin_pipeline_sklearn_gcp.py
-gsutil -m cp -r ~/penguin/data/* gs://[BUCKET]/penguin/data/
-gsutil -m cp ~/penguin/experimental/\*.py gs://[BUCKET]/penguin/experimental/
+gcloud storage cp --recursive ~/penguin/data/* gs://[BUCKET]/penguin/data/
+gcloud storage cp ~/penguin/experimental/\*.py gs://[BUCKET]/penguin/experimental/
 
 tfx pipeline create \
   --engine kubeflow \
@@ -77,7 +77,7 @@ tfx pipeline create \
 
Note that -`gsutil -m cp ~/penguin/experimental/*.py gs://[BUCKET]/penguin/experimental` +`gcloud storage cp ~/penguin/experimental/*.py gs://[BUCKET]/penguin/experimental` will need to be run every time updates are made to the GCP example. Additionally, subsequent pipeline deployments should use `tfx pipeline update` instead of `tfx pipeline create`. diff --git a/tfx/orchestration/kubeflow/test_utils.py b/tfx/orchestration/kubeflow/test_utils.py index 71e81f24f3..b3f4dffde0 100644 --- a/tfx/orchestration/kubeflow/test_utils.py +++ b/tfx/orchestration/kubeflow/test_utils.py @@ -197,7 +197,7 @@ class HelloWorldComponent(BaseComponent): args=[ 'echo "hello ' + ph.exec_property('word') + - '" | gsutil cp - ' + + '" | gcloud storage cp - ' + ph.output('greeting')[0].uri ]) diff --git a/tfx/orchestration/kubeflow/v2/e2e_tests/artifact_value_placeholder_integration_test.py b/tfx/orchestration/kubeflow/v2/e2e_tests/artifact_value_placeholder_integration_test.py index f5002c84f0..3e41497828 100644 --- a/tfx/orchestration/kubeflow/v2/e2e_tests/artifact_value_placeholder_integration_test.py +++ b/tfx/orchestration/kubeflow/v2/e2e_tests/artifact_value_placeholder_integration_test.py @@ -46,7 +46,7 @@ def _tasks_for_pipeline_with_artifact_value_passing(): echo "Hello $message" >"$output_data_path" # Getting data out of the container - gsutil cp -r "$output_data_path" "$output_data_uri" + gcloud storage cp --recursive "$output_data_path" "$output_data_uri" """, placeholders.InputValuePlaceholder('message'), placeholders.OutputUriPlaceholder('data'), diff --git a/tfx/orchestration/test_pipelines/download_grep_print_pipeline.py b/tfx/orchestration/test_pipelines/download_grep_print_pipeline.py index 8af8aadc6f..aa440f2e56 100644 --- a/tfx/orchestration/test_pipelines/download_grep_print_pipeline.py +++ b/tfx/orchestration/test_pipelines/download_grep_print_pipeline.py @@ -41,7 +41,7 @@ wget "$0" -O "$output_data_path" || curl "$0" > "$output_data_path" # Getting data out of the 
container - gsutil cp "$output_data_path" "$output_data_uri" + gcloud storage cp "$output_data_path" "$output_data_uri" ''', ph.exec_property('url'), ph.output('data')[0].uri, @@ -74,13 +74,13 @@ filtered_text_path=$(mktemp) # Getting data into the container - gsutil cp "$text_uri" "$text_path" + gcloud storage cp "$text_uri" "$text_path" # Running the main code grep "$pattern" "$text_path" >"$filtered_text_path" # Getting data out of the container - gsutil cp "$filtered_text_path" "$filtered_text_uri" + gcloud storage cp "$filtered_text_path" "$filtered_text_uri" ''', ph.exec_property('pattern'), ph.input('text')[0].uri, @@ -105,7 +105,7 @@ text_path=$(mktemp) # Getting data into the container - gsutil cp "$text_uri" "$text_path" + gcloud storage cp "$text_uri" "$text_path" # Running the main code cat "$text_path"