# --- Local Machine Settings ---
PBF_FILTERED_FILE="planet-filtered.pbf"
IMPORT_DIR="import"
DOCKER_IMAGE="dedicatedcode/paikka:develop"

# Tunables: environment variables win; otherwise fall back to defaults derived
# from the local work directory. (FIX: the pasted defaults embedded stray
# spaces, e.g. "${IMPORT_MEMORY:- 16G} " would yield the literal value " 16G ".)
DOWNLOAD_DIR="${DOWNLOAD_DIR:-$LOCAL_WORK_DIR}"
IMPORT_DATA_DIR="${IMPORT_DATA_DIR:-$LOCAL_WORK_DIR/$IMPORT_DIR}"
IMPORT_MEMORY="${IMPORT_MEMORY:-16G}"
IMPORT_THREADS="${IMPORT_THREADS:-10}"

# --- Remote Machine Settings ---
REMOTE_BASE_DIR="/opt/paikka/data"

GEOCODER_TEST_URL_BASE="http://localhost:8080/v1/reverse"

# --- Verification Test Cases ---
# Maps a reverse-geocoding query string to the OSM id expected at that point.
declare -A TEST_CASES=(
  ["lat=52.516280&lon=13.377635"]="518071791"   # Brandenburger Tor
  ["lat=48.85826&lon=2.2945008"]="5013364"      # Eiffel Tower
  ["lat=40.68924&lon=-74.044502"]="32965412"    # Statue of Liberty
)

# Global variables that will be set by parse_args_and_configure or environment
###
# Loads configuration from positional arguments, falling back to environment
# variables, and validates that the required values are present.
# Arguments:
#   $1 REMOTE_USER, $2 REMOTE_HOST, $3 GEOCODER_API_TOKEN  (required)
#   $4 DOWNLOAD_DIR, $5 IMPORT_DATA_DIR, $6 IMPORT_MEMORY, $7 IMPORT_THREADS (optional)
# Exits 1 with a usage message when a required value is missing.
###
parse_args_and_configure() {
  # NOTE(review): the "$1 -> REMOTE_USER" fallback sits just above this diff
  # hunk — reconstructed here to match the pattern of the visible lines.
  REMOTE_USER="${1:-$REMOTE_USER}"
  REMOTE_HOST="${2:-$REMOTE_HOST}"
  GEOCODER_API_TOKEN="${3:-$GEOCODER_API_TOKEN}"

  DOWNLOAD_DIR="${4:-$DOWNLOAD_DIR}"
  IMPORT_DATA_DIR="${5:-$IMPORT_DATA_DIR}"
  IMPORT_MEMORY="${6:-$IMPORT_MEMORY}"
  IMPORT_THREADS="${7:-$IMPORT_THREADS}"

  if [[ -z "$REMOTE_USER" || -z "$REMOTE_HOST" || -z "$GEOCODER_API_TOKEN" ]]; then
    # Diagnostics go to stderr so they don't pollute captured stdout.
    {
      echo "Usage: $0 <REMOTE_USER> <REMOTE_HOST> <API_TOKEN> [DOWNLOAD_DIR] [IMPORT_DATA_DIR] [MEMORY] [THREADS]"
      echo "  DOWNLOAD_DIR: Where to download PBF files (default: current directory)"
      echo "  IMPORT_DATA_DIR: Where to store import data (default: ./import)"
      echo "  MEMORY: Memory for import (default: 16G)"
      echo "  THREADS: Threads for import (default: 10)"
      echo "Error: Missing required configuration."
    } >&2
    exit 1
  fi
  echo "Configuration loaded for ${REMOTE_USER}@${REMOTE_HOST}"
  echo "Download directory: $DOWNLOAD_DIR"
  echo "Import data directory: $IMPORT_DATA_DIR"
  echo "Import memory: $IMPORT_MEMORY"
  echo "Import threads: $IMPORT_THREADS"
  # FIX: the closing brace was glued onto the last echo argument
  # (`echo "..." }`), which left the function body unterminated.
}
6985
###
# LOCAL: Creates the necessary working directories and enters the download
# directory, where the subsequent download/filter steps operate.
###
local_prepare_directories() {
  log "LOCAL: Ensuring import directory exists"
  mkdir -p "$DOWNLOAD_DIR" "$IMPORT_DATA_DIR"
  # FIX: guard the cd — later steps run wget/rm relative to this directory,
  # so continuing in the wrong cwd would be destructive.
  cd "$DOWNLOAD_DIR" || exit 1
}
7895
###
# LOCAL: Downloads the latest OSM planet file into DOWNLOAD_DIR.
# wget -N only re-fetches when the remote copy is newer than the local file.
###
local_download_planet_file() {
  log "LOCAL: Downloading latest OSM planet file to $DOWNLOAD_DIR"
  # FIX: guard the cd so a multi-GB download cannot land in the wrong cwd.
  cd "$DOWNLOAD_DIR" || exit 1
  wget -N "$PLANET_URL"
}
86104
###
# LOCAL: Filters the planet PBF file inside the paikka container
# (approx. 50 mins). Reads PBF_INPUT_FILE, writes PBF_FILTERED_FILE,
# both inside DOWNLOAD_DIR (mounted at /data).
###
local_filter_pbf() {
  log "LOCAL: Filtering PBF file (approx. 50 mins)"
  cd "$DOWNLOAD_DIR" || exit 1
  # FIX: dropped -t; allocating a TTY fails when the script runs without one
  # (cron/CI). -i alone keeps stdin open for the container.
  sudo docker run --rm -i -v "$DOWNLOAD_DIR":/data "$DOCKER_IMAGE" prepare "$PBF_INPUT_FILE" "$PBF_FILTERED_FILE"
}
102121
###
# LOCAL: Creates the geocoder import bundle from the filtered PBF
# (approx. 15 hours). The download dir and import dir are mounted
# separately so they may live on different disks.
###
local_create_import_bundle() {
  log "LOCAL: Creating import bundle (approx. 15 hours) with $IMPORT_MEMORY memory and $IMPORT_THREADS threads"
  # FIX: --data-dir was "/import /" (embedded space from the paste) — the
  # container would have written into a path with a trailing space.
  # Also dropped -t (no TTY in non-interactive runs).
  sudo docker run --rm -i \
    -v "$DOWNLOAD_DIR":/download \
    -v "$IMPORT_DATA_DIR":/import \
    "$DOCKER_IMAGE" import \
    --memory "$IMPORT_MEMORY" \
    --threads "$IMPORT_THREADS" \
    --data-dir "/import/" \
    "/download/$PBF_FILTERED_FILE"
}
114133
###
# LOCAL: Removes the large, intermediate PBF files from DOWNLOAD_DIR.
###
local_cleanup_pbf() {
  log "LOCAL: Cleaning up intermediate PBF files from $DOWNLOAD_DIR"
  # FIX: guard the cd — running rm -f in an unexpected cwd would silently
  # delete nothing (or the wrong files). `--` protects odd filenames.
  cd "$DOWNLOAD_DIR" || exit 1
  rm -f -- "$PBF_FILTERED_FILE" "$PBF_INPUT_FILE"
  echo "Deleted '$DOWNLOAD_DIR/$PBF_FILTERED_FILE' and '$DOWNLOAD_DIR/$PBF_INPUT_FILE'"
}
###
# REMOTE: Syncs the import directory to the remote server using rsync.
# Uses --link-dest to minimize bandwidth and remote disk usage.
# Exports LATEST_RELEASE_DIR_NAME for remote_deploy_and_verify.
###
remote_sync_bundle() {
  log "REMOTE: Syncing bundle via rsync (Delta transfer)"

  # Resolve the currently live release so rsync can hard-link unchanged files.
  # `|| true` keeps this working on a fresh host with no live_data symlink.
  CURRENT_LIVE=$(ssh "${REMOTE_USER}@${REMOTE_HOST}" "readlink -f ${REMOTE_BASE_DIR}/live_data || true")
  NEW_RELEASE_TIMESTAMP=$(date +%Y%m%d%H%M%S)
  NEW_RELEASE_DIR="${REMOTE_BASE_DIR}/releases/${NEW_RELEASE_TIMESTAMP}"

  # FIX: check the ssh/rsync steps explicitly — a half-synced release must
  # never reach the deploy/verify stage.
  ssh "${REMOTE_USER}@${REMOTE_HOST}" "mkdir -p ${REMOTE_BASE_DIR}/releases" || exit 1

  # --link-dest hard-links unchanged files from the current release into the
  # new one, saving bandwidth and space. (FIX: the pasted
  # `${CURRENT_LIVE: +--link-dest=…}` had a stray space — invalid expansion.)
  rsync -avz --progress \
    ${CURRENT_LIVE:+--link-dest="$CURRENT_LIVE"} \
    "$IMPORT_DATA_DIR/" \
    "${REMOTE_USER}@${REMOTE_HOST}:$NEW_RELEASE_DIR/" || exit 1

  # Consumed by the next step.
  export LATEST_RELEASE_DIR_NAME="$NEW_RELEASE_TIMESTAMP"
}
148163
###
# REMOTE: Atomically switches the live_data symlink to the freshly synced
# release, triggers a geocoder DB refresh, verifies known locations via the
# reverse-geocoding API, and rolls back to the previous release on failure.
###
remote_deploy_and_verify() {
  log "REMOTE: Executing remote deployment (Atomic Swap)"

  # Serialize TEST_CASES so the remote shell can rebuild the assoc array.
  # FIX: was "${!TEST_CASES}" — without [@] this does not iterate the keys.
  local test_cases_str=""
  local key
  for key in "${!TEST_CASES[@]}"; do
    test_cases_str+="[\"$key\"]=\"${TEST_CASES[$key]}\" "
  done

  ssh "${REMOTE_USER}@${REMOTE_HOST}" /bin/bash <<EOF
set -e
BASE_DIR="${REMOTE_BASE_DIR}"
API_TOKEN="${GEOCODER_API_TOKEN}"
# NOTE(review): the ADMIN_URL / TEST_URL_BASE assignments sit in a diff gap
# here — reconstructed from how they are used below; confirm against the file.
TEST_URL_BASE="${GEOCODER_TEST_URL_BASE}"
NEW_RELEASE_DIR="releases/${LATEST_RELEASE_DIR_NAME}"
LIVE_DATA_SYMLINK="live_data"

# Rebuild the verification map on the remote side.
declare -A TESTS=($test_cases_str)

echo_remote() {
    echo "[REMOTE] \$1"
}

cd "\$BASE_DIR"

# FIX: use `if` instead of `[ -L … ] && …` — under set -e a false test in a
# bare && list aborts the whole remote script (e.g. on the first deployment).
OLD_RELEASE_DIR=""
if [ -L "\$LIVE_DATA_SYMLINK" ]; then
    OLD_RELEASE_DIR=\$(readlink "\$LIVE_DATA_SYMLINK")
fi

# --- 1. Swap + refresh ---
echo_remote "Switching symlink: \$LIVE_DATA_SYMLINK -> \$NEW_RELEASE_DIR"
ln -sfn "\$NEW_RELEASE_DIR" "\$LIVE_DATA_SYMLINK"

echo_remote "Refreshing Geocoder DB..."
HTTP_STATUS=\$(curl -s -o /dev/null -w "%{http_code}" -X POST -H "X-Admin-Token: \$API_TOKEN" "\$ADMIN_URL")

if [ "\$HTTP_STATUS" -ne 200 ]; then
    echo_remote "ERROR: Refresh failed (\$HTTP_STATUS). Rolling back."
    if [ -n "\$OLD_RELEASE_DIR" ]; then ln -sfn "\$OLD_RELEASE_DIR" "\$LIVE_DATA_SYMLINK"; fi
    exit 1
fi

# --- 2. Verify ---
echo_remote "Verifying new data..."
VERIFICATION_FAILED=0
# FIX: was "\${!TESTS}" — without [@] the loop does not iterate the queries.
for query in "\${!TESTS[@]}"; do
    ACTUAL_ID=\$(curl -s "\$TEST_URL_BASE?\$query" | jq -r '.[0].id // "not_found"')
    if [ "\$ACTUAL_ID" != "\${TESTS[\$query]}" ]; then
        echo_remote " --> FAILED: For \$query, expected '\${TESTS[\$query]}', got '\$ACTUAL_ID'"
        VERIFICATION_FAILED=1
    fi
done
# NOTE(review): the lines between the verify loop and the rollback branch sit
# in a diff gap — reconstructed minimally; confirm against the full file.

# --- 3. Roll back or finalize ---
if [ \$VERIFICATION_FAILED -ne 0 ]; then
    echo_remote "VERIFICATION FAILED. Rolling back and re-refreshing."
    if [ -n "\$OLD_RELEASE_DIR" ] && [ -d "\$OLD_RELEASE_DIR" ]; then
        ln -sfn "\$OLD_RELEASE_DIR" "\$LIVE_DATA_SYMLINK"
        curl -s -o /dev/null -X POST -H "X-Admin-Token: \$API_TOKEN" "\$ADMIN_URL"
        echo_remote "Rollback to \$OLD_RELEASE_DIR complete. Faulty data in \$NEW_RELEASE_DIR is kept for inspection."
        exit 1
    else
        echo_remote "No previous release to roll back to."
        exit 1
    fi
else
    echo_remote "VERIFICATION SUCCEEDED. Cleaning up old release and archive."
    # FIX: guarded with `if` — the original `[ -n … ] && … && rm -rf …` made
    # the remote script exit non-zero under set -e when there was no old release.
    if [ -n "\$OLD_RELEASE_DIR" ] && [ -d "\$OLD_RELEASE_DIR" ]; then
        rm -rf "\$OLD_RELEASE_DIR"
    fi
    echo_remote "Deployment successful."
fi
EOF
}
218241
###
# REMOTE: Deletes old release directories, keeping the 3 most recent ones.
# The currently live release is never counted against the quota or deleted.
###
remote_cleanup_old_releases() {
  log "REMOTE: Cleaning up old releases (keeping last 3 successful ones)"

  ssh "${REMOTE_USER}@${REMOTE_HOST}" /bin/bash <<EOF
set -e
BASE_DIR="${REMOTE_BASE_DIR}"

echo_remote() {
    echo "[REMOTE CLEANUP] \$1"
}

cd "\$BASE_DIR"

echo_remote "Finding old releases to clean up..."
CURRENT_LIVE=\$(readlink -f live_data 2>/dev/null || echo "")

# Newest first: the timestamped directory names sort chronologically.
RELEASES=\$(find releases -maxdepth 1 -type d -name "[0-9]*" | sort -r)

KEEP_COUNT=3
COUNT=0
for release in \$RELEASES; do
    # FIX: compare basenames on both sides. \$release is "releases/<ts>" while
    # \$CURRENT_LIVE is an absolute path from readlink -f, so the original
    # comparisons never matched and the live release could be deleted.
    if [ -n "\$CURRENT_LIVE" ] && [ "\$(basename "\$release")" = "\$(basename "\$CURRENT_LIVE")" ]; then
        echo_remote "Skipping current live release: \$release"
        continue
    fi

    COUNT=\$((COUNT + 1))
    if [ \$COUNT -gt \$KEEP_COUNT ]; then
        echo_remote "Removing old release: \$release"
        rm -rf "\$release"
    else
        echo_remote "Keeping release: \$release"
    fi
done

# Remove now-empty leftovers. FIX: -mindepth 1 keeps find from deleting the
# "releases" directory itself when it ends up empty.
find releases -mindepth 1 -type d -empty -delete 2>/dev/null || true

echo_remote "Cleanup complete"
EOF
}
# NOTE(review): the heredoc terminator above must stay at column 0 — the
# pasted version had an indented EOF, which never terminates a plain << EOF.
219285# ==============================================================================
220286# MAIN ORCHESTRATION FUNCTION
221287# ==============================================================================
@@ -231,7 +297,7 @@ main() {
231297 local_cleanup_pbf
232298 remote_sync_bundle
233299 remote_deploy_and_verify
234-
300+ remote_cleanup_old_releases
235301
236302 log " Update process finished."
237303}
0 commit comments