Skip to content

Commit 1d2f89e

Browse files
committed
chore: enhance update.sh with configurable directories, memory, and threads; add new test cases and improved cleanup logic
1 parent e5321db commit 1d2f89e

1 file changed

Lines changed: 112 additions & 46 deletions

File tree

scripts/update.sh

Lines changed: 112 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ PBF_FILTERED_FILE="planet-filtered.pbf"
1515
IMPORT_DIR="import"
1616
DOCKER_IMAGE="dedicatedcode/paikka:develop"
1717

# --- Configurable directories and import resources ---
# Each value can be overridden via the environment; otherwise it falls back
# to the local working directory / sensible defaults.
DOWNLOAD_DIR="${DOWNLOAD_DIR:-$LOCAL_WORK_DIR}"
IMPORT_DATA_DIR="${IMPORT_DATA_DIR:-$LOCAL_WORK_DIR/$IMPORT_DIR}"
IMPORT_MEMORY="${IMPORT_MEMORY:-16G}"
IMPORT_THREADS="${IMPORT_THREADS:-10}"
22+
1823
# --- Remote Machine Settings ---
1924
REMOTE_BASE_DIR="/opt/paikka/data"
2025

@@ -24,7 +29,9 @@ GEOCODER_TEST_URL_BASE="http://localhost:8080/v1/reverse"
2429

2530
# --- Verification Test Cases ---
# Maps a reverse-geocoding query string to the OSM id the geocoder must
# return for the deployment to be considered healthy.
declare -A TEST_CASES=(
    ["lat=52.516280&lon=13.377635"]="518071791" # Brandenburger Tor
    ["lat=48.85826&lon=2.2945008"]="5013364"    # Eiffel Tower
    ["lat=40.68924&lon=-74.044502"]="32965412"  # Statue of Liberty
)
2936

3037
# Global variables that will be set by parse_args_and_configure or environment
###
# Parses positional arguments (falling back to environment variables) and
# validates required settings. Exits 1 with usage text when REMOTE_USER,
# REMOTE_HOST or GEOCODER_API_TOKEN is missing.
#   $1 REMOTE_USER   $2 REMOTE_HOST      $3 GEOCODER_API_TOKEN
#   $4 DOWNLOAD_DIR  $5 IMPORT_DATA_DIR  $6 IMPORT_MEMORY  $7 IMPORT_THREADS
###
parse_args_and_configure() {
    # NOTE(review): this line sits just above the diff hunk; restored per the
    # documented usage ($1 = REMOTE_USER) — confirm against the full file.
    REMOTE_USER="${1:-$REMOTE_USER}"
    REMOTE_HOST="${2:-$REMOTE_HOST}"
    GEOCODER_API_TOKEN="${3:-$GEOCODER_API_TOKEN}"

    DOWNLOAD_DIR="${4:-$DOWNLOAD_DIR}"
    IMPORT_DATA_DIR="${5:-$IMPORT_DATA_DIR}"
    IMPORT_MEMORY="${6:-$IMPORT_MEMORY}"
    IMPORT_THREADS="${7:-$IMPORT_THREADS}"

    if [ -z "$REMOTE_USER" ] || [ -z "$REMOTE_HOST" ] || [ -z "$GEOCODER_API_TOKEN" ]; then
        echo "Usage: $0 <REMOTE_USER> <REMOTE_HOST> <API_TOKEN> [DOWNLOAD_DIR] [IMPORT_DATA_DIR] [MEMORY] [THREADS]"
        echo "  DOWNLOAD_DIR: Where to download PBF files (default: current directory)"
        echo "  IMPORT_DATA_DIR: Where to store import data (default: ./import)"
        echo "  MEMORY: Memory for import (default: 16G)"
        echo "  THREADS: Threads for import (default: 10)"
        echo "Error: Missing required configuration."
        exit 1
    fi
    echo "Configuration loaded for ${REMOTE_USER}@${REMOTE_HOST}"
    echo "  Download directory: $DOWNLOAD_DIR"
    echo "  Import data directory: $IMPORT_DATA_DIR"
    echo "  Import memory: $IMPORT_MEMORY"
    # BUG FIX: the commit fused the closing brace onto this echo line
    # ('...$IMPORT_THREADS"}'), printing a stray '}' and leaving the function
    # unterminated (the old standalone '}' line was deleted).
    echo "  Import threads: $IMPORT_THREADS"
}
6985

7086
###
7187
# LOCAL: Creates the necessary working directories.
7288
###
7389
###
# LOCAL: Creates the download and import data directories and enters the
# download directory (later steps assume it is the working directory).
###
local_prepare_directories() {
    log "LOCAL: Ensuring import directory exists"
    mkdir -p "$DOWNLOAD_DIR" "$IMPORT_DATA_DIR"
    # Guard the cd: continuing in the wrong directory would download and
    # delete files in unexpected places.
    cd "$DOWNLOAD_DIR" || exit 1
}
7895

7996
###
8097
# LOCAL: Downloads the latest OSM planet file.
8198
###
8299
###
# LOCAL: Downloads the latest OSM planet file into $DOWNLOAD_DIR.
###
local_download_planet_file() {
    log "LOCAL: Downloading latest OSM planet file to $DOWNLOAD_DIR"
    # Guard the cd so the download cannot land in an arbitrary directory.
    cd "$DOWNLOAD_DIR" || exit 1
    # -N: only re-download when the remote copy is newer than the local file.
    wget -N "$PLANET_URL"
}
86104

@@ -97,53 +115,50 @@ local_pull_docker_image() {
97115
###
98116
###
# LOCAL: Filters the downloaded PBF file inside the Docker image.
###
local_filter_pbf() {
    log "LOCAL: Filtering PBF file (approx. 50 mins)"
    cd "$DOWNLOAD_DIR" || exit 1
    # BUG FIX: the commit added '-ti', but allocating a TTY makes
    # 'docker run' fail when the script runs without one (cron/CI).
    # This is a batch job: no TTY and no stdin are needed.
    sudo docker run --rm -v "$DOWNLOAD_DIR":/data "$DOCKER_IMAGE" prepare "$PBF_INPUT_FILE" "$PBF_FILTERED_FILE"
}
102121

103122
###
104123
# LOCAL: Creates the geocoder import bundle from the filtered PBF.
105124
###
106125
###
# LOCAL: Creates the geocoder import bundle from the filtered PBF.
# Memory and thread counts come from IMPORT_MEMORY / IMPORT_THREADS.
###
local_create_import_bundle() {
    log "LOCAL: Creating import bundle (approx. 15 hours) with $IMPORT_MEMORY memory and $IMPORT_THREADS threads"
    # Mount download and import dirs separately so they can live on
    # different disks.
    # BUG FIX: dropped '-ti' — allocating a TTY makes this long batch job
    # fail when launched without one (cron/CI).
    sudo docker run --rm \
        -v "$DOWNLOAD_DIR":/download \
        -v "$IMPORT_DATA_DIR":/import \
        "$DOCKER_IMAGE" import \
        --memory "$IMPORT_MEMORY" \
        --threads "$IMPORT_THREADS" \
        --data-dir "/import/" \
        "/download/$PBF_FILTERED_FILE"
}
114133

115134
###
116135
# LOCAL: Removes the large, intermediate PBF files.
117136
###
118137
###
# LOCAL: Removes the large, intermediate PBF files from $DOWNLOAD_DIR.
###
local_cleanup_pbf() {
    log "LOCAL: Cleaning up intermediate PBF files from $DOWNLOAD_DIR"
    # Guard the cd: an unchecked failure would run rm in the wrong directory.
    cd "$DOWNLOAD_DIR" || exit 1
    # '--' stops option parsing in case a filename ever starts with '-'.
    rm -f -- "$PBF_FILTERED_FILE" "$PBF_INPUT_FILE"
    echo "Deleted '$DOWNLOAD_DIR/$PBF_FILTERED_FILE' and '$DOWNLOAD_DIR/$PBF_INPUT_FILE'"
}
123143
###
124144
# REMOTE: Syncs the import directory to the remote server using rsync.
125145
# Uses --link-dest to minimize bandwidth and remote disk usage.
126146
###
127147
###
# REMOTE: Syncs the import directory to the remote server using rsync.
# Uses --link-dest to minimize bandwidth and remote disk usage.
# Exports LATEST_RELEASE_DIR_NAME for remote_deploy_and_verify.
###
remote_sync_bundle() {
    log "REMOTE: Syncing bundle via rsync (Delta transfer)"

    # The current live directory serves as the --link-dest base so unchanged
    # files are hard-linked on the remote instead of re-transferred.
    CURRENT_LIVE=$(ssh "${REMOTE_USER}@${REMOTE_HOST}" "readlink -f ${REMOTE_BASE_DIR}/live_data || true")
    NEW_RELEASE_TIMESTAMP=$(date +%Y%m%d%H%M%S)
    NEW_RELEASE_DIR="${REMOTE_BASE_DIR}/releases/${NEW_RELEASE_TIMESTAMP}"

    # Ensure the remote releases directory exists.
    ssh "${REMOTE_USER}@${REMOTE_HOST}" "mkdir -p ${REMOTE_BASE_DIR}/releases"

    # BUG FIX: abort on rsync failure — proceeding to the atomic swap with a
    # half-synced release must never happen.
    rsync -avz --progress \
        ${CURRENT_LIVE:+--link-dest="$CURRENT_LIVE"} \
        "$IMPORT_DATA_DIR/" \
        "${REMOTE_USER}@${REMOTE_HOST}:$NEW_RELEASE_DIR/" || exit 1

    # Consumed by the deployment step.
    export LATEST_RELEASE_DIR_NAME="$NEW_RELEASE_TIMESTAMP"
}
148163

149164
###
@@ -152,6 +167,12 @@ remote_sync_bundle() {
152167
remote_deploy_and_verify() {
153168
log "REMOTE: Executing remote deployment (Atomic Swap)"
154169

170+
# Convert TEST_CASES to a format that can be passed to remote shell
171+
local test_cases_str=""
172+
for key in "${!TEST_CASES}"; do
173+
test_cases_str+="[\"$key\"]=\"${TEST_CASES[$key]}\" "
174+
done
175+
155176
ssh "${REMOTE_USER}@${REMOTE_HOST}" /bin/bash << EOF
156177
set -e
157178
BASE_DIR="${REMOTE_BASE_DIR}"
@@ -161,26 +182,34 @@ remote_deploy_and_verify() {
161182
NEW_RELEASE_DIR="releases/${LATEST_RELEASE_DIR_NAME}"
162183
LIVE_DATA_SYMLINK="live_data"
163184
185+
# Define TESTS array on remote side
186+
declare -A TESTS=($test_cases_str)
187+
188+
echo_remote() {
189+
echo "[REMOTE] \$1"
190+
}
191+
164192
cd "\$BASE_DIR"
165193
166194
OLD_RELEASE_DIR=""
167195
[ -L "\$LIVE_DATA_SYMLINK" ] && OLD_RELEASE_DIR=\$(readlink \$LIVE_DATA_SYMLINK)
168196
169-
echo "Switching symlink: \$LIVE_DATA_SYMLINK -> \$NEW_RELEASE_DIR"
197+
echo_remote "Switching symlink: \$LIVE_DATA_SYMLINK -> \$NEW_RELEASE_DIR"
170198
ln -sfn "\$NEW_RELEASE_DIR" "\$LIVE_DATA_SYMLINK"
171199
172-
echo "Refreshing Geocoder DB..."
200+
echo_remote "Refreshing Geocoder DB..."
173201
HTTP_STATUS=\$(curl -s -o /dev/null -w "%{http_code}" -X POST -H "X-Admin-Token: \$API_TOKEN" "\$ADMIN_URL")
174202
175203
if [ "\$HTTP_STATUS" -ne 200 ]; then
176-
echo "ERROR: Refresh failed (\$HTTP_STATUS). Rolling back."
204+
echo_remote "ERROR: Refresh failed (\$HTTP_STATUS). Rolling back."
177205
[ -n "\$OLD_RELEASE_DIR" ] && ln -sfn "\$OLD_RELEASE_DIR" "\$LIVE_DATA_SYMLINK"
178206
exit 1
179207
fi
180-
# --- 2. Verify ---
208+
209+
# --- 2. Verify ---
181210
echo_remote "Verifying new data..."
182211
VERIFICATION_FAILED=0
183-
for query in "\${!TESTS[@]}"; do
212+
# NOTE(review): regression introduced by this commit — the removed line above
# iterated every key with [@], while the new line below expands only the
# first key, so just one verification test case runs. Restore "\${!TESTS[@]}".
for query in "\${!TESTS}"; do
184213
ACTUAL_ID=\$(curl -s "\$TEST_URL_BASE?\$query" | jq -r '.[0].id // "not_found"')
185214
if [ "\$ACTUAL_ID" != "\${TESTS[\$query]}" ]; then
186215
echo_remote " --> FAILED: For \$query, expected '\${TESTS[\$query]}', got '\$ACTUAL_ID'"
@@ -195,7 +224,7 @@ remote_deploy_and_verify() {
195224
echo_remote "VERIFICATION FAILED. Rolling back and re-refreshing."
196225
if [ -n "\$OLD_RELEASE_DIR" ] && [ -d "\$OLD_RELEASE_DIR" ]; then
197226
ln -sfn "\$OLD_RELEASE_DIR" "\$LIVE_DATA_SYMLINK"
198-
curl -s -o /dev/null -X POST -H "Authorization: Bearer \$API_TOKEN" "\$ADMIN_URL"
227+
curl -s -o /dev/null -X POST -H "X-Admin-Token: \$API_TOKEN" "\$ADMIN_URL"
199228
echo_remote "Rollback to \$OLD_RELEASE_DIR complete. Faulty data in \$NEW_RELEASE_DIR is kept for inspection."
200229
exit 1
201230
else
@@ -205,17 +234,54 @@ remote_deploy_and_verify() {
205234
else
206235
echo_remote "VERIFICATION SUCCEEDED. Cleaning up old release and archive."
207236
[ -n "\$OLD_RELEASE_DIR" ] && [ -d "\$OLD_RELEASE_DIR" ] && rm -rf "\$OLD_RELEASE_DIR"
208-
rm "\$ZIP_FILENAME"
209237
echo_remote "Deployment successful."
210238
fi
211-
212-
if [ \$VERIFICATION_FAILED -eq 0 ]; then
213-
echo "Success. Cleaning up old release..."
214-
[ -n "\$OLD_RELEASE_DIR" ] && [ "\$OLD_RELEASE_DIR" != "\$NEW_RELEASE_DIR" ] && rm -rf "\$OLD_RELEASE_DIR"
215-
fi
216239
EOF
217240
}
218241

242+
###
# REMOTE: Deletes old release directories, keeping the current live release
# plus the three most recent other releases.
###
remote_cleanup_old_releases() {
    log "REMOTE: Cleaning up old releases (keeping last 3 successful ones)"

    ssh "${REMOTE_USER}@${REMOTE_HOST}" /bin/bash << EOF
set -e
BASE_DIR="${REMOTE_BASE_DIR}"

echo_remote() {
    echo "[REMOTE CLEANUP] \$1"
}

cd "\$BASE_DIR"

echo_remote "Finding old releases to clean up..."
CURRENT_LIVE=\$(readlink -f live_data 2>/dev/null || echo "")

# Newest first, so the keep-count retains the most recent releases.
RELEASES=\$(find releases -maxdepth 1 -type d -name "[0-9]*" | sort -r)

KEEP_COUNT=3
COUNT=0
for release in \$RELEASES; do
    # BUG FIX: \$release is a relative path ("releases/<ts>") while
    # CURRENT_LIVE is absolute, so the original comparisons (against
    # CURRENT_LIVE and against its basename) never matched — after a
    # rollback the live release could be deleted. Compare canonical paths.
    if [ -n "\$CURRENT_LIVE" ] && [ "\$(readlink -f "\$release")" = "\$CURRENT_LIVE" ]; then
        echo_remote "Skipping current live release: \$release"
        continue
    fi

    COUNT=\$((COUNT + 1))
    if [ \$COUNT -gt \$KEEP_COUNT ]; then
        echo_remote "Removing old release: \$release"
        rm -rf "\$release"
    else
        echo_remote "Keeping release: \$release"
    fi
done

# NOTE(review): this also deletes *kept* releases that happen to be empty —
# confirm an empty release directory is never valid before relying on it.
find releases -type d -empty -delete 2>/dev/null || true

echo_remote "Cleanup complete"
EOF
}
219285
# ==============================================================================
220286
# MAIN ORCHESTRATION FUNCTION
221287
# ==============================================================================
@@ -231,7 +297,7 @@ main() {
231297
local_cleanup_pbf
232298
remote_sync_bundle
233299
remote_deploy_and_verify
234-
300+
remote_cleanup_old_releases
235301

236302
log "Update process finished."
237303
}

0 commit comments

Comments
 (0)