diff --git a/.github/workflows/chartpress.yaml b/.github/workflows/chartpress.yaml index fcf6b306..328d8e0a 100644 --- a/.github/workflows/chartpress.yaml +++ b/.github/workflows/chartpress.yaml @@ -4,7 +4,7 @@ on: branches: - 'main' - 'staging' - - 'vtiles_admin_attr' + - 'tags_imposm' jobs: build: runs-on: ubuntu-22.04 @@ -71,7 +71,7 @@ jobs: OHM_SLACK_WEBHOOK_URL: ${{ secrets.OHM_SLACK_WEBHOOK_URL }} ################ Staging secrets ################ - name: Staging - substitute secrets - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/vtiles_admin_attr' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/tags_imposm' uses: bluwy/substitute-string-action@v1 with: _input-file: 'values.staging.template.yaml' @@ -189,14 +189,14 @@ jobs: PRODUCTION_OPENSTREETMAP_AUTH_SECRET: ${{ secrets.PRODUCTION_OPENSTREETMAP_AUTH_SECRET }} - name: AWS Credentials - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tags_imposm' uses: aws-actions/configure-aws-credentials@v1 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-east-1 - name: Setup Kubectl and Helm Dependencies - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tags_imposm' run: | sudo pip install awscli --ignore-installed six sudo curl -L -o /usr/bin/kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.17.7/2020-07-08/bin/linux/amd64/kubectl @@ -210,22 +210,22 @@ jobs: helm version - name: Update kube-config staging - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/vtiles_admin_attr' + if: github.ref == 'refs/heads/staging' || 
github.ref == 'refs/heads/tags_imposm' run: aws eks --region us-east-1 update-kubeconfig --name osmseed-staging - name: Update kube-config prod if: github.ref == 'refs/heads/main' run: aws eks --region us-east-1 update-kubeconfig --name osmseed-production-v2 - name: Add Helm repository - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tags_imposm' run: | helm repo add osm-seed https://osm-seed.github.io/osm-seed-chart/ helm repo update - name: Install helm dependencies for - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/vtiles_admin_attr' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/tags_imposm' run: cd ohm && helm dep up # Staging - name: Staging - helm deploy - if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/vtiles_admin_attr' + if: github.ref == 'refs/heads/staging' || github.ref == 'refs/heads/tags_imposm' run: helm upgrade --install staging --wait ohm/ -f values.staging.yaml -f ohm/values.yaml # Production - name: Production - helm deploy diff --git a/hetzner/tiler/config/postgresql.production.conf b/hetzner/tiler/config/postgresql.production.conf index fd5e32da..0db7b5f4 100644 --- a/hetzner/tiler/config/postgresql.production.conf +++ b/hetzner/tiler/config/postgresql.production.conf @@ -10,10 +10,10 @@ superuser_reserved_connections = 5 # Reserve connections for superusers #------------------------------------------------------------------------------ # - Memory Configuration - -shared_buffers = 14GB # ~25% of total 55GB; adjust if needed +shared_buffers = 10GB # ~25% of 40GB container limit work_mem = 256MB # Memory for each sort/hash operation; be cautious with many parallel queries maintenance_work_mem = 4GB # Larger memory for VACUUM / CREATE 
INDEX / ALTER -effective_cache_size = 36GB # ~60-70% of total memory to inform the planner +effective_cache_size = 26GB # ~65% of 40GB container limit # - Disk Optimization for SSD (if using SSD) - random_page_cost = 1.0 # Lower cost for random I/O on SSD @@ -42,9 +42,9 @@ autovacuum_vacuum_cost_limit = -1 # Let PostgreSQL adjust vacuum cost d effective_io_concurrency = 300 # For SSD; helps the planner estimate IO concurrency parallel_tuple_cost = 0.001 # Lower cost to encourage parallelization parallel_setup_cost = 100 # Lower to encourage more parallel plans -max_worker_processes = 28 # Allow up to 28 worker processes -max_parallel_workers_per_gather = 8 # Max workers that can help a single query -max_parallel_workers = 28 # Total number of parallel workers across all queries +max_worker_processes = 25 # Match 25 CPUs container limit +max_parallel_workers_per_gather = 6 # Max workers that can help a single query +max_parallel_workers = 25 # Total number of parallel workers across all queries #------------------------------------------------------------------------------ # LOGGING diff --git a/hetzner/tiler/tiler.production.yml b/hetzner/tiler/tiler.production.yml index 2aa06700..f8ef62d9 100644 --- a/hetzner/tiler/tiler.production.yml +++ b/hetzner/tiler/tiler.production.yml @@ -2,6 +2,7 @@ services: tiler_db: container_name: tiler_db image: ghcr.io/openhistoricalmap/tiler-db:0.0.1-0.dev.git.2166.hc55c4cd + command: postgres -c config_file=/etc/postgresql/postgresql.conf volumes: !overwrite - tiler_pgdata:/var/lib/postgresql/data - ./config/postgresql.production.conf:/etc/postgresql/postgresql.conf @@ -9,14 +10,14 @@ services: - "54329:5432" env_file: - .env.tiler - mem_limit: 55G - cpus: "28.0" + mem_limit: 40G + cpus: "25.0" networks: - ohm_network tiler_imposm: container_name: tiler_imposm - image: ghcr.io/openhistoricalmap/tiler-imposm:0.0.1-0.dev.git.3325.hb9f97989 + image: ghcr.io/openhistoricalmap/tiler-imposm:0.0.1-0.dev.git.3323.haf36ae7d volumes: - 
tiler_imposm_data:/mnt/data env_file: @@ -136,10 +137,10 @@ services: volumes: tiler_pgdata: driver: local - name: tiler_db_10_03 + name: tiler_db_16_03 tiler_imposm_data: driver: local - name: tiler_imposm_10_03 + name: tiler_imposm_16_03 networks: ohm_network: diff --git a/images/tiler-imposm/config/imposm3.template.json b/images/tiler-imposm/config/imposm3.template.json index fa864ecb..3d49319a 100644 --- a/images/tiler-imposm/config/imposm3.template.json +++ b/images/tiler-imposm/config/imposm3.template.json @@ -4,7 +4,134 @@ "exclude": [ "created_by", "source", - "source:datetime" + "source:*", + "source_ref", + "note", + "note:*", + "fixme", + "fixme:*", + "FIXME", + "todo", + "description", + "description:*", + "comment", + "wikimedia_commons", + "image", + "image:*", + "website", + "url", + "email", + "phone", + "fax", + "contact:*", + "opening_hours", + "opening_hours:*", + "addr:*", + "is_in", + "is_in:*", + "attribution", + "license", + + "tiger:*", + "gnis:*", + "NHD:*", + "nhd:*", + "NHDPlus:*", + "ref:*", + "nysgissam:*", + "nypl:*", + "nygisid", + "bkln:*", + "base_bbl", + "yh:*", + "nl_ahcb:*", + "ign:*", + "istatcom:*", + "hf:*", + "gvr:*", + "LINZ:*", + "TMC:*", + "NJDOT_*", + "A45_*", + "ANR", + "FIPS", + "EDGE_ID", + "OBJECTID", + "GlobalID", + "GLOBALID", + "HFCS", + "zhb_code", + + "import", + "import:*", + "import_uuid", + "import_edge_id", + "converted_by", + "upload", + "pre_download", + "dataset", + "odbl", + "odbl:note", + "history", + + "roof:*", + "building:levels", + "building:part", + "building:material", + "building:colour", + "generator:*", + "tactile_paving", + "crossing:markings", + "crossing:island", + "crossing:barrier", + "traffic_sign", + "traffic_sign:*", + "traffic_signals", + "traffic_signals:*", + "button_operated", + "sidewalk", + "sidewalk:*", + "parking", + "parking:*", + "lit", + "smoothness", + "direction", + "ele", + + "building", + "natural", + "landuse", + "highway", + "railway", + "aeroway", + "waterway", + 
"barrier", + "leisure", + "historic", + "man_made", + "power", + "military", + "amenity", + "place", + "tourism", + "shop", + "craft", + "boundary", + "communication", + "route", + "start_date", + "end_date", + "name", + "oneway", + "bridge", + "access", + "service", + "ford", + "surface", + "lanes", + "maxspeed", + "admin_level", + "type" ] }, "generalized_tables": {}, diff --git a/images/tiler-imposm/scripts/refresh_mviews.sh b/images/tiler-imposm/scripts/refresh_mviews.sh index 1dd0b8b1..1708878e 100755 --- a/images/tiler-imposm/scripts/refresh_mviews.sh +++ b/images/tiler-imposm/scripts/refresh_mviews.sh @@ -28,20 +28,37 @@ source ./scripts/utils.sh # Example: # refresh_mviews_group "WATER" 180 "${water_views[@]}" & # ============================================================================ +LIGHT_WORK_MEM="64MB" +LIGHT_MAINT_MEM="256MB" +HEAVY_WORK_MEM="512MB" +HEAVY_MAINT_MEM="4GB" + function refresh_mviews_group() { local group_name="$1" local sleep_interval="$2" - shift 2 + local mem_profile="${3:-light}" # "light" or "heavy" + shift 3 local materialized_views=("$@") + local work_mem="$LIGHT_WORK_MEM" + local maint_mem="$LIGHT_MAINT_MEM" + if [ "$mem_profile" = "heavy" ]; then + work_mem="$HEAVY_WORK_MEM" + maint_mem="$HEAVY_MAINT_MEM" + fi + while true; do for mview in "${materialized_views[@]}"; do - log_message "[$group_name] Refreshing $mview..." + log_message "[$group_name] Refreshing $mview (work_mem=$work_mem, maintenance_work_mem=$maint_mem)..." local error_output # Disable statement_timeout for long-running refresh operations (0 = no limit) local exit_code=0 local start_time=$SECONDS - error_output=$(psql "$PG_CONNECTION" -v ON_ERROR_STOP=1 -c "SET statement_timeout = 0" -c "REFRESH MATERIALIZED VIEW CONCURRENTLY $mview;" 2>&1) || exit_code=$? 
+ error_output=$(psql "$PG_CONNECTION" -v ON_ERROR_STOP=1 \ + -c "SET statement_timeout = 0" \ + -c "SET work_mem = '$work_mem'" \ + -c "SET maintenance_work_mem = '$maint_mem'" \ + -c "REFRESH MATERIALIZED VIEW CONCURRENTLY $mview;" 2>&1) || exit_code=$? local elapsed=$((SECONDS - start_time)) if [ $exit_code -eq 0 ]; then log_message "[$group_name] ✅ Successfully refreshed $mview. Time: ${elapsed}s" @@ -249,18 +266,21 @@ no_admin_boundaries_views=( ) -refresh_mviews_group "ADMIN_BOUNDARIES_LINES" 60 "${admin_boundaries_lines_views[@]}" & -refresh_mviews_group "ADMIN_BOUNDARIES_AREAS_CENTROIDS" 180 "${admin_boundaries_areas_centroids_views[@]}" & -refresh_mviews_group "ADMIN_MARITIME_LINES" 300 "${admin_maritime_lines_views[@]}" & -refresh_mviews_group "TRANSPORTS" 180 "${transport_views[@]}" & -refresh_mviews_group "AMENITY" 180 "${amenity_views[@]}" & -refresh_mviews_group "LANDUSE" 180 "${landuse_views[@]}" & -refresh_mviews_group "OTHERS" 180 "${others_views[@]}" & -refresh_mviews_group "COMMUNICATION" 180 "${communication_views[@]}" & -refresh_mviews_group "PLACES" 180 "${places_views[@]}" & -refresh_mviews_group "WATER" 180 "${water_views[@]}" & -refresh_mviews_group "BUILDINGS" 180 "${buildings_views[@]}" & -refresh_mviews_group "ROUTES" 180 "${routes_views[@]}" & +# Heavy groups - admin boundaries have the largest tables +refresh_mviews_group "ADMIN_BOUNDARIES_LINES" 60 heavy "${admin_boundaries_lines_views[@]}" & +refresh_mviews_group "ADMIN_BOUNDARIES_AREAS_CENTROIDS" 180 heavy "${admin_boundaries_areas_centroids_views[@]}" & + +# Remaining groups - light profile (smaller tables, minimal resources), except TRANSPORTS which uses the heavy profile +refresh_mviews_group "ADMIN_MARITIME_LINES" 300 light "${admin_maritime_lines_views[@]}" & +refresh_mviews_group "TRANSPORTS" 180 heavy "${transport_views[@]}" & +refresh_mviews_group "AMENITY" 180 light "${amenity_views[@]}" & +refresh_mviews_group "LANDUSE" 180 light "${landuse_views[@]}" & +refresh_mviews_group "OTHERS" 180 light "${others_views[@]}" & +refresh_mviews_group
"COMMUNICATION" 180 light "${communication_views[@]}" & +refresh_mviews_group "PLACES" 180 light "${places_views[@]}" & +refresh_mviews_group "WATER" 180 light "${water_views[@]}" & +refresh_mviews_group "BUILDINGS" 180 light "${buildings_views[@]}" & +refresh_mviews_group "ROUTES" 180 light "${routes_views[@]}" & ## This group high demand, so we refresh every 1 hour -refresh_mviews_group "NO_ADMIN_BOUNDARIES" 36000 "${no_admin_boundaries_views[@]}" & +refresh_mviews_group "NO_ADMIN_BOUNDARIES" 36000 light "${no_admin_boundaries_views[@]}" & diff --git a/images/tiler-imposm/scripts/setup_imposm_role.sh b/images/tiler-imposm/scripts/setup_imposm_role.sh new file mode 100755 index 00000000..61519bb3 --- /dev/null +++ b/images/tiler-imposm/scripts/setup_imposm_role.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Setup a dedicated 'imposm' PostgreSQL role with optimized session parameters. +# This avoids affecting Tegola/other services that share the same postgres user. +set -e +source "$(dirname "$0")/utils.sh" + +log_message "Setting up imposm database role with optimized parameters..." + +psql "$PG_CONNECTION" <"$WORKDIR/config.json" { "cachedir": "$CACHE_DIR", "diffdir": "$DIFF_DIR", - "connection": "postgis://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB", + "connection": "postgis://imposm:${IMPOSM_DB_PASSWORD:-$POSTGRES_PASSWORD}@$POSTGRES_HOST/$POSTGRES_DB", "mapping": "/osm/config/imposm3.json", "replication_url": "$REPLICATION_URL" } @@ -317,6 +317,9 @@ done log_message "PostgreSQL is ready! Proceeding with setup..." +# Setup dedicated imposm role with optimized session parameters +./scripts/setup_imposm_role.sh + # Run date functions execute_sql_file /usr/local/datefunctions/datefunctions.sql