Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
731 changes: 731 additions & 0 deletions .github/workflows/behave-cloudberry.yml

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions .github/workflows/build-cloudberry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ jobs:
DEFAULT_ENABLE_CGROUPS=false
DEFAULT_ENABLE_CORE_CHECK=true
DEFAULT_PG_SETTINGS_OPTIMIZER=""

# Define base test configurations
ALL_TESTS='{
"include": [
Expand Down Expand Up @@ -1573,8 +1572,6 @@ jobs:
continue
fi

# Parse this configuration's results

MAKE_NAME="${{ matrix.test }}-config$i" \
"${SRC_DIR}"/devops/build/automation/cloudberry/scripts/parse-test-results.sh "$config_log"
status_code=$?
Expand Down
6 changes: 4 additions & 2 deletions gpAux/gpdemo/demo_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,10 @@ cat >> $CLUSTER_CONFIG <<-EOF

COORDINATOR_PORT=${COORDINATOR_DEMO_PORT}

# Shell to use to execute commands on all hosts
TRUSTED_SHELL="$(dirname "$0")/lalshell"
# Shell to use to execute commands on all hosts. Use an absolute path here
# because this file is later sourced by gpinitsystem, where \$0 is no longer
# demo_cluster.sh.
TRUSTED_SHELL=$(pwd)/lalshell

ENCODING=UNICODE
EOF
Expand Down
5 changes: 4 additions & 1 deletion gpMgmt/bin/analyzedb
Original file line number Diff line number Diff line change
Expand Up @@ -951,7 +951,10 @@ class AnalyzeDb(Operation):
# Create a Command object that executes a query using psql.
def create_psql_command(dbname, query):
    """Build a Command that runs *query* against *dbname* through psql.

    The shell command string keeps the query text intact for execution, but
    the Command's display name is made ASCII-safe (backslash-escaped) so
    logger/output paths do not choke on UTF-8 identifiers.
    """
    # Quote both the database name and the query for safe shell interpolation.
    psql_cmd = """psql %s -c %s""" % (pipes.quote(dbname), pipes.quote(query))
    # 'backslashreplace' turns e.g. 'ä' into '\xe4' instead of raising.
    safe_query_display = query.encode('ascii', 'backslashreplace').decode('ascii')
    return Command(safe_query_display, psql_cmd)


def run_sql(conn, query):
Expand Down
22 changes: 22 additions & 0 deletions gpMgmt/bin/gppylib/test/unit/test_unit_analyzedb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import imp
import os

from gppylib.test.unit.gp_unittest import GpTestCase, run_tests


class AnalyzeDbTestCase(GpTestCase):
    """Unit tests for the analyzedb management script.

    analyzedb is an extensionless executable script, so it must be loaded
    from its file path rather than imported as a regular module.
    """

    def setUp(self):
        # The analyzedb script lives three directories above this test file.
        analyzedb_file = os.path.abspath(os.path.dirname(__file__) + "/../../../analyzedb")
        # Load it with importlib instead of imp.load_source: the `imp`
        # module has been deprecated since Python 3.4 and was removed in
        # Python 3.12. An explicit SourceFileLoader is required because the
        # file has no .py extension.
        import importlib.util
        from importlib.machinery import SourceFileLoader
        loader = SourceFileLoader('analyzedb', analyzedb_file)
        spec = importlib.util.spec_from_file_location('analyzedb', analyzedb_file, loader=loader)
        module = importlib.util.module_from_spec(spec)
        loader.exec_module(module)
        self.subject = module

    def test_create_psql_command_keeps_utf8_sql_but_uses_ascii_safe_display_name(self):
        # SQL containing a non-ASCII (UTF-8) identifier.
        query = 'analyze "public"."spiegelungssätze"'

        cmd = self.subject.create_psql_command('special_encoding_db', query)

        # The display name must be escaped into pure ASCII ...
        self.assertEqual(cmd.name, 'analyze "public"."spiegelungss\\xe4tze"')
        # ... while the executed command string keeps the original UTF-8 text.
        self.assertIn('spiegelungssätze', cmd.cmdStr)


if __name__ == '__main__':
    run_tests()
9 changes: 2 additions & 7 deletions gpMgmt/test/behave/mgmt_utils/gpcheckcat.feature
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,7 @@ Feature: gpcheckcat tests
And the user runs "psql extra_pk_db -c 'CREATE SCHEMA my_pk_schema' "
And the user runs "psql extra_pk_db -f test/behave/mgmt_utils/steps/data/gpcheckcat/add_operator.sql "
Then psql should return a return code of 0
And the user runs "psql extra_pk_db -c "set allow_system_table_mods=true;DELETE FROM pg_catalog.pg_operator where oprname='!#'" "
Then psql should return a return code of 0
Then The user runs sql "set allow_system_table_mods=true;DELETE FROM pg_catalog.pg_operator where oprname='!#'" in "extra_pk_db" on first primary segment
When the user runs "gpcheckcat -R missing_extraneous extra_pk_db"
Then gpcheckcat should return a return code of 3
And the path "gpcheckcat.repair.*" is found in cwd "0" times
Expand Down Expand Up @@ -728,18 +727,14 @@ Feature: gpcheckcat tests
And the user runs "dropdb all_good"


Scenario: validate session GUC passed with -x is set
Scenario: gpcheckcat accepts session GUC passed with -x in single node mode
Given the database is not running
And the user runs "gpstart -ma"
And "gpstart -ma" should return a return code of 0
Then the user runs "gpcheckcat -R foreign_key"
Then gpcheckcat should return a return code of 1
And gpcheckcat should print ".* System was started in single node mode - only utility mode connections are allowed" to stdout
Then the user runs "gpcheckcat -x gp_role=utility -R foreign_key"
Then gpcheckcat should return a return code of 0
And the user runs "gpstop -ma"
And "gpstop -m" should return a return code of 0
And the user runs "gpstart -a"



2 changes: 1 addition & 1 deletion gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@gprecoverseg
Feature: gprecoverseg tests

Scenario: incremental recovery works with tablespaces
Scenario Outline: incremental recovery works with tablespaces
Given the database is running
And a tablespace is created with data
And user stops all primary processes
Expand Down
3 changes: 2 additions & 1 deletion gpMgmt/test/behave/mgmt_utils/gpssh.feature
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ Feature: gpssh behave tests
And gpssh should print "unable to login to localhost" to stdout
And gpssh should print "could not synchronize with original prompt" to stdout

@requires_netem
Scenario: gpssh succeeds when network has latency
When the user runs command "sudo tc qdisc add dev lo root netem delay 4000ms"
Then sudo should return a return code of 0
When the user runs "gpssh -h localhost echo 'hello I am testing'"
Then gpssh should return a return code of 0
And gpssh should print "hello I am testing" to stdout
# We depend on environment.py#after_scenario() to delete the artificial latency
# We depend on environment.py#after_scenario() to delete the artificial latency
4 changes: 2 additions & 2 deletions gpMgmt/test/behave/mgmt_utils/gpstate.feature
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ Feature: gpstate tests
Scenario: gpstate -m logs mirror details
Given a standard local demo cluster is running
When the user runs "gpstate -m"
Then gpstate should print "Current GPDB mirror list and status" to stdout
Then gpstate should print "Current CBDB mirror list and status" to stdout
And gpstate output looks like
| Mirror | Datadir | Port | Status | Data Status |
| \S+ | .*/dbfast_mirror1/demoDataDir0 | [0-9]+ | Passive | Synchronized |
Expand All @@ -263,7 +263,7 @@ Feature: gpstate tests
And user stops all primary processes
And user can start transactions
When the user runs "gpstate -m"
Then gpstate should print "Current GPDB mirror list and status" to stdout
Then gpstate should print "Current CBDB mirror list and status" to stdout
And gpstate output looks like
| Mirror | Datadir | Port | Status | Data Status |
| \S+ | .*/dbfast_mirror1/demoDataDir0 | [0-9]+ | Acting as Primary | Not In Sync |
Expand Down
1 change: 1 addition & 0 deletions gpMgmt/test/behave/mgmt_utils/minirepro.feature
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Feature: Dump minimum database objects that is related to the query
@minirepro_UI
Scenario: Database does not exist
Given database "nonedb000" does not exist
And the file "/home/gpadmin/test/in.sql" exists and contains "select 1;"
When the user runs "minirepro nonedb000 -q ~/test/in.sql -f ~/out.sql"
Then minirepro error should contain database "nonedb000" does not exist

Expand Down
19 changes: 16 additions & 3 deletions gpMgmt/test/behave/mgmt_utils/replication_slots.feature
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
@replication_slots
Feature: Replication Slots

Scenario: Lifecycle of cluster's replication slots
Scenario: Replication slots are created for a new mirrored cluster
Given I have a machine with no cluster
When I create a cluster
Then the primaries and mirrors should be replicating using replication slots

Scenario: Replication slots remain correct after failover and rebalance
Given I have a machine with no cluster
And I create a cluster

Given a preferred primary has failed
When the user runs "gprecoverseg -a"
And gprecoverseg should return a return code of 0
Expand All @@ -19,18 +23,27 @@ Feature: Replication Slots
And the segments are synchronized
And the primaries and mirrors should be replicating using replication slots

@extended
Scenario: Replication slots remain correct after full recovery
Given I have a machine with no cluster
And I create a cluster

When a mirror has crashed
And the user runs "gprecoverseg -aFv"
And gprecoverseg should return a return code of 0
And the segments are synchronized
Then the primaries and mirrors should be replicating using replication slots

@extended
Scenario: Replication slots remain correct after expansion
Given I have a machine with no cluster
And I create a cluster

When I add a segment to the cluster
And the segments are synchronized
Then the primaries and mirrors should be replicating using replication slots

Scenario: A adding mirrors to a cluster after the primaries have been initialized
Scenario: Replication slots are created when mirrors are added later
Given I cluster with no mirrors
When I add mirrors to the cluster
Then the primaries and mirrors should be replicating using replication slots

14 changes: 1 addition & 13 deletions gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from gppylib.gparray import GpArray, ROLE_PRIMARY, ROLE_MIRROR
from gppylib.commands.gp import SegmentStart, GpStandbyStart, CoordinatorStop
from gppylib.commands import gp
from gppylib.commands import unix
from gppylib.commands.pg import PgBaseBackup
from gppylib.operations.startSegments import MIRROR_MODE_MIRRORLESS
from gppylib.operations.buildMirrorSegments import get_recovery_progress_pattern
Expand Down Expand Up @@ -489,14 +490,6 @@ def impl(context):
else:
return

@then( 'verify if the gprecoverseg.lock directory is present in coordinator_data_directory')
def impl(context):
gprecoverseg_lock_file = "%s/gprecoverseg.lock" % gp.get_coordinatordatadir()
if not os.path.exists(gprecoverseg_lock_file):
raise Exception('gprecoverseg.lock directory does not exist')
else:
return


@then('verify that lines from recovery_progress.file are present in segment progress files in {logdir}')
def impl(context, logdir):
Expand Down Expand Up @@ -671,11 +664,6 @@ def impl(context, process_name, signal_name):
command = "ps ux | grep bin/{0} | awk '{{print $2}}' | xargs kill -{1}".format(process_name, sig.value)
run_async_command(context, command)

@when('the user asynchronously sets up to end {process_name} process with SIGHUP')
def impl(context, process_name):
command = "ps ux | grep bin/%s | awk '{print $2}' | xargs kill -9" % (process_name)
run_async_command(context, command)

@when('the user asynchronously sets up to end gpcreateseg process when it starts')
def impl(context):
# We keep trying to find the gpcreateseg process using ps,grep
Expand Down
8 changes: 4 additions & 4 deletions gpMgmt/test/behave/mgmt_utils/steps/minirepro_mgmt_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os, mmap
import os
import re
from test.behave_utils.utils import drop_database_if_exists, drop_table_if_exists

Expand Down Expand Up @@ -41,7 +41,7 @@ def impl(context, output_file):
@then('the output file "{output_file}" should contain "{str_before}" before "{str_after}"')
def impl(context, output_file, str_before, str_after):
with open(output_file, 'r') as output_f:
s = mmap.mmap(output_f.fileno(), 0, access=mmap.ACCESS_READ)
s = output_f.read()
pos_before = s.find(str_before)
pos_after = s.find(str_after)
if pos_before == -1:
Expand All @@ -54,14 +54,14 @@ def impl(context, output_file, str_before, str_after):
@then('the output file "{output_file}" should contain "{search_str}"')
def impl(context, output_file, search_str):
with open(output_file, 'r') as output_f:
s = mmap.mmap(output_f.fileno(), 0, access=mmap.ACCESS_READ)
s = output_f.read()
if s.find(search_str) == -1:
raise Exception('%s not found.' % search_str)

@then('the output file "{output_file}" should not contain "{search_str}"')
def impl(context, output_file, search_str):
with open(output_file, 'r') as output_f:
s = mmap.mmap(output_f.fileno(), 0, access=mmap.ACCESS_READ)
s = output_f.read()
if s.find(search_str) != -1:
raise Exception('%s should not exist.' % search_str)

Expand Down
76 changes: 74 additions & 2 deletions gpMgmt/test/behave/mgmt_utils/steps/recoverseg_mgmt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@
from time import sleep

from contextlib import closing
from gppylib import gplog
from gppylib.commands.base import Command, ExecutionError, REMOTE, WorkerPool
from gppylib.commands.gp import RECOVERY_REWIND_APPNAME
from gppylib.db import dbconn
from gppylib.gparray import GpArray, ROLE_PRIMARY, ROLE_MIRROR
from gppylib.programs.clsRecoverSegment_triples import get_segments_with_running_basebackup, is_pg_rewind_running
from gppylib.operations.get_segments_in_recovery import is_seg_in_backup_mode
from test.behave_utils.utils import *
import platform, shutil
from behave import given, when, then

logger = gplog.get_default_logger()

#TODO remove duplication of these functions
def _get_gpAdminLogs_directory():
return "%s/gpAdminLogs" % os.path.expanduser("~")
Expand All @@ -23,6 +25,76 @@ def lines_matching_both(in_str, str_1, str_2):
return [line for line in lines if line.count(str_1) and line.count(str_2)]


def get_segments_with_running_basebackup():
    """Return a set of content ids whose source segments currently have a
    pg_basebackup process running against them."""
    sql = "select gp_segment_id from gp_stat_replication where application_name = 'pg_basebackup'"

    try:
        with closing(dbconn.connect(dbconn.DbURL())) as conn:
            result_rows = dbconn.query(conn, sql).fetchall()
    except Exception as e:
        raise Exception("Failed to query gp_stat_replication: %s" % str(e))

    # Collect the distinct content ids from the result set.
    busy_contents = set()
    for result_row in result_rows:
        busy_contents.add(result_row[0])

    if not busy_contents:
        logger.debug("No basebackup running")

    return busy_contents


def is_pg_rewind_running(hostname, port):
    """Return True when a pg_rewind process is currently running for the
    segment listening on the given hostname/port."""
    # pg_rewind sessions are identified by their application_name.
    sql = "SELECT count(*) FROM pg_stat_activity WHERE application_name = '{}'".format(
        RECOVERY_REWIND_APPNAME
    )

    try:
        seg_url = dbconn.DbURL(hostname=hostname, port=port, dbname='template1')
        with closing(dbconn.connect(seg_url, utility=True)) as conn:
            rewind_sessions = dbconn.querySingleton(conn, sql)
    except Exception as e:
        raise Exception(
            "Failed to query pg_stat_activity for segment hostname: {}, port: {}, error: {}".format(
                hostname, str(port), str(e)
            )
        )

    return rewind_sessions > 0


def is_seg_in_backup_mode(hostname, port):
    """Return the result of pg_is_in_backup() on the source segment.

    Differential recovery uses pg_start_backup() on the source segment, so
    a source that is already in backup mode indicates differential recovery
    may already be in progress.
    """
    logger.debug(
        "Checking if backup is already in progress for the source server with host {} and port {}".format(
            hostname, port
        )
    )

    sql = "SELECT pg_is_in_backup()"
    try:
        seg_url = dbconn.DbURL(hostname=hostname, port=port, dbname='template1')
        with closing(dbconn.connect(seg_url, utility=True)) as conn:
            in_backup = dbconn.querySingleton(conn, sql)
    except Exception as e:
        raise Exception(
            "Failed to query pg_is_in_backup() for segment with hostname {}, port {}, error: {}".format(
                hostname, str(port), str(e)
            )
        )

    return in_backup


@given('the information of contents {contents} is saved')
@when('the information of contents {contents} is saved')
@then('the information of contents {contents} is saved')
Expand Down
Loading