diff --git a/docs/deployment/build-and-install.md b/docs/build/build-and-install.md similarity index 100% rename from docs/deployment/build-and-install.md rename to docs/build/build-and-install.md diff --git a/docs/deployment/build-based-on-docker.md b/docs/build/build-based-on-docker.md similarity index 100% rename from docs/deployment/build-based-on-docker.md rename to docs/build/build-based-on-docker.md diff --git a/docs/deployment/configure.md b/docs/build/configure.md similarity index 100% rename from docs/deployment/configure.md rename to docs/build/configure.md diff --git a/docs/deployment/create-gpadmin-user.md b/docs/build/create-gpadmin-user.md similarity index 100% rename from docs/deployment/create-gpadmin-user.md rename to docs/build/create-gpadmin-user.md diff --git a/docs/deployment/download-source-code.md b/docs/build/download-source-code.md similarity index 100% rename from docs/deployment/download-source-code.md rename to docs/build/download-source-code.md diff --git a/docs/build/index.md b/docs/build/index.md new file mode 100644 index 00000000000..f8444617876 --- /dev/null +++ b/docs/build/index.md @@ -0,0 +1,87 @@ +--- +title: "Build Apache Cloudberry from Source: Complete Guide" +--- + +This document is intended for developers interested in exploring and potentially contributing to Apache Cloudberry. The build environment described here is optimized for development and testing purposes only. + +## Target audience + +- Developers interested in contributing to Apache Cloudberry. +- PostgreSQL developers wanting to explore Cloudberry's extensions. +- Database enthusiasts interested in learning about distributed query processing. +- Anyone considering joining the Apache Cloudberry community. + +The build process described here enables development activities such as: + +- Debugging and testing new features. +- Exploring the codebase with development tools. +- Running test suites and validation checks. +- Making and testing code modifications. 
+ +:::tip +If you are new to Apache Cloudberry or PostgreSQL development: + +- Consider building PostgreSQL first to understand the basic workflow +- Join the project's [mailing lists](/community/mailing-lists) to connect with other developers +- Review the project's issue tracker to understand current development priorities +- Be prepared for longer build times and iterative testing as you explore the codebase +::: + +## Process of building Apache Cloudberry + +The build process for Apache Cloudberry (Incubating) closely resembles that of PostgreSQL. If you have previously set up development environments for PostgreSQL, you'll find the steps for Cloudberry very familiar. + +For those new to Cloudberry or PostgreSQL, we recommend starting with a PostgreSQL build first. The PostgreSQL development community has established excellent documentation and tooling to guide you through the process. Familiarizing yourself with PostgreSQL's build process will make transitioning to Cloudberry significantly easier. + +## Prerequisites + +### Provision a Rocky Linux 8+ / Ubuntu 20.04+ Environment + +- Use any platform to create a virtual machine or container: + + - **Cloud providers**: You can use the Rocky Linux 8+ or Ubuntu 20.04+ images provided by the cloud providers, such as AWS, Google Cloud, Microsoft Azure, and more. + - **VirtualBox**: Use the official [Rocky Linux 8+](https://rockylinux.org/download) / [Ubuntu 20.04+](https://ubuntu.com/download) ISO or Vagrant boxes. + - **Docker**: These instructions were validated on the official Rocky Linux 8+ and Ubuntu 20.04 base Docker images, and should work with any container image derived from them.
+ - For example, you can run the following command to start a Rocky Linux 8 container: + + ```bash + docker run -it --shm-size=2gb -h cdw rockylinux/rockylinux:8 + + # Start an Ubuntu 20.04 container: + # docker run -it --shm-size=2gb -h cdw ubuntu:20.04 + ``` + + The hostname `cdw` (Coordinator Data Warehouse) is just an example of how we started the container for testing. + + To ensure test suites run successfully, you may need to increase the container's shared memory using `--shm-size=2gb`. Test failures can occur when the Cloudberry cluster lacks sufficient shared memory resources. + +- Ensure the VM or container has: + - Internet connectivity for package installation. + - SSH or console access for user interaction. + - Sufficient resources (CPU, memory, and storage) for a development environment. + +:::note +Specific steps to provision the environment are not covered in this guide because they vary by platform. This guide assumes you have successfully created a VM or container and can log in as the default user (for example, `rocky` for Rocky Linux on AWS). +::: + +### System requirements + +Minimum requirements for a development environment: + +- CPU: 4 cores recommended (2 cores minimum) + - CPU architecture: x86, x86_64, ARM, MIPS +- RAM: 8GB recommended (4GB minimum) +- Storage: 20GB free space recommended +- Network: Broadband internet connection for package downloads + +## Build Apache Cloudberry from source code + +The following steps guide you through building Apache Cloudberry from source code on Rocky Linux 8+ or Ubuntu 20.04+. The process is similar for both operating systems, with minor differences in package management, dependencies and software versions between these two distributions.
+ +Just go ahead and follow the steps below to build Apache Cloudberry from source code: + +```mdx-code-block +import DocCardList from '@theme/DocCardList'; + +<DocCardList /> +``` \ No newline at end of file diff --git a/docs/deployment/install-required-packages.md b/docs/build/install-required-packages.md similarity index 100% rename from docs/deployment/install-required-packages.md rename to docs/build/install-required-packages.md diff --git a/docs/deployment/post-installation.md b/docs/build/post-installation.md similarity index 100% rename from docs/deployment/post-installation.md rename to docs/build/post-installation.md diff --git a/docs/deployment/quick-build.md b/docs/build/quick-build.md similarity index 100% rename from docs/deployment/quick-build.md rename to docs/build/quick-build.md diff --git a/docs/deployment/sandbox.md b/docs/build/sandbox.md similarity index 100% rename from docs/deployment/sandbox.md rename to docs/build/sandbox.md diff --git a/docs/deployment/set-demo-cluster.md b/docs/build/set-demo-cluster.md similarity index 100% rename from docs/deployment/set-demo-cluster.md rename to docs/build/set-demo-cluster.md diff --git a/docs/deployment/system-settings.md b/docs/build/system-settings.md similarity index 100% rename from docs/deployment/system-settings.md rename to docs/build/system-settings.md diff --git a/docs/cbdb-op-deploy-guide.md b/docs/cbdb-op-deploy-guide.md deleted file mode 100644 index 016cf34bcef..00000000000 --- a/docs/cbdb-op-deploy-guide.md +++ /dev/null @@ -1,305 +0,0 @@ ---- -title: Deploy Manually Using RPM Package ---- - -# Deploy Apache Cloudberry Manually Using RPM Package - -This document introduces how to manually deploy Apache Cloudberry on physical/virtual machines using RPM package. Before reading this document, it is recommended to first read the [Software and Hardware Configuration Requirements](/docs/cbdb-op-software-hardware.md) and [Prepare to Deploy Apache Cloudberry](/docs/cbdb-op-prepare-to-deploy.md).
- -The deployment method in this document is for production environments. - -The example in this document uses CentOS 7.6 and deploys Apache Cloudberry v1.0.0. The main steps are as follows: - -1. [Prepare node servers](#step-1-prepare-server-nodes). -2. [Install the RPM package](#step-2-install-the-rpm-package). -3. [Configure mutual trust between nodes](#step-3-configure-mutual-trust-between-nodes). -4. [Initialize the database](#step-4-initialize-apache-cloudberry). -5. [Log into the database](#step-5-log-into-apache-cloudberry). - -## Step 1: Prepare server nodes - -Read the [Prepare to Deploy Apache Cloudberry](/docs/cbdb-op-prepare-to-deploy.md) document to prepare the server nodes. - -## Step 2. Install the RPM package - -After the preparation, it is time to install Apache Cloudberry. You need to download the corresponding RPM package from [Apache Cloudberry Releases](https://github.com/apache/cloudberry/releases), and then install the database on each node using the installation package. - -1. Download the RPM package to the home directory of `gpadmin`. - - ```bash - wget -P /home/gpadmin - ``` - -2. Install the RPM package in the `/home/gpadmin` directory. - - When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry/` is automatically created. - - ```bash - cd /home/gpadmin - yum install - ``` - -3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry/` directory. - - ```bash - chown -R gpadmin:gpadmin /usr/local - chown -R gpadmin:gpadmin /usr/local/cloudberry* - ``` - -## Step 3. Configure mutual trust between nodes - -1. Switch to the `gpadmin` user, and use the `gpadmin` user for subsequent operations. -2. Create a configuration file for node information. 
- - Create the node configuration file in the `/home/gpadmin/` directory, including the `all_hosts` and `seg_hosts` files, which store the host information of all nodes and data nodes respectively. The example node information is as follows: - - ```bash - [gpadmin@cbdb-coordinator gpadmin]$ cat all_hosts - - cbdb-coordinator - cbdb-standbycoordinator - cbdb-datanode01 - cbdb-datanode02 - cbdb-datanode03 - - [gpadmin@cbdb-coordinator gpadmin]$ cat seg_hosts - - cbdb-datanode01 - cbdb-datanode02 - cbdb-datanode03 - ``` - -3. Configure SSH trust between hosts. - - 1. Run `ssh-keygen` on each host to generate SSH key. For example: - - ```bash - [gpadmin@cbbd-coordinator cloudberry-1.0.0]$ ssh-keygen - - Generating public/private rsa key pair. - Enter file in which to save the key (/usr/local/cloudberry/.ssh/id_rsa): - Enter passphrase (empty for no passphrase): - Enter same passphrase again: - Your identification has been saved in /usr/local/cloudberry/.ssh/id_rsa. - Your public key has been saved in /usr/local/cloudberry/.ssh/id_rsa.pub. - The key fingerprint is: - SHA256:cvcYS87egYCyh/v6UtdqrejVU5qqF7OvpcHg/T9lRrg gpadmin@cbbd-coordinator - The key's randomart image is: - +---[RSA 2048]----+ - | | - | | - | + | - |+ O | - |o ... S | - |. +o= B C | - | o B=00 D | - |.o=o0o.. = | - |O=++*+o+.. | - +----[SHA256]-----+ - ``` - - 2. Run `ssh-copy-id` on each host to configure password-free login. The example is as follows: - - ```bash - ssh-copy-id cbdb-coordinator - ssh-copy-id cbdb-standbycoordinator - ssh-copy-id cbdb-datanode01 - ssh-copy-id cbdb-datanode02 - ssh-copy-id cbdb-datanode03 - ``` - - 3. Verify that SSH between nodes is all connected, that is, the password-free login between servers is successful. 
The example is as follows: - - ```bash - [gpadmin@cbdb-coordinator ~]$ gpssh -f all_hosts - => pwd - [ cbdb-datanode03] b'/usr/local/cloudberry\r' - [ cbdb-coordinator] b'/usr/local/cloudberry\r' - [ cbdb-datanode02] b'/usr/local/cloudberry\r' - [cbdb-standbycoordinator] b'/usr/local/cloudberry\r' - [ cbdb-datanode01] b'/usr/local/cloudberry\r' - => - ``` - - If you fail to run `gpssh`, you can first run `source /usr/local/cloudberry/cliudberry-env.sh` on the coordinator node. - -## Step 4. Initialize Apache Cloudberry - -Before performing the following operations, run `su - gpadmin` to switch to the `gpadmin` user. - -1. Add a new line of `source` command to the `~/.bashrc` files of all nodes (coordinator/standby coordinator/segment). The example is as follows: - - ```bash - source /usr/local/cloudberry/cloudberry-env.sh - ``` - -2. Run the `source` command to make the newly added content effective: - - ```bash - source ~/.bashrc - ``` - -3. Use the `gpssh` command on the coordinator node to create data directories and mirror directories for segment nodes. In this document, the 2 directories are `/data0/primary/` and `/data0/mirror/`, respectively. The example is as follows: - - ```bash - gpssh -f seg_hosts - mkdir -p /data0/primary/ - mkdir -p /data0/mirror/ - ``` - -4. Create data directory on the coordinator node. In this document, the directory is `/data0/coordinator/`. - - ```bash - mkdir -p /data0/coordinator/ - ``` - -5. Use the `gpssh` command on the coordinator node to create data directory for the standby node. In this document, the directory is `/data0/coordinator/`. - - ```bash - gpssh -h cbdb-standbycoordinator -e 'mkdir -p /data0/coordinator/' - ``` - -6. On the hosts of the coordinator and standby nodes, add a line to the `~/.bashrc` file to declare the path of `COORDINATOR_DATA_DIRECTORY`, which is `{the path step 5}` + `gpseg-1`. For example: - - ```bash - export COORDINATOR_DATA_DIRECTORY=/data0/coordinator/gpseg-1 - ``` - -7. 
Run the following command on the hosts of the coordinator and standby nodes to make the declaration of `COORDINATOR_DATA_DIRECTORY` in the previous step effective. - - ```bash - source ~/.bashrc - ``` - -8. Configure the `gpinitsystem_config` initialization script: - - 1. On the host where the coordinator node is located, copy the template configuration file to the current directory: - - ```bash - cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config . - ``` - - 2. Modify the `gpinitsystem_config` file as follows: - - - Pay attention to the port, coordinator node, segment node, and mirror node. - - Modify `DATA_DIRECTORY` to the data directory of the segment node, for example, `/data0/primary`. - - Modify `COORDINATOR_HOSTNAME` to the hostname of the coordinator node, for example, `cbdb-coordinator`. - - Modify `COORDINATOR_DIRECTORY` to the data directory of the coordinator node, for example, `/data0/coordinator`. - - Modify `MIRROR_DATA_DIRECTORY` to the data directory of the mirror node, for example, `/data0/mirror`. - - ```bash - [gpadmin@cbdb-coordinator ~]$ cat gpinitsystem_config - # FILE NAME: gpinitsystem_config - - # Configuration file needed by the gpinitsystem - - ######################################## - #### REQUIRED PARAMETERS - ######################################## - - #### Naming convention for utility-generated data directories. - SEG_PREFIX=gpseg - - #### Base number by which primary segment port numbers - #### are calculated. - PORT_BASE=6000 - - #### File system location(s) where primary segment data directories - #### will be created. The number of locations in the list dictate - #### the number of primary segments that will get created per - #### physical host (if multiple addresses for a host are listed in - #### the hostfile, the number of segments will be spread evenly across - #### the specified interface addresses). - declare -a DATA_DIRECTORY=(/data0/primary) - - #### OS-configured hostname or IP address of the coordinator host. 
- COORDINATOR_HOSTNAME=cbdb-coordinator - - #### File system location where the coordinator data directory - #### will be created. - COORDINATOR_DIRECTORY=/data0/coordinator - - #### Port number for the coordinator instance. - COORDINATOR_PORT=5432 - - #### Shell utility used to connect to remote hosts. - TRUSTED_SHELL=ssh - - #### Default server-side character set encoding. - ENCODING=UNICODE - - ######################################## - #### OPTIONAL MIRROR PARAMETERS - ######################################## - - #### Base number by which mirror segment port numbers - #### are calculated. - MIRROR_PORT_BASE=7000 - - #### File system location(s) where mirror segment data directories - #### will be created. The number of mirror locations must equal the - #### number of primary locations as specified in the - #### DATA_DIRECTORY parameter. - declare -a MIRROR_DATA_DIRECTORY=(/data0/mirror) - ``` - - - To create a default database during initialization, you need to fill in the database name. In this example, the `warehouse` database is created during initialization - - ```conf - ######################################## - #### OTHER OPTIONAL PARAMETERS - ######################################## - - #### Create a database of this name after initialization. - DATABASE_NAME=warehouse - ``` - -9. Use `gpinitsystem` to initialize Apache Cloudberry. For example: - - ```bash - gpinitsystem -c gpinitsystem_config -h /home/gpadmin/seg_hosts - ``` - - In the command above, `-c` specifies the configuration file and `-h` specifies the computing node list. - - If you need to initialize the standby coordinator node, refer to the following command: - - ```bash - gpinitstandby -s cbdb-standbycoordinator - ``` - -## Step 5. Log into Apache Cloudberry - -Now you have successfully deployed Apache Cloudberry. 
To log into the database, refer to the following command: - -```bash -psql -h -p -U -d -``` - -In the command above: - -- `` is the IP address of the coordinator node of the Apache Cloudberry server. -- `` is the default port number of Apache Cloudberry, which is `5432` by default. -- `` is the user name of the database. -- `` is the name of the database to connect. - -After you run the `psql` command, the system will prompt you to enter the database password. After you enter the correct password, you will successfully log into Apache Cloudberry and can perform SQL queries and operations. Make sure that you have the correct permissions to access the target database. - -```sql -[gpadmin@cddb-coordinator ~]$ psql warehouse -psql (14.4, server 14.4) -Type "help" for help. - -warehouse=# SELECT * FROM gp_segment_configuration; -dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir ------------------------------------------------------------------------------------------- -1 | -1 | p | p | n | u | 5432 | cddb-coordinator | cddb-coordinator | /data0/coordinator/gpseg-1 -8 | -1 | m | m | s | u | 5432 | cddb-standbycoordinator | cddb-standbycoordinator | /data0/coordinator/gpseg-1 -2 | 0 | p | p | s | u | 6000 | cddb-datanode01 | cddb-datanode01 | /data0/primary/gpseg0 -5 | 0 | m | m | s | u | 7000 | cddb-datanode02 | cddb-datanode02 | /data0/mirror/gpseg0 -3 | 1 | p | p | s | u | 6000 | cddb-datanode02 | cddb-datanode02 | /data0/primary/gpseg1 -6 | 1 | m | m | s | u | 7000 | cddb-datanode03 | cddb-datanode03 | /data0/mirror/gpseg1 -4 | 2 | p | p | s | u | 6000 | cddb-datanode03 | cddb-datanode03 | /data0/primary/gpseg2 -7 | 2 | m | m | s | u | 7000 | cddb-datanode01 | cddb-datanode01 | /data0/mirror/gpseg2 -(8 rows) -``` diff --git a/docs/cbdb-op-prepare-to-deploy.md b/docs/cbdb-op-prepare-to-deploy.md deleted file mode 100644 index 58fa2dc9c3d..00000000000 --- a/docs/cbdb-op-prepare-to-deploy.md +++ /dev/null @@ -1,442 +0,0 @@ ---- 
-title: Prepare to Deploy ---- - -# Prepare to Deploy on Physical or Virtual Machine - -Before deploying Apache Cloudberry on physical or virtual machines, you need to do some preparations. Read this document and [Software and Hardware Configuration Requirements](/docs/cbdb-op-software-hardware.md) before you start to deploy Apache Cloudberry. - -## Plan the deployment architecture - -Plan your deployment architecture based on the [Apache Cloudberry Architecture](/docs/cbdb-architecture.md) and [Software and Hardware Configuration Requirements](/docs/cbdb-op-software-hardware.md), and determine the number of servers needed. Ensure that all servers are within a single security group and have mutual trust configured. - -The deployment plan for the example of this document includes 1 coordinator + 1 standby + 3 segments (primary + mirror), totaling 5 servers. - -## Modify server settings - -Log into each host as the `root` user, and modify the settings of each node server in the order of the following sections. - -### Change hostname - -Use the `hostnamectl set-hostname` command to modify the hostname of each server respectively, following these naming conventions: - -- Only include letters, numbers, and the hyphen `-`. Note: The underscore `_` is not a valid character. -- Case-insensitive, but it is recommended to use all lowercase letters. Using uppercase letters for the hostname might cause Kerberos authentication to fail. -- Each hostname must be globally unique across all hosts. - -Example: - -```bash -hostnamectl set-hostname cbdb-coordinator -hostnamectl set-hostname cbdb-standbycoordinator -hostnamectl set-hostname cbdb-datanode01 -hostnamectl set-hostname cbdb-datanode02 -hostnamectl set-hostname cbdb-datanode03 -``` - -### Add `gpadmin` admin user - -Follow the example below to create a user group and username `gpadmin`. Set the user group and username identifier to `520`. Create and specify the `gpadmin` home directory `/home/gpadmin`. 
- -```bash -groupadd -g 520 gpadmin # Adds user group gpadmin. -useradd -g 520 -u 520 -m -d /home/gpadmin/ -s /bin/bash gpadmin # Adds username gpadmin and creates the home directory of gpadmin. -passwd gpadmin # Sets a password for gpadmin; after executing, follow the prompts to input the password. -``` - -### Disable SELinux and firewall software - -Run `systemctl status firewalld` to view the firewall status. If the firewall is on, you need to turn it off by setting the `SELINUX` parameter to `disabled` in the `/etc/selinux/config` file. - -```bash -SELINUX=disabled -``` - -You can also disable the firewall using the following commands: - -```bash -systemctl stop firewalld.service -systemctl disable firewalld.service -``` - -### Modify network mapping - -Check the `/etc/hosts` file to make sure that it contains mappings of all host aliases to their network IP addresses. Examples are as follows: - -``` -192.168.1.101 cbdb-coordinator -192.168.1.102 cbdb-standbycoordinator -192.168.1.103 cbdb-datanode01 -192.168.1.104 cbdb-datanode02 -192.168.1.105 cbdb-datanode03 -``` - -### Set system parameters - -Add relevant system parameters in the `/etc/sysctl.conf` configuration file, and run the `sysctl -p` command to make the configuration file effective. - -When setting the configuration parameters, you can take the following example as a reference and set them according to your needs. Details of some of these parameters and recommended settings are provided below. 
- -```conf -# kernel.shmall = _PHYS_PAGES / 2 -kernel.shmall = 197951838 -# kernel.shmmax = kernel.shmall * PAGE_SIZE -kernel.shmmax = 810810728448 -kernel.shmmni = 4096 -vm.overcommit_memory = 2 -vm.overcommit_ratio = 95 -net.ipv4.ip_local_port_range = 10000 65535 -kernel.sem = 250 2048000 200 8192 -kernel.sysrq = 1 -kernel.core_uses_pid = 1 -kernel.msgmnb = 65536 -kernel.msgmax = 65536 -kernel.msgmni = 2048 -net.ipv4.tcp_syncookies = 1 -net.ipv4.conf.default.accept_source_route = 0 -net.ipv4.tcp_max_syn_backlog = 4096 -net.ipv4.conf.all.arp_filter = 1 -net.ipv4.ipfrag_high_thresh = 41943040 -net.ipv4.ipfrag_low_thresh = 31457280 -net.ipv4.ipfrag_time = 60 -net.core.netdev_max_backlog = 10000 -net.core.rmem_max = 2097152 -net.core.wmem_max = 2097152 -vm.swappiness = 10 -vm.zone_reclaim_mode = 0 -vm.dirty_expire_centisecs = 500 -vm.dirty_writeback_centisecs = 100 -vm.dirty_background_ratio = 0 -vm.dirty_ratio = 0 -vm.dirty_background_bytes = 1610612736 -vm.dirty_bytes = 4294967296 -``` - -#### Shared memory - -In the `/etc/sysctl.conf` configuration file, `kernel.shmall` represents the total amount of available shared memory, in pages. `kernel.shmmax` represents the maximum size of a single shared memory segment, in bytes. - -You can define these 2 values ​​using the operating system's `_PHYS_PAGES` and `PAGE_SIZE` parameters: - -```conf -kernel.shmall = ( _PHYS_PAGES / 2) -kernel.shmmax = ( _PHYS_PAGES / 2) * PAGE_SIZE -``` - -To get the values ​​of these 2 operating system parameters, you can use `getconf`, for example: - -```bash -$ echo $(expr $(getconf _PHYS_PAGES) / 2) -$ echo $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE)) -``` - -#### Segment memory - -In the `/etc/sysctl.conf` configuration file, - -- `vm.overcommit_memory` indicates the overcommit handling modes for memory. 
Available options are: - - - `0`: Heuristic overcommit handling - - `1`: Always overcommit - - `2`: Don't overcommit - - Set the value of this parameter to `2` to refuse overcommit. - -- `vm.overcommit_ratio` is a kernel parameter and is the percentage of RAM occupied by the application process. The default value on CentOS is `50`. `vm.overcommit_ratio` is calculated as follows: - - ``` - vm.overcommit_ratio = (RAM - 0.026 * gp_vmem) / RAM - ``` - - The calculation method of `gp_vmem` is as follows: - - ``` - # If the system memory is less than 256 GB, use the following formula to calculate: - gp_vmem = ((SWAP + RAM) – (7.5GB + 0.05 * RAM)) / 1.7 - - # If the system memory is greater than or equal to 256 GB, use the following formula to calculate: - gp_vmem = ((SWAP + RAM) – (7.5GB + 0.05 * RAM)) / 1.17 - - # In the above formulas, SWAP is the swap space on the host, in GB. - # RAM is the size of the memory installed on the host, in GB. - ``` - -#### Port - -In the `/etc/sysctl.conf` configuration file, `net.ipv4.ip_local_port_range` is used to specify the port range. To avoid port conflicts between Apache Cloudberry and other applications, you need to specify the port range via operating system parameters. When you later set Apache Cloudberry initialization parameters, avoid setting Apache Cloudberry related ports in this range. - -For example, for `net.ipv4.ip_local_port_range = 10000 65535`, you need to avoid setting the Apache Cloudberry related ports in the interval `[10000,65535]`. You can set them to `6000` and `7000`: - -``` -PORT_BASE = 6000 -MIRROR_PORT_BASE = 7000 -``` - -#### IP segmentation - -When the Apache Cloudberry uses the UDP protocol for internal connection, the network card controls the fragmentation and reassembly of IP packets. If the size of a UDP message is larger than the maximum size of network transmission unit (MTU), the IP layer fragments the message. 
- -- `net.ipv4.ipfrag_high_thresh`: When the total size of IP fragments exceeds this threshold, the kernel will attempt to reorganize IP fragments. If the fragments exceed this threshold but all fragments have not arrived within the specified time, the kernel will not reorganize the fragments. This threshold is typically used to control whether larger shards are reorganized. The default value is `4194304` bytes (4 MB). -- `net.ipv4.ipfrag_low_thresh`: Indicates that when the total size of IP fragments is below this threshold, the kernel will wait as long as possible for more fragments to arrive, to allow for larger reorganizations. This threshold is used to minimize unfinished reorganization operations and improve system performance. The default value is `3145728` bytes (3 MB). -- `net.ipv4.ipfrag_time` is a kernel parameter that controls the IP fragment reassembly timeout. The default value is `30`. - -It is recommended to set the above parameters to the following values: - -```conf -net.ipv4.ipfrag_high_thresh = 41943040 -net.ipv4.ipfrag_low_thresh = 31457280 -net.ipv4.ipfrag_time = 60 -``` - -#### System memory - -- If the server memory exceeds 64 GB, it is recommended to set the following parameters in the `/etc/sysctl.conf` configuration file: - - ```conf - vm.dirty_background_ratio = 0 - vm.dirty_ratio = 0 - vm.dirty_background_bytes = 1610612736 # 1.5GB - vm.dirty_bytes = 4294967296 # 4GB - ``` - -- If the server memory is less than 64 GB, do not set `vm.dirty_background_bytes` and `vm.dirty_bytes`. It is recommended to set the following parameters in the `/etc/sysctl.conf` configuration file: - - ```conf - vm.dirty_background_ratio = 3 - vm.dirty_ratio = 10 - ``` - -- To deal with emergencies when the system encounters memory pressure, it is recommended to add the `vm.min_free_kbytes` parameter in the `/etc/sysctl.conf` configuration file to specify the amount of available memory reserved by the system. 
It is recommended to set `vm.min_free_kbytes` to 3% of the system's physical memory. The command is as follows: - - ```bash - awk 'BEGIN {OFMT = "%.0f";} /MemTotal/ {print "vm.min_free_kbytes =", $2 * .03;}' /proc/meminfo >> /etc/sysctl.conf - ``` - - It is not recommended that the setting of `vm.min_free_kbytes` exceed 5% of the system's physical memory. - -#### Resource limit - -Edit the `/etc/security/limits.conf` file and add the following content, which limits the usage of software and hardware resources. - -``` -*soft nofile 524288 -*hard nofile 524288 -*soft nproc 131072 -*hard nproc 131072 -``` - -#### CORE DUMP - -1. Add the following parameter to the `/etc/sysctl.conf` configuration file: - - ```conf - kernel.core_pattern=/var/core/core.%h.%t - ``` - -2. Run the following command to make the configuration effective: - - ```bash - sysctl -p - ``` - -3. Add the following parameter to `/etc/security/limits.conf`: - - ``` - * soft core unlimited - ``` - -#### Set mount options for the XFS file system - -XFS is the file system for the data directory of Apache Cloudberry. XFS has the following mount options: - -``` -rw,nodev,noatime,inode64 -``` - -You can set up XFS file mounting in the `/etc/fstab` file. See the following commands. You need to choose the file path according to the actual situation: - -```bash -mkdir -p /data0/ -mkfs.xfs -f /dev/vdc -echo "/dev/vdc /data0 xfs rw,nodev,noatime,nobarrier,inode64 0 0" >> /etc/fstab -mount /data0 -chown -R gpadmin:gpadmin /data0/ -``` - -Run the following command to check whether the mounting is successful: - -```bash -df -h -``` - -#### Blockdev value - -The blockdev value for each disk file should be `16384`. 
To verify the blockdev value of a disk device, use the following command: - -```bash -sudo /sbin/blockdev --getra -``` - -For example, to verify the blockdev value of the example server disk: - -```bash -sudo /sbin/blockdev --getra /dev/vdc -``` - -To modify the blockdev value of a device file, use the following command: - -```bash -sudo /sbin/blockdev --setra -``` - -For example, to modify the file blockdev value of the hard disk of the example server: - -```bash -sudo /sbin/blockdev --setra 16384 /dev/vdc -``` - -#### I/O scheduling policy settings for disks - -The disk type, operating system and scheduling policies of Apache Cloudberry are as follows: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Storage device typeOSRecommended scheduling policy
NVMeRHEL 7none
RHEL 8none
Ubuntunone
SSDRHEL 7noop
RHEL 8none
Ubuntunone
OtherRHEL 7deadline
RHEL 8mq-deadline
Ubuntumq-deadline
- -Refer to the following command to modify the scheduling policy. Note that this command is only a temporary modification, and the modification becomes invalid after the server is restarted. - -```bash -echo schedulername > /sys/block//queue/scheduler -``` - -For example, temporarily modify the disk I/O scheduling policy of the example server: - -```bash -echo deadline > /sys/block/vdc/queue/scheduler -``` - -To permanently modify the scheduling policy, use the system utility `grubby`. After using `grubby`, the modification takes effect immediately after you restart the server. The sample command is as follows: - -```bash -grubby --update-kernel=ALL --args="elevator=deadline" -``` - -To view the kernel parameter settings, use the following command: - -```bash -grubby --info=ALL -``` - -#### Disable Transparent Huge Pages (THP) - -You need to disable Transparent Huge Pages (THP), because it reduces database performance. The command is as follows: - -```bash -grubby --update-kernel=ALL --args="transparent_hugepage=never" -``` - -Check the status of THP: - -```bash -cat /sys/kernel/mm/*transparent_hugepage/enabled -``` - -#### Disable IPC object deletion - -Disable IPC object deletion by setting the value of `RemoveIPC` to `no`. You can set this parameter in the `/etc/systemd/logind.conf` file of Apache Cloudberry. - -``` -RemoveIPC=no -``` - -After disabling it, run the following command to restart the server to make the disabling setting effective: - -```bash -service systemd-logind restart -``` - -#### SSH connection threshold - -To set the SSH connection threshold, you need to modify the `MaxStartups` and `MaxSessions` parameters in the `/etc/ssh/sshd_config` configuration file. Both of the following writing methods are acceptable. 
- -``` -MaxStartups 200 -MaxSessions 200 -``` - -``` -MaxStartups 10:30:200 -MaxSessions 200 -``` - -Run the following command to restart the server to make the setting take effect: - -```bash -service sshd restart -``` - -#### Clock synchronization - -Apache Cloudberry requires the clock synchronization to be configured for all hosts, and the clock synchronization service should be started when the host starts. You can choose one of the following synchronization methods: - -- Use the coordinator node's time as the source, and other hosts synchronize the clock of the coordinator node host. -- Synchronize clocks using an external clock source. - -The example in this document uses an external clock source for synchronization, that is, adding the following configuration to the `/etc/chrony.conf` configuration file: - -```conf -# Use public servers from the pool.ntp.org project. -# Please consider joining the pool (http://www.pool.ntp.org/join.html). -server 0.centos.pool.ntp.org iburst -``` - -After setting, you can run the following command to check the clock synchronization status: - -```bash -systemctl status chronyd -``` diff --git a/docs/cbdb-op-software-hardware.md b/docs/cbdb-op-software-hardware.md deleted file mode 100644 index a920da2f3f7..00000000000 --- a/docs/cbdb-op-software-hardware.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Software and Hardware Configuration ---- - -# Software and Hardware Configuration - -This document introduces the software and hardware configuration required for Apache Cloudberry. - -## Hardware requirements - -### Supported deployment environments - -Apache Cloudberry supports deployment on both physical machines and virtual machines. Below are the recommended configurations for the environments. 
- -#### For development or test environments - -| Component | CPU | Memory | Disk type | Network | Number of instances | -| ------- | ---- | ---- | -------- | -------------------- | -------- | -| Coordinator | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | -| Segment | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | - -#### For production environments - -| Component | CPU | Memory | Disk type | Network | Instance count | -| ------- | ------ | ------ | -------- | -------------------- | -------- | -| Coordinator | 16+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | -| Segment | 8+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | - -Apache Cloudberry can also be deployed on public cloud platforms such as AWS, Azure, and GCP. The hardware requirements for cloud-based deployments might vary based on the instance types selected on these platforms. Refer to the specific cloud provider’s documentation for instance configurations that meet or exceed the recommended hardware specifications. - -### Storage - -- To prevent a high data disk load from affecting the operating system's normal I/O response, mount the operating system and the data disk on separate disks. -- If the host configuration allows, it is recommended to use 2 independent SAS disks as the system disk (RAID1), and another 10 SAS disks as the data disk (RAID5). -- It is recommended to use LVM logical volumes to manage disks for more flexible disk configuration. - -For the system disk: The system disk should use an independent disk to avoid impact on the operating system when data disks are heavily loaded. It is recommended that the system disk be configured in dual-disk RAID 1 and the operating system of the system disk be XFS. - -For data disks: It is recommended to use LVM to manage data disks. According to test statistics, creating an independent logical volume for each physical volume can achieve the best disk performance. 
For example: - -```bash -pvcreate /dev/vdb -pvcreate /dev/vdc -pvcreate /dev/vdd -vgcreate data /dev/vdb /dev/vdc /dev/vdd -lvcreate --extents 100%pvs -n data0 data /dev/vdb -lvcreate --extents 100%pvs -n data1 data /dev/vdc -lvcreate --extents 100%pvs -n data2 data /dev/vdd -``` - -The names of mount points must be consecutive, and the mount points of data disks should be `/data0`, `/data1`, ..., `/dataN`. Data disks should use the XFS file format. For example: - -```bash -mkdir -p /data0 /data1 /data2 -mkfs.xfs /dev/data/data0 -mkfs.xfs /dev/data/data1 -mkfs.xfs /dev/data/data2 -mount /dev/data/data0 /data0/ -mount /dev/data/data1 /data1/ -mount /dev/data/data2 /data2/ -``` - -## Data exchange network - -- **Network card configuration** - - The data exchange network is used for transmitting business data, which has high requirements on network performance and throughput. In a production environment, two 10 Gbps NICs are generally required, and they will be used after bonding. The recommended bond 4 parameter are as follows: - - ```bash - BONDING_OPTS='mode=4 miimon=100 xmit_hash_policy=layer3+4' - ``` - -- **Connectivity requirements** - - - Connect the management console and the database host in the data exchange network. If there is a firewall device between the management console and the database host, ensure that the TCP idle connection can be kept for more than 12 hours. - - Connect database hosts and management console hosts in the data exchange network, and do not limit the TCP idle connection time. - - Connect database clients and application programs that access the database with the database coordinator node in the data exchange network. - - Ensure that the TCP idle connection can be kept for more than 12 hours. 
- -- **Default gateway** - - If the host is configured with a management network, the network card (bond0) of the data exchange network should be used as the default gateway device; otherwise, it might cause abnormal traffic monitoring of the host network, deployment failure, and performance problems. The following is an example of viewing the default gateway. - - ```bash - netstat -rn | grep ^0.0.0.0 - ``` - -- **Switch** - - - Make sure that the egress bandwidth of the data network switch from layer 1 to layer 2 is no lower than the maximum disk I/O throughput capacity of a single cabinet (calculated with a single RAID card of 500 MBps). - - A switch convergence ratio of 4:1 is recommended. When the convergence ratio reaches 6:1, most links will be saturated. Significant packet loss occurs when the convergence ratio reaches 8:1. - -## Software requirements - -### Supported OS - -Apache Cloudberry supports the following operating systems: - -- Kylin V10 SP1 or SP2 -- NeoKylin V7update6 -- RHEL/CentOS 7.6+ -- openEuler 20.3 LTS SP2 - -### OS configurations - -- SSH configuration - - The recommended configuration for the SSH server side (`/etc/ssh/sshd_config`) is as follows. After the configuration is complete, run `systemctl restart sshd.service` to make it effective. - - | Parameter | Value | Description | - | ---------------------- | ---- | ---------------- | - | Port | 22 | Listening port. | - | PasswordAuthentication | yes | Allows password login, which can be changed after cluster initialization. | - | PermitEmptyPass words | no | Empty password is not allowed for login. | - | UseDNS | no | DNS is not used. | - -### SSH password-free login - -Configure SSH password-free login for all nodes. 
For example: - -```bash -ssh-keygen -t rsa -ssh-copy-id root@192.168.66.154 -``` diff --git a/docs/connect-to-cbdb.md b/docs/database-basic/connect-to-cbdb.md similarity index 100% rename from docs/connect-to-cbdb.md rename to docs/database-basic/connect-to-cbdb.md diff --git a/docs/operate-with-data/operate-with-db-objects/create-and-manage-database.md b/docs/database-basic/create-and-manage-database.md similarity index 100% rename from docs/operate-with-data/operate-with-db-objects/create-and-manage-database.md rename to docs/database-basic/create-and-manage-database.md diff --git a/docs/start-and-stop-cbdb-database.md b/docs/database-basic/start-and-stop-cbdb-database.md similarity index 100% rename from docs/start-and-stop-cbdb-database.md rename to docs/database-basic/start-and-stop-cbdb-database.md diff --git a/docs/deployment/ansible-example.md b/docs/deployment/ansible-example.md new file mode 100644 index 00000000000..2a817ba6572 --- /dev/null +++ b/docs/deployment/ansible-example.md @@ -0,0 +1,111 @@ +--- +title: Example Ansible Playbook +--- + +# Example Ansible Playbook + +A sample Ansible playbook to install an Apache Cloudberry software release onto the hosts that will comprise an Apache Cloudberry system. + +This Ansible playbook shows how tasks described in [Installing the Apache Cloudberry Software](./install_cloudberry.md) might be automated using [Ansible](https://docs.ansible.com). + +:::important +This playbook is provided as an *example only* to illustrate how Apache Cloudberry cluster configuration and software installation tasks can be automated using provisioning tools such as Ansible, Chef, or Puppet. The Apache Cloudberry community does not provide support for Ansible or for the playbook presented in this example. +::: + +The example playbook is designed for use with RHEL/Rocky Linux. 
It creates the `gpadmin` user, installs the Apache Cloudberry software release, sets the owner and group of the installed software to `gpadmin`, and sets the PAM security limits for the `gpadmin` user. + +You can revise the script to work with your operating system platform and to perform additional host configuration tasks. + +Following are steps to use this Ansible playbook. + +1. Install Ansible on the control node using your package manager. See the [Ansible documentation](https://docs.ansible.com) for help with installation. + +2. Set up passwordless SSH from the control node to all hosts that will be a part of the Apache Cloudberry cluster. You can use the `ssh-copy-id` command to install your public SSH key on each host in the cluster. Alternatively, your provisioning software may provide more convenient ways to securely install public keys on multiple hosts. + +3. Create an Ansible inventory by creating a file called `hosts` with a list of the hosts that will comprise your Apache Cloudberry cluster. For example: + + ``` + cdw + scdw + sdw1 + sdw2 + sdw3 + ``` + + This file can be edited and used with the Apache Cloudberry `gpssh-exkeys` and `gpinitsystem` utilities later on. + +4. Copy the playbook code below to a file `ansible-playbook.yml` on your Ansible control node. + +5. Edit the playbook variables at the top of the playbook, such as the `gpadmin` administrative user and password to create, and the version of Apache Cloudberry you are installing. + +6. Run the playbook, passing the package to be installed to the `package_path` parameter. 
+ + ```bash + ansible-playbook ansible-playbook.yml -i hosts -e package_path=./apache-cloudberry-db-incubating-2.1.0.el8.x86_64.rpm + ``` + +## Ansible Playbook - Apache Cloudberry Installation for RHEL/Rocky Linux + +```yaml +--- + +- hosts: all + vars: + - version: "2.1.0" + - cloudberry_admin_user: "gpadmin" + - cloudberry_admin_password: "changeme" + # - package_path: passed via the command line with: -e package_path=./apache-cloudberry-db-incubating-2.1.0.el8.x86_64.rpm + remote_user: root + become: yes + become_method: sudo + connection: ssh + gather_facts: yes + tasks: + - name: create cloudberry admin user + user: + name: "{{ cloudberry_admin_user }}" + password: "{{ cloudberry_admin_password | password_hash('sha512', 'DvkPtCtNH+UdbePZfm9muQ9pU') }}" + - name: copy package to host + copy: + src: "{{ package_path }}" + dest: /tmp + - name: install package + yum: + name: "/tmp/{{ package_path | basename }}" + state: present + - name: cleanup package file from host + file: + path: "/tmp/{{ package_path | basename }}" + state: absent + - name: find install directory + find: + paths: /usr/local + patterns: 'cloudberry*' + file_type: directory + register: installed_dir + - name: change install directory ownership + file: + path: '{{ item.path }}' + owner: "{{ cloudberry_admin_user }}" + group: "{{ cloudberry_admin_user }}" + recurse: yes + with_items: "{{ installed_dir.files }}" + - name: update pam_limits + pam_limits: + domain: "{{ cloudberry_admin_user }}" + limit_type: '-' + limit_item: "{{ item.key }}" + value: "{{ item.value }}" + with_dict: + nofile: 524288 + nproc: 131072 + - name: find installed cloudberry version + shell: . 
/usr/local/cloudberry-db/cloudberry-env.sh && /usr/local/cloudberry-db/bin/postgres --gp-version + register: postgres_gp_version + - name: fail if the correct cloudberry version is not installed + fail: + msg: "Expected cloudberry version {{ version }}, but found '{{ postgres_gp_version.stdout }}'" + when: "version is not defined or version not in postgres_gp_version.stdout" +``` + +When the playbook has run successfully, you can proceed with [Creating the Data Storage Areas](./create_data_dirs.md) and [Initializing an Apache Cloudberry System](./init_cloudberry.md). diff --git a/docs/deployment/capacity_planning.md b/docs/deployment/capacity_planning.md new file mode 100644 index 00000000000..46b818e8a05 --- /dev/null +++ b/docs/deployment/capacity_planning.md @@ -0,0 +1,68 @@ +--- +title: Estimating storage capacity +--- + +To estimate how much data your Apache Cloudberry system can accommodate, use these measurements as guidelines. Also keep in mind that you may want to have extra space for landing backup files and data load files on each segment host. + +## Calculating usable disk capacity + +Start with the raw capacity of the physical disks on a segment host that are available for data storage: + +To calculate how much data an Apache Cloudberry system can hold, you have to calculate the usable disk capacity per segment host and then multiply that by the number of segment hosts in your Apache Cloudberry array. Start with the raw capacity of the physical disks on a segment host that are available for data storage \(raw\_capacity\), which is: + +``` +disk_size * number_of_disks = raw_capacity +``` + +Account for file system formatting overhead (roughly 10 percent) and the RAID level you are using. For example, if using RAID-10, the calculation would be: + +``` +(raw_capacity * 0.9) / 2 = formatted_disk_space +``` + +For optimal performance, do not completely fill your disks to capacity, but run at 70% or lower.
So with this in mind, calculate the usable disk space as follows: + +``` +formatted_disk_space * 0.7 = usable_disk_space +``` + +Using only 70% of your disk space allows Apache Cloudberry to use the other 30% for temporary and transaction files on the same disks. If your host systems have a separate disk system that can be used for temporary and transaction files, you can specify a tablespace that Apache Cloudberry uses for the files. Moving the location of the files might improve performance depending on the performance of the disk system. + +Once you have formatted RAID disk arrays and accounted for the maximum recommended capacity \(usable\_disk\_space\), you will need to calculate how much storage is actually available for user data (`U`). If using Apache Cloudberry mirrors for data redundancy, this would then double the size of your user data (`2 * U`). Apache Cloudberry also requires some space be reserved as a working area for active queries. The work space should be approximately one third the size of your user data (work space = `U/3`): + +``` +With mirrors: (2 * U) + U/3 = usable_disk_space +Without mirrors: U + U/3 = usable_disk_space +``` + +Guidelines for temporary file space and user data space assume a typical analytic workload. Highly concurrent workloads or workloads with queries that require very large amounts of temporary space can benefit from reserving a larger working area. Typically, overall system throughput can be increased while decreasing work area usage through proper workload management. Additionally, temporary space and user space can be isolated from each other by specifying that they reside on different tablespaces. + +## Calculating User Data Size + +As with all databases, the size of your raw data will be slightly larger once it is loaded into the database.
On average, raw data will be about 1.4 times larger on disk after it is loaded into the database, but could be smaller or larger depending on the data types you are using, table storage type, in-database compression, and so on. + +- Page Overhead - When your data is loaded into Apache Cloudberry, it is divided into pages of 32KB each. Each page has 20 bytes of page overhead. +- Row Overhead - In a regular 'heap' storage table, each row of data has 24 bytes of row overhead. An 'append-optimized' storage table has only 4 bytes of row overhead. +- Attribute Overhead - For the data values themselves, the size associated with each attribute value is dependent upon the data type chosen. As a general rule, you want to use the smallest data type possible to store your data \(assuming you know the possible values a column will have\). +- Indexes - In Apache Cloudberry, indexes are distributed across the segment hosts as is table data. The default index type in Apache Cloudberry is B-tree. Because index size depends on the number of unique values in the index and the data to be inserted, precalculating the exact size of an index is impossible. However, you can roughly estimate the size of an index using these formulas. + + ``` + B-tree: unique_values * (data_type_size + 24 bytes) + + Bitmap: (unique_values * number_of_rows * 1 bit * compression_ratio / 8) + (unique_values * 32) + ``` + +## Calculating Space Requirements for Metadata and Logs + +On each segment host, you will also want to account for space for Apache Cloudberry log files and metadata: + +- **System Metadata** — For each Apache Cloudberry segment instance \(primary or mirror\) or coordinator instance running on a host, estimate approximately 20 MB for the system catalogs and metadata. +- **Write Ahead Log** — For each Apache Cloudberry segment \(primary or mirror\) or coordinator instance running on a host, allocate space for the write ahead log \(WAL\). The WAL is divided into segment files of 64 MB each.
At most, the maximum number of WAL files will be: + + ``` + max_wal_size / 64MB + ``` + + You can use this to estimate space requirements for WAL. The default checkpoint\_segments setting for an Apache Cloudberry instance is 8, meaning 1088 MB WAL space allocated for each segment or coordinator instance on a host. + +- **Apache Cloudberry Log Files** — Each segment instance and the coordinator instance generates database log files, which will grow over time. Sufficient space should be allocated for these log files, and some type of log rotation facility should be used to ensure that log files do not grow too large. \ No newline at end of file diff --git a/docs/deployment/create_data_dirs.md b/docs/deployment/create_data_dirs.md new file mode 100644 index 00000000000..06a9d3291a1 --- /dev/null +++ b/docs/deployment/create_data_dirs.md @@ -0,0 +1,60 @@ +--- +title: Creating the Data Storage Areas +--- + +## Creating Data Storage Areas on the Coordinator and Standby Coordinator Hosts + +A data storage area is required on the Apache Cloudberry coordinator and standby coordinator hosts to store Apache Cloudberry system data such as catalog data and other system metadata. + +### To create the data directory location on the coordinator + +The data directory location on the coordinator is different than those on the segments. The coordinator does not store any user data, only the system catalog tables and system metadata are stored on the coordinator instance, therefore you do not need to designate as much storage space as on the segments. + +1. Create or choose a directory that will serve as your coordinator data storage area. This directory should have sufficient disk space for your data and be owned by the `gpadmin` user and group. For example, run the following commands as `root`: + + ``` + # mkdir -p /data/coordinator + ``` + +2. Change ownership of this directory to the `gpadmin` user. For example: + + ``` + # chown gpadmin:gpadmin /data/coordinator + ``` + +3.
Using [gpssh](/sys-utilities/gpssh.md), create the coordinator data directory location on your standby coordinator as well. For example: + + ``` + # source /usr/local/cloudberry-db/cloudberry-env.sh + # gpssh -h scdw -e 'sudo mkdir -p /data/coordinator' + # gpssh -h scdw -e 'sudo chown gpadmin:gpadmin /data/coordinator' + ``` + +## Creating Data Storage Areas on Segment Hosts + +Data storage areas are required on the Apache Cloudberry segment hosts for primary segments. Separate storage areas are required for mirror segments. + +### To create the data directory locations on all segment hosts + +1. On the coordinator host, log in as `root`: + + ``` + # su + ``` + +2. Create a file called `hostfile_gpssh_segonly`. This file should have only one machine configured host name for each segment host. For example, if you have three segment hosts: + + ``` + sdw1 + sdw2 + sdw3 + ``` + +3. Using [gpssh](/sys-utilities/gpssh.md), create the primary and mirror data directory locations on all segment hosts at once using the `hostfile_gpssh_segonly` file you just created. For example: + + ``` + # source /usr/local/cloudberry-db/cloudberry-env.sh + # gpssh -f hostfile_gpssh_segonly -e 'sudo mkdir -p /data/primary' + # gpssh -f hostfile_gpssh_segonly -e 'sudo mkdir -p /data/mirror' + # gpssh -f hostfile_gpssh_segonly -e 'sudo chown -R gpadmin /data/*' + ``` diff --git a/docs/deployment/index.md b/docs/deployment/index.md index f8444617876..d6902e5adad 100644 --- a/docs/deployment/index.md +++ b/docs/deployment/index.md @@ -1,87 +1,17 @@ --- -title: "Build Apache Cloudberry from Source: Complete Guide" +title: "Apache Cloudberry Deployment" --- -This document is intended for developers interested in exploring and potentially contributing to Apache Cloudberry. The build environment described here is optimized for development and testing purposes only. +This guide provides instructions for deploying Apache Cloudberry in production environments. 
It covers the full deployment lifecycle from hardware planning to cluster initialization and login. -## Target audience +The guide includes the following topics: -- Developers interested in contributing to Apache Cloudberry. -- PostgreSQL developers wanting to explore Cloudberry's extensions. -- Database enthusiasts interested in learning about distributed query processing. -- Anyone considering joining the Apache Cloudberry community. - -The build process described here enables development activities such as: - -- Debugging and testing new features. -- Exploring the codebase with development tools. -- Running test suites and validation checks. -- Making and testing code modifications. - -:::tip -If you are new to Apache Cloudberry or PostgreSQL development: - -- Consider building PostgreSQL first to understand the basic workflow -- Join the project's [mailing lists](/community/mailing-lists) to connect with other developers -- Review the project's issue tracker to understand current development priorities -- Be prepared for longer build times and iterative testing as you explore the codebase -::: - -## Process of building Apache Cloudberry - -The build process for Apache Cloudberry (Incubating) closely resembles that of PostgreSQL. If you have previously set up development environments for PostgreSQL, you'll find the steps for Cloudberry very familiar. - -For those new to Cloudberry or PostgreSQL, we recommend starting with a PostgreSQL build first. The PostgreSQL development community has established excellent documentation and tooling to guide you through the process. Familiarizing yourself with PostgreSQL's build process will make transitioning to Cloudberry significantly easier. 
- -## Prerequisites - -### Provision a Rocky Linux 8+ / Ubuntu 20.04+ Environment - -- Use any platform to create a virtual machine or container: - - - **Cloud providers**: You can use the Rocky Linux 8+ or Ubuntu 20.04+ images provided by the cloud providers, such as AWS, Google Cloud, Microsoft Azure, and more. - - **VirtualBox**: Use the official [Rocky Linux 8+](https://rockylinux.org/download) / [Ubuntu 20.04+](https://ubuntu.com/download) ISO or Vagrant boxes. - - **Docker**: These instructions were validated under Rocky Linux 8+ and Ubuntu 20.04 official base docker images, but should work with any of their based container. - - For example, you can run the following command to start a Rocky Linux 8 container: - - ```bash - docker run -it --shm-size=2gb -h cdw rockylinux/rockylinux:8 - - # Start a Ubuntu 20.04 container: - # docker run -it --shm-size=2gb -h cdw ubuntu:20.04 - ``` - - The hostname `cdw` (Coordinator Data Warehouse) is just an example of how we started the container for testing. - - To ensure test suites run successfully, you may need to increase the container's shared memory using `--shm-size=2gb`. Test failures can occur when the Cloudberry cluster lacks sufficient shared memory resources. - -- Ensure the VM or container has: - - Internet connectivity for package installation. - - SSH or console access for user interaction. - - Sufficient resources (CPU, memory, and storage) for a development environment. - -:::note -Specific steps to provision the environment are not covered in this guide because they vary by platforms. This guide assumes you have successfully created a VM or container and can log in as the default user (for example, `rocky` for Rocky Linux on AWS). 
-::: - -### System requirements - -Minimum requirements for development environment: - -- CPU: 4 cores recommended (2 cores minimum) - - CPU architecture: x86, x86_64, ARM, MIPS -- RAM: 8GB recommended (4GB minimum) -- Storage: 20GB free space recommended -- Network: Broadband internet connection for package downloads - -## Build Apache Cloudberry from source code - -The following steps guide you through building Apache Cloudberry from source code on Rocky Linux 8+ or Ubuntu 20.04+. The process is similar for both operating systems, with minor differences in package management, dependencies and software versions between these two distributions. - -Just go ahead and follow the steps below to build Apache Cloudberry from source code: - -```mdx-code-block -import DocCardList from '@theme/DocCardList'; - - -``` \ No newline at end of file +- **[Platform Requirements](./platform-requirements)** — Hardware and software requirements for Apache Cloudberry hosts. +- **[Estimating Storage Capacity](./capacity_planning)** — How to estimate disk space needed for your data. +- **[Configuring Your Systems](./prepare-to-deploy)** — OS-level configuration including kernel parameters, SELinux, firewall, NTP, and user setup. +- **[Deploy Using RPM/DEB Package](./install_cloudberry)** — Installing the Apache Cloudberry software on all hosts. +- **[Creating the Data Storage Areas](./create_data_dirs)** — Setting up data directories for coordinator and segment instances. +- **[Validating Hardware and Network](./validate)** — Verifying network, disk I/O, and memory bandwidth performance. +- **[Initialize Apache Cloudberry](./init_cloudberry)** — Initializing the database cluster using `gpinitsystem`. +- **[Logging into Apache Cloudberry](./login_cloudberry)** — Connecting to the database after deployment. +- **[Deploy with a Single Computing Node](./single-node)** — Deploying Apache Cloudberry without segment nodes for development or testing. 
diff --git a/docs/deployment/init_cloudberry.md b/docs/deployment/init_cloudberry.md new file mode 100644 index 00000000000..dcc662606aa --- /dev/null +++ b/docs/deployment/init_cloudberry.md @@ -0,0 +1,278 @@ +--- +title: Initializing Apache Cloudberry +--- + +Because Apache Cloudberry is distributed, the process for initializing an Apache Cloudberry Database management system involves initializing several individual PostgreSQL database instances (called *segment instances* in Cloudberry). + +Each database instance \(the coordinator and all segments\) must be initialized across all of the hosts in the system in such a way that they can all work together as a unified DBMS. Cloudberry provides its own version of `initdb` called [gpinitsystem](/sys-utilities/gpinitsystem.md), which takes care of initializing the database on the coordinator and on each segment instance, and starting each instance in the correct order. + +After the Apache Cloudberry database system has been initialized and started, you can then create and manage databases as you would in a regular PostgreSQL DBMS by connecting to the Cloudberry coordinator. + +When performing the following initialization tasks, you must be logged into the coordinator host as the `gpadmin` user, and to run Apache Cloudberry utilities, you must source the `cloudberry-env.sh` file to set Apache Cloudberry environment variables. For example, if you are logged into the coordinator, run these commands. + +``` +$ su - gpadmin +$ source /usr/local/cloudberry-db/cloudberry-env.sh +``` + +## Creating the Initialization Host File + +The [gpinitsystem](/sys-utilities/gpinitsystem.md) utility requires a host file that contains the list of addresses for each segment host. The initialization utility determines the number of segment instances per host by the number of host addresses listed per host times the number of data directory locations specified in the `gpinitsystem_config` file.
+ +This file should only contain segment host addresses (not the coordinator or standby coordinator). For segment machines with multiple, unbonded network interfaces, this file should list the host address names for each interface — one per line. + +:::note +The Apache Cloudberry segment host naming convention is sdwN where sdw is a prefix and N is an integer. For example, `sdw2` and so on. If hosts have multiple unbonded NICs, the convention is to append a dash (`-`) and number to the host name. For example, `sdw1-1` and `sdw1-2` are the two interface names for host `sdw1`. However, NIC bonding is recommended to create a load-balanced, fault-tolerant network. +::: + +### To create the initialization host file + +1. Create a file named `hostfile_gpinitsystem`. In this file add the host address name\(s\) of your *segment* host interfaces, one name per line, no extra lines or spaces. For example, if you have four segment hosts with two unbonded network interfaces each: + + ``` + sdw1-1 + sdw1-2 + sdw2-1 + sdw2-2 + sdw3-1 + sdw3-2 + sdw4-1 + sdw4-2 + ``` + +2. Save and close the file. + +:::note +If you are not sure of the host names and/or interface address names used by your machines, look in the `/etc/hosts` file. +::: + +## Creating the Apache Cloudberry Configuration File + +Your Cloudberry configuration file tells the [gpinitsystem](/sys-utilities/gpinitsystem.md) utility how you want to configure your Apache Cloudberry system. An example configuration file can be found in `$GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config`. + +### To create a gpinitsystem_config file + +1. Make a copy of the `gpinitsystem_config` file to use as a starting point. For example: + + ``` + $ mkdir -p gpconfigs + $ cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config \ + /home/gpadmin/gpconfigs/gpinitsystem_config + ``` + +2. Open the file you just copied in a text editor. + + Set all of the required parameters according to your environment. 
See [gpinitsystem](/sys-utilities/gpinitsystem.md) for more information. An Apache Cloudberry system must contain a coordinator instance and at *least two* segment instances (even if setting up a single node system). + + The `DATA_DIRECTORY` parameter is what determines how many segments per host will be created. If your segment hosts have multiple network interfaces, and you used their interface address names in your host file, the number of segments will be evenly spread over the number of available interfaces. + + The `COORDINATOR_HOSTNAME` is the hostname of the coordinator node. + + The `COORDINATOR_DIRECTORY` is the data directory of the coordinator node. + + To specify `PORT_BASE`, review the port range specified in the `net.ipv4.ip_local_port_range` parameter in the `/etc/sysctl.conf` file. + + Here is an example of the *required* parameters in the `gpinitsystem_config` file: + + ``` + SEG_PREFIX=gpseg + PORT_BASE=6000 + declare -a DATA_DIRECTORY=(/data1/primary /data1/primary /data1/primary /data2/primary /data2/primary /data2/primary) + COORDINATOR_HOSTNAME=cdw + COORDINATOR_DIRECTORY=/data/coordinator + COORDINATOR_PORT=5432 + TRUSTED_SHELL=ssh + CHECK_POINT_SEGMENTS=8 + ENCODING=UNICODE + ``` + +3. (Optional) If you want to deploy mirror segments, uncomment and set the mirroring parameters according to your environment. To specify `MIRROR_PORT_BASE`, review the port range specified under the `net.ipv4.ip_local_port_range` parameter in the `/etc/sysctl.conf` file. Here is an example of the *optional* mirror parameters in the `gpinitsystem_config` file: + + ``` + MIRROR_PORT_BASE=7000 + declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror) + ``` + + :::note + You can initialize your Cloudberry system with primary segments only and deploy mirrors later using the [gpaddmirrors](/sys-utilities/gpaddmirrors.md) utility. + ::: + +4.
To create a default database during initialization, you need to fill in the database name. In this example, the `warehouse` database is created during initialization: + + ``` + DATABASE_NAME=warehouse + ``` + +5. Save and close the file. + +## Running the Initialization Utility + +The [gpinitsystem](/sys-utilities/gpinitsystem.md) utility will create an Apache Cloudberry system using the values defined in the configuration file. + +These steps assume you are logged in as the `gpadmin` user and have sourced the `cloudberry-env.sh` file to set Apache Cloudberry environment variables. + +### To run the initialization utility + +1. Run the following command referencing the path and file name of your initialization configuration file (`gpinitsystem_config`) and host file (`hostfile_gpinitsystem`). For example: + + ``` + $ cd ~ + $ gpinitsystem -c gpconfigs/gpinitsystem_config -h gpconfigs/hostfile_gpinitsystem + ``` + + In the command above, `-c` specifies the configuration file and `-h` specifies the host file that lists the segment hosts. + + For a fully redundant system (with a standby coordinator and a *spread* mirror configuration) include the `-s` and `--mirror-mode=spread` options. For example: + + ``` + $ gpinitsystem -c gpconfigs/gpinitsystem_config -h gpconfigs/hostfile_gpinitsystem \ + -s <standby_coordinator_hostname> --mirror-mode=spread + ``` + + When creating a new cluster, you may use the `-O output_configuration_file` option to save the cluster configuration details in a file. For example: + + ``` + $ gpinitsystem -c gpconfigs/gpinitsystem_config -O gpconfigs/config_template + ``` + + This output file can be edited and used at a later stage as the input file of the `-I` option, to create a new cluster or to recover from a backup. See [gpinitsystem](/sys-utilities/gpinitsystem.md) for further details. + + :::note + Calling `gpinitsystem` with the `-O` option does not initialize the Apache Cloudberry system; it merely generates and saves a file with cluster configuration details. + ::: + +2. 
The utility will verify your setup information and make sure it can connect to each host and access the data directories specified in your configuration. If all of the pre-checks are successful, the utility will prompt you to confirm your configuration. For example: + + ``` + => Continue with Cloudberry creation? Yy/Nn + ``` + +3. Press `y` to start the initialization. +4. The utility will then begin setup and initialization of the coordinator instance and each segment instance in the system. Each segment instance is set up in parallel. Depending on the number of segments, this process can take a while. +5. At the end of a successful setup, the utility will start your Apache Cloudberry system. You should see: + + ``` + => Apache Cloudberry instance successfully created. + ``` + +### (Optional) Initializing a Standby Coordinator + +:::note +If you included the `-s ` option in the `gpinitsystem` command, the standby coordinator is already initialized and you can skip this step. +::: + +If you did not include the `-s` option when running `gpinitsystem`, you can initialize a standby coordinator separately after the cluster is up and running using the `gpinitstandby` utility. + +The standby coordinator serves as a warm backup of the coordinator instance. If the primary coordinator becomes unavailable, the standby can be activated to take over. + +1. Ensure the standby coordinator host has the same data directory created and owned by `gpadmin`: + + ```bash + $ gpssh -h scdw -e 'sudo mkdir -p /data/coordinator' + $ gpssh -h scdw -e 'sudo chown gpadmin:gpadmin /data/coordinator' + ``` + +2. Run `gpinitstandby` from the coordinator host as `gpadmin`: + + ```bash + $ gpinitstandby -s scdw + ``` + +3. Verify the standby coordinator is synchronized: + + ```bash + $ gpstate -f + ``` + + The output should show the standby coordinator with `Sync state: sync`. 
+ +### Troubleshooting Initialization Problems + +If the utility encounters any errors while setting up an instance, the entire process will fail, and could possibly leave you with a partially created system. Refer to the error messages and logs to determine the cause of the failure and where in the process the failure occurred. Log files are created in `~/gpAdminLogs`. + +Depending on when the error occurred in the process, you may need to clean up and then try the `gpinitsystem` utility again. For example, if some segment instances were created and some failed, you may need to stop `postgres` processes and remove any utility-created data directories from your data storage area(s). A backout script is created to help with this cleanup if necessary. + +#### Using the Backout Script + +If the `gpinitsystem` utility fails, it will create the following backout script if it has left your system in a partially installed state: + +`~/gpAdminLogs/backout_gpinitsystem_<user>_<timestamp>` + +You can use this script to clean up a partially created Apache Cloudberry system. This backout script will remove any utility-created data directories, `postgres` processes, and log files. After correcting the error that caused `gpinitsystem` to fail and running the backout script, you should be ready to retry initializing your Apache Cloudberry array. + +The following example shows how to run the backout script: + +``` +$ bash ~/gpAdminLogs/backout_gpinitsystem_gpadmin_20071031_121053 +``` + +## Setting the Apache Cloudberry Timezone + +As a best practice, configure Apache Cloudberry and the host systems to use a known, supported timezone. Apache Cloudberry uses a timezone from a set of internally stored PostgreSQL timezones. Setting the Apache Cloudberry timezone prevents Apache Cloudberry from selecting a timezone each time the cluster is restarted and sets the timezone for the Apache Cloudberry coordinator and segment instances. 
+ +Use the [gpconfig](/sys-utilities/gpconfig.md) utility to show and set the Apache Cloudberry timezone. For example, these commands show the Apache Cloudberry timezone and set the timezone to `US/Pacific`. + +``` +$ gpconfig -s TimeZone +$ gpconfig -c TimeZone -v 'US/Pacific' +``` + +You must restart Apache Cloudberry after changing the timezone. The command `gpstop -ra` restarts Apache Cloudberry. The catalog view `pg_timezone_names` provides Apache Cloudberry timezone information. + +## Setting Apache Cloudberry Environment Variables + +You must set environment variables in the Apache Cloudberry user (`gpadmin`) environment that runs Apache Cloudberry on the Apache Cloudberry coordinator and standby coordinator hosts. A `cloudberry-env.sh` file is provided in the Apache Cloudberry installation directory with environment variable settings for Apache Cloudberry. + +The Apache Cloudberry management utilities also require that the `COORDINATOR_DATA_DIRECTORY` environment variable be set. This should point to the directory created by the `gpinitsystem` utility in the coordinator data directory location. + +:::note +The `cloudberry-env.sh` script changes the operating environment in order to support running the Apache Cloudberry-specific utilities. These same changes to the environment can negatively affect the operation of other system-level utilities, such as `ps` or `yum`. Use separate accounts for performing system administration and database administration, instead of attempting to perform both functions as `gpadmin`. +::: + +These steps ensure that the environment variables are set for the `gpadmin` user after a system reboot. + +### To set up the gpadmin environment for Apache Cloudberry + +1. Open the `gpadmin` profile file (such as `.bashrc`) in a text editor. For example: + + ``` + $ vi ~/.bashrc + ``` + +2. Add lines to this file to source the `cloudberry-env.sh` file and set the `COORDINATOR_DATA_DIRECTORY` environment variable. 
For example: + + ``` + source /usr/local/cloudberry-db/cloudberry-env.sh + export COORDINATOR_DATA_DIRECTORY=/data/coordinator/gpseg-1 + ``` + +3. (Optional) You may also want to set some client session environment variables such as `PGPORT`, `PGUSER` and `PGDATABASE` for convenience. For example: + + ``` + export PGPORT=5432 + export PGUSER=gpadmin + export PGDATABASE=warehouse + ``` + +4. (Optional) If you use RHEL/Oracle/Rocky Linux, add the following line to the end of the `.bashrc` file to enable using the `ps` command in the `cloudberry-env.sh` environment: + + ``` + export LD_PRELOAD=/lib64/libz.so.1 ps + ``` + +5. Save and close the file. +6. After editing the profile file, source it to make the changes active. For example: + + ``` + $ source ~/.bashrc + ``` + +7. If you have a standby coordinator host, copy your environment file to the standby coordinator as well. For example: + + ``` + $ cd ~ + $ scp .bashrc :`pwd` + ``` + +:::note +The `.bashrc` file should not produce any output. If you wish to have a message display to users upon logging in, use the `.bash_profile` file instead. +::: diff --git a/docs/deployment/install_cloudberry.md b/docs/deployment/install_cloudberry.md new file mode 100644 index 00000000000..051c1a29bb6 --- /dev/null +++ b/docs/deployment/install_cloudberry.md @@ -0,0 +1,177 @@ +--- +title: Installing Using RPM/DEB Package +--- + +This document introduces how to manually deploy Apache Cloudberry on physical/virtual machines using RPM or DEB packages. Before reading this document, it is recommended to first read the [Software and Hardware Configuration Requirements](./platform-requirements.md) and [Prepare to Deploy Apache Cloudberry](./prepare-to-deploy.md). + +You must install Apache Cloudberry on each host machine of the Apache Cloudberry system. + +:::info +Starting from Apache Cloudberry 2.1, RPM and DEB packages are officially provided for download. RPM packages support Rocky Linux 8/9, RHEL 8/9, and compatible distributions. 
DEB packages support Ubuntu 22.04. + +After downloading, please verify the signatures and checksums of the files. +::: + + +## Install the package + +After the preparation, it is time to install Apache Cloudberry. You need to download the corresponding package from [Apache Cloudberry Releases](https://cloudberry.apache.org/releases), and then install the database on each node using the installation package. + +1. Download and copy the package to the home directory of `gpadmin` on the coordinator, standby coordinator, and every segment host machine. + +2. With sudo (or as `root`), install the Apache Cloudberry package on each host machine using your system's package manager software: + + ```bash + # For RPM (Rocky Linux, RHEL, etc.) + sudo dnf install <package_file> + # Or for older systems: sudo yum install <package_file> + + # For DEB (Ubuntu) + sudo apt install <package_file> --fix-broken + # Or alternatively: sudo dpkg -i <package_file> && apt-get install -f + ``` + + The `yum`, `dnf`, and `apt` commands automatically install software dependencies, copy the Apache Cloudberry software files into a version-specific directory under `/usr/local` (`/usr/local/cloudberry-db-<version>`), and create the symbolic link `/usr/local/cloudberry-db` to the installation directory. + +3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry-db/` directory. + + ```bash + sudo chown -R gpadmin:gpadmin /usr/local/cloudberry* + ``` + +## (Optional) Installing to a non-default directory + +:::caution +A custom installation directory is not supported in Apache Cloudberry 2.1. This feature is planned for Apache Cloudberry 2.2. The instructions below are provided for future reference only. +::: + +### For RPM-based systems (Rocky Linux, RHEL, etc.) + +On RHEL-based systems, you can use the `rpm` command with the `--prefix` option to install Apache Cloudberry to a non-default directory (instead of under `/usr/local`). 
Note that using `rpm` directly does not automatically install dependencies; you must manually install them on each host. + +1. Download and copy the Apache Cloudberry package to the `gpadmin` user's home directory on the coordinator, standby coordinator, and every segment host machine. + +2. Manually install the dependencies to each host system: + + ```bash + $ sudo dnf install openssh-server openssh-clients sudo shadow-utils \ + bash procps-ng ca-certificates python3 apr bzip2 krb5-libs libevent \ + libicu liburing libuuid libxml2 libyaml libzstd lz4 ncurses openldap \ + openssl pam pcre2 perl protobuf readline zlib libuv iproute \ + net-tools which rsync keyutils libssh libpsl xz + ``` + +3. Use `rpm` with the `--prefix` option to install the Apache Cloudberry package to your chosen installation directory on each host machine: + + ```bash + $ sudo rpm --install ./ --prefix= + ``` + + The `rpm` command copies the Apache Cloudberry software files into a version-specific directory under your chosen ``, and creates a symbolic link `/cloudberry-db` to the versioned directory. + +4. Change the owner and group of the installed files to `gpadmin`: + + ```bash + $ sudo chown -R gpadmin:gpadmin /cloudberry* + ``` + +If you install to a non-default directory using `rpm`, you will need to continue using `rpm` (instead of `dnf`) to perform minor version upgrades. + +## Enabling Passwordless SSH + +The `gpadmin` user on each Cloudberry host must be able to SSH from any host in the cluster to any other host in the cluster without entering a password or passphrase (called "passwordless SSH"). If you enable passwordless SSH from the coordinator host to every other host in the cluster ("1-*n* passwordless SSH"), you can use the `gpssh-exkeys` command-line utility to enable passwordless SSH from every host to every other host ("*n*-*n* passwordless SSH"). + +1. Log in to the coordinator host as the `gpadmin` user, and use the `gpadmin` user for subsequent operations. +2. 
Source the `cloudberry-env.sh` path file in the Apache Cloudberry installation directory. + + ``` + $ source /usr/local/cloudberry-db-<version>/cloudberry-env.sh + ``` + + :::caution + Since Cloudberry 2.1, the path file name has been changed from `greenplum_path.sh` to `cloudberry-env.sh`. See the [blog](https://cloudberry.apache.org/blog/from-greenplum-path.sh-to-cloudberry-env.sh/) for more details. + ::: + + :::note + Add the above `source` command to the `gpadmin` user's `.bashrc` or other shell startup file so that the Apache Cloudberry path and environment variables are set whenever you log in as `gpadmin`. + ::: + +3. Use the `ssh-copy-id` command to add the `gpadmin` user's public key to the `authorized_keys` SSH file on every other host in the cluster. + + ``` + $ ssh-copy-id scdw + $ ssh-copy-id sdw1 + $ ssh-copy-id sdw2 + $ ssh-copy-id sdw3 + . . . + ``` + + This enables 1-*n* passwordless SSH. You will be prompted to enter the `gpadmin` user's password for each host. If you have the `sshpass` command on your system, you can use a command like the following to avoid the prompt. + + ``` + $ SSHPASS=<password> sshpass -e ssh-copy-id scdw + ``` + +4. In the `gpadmin` home directory, create a file named `hostfile_exkeys` that lists the machine-configured host names and host addresses (interface names) for each host in your Cloudberry system (coordinator, standby coordinator, and segment hosts). Make sure there are no blank lines or extra spaces. Check the `/etc/hosts` file on your systems for the correct host names to use for your environment. + + For example, if you have a coordinator, standby coordinator, and three segment hosts with two unbonded network interfaces per host, your file would look something like this: + + ``` + cdw + cdw-1 + cdw-2 + scdw + scdw-1 + scdw-2 + sdw1 + sdw1-1 + sdw1-2 + sdw2 + sdw2-1 + sdw2-2 + sdw3 + sdw3-1 + sdw3-2 + ``` + +5. Run the `gpssh-exkeys` utility with your `hostfile_exkeys` file to enable *n*-*n* passwordless SSH for the `gpadmin` user. 
+ + ``` + $ gpssh-exkeys -f hostfile_exkeys + ``` + +## Confirm the installation + +To make sure the Apache Cloudberry software was installed and configured correctly, run the following confirmation steps from your Apache Cloudberry coordinator host. If necessary, correct any problems before continuing on to the next task. + +1. Log in to the coordinator host as `gpadmin`: + + ```bash + $ su - gpadmin + ``` + +2. Use the `gpssh` utility to see if you can log in to all hosts without a password prompt, and to confirm that the Apache Cloudberry software was installed on all hosts. Use the `hostfile_exkeys` file you created earlier. For example: + + ```bash + $ gpssh -f hostfile_exkeys -e 'ls -l /usr/local/cloudberry-db-*' + ``` + + If the installation was successful, you should be able to log in to all hosts without a password prompt. All hosts should show that they have the same contents in their installation directories, and that the directories are owned by the `gpadmin` user. + + If you are prompted for a password, run the following command to redo the SSH key exchange: + + ```bash + $ gpssh-exkeys -f hostfile_exkeys + ``` + +## About Your Apache Cloudberry Installation + +- **`cloudberry-env.sh`** — This file contains the environment variables for Apache Cloudberry. +- **bin** — This directory contains the Apache Cloudberry management utilities. This directory also contains the PostgreSQL client and server programs, most of which are also used in Apache Cloudberry. +- **docs/cli_help** — This directory contains help files for Apache Cloudberry command-line utilities. +- **docs/cli_help/gpconfigs** — This directory contains sample `gpinitsystem` configuration files and host files that can be modified and used when installing and initializing an Apache Cloudberry system. +- **ext** — Bundled programs (such as Python) used by some Apache Cloudberry utilities. +- **include** — The C header files for Apache Cloudberry. 
+- **lib** — Apache Cloudberry and PostgreSQL library files. +- **sbin** — Supporting/Internal scripts and programs. +- **share** — Shared files for Apache Cloudberry. diff --git a/docs/deployment/login_cloudberry.md b/docs/deployment/login_cloudberry.md new file mode 100644 index 00000000000..909508a2771 --- /dev/null +++ b/docs/deployment/login_cloudberry.md @@ -0,0 +1,42 @@ +--- +title: Logging into Apache Cloudberry +--- + +Now you have successfully deployed Apache Cloudberry. To log into the database, use the following command: + +```bash +psql -h <host> -p <port> -U <user> -d <database> +``` + +In the command above: + +- `<host>` is the IP address of the coordinator node of the Apache Cloudberry server. +- `<port>` is the port number of Apache Cloudberry, which is `5432` by default. +- `<user>` is the user name of the database. +- `<database>` is the name of the database to connect to. + +After you run the `psql` command, the system will prompt you to enter the database password. After you enter the correct password, you will successfully log into Apache Cloudberry and can perform SQL queries and operations. Make sure that you have the correct permissions to access the target database. + +```sql +[gpadmin@cdw ~]$ psql warehouse +psql (14.4, server 14.4) +Type "help" for help. 
+ +warehouse=# SELECT * FROM gp_segment_configuration; + dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir | warehouseid +------+---------+------+----------------+------+--------+------+----------+---------+---------------------------+------------- + 1 | -1 | p | p | n | u | 5432 | cdw | cdw | /data/coordinator/gpseg-1 | 0 + 2 | 0 | p | p | s | u | 6000 | sdw1 | sdw1 | /data/primary/gpseg0 | 0 + 8 | 0 | m | m | s | u | 7000 | sdw2 | sdw2 | /data/mirror/gpseg0 | 0 + 3 | 1 | p | p | s | u | 6001 | sdw1 | sdw1 | /data/primary/gpseg1 | 0 + 9 | 1 | m | m | s | u | 7001 | sdw3 | sdw3 | /data/mirror/gpseg1 | 0 + 4 | 2 | p | p | s | u | 6000 | sdw2 | sdw2 | /data/primary/gpseg2 | 0 + 10 | 2 | m | m | s | u | 7000 | sdw3 | sdw3 | /data/mirror/gpseg2 | 0 + 5 | 3 | p | p | s | u | 6001 | sdw2 | sdw2 | /data/primary/gpseg3 | 0 + 11 | 3 | m | m | s | u | 7001 | sdw1 | sdw1 | /data/mirror/gpseg3 | 0 + 6 | 4 | p | p | s | u | 6000 | sdw3 | sdw3 | /data/primary/gpseg4 | 0 + 12 | 4 | m | m | s | u | 7000 | sdw1 | sdw1 | /data/mirror/gpseg4 | 0 + 7 | 5 | p | p | s | u | 6001 | sdw3 | sdw3 | /data/primary/gpseg5 | 0 + 13 | 5 | m | m | s | u | 7001 | sdw2 | sdw2 | /data/mirror/gpseg5 | 0 +(13 rows) +``` \ No newline at end of file diff --git a/docs/deployment/platform-requirements.md b/docs/deployment/platform-requirements.md new file mode 100644 index 00000000000..0bb94e54606 --- /dev/null +++ b/docs/deployment/platform-requirements.md @@ -0,0 +1,198 @@ +--- +title: Platform Requirements +--- + +This topic describes the Apache Cloudberry platform and operating system software requirements for deploying the software to on-premise hardware, or to public cloud services such as AWS, GCP, or Azure. + +## Hardware requirements + +### Supported deployment environments + +Apache Cloudberry supports deployment on both physical machines and virtual machines. Below are the recommended configurations for the environments. 
+ +#### For development or test environments + +| Component | CPU | Memory | Disk type | Network | Number of instances | +| ------- | ---- | ---- | -------- | -------------------- | -------- | +| Coordinator | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | +| Segment | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | + +#### For production environments + +| Component | CPU | Memory | Disk type | Network | Instance count | +| ------- | ------ | ------ | -------- | -------------------- | -------- | +| Coordinator | 16+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | +| Segment | 8+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | + +Apache Cloudberry can also be deployed on public cloud platforms such as AWS, Azure, and GCP. The hardware requirements for cloud-based deployments might vary based on the instance types selected on these platforms. Refer to the specific cloud provider’s documentation for instance configurations that meet or exceed the recommended hardware specifications. + +#### Minimum hardware requirements + +The following lists minimum recommended specifications for hardware servers intended to support Apache Cloudberry on Linux systems in a production environment. All host servers in your Apache Cloudberry system must have the same hardware and software configuration. Apache Cloudberry also provides hardware build guides for its certified hardware platforms. Work with a Cloudberry Systems Engineer to review your anticipated environment to ensure an appropriate hardware configuration for Apache Cloudberry. 
+ +- Minimum CPU: Any x86_64/AARCH64 compatible CPU +- Minimum Memory: 16 GB RAM per server +- Disk Space Requirements: + - 150 MB per host for Cloudberry installation + - Approximately 300 MB per segment instance for metadata + - Cap disk capacity at 70% full to accommodate temporary files and prevent performance degradation +- Network Requirements: 10 Gigabit Ethernet within the array; NIC bonding is recommended when multiple interfaces are present. Apache Cloudberry can use either IPv4 or IPv6 protocols. + +**Hyperthreading** + +Resource Groups - one of the key Apache Cloudberry features - can control transaction concurrency, CPU and memory resources, workload isolation, and dynamic bursting. + +When using resource groups to control resource allocation on Intel-based systems, consider switching off Hyper-Threading (HT) in the server BIOS (for Intel cores the default is ON). Switching off HT might cause a small throughput reduction (less than 15%), but can achieve greater isolation between resource groups, and higher query performance with lower concurrency workloads. + +### CPU architecture support + +Apache Cloudberry supports running on both **x86_64** and **ARM (AARCH64)** CPU architectures, making it suitable for a wide range of hardware platforms including cloud instances and ARM-based servers. + +| Architecture | Source Build | Convenience binaries | +|---|---|---| +| x86_64 | Supported | Available (2.1+) | +| ARM (AARCH64) | Supported | Planned for 2.2 | + +For ARM-based deployments in the current release, you can [build Apache Cloudberry from source](../build/index.md). + +### Storage + +- To prevent a high data disk load from affecting the operating system's normal I/O response, mount the operating system and the data disk on separate disks. +- If the host configuration allows, it is recommended to use 2 independent SAS disks as the system disk (RAID1), and another 10 SAS disks as the data disk (RAID5). 
+- It is recommended to use LVM logical volumes to manage disks for more flexible disk configuration. + +**For the system disk**: The system disk should use an independent disk to avoid impact on the operating system when data disks are heavily loaded. It is recommended that the system disk be configured in dual-disk RAID 1 and the file system of the system disk be XFS. + +**For data disks**: It is recommended to use LVM to manage data disks. According to test statistics, creating an independent logical volume for each physical volume can achieve the best disk performance. For example: + +```bash +pvcreate /dev/vdb +pvcreate /dev/vdc +pvcreate /dev/vdd +vgcreate data /dev/vdb /dev/vdc /dev/vdd +lvcreate --extents 100%pvs -n data0 data /dev/vdb +lvcreate --extents 100%pvs -n data1 data /dev/vdc +lvcreate --extents 100%pvs -n data2 data /dev/vdd +``` + +The names of mount points must be consecutive, and the mount points of data disks should be `/data0`, `/data1`, ..., `/dataN`. Data disks should use the XFS file system. For example: + +```bash +mkdir -p /data0 /data1 /data2 +mkfs.xfs /dev/data/data0 +mkfs.xfs /dev/data/data1 +mkfs.xfs /dev/data/data2 +mount /dev/data/data0 /data0/ +mount /dev/data/data1 /data1/ +mount /dev/data/data2 /data2/ +``` + +## Data exchange network + +- **Network card configuration** + + The data exchange network is used for transmitting business data, which has high requirements on network performance and throughput. In a production environment, two 10 Gbps NICs are generally required, and they will be used after bonding. The recommended bonding parameters (mode 4, 802.3ad) are as follows: + + ```bash + BONDING_OPTS='mode=4 miimon=100 xmit_hash_policy=layer3+4' + ``` + +- **Connectivity requirements** + + - Connect the management console and the database host in the data exchange network. If there is a firewall device between the management console and the database host, ensure that the TCP idle connection can be kept for more than 12 hours. 
+ - Connect database hosts and management console hosts in the data exchange network, and do not limit the TCP idle connection time. + - Connect database clients and application programs that access the database with the database coordinator node in the data exchange network. + - Ensure that the TCP idle connection can be kept for more than 12 hours. + +- **Default gateway** + + If the host is configured with a management network, the network card (bond0) of the data exchange network should be used as the default gateway device; otherwise, it might cause abnormal traffic monitoring of the host network, deployment failure, and performance problems. The following is an example of viewing the default gateway. + + ```bash + netstat -rn | grep ^0.0.0.0 + ``` + +- **Switch** + + - Make sure that the egress bandwidth of the data network switch from layer 1 to layer 2 is no lower than the maximum disk I/O throughput capacity of a single cabinet (calculated with a single RAID card of 500 MBps). + - A switch convergence ratio of 4:1 is recommended. When the convergence ratio reaches 6:1, most links will be saturated. Significant packet loss occurs when the convergence ratio reaches 8:1. + +## Software requirements + +### Supported OS + +Apache Cloudberry supports the following operating systems: + +- Rocky Linux 8/9 +- Ubuntu 22.04 +- RHEL 8/9 and compatible distributions (Oracle Linux, AlmaLinux, etc.) + +### Software dependencies + +The following runtime packages are required on all Apache Cloudberry hosts. These dependencies are automatically resolved when installing via `dnf` (RPM) or `apt` (DEB), but are listed here for reference. 
+ +#### Common dependencies (all platforms) + +``` +bash, openssh, rsync, perl, python3, less, hostname, iproute / iproute2, iputils / iputils-ping, which / debianutils +``` + +#### For Rocky Linux / RHEL 8 + +``` +apr, audit, bash, bzip2, hostname, iproute, iputils, keyutils, +less, libcurl, libevent, libidn2, libselinux, libstdc++, libuuid, +libuv, libxml2, libyaml, libzstd, lz4, openldap, openssh, +openssh-clients, openssh-server, openssl, pam, perl, python3, +readline, rsync, which +``` + +#### For Rocky Linux / RHEL 9 + +``` +apr, bash, bzip2, glibc, hostname, iproute, iputils, keyutils, +less, libcap, libcurl, libidn2, libpsl, libssh, libstdc++, +libxml2, libyaml, libzstd, lz4, openldap, openssh, +openssh-clients, openssh-server, openssl, pam, pcre2, perl, +python3, readline, rsync, which, xz +``` + +#### For Ubuntu 22.04 + +``` +curl, cgroup-tools, debianutils, hostname, iputils-ping, iproute2, +keyutils, krb5-multidev, less, libapr1, libbz2-1.0, libcurl4, +libcurl3-gnutls, libevent-2.1-7, libreadline8, libxml2, libyaml-0-2, +libldap-2.5-0, libzstd1, libcgroup1, libssl3, libpam0g, libprotobuf23, +libpsl5, libuv1, liburing2, libxerces-c3.2, locales, lsof, lz4, +net-tools, openssh-client, openssh-server, openssl, python3, rsync, +wget, xz-utils, zlib1g +``` + +### Java + +Apache Cloudberry supports these Java versions for PL/Java and PXF: + +- Open JDK 8 or Open JDK 11, 17, available from [AdoptOpenJDK](https://adoptopenjdk.net) +- Oracle JDK 8 or Oracle JDK 11, 17 + +### File system + +XFS is the required file system for data storage on Apache Cloudberry hosts. + +Apache Cloudberry is supported on network or shared storage if the shared storage is presented as a block device to the servers running Apache Cloudberry and the XFS file system is mounted on the block device. Network file systems are not supported. 
When using network or shared storage, Apache Cloudberry mirroring must be used in the same way as with local storage, and no modifications may be made to the mirroring scheme or the recovery scheme of the segments. + +Apache Cloudberry can be deployed to virtualized systems only if the storage is presented as block devices and the XFS file system is mounted for the storage of the segment directories. + +Apache Cloudberry is supported on Amazon Web Services (AWS) servers using either Amazon instance store (Amazon uses the volume names ephemeral[0-23]) or Amazon Elastic Block Store (Amazon EBS) storage. If using Amazon EBS storage the storage should be RAID of Amazon EBS volumes and mounted with the XFS file system for it to be a supported configuration. + +### SSH configuration + +The recommended configuration for the SSH server side (`/etc/ssh/sshd_config`) is as follows. After the configuration is complete, run `systemctl restart sshd.service` to make it effective. + +| Parameter | Value | Description | +| ---------------------- | ---- | ---------------- | +| Port | 22 | Listening port. | +| PasswordAuthentication | yes | Allows password login, which can be changed after cluster initialization. | +| PermitEmptyPasswords | no | Empty password is not allowed for login. | +| UseDNS | no | DNS is not used. | diff --git a/docs/deployment/prepare-to-deploy.md b/docs/deployment/prepare-to-deploy.md new file mode 100644 index 00000000000..1d2188c3330 --- /dev/null +++ b/docs/deployment/prepare-to-deploy.md @@ -0,0 +1,725 @@ +--- +title: Configuring Your Systems +--- + +Describes how to prepare your operating system environment for Apache Cloudberry software installation. Before deploying Apache Cloudberry on physical or virtual machines, you need to do some preparations. Read this document and [Software and Hardware Configuration Requirements](./platform-requirements.md) before you start to deploy Apache Cloudberry. + +Perform the following tasks in order: + +1. 
Make sure your host systems meet the requirements described in [Software and Hardware Configuration Requirements](./platform-requirements.md). +2. [Deactivate or configure SELinux.](#deactivate-or-configure-selinux) +3. [Deactivate or configure firewall software.](#deactivate-or-configure-firewall-software) +4. [Set the required operating system parameters.](#recommended-os-parameters-settings) +5. [Synchronize system clocks.](#synchronizing-system-clocks) +6. [Create the gpadmin account.](#creating-the-cloudberry-administrative-user) + +Unless noted, these tasks should be performed for *all* hosts in your Apache Cloudberry array (coordinator, standby coordinator, and segment hosts). + +The Apache Cloudberry host naming convention for the coordinator host is `cdw` and for the standby coordinator host is `scdw`. + +The segment host naming convention is `sdwN` where `sdw` is a prefix and `N` is an integer. For example, segment host names would be `sdw1`, `sdw2` and so on. NIC bonding is recommended for hosts with multiple interfaces, but when the interfaces are not bonded, the convention is to append a dash (`-`) and number to the host name. For example, `sdw1-1` and `sdw1-2` are the two interface names for host `sdw1`. + +:::important +When data loss is not acceptable for an Apache Cloudberry cluster, coordinator and segment mirroring is recommended. If mirroring is not enabled then Apache Cloudberry stores only one copy of the data, so the underlying storage media provides the only guarantee for data availability and correctness in the event of a hardware failure. +::: + +## Deactivate or configure SELinux + +:::note +This section applies to RHEL/Oracle/Rocky Linux only. On Ubuntu, SELinux is not installed by default — Ubuntu uses AppArmor instead, which does not require any configuration for Apache Cloudberry. Ubuntu users can skip this section. 
+::: + +For all Apache Cloudberry host systems running RHEL/Oracle/Rocky Linux, SELinux must either be `Disabled` or configured to allow unconfined access to Apache Cloudberry processes, directories, and the `gpadmin` user. + +If you choose to deactivate SELinux: + +1. As the root user, check the status of SELinux: + + ```bash + # sestatus + + SELinux status: disabled + ``` + +2. If SELinux is not deactivated, deactivate it by editing the `/etc/selinux/config` file. As root, change the value of the `SELINUX` parameter in the `config` file as follows: + + ```bash + SELINUX=disabled + ``` + +3. If the System Security Services Daemon (SSSD) is installed on your systems, edit the SSSD configuration file and set the `selinux_provider` parameter to `none` to prevent SELinux-related SSH authentication denials that could occur even with SELinux deactivated. As root, edit `/etc/sssd/sssd.conf` and add this parameter: + + ``` + selinux_provider=none + ``` + +4. Reboot the system to apply any changes that you made and verify that SELinux is deactivated. + +If you choose to enable SELinux in `Enforcing` mode, then Apache Cloudberry processes and users can operate successfully in the default `Unconfined` context. If you require increased SELinux confinement for Apache Cloudberry processes and users, you must test your configuration to ensure that there are no functionality or performance impacts to Apache Cloudberry. + +## Deactivate or configure firewall software + +You should also deactivate firewall software such as `firewalld` (on RHEL systems) or `ufw` (on Ubuntu systems, deactivated by default). If firewall software is not deactivated, you must instead configure your software to allow required communication between Apache Cloudberry hosts. 
+ + +- For RHEL/Rocky Linux (firewalld) + + Check the status of `firewalld`: + + ```bash + systemctl status firewalld + ``` + + If `firewalld` is deactivated, the command output is: + + ``` + * firewalld.service - firewalld - dynamic firewall daemon + Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; vendor preset: enabled) + Active: inactive (dead) + ``` + + If necessary, run these commands as root to deactivate `firewalld`: + + ```bash + systemctl stop firewalld.service + systemctl disable firewalld.service + ``` + +- For Ubuntu (ufw) + + On Ubuntu, `ufw` is disabled by default. Verify the status with: + + ```bash + ufw status + ``` + + If the output is `Status: inactive`, no action is needed. If it is active, disable it: + + ```bash + ufw disable + ``` + +See the documentation for the firewall or your operating system for additional information. + +## Recommended OS parameters settings + +Apache Cloudberry requires that certain Linux operating system (OS) parameters be set on all hosts in your Apache Cloudberry system (coordinators and segments). + +In general, the following categories of system parameters need to be altered: + +- **Shared Memory** - An Apache Cloudberry instance will not work unless the shared memory segment for your kernel is properly sized. Most default OS installations have the shared memory values set too low for Apache Cloudberry. On Linux systems, you must also deactivate the OOM (out of memory) killer. +- **Network** - On high-volume Apache Cloudberry systems, certain network-related tuning parameters must be set to optimize network connections made by the Cloudberry interconnect. +- **User Limits** - User limits control the resources available to processes started by a user's shell. Apache Cloudberry requires a higher limit on the allowed number of file descriptors that a single process can have open. 
The default settings may cause some Apache Cloudberry queries to fail because they will run out of file descriptors needed to process the query. + +More specifically, you need to edit the following Linux configuration settings: + +- [The hosts file](#the-hosts-file) +- [The sysctl.conf file](#the-sysctlconf-file) +- [System resources limits](#system-resources-limits) +- [Core dump](#core-dump) +- [XFS mount options](#xfs-mount-options) +- [Disk I/O settings](#disk-io-settings) +- [Networking](#networking) +- [Transparent Huge Pages (THP)](#transparent-huge-pages-thp) +- [IPC object removal](#ipc-object-removal) +- [SSH connection threshold](#ssh-connection-threshold) + +### The hosts file + +Edit the `/etc/hosts` file and make sure that it includes the host names and all interface address names for every machine participating in your Apache Cloudberry system. + +First, use the `hostnamectl` command to set the hostname on each host. The hostname should only include letters, numbers, and the hyphen (`-`). It is recommended to use all lowercase letters. Each hostname must be globally unique across all hosts. For example: + +```bash +# Run on each host respectively +hostnamectl set-hostname cdw # on coordinator +hostnamectl set-hostname scdw # on standby coordinator +hostnamectl set-hostname sdw1 # on segment 1 +hostnamectl set-hostname sdw2 # on segment 2 +hostnamectl set-hostname sdw3 # on segment 3 + +# Verify (takes effect immediately) +hostname + +# Refresh the shell prompt to show the new hostname +exec bash +``` + +Then, edit the `/etc/hosts` file on all hosts to add the mappings of all host names to their IP addresses. For example: + +``` +192.168.1.1 cdw +192.168.1.2 scdw +192.168.1.3 sdw1 +192.168.1.4 sdw2 +192.168.1.5 sdw3 +``` + +### The sysctl.conf file + +The `sysctl.conf` parameters listed in this topic are for performance, optimization, and consistency in a wide variety of environments. 
Change these settings according to your specific situation and setup. + +Set the parameters in the `/etc/sysctl.conf` file and reload with `sysctl -p`: + +```conf +# kernel.shmall = _PHYS_PAGES / 2 # See Shared Memory Pages +kernel.shmall = 197951838 +# kernel.shmmax = kernel.shmall * PAGE_SIZE +kernel.shmmax = 810810728448 +kernel.shmmni = 4096 +vm.overcommit_memory = 2 # See Segment Host Memory +vm.overcommit_ratio = 95 # See Segment Host Memory + +net.ipv4.ip_local_port_range = 10000 65535 # See Port Settings +kernel.sem = 250 2048000 200 8192 +kernel.sysrq = 1 +kernel.core_uses_pid = 1 +kernel.msgmnb = 65536 +kernel.msgmax = 65536 +kernel.msgmni = 2048 +net.ipv4.tcp_syncookies = 1 +net.ipv4.conf.default.accept_source_route = 0 +net.ipv4.tcp_max_syn_backlog = 4096 +net.ipv4.conf.all.arp_filter = 1 +net.ipv4.ipfrag_high_thresh = 41943040 +net.ipv4.ipfrag_low_thresh = 31457280 +net.ipv4.ipfrag_time = 60 +net.core.netdev_max_backlog = 10000 +net.core.rmem_max = 2097152 +net.core.wmem_max = 2097152 +vm.swappiness = 10 +vm.zone_reclaim_mode = 0 +vm.dirty_expire_centisecs = 500 +vm.dirty_writeback_centisecs = 100 +vm.dirty_background_ratio = 0 # See System Memory +vm.dirty_ratio = 0 +vm.dirty_background_bytes = 1610612736 +vm.dirty_bytes = 4294967296 +``` + +#### Shared memory pages + +Apache Cloudberry uses shared memory to communicate between `postgres` processes that are part of the same `postgres` instance. `kernel.shmall` sets the total amount of shared memory, in pages, that can be used system wide. `kernel.shmmax` sets the maximum size of a single shared memory segment in bytes. + +Set `kernel.shmall` and `kernel.shmmax` values based on your system's physical memory and page size. In general, the value for both parameters should be one half of the system physical memory. + +Use the operating system variables `_PHYS_PAGES` and `PAGE_SIZE` to set the parameters. 
+ +``` +kernel.shmall = ( _PHYS_PAGES / 2) +kernel.shmmax = ( _PHYS_PAGES / 2) * PAGE_SIZE +``` + +To calculate the values for `kernel.shmall` and `kernel.shmmax`, run the following commands using the `getconf` command, which returns the value of an operating system variable. + +```bash +$ echo $(expr $(getconf _PHYS_PAGES) / 2) +$ echo $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE)) +``` + +As best practice, we recommend you set the following values in the `/etc/sysctl.conf` file using calculated values. For example, a host system has 1583 GB of memory installed and returns these values: `_PHYS_PAGES = 395903676` and `PAGE_SIZE = 4096`. These would be the `kernel.shmall` and `kernel.shmmax` values: + +``` +kernel.shmall = 197951838 +kernel.shmmax = 810810728448 +``` + +If the Apache Cloudberry coordinator has a different shared memory configuration than the segment hosts, the `_PHYS_PAGES` and `PAGE_SIZE` values might differ, and the `kernel.shmall` and `kernel.shmmax` values on the coordinator host will differ from those on the segment hosts. + +#### Segment host memory + +The `vm.overcommit_memory` Linux kernel parameter is used by the OS to determine how much memory can be allocated to processes. For Apache Cloudberry, this parameter should always be set to 2. + +`vm.overcommit_ratio` is the percent of RAM that is used for application processes and the remainder is reserved for the operating system. The default is 50 on Red Hat Enterprise Linux. 
+ +`vm.overcommit_ratio` is calculated as follows: + +``` +vm.overcommit_ratio = (RAM - 0.026 * gp_vmem) / RAM +``` + +The calculation method of `gp_vmem` is as follows: + +``` +# If the system memory is less than 256 GB, use the following formula to calculate: +gp_vmem = ((SWAP + RAM) - (7.5GB + 0.05 * RAM)) / 1.7 + +# If the system memory is greater than or equal to 256 GB, use the following formula to calculate: +gp_vmem = ((SWAP + RAM) - (7.5GB + 0.05 * RAM)) / 1.17 + +# In the above formulas, SWAP is the swap space on the host, in GB. +# RAM is the size of the memory installed on the host, in GB. +``` + +#### Port settings + +To avoid port conflicts between Apache Cloudberry and other applications during initialization, make a note of the port range specified by the operating system parameter `net.ipv4.ip_local_port_range`. When initializing Apache Cloudberry using the `gpinitsystem` cluster configuration file, do not specify Apache Cloudberry ports in that range. + +For example, if `net.ipv4.ip_local_port_range = 10000 65535`, set the Apache Cloudberry base port numbers to these values. + +``` +PORT_BASE = 6000 +MIRROR_PORT_BASE = 7000 +``` + +#### IP fragmentation settings + +When the Apache Cloudberry interconnect uses UDP (the default), the network interface card controls IP packet fragmentation and reassembly. + +If the UDP message size is larger than the size of the maximum transmission unit (MTU) of a network, the IP layer fragments the message. (Refer to [Networking](#networking) later in this topic for more information about MTU sizes for Apache Cloudberry.) The receiver must store the fragments in a buffer before it can reorganize and reassemble the message. 
+ +The following `sysctl.conf` operating system parameters control the reassembly process: + +| OS Parameter | Description | +|---|---| +| `net.ipv4.ipfrag_high_thresh` | The maximum amount of memory used to reassemble IP fragments before the kernel starts to remove fragments to free up resources. The default value is 4194304 bytes (4MB). | +| `net.ipv4.ipfrag_low_thresh` | The minimum amount of memory used to reassemble IP fragments. The default value is 3145728 bytes (3MB). (Deprecated after kernel version 4.17.) | +| `net.ipv4.ipfrag_time` | The maximum amount of time (in seconds) to keep an IP fragment in memory. The default value is 30. | + +The recommended settings for these parameters for Apache Cloudberry follow: + +```conf +net.ipv4.ipfrag_high_thresh = 41943040 +net.ipv4.ipfrag_low_thresh = 31457280 +net.ipv4.ipfrag_time = 60 +``` + +#### System memory + +For host systems with more than 64GB of memory, these settings are recommended: + +```conf +vm.dirty_background_ratio = 0 +vm.dirty_ratio = 0 +vm.dirty_background_bytes = 1610612736 # 1.5GB +vm.dirty_bytes = 4294967296 # 4GB +``` + +For host systems with 64GB of memory or less, remove `vm.dirty_background_bytes` and `vm.dirty_bytes` and set the two `ratio` parameters to these values: + +```conf +vm.dirty_background_ratio = 3 +vm.dirty_ratio = 10 +``` + +Increase `vm.min_free_kbytes` to ensure `PF_MEMALLOC` requests from network and storage drivers are easily satisfied. This is especially critical on systems with large amounts of system memory. The default value is often far too low on these systems. Use this awk command to set `vm.min_free_kbytes` to a recommended 3% of system physical memory: + +```bash +awk 'BEGIN {OFMT = "%.0f";} /MemTotal/ {print "vm.min_free_kbytes =", $2 * .03;}' /proc/meminfo >> /etc/sysctl.conf +``` + +Do not set `vm.min_free_kbytes` to higher than 5% of system memory as doing so might cause out of memory conditions. 
+ +### System resources limits + +Set the following parameters in the `/etc/security/limits.conf` file: + +``` +* soft nofile 524288 +* hard nofile 524288 +* soft nproc 131072 +* hard nproc 131072 +``` + +For Red Hat Enterprise Linux (RHEL) systems, parameter values in the `/etc/security/limits.d/20-nproc.conf` file override the values in the `limits.conf` file. Ensure that any parameters in the override file are set to the required value. The Linux module `pam_limits` sets user limits by reading the values from the `limits.conf` file and then from the override file. For information about PAM and user limits, see the documentation on PAM and `pam_limits`. + +Run the `ulimit -u` command on each segment host to display the maximum number of processes that are available to each user. Validate that the return value is 131072. + +### Core dump + +Enable core file generation to a known location by adding the following line to `/etc/sysctl.conf`: + +```conf +kernel.core_pattern=/var/core/core.%h.%t +``` + +Add the following line to `/etc/security/limits.conf`: + +``` +* soft core unlimited +``` + +To apply the changes to the live kernel, run the following command: + +```bash +# sysctl -p +``` + +### XFS mount options + +XFS is the preferred data storage file system on Linux platforms for Apache Cloudberry data directories. Use the `mount` command with the following recommended XFS mount options: + +``` +rw,nodev,noatime,inode64 +``` + +:::note +The `nobarrier` option is not supported on RHEL 8/9 or Ubuntu systems. Use only the options `rw,nodev,noatime,inode64`. +::: + +:::tip +If you are using cloud VMs with only a single system disk (no additional data disk), you can skip the disk formatting and mounting steps below. Simply create the data directory on the existing filesystem: + +```bash +mkdir -p /data +chown -R gpadmin:gpadmin /data +``` +::: + +If you have a dedicated data disk, set up XFS file mounting in the `/etc/fstab` file. 
Adjust the device path according to your actual situation: + +```bash +mkdir -p /data +mkfs.xfs -f /dev/sdb + +# Use device name directly +echo "/dev/sdb /data xfs rw,nodev,noatime,inode64 0 0" >> /etc/fstab +# Or use UUID for stable identification across reboots (recommended for cloud/VM environments) +# echo "UUID=$(blkid -s UUID -o value /dev/sdb) /data xfs rw,nodev,noatime,inode64 0 0" >> /etc/fstab + +mount /data +chown -R gpadmin:gpadmin /data +``` + +Run the following command to check whether the mounting is successful: + +```bash +df -h +``` + +### Disk I/O settings + +#### Read-ahead value + +Each disk device file should have a read-ahead (`blockdev`) value of 16384. To verify the read-ahead value of a disk device: + +```bash +# sudo /sbin/blockdev --getra <devname> +``` + +For example: + +```bash +# sudo /sbin/blockdev --getra /dev/sdb +``` + +To set blockdev (read-ahead) on a device: + +```bash +# sudo /sbin/blockdev --setra <sectors> <devname> +``` + +For example: + +```bash +# sudo /sbin/blockdev --setra 16384 /dev/sdb +``` + +:::note +The `blockdev --setra` command is not persistent. You must ensure the read-ahead value is set whenever the system restarts. How to set the value will vary based on your system. +::: + +One method to set the `blockdev` value at system startup is by adding the `/sbin/blockdev --setra` command in the `rc.local` file. For example, add this line to the `rc.local` file to set the read-ahead value for the disk `sdb`. + +``` +/sbin/blockdev --setra 16384 /dev/sdb +``` + +On systems that use systemd, you must also set the execute permissions on the `rc.local` file to enable it to run at startup. For example, on a RHEL system, this command sets execute permissions on the file. + +```bash +# chmod +x /etc/rc.d/rc.local +``` + +Restart the system to have the setting take effect. + +#### Disk I/O scheduler + +The Linux disk scheduler orders the I/O requests submitted to a storage device, controlling the way the kernel commits reads and writes to disk. 
+ +A typical Linux disk I/O scheduler supports multiple access policies. The optimal policy selection depends on the underlying storage infrastructure. The recommended scheduler policy settings for Apache Cloudberry systems for specific OSs and storage device types follow: + +| Storage Device Type | OS | Recommended Scheduler Policy | +|---|---|---| +| Non-Volatile Memory Express (NVMe) | RHEL 8/9, Ubuntu | `none` | +| Solid-State Drives (SSD) | RHEL 8/9, Ubuntu | `none` | +| Other | RHEL 8/9, Ubuntu | `mq-deadline` | + +To specify a scheduler until the next system reboot, run the following: + +```bash +# echo schedulername > /sys/block/<devname>/queue/scheduler +``` + +For example: + +```bash +# echo mq-deadline > /sys/block/sdb/queue/scheduler +``` + +:::note +Using the `echo` command to set the disk I/O scheduler policy is not persistent; you must ensure that you run the command whenever the system reboots. How to run the command will vary based on your system. +::: + +To specify the I/O scheduler at boot time on systems that use `grub2`, use the system utility `grubby` on RHEL, or edit the GRUB configuration file directly on Ubuntu. + +To permanently set the I/O scheduler at boot time: + +- For RHEL/Rocky Linux — use `grubby`. This command adds the parameter when run as `root`: + + ```bash + # grubby --update-kernel=ALL --args="elevator=mq-deadline" + ``` + + This `grubby` command displays kernel parameter settings: + + ```bash + # grubby --info=ALL + ``` + +- For Ubuntu — edit `/etc/default/grub` and run `update-grub`: + + ```bash + # sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 elevator=mq-deadline"/' /etc/default/grub + + # update-grub + ``` + +After adding the parameter, reboot the system. + +### Networking + +The maximum transmission unit (MTU) of a network specifies the size (in bytes) of the largest data packet/frame accepted by a network-connected device. A jumbo frame is a frame that contains more than the standard MTU of 1500 bytes. 
+ +Apache Cloudberry utilizes 3 distinct MTU settings: + +- The Apache Cloudberry `gp_max_packet_size` server configuration parameter. The default max packet size is 8192. This default assumes a jumbo frame MTU. +- The operating system MTU setting. +- The rack switch MTU setting. + +These settings are connected, in that they should always be either the same, or close to the same, value, or otherwise in the order of Apache Cloudberry < OS < switch for MTU size. + +9000 is a common supported setting for switches, and is the recommended OS and rack switch MTU setting for your Apache Cloudberry hosts. + +### Transparent Huge Pages (THP) + +Deactivate Transparent Huge Pages (THP) as it degrades Apache Cloudberry performance. + +First, check the current THP status: + +```bash +cat /sys/kernel/mm/*transparent_hugepage/enabled +``` + +If the output shows `[never]`, THP is already disabled. Otherwise, disable it permanently: + + - For RHEL/Rocky Linux — use `grubby`. This command adds the parameter when run as root: + + ```bash + # grubby --update-kernel=ALL --args="transparent_hugepage=never" + ``` + + - For Ubuntu — edit `/etc/default/grub` and run `update-grub`: + + ```bash + # sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 transparent_hugepage=never"/' /etc/default/grub + + # update-grub + ``` + +After adding the parameter, reboot the system. + +This cat command checks the state of THP. The output indicates that THP is deactivated: + +```bash +cat /sys/kernel/mm/*transparent_hugepage/enabled +# Expected output: always madvise [never] +``` + +### IPC object removal + +Deactivate IPC object removal. The default `systemd` setting `RemoveIPC=yes` removes IPC connections when non-system user accounts log out. This causes the Apache Cloudberry utility `gpinitsystem` to fail with semaphore errors. Perform one of the following to avoid this issue. 
+ +- When you add the `gpadmin` operating system user account to the coordinator node in [Creating the Apache Cloudberry Administrative User](#creating-the-cloudberry-administrative-user), create the user as a system account. +- Deactivate `RemoveIPC`. Set this parameter in `/etc/systemd/logind.conf` on the Apache Cloudberry host systems. + + ``` + RemoveIPC=no + ``` + + The setting takes effect after restarting the `systemd-logind` service or rebooting the system. To restart the service, run this command as the root user. + + ```bash + service systemd-logind restart + ``` + +### SSH connection threshold + +Certain Apache Cloudberry management utilities including `gpexpand`, `gpinitsystem`, and `gpaddmirrors`, use secure shell (SSH) connections between systems to perform their tasks. In large Apache Cloudberry deployments, cloud deployments, or deployments with a large number of segments per host, these utilities may exceed the host's maximum threshold for unauthenticated connections. When this occurs, you receive errors such as: `ssh_exchange_identification: Connection closed by remote host`. + +To increase this connection threshold for your Apache Cloudberry system, update the SSH `MaxStartups` and `MaxSessions` configuration parameters in the `/etc/ssh/sshd_config` SSH daemon configuration file. + +:::note +You must have root permission to edit this file. +::: + +If you specify `MaxStartups` and `MaxSessions` using a single integer value, you identify the maximum number of concurrent unauthenticated connections (`MaxStartups`) and maximum number of open shell, login, or subsystem sessions permitted per network connection (`MaxSessions`). For example: + +``` +MaxStartups 200 +MaxSessions 200 +``` + +If you specify `MaxStartups` using the "start:rate:full" syntax, you enable random early connection drop by the SSH daemon. `start` identifies the maximum number of unauthenticated SSH connection attempts allowed. 
Once `start` number of unauthenticated connection attempts is reached, the SSH daemon refuses `rate` percent of subsequent connection attempts. `full` identifies the maximum number of unauthenticated connection attempts after which all attempts are refused. For example: + +``` +MaxStartups 10:30:200 +MaxSessions 200 +``` + +Restart the SSH daemon after you update `MaxStartups` and `MaxSessions`: + +```bash +# service sshd restart +``` + +## Synchronizing system clocks + +You must use NTP (Network Time Protocol) to synchronize the system clocks on all hosts that comprise your Apache Cloudberry system. Accurate time keeping is essential to ensure reliable operations on the database and data integrity. + +There are many different architectures you may choose from to implement NTP. We recommend you use one of the following: + +- Configure coordinator as the NTP primary source and the other hosts in the cluster connect to it. +- Configure an external NTP primary source and all hosts in the cluster connect to it. + +Depending on your operating system version, the NTP protocol may be implemented by the `ntpd` daemon, the `chronyd` daemon, or other. Refer to your preferred NTP protocol documentation for more details. + +### Option 1: Configure system clocks with the coordinator as the primary source + +1. On the coordinator host, log in as root and edit your NTP daemon configuration file. Set the `server` parameter to point to your data center's NTP time server. For example (if `10.6.220.20` was the IP address of your data center's NTP server): + + ``` + server 10.6.220.20 + ``` + +2. On each segment host, log in as root and edit your NTP daemon configuration file. Set the first `server` parameter to point to the coordinator host, and the second server parameter to point to the standby coordinator host. For example: + + ``` + server cdw prefer + server scdw + ``` + +3. On the standby coordinator host, log in as root and edit your NTP daemon configuration file. 
Set the first `server` parameter to point to the primary coordinator host, and the second server parameter to point to your data center's NTP time server. For example: + + ``` + server cdw prefer + server 10.6.220.20 + ``` + +4. Synchronize the system clocks on all Apache Cloudberry hosts as root. + + If you are using the `ntpd` daemon: + + ```bash + systemctl restart ntpd + ``` + + If you are using the `chronyd` daemon: + + ```bash + # For RHEL/Rocky Linux + systemctl restart chronyd + + # For Ubuntu (restart works with either name, but enable requires 'chrony') + systemctl restart chronyd + ``` + +### Option 2: Configure system clocks with an external primary source + +1. On each host, including coordinator, standby coordinator, and segments, log in as root and edit your NTP daemon configuration file. Set the first `server` parameter to point to your data center's NTP time server. For example (if `10.6.220.20` was the IP address of your data center's NTP server): + + ``` + server 10.6.220.20 + ``` + +2. On the coordinator host, use your NTP daemon to synchronize the system clocks on all Apache Cloudberry hosts. For example, using `gpssh`: + + If you are using the `ntpd` daemon: + + ```bash + gpssh -f hostfile_gpssh_allhosts -v -e 'systemctl restart ntpd' + ``` + + If you are using the `chronyd` daemon: + + ```bash + gpssh -f hostfile_gpssh_allhosts -v -e 'systemctl restart chronyd' + ``` + +## Creating the Cloudberry administrative user + +Create a dedicated operating system user account on each node to run and administer Apache Cloudberry. This user account is named `gpadmin` by convention. + +:::important +You cannot run the Apache Cloudberry server as `root`. +::: + +The `gpadmin` user must have permission to access the services and directories required to install and run Apache Cloudberry. 
+ +The `gpadmin` user on each Apache Cloudberry host must have an SSH key pair installed and be able to SSH from any host in the cluster to any other host in the cluster without entering a password or passphrase (called "passwordless SSH"). If you enable passwordless SSH from the coordinator host to every other host in the cluster ("1-*n* passwordless SSH"), you can use the Apache Cloudberry `gpssh-exkeys` command-line utility later to enable passwordless SSH from every host to every other host ("*n*-*n* passwordless SSH"). + +You can optionally give the `gpadmin` user sudo privilege, so that you can easily administer all hosts in the Apache Cloudberry cluster as `gpadmin` using the `sudo`, `ssh/rsync`, and `gpssh/gpsync` commands. + +The following steps show how to set up the `gpadmin` user on a host, set a password, create an SSH key pair, and (optionally) enable sudo capability. These steps must be performed as root on every Apache Cloudberry cluster host. (For a large Apache Cloudberry cluster you will want to automate these steps using your system provisioning tools.) + +1. Create the `gpadmin` group and user. + + :::note + Make sure the `gpadmin` user has the same user id (uid) and group id (gid) numbers on each host to prevent problems with scripts or services that use them for identity or permissions. For example, backing up Apache Cloudberry databases to some networked file systems or storage appliances could fail if the `gpadmin` user has different uid or gid numbers on different segment hosts. When you create the `gpadmin` group and user, you can use the `groupadd -g` option to specify a gid number and the `useradd -u` option to specify the uid number. Use the command `id gpadmin` to see the uid and gid for the `gpadmin` user on the current host. + ::: + + This example creates the `gpadmin` group, creates the `gpadmin` user as a system account with a home directory and as a member of the `gpadmin` group, and creates a password for the user. 
+ + ```bash + # groupadd gpadmin + # useradd gpadmin -r -m -g gpadmin + # passwd gpadmin + New password: + Retype new password: + ``` + +2. Switch to the `gpadmin` user and generate an SSH key pair for the `gpadmin` user. + + ```bash + $ su - gpadmin + $ ssh-keygen -t rsa -b 4096 + Generating public/private rsa key pair. + Enter file in which to save the key (/home/gpadmin/.ssh/id_rsa): + Created directory '/home/gpadmin/.ssh'. + Enter passphrase (empty for no passphrase): + Enter same passphrase again: + ``` + + At the passphrase prompts, press Enter so that SSH connections will not require entry of a passphrase. + +3. Grant sudo access to the `gpadmin` user. + + **For RHEL/Rocky Linux** — run `visudo` and uncomment the `%wheel` group entry: + + ``` + %wheel ALL=(ALL) NOPASSWD: ALL + ``` + + Make sure you uncomment the line that has the `NOPASSWD` keyword. Then add `gpadmin` to the `wheel` group: + + ```bash + # usermod -aG wheel gpadmin + ``` + + **For Ubuntu** — add `gpadmin` to the `sudo` group and create a sudoers drop-in file for passwordless sudo: + + ```bash + usermod -aG sudo gpadmin + echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin + chmod 440 /etc/sudoers.d/gpadmin + ``` diff --git a/docs/deployment/quick-deploy.md b/docs/deployment/quick-deploy.md new file mode 100644 index 00000000000..f0744d5b4f5 --- /dev/null +++ b/docs/deployment/quick-deploy.md @@ -0,0 +1,485 @@ +--- +title: Quick Deployment Guide +--- + +# Quick Deployment Guide + +This guide provides a streamlined, copy-paste-friendly procedure for experienced administrators. 
This guide shows how to deploy a 5-node Apache Cloudberry cluster: + +- 1 Coordinator (`cdw`) +- 1 Standby Coordinator (`scdw`) +- 3 Segments (`sdw1`, `sdw2`, `sdw3`), each with 2 primary and 2 mirror instances + +:::info +For detailed explanations of each step, refer to the full deployment guides: +[Configuring Your Systems](./prepare-to-deploy.md), [Installing Using RPM/DEB Package](./install_cloudberry.md), [Creating the Data Storage Areas](./create_data_dirs.md), and [Initializing Apache Cloudberry](./init_cloudberry.md). +::: + +## Cluster layout + +| Host | Role | IP (example) | +|------|------|--------------| +| cdw | Coordinator | 192.168.1.1 | +| scdw | Standby Coordinator | 192.168.1.2 | +| sdw1 | Segment 1 | 192.168.1.3 | +| sdw2 | Segment 2 | 192.168.1.4 | +| sdw3 | Segment 3 | 192.168.1.5 | + +Data directories used: + +- Coordinator/Standby: `/data/coordinator` +- Segment primary: `/data/primary` (2 instances per host) +- Segment mirror: `/data/mirror` (2 instances per host) + +--- + +## 1. Configuring your systems (as root on all hosts) + +### 1.1 Deactivate SELinux + +:::note +This step applies to RHEL/Oracle/Rocky Linux only. On Ubuntu, SELinux is not installed by default and this step can be skipped. 
+::: + +```bash +# Check current status +sestatus + +# If not disabled, deactivate it +sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config + +# Reboot is required to take effect +``` + +### 1.2 Deactivate firewall + +**For RHEL/Rocky Linux (firewalld):** + +```bash +# Check current status +systemctl status firewalld + +# If not disabled, deactivate it +systemctl stop firewalld.service +systemctl disable firewalld.service +``` + +**For Ubuntu (ufw):** + +```bash +# Check current status (disabled by default) +ufw status + +# If active, disable it +ufw disable +``` + +### 1.3 Set the hosts file + +Set the hostname on each host (takes effect immediately, no reboot required): + +```bash +# Run on each host respectively +hostnamectl set-hostname cdw # on coordinator +hostnamectl set-hostname scdw # on standby coordinator +hostnamectl set-hostname sdw1 # on segment 1 +hostnamectl set-hostname sdw2 # on segment 2 +hostnamectl set-hostname sdw3 # on segment 3 + +# Verify +hostname + +# Refresh the shell prompt to show the new hostname +exec bash +``` + +Edit `/etc/hosts` on all hosts: + +```bash +cat >> /etc/hosts <> /etc/sysctl.conf <> /etc/security/limits.conf <> /etc/fstab +# Or use UUID for stable identification across reboots (recommended for cloud/VM environments) +# echo "UUID=$(blkid -s UUID -o value /dev/vdb1) /data xfs rw,nodev,noatime,inode64 0 0" >> /etc/fstab + +mount /data +``` + +### 1.7 Disk I/O settings + +```bash +# Set read-ahead value +/sbin/blockdev --setra 16384 /dev/sdb + +# Persist read-ahead across reboots +echo '/sbin/blockdev --setra 16384 /dev/sdb' >> /etc/rc.d/rc.local +chmod +x /etc/rc.d/rc.local + +# Set I/O scheduler permanently (for non-NVMe/SSD disks) +# For RHEL/Rocky Linux: +grubby --update-kernel=ALL --args="elevator=mq-deadline" +# For Ubuntu: +sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 elevator=mq-deadline"/' /etc/default/grub && update-grub +``` + +### 1.8 Disable Transparent Huge Pages + +```bash +# Check 
current THP status +cat /sys/kernel/mm/*transparent_hugepage/enabled + +# If not [never], disable it permanently +# For RHEL/Rocky Linux: +grubby --update-kernel=ALL --args="transparent_hugepage=never" +# For Ubuntu: +sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 transparent_hugepage=never"/' /etc/default/grub && update-grub + +# Reboot is required to take effect +``` + +### 1.9 Disable IPC object removal + +```bash +sed -i 's/^#RemoveIPC=.*/RemoveIPC=no/' /etc/systemd/logind.conf +service systemd-logind restart +``` + +### 1.10 SSH connection threshold + +```bash +sed -i 's/^#MaxStartups.*/MaxStartups 10:30:200/' /etc/ssh/sshd_config +sed -i 's/^#MaxSessions.*/MaxSessions 200/' /etc/ssh/sshd_config +service sshd restart +``` + +### 1.11 Synchronize system clocks + +```bash +# For RHEL/Rocky Linux +systemctl enable chronyd +systemctl restart chronyd +chronyc tracking + +# For Ubuntu (enable must use 'chrony', restart works with either name) +systemctl enable chrony +systemctl restart chronyd +chronyc tracking +``` + +### 1.12 Create the gpadmin administrative user + +```bash +groupadd gpadmin +useradd gpadmin -r -m -g gpadmin +passwd gpadmin + +# Grant passwordless sudo +# For RHEL/Rocky Linux: uncomment the %wheel NOPASSWD line in sudoers +# Or use visudo to uncomment: %wheel ALL=(ALL) NOPASSWD: ALL +sed -i 's/^# %wheel\tALL=(ALL)\tNOPASSWD: ALL/%wheel\tALL=(ALL)\tNOPASSWD: ALL/' /etc/sudoers +usermod -aG wheel gpadmin + +# For Ubuntu: add gpadmin to sudo group and create a sudoers drop-in file +usermod -aG sudo gpadmin +echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin +chmod 440 /etc/sudoers.d/gpadmin + +# Set data directory ownership to gpadmin +chown -R gpadmin:gpadmin /data +``` + +:::note +Make sure the `gpadmin` user has the same UID/GID across all hosts. Use `id gpadmin` to verify, or pass `-g ` to `groupadd` and `-u ` to `useradd` to set fixed values. +::: + +--- + +## 2. 
Install the Apache Cloudberry package (as root on each host) + +Download the package from [Apache Cloudberry Releases](https://cloudberry.apache.org/releases) to `/home/gpadmin/` on every host, then install: + +```bash +# For RPM (Rocky Linux, RHEL, etc.) +dnf install /home/gpadmin/apache-cloudberry-db-incubating-*.rpm + +# For DEB (Ubuntu) +# apt install --fix-broken /home/gpadmin/apache-cloudberry-db-incubating-*.deb + +# Set ownership +chown -R gpadmin:gpadmin /usr/local/cloudberry* +``` + +--- + +## 3. Enable passwordless SSH (as gpadmin on cdw) + +```bash +su - gpadmin +source /usr/local/cloudberry-db/cloudberry-env.sh + +# Generate SSH key (press Enter at passphrase prompts) +ssh-keygen -t rsa -b 4096 -N "" -f ~/.ssh/id_rsa + +# Enable 1-n passwordless SSH (will prompt for gpadmin password) +ssh-copy-id cdw +ssh-copy-id scdw +ssh-copy-id sdw1 +ssh-copy-id sdw2 +ssh-copy-id sdw3 + +# Create the host file for gpssh-exkeys +cat > ~/hostfile_exkeys < ~/hostfile_gpinitsystem < ~/gpconfigs/gpinitsystem_config < Apache Cloudberry instance successfully created. +``` + +:::tip +The `-s scdw` option initializes the standby coordinator during cluster creation. If you omit `-s` during `gpinitsystem`, you can initialize the standby separately afterwards: + +```bash +gpinitstandby -s scdw +``` + +Verify the standby is synchronized: + +```bash +gpstate -f +``` +::: + +### 5.4 Set the timezone (optional) + +```bash +gpconfig -c TimeZone -v 'US/Pacific' +gpstop -ra +``` + +### 5.5 Set environment variables + +```bash +cat >> ~/.bashrc < NVMe - RHEL 7 - none RHEL 8 @@ -276,8 +278,6 @@ The disk type, operating system, and scheduling policies of Apache Cloudberry ar SSD - RHEL 7 - noop RHEL 8 @@ -289,8 +289,6 @@ The disk type, operating system, and scheduling policies of Apache Cloudberry ar Other - RHEL 7 - deadline RHEL 8 @@ -398,29 +396,35 @@ systemctl status chronyd ### Step 2. Install Apache Cloudberry -1. Download the RPM package to the home directory of `gpadmin`. 
+:::info +Starting from Apache Cloudberry 2.1, RPM and DEB packages are officially provided. RPM packages support Rocky Linux 8/9, RHEL 8/9, and compatible distributions. DEB packages support Ubuntu 22.04. +::: - ```bash - wget -P /home/gpadmin - ``` +1. Download the package to the home directory of `gpadmin`. -2. Install the RPM package in the `/home/gpadmin` directory. +2. Install the package in the `/home/gpadmin` directory. - When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry/` is automatically created. + When running the following command, you need to replace `` with the actual package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry-db/` is automatically created. ```bash cd /home/gpadmin - yum install + + # For RPM (Rocky Linux, RHEL, etc.) + dnf install + # Or for older systems: yum install + + # For DEB (Ubuntu) + apt install + # Or alternatively: dpkg -i && apt-get install -f ``` -3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry/` directory. +3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry-db/` directory. ```bash - chown -R gpadmin:gpadmin /usr/local chown -R gpadmin:gpadmin /usr/local/cloudberry* ``` -4. Configure local SSH connection for the node. As the `gpadmin ` user, perform the following operations: +4. Configure local SSH connection for the node. 
As the `gpadmin` user, perform the following operations: ```bash ssh-keygen diff --git a/docs/deployment/validate.md b/docs/deployment/validate.md new file mode 100644 index 00000000000..bc3faf01aae --- /dev/null +++ b/docs/deployment/validate.md @@ -0,0 +1,72 @@ +--- +title: Validating Hardware and Network +--- + +Cloudberry provides a management utility called [gpcheckperf](/sys-utilities/gpcheckperf.md), which can be used to identify hardware and system-level issues on the machines in your Apache Cloudberry array. `gpcheckperf` starts a session on the specified hosts and runs the following performance tests: + +- Network Performance (`gpnetbench*`) +- Disk I/O Performance (`dd` test) +- Memory Bandwidth (`stream` test) + +Before using `gpcheckperf`, you must have a trusted host setup between the hosts involved in the performance test. You can use the utility [gpssh-exkeys](/sys-utilities/gpssh-exkeys.md) to update the known host files and exchange public keys between hosts if you have not done so already. Note that `gpcheckperf` calls to [gpssh](/sys-utilities/gpssh.md) and [gpsync](/sys-utilities/gpsync.md), so these Apache Cloudberry utilities must be in your `$PATH`. + +## Validating Network Performance + +To test network performance, run `gpcheckperf` with one of the network test run options: parallel pair test (`-r N`), serial pair test (`-r n`), or full matrix test (`-r M`). The utility runs a network benchmark program that transfers a 5 second stream of data from the current host to each remote host included in the test. By default, the data is transferred in parallel to each remote host and the minimum, maximum, average and median network transfer rates are reported in megabytes (MB) per second. If the summary transfer rate is slower than expected (less than 100 MB/s), you can run the network test serially using the `-r n` option to obtain per-host results. 
To run a full-matrix bandwidth test, you can specify `-r M` which will cause every host to send and receive data from every other host specified. This test is best used to validate if the switch fabric can tolerate a full-matrix workload. + +Most systems in an Apache Cloudberry array are configured with multiple network interface cards (NICs), each NIC on its own subnet. When testing network performance, it is important to test each subnet individually. For example, consider the following network configuration of two NICs per host: + +|Cloudberry Host|Subnet1 NICs|Subnet2 NICs| +|--------------|------------|------------| +|Segment 1|sdw1-1|sdw1-2| +|Segment 2|sdw2-1|sdw2-2| +|Segment 3|sdw3-1|sdw3-2| + +You would create two distinct host files for use with the `gpcheckperf` network test: + +|hostfile_gpchecknet_ic1|hostfile_gpchecknet_ic2| +|-------------------------|-------------------------| +|sdw1-1|sdw1-2| +|sdw2-1|sdw2-2| +|sdw3-1|sdw3-2| + +You would then run `gpcheckperf` once per subnet. For example (if testing an *even* number of hosts, run in parallel pairs test mode): + +``` +$ gpcheckperf -f hostfile_gpchecknet_ic1 -r N -d /tmp > subnet1.out +$ gpcheckperf -f hostfile_gpchecknet_ic2 -r N -d /tmp > subnet2.out +``` + +If you have an *odd* number of hosts to test, you can run in serial test mode (`-r n`). + +## Validating Disk I/O and Memory Bandwidth + +To test disk and memory bandwidth performance, run `gpcheckperf` with the disk and stream test run options (`-r ds`). The disk test uses the `dd` command (a standard UNIX utility) to test the sequential throughput performance of a logical disk or file system. The memory test uses the STREAM benchmark program to measure sustainable memory bandwidth. Results are reported in MB per second (MB/s). + +### Running the disk and stream tests + +1. Log in on the coordinator host as the `gpadmin` user. +2. Source the `cloudberry-env.sh` path file from your Apache Cloudberry installation. 
For example: + + ``` + $ source /usr/local/cloudberry-db/cloudberry-env.sh + ``` + +3. Create a host file named `hostfile_gpcheckperf` that has one host name per segment host. Do not include the coordinator host. For example: + + ``` + sdw1 + sdw2 + sdw3 + sdw4 + ``` + +4. Run the `gpcheckperf` utility using the `hostfile_gpcheckperf` file you just created. Use the `-d` option to specify the file systems you want to test on each host (you must have write access to these directories). You will want to test all primary and mirror segment data directory locations. For example: + + ``` + $ gpcheckperf -f hostfile_gpcheckperf -r ds -D \ +   -d /data1/primary -d /data2/primary \ +   -d /data1/mirror -d /data2/mirror + ``` + +5. The utility may take a while to perform the tests as it is copying very large files between the hosts. When it is finished you will see the summary results for the Disk Write, Disk Read, and Stream tests. diff --git a/docs/cbdb-architecture.md b/docs/introduction/cbdb-architecture.md similarity index 98% rename from docs/cbdb-architecture.md rename to docs/introduction/cbdb-architecture.md index 606f24ba441..b9a3efce19e 100644 --- a/docs/cbdb-architecture.md +++ b/docs/introduction/cbdb-architecture.md @@ -16,7 +16,7 @@ From users' view, Apache Cloudberry is a complete relational database management The architecture diagram of Apache Cloudberry is as follows: -![Apache Cloudberry Architecture](./media/cbdb-arch.png) +![Apache Cloudberry Architecture](../media/cbdb-arch.png) - **Coordinator node** (or control node) is the gateway to the Apache Cloudberry system, which accepts client connections and SQL queries, and allocates tasks to data node instances. Users interact with Apache Cloudberry by connecting to the coordinator node using a client program (such as psql) or an application programming interface (API) (such as JDBC, ODBC, or libpq PostgreSQL C API). 
- The coordinator node acts as the global system directory, containing a set of system tables that record the metadata of Apache Cloudberry. diff --git a/docs/cbdb-overview.md b/docs/introduction/cbdb-overview.md similarity index 100% rename from docs/cbdb-overview.md rename to docs/introduction/cbdb-overview.md diff --git a/docs/cbdb-scenarios.md b/docs/introduction/cbdb-scenarios.md similarity index 100% rename from docs/cbdb-scenarios.md rename to docs/introduction/cbdb-scenarios.md diff --git a/docs/cbdb-vs-gp-features.md b/docs/introduction/cbdb-vs-gp-features.md similarity index 100% rename from docs/cbdb-vs-gp-features.md rename to docs/introduction/cbdb-vs-gp-features.md diff --git a/docs/performance/memory-overview.md b/docs/performance/memory-overview.md index aa89106fe17..7b99160d4ad 100644 --- a/docs/performance/memory-overview.md +++ b/docs/performance/memory-overview.md @@ -34,7 +34,7 @@ Host memory is the total memory shared by all applications on the segment host. - Allocate swap space to increase the size of virtual memory. - Adjust the kernel parameter `vm.overcommit_ratio` to configure how the operating system handles large memory allocation requests. -The physical RAM and OS configuration are usually managed by the platform team and system administrators. See the [Software and Hardware Requirements](../cbdb-op-software-hardware.md) for the recommended kernel parameters and for how to set the `/etc/sysctl.conf` file parameters. +The physical RAM and OS configuration are usually managed by the platform team and system administrators. See the [Software and Hardware Requirements](../deployment/platform-requirements.md) for the recommended kernel parameters and for how to set the `/etc/sysctl.conf` file parameters. The amount of memory to reserve for the operating system and other processes is workload dependent. 
The minimum recommendation for operating system memory is 32GB, but if there is much concurrency in Apache Cloudberry, increasing to 64GB of reserved memory may be required. The largest user of operating system memory is SLAB, which increases as Apache Cloudberry concurrency and the number of sockets used increases. diff --git a/docs/sys-admin/configure-proxy.md b/docs/sys-admin/configure-proxy.md index fdb24b33269..364e01ba86b 100644 --- a/docs/sys-admin/configure-proxy.md +++ b/docs/sys-admin/configure-proxy.md @@ -6,7 +6,7 @@ title: Configure Proxies for the Interconnect You can configure a Cloudberry system to use proxies for interconnect communication to reduce the use of connections and ports during query processing. -The Cloudberry *interconnect* (the networking layer) refers to the inter-process communication between segments and the network infrastructure on which this communication relies. For information about the Cloudberry architecture and interconnect, see [About the Cloudberry Architecture](../cbdb-architecture.md). +The Cloudberry *interconnect* (the networking layer) refers to the inter-process communication between segments and the network infrastructure on which this communication relies. For information about the Cloudberry architecture and interconnect, see [About the Cloudberry Architecture](../introduction/cbdb-architecture.md). In general, when running a query, a QD (query dispatcher) on the Cloudberry coordinator creates connections to one or more QE (query executor) processes on segments, and a QE can create connections to other QEs. For a description of Cloudberry query processing and parallel query processing, see [About Cloudberry Query Processing](../performance/optimize-queries/parallel-query-execution.md). 
diff --git a/docs/sys-admin/expand-cluster/prepare-and-add-hosts.md b/docs/sys-admin/expand-cluster/prepare-and-add-hosts.md index 781f2a4bf38..293489432ce 100644 --- a/docs/sys-admin/expand-cluster/prepare-and-add-hosts.md +++ b/docs/sys-admin/expand-cluster/prepare-and-add-hosts.md @@ -13,7 +13,7 @@ Run performance tests first on the new hosts and then all hosts. Run the tests o Generally, you should run performance tests when an administrator modifies host networking or other special conditions in the system. For example, if you will run the expanded system on two network clusters, run tests on each cluster. :::note -Preparing host systems for use by a Apache Cloudberry system assumes that the new hosts' operating system has been properly configured to match the existing hosts, described in [Configuring Your Systems](../../cbdb-op-software-hardware.md#supported-os). +Preparing host systems for use by an Apache Cloudberry system assumes that the new hosts' operating system has been properly configured to match the existing hosts, described in [Configuring Your Systems](../../deployment/platform-requirements#supported-os). ::: ## Add new hosts to the trusted host environment diff --git a/docs/sys-admin/high-availability/enable-coordinator-mirroring.md b/docs/sys-admin/high-availability/enable-coordinator-mirroring.md index 5b7c7664eef..1cb5eef7254 100644 --- a/docs/sys-admin/high-availability/enable-coordinator-mirroring.md +++ b/docs/sys-admin/high-availability/enable-coordinator-mirroring.md @@ -33,7 +33,7 @@ Make sure that you have already configured a standby coordinator on a different :::note -If you follow the steps described in the [Prepare to Deploy](../../cbdb-op-prepare-to-deploy.md) and [Deploy Apache Cloudberry Manually Using RPM Package](../../cbdb-op-deploy-guide.md) topics to deploy the cluster, a host for the standby coordinator ( `cbdb-standbycoordinator`) is already configured in the cluster. 
+If you follow the steps described in the [Prepare to Deploy](../../deployment/prepare-to-deploy.md) and [Deploy Apache Cloudberry Manually Using RPM Package](../../deployment/install_cloudberry.md) topics to deploy the cluster, a host for the standby coordinator (`scdw`) is already configured in the cluster. ::: @@ -41,15 +41,15 @@ If you follow the steps described in the [Prepare to Deploy](../../cbdb-op-prepa You need to first enable the standby coordinator using the `gpinitstandby` utility: -1. Run the `gpinitstandby` utility on the currently active primary coordinator (`cbdb-coordinator`) host to add a standby coordinator host to your CBDB cluster. For example: +1. Run the `gpinitstandby` utility on the currently active primary coordinator (`cdw`) host to add a standby coordinator host to your CBDB cluster. For example: ```shell - $ gpinitstandby -s cbdb-standbycoordinator + $ gpinitstandby -s scdw ``` The `-s` option specifies the standby coordinator hostname. - You will be prompted with the following message when the initialization is completed: `-Successfully created standby coordinator on cbdb-coordinator`. + You will be prompted with the following message when the initialization is completed: `-Successfully created standby coordinator on scdw`. 2. You can run the `gpstate` utility with the `-f` option to display details of the standby coordinator host. @@ -114,13 +114,13 @@ Take the steps below to configure the failed primary coordinator to become a sta You can remove the backup directory once the standby is successfully configured. -3. Initialize a standby coordinator on the original coordinator host. For example, run this command from the current coordinator host, `cbdb-standbycoordinator`: +3. Initialize a standby coordinator on the original coordinator host. For example, run this command from the current coordinator host, `scdw`: ```shell - $ gpinitstandby -s cbdb-coordinator + $ gpinitstandby -s cdw ``` -4. 
After the initialization is completed, check the status of the standby coordinator `cbdb-coordinator`. Run `gpstate` with the `-f` option to check the standby coordinator status: +4. After the initialization is completed, check the status of the standby coordinator `cdw`. Run `gpstate` with the `-f` option to check the standby coordinator status: ```shell $ gpstate -f diff --git a/docs/sys-admin/high-availability/enable-segment-mirroring.md b/docs/sys-admin/high-availability/enable-segment-mirroring.md index 4a32699a104..c4865fd7d71 100644 --- a/docs/sys-admin/high-availability/enable-segment-mirroring.md +++ b/docs/sys-admin/high-availability/enable-segment-mirroring.md @@ -24,7 +24,7 @@ During the online data replication process, Apache Cloudberry should be in a qui ## To add segment mirrors to an existing system (different hosts from primaries) -1. Ensure the Apache Cloudberry software is installed on all hosts. See the [Apache Cloudberry Installation Guide](../../cbdb-op-deploy-guide.md) for detailed installation instructions. +1. Ensure the Apache Cloudberry software is installed on all hosts. See the [Apache Cloudberry Installation Guide](../../deployment/index.md) for detailed installation instructions. 2. Allocate the data storage area for mirror data, and tablespaces if needed, on all segment hosts. 3. Use `gpssh-exkeys` to ensure the segment hosts can SSH and remote sync to each other without a password prompt. 4. Create a configuration file that lists the host names, ports, and data directories on which to create mirrors. 
To create a sample configuration file to use as a starting point, run: diff --git a/docs/sys-admin/use-compression.md b/docs/sys-admin/use-compression.md index 0ed1d829901..96423afb498 100644 --- a/docs/sys-admin/use-compression.md +++ b/docs/sys-admin/use-compression.md @@ -14,4 +14,4 @@ You can configure support for data compression with these features and utilities - Workfiles (temporary spill files that are created when running a query that requires more memory than it is allocated) can be compressed. See the server configuration parameter `gp_workfile_compression`. - The Apache Cloudberry utilities [`gpbackup`](../sys-utilities/gpbackup.md), [`gprestore`](../sys-utilities/gprestore.md), [`gpload`](../sys-utilities/gpload.md), and [`gplogfilter`](../sys-utilities/gplogfilter.md) support compression. -For some compression algorithms (such as zlib) Apache Cloudberry requires software packages installed on the host system. For information about required software packages, see the [Apache Cloudberry Installation Guide](../cbdb-op-software-hardware.md). +For some compression algorithms (such as zlib) Apache Cloudberry requires software packages installed on the host system. For information about required software packages, see the [Apache Cloudberry Installation Guide](../deployment/index.md). diff --git a/docs/sys-utilities/gpdemo.md b/docs/sys-utilities/gpdemo.md index 7e9ce23dc51..00fda9952b7 100644 --- a/docs/sys-utilities/gpdemo.md +++ b/docs/sys-utilities/gpdemo.md @@ -52,7 +52,7 @@ NUM_PRIMARY_MIRROR_PAIRS=3 gpdemo :::info - Each segment node consists of a primary node and a mirror node. So every time the parameter value increases by `1`, 2 more nodes will be created. To better capture data distribution issues, it is recommended to set the value to an odd number. -- When the parameter value is set to 0, a single-computing-node cluster is deployed. See [Deploy Apache Cloudberry with a Single Computing Node](/docs/deploy-cbdb-with-single-node.md) for details. 
+- When the parameter value is set to 0, a single-computing-node cluster is deployed. See [Deploy Apache Cloudberry with a Single Computing Node](../deployment/single-node.md) for details. ::: #### Specify the data directory of a node diff --git a/docs/sys-utilities/gpinitsystem.md b/docs/sys-utilities/gpinitsystem.md index 273224a0f03..6ccd804dc01 100644 --- a/docs/sys-utilities/gpinitsystem.md +++ b/docs/sys-utilities/gpinitsystem.md @@ -372,4 +372,4 @@ gp7c~gp7c-2~50000~/data/mirror1/gpseg1~5~1 ## See also -[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Apache Cloudberry](/docs/start-and-stop-cbdb-database.md) +[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Apache Cloudberry](../database-basic/start-and-stop-cbdb-database.md) diff --git a/docs/tutorials/best-practices/index.md b/docs/tutorials/best-practices/index.md index 9d823cc2bc1..a48296d5fee 100644 --- a/docs/tutorials/best-practices/index.md +++ b/docs/tutorials/best-practices/index.md @@ -115,7 +115,7 @@ See [Distributions](./schema-design-best-practices.md). - Ensure that resource queue memory allocations do not exceed the setting for `gp_vmem_protect_limit`. - Dynamically update resource queue settings to match daily operations flow. -See [Setting the Cloudberry Recommended OS Parameters](../../deployment/system-settings.md). +See [Setting the Cloudberry Recommended OS Parameters](../../build/system-settings.md). 
## Partitioning diff --git a/docs/tutorials/best-practices/resource-group-best-practices.md b/docs/tutorials/best-practices/resource-group-best-practices.md index 875f08324bd..81584d6e734 100644 --- a/docs/tutorials/best-practices/resource-group-best-practices.md +++ b/docs/tutorials/best-practices/resource-group-best-practices.md @@ -22,7 +22,7 @@ The following operating system and Apache Cloudberry memory settings are signifi - **vm.overcommit_memory** - This Linux kernel parameter, set in [`/etc/sysctl.conf`](../../cbdb-op-prepare-to-deploy.md#set-system-parameters), identifies the method that the operating system uses to determine how much memory can be allocated to processes. `vm.overcommit_memory` must always be set to 2 for Apache Cloudberry systems. + This Linux kernel parameter, set in [`/etc/sysctl.conf`](../../deployment/prepare-to-deploy#the-sysctlconf-file), identifies the method that the operating system uses to determine how much memory can be allocated to processes. `vm.overcommit_memory` must always be set to 2 for Apache Cloudberry systems. - **vm.overcommit_ratio** diff --git a/docs/tutorials/best-practices/system-configuration-best-practices.md b/docs/tutorials/best-practices/system-configuration-best-practices.md index bf4af96dd40..257b7a1e38d 100644 --- a/docs/tutorials/best-practices/system-configuration-best-practices.md +++ b/docs/tutorials/best-practices/system-configuration-best-practices.md @@ -25,11 +25,11 @@ You must restart Apache Cloudberry after changing the timezone. The command `gps ## Configure the file system -XFS is the file system used for Apache Cloudberry data directories. Use the mount options described in [Configuring Your Systems](../../cbdb-op-prepare-to-deploy.md). +XFS is the file system used for Apache Cloudberry data directories. Use the mount options described in [Configuring Your Systems](../../deployment/prepare-to-deploy#xfs-mount-options). 
## Configure ports -See the [recommended OS parameter settings](../../cbdb-op-prepare-to-deploy.md#set-system-parameters) for further details. +See the [recommended OS parameter settings](../../deployment/prepare-to-deploy#the-sysctlconf-file) for further details. Set up `ip_local_port_range` so it does not conflict with the Apache Cloudberry port ranges. For example, setting this range in `/etc/sysctl.conf`: @@ -44,7 +44,7 @@ PORT_BASE = 6000 MIRROR_PORT_BASE = 7000 ``` -See the [Recommended OS Parameters Settings](../../cbdb-op-prepare-to-deploy.md#set-system-parameters) for further details. +See the [Recommended OS Parameters Settings](../../deployment/prepare-to-deploy#the-sysctlconf-file) for further details. ## Configure I/O diff --git a/sidebars.ts b/sidebars.ts index 8ea76158619..4d8f29e0f86 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -16,32 +16,52 @@ const sidebars: SidebarsConfig = { { type: 'category', label: 'Introduction', - items: ['cbdb-overview', 'cbdb-architecture', 'cbdb-scenarios', 'cbdb-vs-gp-features'] + items: ['introduction/cbdb-overview', 'introduction/cbdb-architecture', 'introduction/cbdb-scenarios', 'introduction/cbdb-vs-gp-features'] }, { type: 'category', - label: 'Deploy and Build', + label: 'Build Apache Cloudberry', items: [ - 'deployment/build-based-on-docker', + 'build/build-based-on-docker', { type: 'category', label: 'Build from Source (Complete Guide)', link: { type: "doc", - id: 'deployment/index', + id: 'build/index', }, - items: [`deployment/quick-build`, `deployment/create-gpadmin-user`, `deployment/system-settings`, `deployment/install-required-packages`, `deployment/download-source-code`, `deployment/configure`, `deployment/build-and-install`, `deployment/set-demo-cluster`, `deployment/post-installation`] + items: [`build/quick-build`, `build/create-gpadmin-user`, `build/system-settings`, `build/install-required-packages`, `build/download-source-code`, `build/configure`, `build/build-and-install`, 
`build/set-demo-cluster`, `build/post-installation`] }, - { - type: 'category', - label: 'Deploy on Physical or Virtual Machine', - items: ['cbdb-op-software-hardware', 'cbdb-op-prepare-to-deploy', 'cbdb-op-deploy-guide', 'deploy-cbdb-with-single-node'] - }, - 'deployment/sandbox' + 'build/sandbox' ] }, - + { + type: 'category', + label: 'Deploy in Production', + link: { + type: "doc", + id: "deployment/index", + }, + items: [ + 'deployment/quick-deploy', + 'deployment/platform-requirements', + 'deployment/capacity_planning', + 'deployment/prepare-to-deploy', + 'deployment/install_cloudberry', + 'deployment/create_data_dirs', + 'deployment/validate', + 'deployment/init_cloudberry', + 'deployment/login_cloudberry', + 'deployment/single-node', + 'deployment/ansible-example' + ] + }, + { + type: 'category', + label: 'Basic Database Operations', + items: ['database-basic/create-and-manage-database', 'database-basic/start-and-stop-cbdb-database', 'database-basic/connect-to-cbdb'] + }, { type: 'category', label: 'Load Data', @@ -75,12 +95,6 @@ const sidebars: SidebarsConfig = { ], }, - { - type: 'category', - label: 'Create and Prepare', - items: ['operate-with-data/operate-with-db-objects/create-and-manage-database', 'start-and-stop-cbdb-database', 'connect-to-cbdb'] - }, - { type: 'category', label: 'Operate with Data', diff --git a/versioned_docs/version-1.x/sys-utilities/gpdemo.md b/versioned_docs/version-1.x/sys-utilities/gpdemo.md index 39a5e7b84a5..6d948bfffd0 100644 --- a/versioned_docs/version-1.x/sys-utilities/gpdemo.md +++ b/versioned_docs/version-1.x/sys-utilities/gpdemo.md @@ -52,7 +52,7 @@ NUM_PRIMARY_MIRROR_PAIRS=3 gpdemo :::info - Each segment node consists of a primary node and a mirror node. So every time the parameter value increases by `1`, 2 more nodes will be created. To better capture data distribution issues, it is recommended to set the value to an odd number. -- When the parameter value is set to 0, a single-computing-node cluster is deployed. 
See [Deploy Apache Cloudberry with a Single Computing Node](/docs/deploy-cbdb-with-single-node.md) for details. +- When the parameter value is set to 0, a single-computing-node cluster is deployed. See [Deploy Apache Cloudberry with a Single Computing Node](../deploy-cbdb-with-single-node.md) for details. ::: #### Specify the data directory of a node diff --git a/versioned_docs/version-1.x/sys-utilities/gpinitsystem.md b/versioned_docs/version-1.x/sys-utilities/gpinitsystem.md index 273224a0f03..140c07e7e70 100644 --- a/versioned_docs/version-1.x/sys-utilities/gpinitsystem.md +++ b/versioned_docs/version-1.x/sys-utilities/gpinitsystem.md @@ -372,4 +372,4 @@ gp7c~gp7c-2~50000~/data/mirror1/gpseg1~5~1 ## See also -[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Apache Cloudberry](/docs/start-and-stop-cbdb-database.md) +[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Apache Cloudberry](../start-and-stop-cbdb-database.md) diff --git a/versioned_docs/version-2.x/deployment/build-and-install.md b/versioned_docs/version-2.x/build/build-and-install.md similarity index 100% rename from versioned_docs/version-2.x/deployment/build-and-install.md rename to versioned_docs/version-2.x/build/build-and-install.md diff --git a/versioned_docs/version-2.x/deployment/build-based-on-docker.md b/versioned_docs/version-2.x/build/build-based-on-docker.md similarity index 100% rename from versioned_docs/version-2.x/deployment/build-based-on-docker.md rename to versioned_docs/version-2.x/build/build-based-on-docker.md diff --git a/versioned_docs/version-2.x/deployment/configure.md b/versioned_docs/version-2.x/build/configure.md similarity index 100% rename from versioned_docs/version-2.x/deployment/configure.md rename to versioned_docs/version-2.x/build/configure.md diff --git 
a/versioned_docs/version-2.x/deployment/create-gpadmin-user.md b/versioned_docs/version-2.x/build/create-gpadmin-user.md similarity index 100% rename from versioned_docs/version-2.x/deployment/create-gpadmin-user.md rename to versioned_docs/version-2.x/build/create-gpadmin-user.md diff --git a/versioned_docs/version-2.x/deployment/download-source-code.md b/versioned_docs/version-2.x/build/download-source-code.md similarity index 100% rename from versioned_docs/version-2.x/deployment/download-source-code.md rename to versioned_docs/version-2.x/build/download-source-code.md diff --git a/versioned_docs/version-2.x/build/index.md b/versioned_docs/version-2.x/build/index.md new file mode 100644 index 00000000000..e3d24ad371f --- /dev/null +++ b/versioned_docs/version-2.x/build/index.md @@ -0,0 +1,88 @@ +--- +title: "Build Apache Cloudberry from Source: Complete Guide" +--- + +This document is intended for developers interested in exploring and potentially contributing to Apache Cloudberry. The build environment described here is optimized for development and testing purposes only. + +## Target audience + +- Developers interested in contributing to Apache Cloudberry. +- PostgreSQL developers wanting to explore Cloudberry's extensions. +- Database enthusiasts interested in learning about distributed query processing. +- Anyone considering joining the Apache Cloudberry community. + +The build process described here enables development activities such as: + +- Debugging and testing new features. +- Exploring the codebase with development tools. +- Running test suites and validation checks. +- Making and testing code modifications. 
+ +:::tip +If you are new to Apache Cloudberry or PostgreSQL development: + +- Consider building PostgreSQL first to understand the basic workflow +- Join the project's [mailing lists](/community/mailing-lists) to connect with other developers +- Review the project's issue tracker to understand current development priorities +- Be prepared for longer build times and iterative testing as you explore the codebase +::: + +## Process of building Apache Cloudberry + +The build process for Apache Cloudberry (Incubating) closely resembles that of PostgreSQL. If you have previously set up development environments for PostgreSQL, you'll find the steps for Cloudberry very familiar. + +For those new to Cloudberry or PostgreSQL, we recommend starting with a PostgreSQL build first. The PostgreSQL development community has established excellent documentation and tooling to guide you through the process. Familiarizing yourself with PostgreSQL's build process will make transitioning to Cloudberry significantly easier. + +## Prerequisites + +### Provision a Rocky Linux 8+ / Ubuntu 20.04+ Environment + +- Use any platform to create a virtual machine or container: + + - **Cloud providers**: You can use the Rocky Linux 8+ or Ubuntu 20.04+ images provided by the cloud providers, such as AWS, Google Cloud, Microsoft Azure, and more. + - **VirtualBox**: Use the official [Rocky Linux 8+](https://rockylinux.org/download) / [Ubuntu 20.04+](https://ubuntu.com/download) ISO or Vagrant boxes. + - **Docker**: These instructions were validated with the official Rocky Linux 8+ and Ubuntu 20.04 base Docker images, but should work with any container based on them. 
+ - For example, you can run the following command to start a Rocky Linux 8 container: + + ```bash + docker run -it --shm-size=2gb -h cdw rockylinux/rockylinux:8 + + # Start an Ubuntu 20.04 container: + # docker run -it --shm-size=2gb -h cdw ubuntu:20.04 + ``` + + The hostname `cdw` (Coordinator Data Warehouse) is just an example of how we started the container for testing. + + To ensure test suites run successfully, you may need to increase the container's shared memory using `--shm-size=2gb`. Test failures can occur when the Cloudberry cluster lacks sufficient shared memory resources. + +- Ensure the VM or container has: + - Internet connectivity for package installation. + - SSH or console access for user interaction. + - Sufficient resources (CPU, memory, and storage) for a development environment. + +:::note +Specific steps to provision the environment are not covered in this guide because they vary by platform. This guide assumes you have successfully created a VM or container and can log in as the default user (for example, `rocky` for Rocky Linux on AWS). +::: + +### System requirements + +Minimum requirements for a development environment: + +- CPU: 4 cores recommended (2 cores minimum) + - CPU architecture: x86, x86_64, ARM, MIPS +- RAM: 8GB recommended (4GB minimum) +- Storage: 20GB free space recommended +- Network: Broadband internet connection for package downloads + +## Build Apache Cloudberry from source code + +The following steps guide you through building Apache Cloudberry from source code on Rocky Linux 8+ or Ubuntu 20.04+. The process is similar for both operating systems, with minor differences in package management, dependencies and software versions between these two distributions. 
+ +Just go ahead and follow the steps below to build Apache Cloudberry from source code: + + +```mdx-code-block +import DocCardList from '@theme/DocCardList'; + +<DocCardList /> +``` \ No newline at end of file diff --git a/versioned_docs/version-2.x/deployment/install-required-packages.md b/versioned_docs/version-2.x/build/install-required-packages.md similarity index 100% rename from versioned_docs/version-2.x/deployment/install-required-packages.md rename to versioned_docs/version-2.x/build/install-required-packages.md diff --git a/versioned_docs/version-2.x/deployment/post-installation.md b/versioned_docs/version-2.x/build/post-installation.md similarity index 100% rename from versioned_docs/version-2.x/deployment/post-installation.md rename to versioned_docs/version-2.x/build/post-installation.md diff --git a/versioned_docs/version-2.x/deployment/quick-build.md b/versioned_docs/version-2.x/build/quick-build.md similarity index 100% rename from versioned_docs/version-2.x/deployment/quick-build.md rename to versioned_docs/version-2.x/build/quick-build.md diff --git a/versioned_docs/version-2.x/deployment/sandbox.md b/versioned_docs/version-2.x/build/sandbox.md similarity index 100% rename from versioned_docs/version-2.x/deployment/sandbox.md rename to versioned_docs/version-2.x/build/sandbox.md diff --git a/versioned_docs/version-2.x/deployment/set-demo-cluster.md b/versioned_docs/version-2.x/build/set-demo-cluster.md similarity index 100% rename from versioned_docs/version-2.x/deployment/set-demo-cluster.md rename to versioned_docs/version-2.x/build/set-demo-cluster.md diff --git a/versioned_docs/version-2.x/deployment/system-settings.md b/versioned_docs/version-2.x/build/system-settings.md similarity index 100% rename from versioned_docs/version-2.x/deployment/system-settings.md rename to versioned_docs/version-2.x/build/system-settings.md diff --git a/versioned_docs/version-2.x/cbdb-op-deploy-guide.md b/versioned_docs/version-2.x/cbdb-op-deploy-guide.md deleted file mode 
100644 index 0e1edd1d33a..00000000000 --- a/versioned_docs/version-2.x/cbdb-op-deploy-guide.md +++ /dev/null @@ -1,305 +0,0 @@ ---- -title: Deploy Manually Using RPM Package ---- - -# Deploy Apache Cloudberry Manually Using RPM Package - -This document introduces how to manually deploy Apache Cloudberry on physical/virtual machines using RPM package. Before reading this document, it is recommended to first read the [Software and Hardware Configuration Requirements](/docs/cbdb-op-software-hardware.md) and [Prepare to Deploy Apache Cloudberry](/docs/cbdb-op-prepare-to-deploy.md). - -The deployment method in this document is for production environments. - -The example in this document uses CentOS 7.6 and deploys Apache Cloudberry v1.0.0. The main steps are as follows: - -1. [Prepare node servers](#step-1-prepare-server-nodes). -2. [Install the RPM package](#step-2-install-the-rpm-package). -3. [Configure mutual trust between nodes](#step-3-configure-mutual-trust-between-nodes). -4. [Initialize the database](#step-4-initialize-apache-cloudberry). -5. [Log into the database](#step-5-log-into-apache-cloudberry). - -## Step 1: Prepare server nodes - -Read the [Prepare to Deploy Apache Cloudberry](/docs/cbdb-op-prepare-to-deploy.md) document to prepare the server nodes. - -## Step 2. Install the RPM package - -After the preparation, it is time to install Apache Cloudberry. You need to download the corresponding RPM package from [Apache Cloudberry Releases](/releases), and then install the database on each node using the installation package. - -1. Download the RPM package to the home directory of `gpadmin`. - - ```bash - wget -P /home/gpadmin - ``` - -2. Install the RPM package in the `/home/gpadmin` directory. - - When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry/` is automatically created. - - ```bash - cd /home/gpadmin - yum install - ``` - -3. 
Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry/` directory. - - ```bash - chown -R gpadmin:gpadmin /usr/local - chown -R gpadmin:gpadmin /usr/local/cloudberry* - ``` - -## Step 3. Configure mutual trust between nodes - -1. Switch to the `gpadmin` user, and use the `gpadmin` user for subsequent operations. -2. Create a configuration file for node information. - - Create the node configuration file in the `/home/gpadmin/` directory, including the `all_hosts` and `seg_hosts` files, which store the host information of all nodes and data nodes respectively. The example node information is as follows: - - ```bash - [gpadmin@cbdb-coordinator gpadmin]$ cat all_hosts - - cbdb-coordinator - cbdb-standbycoordinator - cbdb-datanode01 - cbdb-datanode02 - cbdb-datanode03 - - [gpadmin@cbdb-coordinator gpadmin]$ cat seg_hosts - - cbdb-datanode01 - cbdb-datanode02 - cbdb-datanode03 - ``` - -3. Configure SSH trust between hosts. - - 1. Run `ssh-keygen` on each host to generate SSH key. For example: - - ```bash - [gpadmin@cbbd-coordinator cloudberry-1.0.0]$ ssh-keygen - - Generating public/private rsa key pair. - Enter file in which to save the key (/usr/local/cloudberry/.ssh/id_rsa): - Enter passphrase (empty for no passphrase): - Enter same passphrase again: - Your identification has been saved in /usr/local/cloudberry/.ssh/id_rsa. - Your public key has been saved in /usr/local/cloudberry/.ssh/id_rsa.pub. - The key fingerprint is: - SHA256:cvcYS87egYCyh/v6UtdqrejVU5qqF7OvpcHg/T9lRrg gpadmin@cbbd-coordinator - The key's randomart image is: - +---[RSA 2048]----+ - | | - | | - | + | - |+ O | - |o ... S | - |. +o= B C | - | o B=00 D | - |.o=o0o.. = | - |O=++*+o+.. | - +----[SHA256]-----+ - ``` - - 2. Run `ssh-copy-id` on each host to configure password-free login. 
The example is as follows: - - ```bash - ssh-copy-id cbdb-coordinator - ssh-copy-id cbdb-standbycoordinator - ssh-copy-id cbdb-datanode01 - ssh-copy-id cbdb-datanode02 - ssh-copy-id cbdb-datanode03 - ``` - - 3. Verify that SSH between nodes is all connected, that is, the password-free login between servers is successful. The example is as follows: - - ```bash - [gpadmin@cbdb-coordinator ~]$ gpssh -f all_hosts - => pwd - [ cbdb-datanode03] b'/usr/local/cloudberry\r' - [ cbdb-coordinator] b'/usr/local/cloudberry\r' - [ cbdb-datanode02] b'/usr/local/cloudberry\r' - [cbdb-standbycoordinator] b'/usr/local/cloudberry\r' - [ cbdb-datanode01] b'/usr/local/cloudberry\r' - => - ``` - - If you fail to run `gpssh`, you can first run `source /usr/local/cloudberry/greenplum_path.sh` on the coordinator node. - -## Step 4. Initialize Apache Cloudberry - -Before performing the following operations, run `su - gpadmin` to switch to the `gpadmin` user. - -1. Add a new line of `source` command to the `~/.bashrc` files of all nodes (coordinator/standby coordinator/segment). The example is as follows: - - ```bash - source /usr/local/cloudberry/greenplum_path.sh - ``` - -2. Run the `source` command to make the newly added content effective: - - ```bash - source ~/.bashrc - ``` - -3. Use the `gpssh` command on the coordinator node to create data directories and mirror directories for segment nodes. In this document, the 2 directories are `/data0/primary/` and `/data0/mirror/`, respectively. The example is as follows: - - ```bash - gpssh -f seg_hosts - mkdir -p /data0/primary/ - mkdir -p /data0/mirror/ - ``` - -4. Create data directory on the coordinator node. In this document, the directory is `/data0/coordinator/`. - - ```bash - mkdir -p /data0/coordinator/ - ``` - -5. Use the `gpssh` command on the coordinator node to create data directory for the standby node. In this document, the directory is `/data0/coordinator/`. 
- - ```bash - gpssh -h cbdb-standbycoordinator -e 'mkdir -p /data0/coordinator/' - ``` - -6. On the hosts of the coordinator and standby nodes, add a line to the `~/.bashrc` file to declare the path of `COORDINATOR_DATA_DIRECTORY`, which is `{the path step 5}` + `gpseg-1`. For example: - - ```bash - export COORDINATOR_DATA_DIRECTORY=/data0/coordinator/gpseg-1 - ``` - -7. Run the following command on the hosts of the coordinator and standby nodes to make the declaration of `COORDINATOR_DATA_DIRECTORY` in the previous step effective. - - ```bash - source ~/.bashrc - ``` - -8. Configure the `gpinitsystem_config` initialization script: - - 1. On the host where the coordinator node is located, copy the template configuration file to the current directory: - - ```bash - cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config . - ``` - - 2. Modify the `gpinitsystem_config` file as follows: - - - Pay attention to the port, coordinator node, segment node, and mirror node. - - Modify `DATA_DIRECTORY` to the data directory of the segment node, for example, `/data0/primary`. - - Modify `COORDINATOR_HOSTNAME` to the hostname of the coordinator node, for example, `cbdb-coordinator`. - - Modify `COORDINATOR_DIRECTORY` to the data directory of the coordinator node, for example, `/data0/coordinator`. - - Modify `MIRROR_DATA_DIRECTORY` to the data directory of the mirror node, for example, `/data0/mirror`. - - ```bash - [gpadmin@cbdb-coordinator ~]$ cat gpinitsystem_config - # FILE NAME: gpinitsystem_config - - # Configuration file needed by the gpinitsystem - - ######################################## - #### REQUIRED PARAMETERS - ######################################## - - #### Naming convention for utility-generated data directories. - SEG_PREFIX=gpseg - - #### Base number by which primary segment port numbers - #### are calculated. - PORT_BASE=6000 - - #### File system location(s) where primary segment data directories - #### will be created. 
The number of locations in the list dictate - #### the number of primary segments that will get created per - #### physical host (if multiple addresses for a host are listed in - #### the hostfile, the number of segments will be spread evenly across - #### the specified interface addresses). - declare -a DATA_DIRECTORY=(/data0/primary) - - #### OS-configured hostname or IP address of the coordinator host. - COORDINATOR_HOSTNAME=cbdb-coordinator - - #### File system location where the coordinator data directory - #### will be created. - COORDINATOR_DIRECTORY=/data0/coordinator - - #### Port number for the coordinator instance. - COORDINATOR_PORT=5432 - - #### Shell utility used to connect to remote hosts. - TRUSTED_SHELL=ssh - - #### Default server-side character set encoding. - ENCODING=UNICODE - - ######################################## - #### OPTIONAL MIRROR PARAMETERS - ######################################## - - #### Base number by which mirror segment port numbers - #### are calculated. - MIRROR_PORT_BASE=7000 - - #### File system location(s) where mirror segment data directories - #### will be created. The number of mirror locations must equal the - #### number of primary locations as specified in the - #### DATA_DIRECTORY parameter. - declare -a MIRROR_DATA_DIRECTORY=(/data0/mirror) - ``` - - - To create a default database during initialization, you need to fill in the database name. In this example, the `warehouse` database is created during initialization - - ```conf - ######################################## - #### OTHER OPTIONAL PARAMETERS - ######################################## - - #### Create a database of this name after initialization. - DATABASE_NAME=warehouse - ``` - -9. Use `gpinitsystem` to initialize Apache Cloudberry. For example: - - ```bash - gpinitsystem -c gpinitsystem_config -h /home/gpadmin/seg_hosts - ``` - - In the command above, `-c` specifies the configuration file and `-h` specifies the computing node list. 
- - If you need to initialize the standby coordinator node, refer to the following command: - - ```bash - gpinitstandby -s cbdb-standbycoordinator - ``` - -## Step 5. Log into Apache Cloudberry - -Now you have successfully deployed Apache Cloudberry. To log into the database, refer to the following command: - -```bash -psql -h <host_ip> -p <port> -U <user_name> -d <db_name> -``` - -In the command above: - -- `<host_ip>` is the IP address of the coordinator node of the Apache Cloudberry server. -- `<port>` is the port number of Apache Cloudberry, which is `5432` by default. -- `<user_name>` is the user name of the database. -- `<db_name>` is the name of the database to connect to. - -After you run the `psql` command, the system will prompt you to enter the database password. After you enter the correct password, you will successfully log into Apache Cloudberry and can perform SQL queries and operations. Make sure that you have the correct permissions to access the target database. - -```sql -[gpadmin@cddb-coordinator ~]$ psql warehouse -psql (14.4, server 14.4) -Type "help" for help. 
- -warehouse=# SELECT * FROM gp_segment_configuration; -dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir ------------------------------------------------------------------------------------------- -1 | -1 | p | p | n | u | 5432 | cddb-coordinator | cddb-coordinator | /data0/coordinator/gpseg-1 -8 | -1 | m | m | s | u | 5432 | cddb-standbycoordinator | cddb-standbycoordinator | /data0/coordinator/gpseg-1 -2 | 0 | p | p | s | u | 6000 | cddb-datanode01 | cddb-datanode01 | /data0/primary/gpseg0 -5 | 0 | m | m | s | u | 7000 | cddb-datanode02 | cddb-datanode02 | /data0/mirror/gpseg0 -3 | 1 | p | p | s | u | 6000 | cddb-datanode02 | cddb-datanode02 | /data0/primary/gpseg1 -6 | 1 | m | m | s | u | 7000 | cddb-datanode03 | cddb-datanode03 | /data0/mirror/gpseg1 -4 | 2 | p | p | s | u | 6000 | cddb-datanode03 | cddb-datanode03 | /data0/primary/gpseg2 -7 | 2 | m | m | s | u | 7000 | cddb-datanode01 | cddb-datanode01 | /data0/mirror/gpseg2 -(8 rows) -``` diff --git a/versioned_docs/version-2.x/cbdb-op-prepare-to-deploy.md b/versioned_docs/version-2.x/cbdb-op-prepare-to-deploy.md deleted file mode 100644 index 58fa2dc9c3d..00000000000 --- a/versioned_docs/version-2.x/cbdb-op-prepare-to-deploy.md +++ /dev/null @@ -1,442 +0,0 @@ ---- -title: Prepare to Deploy ---- - -# Prepare to Deploy on Physical or Virtual Machine - -Before deploying Apache Cloudberry on physical or virtual machines, you need to do some preparations. Read this document and [Software and Hardware Configuration Requirements](/docs/cbdb-op-software-hardware.md) before you start to deploy Apache Cloudberry. - -## Plan the deployment architecture - -Plan your deployment architecture based on the [Apache Cloudberry Architecture](/docs/cbdb-architecture.md) and [Software and Hardware Configuration Requirements](/docs/cbdb-op-software-hardware.md), and determine the number of servers needed. 
Ensure that all servers are within a single security group and have mutual trust configured. - -The deployment plan for the example of this document includes 1 coordinator + 1 standby + 3 segments (primary + mirror), totaling 5 servers. - -## Modify server settings - -Log into each host as the `root` user, and modify the settings of each node server in the order of the following sections. - -### Change hostname - -Use the `hostnamectl set-hostname` command to modify the hostname of each server respectively, following these naming conventions: - -- Only include letters, numbers, and the hyphen `-`. Note: The underscore `_` is not a valid character. -- Case-insensitive, but it is recommended to use all lowercase letters. Using uppercase letters for the hostname might cause Kerberos authentication to fail. -- Each hostname must be globally unique across all hosts. - -Example: - -```bash -hostnamectl set-hostname cbdb-coordinator -hostnamectl set-hostname cbdb-standbycoordinator -hostnamectl set-hostname cbdb-datanode01 -hostnamectl set-hostname cbdb-datanode02 -hostnamectl set-hostname cbdb-datanode03 -``` - -### Add `gpadmin` admin user - -Follow the example below to create a user group and username `gpadmin`. Set the user group and username identifier to `520`. Create and specify the `gpadmin` home directory `/home/gpadmin`. - -```bash -groupadd -g 520 gpadmin # Adds user group gpadmin. -useradd -g 520 -u 520 -m -d /home/gpadmin/ -s /bin/bash gpadmin # Adds username gpadmin and creates the home directory of gpadmin. -passwd gpadmin # Sets a password for gpadmin; after executing, follow the prompts to input the password. -``` - -### Disable SELinux and firewall software - -Run `systemctl status firewalld` to view the firewall status. If the firewall is on, you need to turn it off by setting the `SELINUX` parameter to `disabled` in the `/etc/selinux/config` file. 
- -```bash -SELINUX=disabled -``` - -You can also disable the firewall using the following commands: - -```bash -systemctl stop firewalld.service -systemctl disable firewalld.service -``` - -### Modify network mapping - -Check the `/etc/hosts` file to make sure that it contains mappings of all host aliases to their network IP addresses. Examples are as follows: - -``` -192.168.1.101 cbdb-coordinator -192.168.1.102 cbdb-standbycoordinator -192.168.1.103 cbdb-datanode01 -192.168.1.104 cbdb-datanode02 -192.168.1.105 cbdb-datanode03 -``` - -### Set system parameters - -Add relevant system parameters in the `/etc/sysctl.conf` configuration file, and run the `sysctl -p` command to make the configuration file effective. - -When setting the configuration parameters, you can take the following example as a reference and set them according to your needs. Details of some of these parameters and recommended settings are provided below. - -```conf -# kernel.shmall = _PHYS_PAGES / 2 -kernel.shmall = 197951838 -# kernel.shmmax = kernel.shmall * PAGE_SIZE -kernel.shmmax = 810810728448 -kernel.shmmni = 4096 -vm.overcommit_memory = 2 -vm.overcommit_ratio = 95 -net.ipv4.ip_local_port_range = 10000 65535 -kernel.sem = 250 2048000 200 8192 -kernel.sysrq = 1 -kernel.core_uses_pid = 1 -kernel.msgmnb = 65536 -kernel.msgmax = 65536 -kernel.msgmni = 2048 -net.ipv4.tcp_syncookies = 1 -net.ipv4.conf.default.accept_source_route = 0 -net.ipv4.tcp_max_syn_backlog = 4096 -net.ipv4.conf.all.arp_filter = 1 -net.ipv4.ipfrag_high_thresh = 41943040 -net.ipv4.ipfrag_low_thresh = 31457280 -net.ipv4.ipfrag_time = 60 -net.core.netdev_max_backlog = 10000 -net.core.rmem_max = 2097152 -net.core.wmem_max = 2097152 -vm.swappiness = 10 -vm.zone_reclaim_mode = 0 -vm.dirty_expire_centisecs = 500 -vm.dirty_writeback_centisecs = 100 -vm.dirty_background_ratio = 0 -vm.dirty_ratio = 0 -vm.dirty_background_bytes = 1610612736 -vm.dirty_bytes = 4294967296 -``` - -#### Shared memory - -In the `/etc/sysctl.conf` 
configuration file, `kernel.shmall` represents the total amount of available shared memory, in pages. `kernel.shmmax` represents the maximum size of a single shared memory segment, in bytes. - -You can define these 2 values using the operating system's `_PHYS_PAGES` and `PAGE_SIZE` parameters: - -```conf -kernel.shmall = ( _PHYS_PAGES / 2) -kernel.shmmax = ( _PHYS_PAGES / 2) * PAGE_SIZE -``` - -To get the values of these 2 operating system parameters, you can use `getconf`, for example: - -```bash -$ echo $(expr $(getconf _PHYS_PAGES) / 2) -$ echo $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE)) -``` - -#### Segment memory - -In the `/etc/sysctl.conf` configuration file, - -- `vm.overcommit_memory` indicates the overcommit handling mode for memory. Available options are: - - - `0`: Heuristic overcommit handling - - `1`: Always overcommit - - `2`: Don't overcommit - - Set the value of this parameter to `2` to refuse overcommit. - -- `vm.overcommit_ratio` is a kernel parameter and is the percentage of RAM occupied by the application process. The default value on CentOS is `50`. `vm.overcommit_ratio` is calculated as follows: - - ``` - vm.overcommit_ratio = (RAM - 0.026 * gp_vmem) / RAM - ``` - - The calculation method of `gp_vmem` is as follows: - - ``` - # If the system memory is less than 256 GB, use the following formula to calculate: - gp_vmem = ((SWAP + RAM) - (7.5GB + 0.05 * RAM)) / 1.7 - - # If the system memory is greater than or equal to 256 GB, use the following formula to calculate: - gp_vmem = ((SWAP + RAM) - (7.5GB + 0.05 * RAM)) / 1.17 - - # In the above formulas, SWAP is the swap space on the host, in GB. - # RAM is the size of the memory installed on the host, in GB. - ``` - -#### Port - -In the `/etc/sysctl.conf` configuration file, `net.ipv4.ip_local_port_range` is used to specify the port range. 
To avoid port conflicts between Apache Cloudberry and other applications, you need to specify the port range via operating system parameters. When you later set Apache Cloudberry initialization parameters, avoid setting Apache Cloudberry related ports in this range. - -For example, for `net.ipv4.ip_local_port_range = 10000 65535`, you need to avoid setting the Apache Cloudberry related ports in the interval `[10000,65535]`. You can set them to `6000` and `7000`: - -``` -PORT_BASE = 6000 -MIRROR_PORT_BASE = 7000 -``` - -#### IP segmentation - -When the Apache Cloudberry uses the UDP protocol for internal connection, the network card controls the fragmentation and reassembly of IP packets. If the size of a UDP message is larger than the maximum size of network transmission unit (MTU), the IP layer fragments the message. - -- `net.ipv4.ipfrag_high_thresh`: When the total size of IP fragments exceeds this threshold, the kernel will attempt to reorganize IP fragments. If the fragments exceed this threshold but all fragments have not arrived within the specified time, the kernel will not reorganize the fragments. This threshold is typically used to control whether larger shards are reorganized. The default value is `4194304` bytes (4 MB). -- `net.ipv4.ipfrag_low_thresh`: Indicates that when the total size of IP fragments is below this threshold, the kernel will wait as long as possible for more fragments to arrive, to allow for larger reorganizations. This threshold is used to minimize unfinished reorganization operations and improve system performance. The default value is `3145728` bytes (3 MB). -- `net.ipv4.ipfrag_time` is a kernel parameter that controls the IP fragment reassembly timeout. The default value is `30`. 
- -It is recommended to set the above parameters to the following values: - -```conf -net.ipv4.ipfrag_high_thresh = 41943040 -net.ipv4.ipfrag_low_thresh = 31457280 -net.ipv4.ipfrag_time = 60 -``` - -#### System memory - -- If the server memory exceeds 64 GB, it is recommended to set the following parameters in the `/etc/sysctl.conf` configuration file: - - ```conf - vm.dirty_background_ratio = 0 - vm.dirty_ratio = 0 - vm.dirty_background_bytes = 1610612736 # 1.5GB - vm.dirty_bytes = 4294967296 # 4GB - ``` - -- If the server memory is less than 64 GB, do not set `vm.dirty_background_bytes` and `vm.dirty_bytes`. It is recommended to set the following parameters in the `/etc/sysctl.conf` configuration file: - - ```conf - vm.dirty_background_ratio = 3 - vm.dirty_ratio = 10 - ``` - -- To deal with emergencies when the system encounters memory pressure, it is recommended to add the `vm.min_free_kbytes` parameter in the `/etc/sysctl.conf` configuration file to specify the amount of available memory reserved by the system. It is recommended to set `vm.min_free_kbytes` to 3% of the system's physical memory. The command is as follows: - - ```bash - awk 'BEGIN {OFMT = "%.0f";} /MemTotal/ {print "vm.min_free_kbytes =", $2 * .03;}' /proc/meminfo >> /etc/sysctl.conf - ``` - - It is not recommended that the setting of `vm.min_free_kbytes` exceed 5% of the system's physical memory. - -#### Resource limit - -Edit the `/etc/security/limits.conf` file and add the following content, which limits the usage of software and hardware resources. - -``` -*soft nofile 524288 -*hard nofile 524288 -*soft nproc 131072 -*hard nproc 131072 -``` - -#### CORE DUMP - -1. Add the following parameter to the `/etc/sysctl.conf` configuration file: - - ```conf - kernel.core_pattern=/var/core/core.%h.%t - ``` - -2. Run the following command to make the configuration effective: - - ```bash - sysctl -p - ``` - -3. 
Add the following parameter to `/etc/security/limits.conf`: - - ``` - * soft core unlimited - ``` - -#### Set mount options for the XFS file system - -XFS is the file system for the data directory of Apache Cloudberry. XFS has the following mount options: - -``` -rw,nodev,noatime,inode64 -``` - -You can set up XFS file mounting in the `/etc/fstab` file. See the following commands. You need to choose the file path according to the actual situation: - -```bash -mkdir -p /data0/ -mkfs.xfs -f /dev/vdc -echo "/dev/vdc /data0 xfs rw,nodev,noatime,nobarrier,inode64 0 0" >> /etc/fstab -mount /data0 -chown -R gpadmin:gpadmin /data0/ -``` - -Run the following command to check whether the mounting is successful: - -```bash -df -h -``` - -#### Blockdev value - -The blockdev value for each disk file should be `16384`. To verify the blockdev value of a disk device, use the following command: - -```bash -sudo /sbin/blockdev --getra -``` - -For example, to verify the blockdev value of the example server disk: - -```bash -sudo /sbin/blockdev --getra /dev/vdc -``` - -To modify the blockdev value of a device file, use the following command: - -```bash -sudo /sbin/blockdev --setra -``` - -For example, to modify the file blockdev value of the hard disk of the example server: - -```bash -sudo /sbin/blockdev --setra 16384 /dev/vdc -``` - -#### I/O scheduling policy settings for disks - -The disk type, operating system and scheduling policies of Apache Cloudberry are as follows: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
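| Storage device type | OS | Recommended scheduling policy |
| --- | --- | --- |
| NVMe | RHEL 7 | none |
| NVMe | RHEL 8 | none |
| NVMe | Ubuntu | none |
| SSD | RHEL 7 | noop |
| SSD | RHEL 8 | none |
| SSD | Ubuntu | none |
| Other | RHEL 7 | deadline |
| Other | RHEL 8 | mq-deadline |
| Other | Ubuntu | mq-deadline |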
- -Refer to the following command to modify the scheduling policy. Note that this command is only a temporary modification, and the modification becomes invalid after the server is restarted. - -```bash -echo schedulername > /sys/block/<devname>/queue/scheduler -``` - -For example, temporarily modify the disk I/O scheduling policy of the example server: - -```bash -echo deadline > /sys/block/vdc/queue/scheduler -``` - -To permanently modify the scheduling policy, use the system utility `grubby`. After using `grubby`, the modification takes effect after you restart the server. The sample command is as follows: - -```bash -grubby --update-kernel=ALL --args="elevator=deadline" -``` - -To view the kernel parameter settings, use the following command: - -```bash -grubby --info=ALL -``` - -#### Disable Transparent Huge Pages (THP) - -You need to disable Transparent Huge Pages (THP), because it reduces database performance. The command is as follows: - -```bash -grubby --update-kernel=ALL --args="transparent_hugepage=never" -``` - -Check the status of THP: - -```bash -cat /sys/kernel/mm/*transparent_hugepage/enabled -``` - -#### Disable IPC object deletion - -Disable IPC object deletion by setting the value of `RemoveIPC` to `no`. You can set this parameter in the `/etc/systemd/logind.conf` file on each Apache Cloudberry host. - -``` -RemoveIPC=no -``` - -After changing the setting, run the following command to restart the `systemd-logind` service so that the setting takes effect: - -```bash -service systemd-logind restart -``` - -#### SSH connection threshold - -To set the SSH connection threshold, you need to modify the `MaxStartups` and `MaxSessions` parameters in the `/etc/ssh/sshd_config` configuration file. Both of the following forms are acceptable. 
- -``` -MaxStartups 200 -MaxSessions 200 -``` - -``` -MaxStartups 10:30:200 -MaxSessions 200 -``` - -Run the following command to restart the SSH service so that the setting takes effect: - -```bash -service sshd restart -``` - -#### Clock synchronization - -Apache Cloudberry requires clock synchronization to be configured for all hosts, and the clock synchronization service should be started when the host starts. You can choose one of the following synchronization methods: - -- Use the coordinator node's time as the source, and have the other hosts synchronize their clocks with the coordinator node host. -- Synchronize clocks using an external clock source. - -The example in this document uses an external clock source for synchronization, that is, adding the following configuration to the `/etc/chrony.conf` configuration file: - -```conf -# Use public servers from the pool.ntp.org project. -# Please consider joining the pool (http://www.pool.ntp.org/join.html). -server 0.centos.pool.ntp.org iburst -``` - -After setting, you can run the following command to check the clock synchronization status: - -```bash -systemctl status chronyd -``` diff --git a/versioned_docs/version-2.x/cbdb-op-software-hardware.md b/versioned_docs/version-2.x/cbdb-op-software-hardware.md deleted file mode 100644 index a920da2f3f7..00000000000 --- a/versioned_docs/version-2.x/cbdb-op-software-hardware.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: Software and Hardware Configuration ---- - -# Software and Hardware Configuration - -This document introduces the software and hardware configuration required for Apache Cloudberry. - -## Hardware requirements - -### Supported deployment environments - -Apache Cloudberry supports deployment on both physical machines and virtual machines. Below are the recommended configurations for the environments. 
- -#### For development or test environments - -| Component | CPU | Memory | Disk type | Network | Number of instances | -| ------- | ---- | ---- | -------- | -------------------- | -------- | -| Coordinator | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | -| Segment | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | - -#### For production environments - -| Component | CPU | Memory | Disk type | Network | Instance count | -| ------- | ------ | ------ | -------- | -------------------- | -------- | -| Coordinator | 16+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | -| Segment | 8+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | - -Apache Cloudberry can also be deployed on public cloud platforms such as AWS, Azure, and GCP. The hardware requirements for cloud-based deployments might vary based on the instance types selected on these platforms. Refer to the specific cloud provider’s documentation for instance configurations that meet or exceed the recommended hardware specifications. - -### Storage - -- To prevent a high data disk load from affecting the operating system's normal I/O response, mount the operating system and the data disk on separate disks. -- If the host configuration allows, it is recommended to use 2 independent SAS disks as the system disk (RAID1), and another 10 SAS disks as the data disk (RAID5). -- It is recommended to use LVM logical volumes to manage disks for more flexible disk configuration. - -For the system disk: The system disk should use an independent disk to avoid impact on the operating system when data disks are heavily loaded. It is recommended that the system disk be configured in dual-disk RAID 1 and the operating system of the system disk be XFS. - -For data disks: It is recommended to use LVM to manage data disks. According to test statistics, creating an independent logical volume for each physical volume can achieve the best disk performance. 
For example: - -```bash -pvcreate /dev/vdb -pvcreate /dev/vdc -pvcreate /dev/vdd -vgcreate data /dev/vdb /dev/vdc /dev/vdd -lvcreate --extents 100%pvs -n data0 data /dev/vdb -lvcreate --extents 100%pvs -n data1 data /dev/vdc -lvcreate --extents 100%pvs -n data2 data /dev/vdd -``` - -The names of mount points must be consecutive, and the mount points of data disks should be `/data0`, `/data1`, ..., `/dataN`. Data disks should use the XFS file format. For example: - -```bash -mkdir -p /data0 /data1 /data2 -mkfs.xfs /dev/data/data0 -mkfs.xfs /dev/data/data1 -mkfs.xfs /dev/data/data2 -mount /dev/data/data0 /data0/ -mount /dev/data/data1 /data1/ -mount /dev/data/data2 /data2/ -``` - -## Data exchange network - -- **Network card configuration** - - The data exchange network is used for transmitting business data, which has high requirements on network performance and throughput. In a production environment, two 10 Gbps NICs are generally required, and they will be used after bonding. The recommended bond 4 parameter are as follows: - - ```bash - BONDING_OPTS='mode=4 miimon=100 xmit_hash_policy=layer3+4' - ``` - -- **Connectivity requirements** - - - Connect the management console and the database host in the data exchange network. If there is a firewall device between the management console and the database host, ensure that the TCP idle connection can be kept for more than 12 hours. - - Connect database hosts and management console hosts in the data exchange network, and do not limit the TCP idle connection time. - - Connect database clients and application programs that access the database with the database coordinator node in the data exchange network. - - Ensure that the TCP idle connection can be kept for more than 12 hours. 
- -- **Default gateway** - - If the host is configured with a management network, the network card (bond0) of the data exchange network should be used as the default gateway device; otherwise, it might cause abnormal traffic monitoring of the host network, deployment failure, and performance problems. The following is an example of viewing the default gateway. - - ```bash - netstat -rn | grep ^0.0.0.0 - ``` - -- **Switch** - - - Make sure that the egress bandwidth of the data network switch from layer 1 to layer 2 is no lower than the maximum disk I/O throughput capacity of a single cabinet (calculated with a single RAID card of 500 MBps). - - A switch convergence ratio of 4:1 is recommended. When the convergence ratio reaches 6:1, most links will be saturated. Significant packet loss occurs when the convergence ratio reaches 8:1. - -## Software requirements - -### Supported OS - -Apache Cloudberry supports the following operating systems: - -- Kylin V10 SP1 or SP2 -- NeoKylin V7update6 -- RHEL/CentOS 7.6+ -- openEuler 20.3 LTS SP2 - -### OS configurations - -- SSH configuration - - The recommended configuration for the SSH server side (`/etc/ssh/sshd_config`) is as follows. After the configuration is complete, run `systemctl restart sshd.service` to make it effective. - - | Parameter | Value | Description | - | ---------------------- | ---- | ---------------- | - | Port | 22 | Listening port. | - | PasswordAuthentication | yes | Allows password login, which can be changed after cluster initialization. | - | PermitEmptyPass words | no | Empty password is not allowed for login. | - | UseDNS | no | DNS is not used. | - -### SSH password-free login - -Configure SSH password-free login for all nodes. 
For example: - -```bash -ssh-keygen -t rsa -ssh-copy-id root@192.168.66.154 -``` diff --git a/versioned_docs/version-2.x/connect-to-cbdb.md b/versioned_docs/version-2.x/database-basic/connect-to-cbdb.md similarity index 100% rename from versioned_docs/version-2.x/connect-to-cbdb.md rename to versioned_docs/version-2.x/database-basic/connect-to-cbdb.md diff --git a/versioned_docs/version-2.x/operate-with-data/operate-with-db-objects/create-and-manage-database.md b/versioned_docs/version-2.x/database-basic/create-and-manage-database.md similarity index 100% rename from versioned_docs/version-2.x/operate-with-data/operate-with-db-objects/create-and-manage-database.md rename to versioned_docs/version-2.x/database-basic/create-and-manage-database.md diff --git a/versioned_docs/version-2.x/start-and-stop-cbdb-database.md b/versioned_docs/version-2.x/database-basic/start-and-stop-cbdb-database.md similarity index 100% rename from versioned_docs/version-2.x/start-and-stop-cbdb-database.md rename to versioned_docs/version-2.x/database-basic/start-and-stop-cbdb-database.md diff --git a/versioned_docs/version-2.x/deployment/ansible-example.md b/versioned_docs/version-2.x/deployment/ansible-example.md new file mode 100644 index 00000000000..4a6129d181b --- /dev/null +++ b/versioned_docs/version-2.x/deployment/ansible-example.md @@ -0,0 +1,112 @@ +--- +title: Example Ansible Playbook +--- + +# Example Ansible Playbook + +A sample Ansible playbook to install an Apache Cloudberry software release onto the hosts that will comprise an Apache Cloudberry system. + +This Ansible playbook shows how tasks described in [Installing the Apache Cloudberry Software](./install_cloudberry.md) might be automated using [Ansible](https://docs.ansible.com). + +:::important +This playbook is provided as an *example only* to illustrate how Apache Cloudberry cluster configuration and software installation tasks can be automated using provisioning tools such as Ansible, Chef, or Puppet. 
The Apache Cloudberry community does not provide support for Ansible or for the playbook presented in this example. +::: + +The example playbook is designed for use with RHEL/Rocky Linux. It creates the `gpadmin` user, installs the Apache Cloudberry software release, sets the owner and group of the installed software to `gpadmin`, and sets the PAM security limits for the `gpadmin` user. + +You can revise the script to work with your operating system platform and to perform additional host configuration tasks. + +Following are steps to use this Ansible playbook. + +1. Install Ansible on the control node using your package manager. See the [Ansible documentation](https://docs.ansible.com) for help with installation. + +2. Set up passwordless SSH from the control node to all hosts that will be a part of the Apache Cloudberry cluster. You can use the `ssh-copy-id` command to install your public SSH key on each host in the cluster. Alternatively, your provisioning software may provide more convenient ways to securely install public keys on multiple hosts. + +3. Create an Ansible inventory by creating a file called `hosts` with a list of the hosts that will comprise your Apache Cloudberry cluster. For example: + + ``` + cdw + scdw + sdw1 + sdw2 + sdw3 + ``` + + This file can be edited and used with the Apache Cloudberry `gpssh-exkeys` and `gpinitsystem` utilities later on. + +4. Copy the playbook code below to a file `ansible-playbook.yml` on your Ansible control node. + +5. Edit the playbook variables at the top of the playbook, such as the `gpadmin` administrative user and password to create, and the version of Apache Cloudberry you are installing. + +6. Run the playbook, passing the package to be installed to the `package_path` parameter. 
+ + ```bash + ansible-playbook ansible-playbook.yml -i hosts -e package_path=./apache-cloudberry-db-incubating-2.1.0.el8.x86_64.rpm + ``` + +## Ansible Playbook - Apache Cloudberry Installation for RHEL/Rocky Linux + +```yaml +--- + +- hosts: all + vars: + - version: "2.1.0" + - cloudberry_admin_user: "gpadmin" + - cloudberry_admin_password: "changeme" + # - package_path: passed via the command line with: -e package_path=./apache-cloudberry-db-incubating-2.1.0.el8.x86_64.rpm + remote_user: root + become: yes + become_method: sudo + connection: ssh + gather_facts: yes + tasks: + - name: create cloudberry admin user + user: + name: "{{ cloudberry_admin_user }}" + password: "{{ cloudberry_admin_password | password_hash('sha512', 'DvkPtCtNH+UdbePZfm9muQ9pU') }}" + - name: copy package to host + copy: + src: "{{ package_path }}" + dest: /tmp + - name: install package + yum: + name: "/tmp/{{ package_path | basename }}" + state: present + - name: cleanup package file from host + file: + path: "/tmp/{{ package_path | basename }}" + state: absent + - name: find install directory + find: + paths: /usr/local + patterns: 'cloudberry*' + file_type: directory + register: installed_dir + - name: change install directory ownership + file: + path: '{{ item.path }}' + owner: "{{ cloudberry_admin_user }}" + group: "{{ cloudberry_admin_user }}" + recurse: yes + with_items: "{{ installed_dir.files }}" + - name: update pam_limits + pam_limits: + domain: "{{ cloudberry_admin_user }}" + limit_type: '-' + limit_item: "{{ item.key }}" + value: "{{ item.value }}" + with_dict: + nofile: 524288 + nproc: 131072 + - name: find installed cloudberry version + shell: . 
/usr/local/cloudberry-db/cloudberry-env.sh && /usr/local/cloudberry-db/bin/postgres --gp-version + register: postgres_gp_version + - name: fail if the correct cloudberry version is not installed + fail: + msg: "Expected cloudberry version {{ version }}, but found '{{ postgres_gp_version.stdout }}'" + when: "version is not defined or version not in postgres_gp_version.stdout" +``` + +When the playbook has run successfully, you can proceed with [Creating the Data Storage Areas](./create_data_dirs.md) and [Initializing an Apache Cloudberry System](./init_cloudberry.md). + diff --git a/versioned_docs/version-2.x/deployment/capacity_planning.md b/versioned_docs/version-2.x/deployment/capacity_planning.md new file mode 100644 index 00000000000..35865983405 --- /dev/null +++ b/versioned_docs/version-2.x/deployment/capacity_planning.md @@ -0,0 +1,69 @@ +--- +title: Estimating storage capacity +--- + +To estimate how much data your Apache Cloudberry system can accommodate, use these measurements as guidelines. Also keep in mind that you may want to have extra space for landing backup files and data load files on each segment host. + +## Calculating usable disk capacity + +Start with the raw capacity of the physical disks on a segment host that are available for data storage: + +To calculate how much data an Apache Cloudberry system can hold, you have to calculate the usable disk capacity per segment host and then multiply that by the number of segment hosts in your Apache Cloudberry array. Start with the raw capacity of the physical disks on a segment host that are available for data storage \(raw\_capacity\), which is: + +``` +disk_size * number_of_disks = raw_capacity +``` + +Account for file system formatting overhead (roughly 10 percent) and the RAID level you are using.
For example, if using RAID-10, the calculation would be: + +``` +(raw_capacity * 0.9) / 2 = formatted_disk_space +``` + +For optimal performance, do not completely fill your disks to capacity, but run at 70% or lower. So with this in mind, calculate the usable disk space as follows: + +``` +formatted_disk_space * 0.7 = usable_disk_space +``` + +Using only 70% of your disk space allows Apache Cloudberry to use the other 30% for temporary and transaction files on the same disks. If your host systems have a separate disk system that can be used for temporary and transaction files, you can specify a tablespace that Apache Cloudberry uses for the files. Moving the location of the files might improve performance depending on the performance of the disk system. + +Once you have formatted RAID disk arrays and accounted for the maximum recommended capacity \(usable\_disk\_space\), you will need to calculate how much storage is actually available for user data (`U`). If using Apache Cloudberry mirrors for data redundancy, this would then double the size of your user data (`2 * U`). Apache Cloudberry also requires some space be reserved as a working area for active queries. The work space should be approximately one third the size of your user data (work space = `U/3`): + +``` +With mirrors: (2 * U) + U/3 = usable_disk_space +Without mirrors: U + U/3 = usable_disk_space +``` + +Guidelines for temporary file space and user data space assume a typical analytic workload. Highly concurrent workloads or workloads with queries that require very large amounts of temporary space can benefit from reserving a larger working area. Typically, overall system throughput can be increased while decreasing work area usage through proper workload management. Additionally, temporary space and user space can be isolated from each other by specifying that they reside on different tablespaces.
+ +## Calculating User Data Size + +As with all databases, the size of your raw data will be slightly larger once it is loaded into the database. On average, raw data will be about 1.4 times larger on disk after it is loaded into the database, but could be smaller or larger depending on the data types you are using, table storage type, in-database compression, and so on. + +- Page Overhead - When your data is loaded into Apache Cloudberry, it is divided into pages of 32KB each. Each page has 20 bytes of page overhead. +- Row Overhead - In a regular 'heap' storage table, each row of data has 24 bytes of row overhead. An 'append-optimized' storage table has only 4 bytes of row overhead. +- Attribute Overhead - For the data values themselves, the size associated with each attribute value is dependent upon the data type chosen. As a general rule, you want to use the smallest data type possible to store your data \(assuming you know the possible values a column will have\). +- Indexes - In Apache Cloudberry, indexes are distributed across the segment hosts as is table data. The default index type in Apache Cloudberry is B-tree. Because index size depends on the number of unique values in the index and the data to be inserted, precalculating the exact size of an index is impossible. However, you can roughly estimate the size of an index using these formulas. + + ``` + B-tree: unique_values * (data_type_size + 24 bytes) + + Bitmap: (unique_values * number_of_rows * 1 bit * compression_ratio / 8) + (unique_values * 32) + ``` + +## Calculating Space Requirements for Metadata and Logs + +On each segment host, you will also want to account for space for Apache Cloudberry log files and metadata: + +- **System Metadata** — For each Apache Cloudberry segment instance \(primary or mirror\) or coordinator instance running on a host, estimate approximately 20 MB for the system catalogs and metadata.
+- **Write Ahead Log** — For each Apache Cloudberry segment \(primary or mirror\) or coordinator instance running on a host, allocate space for the write ahead log \(WAL\). The WAL is divided into segment files of 64 MB each. At most, the maximum number of WAL files will be: + + ``` + max_wal_size / 64MB + ``` + + You can use this to estimate space requirements for WAL. The default checkpoint\_segments setting for an Apache Cloudberry instance is 8, meaning 1088 MB WAL space allocated for each segment or coordinator instance on a host. + +- **Apache Cloudberry Log Files** — Each segment instance and the coordinator instance generates database log files, which will grow over time. Sufficient space should be allocated for these log files, and some type of log rotation facility should be used to ensure that the log files do not grow too large. + diff --git a/versioned_docs/version-2.x/deployment/create_data_dirs.md b/versioned_docs/version-2.x/deployment/create_data_dirs.md new file mode 100644 index 00000000000..dadd6ef308c --- /dev/null +++ b/versioned_docs/version-2.x/deployment/create_data_dirs.md @@ -0,0 +1,61 @@ +--- +title: Creating the Data Storage Areas +--- + +## Creating Data Storage Areas on the Coordinator and Standby Coordinator Hosts + +A data storage area is required on the Apache Cloudberry coordinator and standby coordinator hosts to store Apache Cloudberry system data such as catalog data and other system metadata. + +### To create the data directory location on the coordinator + +The data directory location on the coordinator is different than those on the segments. The coordinator does not store any user data, only the system catalog tables and system metadata are stored on the coordinator instance, therefore you do not need to designate as much storage space as on the segments. + +1. Create or choose a directory that will serve as your coordinator data storage area.
This directory should have sufficient disk space for your data and be owned by the `gpadmin` user and group. For example, run the following commands as `root`: + + ``` + # mkdir -p /data/coordinator + ``` + +2. Change ownership of this directory to the `gpadmin` user. For example: + + ``` + # chown gpadmin:gpadmin /data/coordinator + ``` + +3. Using [gpssh](/sys-utilities/gpssh.md), create the coordinator data directory location on your standby coordinator as well. For example: + + ``` + # source /usr/local/cloudberry-db/cloudberry-env.sh + # gpssh -h scdw -e 'sudo mkdir -p /data/coordinator' + # gpssh -h scdw -e 'sudo chown gpadmin:gpadmin /data/coordinator' + ``` + +## Creating Data Storage Areas on Segment Hosts + +Data storage areas are required on the Apache Cloudberry segment hosts for primary segments. Separate storage areas are required for mirror segments. + +### To create the data directory locations on all segment hosts + +1. On the coordinator host, log in as `root`: + + ``` + # su + ``` + +2. Create a file called `hostfile_gpssh_segonly`. This file should have only one machine configured host name for each segment host. For example, if you have three segment hosts: + + ``` + sdw1 + sdw2 + sdw3 + ``` + +3. Using [gpssh](/sys-utilities/gpssh.md), create the primary and mirror data directory locations on all segment hosts at once using the `hostfile_gpssh_segonly` file you just created. 
For example: + + ``` + # source /usr/local/cloudberry-db/cloudberry-env.sh + # gpssh -f hostfile_gpssh_segonly -e 'sudo mkdir -p /data/primary' + # gpssh -f hostfile_gpssh_segonly -e 'sudo mkdir -p /data/mirror' + # gpssh -f hostfile_gpssh_segonly -e 'sudo chown -R gpadmin /data/*' + ``` + diff --git a/versioned_docs/version-2.x/deployment/index.md b/versioned_docs/version-2.x/deployment/index.md index f8444617876..d6902e5adad 100644 --- a/versioned_docs/version-2.x/deployment/index.md +++ b/versioned_docs/version-2.x/deployment/index.md @@ -1,87 +1,17 @@ --- -title: "Build Apache Cloudberry from Source: Complete Guide" +title: "Apache Cloudberry Deployment" --- -This document is intended for developers interested in exploring and potentially contributing to Apache Cloudberry. The build environment described here is optimized for development and testing purposes only. +This guide provides instructions for deploying Apache Cloudberry in production environments. It covers the full deployment lifecycle from hardware planning to cluster initialization and login. -## Target audience +The guide includes the following topics: -- Developers interested in contributing to Apache Cloudberry. -- PostgreSQL developers wanting to explore Cloudberry's extensions. -- Database enthusiasts interested in learning about distributed query processing. -- Anyone considering joining the Apache Cloudberry community. - -The build process described here enables development activities such as: - -- Debugging and testing new features. -- Exploring the codebase with development tools. -- Running test suites and validation checks. -- Making and testing code modifications. 
- -:::tip -If you are new to Apache Cloudberry or PostgreSQL development: - -- Consider building PostgreSQL first to understand the basic workflow -- Join the project's [mailing lists](/community/mailing-lists) to connect with other developers -- Review the project's issue tracker to understand current development priorities -- Be prepared for longer build times and iterative testing as you explore the codebase -::: - -## Process of building Apache Cloudberry - -The build process for Apache Cloudberry (Incubating) closely resembles that of PostgreSQL. If you have previously set up development environments for PostgreSQL, you'll find the steps for Cloudberry very familiar. - -For those new to Cloudberry or PostgreSQL, we recommend starting with a PostgreSQL build first. The PostgreSQL development community has established excellent documentation and tooling to guide you through the process. Familiarizing yourself with PostgreSQL's build process will make transitioning to Cloudberry significantly easier. - -## Prerequisites - -### Provision a Rocky Linux 8+ / Ubuntu 20.04+ Environment - -- Use any platform to create a virtual machine or container: - - - **Cloud providers**: You can use the Rocky Linux 8+ or Ubuntu 20.04+ images provided by the cloud providers, such as AWS, Google Cloud, Microsoft Azure, and more. - - **VirtualBox**: Use the official [Rocky Linux 8+](https://rockylinux.org/download) / [Ubuntu 20.04+](https://ubuntu.com/download) ISO or Vagrant boxes. - - **Docker**: These instructions were validated under Rocky Linux 8+ and Ubuntu 20.04 official base docker images, but should work with any of their based container. 
- - For example, you can run the following command to start a Rocky Linux 8 container: - - ```bash - docker run -it --shm-size=2gb -h cdw rockylinux/rockylinux:8 - - # Start a Ubuntu 20.04 container: - # docker run -it --shm-size=2gb -h cdw ubuntu:20.04 - ``` - - The hostname `cdw` (Coordinator Data Warehouse) is just an example of how we started the container for testing. - - To ensure test suites run successfully, you may need to increase the container's shared memory using `--shm-size=2gb`. Test failures can occur when the Cloudberry cluster lacks sufficient shared memory resources. - -- Ensure the VM or container has: - - Internet connectivity for package installation. - - SSH or console access for user interaction. - - Sufficient resources (CPU, memory, and storage) for a development environment. - -:::note -Specific steps to provision the environment are not covered in this guide because they vary by platforms. This guide assumes you have successfully created a VM or container and can log in as the default user (for example, `rocky` for Rocky Linux on AWS). -::: - -### System requirements - -Minimum requirements for development environment: - -- CPU: 4 cores recommended (2 cores minimum) - - CPU architecture: x86, x86_64, ARM, MIPS -- RAM: 8GB recommended (4GB minimum) -- Storage: 20GB free space recommended -- Network: Broadband internet connection for package downloads - -## Build Apache Cloudberry from source code - -The following steps guide you through building Apache Cloudberry from source code on Rocky Linux 8+ or Ubuntu 20.04+. The process is similar for both operating systems, with minor differences in package management, dependencies and software versions between these two distributions. 
- -Just go ahead and follow the steps below to build Apache Cloudberry from source code: - -```mdx-code-block -import DocCardList from '@theme/DocCardList'; - - -``` \ No newline at end of file +- **[Platform Requirements](./platform-requirements)** — Hardware and software requirements for Apache Cloudberry hosts. +- **[Estimating Storage Capacity](./capacity_planning)** — How to estimate disk space needed for your data. +- **[Configuring Your Systems](./prepare-to-deploy)** — OS-level configuration including kernel parameters, SELinux, firewall, NTP, and user setup. +- **[Deploy Using RPM/DEB Package](./install_cloudberry)** — Installing the Apache Cloudberry software on all hosts. +- **[Creating the Data Storage Areas](./create_data_dirs)** — Setting up data directories for coordinator and segment instances. +- **[Validating Hardware and Network](./validate)** — Verifying network, disk I/O, and memory bandwidth performance. +- **[Initialize Apache Cloudberry](./init_cloudberry)** — Initializing the database cluster using `gpinitsystem`. +- **[Logging into Apache Cloudberry](./login_cloudberry)** — Connecting to the database after deployment. +- **[Deploy with a Single Computing Node](./single-node)** — Deploying Apache Cloudberry without segment nodes for development or testing. diff --git a/versioned_docs/version-2.x/deployment/init_cloudberry.md b/versioned_docs/version-2.x/deployment/init_cloudberry.md new file mode 100644 index 00000000000..dcc662606aa --- /dev/null +++ b/versioned_docs/version-2.x/deployment/init_cloudberry.md @@ -0,0 +1,278 @@ +--- +title: Initializing Apache Cloudberry +--- + +Because Apache Cloudberry is distributed, the process for initializing a Apache Cloudberry Database management system involves initializing several individual PostgreSQL database instances (called *segment instances* in Cloudberry). 
+ +Each database instance \(the coordinator and all segments\) must be initialized across all of the hosts in the system in such a way that they can all work together as a unified DBMS. Cloudberry provides its own version of `initdb` called [gpinitsystem](/sys-utilities/gpinitsystem.md), which takes care of initializing the database on the coordinator and on each segment instance, and starting each instance in the correct order. + +After the Apache Cloudberry database system has been initialized and started, you can then create and manage databases as you would in a regular PostgreSQL DBMS by connecting to the Cloudberry coordinator. + +When performing the following initialization tasks, you must be logged into the coordinator host as the `gpadmin` user, and to run Apache Cloudberry utilities, you must source the `cloudberry-env.sh` file to set Apache Cloudberry environment variables. For example, if you are logged into the coordinator, run these commands. + +``` +$ su - gpadmin +$ source /usr/local/cloudberry-db/cloudberry-env.sh +``` + +## Creating the Initialization Host File + +The [gpinitsystem](/sys-utilities/gpinitsystem.md) utility requires a host file that contains the list of addresses for each segment host. The initialization utility determines the number of segment instances per host by the number of host addresses listed per host times the number of data directory locations specified in the `gpinitsystem_config` file. + +This file should only contain segment host addresses (not the coordinator or standby coordinator). For segment machines with multiple, unbonded network interfaces, this file should list the host address names for each interface — one per line. + +:::note +The Apache Cloudberry segment host naming convention is sdwN where sdw is a prefix and N is an integer. For example, `sdw2` and so on. If hosts have multiple unbonded NICs, the convention is to append a dash (`-`) and number to the host name.
For example, `sdw1-1` and `sdw1-2` are the two interface names for host `sdw1`. However, NIC bonding is recommended to create a load-balanced, fault-tolerant network. +::: + +### To create the initialization host file + +1. Create a file named `hostfile_gpinitsystem`. In this file add the host address name\(s\) of your *segment* host interfaces, one name per line, no extra lines or spaces. For example, if you have four segment hosts with two unbonded network interfaces each: + + ``` + sdw1-1 + sdw1-2 + sdw2-1 + sdw2-2 + sdw3-1 + sdw3-2 + sdw4-1 + sdw4-2 + ``` + +2. Save and close the file. + +:::note +If you are not sure of the host names and/or interface address names used by your machines, look in the `/etc/hosts` file. +::: + +## Creating the Apache Cloudberry Configuration File + +Your Cloudberry configuration file tells the [gpinitsystem](/sys-utilities/gpinitsystem.md) utility how you want to configure your Apache Cloudberry system. An example configuration file can be found in `$GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config`. + +### To create a gpinitsystem_config file + +1. Make a copy of the `gpinitsystem_config` file to use as a starting point. For example: + + ``` + $ mkdir -p gpconfigs + $ cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_config \ + /home/gpadmin/gpconfigs/gpinitsystem_config + ``` + +2. Open the file you just copied in a text editor. + + Set all of the required parameters according to your environment. See [gpinitsystem](/sys-utilities/gpinitsystem.md) for more information. An Apache Cloudberry system must contain a coordinator instance and at *least two* segment instances (even if setting up a single node system). + + The `DATA_DIRECTORY` parameter is what determines how many segments per host will be created. If your segment hosts have multiple network interfaces, and you used their interface address names in your host file, the number of segments will be evenly spread over the number of available interfaces.
+ + The `COORDINATOR_HOSTNAME` is the hostname of the coordinator node. + + The `COORDINATOR_DIRECTORY` is the data directory of the coordinator node. + + To specify `PORT_BASE`, review the port range specified in the `net.ipv4.ip_local_port_range` parameter in the `/etc/sysctl.conf` file. + + Here is an example of the *required* parameters in the `gpinitsystem_config` file: + + ``` + SEG_PREFIX=gpseg + PORT_BASE=6000 + declare -a DATA_DIRECTORY=(/data1/primary /data1/primary /data1/primary /data2/primary /data2/primary /data2/primary) + COORDINATOR_HOSTNAME=cdw + COORDINATOR_DIRECTORY=/data/coordinator + COORDINATOR_PORT=5432 + TRUSTED_SHELL=ssh + CHECK_POINT_SEGMENTS=8 + ENCODING=UNICODE + ``` + +3. (Optional) If you want to deploy mirror segments, uncomment and set the mirroring parameters according to your environment. To specify `MIRROR_PORT_BASE`, review the port range specified under the `net.ipv4.ip_local_port_range` parameter in the `/etc/sysctl.conf` file. Here is an example of the *optional* mirror parameters in the `gpinitsystem_config` file: + + ``` + MIRROR_PORT_BASE=7000 + declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror) + ``` + + :::note + You can initialize your Cloudberry system with primary segments only and deploy mirrors later using the [gpaddmirrors](/sys-utilities/gpaddmirrors.md) utility. + ::: + +4. To create a default database during initialization, you need to fill in the database name. In this example, the `warehouse` database is created during initialization: + + ``` + DATABASE_NAME=warehouse + ``` + +5. Save and close the file. + +## Running the Initialization Utility + +The [gpinitsystem](/sys-utilities/gpinitsystem.md) utility will create an Apache Cloudberry system using the values defined in the configuration file. + +These steps assume you are logged in as the `gpadmin` user and have sourced the `cloudberry-env.sh` file to set Apache Cloudberry environment variables.
+ +### To run the initialization utility + +1. Run the following command referencing the path and file name of your initialization configuration file (`gpinitsystem_config`) and host file (`hostfile_gpinitsystem`). For example: + + ``` + $ cd ~ + $ gpinitsystem -c gpconfigs/gpinitsystem_config -h gpconfigs/hostfile_gpinitsystem + ``` + + In the command above, `-c` specifies the configuration file and `-h` specifies the computing node list. + + For a fully redundant system (with a standby coordinator and a *spread* mirror configuration) include the `-s` and `--mirror-mode=spread` options. For example: + + ``` + $ gpinitsystem -c gpconfigs/gpinitsystem_config -h gpconfigs/hostfile_gpinitsystem \ + -s --mirror-mode=spread + ``` + + During a new cluster creation, you may use the `-O output_configuration_file` option to save the cluster configuration details in a file. For example: + + ``` + $ gpinitsystem -c gpconfigs/gpinitsystem_config -O gpconfigs/config_template + ``` + + This output file can be edited and used at a later stage as the input file of the `-I` option, to create a new cluster or to recover from a backup. See [gpinitsystem](/sys-utilities/gpinitsystem.md) for further details. + + :::note + Calling `gpinitsystem` with the `-O` option does not initialize the Apache Cloudberry system; it merely generates and saves a file with cluster configuration details. + ::: + +2. The utility will verify your setup information and make sure it can connect to each host and access the data directories specified in your configuration. If all of the pre-checks are successful, the utility will prompt you to confirm your configuration. For example: + + ``` + => Continue with Cloudberry creation? Yy/Nn + ``` + +3. Press `y` to start the initialization. +4. The utility will then begin setup and initialization of the coordinator instance and each segment instance in the system. Each segment instance is set up in parallel. 
Depending on the number of segments, this process can take a while. +5. At the end of a successful setup, the utility will start your Apache Cloudberry system. You should see: + + ``` + => Apache Cloudberry instance successfully created. + ``` + +### (Optional) Initializing a Standby Coordinator + +:::note +If you included the `-s ` option in the `gpinitsystem` command, the standby coordinator is already initialized and you can skip this step. +::: + +If you did not include the `-s` option when running `gpinitsystem`, you can initialize a standby coordinator separately after the cluster is up and running using the `gpinitstandby` utility. + +The standby coordinator serves as a warm backup of the coordinator instance. If the primary coordinator becomes unavailable, the standby can be activated to take over. + +1. Ensure the standby coordinator host has the same data directory created and owned by `gpadmin`: + + ```bash + $ gpssh -h scdw -e 'sudo mkdir -p /data/coordinator' + $ gpssh -h scdw -e 'sudo chown gpadmin:gpadmin /data/coordinator' + ``` + +2. Run `gpinitstandby` from the coordinator host as `gpadmin`: + + ```bash + $ gpinitstandby -s scdw + ``` + +3. Verify the standby coordinator is synchronized: + + ```bash + $ gpstate -f + ``` + + The output should show the standby coordinator with `Sync state: sync`. + +### Troubleshooting Initialization Problems + +If the utility encounters any errors while setting up an instance, the entire process will fail, and could possibly leave you with a partially created system. Refer to the error messages and logs to determine the cause of the failure and where in the process the failure occurred. Log files are created in `~/gpAdminLogs`. + +Depending on when the error occurred in the process, you may need to clean up and then try the `gpinitsystem` utility again. 
For example, if some segment instances were created and some failed, you may need to stop `postgres` processes and remove any utility-created data directories from your data storage area(s). A backout script is created to help with this cleanup if necessary. + +#### Using the Backout Script + +If the `gpinitsystem` utility fails, it will create the following backout script if it has left your system in a partially installed state: + +`~/gpAdminLogs/backout_gpinitsystem_<user>_<timestamp>` + +You can use this script to clean up a partially created Apache Cloudberry system. This backout script will remove any utility-created data directories, `postgres` processes, and log files. After correcting the error that caused `gpinitsystem` to fail and running the backout script, you should be ready to retry initializing your Apache Cloudberry array. + +The following example shows how to run the backout script: + +``` +$ bash ~/gpAdminLogs/backout_gpinitsystem_gpadmin_20071031_121053 +``` + +## Setting the Apache Cloudberry Timezone + +As a best practice, configure Apache Cloudberry and the host systems to use a known, supported timezone. Apache Cloudberry uses a timezone from a set of internally stored PostgreSQL timezones. Setting the Apache Cloudberry timezone prevents Apache Cloudberry from selecting a timezone each time the cluster is restarted and sets the timezone for the Apache Cloudberry coordinator and segment instances. + +Use the [gpconfig](/sys-utilities/gpconfig.md) utility to show and set the Apache Cloudberry timezone. For example, these commands show the Apache Cloudberry timezone and set the timezone to `US/Pacific`. + +``` +$ gpconfig -s TimeZone +$ gpconfig -c TimeZone -v 'US/Pacific' +``` + +You must restart Apache Cloudberry after changing the timezone. The command `gpstop -ra` restarts Apache Cloudberry. The catalog view `pg_timezone_names` provides Apache Cloudberry timezone information. 
+ +## Setting Apache Cloudberry Environment Variables + +You must set environment variables in the Apache Cloudberry user (`gpadmin`) environment that runs Apache Cloudberry on the Apache Cloudberry coordinator and standby coordinator hosts. A `cloudberry-env.sh` file is provided in the Apache Cloudberry installation directory with environment variable settings for Apache Cloudberry. + +The Apache Cloudberry management utilities also require that the `COORDINATOR_DATA_DIRECTORY` environment variable be set. This should point to the directory created by the `gpinitsystem` utility in the coordinator data directory location. + +:::note +The `cloudberry-env.sh` script changes the operating environment in order to support running the Apache Cloudberry-specific utilities. These same changes to the environment can negatively affect the operation of other system-level utilities, such as `ps` or `yum`. Use separate accounts for performing system administration and database administration, instead of attempting to perform both functions as `gpadmin`. +::: + +These steps ensure that the environment variables are set for the `gpadmin` user after a system reboot. + +### To set up the gpadmin environment for Apache Cloudberry + +1. Open the `gpadmin` profile file (such as `.bashrc`) in a text editor. For example: + + ``` + $ vi ~/.bashrc + ``` + +2. Add lines to this file to source the `cloudberry-env.sh` file and set the `COORDINATOR_DATA_DIRECTORY` environment variable. For example: + + ``` + source /usr/local/cloudberry-db/cloudberry-env.sh + export COORDINATOR_DATA_DIRECTORY=/data/coordinator/gpseg-1 + ``` + +3. (Optional) You may also want to set some client session environment variables such as `PGPORT`, `PGUSER` and `PGDATABASE` for convenience. For example: + + ``` + export PGPORT=5432 + export PGUSER=gpadmin + export PGDATABASE=warehouse + ``` + +4. 
(Optional) If you use RHEL/Oracle/Rocky Linux, add the following line to the end of the `.bashrc` file to enable using the `ps` command in the `cloudberry-env.sh` environment: + + ``` + export LD_PRELOAD=/lib64/libz.so.1 ps + ``` + +5. Save and close the file. +6. After editing the profile file, source it to make the changes active. For example: + + ``` + $ source ~/.bashrc + ``` + +7. If you have a standby coordinator host, copy your environment file to the standby coordinator as well. For example: + + ``` + $ cd ~ + $ scp .bashrc <standby_hostname>:`pwd` + ``` + +:::note +The `.bashrc` file should not produce any output. If you wish to have a message display to users upon logging in, use the `.bash_profile` file instead. +::: diff --git a/versioned_docs/version-2.x/deployment/install_cloudberry.md b/versioned_docs/version-2.x/deployment/install_cloudberry.md new file mode 100644 index 00000000000..051c1a29bb6 --- /dev/null +++ b/versioned_docs/version-2.x/deployment/install_cloudberry.md @@ -0,0 +1,177 @@ +--- +title: Installing Using RPM/DEB Package +--- + +This document describes how to manually deploy Apache Cloudberry on physical/virtual machines using RPM or DEB packages. Before reading this document, it is recommended to first read the [Software and Hardware Configuration Requirements](./platform-requirements.md) and [Prepare to Deploy Apache Cloudberry](./prepare-to-deploy.md). + +You must install Apache Cloudberry on each host machine of the Apache Cloudberry system. + +:::info +Starting from Apache Cloudberry 2.1, RPM and DEB packages are officially provided for download. RPM packages support Rocky Linux 8/9, RHEL 8/9, and compatible distributions. DEB packages support Ubuntu 22.04. + +After downloading, please verify the signatures and checksums of the files. +::: + + +## Install the package + +After the preparation, it is time to install Apache Cloudberry. 
You need to download the corresponding package from [Apache Cloudberry Releases](https://cloudberry.apache.org/releases), and then install the database on each node using the installation package. + +1. Download and copy the package to the home directory of `gpadmin` on the coordinator, standby coordinator, and every segment host machine. + +2. With sudo (or as `root`), install the Apache Cloudberry package on each host machine using your system's package manager software: + + ```bash + # For RPM (Rocky Linux, RHEL, etc.) + sudo dnf install <RPM_PACKAGE_PATH> + # Or for older systems: sudo yum install <RPM_PACKAGE_PATH> + + # For DEB (Ubuntu) + sudo apt install <DEB_PACKAGE_PATH> --fix-broken + # Or alternatively: sudo dpkg -i <DEB_PACKAGE_PATH> && sudo apt-get install -f + ``` + + The `yum`, `dnf`, and `apt` commands automatically install software dependencies, copy the Apache Cloudberry software files into a version-specific directory under `/usr/local` (`/usr/local/cloudberry-db-<version>`), and create the symbolic link `/usr/local/cloudberry-db` to the installation directory. + +3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry-db/` directory. + + ```bash + sudo chown -R gpadmin:gpadmin /usr/local/cloudberry* + ``` + +## (Optional) Installing to a non-default directory + +:::caution +Custom installation directory is not supported in Apache Cloudberry 2.1. This feature is planned for Apache Cloudberry 2.2. The instructions below are provided for future reference only. +::: + +### For RPM-based systems (Rocky Linux, RHEL, etc.) + +On RHEL-based systems, you can use the `rpm` command with the `--prefix` option to install Apache Cloudberry to a non-default directory (instead of under `/usr/local`). Note that using `rpm` directly does not automatically install dependencies; you must manually install them on each host. + +1. Download and copy the Apache Cloudberry package to the `gpadmin` user's home directory on the coordinator, standby coordinator, and every segment host machine. + +2. 
Manually install the dependencies on each host system: + + ```bash + $ sudo dnf install openssh-server openssh-clients sudo shadow-utils \ + bash procps-ng ca-certificates python3 apr bzip2 krb5-libs libevent \ + libicu liburing libuuid libxml2 libyaml libzstd lz4 ncurses openldap \ + openssl pam pcre2 perl protobuf readline zlib libuv iproute \ + net-tools which rsync keyutils libssh libpsl xz + ``` + +3. Use `rpm` with the `--prefix` option to install the Apache Cloudberry package to your chosen installation directory on each host machine: + + ```bash + $ sudo rpm --install ./<RPM_PACKAGE_NAME> --prefix=<directory> + ``` + + The `rpm` command copies the Apache Cloudberry software files into a version-specific directory under your chosen `<directory>`, and creates a symbolic link `<directory>/cloudberry-db` to the versioned directory. + +4. Change the owner and group of the installed files to `gpadmin`: + + ```bash + $ sudo chown -R gpadmin:gpadmin <directory>/cloudberry* + ``` + +If you install to a non-default directory using `rpm`, you will need to continue using `rpm` (instead of `dnf`) to perform minor version upgrades. + +## Enabling Passwordless SSH + +The `gpadmin` user on each Cloudberry host must be able to SSH from any host in the cluster to any other host in the cluster without entering a password or passphrase (called "passwordless SSH"). If you enable passwordless SSH from the coordinator host to every other host in the cluster ("1-*n* passwordless SSH"), you can use the `gpssh-exkeys` command-line utility to enable passwordless SSH from every host to every other host ("*n*-*n* passwordless SSH"). + +1. Log in to the coordinator host as the `gpadmin` user, and use the `gpadmin` user for subsequent operations. +2. Source the `path` file in the Apache Cloudberry installation directory. 
+ + ``` + $ source /usr/local/cloudberry-db-<version>/cloudberry-env.sh + ``` + + :::caution + Since Cloudberry 2.1, the path file name has been changed from `greenplum_path.sh` to `cloudberry-env.sh`. See the [blog](https://cloudberry.apache.org/blog/from-greenplum-path.sh-to-cloudberry-env.sh/) for more details. + ::: + + :::note + Add the above `source` command to the `gpadmin` user's `.bashrc` or other shell startup file so that the Apache Cloudberry path and environment variables are set whenever you log in as `gpadmin`. + ::: + +3. Use the `ssh-copy-id` command to add the `gpadmin` user's public key to the `authorized_keys` SSH file on every other host in the cluster. + + ``` + $ ssh-copy-id scdw + $ ssh-copy-id sdw1 + $ ssh-copy-id sdw2 + $ ssh-copy-id sdw3 + . . . + ``` + + This enables 1-*n* passwordless SSH. You will be prompted to enter the `gpadmin` user's password for each host. If you have the `sshpass` command on your system, you can use a command like the following to avoid the prompt. + + ``` + $ SSHPASS=<password> sshpass -e ssh-copy-id scdw + ``` + +4. In the `gpadmin` home directory, create a file named `hostfile_exkeys` that has the machine configured host names and host addresses (interface names) for each host in your Apache Cloudberry system (coordinator, standby coordinator, and segment hosts). Make sure there are no blank lines or extra spaces. Check the `/etc/hosts` file on your systems for the correct host names to use for your environment. + + For example, if you have a coordinator, standby coordinator, and three segment hosts with two unbonded network interfaces per host, your file would look something like this: + + ``` + cdw + cdw-1 + cdw-2 + scdw + scdw-1 + scdw-2 + sdw1 + sdw1-1 + sdw1-2 + sdw2 + sdw2-1 + sdw2-2 + sdw3 + sdw3-1 + sdw3-2 + ``` + +5. Run the `gpssh-exkeys` utility with your `hostfile_exkeys` file to enable *n*-*n* passwordless SSH for the `gpadmin` user. 
+ + ``` + $ gpssh-exkeys -f hostfile_exkeys + ``` + +## Confirm the installation + +To make sure the Apache Cloudberry software was installed and configured correctly, run the following confirmation steps from your Apache Cloudberry coordinator host. If necessary, correct any problems before continuing on to the next task. + +1. Log in to the coordinator host as `gpadmin`: + + ```bash + $ su - gpadmin + ``` + +2. Use the `gpssh` utility to see if you can log in to all hosts without a password prompt, and to confirm that the Apache Cloudberry software was installed on all hosts. Use the `hostfile_exkeys` file you created earlier. For example: + + ```bash + $ gpssh -f hostfile_exkeys -e 'ls -l /usr/local/cloudberry-db-*' + ``` + + If the installation was successful, you should be able to log in to all hosts without a password prompt. All hosts should show that they have the same contents in their installation directories, and that the directories are owned by the `gpadmin` user. + + If you are prompted for a password, run the following command to redo the ssh key exchange: + + ```bash + $ gpssh-exkeys -f hostfile_exkeys + ``` + +## About Your Apache Cloudberry Installation + +- **`cloudberry-env.sh`** — This file contains the environment variables for Apache Cloudberry. +- **bin** — This directory contains the Apache Cloudberry management utilities. This directory also contains the PostgreSQL client and server programs, most of which are also used in Apache Cloudberry. +- **docs/cli_help** — This directory contains help files for Apache Cloudberry command-line utilities. +- **docs/cli_help/gpconfigs** — This directory contains sample `gpinitsystem` configuration files and host files that can be modified and used when installing and initializing an Apache Cloudberry system. +- **ext** — Bundled programs (such as Python) used by some Apache Cloudberry utilities. +- **include** — The C header files for Apache Cloudberry. 
+- **lib** — Apache Cloudberry and PostgreSQL library files. +- **sbin** — Supporting/Internal scripts and programs. +- **share** — Shared files for Apache Cloudberry. diff --git a/versioned_docs/version-2.x/deployment/login_cloudberry.md b/versioned_docs/version-2.x/deployment/login_cloudberry.md new file mode 100644 index 00000000000..2d04686a94e --- /dev/null +++ b/versioned_docs/version-2.x/deployment/login_cloudberry.md @@ -0,0 +1,42 @@ +--- +title: Logging into Apache Cloudberry +--- + +Now you have successfully deployed Apache Cloudberry. To log into the database, refer to the following command: + +```bash +psql -h <host> -p <port> -U <user> -d <database> +``` + +In the command above: + +- `<host>` is the IP address of the coordinator node of the Apache Cloudberry server. +- `<port>` is the port number of Apache Cloudberry, which is `5432` by default. +- `<user>` is the user name of the database. +- `<database>` is the name of the database to connect to. + +After you run the `psql` command, the system will prompt you to enter the database password. After you enter the correct password, you will successfully log into Apache Cloudberry and can perform SQL queries and operations. Make sure that you have the correct permissions to access the target database. + +```sql +[gpadmin@cdw ~]$ psql warehouse +psql (14.4, server 14.4) +Type "help" for help. 
+ +warehouse=# SELECT * FROM gp_segment_configuration; + dbid | content | role | preferred_role | mode | status | port | hostname | address | datadir | warehouseid +------+---------+------+----------------+------+--------+------+----------+---------+---------------------------+------------- + 1 | -1 | p | p | n | u | 5432 | cdw | cdw | /data/coordinator/gpseg-1 | 0 + 2 | 0 | p | p | s | u | 6000 | sdw1 | sdw1 | /data/primary/gpseg0 | 0 + 8 | 0 | m | m | s | u | 7000 | sdw2 | sdw2 | /data/mirror/gpseg0 | 0 + 3 | 1 | p | p | s | u | 6001 | sdw1 | sdw1 | /data/primary/gpseg1 | 0 + 9 | 1 | m | m | s | u | 7001 | sdw3 | sdw3 | /data/mirror/gpseg1 | 0 + 4 | 2 | p | p | s | u | 6000 | sdw2 | sdw2 | /data/primary/gpseg2 | 0 + 10 | 2 | m | m | s | u | 7000 | sdw3 | sdw3 | /data/mirror/gpseg2 | 0 + 5 | 3 | p | p | s | u | 6001 | sdw2 | sdw2 | /data/primary/gpseg3 | 0 + 11 | 3 | m | m | s | u | 7001 | sdw1 | sdw1 | /data/mirror/gpseg3 | 0 + 6 | 4 | p | p | s | u | 6000 | sdw3 | sdw3 | /data/primary/gpseg4 | 0 + 12 | 4 | m | m | s | u | 7000 | sdw1 | sdw1 | /data/mirror/gpseg4 | 0 + 7 | 5 | p | p | s | u | 6001 | sdw3 | sdw3 | /data/primary/gpseg5 | 0 + 13 | 5 | m | m | s | u | 7001 | sdw2 | sdw2 | /data/mirror/gpseg5 | 0 +(13 rows) +``` diff --git a/versioned_docs/version-2.x/deployment/platform-requirements.md b/versioned_docs/version-2.x/deployment/platform-requirements.md new file mode 100644 index 00000000000..0bb94e54606 --- /dev/null +++ b/versioned_docs/version-2.x/deployment/platform-requirements.md @@ -0,0 +1,198 @@ +--- +title: Platform Requirements +--- + +This topic describes the Apache Cloudberry platform and operating system software requirements for deploying the software to on-premise hardware, or to public cloud services such as AWS, GCP, or Azure. + +## Hardware requirements + +### Supported deployment environments + +Apache Cloudberry supports deployment on both physical machines and virtual machines. 
Below are the recommended configurations for the environments. + +#### For development or test environments + +| Component | CPU | Memory | Disk type | Network | Number of instances | +| ------- | ---- | ---- | -------- | -------------------- | -------- | +| Coordinator | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | +| Segment | 4 cores | 8 GB | SSD | 10 Gbps NIC (2 preferred) | 1+ | + +#### For production environments + +| Component | CPU | Memory | Disk type | Network | Instance count | +| ------- | ------ | ------ | -------- | -------------------- | -------- | +| Coordinator | 16+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | +| Segment | 8+ cores | 32+ GB | SSD | 10 Gbps NIC (2 preferred) | 2+ | + +Apache Cloudberry can also be deployed on public cloud platforms such as AWS, Azure, and GCP. The hardware requirements for cloud-based deployments might vary based on the instance types selected on these platforms. Refer to the specific cloud provider’s documentation for instance configurations that meet or exceed the recommended hardware specifications. + +#### Minimum hardware requirements + +The following lists minimum recommended specifications for hardware servers intended to support Apache Cloudberry on Linux systems in a production environment. All host servers in your Apache Cloudberry system must have the same hardware and software configuration. Apache Cloudberry also provides hardware build guides for its certified hardware platforms. Work with a Cloudberry Systems Engineer to review your anticipated environment to ensure an appropriate hardware configuration for Apache Cloudberry. 
+ +- Minimum CPU: Any x86_64/AARCH64 compatible CPU +- Minimum Memory: 16 GB RAM per server +- Disk Space Requirements: + - 150 MB per host for Cloudberry installation + - Approximately 300 MB per segment instance for metadata + - Cap disk capacity at 70% full to accommodate temporary files and prevent performance degradation +- Network Requirements: 10 Gigabit Ethernet within the array; NIC bonding is recommended when multiple interfaces are present. Apache Cloudberry can use either the IPv4 or IPv6 protocol. + +**Hyperthreading** + +Resource Groups - one of the key Apache Cloudberry features - can control transaction concurrency, CPU and memory resources, workload isolation, and dynamic bursting. + +When using resource groups to control resource allocation on Intel-based systems, consider switching off Hyper-Threading (HT) in the server BIOS (for Intel cores the default is ON). Switching off HT might cause a small throughput reduction (less than 15%), but can achieve greater isolation between resource groups, and higher query performance with lower concurrency workloads. + +### CPU architecture support + +Apache Cloudberry supports running on both **x86_64** and **ARM (AARCH64)** CPU architectures, making it suitable for a wide range of hardware platforms including cloud instances and ARM-based servers. + +| Architecture | Source Build | Convenience binaries | +|---|---|---| +| x86_64 | Supported | Available (2.1+) | +| ARM (AARCH64) | Supported | Planned for 2.2 | + +For ARM-based deployments in the current release, you can [build Apache Cloudberry from source](../build/index.md). + +### Storage + +- To prevent a high data disk load from affecting the operating system's normal I/O response, mount the operating system and the data disk on separate disks. +- If the host configuration allows, it is recommended to use 2 independent SAS disks as the system disk (RAID1), and another 10 SAS disks as the data disk (RAID5). 
+- It is recommended to use LVM logical volumes to manage disks for more flexible disk configuration. + +**For the system disk**: The system disk should use an independent disk to avoid impact on the operating system when data disks are heavily loaded. It is recommended that the system disk be configured in dual-disk RAID 1 and that the file system of the system disk be XFS. + +**For data disks**: It is recommended to use LVM to manage data disks. According to test statistics, creating an independent logical volume for each physical volume can achieve the best disk performance. For example: + +```bash +pvcreate /dev/vdb +pvcreate /dev/vdc +pvcreate /dev/vdd +vgcreate data /dev/vdb /dev/vdc /dev/vdd +lvcreate --extents 100%pvs -n data0 data /dev/vdb +lvcreate --extents 100%pvs -n data1 data /dev/vdc +lvcreate --extents 100%pvs -n data2 data /dev/vdd +``` + +The names of mount points must be consecutive, and the mount points of data disks should be `/data0`, `/data1`, ..., `/dataN`. Data disks should use the XFS file system. For example: + +```bash +mkdir -p /data0 /data1 /data2 +mkfs.xfs /dev/data/data0 +mkfs.xfs /dev/data/data1 +mkfs.xfs /dev/data/data2 +mount /dev/data/data0 /data0/ +mount /dev/data/data1 /data1/ +mount /dev/data/data2 /data2/ +``` + +## Data exchange network + +- **Network card configuration** + + The data exchange network is used for transmitting business data, which has high requirements on network performance and throughput. In a production environment, two 10 Gbps NICs are generally required, and they will be used after bonding. The recommended parameters for bonding mode 4 are as follows: + + ```bash + BONDING_OPTS='mode=4 miimon=100 xmit_hash_policy=layer3+4' + ``` + +- **Connectivity requirements** + + - Connect the management console and the database host in the data exchange network. If there is a firewall device between the management console and the database host, ensure that the TCP idle connection can be kept for more than 12 hours. 
+ - Connect database hosts and management console hosts in the data exchange network, and do not limit the TCP idle connection time. + - Connect database clients and application programs that access the database with the database coordinator node in the data exchange network. + - Ensure that the TCP idle connection can be kept for more than 12 hours. + +- **Default gateway** + + If the host is configured with a management network, the network card (bond0) of the data exchange network should be used as the default gateway device; otherwise, it might cause abnormal traffic monitoring of the host network, deployment failure, and performance problems. The following is an example of viewing the default gateway. + + ```bash + netstat -rn | grep ^0.0.0.0 + ``` + +- **Switch** + + - Make sure that the egress bandwidth of the data network switch from layer 1 to layer 2 is no lower than the maximum disk I/O throughput capacity of a single cabinet (calculated with a single RAID card of 500 MBps). + - A switch convergence ratio of 4:1 is recommended. When the convergence ratio reaches 6:1, most links will be saturated. Significant packet loss occurs when the convergence ratio reaches 8:1. + +## Software requirements + +### Supported OS + +Apache Cloudberry supports the following operating systems: + +- Rocky Linux 8/9 +- Ubuntu 22.04 +- RHEL 8/9 and compatible distributions (Oracle Linux, AlmaLinux, etc.) + +### Software dependencies + +The following runtime packages are required on all Apache Cloudberry hosts. These dependencies are automatically resolved when installing via `dnf` (RPM) or `apt` (DEB), but are listed here for reference. 
+ +#### Common dependencies (all platforms) + +``` +bash, openssh, rsync, perl, python3, less, hostname, iproute / iproute2, iputils / iputils-ping, which / debianutils +``` + +#### For Rocky Linux / RHEL 8 + +``` +apr, audit, bash, bzip2, hostname, iproute, iputils, keyutils, +less, libcurl, libevent, libidn2, libselinux, libstdc++, libuuid, +libuv, libxml2, libyaml, libzstd, lz4, openldap, openssh, +openssh-clients, openssh-server, openssl, pam, perl, python3, +readline, rsync, which +``` + +#### For Rocky Linux / RHEL 9 + +``` +apr, bash, bzip2, glibc, hostname, iproute, iputils, keyutils, +less, libcap, libcurl, libidn2, libpsl, libssh, libstdc++, +libxml2, libyaml, libzstd, lz4, openldap, openssh, +openssh-clients, openssh-server, openssl, pam, pcre2, perl, +python3, readline, rsync, which, xz +``` + +#### For Ubuntu 22.04 + +``` +curl, cgroup-tools, debianutils, hostname, iputils-ping, iproute2, +keyutils, krb5-multidev, less, libapr1, libbz2-1.0, libcurl4, +libcurl3-gnutls, libevent-2.1-7, libreadline8, libxml2, libyaml-0-2, +libldap-2.5-0, libzstd1, libcgroup1, libssl3, libpam0g, libprotobuf23, +libpsl5, libuv1, liburing2, libxerces-c3.2, locales, lsof, lz4, +net-tools, openssh-client, openssh-server, openssl, python3, rsync, +wget, xz-utils, zlib1g +``` + +### Java + +Apache Cloudberry supports these Java versions for PL/Java and PXF: + +- Open JDK 8 or Open JDK 11, 17, available from [AdoptOpenJDK](https://adoptopenjdk.net) +- Oracle JDK 8 or Oracle JDK 11, 17 + +### File system + +XFS is the required file system for data storage on Apache Cloudberry hosts. + +Apache Cloudberry is supported on network or shared storage if the shared storage is presented as a block device to the servers running Apache Cloudberry and the XFS file system is mounted on the block device. Network file systems are not supported. 
When using network or shared storage, Apache Cloudberry mirroring must be used in the same way as with local storage, and no modifications may be made to the mirroring scheme or the recovery scheme of the segments. + +Apache Cloudberry can be deployed to virtualized systems only if the storage is presented as block devices and the XFS file system is mounted for the storage of the segment directories. + +Apache Cloudberry is supported on Amazon Web Services (AWS) servers using either Amazon instance store (Amazon uses the volume names ephemeral[0-23]) or Amazon Elastic Block Store (Amazon EBS) storage. If using Amazon EBS storage the storage should be RAID of Amazon EBS volumes and mounted with the XFS file system for it to be a supported configuration. + +### SSH configuration + +The recommended configuration for the SSH server side (`/etc/ssh/sshd_config`) is as follows. After the configuration is complete, run `systemctl restart sshd.service` to make it effective. + +| Parameter | Value | Description | +| ---------------------- | ---- | ---------------- | +| Port | 22 | Listening port. | +| PasswordAuthentication | yes | Allows password login, which can be changed after cluster initialization. | +| PermitEmptyPasswords | no | Empty password is not allowed for login. | +| UseDNS | no | DNS is not used. | diff --git a/versioned_docs/version-2.x/deployment/prepare-to-deploy.md b/versioned_docs/version-2.x/deployment/prepare-to-deploy.md new file mode 100644 index 00000000000..1d2188c3330 --- /dev/null +++ b/versioned_docs/version-2.x/deployment/prepare-to-deploy.md @@ -0,0 +1,725 @@ +--- +title: Configuring Your Systems +--- + +Describes how to prepare your operating system environment for Apache Cloudberry software installation. Before deploying Apache Cloudberry on physical or virtual machines, you need to do some preparations. 
Read this document and [Software and Hardware Configuration Requirements](./platform-requirements.md) before you start to deploy Apache Cloudberry. + +Perform the following tasks in order: + +1. Make sure your host systems meet the requirements described in [Software and Hardware Configuration Requirements](./platform-requirements.md). +2. [Deactivate or configure SELinux.](#deactivate-or-configure-selinux) +3. [Deactivate or configure firewall software.](#deactivate-or-configure-firewall-software) +4. [Set the required operating system parameters.](#recommended-os-parameters-settings) +5. [Synchronize system clocks.](#synchronizing-system-clocks) +6. [Create the gpadmin account.](#creating-the-cloudberry-administrative-user) + +Unless noted, these tasks should be performed for *all* hosts in your Apache Cloudberry array (coordinator, standby coordinator, and segment hosts). + +The Apache Cloudberry host naming convention for the coordinator host is `cdw` and for the standby coordinator host is `scdw`. + +The segment host naming convention is `sdwN` where `sdw` is a prefix and `N` is an integer. For example, segment host names would be `sdw1`, `sdw2` and so on. NIC bonding is recommended for hosts with multiple interfaces, but when the interfaces are not bonded, the convention is to append a dash (`-`) and number to the host name. For example, `sdw1-1` and `sdw1-2` are the two interface names for host `sdw1`. + +:::important +When data loss is not acceptable for an Apache Cloudberry cluster, coordinator and segment mirroring is recommended. If mirroring is not enabled then Apache Cloudberry stores only one copy of the data, so the underlying storage media provides the only guarantee for data availability and correctness in the event of a hardware failure. +::: + +## Deactivate or configure SELinux + +:::note +This section applies to RHEL/Oracle/Rocky Linux only. 
On Ubuntu, SELinux is not installed by default — Ubuntu uses AppArmor instead, which does not require any configuration for Apache Cloudberry. Ubuntu users can skip this section. +::: + +For all Apache Cloudberry host systems running RHEL/Oracle/Rocky Linux, SELinux must either be `Disabled` or configured to allow unconfined access to Apache Cloudberry processes, directories, and the `gpadmin` user. + +If you choose to deactivate SELinux: + +1. As the root user, check the status of SELinux: + + ```bash + # sestatus + + SELinux status: disabled + ``` + +2. If SELinux is not deactivated, deactivate it by editing the `/etc/selinux/config` file. As root, change the value of the `SELINUX` parameter in the `config` file as follows: + + ```bash + SELINUX=disabled + ``` + +3. If the System Security Services Daemon (SSSD) is installed on your systems, edit the SSSD configuration file and set the `selinux_provider` parameter to `none` to prevent SELinux-related SSH authentication denials that could occur even with SELinux deactivated. As root, edit `/etc/sssd/sssd.conf` and add this parameter: + + ``` + selinux_provider=none + ``` + +4. Reboot the system to apply any changes that you made and verify that SELinux is deactivated. + +If you choose to enable SELinux in `Enforcing` mode, then Apache Cloudberry processes and users can operate successfully in the default `Unconfined` context. If you require increased SELinux confinement for Apache Cloudberry processes and users, you must test your configuration to ensure that there are no functionality or performance impacts to Apache Cloudberry. + +## Deactivate or configure firewall software + +You should also deactivate firewall software such as `firewalld` (on RHEL systems) or `ufw` (on Ubuntu systems, deactivated by default). If firewall software is not deactivated, you must instead configure your software to allow required communication between Apache Cloudberry hosts.
+ + +- For RHEL/Rocky Linux (firewalld) + + Check the status of `firewalld`: + + ```bash + systemctl status firewalld + ``` + + If `firewalld` is deactivated, the command output is: + + ``` + * firewalld.service - firewalld - dynamic firewall daemon + Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; vendor preset: enabled) + Active: inactive (dead) + ``` + + If necessary, run these commands as root to deactivate `firewalld`: + + ```bash + systemctl stop firewalld.service + systemctl disable firewalld.service + ``` + +- For Ubuntu (ufw) + + On Ubuntu, `ufw` is disabled by default. Verify the status with: + + ```bash + ufw status + ``` + + If the output is `Status: inactive`, no action is needed. If it is active, disable it: + + ```bash + ufw disable + ``` + +See the documentation for the firewall or your operating system for additional information. + +## Recommended OS parameters settings + +Apache Cloudberry requires that certain Linux operating system (OS) parameters be set on all hosts in your Apache Cloudberry system (coordinators and segments). + +In general, the following categories of system parameters need to be altered: + +- **Shared Memory** - An Apache Cloudberry instance will not work unless the shared memory segment for your kernel is properly sized. Most default OS installations have the shared memory values set too low for Apache Cloudberry. On Linux systems, you must also deactivate the OOM (out of memory) killer. +- **Network** - On high-volume Apache Cloudberry systems, certain network-related tuning parameters must be set to optimize network connections made by the Cloudberry interconnect. +- **User Limits** - User limits control the resources available to processes started by a user's shell. Apache Cloudberry requires a higher limit on the allowed number of file descriptors that a single process can have open. 
The default settings may cause some Apache Cloudberry queries to fail because they will run out of file descriptors needed to process the query. + +More specifically, you need to edit the following Linux configuration settings: + +- [The hosts file](#the-hosts-file) +- [The sysctl.conf file](#the-sysctlconf-file) +- [System resources limits](#system-resources-limits) +- [Core dump](#core-dump) +- [XFS mount options](#xfs-mount-options) +- [Disk I/O settings](#disk-io-settings) +- [Networking](#networking) +- [Transparent Huge Pages (THP)](#transparent-huge-pages-thp) +- [IPC object removal](#ipc-object-removal) +- [SSH connection threshold](#ssh-connection-threshold) + +### The hosts file + +Edit the `/etc/hosts` file and make sure that it includes the host names and all interface address names for every machine participating in your Apache Cloudberry system. + +First, use the `hostnamectl` command to set the hostname on each host. The hostname should only include letters, numbers, and the hyphen (`-`). It is recommended to use all lowercase letters. Each hostname must be globally unique across all hosts. For example: + +```bash +# Run on each host respectively +hostnamectl set-hostname cdw # on coordinator +hostnamectl set-hostname scdw # on standby coordinator +hostnamectl set-hostname sdw1 # on segment 1 +hostnamectl set-hostname sdw2 # on segment 2 +hostnamectl set-hostname sdw3 # on segment 3 + +# Verify (takes effect immediately) +hostname + +# Refresh the shell prompt to show the new hostname +exec bash +``` + +Then, edit the `/etc/hosts` file on all hosts to add the mappings of all host names to their IP addresses. For example: + +``` +192.168.1.1 cdw +192.168.1.2 scdw +192.168.1.3 sdw1 +192.168.1.4 sdw2 +192.168.1.5 sdw3 +``` + +### The sysctl.conf file + +The `sysctl.conf` parameters listed in this topic are for performance, optimization, and consistency in a wide variety of environments. 
Change these settings according to your specific situation and setup. + +Set the parameters in the `/etc/sysctl.conf` file and reload with `sysctl -p`: + +```conf +# kernel.shmall = _PHYS_PAGES / 2 # See Shared Memory Pages +kernel.shmall = 197951838 +# kernel.shmmax = kernel.shmall * PAGE_SIZE +kernel.shmmax = 810810728448 +kernel.shmmni = 4096 +vm.overcommit_memory = 2 # See Segment Host Memory +vm.overcommit_ratio = 95 # See Segment Host Memory + +net.ipv4.ip_local_port_range = 10000 65535 # See Port Settings +kernel.sem = 250 2048000 200 8192 +kernel.sysrq = 1 +kernel.core_uses_pid = 1 +kernel.msgmnb = 65536 +kernel.msgmax = 65536 +kernel.msgmni = 2048 +net.ipv4.tcp_syncookies = 1 +net.ipv4.conf.default.accept_source_route = 0 +net.ipv4.tcp_max_syn_backlog = 4096 +net.ipv4.conf.all.arp_filter = 1 +net.ipv4.ipfrag_high_thresh = 41943040 +net.ipv4.ipfrag_low_thresh = 31457280 +net.ipv4.ipfrag_time = 60 +net.core.netdev_max_backlog = 10000 +net.core.rmem_max = 2097152 +net.core.wmem_max = 2097152 +vm.swappiness = 10 +vm.zone_reclaim_mode = 0 +vm.dirty_expire_centisecs = 500 +vm.dirty_writeback_centisecs = 100 +vm.dirty_background_ratio = 0 # See System Memory +vm.dirty_ratio = 0 +vm.dirty_background_bytes = 1610612736 +vm.dirty_bytes = 4294967296 +``` + +#### Shared memory pages + +Apache Cloudberry uses shared memory to communicate between `postgres` processes that are part of the same `postgres` instance. `kernel.shmall` sets the total amount of shared memory, in pages, that can be used system wide. `kernel.shmmax` sets the maximum size of a single shared memory segment in bytes. + +Set `kernel.shmall` and `kernel.shmmax` values based on your system's physical memory and page size. In general, the value for both parameters should be one half of the system physical memory. + +Use the operating system variables `_PHYS_PAGES` and `PAGE_SIZE` to set the parameters. 
+ +``` +kernel.shmall = ( _PHYS_PAGES / 2) +kernel.shmmax = ( _PHYS_PAGES / 2) * PAGE_SIZE +``` + +To calculate the values for `kernel.shmall` and `kernel.shmmax`, run the following commands using the `getconf` command, which returns the value of an operating system variable. + +```bash +$ echo $(expr $(getconf _PHYS_PAGES) / 2) +$ echo $(expr $(getconf _PHYS_PAGES) / 2 \* $(getconf PAGE_SIZE)) +``` + +As best practice, we recommend you set the following values in the `/etc/sysctl.conf` file using calculated values. For example, a host system has 1583 GB of memory installed and returns these values: `_PHYS_PAGES = 395903676` and `PAGE_SIZE = 4096`. These would be the `kernel.shmall` and `kernel.shmmax` values: + +``` +kernel.shmall = 197951838 +kernel.shmmax = 810810728448 +``` + +If the Apache Cloudberry coordinator has a different shared memory configuration than the segment hosts, the `_PHYS_PAGES` and `PAGE_SIZE` values might differ, and the `kernel.shmall` and `kernel.shmmax` values on the coordinator host will differ from those on the segment hosts. + +#### Segment host memory + +The `vm.overcommit_memory` Linux kernel parameter is used by the OS to determine how much memory can be allocated to processes. For Apache Cloudberry, this parameter should always be set to 2. + +`vm.overcommit_ratio` is the percent of RAM that is used for application processes and the remainder is reserved for the operating system. The default is 50 on Red Hat Enterprise Linux. 
+ +`vm.overcommit_ratio` is calculated as follows: + +``` +vm.overcommit_ratio = (RAM - 0.026 * gp_vmem) / RAM +``` + +The calculation method of `gp_vmem` is as follows: + +``` +# If the system memory is less than 256 GB, use the following formula to calculate: +gp_vmem = ((SWAP + RAM) - (7.5GB + 0.05 * RAM)) / 1.7 + +# If the system memory is greater than or equal to 256 GB, use the following formula to calculate: +gp_vmem = ((SWAP + RAM) - (7.5GB + 0.05 * RAM)) / 1.17 + +# In the above formulas, SWAP is the swap space on the host, in GB. +# RAM is the size of the memory installed on the host, in GB. +``` + +#### Port settings + +To avoid port conflicts between Apache Cloudberry and other applications during initialization, make a note of the port range specified by the operating system parameter `net.ipv4.ip_local_port_range`. When initializing Apache Cloudberry using the `gpinitsystem` cluster configuration file, do not specify Apache Cloudberry ports in that range. + +For example, if `net.ipv4.ip_local_port_range = 10000 65535`, set the Apache Cloudberry base port numbers to these values. + +``` +PORT_BASE = 6000 +MIRROR_PORT_BASE = 7000 +``` + +#### IP fragmentation settings + +When the Apache Cloudberry interconnect uses UDP (the default), the network interface card controls IP packet fragmentation and reassembly. + +If the UDP message size is larger than the size of the maximum transmission unit (MTU) of a network, the IP layer fragments the message. (Refer to [Networking](#networking) later in this topic for more information about MTU sizes for Apache Cloudberry.) The receiver must store the fragments in a buffer before it can reorganize and reassemble the message.
+ +The following `sysctl.conf` operating system parameters control the reassembly process: + +| OS Parameter | Description | +|---|---| +| `net.ipv4.ipfrag_high_thresh` | The maximum amount of memory used to reassemble IP fragments before the kernel starts to remove fragments to free up resources. The default value is 4194304 bytes (4MB). | +| `net.ipv4.ipfrag_low_thresh` | The minimum amount of memory used to reassemble IP fragments. The default value is 3145728 bytes (3MB). (Deprecated after kernel version 4.17.) | +| `net.ipv4.ipfrag_time` | The maximum amount of time (in seconds) to keep an IP fragment in memory. The default value is 30. | + +The recommended settings for these parameters for Apache Cloudberry follow: + +```conf +net.ipv4.ipfrag_high_thresh = 41943040 +net.ipv4.ipfrag_low_thresh = 31457280 +net.ipv4.ipfrag_time = 60 +``` + +#### System memory + +For host systems with more than 64GB of memory, these settings are recommended: + +```conf +vm.dirty_background_ratio = 0 +vm.dirty_ratio = 0 +vm.dirty_background_bytes = 1610612736 # 1.5GB +vm.dirty_bytes = 4294967296 # 4GB +``` + +For host systems with 64GB of memory or less, remove `vm.dirty_background_bytes` and `vm.dirty_bytes` and set the two `ratio` parameters to these values: + +```conf +vm.dirty_background_ratio = 3 +vm.dirty_ratio = 10 +``` + +Increase `vm.min_free_kbytes` to ensure `PF_MEMALLOC` requests from network and storage drivers are easily satisfied. This is especially critical on systems with large amounts of system memory. The default value is often far too low on these systems. Use this awk command to set `vm.min_free_kbytes` to a recommended 3% of system physical memory: + +```bash +awk 'BEGIN {OFMT = "%.0f";} /MemTotal/ {print "vm.min_free_kbytes =", $2 * .03;}' /proc/meminfo >> /etc/sysctl.conf +``` + +Do not set `vm.min_free_kbytes` to higher than 5% of system memory as doing so might cause out of memory conditions. 
+ +### System resources limits + +Set the following parameters in the `/etc/security/limits.conf` file: + +``` +* soft nofile 524288 +* hard nofile 524288 +* soft nproc 131072 +* hard nproc 131072 +``` + +For Red Hat Enterprise Linux (RHEL) systems, parameter values in the `/etc/security/limits.d/20-nproc.conf` file override the values in the `limits.conf` file. Ensure that any parameters in the override file are set to the required value. The Linux module `pam_limits` sets user limits by reading the values from the `limits.conf` file and then from the override file. For information about PAM and user limits, see the documentation on PAM and `pam_limits`. + +Run the `ulimit -u` command on each segment host to display the maximum number of processes that are available to each user. Validate that the return value is 131072. + +### Core dump + +Enable core file generation to a known location by adding the following line to `/etc/sysctl.conf`: + +```conf +kernel.core_pattern=/var/core/core.%h.%t +``` + +Add the following line to `/etc/security/limits.conf`: + +``` +* soft core unlimited +``` + +To apply the changes to the live kernel, run the following command: + +```bash +# sysctl -p +``` + +### XFS mount options + +XFS is the preferred data storage file system on Linux platforms for Apache Cloudberry data directories. Use the `mount` command with the following recommended XFS mount options: + +``` +rw,nodev,noatime,inode64 +``` + +:::note +The `nobarrier` option is not supported on RHEL 8/9 or Ubuntu systems. Use only the options `rw,nodev,noatime,inode64`. +::: + +:::tip +If you are using cloud VMs with only a single system disk (no additional data disk), you can skip the disk formatting and mounting steps below. Simply create the data directory on the existing filesystem: + +```bash +mkdir -p /data +chown -R gpadmin:gpadmin /data +``` +::: + +If you have a dedicated data disk, set up XFS file mounting in the `/etc/fstab` file. 
Adjust the device path according to your actual situation: + +```bash +mkdir -p /data +mkfs.xfs -f /dev/sdb + +# Use device name directly +echo "/dev/sdb /data xfs rw,nodev,noatime,inode64 0 0" >> /etc/fstab +# Or use UUID for stable identification across reboots (recommended for cloud/VM environments) +# echo "UUID=$(blkid -s UUID -o value /dev/sdb) /data xfs rw,nodev,noatime,inode64 0 0" >> /etc/fstab + +mount /data +chown -R gpadmin:gpadmin /data +``` + +Run the following command to check whether the mounting is successful: + +```bash +df -h +``` + +### Disk I/O settings + +#### Read-ahead value + +Each disk device file should have a read-ahead (`blockdev`) value of 16384. To verify the read-ahead value of a disk device: + +```bash +# sudo /sbin/blockdev --getra <devname> +``` + +For example: + +```bash +# sudo /sbin/blockdev --getra /dev/sdb +``` + +To set blockdev (read-ahead) on a device: + +```bash +# sudo /sbin/blockdev --setra <sectors> <devname> +``` + +For example: + +```bash +# sudo /sbin/blockdev --setra 16384 /dev/sdb +``` + +:::note +The `blockdev --setra` command is not persistent. You must ensure the read-ahead value is set whenever the system restarts. How to set the value will vary based on your system. +::: + +One method to set the `blockdev` value at system startup is by adding the `/sbin/blockdev --setra` command in the `rc.local` file. For example, add this line to the `rc.local` file to set the read-ahead value for the disk `sdb`. + +``` +/sbin/blockdev --setra 16384 /dev/sdb +``` + +On systems that use systemd, you must also set the execute permissions on the `rc.local` file to enable it to run at startup. For example, on a RHEL system, this command sets execute permissions on the file. + +```bash +# chmod +x /etc/rc.d/rc.local +``` + +Restart the system to have the setting take effect. + +#### Disk I/O scheduler + +The Linux disk scheduler orders the I/O requests submitted to a storage device, controlling the way the kernel commits reads and writes to disk.
+ +A typical Linux disk I/O scheduler supports multiple access policies. The optimal policy selection depends on the underlying storage infrastructure. The recommended scheduler policy settings for Apache Cloudberry systems for specific OSs and storage device types follow: + +| Storage Device Type | OS | Recommended Scheduler Policy | +|---|---|---| +| Non-Volatile Memory Express (NVMe) | RHEL 8/9, Ubuntu | `none` | +| Solid-State Drives (SSD) | RHEL 8/9, Ubuntu | `none` | +| Other | RHEL 8/9, Ubuntu | `mq-deadline` | + +To specify a scheduler until the next system reboot, run the following: + +```bash +# echo schedulername > /sys/block/<devname>/queue/scheduler +``` + +For example: + +```bash +# echo mq-deadline > /sys/block/sdb/queue/scheduler +``` + +:::note +Using the `echo` command to set the disk I/O scheduler policy is not persistent; you must ensure that you run the command whenever the system reboots. How to run the command will vary based on your system. +::: + +To specify the I/O scheduler at boot time on systems that use `grub2`, use the system utility `grubby` on RHEL, or edit the GRUB configuration file directly on Ubuntu. + +To permanently set the I/O scheduler at boot time: + +- For RHEL/Rocky Linux — use `grubby`. This command adds the parameter when run as `root`: + + ```bash + # grubby --update-kernel=ALL --args="elevator=mq-deadline" + ``` + + This `grubby` command displays kernel parameter settings: + + ```bash + # grubby --info=ALL + ``` + +- For Ubuntu — edit `/etc/default/grub` and run `update-grub`: + + ```bash + # sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 elevator=mq-deadline"/' /etc/default/grub + + # update-grub + ``` + +After adding the parameter, reboot the system. + +### Networking + +The maximum transmission unit (MTU) of a network specifies the size (in bytes) of the largest data packet/frame accepted by a network-connected device. A jumbo frame is a frame that contains more than the standard MTU of 1500 bytes.
+ +Apache Cloudberry utilizes 3 distinct MTU settings: + +- The Apache Cloudberry `gp_max_packet_size` server configuration parameter. The default max packet size is 8192. This default assumes a jumbo frame MTU. +- The operating system MTU setting. +- The rack switch MTU setting. + +These settings are connected, in that they should always be either the same, or close to the same, value, or otherwise in the order of Apache Cloudberry < OS < switch for MTU size. + +9000 is a common supported setting for switches, and is the recommended OS and rack switch MTU setting for your Apache Cloudberry hosts. + +### Transparent Huge Pages (THP) + +Deactivate Transparent Huge Pages (THP) as it degrades Apache Cloudberry performance. + +First, check the current THP status: + +```bash +cat /sys/kernel/mm/*transparent_hugepage/enabled +``` + +If the output shows `[never]`, THP is already disabled. Otherwise, disable it permanently: + + - For RHEL/Rocky Linux — use `grubby`. This command adds the parameter when run as root: + + ```bash + # grubby --update-kernel=ALL --args="transparent_hugepage=never" + ``` + + - For Ubuntu — edit `/etc/default/grub` and run `update-grub`: + + ```bash + # sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 transparent_hugepage=never"/' /etc/default/grub + + # update-grub + ``` + +After adding the parameter, reboot the system. + +This cat command checks the state of THP. The output indicates that THP is deactivated: + +```bash +cat /sys/kernel/mm/*transparent_hugepage/enabled +# Expected output: always madvise [never] +``` + +### IPC object removal + +Deactivate IPC object removal. The default `systemd` setting `RemoveIPC=yes` removes IPC connections when non-system user accounts log out. This causes the Apache Cloudberry utility `gpinitsystem` to fail with semaphore errors. Perform one of the following to avoid this issue. 
+ +- When you add the `gpadmin` operating system user account to the coordinator node in [Creating the Apache Cloudberry Administrative User](#creating-the-cloudberry-administrative-user), create the user as a system account. +- Deactivate `RemoveIPC`. Set this parameter in `/etc/systemd/logind.conf` on the Apache Cloudberry host systems. + + ``` + RemoveIPC=no + ``` + + The setting takes effect after restarting the `systemd-logind` service or rebooting the system. To restart the service, run this command as the root user. + + ```bash + service systemd-logind restart + ``` + +### SSH connection threshold + +Certain Apache Cloudberry management utilities including `gpexpand`, `gpinitsystem`, and `gpaddmirrors`, use secure shell (SSH) connections between systems to perform their tasks. In large Apache Cloudberry deployments, cloud deployments, or deployments with a large number of segments per host, these utilities may exceed the host's maximum threshold for unauthenticated connections. When this occurs, you receive errors such as: `ssh_exchange_identification: Connection closed by remote host`. + +To increase this connection threshold for your Apache Cloudberry system, update the SSH `MaxStartups` and `MaxSessions` configuration parameters in the `/etc/ssh/sshd_config` SSH daemon configuration file. + +:::note +You must have root permission to edit this file. +::: + +If you specify `MaxStartups` and `MaxSessions` using a single integer value, you identify the maximum number of concurrent unauthenticated connections (`MaxStartups`) and maximum number of open shell, login, or subsystem sessions permitted per network connection (`MaxSessions`). For example: + +``` +MaxStartups 200 +MaxSessions 200 +``` + +If you specify `MaxStartups` using the "start:rate:full" syntax, you enable random early connection drop by the SSH daemon. `start` identifies the maximum number of unauthenticated SSH connection attempts allowed.
Once `start` number of unauthenticated connection attempts is reached, the SSH daemon refuses `rate` percent of subsequent connection attempts. `full` identifies the maximum number of unauthenticated connection attempts after which all attempts are refused. For example: + +``` +MaxStartups 10:30:200 +MaxSessions 200 +``` + +Restart the SSH daemon after you update `MaxStartups` and `MaxSessions`: + +```bash +# service sshd restart +``` + +## Synchronizing system clocks + +You must use NTP (Network Time Protocol) to synchronize the system clocks on all hosts that comprise your Apache Cloudberry system. Accurate time keeping is essential to ensure reliable operations on the database and data integrity. + +There are many different architectures you may choose from to implement NTP. We recommend you use one of the following: + +- Configure coordinator as the NTP primary source and the other hosts in the cluster connect to it. +- Configure an external NTP primary source and all hosts in the cluster connect to it. + +Depending on your operating system version, the NTP protocol may be implemented by the `ntpd` daemon, the `chronyd` daemon, or other. Refer to your preferred NTP protocol documentation for more details. + +### Option 1: Configure system clocks with the coordinator as the primary source + +1. On the coordinator host, log in as root and edit your NTP daemon configuration file. Set the `server` parameter to point to your data center's NTP time server. For example (if `10.6.220.20` was the IP address of your data center's NTP server): + + ``` + server 10.6.220.20 + ``` + +2. On each segment host, log in as root and edit your NTP daemon configuration file. Set the first `server` parameter to point to the coordinator host, and the second server parameter to point to the standby coordinator host. For example: + + ``` + server cdw prefer + server scdw + ``` + +3. On the standby coordinator host, log in as root and edit your NTP daemon configuration file. 
Set the first `server` parameter to point to the primary coordinator host, and the second server parameter to point to your data center's NTP time server. For example: + + ``` + server cdw prefer + server 10.6.220.20 + ``` + +4. Synchronize the system clocks on all Apache Cloudberry hosts as root. + + If you are using the `ntpd` daemon: + + ```bash + systemctl restart ntpd + ``` + + If you are using the `chronyd` daemon: + + ```bash + # For RHEL/Rocky Linux + systemctl restart chronyd + + # For Ubuntu (restart works with either name, but enable requires 'chrony') + systemctl restart chronyd + ``` + +### Option 2: Configure system clocks with an external primary source + +1. On each host, including coordinator, standby coordinator, and segments, log in as root and edit your NTP daemon configuration file. Set the first `server` parameter to point to your data center's NTP time server. For example (if `10.6.220.20` was the IP address of your data center's NTP server): + + ``` + server 10.6.220.20 + ``` + +2. On the coordinator host, use your NTP daemon to synchronize the system clocks on all Apache Cloudberry hosts. For example, using `gpssh`: + + If you are using the `ntpd` daemon: + + ```bash + gpssh -f hostfile_gpssh_allhosts -v -e 'systemctl restart ntpd' + ``` + + If you are using the `chronyd` daemon: + + ```bash + gpssh -f hostfile_gpssh_allhosts -v -e 'systemctl restart chronyd' + ``` + +## Creating the Cloudberry administrative user + +Create a dedicated operating system user account on each node to run and administer Apache Cloudberry. This user account is named `gpadmin` by convention. + +:::important +You cannot run the Apache Cloudberry server as `root`. +::: + +The `gpadmin` user must have permission to access the services and directories required to install and run Apache Cloudberry. 
+ +The `gpadmin` user on each Apache Cloudberry host must have an SSH key pair installed and be able to SSH from any host in the cluster to any other host in the cluster without entering a password or passphrase (called "passwordless SSH"). If you enable passwordless SSH from the coordinator host to every other host in the cluster ("1-*n* passwordless SSH"), you can use the Apache Cloudberry `gpssh-exkeys` command-line utility later to enable passwordless SSH from every host to every other host ("*n*-*n* passwordless SSH"). + +You can optionally give the `gpadmin` user sudo privilege, so that you can easily administer all hosts in the Apache Cloudberry cluster as `gpadmin` using the `sudo`, `ssh/rsync`, and `gpssh/gpsync` commands. + +The following steps show how to set up the `gpadmin` user on a host, set a password, create an SSH key pair, and (optionally) enable sudo capability. These steps must be performed as root on every Apache Cloudberry cluster host. (For a large Apache Cloudberry cluster you will want to automate these steps using your system provisioning tools.) + +1. Create the `gpadmin` group and user. + + :::note + Make sure the `gpadmin` user has the same user id (uid) and group id (gid) numbers on each host to prevent problems with scripts or services that use them for identity or permissions. For example, backing up Apache Cloudberry databases to some networked file systems or storage appliances could fail if the `gpadmin` user has different uid or gid numbers on different segment hosts. When you create the `gpadmin` group and user, you can use the `groupadd -g` option to specify a gid number and the `useradd -u` option to specify the uid number. Use the command `id gpadmin` to see the uid and gid for the `gpadmin` user on the current host. + ::: + + This example creates the `gpadmin` group, creates the `gpadmin` user as a system account with a home directory and as a member of the `gpadmin` group, and creates a password for the user. 
+ + ```bash + # groupadd gpadmin + # useradd gpadmin -r -m -g gpadmin + # passwd gpadmin + New password: + Retype new password: + ``` + +2. Switch to the `gpadmin` user and generate an SSH key pair for the `gpadmin` user. + + ```bash + $ su - gpadmin + $ ssh-keygen -t rsa -b 4096 + Generating public/private rsa key pair. + Enter file in which to save the key (/home/gpadmin/.ssh/id_rsa): + Created directory '/home/gpadmin/.ssh'. + Enter passphrase (empty for no passphrase): + Enter same passphrase again: + ``` + + At the passphrase prompts, press Enter so that SSH connections will not require entry of a passphrase. + +3. Grant sudo access to the `gpadmin` user. + + **For RHEL/Rocky Linux** — run `visudo` and uncomment the `%wheel` group entry: + + ``` + %wheel ALL=(ALL) NOPASSWD: ALL + ``` + + Make sure you uncomment the line that has the `NOPASSWD` keyword. Then add `gpadmin` to the `wheel` group: + + ```bash + # usermod -aG wheel gpadmin + ``` + + **For Ubuntu** — add `gpadmin` to the `sudo` group and create a sudoers drop-in file for passwordless sudo: + + ```bash + usermod -aG sudo gpadmin + echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin + chmod 440 /etc/sudoers.d/gpadmin + ``` diff --git a/versioned_docs/version-2.x/deployment/quick-deploy.md b/versioned_docs/version-2.x/deployment/quick-deploy.md new file mode 100644 index 00000000000..f0744d5b4f5 --- /dev/null +++ b/versioned_docs/version-2.x/deployment/quick-deploy.md @@ -0,0 +1,485 @@ +--- +title: Quick Deployment Guide +--- + +# Quick Deployment Guide + +This guide provides a streamlined, copy-paste-friendly procedure for experienced administrators. 
This guide shows how to deploy a 5-node Apache Cloudberry cluster: + +- 1 Coordinator (`cdw`) +- 1 Standby Coordinator (`scdw`) +- 3 Segments (`sdw1`, `sdw2`, `sdw3`), each with 2 primary and 2 mirror instances + +:::info +For detailed explanations of each step, refer to the full deployment guides: +[Configuring Your Systems](./prepare-to-deploy.md), [Installing Using RPM/DEB Package](./install_cloudberry.md), [Creating the Data Storage Areas](./create_data_dirs.md), and [Initializing Apache Cloudberry](./init_cloudberry.md). +::: + +## Cluster layout + +| Host | Role | IP (example) | +|------|------|--------------| +| cdw | Coordinator | 192.168.1.1 | +| scdw | Standby Coordinator | 192.168.1.2 | +| sdw1 | Segment 1 | 192.168.1.3 | +| sdw2 | Segment 2 | 192.168.1.4 | +| sdw3 | Segment 3 | 192.168.1.5 | + +Data directories used: + +- Coordinator/Standby: `/data/coordinator` +- Segment primary: `/data/primary` (2 instances per host) +- Segment mirror: `/data/mirror` (2 instances per host) + +--- + +## 1. Configuring your systems (as root on all hosts) + +### 1.1 Deactivate SELinux + +:::note +This step applies to RHEL/Oracle/Rocky Linux only. On Ubuntu, SELinux is not installed by default and this step can be skipped. 
+::: + +```bash +# Check current status +sestatus + +# If not disabled, deactivate it +sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config + +# Reboot is required to take effect +``` + +### 1.2 Deactivate firewall + +**For RHEL/Rocky Linux (firewalld):** + +```bash +# Check current status +systemctl status firewalld + +# If not disabled, deactivate it +systemctl stop firewalld.service +systemctl disable firewalld.service +``` + +**For Ubuntu (ufw):** + +```bash +# Check current status (disabled by default) +ufw status + +# If active, disable it +ufw disable +``` + +### 1.3 Set the hosts file + +Set the hostname on each host (takes effect immediately, no reboot required): + +```bash +# Run on each host respectively +hostnamectl set-hostname cdw # on coordinator +hostnamectl set-hostname scdw # on standby coordinator +hostnamectl set-hostname sdw1 # on segment 1 +hostnamectl set-hostname sdw2 # on segment 2 +hostnamectl set-hostname sdw3 # on segment 3 + +# Verify +hostname + +# Refresh the shell prompt to show the new hostname +exec bash +``` + +Edit `/etc/hosts` on all hosts: + +```bash +cat >> /etc/hosts <> /etc/sysctl.conf <> /etc/security/limits.conf <> /etc/fstab +# Or use UUID for stable identification across reboots (recommended for cloud/VM environments) +# echo "UUID=$(blkid -s UUID -o value /dev/vdb1) /data xfs rw,nodev,noatime,inode64 0 0" >> /etc/fstab + +mount /data +``` + +### 1.7 Disk I/O settings + +```bash +# Set read-ahead value +/sbin/blockdev --setra 16384 /dev/sdb + +# Persist read-ahead across reboots +echo '/sbin/blockdev --setra 16384 /dev/sdb' >> /etc/rc.d/rc.local +chmod +x /etc/rc.d/rc.local + +# Set I/O scheduler permanently (for non-NVMe/SSD disks) +# For RHEL/Rocky Linux: +grubby --update-kernel=ALL --args="elevator=mq-deadline" +# For Ubuntu: +sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 elevator=mq-deadline"/' /etc/default/grub && update-grub +``` + +### 1.8 Disable Transparent Huge Pages + +```bash +# Check 
current THP status +cat /sys/kernel/mm/*transparent_hugepage/enabled + +# If not [never], disable it permanently +# For RHEL/Rocky Linux: +grubby --update-kernel=ALL --args="transparent_hugepage=never" +# For Ubuntu: +sed -i 's/GRUB_CMDLINE_LINUX="\(.*\)"/GRUB_CMDLINE_LINUX="\1 transparent_hugepage=never"/' /etc/default/grub && update-grub + +# Reboot is required to take effect +``` + +### 1.9 Disable IPC object removal + +```bash +sed -i 's/^#RemoveIPC=.*/RemoveIPC=no/' /etc/systemd/logind.conf +service systemd-logind restart +``` + +### 1.10 SSH connection threshold + +```bash +sed -i 's/^#MaxStartups.*/MaxStartups 10:30:200/' /etc/ssh/sshd_config +sed -i 's/^#MaxSessions.*/MaxSessions 200/' /etc/ssh/sshd_config +service sshd restart +``` + +### 1.11 Synchronize system clocks + +```bash +# For RHEL/Rocky Linux +systemctl enable chronyd +systemctl restart chronyd +chronyc tracking + +# For Ubuntu (enable must use 'chrony', restart works with either name) +systemctl enable chrony +systemctl restart chronyd +chronyc tracking +``` + +### 1.12 Create the gpadmin administrative user + +```bash +groupadd gpadmin +useradd gpadmin -r -m -g gpadmin +passwd gpadmin + +# Grant passwordless sudo +# For RHEL/Rocky Linux: uncomment the %wheel NOPASSWD line in sudoers +# Or use visudo to uncomment: %wheel ALL=(ALL) NOPASSWD: ALL +sed -i 's/^# %wheel\tALL=(ALL)\tNOPASSWD: ALL/%wheel\tALL=(ALL)\tNOPASSWD: ALL/' /etc/sudoers +usermod -aG wheel gpadmin + +# For Ubuntu: add gpadmin to sudo group and create a sudoers drop-in file +usermod -aG sudo gpadmin +echo "gpadmin ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/gpadmin +chmod 440 /etc/sudoers.d/gpadmin + +# Set data directory ownership to gpadmin +chown -R gpadmin:gpadmin /data +``` + +:::note +Make sure the `gpadmin` user has the same UID/GID across all hosts. Use `id gpadmin` to verify, or pass `-g <GID>` to `groupadd` and `-u <UID>` to `useradd` to set fixed values. +::: + +--- + +## 2. 
Install the Apache Cloudberry package (as root on each host) + +Download the package from [Apache Cloudberry Releases](https://cloudberry.apache.org/releases) to `/home/gpadmin/` on every host, then install: + +```bash +# For RPM (Rocky Linux, RHEL, etc.) +dnf install /home/gpadmin/apache-cloudberry-db-incubating-*.rpm + +# For DEB (Ubuntu) +# apt install --fix-broken /home/gpadmin/apache-cloudberry-db-incubating-*.deb + +# Set ownership +chown -R gpadmin:gpadmin /usr/local/cloudberry* +``` + +--- + +## 3. Enable passwordless SSH (as gpadmin on cdw) + +```bash +su - gpadmin +source /usr/local/cloudberry-db/cloudberry-env.sh + +# Generate SSH key (press Enter at passphrase prompts) +ssh-keygen -t rsa -b 4096 -N "" -f ~/.ssh/id_rsa + +# Enable 1-n passwordless SSH (will prompt for gpadmin password) +ssh-copy-id cdw +ssh-copy-id scdw +ssh-copy-id sdw1 +ssh-copy-id sdw2 +ssh-copy-id sdw3 + +# Create the host file for gpssh-exkeys +cat > ~/hostfile_exkeys < ~/hostfile_gpinitsystem < ~/gpconfigs/gpinitsystem_config < Apache Cloudberry instance successfully created. +``` + +:::tip +The `-s scdw` option initializes the standby coordinator during cluster creation. If you omit `-s` during `gpinitsystem`, you can initialize the standby separately afterwards: + +```bash +gpinitstandby -s scdw +``` + +Verify the standby is synchronized: + +```bash +gpstate -f +``` +::: + +### 5.4 Set the timezone (optional) + +```bash +gpconfig -c TimeZone -v 'US/Pacific' +gpstop -ra +``` + +### 5.5 Set environment variables + +```bash +cat >> ~/.bashrc < NVMe - RHEL 7 - none RHEL 8 @@ -276,8 +278,6 @@ The disk type, operating system, and scheduling policies of Apache Cloudberry ar SSD - RHEL 7 - noop RHEL 8 @@ -289,8 +289,6 @@ The disk type, operating system, and scheduling policies of Apache Cloudberry ar Other - RHEL 7 - deadline RHEL 8 @@ -398,29 +396,35 @@ systemctl status chronyd ### Step 2. Install Apache Cloudberry -1. Download the RPM package to the home directory of `gpadmin`. 
+:::info +Starting from Apache Cloudberry 2.1, RPM and DEB packages are officially provided. RPM packages support Rocky Linux 8/9, RHEL 8/9, and compatible distributions. DEB packages support Ubuntu 22.04. +::: - ```bash - wget -P /home/gpadmin - ``` +1. Download the package to the home directory of `gpadmin`. -2. Install the RPM package in the `/home/gpadmin` directory. +2. Install the package in the `/home/gpadmin` directory. - When running the following command, you need to replace `` with the actual RPM package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry/` is automatically created. + When running the following command, you need to replace `` with the actual package path, as the `root` user. During the installation, the directory `/usr/local/cloudberry-db/` is automatically created. ```bash cd /home/gpadmin - yum install + + # For RPM (Rocky Linux, RHEL, etc.) + dnf install + # Or for older systems: yum install + + # For DEB (Ubuntu) + apt install + # Or alternatively: dpkg -i && apt-get install -f ``` -3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry/` directory. +3. Grant the `gpadmin` user the permission to access the `/usr/local/cloudberry-db/` directory. ```bash - chown -R gpadmin:gpadmin /usr/local chown -R gpadmin:gpadmin /usr/local/cloudberry* ``` -4. Configure local SSH connection for the node. As the `gpadmin ` user, perform the following operations: +4. Configure local SSH connection for the node. 
As the `gpadmin` user, perform the following operations: ```bash ssh-keygen diff --git a/versioned_docs/version-2.x/deployment/validate.md b/versioned_docs/version-2.x/deployment/validate.md new file mode 100644 index 00000000000..bc3faf01aae --- /dev/null +++ b/versioned_docs/version-2.x/deployment/validate.md @@ -0,0 +1,72 @@ +--- +title: Validating Hardware and Network +--- + +Cloudberry provides a management utility called [gpcheckperf](/sys-utilities/gpcheckperf.md), which can be used to identify hardware and system-level issues on the machines in your Apache Cloudberry array. `gpcheckperf` starts a session on the specified hosts and runs the following performance tests: + +- Network Performance (`gpnetbench*`) +- Disk I/O Performance (`dd` test) +- Memory Bandwidth (`stream` test) + +Before using `gpcheckperf`, you must have a trusted host setup between the hosts involved in the performance test. You can use the utility [gpssh-exkeys](/sys-utilities/gpssh-exkeys.md) to update the known host files and exchange public keys between hosts if you have not done so already. Note that `gpcheckperf` calls to [gpssh](/sys-utilities/gpssh.md) and [gpsync](/sys-utilities/gpsync.md), so these Apache Cloudberry utilities must be in your `$PATH`. + +## Validating Network Performance + +To test network performance, run `gpcheckperf` with one of the network test run options: parallel pair test (`-r N`), serial pair test (`-r n`), or full matrix test (`-r M`). The utility runs a network benchmark program that transfers a 5 second stream of data from the current host to each remote host included in the test. By default, the data is transferred in parallel to each remote host and the minimum, maximum, average and median network transfer rates are reported in megabytes (MB) per second. If the summary transfer rate is slower than expected (less than 100 MB/s), you can run the network test serially using the `-r n` option to obtain per-host results. 
To run a full-matrix bandwidth test, you can specify `-r M` which will cause every host to send and receive data from every other host specified. This test is best used to validate if the switch fabric can tolerate a full-matrix workload. + +Most systems in an Apache Cloudberry array are configured with multiple network interface cards (NICs), each NIC on its own subnet. When testing network performance, it is important to test each subnet individually. For example, consider the following network configuration of two NICs per host: + +|Cloudberry Host|Subnet1 NICs|Subnet2 NICs| +|--------------|------------|------------| +|Segment 1|sdw1-1|sdw1-2| +|Segment 2|sdw2-1|sdw2-2| +|Segment 3|sdw3-1|sdw3-2| + +You would create two distinct host files for use with the `gpcheckperf` network test: + +|hostfile_gpchecknet_ic1|hostfile_gpchecknet_ic2| +|-------------------------|-------------------------| +|sdw1-1|sdw1-2| +|sdw2-1|sdw2-2| +|sdw3-1|sdw3-2| + +You would then run `gpcheckperf` once per subnet. For example (if testing an *even* number of hosts, run in parallel pairs test mode): + +``` +$ gpcheckperf -f hostfile_gpchecknet_ic1 -r N -d /tmp > subnet1.out +$ gpcheckperf -f hostfile_gpchecknet_ic2 -r N -d /tmp > subnet2.out +``` + +If you have an *odd* number of hosts to test, you can run in serial test mode (`-r n`). + +## Validating Disk I/O and Memory Bandwidth + +To test disk and memory bandwidth performance, run `gpcheckperf` with the disk and stream test run options (`-r ds`). The disk test uses the `dd` command (a standard UNIX utility) to test the sequential throughput performance of a logical disk or file system. The memory test uses the STREAM benchmark program to measure sustainable memory bandwidth. Results are reported in MB per second (MB/s). + +### Running the disk and stream tests + +1. Log in on the coordinator host as the `gpadmin` user. +2. Source the `cloudberry-env.sh` path file from your Apache Cloudberry installation. 
For example: + + ``` + $ source /usr/local/cloudberry-db/cloudberry-env.sh + ``` + +3. Create a host file named `hostfile_gpcheckperf` that has one host name per segment host. Do not include the coordinator host. For example: + + ``` + sdw1 + sdw2 + sdw3 + sdw4 + ``` + +4. Run the `gpcheckperf` utility using the `hostfile_gpcheckperf` file you just created. Use the `-d` option to specify the file systems you want to test on each host (you must have write access to these directories). You will want to test all primary and mirror segment data directory locations. For example: + + ``` + $ gpcheckperf -f hostfile_gpcheckperf -r ds -D \ +   -d /data1/primary -d /data2/primary \ +   -d /data1/mirror -d /data2/mirror + ``` + +5. The utility may take a while to perform the tests as it is copying very large files between the hosts. When it is finished you will see the summary results for the Disk Write, Disk Read, and Stream tests. diff --git a/versioned_docs/version-2.x/cbdb-architecture.md b/versioned_docs/version-2.x/introduction/cbdb-architecture.md similarity index 98% rename from versioned_docs/version-2.x/cbdb-architecture.md rename to versioned_docs/version-2.x/introduction/cbdb-architecture.md index 606f24ba441..b9a3efce19e 100644 --- a/versioned_docs/version-2.x/cbdb-architecture.md +++ b/versioned_docs/version-2.x/introduction/cbdb-architecture.md @@ -16,7 +16,7 @@ From users' view, Apache Cloudberry is a complete relational database management The architecture diagram of Apache Cloudberry is as follows: -![Apache Cloudberry Architecture](./media/cbdb-arch.png) +![Apache Cloudberry Architecture](../media/cbdb-arch.png) - **Coordinator node** (or control node) is the gateway to the Apache Cloudberry system, which accepts client connections and SQL queries, and allocates tasks to data node instances. 
Users interact with Apache Cloudberry by connecting to the coordinator node using a client program (such as psql) or an application programming interface (API) (such as JDBC, ODBC, or libpq PostgreSQL C API). - The coordinator node acts as the global system directory, containing a set of system tables that record the metadata of Apache Cloudberry. diff --git a/versioned_docs/version-2.x/cbdb-overview.md b/versioned_docs/version-2.x/introduction/cbdb-overview.md similarity index 100% rename from versioned_docs/version-2.x/cbdb-overview.md rename to versioned_docs/version-2.x/introduction/cbdb-overview.md diff --git a/versioned_docs/version-2.x/cbdb-scenarios.md b/versioned_docs/version-2.x/introduction/cbdb-scenarios.md similarity index 100% rename from versioned_docs/version-2.x/cbdb-scenarios.md rename to versioned_docs/version-2.x/introduction/cbdb-scenarios.md diff --git a/versioned_docs/version-2.x/cbdb-vs-gp-features.md b/versioned_docs/version-2.x/introduction/cbdb-vs-gp-features.md similarity index 100% rename from versioned_docs/version-2.x/cbdb-vs-gp-features.md rename to versioned_docs/version-2.x/introduction/cbdb-vs-gp-features.md diff --git a/versioned_docs/version-2.x/performance/memory-overview.md b/versioned_docs/version-2.x/performance/memory-overview.md index aa89106fe17..7b99160d4ad 100644 --- a/versioned_docs/version-2.x/performance/memory-overview.md +++ b/versioned_docs/version-2.x/performance/memory-overview.md @@ -34,7 +34,7 @@ Host memory is the total memory shared by all applications on the segment host. - Allocate swap space to increase the size of virtual memory. - Adjust the kernel parameter `vm.overcommit_ratio` to configure how the operating system handles large memory allocation requests. -The physical RAM and OS configuration are usually managed by the platform team and system administrators. 
See the [Software and Hardware Requirements](../cbdb-op-software-hardware.md) for the recommended kernel parameters and for how to set the `/etc/sysctl.conf` file parameters. +The physical RAM and OS configuration are usually managed by the platform team and system administrators. See the [Software and Hardware Requirements](../deployment/platform-requirements.md) for the recommended kernel parameters and for how to set the `/etc/sysctl.conf` file parameters. The amount of memory to reserve for the operating system and other processes is workload dependent. The minimum recommendation for operating system memory is 32GB, but if there is much concurrency in Apache Cloudberry, increasing to 64GB of reserved memory may be required. The largest user of operating system memory is SLAB, which increases as Apache Cloudberry concurrency and the number of sockets used increases. diff --git a/versioned_docs/version-2.x/sys-admin/configure-proxy.md b/versioned_docs/version-2.x/sys-admin/configure-proxy.md index fdb24b33269..364e01ba86b 100644 --- a/versioned_docs/version-2.x/sys-admin/configure-proxy.md +++ b/versioned_docs/version-2.x/sys-admin/configure-proxy.md @@ -6,7 +6,7 @@ title: Configure Proxies for the Interconnect You can configure a Cloudberry system to use proxies for interconnect communication to reduce the use of connections and ports during query processing. -The Cloudberry *interconnect* (the networking layer) refers to the inter-process communication between segments and the network infrastructure on which this communication relies. For information about the Cloudberry architecture and interconnect, see [About the Cloudberry Architecture](../cbdb-architecture.md). +The Cloudberry *interconnect* (the networking layer) refers to the inter-process communication between segments and the network infrastructure on which this communication relies. 
For information about the Cloudberry architecture and interconnect, see [About the Cloudberry Architecture](../introduction/cbdb-architecture.md). In general, when running a query, a QD (query dispatcher) on the Cloudberry coordinator creates connections to one or more QE (query executor) processes on segments, and a QE can create connections to other QEs. For a description of Cloudberry query processing and parallel query processing, see [About Cloudberry Query Processing](../performance/optimize-queries/parallel-query-execution.md). diff --git a/versioned_docs/version-2.x/sys-admin/expand-cluster/prepare-and-add-hosts.md b/versioned_docs/version-2.x/sys-admin/expand-cluster/prepare-and-add-hosts.md index d94e7875654..32245a5cdbc 100644 --- a/versioned_docs/version-2.x/sys-admin/expand-cluster/prepare-and-add-hosts.md +++ b/versioned_docs/version-2.x/sys-admin/expand-cluster/prepare-and-add-hosts.md @@ -13,7 +13,7 @@ Run performance tests first on the new hosts and then all hosts. Run the tests o Generally, you should run performance tests when an administrator modifies host networking or other special conditions in the system. For example, if you will run the expanded system on two network clusters, run tests on each cluster. :::note -Preparing host systems for use by a Apache Cloudberry system assumes that the new hosts' operating system has been properly configured to match the existing hosts, described in [Configuring Your Systems](../../cbdb-op-software-hardware.md#supported-os). +Preparing host systems for use by a Apache Cloudberry system assumes that the new hosts' operating system has been properly configured to match the existing hosts, described in [Configuring Your Systems](../../deployment/platform-requirements#supported-os). 
::: ## Add new hosts to the trusted host environment diff --git a/versioned_docs/version-2.x/sys-admin/high-availability/enable-coordinator-mirroring.md b/versioned_docs/version-2.x/sys-admin/high-availability/enable-coordinator-mirroring.md index 5b7c7664eef..1cb5eef7254 100644 --- a/versioned_docs/version-2.x/sys-admin/high-availability/enable-coordinator-mirroring.md +++ b/versioned_docs/version-2.x/sys-admin/high-availability/enable-coordinator-mirroring.md @@ -33,7 +33,7 @@ Make sure that you have already configured a standby coordinator on a different :::note -If you follow the steps described in the [Prepare to Deploy](../../cbdb-op-prepare-to-deploy.md) and [Deploy Apache Cloudberry Manually Using RPM Package](../../cbdb-op-deploy-guide.md) topics to deploy the cluster, a host for the standby coordinator ( `cbdb-standbycoordinator`) is already configured in the cluster. +If you follow the steps described in the [Prepare to Deploy](../../deployment/prepare-to-deploy.md) and [Deploy Apache Cloudberry Manually Using RPM Package](../../deployment/install_cloudberry.md) topics to deploy the cluster, a host for the standby coordinator (`scdw`) is already configured in the cluster. ::: @@ -41,15 +41,15 @@ If you follow the steps described in the [Prepare to Deploy](../../cbdb-op-prepa You need to first enable the standby coordinator using the `gpinitstandby` utility: -1. Run the `gpinitstandby` utility on the currently active primary coordinator (`cbdb-coordinator`) host to add a standby coordinator host to your CBDB cluster. For example: +1. Run the `gpinitstandby` utility on the currently active primary coordinator (`cdw`) host to add a standby coordinator host to your CBDB cluster. For example: ```shell - $ gpinitstandby -s cbdb-standbycoordinator + $ gpinitstandby -s scdw ``` The `-s` option specifies the standby coordinator hostname. 
- You will be prompted with the following message when the initialization is completed: `-Successfully created standby coordinator on cbdb-coordinator`. + You will be prompted with the following message when the initialization is completed: `-Successfully created standby coordinator on scdw`. 2. You can run the `gpstate` utility with the `-f` option to display details of the standby coordinator host. @@ -114,13 +114,13 @@ Take the steps below to configure the failed primary coordinator to become a sta You can remove the backup directory once the standby is successfully configured. -3. Initialize a standby coordinator on the original coordinator host. For example, run this command from the current coordinator host, `cbdb-standbycoordinator`: +3. Initialize a standby coordinator on the original coordinator host. For example, run this command from the current coordinator host, `scdw`: ```shell - $ gpinitstandby -s cbdb-coordinator + $ gpinitstandby -s cdw ``` -4. After the initialization is completed, check the status of the standby coordinator `cbdb-coordinator`. Run `gpstate` with the `-f` option to check the standby coordinator status: +4. After the initialization is completed, check the status of the standby coordinator `cdw`. Run `gpstate` with the `-f` option to check the standby coordinator status: ```shell $ gpstate -f diff --git a/versioned_docs/version-2.x/sys-admin/high-availability/enable-segment-mirroring.md b/versioned_docs/version-2.x/sys-admin/high-availability/enable-segment-mirroring.md index 4a32699a104..c4865fd7d71 100644 --- a/versioned_docs/version-2.x/sys-admin/high-availability/enable-segment-mirroring.md +++ b/versioned_docs/version-2.x/sys-admin/high-availability/enable-segment-mirroring.md @@ -24,7 +24,7 @@ During the online data replication process, Apache Cloudberry should be in a qui ## To add segment mirrors to an existing system (different hosts from primaries) -1. Ensure the Apache Cloudberry software is installed on all hosts. 
See the [Apache Cloudberry Installation Guide](../../cbdb-op-deploy-guide.md) for detailed installation instructions. +1. Ensure the Apache Cloudberry software is installed on all hosts. See the [Apache Cloudberry Installation Guide](../../deployment/index.md) for detailed installation instructions. 2. Allocate the data storage area for mirror data, and tablespaces if needed, on all segment hosts. 3. Use `gpssh-exkeys` to ensure the segment hosts can SSH and remote sync to each other without a password prompt. 4. Create a configuration file that lists the host names, ports, and data directories on which to create mirrors. To create a sample configuration file to use as a starting point, run: diff --git a/versioned_docs/version-2.x/sys-admin/use-compression.md b/versioned_docs/version-2.x/sys-admin/use-compression.md index 0ed1d829901..96423afb498 100644 --- a/versioned_docs/version-2.x/sys-admin/use-compression.md +++ b/versioned_docs/version-2.x/sys-admin/use-compression.md @@ -14,4 +14,4 @@ You can configure support for data compression with these features and utilities - Workfiles (temporary spill files that are created when running a query that requires more memory than it is allocated) can be compressed. See the server configuration parameter `gp_workfile_compression`. - The Apache Cloudberry utilities [`gpbackup`](../sys-utilities/gpbackup.md), [`gprestore`](../sys-utilities/gprestore.md), [`gpload`](../sys-utilities/gpload.md), and [`gplogfilter`](../sys-utilities/gplogfilter.md) support compression. -For some compression algorithms (such as zlib) Apache Cloudberry requires software packages installed on the host system. For information about required software packages, see the [Apache Cloudberry Installation Guide](../cbdb-op-software-hardware.md). +For some compression algorithms (such as zlib) Apache Cloudberry requires software packages installed on the host system. 
For information about required software packages, see the [Apache Cloudberry Installation Guide](../deployment/index.md). diff --git a/versioned_docs/version-2.x/sys-utilities/gpdemo.md b/versioned_docs/version-2.x/sys-utilities/gpdemo.md index 6c848f0aa5b..3857d8007d5 100644 --- a/versioned_docs/version-2.x/sys-utilities/gpdemo.md +++ b/versioned_docs/version-2.x/sys-utilities/gpdemo.md @@ -52,7 +52,7 @@ NUM_PRIMARY_MIRROR_PAIRS=3 gpdemo :::info - Each segment node consists of a primary node and a mirror node. So every time the parameter value increases by `1`, 2 more nodes will be created. To better capture data distribution issues, it is recommended to set the value to an odd number. -- When the parameter value is set to 0, a single-computing-node cluster is deployed. See [Deploy Apache Cloudberry with a Single Computing Node](/docs/deploy-cbdb-with-single-node.md) for details. +- When the parameter value is set to 0, a single-computing-node cluster is deployed. See [Deploy Apache Cloudberry with a Single Computing Node](../deployment/single-node.md) for details. 
::: #### Specify the data directory of a node diff --git a/versioned_docs/version-2.x/sys-utilities/gpinitsystem.md b/versioned_docs/version-2.x/sys-utilities/gpinitsystem.md index 273224a0f03..6ccd804dc01 100644 --- a/versioned_docs/version-2.x/sys-utilities/gpinitsystem.md +++ b/versioned_docs/version-2.x/sys-utilities/gpinitsystem.md @@ -372,4 +372,4 @@ gp7c~gp7c-2~50000~/data/mirror1/gpseg1~5~1 ## See also -[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Apache Cloudberry](/docs/start-and-stop-cbdb-database.md) +[gpssh-exkeys](/docs/sys-utilities/gpssh-exkeys.md), [gpdeletesystem](/docs/sys-utilities/gpdeletesystem.md), [Start and Stop Apache Cloudberry](../database-basic/start-and-stop-cbdb-database.md) diff --git a/versioned_docs/version-2.x/tutorials/best-practices/index.md b/versioned_docs/version-2.x/tutorials/best-practices/index.md index 9d823cc2bc1..a48296d5fee 100644 --- a/versioned_docs/version-2.x/tutorials/best-practices/index.md +++ b/versioned_docs/version-2.x/tutorials/best-practices/index.md @@ -115,7 +115,7 @@ See [Distributions](./schema-design-best-practices.md). - Ensure that resource queue memory allocations do not exceed the setting for `gp_vmem_protect_limit`. - Dynamically update resource queue settings to match daily operations flow. -See [Setting the Cloudberry Recommended OS Parameters](../../deployment/system-settings.md). +See [Setting the Cloudberry Recommended OS Parameters](../../build/system-settings.md). 
## Partitioning diff --git a/versioned_docs/version-2.x/tutorials/best-practices/resource-group-best-practices.md b/versioned_docs/version-2.x/tutorials/best-practices/resource-group-best-practices.md index 875f08324bd..81584d6e734 100644 --- a/versioned_docs/version-2.x/tutorials/best-practices/resource-group-best-practices.md +++ b/versioned_docs/version-2.x/tutorials/best-practices/resource-group-best-practices.md @@ -22,7 +22,7 @@ The following operating system and Apache Cloudberry memory settings are signifi - **vm.overcommit_memory** - This Linux kernel parameter, set in [`/etc/sysctl.conf`](../../cbdb-op-prepare-to-deploy.md#set-system-parameters), identifies the method that the operating system uses to determine how much memory can be allocated to processes. `vm.overcommit_memory` must always be set to 2 for Apache Cloudberry systems. + This Linux kernel parameter, set in [`/etc/sysctl.conf`](../../deployment/prepare-to-deploy#the-sysctlconf-file), identifies the method that the operating system uses to determine how much memory can be allocated to processes. `vm.overcommit_memory` must always be set to 2 for Apache Cloudberry systems. - **vm.overcommit_ratio** diff --git a/versioned_docs/version-2.x/tutorials/best-practices/system-configuration-best-practices.md b/versioned_docs/version-2.x/tutorials/best-practices/system-configuration-best-practices.md index bf4af96dd40..1b0002cba04 100644 --- a/versioned_docs/version-2.x/tutorials/best-practices/system-configuration-best-practices.md +++ b/versioned_docs/version-2.x/tutorials/best-practices/system-configuration-best-practices.md @@ -25,11 +25,11 @@ You must restart Apache Cloudberry after changing the timezone. The command `gps ## Configure the file system -XFS is the file system used for Apache Cloudberry data directories. Use the mount options described in [Configuring Your Systems](../../cbdb-op-prepare-to-deploy.md). +XFS is the file system used for Apache Cloudberry data directories. 
Use the mount options described in [Configuring Your Systems](../../deployment/prepare-to-deploy.md). ## Configure ports -See the [recommended OS parameter settings](../../cbdb-op-prepare-to-deploy.md#set-system-parameters) for further details. +See the [recommended OS parameter settings](../../deployment/prepare-to-deploy#the-sysctlconf-file) for further details. Set up `ip_local_port_range` so it does not conflict with the Apache Cloudberry port ranges. For example, setting this range in `/etc/sysctl.conf`: @@ -44,7 +44,7 @@ PORT_BASE = 6000 MIRROR_PORT_BASE = 7000 ``` -See the [Recommended OS Parameters Settings](../../cbdb-op-prepare-to-deploy.md#set-system-parameters) for further details. +See the [Recommended OS Parameters Settings](../../deployment/prepare-to-deploy#the-sysctlconf-file) for further details. ## Configure I/O diff --git a/versioned_sidebars/version-2.x-sidebars.json b/versioned_sidebars/version-2.x-sidebars.json index a471b17978c..82d0d2f987f 100644 --- a/versioned_sidebars/version-2.x-sidebars.json +++ b/versioned_sidebars/version-2.x-sidebars.json @@ -4,49 +4,65 @@ "type": "category", "label": "Introduction", "items": [ - "cbdb-overview", - "cbdb-architecture", - "cbdb-scenarios", - "cbdb-vs-gp-features" + "introduction/cbdb-overview", + "introduction/cbdb-architecture", + "introduction/cbdb-scenarios", + "introduction/cbdb-vs-gp-features" ] }, { "type": "category", - "label": "Deploy and Build", + "label": "Build Apache Cloudberry", "items": [ { "type": "category", "label": "Build from Source (Complete Guide)", "link": { "type": "doc", - "id": "deployment/index" + "id": "build/index" }, "items": [ - "deployment/quick-build", - "deployment/create-gpadmin-user", - "deployment/system-settings", - "deployment/install-required-packages", - "deployment/download-source-code", - "deployment/configure", - "deployment/build-and-install", - "deployment/set-demo-cluster", - "deployment/post-installation" + "build/quick-build", + 
"build/create-gpadmin-user", + "build/system-settings", + "build/install-required-packages", + "build/download-source-code", + "build/configure", + "build/build-and-install", + "build/set-demo-cluster", + "build/post-installation" ] }, - "deployment/build-based-on-docker", - { - "type": "category", - "label": "Deploy on Physical or Virtual Machine", - "items": [ - "cbdb-op-software-hardware", - "cbdb-op-prepare-to-deploy", - "cbdb-op-deploy-guide", - "deploy-cbdb-with-single-node" - ] - }, - "deployment/sandbox" + "build/build-based-on-docker", + "build/sandbox" ] }, + { + "type": "category", + "label": "Deploy in Production", + "link": { + "type": "doc", + "id": "deployment/index" + }, + "items": [ + "deployment/quick-deploy", + "deployment/platform-requirements", + "deployment/capacity_planning", + "deployment/prepare-to-deploy", + "deployment/install_cloudberry", + "deployment/create_data_dirs", + "deployment/validate", + "deployment/init_cloudberry", + "deployment/login_cloudberry", + "deployment/single-node", + "deployment/ansible-example" + ] + }, + { + "type": "category", + "label": "Basic Database Operations", + "items": ["database-basic/create-and-manage-database", "database-basic/start-and-stop-cbdb-database", "database-basic/connect-to-cbdb"] + }, { "type": "category", "label": "Load Data", @@ -79,15 +95,6 @@ "data-loading/handle-data-errors" ] }, - { - "type": "category", - "label": "Create and Prepare", - "items": [ - "operate-with-data/operate-with-db-objects/create-and-manage-database", - "start-and-stop-cbdb-database", - "connect-to-cbdb" - ] - }, { "type": "category", "label": "Operate with Data",