diff --git a/crates/bin/ampctl/src/cmd/manifest/generate.rs b/crates/bin/ampctl/src/cmd/manifest/generate.rs index 7f8c7a5c8..803a089fe 100644 --- a/crates/bin/ampctl/src/cmd/manifest/generate.rs +++ b/crates/bin/ampctl/src/cmd/manifest/generate.rs @@ -14,13 +14,15 @@ //! Note: Derived datasets are not supported for automatic generation as they //! require custom SQL transformation definitions. //! -//! # Configuration +//! # Configuration (CLI-only flags) //! -//! - Dataset Kind: `--kind` flag or `GM_KIND` env var -//! - Network: `--network` flag or `GM_NETWORK` env var -//! - Output: `--out` flag or `GM_OUT` env var (optional, defaults to stdout) -//! - Start Block: `--start-block` flag or `GM_START_BLOCK` env var (optional, defaults to 0) -//! - Finalized Blocks Only: `--finalized-blocks-only` flag or `GM_FINALIZED_BLOCKS_ONLY` env var +//! All parameters are CLI-only flags with no environment variable equivalents: +//! +//! - `--kind` — Dataset kind (required) +//! - `--network` — Network name (required) +//! - `-o, --out` — Output file/directory (optional, defaults to stdout) +//! - `--start-block` — Starting block number (optional, defaults to 0) +//! - `--finalized-blocks-only` — Only include finalized blocks (optional, off by default) use std::path::PathBuf; @@ -40,26 +42,26 @@ use solana_datasets::SolanaDatasetKind; #[derive(Debug, clap::Args)] pub struct Args { /// Kind of the dataset (evm-rpc, firehose, solana). - #[arg(long, required = true, env = "GM_KIND")] + #[arg(long, required = true)] pub kind: DatasetKindStr, /// The name of the network. - #[arg(long, required = true, env = "GM_NETWORK")] + #[arg(long, required = true)] pub network: NetworkId, /// Output file or directory. If it's a directory, the generated file name will /// match the `kind` parameter. /// /// If not specified, the manifest will be printed to stdout. - #[arg(short, long, env = "GM_OUT")] + #[arg(short, long)] pub out: Option<PathBuf>, /// The starting block number for the dataset. Defaults to 0. 
- #[arg(long, env = "GM_START_BLOCK")] + #[arg(long)] pub start_block: Option, /// Only include finalized block data. - #[arg(long, env = "GM_FINALIZED_BLOCKS_ONLY")] + #[arg(long)] pub finalized_blocks_only: bool, } diff --git a/crates/bin/ampd/src/main.rs b/crates/bin/ampd/src/main.rs index bdee4ed8b..a4a48a4be 100644 --- a/crates/bin/ampd/src/main.rs +++ b/crates/bin/ampd/src/main.rs @@ -28,41 +28,67 @@ struct Args { #[derive(Debug, Clone, clap::Subcommand)] enum Command { - /// Run Amp in local development mode with all services + /// All-in-one mode: server, controller and worker in a single process. + /// + /// Starts Flight, JSONL, and Admin API endpoints plus an embedded worker. + /// If any --*-server flag is set, only those endpoints are enabled. + /// Intended for local development, testing, and CI — not production. #[command(alias = "dev")] Solo { - /// Directory for Amp daemon state and data. Defaults to `/.amp/`. - /// If `--config` is not provided, searches for `config.toml` within this directory. + /// Amp state directory containing data, manifests, and providers. + /// + /// Defaults to `/.amp/`. When --config is not provided, looks + /// for `config.toml` inside this directory. #[arg(long, env = "AMP_DIR")] amp_dir: Option, - /// Enable Arrow Flight RPC Server. - #[arg(long, env = "FLIGHT_SERVER")] + /// Enable the Arrow Flight gRPC endpoint (port 1602). + /// + /// When any --*-server flag is set, only flagged endpoints start. + #[arg(long, env = "AMP_FLIGHT_SERVER")] flight_server: bool, - /// Enable JSON Lines Server. - #[arg(long, env = "JSONL_SERVER")] + /// Enable the JSON Lines HTTP endpoint (port 1603). + /// + /// When any --*-server flag is set, only flagged endpoints start. + #[arg(long, env = "AMP_JSONL_SERVER")] jsonl_server: bool, - /// Enable Admin API Server. - #[arg(long, env = "ADMIN_SERVER")] + /// Enable the Admin API HTTP endpoint (port 1610). + /// + /// When any --*-server flag is set, only flagged endpoints start. 
+ #[arg(long, env = "AMP_ADMIN_SERVER")] admin_server: bool, }, - /// Run query server (Arrow Flight, JSON Lines) + /// Query server exposing Flight and JSONL endpoints. + /// + /// Requires --config. If any --*-server flag is set, only those + /// endpoints are enabled; otherwise both start. Server { - /// Enable Arrow Flight RPC Server. - #[arg(long, env = "FLIGHT_SERVER")] + /// Enable the Arrow Flight gRPC endpoint (port 1602). + #[arg(long, env = "AMP_FLIGHT_SERVER")] flight_server: bool, - /// Enable JSON Lines Server. - #[arg(long, env = "JSONL_SERVER")] + /// Enable the JSON Lines HTTP endpoint (port 1603). + #[arg(long, env = "AMP_JSONL_SERVER")] jsonl_server: bool, }, - /// Run a distributed worker node + /// Extraction worker that executes scheduled data ingestion jobs. + /// + /// Requires --config and --node-id. Registers with the metadata database, + /// listens for job assignments, and writes Parquet files to storage. Worker { - /// The node id of the worker. + /// Unique identifier for this worker instance. + /// + /// Used for job assignment, heartbeat tracking, and log correlation. #[arg(long, env = "AMP_NODE_ID")] node_id: String, }, - /// Run the controller with Admin API + /// Controller providing the Admin API for job scheduling and management. + /// + /// Requires --config. Exposes a REST API (port 1610) for dataset + /// registration, job control, worker monitoring, and storage queries. Controller, - /// Run migrations on the metadata database + /// Apply pending schema migrations to the metadata database and exit. + /// + /// Requires --config. Connects to PostgreSQL, runs any unapplied + /// migrations, then terminates. Safe to run multiple times. 
Migrate, } @@ -111,6 +137,22 @@ async fn main_inner() -> Result<(), Error> { mut jsonl_server, mut admin_server, } => { + // Backward compat: FLIGHT_SERVER -> AMP_FLIGHT_SERVER + if !flight_server && matches!(env::var("FLIGHT_SERVER").as_deref(), Ok("true" | "1")) { + eprintln!("env var FLIGHT_SERVER is deprecated, use AMP_FLIGHT_SERVER instead"); + flight_server = true; + } + // Backward compat: JSONL_SERVER -> AMP_JSONL_SERVER + if !jsonl_server && matches!(env::var("JSONL_SERVER").as_deref(), Ok("true" | "1")) { + eprintln!("env var JSONL_SERVER is deprecated, use AMP_JSONL_SERVER instead"); + jsonl_server = true; + } + // Backward compat: ADMIN_SERVER -> AMP_ADMIN_SERVER + if !admin_server && matches!(env::var("ADMIN_SERVER").as_deref(), Ok("true" | "1")) { + eprintln!("env var ADMIN_SERVER is deprecated, use AMP_ADMIN_SERVER instead"); + admin_server = true; + } + if !flight_server && !jsonl_server && !admin_server { flight_server = true; jsonl_server = true; @@ -153,6 +195,17 @@ async fn main_inner() -> Result<(), Error> { mut flight_server, mut jsonl_server, } => { + // Backward compat: FLIGHT_SERVER -> AMP_FLIGHT_SERVER + if !flight_server && matches!(env::var("FLIGHT_SERVER").as_deref(), Ok("true" | "1")) { + eprintln!("env var FLIGHT_SERVER is deprecated, use AMP_FLIGHT_SERVER instead"); + flight_server = true; + } + // Backward compat: JSONL_SERVER -> AMP_JSONL_SERVER + if !jsonl_server && matches!(env::var("JSONL_SERVER").as_deref(), Ok("true" | "1")) { + eprintln!("env var JSONL_SERVER is deprecated, use AMP_JSONL_SERVER instead"); + jsonl_server = true; + } + // If neither of the flags are set, enable both servers if !flight_server && !jsonl_server { flight_server = true; diff --git a/docs/features/app-ampctl.md b/docs/features/app-ampctl.md index da3b16c9b..2736ffede 100644 --- a/docs/features/app-ampctl.md +++ b/docs/features/app-ampctl.md @@ -31,16 +31,16 @@ ampctl communicates with the Amp controller's Admin API, providing CLI access to ### 
Capabilities -| Capability | Description | -|------------|-------------| -| Dataset Management | List, register, deploy datasets | -| Job Control | Monitor, stop, delete extraction jobs | +| Capability | Description | +|------------------------|-----------------------------------------------------------| +| Dataset Management | List, register, deploy datasets | +| Job Control | Monitor, stop, delete extraction jobs | | Table Revision Control | Activate, deactivate, and manage table revision lifecycle | -| Storage Management | Query storage locations and file metadata | -| Provider Configuration | Manage EVM RPC and Firehose sources | -| Worker Monitoring | List workers and check heartbeat status | -| Schema Analysis | Validate SQL queries and infer schemas | -| Data Verification | Verify dataset integrity | +| Storage Management | Query storage locations and file metadata | +| Provider Configuration | Manage EVM RPC and Firehose sources | +| Worker Monitoring | List workers and check heartbeat status | +| Schema Analysis | Validate SQL queries and infer schemas | +| Data Verification | Verify dataset integrity | Run `ampctl --help` for a complete list of supported commands. 
@@ -113,6 +113,14 @@ ampctl dataset list ampctl dataset list --json ``` +### Configuration Reference + +| Flag | Env Var | Default | Description | +|----------------|------------------|-------------------------|------------------------| +| `--admin-url` | `AMP_ADMIN_URL` | `http://localhost:1610` | Admin API URL | +| `--auth-token` | `AMP_AUTH_TOKEN` | — | Bearer auth token | +| `--json` | — | Human readable | JSON output (CLI-only) | + ## References - [admin](admin.md) - Related: Administration overview diff --git a/docs/features/app-ampd-controller.md b/docs/features/app-ampd-controller.md index 2ebb9bd24..a1742426c 100644 --- a/docs/features/app-ampd-controller.md +++ b/docs/features/app-ampd-controller.md @@ -2,6 +2,7 @@ name: "app-ampd-controller" description: "ampd controller for job scheduling and admin API. Load when asking about controller, job scheduling, or admin API" type: feature +status: "stable" components: "app:ampd,service:controller,crate:config" --- diff --git a/docs/features/app-ampd-migrate.md b/docs/features/app-ampd-migrate.md new file mode 100644 index 000000000..5c9c88d4c --- /dev/null +++ b/docs/features/app-ampd-migrate.md @@ -0,0 +1,70 @@ +--- +name: "app-ampd-migrate" +description: "ampd migrate command for metadata database migrations. Load when asking about database migrations, schema upgrades, or ampd migrate" +type: feature +status: "stable" +components: "app:ampd,crate:metadata-db,crate:config" +--- + +# ampd Migrate + +## Summary + +The `ampd migrate` command runs database migrations on the metadata database. It connects to the configured PostgreSQL instance, applies any pending schema migrations, and exits. This is a one-shot administrative command used during upgrades or initial setup. + +## Table of Contents + +1. [Key Concepts](#key-concepts) +2. [Configuration](#configuration) +3. [Usage](#usage) +4. 
[References](#references) + +## Key Concepts + +- **Metadata Database**: PostgreSQL database storing job state, worker registrations, dataset definitions, and table revisions +- **Schema Migration**: Automatic application of pending DDL changes to bring the database schema up to date +- **One-Shot Command**: Unlike other ampd subcommands, `migrate` runs to completion and exits + +## Configuration + +`ampd migrate` requires `--config` (or `AMP_CONFIG`) to be provided. The config file must contain a valid +`metadata_db.url` (or the `AMP_CONFIG_METADATA_DB__URL` environment variable must be set). + +| Setting | Source | Description | +|-------------------|----------------------------------------------|------------------------------| +| `metadata_db.url` | Config file or `AMP_CONFIG_METADATA_DB__URL` | PostgreSQL connection string | + +## Usage + +### Running Migrations + +```bash +# Apply pending migrations using a config file +ampd --config .amp/config.toml migrate + +# Using the AMP_CONFIG environment variable +export AMP_CONFIG=.amp/config.toml +ampd migrate +``` + +### When to Run + +- **Initial setup**: Before starting any ampd service for the first time +- **After upgrades**: When a new ampd version includes schema changes +- **CI/CD pipelines**: As a pre-deployment step + +### Auto-Migration Alternative + +In non-production environments, the metadata database connection can be configured with +`auto_migrate = true` in the config file (defaults to `true` in solo mode). When enabled, services automatically apply migrations on startup, making the explicit +`migrate` command unnecessary. 
+ +```toml +[metadata_db] +url = "postgresql://localhost/amp" +auto_migrate = true +``` + +## References + +- [app-ampd](app-ampd.md) - Base: ampd daemon overview diff --git a/docs/features/app-ampd-server.md b/docs/features/app-ampd-server.md index 3303fb333..dc2179ff9 100644 --- a/docs/features/app-ampd-server.md +++ b/docs/features/app-ampd-server.md @@ -89,13 +89,28 @@ This command does not create directories itself; it relies on the configured pat ## Usage +### Endpoint Selection + +By default, `ampd server` starts both Flight and JSONL endpoints. Use `--flight-server` and `--jsonl-server` flags (or their env var equivalents `AMP_FLIGHT_SERVER` / `AMP_JSONL_SERVER`) to selectively enable only specific endpoints: + +```bash +# Start only the Flight endpoint +ampd server --flight-server + +# Start only the JSONL endpoint +ampd server --jsonl-server + +# Via environment variables +AMP_FLIGHT_SERVER=true ampd server +``` + ### Starting the Server ```bash -# Start ampd with default server configuration -ampd +# Start ampd with default server configuration (both endpoints) +ampd server -# The query server starts automatically with both endpoints: +# The query server starts with both endpoints: # - Arrow Flight on port 1602 # - JSON Lines on port 1603 ``` diff --git a/docs/features/app-ampd-solo.md b/docs/features/app-ampd-solo.md index c8c151f2c..ad6b1acce 100644 --- a/docs/features/app-ampd-solo.md +++ b/docs/features/app-ampd-solo.md @@ -2,7 +2,7 @@ name: "app-ampd-solo" description: "ampd solo mode for local development and testing. 
Load when asking about solo mode, development setup, or single-node deployments" type: feature -status: "unstable" +status: "stable" components: "app:ampd,service:server,service:controller,service:worker" --- @@ -81,6 +81,14 @@ ampd solo --flight-server --admin-server ampd solo --jsonl-server ``` +These flags can also be set via environment variables: + +```bash +export AMP_FLIGHT_SERVER=true +export AMP_JSONL_SERVER=true +export AMP_ADMIN_SERVER=true +``` + **Note:** The embedded worker always runs regardless of flags. ### When to Use Solo Mode diff --git a/docs/features/app-ampd-worker.md b/docs/features/app-ampd-worker.md index f21881efd..f8981d6e6 100644 --- a/docs/features/app-ampd-worker.md +++ b/docs/features/app-ampd-worker.md @@ -2,6 +2,7 @@ name: "app-ampd-worker" description: "ampd worker for blockchain data extraction. Load when asking about workers, extraction jobs, or data ingestion" type: feature +status: "stable" components: "app:ampd,service:worker,crate:config" --- @@ -50,9 +51,9 @@ For detailed deployment patterns, see [Operational Modes](../modes.md). ## Configuration -| Setting | Default | Description | -|---------|---------|-------------| -| `node_id` | Required | Unique worker identifier | +| Setting | Flag | Env Var | Default | Description | +|-----------|-------------|---------------|----------|--------------------------| +| `node_id` | `--node-id` | `AMP_NODE_ID` | Required | Unique worker identifier | ### Environment Variables diff --git a/docs/features/app-ampd.md b/docs/features/app-ampd.md index 792692786..e9d4c50d6 100644 --- a/docs/features/app-ampd.md +++ b/docs/features/app-ampd.md @@ -2,6 +2,7 @@ name: "app-ampd" description: "ampd daemon for data extraction, transformation, and query serving. Load when asking about ampd, operational modes, or the Amp daemon" type: meta +status: "stable" components: "app:ampd" --- @@ -29,12 +30,20 @@ ampd supports multiple operational modes and deployment patterns. 
For detailed i ### Quick Reference -| Command | Description | -|---------|-------------| -| `ampd solo` | Single-node mode for development | -| `ampd server` | Query server (Flight + JSONL) | -| `ampd controller` | Job scheduling and admin API | -| `ampd worker` | Data extraction worker | +| Command | Description | +|-------------------|----------------------------------| +| `ampd solo` | Single-node mode for development | +| `ampd server` | Query server (Flight + JSONL) | +| `ampd controller` | Job scheduling and admin API | +| `ampd worker` | Data extraction worker | + +### Global CLI Options + +| Flag | Env Var | Description | +|------------|--------------|-------------------------| +| `--config` | `AMP_CONFIG` | Configuration file path | + +For subcommand-specific options, see the individual mode documentation. ## References