Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions crates/bin/ampctl/src/cmd/manifest/generate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@
//! Note: Derived datasets are not supported for automatic generation as they
//! require custom SQL transformation definitions.
//!
//! # Configuration
//! # Configuration (CLI-only flags)
//!
//! - Dataset Kind: `--kind` flag or `GM_KIND` env var
//! - Network: `--network` flag or `GM_NETWORK` env var
//! - Output: `--out` flag or `GM_OUT` env var (optional, defaults to stdout)
//! - Start Block: `--start-block` flag or `GM_START_BLOCK` env var (optional, defaults to 0)
//! - Finalized Blocks Only: `--finalized-blocks-only` flag or `GM_FINALIZED_BLOCKS_ONLY` env var
//! All parameters are CLI-only flags with no environment variable equivalents:
//!
//! - `--kind` — Dataset kind (required)
//! - `--network` — Network name (required)
//! - `-o, --out` — Output file/directory (optional, defaults to stdout)
//! - `--start-block` — Starting block number (optional, defaults to 0)
//! - `--finalized-blocks-only` — Only include finalized block data (optional, defaults to false)

use std::path::PathBuf;

Expand All @@ -40,26 +42,26 @@ use solana_datasets::SolanaDatasetKind;
#[derive(Debug, clap::Args)]
pub struct Args {
/// Kind of the dataset (evm-rpc, firehose, solana).
#[arg(long, required = true, env = "GM_KIND")]
#[arg(long, required = true)]
pub kind: DatasetKindStr,

/// The name of the network.
#[arg(long, required = true, env = "GM_NETWORK")]
#[arg(long, required = true)]
pub network: NetworkId,

/// Output file or directory. If it's a directory, the generated file name will
/// match the `kind` parameter.
///
/// If not specified, the manifest will be printed to stdout.
#[arg(short, long, env = "GM_OUT")]
#[arg(short, long)]
pub out: Option<PathBuf>,

/// The starting block number for the dataset. Defaults to 0.
#[arg(long, env = "GM_START_BLOCK")]
#[arg(long)]
pub start_block: Option<u64>,

/// Only include finalized block data.
#[arg(long, env = "GM_FINALIZED_BLOCKS_ONLY")]
#[arg(long)]
pub finalized_blocks_only: bool,
}

Expand Down
89 changes: 71 additions & 18 deletions crates/bin/ampd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,41 +28,67 @@ struct Args {

#[derive(Debug, Clone, clap::Subcommand)]
enum Command {
/// Run Amp in local development mode with all services
/// All-in-one mode: server, controller and worker in a single process.
///
/// Starts Flight, JSONL, and Admin API endpoints plus an embedded worker.
/// If any --*-server flag is set, only those endpoints are enabled.
/// Intended for local development, testing, and CI — not production.
#[command(alias = "dev")]
Solo {
/// Directory for Amp daemon state and data. Defaults to `<cwd>/.amp/`.
/// If `--config` is not provided, searches for `config.toml` within this directory.
/// Amp state directory containing data, manifests, and providers.
///
/// Defaults to `<cwd>/.amp/`. When --config is not provided, looks
/// for `config.toml` inside this directory.
#[arg(long, env = "AMP_DIR")]
amp_dir: Option<PathBuf>,
/// Enable Arrow Flight RPC Server.
#[arg(long, env = "FLIGHT_SERVER")]
/// Enable the Arrow Flight gRPC endpoint (port 1602).
///
/// When any --*-server flag is set, only flagged endpoints start.
#[arg(long, env = "AMP_FLIGHT_SERVER")]
flight_server: bool,
/// Enable JSON Lines Server.
#[arg(long, env = "JSONL_SERVER")]
/// Enable the JSON Lines HTTP endpoint (port 1603).
///
/// When any --*-server flag is set, only flagged endpoints start.
#[arg(long, env = "AMP_JSONL_SERVER")]
jsonl_server: bool,
/// Enable Admin API Server.
#[arg(long, env = "ADMIN_SERVER")]
/// Enable the Admin API HTTP endpoint (port 1610).
///
/// When any --*-server flag is set, only flagged endpoints start.
#[arg(long, env = "AMP_ADMIN_SERVER")]
admin_server: bool,
},
/// Run query server (Arrow Flight, JSON Lines)
/// Query server exposing Flight and JSONL endpoints.
///
/// Requires --config. If any --*-server flag is set, only those
/// endpoints are enabled; otherwise both start.
Server {
/// Enable Arrow Flight RPC Server.
#[arg(long, env = "FLIGHT_SERVER")]
/// Enable the Arrow Flight gRPC endpoint (port 1602).
#[arg(long, env = "AMP_FLIGHT_SERVER")]
flight_server: bool,
/// Enable JSON Lines Server.
#[arg(long, env = "JSONL_SERVER")]
/// Enable the JSON Lines HTTP endpoint (port 1603).
#[arg(long, env = "AMP_JSONL_SERVER")]
jsonl_server: bool,
},
/// Run a distributed worker node
/// Extraction worker that executes scheduled data ingestion jobs.
///
/// Requires --config and --node-id. Registers with the metadata database,
/// listens for job assignments, and writes Parquet files to storage.
Worker {
/// The node id of the worker.
/// Unique identifier for this worker instance.
///
/// Used for job assignment, heartbeat tracking, and log correlation.
#[arg(long, env = "AMP_NODE_ID")]
node_id: String,
},
/// Run the controller with Admin API
/// Controller providing the Admin API for job scheduling and management.
///
/// Requires --config. Exposes a REST API (port 1610) for dataset
/// registration, job control, worker monitoring, and storage queries.
Controller,
/// Run migrations on the metadata database
/// Apply pending schema migrations to the metadata database and exit.
///
/// Requires --config. Connects to PostgreSQL, runs any unapplied
/// migrations, then terminates. Safe to run multiple times.
Migrate,
}

Expand Down Expand Up @@ -111,6 +137,22 @@ async fn main_inner() -> Result<(), Error> {
mut jsonl_server,
mut admin_server,
} => {
// Backward compat: FLIGHT_SERVER -> AMP_FLIGHT_SERVER
if !flight_server && matches!(env::var("FLIGHT_SERVER").as_deref(), Ok("true" | "1")) {
eprintln!("env var FLIGHT_SERVER is deprecated, use AMP_FLIGHT_SERVER instead");
flight_server = true;
}
// Backward compat: JSONL_SERVER -> AMP_JSONL_SERVER
if !jsonl_server && matches!(env::var("JSONL_SERVER").as_deref(), Ok("true" | "1")) {
eprintln!("env var JSONL_SERVER is deprecated, use AMP_JSONL_SERVER instead");
jsonl_server = true;
}
// Backward compat: ADMIN_SERVER -> AMP_ADMIN_SERVER
if !admin_server && matches!(env::var("ADMIN_SERVER").as_deref(), Ok("true" | "1")) {
eprintln!("env var ADMIN_SERVER is deprecated, use AMP_ADMIN_SERVER instead");
admin_server = true;
}

if !flight_server && !jsonl_server && !admin_server {
flight_server = true;
jsonl_server = true;
Expand Down Expand Up @@ -153,6 +195,17 @@ async fn main_inner() -> Result<(), Error> {
mut flight_server,
mut jsonl_server,
} => {
// Backward compat: FLIGHT_SERVER -> AMP_FLIGHT_SERVER
if !flight_server && matches!(env::var("FLIGHT_SERVER").as_deref(), Ok("true" | "1")) {
eprintln!("env var FLIGHT_SERVER is deprecated, use AMP_FLIGHT_SERVER instead");
flight_server = true;
}
// Backward compat: JSONL_SERVER -> AMP_JSONL_SERVER
if !jsonl_server && matches!(env::var("JSONL_SERVER").as_deref(), Ok("true" | "1")) {
eprintln!("env var JSONL_SERVER is deprecated, use AMP_JSONL_SERVER instead");
jsonl_server = true;
}

// If neither flag is set, enable both servers
if !flight_server && !jsonl_server {
flight_server = true;
Expand Down
26 changes: 17 additions & 9 deletions docs/features/app-ampctl.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@ ampctl communicates with the Amp controller's Admin API, providing CLI access to

### Capabilities

| Capability | Description |
|------------|-------------|
| Dataset Management | List, register, deploy datasets |
| Job Control | Monitor, stop, delete extraction jobs |
| Capability | Description |
|------------------------|-----------------------------------------------------------|
| Dataset Management | List, register, deploy datasets |
| Job Control | Monitor, stop, delete extraction jobs |
| Table Revision Control | Activate, deactivate, and manage table revision lifecycle |
| Storage Management | Query storage locations and file metadata |
| Provider Configuration | Manage EVM RPC and Firehose sources |
| Worker Monitoring | List workers and check heartbeat status |
| Schema Analysis | Validate SQL queries and infer schemas |
| Data Verification | Verify dataset integrity |
| Storage Management | Query storage locations and file metadata |
| Provider Configuration | Manage EVM RPC and Firehose sources |
| Worker Monitoring | List workers and check heartbeat status |
| Schema Analysis | Validate SQL queries and infer schemas |
| Data Verification | Verify dataset integrity |

Run `ampctl --help` for a complete list of supported commands.

Expand Down Expand Up @@ -113,6 +113,14 @@ ampctl dataset list
ampctl dataset list --json
```

### Configuration Reference

| Flag | Env Var | Default | Description |
|----------------|------------------|-------------------------|------------------------|
| `--admin-url` | `AMP_ADMIN_URL` | `http://localhost:1610` | Admin API URL |
| `--auth-token` | `AMP_AUTH_TOKEN` | — | Bearer auth token |
| `--json` | — | Human readable | JSON output (CLI-only) |

## References

- [admin](admin.md) - Related: Administration overview
1 change: 1 addition & 0 deletions docs/features/app-ampd-controller.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
name: "app-ampd-controller"
description: "ampd controller for job scheduling and admin API. Load when asking about controller, job scheduling, or admin API"
type: feature
status: "stable"
components: "app:ampd,service:controller,crate:config"
---

Expand Down
70 changes: 70 additions & 0 deletions docs/features/app-ampd-migrate.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
---
name: "app-ampd-migrate"
description: "ampd migrate command for metadata database migrations. Load when asking about database migrations, schema upgrades, or ampd migrate"
type: feature
status: "stable"
components: "app:ampd,crate:metadata-db,crate:config"
---

# ampd Migrate

## Summary

The `ampd migrate` command brings the metadata database schema up to date. It connects to the configured PostgreSQL instance, applies any pending schema migrations, and exits. This is a one-shot administrative command used during upgrades or initial setup.

## Table of Contents

1. [Key Concepts](#key-concepts)
2. [Configuration](#configuration)
3. [Usage](#usage)
4. [References](#references)

## Key Concepts

- **Metadata Database**: PostgreSQL database storing job state, worker registrations, dataset definitions, and table revisions
- **Schema Migration**: Automatic application of pending DDL changes to bring the database schema up to date
- **One-Shot Command**: Unlike other ampd subcommands, `migrate` runs to completion and exits

## Configuration

`ampd migrate` requires `--config` (or `AMP_CONFIG`) to be provided. The config file must contain a valid
`metadata_db.url` (or the `AMP_CONFIG_METADATA_DB__URL` environment variable must be set).

| Setting | Source | Description |
|-------------------|----------------------------------------------|------------------------------|
| `metadata_db.url` | Config file or `AMP_CONFIG_METADATA_DB__URL` | PostgreSQL connection string |

## Usage

### Running Migrations

```bash
# Apply pending migrations using a config file
ampd --config .amp/config.toml migrate

# Using the AMP_CONFIG environment variable
export AMP_CONFIG=.amp/config.toml
ampd migrate
```

### When to Run

- **Initial setup**: Before starting any ampd service for the first time
- **After upgrades**: When a new ampd version includes schema changes
- **CI/CD pipelines**: As a pre-deployment step

### Auto-Migration Alternative

In non-production environments, you can set `auto_migrate = true` under `[metadata_db]` in the config file
(this setting defaults to `true` in solo mode). When it is enabled, services apply pending migrations
automatically on startup, making the explicit `migrate` command unnecessary.

```toml
[metadata_db]
url = "postgresql://localhost/amp"
auto_migrate = true
```

## References

- [app-ampd](app-ampd.md) - Base: ampd daemon overview
21 changes: 18 additions & 3 deletions docs/features/app-ampd-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,28 @@ This command does not create directories itself; it relies on the configured pat

## Usage

### Endpoint Selection

By default, `ampd server` starts both the Flight and JSONL endpoints. To start only specific endpoints, pass the `--flight-server` and/or `--jsonl-server` flags (or set the equivalent environment variables, `AMP_FLIGHT_SERVER` and `AMP_JSONL_SERVER`); when at least one is set, only the flagged endpoints start:

```bash
# Start only the Flight endpoint
ampd server --flight-server

# Start only the JSONL endpoint
ampd server --jsonl-server

# Via environment variables
AMP_FLIGHT_SERVER=true ampd server
```

### Starting the Server

```bash
# Start ampd with default server configuration
ampd
# Start ampd with default server configuration (both endpoints)
ampd server

# The query server starts automatically with both endpoints:
# The query server starts with both endpoints:
# - Arrow Flight on port 1602
# - JSON Lines on port 1603
```
Expand Down
10 changes: 9 additions & 1 deletion docs/features/app-ampd-solo.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name: "app-ampd-solo"
description: "ampd solo mode for local development and testing. Load when asking about solo mode, development setup, or single-node deployments"
type: feature
status: "unstable"
status: "stable"
components: "app:ampd,service:server,service:controller,service:worker"
---

Expand Down Expand Up @@ -81,6 +81,14 @@ ampd solo --flight-server --admin-server
ampd solo --jsonl-server
```

These flags can also be set via environment variables:

```bash
export AMP_FLIGHT_SERVER=true
export AMP_JSONL_SERVER=true
export AMP_ADMIN_SERVER=true
```

**Note:** The embedded worker always runs regardless of flags.

### When to Use Solo Mode
Expand Down
7 changes: 4 additions & 3 deletions docs/features/app-ampd-worker.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
name: "app-ampd-worker"
description: "ampd worker for blockchain data extraction. Load when asking about workers, extraction jobs, or data ingestion"
type: feature
status: "stable"
components: "app:ampd,service:worker,crate:config"
---

Expand Down Expand Up @@ -50,9 +51,9 @@ For detailed deployment patterns, see [Operational Modes](../modes.md).

## Configuration

| Setting | Default | Description |
|---------|---------|-------------|
| `node_id` | Required | Unique worker identifier |
| Setting | Flag | Env Var | Default | Description |
|-----------|-------------|---------------|----------|--------------------------|
| `node_id` | `--node-id` | `AMP_NODE_ID` | Required | Unique worker identifier |

### Environment Variables

Expand Down
Loading