Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions .github/workflows/docs-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
name: Docs CI

on:
push:
branches: [main]
paths:
- "docs/**"
pull_request:
branches: [main]
paths:
- "docs/**"

# Cancel in-progress runs for the same branch/PR to save resources
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
format:
name: Format Check
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: "22"
- name: Setup PNPM
uses: pnpm/action-setup@v4
with:
version: "10.11.0"
run_install: false
- name: Get pnpm store directory
id: pnpm-cache
shell: bash
run: |
echo "STORE_PATH=$(pnpm store path)" >> $GITHUB_OUTPUT
- name: Setup pnpm cache
uses: actions/cache@v4
with:
path: ${{ steps.pnpm-cache.outputs.STORE_PATH }}
key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/docs/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-pnpm-store-
- name: Install dependencies
working-directory: ./docs
run: pnpm install
- name: Check formatting
working-directory: ./docs
run: pnpm prettier --check .

build:
name: Validate Build
runs-on: ubuntu-latest
needs: [format]
steps:
- uses: actions/checkout@v6
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: "22"
- name: Setup PNPM
uses: pnpm/action-setup@v4
with:
version: "10.11.0"
run_install: false
- name: Get pnpm store directory
id: pnpm-cache
shell: bash
run: |
echo "STORE_PATH=$(pnpm store path)" >> $GITHUB_OUTPUT
- name: Setup pnpm cache
uses: actions/cache@v4
with:
path: ${{ steps.pnpm-cache.outputs.STORE_PATH }}
key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/docs/pnpm-lock.yaml') }}
restore-keys: |
${{ runner.os }}-pnpm-store-
- name: Install dependencies
working-directory: ./docs
run: pnpm install
- name: Validate build
working-directory: ./docs
run: pnpm build:dry-run

vale-lint:
name: Vale lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: errata-ai/vale-action@v2.1.1
with:
files: "docs/src/content/"
17 changes: 17 additions & 0 deletions .vale.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
StylesPath = .github/styles

MinAlertLevel = suggestion

Packages = Google, write-good, MDX

Vocab = embucket

[*.{md,mdx}]
BasedOnStyles = Vale, Google, write-good

[formats]
mdx = md

[*.mdx]
CommentDelimiters = {/*, */}
TokenIgnores = (import\s+[^;]+;)
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,18 @@ Embucket features:
Start Embucket and run your first query in 30 seconds:

```bash
docker run --name embucket --rm -p 3000:3000 embucket/embucket
mkdir -p config
cat > config/metastore.yaml <<'EOF'
volumes:
- ident: embucket
type: memory
database: embucket
EOF

docker run --name embucket --rm -p 3000:3000 \
-v $PWD/config:/app/config \
embucket/embucket \
./embucketd --metastore-config config/metastore.yaml
```

Install and configure the Snowflake CLI against the local endpoint:
Expand Down Expand Up @@ -100,7 +111,6 @@ docker run --name embucket --rm -p 3000:3000 \

```bash
./embucketd \
--no-bootstrap \
--metastore-config config/metastore.yaml
```

Expand Down
5 changes: 4 additions & 1 deletion crates/embucket-lambda/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ default-binary = "bootstrap"
[package.metadata.lambda.deploy]
# Function name: can be set via the CARGO_LAMBDA_FUNCTION_NAME env var.
# If it is not set, the binary name is used as a fallback; the name can also be passed as a CLI argument.
memory = 3008
memory = 1024
timeout = 30
tracing = "Active"
# Note: include path is relative to workspace root
Expand All @@ -55,3 +55,6 @@ include = ["config"]
[package.metadata.lambda.deploy.env]
LOG_FORMAT = "json"
METASTORE_CONFIG = "config/metastore.yaml"
JWT_SECRET = "secret"
QUERY_TIMEOUT_SECS = "29"
MEM_POOL_SIZE_MB = "800"
9 changes: 0 additions & 9 deletions crates/embucket-lambda/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ pub struct EnvConfig {
pub mem_enable_track_consumers_pool: Option<bool>,
pub disk_pool_size_mb: Option<usize>,
pub query_history_rows_limit: usize,
pub bootstrap_default_entities: bool,
pub embucket_version: String,
pub metastore_config: Option<PathBuf>,
pub jwt_secret: Option<String>,
Expand All @@ -38,7 +37,6 @@ impl EnvConfig {
disk_pool_size_mb: parse_env("DISK_POOL_SIZE_MB"),
query_history_rows_limit: parse_env("QUERY_HISTORY_ROWS_LIMIT")
.unwrap_or(DEFAULT_QUERY_HISTORY_ROWS_LIMIT),
bootstrap_default_entities: !env_bool("NO_BOOTSTRAP"),
embucket_version: env_or_default("EMBUCKET_VERSION", "0.1.0"),
metastore_config: env::var("METASTORE_CONFIG").ok().map(PathBuf::from),
jwt_secret: env::var("JWT_SECRET").ok(),
Expand All @@ -51,7 +49,6 @@ impl EnvConfig {
pub fn execution_config(&self) -> ExecutionConfig {
ExecutionConfig {
embucket_version: self.embucket_version.clone(),
bootstrap_default_entities: self.bootstrap_default_entities,
sql_parser_dialect: self.sql_parser_dialect.clone(),
query_timeout_secs: self.query_timeout_secs,
max_concurrency_level: self.max_concurrency_level,
Expand All @@ -70,12 +67,6 @@ fn env_or_default(name: &str, default: &str) -> String {
env::var(name).unwrap_or_else(|_| default.to_string())
}

/// Reads the environment variable `name` as a boolean flag.
///
/// Returns `true` only when the variable is set and its lowercased value is
/// exactly one of `1`, `true`, `yes` or `on`. Any other value, as well as a
/// variable that cannot be read (unset or not valid Unicode), yields `false`.
fn env_bool(name: &str) -> bool {
    match env::var(name) {
        Ok(raw) => {
            // Normalise case once so the comparison below is case-insensitive.
            let normalized = raw.to_lowercase();
            matches!(normalized.as_str(), "1" | "true" | "yes" | "on")
        }
        // A missing or unreadable variable counts as "flag not set".
        Err(_) => false,
    }
}

fn parse_mem_pool_type() -> Option<MemPoolType> {
env::var("MEM_POOL_TYPE")
.ok()
Expand Down
1 change: 0 additions & 1 deletion crates/embucket-lambda/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ async fn main() -> Result<(), LambdaError> {
mem_pool_type = ?env_config.mem_pool_type,
mem_pool_size_mb = ?env_config.mem_pool_size_mb,
disk_pool_size_mb = ?env_config.disk_pool_size_mb,
bootstrap_default_entities = env_config.bootstrap_default_entities,
read_only = env_config.read_only,
metastore_config = env_config.metastore_config.as_ref().map(|p| p.display().to_string()),
"Loaded Lambda configuration"
Expand Down
8 changes: 0 additions & 8 deletions crates/embucketd/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ use tracing_subscriber::filter::LevelFilter;
#[derive(Parser)]
#[command(version, about, long_about=None)]
pub struct CliOpts {
#[arg(
long,
env = "NO_BOOTSTRAP",
default_value = "false",
help = "Disable bootstrap functionality"
)]
pub no_bootstrap: bool,

#[arg(
long,
env = "METASTORE_CONFIG",
Expand Down
6 changes: 0 additions & 6 deletions crates/embucketd/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,8 @@ async fn async_main(
opts.auth_demo_password.clone().unwrap(),
);

// Bootstrap the service if no flag is present (`--no-bootstrap`) with:
// 1. Creation of a default in-memory volume named `embucket`
// 2. Creation of a default database `embucket` in the volume `embucket`
// 3. Creation of a default schema `public` in the database `embucket`

let execution_cfg = ExecutionConfig {
embucket_version: "0.1.0".to_string(),
bootstrap_default_entities: !opts.no_bootstrap,
sql_parser_dialect: opts.sql_parser_dialect.clone(),
query_timeout_secs: opts.query_timeout_secs,
max_concurrency_level: opts.max_concurrency_level,
Expand Down
9 changes: 0 additions & 9 deletions crates/executor/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -600,15 +600,6 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to bootstrap {entity_type}: {source}"))]
Bootstrap {
entity_type: String,
#[snafu(source(from(catalog_metastore::error::Error, Box::new)))]
source: Box<catalog_metastore::error::Error>,
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Failed to get async result for query [{query_id}]: {error}"))]
AsyncResultTaskJoin {
#[snafu(source)]
Expand Down
68 changes: 2 additions & 66 deletions crates/executor/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,13 @@ use crate::running_queries::RunningQueryId;
use crate::session::{SESSION_INACTIVITY_EXPIRATION_SECONDS, to_unix};
use crate::tracing::SpanTracer;
use crate::utils::{Config, MemPoolType};
use catalog::catalog_list::{DEFAULT_CATALOG, EmbucketCatalogList};
use catalog_metastore::{
Database, InMemoryMetastore, Metastore, Schema, SchemaIdent, TableIdent as MetastoreTableIdent,
Volume, VolumeType,
};
use catalog::catalog_list::EmbucketCatalogList;
use catalog_metastore::{InMemoryMetastore, Metastore, TableIdent as MetastoreTableIdent};
use tokio::sync::RwLock;
use tokio::time::Duration;
use tracing::Instrument;
use uuid::Uuid;

const DEFAULT_SCHEMA: &str = "public";

pub const TIMEOUT_SIGNAL_INTERVAL_SECONDS: u64 = 60;

#[async_trait::async_trait]
Expand Down Expand Up @@ -159,11 +154,6 @@ impl CoreExecutionService {
err
)]
pub async fn new(metastore: Arc<dyn Metastore>, config: Arc<Config>) -> Result<Self> {
if config.bootstrap_default_entities {
// do not fail on bootstrap errors
let _ = Self::bootstrap(metastore.clone()).await;
}

Self::initialize_datafusion_tracer();

let catalog_list = Self::catalog_list(metastore.clone(), &config).await?;
Expand All @@ -178,60 +168,6 @@ impl CoreExecutionService {
})
}

/// Seeds the metastore with the default entities, in this order:
/// 1. a volume named after `DEFAULT_CATALOG`, created with `VolumeType::Memory`;
/// 2. a database with the same name, stored in that volume;
/// 3. a schema named `DEFAULT_SCHEMA` inside that database.
///
/// Stops at the first step that fails and returns it as a `Bootstrap` error
/// tagged with the entity type (`"volume"`, `"database"` or `"schema"`).
/// The function itself never panics; the `err` option of the tracing
/// attribute below records the returned error on the span.
#[tracing::instrument(
    name = "CoreExecutionService::bootstrap",
    level = "info",
    skip(metastore),
    err
)]
#[allow(clippy::cognitive_complexity)]
async fn bootstrap(metastore: Arc<dyn Metastore>) -> Result<()> {
    // One name is shared by the default volume and the default database.
    let catalog_name = DEFAULT_CATALOG.to_string();

    // Step 1: default volume.
    let volume = Volume::new(catalog_name.clone(), VolumeType::Memory);
    metastore
        .create_volume(&catalog_name, volume)
        .await
        .context(ex_error::BootstrapSnafu {
            entity_type: "volume",
        })?;

    // Step 2: default database, placed in the volume created above.
    let database = Database {
        ident: catalog_name.clone(),
        properties: None,
        volume: catalog_name.clone(),
        should_refresh: false,
    };
    metastore
        .create_database(&catalog_name, database)
        .await
        .context(ex_error::BootstrapSnafu {
            entity_type: "database",
        })?;

    // Step 3: default schema inside the default database.
    let schema_ident = SchemaIdent::new(catalog_name.clone(), DEFAULT_SCHEMA.to_string());
    let schema = Schema {
        ident: schema_ident.clone(),
        properties: None,
    };
    metastore
        .create_schema(&schema_ident, schema)
        .await
        .context(ex_error::BootstrapSnafu {
            entity_type: "schema",
        })?;

    Ok(())
}

#[tracing::instrument(
name = "CoreExecutionService::catalog_list",
level = "debug",
Expand Down
12 changes: 0 additions & 12 deletions crates/executor/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ pub static DEFAULT_QUERY_HISTORY_ROWS_LIMIT: usize = 50;
#[derive(Clone, Debug)]
pub struct Config {
pub embucket_version: String,
/// If true, bootstrap default volume, database and schema
pub bootstrap_default_entities: bool,
pub sql_parser_dialect: Option<String>,
pub query_timeout_secs: u64,
pub max_concurrency_level: usize,
Expand All @@ -60,7 +58,6 @@ impl Default for Config {
fn default() -> Self {
Self {
embucket_version: "0.1.0".to_string(),
bootstrap_default_entities: true,
sql_parser_dialect: None,
query_timeout_secs: 1200, // 20 minutes
max_concurrency_level: 100,
Expand Down Expand Up @@ -88,15 +85,6 @@ impl Config {
self
}

/// Builder-style setter for `bootstrap_default_entities`.
///
/// Takes the config by value, overwrites the `bootstrap_default_entities`
/// field with the given flag and returns the updated config, so calls can be
/// chained. `#[must_use]` flags call sites that discard the returned config.
#[must_use]
pub const fn with_bootstrap_default_entities(
mut self,
bootstrap_default_entities: bool,
) -> Self {
self.bootstrap_default_entities = bootstrap_default_entities;
self
}

#[must_use]
pub const fn with_query_history_rows_limit(mut self, limit: usize) -> Self {
self.query_history_rows_limit = limit;
Expand Down
24 changes: 24 additions & 0 deletions docs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Dependencies
node_modules
.pnpm-debug.log*

# Build artifacts
/dist
/.astro

# OS-specific
.DS_Store
*.swo
*~
*.swp

# Logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

# Environment variables
.env
.env*.local
Loading
Loading