Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions crates/bashkit/src/builtins/awk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
use async_trait::async_trait;
use regex::Regex;
use std::collections::HashMap;

use super::search_common::build_regex;
use std::path::PathBuf;
use std::sync::Arc;

Expand Down Expand Up @@ -686,7 +688,7 @@ impl<'a> AwkParser<'a> {
if c == '/' {
let pattern = &self.input[start..self.pos];
self.pos += 1;
let regex = Regex::new(pattern)
let regex = build_regex(pattern)
.map_err(|e| Error::Execution(format!("awk: invalid regex: {}", e)))?;
return Ok(Some(AwkPattern::Regex(regex)));
} else if c == '\\' {
Expand Down Expand Up @@ -2162,7 +2164,7 @@ impl AwkInterpreter {
fn eval_expr_as_bool(&mut self, expr: &AwkExpr) -> bool {
if let AwkExpr::Regex(pattern) = expr {
let line = self.state.get_field(0).as_string();
if let Ok(re) = Regex::new(pattern) {
if let Ok(re) = build_regex(pattern) {
return re.is_match(&line);
}
return false;
Expand Down Expand Up @@ -2236,7 +2238,7 @@ impl AwkInterpreter {
AwkValue::Number(if lb || rb { 1.0 } else { 0.0 })
}
"~" => {
if let Ok(re) = Regex::new(&r.as_string()) {
if let Ok(re) = build_regex(&r.as_string()) {
AwkValue::Number(if re.is_match(&l.as_string()) {
1.0
} else {
Expand All @@ -2247,7 +2249,7 @@ impl AwkInterpreter {
}
}
"!~" => {
if let Ok(re) = Regex::new(&r.as_string()) {
if let Ok(re) = build_regex(&r.as_string()) {
AwkValue::Number(if !re.is_match(&l.as_string()) {
1.0
} else {
Expand Down Expand Up @@ -2363,7 +2365,7 @@ impl AwkInterpreter {
}
AwkExpr::Match(expr, pattern) => {
let s = self.eval_expr(expr).as_string();
if let Ok(re) = Regex::new(pattern) {
if let Ok(re) = build_regex(pattern) {
AwkValue::Number(if re.is_match(&s) { 1.0 } else { 0.0 })
} else {
AwkValue::Number(0.0)
Expand Down Expand Up @@ -2514,7 +2516,7 @@ impl AwkInterpreter {

let target = self.eval_expr(&target_expr).as_string();

if let Ok(re) = Regex::new(&pattern) {
if let Ok(re) = build_regex(&pattern) {
let (result, count) = if name == "gsub" {
let count = re.find_iter(&target).count();
(
Expand Down Expand Up @@ -2600,7 +2602,7 @@ impl AwkInterpreter {
} else {
None
};
if let Ok(re) = Regex::new(&pattern) {
if let Ok(re) = build_regex(&pattern) {
if let Some(caps) = re.captures(&s) {
let m = caps.get(0).unwrap();
let rstart = m.start() + 1; // awk is 1-indexed
Expand Down Expand Up @@ -2648,7 +2650,7 @@ impl AwkInterpreter {
} else {
self.state.get_field(0).as_string()
};
if let Ok(re) = Regex::new(&pattern) {
if let Ok(re) = build_regex(&pattern) {
if how == "g" || how == "G" {
AwkValue::String(re.replace_all(&target, replacement.as_str()).to_string())
} else {
Expand Down
8 changes: 3 additions & 5 deletions crates/bashkit/src/builtins/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@
//! grep --line-buffered pattern # line-buffered (no-op)

use async_trait::async_trait;
use regex::{Regex, RegexBuilder};
use regex::Regex;

use super::search_common::parse_numeric_flag_arg;
use super::search_common::{build_regex_opts, parse_numeric_flag_arg};
use super::{Builtin, Context};
use crate::error::{Error, Result};
use crate::interpreter::ExecResult;
Expand Down Expand Up @@ -295,9 +295,7 @@ impl GrepOptions {
combined
};

RegexBuilder::new(&final_pattern)
.case_insensitive(self.ignore_case)
.build()
build_regex_opts(&final_pattern, self.ignore_case)
.map_err(|e| Error::Execution(format!("grep: invalid pattern: {}", e)))
}
}
Expand Down
27 changes: 24 additions & 3 deletions crates/bashkit/src/builtins/search_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,29 @@ use regex::{Regex, RegexBuilder};
use crate::error::{Error, Result};
use crate::fs::FileSystem;

/// Default compiled-regex size limit (1 MB).
///
/// `build_regex_opts` passes this value to `RegexBuilder::size_limit`, so every
/// regex built through the shared helpers is compiled under this cap.
pub(crate) const REGEX_SIZE_LIMIT: usize = 1_000_000;

/// Default DFA size limit (1 MB).
///
/// `build_regex_opts` passes this value to `RegexBuilder::dfa_size_limit`.
/// NOTE(review): per the `regex` crate docs this appears to bound the lazy-DFA
/// cache rather than reject patterns outright — confirm that is the intent.
pub(crate) const REGEX_DFA_SIZE_LIMIT: usize = 1_000_000;

/// Compile `pattern` as a case-sensitive regex with the shared size limits.
///
/// Thin convenience wrapper around [`build_regex_opts`] with
/// case-insensitivity switched off. Returns the raw `regex::Error` so each
/// caller can attach its own command-specific message.
pub(crate) fn build_regex(pattern: &str) -> std::result::Result<Regex, regex::Error> {
    let case_insensitive = false;
    build_regex_opts(pattern, case_insensitive)
}

/// Compile `pattern` with the shared size limits applied.
///
/// * `pattern` - regex source text.
/// * `case_insensitive` - when `true`, matching ignores case.
///
/// The builder is configured with [`REGEX_SIZE_LIMIT`] and
/// [`REGEX_DFA_SIZE_LIMIT`] before compilation. Any failure reported by the
/// builder is returned unchanged as a `regex::Error`.
pub(crate) fn build_regex_opts(
    pattern: &str,
    case_insensitive: bool,
) -> std::result::Result<Regex, regex::Error> {
    let mut builder = RegexBuilder::new(pattern);
    builder.case_insensitive(case_insensitive);
    builder.size_limit(REGEX_SIZE_LIMIT);
    builder.dfa_size_limit(REGEX_DFA_SIZE_LIMIT);
    builder.build()
}

/// Recursively collect all files under the given directories in the VFS.
///
/// Returns sorted list of file paths (directories are traversed but not included).
Expand Down Expand Up @@ -60,9 +83,7 @@ pub(crate) fn build_search_regex(
pat
};

RegexBuilder::new(&pat)
.case_insensitive(ignore_case)
.build()
build_regex_opts(&pat, ignore_case)
.map_err(|e| Error::Execution(format!("{}: invalid pattern: {}", cmd_name, e)))
}

Expand Down
14 changes: 7 additions & 7 deletions crates/bashkit/src/builtins/sed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
#![allow(clippy::unwrap_used)]

use async_trait::async_trait;
use regex::{Regex, RegexBuilder};
use regex::Regex;

use super::search_common::{build_regex, build_regex_opts};

use super::{Builtin, Context, read_text_file};
use crate::error::{Error, Result};
Expand Down Expand Up @@ -341,7 +343,7 @@ fn parse_address(s: &str) -> Result<(Option<Address>, &str)> {
Error::Execution("sed: unterminated address regex".to_string())
})?;
let pattern = &after_slash[..end2];
let regex = Regex::new(pattern)
let regex = build_regex(pattern)
.map_err(|e| Error::Execution(format!("sed: invalid regex: {}", e)))?;
if num == 0 {
return Ok((Some(Address::ZeroRegex(regex)), &after_slash[end2 + 1..]));
Expand Down Expand Up @@ -377,7 +379,7 @@ fn parse_address(s: &str) -> Result<(Option<Address>, &str)> {
.find('/')
.ok_or_else(|| Error::Execution("sed: unterminated address regex".to_string()))?;
let pattern = &s[1..end + 1];
let regex = Regex::new(pattern)
let regex = build_regex(pattern)
.map_err(|e| Error::Execution(format!("sed: invalid regex: {}", e)))?;
let rest = &s[end + 2..];

Expand All @@ -398,7 +400,7 @@ fn parse_address(s: &str) -> Result<(Option<Address>, &str)> {
Error::Execution("sed: unterminated address regex".to_string())
})?;
let pattern2 = &after_slash[..end2];
let regex2 = Regex::new(pattern2)
let regex2 = build_regex(pattern2)
.map_err(|e| Error::Execution(format!("sed: invalid regex: {}", e)))?;
return Ok((
Some(Address::RegexRange(regex, regex2)),
Expand Down Expand Up @@ -496,9 +498,7 @@ fn parse_sed_command(s: &str, extended_regex: bool) -> Result<(Option<Address>,
};
// Build regex with optional case-insensitive flag
let case_insensitive = flags.contains('i');
let regex = RegexBuilder::new(&pattern)
.case_insensitive(case_insensitive)
.build()
let regex = build_regex_opts(&pattern, case_insensitive)
.map_err(|e| Error::Execution(format!("sed: invalid pattern: {}", e)))?;

// Convert sed replacement syntax to regex replacement syntax
Expand Down
110 changes: 110 additions & 0 deletions crates/bashkit/tests/regex_limit_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
//! Regex size limit tests for grep, sed, and awk builtins
//!
//! Verifies that oversized regex patterns are rejected rather than causing
//! resource exhaustion (issue #984).

use bashkit::Bash;
use std::time::Duration;

/// Helper: build the alternation pattern `"1|2|3|...|N"`.
///
/// Returns an empty string when `n` is zero.
fn huge_alternation_pattern(n: usize) -> String {
    let mut pattern = String::new();
    for value in 1..=n {
        // Separator goes between alternatives, never before the first one.
        if !pattern.is_empty() {
            pattern.push('|');
        }
        pattern.push_str(&value.to_string());
    }
    pattern
}

/// Create a `Bash` instance whose execution limits carry a 10-second timeout.
fn test_bash() -> Bash {
    let limits = bashkit::ExecutionLimits::new().timeout(Duration::from_secs(10));
    Bash::builder().limits(limits).build()
}

/// grep given a 50,000-way alternation must either exit non-zero or surface
/// an error whose text names the size limit / invalid pattern.
#[tokio::test]
async fn grep_rejects_huge_regex() {
    let script = format!("echo test | grep '{}'", huge_alternation_pattern(50_000));
    let mut shell = test_bash();

    let err = match shell.exec(&script).await {
        Ok(output) => {
            assert_ne!(output.exit_code, 0, "grep should fail with oversized regex");
            return;
        }
        Err(err) => err,
    };

    let text = err.to_string();
    let acceptable = ["size limit", "invalid pattern"]
        .iter()
        .any(|needle| text.contains(*needle));
    assert!(acceptable, "error should mention size limit, got: {}", text);
}

/// Sanity check: an ordinary small pattern still matches and prints the line.
#[tokio::test]
async fn grep_accepts_normal_regex() {
    let script = "echo 'hello world' | grep 'hello'";
    let mut shell = Bash::new();
    let output = shell.exec(script).await.unwrap();

    assert_eq!(output.exit_code, 0);
    assert_eq!(output.stdout.trim(), "hello world");
}

/// sed given a 50,000-way alternation in `s///` must finish and either fail,
/// pass the input through untouched, or surface an error mentioning the
/// size limit / invalid pattern.
#[tokio::test]
async fn sed_rejects_huge_regex() {
    let script = format!(
        "echo test | sed 's/{}/replaced/'",
        huge_alternation_pattern(50_000)
    );
    let mut shell = test_bash();

    match shell.exec(&script).await {
        Err(err) => {
            let text = err.to_string();
            let acceptable = text.contains("size limit") || text.contains("invalid");
            assert!(acceptable, "error should mention size limit, got: {}", text);
        }
        Ok(output) => {
            // The sed error travels through a pipeline, so the exit code may
            // or may not be non-zero depending on how the interpreter reports
            // pipeline failures. The security property under test is that the
            // command completes without resource exhaustion.
            let stdout = output.stdout.trim();
            let failed = output.exit_code != 0;
            assert!(
                failed || stdout == "test",
                "sed should either fail or pass input through with oversized regex, \
                 exit={}, stdout='{}'",
                output.exit_code,
                stdout
            );
        }
    }
}

/// awk `match()` given a 50,000-way alternation must finish and either print
/// nothing, exit non-zero, or surface an error mentioning the size limit /
/// invalid pattern.
#[tokio::test]
async fn awk_rejects_huge_regex_in_match() {
    let script = format!(
        "echo test | awk '{{ if (match($0, \"{}\" )) print }}'",
        huge_alternation_pattern(50_000)
    );
    let mut shell = test_bash();

    match shell.exec(&script).await {
        Err(err) => {
            let text = err.to_string();
            let acceptable = text.contains("size limit") || text.contains("invalid");
            assert!(acceptable, "error should mention size limit, got: {}", text);
        }
        Ok(output) => {
            // awk swallows an invalid regex inside match(), so success here
            // means "no output". The security property under test is that the
            // command completes without resource exhaustion.
            let stdout = output.stdout.trim();
            let failed = output.exit_code != 0;
            assert!(
                stdout.is_empty() || failed,
                "awk should not match with oversized regex, \
                 exit={}, stdout='{}'",
                output.exit_code,
                stdout
            );
        }
    }
}
8 changes: 8 additions & 0 deletions crates/bashkit/tests/spec_cases/bash/regex-limit.test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Regex size/complexity limit tests

### grep_normal_regex_works
# Normal regex should work fine
echo "hello world" | grep "hello"
### expect
hello world
### end
Loading