Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9860aea
fix(storage): ensure BufferPoolManager flushes all dirty pages on des…
poyrazK Mar 8, 2026
3e8ac2a
fix(storage): resolve 1024-row bug by simplifying page space validati…
poyrazK Mar 8, 2026
62cd6a2
test(e2e): enhance Python client robustness and fix binary name in or…
poyrazK Mar 8, 2026
8255d84
docs: add Stability & Testing Refinement to the roadmap
poyrazK Mar 8, 2026
b3cc142
chore: cleanup reproduction test and revert build changes
poyrazK Mar 8, 2026
5ef2128
chore: ignore heap files and remove them from index
poyrazK Mar 8, 2026
349af8e
style: automated clang-format fixes
poyrazK Mar 8, 2026
d69635c
fix(storage): harden BufferPoolManager destructor with exception safety
poyrazK Mar 8, 2026
f409e85
chore(storage): remove debug logging and dead code from HeapTable
poyrazK Mar 8, 2026
5bfe0ae
test(e2e): improve client reliability and ensure background process c…
poyrazK Mar 8, 2026
c6b8eb2
docs: formalize Phase 9 roadmap and reorganize gitignore categories
poyrazK Mar 8, 2026
b474388
style: automated clang-format fixes
poyrazK Mar 8, 2026
4f35ce6
feat(parser,executor): implement HAVING clause and expand test coverage
poyrazK Mar 8, 2026
355e4ea
style: automated clang-format fixes
poyrazK Mar 8, 2026
878d551
test: expand operator and parser coverage with advanced test cases
poyrazK Mar 8, 2026
bd6f8df
style: automated clang-format fixes
poyrazK Mar 8, 2026
88872b4
test: refine MVCC visibility and expand vectorized operator tests
poyrazK Mar 8, 2026
821ffa6
style: automated clang-format fixes
poyrazK Mar 8, 2026
e2388b2
test: major coverage boost for network, recovery, and executor compon…
poyrazK Mar 8, 2026
2991320
style: automated clang-format fixes
poyrazK Mar 8, 2026
a1f186a
test: achieve significant coverage boost for network and recovery
poyrazK Mar 8, 2026
51b82e4
style: automated clang-format fixes
poyrazK Mar 8, 2026
a5b64a8
test: reach for 70% coverage with exhaustive parser and network edge …
poyrazK Mar 8, 2026
17447c9
parser: implement CreateIndexStatement AST and parse_create_index
poyrazK Mar 9, 2026
5f5eb18
tests: harden cloudSQL execution and visibility tests
poyrazK Mar 9, 2026
b09bdce
tests: resolve raft simulation and networking race conditions
poyrazK Mar 9, 2026
a77dc20
tests: enhance recovery log validation and server handshake reliability
poyrazK Mar 9, 2026
5cd90ae
style: automated clang-format fixes
poyrazK Mar 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ coverage/
*.orig
*.rej

# Storage Files
# ==============
*.heap

# ==============
# Emacs
# ==============
Expand Down
7 changes: 7 additions & 0 deletions docs/phases/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ This directory contains the technical documentation for the lifecycle of the clo
- Batch-at-a-time vectorized execution model (Scan, Filter, Project, Aggregate).
- High-performance `NumericVector` and `VectorBatch` data structures.

### Phase 9 — Stability & Testing Refinement
**Focus**: Engine Robustness & E2E Validation.
- Slotted-page layout fixes for large table support.
- Buffer Pool Manager lifecycle management (destructor flushing).
- Robust Python E2E client with partial-read handling and numeric validation.
- Standardized test orchestration via `run_test.sh`.

---

## Technical Standards
Expand Down
6 changes: 6 additions & 0 deletions src/executor/query_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,12 @@ std::unique_ptr<Operator> QueryExecutor::build_plan(const parser::SelectStatemen
}
current_root = std::make_unique<AggregateOperator>(std::move(current_root),
std::move(group_by), std::move(aggs));

/* 3.5. Having */
if (stmt.having()) {
current_root =
std::make_unique<FilterOperator>(std::move(current_root), stmt.having()->clone());
}
}

/* 4. Sort (ORDER BY) */
Expand Down
3 changes: 2 additions & 1 deletion src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ std::map<std::string, TokenType> Lexer::init_keywords() {
{"CHAR", TokenType::TypeChar},
{"BOOL", TokenType::TypeBool},
{"BOOLEAN", TokenType::TypeBool},
{"DISTINCT", TokenType::Distinct}};
{"DISTINCT", TokenType::Distinct},
{"HAVING", TokenType::Having}};
}

Token Lexer::next_token() {
Expand Down
21 changes: 19 additions & 2 deletions src/storage/buffer_pool_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
Expand All @@ -28,7 +29,19 @@ BufferPoolManager::BufferPoolManager(size_t pool_size, StorageManager& storage_m
}
}

BufferPoolManager::~BufferPoolManager() = default;
// Destructor: best-effort flush of every page back through the storage manager
// before the pool goes away, so buffered writes are not silently dropped.
// A destructor must never throw (it would call std::terminate during stack
// unwinding), so any failure from flush_all_pages() is caught and reported
// on stderr instead of propagating.
BufferPoolManager::~BufferPoolManager() {
try {
flush_all_pages();
} catch (const std::exception& e) {
// Log error to stderr; avoid throwing from destructor to prevent std::terminate
std::cerr << "[Error] Exception in BufferPoolManager destructor during flush_all_pages: "
<< e.what() << std::endl;
} catch (...) {
// Non-standard exceptions carry no message; emit a generic notice.
std::cerr
<< "[Error] Unknown exception in BufferPoolManager destructor during flush_all_pages"
<< std::endl;
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_id) {
const std::scoped_lock<std::mutex> lock(latch_);
Expand Down Expand Up @@ -62,7 +75,11 @@ Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_
page->file_name_ = file_name;
page->pin_count_ = 1;
page->is_dirty_ = false;
storage_manager_.read_page(file_name, page_id, page->get_data());

if (!storage_manager_.read_page(file_name, page_id, page->get_data())) {
// If read fails (e.g. file too short), initialize with zeros
std::memset(page->get_data(), 0, Page::PAGE_SIZE);
}
Comment on lines +78 to +82
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check how storage_manager_.read_page handles EOF vs actual errors
ast-grep --pattern 'bool read_page($_, $_, $_) {
  $$$
}'

Repository: poyrazK/cloudSQL

Length of output: 42


🏁 Script executed:

# Find the storage manager files
find . -type f -name "*storage*manager*" | head -20

Repository: poyrazK/cloudSQL

Length of output: 131


🏁 Script executed:

# Look at the file in question to understand the context
head -n 100 src/storage/buffer_pool_manager.cpp | tail -n 30

Repository: poyrazK/cloudSQL

Length of output: 993


🏁 Script executed:

# Search for read_page implementations in storage layer
rg "bool read_page" --type cpp -A 5

Repository: poyrazK/cloudSQL

Length of output: 1239


🏁 Script executed:

# Check the read_page implementation in storage_manager.cpp
cat -n src/storage/storage_manager.cpp | head -150

Repository: poyrazK/cloudSQL

Length of output: 5020


🏁 Script executed:

# Find where HeapTable::read_page and BTreeIndex::read_page are implemented
rg "HeapTable::read_page|BTreeIndex::read_page" --type cpp -B 2 -A 10

Repository: poyrazK/cloudSQL

Length of output: 1513


🏁 Script executed:

# Get the fetch_page function signature and what it returns
rg "fetch_page" include/storage/buffer_pool_manager.hpp -A 3

Repository: poyrazK/cloudSQL

Length of output: 174


Callers cannot detect I/O failures—both EOF and actual read errors result in zero-filled pages returned as success.

StorageManager::read_page correctly distinguishes EOF (returns true with zero-fill) from I/O errors (returns false). However, BufferPoolManager::fetch_page masks this distinction by:

  1. Zero-filling on read_page failure
  2. Always returning a valid Page* pointer (never nullptr)

Since HeapTable::read_page and BTreeIndex::read_page only check for nullptr, they cannot differentiate successful reads from I/O failures. A disk read error becomes indistinguishable from a legitimate EOF case—both appear as success with zero-filled data to callers.

This is appropriate for new pages beyond file EOF, but actual I/O errors (disk failures, corruption) are silently masked, potentially causing silent data corruption.

The storage layer should propagate read failures so callers can handle them appropriately, or the buffer pool should use an out-parameter or status code to signal failures separately from the page pointer.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/storage/buffer_pool_manager.cpp` around lines 78 - 82,
BufferPoolManager::fetch_page currently masks StorageManager::read_page failures
by zero-filling and always returning a Page*, so callers can't distinguish EOF
vs I/O error; change fetch_page (and any helper that installs pages into frames)
to treat storage_manager_.read_page returning false as a fatal read error: do
not zero-fill or return the Page* — instead release any reserved frame and
return nullptr (or propagate an error status) so callers like
HeapTable::read_page and BTreeIndex::read_page can detect failures; keep the
existing zero-fill only for the case where read_page returns true but indicates
EOF (if StorageManager encodes EOF as a successful result).


replacer_.pin(frame_id);
return page;
Expand Down
4 changes: 1 addition & 3 deletions src/storage/heap_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,10 @@ HeapTable::TupleId HeapTable::insert(const executor::Tuple& tuple, uint64_t xmin
}

const auto required = static_cast<uint16_t>(data_str.size() + 1);
const auto slot_array_end =
static_cast<uint16_t>(sizeof(PageHeader) + ((header.num_slots + 1) * sizeof(uint16_t)));

/* Check for sufficient free space in the current page */
if (header.free_space_offset + required < Page::PAGE_SIZE &&
slot_array_end < header.free_space_offset) {
header.num_slots < DEFAULT_SLOT_COUNT) {
const uint16_t offset = header.free_space_offset;
std::memcpy(std::next(buffer.data(), static_cast<std::ptrdiff_t>(offset)),
data_str.c_str(), data_str.size() + 1);
Expand Down
36 changes: 36 additions & 0 deletions tests/analytics_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,4 +219,40 @@ TEST(AnalyticsTests, AggregateNullHandling) {
EXPECT_TRUE(result_batch->get_column(1).is_null(0));
}

// Exercises SQL three-valued logic through the vectorized expression engine:
// (a IS NULL) OR (a > 20) over rows containing NULLs in either column.
// Fix: removed the unused `StorageManager storage("./test_analytics")` local —
// this test never touches storage, and the stray object only created on-disk
// state as a side effect.
TEST(AnalyticsTests, VectorizedExpressionAdvanced) {
    // Two nullable INT64 columns so NULL propagation can be observed.
    Schema schema;
    schema.add_column("a", common::ValueType::TYPE_INT64, true);
    schema.add_column("b", common::ValueType::TYPE_INT64, true);

    auto batch = VectorBatch::create(schema);
    // Row 0: (10, 20)
    batch->append_tuple(Tuple({common::Value::make_int64(10), common::Value::make_int64(20)}));
    // Row 1: (NULL, 30)
    batch->append_tuple(Tuple({common::Value::make_null(), common::Value::make_int64(30)}));
    // Row 2: (40, NULL)
    batch->append_tuple(Tuple({common::Value::make_int64(40), common::Value::make_null()}));

    // Expression under test: (a IS NULL) OR (a > 20)
    auto col_a = std::make_unique<ColumnExpr>("a");
    auto is_null = std::make_unique<IsNullExpr>(std::move(col_a), false);
    auto col_a_2 = std::make_unique<ColumnExpr>("a");
    auto gt_20 =
        std::make_unique<BinaryExpr>(std::move(col_a_2), TokenType::Gt,
                                     std::make_unique<ConstantExpr>(common::Value::make_int64(20)));

    BinaryExpr or_expr(std::move(is_null), TokenType::Or, std::move(gt_20));

    NumericVector<bool> res(common::ValueType::TYPE_BOOL);
    or_expr.evaluate_vectorized(*batch, schema, res);

    ASSERT_EQ(res.size(), 3U);
    // Row 0: (10 IS NULL) OR (10 > 20) -> FALSE OR FALSE -> FALSE
    EXPECT_FALSE(res.get(0).as_bool());
    // Row 1: (NULL IS NULL) OR (NULL > 20) -> TRUE OR NULL -> TRUE
    EXPECT_TRUE(res.get(1).as_bool());
    // Row 2: (40 IS NULL) OR (40 > 20) -> FALSE OR TRUE -> TRUE
    EXPECT_TRUE(res.get(2).as_bool());
}

} // namespace
160 changes: 160 additions & 0 deletions tests/cloudSQL_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -778,4 +778,164 @@ TEST(CatalogTests, Stats) {
catalog->print();
}

// ============= Parser Advanced Tests =============

// Parses three classes of statement — multi-join SELECTs, GROUP BY/HAVING with
// ORDER BY, and bare transaction keywords — and checks the resulting AST shape.
TEST(ParserAdvanced, JoinAndComplexSelect) {
    // Shared helper: lex and parse one SQL statement end-to-end.
    const auto parse = [](const std::string& sql) {
        Parser parser(std::make_unique<Lexer>(sql));
        return parser.parse_statement();
    };

    /* 1. Left Join and multiple joins */
    {
        auto stmt = parse(
            "SELECT a.id, b.val FROM t1 LEFT JOIN t2 ON a.id = b.id JOIN t3 ON b.x = t3.x WHERE "
            "a.id > 10");
        ASSERT_NE(stmt, nullptr);
        const auto* const select = dynamic_cast<const SelectStatement*>(stmt.get());
        ASSERT_NE(select, nullptr);
        // Both join clauses must be captured, with their types preserved in order.
        EXPECT_EQ(select->joins().size(), 2U);
        EXPECT_EQ(select->joins()[0].type, SelectStatement::JoinType::Left);
        EXPECT_EQ(select->joins()[1].type, SelectStatement::JoinType::Inner);
    }

    /* 2. Group By and Having */
    {
        auto stmt = parse(
            "SELECT cat, SUM(val) FROM items GROUP BY cat HAVING SUM(val) > 1000 ORDER BY cat "
            "DESC");
        ASSERT_NE(stmt, nullptr);
        const auto* const select = dynamic_cast<const SelectStatement*>(stmt.get());
        ASSERT_NE(select, nullptr);
        EXPECT_EQ(select->group_by().size(), 1U);
        ASSERT_NE(select->having(), nullptr);
        EXPECT_EQ(select->order_by().size(), 1U);
    }

    /* 3. Transaction Statements */
    {
        const auto begin_stmt = parse("BEGIN");
        ASSERT_NE(begin_stmt, nullptr);
        EXPECT_EQ(begin_stmt->type(), StmtType::TransactionBegin);

        const auto commit_stmt = parse("COMMIT");
        ASSERT_NE(commit_stmt, nullptr);
        EXPECT_EQ(commit_stmt->type(), StmtType::TransactionCommit);

        const auto rollback_stmt = parse("ROLLBACK");
        ASSERT_NE(rollback_stmt, nullptr);
        EXPECT_EQ(rollback_stmt->type(), StmtType::TransactionRollback);
    }
}

// Malformed statements must make the parser bail out and return nullptr
// rather than produce a partial AST.
TEST(ParserAdvanced, ParserErrorPaths) {
    const auto parse = [](const std::string& sql) {
        Parser parser(std::make_unique<Lexer>(sql));
        return parser.parse_statement();
    };

    EXPECT_EQ(parse("CREATE TABLE (id INT)"), nullptr);      // Missing table name
    EXPECT_EQ(parse("SELECT * FROM t1 LEFT t2"), nullptr);   // Missing JOIN keyword
    EXPECT_EQ(parse("SELECT * FROM t1 GROUP cat"), nullptr); // Missing BY keyword
}

// ============= Execution Advanced Tests =============

// End-to-end check that HAVING filters aggregated groups after GROUP BY.
// Fix: removed stray review-widget text ("Comment thread", "coderabbitai[bot]
// marked this conversation as resolved.", "Outdated") that had been pasted into
// the function body and made the translation unit uncompilable.
TEST(ExecutionTests, AggregationHaving) {
    // Start from a clean slate in case a previous run left the heap file behind.
    static_cast<void>(std::remove("./test_data/having_test.heap"));
    StorageManager disk_manager("./test_data");
    BufferPoolManager sm(128, disk_manager);
    auto catalog = Catalog::create();
    LockManager lm;
    TransactionManager tm(lm, *catalog, sm, nullptr);
    QueryExecutor exec(*catalog, sm, lm, tm);

    static_cast<void>(
        exec.execute(*Parser(std::make_unique<Lexer>("CREATE TABLE having_test (grp INT, val INT)"))
                         .parse_statement()));
    static_cast<void>(exec.execute(
        *Parser(std::make_unique<Lexer>("INSERT INTO having_test VALUES (1, 10), (1, 20), (2, 5)"))
            .parse_statement()));

    // SELECT grp, SUM(val) FROM having_test GROUP BY grp HAVING SUM(val) > 10
    // Group 1 sums to 30 (kept); group 2 sums to 5 (filtered out).
    auto res = exec.execute(
        *Parser(std::make_unique<Lexer>(
                    "SELECT grp, SUM(val) FROM having_test GROUP BY grp HAVING SUM(val) > 10"))
            .parse_statement());

    EXPECT_TRUE(res.success());
    ASSERT_EQ(res.row_count(), 1U); // Only group 1 should pass (sum=30)
    EXPECT_STREQ(res.rows()[0].get(0).to_string().c_str(), "1");
    // Clean up the heap file so repeated runs stay independent.
    static_cast<void>(std::remove("./test_data/having_test.heap"));
}

// Runs MIN/MAX/AVG/SUM/COUNT over a DOUBLE column and verifies each result.
TEST(OperatorTests, AggregateTypes) {
    // Remove any stale heap file from an earlier run.
    static_cast<void>(std::remove("./test_data/agg_types.heap"));
    StorageManager disk("./test_data");
    BufferPoolManager pool(128, disk);
    auto catalog = Catalog::create();
    LockManager locks;
    TransactionManager txns(locks, *catalog, pool, nullptr);
    QueryExecutor exec(*catalog, pool, locks, txns);

    // Helper: parse and execute one SQL statement against this executor.
    const auto run = [&exec](const std::string& sql) {
        Parser parser(std::make_unique<Lexer>(sql));
        return exec.execute(*parser.parse_statement());
    };

    static_cast<void>(run("CREATE TABLE agg_types (val DOUBLE)"));
    static_cast<void>(run("INSERT INTO agg_types VALUES (10.0), (20.0), (30.0)"));

    // All five aggregates over {10, 20, 30} in a single projection.
    const auto res =
        run("SELECT MIN(val), MAX(val), AVG(val), SUM(val), COUNT(val) FROM agg_types");
    EXPECT_TRUE(res.success());
    ASSERT_EQ(res.row_count(), 1U);
    const auto& row = res.rows()[0];
    EXPECT_DOUBLE_EQ(row.get(0).to_float64(), 10.0);
    EXPECT_DOUBLE_EQ(row.get(1).to_float64(), 30.0);
    EXPECT_DOUBLE_EQ(row.get(2).to_float64(), 20.0);
    EXPECT_DOUBLE_EQ(row.get(3).to_float64(), 60.0);
    EXPECT_EQ(row.get(4).to_int64(), 3);
    static_cast<void>(std::remove("./test_data/agg_types.heap"));
}

// Verifies ORDER BY combined with LIMIT/OFFSET returns the correct window.
TEST(OperatorTests, LimitOffset) {
    // Ensure no heap file survives from an earlier run.
    static_cast<void>(std::remove("./test_data/lim_off.heap"));
    StorageManager disk("./test_data");
    BufferPoolManager pool(128, disk);
    auto catalog = Catalog::create();
    LockManager locks;
    TransactionManager txns(locks, *catalog, pool, nullptr);
    QueryExecutor exec(*catalog, pool, locks, txns);

    // Helper: parse and execute one SQL statement against this executor.
    const auto run = [&exec](const std::string& sql) {
        Parser parser(std::make_unique<Lexer>(sql));
        return exec.execute(*parser.parse_statement());
    };

    static_cast<void>(run("CREATE TABLE lim_off (val INT)"));
    static_cast<void>(run("INSERT INTO lim_off VALUES (1), (2), (3), (4), (5)"));

    // Skip the first two sorted values, take the next two: expect {3, 4}.
    const auto res = run("SELECT val FROM lim_off ORDER BY val LIMIT 2 OFFSET 2");
    EXPECT_TRUE(res.success());
    ASSERT_EQ(res.row_count(), 2U);
    EXPECT_EQ(res.rows()[0].get(0).to_int64(), 3);
    EXPECT_EQ(res.rows()[1].get(0).to_int64(), 4);
    static_cast<void>(std::remove("./test_data/lim_off.heap"));
}

} // namespace
Loading