Skip to content

Commit 8d5c150

Browse files
committed
Add new features for 0.3.0, improve hash handling
1 parent 10e8257 commit 8d5c150

File tree

9 files changed

+148
-27
lines changed

9 files changed

+148
-27
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
**/__pycache__/
2+
.venv/
3+
**/build/

libbbf/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# libbbf/__init__.py
2+
from ._bbf import BBFReader, BBFBuilder
3+
4+
from .exceptions import (
5+
BBFError,
6+
BBFNotFoundError,
7+
BBFInvalidFormatError,
8+
BBFCorruptionError
9+
)
10+
11+
__all__ = [
12+
"BBFReader",
13+
"BBFBuilder",
14+
# Export exceptions
15+
"BBFError",
16+
"BBFNotFoundError",
17+
"BBFInvalidFormatError",
18+
"BBFCorruptionError"
19+
]

libbbf/exceptions.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# libbbf/exceptions.py
2+
3+
class BBFError(Exception):
    """Base class for all exceptions in the libbbf library."""


class BBFNotFoundError(BBFError, FileNotFoundError):
    """Raised when the BBF file cannot be found."""


class BBFInvalidFormatError(BBFError):
    """Raised when the file exists but isn't a valid BBF (wrong magic/header)."""


class BBFCorruptionError(BBFError):
    """Raised when a hash check fails.

    Attributes:
        asset_index: Index passed by the raiser to identify the asset
            involved; -1 (the default) when none was supplied.
    """

    def __init__(self, message: str, asset_index: int = -1):
        super().__init__(message)
        self.asset_index = asset_index

libbbf_tools/__init__.py

Whitespace-only changes.

setup.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
# Define the C++ extension
2222
ext_modules = [
2323
Pybind11Extension(
24-
"libbbf",
24+
"libbbf._bbf",
2525
[
2626
"src/bindings.cpp",
2727
"src/libbbf.cpp",
@@ -34,7 +34,7 @@
3434

3535
setup(
3636
name="libbbf",
37-
version="0.2.13",
37+
version="0.3.0",
3838
author="EF1500",
3939
author_email="rosemilovelockofficial@proton.me",
4040
description="Bound Book Format (BBF) tools and bindings",
@@ -48,8 +48,8 @@
4848

4949
entry_points={
5050
"console_scripts": [
51-
"cbx2bbf=libbbf_tools.cbx2bbf:main",
52-
"bbf2cbx=libbbf_tools.bbf2cbx:main",
51+
"cbx2bbf=libbbf.cbx2bbf:main",
52+
"bbf2cbx=libbbf.bbf2cbx:main",
5353
],
5454
},
5555

src/bbf_reader.h

Lines changed: 84 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ struct MemoryMappedFile {
7676
~MemoryMappedFile() { unmap(); }
7777
};
7878

79-
class BBFReader {
79+
class BBFReader
80+
{
8081
private:
8182
const char* data_ptr = nullptr;
8283
const BBFSection* sections_ = nullptr;
@@ -92,7 +93,8 @@ class BBFReader {
9293
MemoryMappedFile mmap;
9394
bool isValid = false;
9495

95-
BBFReader(const std::string& path) {
96+
BBFReader(const std::string& path)
97+
{
9698
if (!mmap.map(path)) return;
9799
data_ptr = static_cast<const char*>(mmap.data);
98100

@@ -121,24 +123,31 @@ class BBFReader {
121123
isValid = true;
122124
}
123125

124-
std::string_view getStringView(uint32_t offset) const {
126+
// Get string view
127+
std::string_view getStringView(uint32_t offset) const
128+
{
125129
if (offset >= stringPoolSize_) return {};
126130
return std::string_view(stringPool_ + offset);
127131
}
128132

129-
struct PySection {
133+
// Plain value copy of one section entry, as produced by getSections().
// Members have default initializers so a default-constructed PySection has
// well-defined contents; brace initialization with all three values still works.
struct PySection
{
    std::string title;       // section title text
    uint32_t startPage{0};   // filled from sectionStartIndex by getSections()
    uint32_t parent{0};      // filled from parentSectionIndex by getSections()
};
134139

135-
std::vector<PySection> getSections() const {
140+
// Get sections
141+
std::vector<PySection> getSections() const
142+
{
136143
std::vector<PySection> result;
137144
if (!isValid) return result;
138145

139146
result.reserve(footer.sectionCount);
140-
for (uint32_t i = 0; i < footer.sectionCount; i++) {
141-
result.push_back({
147+
for (uint32_t i = 0; i < footer.sectionCount; i++)
148+
{
149+
result.push_back(
150+
{
142151
std::string(getStringView(sections_[i].sectionTitleOffset)),
143152
sections_[i].sectionStartIndex,
144153
sections_[i].parentSectionIndex
@@ -147,12 +156,15 @@ class BBFReader {
147156
return result;
148157
}
149158

150-
std::vector<std::pair<std::string, std::string>> getMetadata() const {
159+
// Get metadata
160+
std::vector<std::pair<std::string, std::string>> getMetadata() const
161+
{
151162
std::vector<std::pair<std::string, std::string>> result;
152163
if (!isValid) return result;
153164

154165
result.reserve(footer.keyCount);
155-
for (uint32_t i = 0; i < footer.keyCount; i++) {
166+
for (uint32_t i = 0; i < footer.keyCount; i++)
167+
{
156168
result.emplace_back(
157169
getStringView(meta_[i].keyOffset),
158170
getStringView(meta_[i].valOffset)
@@ -161,7 +173,9 @@ class BBFReader {
161173
return result;
162174
}
163175

164-
std::pair<const char*, size_t> getPageRaw(uint32_t pageIndex) const {
176+
// Get page raw
177+
std::pair<const char*, size_t> getPageRaw(uint32_t pageIndex) const
178+
{
165179
if (!isValid || pageIndex >= footer.pageCount) return {nullptr, 0};
166180

167181
const auto& asset = assets_[pages_[pageIndex].assetIndex];
@@ -171,7 +185,36 @@ class BBFReader {
171185
return { data_ptr + asset.offset, static_cast<size_t>(asset.length) };
172186
}
173187

174-
std::map<std::string, uint64_t> getPageInfo(uint32_t pageIndex) const {
188+
// Get footer info
189+
std::map<std::string, uint64_t> getFooterInfo() const
190+
{
191+
// If invalid, return empty set.
192+
if (!isValid) return {};
193+
194+
// Otherwise return all this juicy information!
195+
return
196+
{
197+
{"stringPoolOffset", footer.stringPoolOffset},
198+
{"assetTableOffset", footer.assetTableOffset},
199+
{"assetCount", static_cast<uint64_t>(footer.assetCount)},
200+
201+
{"pageTableOffset", footer.pageTableOffset},
202+
{"pageCount", static_cast<uint64_t>(footer.pageCount)},
203+
204+
{"sectionTableOffset", footer.sectionTableOffset},
205+
{"sectionCount", footer.sectionCount},
206+
207+
{"metaTableOffset", footer.metaTableOffset},
208+
{"keyCount", static_cast<uint64_t>(footer.keyCount)},
209+
210+
{"extraOffset", footer.extraOffset},
211+
{"indexHash", footer.indexHash}
212+
};
213+
}
214+
215+
// Get page info
216+
std::map<std::string, uint64_t> getPageInfo(uint32_t pageIndex) const
217+
{
175218
if (!isValid || pageIndex >= footer.pageCount) return {};
176219

177220
const auto& asset = assets_[pages_[pageIndex].assetIndex];
@@ -180,12 +223,29 @@ class BBFReader {
180223
{"offset", asset.offset},
181224
{"hash", asset.xxh3Hash},
182225
{"type", asset.type},
226+
{"flags", asset.flags}, // Add flags
183227
{"decodedLength", asset.decodedLength} // ADDED: v1.1 Spec
184228
};
185229
}
186230

231+
// verify a specific page
232+
std::map<uint64_t, bool> verifyPage(uint32_t pageIndex)
233+
{
234+
if (!isValid || pageIndex >= footer.pageCount) return {};
235+
// Get page
236+
const auto& asset = assets_[pages_[pageIndex].assetIndex];
237+
// Check mmap length
238+
if (asset.offset + asset.length > mmap.size) return {};
239+
// hash
240+
uint64_t xxhHash = XXH3_64bits((const uint8_t*)data_ptr + asset.offset, asset.length);
241+
242+
bool match = (xxhHash == asset.xxh3Hash);
243+
return {{xxhHash, match}};
244+
}
245+
187246
// Returns -1 for Success, -2 for Directory Error, or >=0 for Asset Index Error
188-
int64_t verify() const {
247+
int64_t verify() const
248+
{
189249
if (!isValid) return -2;
190250

191251
// 1. Directory Hash Check
@@ -200,13 +260,16 @@ class BBFReader {
200260
size_t max_size = mmap.size;
201261

202262
// Lambda returns -1 if OK, or the index if Bad
203-
auto verifyRange = [local_assets, local_data, max_size](size_t start, size_t end) -> int64_t {
204-
for (size_t i = start; i < end; ++i) {
263+
auto verifyRange = [local_assets, local_data, max_size](size_t start, size_t end) -> int64_t
264+
{
265+
for (size_t i = start; i < end; ++i)
266+
{
205267
const auto& a = local_assets[i];
206268
// Bounds check before hash
207269
if (a.offset + a.length > max_size) return (int64_t)i;
208270

209-
if (XXH3_64bits((const uint8_t*)local_data + a.offset, a.length) != a.xxh3Hash) {
271+
if (XXH3_64bits((const uint8_t*)local_data + a.offset, a.length) != a.xxh3Hash)
272+
{
210273
return (int64_t)i; // Return the corrupted index
211274
}
212275
}
@@ -216,22 +279,25 @@ class BBFReader {
216279
size_t numThreads = std::thread::hardware_concurrency();
217280
if (numThreads == 0) numThreads = 1;
218281

219-
if (count < 128 || numThreads == 1) {
282+
if (count < 128 || numThreads == 1)
283+
{
220284
return verifyRange(0, count);
221285
}
222286

223287
size_t chunkSize = count / numThreads;
224288
std::vector<std::future<int64_t>> futures; // Changed from bool to int64_t
225289
futures.reserve(numThreads);
226290

227-
for (size_t i = 0; i < numThreads; ++i) {
291+
for (size_t i = 0; i < numThreads; ++i)
292+
{
228293
size_t start = i * chunkSize;
229294
size_t end = (i == numThreads - 1) ? count : start + chunkSize;
230295
futures.push_back(std::async(std::launch::async, verifyRange, start, end));
231296
}
232297

233298
// Check results
234-
for (auto& f : futures) {
299+
for (auto& f : futures)
300+
{
235301
int64_t result = f.get();
236302
if (result != -1) return result; // Bubble up the error index
237303
}

src/bindings.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
namespace py = pybind11;
77

8-
PYBIND11_MODULE(libbbf, m) {
8+
PYBIND11_MODULE(_bbf, m) {
99
m.doc() = "Bound Book Format (BBF) Python Bindings";
1010

1111
// --- BBFBuilder (Writer) ---
@@ -26,15 +26,24 @@ PYBIND11_MODULE(libbbf, m) {
2626
py::class_<BBFReader>(m, "BBFReader")
2727
.def(py::init<const std::string &>())
2828
.def_readonly("is_valid", &BBFReader::isValid)
29-
// We don't need to expose footer struct directly unless you wrote a binding for BBFFooter
29+
.def("close", [](BBFReader& r)
30+
{
31+
r.mmap.unmap();
32+
r.isValid = false; // Prevent further reads
33+
}, "Unmaps the file immediately.")
34+
3035
.def("get_page_count", [](BBFReader& r) { return r.footer.pageCount; })
3136
.def("get_asset_count", [](BBFReader& r) { return r.footer.assetCount; })
3237

3338
.def("verify", &BBFReader::verify,
3439
py::call_guard<py::gil_scoped_release>(),
3540
"Verify integrity. Returns: -1 (Success), -2 (Directory Fail), or >=0 (Index of corrupt asset).")
41+
42+
.def("verify_page", &BBFReader::verifyPage,
43+
"Verify a single page, returns a dict <uint64_t, bool>, {calculated hash, match?}.")
3644

37-
.def("get_sections", [](BBFReader& r) {
45+
.def("get_sections", [](BBFReader& r)
46+
{
3847
py::list result;
3948
const auto sections = r.getSections();
4049
for (const auto& s : sections) {
@@ -47,19 +56,24 @@ PYBIND11_MODULE(libbbf, m) {
4756
return result;
4857
}, "Returns sections as [{'title': str, 'startPage': int, 'parent': int}]")
4958

59+
.def("get_footer", &BBFReader::getFooterInfo,
60+
"Returns a dict representing the footer.")
61+
5062
.def("get_metadata", &BBFReader::getMetadata,
5163
"Returns a list of (Key, Value) tuples.")
5264

5365
.def("get_page_info", &BBFReader::getPageInfo,
5466
"Returns dict with keys: length, offset, hash, type, decodedLength")
5567

56-
.def("get_page_data", [](BBFReader& r, uint32_t idx) {
68+
.def("get_page_data", [](BBFReader& r, uint32_t idx)
69+
{
5770
auto raw = r.getPageRaw(idx);
5871
if (!raw.first) return py::bytes("");
5972
return py::bytes(raw.first, raw.second);
6073
}, "Returns the raw bytes of the page asset (1-Copy).")
6174

62-
.def("get_page_view", [](BBFReader& r, uint32_t idx) {
75+
.def("get_page_view", [](BBFReader& r, uint32_t idx)
76+
{
6377
auto raw = r.getPageRaw(idx);
6478
if (!raw.first) return py::memoryview(py::bytes(""));
6579

0 commit comments

Comments
 (0)