From 2437944da5ae29fd7b7f62c0ac410a50cd31b290 Mon Sep 17 00:00:00 2001 From: Kira Sotnikov Date: Fri, 11 Apr 2025 10:00:58 +0300 Subject: [PATCH] Cli tool to read WT files --- Cargo.toml | 3 +- src/lib.rs | 6 +-- src/page/data.rs | 2 +- src/page/mod.rs | 2 +- src/page/util.rs | 7 +++ tools/create-data-file/Cargo.toml | 2 + tools/create-data-file/src/main.rs | 44 +++++++++++-------- tools/dump-data-file/Cargo.toml | 2 + tools/dump-data-file/src/main.rs | 68 ++++++++++++++++-------------- 9 files changed, 81 insertions(+), 55 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bd0f79d..c12eafe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,5 +18,6 @@ derive_more = { version = "1.0.0", features = ["from", "error", "display", "into rkyv = { version = "0.8.9", features = ["uuid-1"] } lockfree = "0.5.1" uuid = { version = "1.11.0", features = ["v4"] } -indexset = { version = "0.11.2", features = ["concurrent", "cdc", "multimap"] } +indexset = { version = "0.11.3", features = ["concurrent", "cdc", "multimap"] } + tokio = { version = "1", features = ["full"] } diff --git a/src/lib.rs b/src/lib.rs index e0d2713..3c9cc19 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,9 +12,9 @@ pub use data_bucket_codegen::SizeMeasure; pub use page::{ get_index_page_size_from_data_length, map_data_pages_to_general, parse_data_page, parse_general_header_by_index, parse_page, persist_page, seek_by_link, seek_to_page_start, - update_at, DataPage, GeneralHeader, GeneralPage, IndexPage, IndexValue, Interval, PageType, - SpaceInfoPage, TableOfContentsPage, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, - PAGE_SIZE, + update_at, DataPage, GeneralHeader, GeneralPage, IndexPage, IndexValue, Interval, PageId, + PageType, SpaceInfoPage, TableOfContentsPage, DATA_VERSION, GENERAL_HEADER_SIZE, + INNER_PAGE_SIZE, PAGE_SIZE, }; pub use persistence::{PersistableIndex, PersistableTable}; pub use space::Id as SpaceId; diff --git a/src/page/data.rs b/src/page/data.rs index 3d948c4..8ac382b 100644 --- a/src/page/data.rs +++ b/src/page/data.rs @@ -2,7 +2,7 @@ use crate::Link; use crate::Persistable; use eyre::{eyre, Result}; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct DataPage { pub length: u32, pub data: [u8; DATA_LENGTH], diff --git a/src/page/mod.rs b/src/page/mod.rs index cca1aa4..42c4431 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -68,7 +68,7 @@ pub const INNER_PAGE_SIZE: usize = PAGE_SIZE - GENERAL_HEADER_SIZE; Serialize, SizeMeasure, )] -pub struct PageId(u32); +pub struct PageId(pub u32); impl PageId { pub fn next(self) -> Self { diff --git a/src/page/util.rs b/src/page/util.rs index c9ba277..b182f1f 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -211,13 +211,20 @@ pub async fn parse_space_info( file.seek(SeekFrom::Start(0)).await?; let header = parse_general_header(file).await?; + println!("Header {:?}", header); + let mut buffer = vec![0u8; header.data_length as usize]; + + println!("bufffer {:?}", buffer); + file.read_exact(&mut buffer).await?; let archived = unsafe { rkyv::access_unchecked::<::Archived>(&buffer[..]) }; let space_info: SpaceInfoPage = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); + println!("Space Info {:?}", space_info); + Ok(space_info) } diff --git a/tools/create-data-file/Cargo.toml b/tools/create-data-file/Cargo.toml index f873007..797e441 100644 --- a/tools/create-data-file/Cargo.toml +++ b/tools/create-data-file/Cargo.toml @@ -8,4 +8,6 @@ clap = { version = "4.5.21", features = ["derive"] } data_bucket = { path = "../.." } eyre = "0.6.12" rkyv = { version = "0.8.9", features = ["uuid-1"]} +tokio = { version = "1", features = ["full"] } + diff --git a/tools/create-data-file/src/main.rs b/tools/create-data-file/src/main.rs index 4c2637d..af78fb4 100644 --- a/tools/create-data-file/src/main.rs +++ b/tools/create-data-file/src/main.rs @@ -1,12 +1,11 @@ use clap::Parser; use data_bucket::{persist_page, GeneralHeader, GeneralPage, PageType, DATA_VERSION}; -use data_bucket::{IndexData, IndexValue, Interval, Link, SpaceInfoData}; +use data_bucket::{IndexPage, IndexValue, Link, SpaceInfoPage}; +use tokio::fs::File; + use rkyv::rancor::Error; use rkyv::{Archive, Deserialize, Serialize}; -use std::{ - fs::{remove_file, File}, - str, -}; +use std::{fs::remove_file, str}; #[derive(Parser, Debug)] struct Args { @@ -16,10 +15,11 @@ struct Args { count: usize, } -fn main() -> eyre::Result<()> { +#[tokio::main] +async fn main() -> eyre::Result<()> { let args = Args::parse(); _ = remove_file(args.filename.as_str()); - let mut output_file = File::create(args.filename.as_str())?; + let mut output_file = File::create(args.filename.as_str()).await?; let space_info_header = GeneralHeader { data_version: DATA_VERSION, @@ -31,7 +31,7 @@ fn main() -> eyre::Result<()> { data_length: 0u32, }; - let space_info = SpaceInfoData { + let space_info = SpaceInfoPage { id: 1.into(), page_count: 4, name: "generated space".to_owned(), @@ -40,19 +40,18 @@ fn main() -> eyre::Result<()> { ("attr".to_string(), "String".to_string()), ], primary_key_fields: vec!["val".to_string()], - primary_key_intervals: vec![Interval(1, 1)], - secondary_index_types: vec![], - secondary_index_intervals: Default::default(), - data_intervals: vec![], pk_gen_state: (), empty_links_list: vec![], + secondary_index_types: vec![], }; let mut space_info_page = GeneralPage { header: space_info_header, inner: space_info, }; - persist_page(&mut space_info_page, &mut output_file).unwrap(); + persist_page(&mut space_info_page, &mut output_file) + .await + .unwrap(); let index_header = GeneralHeader { data_version: DATA_VERSION, @@ -82,7 +81,9 @@ fn main() -> eyre::Result<()> { let end = usize::min(start + page_size, args.count); let (mut data_page, offsets) = generate_data_page(start as i32, end - start, data_header); - persist_page(&mut data_page, &mut output_file).unwrap(); + persist_page(&mut data_page, &mut output_file) + .await + .unwrap(); let index_data = create_index_data(&data_page, &offsets); @@ -90,7 +91,9 @@ fn main() -> eyre::Result<()> { header: index_header, inner: index_data, }; - persist_page(&mut index_page, &mut output_file).unwrap(); + persist_page(&mut index_page, &mut output_file) + .await + .unwrap(); } Ok(()) @@ -134,7 +137,7 @@ pub fn generate_data_page( ) } -fn create_index_data(page: &GeneralPage>, offsets: &[(i32, u32, u32)]) -> IndexData { +fn create_index_data(page: &GeneralPage>, offsets: &[(i32, u32, u32)]) -> IndexPage { let index_values = offsets .iter() .map(|(key, offset, length)| IndexValue:: { @@ -147,5 +150,12 @@ fn create_index_data(page: &GeneralPage>, offsets: &[(i32, u32, u32)]) - }) .collect(); - IndexData { index_values } + IndexPage { + current_index: 0, + size: 1024, + node_id: 0, + current_length: 1024, + slots: vec![], + index_values: index_values, + } } diff --git a/tools/dump-data-file/Cargo.toml b/tools/dump-data-file/Cargo.toml index 26b6d3d..48f77c2 100644 --- a/tools/dump-data-file/Cargo.toml +++ b/tools/dump-data-file/Cargo.toml @@ -7,3 +7,5 @@ edition = "2021" clap = { version = "4.5.21", features = ["derive"] } data_bucket = { path = "../.." } eyre = "0.6.12" +tokio = { version = "1", features = ["full"] } + diff --git a/tools/dump-data-file/src/main.rs b/tools/dump-data-file/src/main.rs index ec8e7c2..e151527 100644 --- a/tools/dump-data-file/src/main.rs +++ b/tools/dump-data-file/src/main.rs @@ -1,10 +1,11 @@ use clap::Parser; use data_bucket::{ - page::{parse_space_info, DataIterator, LinksIterator, PageIterator}, - persistence::data::DataTypeValue, - read_data_pages, PAGE_SIZE, + page::parse_space_info, parse_data_page, parse_general_header_by_index, + persistence::data::DataTypeValue, space, PAGE_SIZE, }; -use std::{fs::File, str}; +use data_bucket::{parse_page, GeneralPage, SpaceInfoPage}; +use std::str; +use tokio::fs::File; #[derive(Parser, Debug)] struct Args { @@ -65,39 +66,42 @@ fn format_table(header: &Vec, rows: &Vec>) { print_horizontal_cells_delimiters(&column_widths[..]); } -fn main() -> eyre::Result<()> { +#[tokio::main] +async fn main() -> eyre::Result<()> { let args = Args::parse(); - let mut file = File::open(args.filename)?; + let mut file = File::open(args.filename).await.unwrap(); - let space_info = parse_space_info::(&mut file)?; - let row_schema = space_info.row_schema.clone(); + println!("{:?}", file.metadata().await); - let mut rows: Vec> = vec![]; + let space_info = parse_space_info::(&mut file).await; - let pages = PageIterator::new(space_info.primary_key_intervals.clone()); - for page in pages { - let links = LinksIterator::new(&mut file, page, &space_info).collect::>(); - for row in DataIterator::new(&mut file, row_schema.clone(), links) { - rows.push(row); - } - } + let info = parse_general_header_by_index(&mut file, 0).await; + let info2 = parse_data_page::(&mut file, 0).await; + + let space_info2 = parse_page::, { PAGE_SIZE as u32 }>(&mut file, 0) + .await + .unwrap(); + + //let t1 = space_info.header; + + let _rows: Vec> = vec![]; + + println!("1{:?}", space_info); + println!("2{:?}", space_info2); + + println!("Info {:?}", info); + //println!("Info {:?}", info2); + // println!("Head er {:?}", t1); + + //let pages = PageIterator::new(space_info.unwrap().primary_key_fields.clone()); + // for page in pages { + //let links = LinksIterator::new(&mut file, page, &space_info).collect::>(); + // for row in DataIterator::new(&mut file, row_schema.clone(), links) { + // rows.push(row); + // } + //} - let rows: Vec> = read_data_pages::(&mut file)?; - - let header: Vec = row_schema - .iter() - .map(|(column, _data_type)| column.to_owned()) - .collect(); - let rows: Vec> = rows - .iter() - .map(|row| { - row.iter() - .map(|column| column.to_string()) - .collect::>() - }) - .collect(); - - format_table(&header, &rows); + //let rows: Vec> = read_data_pages::(&mut file)?; Ok(()) }