From 38eba34271eed17d13107bdfc2acbbccb3851d75 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Thu, 5 Dec 2024 15:00:39 +0300 Subject: [PATCH 01/41] Add `row_schema` field to `SpaceInfo` --- src/page/space_info.rs | 2 ++ src/page/util.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 6d10e8a..8b76e22 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -20,6 +20,7 @@ pub struct SpaceInfo { pub id: space::Id, pub page_count: u32, pub name: SpaceName, + pub row_schema: Vec<(String, DataType)>, pub primary_key_intervals: Vec, pub secondary_index_intervals: HashMap>, pub data_intervals: Vec, @@ -60,6 +61,7 @@ mod test { id: 0.into(), page_count: 0, name: "Test".to_string(), + row_schema: vec![], primary_key_intervals: vec![], secondary_index_intervals: HashMap::new(), data_intervals: vec![], diff --git a/src/page/util.rs b/src/page/util.rs index 2f725ad..2f1cfc2 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -295,6 +295,7 @@ mod test { id: 0.into(), page_count: 0, name: "Test".to_string(), + row_schema: vec![], primary_key_intervals: vec![], secondary_index_intervals: HashMap::from([( "string_index".to_owned(), From 927c95f30d5633f911b2d7a84f1c441fc4647a38 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 9 Dec 2024 14:54:25 +0300 Subject: [PATCH 02/41] WIP reading of fields --- src/page/util.rs | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/page/util.rs b/src/page/util.rs index 2f1cfc2..41ab3b2 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -3,11 +3,12 @@ use std::io::prelude::*; use eyre::eyre; use rkyv::{Archive, Deserialize}; +use scc::HashMap; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; use crate::page::General; -use crate::{DataPage, GeneralPage, IndexData, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; +use crate::{space, DataPage, DataType, GeneralPage, IndexData, Persistable, 
GENERAL_HEADER_SIZE, PAGE_SIZE}; use super::{Interval, SpaceInfo}; @@ -219,6 +220,36 @@ where Ok(result) } +fn parse_row(data_page: ) + +fn read_data_pages_from_space(file: &mut std::fs::File) -> eyre::Result>> { + let space_info = parse_space_info::(file)?; + let mut result: Vec> = vec![]; + for interval in space_info.data_intervals.iter() { + for index in interval.0 .. interval.1 { + let data_page = parse_data_page(file, index)?; + + file.seek(io::SeekFrom::Start(PAGE_SIZE * index))?; + for column in space_info.row_schema { + let value = match column.1 { + DataType::String => { + todo!() + }, + DataType::Integer => { + todo!() + }, + DataType::Float => { + todo!() + } + }; + } + + let row: HashMap = parse_binary_row(&data_page, &row_schema); + } + } + todo!() +} + #[cfg(test)] mod test { use std::collections::HashMap; From b8c9124fe73097da22a15b0536b75398d1b82188 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 9 Dec 2024 20:16:53 +0300 Subject: [PATCH 03/41] Store column data types as strings --- src/lib.rs | 2 +- src/page/data_type.rs | 8 ------ src/page/mod.rs | 2 -- src/page/space_info.rs | 5 ++-- src/page/util.rs | 62 ++++++++++++++++++++---------------------- 5 files changed, 33 insertions(+), 46 deletions(-) delete mode 100644 src/page/data_type.rs diff --git a/src/lib.rs b/src/lib.rs index 0cdc8e2..3a19f42 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ pub use data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, map_unique_tree_index, parse_data_page, parse_index_page, parse_page, persist_page, read_index_pages, - Data as DataPage, DataType, General as GeneralPage, GeneralHeader, IndexPage as IndexData, + Data as DataPage, General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, }; diff --git a/src/page/data_type.rs b/src/page/data_type.rs deleted file 
mode 100644 index c70ac1a..0000000 --- a/src/page/data_type.rs +++ /dev/null @@ -1,8 +0,0 @@ -use rkyv::{Archive, Deserialize, Serialize}; - -#[derive(Archive, Clone, Deserialize, Debug, PartialEq, Serialize)] -pub enum DataType { - String = 0, - Integer = 1, // 64-bit integer - Float = 2, // 64-bit float -} diff --git a/src/page/mod.rs b/src/page/mod.rs index 2d61d69..62365db 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -1,5 +1,4 @@ mod data; -mod data_type; mod header; mod index; mod space_info; @@ -10,7 +9,6 @@ use derive_more::{Display, From}; use rkyv::{Archive, Deserialize, Serialize}; pub use data::Data; -pub use data_type::DataType; pub use header::{GeneralHeader, DATA_VERSION}; pub use index::{map_tree_index, map_unique_tree_index, IndexPage}; pub use space_info::{Interval, SpaceInfo}; diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 8b76e22..62607d7 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -6,7 +6,6 @@ use rkyv::{Archive, Deserialize, Serialize}; use crate::page::INNER_PAGE_SIZE; use crate::util::Persistable; -use crate::DataType; use crate::{space, Link}; pub type SpaceName = String; @@ -20,13 +19,13 @@ pub struct SpaceInfo { pub id: space::Id, pub page_count: u32, pub name: SpaceName, - pub row_schema: Vec<(String, DataType)>, + pub row_schema: Vec<(String, String)>, pub primary_key_intervals: Vec, pub secondary_index_intervals: HashMap>, pub data_intervals: Vec, pub pk_gen_state: Pk, pub empty_links_list: Vec, - pub secondary_index_map: HashMap, + pub secondary_index_map: HashMap, } /// Represents some interval between values. 
diff --git a/src/page/util.rs b/src/page/util.rs index 41ab3b2..9055af4 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -8,7 +8,7 @@ use scc::HashMap; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; use crate::page::General; -use crate::{space, DataPage, DataType, GeneralPage, IndexData, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; +use crate::{space, DataPage, GeneralPage, IndexData, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; use super::{Interval, SpaceInfo}; @@ -220,35 +220,33 @@ where Ok(result) } -fn parse_row(data_page: ) - -fn read_data_pages_from_space(file: &mut std::fs::File) -> eyre::Result>> { - let space_info = parse_space_info::(file)?; - let mut result: Vec> = vec![]; - for interval in space_info.data_intervals.iter() { - for index in interval.0 .. interval.1 { - let data_page = parse_data_page(file, index)?; - - file.seek(io::SeekFrom::Start(PAGE_SIZE * index))?; - for column in space_info.row_schema { - let value = match column.1 { - DataType::String => { - todo!() - }, - DataType::Integer => { - todo!() - }, - DataType::Float => { - todo!() - } - }; - } - - let row: HashMap = parse_binary_row(&data_page, &row_schema); - } - } - todo!() -} +// fn read_data_pages_from_space(file: &mut std::fs::File) -> eyre::Result>> { +// let space_info = parse_space_info::(file)?; +// let mut result: Vec> = vec![]; +// for interval in space_info.data_intervals.iter() { +// for index in interval.0 .. 
interval.1 { +// let data_page = parse_data_page(file, index)?; + +// file.seek(io::SeekFrom::Start(PAGE_SIZE * index))?; +// for column in space_info.row_schema { +// let value = match column.1 { +// DataType::String => { +// rkyv::from_bytes_unchecked(bytes) +// }, +// DataType::Integer => { +// todo!() +// }, +// DataType::Float => { +// todo!() +// } +// }; +// } + +// let row: HashMap = parse_binary_row(&data_page, &row_schema); +// } +// } +// todo!() +// } #[cfg(test)] mod test { @@ -260,7 +258,7 @@ mod test { use crate::page::index::IndexValue; use crate::page::INNER_PAGE_SIZE; use crate::{ - map_index_pages_to_general, map_unique_tree_index, DataType, GeneralHeader, GeneralPage, + map_index_pages_to_general, map_unique_tree_index, GeneralHeader, GeneralPage, IndexData, Interval, Link, PageType, SpaceInfoData, DATA_VERSION, PAGE_SIZE, }; @@ -335,7 +333,7 @@ mod test { data_intervals: vec![], pk_gen_state: (), empty_links_list: vec![], - secondary_index_map: HashMap::from([("string_index".to_owned(), DataType::String)]), + secondary_index_map: HashMap::from([("string_index".to_string(), "String".to_string())]), }; let space_info_page = GeneralPage { header: space_info_header, From 582a2acc3596cec81b3dcd868513025d462a8c97 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 10 Dec 2024 12:33:56 +0300 Subject: [PATCH 04/41] Rename `secondary_index_map` to `secondary_index_types` --- src/page/space_info.rs | 4 ++-- src/page/util.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 62607d7..79d49bd 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -25,7 +25,7 @@ pub struct SpaceInfo { pub data_intervals: Vec, pub pk_gen_state: Pk, pub empty_links_list: Vec, - pub secondary_index_map: HashMap, + pub secondary_index_types: Vec<(String, String)>, } /// Represents some interval between values. 
@@ -66,7 +66,7 @@ mod test { data_intervals: vec![], pk_gen_state: (), empty_links_list: vec![], - secondary_index_map: HashMap::new(), + secondary_index_types: vec![], }; let bytes = info.as_bytes(); assert!(bytes.as_ref().len() < INNER_PAGE_SIZE) diff --git a/src/page/util.rs b/src/page/util.rs index 9055af4..d140062 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -333,7 +333,7 @@ mod test { data_intervals: vec![], pk_gen_state: (), empty_links_list: vec![], - secondary_index_map: HashMap::from([("string_index".to_string(), "String".to_string())]), + secondary_index_types: vec![("string_index".to_string(), "String".to_string())], }; let space_info_page = GeneralPage { header: space_info_header, From 858aaa78984b9ff7ba6f16af03b5970621c60e76 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Fri, 13 Dec 2024 16:14:13 +0300 Subject: [PATCH 05/41] Add a function that reads arbitrary archived structs --- src/util/rkyv_data.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/util/rkyv_data.rs diff --git a/src/util/rkyv_data.rs b/src/util/rkyv_data.rs new file mode 100644 index 0000000..d3bb71d --- /dev/null +++ b/src/util/rkyv_data.rs @@ -0,0 +1,34 @@ +use rkyv::{primitive::ArchivedI32, string::ArchivedString}; + +pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec { + let data_length: usize = { + let mut accum: usize = 0; + for column in columns.iter() { + match column.1.as_str() { + "String" => accum += std::mem::size_of::(), + "i32" => accum += std::mem::size_of::(), + _ => panic!("Unknown data type {:?}", column.1), + } + } + accum + }; + + let mut data_pointer: *const u8 = unsafe { buf.as_ptr().add(buf.len()).sub(data_length) }; + let mut output: Vec = vec![]; + for column in columns.iter() { + match column.1.as_str() { + "String" => { + let archived_ptr: *const ArchivedString = data_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + data_pointer = unsafe { 
data_pointer.add(std::mem::size_of::()) }; + }, + "i32" => { + let archived_ptr: *const ArchivedI32 = data_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + data_pointer = unsafe { data_pointer.add(std::mem::size_of::()) }; + }, + _ => panic!("Unknown data type: {:?}", column.1), + } + } + output +} \ No newline at end of file From cff4ebf09f5f95c5202667c2cf416cf1c7a54f30 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 16 Dec 2024 16:01:42 +0300 Subject: [PATCH 06/41] Support more types and correct padding in `parse_archived_row` function --- src/util/mod.rs | 1 + src/util/rkyv_data.rs | 229 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 220 insertions(+), 10 deletions(-) diff --git a/src/util/mod.rs b/src/util/mod.rs index d343007..8e3da68 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,5 +1,6 @@ mod persistable; mod sized; +mod rkyv_data; pub use persistable::Persistable; pub use sized::{align, SizeMeasurable}; diff --git a/src/util/rkyv_data.rs b/src/util/rkyv_data.rs index d3bb71d..198e3f2 100644 --- a/src/util/rkyv_data.rs +++ b/src/util/rkyv_data.rs @@ -1,34 +1,243 @@ -use rkyv::{primitive::ArchivedI32, string::ArchivedString}; +use rkyv::{primitive::{ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, ArchivedU128, ArchivedU16, ArchivedU32, ArchivedU64}, string::ArchivedString}; pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec { - let data_length: usize = { + let mut data_length: usize = { let mut accum: usize = 0; for column in columns.iter() { match column.1.as_str() { - "String" => accum += std::mem::size_of::(), - "i32" => accum += std::mem::size_of::(), + "String" => { + if accum % 4 != 0 { + accum += 4 - accum % 4; + } + accum += std::mem::size_of::(); + }, + + "i128" => { + if accum % std::mem::size_of::() != 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + }, + "i64" => { + if accum % std::mem::size_of::() 
!= 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + } + "i32" => { + if accum % std::mem::size_of::() != 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + } + "i16" => { + if accum % std::mem::size_of::() != 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + } + "i8" => accum += std::mem::size_of::(), + + "u128" => { + if accum % std::mem::size_of::() != 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + } + "u64" => { + if accum % std::mem::size_of::() != 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + } + "u32" => { + if accum % std::mem::size_of::() != 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + } + "u16" => { + if accum % std::mem::size_of::() != 0 { + accum += std::mem::size_of::() - accum % std::mem::size_of::(); + } + accum += std::mem::size_of::(); + } + "u8" => accum += std::mem::size_of::(), + _ => panic!("Unknown data type {:?}", column.1), } } accum }; + if data_length % 4 != 0 { + data_length += 4 - data_length % 4; + } - let mut data_pointer: *const u8 = unsafe { buf.as_ptr().add(buf.len()).sub(data_length) }; + let start_pointer = unsafe { buf.as_ptr().add(buf.len()).sub(data_length) }; + let mut current_pointer = start_pointer; let mut output: Vec = vec![]; for column in columns.iter() { match column.1.as_str() { "String" => { - let archived_ptr: *const ArchivedString = data_pointer.cast(); + if unsafe { current_pointer.byte_offset_from(start_pointer) } % 4 != 0 { + current_pointer = unsafe { current_pointer.add((4 - current_pointer.byte_offset_from(start_pointer) % 4) as usize) }; + } + let archived_ptr: *const ArchivedString = current_pointer.cast(); + output.push(unsafe { 
(*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + + "i128" => { + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedI128 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + "i64" => { + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedI64 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); - data_pointer = unsafe { data_pointer.add(std::mem::size_of::()) }; + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "i32" => { - let archived_ptr: *const ArchivedI32 = data_pointer.cast(); + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedI32 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + "i16" => { + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + 
current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedI16 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + "i8" => { + let archived_ptr: *const i8 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + + "u128" => { + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedU128 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); - data_pointer = unsafe { data_pointer.add(std::mem::size_of::()) }; + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, + "u64" => { + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedU64 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + "u32" => { + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedU32 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() 
}); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + "u16" => { + if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (std::mem::size_of::() as isize - + current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; + } + let archived_ptr: *const ArchivedU16 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + "u8" => { + let archived_ptr: *const u8 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + _ => panic!("Unknown data type: {:?}", column.1), } } output -} \ No newline at end of file +} + +#[cfg(test)] +mod test { + use rkyv::{Archive, Deserialize, Serialize}; + + use super::parse_archived_row; + + #[derive(Archive, Serialize, Deserialize, Debug)] + struct Struct { + pub string1: String, + pub int1: u32, + pub string2: String, + pub int2: u8, + pub int3: i8, + pub int4: u8, + pub int5: i32, + pub int6: u8, + pub string7: String, + } + + #[test] + fn test_struct() { + let buffer = rkyv::to_bytes::(&Struct { + string1: "000000000000000".to_string(), + int1: 20, + string2: "aaaaaaaa".to_string(), + int2: 3, + int3: 4, + int4: 5, + int5: 6, + int6: 7, + string7: "x".to_owned(), + }).unwrap(); + let parsed = parse_archived_row(&buffer, vec![ + ("string1".to_string(), "String".to_string()), + ("int1".to_string(), "i32".to_string()), + ("string2".to_string(), "String".to_string()), + ("int2".to_string(), "u8".to_string()), + ("int3".to_string(), "i8".to_string()), + ("int4".to_string(), "u8".to_string()), + ("int5".to_string(), "i32".to_string()), + ("int6".to_string(), "u8".to_string()), + ("string7".to_string(), "String".to_string()), + ]); + assert_eq!(parsed, [ + 
"000000000000000".to_string(), + "20".to_string(), + "aaaaaaaa".to_string(), + "3".to_string(), + "4".to_string(), + "5".to_string(), + "6".to_string(), + "7".to_string(), + "x".to_string(), + ]) + } +} From ed0f458f41db66cedfa0cf0f2760b4e2b790a1c8 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 16 Dec 2024 16:16:41 +0300 Subject: [PATCH 07/41] Make code more DRY --- src/util/rkyv_data.rs | 104 ++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 70 deletions(-) diff --git a/src/util/rkyv_data.rs b/src/util/rkyv_data.rs index 198e3f2..fbd251e 100644 --- a/src/util/rkyv_data.rs +++ b/src/util/rkyv_data.rs @@ -1,65 +1,63 @@ use rkyv::{primitive::{ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, ArchivedU128, ArchivedU16, ArchivedU32, ArchivedU64}, string::ArchivedString}; +fn advance_accum_for_padding(mut accum: usize, padding: usize) -> usize { + if accum % padding != 0 { + accum += padding - accum % padding; + } + accum +} + +fn advance_pointer_for_padding(mut current_pointer: *const u8, start_pointer: *const u8, padding: usize) -> *const u8 { + if unsafe { current_pointer.byte_offset_from(start_pointer) % padding as isize != 0 } { + current_pointer = unsafe { current_pointer.add( + (padding as isize - current_pointer.byte_offset_from(start_pointer) % padding as isize) as usize) + }; + } + current_pointer +} + pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec { let mut data_length: usize = { let mut accum: usize = 0; for column in columns.iter() { match column.1.as_str() { "String" => { - if accum % 4 != 0 { - accum += 4 - accum % 4; - } + accum = advance_accum_for_padding(accum, 4); accum += std::mem::size_of::(); }, "i128" => { - if accum % std::mem::size_of::() != 0 { - accum += std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); }, "i64" => { - if accum % std::mem::size_of::() != 0 { - accum 
+= std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); } "i32" => { - if accum % std::mem::size_of::() != 0 { - accum += std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); } "i16" => { - if accum % std::mem::size_of::() != 0 { - accum += std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); } "i8" => accum += std::mem::size_of::(), "u128" => { - if accum % std::mem::size_of::() != 0 { - accum += std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); } "u64" => { - if accum % std::mem::size_of::() != 0 { - accum += std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); } "u32" => { - if accum % std::mem::size_of::() != 0 { - accum += std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); } "u16" => { - if accum % std::mem::size_of::() != 0 { - accum += std::mem::size_of::() - accum % std::mem::size_of::(); - } + accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); } "u8" => accum += std::mem::size_of::(), @@ -79,50 +77,32 @@ pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec { - if unsafe { current_pointer.byte_offset_from(start_pointer) } % 4 != 0 { - current_pointer = unsafe { current_pointer.add((4 - current_pointer.byte_offset_from(start_pointer) % 4) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, 4); let archived_ptr: *const 
ArchivedString = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "i128" => { - if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedI128 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "i64" => { - if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedI64 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "i32" => { - if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedI32 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "i16" => { - if unsafe { 
current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedI16 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; @@ -134,41 +114,25 @@ pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec { - if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedU128 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "u64" => { - if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedU64 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "u32" => { - if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe 
{ current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedU32 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; }, "u16" => { - if unsafe { current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (std::mem::size_of::() as isize - - current_pointer.byte_offset_from(start_pointer) % std::mem::size_of::() as isize) as usize) }; - } + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); let archived_ptr: *const ArchivedU16 = current_pointer.cast(); output.push(unsafe { (*archived_ptr).to_string() }); current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; From b17598e07b9c9df3b7ef14668fddb748ddc28bc0 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 16 Dec 2024 16:30:36 +0300 Subject: [PATCH 08/41] Support `f64` and `f32` data types --- src/util/rkyv_data.rs | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/util/rkyv_data.rs b/src/util/rkyv_data.rs index fbd251e..0f62266 100644 --- a/src/util/rkyv_data.rs +++ b/src/util/rkyv_data.rs @@ -1,4 +1,4 @@ -use rkyv::{primitive::{ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, ArchivedU128, ArchivedU16, ArchivedU32, ArchivedU64}, string::ArchivedString}; +use rkyv::{primitive::{ArchivedF32, ArchivedF64, ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, ArchivedU128, ArchivedU16, ArchivedU32, ArchivedU64}, string::ArchivedString}; fn advance_accum_for_padding(mut accum: usize, padding: usize) -> usize { if accum % padding != 0 { @@ -62,6 +62,15 @@ pub fn 
parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec accum += std::mem::size_of::(), + "f64" => { + accum = advance_accum_for_padding(accum, std::mem::size_of::()); + accum += std::mem::size_of::(); + } + "f32" => { + accum = advance_accum_for_padding(accum, std::mem::size_of::()); + accum += std::mem::size_of::(); + } + _ => panic!("Unknown data type {:?}", column.1), } } @@ -143,6 +152,19 @@ pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec()) }; }, + "f64" => { + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); + let archived_ptr: *const ArchivedF64 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + "f32" => { + current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); + let archived_ptr: *const ArchivedF32 = current_pointer.cast(); + output.push(unsafe { (*archived_ptr).to_string() }); + current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; + }, + _ => panic!("Unknown data type: {:?}", column.1), } } @@ -165,7 +187,9 @@ mod test { pub int4: u8, pub int5: i32, pub int6: u8, - pub string7: String, + pub string3: String, + pub int7: i8, + pub float1: f64, } #[test] @@ -179,7 +203,9 @@ mod test { int4: 5, int5: 6, int6: 7, - string7: "x".to_owned(), + string3: "x".to_owned(), + int7: 8, + float1: 3.14159265358 }).unwrap(); let parsed = parse_archived_row(&buffer, vec![ ("string1".to_string(), "String".to_string()), @@ -190,7 +216,9 @@ mod test { ("int4".to_string(), "u8".to_string()), ("int5".to_string(), "i32".to_string()), ("int6".to_string(), "u8".to_string()), - ("string7".to_string(), "String".to_string()), + ("string3".to_string(), "String".to_string()), + ("int7".to_string(), "i8".to_string()), + ("float1".to_string(), "f64".to_string()), ]); assert_eq!(parsed, [ 
"000000000000000".to_string(), @@ -202,6 +230,8 @@ mod test { "6".to_string(), "7".to_string(), "x".to_string(), + "8".to_string(), + "3.14159265358".to_string(), ]) } } From 7d839e3ef547833a5b28c2136ea33eec03c2aab0 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 16 Dec 2024 16:31:59 +0300 Subject: [PATCH 09/41] Make usage of commas in the match arms more consistent --- src/util/rkyv_data.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/util/rkyv_data.rs b/src/util/rkyv_data.rs index 0f62266..bfbb6ec 100644 --- a/src/util/rkyv_data.rs +++ b/src/util/rkyv_data.rs @@ -33,39 +33,39 @@ pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "i32" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "i16" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "i8" => accum += std::mem::size_of::(), "u128" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "u64" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "u32" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "u16" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "u8" => accum += std::mem::size_of::(), "f64" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); - } + }, "f32" => { accum = advance_accum_for_padding(accum, std::mem::size_of::()); accum += std::mem::size_of::(); From 276b5bc327103e05e760b6612ca773549cbcd563 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 16 Dec 2024 16:33:25 +0300 Subject: [PATCH 10/41] Rename the 
test for `parse_archived_row` --- src/util/rkyv_data.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/rkyv_data.rs b/src/util/rkyv_data.rs index bfbb6ec..0249a71 100644 --- a/src/util/rkyv_data.rs +++ b/src/util/rkyv_data.rs @@ -193,7 +193,7 @@ mod test { } #[test] - fn test_struct() { + fn test_parse_archived_row() { let buffer = rkyv::to_bytes::(&Struct { string1: "000000000000000".to_string(), int1: 20, From 5e606b8137b08f58e03b71f175e08d5c7a2bfc62 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 17 Dec 2024 09:28:18 +0300 Subject: [PATCH 11/41] Add `primary_key_type` field --- src/page/space_info.rs | 4 +++- src/page/util.rs | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 1b2b1ec..36c7efa 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -23,12 +23,13 @@ pub struct SpaceInfo { pub page_count: u32, pub name: SpaceName, pub row_schema: Vec<(String, String)>, + pub primary_key_type: (String, String), pub primary_key_intervals: Vec, + pub secondary_index_types: Vec<(String, String)>, pub secondary_index_intervals: HashMap>, pub data_intervals: Vec, pub pk_gen_state: Pk, pub empty_links_list: Vec, - pub secondary_index_types: Vec<(String, String)>, } /// Represents some interval between values. 
@@ -67,6 +68,7 @@ mod test { page_count: 0, name: "Test".to_string(), row_schema: vec![], + primary_key_type: ("id".to_string(), "i32".to_string()), primary_key_intervals: vec![], secondary_index_intervals: HashMap::new(), data_intervals: vec![], diff --git a/src/page/util.rs b/src/page/util.rs index 6ca279d..a4488e0 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -326,6 +326,7 @@ mod test { page_count: 0, name: "Test".to_string(), row_schema: vec![], + primary_key_type: ("id".to_string(), "i32".to_string()), primary_key_intervals: vec![], secondary_index_intervals: HashMap::from([( "string_index".to_owned(), From fc147acd7173a9694ef4c11ea9e2bd6dc62e718e Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 17 Dec 2024 12:59:22 +0300 Subject: [PATCH 12/41] Change `primary_key_type` to `primary_key_fields` in `SpaceInfo` --- src/page/space_info.rs | 2 +- src/page/util.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/page/space_info.rs b/src/page/space_info.rs index 36c7efa..e9e9d5d 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -23,7 +23,7 @@ pub struct SpaceInfo { pub page_count: u32, pub name: SpaceName, pub row_schema: Vec<(String, String)>, - pub primary_key_type: (String, String), + pub primary_key_fields: Vec, pub primary_key_intervals: Vec, pub secondary_index_types: Vec<(String, String)>, pub secondary_index_intervals: HashMap>, diff --git a/src/page/util.rs b/src/page/util.rs index a4488e0..9149a7e 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -326,7 +326,7 @@ mod test { page_count: 0, name: "Test".to_string(), row_schema: vec![], - primary_key_type: ("id".to_string(), "i32".to_string()), + primary_key_fields: vec![], primary_key_intervals: vec![], secondary_index_intervals: HashMap::from([( "string_index".to_owned(), From 3ef34f06233729d2697115430c4f6d85f95b14df Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 17 Dec 2024 13:04:31 +0300 Subject: [PATCH 13/41] Fix `test_as_bytes` 
test --- src/page/space_info.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page/space_info.rs b/src/page/space_info.rs index e9e9d5d..2e903da 100644 --- a/src/page/space_info.rs +++ b/src/page/space_info.rs @@ -68,7 +68,7 @@ mod test { page_count: 0, name: "Test".to_string(), row_schema: vec![], - primary_key_type: ("id".to_string(), "i32".to_string()), + primary_key_fields: vec![], primary_key_intervals: vec![], secondary_index_intervals: HashMap::new(), data_intervals: vec![], From 6ca08cc88454741f82d352c43179fd50210de646 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 17 Dec 2024 13:57:12 +0300 Subject: [PATCH 14/41] Store and read vectors of index records in index pages --- src/page/header.rs | 12 ++++++++++++ src/page/util.rs | 24 +++++++++++------------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/page/header.rs b/src/page/header.rs index 49da323..fe2d787 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -1,5 +1,10 @@ //! [`GeneralHeader`] definitions. 
+use rkyv::rancor::Strategy; +use rkyv::ser::allocator::ArenaHandle; +use rkyv::ser::sharing::Share; +use rkyv::ser::Serializer; +use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; use crate::page::ty::PageType; @@ -75,6 +80,13 @@ impl Persistable for GeneralHeader { } } +impl Persistable for Vec +where T: Persistable + for<'a> Serialize, Share>, rkyv::rancor::Error>> { + fn as_bytes(&self) -> impl AsRef<[u8]> { + rkyv::to_bytes::(self).unwrap() + } +} + #[cfg(test)] mod test { use crate::page::header::DATA_VERSION; diff --git a/src/page/util.rs b/src/page/util.rs index 9149a7e..62b9012 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -142,7 +142,7 @@ pub fn parse_data_page( pub fn parse_index_page( file: &mut std::fs::File, index: u32, -) -> eyre::Result>> +) -> eyre::Result>> where T: Archive, ::Archived: rkyv::Deserialize>, @@ -152,14 +152,12 @@ where let mut buffer: Vec = vec![0u8; header.data_length as usize]; file.read_exact(&mut buffer)?; - let archived = - unsafe { rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) }; - let index: IndexData = rkyv::deserialize(archived).expect("data should be valid"); + let archived = unsafe { + rkyv::access_unchecked::<> as Archive>::Archived>(&buffer[..]) + }; + let index_records: Vec> = rkyv::deserialize(archived).expect("data should be valid"); - Ok(GeneralPage { - header, - inner: index, - }) + Ok(index_records) } pub fn parse_space_info( @@ -211,8 +209,8 @@ where let mut result: Vec> = vec![]; for interval in intervals.iter() { for index in interval.0..interval.1 { - let index_page = parse_index_page::(file, index as u32)?; - result.push(index_page.inner); + let mut index_records = parse_index_page::(file, index as u32)?; + result.append(&mut index_records); } } @@ -345,8 +343,8 @@ mod test { space_info_page } - fn create_index_pages(intervals: &Vec) -> Vec>> { - let mut index_pages = Vec::>>::new(); + fn create_index_pages(intervals: &Vec) -> Vec>>> { + let mut index_pages = 
Vec::>>>::new(); for interval in intervals { for index in interval.0..interval.1 { @@ -371,7 +369,7 @@ mod test { }; let index_page = GeneralPage { header: index_header, - inner: index_data, + inner: vec![index_data], }; index_pages.push(index_page); } From cc1e313e4ec4d687c4fdeccfd2e56441d4a68c00 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 17 Dec 2024 15:00:13 +0300 Subject: [PATCH 15/41] WIP reading of data pages --- src/page/util.rs | 64 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index 62b9012..7725a5c 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -8,7 +8,7 @@ use rkyv::Archive; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; use crate::page::General; -use crate::{space, DataPage, GeneralPage, IndexData, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; +use crate::{space, DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; use super::{Interval, SpaceInfo}; @@ -114,13 +114,23 @@ where }) } -pub fn parse_data_page( +pub fn parse_data_record( file: &mut std::fs::File, index: u32, -) -> eyre::Result>> { + offset: u32, + length: u32, +) -> eyre::Result> { seek_to_page_start(file, index)?; let header = parse_general_header(file)?; + if header.page_type != PageType::Data { + return Err(eyre::Report::msg(format!("The type of the page with index {} is not `Data`", index))); + } + + file.seek(io::SeekFrom::Current(offset as i64))?; + let mut buffer = vec![0u8; length as usize]; + file.read_exact(&mut buffer)?; +; let mut buffer = [0u8; INNER_PAGE_SIZE]; if header.next_id == 0.into() { file.read(&mut buffer)?; @@ -176,7 +186,7 @@ pub fn parse_space_info( Ok(space_info) } -pub fn read_index_pages( +pub fn read_secondary_index_pages( file: &mut std::fs::File, index_name: &str, intervals: Vec, @@ -217,6 +227,52 @@ where Ok(result) } +pub fn read_index_pages( + file: &mut std::fs::File, + 
intervals: &Vec, +) -> eyre::Result>> +where + T: Archive, + ::Archived: rkyv::Deserialize>, +{ + let mut result: Vec> = vec![]; + for interval in intervals.iter() { + for index in interval.0..interval.1 { + let mut index_records = parse_index_page::(file, index as u32)?; + result.append(&mut index_records); + } + } + Ok(result) +} + +fn read_data_pages(mut file: &mut std::fs::File) -> eyre::Result>> { + let space_info = parse_space_info(file)?; + let primary_key_fields = space_info.primary_key_fields; + if primary_key_fields.len() != 1 { + panic!("Currently only single primary key is supported"); + } + + let primary_key_type = space_info.row_schema.iter() + .filter(|(field_name, field_type)| field_name == &primary_key_fields[0]) + .map(|(field_name, field_type)| field_type) + .take(1) + .collect::>()[0].as_str(); + let links = match primary_key_type { + "i64" => read_index_pages::(&mut file, &space_info.primary_key_intervals)? + .iter() + .map(|index_page| index_page.index_values) + .flatten() + .map(|index_value| index_value.link) + .collect::>(), + _ => panic!("Unsupported primary key data type"), + }; + for link in links { + let page = parse_data_page::(&mut file, link.page_id.0)?; + } + + todo!() +} + // fn read_data_pages_from_space(file: &mut std::fs::File) -> eyre::Result>> { // let space_info = parse_space_info::(file)?; // let mut result: Vec> = vec![]; From e719f0bc78d94109219ce168cce25b5cdb654164 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 17 Dec 2024 18:08:02 +0300 Subject: [PATCH 16/41] An implementation of reading rows from the database --- src/lib.rs | 2 +- src/page/mod.rs | 3 +- src/{util => page}/rkyv_data.rs | 4 +- src/page/util.rs | 68 ++++++++------------------------- src/util/mod.rs | 1 - 5 files changed, 21 insertions(+), 57 deletions(-) rename src/{util => page}/rkyv_data.rs (98%) diff --git a/src/lib.rs b/src/lib.rs index 3a19f42..d6fa4ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,7 @@ pub use link::Link; pub use 
data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, map_unique_tree_index, - parse_data_page, parse_index_page, parse_page, persist_page, read_index_pages, + parse_index_page, parse_page, persist_page, read_index_pages, Data as DataPage, General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, diff --git a/src/page/mod.rs b/src/page/mod.rs index 7e0f3ae..d6ee329 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -4,6 +4,7 @@ mod index; mod space_info; mod ty; mod util; +mod rkyv_data; use derive_more::{Display, From}; use rkyv::{Archive, Deserialize, Serialize}; @@ -14,7 +15,7 @@ pub use index::{map_tree_index, map_unique_tree_index, IndexPage}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ - map_data_pages_to_general, map_index_pages_to_general, parse_data_page, parse_index_page, + map_data_pages_to_general, map_index_pages_to_general, parse_index_page, parse_page, persist_page, read_index_pages, }; diff --git a/src/util/rkyv_data.rs b/src/page/rkyv_data.rs similarity index 98% rename from src/util/rkyv_data.rs rename to src/page/rkyv_data.rs index 0249a71..155c57d 100644 --- a/src/util/rkyv_data.rs +++ b/src/page/rkyv_data.rs @@ -16,7 +16,7 @@ fn advance_pointer_for_padding(mut current_pointer: *const u8, start_pointer: *c current_pointer } -pub fn parse_archived_row(buf: &[u8], columns: Vec<(String, String)>) -> Vec { +pub fn parse_archived_row(buf: &[u8], columns: &Vec<(String, String)>) -> Vec { let mut data_length: usize = { let mut accum: usize = 0; for column in columns.iter() { @@ -207,7 +207,7 @@ mod test { int7: 8, float1: 3.14159265358 }).unwrap(); - let parsed = parse_archived_row(&buffer, vec![ + let parsed = parse_archived_row(&buffer, &vec![ ("string1".to_string(), "String".to_string()), ("int1".to_string(), "i32".to_string()), 
("string2".to_string(), "String".to_string()), diff --git a/src/page/util.rs b/src/page/util.rs index 7725a5c..734db3f 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -10,6 +10,7 @@ use crate::page::ty::PageType; use crate::page::General; use crate::{space, DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; +use super::rkyv_data::parse_archived_row; use super::{Interval, SpaceInfo}; pub fn map_index_pages_to_general( @@ -119,6 +120,7 @@ pub fn parse_data_record( index: u32, offset: u32, length: u32, + schema: &Vec<(String, String)>, ) -> eyre::Result> { seek_to_page_start(file, index)?; let header = parse_general_header(file)?; @@ -130,23 +132,9 @@ pub fn parse_data_record( let mut buffer = vec![0u8; length as usize]; file.read_exact(&mut buffer)?; -; - let mut buffer = [0u8; INNER_PAGE_SIZE]; - if header.next_id == 0.into() { - file.read(&mut buffer)?; - } else { - file.read_exact(&mut buffer)?; - } - - let data = DataPage { - data: buffer, - length: header.data_length, - }; + let parsed_record = parse_archived_row(&buffer, &schema); - Ok(GeneralPage { - header, - inner: data, - }) + Ok(parsed_record) } pub fn parse_index_page( @@ -246,61 +234,36 @@ where } fn read_data_pages(mut file: &mut std::fs::File) -> eyre::Result>> { - let space_info = parse_space_info(file)?; + let space_info = parse_space_info::(file)?; let primary_key_fields = space_info.primary_key_fields; if primary_key_fields.len() != 1 { panic!("Currently only single primary key is supported"); } let primary_key_type = space_info.row_schema.iter() - .filter(|(field_name, field_type)| field_name == &primary_key_fields[0]) - .map(|(field_name, field_type)| field_type) + .filter(|(field_name, _field_type)| field_name == &primary_key_fields[0]) + .map(|(_field_name, field_type)| field_type) .take(1) .collect::>()[0].as_str(); let links = match primary_key_type { "i64" => read_index_pages::(&mut file, &space_info.primary_key_intervals)? 
.iter() - .map(|index_page| index_page.index_values) + .map(|index_page| &index_page.index_values) .flatten() .map(|index_value| index_value.link) .collect::>(), - _ => panic!("Unsupported primary key data type"), + _ => panic!("Unsupported primary key data type `{}`", primary_key_type), }; + + let mut result: Vec> = vec![]; for link in links { - let page = parse_data_page::(&mut file, link.page_id.0)?; + let row = parse_data_record::(&mut file, link.page_id.0, link.offset, link.length, &space_info.row_schema)?; + result.push(row); } - todo!() + Ok(result) } -// fn read_data_pages_from_space(file: &mut std::fs::File) -> eyre::Result>> { -// let space_info = parse_space_info::(file)?; -// let mut result: Vec> = vec![]; -// for interval in space_info.data_intervals.iter() { -// for index in interval.0 .. interval.1 { -// let data_page = parse_data_page(file, index)?; - -// file.seek(io::SeekFrom::Start(PAGE_SIZE * index))?; -// for column in space_info.row_schema { -// let value = match column.1 { -// DataType::String => { -// rkyv::from_bytes_unchecked(bytes) -// }, -// DataType::Integer => { -// todo!() -// }, -// DataType::Float => { -// todo!() -// } -// }; -// } - -// let row: HashMap = parse_binary_row(&data_page, &row_schema); -// } -// } -// todo!() -// } - #[cfg(test)] mod test { use scc::ebr::Guard; @@ -310,6 +273,7 @@ mod test { use std::path::Path; use crate::page::index::IndexValue; + use crate::page::util::read_secondary_index_pages; use crate::page::INNER_PAGE_SIZE; use crate::{ map_index_pages_to_general, map_unique_tree_index, GeneralHeader, GeneralPage, @@ -455,7 +419,7 @@ mod test { // read the data let mut file = std::fs::File::open(filename).unwrap(); - let index_pages = read_index_pages::( + let index_pages = read_secondary_index_pages::( &mut file, "string_index", vec![Interval(1, 2), Interval(5, 6)], diff --git a/src/util/mod.rs b/src/util/mod.rs index 8e3da68..d343007 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,6 +1,5 @@ mod 
persistable; mod sized; -mod rkyv_data; pub use persistable::Persistable; pub use sized::{align, SizeMeasurable}; From 6f629408fce160f91ff66a4127cf046487e19409 Mon Sep 17 00:00:00 2001 From: Handy-caT <37216852+Handy-caT@users.noreply.github.com> Date: Tue, 17 Dec 2024 20:52:54 +0300 Subject: [PATCH 17/41] corrections --- src/lib.rs | 7 +- src/page/header.rs | 12 -- src/page/mod.rs | 5 +- src/page/rkyv_data.rs | 237 ---------------------- src/page/util.rs | 56 ++++-- src/persistence/data/mod.rs | 11 + src/persistence/data/rkyv_data.rs | 323 ++++++++++++++++++++++++++++++ src/persistence/data/types.rs | 50 +++++ src/persistence/data/util.rs | 23 +++ src/persistence/mod.rs | 1 + src/util/persistable.rs | 19 ++ src/util/sized.rs | 5 +- 12 files changed, 470 insertions(+), 279 deletions(-) delete mode 100644 src/page/rkyv_data.rs create mode 100644 src/persistence/data/mod.rs create mode 100644 src/persistence/data/rkyv_data.rs create mode 100644 src/persistence/data/types.rs create mode 100644 src/persistence/data/util.rs diff --git a/src/lib.rs b/src/lib.rs index d6fa4ff..4ff7da8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,10 +9,9 @@ pub use link::Link; pub use data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, map_unique_tree_index, - parse_index_page, parse_page, persist_page, read_index_pages, - Data as DataPage, General as GeneralPage, GeneralHeader, IndexPage as IndexData, - Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, - INNER_PAGE_SIZE, PAGE_SIZE, + parse_index_page, parse_page, persist_page, read_index_pages, Data as DataPage, + General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, + SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable}; diff --git a/src/page/header.rs 
b/src/page/header.rs index fe2d787..49da323 100644 --- a/src/page/header.rs +++ b/src/page/header.rs @@ -1,10 +1,5 @@ //! [`GeneralHeader`] definitions. -use rkyv::rancor::Strategy; -use rkyv::ser::allocator::ArenaHandle; -use rkyv::ser::sharing::Share; -use rkyv::ser::Serializer; -use rkyv::util::AlignedVec; use rkyv::{Archive, Deserialize, Serialize}; use crate::page::ty::PageType; @@ -80,13 +75,6 @@ impl Persistable for GeneralHeader { } } -impl Persistable for Vec -where T: Persistable + for<'a> Serialize, Share>, rkyv::rancor::Error>> { - fn as_bytes(&self) -> impl AsRef<[u8]> { - rkyv::to_bytes::(self).unwrap() - } -} - #[cfg(test)] mod test { use crate::page::header::DATA_VERSION; diff --git a/src/page/mod.rs b/src/page/mod.rs index d6ee329..7fdd596 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -4,7 +4,6 @@ mod index; mod space_info; mod ty; mod util; -mod rkyv_data; use derive_more::{Display, From}; use rkyv::{Archive, Deserialize, Serialize}; @@ -15,8 +14,8 @@ pub use index::{map_tree_index, map_unique_tree_index, IndexPage}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ - map_data_pages_to_general, map_index_pages_to_general, parse_index_page, - parse_page, persist_page, read_index_pages, + map_data_pages_to_general, map_index_pages_to_general, parse_index_page, parse_page, + persist_page, read_index_pages, }; // TODO: Move to config diff --git a/src/page/rkyv_data.rs b/src/page/rkyv_data.rs deleted file mode 100644 index 155c57d..0000000 --- a/src/page/rkyv_data.rs +++ /dev/null @@ -1,237 +0,0 @@ -use rkyv::{primitive::{ArchivedF32, ArchivedF64, ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, ArchivedU128, ArchivedU16, ArchivedU32, ArchivedU64}, string::ArchivedString}; - -fn advance_accum_for_padding(mut accum: usize, padding: usize) -> usize { - if accum % padding != 0 { - accum += padding - accum % padding; - } - accum -} - -fn advance_pointer_for_padding(mut current_pointer: *const u8, start_pointer: 
*const u8, padding: usize) -> *const u8 { - if unsafe { current_pointer.byte_offset_from(start_pointer) % padding as isize != 0 } { - current_pointer = unsafe { current_pointer.add( - (padding as isize - current_pointer.byte_offset_from(start_pointer) % padding as isize) as usize) - }; - } - current_pointer -} - -pub fn parse_archived_row(buf: &[u8], columns: &Vec<(String, String)>) -> Vec { - let mut data_length: usize = { - let mut accum: usize = 0; - for column in columns.iter() { - match column.1.as_str() { - "String" => { - accum = advance_accum_for_padding(accum, 4); - accum += std::mem::size_of::(); - }, - - "i128" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "i64" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "i32" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "i16" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "i8" => accum += std::mem::size_of::(), - - "u128" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "u64" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "u32" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "u16" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "u8" => accum += std::mem::size_of::(), - - "f64" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - }, - "f32" => { - accum = advance_accum_for_padding(accum, std::mem::size_of::()); - accum += std::mem::size_of::(); - } - - _ => panic!("Unknown data type {:?}", column.1), - } - } - accum - }; - if data_length % 4 != 0 
{ - data_length += 4 - data_length % 4; - } - - let start_pointer = unsafe { buf.as_ptr().add(buf.len()).sub(data_length) }; - let mut current_pointer = start_pointer; - let mut output: Vec = vec![]; - for column in columns.iter() { - match column.1.as_str() { - "String" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, 4); - let archived_ptr: *const ArchivedString = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - - "i128" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedI128 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "i64" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedI64 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "i32" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedI32 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "i16" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedI16 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "i8" => { - let archived_ptr: *const i8 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { 
current_pointer.add(std::mem::size_of::()) }; - }, - - "u128" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedU128 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "u64" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedU64 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "u32" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedU32 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "u16" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedU16 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "u8" => { - let archived_ptr: *const u8 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - - "f64" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const ArchivedF64 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - "f32" => { - current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, std::mem::size_of::()); - let archived_ptr: *const 
ArchivedF32 = current_pointer.cast(); - output.push(unsafe { (*archived_ptr).to_string() }); - current_pointer = unsafe { current_pointer.add(std::mem::size_of::()) }; - }, - - _ => panic!("Unknown data type: {:?}", column.1), - } - } - output -} - -#[cfg(test)] -mod test { - use rkyv::{Archive, Deserialize, Serialize}; - - use super::parse_archived_row; - - #[derive(Archive, Serialize, Deserialize, Debug)] - struct Struct { - pub string1: String, - pub int1: u32, - pub string2: String, - pub int2: u8, - pub int3: i8, - pub int4: u8, - pub int5: i32, - pub int6: u8, - pub string3: String, - pub int7: i8, - pub float1: f64, - } - - #[test] - fn test_parse_archived_row() { - let buffer = rkyv::to_bytes::(&Struct { - string1: "000000000000000".to_string(), - int1: 20, - string2: "aaaaaaaa".to_string(), - int2: 3, - int3: 4, - int4: 5, - int5: 6, - int6: 7, - string3: "x".to_owned(), - int7: 8, - float1: 3.14159265358 - }).unwrap(); - let parsed = parse_archived_row(&buffer, &vec![ - ("string1".to_string(), "String".to_string()), - ("int1".to_string(), "i32".to_string()), - ("string2".to_string(), "String".to_string()), - ("int2".to_string(), "u8".to_string()), - ("int3".to_string(), "i8".to_string()), - ("int4".to_string(), "u8".to_string()), - ("int5".to_string(), "i32".to_string()), - ("int6".to_string(), "u8".to_string()), - ("string3".to_string(), "String".to_string()), - ("int7".to_string(), "i8".to_string()), - ("float1".to_string(), "f64".to_string()), - ]); - assert_eq!(parsed, [ - "000000000000000".to_string(), - "20".to_string(), - "aaaaaaaa".to_string(), - "3".to_string(), - "4".to_string(), - "5".to_string(), - "6".to_string(), - "7".to_string(), - "x".to_string(), - "8".to_string(), - "3.14159265358".to_string(), - ]) - } -} diff --git a/src/page/util.rs b/src/page/util.rs index 734db3f..b9174d8 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -5,13 +5,15 @@ use eyre::eyre; use rkyv::api::high::HighDeserializer; use rkyv::Archive; +use 
super::{Interval, SpaceInfo}; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; use crate::page::General; -use crate::{space, DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; - -use super::rkyv_data::parse_archived_row; -use super::{Interval, SpaceInfo}; +use crate::persistence::data::rkyv_data::parse_archived_row; +use crate::persistence::data::DataTypeValue; +use crate::{ + space, DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE, +}; pub fn map_index_pages_to_general( pages: Vec>, @@ -86,8 +88,10 @@ fn seek_to_page_start(file: &mut std::fs::File, index: u32) -> eyre::Result<()> fn parse_general_header(file: &mut std::fs::File) -> eyre::Result { let mut buffer = [0; GENERAL_HEADER_SIZE]; file.read_exact(&mut buffer)?; - let archived = unsafe { rkyv::access_unchecked::<::Archived>(&buffer[..]) }; - let header = rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); + let archived = + unsafe { rkyv::access_unchecked::<::Archived>(&buffer[..]) }; + let header = + rkyv::deserialize::<_, rkyv::rancor::Error>(archived).expect("data should be valid"); Ok(header) } @@ -121,11 +125,14 @@ pub fn parse_data_record( offset: u32, length: u32, schema: &Vec<(String, String)>, -) -> eyre::Result> { +) -> eyre::Result> { seek_to_page_start(file, index)?; let header = parse_general_header(file)?; if header.page_type != PageType::Data { - return Err(eyre::Report::msg(format!("The type of the page with index {} is not `Data`", index))); + return Err(eyre::Report::msg(format!( + "The type of the page with index {} is not `Data`", + index + ))); } file.seek(io::SeekFrom::Current(offset as i64))?; @@ -150,10 +157,10 @@ where let mut buffer: Vec = vec![0u8; header.data_length as usize]; file.read_exact(&mut buffer)?; - let archived = unsafe { - rkyv::access_unchecked::<> as Archive>::Archived>(&buffer[..]) - }; - let index_records: Vec> = 
rkyv::deserialize(archived).expect("data should be valid"); + let archived = + unsafe { rkyv::access_unchecked::<> as Archive>::Archived>(&buffer[..]) }; + let index_records: Vec> = + rkyv::deserialize(archived).expect("data should be valid"); Ok(index_records) } @@ -233,18 +240,23 @@ where Ok(result) } -fn read_data_pages(mut file: &mut std::fs::File) -> eyre::Result>> { +fn read_data_pages( + mut file: &mut std::fs::File, +) -> eyre::Result>> { let space_info = parse_space_info::(file)?; let primary_key_fields = space_info.primary_key_fields; if primary_key_fields.len() != 1 { panic!("Currently only single primary key is supported"); } - let primary_key_type = space_info.row_schema.iter() + let primary_key_type = space_info + .row_schema + .iter() .filter(|(field_name, _field_type)| field_name == &primary_key_fields[0]) .map(|(_field_name, field_type)| field_type) .take(1) - .collect::>()[0].as_str(); + .collect::>()[0] + .as_str(); let links = match primary_key_type { "i64" => read_index_pages::(&mut file, &space_info.primary_key_intervals)? 
.iter() @@ -255,9 +267,15 @@ fn read_data_pages(mut file: &mut std::fs::File) -> eyre _ => panic!("Unsupported primary key data type `{}`", primary_key_type), }; - let mut result: Vec> = vec![]; + let mut result: Vec> = vec![]; for link in links { - let row = parse_data_record::(&mut file, link.page_id.0, link.offset, link.length, &space_info.row_schema)?; + let row = parse_data_record::( + &mut file, + link.page_id.0, + link.offset, + link.length, + &space_info.row_schema, + )?; result.push(row); } @@ -276,8 +294,8 @@ mod test { use crate::page::util::read_secondary_index_pages; use crate::page::INNER_PAGE_SIZE; use crate::{ - map_index_pages_to_general, map_unique_tree_index, GeneralHeader, GeneralPage, - IndexData, Interval, Link, PageType, SpaceInfoData, DATA_VERSION, PAGE_SIZE, + map_index_pages_to_general, map_unique_tree_index, GeneralHeader, GeneralPage, IndexData, + Interval, Link, PageType, SpaceInfoData, DATA_VERSION, PAGE_SIZE, }; use super::{persist_page, read_index_pages}; diff --git a/src/persistence/data/mod.rs b/src/persistence/data/mod.rs new file mode 100644 index 0000000..0264a81 --- /dev/null +++ b/src/persistence/data/mod.rs @@ -0,0 +1,11 @@ +pub mod rkyv_data; +mod types; +mod util; + +pub use types::DataTypeValue; + +pub trait DataType { + fn advance_accum(&self, accum: &mut usize); + fn from_pointer(&self, pointer: *const u8, start_pointer: *const u8) -> DataTypeValue; + fn advance_pointer(&self, pointer: *const u8); +} diff --git a/src/persistence/data/rkyv_data.rs b/src/persistence/data/rkyv_data.rs new file mode 100644 index 0000000..8468c46 --- /dev/null +++ b/src/persistence/data/rkyv_data.rs @@ -0,0 +1,323 @@ +use crate::persistence::data::types::DataTypeValue; +use crate::persistence::data::util::{advance_accum_for_padding, advance_pointer_for_padding}; +use rkyv::{ + primitive::{ + ArchivedF32, ArchivedF64, ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, + ArchivedU128, ArchivedU16, ArchivedU32, ArchivedU64, + }, + 
string::ArchivedString, +}; +use std::str::FromStr; + +pub fn parse_archived_row, S2: AsRef>( + buf: &[u8], + columns: &Vec<(S1, S2)>, +) -> Vec { + let mut data_length: usize = { + let mut accum: usize = 0; + for column in columns.iter() { + let value = + DataTypeValue::from_str(column.1.as_ref()).expect("data type should be supported"); + let data_type = value.as_data_type(); + data_type.advance_accum(&mut accum); + + // match column.1.as_str() { + // "String" => { + // accum = advance_accum_for_padding(accum, 4); + // accum += size_of::(); + // } + // + // "i128" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "i64" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "i32" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "i16" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "i8" => accum += size_of::(), + // + // "u128" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "u64" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "u32" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "u16" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "u8" => accum += size_of::(), + // + // "f64" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // "f32" => { + // accum = advance_accum_for_padding(accum, size_of::()); + // accum += size_of::(); + // } + // + // _ => panic!("Unknown data type {:?}", column.1), + // } + } + accum + }; + if data_length % 4 != 0 { + data_length += 4 - data_length % 4; + } + + let start_pointer = unsafe { buf.as_ptr().add(buf.len()).sub(data_length) }; + let current_pointer 
= start_pointer; + let mut output: Vec<_> = vec![]; + for column in columns.iter() { + let value = + DataTypeValue::from_str(column.1.as_ref()).expect("data type should be supported"); + let data_type = value.as_data_type(); + let deserialized = data_type.from_pointer(current_pointer, start_pointer); + data_type.advance_pointer(current_pointer); + output.push(deserialized); + + // match column.1.as_str() { + // "String" => { + // current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, 4); + // let archived_ptr: *const ArchivedString = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // + // "i128" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedI128 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "i64" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedI64 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "i32" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedI32 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "i16" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedI16 = current_pointer.cast(); + // output.push(unsafe { 
(*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "i8" => { + // let archived_ptr: *const i8 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = unsafe { current_pointer.add(size_of::()) }; + // } + // + // "u128" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedU128 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "u64" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedU64 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "u32" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedU32 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "u16" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedU16 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "u8" => { + // let archived_ptr: *const u8 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = unsafe { current_pointer.add(size_of::()) }; + // } + // + // "f64" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + 
// start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedF64 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // "f32" => { + // current_pointer = advance_pointer_for_padding( + // current_pointer, + // start_pointer, + // size_of::(), + // ); + // let archived_ptr: *const ArchivedF32 = current_pointer.cast(); + // output.push(unsafe { (*archived_ptr).to_string() }); + // current_pointer = + // unsafe { current_pointer.add(size_of::()) }; + // } + // + // _ => panic!("Unknown data type: {:?}", column.1), + // } + } + output +} + +#[cfg(test)] +mod test { + use super::parse_archived_row; + use crate::persistence::data::{self, types::DataTypeValue}; + use rkyv::{Archive, Deserialize, Serialize}; + + #[derive(Archive, Serialize, Deserialize, Debug)] + struct Struct { + pub string1: String, + pub int1: u32, + pub string2: String, + pub int2: u8, + pub int3: i8, + pub int4: u8, + pub int5: i32, + pub int6: u8, + pub string3: String, + pub int7: i8, + pub float1: f64, + } + + #[derive(Archive, Serialize, Deserialize, Debug)] + struct Struct1 { + pub string1: String, + } + + #[test] + fn test_parse_archived_row() { + let buffer = rkyv::to_bytes::(&Struct1 { + string1: "000000000000000".to_string(), + }) + .unwrap(); + let parsed = parse_archived_row(&buffer, &vec![("string1", "String")]); + assert_eq!( + parsed, + [DataTypeValue::String("000000000000000".to_string())] + ) + } + + // TODO: make this test working after other types are added. 
+ + // #[test] + // fn test_parse_archived_row() { + // let buffer = rkyv::to_bytes::(&Struct { + // string1: "000000000000000".to_string(), + // int1: 20, + // string2: "aaaaaaaa".to_string(), + // int2: 3, + // int3: 4, + // int4: 5, + // int5: 6, + // int6: 7, + // string3: "x".to_owned(), + // int7: 8, + // float1: 3.14159265358, + // }) + // .unwrap(); + // let parsed = parse_archived_row( + // &buffer, + // &vec![ + // ("string1".to_string(), "String".to_string()), + // ("int1".to_string(), "i32".to_string()), + // ("string2".to_string(), "String".to_string()), + // ("int2".to_string(), "u8".to_string()), + // ("int3".to_string(), "i8".to_string()), + // ("int4".to_string(), "u8".to_string()), + // ("int5".to_string(), "i32".to_string()), + // ("int6".to_string(), "u8".to_string()), + // ("string3".to_string(), "String".to_string()), + // ("int7".to_string(), "i8".to_string()), + // ("float1".to_string(), "f64".to_string()), + // ], + // ); + // assert_eq!( + // parsed, + // [ + // "000000000000000".to_string(), + // "20".to_string(), + // "aaaaaaaa".to_string(), + // "3".to_string(), + // "4".to_string(), + // "5".to_string(), + // "6".to_string(), + // "7".to_string(), + // "x".to_string(), + // "8".to_string(), + // "3.14159265358".to_string(), + // ] + // ) + // } +} diff --git a/src/persistence/data/types.rs b/src/persistence/data/types.rs new file mode 100644 index 0000000..72e12a1 --- /dev/null +++ b/src/persistence/data/types.rs @@ -0,0 +1,50 @@ +use std::str::FromStr; + +use derive_more::From; +use rkyv::string::ArchivedString; + +use crate::persistence::data::util::{advance_accum_for_padding, advance_pointer_for_padding}; +use crate::persistence::data::DataType; + +#[derive(Debug, From, PartialEq)] +pub enum DataTypeValue { + String(String), + // TODO: add other types. 
+} + +impl DataTypeValue { + pub fn as_data_type(&self) -> &dyn DataType { + match self { + Self::String(s) => s, + _ => unreachable!(), + } + } +} + +impl FromStr for DataTypeValue { + type Err = (); + + fn from_str(s: &str) -> Result { + Ok(match s.as_ref() { + "String" => String::default().into(), + _ => unreachable!(), + }) + } +} + +impl DataType for String { + fn advance_accum(&self, accum: &mut usize) { + *accum = advance_accum_for_padding(*accum, 4); + *accum += size_of::(); + } + + fn from_pointer(&self, pointer: *const u8, start_pointer: *const u8) -> DataTypeValue { + let current_pointer = advance_pointer_for_padding(pointer, start_pointer, 4); + let archived_ptr: *const ArchivedString = current_pointer.cast(); + unsafe { (*archived_ptr).to_string() }.into() + } + + fn advance_pointer(&self, pointer: *const u8) { + unsafe { pointer.add(size_of::()) }; + } +} diff --git a/src/persistence/data/util.rs b/src/persistence/data/util.rs new file mode 100644 index 0000000..ad816e1 --- /dev/null +++ b/src/persistence/data/util.rs @@ -0,0 +1,23 @@ +pub fn advance_accum_for_padding(mut accum: usize, padding: usize) -> usize { + if accum % padding != 0 { + accum += padding - accum % padding; + } + accum +} + +pub fn advance_pointer_for_padding( + mut current_pointer: *const u8, + start_pointer: *const u8, + padding: usize, +) -> *const u8 { + if unsafe { current_pointer.byte_offset_from(start_pointer) % padding as isize != 0 } { + current_pointer = unsafe { + current_pointer.add( + (padding as isize + - current_pointer.byte_offset_from(start_pointer) % padding as isize) + as usize, + ) + }; + } + current_pointer +} diff --git a/src/persistence/mod.rs b/src/persistence/mod.rs index 6890cba..692fc9b 100644 --- a/src/persistence/mod.rs +++ b/src/persistence/mod.rs @@ -1,3 +1,4 @@ +pub mod data; mod index; mod table; diff --git a/src/util/persistable.rs b/src/util/persistable.rs index 5b05d72..958fd09 100644 --- a/src/util/persistable.rs +++ b/src/util/persistable.rs @@ 
-1,3 +1,22 @@ +use rkyv::rancor::Strategy; +use rkyv::ser::allocator::ArenaHandle; +use rkyv::ser::sharing::Share; +use rkyv::ser::Serializer; +use rkyv::util::AlignedVec; +use rkyv::Serialize; + pub trait Persistable { fn as_bytes(&self) -> impl AsRef<[u8]>; } + +impl Persistable for Vec +where + T: Persistable + + for<'a> Serialize< + Strategy, Share>, rkyv::rancor::Error>, + >, +{ + fn as_bytes(&self) -> impl AsRef<[u8]> { + rkyv::to_bytes::(self).unwrap() + } +} diff --git a/src/util/sized.rs b/src/util/sized.rs index 4861c81..6a4298b 100644 --- a/src/util/sized.rs +++ b/src/util/sized.rs @@ -73,10 +73,7 @@ impl SizeMeasurable for Arc { } impl SizeMeasurable for lockfree::set::Set { fn aligned_size(&self) -> usize { - self - .iter() - .map(|elem| elem.aligned_size()) - .sum() + self.iter().map(|elem| elem.aligned_size()).sum() } } From 7cfd2776f58eeec4f8af770dac2b2911682c3b49 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 18 Dec 2024 19:57:49 +0300 Subject: [PATCH 18/41] Implement `DataType` for numerical types --- src/persistence/data/mod.rs | 2 +- src/persistence/data/rkyv_data.rs | 4 +- src/persistence/data/types.rs | 73 +++++++++++++++++++++++++++++-- 3 files changed, 72 insertions(+), 7 deletions(-) diff --git a/src/persistence/data/mod.rs b/src/persistence/data/mod.rs index 0264a81..fe6e532 100644 --- a/src/persistence/data/mod.rs +++ b/src/persistence/data/mod.rs @@ -7,5 +7,5 @@ pub use types::DataTypeValue; pub trait DataType { fn advance_accum(&self, accum: &mut usize); fn from_pointer(&self, pointer: *const u8, start_pointer: *const u8) -> DataTypeValue; - fn advance_pointer(&self, pointer: *const u8); + fn advance_pointer(&self, pointer: &mut *const u8); } diff --git a/src/persistence/data/rkyv_data.rs b/src/persistence/data/rkyv_data.rs index 8468c46..9c6db0c 100644 --- a/src/persistence/data/rkyv_data.rs +++ b/src/persistence/data/rkyv_data.rs @@ -82,14 +82,14 @@ pub fn parse_archived_row, S2: AsRef>( } let start_pointer = unsafe { 
buf.as_ptr().add(buf.len()).sub(data_length) }; - let current_pointer = start_pointer; + let mut current_pointer = start_pointer; let mut output: Vec<_> = vec![]; for column in columns.iter() { let value = DataTypeValue::from_str(column.1.as_ref()).expect("data type should be supported"); let data_type = value.as_data_type(); let deserialized = data_type.from_pointer(current_pointer, start_pointer); - data_type.advance_pointer(current_pointer); + data_type.advance_pointer(&mut current_pointer); output.push(deserialized); // match column.1.as_str() { diff --git a/src/persistence/data/types.rs b/src/persistence/data/types.rs index 72e12a1..390fdb8 100644 --- a/src/persistence/data/types.rs +++ b/src/persistence/data/types.rs @@ -1,6 +1,10 @@ use std::str::FromStr; use derive_more::From; +use rkyv::primitive::{ + ArchivedF32, ArchivedF64, ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, ArchivedU128, + ArchivedU16, ArchivedU32, ArchivedU64, +}; use rkyv::string::ArchivedString; use crate::persistence::data::util::{advance_accum_for_padding, advance_pointer_for_padding}; @@ -9,14 +13,36 @@ use crate::persistence::data::DataType; #[derive(Debug, From, PartialEq)] pub enum DataTypeValue { String(String), - // TODO: add other types. 
+ I128(i128), + I64(i64), + I32(i32), + I16(i16), + I8(i8), + U128(u128), + U64(u64), + U32(u32), + U16(u16), + U8(u8), + F64(f64), + F32(f32), } impl DataTypeValue { pub fn as_data_type(&self) -> &dyn DataType { match self { Self::String(s) => s, - _ => unreachable!(), + Self::I128(i) => i, + Self::I64(i) => i, + Self::I32(i) => i, + Self::I16(i) => i, + Self::I8(i) => i, + Self::U128(u) => u, + Self::U64(u) => u, + Self::U32(u) => u, + Self::U16(u) => u, + Self::U8(u) => u, + Self::F64(f) => f, + Self::F32(f) => f, } } } @@ -27,6 +53,7 @@ impl FromStr for DataTypeValue { fn from_str(s: &str) -> Result { Ok(match s.as_ref() { "String" => String::default().into(), + "i32" => i32::default().into(), _ => unreachable!(), }) } @@ -44,7 +71,45 @@ impl DataType for String { unsafe { (*archived_ptr).to_string() }.into() } - fn advance_pointer(&self, pointer: *const u8) { - unsafe { pointer.add(size_of::()) }; + fn advance_pointer(&self, pointer: &mut *const u8) { + *pointer = unsafe { pointer.add(size_of::()) }; } } + +macro_rules! impl_datatype { + ($datatype:ty, $archived_datatype:ty) => { + impl DataType for $datatype { + fn advance_accum(&self, accum: &mut usize) { + *accum = advance_accum_for_padding(*accum, size_of::<$archived_datatype>()); + *accum += size_of::<$archived_datatype>(); + } + + fn from_pointer(&self, pointer: *const u8, start_pointer: *const u8) -> DataTypeValue { + let current_pointer = advance_pointer_for_padding( + pointer, + start_pointer, + size_of::<$archived_datatype>(), + ); + let archived_ptr: *const $archived_datatype = current_pointer.cast(); + unsafe { (*archived_ptr).to_string() }.into() + } + + fn advance_pointer(&self, pointer: &mut *const u8) { + *pointer = unsafe { pointer.add(size_of::<$archived_datatype>()) }; + } + } + }; +} + +impl_datatype! {i128, ArchivedI128} +impl_datatype! {i64, ArchivedI64} +impl_datatype! {i32, ArchivedI32} +impl_datatype! {i16, ArchivedI16} +impl_datatype! {i8, i8} +impl_datatype! 
{u128, ArchivedU128} +impl_datatype! {u64, ArchivedU64} +impl_datatype! {u32, ArchivedU32} +impl_datatype! {u16, ArchivedU16} +impl_datatype! {u8, u8} +impl_datatype! {f64, ArchivedF64} +impl_datatype! {f32, ArchivedF32} From aaf7d45a7a7afa534728834bf412dfe5505d7bb6 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Thu, 19 Dec 2024 11:51:43 +0300 Subject: [PATCH 19/41] Support all data types --- src/persistence/data/mod.rs | 1 + src/persistence/data/rkyv_data.rs | 361 +++++++++--------------------- src/persistence/data/types.rs | 48 ++-- 3 files changed, 135 insertions(+), 275 deletions(-) diff --git a/src/persistence/data/mod.rs b/src/persistence/data/mod.rs index fe6e532..290d568 100644 --- a/src/persistence/data/mod.rs +++ b/src/persistence/data/mod.rs @@ -7,5 +7,6 @@ pub use types::DataTypeValue; pub trait DataType { fn advance_accum(&self, accum: &mut usize); fn from_pointer(&self, pointer: *const u8, start_pointer: *const u8) -> DataTypeValue; + fn advance_pointer_for_padding(&self, pointer: &mut *const u8, start_pointer: *const u8); fn advance_pointer(&self, pointer: &mut *const u8); } diff --git a/src/persistence/data/rkyv_data.rs b/src/persistence/data/rkyv_data.rs index 9c6db0c..642a362 100644 --- a/src/persistence/data/rkyv_data.rs +++ b/src/persistence/data/rkyv_data.rs @@ -1,13 +1,5 @@ use crate::persistence::data::types::DataTypeValue; -use crate::persistence::data::util::{advance_accum_for_padding, advance_pointer_for_padding}; -use rkyv::{ - primitive::{ - ArchivedF32, ArchivedF64, ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, - ArchivedU128, ArchivedU16, ArchivedU32, ArchivedU64, - }, - string::ArchivedString, -}; -use std::str::FromStr; +use std::{os::unix::net::UnixDatagram, str::FromStr}; pub fn parse_archived_row, S2: AsRef>( buf: &[u8], @@ -20,60 +12,6 @@ pub fn parse_archived_row, S2: AsRef>( DataTypeValue::from_str(column.1.as_ref()).expect("data type should be supported"); let data_type = value.as_data_type(); 
data_type.advance_accum(&mut accum); - - // match column.1.as_str() { - // "String" => { - // accum = advance_accum_for_padding(accum, 4); - // accum += size_of::(); - // } - // - // "i128" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "i64" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "i32" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "i16" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "i8" => accum += size_of::(), - // - // "u128" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "u64" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "u32" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "u16" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "u8" => accum += size_of::(), - // - // "f64" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // "f32" => { - // accum = advance_accum_for_padding(accum, size_of::()); - // accum += size_of::(); - // } - // - // _ => panic!("Unknown data type {:?}", column.1), - // } } accum }; @@ -89,143 +27,9 @@ pub fn parse_archived_row, S2: AsRef>( DataTypeValue::from_str(column.1.as_ref()).expect("data type should be supported"); let data_type = value.as_data_type(); let deserialized = data_type.from_pointer(current_pointer, start_pointer); - data_type.advance_pointer(&mut current_pointer); + data_type.advance_pointer_for_padding(&mut current_pointer, start_pointer); output.push(deserialized); - - // match column.1.as_str() { - // "String" => { - // current_pointer = advance_pointer_for_padding(current_pointer, start_pointer, 4); - // let 
archived_ptr: *const ArchivedString = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // - // "i128" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedI128 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "i64" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedI64 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "i32" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedI32 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "i16" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedI16 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "i8" => { - // let archived_ptr: *const i8 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = unsafe { current_pointer.add(size_of::()) }; - // } - // - // "u128" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedU128 = current_pointer.cast(); - // 
output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "u64" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedU64 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "u32" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedU32 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "u16" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedU16 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "u8" => { - // let archived_ptr: *const u8 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = unsafe { current_pointer.add(size_of::()) }; - // } - // - // "f64" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedF64 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - // unsafe { current_pointer.add(size_of::()) }; - // } - // "f32" => { - // current_pointer = advance_pointer_for_padding( - // current_pointer, - // start_pointer, - // size_of::(), - // ); - // let archived_ptr: *const ArchivedF32 = current_pointer.cast(); - // output.push(unsafe { (*archived_ptr).to_string() }); - // current_pointer = - 
// unsafe { current_pointer.add(size_of::()) }; - // } - // - // _ => panic!("Unknown data type: {:?}", column.1), - // } + data_type.advance_pointer(&mut current_pointer); } output } @@ -237,7 +41,62 @@ mod test { use rkyv::{Archive, Deserialize, Serialize}; #[derive(Archive, Serialize, Deserialize, Debug)] - struct Struct { + struct Struct1 { + pub string1: String, + } + + #[test] + fn test_parse_archived_row() { + let buffer = rkyv::to_bytes::(&Struct1 { + string1: "000000000000000".to_string(), + }) + .unwrap(); + let parsed = parse_archived_row(&buffer, &vec![("string1", "String")]); + assert_eq!( + parsed, + [DataTypeValue::String("000000000000000".to_string())] + ) + } + + + #[derive(Archive, Serialize, Deserialize, Debug)] + struct Struct2 { + pub int1: i32, + } + + #[test] + fn test_parse_archived_row_int() { + let buffer = rkyv::to_bytes::(&Struct2 { + int1: 3, + }) + .unwrap(); + let parsed = parse_archived_row(&buffer, &vec![("int1", "i32")]); + assert_eq!( + parsed, + [DataTypeValue::I32(3)] + ) + } + + #[derive(Archive, Serialize, Deserialize, Debug)] + struct Struct3 { + pub float1: f64, + } + + #[test] + fn test_parse_archived_row_float() { + let buffer = rkyv::to_bytes::(&Struct3 { + float1: 3.14159265358, + }) + .unwrap(); + let parsed = parse_archived_row(&buffer, &vec![("float1", "f64")]); + assert_eq!( + parsed, + [DataTypeValue::F64(3.14159265358)] + ) + } + + #[derive(Archive, Serialize, Deserialize, Debug)] + struct Struct4 { pub string1: String, pub int1: u32, pub string2: String, @@ -251,73 +110,53 @@ mod test { pub float1: f64, } - #[derive(Archive, Serialize, Deserialize, Debug)] - struct Struct1 { - pub string1: String, - } - #[test] - fn test_parse_archived_row() { - let buffer = rkyv::to_bytes::(&Struct1 { + fn test_parse_archived_row_many_fields() { + let buffer = rkyv::to_bytes::(&Struct4 { string1: "000000000000000".to_string(), + int1: 20, + string2: "aaaaaaaa".to_string(), + int2: 3, + int3: 4, + int4: 5, + int5: 6, + int6: 7, + 
string3: "x".to_string(), + int7: 8, + float1: 3.14159265358, }) .unwrap(); - let parsed = parse_archived_row(&buffer, &vec![("string1", "String")]); + let parsed = parse_archived_row( + &buffer, + &vec![ + ("string1".to_string(), "String".to_string()), + ("int1".to_string(), "i32".to_string()), + ("string2".to_string(), "String".to_string()), + ("int2".to_string(), "u8".to_string()), + ("int3".to_string(), "i8".to_string()), + ("int4".to_string(), "u8".to_string()), + ("int5".to_string(), "i32".to_string()), + ("int6".to_string(), "u8".to_string()), + ("string3".to_string(), "String".to_string()), + ("int7".to_string(), "i8".to_string()), + ("float1".to_string(), "f64".to_string()), + ], + ); assert_eq!( parsed, - [DataTypeValue::String("000000000000000".to_string())] + [ + DataTypeValue::String("000000000000000".to_string()), + DataTypeValue::I32(20), + DataTypeValue::String("aaaaaaaa".to_string()), + DataTypeValue::U8(3), + DataTypeValue::I8(4), + DataTypeValue::U8(5), + DataTypeValue::I32(6), + DataTypeValue::U8(7), + DataTypeValue::String("x".to_string()), + DataTypeValue::I8(8), + DataTypeValue::F64(3.14159265358f64), + ] ) } - - // TODO: make this test working after other types are added. 
- - // #[test] - // fn test_parse_archived_row() { - // let buffer = rkyv::to_bytes::(&Struct { - // string1: "000000000000000".to_string(), - // int1: 20, - // string2: "aaaaaaaa".to_string(), - // int2: 3, - // int3: 4, - // int4: 5, - // int5: 6, - // int6: 7, - // string3: "x".to_owned(), - // int7: 8, - // float1: 3.14159265358, - // }) - // .unwrap(); - // let parsed = parse_archived_row( - // &buffer, - // &vec![ - // ("string1".to_string(), "String".to_string()), - // ("int1".to_string(), "i32".to_string()), - // ("string2".to_string(), "String".to_string()), - // ("int2".to_string(), "u8".to_string()), - // ("int3".to_string(), "i8".to_string()), - // ("int4".to_string(), "u8".to_string()), - // ("int5".to_string(), "i32".to_string()), - // ("int6".to_string(), "u8".to_string()), - // ("string3".to_string(), "String".to_string()), - // ("int7".to_string(), "i8".to_string()), - // ("float1".to_string(), "f64".to_string()), - // ], - // ); - // assert_eq!( - // parsed, - // [ - // "000000000000000".to_string(), - // "20".to_string(), - // "aaaaaaaa".to_string(), - // "3".to_string(), - // "4".to_string(), - // "5".to_string(), - // "6".to_string(), - // "7".to_string(), - // "x".to_string(), - // "8".to_string(), - // "3.14159265358".to_string(), - // ] - // ) - // } } diff --git a/src/persistence/data/types.rs b/src/persistence/data/types.rs index 390fdb8..3f9ef27 100644 --- a/src/persistence/data/types.rs +++ b/src/persistence/data/types.rs @@ -53,7 +53,18 @@ impl FromStr for DataTypeValue { fn from_str(s: &str) -> Result { Ok(match s.as_ref() { "String" => String::default().into(), + "i128" => i128::default().into(), + "i64" => i64::default().into(), "i32" => i32::default().into(), + "i16" => i16::default().into(), + "i8" => i8::default().into(), + "u128" => u128::default().into(), + "u64" => u64::default().into(), + "u32" => u32::default().into(), + "u16" => u16::default().into(), + "u8" => u8::default().into(), + "f64" => f64::default().into(), + "f32" 
=> f32::default().into(), _ => unreachable!(), }) } @@ -71,13 +82,17 @@ impl DataType for String { unsafe { (*archived_ptr).to_string() }.into() } + fn advance_pointer_for_padding(&self, pointer: &mut *const u8, start_pointer: *const u8) { + *pointer = advance_pointer_for_padding(*pointer, start_pointer, 4); + } + fn advance_pointer(&self, pointer: &mut *const u8) { *pointer = unsafe { pointer.add(size_of::()) }; } } macro_rules! impl_datatype { - ($datatype:ty, $archived_datatype:ty) => { + ($datatype:ty, $archived_datatype:ty, $datatype_value:expr) => { impl DataType for $datatype { fn advance_accum(&self, accum: &mut usize) { *accum = advance_accum_for_padding(*accum, size_of::<$archived_datatype>()); @@ -91,7 +106,12 @@ macro_rules! impl_datatype { size_of::<$archived_datatype>(), ); let archived_ptr: *const $archived_datatype = current_pointer.cast(); - unsafe { (*archived_ptr).to_string() }.into() + + $datatype_value(unsafe { (*archived_ptr) }.into()) + } + + fn advance_pointer_for_padding(&self, pointer: &mut *const u8, start_pointer: *const u8) { + *pointer = advance_pointer_for_padding(*pointer, start_pointer, size_of::<$archived_datatype>()); } fn advance_pointer(&self, pointer: &mut *const u8) { @@ -101,15 +121,15 @@ macro_rules! impl_datatype { }; } -impl_datatype! {i128, ArchivedI128} -impl_datatype! {i64, ArchivedI64} -impl_datatype! {i32, ArchivedI32} -impl_datatype! {i16, ArchivedI16} -impl_datatype! {i8, i8} -impl_datatype! {u128, ArchivedU128} -impl_datatype! {u64, ArchivedU64} -impl_datatype! {u32, ArchivedU32} -impl_datatype! {u16, ArchivedU16} -impl_datatype! {u8, u8} -impl_datatype! {f64, ArchivedF64} -impl_datatype! {f32, ArchivedF32} +impl_datatype! {i128, ArchivedI128, DataTypeValue::I128} +impl_datatype! {i64, ArchivedI64, DataTypeValue::I64} +impl_datatype! {i32, ArchivedI32, DataTypeValue::I32} +impl_datatype! {i16, ArchivedI16, DataTypeValue::I16} +impl_datatype! {i8, i8, DataTypeValue::I8} +impl_datatype! 
{u128, ArchivedU128, DataTypeValue::U128} +impl_datatype! {u64, ArchivedU64, DataTypeValue::U64} +impl_datatype! {u32, ArchivedU32, DataTypeValue::U32} +impl_datatype! {u16, ArchivedU16, DataTypeValue::U16} +impl_datatype! {u8, u8, DataTypeValue::U8} +impl_datatype! {f64, ArchivedF64, DataTypeValue::F64} +impl_datatype! {f32, ArchivedF32, DataTypeValue::F32} From ea2529d8f2fdd54eedf515aad501795abeeb8d8f Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Thu, 19 Dec 2024 11:52:20 +0300 Subject: [PATCH 20/41] Run `cargo fmt` --- src/persistence/data/rkyv_data.rs | 18 ++++-------------- src/persistence/data/types.rs | 12 ++++++++++-- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/persistence/data/rkyv_data.rs b/src/persistence/data/rkyv_data.rs index 642a362..688df87 100644 --- a/src/persistence/data/rkyv_data.rs +++ b/src/persistence/data/rkyv_data.rs @@ -1,5 +1,5 @@ use crate::persistence::data::types::DataTypeValue; -use std::{os::unix::net::UnixDatagram, str::FromStr}; +use std::str::FromStr; pub fn parse_archived_row, S2: AsRef>( buf: &[u8], @@ -58,7 +58,6 @@ mod test { ) } - #[derive(Archive, Serialize, Deserialize, Debug)] struct Struct2 { pub int1: i32, @@ -66,15 +65,9 @@ mod test { #[test] fn test_parse_archived_row_int() { - let buffer = rkyv::to_bytes::(&Struct2 { - int1: 3, - }) - .unwrap(); + let buffer = rkyv::to_bytes::(&Struct2 { int1: 3 }).unwrap(); let parsed = parse_archived_row(&buffer, &vec![("int1", "i32")]); - assert_eq!( - parsed, - [DataTypeValue::I32(3)] - ) + assert_eq!(parsed, [DataTypeValue::I32(3)]) } #[derive(Archive, Serialize, Deserialize, Debug)] @@ -89,10 +82,7 @@ mod test { }) .unwrap(); let parsed = parse_archived_row(&buffer, &vec![("float1", "f64")]); - assert_eq!( - parsed, - [DataTypeValue::F64(3.14159265358)] - ) + assert_eq!(parsed, [DataTypeValue::F64(3.14159265358)]) } #[derive(Archive, Serialize, Deserialize, Debug)] diff --git a/src/persistence/data/types.rs b/src/persistence/data/types.rs index 
3f9ef27..41a49e9 100644 --- a/src/persistence/data/types.rs +++ b/src/persistence/data/types.rs @@ -110,8 +110,16 @@ macro_rules! impl_datatype { $datatype_value(unsafe { (*archived_ptr) }.into()) } - fn advance_pointer_for_padding(&self, pointer: &mut *const u8, start_pointer: *const u8) { - *pointer = advance_pointer_for_padding(*pointer, start_pointer, size_of::<$archived_datatype>()); + fn advance_pointer_for_padding( + &self, + pointer: &mut *const u8, + start_pointer: *const u8, + ) { + *pointer = advance_pointer_for_padding( + *pointer, + start_pointer, + size_of::<$archived_datatype>(), + ); } fn advance_pointer(&self, pointer: &mut *const u8) { From 0ba0db18d15dc414c7c12722daf9b71afe19a23c Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Thu, 19 Dec 2024 11:55:14 +0300 Subject: [PATCH 21/41] Remove unused imports --- src/page/util.rs | 6 ++---- src/persistence/data/rkyv_data.rs | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index b9174d8..d855934 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -11,9 +11,7 @@ use crate::page::ty::PageType; use crate::page::General; use crate::persistence::data::rkyv_data::parse_archived_row; use crate::persistence::data::DataTypeValue; -use crate::{ - space, DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE, -}; +use crate::{DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; pub fn map_index_pages_to_general( pages: Vec>, @@ -298,7 +296,7 @@ mod test { Interval, Link, PageType, SpaceInfoData, DATA_VERSION, PAGE_SIZE, }; - use super::{persist_page, read_index_pages}; + use super::persist_page; #[test] fn test_map() { diff --git a/src/persistence/data/rkyv_data.rs b/src/persistence/data/rkyv_data.rs index 688df87..97f4811 100644 --- a/src/persistence/data/rkyv_data.rs +++ b/src/persistence/data/rkyv_data.rs @@ -37,7 +37,7 @@ pub fn parse_archived_row, S2: AsRef>( #[cfg(test)] mod test { 
use super::parse_archived_row; - use crate::persistence::data::{self, types::DataTypeValue}; + use crate::persistence::data::types::DataTypeValue; use rkyv::{Archive, Deserialize, Serialize}; #[derive(Archive, Serialize, Deserialize, Debug)] From ac882f985e64cf65fd3044ad739e7cc6da130cb7 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Thu, 19 Dec 2024 19:57:35 +0300 Subject: [PATCH 22/41] Start implementing a test for reading row data --- src/page/util.rs | 115 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/src/page/util.rs b/src/page/util.rs index d855934..e5b88c8 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -282,6 +282,7 @@ fn read_data_pages( #[cfg(test)] mod test { + use rkyv::{Archive, Deserialize, Serialize}; use scc::ebr::Guard; use scc::TreeIndex; use std::collections::HashMap; @@ -420,7 +421,7 @@ mod test { if Path::new(filename).exists() { remove_file(filename).unwrap(); } - let mut file = std::fs::File::create(filename).unwrap(); + let mut file: std::fs::File = std::fs::File::create(filename).unwrap(); let intervals = vec![Interval(1, 3), Interval(5, 8)]; @@ -447,4 +448,116 @@ mod test { assert_eq!(index_pages[0].index_values[0].link.offset, 0); assert_eq!(index_pages[0].index_values[0].link.length, 0); } + + #[derive(Archive, Debug, Deserialize, Serialize)] + struct TableStruct { + int1: i32, + string1: String, + } + + #[test] + fn test_read_table_data() { + let filename = "tests/data/table_with_rows.wt"; + if Path::new(filename).exists() { + remove_file(filename).unwrap(); + } + let mut file: std::fs::File = std::fs::File::create(filename).unwrap(); + + let space_info_header = GeneralHeader { + data_version: DATA_VERSION, + space_id: 1.into(), + page_id: 0.into(), + previous_id: 0.into(), + next_id: 1.into(), + page_type: PageType::SpaceInfo, + data_length: 0u32, + }; + let space_info = SpaceInfoData { + id: 1.into(), + page_count: 4, + name: "test space".to_owned(), + 
row_schema: vec![ + ("int1".to_string(), "i32".to_string()), + ("string1".to_string(), "String".to_string()), + ], + primary_key_fields: vec!["int1".to_string()], + primary_key_intervals: vec![Interval(1, 3)], + secondary_index_types: vec![], + secondary_index_intervals: Default::default(), + data_intervals: vec![], + pk_gen_state: (), + empty_links_list: vec![], + }; + let mut space_info_page = GeneralPage { + header: space_info_header, + inner: space_info + }; + persist_page(&mut space_info_page, &mut file).unwrap(); + + let index1_header = GeneralHeader { + data_version: DATA_VERSION, + space_id: 1.into(), + page_id: 1.into(), + previous_id: 0.into(), + next_id: 2.into(), + page_type: PageType::Index, + data_length: 0, + }; + + let index2_header = GeneralHeader { + data_version: DATA_VERSION, + space_id: 1.into(), + page_id: 2.into(), + previous_id: 0.into(), + next_id: 3.into(), + page_type: PageType::Index, + data_length: 0, + }; + + let data1_header = GeneralHeader { + data_version: DATA_VERSION, + space_id: 1.into(), + page_id: 3.into(), + previous_id: 2.into(), + next_id: 4.into(), + page_type: PageType::Data, + data_length: 0, + }; + + let data1_row1 = TableStruct { + int1: 1, + string1: "first string".to_string(), + }; + + let data1_row2 = TableStruct { + int1: 2, + string1: "second string".to_string(), + }; + + let data1_inner = rkyv::to_bytes::(&data1_row1).unwrap(); + let data2_inner = rkyv::to_bytes::(&data1_row2).unwrap(); + + let data2_header = GeneralHeader { + data_version: DATA_VERSION, + space_id: 1.into(), + page_id: 4.into(), + previous_id: 3.into(), + next_id: 5.into(), + page_type: PageType::Data, + data_length: 0, + }; + + let index_data: IndexData = IndexData { + index_values: vec![ + IndexValue { + key: 0, + link: Link { + page_id: 2.into(), + offset: 0, + length: archived_page.len(), + } + } + ] + }; + } } From cedd6b0da59d71bc3966b2d927ce83f50f85f26c Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 23 Dec 2024 14:22:36 +0300 
Subject: [PATCH 23/41] Create a mock database for `test_read_table_data` test --- src/lib.rs | 7 +-- src/page/mod.rs | 2 +- src/page/util.rs | 94 ++++++++++++++++++++++++----------------- src/util/persistable.rs | 6 +++ 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4ff7da8..9980393 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,9 +9,10 @@ pub use link::Link; pub use data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, map_unique_tree_index, - parse_index_page, parse_page, persist_page, read_index_pages, Data as DataPage, - General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, - SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, + parse_index_page, parse_page, persist_page, read_data_pages, read_index_pages, + Data as DataPage, General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, + PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, + PAGE_SIZE, }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable}; diff --git a/src/page/mod.rs b/src/page/mod.rs index 7fdd596..bc2f1f0 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -15,7 +15,7 @@ pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ map_data_pages_to_general, map_index_pages_to_general, parse_index_page, parse_page, - persist_page, read_index_pages, + persist_page, read_data_pages, read_index_pages, }; // TODO: Move to config diff --git a/src/page/util.rs b/src/page/util.rs index e5b88c8..9a35a99 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -238,7 +238,7 @@ where Ok(result) } -fn read_data_pages( +pub fn read_data_pages( mut file: &mut std::fs::File, ) -> eyre::Result>> { let space_info = parse_space_info::(file)?; @@ -256,7 +256,7 @@ fn read_data_pages( .collect::>()[0] 
.as_str(); let links = match primary_key_type { - "i64" => read_index_pages::(&mut file, &space_info.primary_key_intervals)? + "i32" => read_index_pages::(&mut file, &space_info.primary_key_intervals)? .iter() .map(|index_page| &index_page.index_values) .flatten() @@ -292,9 +292,11 @@ mod test { use crate::page::index::IndexValue; use crate::page::util::read_secondary_index_pages; use crate::page::INNER_PAGE_SIZE; + use crate::persistence::data::DataTypeValue; use crate::{ - map_index_pages_to_general, map_unique_tree_index, GeneralHeader, GeneralPage, IndexData, - Interval, Link, PageType, SpaceInfoData, DATA_VERSION, PAGE_SIZE, + map_index_pages_to_general, map_unique_tree_index, read_data_pages, DataPage, GeneralHeader, GeneralPage, + IndexData, Interval, Link, PageType, SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, + PAGE_SIZE, }; use super::persist_page; @@ -457,6 +459,7 @@ mod test { #[test] fn test_read_table_data() { + // create a test database manually let filename = "tests/data/table_with_rows.wt"; if Path::new(filename).exists() { remove_file(filename).unwrap(); @@ -490,11 +493,11 @@ mod test { }; let mut space_info_page = GeneralPage { header: space_info_header, - inner: space_info + inner: space_info, }; persist_page(&mut space_info_page, &mut file).unwrap(); - let index1_header = GeneralHeader { + let index_header = GeneralHeader { data_version: DATA_VERSION, space_id: 1.into(), page_id: 1.into(), @@ -504,60 +507,75 @@ mod test { data_length: 0, }; - let index2_header = GeneralHeader { + let data_header = GeneralHeader { data_version: DATA_VERSION, space_id: 1.into(), page_id: 2.into(), - previous_id: 0.into(), - next_id: 3.into(), - page_type: PageType::Index, - data_length: 0, - }; - - let data1_header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 1.into(), - page_id: 3.into(), previous_id: 2.into(), next_id: 4.into(), page_type: PageType::Data, data_length: 0, }; - let data1_row1 = TableStruct { + let data_row1 = TableStruct { 
int1: 1, string1: "first string".to_string(), }; - let data1_row2 = TableStruct { + let data_row2 = TableStruct { int1: 2, string1: "second string".to_string(), }; - let data1_inner = rkyv::to_bytes::(&data1_row1).unwrap(); - let data2_inner = rkyv::to_bytes::(&data1_row2).unwrap(); + let data_row1_inner = rkyv::to_bytes::(&data_row1).unwrap(); + let data_row1_offset = GENERAL_HEADER_SIZE; + let data_row1_length = data_row1_inner.len(); - let data2_header = GeneralHeader { - data_version: DATA_VERSION, - space_id: 1.into(), - page_id: 4.into(), - previous_id: 3.into(), - next_id: 5.into(), - page_type: PageType::Data, - data_length: 0, + let data_row2_inner = rkyv::to_bytes::(&data_row2).unwrap(); + let data_row2_offset = data_row1_offset + data_row1_length; + let data_row2_length = data_row2_inner.len(); + + let data_rows12_buffer = [data_row1_inner, data_row2_inner].concat(); + + let mut data_page = GeneralPage::> { + header: data_header, + inner: data_rows12_buffer, }; - let index_data: IndexData = IndexData { + let index_data: IndexData = IndexData:: { index_values: vec![ - IndexValue { - key: 0, + IndexValue:: { + key: 1, + link: Link { + page_id: data_header.page_id, + offset: data_row1_offset as u32, + length: data_row1_length as u32, + }, + }, + IndexValue:: { + key: 2, link: Link { - page_id: 2.into(), - offset: 0, - length: archived_page.len(), - } - } - ] + page_id: data_header.page_id, + offset: data_row2_offset as u32, + length: data_row2_length as u32, + }, + }, + ], + }; + let mut index_page = GeneralPage { + header: index_header, + inner: index_data, }; + + persist_page(&mut index_page, &mut file).unwrap(); + persist_page(&mut data_page, &mut file).unwrap(); + + // read the data from the database + let mut file: std::fs::File = std::fs::File::open(filename).unwrap(); + // let data_pages = read_data_pages::(&mut file).unwrap(); + // assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); + // assert_eq!(data_pages[0][1], DataTypeValue::String("first 
string".to_string())); + // assert_eq!(data_pages[1][0], DataTypeValue::I32(2)); + // assert_eq!(data_pages[1][1], DataTypeValue::String("second string".to_string())); } } diff --git a/src/util/persistable.rs b/src/util/persistable.rs index 958fd09..d140547 100644 --- a/src/util/persistable.rs +++ b/src/util/persistable.rs @@ -20,3 +20,9 @@ where rkyv::to_bytes::(self).unwrap() } } + +impl Persistable for u8 { + fn as_bytes(&self) -> impl AsRef<[u8]> { + rkyv::to_bytes::(self).unwrap() + } +} From e4b338657f7a63780e3453f3216b479a99edef7e Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 23 Dec 2024 15:49:05 +0300 Subject: [PATCH 24/41] Read vectors of `IndexValue` instead of vectors of `IndexData` --- src/page/util.rs | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index 9a35a99..67ae175 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -5,6 +5,7 @@ use eyre::eyre; use rkyv::api::high::HighDeserializer; use rkyv::Archive; +use super::index::IndexValue; use super::{Interval, SpaceInfo}; use crate::page::header::GeneralHeader; use crate::page::ty::PageType; @@ -132,10 +133,8 @@ pub fn parse_data_record( index ))); } - file.seek(io::SeekFrom::Current(offset as i64))?; let mut buffer = vec![0u8; length as usize]; - file.read_exact(&mut buffer)?; let parsed_record = parse_archived_row(&buffer, &schema); @@ -145,7 +144,7 @@ pub fn parse_data_record( pub fn parse_index_page( file: &mut std::fs::File, index: u32, -) -> eyre::Result>> +) -> eyre::Result>> where T: Archive, ::Archived: rkyv::Deserialize>, @@ -156,9 +155,10 @@ where let mut buffer: Vec = vec![0u8; header.data_length as usize]; file.read_exact(&mut buffer)?; let archived = - unsafe { rkyv::access_unchecked::<> as Archive>::Archived>(&buffer[..]) }; - let index_records: Vec> = - rkyv::deserialize(archived).expect("data should be valid"); + unsafe { rkyv::access_unchecked::< as 
Archive>::Archived>(&buffer[..]) }; + let index_records: Vec> = rkyv::deserialize::, _>(archived) + .expect("data should be valid") + .index_values; Ok(index_records) } @@ -183,7 +183,7 @@ pub fn read_secondary_index_pages( file: &mut std::fs::File, index_name: &str, intervals: Vec, -) -> eyre::Result>> +) -> eyre::Result>> where T: Archive, ::Archived: rkyv::Deserialize>, @@ -209,7 +209,7 @@ where } } - let mut result: Vec> = vec![]; + let mut result: Vec> = vec![]; for interval in intervals.iter() { for index in interval.0..interval.1 { let mut index_records = parse_index_page::(file, index as u32)?; @@ -223,12 +223,12 @@ where pub fn read_index_pages( file: &mut std::fs::File, intervals: &Vec, -) -> eyre::Result>> +) -> eyre::Result>> where T: Archive, ::Archived: rkyv::Deserialize>, { - let mut result: Vec> = vec![]; + let mut result: Vec> = vec![]; for interval in intervals.iter() { for index in interval.0..interval.1 { let mut index_records = parse_index_page::(file, index as u32)?; @@ -258,8 +258,6 @@ pub fn read_data_pages( let links = match primary_key_type { "i32" => read_index_pages::(&mut file, &space_info.primary_key_intervals)? 
.iter() - .map(|index_page| &index_page.index_values) - .flatten() .map(|index_value| index_value.link) .collect::>(), _ => panic!("Unsupported primary key data type `{}`", primary_key_type), @@ -294,9 +292,9 @@ mod test { use crate::page::INNER_PAGE_SIZE; use crate::persistence::data::DataTypeValue; use crate::{ - map_index_pages_to_general, map_unique_tree_index, read_data_pages, DataPage, GeneralHeader, GeneralPage, - IndexData, Interval, Link, PageType, SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, - PAGE_SIZE, + map_index_pages_to_general, map_unique_tree_index, read_data_pages, DataPage, + GeneralHeader, GeneralPage, IndexData, Interval, Link, PageType, SpaceInfoData, + DATA_VERSION, GENERAL_HEADER_SIZE, PAGE_SIZE, }; use super::persist_page; @@ -382,8 +380,8 @@ mod test { space_info_page } - fn create_index_pages(intervals: &Vec) -> Vec>>> { - let mut index_pages = Vec::>>>::new(); + fn create_index_pages(intervals: &Vec) -> Vec>> { + let mut index_pages = Vec::>>::new(); for interval in intervals { for index in interval.0..interval.1 { @@ -408,7 +406,7 @@ mod test { }; let index_page = GeneralPage { header: index_header, - inner: vec![index_data], + inner: index_data, }; index_pages.push(index_page); } @@ -444,11 +442,11 @@ mod test { vec![Interval(1, 2), Interval(5, 6)], ) .unwrap(); - assert_eq!(index_pages[0].index_values.len(), 1); - assert_eq!(index_pages[0].index_values[0].key, "first_value"); - assert_eq!(index_pages[0].index_values[0].link.page_id, 2.into()); - assert_eq!(index_pages[0].index_values[0].link.offset, 0); - assert_eq!(index_pages[0].index_values[0].link.length, 0); + assert_eq!(index_pages.len(), 2); + assert_eq!(index_pages[0].key, "first_value"); + assert_eq!(index_pages[0].link.page_id, 2.into()); + assert_eq!(index_pages[0].link.offset, 0); + assert_eq!(index_pages[0].link.length, 0); } #[derive(Archive, Debug, Deserialize, Serialize)] @@ -528,7 +526,7 @@ mod test { }; let data_row1_inner = 
rkyv::to_bytes::(&data_row1).unwrap(); - let data_row1_offset = GENERAL_HEADER_SIZE; + let data_row1_offset = 0; let data_row1_length = data_row1_inner.len(); let data_row2_inner = rkyv::to_bytes::(&data_row2).unwrap(); @@ -572,7 +570,7 @@ mod test { // read the data from the database let mut file: std::fs::File = std::fs::File::open(filename).unwrap(); - // let data_pages = read_data_pages::(&mut file).unwrap(); + let data_pages: Vec> = read_data_pages::(&mut file).unwrap(); // assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); // assert_eq!(data_pages[0][1], DataTypeValue::String("first string".to_string())); // assert_eq!(data_pages[1][0], DataTypeValue::I32(2)); From 878f061d071553b9906bc7830e2ae1c038ff458f Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 23 Dec 2024 16:23:40 +0300 Subject: [PATCH 25/41] Make `test_read_table_data` test pass --- src/page/util.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index 67ae175..304171c 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -135,6 +135,7 @@ pub fn parse_data_record( } file.seek(io::SeekFrom::Current(offset as i64))?; let mut buffer = vec![0u8; length as usize]; + file.read_exact(&mut buffer)?; let parsed_record = parse_archived_row(&buffer, &schema); @@ -482,7 +483,7 @@ mod test { ("string1".to_string(), "String".to_string()), ], primary_key_fields: vec!["int1".to_string()], - primary_key_intervals: vec![Interval(1, 3)], + primary_key_intervals: vec![Interval(1, 2)], secondary_index_types: vec![], secondary_index_intervals: Default::default(), data_intervals: vec![], @@ -571,9 +572,9 @@ mod test { // read the data from the database let mut file: std::fs::File = std::fs::File::open(filename).unwrap(); let data_pages: Vec> = read_data_pages::(&mut file).unwrap(); - // assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); - // assert_eq!(data_pages[0][1], DataTypeValue::String("first string".to_string())); - // 
assert_eq!(data_pages[1][0], DataTypeValue::I32(2)); - // assert_eq!(data_pages[1][1], DataTypeValue::String("second string".to_string())); + assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); + assert_eq!(data_pages[0][1], DataTypeValue::String("first string".to_string())); + assert_eq!(data_pages[1][0], DataTypeValue::I32(2)); + assert_eq!(data_pages[1][1], DataTypeValue::String("second string".to_string())); } } From cf90edae826283de9a19d1ddd59310599542e91d Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 23 Dec 2024 16:32:45 +0300 Subject: [PATCH 26/41] Run `cargo fmt` --- src/lib.rs | 6 +++--- src/page/util.rs | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f23fda4..8945693 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,9 +10,9 @@ pub use data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, map_unique_tree_index, parse_index_page, parse_page, persist_page, read_data_pages, read_index_pages, seek_by_link, - seek_to_page_start, update_at, Data as DataPage, General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, - PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, - PAGE_SIZE, + seek_to_page_start, update_at, Data as DataPage, General as GeneralPage, GeneralHeader, + IndexPage as IndexData, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, + GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable}; diff --git a/src/page/util.rs b/src/page/util.rs index 9bc26b7..9ce541f 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -607,8 +607,14 @@ mod test { let mut file: std::fs::File = std::fs::File::open(filename).unwrap(); let data_pages: Vec> = read_data_pages::(&mut file).unwrap(); assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); - 
assert_eq!(data_pages[0][1], DataTypeValue::String("first string".to_string())); + assert_eq!( + data_pages[0][1], + DataTypeValue::String("first string".to_string()) + ); assert_eq!(data_pages[1][0], DataTypeValue::I32(2)); - assert_eq!(data_pages[1][1], DataTypeValue::String("second string".to_string())); + assert_eq!( + data_pages[1][1], + DataTypeValue::String("second string".to_string()) + ); } } From bdf019f79460e1c0eea97cc1e64a8ddc395089c0 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 23 Dec 2024 17:48:29 +0300 Subject: [PATCH 27/41] Add `parse_data_page` function --- src/lib.rs | 2 +- src/page/mod.rs | 2 +- src/page/util.rs | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8945693..40d689a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,7 +9,7 @@ pub use link::Link; pub use data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, map_unique_tree_index, - parse_index_page, parse_page, persist_page, read_data_pages, read_index_pages, seek_by_link, + parse_index_page, parse_page, parse_data_page, persist_page, read_data_pages, read_index_pages, seek_by_link, seek_to_page_start, update_at, Data as DataPage, General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, diff --git a/src/page/mod.rs b/src/page/mod.rs index 2bb3883..f0c5cd4 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -14,7 +14,7 @@ pub use index::{map_tree_index, map_unique_tree_index, IndexPage}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ - map_data_pages_to_general, map_index_pages_to_general, parse_index_page, parse_page, + map_data_pages_to_general, map_index_pages_to_general, parse_index_page, parse_page, parse_data_page, persist_page, read_data_pages, read_index_pages, seek_by_link, 
seek_to_page_start, update_at, }; diff --git a/src/page/util.rs b/src/page/util.rs index 9ce541f..469f160 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -152,6 +152,31 @@ where }) } +pub fn parse_data_page( + file: &mut std::fs::File, + index: u32, +) -> eyre::Result>> { + seek_to_page_start(file, index)?; + let header = parse_general_header(file)?; + + let mut buffer = [0u8; INNER_PAGE_SIZE]; + if header.next_id == 0.into() { + file.read(&mut buffer)?; + } else { + file.read_exact(&mut buffer)?; + } + + let data = DataPage { + data: buffer, + length: header.data_length, + }; + + Ok(GeneralPage { + header, + inner: data, + }) +} + pub fn parse_data_record( file: &mut std::fs::File, index: u32, From e9ffb39d98fa029f648661dc6c247f4dd5e70d94 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 24 Dec 2024 10:18:02 +0300 Subject: [PATCH 28/41] Support more primary key data types --- src/page/util.rs | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index 469f160..13e1b75 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -298,11 +298,18 @@ where Ok(result) } +fn read_links (mut file: &mut std::fs::File, space_info: &SpaceInfo) -> eyre::Result> { + Ok(read_index_pages::(&mut file, &space_info.primary_key_intervals)? + .iter() + .map(|index_value| index_value.link) + .collect::>()) +} + pub fn read_data_pages( mut file: &mut std::fs::File, ) -> eyre::Result>> { let space_info = parse_space_info::(file)?; - let primary_key_fields = space_info.primary_key_fields; + let primary_key_fields = &space_info.primary_key_fields; if primary_key_fields.len() != 1 { panic!("Currently only single primary key is supported"); } @@ -316,10 +323,19 @@ pub fn read_data_pages( .collect::>()[0] .as_str(); let links = match primary_key_type { - "i32" => read_index_pages::(&mut file, &space_info.primary_key_intervals)? 
- .iter() - .map(|index_value| index_value.link) - .collect::>(), + "String" => read_links::(&mut file, &space_info)?, + "i128" => read_links::(&mut file, &space_info)?, + "i64" => read_links::(&mut file, &space_info)?, + "i32" => read_links::(&mut file, &space_info)?, + "i16" => read_links::(&mut file, &space_info)?, + "i8" => read_links::(&mut file, &space_info)?, + "u128" => read_links::(&mut file, &space_info)?, + "u64" => read_links::(&mut file, &space_info)?, + "u32" => read_links::(&mut file, &space_info)?, + "u16" => read_links::(&mut file, &space_info)?, + "u8" => read_links::(&mut file, &space_info)?, + "f64" => read_links::(&mut file, &space_info)?, + "f32" => read_links::(&mut file, &space_info)?, _ => panic!("Unsupported primary key data type `{}`", primary_key_type), }; From 6cc253014f4d4f85455e4a2189b98143e66f4db2 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 24 Dec 2024 14:35:07 +0300 Subject: [PATCH 29/41] Make intervals closed --- src/page/util.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index 13e1b75..386c65d 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -271,7 +271,7 @@ where let mut result: Vec> = vec![]; for interval in intervals.iter() { - for index in interval.0..interval.1 { + for index in interval.0..=interval.1 { let mut index_records = parse_index_page::(file, index as u32)?; result.append(&mut index_records); } @@ -290,7 +290,7 @@ where { let mut result: Vec> = vec![]; for interval in intervals.iter() { - for index in interval.0..interval.1 { + for index in interval.0..=interval.1 { let mut index_records = parse_index_page::(file, index as u32)?; result.append(&mut index_records); } @@ -460,7 +460,7 @@ mod test { let mut index_pages = Vec::>>::new(); for interval in intervals { - for index in interval.0..interval.1 { + for index in interval.0..=interval.1 { let index_header = GeneralHeader { data_version: DATA_VERSION, space_id: 1.into(), @@ 
-499,7 +499,7 @@ mod test { } let mut file: std::fs::File = std::fs::File::create(filename).unwrap(); - let intervals = vec![Interval(1, 3), Interval(5, 8)]; + let intervals = vec![Interval(1, 2), Interval(5, 7)]; // create the space page let mut space_info_page = create_space_with_intervals(&intervals); @@ -558,7 +558,7 @@ mod test { ("string1".to_string(), "String".to_string()), ], primary_key_fields: vec!["int1".to_string()], - primary_key_intervals: vec![Interval(1, 2)], + primary_key_intervals: vec![Interval(1, 1)], secondary_index_types: vec![], secondary_index_intervals: Default::default(), data_intervals: vec![], From 4b9c15c2784f5cb1a2bc302ccbd6782ee326a69e Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 24 Dec 2024 14:37:17 +0300 Subject: [PATCH 30/41] Fix a broken test --- src/page/util.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page/util.rs b/src/page/util.rs index 386c65d..31c9674 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -518,7 +518,7 @@ mod test { vec![Interval(1, 2), Interval(5, 6)], ) .unwrap(); - assert_eq!(index_pages.len(), 2); + assert_eq!(index_pages.len(), 4); assert_eq!(index_pages[0].key, "first_value"); assert_eq!(index_pages[0].link.page_id, 2.into()); assert_eq!(index_pages[0].link.offset, 0); From 9d22e371f397e2b2885cb776782c5f0988b638dc Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 24 Dec 2024 15:09:52 +0300 Subject: [PATCH 31/41] Add `read_rows_schema` function --- src/lib.rs | 8 ++++---- src/page/mod.rs | 5 +++-- src/page/util.rs | 22 +++++++++++++++++----- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 40d689a..3351a68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,10 +9,10 @@ pub use link::Link; pub use data_bucket_codegen::SizeMeasure; pub use page::{ map_data_pages_to_general, map_index_pages_to_general, map_tree_index, map_unique_tree_index, - parse_index_page, parse_page, parse_data_page, persist_page, 
read_data_pages, read_index_pages, seek_by_link, - seek_to_page_start, update_at, Data as DataPage, General as GeneralPage, GeneralHeader, - IndexPage as IndexData, Interval, PageType, SpaceInfo as SpaceInfoData, DATA_VERSION, - GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, + parse_data_page, parse_index_page, parse_page, persist_page, read_data_pages, read_index_pages, + read_rows_schema, seek_by_link, seek_to_page_start, update_at, Data as DataPage, + General as GeneralPage, GeneralHeader, IndexPage as IndexData, Interval, PageType, + SpaceInfo as SpaceInfoData, DATA_VERSION, GENERAL_HEADER_SIZE, INNER_PAGE_SIZE, PAGE_SIZE, }; pub use persistence::{PersistableIndex, PersistableTable}; pub use util::{align, Persistable, SizeMeasurable}; diff --git a/src/page/mod.rs b/src/page/mod.rs index f0c5cd4..91ec13a 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -14,8 +14,9 @@ pub use index::{map_tree_index, map_unique_tree_index, IndexPage}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ - map_data_pages_to_general, map_index_pages_to_general, parse_index_page, parse_page, parse_data_page, - persist_page, read_data_pages, read_index_pages, seek_by_link, seek_to_page_start, update_at, + map_data_pages_to_general, map_index_pages_to_general, parse_data_page, parse_index_page, + parse_page, persist_page, read_data_pages, read_index_pages, read_rows_schema, seek_by_link, + seek_to_page_start, update_at, }; // TODO: Move to config diff --git a/src/page/util.rs b/src/page/util.rs index 31c9674..d89dd98 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -298,11 +298,23 @@ where Ok(result) } -fn read_links (mut file: &mut std::fs::File, space_info: &SpaceInfo) -> eyre::Result> { - Ok(read_index_pages::(&mut file, &space_info.primary_key_intervals)? 
- .iter() - .map(|index_value| index_value.link) - .collect::>()) +fn read_links( + mut file: &mut std::fs::File, + space_info: &SpaceInfo, +) -> eyre::Result> { + Ok( + read_index_pages::(&mut file, &space_info.primary_key_intervals)? + .iter() + .map(|index_value| index_value.link) + .collect::>(), + ) +} + +pub fn read_rows_schema( + mut file: &mut std::fs::File, +) -> eyre::Result> { + let space_info = parse_space_info::(file)?; + Ok(space_info.row_schema) } pub fn read_data_pages( From 6dd3b95eea5ce75bd1e6ef626436e1e2203841ee Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Tue, 24 Dec 2024 16:01:22 +0300 Subject: [PATCH 32/41] Add `Display` trait to `DataValueType` --- src/persistence/data/types.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/persistence/data/types.rs b/src/persistence/data/types.rs index 41a49e9..7ccbb3d 100644 --- a/src/persistence/data/types.rs +++ b/src/persistence/data/types.rs @@ -1,5 +1,6 @@ use std::str::FromStr; +use derive_more::derive::Display; use derive_more::From; use rkyv::primitive::{ ArchivedF32, ArchivedF64, ArchivedI128, ArchivedI16, ArchivedI32, ArchivedI64, ArchivedU128, @@ -10,7 +11,7 @@ use rkyv::string::ArchivedString; use crate::persistence::data::util::{advance_accum_for_padding, advance_pointer_for_padding}; use crate::persistence::data::DataType; -#[derive(Debug, From, PartialEq)] +#[derive(Debug, Display,From, PartialEq)] pub enum DataTypeValue { String(String), I128(i128), From 94f25fd2985b7a236210fd5ea29d010644143bc7 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 25 Dec 2024 10:23:43 +0300 Subject: [PATCH 33/41] Add `PageIterator` --- src/page/iterators.rs | 72 +++++++++++++++++++++++++++++++++++ src/page/mod.rs | 2 +- src/persistence/data/types.rs | 2 +- 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 src/page/iterators.rs diff --git a/src/page/iterators.rs b/src/page/iterators.rs new file mode 100644 index 0000000..423ab7f --- /dev/null +++ 
b/src/page/iterators.rs @@ -0,0 +1,72 @@ +use crate::Link; + +use super::Interval; + +pub struct PageIterator { + intervals: Vec, + current_intervals_index: usize, + current_position_in_interval: usize, +} + +impl PageIterator { + pub fn new(intervals: Vec) -> PageIterator { + PageIterator { + current_intervals_index: 0, + current_position_in_interval: if intervals.len() > 0 { intervals[0].0 } else { 0 }, + intervals, + } + } +} + +impl Iterator for PageIterator { + type Item = u32; + + fn next(&mut self) -> Option { + let mut result: Option = None; + + if self.current_intervals_index >= self.intervals.len() { + result = None + } else if self.current_position_in_interval + >= self.intervals[self.current_intervals_index].0 + && self.current_position_in_interval <= self.intervals[self.current_intervals_index].1 + { + result = Some(self.current_position_in_interval as u32); + self.current_position_in_interval += 1; + } else if self.current_position_in_interval > self.intervals[self.current_intervals_index].1 + { + self.current_intervals_index += 1; + if self.current_intervals_index >= self.intervals.len() { + result = None; + } else { + self.current_position_in_interval = self.intervals[self.current_intervals_index].0; + result = Some(self.current_position_in_interval as u32); + self.current_position_in_interval += 1; + } + } + + result + } +} + +#[cfg(test)] +mod test { + use crate::Interval; + + use super::PageIterator; + + #[test] + fn test_page_iterator() { + let interval1 = Interval(1, 2); + let interval2 = Interval(5, 7); + let page_iterator = PageIterator::new(vec![interval1, interval2]); + let collected = page_iterator.collect::>(); + assert_eq!(collected, vec![1, 2, 5, 6, 7]); + } + + #[test] + fn test_page_iterator_empty() { + let page_iterator = PageIterator::new(vec![]); + let collected = page_iterator.collect::>(); + assert_eq!(collected, Vec::::new()); + } +} diff --git a/src/page/mod.rs b/src/page/mod.rs index 91ec13a..b12401a 100644 --- 
a/src/page/mod.rs +++ b/src/page/mod.rs @@ -1,10 +1,10 @@ mod data; mod header; mod index; +mod iterators; mod space_info; mod ty; mod util; - use derive_more::{Display, From}; use rkyv::{Archive, Deserialize, Serialize}; diff --git a/src/persistence/data/types.rs b/src/persistence/data/types.rs index 7ccbb3d..c3d0d31 100644 --- a/src/persistence/data/types.rs +++ b/src/persistence/data/types.rs @@ -11,7 +11,7 @@ use rkyv::string::ArchivedString; use crate::persistence::data::util::{advance_accum_for_padding, advance_pointer_for_padding}; use crate::persistence::data::DataType; -#[derive(Debug, Display,From, PartialEq)] +#[derive(Debug, Display, From, PartialEq)] pub enum DataTypeValue { String(String), I128(i128), From 0fc7a8a106caffbeb3dee41e2e73d9e40e460253 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 25 Dec 2024 12:06:53 +0300 Subject: [PATCH 34/41] Add `LinkIterator` --- src/page/iterators.rs | 108 ++++++++++++++++++++++++++++++++++++++++-- src/page/util.rs | 16 ++++--- 2 files changed, 112 insertions(+), 12 deletions(-) diff --git a/src/page/iterators.rs b/src/page/iterators.rs index 423ab7f..b43ecfe 100644 --- a/src/page/iterators.rs +++ b/src/page/iterators.rs @@ -1,6 +1,10 @@ -use crate::Link; +use std::io::Read; -use super::Interval; +use rkyv::{api::high::HighDeserializer, Archive}; + +use crate::{page::util::parse_general_header, IndexData, Link}; + +use super::{index::IndexValue, seek_to_page_start, Interval}; pub struct PageIterator { intervals: Vec, @@ -12,7 +16,11 @@ impl PageIterator { pub fn new(intervals: Vec) -> PageIterator { PageIterator { current_intervals_index: 0, - current_position_in_interval: if intervals.len() > 0 { intervals[0].0 } else { 0 }, + current_position_in_interval: if intervals.len() > 0 { + intervals[0].0 + } else { + 0 + }, intervals, } } @@ -48,11 +56,77 @@ impl Iterator for PageIterator { } } +struct LinksIterator<'a, T> +where + T: Archive, + ::Archived: rkyv::Deserialize>, +{ + file: &'a mut std::fs::File, 
+ page_id: u32, + index_records: Option>>, + index_record_index: usize, +} + +impl LinksIterator<'_, T> +where + T: Archive, + ::Archived: rkyv::Deserialize>, +{ + pub fn new(file: &mut std::fs::File, page_id: u32) -> LinksIterator<'_, T> { + LinksIterator { + file, + page_id, + index_records: None, + index_record_index: 0, + } + } +} + +impl Iterator for LinksIterator<'_, T> +where + T: Archive, + ::Archived: rkyv::Deserialize>, +{ + type Item = Link; + + fn next(&mut self) -> Option { + if self.index_records.is_none() { + seek_to_page_start(&mut self.file, self.page_id).expect("page should be seekable"); + let header = parse_general_header(&mut self.file).expect("header should be readable"); + + let mut buffer: Vec = vec![0u8; header.data_length as usize]; + self.file + .read_exact(&mut buffer) + .expect("index data should be readable"); + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + self.index_records = Some( + rkyv::deserialize::, _>(archived) + .expect("data should be valid") + .index_values, + ); + } + + if self.index_record_index < self.index_records.as_deref().unwrap().len() { + let result = Some( + self.index_records.as_deref().unwrap()[self.index_record_index] + .link + .clone(), + ); + self.index_record_index += 1; + result + } else { + None + } + } +} + #[cfg(test)] mod test { - use crate::Interval; + use crate::{page::PageId, Interval, Link}; - use super::PageIterator; + use super::{LinksIterator, PageIterator}; #[test] fn test_page_iterator() { @@ -69,4 +143,28 @@ mod test { let collected = page_iterator.collect::>(); assert_eq!(collected, Vec::::new()); } + + #[test] + fn test_links_iterator() { + let filename = "tests/data/table_with_rows.wt"; + super::super::util::test::create_test_database_file(filename); + + let mut file = std::fs::File::open(filename).unwrap(); + let links = LinksIterator::<'_, i32>::new(&mut file, 1); + assert_eq!( + links.collect::>(), + vec![ + Link { + page_id: PageId(2), + 
offset: 0, + length: 24 + }, + Link { + page_id: PageId(2), + offset: 24, + length: 28 + } + ] + ); + } } diff --git a/src/page/util.rs b/src/page/util.rs index d89dd98..4aa9667 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -118,7 +118,7 @@ pub fn update_at( Ok(()) } -fn parse_general_header(file: &mut std::fs::File) -> eyre::Result { +pub fn parse_general_header(file: &mut std::fs::File) -> eyre::Result { let mut buffer = [0; GENERAL_HEADER_SIZE]; file.read_exact(&mut buffer)?; let archived = @@ -367,7 +367,7 @@ pub fn read_data_pages( } #[cfg(test)] -mod test { +pub mod test { use rkyv::{Archive, Deserialize, Serialize}; use scc::ebr::Guard; use scc::TreeIndex; @@ -543,10 +543,7 @@ mod test { string1: String, } - #[test] - fn test_read_table_data() { - // create a test database manually - let filename = "tests/data/table_with_rows.wt"; + pub fn create_test_database_file(filename: &str) { if Path::new(filename).exists() { remove_file(filename).unwrap(); } @@ -655,8 +652,13 @@ mod test { persist_page(&mut index_page, &mut file).unwrap(); persist_page(&mut data_page, &mut file).unwrap(); + } + + #[test] + fn test_read_table_data() { + let filename = "tests/data/table_with_rows.wt"; + create_test_database_file(filename); - // read the data from the database let mut file: std::fs::File = std::fs::File::open(filename).unwrap(); let data_pages: Vec> = read_data_pages::(&mut file).unwrap(); assert_eq!(data_pages[0][0], DataTypeValue::I32(1)); From 3de33c157ef70866fa15e27687d9b3ed490d1da2 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 25 Dec 2024 14:50:09 +0300 Subject: [PATCH 35/41] Use absolute seek instead of relative seeks --- src/page/util.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index 4aa9667..f1ba7b9 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -77,16 +77,13 @@ where } pub fn seek_to_page_start(file: &mut std::fs::File, index: u32) -> eyre::Result<()> { - let 
current_position: u64 = file.stream_position()?; - let start_pos = index as i64 * PAGE_SIZE as i64; - file.seek(io::SeekFrom::Current(start_pos - current_position as i64))?; + file.seek(io::SeekFrom::Start(index as u64 * PAGE_SIZE as u64))?; Ok(()) } pub fn seek_by_link(file: &mut std::fs::File, link: Link) -> eyre::Result<()> { - seek_to_page_start(file, link.page_id.0)?; - file.seek(io::SeekFrom::Current(link.offset as i64))?; + file.seek(io::SeekFrom::Start(link.page_id.0 as u64 * PAGE_SIZE as u64 + GENERAL_HEADER_SIZE as u64 + link.offset as u64))?; Ok(()) } From 5db7cb2dba477c207fcbd23a8572f624bbc280b6 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 25 Dec 2024 15:20:47 +0300 Subject: [PATCH 36/41] Add `DataIterator` and a test for it --- src/page/iterators.rs | 70 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/src/page/iterators.rs b/src/page/iterators.rs index b43ecfe..a0f0111 100644 --- a/src/page/iterators.rs +++ b/src/page/iterators.rs @@ -2,9 +2,9 @@ use std::io::Read; use rkyv::{api::high::HighDeserializer, Archive}; -use crate::{page::util::parse_general_header, IndexData, Link}; +use crate::{page::util::parse_general_header, persistence::data::{rkyv_data::parse_archived_row, DataTypeValue}, IndexData, Link}; -use super::{index::IndexValue, seek_to_page_start, Interval}; +use super::{index::IndexValue, seek_by_link, seek_to_page_start, Interval}; pub struct PageIterator { intervals: Vec, @@ -122,9 +122,51 @@ where } } +struct DataIterator<'a> { + file: &'a mut std::fs::File, + schema: Vec<(String, String)>, + links: Vec, + link_index: usize, +} + +impl DataIterator<'_> { + pub fn new(file: &mut std::fs::File, schema: Vec<(String, String)>, mut links: Vec) -> DataIterator<'_> { + links.sort_by(|a, b| (a.page_id, a.offset).partial_cmp(&(b.page_id, b.offset)).unwrap()); + + DataIterator { + file, + schema, + links, + link_index: 0, + } + } +} + +impl Iterator for DataIterator<'_> { + 
type Item = Vec; + + fn next(&mut self) -> Option { + if self.link_index >= self.links.len() { + return None; + } + + let current_link = self.links[self.link_index]; + seek_by_link(&mut self.file, current_link).expect("the seek should be successful"); + let mut buffer = vec![0u8; current_link.length as usize]; + self.file.read_exact(&mut buffer).expect("the data should be read"); + let row = parse_archived_row(&buffer, &self.schema); + + self.link_index += 1; + + Some(row) + } +} + #[cfg(test)] mod test { - use crate::{page::PageId, Interval, Link}; + use crate::{ + page::{self, iterators::DataIterator, util::parse_space_info, PageId}, persistence::data::DataTypeValue, Interval, Link, PAGE_SIZE + }; use super::{LinksIterator, PageIterator}; @@ -146,7 +188,7 @@ mod test { #[test] fn test_links_iterator() { - let filename = "tests/data/table_with_rows.wt"; + let filename = "tests/data/table_links_test.wt"; super::super::util::test::create_test_database_file(filename); let mut file = std::fs::File::open(filename).unwrap(); @@ -167,4 +209,24 @@ mod test { ] ); } + + #[test] + fn test_pages_and_links_iterators() { + let filename = "tests/data/table_pages_and_links_test.wt"; + super::super::util::test::create_test_database_file(filename); + + let mut file = std::fs::File::open(filename).unwrap(); + let space_info = parse_space_info::(&mut file).unwrap(); + let index_intervals = space_info.primary_key_intervals; + + let pages_ids = PageIterator::new(index_intervals).collect::>(); + assert_eq!(pages_ids, vec![1]); + + let links = LinksIterator::<'_, i32>::new(&mut file, pages_ids[0]).collect::>(); + let data_iterator: DataIterator<'_> = DataIterator::new(&mut file, space_info.row_schema, links); + assert_eq!(data_iterator.collect::>(), vec![ + vec![DataTypeValue::I32(1), DataTypeValue::String("first string".to_string())], + vec![DataTypeValue::I32(2), DataTypeValue::String("second string".to_string())] + ]); + } } From a8a24129962795688851404145d5b29acb990e5b Mon Sep 17 
00:00:00 2001 From: Alexander Rodin Date: Wed, 25 Dec 2024 15:32:09 +0300 Subject: [PATCH 37/41] Remove an unused import --- src/page/iterators.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/page/iterators.rs b/src/page/iterators.rs index a0f0111..3db851d 100644 --- a/src/page/iterators.rs +++ b/src/page/iterators.rs @@ -165,7 +165,7 @@ impl Iterator for DataIterator<'_> { #[cfg(test)] mod test { use crate::{ - page::{self, iterators::DataIterator, util::parse_space_info, PageId}, persistence::data::DataTypeValue, Interval, Link, PAGE_SIZE + page::{iterators::DataIterator, util::parse_space_info, PageId}, persistence::data::DataTypeValue, Interval, Link, PAGE_SIZE }; use super::{LinksIterator, PageIterator}; From b46ff541f01b783221326fe946889fe436e780ac Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Fri, 27 Dec 2024 13:42:30 +0300 Subject: [PATCH 38/41] Infer the type of the primary key from SpaceInfo --- src/page/iterators.rs | 319 +++++++++++++++++++++++++++++++++++------- src/page/mod.rs | 5 +- src/page/util.rs | 4 +- 3 files changed, 273 insertions(+), 55 deletions(-) diff --git a/src/page/iterators.rs b/src/page/iterators.rs index 3db851d..e6e4a0e 100644 --- a/src/page/iterators.rs +++ b/src/page/iterators.rs @@ -1,10 +1,14 @@ use std::io::Read; -use rkyv::{api::high::HighDeserializer, Archive}; +use rkyv::{api::high::HighDeserializer, primitive, Archive}; -use crate::{page::util::parse_general_header, persistence::data::{rkyv_data::parse_archived_row, DataTypeValue}, IndexData, Link}; +use crate::{ + page::util::parse_general_header, + persistence::data::{rkyv_data::parse_archived_row, DataTypeValue}, + IndexData, Link, +}; -use super::{index::IndexValue, seek_by_link, seek_to_page_start, Interval}; +use super::{index::IndexValue, seek_by_link, seek_to_page_start, Interval, SpaceInfo}; pub struct PageIterator { intervals: Vec, @@ -56,41 +60,45 @@ impl Iterator for PageIterator { } } -struct LinksIterator<'a, T> -where - T: 
Archive, - ::Archived: rkyv::Deserialize>, -{ +pub struct LinksIterator<'a> { file: &'a mut std::fs::File, page_id: u32, - index_records: Option>>, - index_record_index: usize, + links: Option>, + link_index: usize, + primary_key_type: String, } -impl LinksIterator<'_, T> -where - T: Archive, - ::Archived: rkyv::Deserialize>, -{ - pub fn new(file: &mut std::fs::File, page_id: u32) -> LinksIterator<'_, T> { +impl<'a> LinksIterator<'a> { + pub fn new( + file: &'a mut std::fs::File, + page_id: u32, + space_info: &SpaceInfo, + ) -> LinksIterator<'a> { + let primary_key_fields = &space_info.primary_key_fields; + let primary_key_type = space_info + .row_schema + .iter() + .filter(|(field_name, _field_type)| field_name == &primary_key_fields[0]) + .map(|(_field_name, field_type)| field_type) + .take(1) + .collect::>()[0]; LinksIterator { file, page_id, - index_records: None, - index_record_index: 0, + links: None, + link_index: 0, + primary_key_type: primary_key_type.clone(), } } } -impl Iterator for LinksIterator<'_, T> -where - T: Archive, - ::Archived: rkyv::Deserialize>, -{ +fn parse_index_records(buffer: &[u8]) {} + +impl Iterator for LinksIterator<'_> { type Item = Link; fn next(&mut self) -> Option { - if self.index_records.is_none() { + if self.links.is_none() { seek_to_page_start(&mut self.file, self.page_id).expect("page should be seekable"); let header = parse_general_header(&mut self.file).expect("header should be readable"); @@ -98,23 +106,206 @@ where self.file .read_exact(&mut buffer) .expect("index data should be readable"); - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - self.index_records = Some( - rkyv::deserialize::, _>(archived) - .expect("data should be valid") - .index_values, - ); + + self.links = Some(match self.primary_key_type.as_str() { + "String" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>( + &buffer[..], + ) + }; + let index_records = + rkyv::deserialize::, 
rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "i128" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>( + &buffer[..], + ) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "i64" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "i32" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "i16" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "i8" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "u128" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>( + &buffer[..], + ) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data 
should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "u64" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "u32" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "u16" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "u8" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "f64" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() + } + "f32" => { + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + 
.map(|index_value| index_value.link) + .collect::>() + } + _ => panic!( + "Unsupported primary key data type `{}`", + self.primary_key_type + ), + }); } - if self.index_record_index < self.index_records.as_deref().unwrap().len() { - let result = Some( - self.index_records.as_deref().unwrap()[self.index_record_index] - .link - .clone(), - ); - self.index_record_index += 1; + if self.link_index < self.links.as_deref().unwrap().len() { + let result = Some(self.links.as_deref().unwrap()[self.link_index]); + self.link_index += 1; result } else { None @@ -122,7 +313,7 @@ where } } -struct DataIterator<'a> { +pub struct DataIterator<'a> { file: &'a mut std::fs::File, schema: Vec<(String, String)>, links: Vec, @@ -130,8 +321,16 @@ struct DataIterator<'a> { } impl DataIterator<'_> { - pub fn new(file: &mut std::fs::File, schema: Vec<(String, String)>, mut links: Vec) -> DataIterator<'_> { - links.sort_by(|a, b| (a.page_id, a.offset).partial_cmp(&(b.page_id, b.offset)).unwrap()); + pub fn new( + file: &mut std::fs::File, + schema: Vec<(String, String)>, + mut links: Vec, + ) -> DataIterator<'_> { + links.sort_by(|a, b| { + (a.page_id, a.offset) + .partial_cmp(&(b.page_id, b.offset)) + .unwrap() + }); DataIterator { file, @@ -153,7 +352,9 @@ impl Iterator for DataIterator<'_> { let current_link = self.links[self.link_index]; seek_by_link(&mut self.file, current_link).expect("the seek should be successful"); let mut buffer = vec![0u8; current_link.length as usize]; - self.file.read_exact(&mut buffer).expect("the data should be read"); + self.file + .read_exact(&mut buffer) + .expect("the data should be read"); let row = parse_archived_row(&buffer, &self.schema); self.link_index += 1; @@ -165,7 +366,9 @@ impl Iterator for DataIterator<'_> { #[cfg(test)] mod test { use crate::{ - page::{iterators::DataIterator, util::parse_space_info, PageId}, persistence::data::DataTypeValue, Interval, Link, PAGE_SIZE + page::{iterators::DataIterator, util::parse_space_info, PageId}, + 
persistence::data::DataTypeValue, + Interval, Link, PAGE_SIZE, }; use super::{LinksIterator, PageIterator}; @@ -192,7 +395,8 @@ mod test { super::super::util::test::create_test_database_file(filename); let mut file = std::fs::File::open(filename).unwrap(); - let links = LinksIterator::<'_, i32>::new(&mut file, 1); + let space_info = parse_space_info::(&mut file).unwrap(); + let links = LinksIterator::<'_>::new(&mut file, 1, &space_info); assert_eq!( links.collect::>(), vec![ @@ -217,16 +421,27 @@ mod test { let mut file = std::fs::File::open(filename).unwrap(); let space_info = parse_space_info::(&mut file).unwrap(); - let index_intervals = space_info.primary_key_intervals; + let index_intervals = space_info.primary_key_intervals.clone(); let pages_ids = PageIterator::new(index_intervals).collect::>(); assert_eq!(pages_ids, vec![1]); - let links = LinksIterator::<'_, i32>::new(&mut file, pages_ids[0]).collect::>(); - let data_iterator: DataIterator<'_> = DataIterator::new(&mut file, space_info.row_schema, links); - assert_eq!(data_iterator.collect::>(), vec![ - vec![DataTypeValue::I32(1), DataTypeValue::String("first string".to_string())], - vec![DataTypeValue::I32(2), DataTypeValue::String("second string".to_string())] - ]); + let links = + LinksIterator::<'_>::new(&mut file, pages_ids[0], &space_info).collect::>(); + let data_iterator: DataIterator<'_> = + DataIterator::new(&mut file, space_info.row_schema, links); + assert_eq!( + data_iterator.collect::>(), + vec![ + vec![ + DataTypeValue::I32(1), + DataTypeValue::String("first string".to_string()) + ], + vec![ + DataTypeValue::I32(2), + DataTypeValue::String("second string".to_string()) + ] + ] + ); } } diff --git a/src/page/mod.rs b/src/page/mod.rs index b12401a..c511210 100644 --- a/src/page/mod.rs +++ b/src/page/mod.rs @@ -11,12 +11,13 @@ use rkyv::{Archive, Deserialize, Serialize}; pub use data::Data; pub use header::{GeneralHeader, DATA_VERSION}; pub use index::{map_tree_index, map_unique_tree_index, 
IndexPage}; +pub use iterators::{DataIterator, LinksIterator, PageIterator}; pub use space_info::{Interval, SpaceInfo}; pub use ty::PageType; pub use util::{ map_data_pages_to_general, map_index_pages_to_general, parse_data_page, parse_index_page, - parse_page, persist_page, read_data_pages, read_index_pages, read_rows_schema, seek_by_link, - seek_to_page_start, update_at, + parse_page, parse_space_info, persist_page, read_data_pages, read_index_pages, + read_rows_schema, seek_by_link, seek_to_page_start, update_at, }; // TODO: Move to config diff --git a/src/page/util.rs b/src/page/util.rs index f1ba7b9..d99e42a 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -83,7 +83,9 @@ pub fn seek_to_page_start(file: &mut std::fs::File, index: u32) -> eyre::Result< } pub fn seek_by_link(file: &mut std::fs::File, link: Link) -> eyre::Result<()> { - file.seek(io::SeekFrom::Start(link.page_id.0 as u64 * PAGE_SIZE as u64 + GENERAL_HEADER_SIZE as u64 + link.offset as u64))?; + file.seek(io::SeekFrom::Start( + link.page_id.0 as u64 * PAGE_SIZE as u64 + GENERAL_HEADER_SIZE as u64 + link.offset as u64, + ))?; Ok(()) } From 1812810f3dad170584b2f5134611808620097f6a Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Fri, 27 Dec 2024 13:50:32 +0300 Subject: [PATCH 39/41] Make the code more DRY --- src/page/iterators.rs | 225 +++++++----------------------------------- 1 file changed, 34 insertions(+), 191 deletions(-) diff --git a/src/page/iterators.rs b/src/page/iterators.rs index e6e4a0e..477d7cb 100644 --- a/src/page/iterators.rs +++ b/src/page/iterators.rs @@ -1,6 +1,6 @@ use std::io::Read; -use rkyv::{api::high::HighDeserializer, primitive, Archive}; +use rkyv::{api::high::HighDeserializer, de::Pool, primitive, rancor::Strategy, Archive, DeserializeUnsized}; use crate::{ page::util::parse_general_header, @@ -8,7 +8,7 @@ use crate::{ IndexData, Link, }; -use super::{index::IndexValue, seek_by_link, seek_to_page_start, Interval, SpaceInfo}; +use 
super::{index::{ArchivedIndexValue, IndexValue}, seek_by_link, seek_to_page_start, Interval, SpaceInfo}; pub struct PageIterator { intervals: Vec, @@ -92,7 +92,25 @@ impl<'a> LinksIterator<'a> { } } -fn parse_index_records(buffer: &[u8]) {} +fn parse_links(buffer: &[u8]) -> Vec +where T: Archive, + [ArchivedIndexValue]: DeserializeUnsized<[IndexValue], Strategy> +{ + let archived = unsafe { + rkyv::access_unchecked::< as Archive>::Archived>( + &buffer[..], + ) + }; + let index_records = + rkyv::deserialize::, rkyv::rancor::Error>(archived) + .expect("data should be valid") + .index_values; + + index_records + .iter() + .map(|index_value| index_value.link) + .collect::>() +} impl Iterator for LinksIterator<'_> { type Item = Link; @@ -108,194 +126,19 @@ impl Iterator for LinksIterator<'_> { .expect("index data should be readable"); self.links = Some(match self.primary_key_type.as_str() { - "String" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>( - &buffer[..], - ) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "i128" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>( - &buffer[..], - ) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "i64" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "i32" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let 
index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "i16" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "i8" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "u128" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>( - &buffer[..], - ) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "u64" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "u32" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "u16" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, 
rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "u8" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "f64" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } - "f32" => { - let archived = unsafe { - rkyv::access_unchecked::< as Archive>::Archived>(&buffer[..]) - }; - let index_records = - rkyv::deserialize::, rkyv::rancor::Error>(archived) - .expect("data should be valid") - .index_values; - - index_records - .iter() - .map(|index_value| index_value.link) - .collect::>() - } + "String" => parse_links::(&buffer), + "i128" => parse_links::(&buffer), + "i64" => parse_links::(&buffer), + "i32" => parse_links::(&buffer), + "i16" => parse_links::(&buffer), + "i8" => parse_links::(&buffer), + "u128" => parse_links::(&buffer), + "u64" => parse_links::(&buffer), + "u32" => parse_links::(&buffer), + "u16" => parse_links::(&buffer), + "u8" => parse_links::(&buffer), + "f64" => parse_links::(&buffer), + "f32" => parse_links::(&buffer), _ => panic!( "Unsupported primary key data type `{}`", self.primary_key_type From 7a49d0d9905a7462c959f146bebf3e5896d99d74 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 30 Dec 2024 09:58:11 +0300 Subject: [PATCH 40/41] Remove unused imports and unneeded `mut` --- src/page/iterators.rs | 2 +- src/page/util.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/src/page/iterators.rs b/src/page/iterators.rs index 477d7cb..f0c0634 100644 --- a/src/page/iterators.rs +++ b/src/page/iterators.rs @@ -1,6 +1,6 @@ use std::io::Read; -use rkyv::{api::high::HighDeserializer, de::Pool, primitive, rancor::Strategy, Archive, DeserializeUnsized}; +use rkyv::{de::Pool, rancor::Strategy, Archive, DeserializeUnsized}; use crate::{ page::util::parse_general_header, diff --git a/src/page/util.rs b/src/page/util.rs index d99e42a..9584375 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -310,7 +310,7 @@ fn read_links( } pub fn read_rows_schema( - mut file: &mut std::fs::File, + file: &mut std::fs::File, ) -> eyre::Result> { let space_info = parse_space_info::(file)?; Ok(space_info.row_schema) @@ -379,9 +379,9 @@ pub mod test { use crate::page::INNER_PAGE_SIZE; use crate::persistence::data::DataTypeValue; use crate::{ - map_index_pages_to_general, map_unique_tree_index, read_data_pages, DataPage, + map_index_pages_to_general, map_unique_tree_index, read_data_pages, GeneralHeader, GeneralPage, IndexData, Interval, Link, PageType, SpaceInfoData, - DATA_VERSION, GENERAL_HEADER_SIZE, PAGE_SIZE, + DATA_VERSION, PAGE_SIZE, }; use super::persist_page; From c52f5b89066534c920faaf6d37e48ca8d09470fb Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 30 Dec 2024 14:54:58 +0300 Subject: [PATCH 41/41] Merge branch `main` --- src/page/util.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/page/util.rs b/src/page/util.rs index b1d2d03..0e0c715 100644 --- a/src/page/util.rs +++ b/src/page/util.rs @@ -14,8 +14,6 @@ use crate::persistence::data::rkyv_data::parse_archived_row; use crate::persistence::data::DataTypeValue; use crate::{DataPage, GeneralPage, IndexData, Link, Persistable, GENERAL_HEADER_SIZE, PAGE_SIZE}; -use super::{Interval, SpaceInfo}; - pub fn map_index_pages_to_general(pages: Vec>) -> Vec>> { let mut header = &mut GeneralHeader::new(0.into(), PageType::Index, 0.into()); let mut general_pages = vec![];