Skip to content
Open
33 changes: 33 additions & 0 deletions include/svs/core/data/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,22 @@ void populate_impl(
}
}

template <data::MemoryDataset Data> void populate(std::istream& is, Data& data) {
auto accessor = DefaultWriteAccessor();

size_t num_vectors = data.size();
size_t dims = data.dimensions();

auto max_lines = Dynamic;
auto nvectors = std::min(num_vectors, max_lines);

auto reader = lib::VectorReader<typename Data::element_type>(dims);
for (size_t i = 0; i < nvectors; ++i) {
reader.read(is);
accessor.set(data, i, reader.data());
}
}

// Intercept the native file to perform dispatch on the actual file type.
template <data::MemoryDataset Data, typename WriteAccessor>
void populate_impl(
Expand Down Expand Up @@ -120,6 +136,15 @@ void save(const Dataset& data, const File& file, const lib::UUID& uuid = lib::Ze
return save(data, accessor, file, uuid);
}

template <data::ImmutableMemoryDataset Dataset>
void save(const Dataset& data, std::ostream& os) {
auto accessor = DefaultReadAccessor();
auto writer = svs::io::v1::StreamWriter<void>(os);
for (size_t i = 0; i < data.size(); ++i) {
writer << accessor.get(data, i);
}
}

///
/// @brief Save the dataset as a "*vecs" file.
///
Expand Down Expand Up @@ -169,6 +194,14 @@ lib::lazy_result_t<F, size_t, size_t> load_dataset(const File& file, const F& la
return load_impl(detail::to_native(file), default_accessor, lazy);
}

template <lib::LazyInvocable<size_t, size_t> F>
lib::lazy_result_t<F, size_t, size_t>
load_dataset(std::istream& is, const F& lazy, size_t num_vectors, size_t dims) {
auto data = lazy(num_vectors, dims);
populate(is, data);
return data;
}

// Return whether or not a file is directly loadable via file-extension.
inline bool special_by_file_extension(std::string_view path) {
return (path.ends_with("svs") || path.ends_with("vecs") || path.ends_with("bin"));
Expand Down
87 changes: 79 additions & 8 deletions include/svs/core/data/simple.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,24 +75,42 @@ class GenericSerializer {
}

template <data::ImmutableMemoryDataset Data>
static lib::SaveTable save(const Data& data, const lib::SaveContext& ctx) {
static lib::SaveTable metadata(const Data& data) {
using T = typename Data::element_type;
// UUID used to identify the file.
auto uuid = lib::UUID{};
auto filename = ctx.generate_name("data");
io::save(data, io::NativeFile(filename), uuid);
return lib::SaveTable(
auto table = lib::SaveTable(
serialization_schema,
save_version,
{
{"name", "uncompressed"},
{"binary_file", lib::save(filename.filename())},
{"dims", lib::save(data.dimensions())},
{"num_vectors", lib::save(data.size())},
{"uuid", uuid.str()},
{"eltype", lib::save(datatype_v<T>)},
}
);
return table;
}

template <data::ImmutableMemoryDataset Data, class FileName_t>
static lib::SaveTable
metadata(const Data& data, const FileName_t& filename, const lib::UUID& uuid) {
auto table = metadata(data);
table.insert("binary_file", filename);
table.insert("uuid", uuid.str());
return table;
}

template <data::ImmutableMemoryDataset Data>
static lib::SaveTable save(const Data& data, const lib::SaveContext& ctx) {
// UUID used to identify the file.
auto uuid = lib::UUID{};
auto filename = ctx.generate_name("data");
io::save(data, io::NativeFile(filename), uuid);
return metadata(data, lib::save(filename.filename()), uuid);
}

template <data::ImmutableMemoryDataset Data>
static void save(const Data& data, std::ostream& os) {
io::save(data, os);
}

template <typename T, lib::LazyInvocable<size_t, size_t> F>
Expand All @@ -116,6 +134,25 @@ class GenericSerializer {
}
return io::load_dataset(binaryfile.value(), lazy);
}

template <typename T, lib::LazyInvocable<size_t, size_t> F>
static lib::lazy_result_t<F, size_t, size_t>
load(const lib::ContextFreeLoadTable& table, std::istream& is, const F& lazy) {
auto datatype = lib::load_at<DataType>(table, "eltype");
if (datatype != datatype_v<T>) {
throw ANNEXCEPTION(
"Trying to load an uncompressed dataset with element types {} to a dataset "
"with element types {}.",
name(datatype),
name<datatype_v<T>>()
);
}

size_t num_vectors = lib::load_at<size_t>(table, "num_vectors");
size_t dims = lib::load_at<size_t>(table, "dims");

return io::load_dataset(is, lazy, num_vectors, dims);
}
};

struct Matcher {
Expand Down Expand Up @@ -405,6 +442,10 @@ class SimpleData {
return GenericSerializer::save(*this, ctx);
}

void save(std::ostream& os) const { return GenericSerializer::save(*this, os); }

lib::SaveTable metadata() const { return GenericSerializer::metadata(*this); }

static bool check_load_compatibility(std::string_view schema, lib::Version version) {
return GenericSerializer::check_compatibility(schema, version);
}
Expand All @@ -431,6 +472,20 @@ class SimpleData {
);
}

static SimpleData load(
const lib::ContextFreeLoadTable& table,
std::istream& is,
const allocator_type& allocator = {}
)
requires(!is_view)
{
return GenericSerializer::load<T>(
table, is, lib::Lazy([&](size_t n_elements, size_t n_dimensions) {
return SimpleData(n_elements, n_dimensions, allocator);
})
);
}

///
/// @brief Try to automatically load the dataset.
///
Expand Down Expand Up @@ -805,6 +860,10 @@ class SimpleData<T, Extent, Blocked<Alloc>> {
return GenericSerializer::save(*this, ctx);
}

void save(std::ostream& os) const { return GenericSerializer::save(*this, os); }

lib::SaveTable metadata() const { return GenericSerializer::metadata(*this); }

static bool check_load_compatibility(std::string_view schema, lib::Version version) {
return GenericSerializer::check_compatibility(schema, version);
}
Expand All @@ -818,6 +877,18 @@ class SimpleData<T, Extent, Blocked<Alloc>> {
);
}

static SimpleData load(
const lib::ContextFreeLoadTable& table,
std::istream& is,
const Blocked<Alloc>& allocator = {}
) {
return GenericSerializer::load<T>(
table, is, lib::Lazy([&allocator](size_t n_elements, size_t n_dimensions) {
return SimpleData(n_elements, n_dimensions, allocator);
})
);
}

static SimpleData
load(const std::filesystem::path& path, const Blocked<Alloc>& allocator = {}) {
if (detail::is_likely_reload(path)) {
Expand Down
88 changes: 81 additions & 7 deletions include/svs/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,22 +276,36 @@ template <std::unsigned_integral Idx, data::MemoryDataset Data> class SimpleGrap
///// Saving
static constexpr lib::Version save_version = lib::Version(0, 0, 0);
static constexpr std::string_view serialization_schema = "default_graph";
lib::SaveTable save(const lib::SaveContext& ctx) const {
auto uuid = lib::UUID{};
auto filename = ctx.generate_name("graph");
io::save(data_, io::NativeFile(filename), uuid);
return lib::SaveTable(

lib::SaveTable metadata() const {
auto table = lib::SaveTable(
serialization_schema,
save_version,
{{"name", "graph"},
{"binary_file", lib::save(filename.filename())},
{"max_degree", lib::save(max_degree())},
{"num_vertices", lib::save(n_nodes())},
{"uuid", lib::save(uuid.str())},
{"eltype", lib::save(datatype_v<Idx>)}}
);
return table;
}

template <class FileName>
lib::SaveTable metadata(const FileName& filename, const lib::UUID& uuid) const {
auto table = metadata();
table.insert("binary_file", filename);
table.insert("uuid", uuid.str());
return table;
}

lib::SaveTable save(const lib::SaveContext& ctx) const {
auto uuid = lib::UUID{};
auto filename = ctx.generate_name("graph");
io::save(data_, io::NativeFile(filename), uuid);
return metadata(lib::save(filename.filename()), uuid);
}

void save(std::ostream& os) const { io::save(data_, os); }

protected:
template <lib::LazyInvocable<data_type> F, typename... Args>
static lib::lazy_result_t<F, data_type>
Expand All @@ -317,6 +331,42 @@ template <std::unsigned_integral Idx, data::MemoryDataset Data> class SimpleGrap
return lazy(data_type::load(binaryfile.value(), std::forward<Args>(args)...));
}

template <lib::LazyInvocable<data_type> F, typename... AllocArgs>
static lib::lazy_result_t<F, data_type> load(
const lib::ContextFreeLoadTable& table,
const F& lazy,
std::istream& is,
AllocArgs&&... alloc_args
) {
// Perform a sanity check on the element type.
// Make sure we're loading the correct kind.
auto eltype = lib::load_at<DataType>(table, "eltype");
if (eltype != datatype_v<Idx>) {
throw ANNEXCEPTION(
"Trying to load a graph with adjacency list types {} to a graph with "
"adjacency list types {}.",
name(eltype),
name<datatype_v<Idx>>()
);
}

size_t num_vertices = lib::load_at<size_t>(table, "num_vertices");
size_t max_degree = lib::load_at<size_t>(table, "max_degree");

// Build a table compatible with GenericSerializer
auto data_table = toml::table{
{lib::config_schema_key, data::GenericSerializer::serialization_schema},
{lib::config_version_key, data::GenericSerializer::save_version.str()},
{"eltype", lib::save(datatype_v<Idx>)},
{"num_vectors", lib::save(num_vertices)},
{"dims", lib::save(max_degree + 1)},
};

return lazy(
data_type::load(lib::ContextFreeLoadTable(data_table), is, alloc_args...)
);
}

protected:
data_type data_;
Idx max_degree_;
Expand Down Expand Up @@ -366,6 +416,15 @@ class SimpleGraph : public SimpleGraphBase<Idx, data::SimpleData<Idx, Dynamic, A
return parent_type::load(table, lazy, allocator);
}

static constexpr SimpleGraph load(
const lib::ContextFreeLoadTable& table,
std::istream& is,
const Alloc& allocator = {}
) {
auto lazy = lib::Lazy([](data_type data) { return SimpleGraph(std::move(data)); });
return parent_type::load(table, lazy, is, allocator);
}

static constexpr SimpleGraph
load(const std::filesystem::path& path, const Alloc& allocator = {}) {
if (data::detail::is_likely_reload(path)) {
Expand All @@ -374,6 +433,10 @@ class SimpleGraph : public SimpleGraphBase<Idx, data::SimpleData<Idx, Dynamic, A
return SimpleGraph(data_type::load(path, allocator));
}
}

static constexpr SimpleGraph load(std::istream& is, const Alloc& allocator = {}) {
return lib::load_from_stream<SimpleGraph>(is, allocator);
}
};

template <typename Idx, typename A1, typename A2>
Expand Down Expand Up @@ -406,13 +469,24 @@ class SimpleBlockedGraph
return parent_type::load(table, lazy);
}

static constexpr SimpleBlockedGraph
load(const lib::ContextFreeLoadTable& table, std::istream& is) {
auto lazy =
lib::Lazy([](data_type data) { return SimpleBlockedGraph(std::move(data)); });
return parent_type::load(table, lazy, is);
}

static constexpr SimpleBlockedGraph load(const std::filesystem::path& path) {
if (data::detail::is_likely_reload(path)) {
return lib::load_from_disk<SimpleBlockedGraph>(path);
} else {
return SimpleBlockedGraph(data_type::load(path));
}
}

static constexpr SimpleBlockedGraph load(std::istream& is) {
return lib::load_from_stream<SimpleBlockedGraph>(is);
}
};

} // namespace svs::graphs
Loading
Loading