From c4a1238a2fe70ce7b64d57e2055dbf36c28f3417 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Fri, 3 Apr 2026 19:21:21 +0800 Subject: [PATCH 1/7] c and python get_metadata interface wrapper --- cpp/src/cwrapper/tsfile_cwrapper.cc | 236 ++++++++++++++++++++ cpp/src/cwrapper/tsfile_cwrapper.h | 78 +++++++ cpp/test/cwrapper/cwrapper_metadata_test.cc | 139 ++++++++++++ python/tests/test_reader_metadata.py | 84 +++++++ python/tsfile/schema.py | 33 +++ python/tsfile/tsfile_cpp.pxd | 39 ++++ python/tsfile/tsfile_py_cpp.pxd | 3 + python/tsfile/tsfile_py_cpp.pyx | 106 +++++++++ python/tsfile/tsfile_reader.pyx | 17 +- 9 files changed, 734 insertions(+), 1 deletion(-) create mode 100644 cpp/test/cwrapper/cwrapper_metadata_test.cc create mode 100644 python/tests/test_reader_metadata.py diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 8cf7b6223..1f1010c84 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -26,8 +26,12 @@ #include #include +#include +#include "common/device_id.h" +#include "common/statistic.h" #include "common/tablet.h" +#include "common/tsfile_common.h" #include "reader/result_set.h" #include "reader/table_result_set.h" #include "reader/tsfile_reader.h" @@ -695,6 +699,238 @@ DeviceSchema* tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, return device_schema; } +const DeviceID tsfile_c_metadata_empty_device_list_marker = {nullptr}; + +namespace { + +void clear_timeseries_statistic(TimeseriesStatistic* s) { + memset(s, 0, sizeof(*s)); +} + +void fill_timeseries_statistic(storage::Statistic* st, + TimeseriesStatistic* out) { + clear_timeseries_statistic(out); + if (st == nullptr) { + return; + } + out->has_statistic = true; + out->row_count = st->get_count(); + out->start_time = st->start_time_; + out->end_time = st->get_end_time(); + out->sum_valid = false; + out->sum = 0.0; + const common::TSDataType t = st->get_type(); + switch (t) { + case common::BOOLEAN: { + auto* bs = static_cast(st); + out->sum_valid = true; + out->sum = static_cast(bs->sum_value_); + break; + } + case common::INT32: + case common::DATE: { + auto* is = static_cast(st); + out->sum_valid = true; + out->sum = static_cast(is->sum_value_); + break; + } + case common::INT64: + case common::TIMESTAMP: { + auto* ls = static_cast(st); + out->sum_valid = true; + out->sum = ls->sum_value_; + break; + } + case common::FLOAT: { + auto* fs = static_cast(st); + out->sum_valid = true; + out->sum = static_cast(fs->sum_value_); + break; + } + case common::DOUBLE: { + auto* ds = static_cast(st); + out->sum_valid = true; + out->sum = ds->sum_value_; + break; + } + default: + break; + } +} + +void free_device_timeseries_metadata_entries_partial( + DeviceTimeseriesMetadataEntry* entries, size_t filled_count) { + if (entries == nullptr) { + return; + } + for (size_t i = 0; i < filled_count; i++) { + free(entries[i].device.path); + entries[i].device.path = nullptr; + if (entries[i].timeseries != nullptr) { + for (uint32_t j = 0; j < entries[i].timeseries_count; j++) { + free(entries[i].timeseries[j].measurement_name); + } + free(entries[i].timeseries); + entries[i].timeseries = nullptr; + } + } + free(entries); +} + +} // namespace + +ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, + uint32_t* out_length) { + if (reader == nullptr || out_devices == nullptr || out_length == nullptr) { + return common::E_INVALID_ARG; + } + *out_devices = nullptr; + *out_length = 0; + auto* r = static_cast(reader); + const auto ids = r->get_all_devices(); + if (ids.empty()) { + return common::E_OK; + } + auto* arr = static_cast(malloc(sizeof(DeviceID) * ids.size())); + if (arr == nullptr) { + return common::E_OOM; + } + memset(arr, 0, sizeof(DeviceID) * ids.size()); + for (size_t i = 0; i < ids.size(); i++) { + const std::string name = + ids[i] ? ids[i]->get_device_name() : std::string(); + arr[i].path = strdup(name.c_str()); + if (arr[i].path == nullptr) { + tsfile_free_device_id_array(arr, static_cast(i)); + return common::E_OOM; + } + } + *out_devices = arr; + *out_length = static_cast(ids.size()); + return common::E_OK; +} + +void tsfile_free_device_id_array(DeviceID* devices, uint32_t length) { + if (devices == nullptr) { + return; + } + for (uint32_t i = 0; i < length; i++) { + free(devices[i].path); + devices[i].path = nullptr; + } + free(devices); +} + +ERRNO tsfile_reader_get_timeseries_metadata( + TsFileReader reader, const DeviceID* device_ids, uint32_t length, + DeviceTimeseriesMetadataMap* out_map) { + if (reader == nullptr || out_map == nullptr) { + return common::E_INVALID_ARG; + } + out_map->entries = nullptr; + out_map->device_count = 0; + auto* r = static_cast(reader); + storage::DeviceTimeseriesMetadataMap cpp_map; + if (device_ids == nullptr) { + cpp_map = r->get_timeseries_metadata(); + } else if (length == 0) { + return common::E_OK; + } else { + std::vector> query_ids; + query_ids.reserve(length); + for (uint32_t i = 0; i < length; i++) { + if (device_ids[i].path == nullptr) { + return common::E_INVALID_ARG; + } + query_ids.push_back(std::make_shared( + std::string(device_ids[i].path))); + } + cpp_map = r->get_timeseries_metadata(query_ids); + } + if (cpp_map.empty()) { + return common::E_OK; + } + const uint32_t dev_n = static_cast(cpp_map.size()); + auto* entries = static_cast( + malloc(sizeof(DeviceTimeseriesMetadataEntry) * dev_n)); + if (entries == nullptr) { + return common::E_OOM; + } + memset(entries, 0, sizeof(DeviceTimeseriesMetadataEntry) * dev_n); + size_t di = 0; + for (const auto& kv : cpp_map) { + DeviceTimeseriesMetadataEntry& e = entries[di]; + const std::string dname = + kv.first ? kv.first->get_device_name() : std::string(); + e.device.path = strdup(dname.c_str()); + if (e.device.path == nullptr) { + free_device_timeseries_metadata_entries_partial(entries, di); + return common::E_OOM; + } + const auto& vec = kv.second; + e.timeseries_count = static_cast(vec.size()); + if (e.timeseries_count == 0) { + e.timeseries = nullptr; + di++; + continue; + } + e.timeseries = static_cast( + malloc(sizeof(TimeseriesMetadata) * e.timeseries_count)); + if (e.timeseries == nullptr) { + free(e.device.path); + e.device.path = nullptr; + free_device_timeseries_metadata_entries_partial(entries, di); + return common::E_OOM; + } + memset(e.timeseries, 0, + sizeof(TimeseriesMetadata) * e.timeseries_count); + for (uint32_t ti = 0; ti < e.timeseries_count; ti++) { + const auto& idx = vec[ti]; + TimeseriesMetadata& m = e.timeseries[ti]; + if (idx == nullptr) { + continue; + } + common::String mn = idx->get_measurement_name(); + m.measurement_name = strdup(mn.to_std_string().c_str()); + if (m.measurement_name == nullptr) { + for (uint32_t u = 0; u <= ti; u++) { + free(e.timeseries[u].measurement_name); + } + free(e.timeseries); + e.timeseries = nullptr; + free(e.device.path); + e.device.path = nullptr; + free_device_timeseries_metadata_entries_partial(entries, di); + return common::E_OOM; + } + m.data_type = static_cast(idx->get_data_type()); + storage::Statistic* st = idx->get_statistic(); + int32_t chunk_cnt = 0; + auto* cl = idx->get_chunk_meta_list(); + if (cl != nullptr) { + chunk_cnt = static_cast(cl->size()); + } + m.chunk_meta_count = chunk_cnt; + fill_timeseries_statistic(st, &m.statistic); + } + di++; + } + out_map->entries = entries; + out_map->device_count = dev_n; + return common::E_OK; +} + +void tsfile_free_device_timeseries_metadata_map( + DeviceTimeseriesMetadataMap* map) { + if (map == nullptr) { + return; + } + free_device_timeseries_metadata_entries_partial(map->entries, + map->device_count); + map->entries = nullptr; + map->device_count = 0; +} + // delete pointer void _free_tsfile_ts_record(TsRecord* record) { if (*record != nullptr) { diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 4f4ce8d6e..1cad8c473 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -104,6 +104,56 @@ typedef struct device_schema { int timeseries_num; } DeviceSchema; +/** + * @brief Device identifier for C API (canonical path string from IDeviceID). + */ +typedef struct DeviceID { + char* path; +} DeviceID; + +/** + * @brief Aggregated statistic for one timeseries (subset of C++ Statistic). + */ +typedef struct TimeseriesStatistic { + bool has_statistic; + int32_t row_count; + int64_t start_time; + int64_t end_time; + /** True when @p sum is meaningful (numeric / boolean aggregate types). */ + bool sum_valid; + /** Sum when sum_valid; boolean uses sum of true as int-like aggregate. */ + double sum; +} TimeseriesStatistic; + +/** + * @brief One measurement's metadata as exposed to C. + */ +typedef struct TimeseriesMetadata { + char* measurement_name; + TSDataType data_type; + int32_t chunk_meta_count; + TimeseriesStatistic statistic; +} TimeseriesMetadata; + +typedef struct DeviceTimeseriesMetadataEntry { + DeviceID device; + TimeseriesMetadata* timeseries; + uint32_t timeseries_count; +} DeviceTimeseriesMetadataEntry; + +/** + * @brief Map device -> list of TimeseriesMetadata (C layout with explicit + * counts). + */ +typedef struct DeviceTimeseriesMetadataMap { + DeviceTimeseriesMetadataEntry* entries; + uint32_t device_count; +} DeviceTimeseriesMetadataMap; + +/** Sentinel: optional address for bindings when querying an empty device_id + * list (length 0). */ +extern const DeviceID tsfile_c_metadata_empty_device_list_marker; + typedef struct result_set_meta_data { char** column_names; TSDataType* data_types; @@ -316,6 +366,34 @@ ERRNO tsfile_writer_close(TsFileWriter writer); */ ERRNO tsfile_reader_close(TsFileReader reader); +/** + * @brief Lists all devices in the file. + * + * @param out_devices [out] Allocated array; caller frees with + * tsfile_free_device_id_array. + * @param out_length [out] Number of devices. + */ +ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, + uint32_t* out_length); + +void tsfile_free_device_id_array(DeviceID* devices, uint32_t length); + +/** + * @brief Timeseries metadata for none, some, or all devices. + * + * @param device_ids NULL: all devices (length ignored). + * Non-NULL with length==0: empty result (E_OK), device_ids + * not read. Non-NULL with length>0: only these devices (existing only). + * @param out_map [out] Must point to zeroed struct; filled on success. + * Free with tsfile_free_device_timeseries_metadata_map. + */ +ERRNO tsfile_reader_get_timeseries_metadata( + TsFileReader reader, const DeviceID* device_ids, uint32_t length, + DeviceTimeseriesMetadataMap* out_map); + +void tsfile_free_device_timeseries_metadata_map( + DeviceTimeseriesMetadataMap* map); + /*--------------------------Tablet API------------------------ */ /** diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc new file mode 100644 index 000000000..16b29df5a --- /dev/null +++ b/cpp/test/cwrapper/cwrapper_metadata_test.cc @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include + +#include +#include + +extern "C" { +#include "cwrapper/errno_define_c.h" +#include "cwrapper/tsfile_cwrapper.h" +} + +namespace cwrapper_metadata { + +class CWrapperMetadataTest : public testing::Test {}; + +TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { + ERRNO code = RET_OK; + const char* filename = "cwrapper_metadata_stat.tsfile"; + remove(filename); + + const char* device = "root.sg.d1"; + char* m_int = strdup("s_int"); + timeseries_schema sch{}; + sch.timeseries_name = m_int; + sch.data_type = TS_DATATYPE_INT32; + sch.encoding = TS_ENCODING_PLAIN; + sch.compression = TS_COMPRESSION_UNCOMPRESSED; + + auto* writer = static_cast( + _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); + ASSERT_EQ(RET_OK, code); + ASSERT_EQ(RET_OK, _tsfile_writer_register_timeseries(writer, device, &sch)); + + for (int row = 0; row < 3; row++) { + auto* record = static_cast( + _ts_record_new(device, static_cast(row + 1), 1)); + const int32_t v = static_cast((row + 1) * 10); + ASSERT_EQ(RET_OK, _insert_data_into_ts_record_by_name_int32_t( + record, m_int, v)); + ASSERT_EQ(RET_OK, _tsfile_writer_write_ts_record(writer, record)); + _free_tsfile_ts_record(reinterpret_cast(&record)); + } + ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); + + TsFileReader reader = tsfile_reader_new(filename, &code); + ASSERT_EQ(RET_OK, code); + ASSERT_NE(nullptr, reader); + + DeviceID* devices = nullptr; + uint32_t n_dev = 0; + ASSERT_EQ(RET_OK, tsfile_reader_get_all_devices(reader, &devices, &n_dev)); + ASSERT_EQ(1u, n_dev); + ASSERT_NE(nullptr, devices); + ASSERT_STREQ(device, devices[0].path); + tsfile_free_device_id_array(devices, n_dev); + + DeviceTimeseriesMetadataMap map{}; + ASSERT_EQ(RET_OK, + tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, &map)); + ASSERT_EQ(1u, map.device_count); + ASSERT_NE(nullptr, map.entries); + ASSERT_STREQ(device, map.entries[0].device.path); + ASSERT_EQ(1u, map.entries[0].timeseries_count); + ASSERT_NE(nullptr, map.entries[0].timeseries); + TimeseriesMetadata& tm = map.entries[0].timeseries[0]; + ASSERT_STREQ(m_int, tm.measurement_name); + ASSERT_EQ(TS_DATATYPE_INT32, tm.data_type); + ASSERT_TRUE(tm.statistic.has_statistic); + EXPECT_EQ(3, tm.statistic.row_count); + EXPECT_EQ(1, tm.statistic.start_time); + EXPECT_EQ(3, tm.statistic.end_time); + ASSERT_TRUE(tm.statistic.sum_valid); + EXPECT_DOUBLE_EQ(60.0, tm.statistic.sum); + + tsfile_free_device_timeseries_metadata_map(&map); + + DeviceTimeseriesMetadataMap empty{}; + ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata( + reader, &tsfile_c_metadata_empty_device_list_marker, + 0, &empty)); + EXPECT_EQ(0u, empty.device_count); + EXPECT_EQ(nullptr, empty.entries); + + DeviceID q{}; + q.path = const_cast(device); + DeviceTimeseriesMetadataMap one{}; + ASSERT_EQ(RET_OK, + tsfile_reader_get_timeseries_metadata(reader, &q, 1, &one)); + ASSERT_EQ(1u, one.device_count); + tsfile_free_device_timeseries_metadata_map(&one); + + ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); + free(m_int); + remove(filename); +} + +TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataInvalidArgs) { + ERRNO code = RET_OK; + const char* filename = "cwrapper_metadata_empty.tsfile"; + remove(filename); + + auto* writer = static_cast( + _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); + ASSERT_EQ(RET_OK, code); + ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); + + TsFileReader reader = tsfile_reader_new(filename, &code); + ASSERT_EQ(RET_OK, code); + + DeviceTimeseriesMetadataMap map{}; + EXPECT_NE(RET_OK, + tsfile_reader_get_timeseries_metadata(nullptr, nullptr, 0, &map)); + EXPECT_NE(RET_OK, tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, + nullptr)); + + ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); + remove(filename); +} + +} // namespace cwrapper_metadata diff --git a/python/tests/test_reader_metadata.py b/python/tests/test_reader_metadata.py new file mode 100644 index 000000000..f58c0f675 --- /dev/null +++ b/python/tests/test_reader_metadata.py @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +import os +import tempfile + +import pytest + +from tsfile import Field, RowRecord, TimeseriesSchema, TsFileReader, TsFileWriter +from tsfile import TSDataType +from tsfile.schema import DeviceID + + +def test_get_all_devices_and_timeseries_metadata_statistic(): + path = os.path.join(tempfile.gettempdir(), "py_reader_metadata_stat.tsfile") + try: + os.unlink(path) + except OSError: + pass + + device = "root.sg.py_meta" + writer = TsFileWriter(path) + writer.register_timeseries( + device, TimeseriesSchema("m_int", TSDataType.INT32)) + for row in range(3): + v = (row + 1) * 10 + writer.write_row_record( + RowRecord( + device, + row + 1, + [Field("m_int", v, TSDataType.INT32)], + ) + ) + writer.close() + + reader = TsFileReader(path) + try: + devices = reader.get_all_devices() + assert len(devices) == 1 + assert devices[0].path == device + + meta_all = reader.get_timeseries_metadata(None) + assert list(meta_all.keys()) == [device] + series = meta_all[device] + assert len(series) == 1 + m = series[0] + assert m.measurement_name == "m_int" + assert m.data_type == TSDataType.INT32 + st = m.statistic + assert st.has_statistic + assert st.row_count == 3 + assert st.start_time == 1 + assert st.end_time == 3 + assert st.sum_valid + assert st.sum == pytest.approx(60.0) + + assert reader.get_timeseries_metadata([]) == {} + + sub = reader.get_timeseries_metadata([DeviceID(device)]) + assert device in sub + assert len(sub[device]) == 1 + + sub_str = reader.get_timeseries_metadata([device]) + assert device in sub_str + finally: + reader.close() + try: + os.unlink(path) + except OSError: + pass diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index c89649bf3..955253eaa 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -15,12 +15,45 @@ # specific language governing permissions and limitations # under the License. # +from dataclasses import dataclass from typing import List from .exceptions import TypeMismatchError from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor +@dataclass(frozen=True) +class DeviceID: + """Device path string as returned by the native reader (tree/table file layout).""" + + path: str + + def __str__(self) -> str: + return self.path + + +@dataclass(frozen=True) +class TimeseriesStatistic: + """Subset of file chunk statistic exposed through the C API.""" + + has_statistic: bool + row_count: int + start_time: int + end_time: int + sum_valid: bool + sum: float + + +@dataclass(frozen=True) +class TimeseriesMetadata: + """Per-measurement metadata from get_timeseries_metadata (includes statistic when present).""" + + measurement_name: str + data_type: TSDataType + chunk_meta_count: int + statistic: TimeseriesStatistic + + class TimeseriesSchema: """ Metadata schema for a time series (name, data type, encoding, compression). diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 29008148d..22f324596 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -103,6 +103,34 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": TimeseriesSchema * timeseries_schema int timeseries_num + ctypedef struct DeviceID: + char * path + + ctypedef struct TimeseriesStatistic: + bint has_statistic + int32_t row_count + int64_t start_time + int64_t end_time + bint sum_valid + double sum + + ctypedef struct TimeseriesMetadata: + char * measurement_name + TSDataType data_type + int32_t chunk_meta_count + TimeseriesStatistic statistic + + ctypedef struct DeviceTimeseriesMetadataEntry: + DeviceID device + TimeseriesMetadata * timeseries + uint32_t timeseries_count + + ctypedef struct DeviceTimeseriesMetadataMap: + DeviceTimeseriesMetadataEntry * entries + uint32_t device_count + + const DeviceID tsfile_c_metadata_empty_device_list_marker + ctypedef struct ResultSetMetaData: char** column_names TSDataType * data_types @@ -218,6 +246,17 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": DeviceSchema * tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, uint32_t * size); + ErrorCode tsfile_reader_get_all_devices(TsFileReader reader, + DeviceID ** out_devices, + uint32_t * out_length); + void tsfile_free_device_id_array(DeviceID * devices, uint32_t length); + + ErrorCode tsfile_reader_get_timeseries_metadata( + TsFileReader reader, const DeviceID * device_ids, uint32_t length, + DeviceTimeseriesMetadataMap * out_map); + void tsfile_free_device_timeseries_metadata_map( + DeviceTimeseriesMetadataMap * map); + # resultSet : get data from resultSet bint tsfile_result_set_next(ResultSet result_set, ErrorCode * err_code); bint tsfile_result_set_is_null_by_index(ResultSet result_set, uint32_t column_index); diff --git a/python/tsfile/tsfile_py_cpp.pxd b/python/tsfile/tsfile_py_cpp.pxd index 197a4ec87..b6baee80d 100644 --- a/python/tsfile/tsfile_py_cpp.pxd +++ b/python/tsfile/tsfile_py_cpp.pxd @@ -67,5 +67,8 @@ cdef public api ResultSet tsfile_reader_query_table_by_row_c(TsFileReader reader cdef public api object get_table_schema(TsFileReader reader, object table_name) cdef public api object get_all_table_schema(TsFileReader reader) cdef public api object get_all_timeseries_schema(TsFileReader reader) +cdef public api object reader_get_all_devices_c(TsFileReader reader) +cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, + object device_ids) cpdef public api object get_tsfile_config() cpdef public api void set_tsfile_config(dict new_config) \ No newline at end of file diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index 4febeb731..d913b0c4e 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -26,6 +26,7 @@ import numpy as np from libc.stdlib cimport free from libc.stdlib cimport malloc from libc.string cimport strdup +from libc.string cimport memset from cpython.exc cimport PyErr_SetObject from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_AsUTF8, PyUnicode_AsUTF8AndSize from cpython.bytes cimport PyBytes_AsString, PyBytes_AsStringAndSize @@ -36,6 +37,9 @@ from tsfile.schema import TSDataType as TSDataTypePy, TSEncoding as TSEncodingPy from tsfile.schema import Compressor as CompressorPy, ColumnCategory as CategoryPy from tsfile.schema import TableSchema as TableSchemaPy, ColumnSchema as ColumnSchemaPy from tsfile.schema import DeviceSchema as DeviceSchemaPy, TimeseriesSchema as TimeseriesSchemaPy +from tsfile.schema import DeviceID as ReaderDeviceID +from tsfile.schema import TimeseriesStatistic as TimeseriesStatisticPy +from tsfile.schema import TimeseriesMetadata as TimeseriesMetadataPy # check exception and set py exception object cdef inline void check_error(int errcode, const char * context=NULL) except*: @@ -922,3 +926,105 @@ cdef object get_all_timeseries_schema(TsFileReader reader): device_schemas.update([(schema_py.get_device_name(), schema_py)]) free(schemas) return device_schemas + +cdef object timeseries_metadata_c_to_py(TimeseriesMetadata* m): + cdef str name_py + if m == NULL or m.measurement_name == NULL: + name_py = "" + else: + name_py = m.measurement_name.decode('utf-8') + cdef object stat = TimeseriesStatisticPy( + bool(m.statistic.has_statistic), + int(m.statistic.row_count), + int(m.statistic.start_time), + int(m.statistic.end_time), + bool(m.statistic.sum_valid), + float(m.statistic.sum), + ) + return TimeseriesMetadataPy( + name_py, + TSDataTypePy(m.data_type), + int(m.chunk_meta_count), + stat, + ) + +cdef dict device_timeseries_metadata_map_to_py(DeviceTimeseriesMetadataMap* mmap): + cdef dict out = {} + cdef uint32_t di, ti + cdef char* p + cdef str key + cdef list series + for di in range(mmap.device_count): + p = mmap.entries[di].device.path + if p == NULL: + key = "" + else: + key = p.decode('utf-8') + series = [] + for ti in range(mmap.entries[di].timeseries_count): + series.append( + timeseries_metadata_c_to_py( + &mmap.entries[di].timeseries[ti])) + out[key] = series + return out + +cdef public api object reader_get_all_devices_c(TsFileReader reader): + cdef DeviceID* arr = NULL + cdef uint32_t n = 0 + cdef int err + cdef list out = [] + cdef uint32_t i + err = tsfile_reader_get_all_devices(reader, &arr, &n) + check_error(err) + try: + for i in range(n): + out.append(ReaderDeviceID(arr[i].path.decode('utf-8'))) + finally: + tsfile_free_device_id_array(arr, n) + return out + +cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, + object device_ids): + cdef DeviceTimeseriesMetadataMap mmap + cdef DeviceID* q = NULL + cdef uint32_t qlen = 0 + cdef uint32_t i + cdef int err + cdef bytes bpath + cdef const char* raw + memset(&mmap, 0, sizeof(DeviceTimeseriesMetadataMap)) + if device_ids is None: + err = tsfile_reader_get_timeseries_metadata(reader, NULL, 0, &mmap) + check_error(err) + elif len(device_ids) == 0: + err = tsfile_reader_get_timeseries_metadata( + reader, &tsfile_c_metadata_empty_device_list_marker, 0, &mmap) + check_error(err) + else: + qlen = len(device_ids) + q = malloc(sizeof(DeviceID) * qlen) + if q == NULL: + raise MemoryError() + memset(q, 0, sizeof(DeviceID) * qlen) + try: + for i in range(qlen): + dev = device_ids[i] + try: + path_s = dev.path + except AttributeError: + path_s = str(dev) + bpath = path_s.encode('utf-8') + raw = PyBytes_AsString(bpath) + q[i].path = strdup(raw) + if q[i].path == NULL: + raise MemoryError() + err = tsfile_reader_get_timeseries_metadata(reader, q, qlen, &mmap) + check_error(err) + finally: + for i in range(qlen): + free(q[i].path) + free(q) + try: + return device_timeseries_metadata_map_to_py(&mmap) + finally: + tsfile_free_device_timeseries_metadata_map(&mmap) diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index 3a1a15d4d..52a9a94f7 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -19,7 +19,7 @@ #cython: language_level=3 import weakref -from typing import List +from typing import List, Optional, Dict import pandas as pd from libc.stdint cimport INT64_MIN, INT64_MAX @@ -427,6 +427,21 @@ cdef class TsFileReaderPy: """ return get_all_timeseries_schema(self.reader) + def get_all_devices(self): + """ + Return all device IDs in the file as :class:`tsfile.schema.DeviceID`. + """ + return reader_get_all_devices_c(self.reader) + + def get_timeseries_metadata(self, device_ids: Optional[List] = None) -> Dict[str, list]: + """ + Return map device path -> list of :class:`tsfile.schema.TimeseriesMetadata`. + + ``device_ids is None``: all devices. ``device_ids == []``: empty map. + Non-empty list restricts to those devices (only existing devices appear). + """ + return reader_get_timeseries_metadata_c(self.reader, device_ids) + def close(self): """ Close TsFile Reader, if reader has result sets, invalid them. From 9fc3e828b41568b1a0c79cb9d8929404929930ea Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Fri, 3 Apr 2026 19:55:09 +0800 Subject: [PATCH 2/7] python c metadata interface --- cpp/src/cwrapper/tsfile_cwrapper.cc | 157 ++++++++++++++++++-- cpp/src/cwrapper/tsfile_cwrapper.h | 29 ++++ cpp/test/cwrapper/cwrapper_metadata_test.cc | 137 +++++++++++++++++ python/tests/test_reader_metadata.py | 86 +++++++++++ python/tsfile/schema.py | 20 ++- python/tsfile/tsfile_cpp.pxd | 18 +++ python/tsfile/tsfile_py_cpp.pyx | 23 +++ 7 files changed, 460 insertions(+), 10 deletions(-) diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 1f1010c84..3582f5f68 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -703,15 +703,45 @@ const DeviceID tsfile_c_metadata_empty_device_list_marker = {nullptr}; namespace { +char* dup_common_string_to_cstr(const common::String& s) { + if (s.buf_ == nullptr || s.len_ == 0) { + return strdup(""); + } + char* p = static_cast(malloc(static_cast(s.len_) + 1U)); + if (p == nullptr) { + return nullptr; + } + memcpy(p, s.buf_, static_cast(s.len_)); + p[s.len_] = '\0'; + return p; +} + +void free_timeseries_statistic_heap(TimeseriesStatistic* s) { + if (s == nullptr) { + return; + } + free(s->str_min); + s->str_min = nullptr; + free(s->str_max); + s->str_max = nullptr; + free(s->str_first); + s->str_first = nullptr; + free(s->str_last); + s->str_last = nullptr; +} + void clear_timeseries_statistic(TimeseriesStatistic* s) { memset(s, 0, sizeof(*s)); } -void fill_timeseries_statistic(storage::Statistic* st, - TimeseriesStatistic* out) { +/** + * Fills @p out from C++ Statistic. On allocation failure returns E_OOM and + * clears/frees any partial string fields in @p out. + */ +int fill_timeseries_statistic(storage::Statistic* st, TimeseriesStatistic* out) { clear_timeseries_statistic(out); if (st == nullptr) { - return; + return common::E_OK; } out->has_statistic = true; out->row_count = st->get_count(); @@ -725,6 +755,9 @@ void fill_timeseries_statistic(storage::Statistic* st, auto* bs = static_cast(st); out->sum_valid = true; out->sum = static_cast(bs->sum_value_); + out->bool_ext_valid = true; + out->first_bool = bs->first_value_; + out->last_bool = bs->last_value_; break; } case common::INT32: @@ -732,6 +765,13 @@ void fill_timeseries_statistic(storage::Statistic* st, auto* is = static_cast(st); out->sum_valid = true; out->sum = static_cast(is->sum_value_); + if (out->row_count > 0) { + out->int_range_valid = true; + out->min_int64 = static_cast(is->min_value_); + out->max_int64 = static_cast(is->max_value_); + out->first_int64 = static_cast(is->first_value_); + out->last_int64 = static_cast(is->last_value_); + } break; } case common::INT64: @@ -739,23 +779,98 @@ void fill_timeseries_statistic(storage::Statistic* st, auto* ls = static_cast(st); out->sum_valid = true; out->sum = ls->sum_value_; + if (out->row_count > 0) { + out->int_range_valid = true; + out->min_int64 = ls->min_value_; + out->max_int64 = ls->max_value_; + out->first_int64 = ls->first_value_; + out->last_int64 = ls->last_value_; + } break; } case common::FLOAT: { auto* fs = static_cast(st); out->sum_valid = true; out->sum = static_cast(fs->sum_value_); + if (out->row_count > 0) { + out->float_range_valid = true; + out->min_float64 = static_cast(fs->min_value_); + out->max_float64 = static_cast(fs->max_value_); + out->first_float64 = static_cast(fs->first_value_); + out->last_float64 = static_cast(fs->last_value_); + } break; } case common::DOUBLE: { auto* ds = static_cast(st); out->sum_valid = true; out->sum = ds->sum_value_; + if (out->row_count > 0) { + out->float_range_valid = true; + out->min_float64 = ds->min_value_; + out->max_float64 = ds->max_value_; + out->first_float64 = ds->first_value_; + out->last_float64 = ds->last_value_; + } + break; + } + case common::STRING: { + auto* ss = static_cast(st); + out->str_ext_valid = true; + out->str_min = dup_common_string_to_cstr(ss->min_value_); + if (out->str_min == nullptr) { + free_timeseries_statistic_heap(out); + clear_timeseries_statistic(out); + return common::E_OOM; + } + out->str_max = dup_common_string_to_cstr(ss->max_value_); + if (out->str_max == nullptr) { + free_timeseries_statistic_heap(out); + clear_timeseries_statistic(out); + return common::E_OOM; + } + out->str_first = dup_common_string_to_cstr(ss->first_value_); + if (out->str_first == nullptr) { + free_timeseries_statistic_heap(out); + clear_timeseries_statistic(out); + return common::E_OOM; + } + out->str_last = dup_common_string_to_cstr(ss->last_value_); + if (out->str_last == nullptr) { + free_timeseries_statistic_heap(out); + clear_timeseries_statistic(out); + return common::E_OOM; + } + break; + } + case common::TEXT: { + auto* ts = static_cast(st); + out->str_ext_valid = true; + out->str_min = strdup(""); + out->str_max = strdup(""); + if (out->str_min == nullptr || out->str_max == nullptr) { + free_timeseries_statistic_heap(out); + clear_timeseries_statistic(out); + return common::E_OOM; + } + out->str_first = dup_common_string_to_cstr(ts->first_value_); + if (out->str_first == nullptr) { + free_timeseries_statistic_heap(out); + clear_timeseries_statistic(out); + return common::E_OOM; + } + out->str_last = dup_common_string_to_cstr(ts->last_value_); + if (out->str_last == nullptr) { + free_timeseries_statistic_heap(out); + clear_timeseries_statistic(out); + return common::E_OOM; + } break; } default: break; } + return common::E_OK; } void free_device_timeseries_metadata_entries_partial( @@ -768,6 +883,8 @@ void free_device_timeseries_metadata_entries_partial( entries[i].device.path = nullptr; if (entries[i].timeseries != nullptr) { for (uint32_t j = 0; j < entries[i].timeseries_count; j++) { + free_timeseries_statistic_heap( + &entries[i].timeseries[j].statistic); free(entries[i].timeseries[j].measurement_name); } free(entries[i].timeseries); @@ -868,7 +985,13 @@ ERRNO tsfile_reader_get_timeseries_metadata( return common::E_OOM; } const auto& vec = kv.second; - e.timeseries_count = static_cast(vec.size()); + uint32_t n_ts = 0; + for (const auto& idx_nz : vec) { + if (idx_nz != nullptr) { + n_ts++; + } + } + e.timeseries_count = n_ts; if (e.timeseries_count == 0) { e.timeseries = nullptr; di++; @@ -884,16 +1007,17 @@ ERRNO tsfile_reader_get_timeseries_metadata( } memset(e.timeseries, 0, sizeof(TimeseriesMetadata) * e.timeseries_count); - for (uint32_t ti = 0; ti < e.timeseries_count; ti++) { - const auto& idx = vec[ti]; - TimeseriesMetadata& m = e.timeseries[ti]; + uint32_t slot = 0; + for (const auto& idx : vec) { if (idx == nullptr) { continue; } + TimeseriesMetadata& m = e.timeseries[slot]; common::String mn = idx->get_measurement_name(); m.measurement_name = strdup(mn.to_std_string().c_str()); if (m.measurement_name == nullptr) { - for (uint32_t u = 0; u <= ti; u++) { + for (uint32_t u = 0; u < slot; u++) { + free_timeseries_statistic_heap(&e.timeseries[u].statistic); free(e.timeseries[u].measurement_name); } free(e.timeseries); @@ -911,7 +1035,22 @@ ERRNO tsfile_reader_get_timeseries_metadata( chunk_cnt = static_cast(cl->size()); } m.chunk_meta_count = chunk_cnt; - fill_timeseries_statistic(st, &m.statistic); + const int st_rc = fill_timeseries_statistic(st, &m.statistic); + if (st_rc != common::E_OK) { + for (uint32_t u = 0; u < slot; u++) { + free_timeseries_statistic_heap(&e.timeseries[u].statistic); + free(e.timeseries[u].measurement_name); + } + free_timeseries_statistic_heap(&m.statistic); + free(m.measurement_name); + free(e.timeseries); + e.timeseries = nullptr; + free(e.device.path); + e.device.path = nullptr; + free_device_timeseries_metadata_entries_partial(entries, di); + return st_rc; + } + slot++; } di++; } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 1cad8c473..bfa2430a7 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -113,6 +113,9 @@ typedef struct DeviceID { /** * @brief Aggregated statistic for one timeseries (subset of C++ Statistic). + * + * String pointers str_* are allocated with malloc; freed by + * tsfile_free_device_timeseries_metadata_map (do not free individually). */ typedef struct TimeseriesStatistic { bool has_statistic; @@ -123,6 +126,32 @@ typedef struct TimeseriesStatistic { bool sum_valid; /** Sum when sum_valid; boolean uses sum of true as int-like aggregate. */ double sum; + + /** INT32, DATE, INT64, TIMESTAMP: min/max/first/last in int64_t form. */ + bool int_range_valid; + int64_t min_int64; + int64_t max_int64; + int64_t first_int64; + int64_t last_int64; + + /** FLOAT, DOUBLE: min/max/first/last. */ + bool float_range_valid; + double min_float64; + double max_float64; + double first_float64; + double last_float64; + + /** BOOLEAN: first/last sample values. */ + bool bool_ext_valid; + bool first_bool; + bool last_bool; + + /** STRING: min/max lexicographic; TEXT: first/last only (min/max unused). */ + bool str_ext_valid; + char* str_min; + char* str_max; + char* str_first; + char* str_last; } TimeseriesStatistic; /** diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc index 16b29df5a..897b838c2 100644 --- a/cpp/test/cwrapper/cwrapper_metadata_test.cc +++ b/cpp/test/cwrapper/cwrapper_metadata_test.cc @@ -90,6 +90,11 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { EXPECT_EQ(3, tm.statistic.end_time); ASSERT_TRUE(tm.statistic.sum_valid); EXPECT_DOUBLE_EQ(60.0, tm.statistic.sum); + ASSERT_TRUE(tm.statistic.int_range_valid); + EXPECT_EQ(10, tm.statistic.min_int64); + EXPECT_EQ(30, tm.statistic.max_int64); + EXPECT_EQ(10, tm.statistic.first_int64); + EXPECT_EQ(30, tm.statistic.last_int64); tsfile_free_device_timeseries_metadata_map(&map); @@ -113,6 +118,138 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { remove(filename); } +TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataBooleanStatistic) { + ERRNO code = RET_OK; + const char* filename = "cwrapper_metadata_bool.tsfile"; + remove(filename); + + const char* device = "root.sg.bool"; + char* m_b = strdup("s_bool"); + timeseries_schema sch{}; + sch.timeseries_name = m_b; + sch.data_type = TS_DATATYPE_BOOLEAN; + sch.encoding = TS_ENCODING_PLAIN; + sch.compression = TS_COMPRESSION_UNCOMPRESSED; + + auto* writer = static_cast( + _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); + ASSERT_EQ(RET_OK, code); + ASSERT_EQ(RET_OK, _tsfile_writer_register_timeseries(writer, device, &sch)); + + const bool vals[] = {true, false, true}; + for (int row = 0; row < 3; row++) { + auto* record = static_cast( + _ts_record_new(device, static_cast(row + 1), 1)); + ASSERT_EQ(RET_OK, _insert_data_into_ts_record_by_name_bool(record, m_b, + vals[row])); + ASSERT_EQ(RET_OK, _tsfile_writer_write_ts_record(writer, record)); + _free_tsfile_ts_record(reinterpret_cast(&record)); + } + ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); + + TsFileReader reader = tsfile_reader_new(filename, &code); + ASSERT_EQ(RET_OK, code); + + DeviceTimeseriesMetadataMap map{}; + ASSERT_EQ(RET_OK, + tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, &map)); + TimeseriesMetadata& tm = map.entries[0].timeseries[0]; + ASSERT_STREQ(m_b, tm.measurement_name); + ASSERT_EQ(TS_DATATYPE_BOOLEAN, tm.data_type); + ASSERT_TRUE(tm.statistic.has_statistic); + ASSERT_TRUE(tm.statistic.sum_valid); + EXPECT_DOUBLE_EQ(2.0, tm.statistic.sum); + ASSERT_TRUE(tm.statistic.bool_ext_valid); + EXPECT_TRUE(tm.statistic.first_bool); + EXPECT_TRUE(tm.statistic.last_bool); + + tsfile_free_device_timeseries_metadata_map(&map); + ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); + free(m_b); + remove(filename); +} + +TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataStringStatistic) { + ERRNO code = RET_OK; + const char* filename = "cwrapper_metadata_str.tsfile"; + remove(filename); + + const char* device = "root.sg.str"; + char* m_str = strdup("s_str"); + timeseries_schema sch{}; + sch.timeseries_name = m_str; + sch.data_type = TS_DATATYPE_STRING; + sch.encoding = TS_ENCODING_PLAIN; + sch.compression = TS_COMPRESSION_UNCOMPRESSED; + + auto* writer = static_cast( + _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); + ASSERT_EQ(RET_OK, code); + ASSERT_EQ(RET_OK, _tsfile_writer_register_timeseries(writer, device, &sch)); + + const char* vals[] = {"aa", "cc", "bb"}; + for (int row = 0; row < 3; row++) { + auto* record = static_cast( + _ts_record_new(device, static_cast(row + 1), 1)); + ASSERT_EQ(RET_OK, + _insert_data_into_ts_record_by_name_string_with_len( + record, m_str, vals[row], + static_cast(std::strlen(vals[row])))); + ASSERT_EQ(RET_OK, _tsfile_writer_write_ts_record(writer, record)); + _free_tsfile_ts_record(reinterpret_cast(&record)); + } + ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); + + TsFileReader reader = tsfile_reader_new(filename, &code); + ASSERT_EQ(RET_OK, code); + + DeviceTimeseriesMetadataMap map{}; + ASSERT_EQ(RET_OK, + tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, &map)); + ASSERT_EQ(1u, map.device_count); + TimeseriesMetadata& tm = map.entries[0].timeseries[0]; + ASSERT_STREQ(m_str, tm.measurement_name); + ASSERT_EQ(TS_DATATYPE_STRING, tm.data_type); + ASSERT_TRUE(tm.statistic.has_statistic); + ASSERT_TRUE(tm.statistic.str_ext_valid); + ASSERT_NE(nullptr, tm.statistic.str_min); + ASSERT_NE(nullptr, tm.statistic.str_max); + ASSERT_NE(nullptr, tm.statistic.str_first); + ASSERT_NE(nullptr, tm.statistic.str_last); + EXPECT_STREQ("aa", tm.statistic.str_min); + EXPECT_STREQ("cc", tm.statistic.str_max); + EXPECT_STREQ("aa", tm.statistic.str_first); + EXPECT_STREQ("bb", tm.statistic.str_last); + + tsfile_free_device_timeseries_metadata_map(&map); + ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); + free(m_str); + remove(filename); +} + +TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataNullDevicePath) { + ERRNO code = RET_OK; + const char* filename = "cwrapper_metadata_null_path.tsfile"; + remove(filename); + + auto* writer = static_cast( + _tsfile_writer_new(filename, 128 * 1024 * 1024, &code)); + ASSERT_EQ(RET_OK, code); + ASSERT_EQ(RET_OK, _tsfile_writer_close(writer)); + + TsFileReader reader = tsfile_reader_new(filename, &code); + ASSERT_EQ(RET_OK, code); + + DeviceID bad{}; + bad.path = nullptr; + DeviceTimeseriesMetadataMap map{}; + EXPECT_EQ(RET_INVALID_ARG, + tsfile_reader_get_timeseries_metadata(reader, &bad, 1, &map)); + + ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); + remove(filename); +} + TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataInvalidArgs) { ERRNO code = RET_OK; const char* filename = "cwrapper_metadata_empty.tsfile"; diff --git a/python/tests/test_reader_metadata.py b/python/tests/test_reader_metadata.py index f58c0f675..fc10c40af 100644 --- a/python/tests/test_reader_metadata.py +++ b/python/tests/test_reader_metadata.py @@ -67,6 +67,11 @@ def test_get_all_devices_and_timeseries_metadata_statistic(): assert st.end_time == 3 assert st.sum_valid assert st.sum == pytest.approx(60.0) + assert st.int_range_valid + assert st.min_int64 == 10 + assert st.max_int64 == 30 + assert st.first_int64 == 10 + assert st.last_int64 == 30 assert reader.get_timeseries_metadata([]) == {} @@ -82,3 +87,84 @@ def test_get_all_devices_and_timeseries_metadata_statistic(): os.unlink(path) except OSError: pass + + +def test_get_timeseries_metadata_boolean_statistic(): + path = os.path.join(tempfile.gettempdir(), "py_reader_metadata_bool.tsfile") + try: + os.unlink(path) + except OSError: + pass + + device = "root.sg.py_bool" + writer = TsFileWriter(path) + writer.register_timeseries( + device, TimeseriesSchema("m_b", TSDataType.BOOLEAN)) + for row, b in enumerate([True, False, True]): + writer.write_row_record( + RowRecord( + device, + row + 1, + [Field("m_b", b, TSDataType.BOOLEAN)], + ) + ) + writer.close() + + reader = TsFileReader(path) + try: + meta_all = reader.get_timeseries_metadata(None) + st = meta_all[device][0].statistic + assert st.has_statistic + assert st.sum_valid + assert st.sum == pytest.approx(2.0) + assert st.bool_ext_valid + assert st.first_bool is True + assert st.last_bool is True + finally: + reader.close() + try: + os.unlink(path) + except OSError: + pass + + +def test_get_timeseries_metadata_string_statistic(): + path = os.path.join(tempfile.gettempdir(), "py_reader_metadata_str.tsfile") + try: + os.unlink(path) + except OSError: + pass + + device = "root.sg.py_str" + writer = TsFileWriter(path) + writer.register_timeseries( + device, TimeseriesSchema("m_str", TSDataType.STRING)) + for row, s in enumerate(["aa", "cc", "bb"]): + writer.write_row_record( + RowRecord( + device, + row + 1, + [Field("m_str", s, TSDataType.STRING)], + ) + ) + writer.close() + + reader = TsFileReader(path) + try: + meta_all = reader.get_timeseries_metadata(None) + m = meta_all[device][0] + assert m.measurement_name == "m_str" + assert m.data_type == TSDataType.STRING + st = m.statistic + assert st.has_statistic + assert st.str_ext_valid + assert st.str_min == "aa" + assert st.str_max == "cc" + assert st.str_first == "aa" + assert st.str_last == "bb" + finally: + reader.close() + try: + os.unlink(path) + except OSError: + pass diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index 955253eaa..10b2412ad 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -16,7 +16,7 @@ # under the License. # from dataclasses import dataclass -from typing import List +from typing import List, Optional from .exceptions import TypeMismatchError from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor @@ -42,6 +42,24 @@ class TimeseriesStatistic: end_time: int sum_valid: bool sum: float + int_range_valid: bool = False + min_int64: int = 0 + max_int64: int = 0 + first_int64: int = 0 + last_int64: int = 0 + float_range_valid: bool = False + min_float64: float = 0.0 + max_float64: float = 0.0 + first_float64: float = 0.0 + last_float64: float = 0.0 + bool_ext_valid: bool = False + first_bool: bool = False + last_bool: bool = False + str_ext_valid: bool = False + str_min: Optional[str] = None + str_max: Optional[str] = None + str_first: Optional[str] = None + str_last: Optional[str] = None @dataclass(frozen=True) diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 22f324596..10ba05a6c 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -113,6 +113,24 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": int64_t end_time bint sum_valid double sum + bint int_range_valid + int64_t min_int64 + int64_t max_int64 + int64_t first_int64 + int64_t last_int64 + bint float_range_valid + double min_float64 + double max_float64 + double first_float64 + double last_float64 + bint bool_ext_valid + bint first_bool + bint last_bool + bint str_ext_valid + char* str_min + char* str_max + char* str_first + char* str_last ctypedef struct TimeseriesMetadata: char * measurement_name diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index d913b0c4e..91910175a 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -927,6 +927,11 @@ cdef object get_all_timeseries_schema(TsFileReader reader): free(schemas) return device_schemas +cdef object _c_str_to_py_utf8_or_none(char* p): + if p == NULL: + return None + return p.decode('utf-8') + cdef object timeseries_metadata_c_to_py(TimeseriesMetadata* m): cdef str name_py if m == NULL or m.measurement_name == NULL: @@ -940,6 +945,24 @@ cdef object timeseries_metadata_c_to_py(TimeseriesMetadata* m): int(m.statistic.end_time), bool(m.statistic.sum_valid), float(m.statistic.sum), + bool(m.statistic.int_range_valid), + int(m.statistic.min_int64), + int(m.statistic.max_int64), + int(m.statistic.first_int64), + int(m.statistic.last_int64), + bool(m.statistic.float_range_valid), + float(m.statistic.min_float64), + float(m.statistic.max_float64), + float(m.statistic.first_float64), + float(m.statistic.last_float64), + bool(m.statistic.bool_ext_valid), + bool(m.statistic.first_bool), + bool(m.statistic.last_bool), + bool(m.statistic.str_ext_valid), + _c_str_to_py_utf8_or_none(m.statistic.str_min), + _c_str_to_py_utf8_or_none(m.statistic.str_max), + _c_str_to_py_utf8_or_none(m.statistic.str_first), + _c_str_to_py_utf8_or_none(m.statistic.str_last), ) return TimeseriesMetadataPy( name_py, From 135a2c0dae60cedd25c0bc59470ce45a44fa472d Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Fri, 3 Apr 2026 20:08:53 +0800 Subject: [PATCH 3/7] spotless apply --- cpp/src/cwrapper/tsfile_cwrapper.cc | 3 ++- cpp/src/cwrapper/tsfile_cwrapper.h | 3 ++- cpp/test/cwrapper/cwrapper_metadata_test.cc | 7 +++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 3582f5f68..1066305b2 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -738,7 +738,8 @@ void clear_timeseries_statistic(TimeseriesStatistic* s) { * Fills @p out from C++ Statistic. On allocation failure returns E_OOM and * clears/frees any partial string fields in @p out. */ -int fill_timeseries_statistic(storage::Statistic* st, TimeseriesStatistic* out) { +int fill_timeseries_statistic(storage::Statistic* st, + TimeseriesStatistic* out) { clear_timeseries_statistic(out); if (st == nullptr) { return common::E_OK; diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index bfa2430a7..4ab9c8611 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -146,7 +146,8 @@ typedef struct TimeseriesStatistic { bool first_bool; bool last_bool; - /** STRING: min/max lexicographic; TEXT: first/last only (min/max unused). */ + /** STRING: min/max lexicographic; TEXT: first/last only (min/max unused). + */ bool str_ext_valid; char* str_min; char* str_max; diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc index 897b838c2..6faadb676 100644 --- a/cpp/test/cwrapper/cwrapper_metadata_test.cc +++ b/cpp/test/cwrapper/cwrapper_metadata_test.cc @@ -191,10 +191,9 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataStringStatistic) { for (int row = 0; row < 3; row++) { auto* record = static_cast( _ts_record_new(device, static_cast(row + 1), 1)); - ASSERT_EQ(RET_OK, - _insert_data_into_ts_record_by_name_string_with_len( - record, m_str, vals[row], - static_cast(std::strlen(vals[row])))); + ASSERT_EQ(RET_OK, _insert_data_into_ts_record_by_name_string_with_len( + record, m_str, vals[row], + static_cast(std::strlen(vals[row])))); ASSERT_EQ(RET_OK, _tsfile_writer_write_ts_record(writer, record)); _free_tsfile_ts_record(reinterpret_cast(&record)); } From a8091203842661c190b16f1243f46dd3ea9c9dd9 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Tue, 7 Apr 2026 10:08:36 +0800 Subject: [PATCH 4/7] fix DeviceTimeseriesMetadataEntry details --- cpp/src/cwrapper/tsfile_cwrapper.cc | 186 ++++++++++++++++++-- cpp/src/cwrapper/tsfile_cwrapper.h | 31 ++++ cpp/test/cwrapper/cwrapper_metadata_test.cc | 21 +++ python/tests/test_reader_metadata.py | 47 ++++- python/tsfile/schema.py | 20 ++- python/tsfile/tsfile_cpp.pxd | 15 ++ python/tsfile/tsfile_py_cpp.pxd | 1 + python/tsfile/tsfile_py_cpp.pyx | 54 +++++- python/tsfile/tsfile_reader.pyx | 15 +- 9 files changed, 371 insertions(+), 19 deletions(-) diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 1066305b2..dc1df3c57 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -882,6 +882,16 @@ void free_device_timeseries_metadata_entries_partial( for (size_t i = 0; i < filled_count; i++) { free(entries[i].device.path); entries[i].device.path = nullptr; + free(entries[i].device_table_name); + entries[i].device_table_name = nullptr; + if (entries[i].device_segments != nullptr) { + for (uint32_t k = 0; k < entries[i].device_segment_count; k++) { + free(entries[i].device_segments[k]); + } + free(entries[i].device_segments); + entries[i].device_segments = nullptr; + } + entries[i].device_segment_count = 0; if (entries[i].timeseries != nullptr) { for (uint32_t j = 0; j < entries[i].timeseries_count; j++) { free_timeseries_statistic_heap( @@ -895,6 +905,97 @@ void free_device_timeseries_metadata_entries_partial( free(entries); } +/** + * Copies path, table name, and segment strings from IDeviceID into heap + * buffers. On failure, frees any partial allocations and returns E_OOM. + */ +int duplicate_ideviceid_to_device_fields(storage::IDeviceID* id, + char** out_path, char** out_table_name, + uint32_t* out_segment_count, + char*** out_segments) { + *out_path = nullptr; + *out_table_name = nullptr; + *out_segment_count = 0; + *out_segments = nullptr; + if (id == nullptr) { + *out_path = strdup(""); + *out_table_name = strdup(""); + if (*out_path == nullptr || *out_table_name == nullptr) { + free(*out_path); + free(*out_table_name); + *out_path = nullptr; + *out_table_name = nullptr; + return common::E_OOM; + } + return common::E_OK; + } + const std::string dname = id->get_device_name(); + *out_path = strdup(dname.c_str()); + if (*out_path == nullptr) { + return common::E_OOM; + } + const std::string tname = id->get_table_name(); + *out_table_name = strdup(tname.c_str()); + if (*out_table_name == nullptr) { + free(*out_path); + *out_path = nullptr; + return common::E_OOM; + } + const int n = id->segment_num(); + if (n <= 0) { + return common::E_OK; + } + auto* seg_arr = + static_cast(malloc(sizeof(char*) * static_cast(n))); + if (seg_arr == nullptr) { + free(*out_table_name); + *out_table_name = nullptr; + free(*out_path); + *out_path = nullptr; + return common::E_OOM; + } + memset(seg_arr, 0, sizeof(char*) * static_cast(n)); + const auto& segs = id->get_segments(); + for (int i = 0; i < n; i++) { + const std::string* ps = + (static_cast(i) < segs.size()) ? segs[i] : nullptr; + const char* lit = (ps != nullptr) ? ps->c_str() : "null"; + seg_arr[i] = strdup(lit); + if (seg_arr[i] == nullptr) { + for (int j = 0; j < i; j++) { + free(seg_arr[j]); + } + free(seg_arr); + free(*out_table_name); + *out_table_name = nullptr; + free(*out_path); + *out_path = nullptr; + return common::E_OOM; + } + } + *out_segment_count = static_cast(n); + *out_segments = seg_arr; + return common::E_OK; +} + +void clear_metadata_entry_device_only(DeviceTimeseriesMetadataEntry* e) { + if (e == nullptr) { + return; + } + free(e->device.path); + e->device.path = nullptr; + free(e->device_table_name); + e->device_table_name = nullptr; + if (e->device_segments != nullptr) { + for (uint32_t k = 0; k < e->device_segment_count; k++) { + free(e->device_segments[k]); + } + free(e->device_segments); + e->device_segments = nullptr; + } + e->device_segment_count = 0; +} + } // namespace ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, @@ -939,6 +1040,72 @@ void tsfile_free_device_id_array(DeviceID* devices, uint32_t length) { free(devices); } +ERRNO tsfile_reader_get_all_device_details(TsFileReader reader, + TsDeviceDetails** out_details, + uint32_t* out_length) { + if (reader == nullptr || out_details == nullptr || out_length == nullptr) { + return common::E_INVALID_ARG; + } + *out_details = nullptr; + *out_length = 0; + auto* r = static_cast(reader); + const auto ids = r->get_all_devices(); + if (ids.empty()) { + return common::E_OK; + } + auto* arr = static_cast( + malloc(sizeof(TsDeviceDetails) * ids.size())); + if (arr == nullptr) { + return common::E_OOM; + } + memset(arr, 0, sizeof(TsDeviceDetails) * ids.size()); + for (size_t i = 0; i < ids.size(); i++) { + TsDeviceDetails& d = arr[i]; + const int rc = duplicate_ideviceid_to_device_fields( + ids[i].get(), &d.path, &d.table_name, &d.segment_count, + &d.segments); + if (rc != common::E_OK) { + for (size_t j = 0; j < i; j++) { + free(arr[j].path); + free(arr[j].table_name); + if (arr[j].segments != nullptr) { + for (uint32_t k = 0; k < arr[j].segment_count; k++) { + free(arr[j].segments[k]); + } + free(arr[j].segments); + } + } + free(arr); + return rc; + } + } + *out_details = arr; + *out_length = static_cast(ids.size()); + return common::E_OK; +} + +void tsfile_free_device_details_array(TsDeviceDetails* details, + uint32_t length) { + if (details == nullptr) { + return; + } + for (uint32_t i = 0; i < length; i++) { + free(details[i].path); + details[i].path = nullptr; + free(details[i].table_name); + details[i].table_name = nullptr; + if (details[i].segments != nullptr) { + for (uint32_t k = 0; k < details[i].segment_count; k++) { + free(details[i].segments[k]); + } + free(details[i].segments); + details[i].segments = nullptr; + } + details[i].segment_count = 0; + } + free(details); +} + ERRNO tsfile_reader_get_timeseries_metadata( TsFileReader reader, const DeviceID* device_ids, uint32_t length, DeviceTimeseriesMetadataMap* out_map) { @@ -978,12 +1145,12 @@ ERRNO tsfile_reader_get_timeseries_metadata( size_t di = 0; for (const auto& kv : cpp_map) { DeviceTimeseriesMetadataEntry& e = entries[di]; - const std::string dname = - kv.first ? kv.first->get_device_name() : std::string(); - e.device.path = strdup(dname.c_str()); - if (e.device.path == nullptr) { + const int dup_rc = duplicate_ideviceid_to_device_fields( + kv.first ? kv.first.get() : nullptr, &e.device.path, + &e.device_table_name, &e.device_segment_count, &e.device_segments); + if (dup_rc != common::E_OK) { free_device_timeseries_metadata_entries_partial(entries, di); - return common::E_OOM; + return dup_rc; } const auto& vec = kv.second; uint32_t n_ts = 0; @@ -1001,8 +1168,7 @@ ERRNO tsfile_reader_get_timeseries_metadata( e.timeseries = static_cast( malloc(sizeof(TimeseriesMetadata) * e.timeseries_count)); if (e.timeseries == nullptr) { - free(e.device.path); - e.device.path = nullptr; + clear_metadata_entry_device_only(&e); free_device_timeseries_metadata_entries_partial(entries, di); return common::E_OOM; } @@ -1023,8 +1189,7 @@ ERRNO tsfile_reader_get_timeseries_metadata( } free(e.timeseries); e.timeseries = nullptr; - free(e.device.path); - e.device.path = nullptr; + clear_metadata_entry_device_only(&e); free_device_timeseries_metadata_entries_partial(entries, di); return common::E_OOM; } @@ -1046,8 +1211,7 @@ ERRNO tsfile_reader_get_timeseries_metadata( free(m.measurement_name); free(e.timeseries); e.timeseries = nullptr; - free(e.device.path); - e.device.path = nullptr; + clear_metadata_entry_device_only(&e); free_device_timeseries_metadata_entries_partial(entries, di); return st_rc; } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 4ab9c8611..30f2e9a81 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -165,8 +165,29 @@ typedef struct TimeseriesMetadata { TimeseriesStatistic statistic; } TimeseriesMetadata; +/** + * @brief Device identity fields from IDeviceID (path, table name, segments). + * Allocated by tsfile_reader_get_all_device_details; freed by + * tsfile_free_device_details_array. + */ +typedef struct TsDeviceDetails { + char* path; + char* table_name; + uint32_t segment_count; + char** segments; +} TsDeviceDetails; + +/** + * @brief One device's timeseries metadata list plus structured device fields. + * + * device_table_name / device_segments are malloc'd; freed by + * tsfile_free_device_timeseries_metadata_map (do not free individually). + */ typedef struct DeviceTimeseriesMetadataEntry { DeviceID device; + char* device_table_name; + uint32_t device_segment_count; + char** device_segments; TimeseriesMetadata* timeseries; uint32_t timeseries_count; } DeviceTimeseriesMetadataEntry; @@ -408,6 +429,16 @@ ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, void tsfile_free_device_id_array(DeviceID* devices, uint32_t length); +/** + * @brief Lists all devices with table name and path segments (from IDeviceID). + */ +ERRNO tsfile_reader_get_all_device_details(TsFileReader reader, + TsDeviceDetails** out_details, + uint32_t* out_length); + +void tsfile_free_device_details_array(TsDeviceDetails* details, + uint32_t length); + /** * @brief Timeseries metadata for none, some, or all devices. * diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc index 6faadb676..2594fe33e 100644 --- a/cpp/test/cwrapper/cwrapper_metadata_test.cc +++ b/cpp/test/cwrapper/cwrapper_metadata_test.cc @@ -73,12 +73,33 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { ASSERT_STREQ(device, devices[0].path); tsfile_free_device_id_array(devices, n_dev); + TsDeviceDetails* details = nullptr; + uint32_t n_det = 0; + ASSERT_EQ(RET_OK, + tsfile_reader_get_all_device_details(reader, &details, &n_det)); + ASSERT_EQ(1u, n_det); + ASSERT_NE(nullptr, details); + ASSERT_STREQ(device, details[0].path); + ASSERT_NE(nullptr, details[0].table_name); + EXPECT_STREQ("root.sg", details[0].table_name); + EXPECT_EQ(2u, details[0].segment_count); + ASSERT_NE(nullptr, details[0].segments); + EXPECT_STREQ("root.sg", details[0].segments[0]); + EXPECT_STREQ("d1", details[0].segments[1]); + tsfile_free_device_details_array(details, n_det); + DeviceTimeseriesMetadataMap map{}; ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, &map)); ASSERT_EQ(1u, map.device_count); ASSERT_NE(nullptr, map.entries); ASSERT_STREQ(device, map.entries[0].device.path); + ASSERT_NE(nullptr, map.entries[0].device_table_name); + EXPECT_STREQ("root.sg", map.entries[0].device_table_name); + EXPECT_EQ(2u, map.entries[0].device_segment_count); + ASSERT_NE(nullptr, map.entries[0].device_segments); + EXPECT_STREQ("root.sg", map.entries[0].device_segments[0]); + EXPECT_STREQ("d1", map.entries[0].device_segments[1]); ASSERT_EQ(1u, map.entries[0].timeseries_count); ASSERT_NE(nullptr, map.entries[0].timeseries); TimeseriesMetadata& tm = map.entries[0].timeseries[0]; diff --git a/python/tests/test_reader_metadata.py b/python/tests/test_reader_metadata.py index fc10c40af..1b6a4633b 100644 --- a/python/tests/test_reader_metadata.py +++ b/python/tests/test_reader_metadata.py @@ -25,6 +25,42 @@ from tsfile.schema import DeviceID +def test_get_all_device_details_segments(): + path = os.path.join(tempfile.gettempdir(), "py_reader_metadata_details.tsfile") + try: + os.unlink(path) + except OSError: + pass + + device = "root.sg.py_details" + writer = TsFileWriter(path) + writer.register_timeseries( + device, TimeseriesSchema("m", TSDataType.INT32)) + writer.write_row_record( + RowRecord(device, 1, [Field("m", 1, TSDataType.INT32)])) + writer.close() + + reader = TsFileReader(path) + try: + details = reader.get_all_device_details() + assert len(details) == 1 + d0 = details[0] + assert d0.path == device + assert d0.table_name == "root.sg" + assert d0.segments == ("root.sg", "py_details") + + grp = reader.get_timeseries_metadata(None)[device] + assert grp.table_name == "root.sg" + assert grp.segments == ("root.sg", "py_details") + assert len(grp.timeseries) == 1 + finally: + reader.close() + try: + os.unlink(path) + except OSError: + pass + + def test_get_all_devices_and_timeseries_metadata_statistic(): path = os.path.join(tempfile.gettempdir(), "py_reader_metadata_stat.tsfile") try: @@ -55,7 +91,10 @@ def test_get_all_devices_and_timeseries_metadata_statistic(): meta_all = reader.get_timeseries_metadata(None) assert list(meta_all.keys()) == [device] - series = meta_all[device] + grp = meta_all[device] + assert grp.table_name == "root.sg" + assert grp.segments == ("root.sg", "py_meta") + series = grp.timeseries assert len(series) == 1 m = series[0] assert m.measurement_name == "m_int" @@ -77,7 +116,7 @@ def test_get_all_devices_and_timeseries_metadata_statistic(): sub = reader.get_timeseries_metadata([DeviceID(device)]) assert device in sub - assert len(sub[device]) == 1 + assert len(sub[device].timeseries) == 1 sub_str = reader.get_timeseries_metadata([device]) assert device in sub_str @@ -113,7 +152,7 @@ def test_get_timeseries_metadata_boolean_statistic(): reader = TsFileReader(path) try: meta_all = reader.get_timeseries_metadata(None) - st = meta_all[device][0].statistic + st = meta_all[device].timeseries[0].statistic assert st.has_statistic assert st.sum_valid assert st.sum == pytest.approx(2.0) @@ -152,7 +191,7 @@ def test_get_timeseries_metadata_string_statistic(): reader = TsFileReader(path) try: meta_all = reader.get_timeseries_metadata(None) - m = meta_all[device][0] + m = meta_all[device].timeseries[0] assert m.measurement_name == "m_str" assert m.data_type == TSDataType.STRING st = m.statistic diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index 10b2412ad..48319a34a 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -16,7 +16,7 @@ # under the License. # from dataclasses import dataclass -from typing import List, Optional +from typing import List, Optional, Tuple from .exceptions import TypeMismatchError from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor @@ -72,6 +72,24 @@ class TimeseriesMetadata: statistic: TimeseriesStatistic +@dataclass(frozen=True) +class DeviceDetails: + """Structured device identity from the native reader (path, table name, segments).""" + + path: str + table_name: str + segments: Tuple[str, ...] + + +@dataclass(frozen=True) +class DeviceTimeseriesMetadataGroup: + """One device's timeseries list plus table name and path segments (dict key is device path).""" + + table_name: str + segments: Tuple[str, ...] + timeseries: List[TimeseriesMetadata] + + class TimeseriesSchema: """ Metadata schema for a time series (name, data type, encoding, compression). diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 10ba05a6c..4ea9df123 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -138,8 +138,17 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": int32_t chunk_meta_count TimeseriesStatistic statistic + ctypedef struct TsDeviceDetails: + char * path + char * table_name + uint32_t segment_count + char ** segments + ctypedef struct DeviceTimeseriesMetadataEntry: DeviceID device + char * device_table_name + uint32_t device_segment_count + char ** device_segments TimeseriesMetadata * timeseries uint32_t timeseries_count @@ -269,6 +278,12 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": uint32_t * out_length); void tsfile_free_device_id_array(DeviceID * devices, uint32_t length); + ErrorCode tsfile_reader_get_all_device_details( + TsFileReader reader, TsDeviceDetails ** out_details, + uint32_t * out_length); + void tsfile_free_device_details_array(TsDeviceDetails * details, + uint32_t length); + ErrorCode tsfile_reader_get_timeseries_metadata( TsFileReader reader, const DeviceID * device_ids, uint32_t length, DeviceTimeseriesMetadataMap * out_map); diff --git a/python/tsfile/tsfile_py_cpp.pxd b/python/tsfile/tsfile_py_cpp.pxd index b6baee80d..7f501fd34 100644 --- a/python/tsfile/tsfile_py_cpp.pxd +++ b/python/tsfile/tsfile_py_cpp.pxd @@ -68,6 +68,7 @@ cdef public api object get_table_schema(TsFileReader reader, object table_name) cdef public api object get_all_table_schema(TsFileReader reader) cdef public api object get_all_timeseries_schema(TsFileReader reader) cdef public api object reader_get_all_devices_c(TsFileReader reader) +cdef public api object reader_get_all_device_details_c(TsFileReader reader) cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, object device_ids) cpdef public api object get_tsfile_config() diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index 91910175a..3fe93bcc6 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -38,6 +38,8 @@ from tsfile.schema import Compressor as CompressorPy, ColumnCategory as Category from tsfile.schema import TableSchema as TableSchemaPy, ColumnSchema as ColumnSchemaPy from tsfile.schema import DeviceSchema as DeviceSchemaPy, TimeseriesSchema as TimeseriesSchemaPy from tsfile.schema import DeviceID as ReaderDeviceID +from tsfile.schema import DeviceDetails as DeviceDetailsPy +from tsfile.schema import DeviceTimeseriesMetadataGroup as DeviceTimeseriesMetadataGroupPy from tsfile.schema import TimeseriesStatistic as TimeseriesStatisticPy from tsfile.schema import TimeseriesMetadata as TimeseriesMetadataPy @@ -971,11 +973,24 @@ cdef object timeseries_metadata_c_to_py(TimeseriesMetadata* m): stat, ) +cdef tuple c_device_segments_to_tuple(char** segs, uint32_t n): + cdef uint32_t i + cdef list out = [] + for i in range(n): + if segs[i] == NULL: + out.append("") + else: + out.append(segs[i].decode('utf-8')) + return tuple(out) + cdef dict device_timeseries_metadata_map_to_py(DeviceTimeseriesMetadataMap* mmap): cdef dict out = {} cdef uint32_t di, ti cdef char* p + cdef char* tnp cdef str key + cdef str table_py + cdef tuple segs_py cdef list series for di in range(mmap.device_count): p = mmap.entries[di].device.path @@ -983,12 +998,21 @@ cdef dict device_timeseries_metadata_map_to_py(DeviceTimeseriesMetadataMap* mmap key = "" else: key = p.decode('utf-8') + tnp = mmap.entries[di].device_table_name + if tnp == NULL: + table_py = "" + else: + table_py = tnp.decode('utf-8') + segs_py = c_device_segments_to_tuple( + mmap.entries[di].device_segments, + mmap.entries[di].device_segment_count) series = [] for ti in range(mmap.entries[di].timeseries_count): series.append( timeseries_metadata_c_to_py( &mmap.entries[di].timeseries[ti])) - out[key] = series + out[key] = DeviceTimeseriesMetadataGroupPy( + table_py, segs_py, series) return out cdef public api object reader_get_all_devices_c(TsFileReader reader): @@ -1006,6 +1030,34 @@ cdef public api object reader_get_all_devices_c(TsFileReader reader): tsfile_free_device_id_array(arr, n) return out +cdef public api object reader_get_all_device_details_c(TsFileReader reader): + cdef TsDeviceDetails* arr = NULL + cdef uint32_t n = 0 + cdef int err + cdef list out = [] + cdef uint32_t i + cdef str path_py + cdef str tname_py + cdef tuple segs_py + err = tsfile_reader_get_all_device_details(reader, &arr, &n) + check_error(err) + try: + for i in range(n): + if arr[i].path == NULL: + path_py = "" + else: + path_py = arr[i].path.decode('utf-8') + if arr[i].table_name == NULL: + tname_py = "" + else: + tname_py = arr[i].table_name.decode('utf-8') + segs_py = c_device_segments_to_tuple(arr[i].segments, + arr[i].segment_count) + out.append(DeviceDetailsPy(path_py, tname_py, segs_py)) + finally: + tsfile_free_device_details_array(arr, n) + return out + cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, object device_ids): cdef DeviceTimeseriesMetadataMap mmap diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index 52a9a94f7..8164b6f4a 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -30,6 +30,7 @@ import pyarrow as pa from libc.stdint cimport INT64_MIN, INT64_MAX, uintptr_t from tsfile.schema import TSDataType as TSDataTypePy +from tsfile.schema import DeviceDetails, DeviceTimeseriesMetadataGroup from .date_utils import parse_int_to_date from .tsfile_cpp cimport * from .tsfile_py_cpp cimport * @@ -433,9 +434,19 @@ cdef class TsFileReaderPy: """ return reader_get_all_devices_c(self.reader) - def get_timeseries_metadata(self, device_ids: Optional[List] = None) -> Dict[str, list]: + def get_all_device_details(self) -> List[DeviceDetails]: """ - Return map device path -> list of :class:`tsfile.schema.TimeseriesMetadata`. + Return all devices with path, table name, and segment list from + :class:`tsfile.schema.DeviceDetails`. + """ + return reader_get_all_device_details_c(self.reader) + + def get_timeseries_metadata( + self, device_ids: Optional[List] = None + ) -> Dict[str, DeviceTimeseriesMetadataGroup]: + """ + Return map device path -> :class:`tsfile.schema.DeviceTimeseriesMetadataGroup` + (table name, segments, and list of :class:`tsfile.schema.TimeseriesMetadata`). ``device_ids is None``: all devices. ``device_ids == []``: empty map. Non-empty list restricts to those devices (only existing devices appear). From 6bfe1ef495df7342914661f3c165ad3934b36433 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Tue, 7 Apr 2026 10:54:21 +0800 Subject: [PATCH 5/7] replace DeviceID to TsDeviceDetails --- cpp/src/cwrapper/tsfile_cwrapper.cc | 280 ++++++++------------ cpp/src/cwrapper/tsfile_cwrapper.h | 65 ++--- cpp/test/cwrapper/cwrapper_metadata_test.cc | 59 ++--- python/tests/test_reader_metadata.py | 8 +- python/tsfile/schema.py | 10 - python/tsfile/tsfile_cpp.pxd | 25 +- python/tsfile/tsfile_py_cpp.pxd | 1 - python/tsfile/tsfile_py_cpp.pyx | 39 +-- python/tsfile/tsfile_reader.pyx | 12 +- 9 files changed, 195 insertions(+), 304 deletions(-) diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index dc1df3c57..809895ce0 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -699,7 +699,23 @@ DeviceSchema* tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, return device_schema; } -const DeviceID tsfile_c_metadata_empty_device_list_marker = {nullptr}; +void tsfile_device_details_free_contents(TsDeviceDetails* d) { + if (d == nullptr) { + return; + } + free(d->path); + d->path = nullptr; + free(d->table_name); + d->table_name = nullptr; + if (d->segments != nullptr) { + for (uint32_t k = 0; k < d->segment_count; k++) { + free(d->segments[k]); + } + free(d->segments); + d->segments = nullptr; + } + d->segment_count = 0; +} namespace { @@ -880,18 +896,7 @@ void free_device_timeseries_metadata_entries_partial( return; } for (size_t i = 0; i < filled_count; i++) { - free(entries[i].device.path); - entries[i].device.path = nullptr; - free(entries[i].device_table_name); - entries[i].device_table_name = nullptr; - if (entries[i].device_segments != nullptr) { - for (uint32_t k = 0; k < entries[i].device_segment_count; k++) { - free(entries[i].device_segments[k]); - } - free(entries[i].device_segments); - entries[i].device_segments = nullptr; - } - entries[i].device_segment_count = 0; + tsfile_device_details_free_contents(&entries[i].device); if (entries[i].timeseries != nullptr) { for (uint32_t j = 0; j < entries[i].timeseries_count; j++) { free_timeseries_statistic_heap( @@ -978,160 +983,23 @@ int duplicate_ideviceid_to_device_fields(storage::IDeviceID* id, return common::E_OK; } -void clear_metadata_entry_device_only(DeviceTimeseriesMetadataEntry* e) { - if (e == nullptr) { - return; - } - free(e->device.path); - e->device.path = nullptr; - free(e->device_table_name); - e->device_table_name = nullptr; - if (e->device_segments != nullptr) { - for (uint32_t k = 0; k < e->device_segment_count; k++) { - free(e->device_segments[k]); - } - free(e->device_segments); - e->device_segments = nullptr; - } - e->device_segment_count = 0; -} - -} // namespace - -ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, - uint32_t* out_length) { - if (reader == nullptr || out_devices == nullptr || out_length == nullptr) { - return common::E_INVALID_ARG; - } - *out_devices = nullptr; - *out_length = 0; - auto* r = static_cast(reader); - const auto ids = r->get_all_devices(); - if (ids.empty()) { - return common::E_OK; - } - auto* arr = static_cast(malloc(sizeof(DeviceID) * ids.size())); - if (arr == nullptr) { - return common::E_OOM; - } - memset(arr, 0, sizeof(DeviceID) * ids.size()); - for (size_t i = 0; i < ids.size(); i++) { - const std::string name = - ids[i] ? ids[i]->get_device_name() : std::string(); - arr[i].path = strdup(name.c_str()); - if (arr[i].path == nullptr) { - tsfile_free_device_id_array(arr, static_cast(i)); - return common::E_OOM; - } - } - *out_devices = arr; - *out_length = static_cast(ids.size()); - return common::E_OK; +int fill_tsdevice_details_from_id(storage::IDeviceID* id, + TsDeviceDetails* out) { + memset(out, 0, sizeof(*out)); + return duplicate_ideviceid_to_device_fields( + id, &out->path, &out->table_name, &out->segment_count, &out->segments); } -void tsfile_free_device_id_array(DeviceID* devices, uint32_t length) { - if (devices == nullptr) { +void clear_metadata_entry_device_only(DeviceTimeseriesMetadataEntry* e) { + if (e == nullptr) { return; } - for (uint32_t i = 0; i < length; i++) { - free(devices[i].path); - devices[i].path = nullptr; - } - free(devices); + tsfile_device_details_free_contents(&e->device); } -ERRNO tsfile_reader_get_all_device_details(TsFileReader reader, - TsDeviceDetails** out_details, - uint32_t* out_length) { - if (reader == nullptr || out_details == nullptr || out_length == nullptr) { - return common::E_INVALID_ARG; - } - *out_details = nullptr; - *out_length = 0; - auto* r = static_cast(reader); - const auto ids = r->get_all_devices(); - if (ids.empty()) { - return common::E_OK; - } - auto* arr = static_cast( - malloc(sizeof(TsDeviceDetails) * ids.size())); - if (arr == nullptr) { - return common::E_OOM; - } - memset(arr, 0, sizeof(TsDeviceDetails) * ids.size()); - for (size_t i = 0; i < ids.size(); i++) { - TsDeviceDetails& d = arr[i]; - const int rc = duplicate_ideviceid_to_device_fields( - ids[i].get(), &d.path, &d.table_name, &d.segment_count, - &d.segments); - if (rc != common::E_OK) { - for (size_t j = 0; j < i; j++) { - free(arr[j].path); - free(arr[j].table_name); - if (arr[j].segments != nullptr) { - for (uint32_t k = 0; k < arr[j].segment_count; k++) { - free(arr[j].segments[k]); - } - free(arr[j].segments); - } - } - free(arr); - return rc; - } - } - *out_details = arr; - *out_length = static_cast(ids.size()); - return common::E_OK; -} - -void tsfile_free_device_details_array(TsDeviceDetails* details, - uint32_t length) { - if (details == nullptr) { - return; - } - for (uint32_t i = 0; i < length; i++) { - free(details[i].path); - details[i].path = nullptr; - free(details[i].table_name); - details[i].table_name = nullptr; - if (details[i].segments != nullptr) { - for (uint32_t k = 0; k < details[i].segment_count; k++) { - free(details[i].segments[k]); - } - free(details[i].segments); - details[i].segments = nullptr; - } - details[i].segment_count = 0; - } - free(details); -} - -ERRNO tsfile_reader_get_timeseries_metadata( - TsFileReader reader, const DeviceID* device_ids, uint32_t length, +ERRNO populate_c_metadata_map_from_cpp( + storage::DeviceTimeseriesMetadataMap& cpp_map, DeviceTimeseriesMetadataMap* out_map) { - if (reader == nullptr || out_map == nullptr) { - return common::E_INVALID_ARG; - } - out_map->entries = nullptr; - out_map->device_count = 0; - auto* r = static_cast(reader); - storage::DeviceTimeseriesMetadataMap cpp_map; - if (device_ids == nullptr) { - cpp_map = r->get_timeseries_metadata(); - } else if (length == 0) { - return common::E_OK; - } else { - std::vector> query_ids; - query_ids.reserve(length); - for (uint32_t i = 0; i < length; i++) { - if (device_ids[i].path == nullptr) { - return common::E_INVALID_ARG; - } - query_ids.push_back(std::make_shared( - std::string(device_ids[i].path))); - } - cpp_map = r->get_timeseries_metadata(query_ids); - } if (cpp_map.empty()) { return common::E_OK; } @@ -1145,9 +1013,8 @@ ERRNO tsfile_reader_get_timeseries_metadata( size_t di = 0; for (const auto& kv : cpp_map) { DeviceTimeseriesMetadataEntry& e = entries[di]; - const int dup_rc = duplicate_ideviceid_to_device_fields( - kv.first ? kv.first.get() : nullptr, &e.device.path, - &e.device_table_name, &e.device_segment_count, &e.device_segments); + const int dup_rc = fill_tsdevice_details_from_id( + kv.first ? kv.first.get() : nullptr, &e.device); if (dup_rc != common::E_OK) { free_device_timeseries_metadata_entries_partial(entries, di); return dup_rc; @@ -1224,6 +1091,93 @@ ERRNO tsfile_reader_get_timeseries_metadata( return common::E_OK; } +} // namespace + +void tsfile_free_device_details_array(TsDeviceDetails* details, + uint32_t length) { + if (details == nullptr) { + return; + } + for (uint32_t i = 0; i < length; i++) { + tsfile_device_details_free_contents(&details[i]); + } + free(details); +} + +ERRNO tsfile_reader_get_all_devices(TsFileReader reader, + TsDeviceDetails** out_devices, + uint32_t* out_length) { + if (reader == nullptr || out_devices == nullptr || out_length == nullptr) { + return common::E_INVALID_ARG; + } + *out_devices = nullptr; + *out_length = 0; + auto* r = static_cast(reader); + const auto ids = r->get_all_devices(); + if (ids.empty()) { + return common::E_OK; + } + auto* arr = static_cast( + malloc(sizeof(TsDeviceDetails) * ids.size())); + if (arr == nullptr) { + return common::E_OOM; + } + memset(arr, 0, sizeof(TsDeviceDetails) * ids.size()); + for (size_t i = 0; i < ids.size(); i++) { + const int rc = fill_tsdevice_details_from_id(ids[i].get(), &arr[i]); + if (rc != common::E_OK) { + tsfile_free_device_details_array(arr, static_cast(i)); + return rc; + } + } + *out_devices = arr; + *out_length = static_cast(ids.size()); + return common::E_OK; +} + +ERRNO tsfile_reader_get_timeseries_metadata_all( + TsFileReader reader, DeviceTimeseriesMetadataMap* out_map) { + if (reader == nullptr || out_map == nullptr) { + return common::E_INVALID_ARG; + } + out_map->entries = nullptr; + out_map->device_count = 0; + auto* r = static_cast(reader); + storage::DeviceTimeseriesMetadataMap cpp_map = r->get_timeseries_metadata(); + return populate_c_metadata_map_from_cpp(cpp_map, out_map); +} + +ERRNO tsfile_reader_get_timeseries_metadata_for_devices( + TsFileReader reader, const TsDeviceDetails* devices, uint32_t length, + DeviceTimeseriesMetadataMap* out_map) { + if (reader == nullptr || out_map == nullptr) { + return common::E_INVALID_ARG; + } + out_map->entries = nullptr; + out_map->device_count = 0; + if (length == 0) { + return common::E_OK; + } + if (devices == nullptr) { + return common::E_INVALID_ARG; + } + for (uint32_t i = 0; i < length; i++) { + if (devices[i].path == nullptr) { + return common::E_INVALID_ARG; + } + } + auto* r = static_cast(reader); + std::vector> query_ids; + query_ids.reserve(length); + for (uint32_t i = 0; i < length; i++) { + query_ids.push_back(std::make_shared( + std::string(devices[i].path))); + } + storage::DeviceTimeseriesMetadataMap cpp_map = + r->get_timeseries_metadata(query_ids); + return populate_c_metadata_map_from_cpp(cpp_map, out_map); +} + void tsfile_free_device_timeseries_metadata_map( DeviceTimeseriesMetadataMap* map) { if (map == nullptr) { diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 30f2e9a81..31fb5d05a 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -104,13 +104,6 @@ typedef struct device_schema { int timeseries_num; } DeviceSchema; -/** - * @brief Device identifier for C API (canonical path string from IDeviceID). - */ -typedef struct DeviceID { - char* path; -} DeviceID; - /** * @brief Aggregated statistic for one timeseries (subset of C++ Statistic). * @@ -166,9 +159,11 @@ typedef struct TimeseriesMetadata { } TimeseriesMetadata; /** - * @brief Device identity fields from IDeviceID (path, table name, segments). - * Allocated by tsfile_reader_get_all_device_details; freed by - * tsfile_free_device_details_array. + * @brief Device identity from IDeviceID (path, table name, segments). + * + * Heap fields are freed by tsfile_device_details_free_contents or + * tsfile_free_device_details_array, or as part of + * tsfile_free_device_timeseries_metadata_map for entries. */ typedef struct TsDeviceDetails { char* path; @@ -178,16 +173,12 @@ typedef struct TsDeviceDetails { } TsDeviceDetails; /** - * @brief One device's timeseries metadata list plus structured device fields. + * @brief One device's timeseries metadata list plus TsDeviceDetails. * - * device_table_name / device_segments are malloc'd; freed by - * tsfile_free_device_timeseries_metadata_map (do not free individually). + * @p device heap fields freed by tsfile_free_device_timeseries_metadata_map. */ typedef struct DeviceTimeseriesMetadataEntry { - DeviceID device; - char* device_table_name; - uint32_t device_segment_count; - char** device_segments; + TsDeviceDetails device; TimeseriesMetadata* timeseries; uint32_t timeseries_count; } DeviceTimeseriesMetadataEntry; @@ -201,9 +192,8 @@ typedef struct DeviceTimeseriesMetadataMap { uint32_t device_count; } DeviceTimeseriesMetadataMap; -/** Sentinel: optional address for bindings when querying an empty device_id - * list (length 0). */ -extern const DeviceID tsfile_c_metadata_empty_device_list_marker; +/** Frees path, table_name, and segments inside @p d; zeros @p d. */ +void tsfile_device_details_free_contents(TsDeviceDetails* d); typedef struct result_set_meta_data { char** column_names; @@ -418,38 +408,33 @@ ERRNO tsfile_writer_close(TsFileWriter writer); ERRNO tsfile_reader_close(TsFileReader reader); /** - * @brief Lists all devices in the file. + * @brief Lists all devices (path, table name, segments from IDeviceID). * * @param out_devices [out] Allocated array; caller frees with - * tsfile_free_device_id_array. - * @param out_length [out] Number of devices. + * tsfile_free_device_details_array. */ -ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, +ERRNO tsfile_reader_get_all_devices(TsFileReader reader, + TsDeviceDetails** out_devices, uint32_t* out_length); -void tsfile_free_device_id_array(DeviceID* devices, uint32_t length); +void tsfile_free_device_details_array(TsDeviceDetails* details, + uint32_t length); /** - * @brief Lists all devices with table name and path segments (from IDeviceID). + * @brief Timeseries metadata for all devices in the file. */ -ERRNO tsfile_reader_get_all_device_details(TsFileReader reader, - TsDeviceDetails** out_details, - uint32_t* out_length); - -void tsfile_free_device_details_array(TsDeviceDetails* details, - uint32_t length); +ERRNO tsfile_reader_get_timeseries_metadata_all( + TsFileReader reader, DeviceTimeseriesMetadataMap* out_map); /** - * @brief Timeseries metadata for none, some, or all devices. + * @brief Timeseries metadata for a subset of devices. * - * @param device_ids NULL: all devices (length ignored). - * Non-NULL with length==0: empty result (E_OK), device_ids - * not read. Non-NULL with length>0: only these devices (existing only). - * @param out_map [out] Must point to zeroed struct; filled on success. - * Free with tsfile_free_device_timeseries_metadata_map. + * @param devices NULL and length>0 is E_INVALID_ARG. length==0: empty result + * (E_OK); @p devices is not read. + * For each entry, @p path must be non-NULL (canonical device path). */ -ERRNO tsfile_reader_get_timeseries_metadata( - TsFileReader reader, const DeviceID* device_ids, uint32_t length, +ERRNO tsfile_reader_get_timeseries_metadata_for_devices( + TsFileReader reader, const TsDeviceDetails* devices, uint32_t length, DeviceTimeseriesMetadataMap* out_map); void tsfile_free_device_timeseries_metadata_map( diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc index 2594fe33e..6dcfd5a37 100644 --- a/cpp/test/cwrapper/cwrapper_metadata_test.cc +++ b/cpp/test/cwrapper/cwrapper_metadata_test.cc @@ -65,18 +65,9 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { ASSERT_EQ(RET_OK, code); ASSERT_NE(nullptr, reader); - DeviceID* devices = nullptr; - uint32_t n_dev = 0; - ASSERT_EQ(RET_OK, tsfile_reader_get_all_devices(reader, &devices, &n_dev)); - ASSERT_EQ(1u, n_dev); - ASSERT_NE(nullptr, devices); - ASSERT_STREQ(device, devices[0].path); - tsfile_free_device_id_array(devices, n_dev); - TsDeviceDetails* details = nullptr; uint32_t n_det = 0; - ASSERT_EQ(RET_OK, - tsfile_reader_get_all_device_details(reader, &details, &n_det)); + ASSERT_EQ(RET_OK, tsfile_reader_get_all_devices(reader, &details, &n_det)); ASSERT_EQ(1u, n_det); ASSERT_NE(nullptr, details); ASSERT_STREQ(device, details[0].path); @@ -89,17 +80,16 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { tsfile_free_device_details_array(details, n_det); DeviceTimeseriesMetadataMap map{}; - ASSERT_EQ(RET_OK, - tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, &map)); + ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_all(reader, &map)); ASSERT_EQ(1u, map.device_count); ASSERT_NE(nullptr, map.entries); ASSERT_STREQ(device, map.entries[0].device.path); - ASSERT_NE(nullptr, map.entries[0].device_table_name); - EXPECT_STREQ("root.sg", map.entries[0].device_table_name); - EXPECT_EQ(2u, map.entries[0].device_segment_count); - ASSERT_NE(nullptr, map.entries[0].device_segments); - EXPECT_STREQ("root.sg", map.entries[0].device_segments[0]); - EXPECT_STREQ("d1", map.entries[0].device_segments[1]); + ASSERT_NE(nullptr, map.entries[0].device.table_name); + EXPECT_STREQ("root.sg", map.entries[0].device.table_name); + EXPECT_EQ(2u, map.entries[0].device.segment_count); + ASSERT_NE(nullptr, map.entries[0].device.segments); + EXPECT_STREQ("root.sg", map.entries[0].device.segments[0]); + EXPECT_STREQ("d1", map.entries[0].device.segments[1]); ASSERT_EQ(1u, map.entries[0].timeseries_count); ASSERT_NE(nullptr, map.entries[0].timeseries); TimeseriesMetadata& tm = map.entries[0].timeseries[0]; @@ -120,17 +110,19 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { tsfile_free_device_timeseries_metadata_map(&map); DeviceTimeseriesMetadataMap empty{}; - ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata( - reader, &tsfile_c_metadata_empty_device_list_marker, - 0, &empty)); + ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_for_devices( + reader, nullptr, 0, &empty)); EXPECT_EQ(0u, empty.device_count); EXPECT_EQ(nullptr, empty.entries); - DeviceID q{}; + TsDeviceDetails q{}; q.path = const_cast(device); + q.table_name = nullptr; + q.segment_count = 0; + q.segments = nullptr; DeviceTimeseriesMetadataMap one{}; - ASSERT_EQ(RET_OK, - tsfile_reader_get_timeseries_metadata(reader, &q, 1, &one)); + ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_for_devices( + reader, &q, 1, &one)); ASSERT_EQ(1u, one.device_count); tsfile_free_device_timeseries_metadata_map(&one); @@ -172,8 +164,7 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataBooleanStatistic) { ASSERT_EQ(RET_OK, code); DeviceTimeseriesMetadataMap map{}; - ASSERT_EQ(RET_OK, - tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, &map)); + ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_all(reader, &map)); TimeseriesMetadata& tm = map.entries[0].timeseries[0]; ASSERT_STREQ(m_b, tm.measurement_name); ASSERT_EQ(TS_DATATYPE_BOOLEAN, tm.data_type); @@ -224,8 +215,7 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataStringStatistic) { ASSERT_EQ(RET_OK, code); DeviceTimeseriesMetadataMap map{}; - ASSERT_EQ(RET_OK, - tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, &map)); + ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_all(reader, &map)); ASSERT_EQ(1u, map.device_count); TimeseriesMetadata& tm = map.entries[0].timeseries[0]; ASSERT_STREQ(m_str, tm.measurement_name); @@ -260,11 +250,15 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataNullDevicePath) { TsFileReader reader = tsfile_reader_new(filename, &code); ASSERT_EQ(RET_OK, code); - DeviceID bad{}; + TsDeviceDetails bad{}; bad.path = nullptr; + bad.table_name = nullptr; + bad.segment_count = 0; + bad.segments = nullptr; DeviceTimeseriesMetadataMap map{}; EXPECT_EQ(RET_INVALID_ARG, - tsfile_reader_get_timeseries_metadata(reader, &bad, 1, &map)); + tsfile_reader_get_timeseries_metadata_for_devices(reader, &bad, 1, + &map)); ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); remove(filename); @@ -284,10 +278,9 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataInvalidArgs) { ASSERT_EQ(RET_OK, code); DeviceTimeseriesMetadataMap map{}; + EXPECT_NE(RET_OK, tsfile_reader_get_timeseries_metadata_all(nullptr, &map)); EXPECT_NE(RET_OK, - tsfile_reader_get_timeseries_metadata(nullptr, nullptr, 0, &map)); - EXPECT_NE(RET_OK, tsfile_reader_get_timeseries_metadata(reader, nullptr, 0, - nullptr)); + tsfile_reader_get_timeseries_metadata_all(reader, nullptr)); ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); remove(filename); diff --git a/python/tests/test_reader_metadata.py b/python/tests/test_reader_metadata.py index 1b6a4633b..185668986 100644 --- a/python/tests/test_reader_metadata.py +++ b/python/tests/test_reader_metadata.py @@ -22,10 +22,10 @@ from tsfile import Field, RowRecord, TimeseriesSchema, TsFileReader, TsFileWriter from tsfile import TSDataType -from tsfile.schema import DeviceID +from tsfile.schema import DeviceDetails -def test_get_all_device_details_segments(): +def test_get_all_devices_segments(): path = os.path.join(tempfile.gettempdir(), "py_reader_metadata_details.tsfile") try: os.unlink(path) @@ -42,7 +42,7 @@ def test_get_all_device_details_segments(): reader = TsFileReader(path) try: - details = reader.get_all_device_details() + details = reader.get_all_devices() assert len(details) == 1 d0 = details[0] assert d0.path == device @@ -114,7 +114,7 @@ def test_get_all_devices_and_timeseries_metadata_statistic(): assert reader.get_timeseries_metadata([]) == {} - sub = reader.get_timeseries_metadata([DeviceID(device)]) + sub = reader.get_timeseries_metadata([DeviceDetails(device, "", ())]) assert device in sub assert len(sub[device].timeseries) == 1 diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index 48319a34a..3de26767b 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -22,16 +22,6 @@ from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor -@dataclass(frozen=True) -class DeviceID: - """Device path string as returned by the native reader (tree/table file layout).""" - - path: str - - def __str__(self) -> str: - return self.path - - @dataclass(frozen=True) class TimeseriesStatistic: """Subset of file chunk statistic exposed through the C API.""" diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 4ea9df123..4f22f9066 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -103,9 +103,6 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": TimeseriesSchema * timeseries_schema int timeseries_num - ctypedef struct DeviceID: - char * path - ctypedef struct TimeseriesStatistic: bint has_statistic int32_t row_count @@ -145,10 +142,7 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": char ** segments ctypedef struct DeviceTimeseriesMetadataEntry: - DeviceID device - char * device_table_name - uint32_t device_segment_count - char ** device_segments + TsDeviceDetails device TimeseriesMetadata * timeseries uint32_t timeseries_count @@ -156,8 +150,6 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": DeviceTimeseriesMetadataEntry * entries uint32_t device_count - const DeviceID tsfile_c_metadata_empty_device_list_marker - ctypedef struct ResultSetMetaData: char** column_names TSDataType * data_types @@ -273,19 +265,18 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": DeviceSchema * tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, uint32_t * size); + void tsfile_device_details_free_contents(TsDeviceDetails * d) + ErrorCode tsfile_reader_get_all_devices(TsFileReader reader, - DeviceID ** out_devices, + TsDeviceDetails ** out_devices, uint32_t * out_length); - void tsfile_free_device_id_array(DeviceID * devices, uint32_t length); - - ErrorCode tsfile_reader_get_all_device_details( - TsFileReader reader, TsDeviceDetails ** out_details, - uint32_t * out_length); void tsfile_free_device_details_array(TsDeviceDetails * details, uint32_t length); - ErrorCode tsfile_reader_get_timeseries_metadata( - TsFileReader reader, const DeviceID * device_ids, uint32_t length, + ErrorCode tsfile_reader_get_timeseries_metadata_all( + TsFileReader reader, DeviceTimeseriesMetadataMap * out_map); + ErrorCode tsfile_reader_get_timeseries_metadata_for_devices( + TsFileReader reader, const TsDeviceDetails * devices, uint32_t length, DeviceTimeseriesMetadataMap * out_map); void tsfile_free_device_timeseries_metadata_map( DeviceTimeseriesMetadataMap * map); diff --git a/python/tsfile/tsfile_py_cpp.pxd b/python/tsfile/tsfile_py_cpp.pxd index 7f501fd34..b6baee80d 100644 --- a/python/tsfile/tsfile_py_cpp.pxd +++ b/python/tsfile/tsfile_py_cpp.pxd @@ -68,7 +68,6 @@ cdef public api object get_table_schema(TsFileReader reader, object table_name) cdef public api object get_all_table_schema(TsFileReader reader) cdef public api object get_all_timeseries_schema(TsFileReader reader) cdef public api object reader_get_all_devices_c(TsFileReader reader) -cdef public api object reader_get_all_device_details_c(TsFileReader reader) cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, object device_ids) cpdef public api object get_tsfile_config() diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index 3fe93bcc6..9e16ff754 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -37,7 +37,6 @@ from tsfile.schema import TSDataType as TSDataTypePy, TSEncoding as TSEncodingPy from tsfile.schema import Compressor as CompressorPy, ColumnCategory as CategoryPy from tsfile.schema import TableSchema as TableSchemaPy, ColumnSchema as ColumnSchemaPy from tsfile.schema import DeviceSchema as DeviceSchemaPy, TimeseriesSchema as TimeseriesSchemaPy -from tsfile.schema import DeviceID as ReaderDeviceID from tsfile.schema import DeviceDetails as DeviceDetailsPy from tsfile.schema import DeviceTimeseriesMetadataGroup as DeviceTimeseriesMetadataGroupPy from tsfile.schema import TimeseriesStatistic as TimeseriesStatisticPy @@ -998,14 +997,14 @@ cdef dict device_timeseries_metadata_map_to_py(DeviceTimeseriesMetadataMap* mmap key = "" else: key = p.decode('utf-8') - tnp = mmap.entries[di].device_table_name + tnp = mmap.entries[di].device.table_name if tnp == NULL: table_py = "" else: table_py = tnp.decode('utf-8') segs_py = c_device_segments_to_tuple( - mmap.entries[di].device_segments, - mmap.entries[di].device_segment_count) + mmap.entries[di].device.segments, + mmap.entries[di].device.segment_count) series = [] for ti in range(mmap.entries[di].timeseries_count): series.append( @@ -1016,21 +1015,6 @@ cdef dict device_timeseries_metadata_map_to_py(DeviceTimeseriesMetadataMap* mmap return out cdef public api object reader_get_all_devices_c(TsFileReader reader): - cdef DeviceID* arr = NULL - cdef uint32_t n = 0 - cdef int err - cdef list out = [] - cdef uint32_t i - err = tsfile_reader_get_all_devices(reader, &arr, &n) - check_error(err) - try: - for i in range(n): - out.append(ReaderDeviceID(arr[i].path.decode('utf-8'))) - finally: - tsfile_free_device_id_array(arr, n) - return out - -cdef public api object reader_get_all_device_details_c(TsFileReader reader): cdef TsDeviceDetails* arr = NULL cdef uint32_t n = 0 cdef int err @@ -1039,7 +1023,7 @@ cdef public api object reader_get_all_device_details_c(TsFileReader reader): cdef str path_py cdef str tname_py cdef tuple segs_py - err = tsfile_reader_get_all_device_details(reader, &arr, &n) + err = tsfile_reader_get_all_devices(reader, &arr, &n) check_error(err) try: for i in range(n): @@ -1061,7 +1045,7 @@ cdef public api object reader_get_all_device_details_c(TsFileReader reader): cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, object device_ids): cdef DeviceTimeseriesMetadataMap mmap - cdef DeviceID* q = NULL + cdef TsDeviceDetails* q = NULL cdef uint32_t qlen = 0 cdef uint32_t i cdef int err @@ -1069,18 +1053,18 @@ cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, cdef const char* raw memset(&mmap, 0, sizeof(DeviceTimeseriesMetadataMap)) if device_ids is None: - err = tsfile_reader_get_timeseries_metadata(reader, NULL, 0, &mmap) + err = tsfile_reader_get_timeseries_metadata_all(reader, &mmap) check_error(err) elif len(device_ids) == 0: - err = tsfile_reader_get_timeseries_metadata( - reader, &tsfile_c_metadata_empty_device_list_marker, 0, &mmap) + err = tsfile_reader_get_timeseries_metadata_for_devices( + reader, NULL, 0, &mmap) check_error(err) else: qlen = len(device_ids) - q = malloc(sizeof(DeviceID) * qlen) + q = malloc(sizeof(TsDeviceDetails) * qlen) if q == NULL: raise MemoryError() - memset(q, 0, sizeof(DeviceID) * qlen) + memset(q, 0, sizeof(TsDeviceDetails) * qlen) try: for i in range(qlen): dev = device_ids[i] @@ -1093,7 +1077,8 @@ cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, q[i].path = strdup(raw) if q[i].path == NULL: raise MemoryError() - err = tsfile_reader_get_timeseries_metadata(reader, q, qlen, &mmap) + err = tsfile_reader_get_timeseries_metadata_for_devices( + reader, q, qlen, &mmap) check_error(err) finally: for i in range(qlen): diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index 8164b6f4a..fb7dc4716 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -428,18 +428,12 @@ cdef class TsFileReaderPy: """ return get_all_timeseries_schema(self.reader) - def get_all_devices(self): + def get_all_devices(self) -> List[DeviceDetails]: """ - Return all device IDs in the file as :class:`tsfile.schema.DeviceID`. - """ - return reader_get_all_devices_c(self.reader) - - def get_all_device_details(self) -> List[DeviceDetails]: - """ - Return all devices with path, table name, and segment list from + Return all devices (path, table name, segments) as :class:`tsfile.schema.DeviceDetails`. """ - return reader_get_all_device_details_c(self.reader) + return reader_get_all_devices_c(self.reader) def get_timeseries_metadata( self, device_ids: Optional[List] = None From e18a51b5c1ce070c6f0308a765166c66cc89dc31 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Tue, 7 Apr 2026 14:08:48 +0800 Subject: [PATCH 6/7] fix c/python statistic --- cpp/src/cwrapper/tsfile_cwrapper.cc | 261 +++++++++++++------- cpp/src/cwrapper/tsfile_cwrapper.h | 98 +++++--- cpp/test/cwrapper/cwrapper_metadata_test.cc | 63 ++--- python/tests/test_reader_metadata.py | 17 +- python/tsfile/schema.py | 97 +++++--- python/tsfile/tsfile_cpp.pxd | 55 +++-- python/tsfile/tsfile_py_cpp.pyx | 122 +++++---- python/tsfile/tsfile_reader.pyx | 6 +- 8 files changed, 467 insertions(+), 252 deletions(-) diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 809895ce0..f13146007 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -699,7 +699,7 @@ DeviceSchema* tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, return device_schema; } -void tsfile_device_details_free_contents(TsDeviceDetails* d) { +void tsfile_device_id_free_contents(DeviceID* d) { if (d == nullptr) { return; } @@ -732,22 +732,43 @@ char* dup_common_string_to_cstr(const common::String& s) { return p; } +static TSDataType cpp_stat_type_to_c(common::TSDataType t) { + return static_cast(static_cast(t)); +} + void free_timeseries_statistic_heap(TimeseriesStatistic* s) { if (s == nullptr) { return; } - free(s->str_min); - s->str_min = nullptr; - free(s->str_max); - s->str_max = nullptr; - free(s->str_first); - s->str_first = nullptr; - free(s->str_last); - s->str_last = nullptr; + TsFileStatisticBase* b = tsfile_statistic_base(s); + if (!b->has_statistic) { + return; + } + switch (b->type) { + case TS_DATATYPE_STRING: + free(s->u.string_s.str_min); + s->u.string_s.str_min = nullptr; + free(s->u.string_s.str_max); + s->u.string_s.str_max = nullptr; + free(s->u.string_s.str_first); + s->u.string_s.str_first = nullptr; + free(s->u.string_s.str_last); + s->u.string_s.str_last = nullptr; + break; + case TS_DATATYPE_TEXT: + free(s->u.text_s.str_first); + s->u.text_s.str_first = nullptr; + free(s->u.text_s.str_last); + s->u.text_s.str_last = nullptr; + break; + default: + break; + } } void clear_timeseries_statistic(TimeseriesStatistic* s) { memset(s, 0, sizeof(*s)); + tsfile_statistic_base(s)->type = TS_DATATYPE_INVALID; } /** @@ -760,100 +781,151 @@ int fill_timeseries_statistic(storage::Statistic* st, if (st == nullptr) { return common::E_OK; } - out->has_statistic = true; - out->row_count = st->get_count(); - out->start_time = st->start_time_; - out->end_time = st->get_end_time(); - out->sum_valid = false; - out->sum = 0.0; const common::TSDataType t = st->get_type(); switch (t) { case common::BOOLEAN: { auto* bs = static_cast(st); - out->sum_valid = true; - out->sum = static_cast(bs->sum_value_); - out->bool_ext_valid = true; - out->first_bool = bs->first_value_; - out->last_bool = bs->last_value_; + TsFileBoolStatistic* p = &out->u.bool_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::BOOLEAN); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->sum = static_cast(bs->sum_value_); + p->first_bool = bs->first_value_; + p->last_bool = bs->last_value_; + break; + } + case common::INT32: { + auto* is = static_cast(st); + TsFileIntStatistic* p = &out->u.int_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::INT32); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->sum = static_cast(is->sum_value_); + if (p->base.row_count > 0) { + p->min_int64 = static_cast(is->min_value_); + p->max_int64 = static_cast(is->max_value_); + p->first_int64 = static_cast(is->first_value_); + p->last_int64 = static_cast(is->last_value_); + } break; } - case common::INT32: case common::DATE: { auto* is = static_cast(st); - out->sum_valid = true; - out->sum = static_cast(is->sum_value_); - if (out->row_count > 0) { - out->int_range_valid = true; - out->min_int64 = static_cast(is->min_value_); - out->max_int64 = static_cast(is->max_value_); - out->first_int64 = static_cast(is->first_value_); - out->last_int64 = static_cast(is->last_value_); + TsFileIntStatistic* p = &out->u.int_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::DATE); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->sum = static_cast(is->sum_value_); + if (p->base.row_count > 0) { + p->min_int64 = static_cast(is->min_value_); + p->max_int64 = static_cast(is->max_value_); + p->first_int64 = static_cast(is->first_value_); + p->last_int64 = static_cast(is->last_value_); + } + break; + } + case common::INT64: { + auto* ls = static_cast(st); + TsFileIntStatistic* p = &out->u.int_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::INT64); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->sum = ls->sum_value_; + if (p->base.row_count > 0) { + p->min_int64 = ls->min_value_; + p->max_int64 = ls->max_value_; + p->first_int64 = ls->first_value_; + p->last_int64 = ls->last_value_; } break; } - case common::INT64: case common::TIMESTAMP: { auto* ls = static_cast(st); - out->sum_valid = true; - out->sum = ls->sum_value_; - if (out->row_count > 0) { - out->int_range_valid = true; - out->min_int64 = ls->min_value_; - out->max_int64 = ls->max_value_; - out->first_int64 = ls->first_value_; - out->last_int64 = ls->last_value_; + TsFileIntStatistic* p = &out->u.int_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::TIMESTAMP); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->sum = ls->sum_value_; + if (p->base.row_count > 0) { + p->min_int64 = ls->min_value_; + p->max_int64 = ls->max_value_; + p->first_int64 = ls->first_value_; + p->last_int64 = ls->last_value_; } break; } case common::FLOAT: { auto* fs = static_cast(st); - out->sum_valid = true; - out->sum = static_cast(fs->sum_value_); - if (out->row_count > 0) { - out->float_range_valid = true; - out->min_float64 = static_cast(fs->min_value_); - out->max_float64 = static_cast(fs->max_value_); - out->first_float64 = static_cast(fs->first_value_); - out->last_float64 = static_cast(fs->last_value_); + TsFileFloatStatistic* p = &out->u.float_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::FLOAT); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->sum = static_cast(fs->sum_value_); + if (p->base.row_count > 0) { + p->min_float64 = static_cast(fs->min_value_); + p->max_float64 = static_cast(fs->max_value_); + p->first_float64 = static_cast(fs->first_value_); + p->last_float64 = static_cast(fs->last_value_); } break; } case common::DOUBLE: { auto* ds = static_cast(st); - out->sum_valid = true; - out->sum = ds->sum_value_; - if (out->row_count > 0) { - out->float_range_valid = true; - out->min_float64 = ds->min_value_; - out->max_float64 = ds->max_value_; - out->first_float64 = ds->first_value_; - out->last_float64 = ds->last_value_; + TsFileFloatStatistic* p = &out->u.float_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::DOUBLE); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->sum = ds->sum_value_; + if (p->base.row_count > 0) { + p->min_float64 = ds->min_value_; + p->max_float64 = ds->max_value_; + p->first_float64 = ds->first_value_; + p->last_float64 = ds->last_value_; } break; } case common::STRING: { auto* ss = static_cast(st); - out->str_ext_valid = true; - out->str_min = dup_common_string_to_cstr(ss->min_value_); - if (out->str_min == nullptr) { + TsFileStringStatistic* p = &out->u.string_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::STRING); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->str_min = dup_common_string_to_cstr(ss->min_value_); + if (p->str_min == nullptr) { free_timeseries_statistic_heap(out); clear_timeseries_statistic(out); return common::E_OOM; } - out->str_max = dup_common_string_to_cstr(ss->max_value_); - if (out->str_max == nullptr) { + p->str_max = dup_common_string_to_cstr(ss->max_value_); + if (p->str_max == nullptr) { free_timeseries_statistic_heap(out); clear_timeseries_statistic(out); return common::E_OOM; } - out->str_first = dup_common_string_to_cstr(ss->first_value_); - if (out->str_first == nullptr) { + p->str_first = dup_common_string_to_cstr(ss->first_value_); + if (p->str_first == nullptr) { free_timeseries_statistic_heap(out); clear_timeseries_statistic(out); return common::E_OOM; } - out->str_last = dup_common_string_to_cstr(ss->last_value_); - if (out->str_last == nullptr) { + p->str_last = dup_common_string_to_cstr(ss->last_value_); + if (p->str_last == nullptr) { free_timeseries_statistic_heap(out); clear_timeseries_statistic(out); return common::E_OOM; @@ -862,30 +934,35 @@ int fill_timeseries_statistic(storage::Statistic* st, } case common::TEXT: { auto* ts = static_cast(st); - out->str_ext_valid = true; - out->str_min = strdup(""); - out->str_max = strdup(""); - if (out->str_min == nullptr || out->str_max == nullptr) { - free_timeseries_statistic_heap(out); - clear_timeseries_statistic(out); - return common::E_OOM; - } - out->str_first = dup_common_string_to_cstr(ts->first_value_); - if (out->str_first == nullptr) { + TsFileTextStatistic* p = &out->u.text_s; + p->base.has_statistic = true; + p->base.type = cpp_stat_type_to_c(common::TEXT); + p->base.row_count = st->get_count(); + p->base.start_time = st->start_time_; + p->base.end_time = st->get_end_time(); + p->str_first = dup_common_string_to_cstr(ts->first_value_); + if (p->str_first == nullptr) { free_timeseries_statistic_heap(out); clear_timeseries_statistic(out); return common::E_OOM; } - out->str_last = dup_common_string_to_cstr(ts->last_value_); - if (out->str_last == nullptr) { + p->str_last = dup_common_string_to_cstr(ts->last_value_); + if (p->str_last == nullptr) { free_timeseries_statistic_heap(out); clear_timeseries_statistic(out); return common::E_OOM; } break; } - default: + default: { + TsFileStatisticBase* b = tsfile_statistic_base(out); + b->has_statistic = true; + b->type = TS_DATATYPE_INVALID; + b->row_count = st->get_count(); + b->start_time = st->start_time_; + b->end_time = st->get_end_time(); break; + } } return common::E_OK; } @@ -896,7 +973,7 @@ void free_device_timeseries_metadata_entries_partial( return; } for (size_t i = 0; i < filled_count; i++) { - tsfile_device_details_free_contents(&entries[i].device); + tsfile_device_id_free_contents(&entries[i].device); if (entries[i].timeseries != nullptr) { for (uint32_t j = 0; j < entries[i].timeseries_count; j++) { free_timeseries_statistic_heap( @@ -983,8 +1060,7 @@ int duplicate_ideviceid_to_device_fields(storage::IDeviceID* id, return common::E_OK; } -int fill_tsdevice_details_from_id(storage::IDeviceID* id, - TsDeviceDetails* out) { +int fill_device_id_from_ideviceid(storage::IDeviceID* id, DeviceID* out) { memset(out, 0, sizeof(*out)); return duplicate_ideviceid_to_device_fields( id, &out->path, &out->table_name, &out->segment_count, &out->segments); @@ -994,7 +1070,7 @@ void clear_metadata_entry_device_only(DeviceTimeseriesMetadataEntry* e) { if (e == nullptr) { return; } - tsfile_device_details_free_contents(&e->device); + tsfile_device_id_free_contents(&e->device); } ERRNO populate_c_metadata_map_from_cpp( @@ -1013,7 +1089,7 @@ ERRNO populate_c_metadata_map_from_cpp( size_t di = 0; for (const auto& kv : cpp_map) { DeviceTimeseriesMetadataEntry& e = entries[di]; - const int dup_rc = fill_tsdevice_details_from_id( + const int dup_rc = fill_device_id_from_ideviceid( kv.first ? kv.first.get() : nullptr, &e.device); if (dup_rc != common::E_OK) { free_device_timeseries_metadata_entries_partial(entries, di); @@ -1093,19 +1169,18 @@ ERRNO populate_c_metadata_map_from_cpp( } // namespace -void tsfile_free_device_details_array(TsDeviceDetails* details, - uint32_t length) { - if (details == nullptr) { +void tsfile_free_device_id_array(DeviceID* devices, uint32_t length) { + if (devices == nullptr) { return; } for (uint32_t i = 0; i < length; i++) { - tsfile_device_details_free_contents(&details[i]); + tsfile_device_id_free_contents(&devices[i]); } - free(details); + free(devices); } ERRNO tsfile_reader_get_all_devices(TsFileReader reader, - TsDeviceDetails** out_devices, + DeviceID** out_devices, uint32_t* out_length) { if (reader == nullptr || out_devices == nullptr || out_length == nullptr) { return common::E_INVALID_ARG; @@ -1117,16 +1192,16 @@ ERRNO tsfile_reader_get_all_devices(TsFileReader reader, if (ids.empty()) { return common::E_OK; } - auto* arr = static_cast( - malloc(sizeof(TsDeviceDetails) * ids.size())); + auto* arr = static_cast( + malloc(sizeof(DeviceID) * ids.size())); if (arr == nullptr) { return common::E_OOM; } - memset(arr, 0, sizeof(TsDeviceDetails) * ids.size()); + memset(arr, 0, sizeof(DeviceID) * ids.size()); for (size_t i = 0; i < ids.size(); i++) { - const int rc = fill_tsdevice_details_from_id(ids[i].get(), &arr[i]); + const int rc = fill_device_id_from_ideviceid(ids[i].get(), &arr[i]); if (rc != common::E_OK) { - tsfile_free_device_details_array(arr, static_cast(i)); + tsfile_free_device_id_array(arr, static_cast(i)); return rc; } } @@ -1148,7 +1223,7 @@ ERRNO tsfile_reader_get_timeseries_metadata_all( } ERRNO tsfile_reader_get_timeseries_metadata_for_devices( - TsFileReader reader, const TsDeviceDetails* devices, uint32_t length, + TsFileReader reader, const DeviceID* devices, uint32_t length, DeviceTimeseriesMetadataMap* out_map) { if (reader == nullptr || out_map == nullptr) { return common::E_INVALID_ARG; diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 31fb5d05a..29bb410a0 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -105,49 +105,86 @@ typedef struct device_schema { } DeviceSchema; /** - * @brief Aggregated statistic for one timeseries (subset of C++ Statistic). + * @brief Common header for all statistic variants (first member of each + * TsFile*Statistic struct; also aliases the start of TimeseriesStatistic::u). * - * String pointers str_* are allocated with malloc; freed by - * tsfile_free_device_timeseries_metadata_map (do not free individually). + * When @p has_statistic is false, @p type is undefined. Otherwise @p type + * selects which @ref TimeseriesStatisticUnion member is active (INT32/DATE/ + * INT64/TIMESTAMP share @c int_s). @c sum exists only on @c bool_s, @c int_s, + * and @c float_s. Heap strings in string_s/text_s are + * freed by tsfile_free_device_timeseries_metadata_map only. */ -typedef struct TimeseriesStatistic { +typedef struct TsFileStatisticBase { bool has_statistic; + TSDataType type; int32_t row_count; int64_t start_time; int64_t end_time; - /** True when @p sum is meaningful (numeric / boolean aggregate types). */ - bool sum_valid; - /** Sum when sum_valid; boolean uses sum of true as int-like aggregate. */ +} TsFileStatisticBase; + +typedef struct TsFileBoolStatistic { + TsFileStatisticBase base; double sum; + bool first_bool; + bool last_bool; +} TsFileBoolStatistic; - /** INT32, DATE, INT64, TIMESTAMP: min/max/first/last in int64_t form. */ - bool int_range_valid; +typedef struct TsFileIntStatistic { + TsFileStatisticBase base; + double sum; int64_t min_int64; int64_t max_int64; int64_t first_int64; int64_t last_int64; +} TsFileIntStatistic; - /** FLOAT, DOUBLE: min/max/first/last. */ - bool float_range_valid; +typedef struct TsFileFloatStatistic { + TsFileStatisticBase base; + double sum; double min_float64; double max_float64; double first_float64; double last_float64; +} TsFileFloatStatistic; - /** BOOLEAN: first/last sample values. */ - bool bool_ext_valid; - bool first_bool; - bool last_bool; - - /** STRING: min/max lexicographic; TEXT: first/last only (min/max unused). - */ - bool str_ext_valid; +typedef struct TsFileStringStatistic { + TsFileStatisticBase base; char* str_min; char* str_max; char* str_first; char* str_last; +} TsFileStringStatistic; + +typedef struct TsFileTextStatistic { + TsFileStatisticBase base; + char* str_first; + char* str_last; +} TsFileTextStatistic; + +/** + * @brief One of the typed layouts; active member follows @c base.type. + */ +typedef union TimeseriesStatisticUnion { + TsFileBoolStatistic bool_s; + TsFileIntStatistic int_s; + TsFileFloatStatistic float_s; + TsFileStringStatistic string_s; + TsFileTextStatistic text_s; +} TimeseriesStatisticUnion; + +/** + * @brief Aggregated statistic for one timeseries (subset of C++ Statistic). + * + * Read common fields via @c tsfile_statistic_base(s). Type-specific fields + * via @c s->u.int_s, @c s->u.float_s, etc., per @c base.type. + */ +typedef struct TimeseriesStatistic { + TimeseriesStatisticUnion u; } TimeseriesStatistic; +/** Pointer to the common header at the start of @p s->u (any active arm). */ +#define tsfile_statistic_base(s) ((TsFileStatisticBase*)&(s)->u) + /** * @brief One measurement's metadata as exposed to C. */ @@ -161,24 +198,24 @@ typedef struct TimeseriesMetadata { /** * @brief Device identity from IDeviceID (path, table name, segments). * - * Heap fields are freed by tsfile_device_details_free_contents or - * tsfile_free_device_details_array, or as part of + * Heap fields are freed by tsfile_device_id_free_contents or + * tsfile_free_device_id_array, or as part of * tsfile_free_device_timeseries_metadata_map for entries. */ -typedef struct TsDeviceDetails { +typedef struct DeviceID { char* path; char* table_name; uint32_t segment_count; char** segments; -} TsDeviceDetails; +} DeviceID; /** - * @brief One device's timeseries metadata list plus TsDeviceDetails. + * @brief One device's timeseries metadata list plus DeviceID. * * @p device heap fields freed by tsfile_free_device_timeseries_metadata_map. */ typedef struct DeviceTimeseriesMetadataEntry { - TsDeviceDetails device; + DeviceID device; TimeseriesMetadata* timeseries; uint32_t timeseries_count; } DeviceTimeseriesMetadataEntry; @@ -193,7 +230,7 @@ typedef struct DeviceTimeseriesMetadataMap { } DeviceTimeseriesMetadataMap; /** Frees path, table_name, and segments inside @p d; zeros @p d. */ -void tsfile_device_details_free_contents(TsDeviceDetails* d); +void tsfile_device_id_free_contents(DeviceID* d); typedef struct result_set_meta_data { char** column_names; @@ -411,14 +448,13 @@ ERRNO tsfile_reader_close(TsFileReader reader); * @brief Lists all devices (path, table name, segments from IDeviceID). * * @param out_devices [out] Allocated array; caller frees with - * tsfile_free_device_details_array. + * tsfile_free_device_id_array. */ ERRNO tsfile_reader_get_all_devices(TsFileReader reader, - TsDeviceDetails** out_devices, + DeviceID** out_devices, uint32_t* out_length); -void tsfile_free_device_details_array(TsDeviceDetails* details, - uint32_t length); +void tsfile_free_device_id_array(DeviceID* devices, uint32_t length); /** * @brief Timeseries metadata for all devices in the file. @@ -434,7 +470,7 @@ ERRNO tsfile_reader_get_timeseries_metadata_all( * For each entry, @p path must be non-NULL (canonical device path). */ ERRNO tsfile_reader_get_timeseries_metadata_for_devices( - TsFileReader reader, const TsDeviceDetails* devices, uint32_t length, + TsFileReader reader, const DeviceID* devices, uint32_t length, DeviceTimeseriesMetadataMap* out_map); void tsfile_free_device_timeseries_metadata_map( diff --git a/cpp/test/cwrapper/cwrapper_metadata_test.cc b/cpp/test/cwrapper/cwrapper_metadata_test.cc index 6dcfd5a37..57fca4de6 100644 --- a/cpp/test/cwrapper/cwrapper_metadata_test.cc +++ b/cpp/test/cwrapper/cwrapper_metadata_test.cc @@ -65,7 +65,7 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { ASSERT_EQ(RET_OK, code); ASSERT_NE(nullptr, reader); - TsDeviceDetails* details = nullptr; + DeviceID* details = nullptr; uint32_t n_det = 0; ASSERT_EQ(RET_OK, tsfile_reader_get_all_devices(reader, &details, &n_det)); ASSERT_EQ(1u, n_det); @@ -77,7 +77,7 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { ASSERT_NE(nullptr, details[0].segments); EXPECT_STREQ("root.sg", details[0].segments[0]); EXPECT_STREQ("d1", details[0].segments[1]); - tsfile_free_device_details_array(details, n_det); + tsfile_free_device_id_array(details, n_det); DeviceTimeseriesMetadataMap map{}; ASSERT_EQ(RET_OK, tsfile_reader_get_timeseries_metadata_all(reader, &map)); @@ -95,17 +95,17 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { TimeseriesMetadata& tm = map.entries[0].timeseries[0]; ASSERT_STREQ(m_int, tm.measurement_name); ASSERT_EQ(TS_DATATYPE_INT32, tm.data_type); - ASSERT_TRUE(tm.statistic.has_statistic); - EXPECT_EQ(3, tm.statistic.row_count); - EXPECT_EQ(1, tm.statistic.start_time); - EXPECT_EQ(3, tm.statistic.end_time); - ASSERT_TRUE(tm.statistic.sum_valid); - EXPECT_DOUBLE_EQ(60.0, tm.statistic.sum); - ASSERT_TRUE(tm.statistic.int_range_valid); - EXPECT_EQ(10, tm.statistic.min_int64); - EXPECT_EQ(30, tm.statistic.max_int64); - EXPECT_EQ(10, tm.statistic.first_int64); - EXPECT_EQ(30, tm.statistic.last_int64); + TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic); + ASSERT_TRUE(sb->has_statistic); + EXPECT_EQ(3, sb->row_count); + EXPECT_EQ(1, sb->start_time); + EXPECT_EQ(3, sb->end_time); + EXPECT_DOUBLE_EQ(60.0, tm.statistic.u.int_s.sum); + ASSERT_EQ(TS_DATATYPE_INT32, sb->type); + EXPECT_EQ(10, tm.statistic.u.int_s.min_int64); + EXPECT_EQ(30, tm.statistic.u.int_s.max_int64); + EXPECT_EQ(10, tm.statistic.u.int_s.first_int64); + EXPECT_EQ(30, tm.statistic.u.int_s.last_int64); tsfile_free_device_timeseries_metadata_map(&map); @@ -115,7 +115,7 @@ TEST_F(CWrapperMetadataTest, GetAllDevicesAndMetadataWithStatistic) { EXPECT_EQ(0u, empty.device_count); EXPECT_EQ(nullptr, empty.entries); - TsDeviceDetails q{}; + DeviceID q{}; q.path = const_cast(device); q.table_name = nullptr; q.segment_count = 0; @@ -168,12 +168,12 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataBooleanStatistic) { TimeseriesMetadata& tm = map.entries[0].timeseries[0]; ASSERT_STREQ(m_b, tm.measurement_name); ASSERT_EQ(TS_DATATYPE_BOOLEAN, tm.data_type); - ASSERT_TRUE(tm.statistic.has_statistic); - ASSERT_TRUE(tm.statistic.sum_valid); - EXPECT_DOUBLE_EQ(2.0, tm.statistic.sum); - ASSERT_TRUE(tm.statistic.bool_ext_valid); - EXPECT_TRUE(tm.statistic.first_bool); - EXPECT_TRUE(tm.statistic.last_bool); + TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic); + ASSERT_TRUE(sb->has_statistic); + EXPECT_DOUBLE_EQ(2.0, tm.statistic.u.bool_s.sum); + ASSERT_EQ(TS_DATATYPE_BOOLEAN, sb->type); + EXPECT_TRUE(tm.statistic.u.bool_s.first_bool); + EXPECT_TRUE(tm.statistic.u.bool_s.last_bool); tsfile_free_device_timeseries_metadata_map(&map); ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); @@ -220,16 +220,17 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataStringStatistic) { TimeseriesMetadata& tm = map.entries[0].timeseries[0]; ASSERT_STREQ(m_str, tm.measurement_name); ASSERT_EQ(TS_DATATYPE_STRING, tm.data_type); - ASSERT_TRUE(tm.statistic.has_statistic); - ASSERT_TRUE(tm.statistic.str_ext_valid); - ASSERT_NE(nullptr, tm.statistic.str_min); - ASSERT_NE(nullptr, tm.statistic.str_max); - ASSERT_NE(nullptr, tm.statistic.str_first); - ASSERT_NE(nullptr, tm.statistic.str_last); - EXPECT_STREQ("aa", tm.statistic.str_min); - EXPECT_STREQ("cc", tm.statistic.str_max); - EXPECT_STREQ("aa", tm.statistic.str_first); - EXPECT_STREQ("bb", tm.statistic.str_last); + TsFileStatisticBase* sb = tsfile_statistic_base(&tm.statistic); + ASSERT_TRUE(sb->has_statistic); + ASSERT_EQ(TS_DATATYPE_STRING, sb->type); + ASSERT_NE(nullptr, tm.statistic.u.string_s.str_min); + ASSERT_NE(nullptr, tm.statistic.u.string_s.str_max); + ASSERT_NE(nullptr, tm.statistic.u.string_s.str_first); + ASSERT_NE(nullptr, tm.statistic.u.string_s.str_last); + EXPECT_STREQ("aa", tm.statistic.u.string_s.str_min); + EXPECT_STREQ("cc", tm.statistic.u.string_s.str_max); + EXPECT_STREQ("aa", tm.statistic.u.string_s.str_first); + EXPECT_STREQ("bb", tm.statistic.u.string_s.str_last); tsfile_free_device_timeseries_metadata_map(&map); ASSERT_EQ(RET_OK, tsfile_reader_close(reader)); @@ -250,7 +251,7 @@ TEST_F(CWrapperMetadataTest, GetTimeseriesMetadataNullDevicePath) { TsFileReader reader = tsfile_reader_new(filename, &code); ASSERT_EQ(RET_OK, code); - TsDeviceDetails bad{}; + DeviceID bad{}; bad.path = nullptr; bad.table_name = nullptr; bad.segment_count = 0; diff --git a/python/tests/test_reader_metadata.py b/python/tests/test_reader_metadata.py index 185668986..558fcbb13 100644 --- a/python/tests/test_reader_metadata.py +++ b/python/tests/test_reader_metadata.py @@ -22,7 +22,12 @@ from tsfile import Field, RowRecord, TimeseriesSchema, TsFileReader, TsFileWriter from tsfile import TSDataType -from tsfile.schema import DeviceDetails +from tsfile.schema import ( + BoolTimeseriesStatistic, + DeviceID, + IntTimeseriesStatistic, + StringTimeseriesStatistic, +) def test_get_all_devices_segments(): @@ -100,13 +105,12 @@ def test_get_all_devices_and_timeseries_metadata_statistic(): assert m.measurement_name == "m_int" assert m.data_type == TSDataType.INT32 st = m.statistic + assert isinstance(st, IntTimeseriesStatistic) assert st.has_statistic assert st.row_count == 3 assert st.start_time == 1 assert st.end_time == 3 - assert st.sum_valid assert st.sum == pytest.approx(60.0) - assert st.int_range_valid assert st.min_int64 == 10 assert st.max_int64 == 30 assert st.first_int64 == 10 @@ -114,7 +118,7 @@ def test_get_all_devices_and_timeseries_metadata_statistic(): assert reader.get_timeseries_metadata([]) == {} - sub = reader.get_timeseries_metadata([DeviceDetails(device, "", ())]) + sub = reader.get_timeseries_metadata([DeviceID(device, None, ())]) assert device in sub assert len(sub[device].timeseries) == 1 @@ -153,10 +157,9 @@ def test_get_timeseries_metadata_boolean_statistic(): try: meta_all = reader.get_timeseries_metadata(None) st = meta_all[device].timeseries[0].statistic + assert isinstance(st, BoolTimeseriesStatistic) assert st.has_statistic - assert st.sum_valid assert st.sum == pytest.approx(2.0) - assert st.bool_ext_valid assert st.first_bool is True assert st.last_bool is True finally: @@ -195,8 +198,8 @@ def test_get_timeseries_metadata_string_statistic(): assert m.measurement_name == "m_str" assert m.data_type == TSDataType.STRING st = m.statistic + assert isinstance(st, StringTimeseriesStatistic) assert st.has_statistic - assert st.str_ext_valid assert st.str_min == "aa" assert st.str_max == "cc" assert st.str_first == "aa" diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index 3de26767b..ce85f6839 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -16,7 +16,7 @@ # under the License. # from dataclasses import dataclass -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union from .exceptions import TypeMismatchError from .constants import TSDataType, ColumnCategory, TSEncoding, Compressor @@ -24,32 +24,71 @@ @dataclass(frozen=True) class TimeseriesStatistic: - """Subset of file chunk statistic exposed through the C API.""" + """Common statistic fields from the C API (no type-specific payload).""" has_statistic: bool row_count: int start_time: int end_time: int - sum_valid: bool + + +@dataclass(frozen=True) +class IntTimeseriesStatistic(TimeseriesStatistic): + """INT32, DATE, INT64, TIMESTAMP chunk statistics.""" + + sum: float + min_int64: int + max_int64: int + first_int64: int + last_int64: int + + +@dataclass(frozen=True) +class FloatTimeseriesStatistic(TimeseriesStatistic): + """FLOAT, DOUBLE chunk statistics.""" + + sum: float + min_float64: float + max_float64: float + first_float64: float + last_float64: float + + +@dataclass(frozen=True) +class BoolTimeseriesStatistic(TimeseriesStatistic): + """BOOLEAN chunk statistics.""" + sum: float - int_range_valid: bool = False - min_int64: int = 0 - max_int64: int = 0 - first_int64: int = 0 - last_int64: int = 0 - float_range_valid: bool = False - min_float64: float = 0.0 - max_float64: float = 0.0 - first_float64: float = 0.0 - last_float64: float = 0.0 - bool_ext_valid: bool = False - first_bool: bool = False - last_bool: bool = False - str_ext_valid: bool = False - str_min: Optional[str] = None - str_max: Optional[str] = None - str_first: Optional[str] = None - str_last: Optional[str] = None + first_bool: bool + last_bool: bool + + +@dataclass(frozen=True) +class StringTimeseriesStatistic(TimeseriesStatistic): + """STRING: lexicographic min/max and time-ordered first/last.""" + + str_min: Optional[str] + str_max: Optional[str] + str_first: Optional[str] + str_last: Optional[str] + + +@dataclass(frozen=True) +class TextTimeseriesStatistic(TimeseriesStatistic): + """TEXT: first/last only (no min/max).""" + + str_first: Optional[str] + str_last: Optional[str] + + +TimeseriesStatisticType = Union[ + TimeseriesStatistic, + IntTimeseriesStatistic, + FloatTimeseriesStatistic, + BoolTimeseriesStatistic, + StringTimeseriesStatistic, + TextTimeseriesStatistic, +] @dataclass(frozen=True) @@ -59,24 +98,24 @@ class TimeseriesMetadata: measurement_name: str data_type: TSDataType chunk_meta_count: int - statistic: TimeseriesStatistic + statistic: TimeseriesStatisticType @dataclass(frozen=True) -class DeviceDetails: - """Structured device identity from the native reader (path, table name, segments).""" +class DeviceID: + """Device identity from the native reader (path, table name, segments). NULL C fields become None.""" - path: str - table_name: str - segments: Tuple[str, ...] + path: Optional[str] + table_name: Optional[str] + segments: Tuple[Optional[str], ...] @dataclass(frozen=True) class DeviceTimeseriesMetadataGroup: """One device's timeseries list plus table name and path segments (dict key is device path).""" - table_name: str - segments: Tuple[str, ...] + table_name: Optional[str] + segments: Tuple[Optional[str], ...] timeseries: List[TimeseriesMetadata] diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 4f22f9066..74f3a7d9b 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -103,46 +103,71 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": TimeseriesSchema * timeseries_schema int timeseries_num - ctypedef struct TimeseriesStatistic: + ctypedef struct TsFileStatisticBase: bint has_statistic + TSDataType type int32_t row_count int64_t start_time int64_t end_time - bint sum_valid + + ctypedef struct TsFileBoolStatistic: + TsFileStatisticBase base + double sum + bint first_bool + bint last_bool + + ctypedef struct TsFileIntStatistic: + TsFileStatisticBase base double sum - bint int_range_valid int64_t min_int64 int64_t max_int64 int64_t first_int64 int64_t last_int64 - bint float_range_valid + + ctypedef struct TsFileFloatStatistic: + TsFileStatisticBase base + double sum double min_float64 double max_float64 double first_float64 double last_float64 - bint bool_ext_valid - bint first_bool - bint last_bool - bint str_ext_valid + + ctypedef struct TsFileStringStatistic: + TsFileStatisticBase base char* str_min char* str_max char* str_first char* str_last + ctypedef struct TsFileTextStatistic: + TsFileStatisticBase base + char* str_first + char* str_last + + ctypedef union TimeseriesStatisticUnion: + TsFileBoolStatistic bool_s + TsFileIntStatistic int_s + TsFileFloatStatistic float_s + TsFileStringStatistic string_s + TsFileTextStatistic text_s + + ctypedef struct TimeseriesStatistic: + TimeseriesStatisticUnion u + ctypedef struct TimeseriesMetadata: char * measurement_name TSDataType data_type int32_t chunk_meta_count TimeseriesStatistic statistic - ctypedef struct TsDeviceDetails: + ctypedef struct DeviceID: char * path char * table_name uint32_t segment_count char ** segments ctypedef struct DeviceTimeseriesMetadataEntry: - TsDeviceDetails device + DeviceID device TimeseriesMetadata * timeseries uint32_t timeseries_count @@ -265,18 +290,18 @@ cdef extern from "cwrapper/tsfile_cwrapper.h": DeviceSchema * tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, uint32_t * size); - void tsfile_device_details_free_contents(TsDeviceDetails * d) + void tsfile_device_id_free_contents(DeviceID * d) ErrorCode tsfile_reader_get_all_devices(TsFileReader reader, - TsDeviceDetails ** out_devices, + DeviceID ** out_devices, uint32_t * out_length); - void tsfile_free_device_details_array(TsDeviceDetails * details, - uint32_t length); + void tsfile_free_device_id_array(DeviceID * devices, + uint32_t length); ErrorCode tsfile_reader_get_timeseries_metadata_all( TsFileReader reader, DeviceTimeseriesMetadataMap * out_map); ErrorCode tsfile_reader_get_timeseries_metadata_for_devices( - TsFileReader reader, const TsDeviceDetails * devices, uint32_t length, + TsFileReader reader, const DeviceID * devices, uint32_t length, DeviceTimeseriesMetadataMap * out_map); void tsfile_free_device_timeseries_metadata_map( DeviceTimeseriesMetadataMap * map); diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index 9e16ff754..c564304fc 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -37,8 +37,13 @@ from tsfile.schema import TSDataType as TSDataTypePy, TSEncoding as TSEncodingPy from tsfile.schema import Compressor as CompressorPy, ColumnCategory as CategoryPy from tsfile.schema import TableSchema as TableSchemaPy, ColumnSchema as ColumnSchemaPy from tsfile.schema import DeviceSchema as DeviceSchemaPy, TimeseriesSchema as TimeseriesSchemaPy -from tsfile.schema import DeviceDetails as DeviceDetailsPy +from tsfile.schema import BoolTimeseriesStatistic as BoolTimeseriesStatisticPy +from tsfile.schema import DeviceID as DeviceIDPy from tsfile.schema import DeviceTimeseriesMetadataGroup as DeviceTimeseriesMetadataGroupPy +from tsfile.schema import FloatTimeseriesStatistic as FloatTimeseriesStatisticPy +from tsfile.schema import IntTimeseriesStatistic as IntTimeseriesStatisticPy +from tsfile.schema import StringTimeseriesStatistic as StringTimeseriesStatisticPy +from tsfile.schema import TextTimeseriesStatistic as TextTimeseriesStatisticPy from tsfile.schema import TimeseriesStatistic as TimeseriesStatisticPy from tsfile.schema import TimeseriesMetadata as TimeseriesMetadataPy @@ -933,38 +938,69 @@ cdef object _c_str_to_py_utf8_or_none(char* p): return None return p.decode('utf-8') +cdef object timeseries_statistic_c_to_py(TimeseriesStatistic* s): + cdef TsFileStatisticBase* b + cdef TSDataType dt + if s == NULL: + return TimeseriesStatisticPy(False, 0, 0, 0) + b = &s.u + if not b.has_statistic: + return TimeseriesStatisticPy( + False, int(b.row_count), int(b.start_time), int(b.end_time)) + dt = b.type + if dt == TS_DATATYPE_INVALID: + return TimeseriesStatisticPy( + True, int(b.row_count), int(b.start_time), int(b.end_time)) + if (dt == TS_DATATYPE_INT32 or dt == TS_DATATYPE_DATE or + dt == TS_DATATYPE_INT64 or dt == TS_DATATYPE_TIMESTAMP): + return IntTimeseriesStatisticPy( + True, int(b.row_count), int(b.start_time), int(b.end_time), + float(s.u.int_s.sum), + int(s.u.int_s.min_int64), + int(s.u.int_s.max_int64), + int(s.u.int_s.first_int64), + int(s.u.int_s.last_int64), + ) + if dt == TS_DATATYPE_FLOAT or dt == TS_DATATYPE_DOUBLE: + return FloatTimeseriesStatisticPy( + True, int(b.row_count), int(b.start_time), int(b.end_time), + float(s.u.float_s.sum), + float(s.u.float_s.min_float64), + float(s.u.float_s.max_float64), + float(s.u.float_s.first_float64), + float(s.u.float_s.last_float64), + ) + if dt == TS_DATATYPE_BOOLEAN: + return BoolTimeseriesStatisticPy( + True, int(b.row_count), int(b.start_time), int(b.end_time), + float(s.u.bool_s.sum), + bool(s.u.bool_s.first_bool), + bool(s.u.bool_s.last_bool), + ) + if dt == TS_DATATYPE_STRING: + return StringTimeseriesStatisticPy( + True, int(b.row_count), int(b.start_time), int(b.end_time), + _c_str_to_py_utf8_or_none(s.u.string_s.str_min), + _c_str_to_py_utf8_or_none(s.u.string_s.str_max), + _c_str_to_py_utf8_or_none(s.u.string_s.str_first), + _c_str_to_py_utf8_or_none(s.u.string_s.str_last), + ) + if dt == TS_DATATYPE_TEXT: + return TextTimeseriesStatisticPy( + True, int(b.row_count), int(b.start_time), int(b.end_time), + _c_str_to_py_utf8_or_none(s.u.text_s.str_first), + _c_str_to_py_utf8_or_none(s.u.text_s.str_last), + ) + return TimeseriesStatisticPy( + True, int(b.row_count), int(b.start_time), int(b.end_time)) + cdef object timeseries_metadata_c_to_py(TimeseriesMetadata* m): cdef str name_py if m == NULL or m.measurement_name == NULL: name_py = "" else: name_py = m.measurement_name.decode('utf-8') - cdef object stat = TimeseriesStatisticPy( - bool(m.statistic.has_statistic), - int(m.statistic.row_count), - int(m.statistic.start_time), - int(m.statistic.end_time), - bool(m.statistic.sum_valid), - float(m.statistic.sum), - bool(m.statistic.int_range_valid), - int(m.statistic.min_int64), - int(m.statistic.max_int64), - int(m.statistic.first_int64), - int(m.statistic.last_int64), - bool(m.statistic.float_range_valid), - float(m.statistic.min_float64), - float(m.statistic.max_float64), - float(m.statistic.first_float64), - float(m.statistic.last_float64), - bool(m.statistic.bool_ext_valid), - bool(m.statistic.first_bool), - bool(m.statistic.last_bool), - bool(m.statistic.str_ext_valid), - _c_str_to_py_utf8_or_none(m.statistic.str_min), - _c_str_to_py_utf8_or_none(m.statistic.str_max), - _c_str_to_py_utf8_or_none(m.statistic.str_first), - _c_str_to_py_utf8_or_none(m.statistic.str_last), - ) + cdef object stat = timeseries_statistic_c_to_py(&m.statistic) return TimeseriesMetadataPy( name_py, TSDataTypePy(m.data_type), @@ -976,8 +1012,8 @@ cdef tuple c_device_segments_to_tuple(char** segs, uint32_t n): cdef uint32_t i cdef list out = [] for i in range(n): - if segs[i] == NULL: - out.append("") + if segs == NULL or segs[i] == NULL: + out.append(None) else: out.append(segs[i].decode('utf-8')) return tuple(out) @@ -987,19 +1023,19 @@ cdef dict device_timeseries_metadata_map_to_py(DeviceTimeseriesMetadataMap* mmap cdef uint32_t di, ti cdef char* p cdef char* tnp - cdef str key - cdef str table_py + cdef object key + cdef object table_py cdef tuple segs_py cdef list series for di in range(mmap.device_count): p = mmap.entries[di].device.path if p == NULL: - key = "" + key = None else: key = p.decode('utf-8') tnp = mmap.entries[di].device.table_name if tnp == NULL: - table_py = "" + table_py = None else: table_py = tnp.decode('utf-8') segs_py = c_device_segments_to_tuple( @@ -1015,37 +1051,37 @@ cdef dict device_timeseries_metadata_map_to_py(DeviceTimeseriesMetadataMap* mmap return out cdef public api object reader_get_all_devices_c(TsFileReader reader): - cdef TsDeviceDetails* arr = NULL + cdef DeviceID* arr = NULL cdef uint32_t n = 0 cdef int err cdef list out = [] cdef uint32_t i - cdef str path_py - cdef str tname_py + cdef object path_py + cdef object tname_py cdef tuple segs_py err = tsfile_reader_get_all_devices(reader, &arr, &n) check_error(err) try: for i in range(n): if arr[i].path == NULL: - path_py = "" + path_py = None else: path_py = arr[i].path.decode('utf-8') if arr[i].table_name == NULL: - tname_py = "" + tname_py = None else: tname_py = arr[i].table_name.decode('utf-8') segs_py = c_device_segments_to_tuple(arr[i].segments, arr[i].segment_count) - out.append(DeviceDetailsPy(path_py, tname_py, segs_py)) + out.append(DeviceIDPy(path_py, tname_py, segs_py)) finally: - tsfile_free_device_details_array(arr, n) + tsfile_free_device_id_array(arr, n) return out cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, object device_ids): cdef DeviceTimeseriesMetadataMap mmap - cdef TsDeviceDetails* q = NULL + cdef DeviceID* q = NULL cdef uint32_t qlen = 0 cdef uint32_t i cdef int err @@ -1061,10 +1097,10 @@ cdef public api object reader_get_timeseries_metadata_c(TsFileReader reader, check_error(err) else: qlen = len(device_ids) - q = malloc(sizeof(TsDeviceDetails) * qlen) + q = malloc(sizeof(DeviceID) * qlen) if q == NULL: raise MemoryError() - memset(q, 0, sizeof(TsDeviceDetails) * qlen) + memset(q, 0, sizeof(DeviceID) * qlen) try: for i in range(qlen): dev = device_ids[i] diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index fb7dc4716..2259f7704 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -30,7 +30,7 @@ import pyarrow as pa from libc.stdint cimport INT64_MIN, INT64_MAX, uintptr_t from tsfile.schema import TSDataType as TSDataTypePy -from tsfile.schema import DeviceDetails, DeviceTimeseriesMetadataGroup +from tsfile.schema import DeviceID, DeviceTimeseriesMetadataGroup from .date_utils import parse_int_to_date from .tsfile_cpp cimport * from .tsfile_py_cpp cimport * @@ -428,10 +428,10 @@ cdef class TsFileReaderPy: """ return get_all_timeseries_schema(self.reader) - def get_all_devices(self) -> List[DeviceDetails]: + def get_all_devices(self) -> List[DeviceID]: """ Return all devices (path, table name, segments) as - :class:`tsfile.schema.DeviceDetails`. + :class:`tsfile.schema.DeviceID`. NULL C fields become None. """ return reader_get_all_devices_c(self.reader) From a392dd7287566e7ca1cef31322248968e40e8413 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Tue, 7 Apr 2026 14:23:57 +0800 Subject: [PATCH 7/7] mvn spotless:apply --- cpp/src/cwrapper/tsfile_cwrapper.cc | 6 ++---- cpp/src/cwrapper/tsfile_cwrapper.h | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index f13146007..e6ecef2a8 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -1179,8 +1179,7 @@ void tsfile_free_device_id_array(DeviceID* devices, uint32_t length) { free(devices); } -ERRNO tsfile_reader_get_all_devices(TsFileReader reader, - DeviceID** out_devices, +ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, uint32_t* out_length) { if (reader == nullptr || out_devices == nullptr || out_length == nullptr) { return common::E_INVALID_ARG; @@ -1192,8 +1191,7 @@ ERRNO tsfile_reader_get_all_devices(TsFileReader reader, if (ids.empty()) { return common::E_OK; } - auto* arr = static_cast( - malloc(sizeof(DeviceID) * ids.size())); + auto* arr = static_cast(malloc(sizeof(DeviceID) * ids.size())); if (arr == nullptr) { return common::E_OOM; } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 29bb410a0..6c0e6d2c8 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -450,8 +450,7 @@ ERRNO tsfile_reader_close(TsFileReader reader); * @param out_devices [out] Allocated array; caller frees with * tsfile_free_device_id_array. */ -ERRNO tsfile_reader_get_all_devices(TsFileReader reader, - DeviceID** out_devices, +ERRNO tsfile_reader_get_all_devices(TsFileReader reader, DeviceID** out_devices, uint32_t* out_length); void tsfile_free_device_id_array(DeviceID* devices, uint32_t length);