Skip to content

Commit 9e1edf5

Browse files
committed
Fix data sorting: sort DataFrame rows by tag columns and time before writing.
1 parent 46de126 commit 9e1edf5

3 files changed

Lines changed: 52 additions & 4 deletions

File tree

python/tests/test_to_tsfile.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def test_dataframe_to_tsfile_default_table_name():
205205

206206
dataframe_to_tsfile(df, tsfile_path)
207207

208-
df_read = to_dataframe(tsfile_path, table_name="table")
208+
df_read = to_dataframe(tsfile_path, table_name="test_dataframe_to_tsfile_default_name")
209209
assert df_read.shape == (10, 2)
210210
finally:
211211
if os.path.exists(tsfile_path):
@@ -343,3 +343,31 @@ def test_dataframe_to_tsfile_string_vs_blob():
343343
finally:
344344
if os.path.exists(tsfile_path):
345345
os.remove(tsfile_path)
346+
347+
348+
def test_dataframe_to_tsfile_tag_time_unsorted():
    """Write rows that are out of (device, time) order and check the read-back order.

    The expected frame is the input sorted by the ``device`` tag column and
    then by ``time``; each column read from the file must equal it.
    """
    tsfile_path = "test_dataframe_to_tsfile_tag_time_unsorted.tsfile"
    try:
        # Remove any file left behind by an earlier run before writing.
        if os.path.exists(tsfile_path):
            os.remove(tsfile_path)

        timestamps = [30, 10, 20, 50, 40, 15, 25, 35, 5, 45]
        devices = [
            'device1', 'device1', 'device1', 'device2', 'device2',
            'device1', 'device1', 'device2', 'device1', 'device2',
        ]
        values = [i * 1.5 for i in range(10)]
        df = pd.DataFrame({
            'time': timestamps,
            'device': devices,
            'value': values,
        })

        dataframe_to_tsfile(df, tsfile_path, table_name="test_table", tag_column=["device"])
        df_read = to_dataframe(tsfile_path, table_name="test_table")

        # Reference ordering: group by tag first, then ascending time.
        df_sorted = df.sort_values(by=['device', 'time']).reset_index(drop=True)
        df_expected = convert_to_nullable_types(df_sorted)

        assert df_read.shape == (10, 3)
        for column in ("device", "time", "value"):
            assert df_read[column].equals(df_expected[column])
    finally:
        # Always clean up the generated file, even when an assertion fails.
        if os.path.exists(tsfile_path):
            os.remove(tsfile_path)

python/tsfile/tsfile_table_writer.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import pandas as pd
1919

2020
from tsfile import TableSchema, Tablet, TableNotExistError
21-
from tsfile import TsFileWriter
21+
from tsfile import TsFileWriter, ColumnCategory
2222
from tsfile.constants import TSDataType
2323
from tsfile.exceptions import ColumnNotExistError, TypeMismatchError
2424

@@ -119,6 +119,24 @@ def write_dataframe(self, dataframe: pd.DataFrame):
119119
context=f"Type mismatches: {'; '.join(type_mismatches)}"
120120
)
121121

122+
tag_columns = []
123+
for col in self.tableSchema.get_columns():
124+
if col.get_category() == ColumnCategory.TAG:
125+
tag_col_name = col.get_column_name()
126+
if tag_col_name in df_column_name_map:
127+
tag_columns.append(df_column_name_map[tag_col_name])
128+
129+
time_column = None
130+
for col in dataframe.columns:
131+
if col.lower() == 'time':
132+
time_column = col
133+
break
134+
135+
if time_column:
136+
sort_by = tag_columns.copy()
137+
sort_by.append(time_column)
138+
dataframe = dataframe.sort_values(by=sort_by)
139+
122140
self.writer.write_dataframe(self.tableSchema.get_table_name(), dataframe)
123141

124142
def close(self):

python/tsfile/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
#
18+
from pathlib import Path
1819
from typing import Iterator, Union
1920
from typing import Optional
2021

@@ -188,7 +189,7 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
188189
Path to the TsFile to write. Will be created if it doesn't exist.
189190
190191
table_name : Optional[str], default None
191-
Name of the table. If None, defaults to "table".
192+
Name of the table. If None, defaults to the TsFile's file name without its extension (the stem of ``file_path``).
192193
193194
time_column : Optional[str], default None
194195
Name of the time column. If None, will look for a column named 'time' (case-insensitive),
@@ -211,7 +212,8 @@ def dataframe_to_tsfile(dataframe: pd.DataFrame,
211212
raise ValueError("DataFrame cannot be None or empty")
212213

213214
if table_name is None:
214-
table_name = "table"
215+
filename = Path(file_path).stem
216+
table_name = filename
215217

216218
time_col_name = None
217219
if time_column is not None:

0 commit comments

Comments
 (0)