in python/tsfile/tsfile_reader.pyx [0:0]
def read_data_frame(self, max_row_num : int = 1024):
    """
    Read up to ``max_row_num`` rows of the result set into a pandas DataFrame.

    The row the result set currently exposes is taken first, provided it
    holds at least one non-None value (the caller may or may not have
    called ``next()`` already). After that ``next()`` is called repeatedly
    until the row limit is reached or ``next()`` reports no more rows.

    :param max_row_num: maximum number of rows to return, default 1024.
        A value <= 0 returns an empty DataFrame without reading any row.
    :return: a DataFrame with one column per result-set column, cast to the
        dtypes given by each column type's ``to_pandas_dtype()``; DATE
        columns are converted to normalized datetimes (unparseable values
        become NaT).
    :raises ValueError: if a DATE column cannot be found in the DataFrame.
    """
    self.check_result_set_invalid()
    column_names = self.metadata.get_column_list()
    column_num = self.metadata.get_column_num()

    # Column indices are 1-based here; query each column type once and
    # reuse it for both DATE detection and the pandas dtype mapping.
    column_types = [
        self.metadata.get_data_type(i + 1) for i in range(column_num)
    ]
    date_columns = [
        name
        for name, column_type in zip(column_names, column_types)
        if column_type == TSDataTypePy.DATE
    ]
    dtype_by_column = {
        name: column_type.to_pandas_dtype()
        for name, column_type in zip(column_names, column_types)
    }

    data_container = {name: [] for name in column_names}
    row_count = 0

    # The user may or may not have called next() before, so start from the
    # current row. An all-None row is treated as "no current data" and
    # skipped. Guarding on max_row_num keeps a non-positive limit from
    # still yielding one row.
    if max_row_num > 0:
        current_row = [
            self.get_value_by_index(i + 1) for i in range(column_num)
        ]
        if any(value is not None for value in current_row):
            for name, value in zip(column_names, current_row):
                data_container[name].append(value)
            row_count = 1

    # The limit is checked before next() so the cursor never advances past
    # the last row that is actually returned.
    while row_count < max_row_num and self.next():
        row = [self.get_value_by_index(i + 1) for i in range(column_num)]
        for name, value in zip(column_names, row):
            data_container[name].append(value)
        row_count += 1

    df = pd.DataFrame(data_container)
    # NOTE(review): astype may raise when a column contains None and the
    # target dtype is non-nullable -- confirm what to_pandas_dtype() returns.
    df = df.astype(dtype_by_column)

    for col in date_columns:
        # Only the column lookup is guarded, so an unrelated KeyError from
        # the conversion is not misreported as a missing column.
        try:
            raw_dates = df[col]
        except KeyError as err:
            raise ValueError(
                f"DATE column '{col}' not found in DataFrame"
            ) from err
        # Values are parsed from their string form as YYYYMMDD; anything
        # that does not match is coerced to NaT instead of raising.
        df[col] = pd.to_datetime(
            raw_dates.astype(str),
            format='%Y%m%d',
            errors='coerce'
        ).dt.normalize()
    return df