def read_data_frame()

in python/tsfile/tsfile_reader.pyx [0:0]


    def read_data_frame(self, max_row_num : int = 1024):
        """
        Collect rows of the query result into a pandas DataFrame.

        The row the result set currently points at is taken first, unless all of
        its values are None (which is how "no current row" shows up here). After
        that ``next()`` is called repeatedly until ``max_row_num`` rows have been
        gathered or ``next()`` reports that no more data is available.

        :param max_row_num: maximum number of rows to return, default 1024.
            With a value of 0 or less no row is read, ``next()`` is never
            called and an empty DataFrame is returned.
        :return: a DataFrame with one column per result column, cast to the
            dtype reported by ``to_pandas_dtype()`` of each column's data type;
            DATE columns are converted to normalized datetimes.
        :raises ValueError: if a DATE column cannot be found in the DataFrame.
        """
        self.check_result_set_invalid()
        column_names = self.metadata.get_column_list()
        column_num = self.metadata.get_column_num()

        # Column indexes are 1-based here. Fetch every column type once and
        # reuse it for both the DATE detection and the dtype mapping.
        column_types = [
            self.metadata.get_data_type(i + 1)
            for i in range(column_num)
        ]

        date_columns = [
            column_names[i]
            for i, column_type in enumerate(column_types)
            if column_type == TSDataTypePy.DATE
        ]

        data_type = [column_type.to_pandas_dtype() for column_type in column_types]

        data_container = {
            column_name: [] for column_name in column_names
        }

        cur_line = 0

        # User may call result_set.next() before or not, so we just get current data.
        # If there is no data in result set, we just get a None list.
        # The current row counts towards max_row_num, so it is only read when
        # at least one row was requested.
        if max_row_num > 0:
            row_data = [
                self.get_value_by_index(i + 1)
                for i in range(column_num)
            ]

            if not all(value is None for value in row_data):
                for column_name, value in zip(column_names, row_data):
                    data_container[column_name].append(value)
                cur_line += 1

        # The limit is checked first so that next() is never called once the
        # requested number of rows has been reached.
        while cur_line < max_row_num and self.next():
            for column_name, index in zip(column_names, range(1, column_num + 1)):
                data_container[column_name].append(self.get_value_by_index(index))
            cur_line += 1

        df = pd.DataFrame(data_container)
        df = df.astype(dict(zip(column_names, data_type)))

        # DATE values are rendered as text and parsed with the %Y%m%d format;
        # anything that does not parse becomes NaT because of errors='coerce'.
        # NOTE(review): presumably DATE values arrive as YYYYMMDD integers - confirm.
        for col in date_columns:
            try:
                df[col] = pd.to_datetime(
                    df[col].astype(str),
                    format='%Y%m%d',
                    errors='coerce'
                ).dt.normalize()
            except KeyError as err:
                # Keep ValueError for callers, but preserve the original cause.
                raise ValueError(f"DATE column '{col}' not found in DataFrame") from err
        return df