core/maxframe/dataframe/merge/append.py (97 lines of code) (raw):
# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pandas as pd
from ... import opcodes
from ...core import OutputType
from ...serialization.serializables import BoolField
from ..datasource.dataframe import from_pandas
from ..operators import (
DATAFRAME_TYPE,
SERIES_TYPE,
DataFrameOperator,
DataFrameOperatorMixin,
)
from ..utils import parse_index
class DataFrameAppend(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that appends one or more DataFrames/Series after ``df``.

    Calling the operator derives the result's row count and index value from
    the metadata of ``df`` and the appended objects, then hands them to
    ``new_dataframe`` / ``new_series`` with ``df`` first in the input list.
    Column metadata (``dtypes``/``columns_value`` for DataFrames, ``dtype``/
    ``name`` for Series) is always taken from ``df``; the appended objects'
    columns are not inspected here.
    """

    _op_type_ = opcodes.APPEND

    # When true, the result index is a fresh ``pd.RangeIndex`` spanning all
    # rows instead of the concatenation of the inputs' indexes.
    ignore_index = BoolField("ignore_index", default=False)
    # Declared operator fields; the metadata computation in this class does
    # not read them.
    verify_integrity = BoolField("verify_integrity", default=False)
    sort = BoolField("sort", default=False)

    def __init__(self, output_types=None, **kw):
        """Forward ``output_types`` to the base operator as ``_output_types``."""
        super().__init__(_output_types=output_types, **kw)

    def _append_metadata(self, df, other, accepted_type):
        """Validate ``other`` and compute the metadata shared by both call paths.

        Args:
            df: The object being appended to.
            other: A single instance of ``accepted_type`` or a ``list`` of
                such instances.
            accepted_type: Type (or tuple of types) every appended object
                must be an instance of.

        Returns:
            A ``(inputs, row_length, index_value)`` tuple where ``inputs`` is
            ``df`` followed by the appended objects, ``row_length`` is the sum
            of all inputs' first shape dimension, and ``index_value`` is the
            parsed index of the result.

        Raises:
            ValueError: If ``other`` is neither an ``accepted_type`` instance
                nor a list, or if any list item is not an ``accepted_type``
                instance.
        """
        if isinstance(other, accepted_type):
            appended = [other]
        elif isinstance(other, list):
            appended = other
        else:  # pragma: no cover
            raise ValueError(f"Invalid type {type(other)} to append")

        row_length = df.shape[0]
        # The concatenated index is only needed when it is kept; with
        # ``ignore_index`` it would be thrown away, so do not build it at all.
        index = None if self.ignore_index else df.index_value.to_pandas()
        for item in appended:
            if not isinstance(item, accepted_type):  # pragma: no cover
                raise ValueError(f"Invalid type {type(item)} to append")
            row_length += item.shape[0]
            if index is not None:
                index = index.append(item.index_value.to_pandas())

        if index is None:
            # NOTE(review): ``pd.RangeIndex`` needs an integer length; confirm
            # that inputs reaching this point always have a known row count.
            index_value = parse_index(pd.RangeIndex(row_length))
        else:
            index_value = parse_index(index)
        return [df, *appended], row_length, index_value

    def _call_dataframe(self, df, other):
        """Build the DataFrame result of appending ``other`` to ``df``.

        ``other`` must be a DataFrame-typed object or a list of them;
        otherwise ``ValueError`` is raised. The result keeps ``df``'s column
        count, ``dtypes`` and ``columns_value``.
        """
        inputs, row_length, index_value = self._append_metadata(
            df, other, DATAFRAME_TYPE
        )
        return self.new_dataframe(
            inputs,
            shape=(row_length, df.shape[1]),
            dtypes=df.dtypes,
            index_value=index_value,
            columns_value=df.columns_value,
        )

    def _call_series(self, df, other):
        """Build the Series result of appending ``other`` to ``df``.

        ``other`` must be a Series-typed object or a list of them; otherwise
        ``ValueError`` is raised. The result keeps ``df``'s ``dtype`` and
        ``name``.
        """
        inputs, row_length, index_value = self._append_metadata(
            df, other, SERIES_TYPE
        )
        return self.new_series(
            inputs,
            shape=(row_length,),
            dtype=df.dtype,
            index_value=index_value,
            name=df.name,
        )

    def __call__(self, df, other):
        """Set the output type from ``df`` and dispatch to the matching builder.

        A DataFrame-typed ``df`` yields a DataFrame result; anything else is
        handled through the Series path.
        """
        if isinstance(df, DATAFRAME_TYPE):
            self.output_types = [OutputType.dataframe]
            return self._call_dataframe(df, other)
        else:
            self.output_types = [OutputType.series]
            return self._call_series(df, other)
def append(df, other, ignore_index=False, verify_integrity=False, sort=False):
    """Append ``other`` to ``df`` through a ``DataFrameAppend`` operator.

    Args:
        df: The object to append to.
        other: The object(s) to append. A ``dict`` is first turned into a
            one-row DataFrame (each value wrapped in a single-element list)
            via ``from_pandas``; anything else is passed to the operator
            unchanged.
        ignore_index: Forwarded to the operator.
        verify_integrity: Not supported; must be falsy.
        sort: Not supported; must be falsy.

    Returns:
        Whatever calling the ``DataFrameAppend`` operator on ``(df, other)``
        returns.

    Raises:
        NotImplementedError: If ``verify_integrity`` or ``sort`` is truthy.
    """
    unsupported_requested = not (not verify_integrity and not sort)
    if unsupported_requested:  # pragma: no cover
        raise NotImplementedError("verify_integrity and sort are not supported now")

    if isinstance(other, dict):
        # One row: every mapping value becomes a single-element column.
        single_row = pd.DataFrame({key: [value] for key, value in other.items()})
        other = from_pandas(single_row)

    operator = DataFrameAppend(
        ignore_index=ignore_index,
        verify_integrity=verify_integrity,
        sort=sort,
    )
    return operator(df, other)