# Copyright 2022 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import os
import pickle
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Tuple, Union

import torch

from ..typing import (
  NodeType, EdgeType, as_str, TensorDataType,
  GraphPartitionData, HeteroGraphPartitionData,
  FeaturePartitionData, HeteroFeaturePartitionData,
)
from ..utils import convert_to_tensor, ensure_dir, id2idx, append_tensor_to_file, load_and_concatenate_tensors

class PartitionBook(object):
  r""" Interface of a partition book.

  Within this module a partition book is indexed with a tensor of ids and
  the result is compared against partition indices.
  """
  @abstractmethod
  def __getitem__(self, indices):
    r""" Look up the partition info of ``indices``; the base implementation
    does nothing and returns ``None``.
    """

  @property
  def offset(self):
    r""" Offset of this partition book; the base implementation is ``0``.
    """
    return 0

# Partition books of a heterogeneous graph, keyed by node type.
HeteroNodePartitionDict = Dict[NodeType, PartitionBook]
# Partition books of a heterogeneous graph, keyed by edge type.
HeteroEdgePartitionDict = Dict[EdgeType, PartitionBook]


def save_meta(
  output_dir: str,
  num_parts: int,
  data_cls: str = 'homo',
  node_types: Optional[List[NodeType]] = None,
  edge_types: Optional[List[EdgeType]] = None,
):
  r""" Pickle the partitioning meta info into ``<output_dir>/META``.

  Args:
    output_dir (str): Directory that receives the ``META`` file.
    num_parts (int): Number of partitions.
    data_cls (str): Kind of the partitioned data. (default: ``'homo'``)
    node_types: Node types to record. (default: ``None``)
    edge_types: Edge types to record. (default: ``None``)
  """
  meta_path = os.path.join(output_dir, 'META')
  meta = dict(
    num_parts=num_parts,
    data_cls=data_cls,
    node_types=node_types,
    edge_types=edge_types,
  )
  with open(meta_path, 'wb') as meta_file:
    pickle.dump(meta, meta_file, protocol=pickle.HIGHEST_PROTOCOL)


def save_node_pb(
  output_dir: str,
  node_pb: PartitionBook,
  ntype: Optional[NodeType] = None
):
  r""" Save a node partition book with ``torch.save``.

  Without ``ntype`` the book goes to ``<output_dir>/node_pb.pt``; otherwise
  it goes to ``<output_dir>/node_pb/<ntype>.pt``.
  """
  if ntype is None:
    target = os.path.join(output_dir, 'node_pb.pt')
  else:
    pb_dir = os.path.join(output_dir, 'node_pb')
    ensure_dir(pb_dir)
    target = os.path.join(pb_dir, f'{as_str(ntype)}.pt')
  torch.save(node_pb, target)


def save_edge_pb(
  output_dir: str,
  edge_pb: PartitionBook,
  etype: Optional[EdgeType] = None
):
  r""" Save an edge partition book with ``torch.save``.

  Without ``etype`` the book goes to ``<output_dir>/edge_pb.pt``; otherwise
  it goes to ``<output_dir>/edge_pb/<etype>.pt``.
  """
  if etype is None:
    target = os.path.join(output_dir, 'edge_pb.pt')
  else:
    pb_dir = os.path.join(output_dir, 'edge_pb')
    ensure_dir(pb_dir)
    target = os.path.join(pb_dir, f'{as_str(etype)}.pt')
  torch.save(edge_pb, target)

def save_graph_cache(
  output_dir: str,
  graph_partition_list: List[GraphPartitionData],
  etype: Optional[EdgeType] = None,
  with_edge_feat: bool = False
):
  r""" Concatenate all graph partitions and save the full topology.

  The result is written to ``<output_dir>/graph[/<etype>]`` as ``rows.pt``
  and ``cols.pt``, plus ``eids.pt`` when ``with_edge_feat`` is set and
  ``weights.pt`` when the first partition carries weights. An empty
  partition list is a no-op.
  """
  if not graph_partition_list:
    return
  cache_dir = os.path.join(output_dir, 'graph')
  if etype is not None:
    cache_dir = os.path.join(cache_dir, as_str(etype))
  ensure_dir(cache_dir)

  def _gather(select):
    # Concatenate one field over every partition, in partition order.
    return torch.cat([select(part) for part in graph_partition_list])

  rows = _gather(lambda part: part.edge_index[0])
  cols = _gather(lambda part: part.edge_index[1])
  # Only the first partition decides whether weights are present.
  has_weights = graph_partition_list[0].weights is not None
  weights = _gather(lambda part: part.weights) if has_weights else None

  torch.save(rows, os.path.join(cache_dir, 'rows.pt'))
  torch.save(cols, os.path.join(cache_dir, 'cols.pt'))
  if with_edge_feat:
    edge_ids = _gather(lambda part: part.eids)
    torch.save(edge_ids, os.path.join(cache_dir, 'eids.pt'))
  if weights is not None:
    torch.save(weights, os.path.join(cache_dir, 'weights.pt'))


def save_graph_partition(
  output_dir: str,
  partition_idx: int,
  graph_partition: GraphPartitionData,
  etype: Optional[EdgeType] = None
):
  r""" Save one graph topology partition.

  Files are written to ``<output_dir>/part<partition_idx>/graph[/<etype>]``:
  ``rows.pt``, ``cols.pt``, ``eids.pt`` and, if weights are present,
  ``weights.pt``.
  """
  part_dir = os.path.join(output_dir, f'part{partition_idx}', 'graph')
  if etype is not None:
    part_dir = os.path.join(part_dir, as_str(etype))
  ensure_dir(part_dir)

  named_tensors = [
    ('rows.pt', graph_partition.edge_index[0]),
    ('cols.pt', graph_partition.edge_index[1]),
    ('eids.pt', graph_partition.eids),
  ]
  if graph_partition.weights is not None:
    named_tensors.append(('weights.pt', graph_partition.weights))
  for fname, tensor in named_tensors:
    torch.save(tensor, os.path.join(part_dir, fname))

def save_feature_partition(
  output_dir: str,
  partition_idx: int,
  feature_partition: FeaturePartitionData,
  group: str = 'node_feat',
  graph_type: Optional[Union[NodeType, EdgeType]] = None
):
  r""" Save a feature partition, including its cache if present.

  Features and ids are appended to ``feats.pkl`` / ``ids.pkl`` under
  ``<output_dir>/part<partition_idx>/<group>[/<graph_type>]``; cached
  features and ids are written to ``cache_feats.pt`` / ``cache_ids.pt``.
  """
  target_dir = os.path.join(output_dir, f'part{partition_idx}', group)
  if graph_type is not None:
    target_dir = os.path.join(target_dir, as_str(graph_type))
  ensure_dir(target_dir)

  appended = (
    ('feats.pkl', feature_partition.feats),
    ('ids.pkl', feature_partition.ids),
  )
  for fname, tensor in appended:
    append_tensor_to_file(os.path.join(target_dir, fname), tensor)

  if feature_partition.cache_feats is None:
    return
  torch.save(feature_partition.cache_feats,
             os.path.join(target_dir, 'cache_feats.pt'))
  torch.save(feature_partition.cache_ids,
             os.path.join(target_dir, 'cache_ids.pt'))

def save_feature_partition_chunk(
  output_dir: str,
  partition_idx: int,
  feature_partition: FeaturePartitionData,
  group: str = 'node_feat',
  graph_type: Optional[Union[NodeType, EdgeType]] = None
):
  r""" Append one chunk of a feature partition to its files.

  ``feature_partition.feats`` and ``feature_partition.ids`` are appended to
  ``feats.pkl`` and ``ids.pkl`` under
  ``<output_dir>/part<partition_idx>/<group>[/<graph_type>]``. Cache fields
  are ignored here.
  """
  path_parts = [output_dir, f'part{partition_idx}', group]
  if graph_type is not None:
    path_parts.append(as_str(graph_type))
  chunk_dir = os.path.join(*path_parts)
  ensure_dir(chunk_dir)

  feats_path = os.path.join(chunk_dir, 'feats.pkl')
  ids_path = os.path.join(chunk_dir, 'ids.pkl')
  append_tensor_to_file(feats_path, feature_partition.feats)
  append_tensor_to_file(ids_path, feature_partition.ids)

def save_feature_partition_cache(
  output_dir: str,
  partition_idx: int,
  feature_partition: FeaturePartitionData,
  group: str = 'node_feat',
  graph_type: Optional[Union[NodeType, EdgeType]] = None
):
  r""" Save the feature cache of a partition.

  The target directory
  ``<output_dir>/part<partition_idx>/<group>[/<graph_type>]`` is always
  created; ``cache_feats.pt`` and ``cache_ids.pt`` are only written when
  ``feature_partition.cache_feats`` is not ``None``.
  """
  path_parts = [output_dir, f'part{partition_idx}', group]
  if graph_type is not None:
    path_parts.append(as_str(graph_type))
  cache_dir = os.path.join(*path_parts)
  ensure_dir(cache_dir)

  if feature_partition.cache_feats is None:
    return
  torch.save(feature_partition.cache_feats,
             os.path.join(cache_dir, 'cache_feats.pt'))
  torch.save(feature_partition.cache_ids,
             os.path.join(cache_dir, 'cache_ids.pt'))


class PartitionerBase(ABC):
  r""" Base class for partitioning graphs and features.

  Subclasses provide ``_partition_node`` and ``_cache_node``; this class
  partitions the graph topology and features accordingly and saves the
  results under ``output_dir``.

  Args:
    output_dir (str): The output directory, created if needed.
    num_parts (int): Number of partitions, must be greater than 1.
    num_nodes (int or Dict[NodeType, int]): Number of nodes. Passing a dict
      selects heterogeneous mode.
    edge_index: The edge index data, a dict keyed by edge type for
      heterogeneous graphs.
    node_feat: The node feature data, optional.
    node_feat_dtype (torch.dtype): The data type of node features.
    edge_feat: The edge feature data, optional.
    edge_feat_dtype (torch.dtype): The data type of edge features.
    edge_weights: The edge weights, optional.
    edge_assign_strategy (str): ``'by_src'`` or ``'by_dst'``
      (case-insensitive); an edge is assigned to the partition of its source
      or destination node. (default: ``'by_src'``)
    chunk_size (int): Number of edges / ids handled per chunk.
      (default: ``10000``)
  """
  def __init__(
    self,
    output_dir: str,
    num_parts: int,
    num_nodes: Union[int, Dict[NodeType, int]],
    edge_index: Union[TensorDataType, Dict[EdgeType, TensorDataType]],
    node_feat: Optional[Union[TensorDataType, Dict[NodeType, TensorDataType]]] = None,
    node_feat_dtype: torch.dtype = torch.float32,
    edge_feat: Optional[Union[TensorDataType, Dict[EdgeType, TensorDataType]]] = None,
    edge_feat_dtype: torch.dtype = torch.float32,
    edge_weights: Optional[Union[TensorDataType, Dict[EdgeType, TensorDataType]]] = None,
    edge_assign_strategy: str = 'by_src',
    chunk_size: int = 10000,
  ):
    self.output_dir = output_dir
    ensure_dir(self.output_dir)

    self.num_parts = num_parts
    assert self.num_parts > 1

    self.num_nodes = num_nodes
    self.edge_index = convert_to_tensor(edge_index, dtype=torch.int64)
    self.node_feat = convert_to_tensor(node_feat, dtype=node_feat_dtype)
    self.edge_feat = convert_to_tensor(edge_feat, dtype=edge_feat_dtype)
    self.edge_weights = convert_to_tensor(edge_weights, dtype=torch.float32)

    if isinstance(self.num_nodes, dict):
      assert isinstance(self.edge_index, dict)
      assert isinstance(self.node_feat, dict) or self.node_feat is None
      assert isinstance(self.edge_feat, dict) or self.edge_feat is None
      self.data_cls = 'hetero'
      self.node_types = list(self.num_nodes.keys())
      self.edge_types = list(self.edge_index.keys())
      self.num_edges = {}
      for etype, index in self.edge_index.items():
        self.num_edges[etype] = len(index[0])
    else:
      self.data_cls = 'homo'
      self.node_types = None
      self.edge_types = None
      self.num_edges = len(self.edge_index[0])

    self.edge_assign_strategy = edge_assign_strategy.lower()
    assert self.edge_assign_strategy in ['by_src', 'by_dst']
    self.chunk_size = chunk_size

    # Node partition books recorded by `_process_node` and consumed by
    # `_process_edge`; keyed by node type (``None`` for homogeneous data).
    self.node_pb_dict = {}

  def get_edge_index(self, etype: Optional[EdgeType] = None):
    r""" Get the edge index, of ``etype`` for heterogeneous data.
    """
    if 'hetero' == self.data_cls:
      assert etype is not None
      return self.edge_index[etype]
    return self.edge_index

  def get_node_feat(self, ntype: Optional[NodeType] = None):
    r""" Get the node features, of ``ntype`` for heterogeneous data.

    Returns ``None`` if no node features are available (for this type).
    """
    if self.node_feat is None:
      return None
    if 'hetero' == self.data_cls:
      assert ntype is not None
      # Node types without features are allowed and yield ``None``.
      return self.node_feat.get(ntype)
    return self.node_feat

  def get_edge_feat(self, etype: Optional[EdgeType] = None):
    r""" Get the edge features, of ``etype`` for heterogeneous data.

    Returns ``None`` if no edge features are available (for this type).
    """
    if self.edge_feat is None:
      return None
    if 'hetero' == self.data_cls:
      assert etype is not None
      # Edge types without features are allowed and yield ``None``.
      return self.edge_feat.get(etype)
    return self.edge_feat

  @staticmethod
  def _chunk_ids(ids: torch.Tensor, chunk_size: int) -> Tuple[torch.Tensor, ...]:
    r""" Split ``ids`` into chunks of at most ``chunk_size`` elements.

    An empty ``ids`` yields a single empty chunk, because ``torch.chunk``
    rejects a chunk count of zero.
    """
    num_chunks = max(1, (ids.shape[0] + chunk_size - 1) // chunk_size)
    return torch.chunk(ids, chunks=num_chunks)

  def _save_feature_chunks(
    self,
    feat: torch.Tensor,
    ids: torch.Tensor,
    pidx: int,
    group: str,
    graph_type: Optional[Union[NodeType, EdgeType]] = None
  ):
    r""" Append the features selected by ``ids`` to partition ``pidx``,
    one chunk at a time.
    """
    for chunk in self._chunk_ids(ids, self.chunk_size):
      feat_chunk = FeaturePartitionData(
        feats=feat[chunk],
        ids=chunk.clone(),
        cache_feats=None,
        cache_ids=None
      )
      save_feature_partition_chunk(self.output_dir, pidx, feat_chunk,
                                   group=group, graph_type=graph_type)

  @abstractmethod
  def _partition_node(
    self,
    ntype: Optional[NodeType] = None
  ) -> Tuple[List[torch.Tensor], PartitionBook]:
    r""" Partition graph nodes of a specify node type, needs to be overwritten.

    Args:
      ntype (str): The type for input nodes, must be provided for heterogeneous
        graph. (default: ``None``)

    Returns:
      List[torch.Tensor]: The list of partitioned nodes ids.
      PartitionBook: The partition book of graph nodes.
    """

  @abstractmethod
  def _cache_node(
    self,
    ntype: Optional[NodeType] = None
  ) -> List[Optional[torch.Tensor]]:
    r""" Do feature caching and get cached results of a specify
    node type, needs to be overwritten.

    Returns:
      List[Optional[torch.Tensor]]: list of node ids need to be cached on
        each partition.
    """

  def _partition_graph(
    self,
    node_pb: Union[PartitionBook, Dict[NodeType, PartitionBook]],
    etype: Optional[EdgeType] = None
  ) -> Tuple[List[GraphPartitionData], PartitionBook]:
    r""" Partition graph topology of a specified edge type.

    Each edge is assigned to the partition of its source or destination
    node, depending on ``edge_assign_strategy``. Edge ids are the positions
    of the edges in the input edge index.

    Args:
      node_pb (PartitionBook or Dict[NodeType, PartitionBook]): The partition
        books of graph nodes.
      etype (Tuple[str, str, str]): The type for input edges, must be provided
        for heterogeneous graph. (default: ``None``)

    Returns:
      List[GraphPartitionData]: A list of graph data for each partition.
      PartitionBook: The partition book of graph edges.
    """
    edge_index = self.get_edge_index(etype)
    rows, cols = edge_index[0], edge_index[1]
    edge_num = len(rows)
    eids = torch.arange(edge_num, dtype=torch.int64)
    if isinstance(self.edge_weights, dict):
      # Edge types without weights are treated as unweighted.
      weights = self.edge_weights.get(etype)
    else:
      weights = self.edge_weights

    if 'hetero' == self.data_cls:
      assert etype is not None
      assert isinstance(node_pb, dict)
      src_ntype, _, dst_ntype = etype

      if 'by_src' == self.edge_assign_strategy:
        target_node_pb = node_pb[src_ntype]
        target_indices = rows
      else:
        target_node_pb = node_pb[dst_ntype]
        target_indices = cols
    else:
      target_node_pb = node_pb
      target_indices = rows if 'by_src' == self.edge_assign_strategy else cols

    # Look up the owning partition chunk by chunk and only remember the edge
    # ids per partition; rows/cols/weights are gathered once afterwards.
    part_eids = [[] for _ in range(self.num_parts)]
    for chunk_start in range(0, edge_num, self.chunk_size):
      chunk_end = min(edge_num, chunk_start + self.chunk_size)
      chunk_eids = eids[chunk_start:chunk_end]
      chunk_partition_idx = target_node_pb[target_indices[chunk_start:chunk_end]]
      for pidx in range(self.num_parts):
        mask = (chunk_partition_idx == pidx)
        part_eids[pidx].append(torch.masked_select(chunk_eids, mask))

    partition_book = torch.zeros(edge_num, dtype=torch.long)
    partition_results = []
    for pidx in range(self.num_parts):
      if part_eids[pidx]:
        p_eids = torch.cat(part_eids[pidx])
      else:
        # No chunk was processed (graph without edges): torch.cat would
        # reject an empty list, so fall back to an empty id tensor.
        p_eids = eids[:0]
      partition_book[p_eids] = pidx
      partition_results.append(GraphPartitionData(
        edge_index=(rows[p_eids], cols[p_eids]),
        eids=p_eids,
        weights=weights[p_eids] if weights is not None else None
      ))

    return partition_results, partition_book

  def _partition_and_save_node_feat(
    self,
    node_ids_list: List[torch.Tensor],
    ntype: Optional[NodeType] = None,
  ):
    r""" Partition node features by the partitioned node results, and calculate
    the cached nodes if needed.

    Does nothing when there are no node features (for ``ntype``).
    """
    node_feat = self.get_node_feat(ntype)
    if node_feat is None:
      return
    cache_node_ids_list = self._cache_node(ntype)
    for pidx in range(self.num_parts):
      # save partitioned node feature cache
      cache_n_ids = cache_node_ids_list[pidx]
      p_node_cache_feat = FeaturePartitionData(
        feats=None,
        ids=None,
        cache_feats=(node_feat[cache_n_ids] if cache_n_ids is not None else None),
        cache_ids=cache_n_ids
      )
      save_feature_partition_cache(self.output_dir, pidx, p_node_cache_feat,
                                   group='node_feat', graph_type=ntype)
      # save partitioned node feature chunks
      self._save_feature_chunks(node_feat, node_ids_list[pidx], pidx,
                                group='node_feat', graph_type=ntype)

  def _partition_and_save_edge_feat(
    self,
    graph_list: List[GraphPartitionData],
    etype: Optional[EdgeType] = None
  ):
    r""" Partition edge features by the partitioned edge results.

    Does nothing when there are no edge features (for ``etype``).
    """
    edge_feat = self.get_edge_feat(etype)
    if edge_feat is None:
      return
    for pidx in range(self.num_parts):
      self._save_feature_chunks(edge_feat, graph_list[pidx].eids, pidx,
                                group='edge_feat', graph_type=etype)

  def _save_graph(
    self,
    graph_list: List[GraphPartitionData],
    edge_pb: PartitionBook,
    graph_caching: bool,
    etype: Optional[EdgeType] = None
  ):
    r""" Save the partitioned topology and its edge partition book.

    With ``graph_caching`` the full topology is saved once and the edge
    partition book is only saved when edge features exist; otherwise every
    partition is saved separately together with the edge partition book.
    """
    with_edge_feat = self.get_edge_feat(etype) is not None
    if graph_caching:
      if with_edge_feat:
        save_edge_pb(self.output_dir, edge_pb, etype)
      # Pass the flag by keyword so it can never be taken for `etype`.
      save_graph_cache(self.output_dir, graph_list, etype,
                       with_edge_feat=with_edge_feat)
    else:
      save_edge_pb(self.output_dir, edge_pb, etype)
      for pidx in range(self.num_parts):
        save_graph_partition(self.output_dir, pidx, graph_list[pidx], etype)

  def _process_node(self, ntype, with_feature):
    r""" Partition and save the nodes of ``ntype`` (and their features if
    ``with_feature``), recording the partition book in ``node_pb_dict``.
    """
    node_ids_list, node_pb = self._partition_node(ntype)
    save_node_pb(self.output_dir, node_pb, ntype)
    self.node_pb_dict[ntype] = node_pb
    if with_feature:
      self._partition_and_save_node_feat(node_ids_list, ntype)

  def _process_edge(self, etype, with_feature):
    r""" Partition and save the edges of ``etype`` (and their features if
    ``with_feature``), using the node partition books recorded by
    ``_process_node``.
    """
    if 'hetero' == self.data_cls:
      node_pb = self.node_pb_dict
    else:
      # Homogeneous data has a single book, stored under the `None` type.
      node_pb = self.node_pb_dict[None]
    graph_list, edge_pb = self._partition_graph(node_pb, etype)
    save_edge_pb(self.output_dir, edge_pb, etype)
    for pidx in range(self.num_parts):
      save_graph_partition(self.output_dir, pidx, graph_list[pidx], etype)
    if with_feature:
      self._partition_and_save_edge_feat(graph_list, etype)

  def partition(self, with_feature=True, graph_caching=False):
    r""" Partition graph and feature data into different parts.

    Args:
      with_feature (bool): A flag indicating if the feature should be
        partitioned with the graph (default: ``True``).
      graph_caching (bool): A flag indicating if the full graph topology
        will be saved (default: ``False``). If set, the topology is written
        once to ``root_dir/graph/`` instead of ``root_dir/partN/graph/``,
        ``eids.pt`` is only written when edge features exist, and the edge
        partition book is only saved when edge features exist.

    The output directory of partitioned graph data will be like:

    * homogeneous

      root_dir/
      |-- META
      |-- node_pb.pt
      |-- edge_pb.pt
      |-- part0/
          |-- graph/
              |-- rows.pt
              |-- cols.pt
              |-- eids.pt
              |-- weights.pt (optional)
          |-- node_feat/
              |-- feats.pkl
              |-- ids.pkl
              |-- cache_feats.pt (optional)
              |-- cache_ids.pt (optional)
          |-- edge_feat/
              |-- feats.pkl
              |-- ids.pkl
              |-- cache_feats.pt (optional)
              |-- cache_ids.pt (optional)
      |-- part1/
          |-- graph/
              ...
          |-- node_feat/
              ...
          |-- edge_feat/
              ...

    * heterogeneous

      root_dir/
      |-- META
      |-- node_pb/
          |-- ntype1.pt
          |-- ntype2.pt
      |-- edge_pb/
          |-- etype1.pt
          |-- etype2.pt
      |-- part0/
          |-- graph/
              |-- etype1/
                  |-- rows.pt
                  |-- cols.pt
                  |-- eids.pt
                  |-- weights.pt
              |-- etype2/
                  ...
          |-- node_feat/
              |-- ntype1/
                  |-- feats.pkl
                  |-- ids.pkl
                  |-- cache_feats.pt (optional)
                  |-- cache_ids.pt (optional)
              |-- ntype2/
                  ...
          |-- edge_feat/
              |-- etype1/
                  |-- feats.pkl
                  |-- ids.pkl
                  |-- cache_feats.pt (optional)
                  |-- cache_ids.pt (optional)
              |-- etype2/
                  ...
      |-- part1/
          |-- graph/
              ...
          |-- node_feat/
              ...
          |-- edge_feat/
              ...

    """
    if 'hetero' == self.data_cls:
      node_pb_dict = {}
      for ntype in self.node_types:
        node_ids_list, node_pb = self._partition_node(ntype)
        save_node_pb(self.output_dir, node_pb, ntype)
        node_pb_dict[ntype] = node_pb
        if with_feature:
          self._partition_and_save_node_feat(node_ids_list, ntype)

      for etype in self.edge_types:
        graph_list, edge_pb = self._partition_graph(node_pb_dict, etype)
        self._save_graph(graph_list, edge_pb, graph_caching, etype)
        if with_feature:
          self._partition_and_save_edge_feat(graph_list, etype)

    else:
      node_ids_list, node_pb = self._partition_node()
      save_node_pb(self.output_dir, node_pb)
      if with_feature:
        self._partition_and_save_node_feat(node_ids_list)

      graph_list, edge_pb = self._partition_graph(node_pb)
      self._save_graph(graph_list, edge_pb, graph_caching)
      if with_feature:
        self._partition_and_save_edge_feat(graph_list)

    # save meta.
    save_meta(self.output_dir, self.num_parts, self.data_cls,
              self.node_types, self.edge_types)


def _save_feature_chunks(root_dir, partition_idx, feat, ids, chunk_size,
                         group, graph_type=None):
  r""" Append the features selected by ``ids`` to the partition's feature
  files, one chunk of at most ``chunk_size`` ids at a time.
  """
  # torch.chunk rejects a chunk count of zero, so an empty id tensor is
  # written as a single empty chunk.
  num_chunks = max(1, (ids.shape[0] + chunk_size - 1) // chunk_size)
  for chunk in torch.chunk(ids, chunks=num_chunks):
    feat_chunk = FeaturePartitionData(
      feats=feat[chunk],
      ids=chunk.clone(),
      cache_feats=None,
      cache_ids=None
    )
    save_feature_partition_chunk(root_dir, partition_idx, feat_chunk,
                                 group=group, graph_type=graph_type)


def _owned_node_ids(node_pb, partition_idx):
  r""" Return the ids whose partition book entry equals ``partition_idx``.
  """
  all_ids = torch.arange(node_pb.size(0), dtype=torch.int64)
  return torch.masked_select(all_ids, node_pb == partition_idx)


def _load_partition_eids(graph_data_dir, device):
  r""" Load the edge ids of a saved graph partition, failing with a clear
  error if the partition or its ``eids.pt`` is missing.
  """
  graph = load_graph_partition_data(graph_data_dir, device)
  if graph is None:
    raise FileNotFoundError(
      f"graph partition directory not found: '{graph_data_dir}'")
  if graph.eids is None:
    raise ValueError(f"no edge ids saved under '{graph_data_dir}'")
  return graph.eids


def build_partition_feature(
  root_dir: str,
  partition_idx: int,
  chunk_size: int = 10000,
  node_feat: Optional[Union[TensorDataType, Dict[NodeType, TensorDataType]]] = None,
  node_feat_dtype: torch.dtype = torch.float32,
  edge_feat: Optional[Union[TensorDataType, Dict[EdgeType, TensorDataType]]] = None,
  edge_feat_dtype: torch.dtype = torch.float32):

  r""" In the case that the graph topology is partitioned, but the feature
       partitioning is not executed. This method extracts and persists the
       features for a specific partition.

  Node ids are taken from the saved node partition book(s) and edge ids from
  the saved graph partition under ``root_dir/part<partition_idx>/graph``.
  Feature groups that are ``None`` are skipped.

  Args:
    root_dir (str): The root directory for saved partition files.
    partition_idx (int): The partition idx.
    chunk_size: The chunk size for partitioning.
    node_feat: The node feature data, should be a dict for hetero data.
    node_feat_dtype: The data type of node features.
    edge_feat: The edge feature data, should be a dict for hetero data.
    edge_feat_dtype: The data type of edge features.

  Raises:
    FileNotFoundError: If edge features are given but the graph partition
      directory does not exist.
    ValueError: If edge features are given but no edge ids were saved.
  """
  with open(os.path.join(root_dir, 'META'), 'rb') as infile:
    # NOTE: pickle is only safe on partition directories from a trusted source.
    meta = pickle.load(infile)
  num_partitions = meta['num_parts']
  assert partition_idx >= 0
  assert partition_idx < num_partitions
  partition_dir = os.path.join(root_dir, f'part{partition_idx}')
  assert os.path.exists(partition_dir)
  graph_dir = os.path.join(partition_dir, 'graph')
  device = torch.device('cpu')

  node_feat = convert_to_tensor(node_feat, dtype=node_feat_dtype)
  edge_feat = convert_to_tensor(edge_feat, dtype=edge_feat_dtype)

  # homogenous
  if meta['data_cls'] == 'homo':
    # step 1: build and persist the node feature partition
    if node_feat is not None:
      node_pb = torch.load(os.path.join(root_dir, 'node_pb.pt'),
                           map_location=device)
      _save_feature_chunks(root_dir, partition_idx, node_feat,
                           _owned_node_ids(node_pb, partition_idx),
                           chunk_size, group='node_feat')

    # step 2: build and persist the edge feature partition
    if edge_feat is not None:
      _save_feature_chunks(root_dir, partition_idx, edge_feat,
                           _load_partition_eids(graph_dir, device),
                           chunk_size, group='edge_feat')
    return

  # heterogenous
  # step 1: build and persist the node feature partition
  if node_feat is not None:
    node_pb_dir = os.path.join(root_dir, 'node_pb')
    for ntype, feat in node_feat.items():
      node_pb = torch.load(
        os.path.join(node_pb_dir, f'{as_str(ntype)}.pt'), map_location=device)
      _save_feature_chunks(root_dir, partition_idx, feat,
                           _owned_node_ids(node_pb, partition_idx),
                           chunk_size, group='node_feat', graph_type=ntype)

  # step 2: build and persist the edge feature partition
  if edge_feat is not None:
    for etype, feat in edge_feat.items():
      eids = _load_partition_eids(
        os.path.join(graph_dir, as_str(etype)), device)
      _save_feature_chunks(root_dir, partition_idx, feat, eids,
                           chunk_size, group='edge_feat', graph_type=etype)

def load_graph_partition_data(
  graph_data_dir: str,
  device: torch.device
) -> GraphPartitionData:
  r""" Load the graph partition data stored in ``graph_data_dir``.

  ``rows.pt`` and ``cols.pt`` are always loaded; ``eids.pt`` and
  ``weights.pt`` are loaded only if they exist. Returns ``None`` if the
  directory itself does not exist.
  """
  if not os.path.exists(graph_data_dir):
    return None

  def _load(fname, required):
    # Optional files resolve to None when they are absent.
    fpath = os.path.join(graph_data_dir, fname)
    if required or os.path.exists(fpath):
      return torch.load(fpath, map_location=device)
    return None

  rows = _load('rows.pt', True)
  cols = _load('cols.pt', True)
  eids = _load('eids.pt', False)
  weights = _load('weights.pt', False)
  return GraphPartitionData(edge_index=(rows, cols), eids=eids, weights=weights)


def load_feature_partition_data(
  feature_data_dir: str,
  device: torch.device
) -> FeaturePartitionData:
  r""" Load the feature partition data stored in ``feature_data_dir``.

  ``feats.pkl`` and ``ids.pkl`` are read with
  ``load_and_concatenate_tensors``; the cache is loaded only when both
  ``cache_feats.pt`` and ``cache_ids.pt`` exist. Returns ``None`` if the
  directory itself does not exist.
  """
  if not os.path.exists(feature_data_dir):
    return None

  def _path(fname):
    return os.path.join(feature_data_dir, fname)

  feats = load_and_concatenate_tensors(_path('feats.pkl'), device)
  ids = load_and_concatenate_tensors(_path('ids.pkl'), device)

  cache_feats = cache_ids = None
  cache_paths = (_path('cache_feats.pt'), _path('cache_ids.pt'))
  if all(os.path.exists(p) for p in cache_paths):
    cache_feats, cache_ids = (
      torch.load(p, map_location=device) for p in cache_paths
    )
  return FeaturePartitionData(
    feats=feats, ids=ids, cache_feats=cache_feats, cache_ids=cache_ids
  )


def load_partition(
  root_dir: str,
  partition_idx: int,
  graph_caching: bool = False,
  device: torch.device = torch.device('cpu')
) -> Union[Tuple[int, int,
                 GraphPartitionData,
                 Optional[FeaturePartitionData],
                 Optional[FeaturePartitionData],
                 PartitionBook,
                 Optional[PartitionBook]],
           Tuple[int, int,
                 HeteroGraphPartitionData,
                 Optional[HeteroFeaturePartitionData],
                 Optional[HeteroFeaturePartitionData],
                 HeteroNodePartitionDict,
                 HeteroEdgePartitionDict]]:
  r""" Load a partition from saved directory.

  Args:
    root_dir (str): The root directory for saved files.
    partition_idx (int): The partition idx to load.
    graph_caching: (bool): Whether to load entire graph topology from
      ``root_dir/graph`` instead of the partition's own ``graph`` directory.
    device (torch.device): The device where loaded graph partition data locates.

  Returns:
    int: Number of all partitions.
    int: The current partition idx.
    GraphPartitionData/HeteroGraphPartitionData: graph partition data.
    FeaturePartitionData/HeteroFeaturePartitionData: node feature partition
      data, optional.
    FeaturePartitionData/HeteroFeaturePartitionData: edge feature partition
      data, optional.
    PartitionBook/HeteroNodePartitionDict: node partition book.
    PartitionBook/HeteroEdgePartitionDict: edge partition book. An edge
      partition book that was not saved is ``None`` for homogeneous data and
      left out of the dict for heterogeneous data.
  """
  with open(os.path.join(root_dir, 'META'), 'rb') as infile:
    # NOTE: pickle is only safe on partition directories from a trusted source.
    meta = pickle.load(infile)
  num_partitions = meta['num_parts']
  assert partition_idx >= 0
  assert partition_idx < num_partitions
  partition_dir = os.path.join(root_dir, f'part{partition_idx}')
  assert os.path.exists(partition_dir)

  if graph_caching:
    graph_dir = os.path.join(root_dir, 'graph')
  else:
    graph_dir = os.path.join(partition_dir, 'graph')
  node_feat_dir = os.path.join(partition_dir, 'node_feat')
  edge_feat_dir = os.path.join(partition_dir, 'edge_feat')

  # homogenous

  if meta['data_cls'] == 'homo':
    graph = load_graph_partition_data(graph_dir, device)
    node_feat = load_feature_partition_data(node_feat_dir, device)
    edge_feat = load_feature_partition_data(edge_feat_dir, device)
    node_pb = torch.load(os.path.join(root_dir, 'node_pb.pt'),
                         map_location=device)
    # The edge partition book is not saved when the graph was cached without
    # edge features, so treat it as optional like the heterogeneous branch.
    edge_pb = None
    edge_pb_file = os.path.join(root_dir, 'edge_pb.pt')
    if os.path.exists(edge_pb_file):
      edge_pb = torch.load(edge_pb_file, map_location=device)
    return (
      num_partitions, partition_idx,
      graph, node_feat, edge_feat, node_pb, edge_pb
    )

  # heterogenous

  graph_dict = {}
  for etype in meta['edge_types']:
    graph_dict[etype] = load_graph_partition_data(
      os.path.join(graph_dir, as_str(etype)), device)

  # Feature dicts only hold the types that have saved features and collapse
  # to None when no type has any.
  node_feat_dict = {}
  for ntype in meta['node_types']:
    node_feat = load_feature_partition_data(
      os.path.join(node_feat_dir, as_str(ntype)), device)
    if node_feat is not None:
      node_feat_dict[ntype] = node_feat
  if len(node_feat_dict) == 0:
    node_feat_dict = None

  edge_feat_dict = {}
  for etype in meta['edge_types']:
    edge_feat = load_feature_partition_data(
      os.path.join(edge_feat_dir, as_str(etype)), device)
    if edge_feat is not None:
      edge_feat_dict[etype] = edge_feat
  if len(edge_feat_dict) == 0:
    edge_feat_dict = None

  node_pb_dict = {}
  node_pb_dir = os.path.join(root_dir, 'node_pb')
  for ntype in meta['node_types']:
    node_pb_dict[ntype] = torch.load(
      os.path.join(node_pb_dir, f'{as_str(ntype)}.pt'), map_location=device)

  edge_pb_dict = {}
  edge_pb_dir = os.path.join(root_dir, 'edge_pb')
  for etype in meta['edge_types']:
    edge_pb_file = os.path.join(edge_pb_dir, f'{as_str(etype)}.pt')
    if os.path.exists(edge_pb_file):
      edge_pb_dict[etype] = torch.load(
        edge_pb_file, map_location=device)

  return (
    num_partitions, partition_idx,
    graph_dict, node_feat_dict, edge_feat_dict, node_pb_dict, edge_pb_dict
  )


def cat_feature_cache(
  partition_idx: int,
  feat_pdata: FeaturePartitionData,
  feat_pb: PartitionBook
) -> Tuple[float, torch.Tensor, torch.Tensor, PartitionBook]:
  r""" Concatenate and deduplicate partitioned features and its cached
  features into a new feature partition.

  Note that if the input `feat_pdata` does not contain a feature cache, this
  func will do nothing and return the results corresponding to the original
  partition data.

  If an id appears both in the cache and in the partition, the id mapping
  points to the cached copy. An empty partition or an empty cache is
  handled.

  Args:
    partition_idx (int): The index of the current partition.
    feat_pdata (FeaturePartitionData): The feature partition data to merge.
    feat_pb (PartitionBook): The partition book of the features; it is
      cloned, not modified in place.

  Returns:
    float: The proportion of cache features.
    torch.Tensor: The new feature tensor, where the cached feature data is
      arranged before the original partition data.
    torch.Tensor: The tensor that indicates the mapping from global node id
      to its local index in new features.
    PartitionBook: The modified partition book for the new feature tensor.
  """
  feats = feat_pdata.feats
  ids = feat_pdata.ids
  cache_feats = feat_pdata.cache_feats
  cache_ids = feat_pdata.cache_ids
  if cache_feats is None or cache_ids is None:
    return 0.0, feats, id2idx(ids), feat_pb
  device = feats.device
  num_cached = cache_ids.size(0)
  num_local = ids.size(0)
  num_total = num_cached + num_local
  cache_ratio = num_cached / num_total if num_total > 0 else 0.0
  # cat features
  new_feats = torch.cat([cache_feats, feats])
  # compute id2idx; torch.max rejects empty tensors, so skip those.
  id_maxes = [torch.max(t).item() for t in (cache_ids, ids) if t.numel() > 0]
  max_id = max(id_maxes) if id_maxes else -1
  nid2idx = torch.zeros(max_id + 1, dtype=torch.int64, device=device)
  nid2idx[ids] = (torch.arange(num_local, dtype=torch.int64, device=device) +
                  num_cached)
  # Written last so that cached copies win over duplicated local ids.
  nid2idx[cache_ids] = torch.arange(num_cached, dtype=torch.int64,
                                   device=device)
  # modify partition book: cached ids are now served by this partition.
  new_feat_pb = feat_pb.clone()
  new_feat_pb[cache_ids] = partition_idx

  return cache_ratio, new_feats, nid2idx, new_feat_pb