def write_hudi_table()

in dataproc/pyspark_apache_hudi.py [0:0]


def write_hudi_table(table_name, table_uri, df):
  """Writes Hudi table."""
  hudi_options = {
      'hoodie.table.name': table_name,
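      # Each record is keyed by location_id, partitioned by borough, and
      # deduplicated on ts when two records share the same key.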
      'hoodie.datasource.write.recordkey.field': 'location_id',
      'hoodie.datasource.write.partitionpath.field': 'borough',
      'hoodie.datasource.write.table.name': table_name,
      'hoodie.datasource.write.operation': 'insert',
      'hoodie.datasource.write.precombine.field': 'ts',
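      # Shuffle parallelism is kept low here; suitable for small demo datasets.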
      'hoodie.upsert.shuffle.parallelism': 2,
      'hoodie.insert.shuffle.parallelism': 2,
      # BQ Support: options required by the Hudi-BigQuery sync integration.
      'hoodie.partition.metafile.use.base.format': 'true',
      'hoodie.metadata.enable': 'true',
      'hoodie.datasource.write.hive_style_partitioning': 'true',
      'hoodie.datasource.write.drop.partition.columns': 'true',
  }
  df.write.format('hudi').options(**hudi_options).mode('append').save(table_uri)
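
A minimal usage sketch for context. The session settings, table name, bucket path, and sample rows below are assumptions, not part of the sample; only the column names (location_id, borough, ts) are fixed by the options above. This assumes the Hudi Spark bundle is on the classpath (for example via spark.jars.packages), as it is on Dataproc clusters created with the Hudi component.

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName('hudi-example')
    # Hudi requires Kryo serialization.
    .config('spark.serializer', 'org.apache.spark.serializer.KryoSerializer')
    .getOrCreate())

# Hypothetical rows matching the configured key, partition, and precombine columns.
df = spark.createDataFrame(
    [(1, 'Manhattan', 1), (2, 'Brooklyn', 1)],
    ['location_id', 'borough', 'ts'])

# Hypothetical table name and GCS path.
write_hudi_table('example_table', 'gs://my-bucket/hudi/example_table', df)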