in dataproc/pyspark_apache_hudi.py [0:0]
def write_hudi_table(table_name, table_uri, df):
"""Writes Hudi table."""
    hudi_options = {
        'hoodie.table.name': table_name,
        'hoodie.datasource.write.recordkey.field': 'location_id',
        'hoodie.datasource.write.partitionpath.field': 'borough',
        'hoodie.datasource.write.table.name': table_name,
        'hoodie.datasource.write.operation': 'insert',
        'hoodie.datasource.write.precombine.field': 'ts',
        'hoodie.upsert.shuffle.parallelism': 2,
        'hoodie.insert.shuffle.parallelism': 2,
        # BigQuery support: enable the metadata table and base-format
        # partition metafiles so the table can be synced to BigQuery.
        'hoodie.partition.metafile.use.base.format': 'true',
        'hoodie.metadata.enable': 'true',
        'hoodie.datasource.write.hive_style_partitioning': 'true',
        'hoodie.datasource.write.drop.partition.columns': 'true',
    }
    df.write.format('hudi').options(**hudi_options).mode('append').save(table_uri)
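
# A minimal usage sketch (not part of the original file). It assumes a
# SparkSession with the Hudi Spark bundle on the classpath; the bucket path,
# table name, and sample rows below are hypothetical placeholders.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName('hudi-write-example')
    # Hudi recommends the Kryo serializer; the bundle jar itself is assumed
    # to be supplied at submit time (e.g. via --jars or --packages).
    .config('spark.serializer', 'org.apache.spark.serializer.KryoSerializer')
    .getOrCreate()
)

# A DataFrame with the columns the options above reference: record key
# 'location_id', partition path 'borough', and precombine field 'ts'.
df = spark.createDataFrame(
    [(1, 'Manhattan', 1700000000), (2, 'Brooklyn', 1700000001)],
    ['location_id', 'borough', 'ts'],
)

write_hudi_table('taxi_zones', 'gs://my-bucket/hudi/taxi_zones', df)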