in spark_scripts/synchronize_topics.py [0:0]
def union_all(dfs):
    """Union DataFrames whose column sets may differ.

    Every frame is first padded with null-literal columns for each column
    name that appears in some other frame but not in it. The frames are then
    unioned pairwise; before each union the right-hand frame is re-selected
    in the left-hand frame's column order so columns line up by name.

    Args:
        dfs: Iterable of DataFrame-like objects exposing ``columns``,
            ``withColumn``, ``select`` and ``union``. Any iterable is
            accepted, including a one-shot generator.

    Returns:
        A single frame containing the rows of every input. Its columns are
        the first frame's columns followed by the remaining column names in
        the order they were first seen across ``dfs``. With exactly one
        input and nothing to pad, that input is returned as-is.

    Raises:
        TypeError: If ``dfs`` is empty (raised by ``functools.reduce``).
    """
    # The input is traversed more than once; materialise it so a generator
    # argument is not exhausted after the first pass.
    frames = list(dfs)

    # Ordered superset of column names (first-seen order) so the output
    # schema is deterministic rather than dependent on set iteration order.
    all_columns = []
    seen = set()
    for df in frames:
        for col in df.columns:
            if col not in seen:
                seen.add(col)
                all_columns.append(col)

    # Collect the padded frames in a new list: withColumn returns a new
    # frame, so rebinding the loop variable alone would discard the padding.
    padded = []
    for df in frames:
        present = set(df.columns)
        for col in all_columns:
            if col not in present:
                df = df.withColumn(col, func.lit(None).cast(types.NullType()))
        padded.append(df)

    # Align each right-hand frame to the accumulated frame's column order
    # before the union.
    return functools.reduce(
        lambda df1, df2: df1.union(df2.select(df1.columns)), padded
    )