in spark_scripts/synchronize_topics.py [0:0]
def join_topics(dfs, col_selection_dict):
    """Project every topic DataFrame onto its selected columns.

    For each ``(topic_name, topic_df)`` pair in ``dfs`` the columns listed
    under ``col_selection_dict[topic_name]`` are selected, and ``"bag_file"``
    is added to the selection when it is not already listed. The resulting
    projections are collected in ``filtered_dfs`` in the iteration order of
    ``dfs``.

    Args:
        dfs: Mapping of topic name to an object exposing ``select(*cols)``
            (presumably a Spark DataFrame -- confirm against the caller).
        col_selection_dict: Mapping of topic name to an iterable of column
            names to keep for that topic. It is only read; the caller's
            column lists are never modified.

    Raises:
        KeyError: If a topic in ``dfs`` has no entry in
            ``col_selection_dict``.
    """
    filtered_dfs = []
    # Take first row per topic per bag_file per second rounded
    for topic_name, topic_df in dfs.items():
        # Work on a copy: appending to the list stored in col_selection_dict
        # would leak "bag_file" into the caller's configuration.
        topic_col_subset = list(col_selection_dict[topic_name])
        if "bag_file" not in topic_col_subset:
            topic_col_subset.append("bag_file")
        filtered_dfs.append(topic_df.select(*topic_col_subset))