def join_topics()

in spark_scripts/synchronize_topics.py [0:0]

7 lines of code
3 McCabe index (conditional complexity)


def join_topics(dfs, col_selection_dict):
    """Project each topic's DataFrame onto its configured columns.

    For every ``topic_name -> DataFrame`` pair in *dfs*, the column names
    listed for that topic in *col_selection_dict* are looked up,
    ``"bag_file"`` is added when it is not already listed, and the
    DataFrame's ``select`` is called with those column names. The results
    are collected, in the iteration order of *dfs*, in ``filtered_dfs``.

    Args:
        dfs: Mapping of topic name to an object exposing
            ``select(*column_names)``.
        col_selection_dict: Mapping of topic name to an iterable of column
            names to keep for that topic. The mapping and its values are
            left unmodified.

    Raises:
        KeyError: If a topic in *dfs* has no entry in *col_selection_dict*.

    NOTE(review): in the code visible here ``filtered_dfs`` is neither
    returned nor used after the loop, and nothing implements the
    "first row per topic per bag_file per second" reduction mentioned by
    the earlier comment -- confirm whether the join step / return value
    lives further down or is missing.
    """
    filtered_dfs = []
    for topic_name, topic_df in dfs.items():
        # Work on a copy: appending to the caller's list would permanently
        # alter the shared column configuration (and would fail outright
        # for tuples or other non-list iterables).
        topic_col_subset = list(col_selection_dict[topic_name])
        # "bag_file" is always kept alongside the selected columns.
        if "bag_file" not in topic_col_subset:
            topic_col_subset.append("bag_file")
        filtered_dfs.append(topic_df.select(*topic_col_subset))