merge-data.py (25 lines of code) (raw):
from ruamel.yaml import YAML
import json
# Initialize YAML parser
yaml = YAML()
yaml.preserve_quotes = True
# Load activities.yml
with open('activities.yml', 'r') as yml_file:
activities = yaml.load(yml_file)
# Load gh-data-summary.json
with open('gh-data-summary.json', 'r') as json_file:
gh_data_summary = json.load(json_file)
# Create a dictionary with 'issue' as key for easy lookup in gh-data-summary.json
output_dict = {item['issue']: item for item in gh_data_summary}
def merge(dict1, dict2):
"""
Merges data from dict1 into dict2. Keys in dict1 take precedence if not None.
Strips trailing newline from string values.
"""
for key, value in dict1.items():
if value is not None:
if isinstance(value, str):
dict2[key] = value.rstrip("\n") # Strip trailing newlines
else:
dict2[key] = value
dict2.pop('issue', None) # Remove the 'issue' key after merging
# Merge data
for activity_title, activity_data in activities.items():
if 'issue' in activity_data:
issue_number = activity_data['issue']
if issue_number in output_dict:
merge(activity_data, output_dict[issue_number])
# Output as JSON
with open("merged-data.json", "w") as f:
json.dump(output_dict, f, indent=2, separators=(",", ": "))
f.write("\n")