in aws/lambda/github-webhook-rds-sync/utils.py [0:0]
def extract_github_objects(obj: Dict[str, Any], obj_name: str) -> List[NamedDict]:
    """
    Split a GitHub payload into a list of named, flat records.

    GitHub's real 'objects' (i.e. things accessible in the API) all have a
    unique "node_id" string. This descends into ``obj`` and gives every nested
    dict that carries a "node_id" its own record; in the parent's record the
    nested dict is replaced by a ``<key>_node_id`` link. Nested dicts without
    a "node_id" are run through ``flatten_object`` and inlined into the parent
    as ``<key>_<flattened key>`` entries. URL-like keys, "_links" and
    "permissions" are dropped (``target_url`` is kept).

    Args:
        obj: The payload to split.
        obj_name: Name of the payload. It is used to look up null-object
            placeholders for top-level fields in ``TYPE_MAP`` and, with an
            ``_event`` suffix, as the name of the top-level record.

    Returns:
        A list of ``(name, record)`` pairs. Nested objects come first
        (children before the objects that contain them), named after their
        key (or its ``TABLE_NAME_REMAP`` replacement). The last pair is the
        top-level record, named ``f"{obj_name}_event"``. Every record gets a
        "sync_last_update_at" entry holding the same timestamp.
    """
    objects: List[NamedDict] = []

    def drop_key(key: str) -> bool:
        # "target_url" is the one URL-like field that is kept.
        if key == "target_url":
            return False

        return key.endswith("_url") or key in ("_links", "url", "permissions")

    def visit_dict(curr: Dict[str, Any], full_name: List[str]) -> Tuple[bool, FlatDict]:
        # Returns (True, curr) when curr is a GitHub object (its record has
        # already been stashed in `objects`), else (False, record) so the
        # caller can inline the record's fields.
        result = {}

        for key, value in curr.items():
            if drop_key(key):
                # Ignore URLs
                continue

            if isinstance(value, dict):
                # Objects are not always named consistently (e.g. repository
                # vs repo, owner vs. user), so fix that up here. Only the
                # record name is remapped; column names keep the original key.
                remapped_key = TABLE_NAME_REMAP.get(key)
                child_name = key if remapped_key is None else remapped_key
                is_gh_object, data = visit_dict(value, full_name + [child_name])

                if is_gh_object:
                    # It will go into its own table so just put a link to it
                    # here
                    result[f"{key}_node_id"] = data["node_id"]
                else:
                    # Not a separate object so inline all of its fields
                    for flat_key, flat_value in flatten_object(data).items():
                        result[f"{key}_{flat_key}"] = flat_value
            elif (
                value is None
                and TYPE_MAP.get(full_name[-1], {}).get(key, lambda: None)()
                == OBJECT_PLACEHOLDER
            ):
                # We might have a null object, in which case we still need to
                # add it as a _node_id
                result[f"{key}_node_id"] = None
            else:
                result[key] = value

        if "node_id" in curr:
            # It's a github object so stash it for returning later
            objects.append((full_name[-1], result))
            return True, curr

        return False, result

    # NOTE(review): if the top-level payload itself has a "node_id",
    # visit_dict returns the unprocessed payload, so the "_event" record below
    # is built from the raw dict (and a processed record named `obj_name` has
    # already been stashed) -- confirm this is intended.
    _, newobj = visit_dict(obj, [obj_name])

    # Add an entry for the top level object
    objects.append((f"{obj_name}_event", flatten_object(newobj)))

    # Stamp every record with one shared sync time so that all rows produced
    # from the same payload carry an identical timestamp.
    synced_at = datetime.datetime.now()
    for _, record in objects:
        record["sync_last_update_at"] = synced_at

    return objects