def extract_github_objects()

in aws/lambda/github-webhook-rds-sync/utils.py [0:0]


def extract_github_objects(obj: Dict[str, Any], obj_name: str) -> List[NamedDict]:
    """
    Split a GitHub payload into a list of (name, row) pairs.

    Every nested dict that carries a "node_id" key is treated as a GitHub
    object of its own: it is recorded as a separate entry (named after the key
    it was found under, once TABLE_NAME_REMAP has been applied) and the parent
    row only keeps a "<key>_node_id" reference to it. Nested dicts without a
    "node_id" are passed through flatten_object and inlined into the parent
    row with a "<key>_" prefix. Keys ending in "_url", plus "url", "_links"
    and "permissions", are discarded ("target_url" is the exception and is
    kept).

    The payload itself is appended last as "<obj_name>_event" (after being run
    through flatten_object), and every row gets a "sync_last_update_at"
    timestamp.
    """
    collected = []
    dropped_exact = ("_links", "url", "permissions")

    def is_dropped(name: str) -> bool:
        # "target_url" is the one URL-style field that is kept
        if name == "target_url":
            return False
        return name in dropped_exact or name.endswith("_url")

    def walk(node: Dict[str, Any], table: str) -> Tuple[bool, FlatDict]:
        # Returns (True, node) when `node` is a GitHub object (its row has
        # been stashed in `collected`), otherwise (False, row) so the caller
        # can inline the fields.
        row = {}

        for field, raw in node.items():
            if is_dropped(field):
                continue

            if isinstance(raw, dict):
                # Objects are not always named consistently (e.g. repository
                # vs. repo, owner vs. user), so normalise the table name; the
                # column name in this row still uses the original key.
                child_table = TABLE_NAME_REMAP.get(field, None)
                if child_table is None:
                    child_table = field

                child_is_object, child = walk(raw, child_table)
                if child_is_object:
                    # Lives in its own table, so only keep a link to it
                    row[f"{field}_node_id"] = child["node_id"]
                else:
                    # Not a separate object: inline all of its fields
                    row.update(
                        {
                            f"{field}_{sub_key}": sub_value
                            for sub_key, sub_value in flatten_object(child).items()
                        }
                    )
                continue

            if raw is None:
                # A null may stand in for a missing object; if the type map
                # says this field is an object, it still needs a _node_id
                # column.
                make_type = TYPE_MAP.get(table, {}).get(field, lambda: None)
                if make_type() == OBJECT_PLACEHOLDER:
                    row[f"{field}_node_id"] = None
                    continue

            row[field] = raw

        if "node_id" not in node:
            return False, row

        # It's a GitHub object, so stash its row for returning later
        collected.append((table, row))
        return True, node

    _, top_level = walk(obj, obj_name)

    # The top level payload always gets its own "<name>_event" entry
    collected.append((f"{obj_name}_event", flatten_object(top_level)))

    # Stamp every row with the time it was processed
    for _name, row in collected:
        row["sync_last_update_at"] = datetime.datetime.now()

    return collected