def collect_digest()

in .codex/skills/codex-issue-digest/scripts/collect_issue_digest.py [0:0]


def collect_digest(args):
    """Collect issue activity for one time window and build the digest payload.

    The window comes from ``resolve_window(args)``. Candidate issue numbers are
    found via the search queries built from ``args.repo`` and the normalized
    label selection, then each candidate is hydrated (issue, comments,
    reactions) and passed to ``summarize_issue``. Candidates for which
    ``summarize_issue`` returns ``None`` are dropped.

    Attributes read from ``args``: ``labels``, ``all_labels``, ``repo``,
    ``limit_issues``, ``max_comment_pages``, ``fetch_all_comments``,
    ``body_chars`` and ``comment_chars``.

    Returns:
        A dict with the keys ``generated_at``, ``source``, ``window``,
        ``attention_thresholds``, ``filters``, ``collection_notes``,
        ``totals``, ``by_owner_label``, ``by_kind_label``, ``hot_items``,
        ``summary_inputs``, ``digest_rows`` and ``issues``. ``issues`` is
        ordered descending by ``(updated_at, number)``.
    """
    since, until = resolve_window(args)
    window_hours = (until - since).total_seconds() / 3600
    attention_thresholds = attention_thresholds_for_window(window_hours)
    requested_labels, all_labels = normalize_requested_labels(
        args.labels, all_labels=args.all_labels
    )
    numbers = search_issue_numbers(
        build_search_queries(
            args.repo, requested_labels, since, all_labels=all_labels
        ),
        args.limit_issues,
    )
    gh_version_output = gh_text(["--version"])

    # A non-positive page cap means "no cap" for comment pagination.
    max_comment_pages = args.max_comment_pages
    if max_comment_pages <= 0:
        max_comment_pages = None

    issues = []
    for number in numbers:
        issue = fetch_issue(args.repo, number)

        # Restrict comment hydration to the window unless the caller asked
        # for the full comment history.
        if args.fetch_all_comments:
            comments_since = None
        else:
            comments_since = since

        comments_payload = fetch_comments(
            args.repo,
            number,
            since=comments_since,
            max_pages=max_comment_pages,
        )
        comments = comments_payload["items"]

        reactions_endpoint = f"repos/{args.repo}/issues/{number}/reactions"
        issue_reaction_events = fetch_reactions_for_item(reactions_endpoint, issue)
        comment_reactions_by_id = fetch_comment_reactions(args.repo, comments)

        # Record how much of the thread was actually fetched so the summary
        # can tell partial hydration apart from a genuinely quiet issue.
        comments_hydration = {
            "fetched": len(comments),
            "total": int(issue.get("comments") or len(comments) or 0),
            "since": format_timestamp(comments_since) if comments_since else None,
            "truncated": comments_payload["truncated"],
            "max_pages": comments_payload["max_pages"],
            "fetch_all_comments": args.fetch_all_comments,
        }

        summary = summarize_issue(
            issue,
            comments,
            requested_labels,
            since,
            until,
            args.body_chars,
            args.comment_chars,
            issue_reaction_events=issue_reaction_events,
            comment_reactions_by_id=comment_reactions_by_id,
            all_labels=all_labels,
            comments_hydration=comments_hydration,
            attention_thresholds=attention_thresholds,
        )
        if summary is None:
            continue
        issues.append(summary)

    def _recency_key(entry):
        # Order by update time first, issue number as the tie-breaker.
        return entry["updated_at"], int(entry["number"] or 0)

    issues.sort(key=_recency_key, reverse=True)

    activities = [entry["activity"] for entry in issues]
    hydrations = [entry["comments_hydration"] for entry in issues]

    def _column_sum(field):
        # Sum one top-level numeric field across every included issue.
        return sum(entry[field] for entry in issues)

    totals = {
        "candidate_issues": len(numbers),
        "included_issues": len(issues),
        "new_issues": len([act for act in activities if act["new_issue"]]),
        "issues_with_new_comments": len(
            [act for act in activities if act["new_comments"] > 0]
        ),
        "new_comments": sum(act["new_comments"] for act in activities),
        "comments_fetched": sum(hyd["fetched"] for hyd in hydrations),
        "issues_with_truncated_comment_hydration": len(
            [hyd for hyd in hydrations if hyd["truncated"]]
        ),
        "updated_without_visible_new_post": len(
            [act for act in activities if act["updated_without_visible_new_post"]]
        ),
        "issue_reactions_current_total": _column_sum("issue_reaction_total"),
        "comment_reactions_current_total": _column_sum("comment_reaction_total"),
        "new_reactions": _column_sum("new_reactions"),
        "new_upvotes": _column_sum("new_upvotes"),
        "user_interactions": _column_sum("user_interactions"),
    }

    # Map each issue number to its 1-based position in the ranked ordering.
    ref_map = {}
    for position, ranked_issue in enumerate(ranked_digest_issues(issues), start=1):
        ref_map[ranked_issue["number"]] = position

    if all_labels:
        filter_label = "all"
    else:
        filter_label = requested_labels

    gh_version = None
    if gh_version_output:
        # Keep only the first line of the `gh --version` output.
        gh_version = gh_version_output.splitlines()[0]

    # Assemble the payload key by key so the output ordering stays fixed.
    digest = {"generated_at": format_timestamp(datetime.now(timezone.utc))}
    digest["source"] = {
        "repo": args.repo,
        "skill": "codex-issue-digest",
        "collector": skill_relative_path(),
        "script_version": SCRIPT_VERSION,
        "git_head": git_head(),
        "gh_version": gh_version,
    }
    digest["window"] = {
        "since": format_timestamp(since),
        "until": format_timestamp(until),
        "hours": round(window_hours, 3),
    }
    digest["attention_thresholds"] = attention_thresholds
    digest["filters"] = {
        "owner_labels": filter_label,
        "all_labels": all_labels,
        "kind_labels": list(QUALIFYING_KIND_LABELS),
    }
    digest["collection_notes"] = [
        "Issues are selected when they currently have bug or enhancement plus at least one requested owner label and were updated during the window.",
        "By default, issue comments are fetched with since=window_start and a max page cap to avoid long historical threads; use --fetch-all-comments when exhaustive comment history is needed.",
        "New issue comments are filtered by comment creation time within the window from the fetched comment set.",
        "Reaction events are counted by GitHub reaction created_at timestamps for hydrated issues and fetched comments.",
        "Current reaction totals are standing engagement signals; new_reactions and new_upvotes are windowed activity.",
        "user_interactions counts unique human users per issue across new issues, new comments, and new reactions; repeated actions by the same user count once.",
        "The collector does not assign semantic clusters; use summary_inputs as model-ready evidence for report-time clustering.",
        "Pure reaction-only issues may be missed if GitHub issue search does not surface them via updated_at.",
        "Issues updated during the window without a new issue body or new comment are retained because label/status edits can still be useful owner signals.",
    ]
    digest["totals"] = totals

    # With all_labels the breakdown covers every owner label seen on the
    # included issues; otherwise it follows the requested labels.
    if all_labels:
        owner_label_axis = sorted(
            {label for entry in issues for label in entry["owner_labels"]},
            key=str.casefold,
        )
    else:
        owner_label_axis = requested_labels
    digest["by_owner_label"] = count_by_label(issues, owner_label_axis)

    digest["by_kind_label"] = count_by_kind(issues)
    digest["hot_items"] = hot_items(issues)
    digest["summary_inputs"] = summary_inputs(issues, ref_map=ref_map)
    digest["digest_rows"] = digest_rows(issues, ref_map=ref_map)
    digest["issues"] = issues
    return digest