def predict_impacts()

in trend_getter/holidays.py [0:0]


def predict_impacts(countries, holiday_impacts, start_date, end_date):
    """Predict the summed holiday impact for every day in a date range.

    For each day between ``start_date`` and ``end_date`` the function looks up
    every calendar entry (across all ``countries``) that falls within seven
    days of that day and adds up the ``"average_impact"`` recorded in
    ``holiday_impacts`` for that holiday at that day offset.

    A day that has a nearby calendar entry whose name contains ``"Data Loss"``
    is given an impact of 0, and none of its nearby holidays are evaluated.

    Args:
        countries: Iterable of country identifiers; each one is passed as
            ``country=`` to ``get_calendar``.
        holiday_impacts: Nested lookup accessed as
            ``holiday_impacts[holiday][day_offset]["average_impact"]``, where
            ``day_offset`` is the whole-day difference
            ``target_date - holiday_date`` (negative before the holiday).
            NOTE(review): a known holiday is presumably expected to carry an
            entry for every offset in [-7, 7]; a missing one is not handled
            here -- confirm against the code that builds this structure.
        start_date: First day of the prediction range (passed to
            ``pd.date_range``).
        end_date: Last day of the prediction range (passed to
            ``pd.date_range``).

    Returns:
        A ``pd.DataFrame`` with one row per day and the columns
        ``submission_date`` and ``impact``.

    Side effects:
        Prints the names of holidays that appear in the calendar but not in
        ``holiday_impacts``, if there are any.
    """
    # Holidays influence days up to this far before and after the holiday.
    window = timedelta(days=7)

    future_dates = pd.date_range(start_date, end_date)

    # A day in the first or last week of a year can lie within the window of
    # a holiday belonging to the adjacent year, so request calendars for every
    # year touched by the padded range rather than only the range's own years.
    holiday_years = np.unique(
        np.concatenate(
            [
                (future_dates - window).year,
                future_dates.year,
                (future_dates + window).year,
            ]
        )
    )

    # get_calendar is expected to return a frame with at least the
    # ``submission_date`` and ``holiday`` columns used below.
    holiday_dates = (
        pd.concat(
            get_calendar(
                country=country,
                holiday_years=holiday_years,
                split_concurrent_holidays=True,
            )
            for country in countries
        )
        .sort_values(by="submission_date")
        .reset_index(drop=True)
    )

    impacts = []  # Predicted impact per day, aligned with future_dates
    new_holidays = set()  # Calendar holidays missing from holiday_impacts

    for target_date in future_dates:
        # Signed distance from every calendar entry to the target day.
        date_diffs = target_date - holiday_dates.submission_date
        is_nearby = abs(date_diffs) <= window
        nearby_names = holiday_dates.loc[is_nearby, "holiday"]

        impact = 0

        # Days close to a data-loss event keep a zero impact.
        if len(nearby_names) and not nearby_names.str.contains("Data Loss").any():
            for holiday, diff in zip(nearby_names, date_diffs[is_nearby]):
                if holiday in holiday_impacts:
                    # diff.days is the integer day offset used as lookup key.
                    impact += holiday_impacts[holiday][diff.days]["average_impact"]
                else:
                    new_holidays.add(holiday)

        impacts.append(impact)

    # Only report when something is actually missing; sort so the output is
    # stable between runs.
    if new_holidays:
        print("Unaccounted Holidays:\n - " + "\n - ".join(sorted(new_holidays)))
    return pd.DataFrame({"submission_date": future_dates, "impact": impacts})