in trend_getter/holidays.py [0:0]
def predict_impacts(countries, holiday_impacts, start_date, end_date):
    """Predict the summed holiday impact for each day in a date range.

    For every day between ``start_date`` and ``end_date`` the holidays that
    fall within 7 days (before or after) are looked up, and their
    ``"average_impact"`` values for the matching day offset are added together.

    Args:
        countries: Iterable of country identifiers; each one is passed to
            ``get_calendar(country=...)``.
        holiday_impacts: Mapping of holiday name -> mapping of integer day
            offset -> mapping containing an ``"average_impact"`` entry. The
            offset is ``target date - holiday date`` in whole days, so a
            positive offset means the target date is after the holiday.
        start_date: First day to predict (anything ``pd.date_range`` accepts).
        end_date: Last day to predict (anything ``pd.date_range`` accepts).

    Returns:
        ``pd.DataFrame`` with a ``submission_date`` column (one row per day in
        the range) and an ``impact`` column holding the summed impact. Days
        with no nearby holiday, or with a nearby calendar entry whose name
        contains ``"Data Loss"``, get an impact of ``0``.

    Raises:
        KeyError: If a known holiday has no entry for a required day offset.

    Side effects:
        Prints the names of nearby holidays that are missing from
        ``holiday_impacts`` (only when there are any).
    """
    window = timedelta(days=7)
    future_dates = pd.date_range(start_date, end_date)

    # A holiday up to `window` outside the forecast range still affects the
    # first/last days of the range, and it may fall in an adjacent calendar
    # year (e.g. 25 Dec for a forecast starting on 1 Jan). Request calendars
    # for the padded range so those holidays are not silently dropped.
    if len(future_dates):
        padded_dates = pd.date_range(
            future_dates[0] - window, future_dates[-1] + window
        )
    else:
        padded_dates = future_dates
    holiday_years = np.unique(padded_dates.year)

    holiday_dates = (
        pd.concat(
            get_calendar(
                country=country,
                holiday_years=holiday_years,
                split_concurrent_holidays=True,
            )
            for country in countries
        )
        .sort_values(by="submission_date")
        .reset_index(drop=True)
    )

    impacts = []  # Predicted impact for each entry of future_dates
    new_holidays = set()  # Nearby holidays with no entry in holiday_impacts

    for target_date in future_dates:
        # Signed distance from every calendar entry to the target date.
        date_diffs = target_date - holiday_dates["submission_date"]
        in_window = abs(date_diffs) <= window
        nearby_names = holiday_dates.loc[in_window, "holiday"]

        impact = 0
        # A nearby "Data Loss" entry suppresses the prediction for this day.
        if len(nearby_names) and not nearby_names.str.contains("Data Loss").any():
            for name, diff in zip(nearby_names, date_diffs[in_window]):
                if name in holiday_impacts:
                    # Strict lookup on purpose: a missing offset should fail
                    # loudly rather than be treated as zero impact.
                    impact += holiday_impacts[name][diff.days]["average_impact"]
                else:
                    new_holidays.add(name)
        impacts.append(impact)

    # Only report when something is actually unaccounted for; sort so the
    # diagnostic output is stable between runs.
    if new_holidays:
        print("Unaccounted Holidays:\n - " + "\n - ".join(sorted(new_holidays)))

    return pd.DataFrame({"submission_date": future_dates, "impact": impacts})