in source/lambda/capture_news_feed/util/newscatcher_helper.py [0:0]
def _published_fields_from_id(feed):
    """Derive ``published`` / ``published_parsed`` values from the URL stored in ``feed["id"]``.

    The first three path segments of the id URL are read as ``/<year>/<month>/<day>/``.

    Args:
        feed: Article mapping; only its ``"id"`` entry is read.

    Returns:
        A ``(published, published_parsed)`` tuple, or ``None`` when the id is missing or
        empty, cannot be parsed as a URL, has fewer than three path segments, or the
        segments do not form a valid calendar date.
    """
    article_id = feed.get("id")
    if not article_id:
        return None

    try:
        # Unpacking raises ValueError when the path holds fewer than three segments after
        # the leading "/", so short paths are rejected the same way as non-numeric or
        # out-of-range values instead of escaping as an IndexError.
        year, month, day = urlparse(article_id).path.split("/")[1:4]
        d = date(int(year), int(month), int(day))
        published = f"{d.strftime(rss_datetime_fromat_1)}+0000"
    except ValueError:
        return None

    # Nine values in the same field order as time.struct_time; the time of day and the
    # trailing flag are set to 0 because only a calendar date is known.
    published_parsed = [
        d.year,
        d.month,
        d.day,
        0,
        0,
        0,
        d.weekday(),
        d.timetuple().tm_yday,
        0,
    ]
    return published, published_parsed


def try_parsing_published_date(articles):
    """Fill in missing published dates from the article id URL, dropping articles that have none.

    Articles that already carry a truthy ``"published"`` value are left untouched. For the
    others, the date is inferred from the id URL path and written to ``"published"`` and
    ``"published_parsed"``. Articles whose date cannot be inferred are logged and removed.

    Args:
        articles: Mutable list of article mappings. It is modified in place.

    Returns:
        The same ``articles`` list, after in-place updates and removals.
    """
    # Iterate over a snapshot because entries are removed from `articles` inside the loop.
    for feed in list(articles):
        if feed.get("published"):
            continue

        fields = _published_fields_from_id(feed)
        if fields is None:
            # Do not process the article because could not infer published date and may
            # result in duplicate processing
            logger.error(
                f"Removing article with no published date or url path to infer a published date {json.dumps(feed)}"
            )
            articles.remove(feed)
            continue

        feed["published"], feed["published_parsed"] = fields

    return articles