def try_parsing_published_date()

in source/lambda/capture_news_feed/util/newscatcher_helper.py [0:0]


def _infer_published_from_id(feed):
    """Try to fill in ``published``/``published_parsed`` from the article's ``id`` URL.

    The first three path segments of the ``id`` URL are interpreted as
    ``/<year>/<month>/<day>/...``. On success the feed entry is updated in
    place and ``True`` is returned; otherwise the entry is left untouched and
    ``False`` is returned.
    """
    article_id = feed.get("id")
    if not article_id:
        return False

    try:
        # Unpacking raises ValueError when the path has fewer than three
        # segments, so short paths (e.g. "https://host" or "https://host/2024")
        # are rejected here rather than blowing up with an IndexError.
        year, month, day = urlparse(article_id).path.split("/")[1:4]
        d = date(int(year), int(month), int(day))
        published = f"{d.strftime(rss_datetime_fromat_1)}+0000"
    except (ValueError, OverflowError):
        # Non-numeric segments, out-of-range dates, malformed URLs, or
        # integers too large for ``date``.
        return False

    feed["published"] = published
    # Nine fields in the same order as ``time.struct_time``:
    # year, month, day, hour, minute, second, weekday, day-of-year, isdst.
    feed["published_parsed"] = [
        d.year,
        d.month,
        d.day,
        0,
        0,
        0,
        d.weekday(),
        d.timetuple().tm_yday,
        0,
    ]
    return True


def try_parsing_published_date(articles):
    """Ensure every article has a published date, dropping those that cannot get one.

    Articles that already carry a truthy ``published`` value are kept as-is.
    For the others, a date is inferred from the ``/<year>/<month>/<day>/``
    prefix of the URL stored under ``id``; when that works, ``published`` and
    ``published_parsed`` are set on the article. Articles for which no date
    can be inferred are logged and removed, because processing them could
    result in duplicate processing.

    Args:
        articles: mutable list of dict-like feed entries. It is modified in
            place (entries may be updated and/or removed).

    Returns:
        The same ``articles`` list object, after filtering.
    """
    kept = []
    for feed in articles:
        if feed.get("published") or _infer_published_from_id(feed):
            kept.append(feed)
            continue

        # Do not process the article because could not infer published date and may result in duplicate processing
        logger.error(
            f"Removing article with no published date or url path to infer a published date {json.dumps(feed)}"
        )

    # Replace the contents in one step so the caller's list object is still the
    # one mutated, without removing elements while looping over it.
    articles[:] = kept
    return articles