def scrape()

in datasets/geos_fp/pipelines/_images/rolling_copy/script.py [0:0]


def scrape(source_path: str, webpage: bs4.BeautifulSoup) -> typing.List[str]:
    """Build the list of ``.nc4`` file paths linked from a parsed page.

    Every ``<a>`` element of ``webpage`` is inspected. An anchor contributes
    a path only when it has a non-empty ``href`` that ends in ``.nc4``; the
    ``href`` is expected to be a bare filename, e.g.
    ``GEOS.fp.asm.inst1_2d_smp_Nx.20210101_1700.V01.nc4``.

    Args:
        source_path: Prefix placed in front of each filename, separated from
            it by a single ``/``.
        webpage: Parsed page whose anchors are scanned.

    Returns:
        One ``"{source_path}/{href}"`` string per matching anchor, in the
        order the anchors are returned by ``webpage.find_all("a")``.
    """
    # Pull each anchor's href once; anchors without one yield None and are
    # dropped by the truthiness check below.
    hrefs = (anchor.get("href") for anchor in webpage.find_all("a"))

    return [
        f"{source_path}/{href}"
        for href in hrefs
        if href and href.endswith(".nc4")
    ]