in src/dist.py [0:0]
def get_dist_page_type(url):
    """Classify a dist URL into a page/file type label.

    URLs ending in ``/`` or ``.html`` are treated as pages.  A page gets a
    label derived from its URL, which is then overridden by markers found
    in the page's parsed markup.  Any other URL is classified by its
    suffix only, without loading the page.

    Args:
        url: The URL string to classify.  Some prefixes are matched
            without a leading slash (``community``, ``docs/...``) and
            others with one (``/api/latest/``, ``/spec/``).
            NOTE(review): confirm which form callers actually pass.

    Returns:
        One of the ``Page*``, ``Redirect``, ``Hidden``, ``File_Pdf`` or
        ``File_Text`` labels, or ``None`` when nothing matches.
    """
    if not url.endswith(('/', '.html')):
        # Not a page: only the suffix matters.  The suffixes below are
        # mutually exclusive, so the order of these checks is irrelevant.
        if url.endswith(('package-list', 'index.yml')):
            return 'File_Text'
        if url.endswith('pdf'):
            return 'File_Pdf'
        return None

    # URL-derived label, most specific rule first: the first match here is
    # the rule that wins when several apply.
    if url.startswith("/spec/"):
        url_label = "Page_Spec"
    elif url.startswith("/api/latest/"):
        url_label = "Page_API_stdlib" if "jvm/stdlib" in url else "Page_API_test"
    elif url.endswith('404.html'):
        url_label = 'Page_NotFound'
    elif url.startswith('docs/tutorials'):
        url_label = 'Page_Tutorial'
    elif url.startswith('docs/reference'):
        url_label = 'Page_Reference'
    elif url.startswith('community'):
        url_label = 'Page_Community'
    else:
        url_label = 'Page'

    # Presumably a BeautifulSoup-style document; only ``select_one`` and
    # ``find`` are used on it here -- verify against get_dist_page_xml.
    document = get_dist_page_xml(url)

    # All three queries are always evaluated, in this order.
    article_body = document.select_one("body[data-article-props]")
    refresh_meta = document.find("meta", {"http-equiv": "refresh"})
    noindex_meta = document.find("meta", {"name": "robots", "content": "noindex"})

    # Markup markers outrank the URL-derived label: noindex beats a
    # refresh redirect, which beats an article body.
    if noindex_meta:
        return 'Hidden'
    if refresh_meta:
        return 'Redirect'
    if article_body:
        return 'Page_Documentation'
    return url_label