in tools/validator.py [0:0]
def validate_asset_path(self, validation_config: ValidationConfig,
                        metadata: Mapping[str, AbstractSet[str]],
                        file_path: str) -> None:
  """Checks whether the given asset path can be downloaded.

  Validation only runs when `_is_asset_path_modified(file_path)` reports that
  the asset-path tag was added or modified; otherwise the call logs a message
  and returns. When it runs, the checks are applied in this order:

  1. Exactly one asset-path value must be present in `metadata`.
  2. The value must end with `self._supported_asset_path_suffix`.
  3. The value must not be a GitHub release download URL, since GitHub's
     robots.txt disallows fetching those.
  4. If `validation_config.do_smoke_test` is true, the asset is handed to
     `self._check_valid_remote_asset` for a download/parse smoke test.

  Args:
    validation_config: The config specifying whether the referenced asset
      should be downloaded. That should only be used for validating individual
      files.
    metadata: Mapping of metadata fields to their values e.g.
      {"asset-path": {"model.tar.gz"}}
    file_path: Path to the validated file.

  Raises:
    MarkdownDocumentationError:
      - if the asset-path key does not contain exactly one element in its set.
      - if the one element does not end in the expected suffix.
      - if github.com/robots.txt forbids downloading the asset.
      - presumably also if the smoke test fails, e.g. the asset can be
        downloaded but not be resolved to a SavedModel (raised from
        `_check_valid_remote_asset`, which is not visible here).
    KeyError: if `metadata` has no asset-path entry at all (unchanged from the
      previous behavior; callers are expected to have checked required tags).
  """
  if not _is_asset_path_modified(file_path):
    logging.info("Skipping asset path validation since the tag is not added "
                 "or modified.")
    return

  asset_paths = metadata[ASSET_PATH_KEY]
  # Report the empty case separately: "no more than one" would be misleading
  # when no value was given at all.
  if not asset_paths:
    raise MarkdownDocumentationError(
        "Exactly one asset-path tag must be specified but none was found.")
  if len(asset_paths) > 1:
    raise MarkdownDocumentationError(
        "No more than one asset-path tag may be specified.")
  # Sets are not indexable; unpack the single remaining element directly.
  (asset_path,) = asset_paths

  if not asset_path.endswith(self._supported_asset_path_suffix):
    raise MarkdownDocumentationError(
        f"Expected asset-path to end with {self._supported_asset_path_suffix}"
        f" but was {asset_path}.")

  # GitHub's robots.txt disallows fetches to */download, which means that
  # the asset-path URL cannot be fetched. Markdown validation should fail if
  # asset-path matches this regex. The dot in the host name is escaped so that
  # only the literal "github.com" host is rejected.
  github_download_url_regex = re.compile(
      r"https://github\.com/.*/releases/download/.*")
  if github_download_url_regex.fullmatch(asset_path):
    raise MarkdownDocumentationError(
        f"The asset-path {asset_path} is a url that cannot be automatically "
        "fetched. Please provide an asset-path that is allowed to be fetched "
        "by its robots.txt.")

  if validation_config.do_smoke_test:
    self._check_valid_remote_asset(asset_path)