def validate_asset_path()

in tools/validator.py [0:0]


  def validate_asset_path(self, validation_config: ValidationConfig,
                          metadata: Mapping[str, AbstractSet[str]],
                          file_path: str) -> None:
    """Checks whether the given asset path can be downloaded.

    If an asset path is added or modified, the function checks whether the path
    has the correct file ending. If the path leads to github.com, it checks that
    the asset is not forbidden to be fetched by GitHub's robots.txt file. If
    `do_smoke_test` is True, it tries to download and parse the asset.

    Args:
      validation_config: The config specifying whether the referenced asset
        should be downloaded. That should only be used for validating individual
        files.
      metadata: Mapping of metadata fields to their values e.g.
        {"asset-path": {"model.tar.gz"}}
      file_path: Path to the validated file

    Raises:
      MarkdownDocumentationError:
        - if the asset-path key does not contain exactly one element in its set.
        - if the one element does not end in the expected suffix.
        - if github.com/robots.txt forbids downloading the asset.
        - if the asset can be downloaded but not be resolved to a SavedModel.
    """
    if not _is_asset_path_modified(file_path):
      logging.info("Skipping asset path validation since the tag is not added "
                   "or modified.")
      return

    asset_paths = metadata[ASSET_PATH_KEY]
    num_asset_paths = len(asset_paths)
    # Report the empty case separately: "no more than one" would be misleading
    # when no asset-path was given at all.
    if num_asset_paths == 0:
      raise MarkdownDocumentationError(
          "Exactly one asset-path tag must be specified but none was found.")
    if num_asset_paths > 1:
      raise MarkdownDocumentationError(
          "No more than one asset-path tag may be specified.")

    # The set holds exactly one element at this point.
    asset_path = next(iter(asset_paths))
    if not asset_path.endswith(self._supported_asset_path_suffix):
      raise MarkdownDocumentationError(
          f"Expected asset-path to end with {self._supported_asset_path_suffix}"
          f" but was {asset_path}.")

    # GitHub's robots.txt disallows fetches to */download, which means that
    # the asset-path URL cannot be fetched. Markdown validation should fail if
    # asset-path matches this regex. The dot in the host name is escaped so
    # that only github.com itself (and not e.g. "githubxcom") is matched.
    github_download_url_regex = re.compile(
        r"https://github\.com/.*/releases/download/.*")
    if github_download_url_regex.fullmatch(asset_path):
      raise MarkdownDocumentationError(
          f"The asset-path {asset_path} is a url that cannot be automatically "
          "fetched. Please provide an asset-path that is allowed to be fetched "
          "by its robots.txt.")

    if validation_config.do_smoke_test:
      self._check_valid_remote_asset(asset_path)