notebooks/src/code/data/mlm.py [152:162]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        textract_prefix: str = "",
        max_seq_len: int = 512,
    ):
        """Initialize a TextractLayoutLMDatasetForLM"""
        super().__init__(
            textract_path,
            tokenizer,
            manifest_file_path=manifest_file_path,
            textract_prefix=textract_prefix,
            max_seq_len=max_seq_len,
        )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



notebooks/src/code/data/ner.py [307:329]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        textract_prefix: str = "",
        max_seq_len: int = 512,
    ):
        """Initialize a TextractLayoutLMDatasetForTokenClassification

        Arguments
        ---------
        num_labels : int
            Number of entity classes to classify tokens between, *including* the implicit "other"
            class
        annotation_attr : str
            Attribute on the input manifest file where SageMaker Ground Truth-compatible bounding
            box annotations are stored.

        Additional arguments as per TextractLayoutLMDatasetBase
        """
        super().__init__(
            textract_path,
            tokenizer,
            manifest_file_path=manifest_file_path,
            textract_prefix=textract_prefix,
            max_seq_len=max_seq_len,
        )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



