notebooks/util/postproc/config.py [13:79]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class FieldSelectionMethod:
    """A named rule describing how one detected value of a field may be chosen over others

    Instances are simple records holding a `name`, a `sort` callable and a `desc` flag
    (presumably the sort key and a descending-order switch used by the consumer of this rule -
    the consuming code is not in this section, so confirm there).
    """

    def __init__(self, name: str, sort: Callable, desc: bool = False):
        """Record the rule's name, its sort callable, and its `desc` flag (default False)"""
        self.name, self.sort, self.desc = name, sort, desc

    def to_dict(self) -> str:
        """Return the serializable representation of this rule, which is just its name"""
        return self.name


class FieldSelectionMethods(Enum):
    """The known rules for selecting a single value from a field's detections

    Every member's value is a FieldSelectionMethod whose `sort` callable subscripts a detection
    record by one string key ("Confidence", "IxFirstDetection", "IxLastDetection" or "Text").
    """

    # Keyed on the record's "Confidence" entry, with the desc flag set
    CONFIDENCE = FieldSelectionMethod(
        name="confidence",
        sort=lambda detection: detection["Confidence"],
        desc=True,
    )
    # Keyed on the record's "IxFirstDetection" entry, desc flag left at its default
    FIRST = FieldSelectionMethod(
        name="first",
        sort=lambda detection: detection["IxFirstDetection"],
    )
    # Keyed on the record's "IxLastDetection" entry, with the desc flag set
    LAST = FieldSelectionMethod(
        name="last",
        sort=lambda detection: detection["IxLastDetection"],
        desc=True,
    )
    # Keyed on the length of the record's "Text" entry, with the desc flag set
    LONGEST = FieldSelectionMethod(
        name="longest",
        sort=lambda detection: len(detection["Text"]),
        desc=True,
    )
    # Keyed on the length of the record's "Text" entry, desc flag left at its default
    SHORTEST = FieldSelectionMethod(
        name="shortest",
        sort=lambda detection: len(detection["Text"]),
    )


class FieldConfiguration(PascalJsonableDataClass):
    """A JSON-serializable configuration for a field/entity type"""

    def __init__(
        self,
        class_id: int,
        name: str,
        ignore: Optional[bool] = None,
        optional: Optional[bool] = None,
        select: Optional[str] = None,
        annotation_guidance: Optional[str] = None,
    ):
        """Create a FieldConfiguration

        Parameters
        ----------
        class_id : int
            Ordinal ID number of this class, as used by the machine learning model
        name : str
            Human-readable name of the class / entity type
        ignore : Optional[bool]
            When True, the field is left out of post-processing in the OCR pipeline, although the
            ML model is still trained on it. Handy for e.g. trialling a new field type whose
            detection quality is not yet known.
        optional : Optional[bool]
            When True, explicitly marks the field as optional (default None)
        select : Optional[str]
            Case-insensitive name of a FieldSelectionMethods member (e.g. 'confidence') stating
            how the "winning" detected value of the field is chosen. When omitted (or empty), the
            field is treated as multi-value and every detected value is passed through.
        annotation_guidance : Optional[str]
            HTML-tagged guidance on the exact scope of this entity, i.e. what should and should
            not be included so that labelling stays consistent.

        Raises
        ------
        ValueError
            If `select` is provided but does not name a member of FieldSelectionMethods.
        """
        self.class_id = class_id
        self.name = name
        self.ignore = ignore
        self.optional = optional
        self.annotation_guidance = annotation_guidance

        if not select:
            # Nothing (or an empty value) was configured, so no selection method is stored
            self.select = None
            return

        try:
            # Enum member names are upper-case: normalise so the lookup is case-insensitive
            self.select = FieldSelectionMethods[select.upper()].value
        except KeyError as e:
            known_methods = [member.name for member in FieldSelectionMethods]
            raise ValueError(
                "Selection method '{}' configured for field '{}' not in the known list {}".format(
                    select,
                    name,
                    known_methods,
                )
            ) from e
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


pipeline/postprocessing/fn-postprocess/util/config.py [13:79]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class FieldSelectionMethod:
    """A named rule describing how one detected value of a field may be chosen over others

    Instances are simple records holding a `name`, a `sort` callable and a `desc` flag
    (presumably the sort key and a descending-order switch used by the consumer of this rule -
    the consuming code is not in this section, so confirm there).
    """

    def __init__(self, name: str, sort: Callable, desc: bool = False):
        """Record the rule's name, its sort callable, and its `desc` flag (default False)"""
        self.name, self.sort, self.desc = name, sort, desc

    def to_dict(self) -> str:
        """Return the serializable representation of this rule, which is just its name"""
        return self.name


class FieldSelectionMethods(Enum):
    """The known rules for selecting a single value from a field's detections

    Every member's value is a FieldSelectionMethod whose `sort` callable subscripts a detection
    record by one string key ("Confidence", "IxFirstDetection", "IxLastDetection" or "Text").
    """

    # Keyed on the record's "Confidence" entry, with the desc flag set
    CONFIDENCE = FieldSelectionMethod(
        name="confidence",
        sort=lambda detection: detection["Confidence"],
        desc=True,
    )
    # Keyed on the record's "IxFirstDetection" entry, desc flag left at its default
    FIRST = FieldSelectionMethod(
        name="first",
        sort=lambda detection: detection["IxFirstDetection"],
    )
    # Keyed on the record's "IxLastDetection" entry, with the desc flag set
    LAST = FieldSelectionMethod(
        name="last",
        sort=lambda detection: detection["IxLastDetection"],
        desc=True,
    )
    # Keyed on the length of the record's "Text" entry, with the desc flag set
    LONGEST = FieldSelectionMethod(
        name="longest",
        sort=lambda detection: len(detection["Text"]),
        desc=True,
    )
    # Keyed on the length of the record's "Text" entry, desc flag left at its default
    SHORTEST = FieldSelectionMethod(
        name="shortest",
        sort=lambda detection: len(detection["Text"]),
    )


class FieldConfiguration(PascalJsonableDataClass):
    """A JSON-serializable configuration for a field/entity type"""

    def __init__(
        self,
        class_id: int,
        name: str,
        ignore: Optional[bool] = None,
        optional: Optional[bool] = None,
        select: Optional[str] = None,
        annotation_guidance: Optional[str] = None,
    ):
        """Create a FieldConfiguration

        Parameters
        ----------
        class_id : int
            Ordinal ID number of this class, as used by the machine learning model
        name : str
            Human-readable name of the class / entity type
        ignore : Optional[bool]
            When True, the field is left out of post-processing in the OCR pipeline, although the
            ML model is still trained on it. Handy for e.g. trialling a new field type whose
            detection quality is not yet known.
        optional : Optional[bool]
            When True, explicitly marks the field as optional (default None)
        select : Optional[str]
            Case-insensitive name of a FieldSelectionMethods member (e.g. 'confidence') stating
            how the "winning" detected value of the field is chosen. When omitted (or empty), the
            field is treated as multi-value and every detected value is passed through.
        annotation_guidance : Optional[str]
            HTML-tagged guidance on the exact scope of this entity, i.e. what should and should
            not be included so that labelling stays consistent.

        Raises
        ------
        ValueError
            If `select` is provided but does not name a member of FieldSelectionMethods.
        """
        self.class_id = class_id
        self.name = name
        self.ignore = ignore
        self.optional = optional
        self.annotation_guidance = annotation_guidance

        if not select:
            # Nothing (or an empty value) was configured, so no selection method is stored
            self.select = None
            return

        try:
            # Enum member names are upper-case: normalise so the lookup is case-insensitive
            self.select = FieldSelectionMethods[select.upper()].value
        except KeyError as e:
            known_methods = [member.name for member in FieldSelectionMethods]
            raise ValueError(
                "Selection method '{}' configured for field '{}' not in the known list {}".format(
                    select,
                    name,
                    known_methods,
                )
            ) from e
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -