scripts/process_json/process_json.py [60:83]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for key, value in custom_metadata.items():
                if hasattr(metadata, key):
                    setattr(metadata, key, value)

            # screen for pii if requested
            if screen_for_pii:
                pii_detected = screen_text_for_pii(text)
                # if PII is detected, log it, record the item as skipped and move on to the next one
                if pii_detected:
                    logger.info("PII detected in document, skipping")
                    skipped_items.append(item)  # add the skipped item to the list
                    continue

            # extract metadata if requested
            if extract_metadata:
                # extract metadata from the document text
                extracted_metadata = extract_metadata_from_document(
                    f"Text: {text}; Metadata: {str(metadata)}"
                )
                # get a Metadata object from the extracted metadata
                metadata = DocumentMetadata(**extracted_metadata)

            # create a document object with the id or a random id, text and metadata
            document = Document(
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



scripts/process_jsonl/process_jsonl.py [59:82]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            for key, value in custom_metadata.items():
                if hasattr(metadata, key):
                    setattr(metadata, key, value)

            # screen for pii if requested
            if screen_for_pii:
                pii_detected = screen_text_for_pii(text)
                # if PII is detected, log it, record the item as skipped and move on to the next one
                if pii_detected:
                    logger.info("PII detected in document, skipping")
                    skipped_items.append(item)  # add the skipped item to the list
                    continue

            # extract metadata if requested
            if extract_metadata:
                # extract metadata from the document text
                extracted_metadata = extract_metadata_from_document(
                    f"Text: {text}; Metadata: {str(metadata)}"
                )
                # get a Metadata object from the extracted metadata
                metadata = DocumentMetadata(**extracted_metadata)

            # create a document object with the id, text and metadata
            document = Document(
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



