def loadData()

in data_preparation/metadata_completion/text_cleaner.py [0:0]


def loadData(pathFile):
    """Return the body text of the file at *pathFile*, without its wrapper.

    The file is scanned line by line:

    * The body region opens at the first line that contains ``***START``
      once its spaces are removed, or that contains ``CONTENTS``.
    * From that line onwards, the first line containing ``***END`` (spaces
      removed) closes the region; when there is none, the region runs to
      the end of the file.
    * The first line containing ``Produced by`` inside the region, if any,
      pushes the beginning of the returned text to just after it.

    Lines consisting solely of a newline are skipped at the beginning of
    the returned text.

    Args:
        pathFile: path of the text file to read (opened in text mode with
            the platform's default encoding).

    Returns:
        The selected lines joined into one string, or ``None`` when no
        opening marker is present in the file.
    """
    with open(pathFile, 'r') as handle:
        lines = handle.readlines()

    header_at = None
    credit_at = None
    footer_at = None

    for position, text in enumerate(lines):
        squeezed = text.replace(' ', '')

        if header_at is None:
            # Still looking for the opening marker: ignore everything else.
            if "***START" not in squeezed and "CONTENTS" not in text:
                continue
            header_at = position

        # The opening line itself is also checked for the markers below.
        if "***END" in squeezed:
            footer_at = position
            break

        if credit_at is None and "Produced by" in text:
            credit_at = position

    if header_at is None:
        return None

    if footer_at is None:
        footer_at = len(lines)

    # A credit line at index 0 can only sit on the opening line, so falling
    # back to the opening marker yields the same position in that case.
    if credit_at is not None and credit_at > 0:
        first = credit_at + 1
    else:
        first = header_at + 1

    # Drop leading empty lines (exactly "\n"); this may run past footer_at,
    # in which case the slice below is simply empty.
    total = len(lines)
    while first < total and lines[first] == '\n':
        first += 1

    return ''.join(lines[first:footer_at])