in data_preparation/metadata_completion/text_cleaner.py [0:0]
def loadData(pathFile):
    """Return the body of a text file, trimmed to its start/end markers.

    The file is read line by line and scanned for three kinds of marker:

    * a start marker: a line that contains ``***START`` once its spaces
      are removed, or a line that contains ``CONTENTS``;
    * an end marker: a line that contains ``***END`` once its spaces are
      removed (only looked for from the start-marker line onwards);
    * the first line containing ``Produced by`` located between the two.

    Args:
        pathFile: path of the text file to read.

    Returns:
        ``None`` when no start marker exists. Otherwise the concatenation
        of the lines that follow the ``Produced by`` line (or the start
        marker line when there is no usable ``Produced by`` line), with
        leading empty lines skipped, up to but excluding the end marker
        line. When no end marker exists the text runs to the end of the
        file.
    """
    with open(pathFile, 'r') as handle:
        lines = handle.readlines()

    header_at = None   # line index of the start marker
    credit_at = None   # line index of the first "Produced by" line
    footer_at = None   # line index of the end marker

    for position, text in enumerate(lines):
        compact = text.replace(' ', '')

        if header_at is None:
            # Everything before the start marker is ignored.
            if "***START" not in compact and "CONTENTS" not in text:
                continue
            header_at = position

        # The start-marker line itself is also tested for the other markers.
        if "***END" in compact:
            footer_at = position
            break

        if credit_at is None and "Produced by" in text:
            credit_at = position

    if header_at is None:
        return None

    if footer_at is None:
        footer_at = len(lines)

    # A "Produced by" hit on line 0 is deliberately not used as the anchor;
    # the start marker (necessarily also line 0) is used instead.
    if credit_at is not None and credit_at > 0:
        anchor = credit_at
    else:
        anchor = header_at
    first = anchor + 1

    # Skip over lines that consist solely of a newline character.
    first = next(
        (i for i in range(first, len(lines)) if lines[i] != '\n'),
        max(first, len(lines)),
    )

    return ''.join(lines[first:footer_at])