in data_preparation/text_retrieval/bartleby.py [0:0]
def handle_comment(self, tag):
    """Advance the parser's state according to the marker found in *tag*.

    The text is searched for a fixed set of marker substrings; the first
    group that matches decides the transition. Text containing none of the
    markers leaves the parser untouched.

    Args:
        tag: Text searched for the marker substrings (presumably the body
            of an HTML comment -- confirm against the parser base class).

    Raises:
        RuntimeError: If an end-of-title marker is seen while the global
            status is not IN_TITLE, or an end-of-chapter / "AMAZON" marker
            is seen while the global status is not IN_CHAPTER.
    """
    # The title markers must be tested before the chapter markers:
    # "BEGIN CHAPTER" / "END CHAPTER" are substrings of the title markers.
    if "BEGIN CHAPTERTITLE" in tag:
        self.global_status = BarthelebyParser.GLOBAL_STATUS.IN_TITLE
        return

    if "END CHAPTERTITLE" in tag:
        title_open = (
            self.global_status == BarthelebyParser.GLOBAL_STATUS.IN_TITLE
        )
        if not title_open:
            raise RuntimeError("Page of invalid format")
        self.global_status = BarthelebyParser.GLOBAL_STATUS.NONE
        return

    if "BEGIN CHAPTER" in tag or "END MAIN HEADER CODE" in tag:
        # Entering a chapter also resets the local status and records
        # that chapter text has been encountered.
        self.global_status = BarthelebyParser.GLOBAL_STATUS.IN_CHAPTER
        self.local_status = BarthelebyParser.LOCAL_STATUS.NONE
        self.textFound = True
        return

    if "END CHAPTER" in tag or "AMAZON" in tag:
        chapter_open = (
            self.global_status == BarthelebyParser.GLOBAL_STATUS.IN_CHAPTER
        )
        if not chapter_open:
            raise RuntimeError("Page of invalid format")
        self.global_status = BarthelebyParser.GLOBAL_STATUS.NONE