in data_preparation/text_retrieval/hathitrust.py [0:0]
def handle_starttag(self, tag, attrs):
    """Update parser state flags in response to an opening tag.

    Presumably overrides ``html.parser.HTMLParser.handle_starttag``
    (the enclosing class is not visible here -- confirm).

    Args:
        tag: Name of the tag being opened.
        attrs: Iterable of ``(name, value)`` attribute pairs.

    Effects on ``self``:
        * ``<a>``: ``tmpUrl`` is set to the tag's ``href`` value; when the
          attribute occurs more than once, the last occurrence wins.
        * ``<div id="mdpPage">``: ``getNextP`` is set to ``True``.
        * ``<div id="mdpTextEmpty">``: ``emptyPage`` is set to ``True``.
        * ``<p>`` while ``getNextP`` is truthy: ``getTextData`` is set to
          ``True``.
    """
    if tag == "a":
        for attr_name, attr_value in attrs:
            if attr_name != "href":
                continue
            # No break: a later duplicate href overrides an earlier one.
            self.tmpUrl = attr_value
    elif tag == "div":
        # Both ids are tested independently; neither excludes the other.
        if ("id", "mdpPage") in attrs:
            self.getNextP = True
        if ("id", "mdpTextEmpty") in attrs:
            self.emptyPage = True
    elif tag == "p" and self.getNextP:
        # getNextP is only read for <p> tags, so it need not exist otherwise.
        self.getTextData = True