def handle_starttag()

in data_preparation/text_retrieval/hathitrust.py [0:0]


    def handle_starttag(self, tag, attrs):
        """Update parser state in response to an opening tag.

        NOTE(review): presumably the ``html.parser.HTMLParser`` callback of
        the same name; the enclosing class is not visible here -- confirm.

        Args:
            tag: Name of the opening tag.
            attrs: Collection of ``(name, value)`` attribute pairs.

        Effects on ``self``:
            * ``<a>``: ``tmpUrl`` is set to the value of the last ``href``
              attribute, if any is present.
            * ``<div id="mdpPage">``: ``getNextP`` is set to ``True``.
            * ``<div id="mdpTextEmpty">``: ``emptyPage`` is set to ``True``.
            * ``<p>`` while ``getNextP`` is truthy: ``getTextData`` is set
              to ``True``.
        """
        if tag == "a":
            # When several href attributes exist, the last one wins.
            links = [attr_value for attr_name, attr_value in attrs
                     if attr_name == "href"]
            if links:
                self.tmpUrl = links[-1]
        elif tag == "div":
            if ("id", "mdpPage") in attrs:
                self.getNextP = True
            if ("id", "mdpTextEmpty") in attrs:
                self.emptyPage = True
        elif tag == "p" and self.getNextP:
            self.getTextData = True