def feed()

in antlir/rpm/parse_repodata.py [0:0]


    def feed(self, chunk: bytes) -> Iterator[Rpm]:
        """Decompress `chunk`, parse the XML incrementally, and yield `Rpm`s.

        The compressed input is inflated in bounded pieces and handed to
        `self.xml_parser`.  For every element the parser reports whose tag
        matches `self.tag_re`, the relevant text or attribute is stored in
        `self._package` under the tag's name.  When the matched tag equals
        `self._PACKAGE`, an `Rpm` is built from the accumulated fields and
        yielded, after which `self._package` is replaced with an empty
        dict -- so a later package that lacks a required field fails with
        `KeyError` instead of silently reusing a stale value.

        `canonical_checksum` is always yielded as `None`.
        """
        remaining = chunk
        while remaining:
            # Inflate a bounded amount of output per step, so that
            # decompression cannot use unbounded amounts of RAM.  More
            # crucially, apparently XMLPullParser gets up to 50% slower on
            # package data if we feed it larger chunks.  This buffer size
            # was picked experimentally :)
            #
            # NB: zlib appears to copy bytes into `unconsumed_tail` instead
            # of using something like `memoryview`, so this has poor
            # theoretical complexity due to all the extra copying.  An
            # extra layer of input chunking could mitigate this, but in
            # practice it seems ok to just limit the incoming chunk size.
            inflated = self.decompressor.decompress(
                remaining, max_length=2 ** 14
            )
            self.xml_parser.feed(inflated)
            remaining = self.decompressor.unconsumed_tail
            for _event, node in self.xml_parser.read_events():
                matched = self.tag_re.match(node.tag)
                if not matched:
                    continue  # Not one of the tags we collect
                tag = matched.group(2)
                attrs = node.attrib
                record = self._package
                # Keep these branches in _KNOWN_TAGS order
                if tag == self._NAME:
                    record[self._NAME] = node.text
                elif tag == self._VERSION:
                    record[self._VERSION] = (
                        attrs["epoch"],
                        attrs["ver"],
                        attrs["rel"],
                    )
                elif tag == self._ARCH:
                    record[self._ARCH] = node.text
                elif tag == self._CHECKSUM:
                    assert attrs["pkgid"] == "YES"
                    record[self._CHECKSUM] = Checksum(
                        algorithm=attrs["type"],
                        # pyre-fixme[6]: Expected `str` for 2nd param but got
                        #  `Optional[str]`.
                        hexdigest=node.text,
                    )
                elif tag == self._LOCATION:
                    record[self._LOCATION] = attrs["href"]
                elif tag == self._PACKAGE:
                    pkg_epoch, pkg_version, pkg_release = record[self._VERSION]
                    yield Rpm(
                        # Keep these kwargs in _KNOWN_TAGS order
                        epoch=int(pkg_epoch),
                        name=record[self._NAME],
                        version=pkg_version,
                        release=pkg_release,
                        arch=record[self._ARCH],
                        checksum=record[self._CHECKSUM],
                        location=record[self._LOCATION],
                        size=int(record[self._SIZE]),
                        source_rpm=record[self._SOURCE_RPM],
                        build_timestamp=int(record[self._TIME]),
                        # This is set after we download the RPM
                        # pyre-fixme[6]: Expected `Checksum` for 11th param but
                        # got `None`.
                        canonical_checksum=None,
                    )
                    # Start from a fresh dict to detect missing fields
                    self._package = {}
                    # Uses less RAM, speeds up the run 50%
                    node.clear()
                elif tag == self._SIZE:
                    record[self._SIZE] = attrs["package"]
                elif tag == self._SOURCE_RPM:
                    record[self._SOURCE_RPM] = node.text or None
                elif tag == self._TIME:
                    record[self._TIME] = attrs["build"]