in src/smolagents/default_tools.py [0:0]
def _create_duckduckgo_parser(self):
from html.parser import HTMLParser
class SimpleResultParser(HTMLParser):
def __init__(self):
super().__init__()
self.results = []
self.current = {}
self.capture_title = False
self.capture_description = False
self.capture_link = False
def handle_starttag(self, tag, attrs):
attrs = dict(attrs)
if tag == "a" and attrs.get("class") == "result-link":
self.capture_title = True
elif tag == "td" and attrs.get("class") == "result-snippet":
self.capture_description = True
elif tag == "span" and attrs.get("class") == "link-text":
self.capture_link = True
def handle_endtag(self, tag):
if tag == "a" and self.capture_title:
self.capture_title = False
elif tag == "td" and self.capture_description:
self.capture_description = False
elif tag == "span" and self.capture_link:
self.capture_link = False
elif tag == "tr":
# Store current result if all parts are present
if {"title", "description", "link"} <= self.current.keys():
self.current["description"] = " ".join(self.current["description"])
self.results.append(self.current)
self.current = {}
def handle_data(self, data):
if self.capture_title:
self.current["title"] = data.strip()
elif self.capture_description:
self.current.setdefault("description", [])
self.current["description"].append(data.strip())
elif self.capture_link:
self.current["link"] = "https://" + data.strip()
return SimpleResultParser()