private List parseHtml()

in community/tool-calls/spring-ai-alibaba-starter-tool-calling-baidusearch/src/main/java/com/alibaba/cloud/ai/toolcalling/baidusearch/BaiduSearchService.java [88:157]


	/**
	 * Extracts search results from the HTML of a Baidu result page.
	 * <p>
	 * Only direct children of {@code div#content_left} that carry the
	 * {@code c-container} class are inspected. Of those, an entry is kept when it has
	 * the {@code xpath-log} or {@code result-op} class, or when its {@code tpl}
	 * attribute equals {@code se_com_default}; every other entry is ignored.
	 * <p>
	 * The title is taken from the first {@code h3} when present. The abstract is taken
	 * from {@code div.c-abstract}, falling back to the first element matched by
	 * {@code div}, and finally to the container's own text. A failure while reading a
	 * single entry is logged and that entry is skipped; the remaining entries are still
	 * processed.
	 * @param htmlContent raw HTML of the result page
	 * @return the parsed results in document order (possibly empty), or {@code null}
	 * when the page cannot be parsed or contains no {@code div#content_left} element;
	 * the {@code null} return is kept so existing callers see the same contract
	 */
	private List<SearchResult> parseHtml(String htmlContent) {
		try {
			Document doc = Jsoup.parse(htmlContent);
			Element contentLeft = doc.selectFirst("div#content_left");
			if (contentLeft == null) {
				// Checked explicitly rather than relying on a NullPointerException
				// being swallowed by the catch block below.
				logger.error("Failed to parse HTML content: {}", "element div#content_left not found");
				return null;
			}

			List<SearchResult> listData = new ArrayList<>();
			for (Element div : contentLeft.children()) {
				if (!div.hasClass("c-container")) {
					continue;
				}
				boolean opResult = div.hasClass("xpath-log") || div.hasClass("result-op");
				if (!opResult && !"se_com_default".equals(div.attr("tpl"))) {
					// Neither of the two supported result layouts.
					continue;
				}

				String title = "";
				String abstractText = "";
				try {
					// Each selector is evaluated once and the match reused.
					Element heading = div.selectFirst("h3");
					if (heading != null) {
						title = heading.text().trim();
					}
					else if (opResult) {
						title = div.text().trim().split("\n", 2)[0];
					}
					else {
						Elements children = div.children();
						if (children.isEmpty()) {
							// No element to take a title from: skip the entry, as before,
							// but without going through an IndexOutOfBoundsException.
							logger.error("Failed to parse search result: {}",
									"result container has no child element to use as title");
							continue;
						}
						title = children.get(0).text().trim();
					}

					Element abstractElement = div.selectFirst("div.c-abstract");
					if (abstractElement == null) {
						// NOTE(review): Jsoup selectors may match the context element
						// itself, so for a <div> container this fallback presumably
						// resolves to the container and yields its whole text (title
						// included) - confirm this is intended.
						abstractElement = div.selectFirst("div");
					}
					if (abstractElement != null) {
						abstractText = abstractElement.text().trim();
					}
					else if (opResult) {
						// The text may contain no line break at all; in that case there
						// is simply no abstract, which must not discard the whole result.
						String[] parts = div.text().trim().split("\n", 2);
						abstractText = parts.length > 1 ? parts[1].trim() : "";
					}
					else {
						abstractText = div.text().trim();
					}
				}
				catch (Exception e) {
					// Passing the exception as the last argument keeps the stack trace.
					logger.error("Failed to parse search result: {}", e.getMessage(), e);
					continue;
				}

				listData.add(new SearchResult(title, abstractText));
			}

			return listData;
		}
		catch (Exception e) {
			logger.error("Failed to parse HTML content: {}", e.getMessage(), e);
			return null;
		}
	}