public List parse()

in community/document-parsers/spring-ai-alibaba-starter-document-parser-bibtex/src/main/java/com/alibaba/cloud/ai/parser/bibtex/BibtexDocumentParser.java [90:161]


	/**
	 * Parses a BibTeX stream into one {@link Document} per bibliography entry.
	 * <p>
	 * Each document's metadata holds every field of the entry (field name to its
	 * user-facing string value) plus one item keyed by the entry type whose value is
	 * the entry key. The document text is the {@code abstract} field (empty when
	 * absent) followed by the text of the first document that {@code parser} produces
	 * for each attachment named in the {@code file} field. Attachments are looked up
	 * on the classpath; one that cannot be read is logged and skipped.
	 * <p>
	 * When {@code maxDocs} is a positive number, only the first {@code maxDocs}
	 * entries are converted, in the order the database reports them. When
	 * {@code maxContentChars} is a positive number, the text of every document is cut
	 * to at most that many characters.
	 * @param inputStream the BibTeX source, decoded with {@code charsetName}; it is
	 * closed when this method returns
	 * @return the parsed documents, empty when the database has no entries
	 * @throws RuntimeException wrapping any failure raised while reading or parsing
	 */
	public List<Document> parse(InputStream inputStream) {
		try (Reader reader = new InputStreamReader(inputStream, charsetName)) {
			List<Document> documentList = new ArrayList<>(10);
			BibTeXDatabase database = new BibTeXParser().parse(reader);

			// Stopping the loop at the cap keeps the database's iteration order;
			// collecting the limited entries into a HashMap would scramble it.
			boolean limited = maxDocs != null && maxDocs > 0;
			DefaultResourceLoader resourceLoader = new DefaultResourceLoader();

			for (BibTeXEntry entry : database.getEntries().values()) {
				if (limited && documentList.size() >= maxDocs) {
					break;
				}

				Map<String, Object> metadata = new HashMap<>();
				// NOTE(review): the value stored here is whatever entry.getKey() returns
				// (apparently a Key object, not a String) - confirm consumers expect that.
				metadata.put(entry.getType().getValue(), entry.getKey());
				for (Map.Entry<Key, Value> field : entry.getFields().entrySet()) {
					metadata.put(field.getKey().getValue(), field.getValue().toUserString());
				}

				List<String> fileNames = new ArrayList<>();
				if (metadata.containsKey("file")) {
					String fileValue = metadata.get("file").toString();
					if (!Objects.isNull(filePattern)) {
						Matcher matcher = filePattern.matcher(fileValue);
						while (matcher.find()) {
							fileNames.add(matcher.group());
						}
					}
					else {
						Collections.addAll(fileNames, fileValue.split("[;,\\s]+"));
					}
				}

				StringBuilder content = new StringBuilder(metadata.getOrDefault("abstract", "").toString());
				for (String fileName : fileNames) {
					// A leading delimiter in the field produces an empty token; resolving
					// it would address the classpath root instead of a real attachment.
					if (fileName.isEmpty()) {
						continue;
					}
					try (InputStream fileInputStream = resourceLoader.getResource("classpath:/" + fileName)
						.getInputStream()) {
						List<Document> docs = parser.parse(fileInputStream);
						if (!docs.isEmpty()) {
							String text = docs.get(0).getText();
							// Avoid appending the literal "null" for a text-less document.
							if (text != null) {
								content.append(text);
							}
						}
					}
					catch (IOException e) {
						// Log the exception and continue with the next file
						logger.warn("Failed to read file: {}", fileName, e);
					}
				}

				if (maxContentChars != null && maxContentChars > 0 && content.length() > maxContentChars) {
					content.setLength(maxContentChars);
				}

				documentList.add(new Document(content.toString(), metadata));
			}

			return documentList;
		}
		catch (Exception e) {
			logger.error("Error parsing input stream", e);
			throw new RuntimeException("Error parsing input stream", e);
		}

	}