private List readMboxFile()

in community/document-readers/spring-ai-alibaba-starter-document-reader-mbox/src/main/java/com/alibaba/cloud/ai/reader/mbox/MboxDocumentReader.java [112:165]


	/**
	 * Reads {@code mboxFile} line by line (decoded as UTF-8) and splits it into
	 * individual messages, converting each one to a {@link Document} via
	 * {@code parseMessage}.
	 * <p>
	 * A new message begins at every line that matches {@code FROM_LINE_PATTERN}; all
	 * following lines up to the next such line belong to that message. Lines that
	 * appear before the first From line are not part of any message and are ignored.
	 * Messages for which {@code parseMessage} returns {@code null} are skipped and do
	 * not count towards the limit.
	 * <p>
	 * A positive {@code maxCount} caps the number of returned documents; any
	 * non-positive value means "no limit".
	 * @return the parsed documents in file order, or an empty list when the file
	 * contains no line matching {@code FROM_LINE_PATTERN}
	 * @throws IOException if the file cannot be opened for reading
	 */
	private List<Document> readMboxFile() throws IOException {
		List<Document> documents = new ArrayList<>();
		StringBuilder currentMessage = new StringBuilder();
		boolean foundValidFromLine = false;
		// Evaluate the limit once so the loop and the trailing-message handling
		// agree on what "unlimited" means (zero or negative).
		final boolean limited = maxCount > 0;

		try (LineIterator it = FileUtils.lineIterator(mboxFile, StandardCharsets.UTF_8.name())) {
			while (it.hasNext()) {
				String line = it.nextLine();

				if (FROM_LINE_PATTERN.matcher(line).matches()) {
					foundValidFromLine = true;

					// A From line terminates the message collected so far, if any
					if (!currentMessage.isEmpty()) {
						Document doc = parseMessage(currentMessage.toString());
						// Reset before a possible early return so the same message
						// is never parsed a second time.
						currentMessage.setLength(0);
						if (doc != null) {
							documents.add(doc);
							if (limited && documents.size() >= maxCount) {
								return documents;
							}
						}
					}

					// Start the new message with its From line
					currentMessage.append(line).append('\n');
				}
				else if (foundValidFromLine) {
					// Body/header line of the message currently being collected
					currentMessage.append(line).append('\n');
				}
				// else: text before the first From line belongs to no message
			}

			// If no valid From line was found, this is not a valid mbox file
			if (!foundValidFromLine) {
				logger.warn("No valid From line found in file: {}", mboxFile.getAbsolutePath());
				return Collections.emptyList();
			}

			// Process the last message. The limit cannot have been reached here,
			// otherwise the loop would already have returned.
			if (!currentMessage.isEmpty()) {
				Document doc = parseMessage(currentMessage.toString());
				if (doc != null) {
					documents.add(doc);
				}
			}
		}

		return documents;
	}