in community/document-readers/spring-ai-alibaba-starter-document-reader-mbox/src/main/java/com/alibaba/cloud/ai/reader/mbox/MboxDocumentReader.java [112:165]
/**
 * Reads {@code mboxFile} line by line as UTF-8 and converts each message into a
 * {@link Document} via {@code parseMessage}.
 * <p>
 * A message starts at a line matching {@code FROM_LINE_PATTERN} and runs up to (but
 * not including) the next such line or the end of the file. Lines that appear before
 * the first separator do not belong to any message and are ignored. Messages for
 * which {@code parseMessage} returns {@code null} are skipped and do not count
 * towards the limit.
 * <p>
 * A positive {@code maxCount} caps the number of returned documents; zero or a
 * negative value means "no limit".
 * @return the parsed documents in file order, or an empty list when the file contains
 * no line matching {@code FROM_LINE_PATTERN}
 * @throws IOException if the file cannot be opened or read
 */
private List<Document> readMboxFile() throws IOException {
	List<Document> documents = new ArrayList<>();
	StringBuilder currentMessage = new StringBuilder();
	boolean foundValidFromLine = false;
	boolean limitReached = false;
	try (LineIterator it = FileUtils.lineIterator(mboxFile, StandardCharsets.UTF_8.name())) {
		while (it.hasNext()) {
			String line = it.nextLine();
			if (FROM_LINE_PATTERN.matcher(line).matches()) {
				// A separator closes the message collected so far (if any).
				if (foundValidFromLine) {
					Document doc = parseMessage(currentMessage.toString());
					if (doc != null) {
						documents.add(doc);
						if (maxCount > 0 && documents.size() >= maxCount) {
							// Stop reading; the buffered message is already handled.
							limitReached = true;
							break;
						}
					}
				}
				foundValidFromLine = true;
				// Start the new message with its From line, dropping the previous
				// message (or any preamble that preceded the first separator).
				currentMessage.setLength(0);
				currentMessage.append(line).append("\n");
			}
			else if (foundValidFromLine) {
				// Only buffer content once we are inside a message; this also avoids
				// loading a whole non-mbox file into memory.
				currentMessage.append(line).append("\n");
			}
		}
	}
	// If no valid From line was found, this is not a valid mbox file
	if (!foundValidFromLine) {
		logger.warn("No valid From line found in file: {}", mboxFile.getAbsolutePath());
		return Collections.emptyList();
	}
	// Process the trailing message. When the limit was hit inside the loop the buffer
	// still holds a message that was already parsed, so it must not be parsed again.
	if (!limitReached) {
		Document doc = parseMessage(currentMessage.toString());
		if (doc != null) {
			documents.add(doc);
		}
	}
	return documents;
}