in mailbox/lucene/src/main/java/org/apache/james/mailbox/lucene/search/LuceneMessageSearchIndex.java [564:761]
/**
 * Builds the Lucene {@link Document} that represents one message in the index.
 *
 * <p>The document receives, in this order: the upper-cased user and mailbox id, the uid,
 * the attachment flag, the serialized message id and thread id (only when the helper returns
 * a non-null value), a composite {@code <MAILBOX_ID>-<UID>} key, the internal date at every
 * resolution from year to millisecond, the save date (when present) from year to second, and
 * the full content size. The full content is then run through a permissive MIME parser whose
 * content handler indexes headers, address fields, the sent date and the lines of every body
 * whose media type matches {@code MEDIA_TYPE_TEXT} or {@code MEDIA_TYPE_MESSAGE}.
 *
 * @param session    session whose user is stored in the {@code USERS} field
 * @param membership message to index
 * @return the populated document
 * @throws IOException   if reading (or closing) the message content fails
 * @throws MimeException if the MIME parser rejects the content
 */
private Document createMessageDocument(final MailboxSession session, final MailboxMessage membership) throws IOException, MimeException {
    final Document doc = new Document();
    // TODO: Better handling
    doc.add(new Field(USERS, session.getUser().asString().toUpperCase(Locale.US), Store.YES, Index.NOT_ANALYZED));
    doc.add(new Field(MAILBOX_ID_FIELD, membership.getMailboxId().serialize().toUpperCase(Locale.US), Store.YES, Index.NOT_ANALYZED));
    doc.add(new NumericField(UID_FIELD, Store.YES, true).setLongValue(membership.getUid().asLong()));
    doc.add(new Field(HAS_ATTACHMENT_FIELD, Boolean.toString(hasAttachment(membership)), Store.YES, Index.NOT_ANALYZED));

    String serializedMessageId = SearchUtil.getSerializedMessageIdIfSupportedByUnderlyingStorageOrNull(membership);
    if (serializedMessageId != null) {
        doc.add(new Field(MESSAGE_ID_FIELD, serializedMessageId, Store.YES, Index.NOT_ANALYZED));
    }
    String serializedThreadId = SearchUtil.getSerializedThreadIdIfSupportedByUnderlyingStorageOrNull(membership);
    if (serializedThreadId != null) {
        doc.add(new Field(THREAD_ID_FIELD, serializedThreadId, Store.YES, Index.NOT_ANALYZED));
    }

    // create a unique key for the document which can be used later on updates to find the document
    doc.add(new Field(ID_FIELD, membership.getMailboxId().serialize().toUpperCase(Locale.US) + "-" + Long.toString(membership.getUid().asLong()), Store.YES, Index.NOT_ANALYZED));

    // the internal date is indexed once per resolution
    doc.add(new Field(INTERNAL_DATE_FIELD_YEAR_RESOLUTION, DateTools.dateToString(membership.getInternalDate(), DateTools.Resolution.YEAR), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field(INTERNAL_DATE_FIELD_MONTH_RESOLUTION, DateTools.dateToString(membership.getInternalDate(), DateTools.Resolution.MONTH), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field(INTERNAL_DATE_FIELD_DAY_RESOLUTION, DateTools.dateToString(membership.getInternalDate(), DateTools.Resolution.DAY), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field(INTERNAL_DATE_FIELD_HOUR_RESOLUTION, DateTools.dateToString(membership.getInternalDate(), DateTools.Resolution.HOUR), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field(INTERNAL_DATE_FIELD_MINUTE_RESOLUTION, DateTools.dateToString(membership.getInternalDate(), DateTools.Resolution.MINUTE), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field(INTERNAL_DATE_FIELD_SECOND_RESOLUTION, DateTools.dateToString(membership.getInternalDate(), DateTools.Resolution.SECOND), Store.NO, Index.NOT_ANALYZED));
    doc.add(new Field(INTERNAL_DATE_FIELD_MILLISECOND_RESOLUTION, DateTools.dateToString(membership.getInternalDate(), DateTools.Resolution.MILLISECOND), Store.NO, Index.NOT_ANALYZED));

    // the save date is optional and is indexed from year down to second resolution
    membership.getSaveDate().ifPresent(saveDate -> {
        doc.add(new Field(SAVE_DATE_FIELD_YEAR_RESOLUTION, DateTools.dateToString(saveDate, DateTools.Resolution.YEAR), Store.NO, Index.NOT_ANALYZED));
        doc.add(new Field(SAVE_DATE_FIELD_MONTH_RESOLUTION, DateTools.dateToString(saveDate, DateTools.Resolution.MONTH), Store.NO, Index.NOT_ANALYZED));
        doc.add(new Field(SAVE_DATE_FIELD_DAY_RESOLUTION, DateTools.dateToString(saveDate, DateTools.Resolution.DAY), Store.NO, Index.NOT_ANALYZED));
        doc.add(new Field(SAVE_DATE_FIELD_HOUR_RESOLUTION, DateTools.dateToString(saveDate, DateTools.Resolution.HOUR), Store.NO, Index.NOT_ANALYZED));
        doc.add(new Field(SAVE_DATE_FIELD_MINUTE_RESOLUTION, DateTools.dateToString(saveDate, DateTools.Resolution.MINUTE), Store.NO, Index.NOT_ANALYZED));
        doc.add(new Field(SAVE_DATE_FIELD_SECOND_RESOLUTION, DateTools.dateToString(saveDate, DateTools.Resolution.SECOND), Store.NO, Index.NOT_ANALYZED));
    });

    doc.add(new NumericField(SIZE_FIELD, Store.YES, true).setLongValue(membership.getFullContentOctets()));

    // content handler which will index the headers and the body of the message
    SimpleContentHandler handler = new SimpleContentHandler() {

        // NOTE(review): if the parser invokes this callback for nested entities as well as for the
        // top-level message, the sort and first-address fields below are added once per entity — confirm.
        @Override
        public void headers(Header header) {
            Date sentDate = null;
            String firstFromMailbox = "";
            String firstToMailbox = "";
            String firstCcMailbox = "";
            String firstFromDisplay = "";
            String firstToDisplay = "";

            for (org.apache.james.mime4j.stream.Field f : header) {
                String headerName = f.getName().toUpperCase(Locale.US);
                String headerValue = f.getBody().toUpperCase(Locale.US);
                String fullValue = f.toString().toUpperCase(Locale.US);

                // every header is indexed both in the catch-all field and in its own prefixed field
                doc.add(new Field(HEADERS_FIELD, fullValue, Store.NO, Index.ANALYZED));
                doc.add(new Field(PREFIX_HEADER_FIELD + headerName, headerValue, Store.NO, Index.ANALYZED));

                // NOTE(review): any field parsed as a DateTimeField updates sentDate, not only "Date" —
                // confirm this is intended.
                if (f instanceof DateTimeField) {
                    DateTimeField dateTimeField = (DateTimeField) f;
                    sentDate = dateTimeField.getDate();
                }

                String field = null;
                if ("To".equalsIgnoreCase(headerName)) {
                    field = TO_FIELD;
                } else if ("From".equalsIgnoreCase(headerName)) {
                    field = FROM_FIELD;
                } else if ("Cc".equalsIgnoreCase(headerName)) {
                    field = CC_FIELD;
                } else if ("Bcc".equalsIgnoreCase(headerName)) {
                    field = BCC_FIELD;
                }

                // Check if we can index the address in the right manner
                if (field != null) {
                    // not sure if we really should reparse it. It may be better to check just for the right type.
                    // But this impl was easier in the first place
                    AddressList aList = LenientAddressParser.DEFAULT.parseAddressList(MimeUtil.unfold(f.getBody()));
                    for (int i = 0; i < aList.size(); i++) {
                        Address address = aList.get(i);
                        if (address instanceof org.apache.james.mime4j.dom.address.Mailbox) {
                            org.apache.james.mime4j.dom.address.Mailbox mailbox = (org.apache.james.mime4j.dom.address.Mailbox) address;
                            String value = AddressFormatter.DEFAULT.encode(mailbox).toUpperCase(Locale.US);
                            doc.add(new Field(field, value, Store.NO, Index.ANALYZED));
                            // remember the first address of To / From / Cc for the FIRST_* fields
                            if (i == 0) {
                                String mailboxAddress = SearchUtil.getMailboxAddress(mailbox);
                                String mailboxDisplay = SearchUtil.getDisplayAddress(mailbox);
                                if ("To".equalsIgnoreCase(headerName)) {
                                    firstToMailbox = mailboxAddress;
                                    firstToDisplay = mailboxDisplay;
                                } else if ("From".equalsIgnoreCase(headerName)) {
                                    firstFromMailbox = mailboxAddress;
                                    firstFromDisplay = mailboxDisplay;
                                } else if ("Cc".equalsIgnoreCase(headerName)) {
                                    firstCcMailbox = mailboxAddress;
                                }
                            }
                        } else if (address instanceof Group) {
                            MailboxList mList = ((Group) address).getMailboxes();
                            for (int a = 0; a < mList.size(); a++) {
                                org.apache.james.mime4j.dom.address.Mailbox mailbox = mList.get(a);
                                String value = AddressFormatter.DEFAULT.encode(mailbox).toUpperCase(Locale.US);
                                doc.add(new Field(field, value, Store.NO, Index.ANALYZED));
                                // first member of a leading group counts as the first address
                                if (i == 0 && a == 0) {
                                    String mailboxAddress = SearchUtil.getMailboxAddress(mailbox);
                                    String mailboxDisplay = SearchUtil.getDisplayAddress(mailbox);
                                    if ("To".equalsIgnoreCase(headerName)) {
                                        firstToMailbox = mailboxAddress;
                                        firstToDisplay = mailboxDisplay;
                                    } else if ("From".equalsIgnoreCase(headerName)) {
                                        firstFromMailbox = mailboxAddress;
                                        firstFromDisplay = mailboxDisplay;
                                    } else if ("Cc".equalsIgnoreCase(headerName)) {
                                        firstCcMailbox = mailboxAddress;
                                    }
                                }
                            }
                        }
                    }
                    // the raw (upper-cased) header value is indexed in the address field as well
                    doc.add(new Field(field, headerValue, Store.NO, Index.ANALYZED));
                } else if (headerName.equalsIgnoreCase("Subject")) {
                    doc.add(new Field(BASE_SUBJECT_FIELD, SearchUtil.getBaseSubject(headerValue), Store.YES, Index.NOT_ANALYZED));
                }
            }

            if (sentDate == null) {
                // no usable date header: only the sort field is written, using the internal date
                sentDate = membership.getInternalDate();
            } else {
                doc.add(new Field(SENT_DATE_FIELD_YEAR_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.YEAR), Store.NO, Index.NOT_ANALYZED));
                doc.add(new Field(SENT_DATE_FIELD_MONTH_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.MONTH), Store.NO, Index.NOT_ANALYZED));
                doc.add(new Field(SENT_DATE_FIELD_DAY_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.DAY), Store.NO, Index.NOT_ANALYZED));
                doc.add(new Field(SENT_DATE_FIELD_HOUR_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.HOUR), Store.NO, Index.NOT_ANALYZED));
                doc.add(new Field(SENT_DATE_FIELD_MINUTE_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.MINUTE), Store.NO, Index.NOT_ANALYZED));
                doc.add(new Field(SENT_DATE_FIELD_SECOND_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.SECOND), Store.NO, Index.NOT_ANALYZED));
                doc.add(new Field(SENT_DATE_FIELD_MILLISECOND_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.MILLISECOND), Store.NO, Index.NOT_ANALYZED));
            }
            doc.add(new Field(SENT_DATE_SORT_FIELD_MILLISECOND_RESOLUTION, DateTools.dateToString(sentDate, DateTools.Resolution.MILLISECOND), Store.NO, Index.NOT_ANALYZED));

            doc.add(new Field(FIRST_FROM_MAILBOX_NAME_FIELD, firstFromMailbox, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(FIRST_TO_MAILBOX_NAME_FIELD, firstToMailbox, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(FIRST_CC_MAILBOX_NAME_FIELD, firstCcMailbox, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(FIRST_FROM_MAILBOX_DISPLAY_FIELD, firstFromDisplay, Store.YES, Index.NOT_ANALYZED));
            doc.add(new Field(FIRST_TO_MAILBOX_DISPLAY_FIELD, firstToDisplay, Store.YES, Index.NOT_ANALYZED));
        }

        @Override
        public void body(BodyDescriptor desc, InputStream in) throws MimeException, IOException {
            String mediaType = desc.getMediaType();
            if (MEDIA_TYPE_TEXT.equalsIgnoreCase(mediaType) || MEDIA_TYPE_MESSAGE.equalsIgnoreCase(mediaType)) {
                String cset = desc.getCharset();
                if (cset == null) {
                    cset = DEFAULT_ENCODING;
                }
                Charset charset;
                try {
                    charset = Charset.forName(cset);
                } catch (IllegalArgumentException e) {
                    // Illegal or unsupported charset name (both are IllegalArgumentException subtypes),
                    // so fall back to the DEFAULT_ENCODING
                    charset = Charset.forName(DEFAULT_ENCODING);
                }

                // Read the content one line after the other and add it to the document
                try (BufferedReader bodyReader = new BufferedReader(new InputStreamReader(in, charset))) {
                    String line;
                    while ((line = bodyReader.readLine()) != null) {
                        doc.add(new Field(BODY_FIELD, line.toUpperCase(Locale.US), Store.NO, Index.ANALYZED));
                    }
                }
            }
        }
    };

    MimeStreamParser parser = new MimeStreamParser(MimeConfig.PERMISSIVE);
    parser.setContentDecoding(true);
    parser.setContentHandler(handler);

    // parse the message to index headers and body; the content stream is opened here,
    // so it is closed here as well, even when parsing fails
    try (InputStream fullContent = membership.getFullContent()) {
        parser.parse(fullContent);
    }
    return doc;
}