in geronimo-javamail_1.4_spec/src/main/java/javax/mail/internet/AddressParser.java [235:478]
/**
 * Parse the next single address item from the token stream.
 *
 * The method first scans ahead to classify the item as a simple address,
 * a route address (one containing '<') or a group (one containing ':'),
 * recording the tokens that bound the address and any personal phrase.
 * It then optionally validates the address tokens and converts them into
 * InternetAddress objects.
 *
 * When the scan stops on a comma, end-of-tokens, or (inside a group) a
 * semicolon, that terminator is pushed back onto the stream for the caller.
 *
 * @param tokens  the token stream to read the address from.
 * @param inGroup true if this address is being parsed as a group member; in
 *                that case ';' acts as a terminator, and ':' or end-of-tokens
 *                are reported through illegalAddress().
 *
 * @return a list of the InternetAddress objects produced. The list is empty
 *         when a terminator is reached before any address token was seen,
 *         and may hold several entries when the blank-delimited fallback
 *         rules are applied.
 * @exception AddressException
 *                   declared for syntax problems.
 *                   NOTE(review): presumably raised by illegalAddress() and
 *                   the scan/validate helpers -- confirm against those methods.
 */
private List parseSingleAddress(TokenStream tokens, boolean inGroup) throws AddressException
{
    List results = new ArrayList();

    // bounds of the personal phrase (only retained for route addresses)
    AddressToken phraseFirst = null;
    AddressToken phraseLast = null;

    // bounds of the address proper
    AddressToken addrFirst = null;
    AddressToken addrLast = null;

    // The lenient fallback splits the address into blank-delimited pieces. It stays
    // available only while every token seen is compatible with it; comments, quoted
    // strings, domain literals, '<', ':' and a stray ';' all switch it off.
    boolean lenientAllowed = true;

    // the form of the address is discovered by the scan below
    int form = UNKNOWN;

    // Remember the very first token (needed later for groups and for the leading-comment
    // check), then put it back so the scan sees it again.
    AddressToken leadToken = tokens.nextToken();
    tokens.pushToken(leadToken);

    // Phase one: read tokens until one of them settles what form we are looking at.
    while (form == UNKNOWN) {
        AddressToken current = tokens.nextToken();

        // A comma or end-of-tokens always ends a simple address; a semicolon does so
        // only when we are inside a group.
        boolean endsAddress = current.type == COMMA
                || current.type == END_OF_TOKENS
                || (inGroup && current.type == SEMICOLON);
        // tokens that may begin an address and are still fine for the lenient rules
        boolean plainStart = current.type == ATOM
                || current.type == AT_SIGN
                || current.type == PERIOD;
        // Tokens that may begin an address but demand full RFC822 handling. A semicolon
        // only gets this far outside a group, because the in-group case is treated as a
        // terminator first.
        boolean strictStart = current.type == DOMAIN_LITERAL
                || current.type == QUOTED_LITERAL
                || current.type == SEMICOLON;

        if (current.type == COMMENT) {
            // Comments are skipped during the scan (a leading one may become the personal
            // value of a simple address later), but they rule out the lenient parsing.
            lenientAllowed = false;
        }
        else if (endsAddress) {
            // running out of tokens in the middle of a group means the ';' is missing
            if (inGroup && current.type == END_OF_TOKENS) {
                illegalAddress("Missing ';'", current);
            }
            // the caller needs to see the terminator too
            tokens.pushToken(current);
            // nothing was marked as an address start, so this is a null address
            if (addrFirst == null) {
                return results;
            }
            // the address runs up to the token in front of the terminator
            addrLast = tokens.previousToken(current);
            // With no '<' there is no way to separate a phrase from the address, so the
            // phrase markers are dropped; a leading comment may still supply the personal.
            phraseFirst = null;
            form = SIMPLE_ADDR;
        }
        else if (plainStart || strictStart) {
            if (strictStart) {
                lenientAllowed = false;
            }
            // Only the first such token matters. It may turn out to be the start of the
            // phrase or the start of the address; which one is decided by what ends the scan.
            if (addrFirst == null) {
                if (phraseFirst == null) {
                    phraseFirst = current;
                }
                addrFirst = current;
            }
        }
        else if (current.type == LEFT_ANGLE) {
            // '<' marks the RFC822 mailbox form: phrase first, address inside the brackets
            lenientAllowed = false;
            form = ROUTE_ADDR;
            // The stored address excludes the bracket, so it starts at the next real
            // token, which is handed straight back for the route scanner to consume.
            addrFirst = tokens.nextRealToken();
            tokens.pushToken(addrFirst);
            // if a phrase was started, it ends just before the '<'
            if (phraseFirst != null) {
                phraseLast = tokens.previousToken(current);
            }
            addrLast = scanRouteAddress(tokens, false);
        }
        else if (current.type == COLON) {
            // ':' introduces a group, which is never valid for the lenient rules
            lenientAllowed = false;
            // a group inside a group is not permitted
            if (inGroup) {
                illegalAddress("Nested group element", current);
            }
            form = GROUP_ADDR;
            // groups carry no personal phrase; the address spans from the first token on
            phraseFirst = null;
            addrFirst = leadToken;
            addrLast = scanGroupAddress(tokens);
        }
        else if (current.type == RIGHT_ANGLE) {
            // a '>' showing up during this scan has no matching '<'
            illegalAddress("Unexpected '>'", current);
        }
        // any other token type is simply skipped
    }

    // Phase two: build the result from the bounds found above.
    String personal = null;
    if (phraseFirst != null) {
        // an explicit phrase was delimited, so render it as the personal value
        personal = personalToString(tokens.section(phraseFirst, phraseLast));
    }
    else if (form == SIMPLE_ADDR && leadToken.type == COMMENT) {
        // simple addresses accept a leading comment as the personal value
        personal = leadToken.value;
    }

    TokenStream addrTokens = tokens.section(addrFirst, addrLast);

    // validation is skipped only at the PARSE_HEADER level
    if (validationLevel != PARSE_HEADER) {
        if (form == GROUP_ADDR) {
            validateGroup(addrTokens);
        }
        else if (form == ROUTE_ADDR) {
            validateRouteAddr(addrTokens, false);
        }
        else if (form == SIMPLE_ADDR) {
            validateSimpleAddress(addrTokens);
        }
    }

    // The blank-delimited split applies only to a simple address, parsed at the
    // NONSTRICT level, in which no token objected to the lenient rules.
    boolean lenientSplit = validationLevel == NONSTRICT && form == SIMPLE_ADDR && lenientAllowed;

    // validation may already have walked the section, so rewind before reading it
    addrTokens.reset();

    if (lenientSplit) {
        // every blank-delimited piece becomes an address of its own, with no personal
        for (TokenStream piece = addrTokens.getBlankDelimitedToken();
                piece != null;
                piece = addrTokens.getBlankDelimitedToken()) {
            String pieceText = addressToString(piece);
            InternetAddress entry = new InternetAddress();
            entry.setAddress(pieceText);
            results.add(entry);
        }
        return results;
    }

    // all other cases yield exactly one address, still returned as a list
    String addressText = addressToString(addrTokens);
    InternetAddress parsed = new InternetAddress();
    parsed.setAddress(addressText);
    try {
        parsed.setPersonal(personal);
    } catch (UnsupportedEncodingException ignored) {
        // deliberately ignored: an encoding failure on the personal value does not
        // fail the parse, and the address is still returned
    }
    results.add(parsed);
    return results;
}