private List parseSingleAddress()

in geronimo-javamail_1.4_spec/src/main/java/javax/mail/internet/AddressParser.java [235:478]


    /**
     * Parse one address from the current position of the token stream.
     *
     * <p>The method first scans forward until it meets a token that reveals the form of the
     * address: a '&lt;' (route address), a ':' (group), or a terminator (simple address).
     * The terminator is a comma, the end of the tokens, or a ';' while inside a group; it is
     * pushed back on the stream so the caller can see it.  The located token range is then
     * optionally validated and converted into {@code InternetAddress} objects.</p>
     *
     * @param tokens  the stream of address tokens being parsed.
     * @param inGroup true when the caller is parsing the members of a group; a ';' then acts as
     *                a terminator, while a ':' or the end of the tokens is reported through
     *                {@code illegalAddress}.
     *
     * @return a list of {@code InternetAddress} objects.  The list is empty when a terminator is
     *         reached before any address token (a null address), holds one entry for route,
     *         group and strictly parsed simple addresses, and holds one entry per blank
     *         delimited token when the lenient rules apply.
     * @throws AddressException declared for syntax problems; presumably raised by
     *         {@code illegalAddress} and the scan/validate helpers, which are not visible here.
     */
    private List parseSingleAddress(TokenStream tokens, boolean inGroup) throws AddressException
    {
        List results = new ArrayList();

        // bounds of the personal (display name) phrase; both stay null when there is none.
        AddressToken personalFirst = null;
        AddressToken personalLast = null;

        // bounds of the address proper.
        AddressToken addrFirst = null;
        AddressToken addrLast = null;

        // The lenient fall-back treats the address as a run of blank delimited tokens.  It is
        // only permitted while every token seen fits that rule set; comments, quoted strings,
        // domain literals, route brackets and colons all switch it off.
        boolean lenientAllowed = true;

        // the form of the address is not known until a trigger token is seen.
        int form = UNKNOWN;

        // Peek at the leading token and put it straight back.  It is remembered because a group
        // starts there, and because a leading comment can serve as the personal information of
        // a simple address.
        AddressToken leading = tokens.nextToken();
        tokens.pushToken(leading);

        // Stage one: scan ahead for the trigger token, recording the address and personal bounds
        // along the way.
        while (form == UNKNOWN) {
            AddressToken current = tokens.nextToken();

            // what the switch below decided about this token; acted upon after the switch.
            boolean startCandidate = false;
            boolean terminator = false;

            switch (current.type) {
                // Comments are skipped here (a leading one may be picked up as personal
                // information later), but their presence demands RFC822 parsing.
                case COMMENT:
                    lenientAllowed = false;
                    break;

                // Inside a group a ';' ends the address just like a comma.  Outside a group it
                // is treated as an ordinary token that needs the fuller RFC822 parsing.
                case SEMICOLON:
                    if (inGroup) {
                        terminator = true;
                    }
                    else {
                        lenientAllowed = false;
                        startCandidate = true;
                    }
                    break;

                // these tokens need the fuller RFC822 parsing and may begin the address.
                case DOMAIN_LITERAL:
                case QUOTED_LITERAL:
                    lenientAllowed = false;
                    startCandidate = true;
                    break;

                // Plain tokens that may begin the address.  Not all are legal in first position;
                // that is flagged later if validation has been requested.
                case ATOM:
                case AT_SIGN:
                case PERIOD:
                    startCandidate = true;
                    break;

                // '<' means a full RFC822 mailbox: the phrase before it is the personal
                // information and the address sits inside the brackets.
                case LEFT_ANGLE:
                    lenientAllowed = false;
                    form = ROUTE_ADDR;
                    // the stored address excludes the brackets, so it starts one token past the
                    // '<'; that token is pushed back so the route scanner sees it too.
                    addrFirst = tokens.nextRealToken();
                    tokens.pushToken(addrFirst);
                    // anything collected so far was the personal phrase, ending before the '<'.
                    if (personalFirst != null) {
                        personalLast = tokens.previousToken(current);
                    }
                    addrLast = scanRouteAddress(tokens, false);
                    break;

                // ':' introduces a group specification.
                case COLON:
                    lenientAllowed = false;
                    // a ':' while already inside a group would be a nested group.
                    if (inGroup) {
                        illegalAddress("Nested group element", current);
                    }
                    form = GROUP_ADDR;
                    // groups carry no personal section and begin at the leading token.
                    personalFirst = null;
                    addrFirst = leading;
                    addrLast = scanGroupAddress(tokens);
                    break;

                // Running out of tokens ends the address, unless a group is still open, in which
                // case its closing ';' is missing.
                case END_OF_TOKENS:
                    if (inGroup) {
                        illegalAddress("Missing ';'", current);
                    }
                    terminator = true;
                    break;

                // a comma ends the current address of an address list.
                case COMMA:
                    terminator = true;
                    break;

                // a '>' without a preceding '<' is flagged here.
                case RIGHT_ANGLE:
                    illegalAddress("Unexpected '>'", current);
                    break;
            }

            // The first real token is provisionally both the personal start and the address
            // start; a later '<' or ':' decides which of the two it really was.
            if (startCandidate && addrFirst == null) {
                if (personalFirst == null) {
                    personalFirst = current;
                }
                addrFirst = current;
            }

            if (terminator) {
                // the caller needs to see the terminator, so hand it back.
                tokens.pushToken(current);
                // nothing was collected before the terminator: a null address.
                if (addrFirst == null) {
                    return results;
                }
                addrLast = tokens.previousToken(current);
                // Without a '<' the tokens cannot be split into personal data and address, so
                // only a leading comment (checked below) can provide personal information.
                personalFirst = null;
                form = SIMPLE_ADDR;
            }
        }

        // Stage two: turn the located ranges into strings.
        String personal = null;
        if (personalFirst != null) {
            TokenStream personalTokens = tokens.section(personalFirst, personalLast);
            personal = personalToString(personalTokens);
        }
        else if (form == SIMPLE_ADDR && leading.type == COMMENT) {
            // for a simple address a leading comment is accepted as the personal information.
            personal = leading.value;
        }

        TokenStream addressTokens = tokens.section(addrFirst, addrLast);

        // validation is skipped entirely for the PARSE_HEADER level.
        if (validationLevel != PARSE_HEADER) {
            if (form == GROUP_ADDR) {
                validateGroup(addressTokens);
            }
            else if (form == ROUTE_ADDR) {
                validateRouteAddr(addressTokens, false);
            }
            else if (form == SIMPLE_ADDR) {
                validateSimpleAddress(addressTokens);
            }
        }

        // validation may already have walked the section, so rewind it before converting.
        addressTokens.reset();

        // The lenient split applies only to a simple address, at the NONSTRICT level, when no
        // token forced the fuller RFC822 rules.
        boolean splitOnBlanks = validationLevel == NONSTRICT && form == SIMPLE_ADDR && lenientAllowed;
        if (splitOnBlanks) {
            // every blank delimited run of tokens becomes an address of its own; no personal
            // information is attached in this form.
            for (TokenStream piece = addressTokens.getBlankDelimitedToken();
                 piece != null;
                 piece = addressTokens.getBlankDelimitedToken()) {
                String text = addressToString(piece);
                InternetAddress entry = new InternetAddress();
                entry.setAddress(text);
                results.add(entry);
            }
            return results;
        }

        // All other forms yield exactly one address.  It is still returned in a list because
        // the lenient form above can produce several.
        String text = addressToString(addressTokens);
        InternetAddress single = new InternetAddress();
        single.setAddress(text);
        try {
            single.setPersonal(personal);
        } catch (UnsupportedEncodingException ignored) {
            // deliberately ignored: the address is returned even if the personal name
            // could not be applied.
        }
        results.add(single);
        return results;
    }